From craig.topper at gmail.com Mon Jan 23 00:16:53 2012 From: craig.topper at gmail.com (Craig Topper) Date: Mon, 23 Jan 2012 06:16:53 -0000 Subject: [llvm-commits] [llvm] r148684 - in /llvm/trunk/lib/Target/X86: X86ISelLowering.cpp X86InstrSSE.td Message-ID: <20120123061653.5F1922A6C12C@llvm.org> Author: ctopper Date: Mon Jan 23 00:16:53 2012 New Revision: 148684 URL: http://llvm.org/viewvc/llvm-project?rev=148684&view=rev Log: Custom lower vector shift intrinsics to target specific nodes and remove the patterns that are no longer needed. Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp llvm/trunk/lib/Target/X86/X86InstrSSE.td Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=148684&r1=148683&r2=148684&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original) +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Mon Jan 23 00:16:53 2012 @@ -64,17 +64,6 @@ static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1, SDValue V2); -static SDValue Insert128BitVector(SDValue Result, - SDValue Vec, - SDValue Idx, - SelectionDAG &DAG, - DebugLoc dl); - -static SDValue Extract128BitVector(SDValue Vec, - SDValue Idx, - SelectionDAG &DAG, - DebugLoc dl); - /// Generate a DAG to grab 128-bits from a vector > 128 bits. This /// sets things up to match to an AVX VEXTRACTF128 instruction or a /// simple subregister reference. Idx is an index in the 128 bits we @@ -9157,6 +9146,43 @@ MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV)); } +// getTargetVShiftNOde - Handle vector element shifts where the shift amount +// may or may not be a constant. Takes immediate version of shift as input. 
+static SDValue getTargetVShiftNode(unsigned Opc, DebugLoc dl, EVT VT, + SDValue SrcOp, SDValue ShAmt, + SelectionDAG &DAG) { + assert(ShAmt.getValueType() == MVT::i32 && "ShAmt is not i32"); + + if (isa<ConstantSDNode>(ShAmt)) { + switch (Opc) { + default: llvm_unreachable("Unknown target vector shift node"); + case X86ISD::VSHLI: + case X86ISD::VSRLI: + case X86ISD::VSRAI: + return DAG.getNode(Opc, dl, VT, SrcOp, ShAmt); + } + } + + // Change opcode to non-immediate version + switch (Opc) { + default: llvm_unreachable("Unknown target vector shift node"); + case X86ISD::VSHLI: Opc = X86ISD::VSHL; break; + case X86ISD::VSRLI: Opc = X86ISD::VSRL; break; + case X86ISD::VSRAI: Opc = X86ISD::VSRA; break; + } + + // Need to build a vector containing shift amount + // Shift amount is 32-bits, but SSE instructions read 64-bit, so fill with 0 + SDValue ShOps[4]; + ShOps[0] = ShAmt; + ShOps[1] = DAG.getConstant(0, MVT::i32); + ShOps[2] = DAG.getUNDEF(MVT::i32); + ShOps[3] = DAG.getUNDEF(MVT::i32); + ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, &ShOps[0], 4); + ShAmt = DAG.getNode(ISD::BITCAST, dl, VT, ShAmt); + return DAG.getNode(Opc, dl, VT, SrcOp, ShAmt); +} + SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); @@ -9359,24 +9385,53 @@ return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC); } - // Fix vector shift instructions where the last operand is a non-immediate - // i32 value. 
- case Intrinsic::x86_avx2_pslli_w: - case Intrinsic::x86_avx2_pslli_d: - case Intrinsic::x86_avx2_pslli_q: - case Intrinsic::x86_avx2_psrli_w: - case Intrinsic::x86_avx2_psrli_d: - case Intrinsic::x86_avx2_psrli_q: - case Intrinsic::x86_avx2_psrai_w: - case Intrinsic::x86_avx2_psrai_d: + // SSE/AVX shift intrinsics + case Intrinsic::x86_sse2_psll_w: + case Intrinsic::x86_sse2_psll_d: + case Intrinsic::x86_sse2_psll_q: + case Intrinsic::x86_avx2_psll_w: + case Intrinsic::x86_avx2_psll_d: + case Intrinsic::x86_avx2_psll_q: + return DAG.getNode(X86ISD::VSHL, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + case Intrinsic::x86_sse2_psrl_w: + case Intrinsic::x86_sse2_psrl_d: + case Intrinsic::x86_sse2_psrl_q: + case Intrinsic::x86_avx2_psrl_w: + case Intrinsic::x86_avx2_psrl_d: + case Intrinsic::x86_avx2_psrl_q: + return DAG.getNode(X86ISD::VSRL, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + case Intrinsic::x86_sse2_psra_w: + case Intrinsic::x86_sse2_psra_d: + case Intrinsic::x86_avx2_psra_w: + case Intrinsic::x86_avx2_psra_d: + return DAG.getNode(X86ISD::VSRA, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); case Intrinsic::x86_sse2_pslli_w: case Intrinsic::x86_sse2_pslli_d: case Intrinsic::x86_sse2_pslli_q: + case Intrinsic::x86_avx2_pslli_w: + case Intrinsic::x86_avx2_pslli_d: + case Intrinsic::x86_avx2_pslli_q: + return getTargetVShiftNode(X86ISD::VSHLI, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2), DAG); case Intrinsic::x86_sse2_psrli_w: case Intrinsic::x86_sse2_psrli_d: case Intrinsic::x86_sse2_psrli_q: + case Intrinsic::x86_avx2_psrli_w: + case Intrinsic::x86_avx2_psrli_d: + case Intrinsic::x86_avx2_psrli_q: + return getTargetVShiftNode(X86ISD::VSRLI, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2), DAG); case Intrinsic::x86_sse2_psrai_w: case Intrinsic::x86_sse2_psrai_d: + case Intrinsic::x86_avx2_psrai_w: + case Intrinsic::x86_avx2_psrai_d: + return getTargetVShiftNode(X86ISD::VSRAI, dl, 
Op.getValueType(), + Op.getOperand(1), Op.getOperand(2), DAG); + // Fix vector shift instructions where the last operand is a non-immediate + // i32 value. case Intrinsic::x86_mmx_pslli_w: case Intrinsic::x86_mmx_pslli_d: case Intrinsic::x86_mmx_pslli_q: @@ -9390,103 +9445,40 @@ return SDValue(); unsigned NewIntNo = 0; - EVT ShAmtVT = MVT::v4i32; switch (IntNo) { - case Intrinsic::x86_sse2_pslli_w: - NewIntNo = Intrinsic::x86_sse2_psll_w; - break; - case Intrinsic::x86_sse2_pslli_d: - NewIntNo = Intrinsic::x86_sse2_psll_d; - break; - case Intrinsic::x86_sse2_pslli_q: - NewIntNo = Intrinsic::x86_sse2_psll_q; - break; - case Intrinsic::x86_sse2_psrli_w: - NewIntNo = Intrinsic::x86_sse2_psrl_w; + case Intrinsic::x86_mmx_pslli_w: + NewIntNo = Intrinsic::x86_mmx_psll_w; break; - case Intrinsic::x86_sse2_psrli_d: - NewIntNo = Intrinsic::x86_sse2_psrl_d; + case Intrinsic::x86_mmx_pslli_d: + NewIntNo = Intrinsic::x86_mmx_psll_d; break; - case Intrinsic::x86_sse2_psrli_q: - NewIntNo = Intrinsic::x86_sse2_psrl_q; + case Intrinsic::x86_mmx_pslli_q: + NewIntNo = Intrinsic::x86_mmx_psll_q; break; - case Intrinsic::x86_sse2_psrai_w: - NewIntNo = Intrinsic::x86_sse2_psra_w; + case Intrinsic::x86_mmx_psrli_w: + NewIntNo = Intrinsic::x86_mmx_psrl_w; break; - case Intrinsic::x86_sse2_psrai_d: - NewIntNo = Intrinsic::x86_sse2_psra_d; + case Intrinsic::x86_mmx_psrli_d: + NewIntNo = Intrinsic::x86_mmx_psrl_d; break; - case Intrinsic::x86_avx2_pslli_w: - NewIntNo = Intrinsic::x86_avx2_psll_w; + case Intrinsic::x86_mmx_psrli_q: + NewIntNo = Intrinsic::x86_mmx_psrl_q; break; - case Intrinsic::x86_avx2_pslli_d: - NewIntNo = Intrinsic::x86_avx2_psll_d; + case Intrinsic::x86_mmx_psrai_w: + NewIntNo = Intrinsic::x86_mmx_psra_w; break; - case Intrinsic::x86_avx2_pslli_q: - NewIntNo = Intrinsic::x86_avx2_psll_q; - break; - case Intrinsic::x86_avx2_psrli_w: - NewIntNo = Intrinsic::x86_avx2_psrl_w; - break; - case Intrinsic::x86_avx2_psrli_d: - NewIntNo = Intrinsic::x86_avx2_psrl_d; - break; - 
case Intrinsic::x86_avx2_psrli_q: - NewIntNo = Intrinsic::x86_avx2_psrl_q; - break; - case Intrinsic::x86_avx2_psrai_w: - NewIntNo = Intrinsic::x86_avx2_psra_w; - break; - case Intrinsic::x86_avx2_psrai_d: - NewIntNo = Intrinsic::x86_avx2_psra_d; - break; - default: { - ShAmtVT = MVT::v2i32; - switch (IntNo) { - case Intrinsic::x86_mmx_pslli_w: - NewIntNo = Intrinsic::x86_mmx_psll_w; - break; - case Intrinsic::x86_mmx_pslli_d: - NewIntNo = Intrinsic::x86_mmx_psll_d; - break; - case Intrinsic::x86_mmx_pslli_q: - NewIntNo = Intrinsic::x86_mmx_psll_q; - break; - case Intrinsic::x86_mmx_psrli_w: - NewIntNo = Intrinsic::x86_mmx_psrl_w; - break; - case Intrinsic::x86_mmx_psrli_d: - NewIntNo = Intrinsic::x86_mmx_psrl_d; - break; - case Intrinsic::x86_mmx_psrli_q: - NewIntNo = Intrinsic::x86_mmx_psrl_q; - break; - case Intrinsic::x86_mmx_psrai_w: - NewIntNo = Intrinsic::x86_mmx_psra_w; - break; - case Intrinsic::x86_mmx_psrai_d: - NewIntNo = Intrinsic::x86_mmx_psra_d; - break; - default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. - } + case Intrinsic::x86_mmx_psrai_d: + NewIntNo = Intrinsic::x86_mmx_psra_d; break; - } + default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. } // The vector shift intrinsics with scalars uses 32b shift amounts but // the sse2/mmx shift instructions reads 64 bits. Set the upper 32 bits // to be zero. - SDValue ShOps[4]; - ShOps[0] = ShAmt; - ShOps[1] = DAG.getConstant(0, MVT::i32); - if (ShAmtVT == MVT::v4i32) { - ShOps[2] = DAG.getUNDEF(MVT::i32); - ShOps[3] = DAG.getUNDEF(MVT::i32); - ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, ShAmtVT, &ShOps[0], 4); - } else { - ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, ShAmtVT, &ShOps[0], 2); + ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i32, ShAmt, + DAG.getConstant(0, MVT::i32)); // FIXME this must be lowered to get rid of the invalid type. 
- } EVT VT = Op.getValueType(); ShAmt = DAG.getNode(ISD::BITCAST, dl, VT, ShAmt); @@ -10006,43 +9998,6 @@ return Res; } -// getTargetVShiftNOde - Handle vector element shifts where the shift amount -// may or may not be a constant. Takes immediate version of shift as input. -static SDValue getTargetVShiftNode(unsigned Opc, DebugLoc dl, EVT VT, - SDValue SrcOp, SDValue ShAmt, - SelectionDAG &DAG) { - assert(ShAmt.getValueType() == MVT::i32 && "ShAmt is not i32"); - - if (isa<ConstantSDNode>(ShAmt)) { - switch (Opc) { - default: llvm_unreachable("Unknown target vector shift node"); - case X86ISD::VSHLI: - case X86ISD::VSRLI: - case X86ISD::VSRAI: - return DAG.getNode(Opc, dl, VT, SrcOp, ShAmt); - } - } - - // Change opcode to non-immediate version - switch (Opc) { - default: llvm_unreachable("Unknown target vector shift node"); - case X86ISD::VSHLI: Opc = X86ISD::VSHL; break; - case X86ISD::VSRLI: Opc = X86ISD::VSRL; break; - case X86ISD::VSRAI: Opc = X86ISD::VSRA; break; - } - - // Need to build a vector containing shift amount - // Shift amount is 32-bits, but SSE instructions read 64-bit, so fill with 0 - SDValue ShOps[4]; - ShOps[0] = ShAmt; - ShOps[1] = DAG.getConstant(0, MVT::i32); - ShOps[2] = DAG.getUNDEF(MVT::i32); - ShOps[3] = DAG.getUNDEF(MVT::i32); - ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, &ShOps[0], 4); - ShAmt = DAG.getNode(ISD::BITCAST, dl, VT, ShAmt); - return DAG.getNode(Opc, dl, VT, SrcOp, ShAmt); -} - SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=148684&r1=148683&r2=148684&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original) +++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Mon Jan 23 00:16:53 2012 @@ -3511,8 +3511,9 @@ } multiclass PDI_binop_rmi_int<bits<8> opc, bits<8> opc2, 
Format ImmForm, - string OpcodeStr, Intrinsic IntId, - Intrinsic IntId2, RegisterClass RC, + string OpcodeStr, SDNode OpNode, + SDNode OpNode2, RegisterClass RC, + ValueType DstVT, ValueType SrcVT, PatFrag bc_frag, bit Is2Addr = 1> { // src2 is always 128-bit def rr : PDI; + [(set RC:$dst, (OpNode (DstVT RC:$src1), (SrcVT VR128:$src2)))]>; def rm : PDI; + [(set RC:$dst, (OpNode (DstVT RC:$src1), + (bc_frag (memopv2i64 addr:$src2))))]>; def ri : PDIi8; + [(set RC:$dst, (OpNode2 (DstVT RC:$src1), (i32 imm:$src2)))]>; } } // ExeDomain = SSEPackedInt @@ -3728,32 +3730,24 @@ //===---------------------------------------------------------------------===// let Predicates = [HasAVX] in { -defm VPSLLW : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "vpsllw", - int_x86_sse2_psll_w, int_x86_sse2_pslli_w, - VR128, 0>, VEX_4V; -defm VPSLLD : PDI_binop_rmi_int<0xF2, 0x72, MRM6r, "vpslld", - int_x86_sse2_psll_d, int_x86_sse2_pslli_d, - VR128, 0>, VEX_4V; -defm VPSLLQ : PDI_binop_rmi_int<0xF3, 0x73, MRM6r, "vpsllq", - int_x86_sse2_psll_q, int_x86_sse2_pslli_q, - VR128, 0>, VEX_4V; - -defm VPSRLW : PDI_binop_rmi_int<0xD1, 0x71, MRM2r, "vpsrlw", - int_x86_sse2_psrl_w, int_x86_sse2_psrli_w, - VR128, 0>, VEX_4V; -defm VPSRLD : PDI_binop_rmi_int<0xD2, 0x72, MRM2r, "vpsrld", - int_x86_sse2_psrl_d, int_x86_sse2_psrli_d, - VR128, 0>, VEX_4V; -defm VPSRLQ : PDI_binop_rmi_int<0xD3, 0x73, MRM2r, "vpsrlq", - int_x86_sse2_psrl_q, int_x86_sse2_psrli_q, - VR128, 0>, VEX_4V; - -defm VPSRAW : PDI_binop_rmi_int<0xE1, 0x71, MRM4r, "vpsraw", - int_x86_sse2_psra_w, int_x86_sse2_psrai_w, - VR128, 0>, VEX_4V; -defm VPSRAD : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "vpsrad", - int_x86_sse2_psra_d, int_x86_sse2_psrai_d, - VR128, 0>, VEX_4V; +defm VPSLLW : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "vpsllw", X86vshl, X86vshli, + VR128, v8i16, v8i16, bc_v8i16, 0>, VEX_4V; +defm VPSLLD : PDI_binop_rmi_int<0xF2, 0x72, MRM6r, "vpslld", X86vshl, X86vshli, + VR128, v4i32, v4i32, bc_v4i32, 0>, VEX_4V; +defm VPSLLQ : 
PDI_binop_rmi_int<0xF3, 0x73, MRM6r, "vpsllq", X86vshl, X86vshli, + VR128, v2i64, v2i64, bc_v2i64, 0>, VEX_4V; + +defm VPSRLW : PDI_binop_rmi_int<0xD1, 0x71, MRM2r, "vpsrlw", X86vsrl, X86vsrli, + VR128, v8i16, v8i16, bc_v8i16, 0>, VEX_4V; +defm VPSRLD : PDI_binop_rmi_int<0xD2, 0x72, MRM2r, "vpsrld", X86vsrl, X86vsrli, + VR128, v4i32, v4i32, bc_v4i32, 0>, VEX_4V; +defm VPSRLQ : PDI_binop_rmi_int<0xD3, 0x73, MRM2r, "vpsrlq", X86vsrl, X86vsrli, + VR128, v2i64, v2i64, bc_v2i64, 0>, VEX_4V; + +defm VPSRAW : PDI_binop_rmi_int<0xE1, 0x71, MRM4r, "vpsraw", X86vsra, X86vsrai, + VR128, v8i16, v8i16, bc_v8i16, 0>, VEX_4V; +defm VPSRAD : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "vpsrad", X86vsra, X86vsrai, + VR128, v4i32, v4i32, bc_v4i32, 0>, VEX_4V; let ExeDomain = SSEPackedInt in { // 128-bit logical shifts. @@ -3774,32 +3768,24 @@ } // Predicates = [HasAVX] let Predicates = [HasAVX2] in { -defm VPSLLWY : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "vpsllw", - int_x86_avx2_psll_w, int_x86_avx2_pslli_w, - VR256, 0>, VEX_4V; -defm VPSLLDY : PDI_binop_rmi_int<0xF2, 0x72, MRM6r, "vpslld", - int_x86_avx2_psll_d, int_x86_avx2_pslli_d, - VR256, 0>, VEX_4V; -defm VPSLLQY : PDI_binop_rmi_int<0xF3, 0x73, MRM6r, "vpsllq", - int_x86_avx2_psll_q, int_x86_avx2_pslli_q, - VR256, 0>, VEX_4V; - -defm VPSRLWY : PDI_binop_rmi_int<0xD1, 0x71, MRM2r, "vpsrlw", - int_x86_avx2_psrl_w, int_x86_avx2_psrli_w, - VR256, 0>, VEX_4V; -defm VPSRLDY : PDI_binop_rmi_int<0xD2, 0x72, MRM2r, "vpsrld", - int_x86_avx2_psrl_d, int_x86_avx2_psrli_d, - VR256, 0>, VEX_4V; -defm VPSRLQY : PDI_binop_rmi_int<0xD3, 0x73, MRM2r, "vpsrlq", - int_x86_avx2_psrl_q, int_x86_avx2_psrli_q, - VR256, 0>, VEX_4V; - -defm VPSRAWY : PDI_binop_rmi_int<0xE1, 0x71, MRM4r, "vpsraw", - int_x86_avx2_psra_w, int_x86_avx2_psrai_w, - VR256, 0>, VEX_4V; -defm VPSRADY : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "vpsrad", - int_x86_avx2_psra_d, int_x86_avx2_psrai_d, - VR256, 0>, VEX_4V; +defm VPSLLWY : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "vpsllw", X86vshl, 
X86vshli, + VR256, v16i16, v8i16, bc_v8i16, 0>, VEX_4V; +defm VPSLLDY : PDI_binop_rmi_int<0xF2, 0x72, MRM6r, "vpslld", X86vshl, X86vshli, + VR256, v8i32, v4i32, bc_v4i32, 0>, VEX_4V; +defm VPSLLQY : PDI_binop_rmi_int<0xF3, 0x73, MRM6r, "vpsllq", X86vshl, X86vshli, + VR256, v4i64, v2i64, bc_v2i64, 0>, VEX_4V; + +defm VPSRLWY : PDI_binop_rmi_int<0xD1, 0x71, MRM2r, "vpsrlw", X86vsrl, X86vsrli, + VR256, v16i16, v8i16, bc_v8i16, 0>, VEX_4V; +defm VPSRLDY : PDI_binop_rmi_int<0xD2, 0x72, MRM2r, "vpsrld", X86vsrl, X86vsrli, + VR256, v8i32, v4i32, bc_v4i32, 0>, VEX_4V; +defm VPSRLQY : PDI_binop_rmi_int<0xD3, 0x73, MRM2r, "vpsrlq", X86vsrl, X86vsrli, + VR256, v4i64, v2i64, bc_v2i64, 0>, VEX_4V; + +defm VPSRAWY : PDI_binop_rmi_int<0xE1, 0x71, MRM4r, "vpsraw", X86vsra, X86vsrai, + VR256, v16i16, v8i16, bc_v8i16, 0>, VEX_4V; +defm VPSRADY : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "vpsrad", X86vsra, X86vsrai, + VR256, v8i32, v4i32, bc_v4i32, 0>, VEX_4V; let ExeDomain = SSEPackedInt in { // 256-bit logical shifts. 
@@ -3820,32 +3806,24 @@ } // Predicates = [HasAVX2] let Constraints = "$src1 = $dst" in { -defm PSLLW : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "psllw", - int_x86_sse2_psll_w, int_x86_sse2_pslli_w, - VR128>; -defm PSLLD : PDI_binop_rmi_int<0xF2, 0x72, MRM6r, "pslld", - int_x86_sse2_psll_d, int_x86_sse2_pslli_d, - VR128>; -defm PSLLQ : PDI_binop_rmi_int<0xF3, 0x73, MRM6r, "psllq", - int_x86_sse2_psll_q, int_x86_sse2_pslli_q, - VR128>; - -defm PSRLW : PDI_binop_rmi_int<0xD1, 0x71, MRM2r, "psrlw", - int_x86_sse2_psrl_w, int_x86_sse2_psrli_w, - VR128>; -defm PSRLD : PDI_binop_rmi_int<0xD2, 0x72, MRM2r, "psrld", - int_x86_sse2_psrl_d, int_x86_sse2_psrli_d, - VR128>; -defm PSRLQ : PDI_binop_rmi_int<0xD3, 0x73, MRM2r, "psrlq", - int_x86_sse2_psrl_q, int_x86_sse2_psrli_q, - VR128>; - -defm PSRAW : PDI_binop_rmi_int<0xE1, 0x71, MRM4r, "psraw", - int_x86_sse2_psra_w, int_x86_sse2_psrai_w, - VR128>; -defm PSRAD : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "psrad", - int_x86_sse2_psra_d, int_x86_sse2_psrai_d, - VR128>; +defm PSLLW : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "psllw", X86vshl, X86vshli, + VR128, v8i16, v8i16, bc_v8i16>; +defm PSLLD : PDI_binop_rmi_int<0xF2, 0x72, MRM6r, "pslld", X86vshl, X86vshli, + VR128, v4i32, v4i32, bc_v4i32>; +defm PSLLQ : PDI_binop_rmi_int<0xF3, 0x73, MRM6r, "psllq", X86vshl, X86vshli, + VR128, v2i64, v2i64, bc_v2i64>; + +defm PSRLW : PDI_binop_rmi_int<0xD1, 0x71, MRM2r, "psrlw", X86vsrl, X86vsrli, + VR128, v8i16, v8i16, bc_v8i16>; +defm PSRLD : PDI_binop_rmi_int<0xD2, 0x72, MRM2r, "psrld", X86vsrl, X86vsrli, + VR128, v4i32, v4i32, bc_v4i32>; +defm PSRLQ : PDI_binop_rmi_int<0xD3, 0x73, MRM2r, "psrlq", X86vsrl, X86vsrli, + VR128, v2i64, v2i64, bc_v2i64>; + +defm PSRAW : PDI_binop_rmi_int<0xE1, 0x71, MRM4r, "psraw", X86vsra, X86vsrai, + VR128, v8i16, v8i16, bc_v8i16>; +defm PSRAD : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "psrad", X86vsra, X86vsrai, + VR128, v4i32, v4i32, bc_v4i32>; let ExeDomain = SSEPackedInt in { // 128-bit logical shifts. 
@@ -3876,60 +3854,6 @@ (VPSLLDQri VR128:$src, (BYTE_imm imm:$amt))>; def : Pat<(v2i64 (X86vshrdq VR128:$src, (i8 imm:$amt))), (VPSRLDQri VR128:$src, (BYTE_imm imm:$amt))>; - - def : Pat<(v8i16 (X86vshli VR128:$src1, (i32 imm:$src2))), - (VPSLLWri VR128:$src1, imm:$src2)>; - def : Pat<(v4i32 (X86vshli VR128:$src1, (i32 imm:$src2))), - (VPSLLDri VR128:$src1, imm:$src2)>; - def : Pat<(v2i64 (X86vshli VR128:$src1, (i32 imm:$src2))), - (VPSLLQri VR128:$src1, imm:$src2)>; - - def : Pat<(v8i16 (X86vsrli VR128:$src1, (i32 imm:$src2))), - (VPSRLWri VR128:$src1, imm:$src2)>; - def : Pat<(v4i32 (X86vsrli VR128:$src1, (i32 imm:$src2))), - (VPSRLDri VR128:$src1, imm:$src2)>; - def : Pat<(v2i64 (X86vsrli VR128:$src1, (i32 imm:$src2))), - (VPSRLQri VR128:$src1, imm:$src2)>; - - def : Pat<(v8i16 (X86vsrai VR128:$src1, (i32 imm:$src2))), - (VPSRAWri VR128:$src1, imm:$src2)>; - def : Pat<(v4i32 (X86vsrai VR128:$src1, (i32 imm:$src2))), - (VPSRADri VR128:$src1, imm:$src2)>; - - def : Pat<(v8i16 (X86vshl VR128:$src1, (v8i16 VR128:$src2))), - (VPSLLWrr VR128:$src1, VR128:$src2)>; - def : Pat<(v8i16 (X86vshl VR128:$src1, (bc_v8i16 (memopv2i64 addr:$src2)))), - (VPSLLWrm VR128:$src1, addr:$src2)>; - def : Pat<(v4i32 (X86vshl VR128:$src1, (v4i32 VR128:$src2))), - (VPSLLDrr VR128:$src1, VR128:$src2)>; - def : Pat<(v4i32 (X86vshl VR128:$src1, (bc_v4i32 (memopv2i64 addr:$src2)))), - (VPSLLDrm VR128:$src1, addr:$src2)>; - def : Pat<(v2i64 (X86vshl VR128:$src1, (v2i64 VR128:$src2))), - (VPSLLQrr VR128:$src1, VR128:$src2)>; - def : Pat<(v2i64 (X86vshl VR128:$src1, (memopv2i64 addr:$src2))), - (VPSLLQrm VR128:$src1, addr:$src2)>; - - def : Pat<(v8i16 (X86vsrl VR128:$src1, (v8i16 VR128:$src2))), - (VPSRLWrr VR128:$src1, VR128:$src2)>; - def : Pat<(v8i16 (X86vsrl VR128:$src1, (bc_v8i16 (memopv2i64 addr:$src2)))), - (VPSRLWrm VR128:$src1, addr:$src2)>; - def : Pat<(v4i32 (X86vsrl VR128:$src1, (v4i32 VR128:$src2))), - (VPSRLDrr VR128:$src1, VR128:$src2)>; - def : Pat<(v4i32 (X86vsrl VR128:$src1, 
(bc_v4i32 (memopv2i64 addr:$src2)))), - (VPSRLDrm VR128:$src1, addr:$src2)>; - def : Pat<(v2i64 (X86vsrl VR128:$src1, (v2i64 VR128:$src2))), - (VPSRLQrr VR128:$src1, VR128:$src2)>; - def : Pat<(v2i64 (X86vsrl VR128:$src1, (memopv2i64 addr:$src2))), - (VPSRLQrm VR128:$src1, addr:$src2)>; - - def : Pat<(v8i16 (X86vsra VR128:$src1, (v8i16 VR128:$src2))), - (VPSRAWrr VR128:$src1, VR128:$src2)>; - def : Pat<(v8i16 (X86vsra VR128:$src1, (bc_v8i16 (memopv2i64 addr:$src2)))), - (VPSRAWrm VR128:$src1, addr:$src2)>; - def : Pat<(v4i32 (X86vsra VR128:$src1, (v4i32 VR128:$src2))), - (VPSRADrr VR128:$src1, VR128:$src2)>; - def : Pat<(v4i32 (X86vsra VR128:$src1, (bc_v4i32 (memopv2i64 addr:$src2)))), - (VPSRADrm VR128:$src1, addr:$src2)>; } let Predicates = [HasAVX2] in { @@ -3937,60 +3861,6 @@ (VPSLLDQYri VR256:$src1, (BYTE_imm imm:$src2))>; def : Pat<(int_x86_avx2_psrl_dq VR256:$src1, imm:$src2), (VPSRLDQYri VR256:$src1, (BYTE_imm imm:$src2))>; - - def : Pat<(v16i16 (X86vshli VR256:$src1, (i32 imm:$src2))), - (VPSLLWYri VR256:$src1, imm:$src2)>; - def : Pat<(v8i32 (X86vshli VR256:$src1, (i32 imm:$src2))), - (VPSLLDYri VR256:$src1, imm:$src2)>; - def : Pat<(v4i64 (X86vshli VR256:$src1, (i32 imm:$src2))), - (VPSLLQYri VR256:$src1, imm:$src2)>; - - def : Pat<(v16i16 (X86vsrli VR256:$src1, (i32 imm:$src2))), - (VPSRLWYri VR256:$src1, imm:$src2)>; - def : Pat<(v8i32 (X86vsrli VR256:$src1, (i32 imm:$src2))), - (VPSRLDYri VR256:$src1, imm:$src2)>; - def : Pat<(v4i64 (X86vsrli VR256:$src1, (i32 imm:$src2))), - (VPSRLQYri VR256:$src1, imm:$src2)>; - - def : Pat<(v16i16 (X86vsrai VR256:$src1, (i32 imm:$src2))), - (VPSRAWYri VR256:$src1, imm:$src2)>; - def : Pat<(v8i32 (X86vsrai VR256:$src1, (i32 imm:$src2))), - (VPSRADYri VR256:$src1, imm:$src2)>; - - def : Pat<(v16i16 (X86vshl VR256:$src1, (v8i16 VR128:$src2))), - (VPSLLWYrr VR256:$src1, VR128:$src2)>; - def : Pat<(v16i16 (X86vshl VR256:$src1, (bc_v8i16 (memopv2i64 addr:$src2)))), - (VPSLLWYrm VR256:$src1, addr:$src2)>; - def : 
Pat<(v8i32 (X86vshl VR256:$src1, (v4i32 VR128:$src2))), - (VPSLLDYrr VR256:$src1, VR128:$src2)>; - def : Pat<(v8i32 (X86vshl VR256:$src1, (bc_v4i32 (memopv2i64 addr:$src2)))), - (VPSLLDYrm VR256:$src1, addr:$src2)>; - def : Pat<(v4i64 (X86vshl VR256:$src1, (v2i64 VR128:$src2))), - (VPSLLQYrr VR256:$src1, VR128:$src2)>; - def : Pat<(v4i64 (X86vshl VR256:$src1, (memopv2i64 addr:$src2))), - (VPSLLQYrm VR256:$src1, addr:$src2)>; - - def : Pat<(v16i16 (X86vsrl VR256:$src1, (v8i16 VR128:$src2))), - (VPSRLWYrr VR256:$src1, VR128:$src2)>; - def : Pat<(v16i16 (X86vsrl VR256:$src1, (bc_v8i16 (memopv2i64 addr:$src2)))), - (VPSRLWYrm VR256:$src1, addr:$src2)>; - def : Pat<(v8i32 (X86vsrl VR256:$src1, (v4i32 VR128:$src2))), - (VPSRLDYrr VR256:$src1, VR128:$src2)>; - def : Pat<(v8i32 (X86vsrl VR256:$src1, (bc_v4i32 (memopv2i64 addr:$src2)))), - (VPSRLDYrm VR256:$src1, addr:$src2)>; - def : Pat<(v4i64 (X86vsrl VR256:$src1, (v2i64 VR128:$src2))), - (VPSRLQYrr VR256:$src1, VR128:$src2)>; - def : Pat<(v4i64 (X86vsrl VR256:$src1, (memopv2i64 addr:$src2))), - (VPSRLQYrm VR256:$src1, addr:$src2)>; - - def : Pat<(v16i16 (X86vsra VR256:$src1, (v8i16 VR128:$src2))), - (VPSRAWYrr VR256:$src1, VR128:$src2)>; - def : Pat<(v16i16 (X86vsra VR256:$src1, (bc_v8i16 (memopv2i64 addr:$src2)))), - (VPSRAWYrm VR256:$src1, addr:$src2)>; - def : Pat<(v8i32 (X86vsra VR256:$src1, (v4i32 VR128:$src2))), - (VPSRADYrr VR256:$src1, VR128:$src2)>; - def : Pat<(v8i32 (X86vsra VR256:$src1, (bc_v4i32 (memopv2i64 addr:$src2)))), - (VPSRADYrm VR256:$src1, addr:$src2)>; } let Predicates = [HasSSE2] in { @@ -4006,60 +3876,6 @@ (PSLLDQri VR128:$src, (BYTE_imm imm:$amt))>; def : Pat<(v2i64 (X86vshrdq VR128:$src, (i8 imm:$amt))), (PSRLDQri VR128:$src, (BYTE_imm imm:$amt))>; - - def : Pat<(v8i16 (X86vshli VR128:$src1, (i32 imm:$src2))), - (PSLLWri VR128:$src1, imm:$src2)>; - def : Pat<(v4i32 (X86vshli VR128:$src1, (i32 imm:$src2))), - (PSLLDri VR128:$src1, imm:$src2)>; - def : Pat<(v2i64 (X86vshli VR128:$src1, (i32 
imm:$src2))), - (PSLLQri VR128:$src1, imm:$src2)>; - - def : Pat<(v8i16 (X86vsrli VR128:$src1, (i32 imm:$src2))), - (PSRLWri VR128:$src1, imm:$src2)>; - def : Pat<(v4i32 (X86vsrli VR128:$src1, (i32 imm:$src2))), - (PSRLDri VR128:$src1, imm:$src2)>; - def : Pat<(v2i64 (X86vsrli VR128:$src1, (i32 imm:$src2))), - (PSRLQri VR128:$src1, imm:$src2)>; - - def : Pat<(v8i16 (X86vsrai VR128:$src1, (i32 imm:$src2))), - (PSRAWri VR128:$src1, imm:$src2)>; - def : Pat<(v4i32 (X86vsrai VR128:$src1, (i32 imm:$src2))), - (PSRADri VR128:$src1, imm:$src2)>; - - def : Pat<(v8i16 (X86vshl VR128:$src1, (v8i16 VR128:$src2))), - (PSLLWrr VR128:$src1, VR128:$src2)>; - def : Pat<(v8i16 (X86vshl VR128:$src1, (bc_v8i16 (memopv2i64 addr:$src2)))), - (PSLLWrm VR128:$src1, addr:$src2)>; - def : Pat<(v4i32 (X86vshl VR128:$src1, (v4i32 VR128:$src2))), - (PSLLDrr VR128:$src1, VR128:$src2)>; - def : Pat<(v4i32 (X86vshl VR128:$src1, (bc_v4i32 (memopv2i64 addr:$src2)))), - (PSLLDrm VR128:$src1, addr:$src2)>; - def : Pat<(v2i64 (X86vshl VR128:$src1, (v2i64 VR128:$src2))), - (PSLLQrr VR128:$src1, VR128:$src2)>; - def : Pat<(v2i64 (X86vshl VR128:$src1, (memopv2i64 addr:$src2))), - (PSLLQrm VR128:$src1, addr:$src2)>; - - def : Pat<(v8i16 (X86vsrl VR128:$src1, (v8i16 VR128:$src2))), - (PSRLWrr VR128:$src1, VR128:$src2)>; - def : Pat<(v8i16 (X86vsrl VR128:$src1, (bc_v8i16 (memopv2i64 addr:$src2)))), - (PSRLWrm VR128:$src1, addr:$src2)>; - def : Pat<(v4i32 (X86vsrl VR128:$src1, (v4i32 VR128:$src2))), - (PSRLDrr VR128:$src1, VR128:$src2)>; - def : Pat<(v4i32 (X86vsrl VR128:$src1, (bc_v4i32 (memopv2i64 addr:$src2)))), - (PSRLDrm VR128:$src1, addr:$src2)>; - def : Pat<(v2i64 (X86vsrl VR128:$src1, (v2i64 VR128:$src2))), - (PSRLQrr VR128:$src1, VR128:$src2)>; - def : Pat<(v2i64 (X86vsrl VR128:$src1, (memopv2i64 addr:$src2))), - (PSRLQrm VR128:$src1, addr:$src2)>; - - def : Pat<(v8i16 (X86vsra VR128:$src1, (v8i16 VR128:$src2))), - (PSRAWrr VR128:$src1, VR128:$src2)>; - def : Pat<(v8i16 (X86vsra VR128:$src1, 
(bc_v8i16 (memopv2i64 addr:$src2)))), - (PSRAWrm VR128:$src1, addr:$src2)>; - def : Pat<(v4i32 (X86vsra VR128:$src1, (v4i32 VR128:$src2))), - (PSRADrr VR128:$src1, VR128:$src2)>; - def : Pat<(v4i32 (X86vsra VR128:$src1, (bc_v4i32 (memopv2i64 addr:$src2)))), - (PSRADrm VR128:$src1, addr:$src2)>; } //===---------------------------------------------------------------------===// From craig.topper at gmail.com Mon Jan 23 00:46:23 2012 From: craig.topper at gmail.com (Craig Topper) Date: Mon, 23 Jan 2012 06:46:23 -0000 Subject: [llvm-commits] [llvm] r148685 - /llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Message-ID: <20120123064623.4E9202A6C12C@llvm.org> Author: ctopper Date: Mon Jan 23 00:46:22 2012 New Revision: 148685 URL: http://llvm.org/viewvc/llvm-project?rev=148685&view=rev Log: Update more places to use target specific nodes for vector shifts instead of intrinsics. Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=148685&r1=148684&r2=148685&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original) +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Mon Jan 23 00:46:22 2012 @@ -9934,12 +9934,10 @@ // AhiBlo = __builtin_ia32_psllqi256( AhiBlo, 32 ); // return AloBlo + AloBhi + AhiBlo; - SDValue Ahi = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, - DAG.getConstant(Intrinsic::x86_avx2_psrli_q, MVT::i32), - A, DAG.getConstant(32, MVT::i32)); - SDValue Bhi = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, - DAG.getConstant(Intrinsic::x86_avx2_psrli_q, MVT::i32), - B, DAG.getConstant(32, MVT::i32)); + SDValue Ahi = DAG.getNode(X86ISD::VSRLI, dl, VT, A, + DAG.getConstant(32, MVT::i32)); + SDValue Bhi = DAG.getNode(X86ISD::VSRLI, dl, VT, B, + DAG.getConstant(32, MVT::i32)); SDValue AloBlo = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, 
DAG.getConstant(Intrinsic::x86_avx2_pmulu_dq, MVT::i32), A, B); @@ -9949,12 +9947,10 @@ SDValue AhiBlo = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, DAG.getConstant(Intrinsic::x86_avx2_pmulu_dq, MVT::i32), Ahi, B); - AloBhi = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, - DAG.getConstant(Intrinsic::x86_avx2_pslli_q, MVT::i32), - AloBhi, DAG.getConstant(32, MVT::i32)); - AhiBlo = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, - DAG.getConstant(Intrinsic::x86_avx2_pslli_q, MVT::i32), - AhiBlo, DAG.getConstant(32, MVT::i32)); + AloBhi = DAG.getNode(X86ISD::VSHLI, dl, VT, AloBhi, + DAG.getConstant(32, MVT::i32)); + AhiBlo = DAG.getNode(X86ISD::VSHLI, dl, VT, AhiBlo, + DAG.getConstant(32, MVT::i32)); SDValue Res = DAG.getNode(ISD::ADD, dl, VT, AloBlo, AloBhi); Res = DAG.getNode(ISD::ADD, dl, VT, Res, AhiBlo); return Res; @@ -9972,12 +9968,10 @@ // AhiBlo = __builtin_ia32_psllqi128( AhiBlo, 32 ); // return AloBlo + AloBhi + AhiBlo; - SDValue Ahi = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, - DAG.getConstant(Intrinsic::x86_sse2_psrli_q, MVT::i32), - A, DAG.getConstant(32, MVT::i32)); - SDValue Bhi = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, - DAG.getConstant(Intrinsic::x86_sse2_psrli_q, MVT::i32), - B, DAG.getConstant(32, MVT::i32)); + SDValue Ahi = DAG.getNode(X86ISD::VSRLI, dl, VT, A, + DAG.getConstant(32, MVT::i32)); + SDValue Bhi = DAG.getNode(X86ISD::VSRLI, dl, VT, B, + DAG.getConstant(32, MVT::i32)); SDValue AloBlo = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, DAG.getConstant(Intrinsic::x86_sse2_pmulu_dq, MVT::i32), A, B); @@ -9987,12 +9981,10 @@ SDValue AhiBlo = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, DAG.getConstant(Intrinsic::x86_sse2_pmulu_dq, MVT::i32), Ahi, B); - AloBhi = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, - DAG.getConstant(Intrinsic::x86_sse2_pslli_q, MVT::i32), - AloBhi, DAG.getConstant(32, MVT::i32)); - AhiBlo = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, - DAG.getConstant(Intrinsic::x86_sse2_pslli_q, MVT::i32), - AhiBlo, 
DAG.getConstant(32, MVT::i32)); + AloBhi = DAG.getNode(X86ISD::VSHLI, dl, VT, AloBhi, + DAG.getConstant(32, MVT::i32)); + AhiBlo = DAG.getNode(X86ISD::VSHLI, dl, VT, AhiBlo, + DAG.getConstant(32, MVT::i32)); SDValue Res = DAG.getNode(ISD::ADD, dl, VT, AloBlo, AloBhi); Res = DAG.getNode(ISD::ADD, dl, VT, Res, AhiBlo); return Res; @@ -13688,26 +13680,11 @@ // Validate that the Mask operand is a vector sra node. // FIXME: what to do for bytes, since there is a psignb/pblendvb, but // there is no psrai.b - SDValue SraSrc, SraC; - if (Mask.getOpcode() == ISD::INTRINSIC_WO_CHAIN) { - switch (cast<ConstantSDNode>(Mask.getOperand(0))->getZExtValue()) { - case Intrinsic::x86_sse2_psrai_w: - case Intrinsic::x86_sse2_psrai_d: - case Intrinsic::x86_avx2_psrai_w: - case Intrinsic::x86_avx2_psrai_d: - break; - default: return SDValue(); - } - - SraSrc = Mask.getOperand(1); - SraC = Mask.getOperand(2); - } else if (Mask.getOpcode() == X86ISD::VSRAI) { - SraSrc = Mask.getOperand(0); - SraC = Mask.getOperand(1); - } else + if (Mask.getOpcode() != X86ISD::VSRAI) return SDValue(); // Check that the SRA is all signbits. 
+ SDValue SraC = Mask.getOperand(1); unsigned SraAmt = cast<ConstantSDNode>(SraC)->getZExtValue(); unsigned EltBits = MaskVT.getVectorElementType().getSizeInBits(); if ((SraAmt + 1) != EltBits) @@ -13725,7 +13702,7 @@ X.getValueType() == MaskVT && Y.getValueType() == MaskVT) { assert((EltBits == 8 || EltBits == 16 || EltBits == 32) && "Unsupported VT for PSIGN"); - Mask = DAG.getNode(X86ISD::PSIGN, DL, MaskVT, X, SraSrc); + Mask = DAG.getNode(X86ISD::PSIGN, DL, MaskVT, X, Mask.getOperand(0)); return DAG.getNode(ISD::BITCAST, DL, VT, Mask); } // PBLENDVB only available on SSE 4.1 From hfinkel at anl.gov Mon Jan 23 01:47:22 2012 From: hfinkel at anl.gov (Hal Finkel) Date: Mon, 23 Jan 2012 01:47:22 -0600 Subject: [llvm-commits] [llvm] r148658 - in /llvm/trunk: lib/Target/ARM/ARM.td lib/Target/ARM/ARMAsmPrinter.cpp lib/Target/ARM/ARMInstrInfo.td lib/Target/ARM/ARMInstrNEON.td lib/Target/ARM/ARMInstrVFP.td lib/Target/ARM/ARMSchedule.td lib/Target/ARM/ARMSub In-Reply-To: References: Message-ID: <1327304842.32397.696.camel@sapling> On Sun, 2012-01-22 at 16:46 -0800, Eli Friedman wrote: > On Sun, Jan 22, 2012 at 4:07 AM, Anton Korobeynikov wrote: > > +//===----------------------------------------------------------------------===// > > +// Fused FP Multiply-Accumulate Operations. 
> > +// > > +def VFMAD : ADbI<0b11101, 0b10, 0, 0, > > + (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm), > > + IIC_fpFMAC64, "vfma", ".f64\t$Dd, $Dn, $Dm", > > + [(set DPR:$Dd, (fadd_mlx (fmul_su DPR:$Dn, DPR:$Dm), > > + (f64 DPR:$Ddin)))]>, > > + RegConstraint<"$Ddin = $Dd">, > > + Requires<[HasVFP4]>; > > + > > +def VFMAS : ASbIn<0b11101, 0b10, 0, 0, > > + (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), > > + IIC_fpFMAC32, "vfma", ".f32\t$Sd, $Sn, $Sm", > > + [(set SPR:$Sd, (fadd_mlx (fmul_su SPR:$Sn, SPR:$Sm), > > + SPR:$Sdin))]>, > > + RegConstraint<"$Sdin = $Sd">, > > + Requires<[HasVFP4,DontUseNEONForFP]> { > > + // Some single precision VFP instructions may be executed on both NEON and > > + // VFP pipelines. > > +} > > + > > +def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))), > > + (VFMAD DPR:$dstin, DPR:$a, DPR:$b)>, > > + Requires<[HasVFP4]>; > > +def : Pat<(fadd_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)), > > + (VFMAS SPR:$dstin, SPR:$a, SPR:$b)>, > > + Requires<[HasVFP4,DontUseNEONForFP]>; > > + > > +def VFMSD : ADbI<0b11101, 0b10, 1, 0, > > + (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm), > > + IIC_fpFMAC64, "vfms", ".f64\t$Dd, $Dn, $Dm", > > + [(set DPR:$Dd, (fadd_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)), > > + (f64 DPR:$Ddin)))]>, > > + RegConstraint<"$Ddin = $Dd">, > > + Requires<[HasVFP4]>; > > + > > +def VFMSS : ASbIn<0b11101, 0b10, 1, 0, > > + (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), > > + IIC_fpFMAC32, "vfms", ".f32\t$Sd, $Sn, $Sm", > > + [(set SPR:$Sd, (fadd_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)), > > + SPR:$Sdin))]>, > > + RegConstraint<"$Sdin = $Sd">, > > + Requires<[HasVFP4,DontUseNEONForFP]> { > > + // Some single precision VFP instructions may be executed on both NEON and > > + // VFP pipelines. 
> > +} > > + > > +def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))), > > + (VFMSD DPR:$dstin, DPR:$a, DPR:$b)>, > > + Requires<[HasVFP4]>; > > +def : Pat<(fsub_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)), > > + (VFMSS SPR:$dstin, SPR:$a, SPR:$b)>, > > + Requires<[HasVFP4,DontUseNEONForFP]>; > > + > > +def VFNMAD : ADbI<0b11101, 0b01, 1, 0, > > + (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm), > > + IIC_fpFMAC64, "vfnma", ".f64\t$Dd, $Dn, $Dm", > > + [(set DPR:$Dd,(fsub_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)), > > + (f64 DPR:$Ddin)))]>, > > + RegConstraint<"$Ddin = $Dd">, > > + Requires<[HasVFP4]>; > > + > > +def VFNMAS : ASbI<0b11101, 0b01, 1, 0, > > + (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), > > + IIC_fpFMAC32, "vfnma", ".f32\t$Sd, $Sn, $Sm", > > + [(set SPR:$Sd, (fsub_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)), > > + SPR:$Sdin))]>, > > + RegConstraint<"$Sdin = $Sd">, > > + Requires<[HasVFP4,DontUseNEONForFP]> { > > + // Some single precision VFP instructions may be executed on both NEON and > > + // VFP pipelines. 
> > +} > > + > > +def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin), > > + (VFNMAD DPR:$dstin, DPR:$a, DPR:$b)>, > > + Requires<[HasVFP4]>; > > +def : Pat<(fsub_mlx (fneg (fmul_su SPR:$a, SPR:$b)), SPR:$dstin), > > + (VFNMAS SPR:$dstin, SPR:$a, SPR:$b)>, > > + Requires<[HasVFP4,DontUseNEONForFP]>; > > + > > +def VFNMSD : ADbI<0b11101, 0b01, 0, 0, > > + (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm), > > + IIC_fpFMAC64, "vfnms", ".f64\t$Dd, $Dn, $Dm", > > + [(set DPR:$Dd, (fsub_mlx (fmul_su DPR:$Dn, DPR:$Dm), > > + (f64 DPR:$Ddin)))]>, > > + RegConstraint<"$Ddin = $Dd">, > > + Requires<[HasVFP4]>; > > + > > +def VFNMSS : ASbI<0b11101, 0b01, 0, 0, > > + (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), > > + IIC_fpFMAC32, "vfnms", ".f32\t$Sd, $Sn, $Sm", > > + [(set SPR:$Sd, (fsub_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>, > > + RegConstraint<"$Sdin = $Sd">, > > + Requires<[HasVFP4,DontUseNEONForFP]> { > > + // Some single precision VFP instructions may be executed on both NEON and > > + // VFP pipelines. > > +} > > + > > +def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin), > > + (VFNMSD DPR:$dstin, DPR:$a, DPR:$b)>, > > + Requires<[HasVFP4]>; > > +def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin), > > + (VFNMSS SPR:$dstin, SPR:$a, SPR:$b)>, > > + Requires<[HasVFP4,DontUseNEONForFP]>; > > I'm a bit concerned about these patterns: a multiply followed by an > add is not, strictly speaking, the same thing as a fused multiply-add. > We have an FMA intrinsic (http://llvm.org/docs/LangRef.html#int_fma); > that should map onto this instruction, and we should only transform an > unfused multiply+add in fast-math mode. The PowerPC backend has patterns like this (for fmadd and friends), and they are enabled whenever the TargetOptions flag NoExcessFPPrecision is disabled (which is the default). I think that this behavior is reasonable. 
-Hal > > -Eli > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits -- Hal Finkel Postdoctoral Appointee Leadership Computing Facility Argonne National Laboratory From eugeni.stepanov at gmail.com Mon Jan 23 01:57:40 2012 From: eugeni.stepanov at gmail.com (Evgeniy Stepanov) Date: Mon, 23 Jan 2012 07:57:40 -0000 Subject: [llvm-commits] [llvm] r148686 - in /llvm/trunk: include/llvm/MC/MCAsmInfo.h lib/CodeGen/AsmPrinter/ARMException.cpp lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp test/CodeGen/ARM/ehabi-unwind.ll Message-ID: <20120123075740.9CD762A6C12C@llvm.org> Author: eugenis Date: Mon Jan 23 01:57:39 2012 New Revision: 148686 URL: http://llvm.org/viewvc/llvm-project?rev=148686&view=rev Log: An option to selectively enable parts of ARM EHABI support. This change adds a new value to the --arm-enable-ehabi option that disables emitting unwinding descriptors. This mode gives a working backtrace() without the (currently broken) exception support. 
Modified: llvm/trunk/include/llvm/MC/MCAsmInfo.h llvm/trunk/lib/CodeGen/AsmPrinter/ARMException.cpp llvm/trunk/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp llvm/trunk/test/CodeGen/ARM/ehabi-unwind.ll Modified: llvm/trunk/include/llvm/MC/MCAsmInfo.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/MC/MCAsmInfo.h?rev=148686&r1=148685&r2=148686&view=diff ============================================================================== --- llvm/trunk/include/llvm/MC/MCAsmInfo.h (original) +++ llvm/trunk/include/llvm/MC/MCAsmInfo.h Mon Jan 23 01:57:39 2012 @@ -30,6 +30,7 @@ namespace ExceptionHandling { enum ExceptionsType { None, DwarfCFI, SjLj, ARM, Win64 }; + enum ARMEHABIMode { ARMEHABIDisabled, ARMEHABIUnwind, ARMEHABIFull }; } namespace LCOMM { Modified: llvm/trunk/lib/CodeGen/AsmPrinter/ARMException.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/ARMException.cpp?rev=148686&r1=148685&r2=148686&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/ARMException.cpp (original) +++ llvm/trunk/lib/CodeGen/AsmPrinter/ARMException.cpp Mon Jan 23 01:57:39 2012 @@ -29,6 +29,7 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/FormattedStream.h" #include "llvm/ADT/SmallString.h" @@ -36,6 +37,18 @@ #include "llvm/ADT/Twine.h" using namespace llvm; +cl::opt<ExceptionHandling::ARMEHABIMode> +EnableARMEHABI("arm-enable-ehabi", cl::Hidden, + cl::desc("Generate ARM EHABI tables:"), + cl::values(clEnumValN(ExceptionHandling::ARMEHABIDisabled, "no", + "Do not generate ARM EHABI tables"), + clEnumValN(ExceptionHandling::ARMEHABIUnwind, "unwind", + "Emit unwinding instructions, but not descriptors"), + clEnumValN(ExceptionHandling::ARMEHABIFull, "full", + "Generate full ARM EHABI tables"), + clEnumValEnd)); 
+ + 
ARMException::ARMException(AsmPrinter *A) : DwarfException(A), shouldEmitTable(false), shouldEmitMoves(false), shouldEmitTableModule(false) @@ -72,13 +85,15 @@ Asm->OutStreamer.EmitPersonality(PerSym); } - // Map all labels and get rid of any dead landing pads. - MMI->TidyLandingPads(); + if (EnableARMEHABI == ExceptionHandling::ARMEHABIFull) { + // Map all labels and get rid of any dead landing pads. + MMI->TidyLandingPads(); - Asm->OutStreamer.EmitHandlerData(); + Asm->OutStreamer.EmitHandlerData(); - // Emit actual exception table - EmitExceptionTable(); + // Emit actual exception table + EmitExceptionTable(); + } } Asm->OutStreamer.EmitFnEnd(); Modified: llvm/trunk/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp?rev=148686&r1=148685&r2=148686&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp (original) +++ llvm/trunk/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp Mon Jan 23 01:57:39 2012 @@ -16,10 +16,7 @@ using namespace llvm; -cl::opt<bool> -EnableARMEHABI("arm-enable-ehabi", cl::Hidden, - cl::desc("Generate ARM EHABI tables"), - cl::init(false)); +extern cl::opt<ExceptionHandling::ARMEHABIMode> EnableARMEHABI; static const char *const arm_asm_table[] = { @@ -82,6 +79,6 @@ SupportsDebugInformation = true; // Exceptions handling - if (EnableARMEHABI) + if (EnableARMEHABI != ExceptionHandling::ARMEHABIDisabled) ExceptionsType = ExceptionHandling::ARM; } Modified: llvm/trunk/test/CodeGen/ARM/ehabi-unwind.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/ehabi-unwind.ll?rev=148686&r1=148685&r2=148686&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/ARM/ehabi-unwind.ll (original) +++ llvm/trunk/test/CodeGen/ARM/ehabi-unwind.ll Mon Jan 23 01:57:39 2012 @@ -1,7 +1,8 @@ ; Test that the EHABI 
unwind instruction generator does not 
encounter any ; unfamiliar instructions. -; RUN: llc < %s -mtriple=thumbv7 -arm-enable-ehabi -disable-fp-elim -; RUN: llc < %s -mtriple=thumbv7 -arm-enable-ehabi +; RUN: llc < %s -mtriple=thumbv7 -arm-enable-ehabi=full -disable-fp-elim +; RUN: llc < %s -mtriple=thumbv7 -arm-enable-ehabi=full +; RUN: llc < %s -mtriple=thumbv7 -arm-enable-ehabi=unwind define void @_Z1fv() nounwind { entry: From asl at math.spbu.ru Mon Jan 23 02:11:24 2012 From: asl at math.spbu.ru (Anton Korobeynikov) Date: Mon, 23 Jan 2012 12:11:24 +0400 Subject: [llvm-commits] [llvm] r148658 - in /llvm/trunk: lib/Target/ARM/ARM.td lib/Target/ARM/ARMAsmPrinter.cpp lib/Target/ARM/ARMInstrInfo.td lib/Target/ARM/ARMInstrNEON.td lib/Target/ARM/ARMInstrVFP.td lib/Target/ARM/ARMSchedule.td lib/Target/ARM/ARMSub In-Reply-To: <1327304842.32397.696.camel@sapling> References: <1327304842.32397.696.camel@sapling> Message-ID: > The PowerPC backend has patterns like this (for fmadd and friends), and > they are enabled whenever the TargetOptions flag NoExcessFPPrecision is > disabled (which is the default). I think that this behavior is > reasonable. We should also match the gcc's behavior here, I think. Ana, will you please do this? -- With best regards, Anton Korobeynikov Faculty of Mathematics and Mechanics, Saint Petersburg State University From craig.topper at gmail.com Mon Jan 23 02:18:28 2012 From: craig.topper at gmail.com (Craig Topper) Date: Mon, 23 Jan 2012 08:18:28 -0000 Subject: [llvm-commits] [llvm] r148687 - in /llvm/trunk/lib/Target/X86: X86ISelLowering.cpp X86InstrSSE.td Message-ID: <20120123081828.8872A2A6C12C@llvm.org> Author: ctopper Date: Mon Jan 23 02:18:28 2012 New Revision: 148687 URL: http://llvm.org/viewvc/llvm-project?rev=148687&view=rev Log: Custom lower PCMPEQ/PCMPGT intrinsics to target specific nodes and remove the intrinsic patterns. 
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp llvm/trunk/lib/Target/X86/X86InstrSSE.td Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=148687&r1=148686&r2=148687&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original) +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Mon Jan 23 02:18:28 2012 @@ -9318,6 +9318,26 @@ case Intrinsic::x86_avx2_psrav_d_256: return DAG.getNode(ISD::SRA, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2)); + case Intrinsic::x86_sse2_pcmpeq_b: + case Intrinsic::x86_sse2_pcmpeq_w: + case Intrinsic::x86_sse2_pcmpeq_d: + case Intrinsic::x86_sse41_pcmpeqq: + case Intrinsic::x86_avx2_pcmpeq_b: + case Intrinsic::x86_avx2_pcmpeq_w: + case Intrinsic::x86_avx2_pcmpeq_d: + case Intrinsic::x86_avx2_pcmpeq_q: + return DAG.getNode(X86ISD::PCMPEQ, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + case Intrinsic::x86_sse2_pcmpgt_b: + case Intrinsic::x86_sse2_pcmpgt_w: + case Intrinsic::x86_sse2_pcmpgt_d: + case Intrinsic::x86_sse42_pcmpgtq: + case Intrinsic::x86_avx2_pcmpgt_b: + case Intrinsic::x86_avx2_pcmpgt_w: + case Intrinsic::x86_avx2_pcmpgt_d: + case Intrinsic::x86_avx2_pcmpgt_q: + return DAG.getNode(X86ISD::PCMPGT, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); // ptest and testp intrinsics. 
The intrinsic these come from are designed to // return an integer value, not just an instruction so lower it to the ptest Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=148687&r1=148686&r2=148687&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original) +++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Mon Jan 23 02:18:28 2012 @@ -3510,31 +3510,31 @@ [(set RC:$dst, (IntId RC:$src1, (bitconvert (memop_frag addr:$src2))))]>; } -multiclass PDI_binop_rmi_int opc, bits<8> opc2, Format ImmForm, - string OpcodeStr, SDNode OpNode, - SDNode OpNode2, RegisterClass RC, - ValueType DstVT, ValueType SrcVT, PatFrag bc_frag, - bit Is2Addr = 1> { +multiclass PDI_binop_rmi opc, bits<8> opc2, Format ImmForm, + string OpcodeStr, SDNode OpNode, + SDNode OpNode2, RegisterClass RC, + ValueType DstVT, ValueType SrcVT, PatFrag bc_frag, + bit Is2Addr = 1> { // src2 is always 128-bit def rr : PDI; + [(set RC:$dst, (DstVT (OpNode RC:$src1, (SrcVT VR128:$src2))))]>; def rm : PDI; + [(set RC:$dst, (DstVT (OpNode RC:$src1, + (bc_frag (memopv2i64 addr:$src2)))))]>; def ri : PDIi8; + [(set RC:$dst, (DstVT (OpNode2 RC:$src1, (i32 imm:$src2))))]>; } } // ExeDomain = SSEPackedInt @@ -3730,24 +3730,24 @@ //===---------------------------------------------------------------------===// let Predicates = [HasAVX] in { -defm VPSLLW : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "vpsllw", X86vshl, X86vshli, - VR128, v8i16, v8i16, bc_v8i16, 0>, VEX_4V; -defm VPSLLD : PDI_binop_rmi_int<0xF2, 0x72, MRM6r, "vpslld", X86vshl, X86vshli, - VR128, v4i32, v4i32, bc_v4i32, 0>, VEX_4V; -defm VPSLLQ : PDI_binop_rmi_int<0xF3, 0x73, MRM6r, "vpsllq", X86vshl, X86vshli, - VR128, v2i64, v2i64, bc_v2i64, 0>, VEX_4V; - -defm VPSRLW : PDI_binop_rmi_int<0xD1, 0x71, MRM2r, "vpsrlw", X86vsrl, X86vsrli, - VR128, v8i16, v8i16, bc_v8i16, 0>, VEX_4V; -defm VPSRLD : 
PDI_binop_rmi_int<0xD2, 0x72, MRM2r, "vpsrld", X86vsrl, X86vsrli, - VR128, v4i32, v4i32, bc_v4i32, 0>, VEX_4V; -defm VPSRLQ : PDI_binop_rmi_int<0xD3, 0x73, MRM2r, "vpsrlq", X86vsrl, X86vsrli, - VR128, v2i64, v2i64, bc_v2i64, 0>, VEX_4V; - -defm VPSRAW : PDI_binop_rmi_int<0xE1, 0x71, MRM4r, "vpsraw", X86vsra, X86vsrai, - VR128, v8i16, v8i16, bc_v8i16, 0>, VEX_4V; -defm VPSRAD : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "vpsrad", X86vsra, X86vsrai, - VR128, v4i32, v4i32, bc_v4i32, 0>, VEX_4V; +defm VPSLLW : PDI_binop_rmi<0xF1, 0x71, MRM6r, "vpsllw", X86vshl, X86vshli, + VR128, v8i16, v8i16, bc_v8i16, 0>, VEX_4V; +defm VPSLLD : PDI_binop_rmi<0xF2, 0x72, MRM6r, "vpslld", X86vshl, X86vshli, + VR128, v4i32, v4i32, bc_v4i32, 0>, VEX_4V; +defm VPSLLQ : PDI_binop_rmi<0xF3, 0x73, MRM6r, "vpsllq", X86vshl, X86vshli, + VR128, v2i64, v2i64, bc_v2i64, 0>, VEX_4V; + +defm VPSRLW : PDI_binop_rmi<0xD1, 0x71, MRM2r, "vpsrlw", X86vsrl, X86vsrli, + VR128, v8i16, v8i16, bc_v8i16, 0>, VEX_4V; +defm VPSRLD : PDI_binop_rmi<0xD2, 0x72, MRM2r, "vpsrld", X86vsrl, X86vsrli, + VR128, v4i32, v4i32, bc_v4i32, 0>, VEX_4V; +defm VPSRLQ : PDI_binop_rmi<0xD3, 0x73, MRM2r, "vpsrlq", X86vsrl, X86vsrli, + VR128, v2i64, v2i64, bc_v2i64, 0>, VEX_4V; + +defm VPSRAW : PDI_binop_rmi<0xE1, 0x71, MRM4r, "vpsraw", X86vsra, X86vsrai, + VR128, v8i16, v8i16, bc_v8i16, 0>, VEX_4V; +defm VPSRAD : PDI_binop_rmi<0xE2, 0x72, MRM4r, "vpsrad", X86vsra, X86vsrai, + VR128, v4i32, v4i32, bc_v4i32, 0>, VEX_4V; let ExeDomain = SSEPackedInt in { // 128-bit logical shifts. 
@@ -3768,24 +3768,24 @@ } // Predicates = [HasAVX] let Predicates = [HasAVX2] in { -defm VPSLLWY : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "vpsllw", X86vshl, X86vshli, - VR256, v16i16, v8i16, bc_v8i16, 0>, VEX_4V; -defm VPSLLDY : PDI_binop_rmi_int<0xF2, 0x72, MRM6r, "vpslld", X86vshl, X86vshli, - VR256, v8i32, v4i32, bc_v4i32, 0>, VEX_4V; -defm VPSLLQY : PDI_binop_rmi_int<0xF3, 0x73, MRM6r, "vpsllq", X86vshl, X86vshli, - VR256, v4i64, v2i64, bc_v2i64, 0>, VEX_4V; - -defm VPSRLWY : PDI_binop_rmi_int<0xD1, 0x71, MRM2r, "vpsrlw", X86vsrl, X86vsrli, - VR256, v16i16, v8i16, bc_v8i16, 0>, VEX_4V; -defm VPSRLDY : PDI_binop_rmi_int<0xD2, 0x72, MRM2r, "vpsrld", X86vsrl, X86vsrli, - VR256, v8i32, v4i32, bc_v4i32, 0>, VEX_4V; -defm VPSRLQY : PDI_binop_rmi_int<0xD3, 0x73, MRM2r, "vpsrlq", X86vsrl, X86vsrli, - VR256, v4i64, v2i64, bc_v2i64, 0>, VEX_4V; - -defm VPSRAWY : PDI_binop_rmi_int<0xE1, 0x71, MRM4r, "vpsraw", X86vsra, X86vsrai, - VR256, v16i16, v8i16, bc_v8i16, 0>, VEX_4V; -defm VPSRADY : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "vpsrad", X86vsra, X86vsrai, - VR256, v8i32, v4i32, bc_v4i32, 0>, VEX_4V; +defm VPSLLWY : PDI_binop_rmi<0xF1, 0x71, MRM6r, "vpsllw", X86vshl, X86vshli, + VR256, v16i16, v8i16, bc_v8i16, 0>, VEX_4V; +defm VPSLLDY : PDI_binop_rmi<0xF2, 0x72, MRM6r, "vpslld", X86vshl, X86vshli, + VR256, v8i32, v4i32, bc_v4i32, 0>, VEX_4V; +defm VPSLLQY : PDI_binop_rmi<0xF3, 0x73, MRM6r, "vpsllq", X86vshl, X86vshli, + VR256, v4i64, v2i64, bc_v2i64, 0>, VEX_4V; + +defm VPSRLWY : PDI_binop_rmi<0xD1, 0x71, MRM2r, "vpsrlw", X86vsrl, X86vsrli, + VR256, v16i16, v8i16, bc_v8i16, 0>, VEX_4V; +defm VPSRLDY : PDI_binop_rmi<0xD2, 0x72, MRM2r, "vpsrld", X86vsrl, X86vsrli, + VR256, v8i32, v4i32, bc_v4i32, 0>, VEX_4V; +defm VPSRLQY : PDI_binop_rmi<0xD3, 0x73, MRM2r, "vpsrlq", X86vsrl, X86vsrli, + VR256, v4i64, v2i64, bc_v2i64, 0>, VEX_4V; + +defm VPSRAWY : PDI_binop_rmi<0xE1, 0x71, MRM4r, "vpsraw", X86vsra, X86vsrai, + VR256, v16i16, v8i16, bc_v8i16, 0>, VEX_4V; +defm VPSRADY : 
PDI_binop_rmi<0xE2, 0x72, MRM4r, "vpsrad", X86vsra, X86vsrai, + VR256, v8i32, v4i32, bc_v4i32, 0>, VEX_4V; let ExeDomain = SSEPackedInt in { // 256-bit logical shifts. @@ -3806,24 +3806,24 @@ } // Predicates = [HasAVX2] let Constraints = "$src1 = $dst" in { -defm PSLLW : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "psllw", X86vshl, X86vshli, - VR128, v8i16, v8i16, bc_v8i16>; -defm PSLLD : PDI_binop_rmi_int<0xF2, 0x72, MRM6r, "pslld", X86vshl, X86vshli, - VR128, v4i32, v4i32, bc_v4i32>; -defm PSLLQ : PDI_binop_rmi_int<0xF3, 0x73, MRM6r, "psllq", X86vshl, X86vshli, - VR128, v2i64, v2i64, bc_v2i64>; - -defm PSRLW : PDI_binop_rmi_int<0xD1, 0x71, MRM2r, "psrlw", X86vsrl, X86vsrli, - VR128, v8i16, v8i16, bc_v8i16>; -defm PSRLD : PDI_binop_rmi_int<0xD2, 0x72, MRM2r, "psrld", X86vsrl, X86vsrli, - VR128, v4i32, v4i32, bc_v4i32>; -defm PSRLQ : PDI_binop_rmi_int<0xD3, 0x73, MRM2r, "psrlq", X86vsrl, X86vsrli, - VR128, v2i64, v2i64, bc_v2i64>; - -defm PSRAW : PDI_binop_rmi_int<0xE1, 0x71, MRM4r, "psraw", X86vsra, X86vsrai, - VR128, v8i16, v8i16, bc_v8i16>; -defm PSRAD : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "psrad", X86vsra, X86vsrai, - VR128, v4i32, v4i32, bc_v4i32>; +defm PSLLW : PDI_binop_rmi<0xF1, 0x71, MRM6r, "psllw", X86vshl, X86vshli, + VR128, v8i16, v8i16, bc_v8i16>; +defm PSLLD : PDI_binop_rmi<0xF2, 0x72, MRM6r, "pslld", X86vshl, X86vshli, + VR128, v4i32, v4i32, bc_v4i32>; +defm PSLLQ : PDI_binop_rmi<0xF3, 0x73, MRM6r, "psllq", X86vshl, X86vshli, + VR128, v2i64, v2i64, bc_v2i64>; + +defm PSRLW : PDI_binop_rmi<0xD1, 0x71, MRM2r, "psrlw", X86vsrl, X86vsrli, + VR128, v8i16, v8i16, bc_v8i16>; +defm PSRLD : PDI_binop_rmi<0xD2, 0x72, MRM2r, "psrld", X86vsrl, X86vsrli, + VR128, v4i32, v4i32, bc_v4i32>; +defm PSRLQ : PDI_binop_rmi<0xD3, 0x73, MRM2r, "psrlq", X86vsrl, X86vsrli, + VR128, v2i64, v2i64, bc_v2i64>; + +defm PSRAW : PDI_binop_rmi<0xE1, 0x71, MRM4r, "psraw", X86vsra, X86vsrai, + VR128, v8i16, v8i16, bc_v8i16>; +defm PSRAD : PDI_binop_rmi<0xE2, 0x72, MRM4r, "psrad", X86vsra, 
X86vsrai, + VR128, v4i32, v4i32, bc_v4i32>; let ExeDomain = SSEPackedInt in { // 128-bit logical shifts. @@ -3883,148 +3883,50 @@ //===---------------------------------------------------------------------===// let Predicates = [HasAVX] in { - defm VPCMPEQB : PDI_binop_rm_int<0x74, "vpcmpeqb", int_x86_sse2_pcmpeq_b, - VR128, memopv2i64, i128mem, 1, 0>, VEX_4V; - defm VPCMPEQW : PDI_binop_rm_int<0x75, "vpcmpeqw", int_x86_sse2_pcmpeq_w, - VR128, memopv2i64, i128mem, 1, 0>, VEX_4V; - defm VPCMPEQD : PDI_binop_rm_int<0x76, "vpcmpeqd", int_x86_sse2_pcmpeq_d, - VR128, memopv2i64, i128mem, 1, 0>, VEX_4V; - defm VPCMPGTB : PDI_binop_rm_int<0x64, "vpcmpgtb", int_x86_sse2_pcmpgt_b, - VR128, memopv2i64, i128mem, 0, 0>, VEX_4V; - defm VPCMPGTW : PDI_binop_rm_int<0x65, "vpcmpgtw", int_x86_sse2_pcmpgt_w, - VR128, memopv2i64, i128mem, 0, 0>, VEX_4V; - defm VPCMPGTD : PDI_binop_rm_int<0x66, "vpcmpgtd", int_x86_sse2_pcmpgt_d, - VR128, memopv2i64, i128mem, 0, 0>, VEX_4V; - - def : Pat<(v16i8 (X86pcmpeq VR128:$src1, VR128:$src2)), - (VPCMPEQBrr VR128:$src1, VR128:$src2)>; - def : Pat<(v16i8 (X86pcmpeq VR128:$src1, - (bc_v16i8 (memopv2i64 addr:$src2)))), - (VPCMPEQBrm VR128:$src1, addr:$src2)>; - def : Pat<(v8i16 (X86pcmpeq VR128:$src1, VR128:$src2)), - (VPCMPEQWrr VR128:$src1, VR128:$src2)>; - def : Pat<(v8i16 (X86pcmpeq VR128:$src1, - (bc_v8i16 (memopv2i64 addr:$src2)))), - (VPCMPEQWrm VR128:$src1, addr:$src2)>; - def : Pat<(v4i32 (X86pcmpeq VR128:$src1, VR128:$src2)), - (VPCMPEQDrr VR128:$src1, VR128:$src2)>; - def : Pat<(v4i32 (X86pcmpeq VR128:$src1, - (bc_v4i32 (memopv2i64 addr:$src2)))), - (VPCMPEQDrm VR128:$src1, addr:$src2)>; - - def : Pat<(v16i8 (X86pcmpgt VR128:$src1, VR128:$src2)), - (VPCMPGTBrr VR128:$src1, VR128:$src2)>; - def : Pat<(v16i8 (X86pcmpgt VR128:$src1, - (bc_v16i8 (memopv2i64 addr:$src2)))), - (VPCMPGTBrm VR128:$src1, addr:$src2)>; - def : Pat<(v8i16 (X86pcmpgt VR128:$src1, VR128:$src2)), - (VPCMPGTWrr VR128:$src1, VR128:$src2)>; - def : Pat<(v8i16 (X86pcmpgt 
VR128:$src1, - (bc_v8i16 (memopv2i64 addr:$src2)))), - (VPCMPGTWrm VR128:$src1, addr:$src2)>; - def : Pat<(v4i32 (X86pcmpgt VR128:$src1, VR128:$src2)), - (VPCMPGTDrr VR128:$src1, VR128:$src2)>; - def : Pat<(v4i32 (X86pcmpgt VR128:$src1, - (bc_v4i32 (memopv2i64 addr:$src2)))), - (VPCMPGTDrm VR128:$src1, addr:$src2)>; + defm VPCMPEQB : PDI_binop_rm<0x74, "vpcmpeqb", X86pcmpeq, v16i8, + VR128, memopv2i64, i128mem, 1, 0>, VEX_4V; + defm VPCMPEQW : PDI_binop_rm<0x75, "vpcmpeqw", X86pcmpeq, v8i16, + VR128, memopv2i64, i128mem, 1, 0>, VEX_4V; + defm VPCMPEQD : PDI_binop_rm<0x76, "vpcmpeqd", X86pcmpeq, v4i32, + VR128, memopv2i64, i128mem, 1, 0>, VEX_4V; + defm VPCMPGTB : PDI_binop_rm<0x64, "vpcmpgtb", X86pcmpgt, v16i8, + VR128, memopv2i64, i128mem, 0, 0>, VEX_4V; + defm VPCMPGTW : PDI_binop_rm<0x65, "vpcmpgtw", X86pcmpgt, v8i16, + VR128, memopv2i64, i128mem, 0, 0>, VEX_4V; + defm VPCMPGTD : PDI_binop_rm<0x66, "vpcmpgtd", X86pcmpgt, v4i32, + VR128, memopv2i64, i128mem, 0, 0>, VEX_4V; } let Predicates = [HasAVX2] in { - defm VPCMPEQBY : PDI_binop_rm_int<0x74, "vpcmpeqb", int_x86_avx2_pcmpeq_b, - VR256, memopv4i64, i256mem, 1, 0>, VEX_4V; - defm VPCMPEQWY : PDI_binop_rm_int<0x75, "vpcmpeqw", int_x86_avx2_pcmpeq_w, - VR256, memopv4i64, i256mem, 1, 0>, VEX_4V; - defm VPCMPEQDY : PDI_binop_rm_int<0x76, "vpcmpeqd", int_x86_avx2_pcmpeq_d, - VR256, memopv4i64, i256mem, 1, 0>, VEX_4V; - defm VPCMPGTBY : PDI_binop_rm_int<0x64, "vpcmpgtb", int_x86_avx2_pcmpgt_b, - VR256, memopv4i64, i256mem, 0, 0>, VEX_4V; - defm VPCMPGTWY : PDI_binop_rm_int<0x65, "vpcmpgtw", int_x86_avx2_pcmpgt_w, - VR256, memopv4i64, i256mem, 0, 0>, VEX_4V; - defm VPCMPGTDY : PDI_binop_rm_int<0x66, "vpcmpgtd", int_x86_avx2_pcmpgt_d, - VR256, memopv4i64, i256mem, 0, 0>, VEX_4V; - - def : Pat<(v32i8 (X86pcmpeq VR256:$src1, VR256:$src2)), - (VPCMPEQBYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v32i8 (X86pcmpeq VR256:$src1, - (bc_v32i8 (memopv4i64 addr:$src2)))), - (VPCMPEQBYrm VR256:$src1, addr:$src2)>; - def : 
Pat<(v16i16 (X86pcmpeq VR256:$src1, VR256:$src2)), - (VPCMPEQWYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v16i16 (X86pcmpeq VR256:$src1, - (bc_v16i16 (memopv4i64 addr:$src2)))), - (VPCMPEQWYrm VR256:$src1, addr:$src2)>; - def : Pat<(v8i32 (X86pcmpeq VR256:$src1, VR256:$src2)), - (VPCMPEQDYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v8i32 (X86pcmpeq VR256:$src1, - (bc_v8i32 (memopv4i64 addr:$src2)))), - (VPCMPEQDYrm VR256:$src1, addr:$src2)>; - - def : Pat<(v32i8 (X86pcmpgt VR256:$src1, VR256:$src2)), - (VPCMPGTBYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v32i8 (X86pcmpgt VR256:$src1, - (bc_v32i8 (memopv4i64 addr:$src2)))), - (VPCMPGTBYrm VR256:$src1, addr:$src2)>; - def : Pat<(v16i16 (X86pcmpgt VR256:$src1, VR256:$src2)), - (VPCMPGTWYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v16i16 (X86pcmpgt VR256:$src1, - (bc_v16i16 (memopv4i64 addr:$src2)))), - (VPCMPGTWYrm VR256:$src1, addr:$src2)>; - def : Pat<(v8i32 (X86pcmpgt VR256:$src1, VR256:$src2)), - (VPCMPGTDYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v8i32 (X86pcmpgt VR256:$src1, - (bc_v8i32 (memopv4i64 addr:$src2)))), - (VPCMPGTDYrm VR256:$src1, addr:$src2)>; + defm VPCMPEQBY : PDI_binop_rm<0x74, "vpcmpeqb", X86pcmpeq, v32i8, + VR256, memopv4i64, i256mem, 1, 0>, VEX_4V; + defm VPCMPEQWY : PDI_binop_rm<0x75, "vpcmpeqw", X86pcmpeq, v16i16, + VR256, memopv4i64, i256mem, 1, 0>, VEX_4V; + defm VPCMPEQDY : PDI_binop_rm<0x76, "vpcmpeqd", X86pcmpeq, v8i32, + VR256, memopv4i64, i256mem, 1, 0>, VEX_4V; + defm VPCMPGTBY : PDI_binop_rm<0x64, "vpcmpgtb", X86pcmpgt, v32i8, + VR256, memopv4i64, i256mem, 0, 0>, VEX_4V; + defm VPCMPGTWY : PDI_binop_rm<0x65, "vpcmpgtw", X86pcmpgt, v16i16, + VR256, memopv4i64, i256mem, 0, 0>, VEX_4V; + defm VPCMPGTDY : PDI_binop_rm<0x66, "vpcmpgtd", X86pcmpgt, v8i32, + VR256, memopv4i64, i256mem, 0, 0>, VEX_4V; } let Constraints = "$src1 = $dst" in { - defm PCMPEQB : PDI_binop_rm_int<0x74, "pcmpeqb", int_x86_sse2_pcmpeq_b, - VR128, memopv2i64, i128mem, 1>; - defm PCMPEQW : PDI_binop_rm_int<0x75, 
"pcmpeqw", int_x86_sse2_pcmpeq_w, - VR128, memopv2i64, i128mem, 1>; - defm PCMPEQD : PDI_binop_rm_int<0x76, "pcmpeqd", int_x86_sse2_pcmpeq_d, - VR128, memopv2i64, i128mem, 1>; - defm PCMPGTB : PDI_binop_rm_int<0x64, "pcmpgtb", int_x86_sse2_pcmpgt_b, - VR128, memopv2i64, i128mem>; - defm PCMPGTW : PDI_binop_rm_int<0x65, "pcmpgtw", int_x86_sse2_pcmpgt_w, - VR128, memopv2i64, i128mem>; - defm PCMPGTD : PDI_binop_rm_int<0x66, "pcmpgtd", int_x86_sse2_pcmpgt_d, - VR128, memopv2i64, i128mem>; + defm PCMPEQB : PDI_binop_rm<0x74, "pcmpeqb", X86pcmpeq, v16i8, + VR128, memopv2i64, i128mem, 1>; + defm PCMPEQW : PDI_binop_rm<0x75, "pcmpeqw", X86pcmpeq, v8i16, + VR128, memopv2i64, i128mem, 1>; + defm PCMPEQD : PDI_binop_rm<0x76, "pcmpeqd", X86pcmpeq, v4i32, + VR128, memopv2i64, i128mem, 1>; + defm PCMPGTB : PDI_binop_rm<0x64, "pcmpgtb", X86pcmpgt, v16i8, + VR128, memopv2i64, i128mem>; + defm PCMPGTW : PDI_binop_rm<0x65, "pcmpgtw", X86pcmpgt, v8i16, + VR128, memopv2i64, i128mem>; + defm PCMPGTD : PDI_binop_rm<0x66, "pcmpgtd", X86pcmpgt, v4i32, + VR128, memopv2i64, i128mem>; } // Constraints = "$src1 = $dst" -let Predicates = [HasSSE2] in { - def : Pat<(v16i8 (X86pcmpeq VR128:$src1, VR128:$src2)), - (PCMPEQBrr VR128:$src1, VR128:$src2)>; - def : Pat<(v16i8 (X86pcmpeq VR128:$src1, - (bc_v16i8 (memopv2i64 addr:$src2)))), - (PCMPEQBrm VR128:$src1, addr:$src2)>; - def : Pat<(v8i16 (X86pcmpeq VR128:$src1, VR128:$src2)), - (PCMPEQWrr VR128:$src1, VR128:$src2)>; - def : Pat<(v8i16 (X86pcmpeq VR128:$src1, - (bc_v8i16 (memopv2i64 addr:$src2)))), - (PCMPEQWrm VR128:$src1, addr:$src2)>; - def : Pat<(v4i32 (X86pcmpeq VR128:$src1, VR128:$src2)), - (PCMPEQDrr VR128:$src1, VR128:$src2)>; - def : Pat<(v4i32 (X86pcmpeq VR128:$src1, - (bc_v4i32 (memopv2i64 addr:$src2)))), - (PCMPEQDrm VR128:$src1, addr:$src2)>; - - def : Pat<(v16i8 (X86pcmpgt VR128:$src1, VR128:$src2)), - (PCMPGTBrr VR128:$src1, VR128:$src2)>; - def : Pat<(v16i8 (X86pcmpgt VR128:$src1, - (bc_v16i8 (memopv2i64 addr:$src2)))), - 
(PCMPGTBrm VR128:$src1, addr:$src2)>; - def : Pat<(v8i16 (X86pcmpgt VR128:$src1, VR128:$src2)), - (PCMPGTWrr VR128:$src1, VR128:$src2)>; - def : Pat<(v8i16 (X86pcmpgt VR128:$src1, - (bc_v8i16 (memopv2i64 addr:$src2)))), - (PCMPGTWrm VR128:$src1, addr:$src2)>; - def : Pat<(v4i32 (X86pcmpgt VR128:$src1, VR128:$src2)), - (PCMPGTDrr VR128:$src1, VR128:$src2)>; - def : Pat<(v4i32 (X86pcmpgt VR128:$src1, - (bc_v4i32 (memopv2i64 addr:$src2)))), - (PCMPGTDrm VR128:$src1, addr:$src2)>; -} - //===---------------------------------------------------------------------===// // SSE2 - Packed Integer Pack Instructions //===---------------------------------------------------------------------===// @@ -6372,8 +6274,6 @@ let isCommutable = 0 in defm VPACKUSDW : SS41I_binop_rm_int<0x2B, "vpackusdw", int_x86_sse41_packusdw, 0>, VEX_4V; - defm VPCMPEQQ : SS41I_binop_rm_int<0x29, "vpcmpeqq", int_x86_sse41_pcmpeqq, - 0>, VEX_4V; defm VPMINSB : SS41I_binop_rm_int<0x38, "vpminsb", int_x86_sse41_pminsb, 0>, VEX_4V; defm VPMINSD : SS41I_binop_rm_int<0x39, "vpminsd", int_x86_sse41_pminsd, @@ -6392,19 +6292,12 @@ 0>, VEX_4V; defm VPMULDQ : SS41I_binop_rm_int<0x28, "vpmuldq", int_x86_sse41_pmuldq, 0>, VEX_4V; - - def : Pat<(v2i64 (X86pcmpeq VR128:$src1, VR128:$src2)), - (VPCMPEQQrr VR128:$src1, VR128:$src2)>; - def : Pat<(v2i64 (X86pcmpeq VR128:$src1, (memop addr:$src2))), - (VPCMPEQQrm VR128:$src1, addr:$src2)>; } let Predicates = [HasAVX2] in { let isCommutable = 0 in defm VPACKUSDW : SS41I_binop_rm_int_y<0x2B, "vpackusdw", int_x86_avx2_packusdw>, VEX_4V; - defm VPCMPEQQ : SS41I_binop_rm_int_y<0x29, "vpcmpeqq", - int_x86_avx2_pcmpeq_q>, VEX_4V; defm VPMINSB : SS41I_binop_rm_int_y<0x38, "vpminsb", int_x86_avx2_pmins_b>, VEX_4V; defm VPMINSD : SS41I_binop_rm_int_y<0x39, "vpminsd", @@ -6423,17 +6316,11 @@ int_x86_avx2_pmaxu_w>, VEX_4V; defm VPMULDQ : SS41I_binop_rm_int_y<0x28, "vpmuldq", int_x86_avx2_pmul_dq>, VEX_4V; - - def : Pat<(v4i64 (X86pcmpeq VR256:$src1, VR256:$src2)), - (VPCMPEQQYrr 
VR256:$src1, VR256:$src2)>; - def : Pat<(v4i64 (X86pcmpeq VR256:$src1, (memop addr:$src2))), - (VPCMPEQQYrm VR256:$src1, addr:$src2)>; } let Constraints = "$src1 = $dst" in { let isCommutable = 0 in defm PACKUSDW : SS41I_binop_rm_int<0x2B, "packusdw", int_x86_sse41_packusdw>; - defm PCMPEQQ : SS41I_binop_rm_int<0x29, "pcmpeqq", int_x86_sse41_pcmpeqq>; defm PMINSB : SS41I_binop_rm_int<0x38, "pminsb", int_x86_sse41_pminsb>; defm PMINSD : SS41I_binop_rm_int<0x39, "pminsd", int_x86_sse41_pminsd>; defm PMINUD : SS41I_binop_rm_int<0x3B, "pminud", int_x86_sse41_pminud>; @@ -6445,57 +6332,46 @@ defm PMULDQ : SS41I_binop_rm_int<0x28, "pmuldq", int_x86_sse41_pmuldq>; } -let Predicates = [HasSSE41] in { - def : Pat<(v2i64 (X86pcmpeq VR128:$src1, VR128:$src2)), - (PCMPEQQrr VR128:$src1, VR128:$src2)>; - def : Pat<(v2i64 (X86pcmpeq VR128:$src1, (memop addr:$src2))), - (PCMPEQQrm VR128:$src1, addr:$src2)>; -} - /// SS48I_binop_rm - Simple SSE41 binary operator. multiclass SS48I_binop_rm opc, string OpcodeStr, SDNode OpNode, - ValueType OpVT, bit Is2Addr = 1> { + ValueType OpVT, RegisterClass RC, PatFrag memop_frag, + X86MemOperand x86memop, bit Is2Addr = 1> { let isCommutable = 1 in - def rr : SS48I, - OpSize; - def rm : SS48I, OpSize; + def rm : SS48I, - OpSize; + [(set RC:$dst, + (OpVT (OpNode RC:$src1, + (bitconvert (memop_frag addr:$src2)))))]>, OpSize; } -/// SS48I_binop_rm - Simple SSE41 binary operator. 
-multiclass SS48I_binop_rm_y opc, string OpcodeStr, SDNode OpNode, - ValueType OpVT> { - let isCommutable = 1 in - def Yrr : SS48I, - OpSize; - def Yrm : SS48I, - OpSize; +let Predicates = [HasAVX] in { + defm VPMULLD : SS48I_binop_rm<0x40, "vpmulld", mul, v4i32, VR128, + memopv2i64, i128mem, 0>, VEX_4V; + defm VPCMPEQQ : SS48I_binop_rm<0x29, "vpcmpeqq", X86pcmpeq, v2i64, VR128, + memopv2i64, i128mem, 0>, VEX_4V; +} +let Predicates = [HasAVX2] in { + defm VPMULLDY : SS48I_binop_rm<0x40, "vpmulld", mul, v8i32, VR256, + memopv4i64, i256mem, 0>, VEX_4V; + defm VPCMPEQQY : SS48I_binop_rm<0x29, "vpcmpeqq", X86pcmpeq, v4i64, VR256, + memopv4i64, i256mem, 0>, VEX_4V; } -let Predicates = [HasAVX] in - defm VPMULLD : SS48I_binop_rm<0x40, "vpmulld", mul, v4i32, 0>, VEX_4V; -let Predicates = [HasAVX2] in - defm VPMULLD : SS48I_binop_rm_y<0x40, "vpmulld", mul, v8i32>, VEX_4V; -let Constraints = "$src1 = $dst" in - defm PMULLD : SS48I_binop_rm<0x40, "pmulld", mul, v4i32>; +let Constraints = "$src1 = $dst" in { + defm PMULLD : SS48I_binop_rm<0x40, "pmulld", mul, v4i32, VR128, + memopv2i64, i128mem>; + defm PCMPEQQ : SS48I_binop_rm<0x29, "pcmpeqq", X86pcmpeq, v2i64, VR128, + memopv2i64, i128mem>; +} /// SS41I_binop_rmi_int - SSE 4.1 binary operator with 8-bit immediate multiclass SS41I_binop_rmi_int opc, string OpcodeStr, @@ -6730,69 +6606,37 @@ // SSE4.2 - Compare Instructions //===----------------------------------------------------------------------===// -/// SS42I_binop_rm_int - Simple SSE 4.2 binary operator -multiclass SS42I_binop_rm_int opc, string OpcodeStr, - Intrinsic IntId128, bit Is2Addr = 1> { - def rr : SS428I opc, string OpcodeStr, SDNode OpNode, + ValueType OpVT, RegisterClass RC, PatFrag memop_frag, + X86MemOperand x86memop, bit Is2Addr = 1> { + def rr : SS428I, + [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>, OpSize; - def rm : SS428I, OpSize; + [(set RC:$dst, + (OpVT (OpNode RC:$src1, (memop_frag addr:$src2))))]>, OpSize; } -/// SS42I_binop_rm_int - 
Simple SSE 4.2 binary operator -multiclass SS42I_binop_rm_int_y opc, string OpcodeStr, - Intrinsic IntId256> { - def Yrr : SS428I, - OpSize; - def Yrm : SS428I, OpSize; -} - -let Predicates = [HasAVX] in { - defm VPCMPGTQ : SS42I_binop_rm_int<0x37, "vpcmpgtq", int_x86_sse42_pcmpgtq, - 0>, VEX_4V; - - def : Pat<(v2i64 (X86pcmpgt VR128:$src1, VR128:$src2)), - (VPCMPGTQrr VR128:$src1, VR128:$src2)>; - def : Pat<(v2i64 (X86pcmpgt VR128:$src1, (memop addr:$src2))), - (VPCMPGTQrm VR128:$src1, addr:$src2)>; -} - -let Predicates = [HasAVX2] in { - defm VPCMPGTQ : SS42I_binop_rm_int_y<0x37, "vpcmpgtq", int_x86_avx2_pcmpgt_q>, - VEX_4V; +let Predicates = [HasAVX] in + defm VPCMPGTQ : SS42I_binop_rm<0x37, "vpcmpgtq", X86pcmpgt, v2i64, VR128, + memopv2i64, i128mem, 0>, VEX_4V; - def : Pat<(v4i64 (X86pcmpgt VR256:$src1, VR256:$src2)), - (VPCMPGTQYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v4i64 (X86pcmpgt VR256:$src1, (memop addr:$src2))), - (VPCMPGTQYrm VR256:$src1, addr:$src2)>; -} +let Predicates = [HasAVX2] in + defm VPCMPGTQY : SS42I_binop_rm<0x37, "vpcmpgtq", X86pcmpgt, v4i64, VR256, + memopv4i64, i256mem, 0>, VEX_4V; let Constraints = "$src1 = $dst" in - defm PCMPGTQ : SS42I_binop_rm_int<0x37, "pcmpgtq", int_x86_sse42_pcmpgtq>; - -let Predicates = [HasSSE42] in { - def : Pat<(v2i64 (X86pcmpgt VR128:$src1, VR128:$src2)), - (PCMPGTQrr VR128:$src1, VR128:$src2)>; - def : Pat<(v2i64 (X86pcmpgt VR128:$src1, (memop addr:$src2))), - (PCMPGTQrm VR128:$src1, addr:$src2)>; -} + defm PCMPGTQ : SS42I_binop_rm<0x37, "pcmpgtq", X86pcmpgt, v2i64, VR128, + memopv2i64, i128mem>; //===----------------------------------------------------------------------===// // SSE4.2 - String/text Processing Instructions From sabre at nondot.org Mon Jan 23 02:19:57 2012 From: sabre at nondot.org (Chris Lattner) Date: Mon, 23 Jan 2012 08:19:57 -0000 Subject: [llvm-commits] [llvm] r148688 - /llvm/trunk/include/llvm/ADT/OwningPtr.h Message-ID: <20120123081957.F16BE2A6C12C@llvm.org> Author: lattner Date: 
Mon Jan 23 02:19:57 2012 New Revision: 148688 URL: http://llvm.org/viewvc/llvm-project?rev=148688&view=rev Log: allow OwningPtr to be copy constructed if null, which is required to make them be a valuetype in a DenseMap. Modified: llvm/trunk/include/llvm/ADT/OwningPtr.h Modified: llvm/trunk/include/llvm/ADT/OwningPtr.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/ADT/OwningPtr.h?rev=148688&r1=148687&r2=148688&view=diff ============================================================================== --- llvm/trunk/include/llvm/ADT/OwningPtr.h (original) +++ llvm/trunk/include/llvm/ADT/OwningPtr.h Mon Jan 23 02:19:57 2012 @@ -25,12 +25,15 @@ /// pointee object can be taken away from OwningPtr by using the take method. template class OwningPtr { - OwningPtr(OwningPtr const &); // DO NOT IMPLEMENT - OwningPtr &operator=(OwningPtr const &); // DO NOT IMPLEMENT + OwningPtr &operator=(const OwningPtr &); // DO NOT IMPLEMENT T *Ptr; public: explicit OwningPtr(T *P = 0) : Ptr(P) {} + OwningPtr(const OwningPtr &RHS) : Ptr(0) { + assert(RHS.Ptr == 0 && "Only null OwningPtr's are copyable!"); + } + ~OwningPtr() { delete Ptr; } From sabre at nondot.org Mon Jan 23 02:42:38 2012 From: sabre at nondot.org (Chris Lattner) Date: Mon, 23 Jan 2012 08:42:38 -0000 Subject: [llvm-commits] [llvm] r148691 - in /llvm/trunk/lib/VMCore: Constants.cpp ConstantsContext.h LLVMContextImpl.cpp LLVMContextImpl.h Message-ID: <20120123084239.1B8B62A6C12D@llvm.org> Author: lattner Date: Mon Jan 23 02:42:38 2012 New Revision: 148691 URL: http://llvm.org/viewvc/llvm-project?rev=148691&view=rev Log: Replace a use of ConstantUniqueMap for CAZ constants with a simple DenseMap. Now that the type system rewrite has landed, there is no need for its complexity and std::map'ness. 
Modified: llvm/trunk/lib/VMCore/Constants.cpp llvm/trunk/lib/VMCore/ConstantsContext.h llvm/trunk/lib/VMCore/LLVMContextImpl.cpp llvm/trunk/lib/VMCore/LLVMContextImpl.h Modified: llvm/trunk/lib/VMCore/Constants.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/Constants.cpp?rev=148691&r1=148690&r2=148691&view=diff ============================================================================== --- llvm/trunk/lib/VMCore/Constants.cpp (original) +++ llvm/trunk/lib/VMCore/Constants.cpp Mon Jan 23 02:42:38 2012 @@ -993,18 +993,33 @@ //===----------------------------------------------------------------------===// // Factory Function Implementation -ConstantAggregateZero* ConstantAggregateZero::get(Type* Ty) { +ConstantAggregateZero *ConstantAggregateZero::get(Type *Ty) { assert((Ty->isStructTy() || Ty->isArrayTy() || Ty->isVectorTy()) && "Cannot create an aggregate zero of non-aggregate type!"); - LLVMContextImpl *pImpl = Ty->getContext().pImpl; - return pImpl->AggZeroConstants.getOrCreate(Ty, 0); + OwningPtr &Entry = + Ty->getContext().pImpl->CAZConstants[Ty]; + if (Entry == 0) + Entry.reset(new ConstantAggregateZero(Ty)); + + return Entry.get(); } /// destroyConstant - Remove the constant from the constant table... /// void ConstantAggregateZero::destroyConstant() { - getType()->getContext().pImpl->AggZeroConstants.remove(this); + // Drop ownership of the CAZ object before removing the entry so that it + // doesn't get double deleted. + LLVMContextImpl::CAZMapTy &CAZConstants = getContext().pImpl->CAZConstants; + LLVMContextImpl::CAZMapTy::iterator I = CAZConstants.find(getType()); + assert(I != CAZConstants.end() && "CAZ object not in uniquing map"); + I->second.take(); + + // Actually remove the entry from the DenseMap now, which won't free the + // constant. + CAZConstants.erase(I); + + // Free the constant and any dangling references to it. 
destroyConstantImpl(); } Modified: llvm/trunk/lib/VMCore/ConstantsContext.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/ConstantsContext.h?rev=148691&r1=148690&r2=148691&view=diff ============================================================================== --- llvm/trunk/lib/VMCore/ConstantsContext.h (original) +++ llvm/trunk/lib/VMCore/ConstantsContext.h Mon Jan 23 02:42:38 2012 @@ -477,13 +477,6 @@ } }; -// ConstantAggregateZero does not take extra "value" argument... -template -struct ConstantCreator { - static ConstantAggregateZero *create(Type *Ty, const ValType &V){ - return new ConstantAggregateZero(Ty); - } -}; template<> struct ConstantKeyData { @@ -498,14 +491,6 @@ }; template<> -struct ConstantKeyData { - typedef char ValType; - static ValType getValType(ConstantAggregateZero *C) { - return 0; - } -}; - -template<> struct ConstantKeyData { typedef std::vector ValType; static ValType getValType(ConstantArray *CA) { Modified: llvm/trunk/lib/VMCore/LLVMContextImpl.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/LLVMContextImpl.cpp?rev=148691&r1=148690&r2=148691&view=diff ============================================================================== --- llvm/trunk/lib/VMCore/LLVMContextImpl.cpp (original) +++ llvm/trunk/lib/VMCore/LLVMContextImpl.cpp Mon Jan 23 02:42:38 2012 @@ -70,7 +70,7 @@ ArrayConstants.freeConstants(); StructConstants.freeConstants(); VectorConstants.freeConstants(); - AggZeroConstants.freeConstants(); + CAZConstants.clear(); NullPtrConstants.freeConstants(); UndefValueConstants.freeConstants(); InlineAsms.freeConstants(); Modified: llvm/trunk/lib/VMCore/LLVMContextImpl.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/LLVMContextImpl.h?rev=148691&r1=148690&r2=148691&view=diff ============================================================================== --- llvm/trunk/lib/VMCore/LLVMContextImpl.h (original) +++ llvm/trunk/lib/VMCore/LLVMContextImpl.h Mon Jan 23 02:42:38 2012 @@ 
-27,6 +27,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/FoldingSet.h" +#include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/StringMap.h" #include @@ -138,7 +139,8 @@ // on Context destruction. SmallPtrSet NonUniquedMDNodes; - ConstantUniqueMap AggZeroConstants; + typedef DenseMap > CAZMapTy; + CAZMapTy CAZConstants; typedef ConstantUniqueMap, ArrayRef, ArrayType, ConstantArray, true /*largekey*/> ArrayConstantsTy; From nicholas at mxc.ca Mon Jan 23 02:47:21 2012 From: nicholas at mxc.ca (Nick Lewycky) Date: Mon, 23 Jan 2012 08:47:21 -0000 Subject: [llvm-commits] [llvm] r148692 - /llvm/trunk/docs/LangRef.html Message-ID: <20120123084721.AB5582A6C12D@llvm.org> Author: nicholas Date: Mon Jan 23 02:47:21 2012 New Revision: 148692 URL: http://llvm.org/viewvc/llvm-project?rev=148692&view=rev Log: Fix broken link. Modified: llvm/trunk/docs/LangRef.html Modified: llvm/trunk/docs/LangRef.html URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/docs/LangRef.html?rev=148692&r1=148691&r2=148692&view=diff ============================================================================== --- llvm/trunk/docs/LangRef.html (original) +++ llvm/trunk/docs/LangRef.html Mon Jan 23 02:47:21 2012 @@ -1614,7 +1614,7 @@ synchronize with. These semantics are borrowed from Java and C++0x, but are somewhat more colloquial. If these descriptions aren't precise enough, check those specs (see spec references in the -atomics guide). +atomics guide). fence instructions treat these orderings somewhat differently since they don't take an address. See that instruction's documentation for details.

From sabre at nondot.org Mon Jan 23 02:52:32 2012 From: sabre at nondot.org (Chris Lattner) Date: Mon, 23 Jan 2012 08:52:32 -0000 Subject: [llvm-commits] [llvm] r148693 - in /llvm/trunk/lib/VMCore: Constants.cpp ConstantsContext.h LLVMContextImpl.cpp LLVMContextImpl.h Message-ID: <20120123085232.DC49D2A6C12C@llvm.org> Author: lattner Date: Mon Jan 23 02:52:32 2012 New Revision: 148693 URL: http://llvm.org/viewvc/llvm-project?rev=148693&view=rev Log: switch UndefValue and ConstantPointerNull over to DenseMap's for uniquing. Modified: llvm/trunk/lib/VMCore/Constants.cpp llvm/trunk/lib/VMCore/ConstantsContext.h llvm/trunk/lib/VMCore/LLVMContextImpl.cpp llvm/trunk/lib/VMCore/LLVMContextImpl.h Modified: llvm/trunk/lib/VMCore/Constants.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/Constants.cpp?rev=148693&r1=148692&r2=148693&view=diff ============================================================================== --- llvm/trunk/lib/VMCore/Constants.cpp (original) +++ llvm/trunk/lib/VMCore/Constants.cpp Mon Jan 23 02:52:32 2012 @@ -1127,13 +1127,29 @@ // ConstantPointerNull *ConstantPointerNull::get(PointerType *Ty) { - return Ty->getContext().pImpl->NullPtrConstants.getOrCreate(Ty, 0); + OwningPtr &Entry = + Ty->getContext().pImpl->CPNConstants[Ty]; + if (Entry == 0) + Entry.reset(new ConstantPointerNull(Ty)); + + return Entry.get(); } // destroyConstant - Remove the constant from the constant table... // void ConstantPointerNull::destroyConstant() { - getType()->getContext().pImpl->NullPtrConstants.remove(this); + // Drop ownership of the CPN object before removing the entry so that it + // doesn't get double deleted. + LLVMContextImpl::CPNMapTy &CPNConstants = getContext().pImpl->CPNConstants; + LLVMContextImpl::CPNMapTy::iterator I = CPNConstants.find(getType()); + assert(I != CPNConstants.end() && "CPN object not in uniquing map"); + I->second.take(); + + // Actually remove the entry from the DenseMap now, which won't free the + // constant. 
+ CPNConstants.erase(I); + + // Free the constant and any dangling references to it. destroyConstantImpl(); } @@ -1142,13 +1158,28 @@ // UndefValue *UndefValue::get(Type *Ty) { - return Ty->getContext().pImpl->UndefValueConstants.getOrCreate(Ty, 0); + OwningPtr &Entry = Ty->getContext().pImpl->UVConstants[Ty]; + if (Entry == 0) + Entry.reset(new UndefValue(Ty)); + + return Entry.get(); } // destroyConstant - Remove the constant from the constant table. // void UndefValue::destroyConstant() { - getType()->getContext().pImpl->UndefValueConstants.remove(this); + // Drop ownership of the object before removing the entry so that it + // doesn't get double deleted. + LLVMContextImpl::UVMapTy &UVConstants = getContext().pImpl->UVConstants; + LLVMContextImpl::UVMapTy::iterator I = UVConstants.find(getType()); + assert(I != UVConstants.end() && "UV object not in uniquing map"); + I->second.take(); + + // Actually remove the entry from the DenseMap now, which won't free the + // constant. + UVConstants.erase(I); + + // Free the constant and any dangling references to it. destroyConstantImpl(); } Modified: llvm/trunk/lib/VMCore/ConstantsContext.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/ConstantsContext.h?rev=148693&r1=148692&r2=148693&view=diff ============================================================================== --- llvm/trunk/lib/VMCore/ConstantsContext.h (original) +++ llvm/trunk/lib/VMCore/ConstantsContext.h Mon Jan 23 02:52:32 2012 @@ -514,37 +514,6 @@ } }; -// ConstantPointerNull does not take extra "value" argument... -template -struct ConstantCreator { - static ConstantPointerNull *create(PointerType *Ty, const ValType &V){ - return new ConstantPointerNull(Ty); - } -}; - -template<> -struct ConstantKeyData { - typedef char ValType; - static ValType getValType(ConstantPointerNull *C) { - return 0; - } -}; - -// UndefValue does not take extra "value" argument... 
-template -struct ConstantCreator { - static UndefValue *create(Type *Ty, const ValType &V) { - return new UndefValue(Ty); - } -}; - -template<> -struct ConstantKeyData { - typedef char ValType; - static ValType getValType(UndefValue *C) { - return 0; - } -}; template<> struct ConstantCreator { Modified: llvm/trunk/lib/VMCore/LLVMContextImpl.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/LLVMContextImpl.cpp?rev=148693&r1=148692&r2=148693&view=diff ============================================================================== --- llvm/trunk/lib/VMCore/LLVMContextImpl.cpp (original) +++ llvm/trunk/lib/VMCore/LLVMContextImpl.cpp Mon Jan 23 02:52:32 2012 @@ -58,6 +58,8 @@ std::vector Modules(OwnedModules.begin(), OwnedModules.end()); DeleteContainerPointers(Modules); + // Free the constants. This is important to do here to ensure that they are + // freed before the LeakDetector is torn down. std::for_each(ExprConstants.map_begin(), ExprConstants.map_end(), DropReferences()); std::for_each(ArrayConstants.map_begin(), ArrayConstants.map_end(), @@ -71,8 +73,8 @@ StructConstants.freeConstants(); VectorConstants.freeConstants(); CAZConstants.clear(); - NullPtrConstants.freeConstants(); - UndefValueConstants.freeConstants(); + CPNConstants.clear(); + UVConstants.clear(); InlineAsms.freeConstants(); DeleteContainerSeconds(IntConstants); DeleteContainerSeconds(FPConstants); Modified: llvm/trunk/lib/VMCore/LLVMContextImpl.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/LLVMContextImpl.h?rev=148693&r1=148692&r2=148693&view=diff ============================================================================== --- llvm/trunk/lib/VMCore/LLVMContextImpl.h (original) +++ llvm/trunk/lib/VMCore/LLVMContextImpl.h Mon Jan 23 02:52:32 2012 @@ -154,9 +154,11 @@ VectorType, ConstantVector> VectorConstantsTy; VectorConstantsTy VectorConstants; - ConstantUniqueMap - NullPtrConstants; - ConstantUniqueMap UndefValueConstants; + typedef DenseMap > CPNMapTy; + 
CPNMapTy CPNConstants; + + typedef DenseMap > UVMapTy; + UVMapTy UVConstants; DenseMap , BlockAddress*> BlockAddresses; ConstantUniqueMap From STPWORLD at narod.ru Mon Jan 23 03:10:57 2012 From: STPWORLD at narod.ru (Stepan Dyatkovskiy) Date: Mon, 23 Jan 2012 13:10:57 +0400 Subject: [llvm-commits] [LLVM] SwitchInst PATCH: Changes in semantics and usage. In-Reply-To: <4F1A789C.6040808@narod.ru> References: <216291326628464@web57.yandex.ru> <188821326736101@web38.yandex.ru> <179511326997999@web49.yandex.ru> <4F1A789C.6040808@narod.ru> Message-ID: <124601327309857@web136.yandex.ru> Hi all. I also updated serializing of SwitchInst in BitcodeWriter. Done the same like in other files - replaced low level operands usage with SwitchInst analogues. Please find the updated patch in attachment for review. -Stepan. -------------- next part -------------- A non-text attachment was scrubbed... Name: si-cleanup.patch Type: application/octet-stream Size: 40245 bytes Desc: not available Url : http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120123/19daf727/attachment.obj From geek4civic at gmail.com Mon Jan 23 03:14:42 2012 From: geek4civic at gmail.com (NAKAMURA Takumi) Date: Mon, 23 Jan 2012 09:14:42 -0000 Subject: [llvm-commits] [llvm] r148694 - /llvm/trunk/lib/Target/ARM/ARMAsmPrinter.cpp Message-ID: <20120123091442.EC64E2A6C12C@llvm.org> Author: chapuni Date: Mon Jan 23 03:14:42 2012 New Revision: 148694 URL: http://llvm.org/viewvc/llvm-project?rev=148694&view=rev Log: ARMAsmPrinter.cpp: Try to fix up r148686. EnableARMEHABI was also here. 
Modified: llvm/trunk/lib/Target/ARM/ARMAsmPrinter.cpp Modified: llvm/trunk/lib/Target/ARM/ARMAsmPrinter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMAsmPrinter.cpp?rev=148694&r1=148693&r2=148694&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMAsmPrinter.cpp (original) +++ llvm/trunk/lib/Target/ARM/ARMAsmPrinter.cpp Mon Jan 23 03:14:42 2012 @@ -1192,7 +1192,7 @@ } } -extern cl::opt EnableARMEHABI; +extern cl::opt EnableARMEHABI; // Simple pseudo-instructions have their lowering (with expansion to real // instructions) auto-generated. @@ -1203,7 +1203,8 @@ OutStreamer.EmitCodeRegion(); // Emit unwinding stuff for frame-related instructions - if (EnableARMEHABI && MI->getFlag(MachineInstr::FrameSetup)) + if (EnableARMEHABI != ExceptionHandling::ARMEHABIDisabled && + MI->getFlag(MachineInstr::FrameSetup)) EmitUnwindingInstruction(MI); // Do any auto-generated pseudo lowerings. From geek4civic at gmail.com Mon Jan 23 03:24:59 2012 From: geek4civic at gmail.com (NAKAMURA Takumi) Date: Mon, 23 Jan 2012 18:24:59 +0900 Subject: [llvm-commits] [llvm] r148694 - /llvm/trunk/lib/Target/ARM/ARMAsmPrinter.cpp In-Reply-To: <20120123091442.EC64E2A6C12C@llvm.org> References: <20120123091442.EC64E2A6C12C@llvm.org> Message-ID: Evgeniy, please confirm whether my fixup would be reasonable or not, thank you. ...Takumi 2012/1/23 NAKAMURA Takumi : > Author: chapuni > Date: Mon Jan 23 03:14:42 2012 > New Revision: 148694 > > URL: http://llvm.org/viewvc/llvm-project?rev=148694&view=rev > Log: > ARMAsmPrinter.cpp: Try to fix up r148686. EnableARMEHABI was also here. > > Modified: > ? 
llvm/trunk/lib/Target/ARM/ARMAsmPrinter.cpp > > Modified: llvm/trunk/lib/Target/ARM/ARMAsmPrinter.cpp > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMAsmPrinter.cpp?rev=148694&r1=148693&r2=148694&view=diff > ============================================================================== > --- llvm/trunk/lib/Target/ARM/ARMAsmPrinter.cpp (original) > +++ llvm/trunk/lib/Target/ARM/ARMAsmPrinter.cpp Mon Jan 23 03:14:42 2012 > @@ -1192,7 +1192,7 @@ >   } >  } > > -extern cl::opt EnableARMEHABI; > +extern cl::opt EnableARMEHABI; > >  // Simple pseudo-instructions have their lowering (with expansion to real >  // instructions) auto-generated. > @@ -1203,7 +1203,8 @@ >     OutStreamer.EmitCodeRegion(); > >   // Emit unwinding stuff for frame-related instructions > -  if (EnableARMEHABI && MI->getFlag(MachineInstr::FrameSetup)) > +  if (EnableARMEHABI != ExceptionHandling::ARMEHABIDisabled && > +      MI->getFlag(MachineInstr::FrameSetup)) >     EmitUnwindingInstruction(MI); > >   // Do any auto-generated pseudo lowerings. > > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits From glider at google.com Mon Jan 23 04:06:14 2012 From: glider at google.com (Alexander Potapenko) Date: Mon, 23 Jan 2012 10:06:14 -0000 Subject: [llvm-commits] [compiler-rt] r148695 - /compiler-rt/trunk/lib/asan/tests/asan_test.cc Message-ID: <20120123100614.7AA7E2A6C12C@llvm.org> Author: glider Date: Mon Jan 23 04:06:14 2012 New Revision: 148695 URL: http://llvm.org/viewvc/llvm-project?rev=148695&view=rev Log: Add a test for CFStringCreateCopy. Normally this function should not create copies of constant strings, but it does when the default CFAllocator is replaced (e.g. 
under AddressSanitizer) This test is related to http://code.google.com/p/address-sanitizer/issues/detail?id=10 Modified: compiler-rt/trunk/lib/asan/tests/asan_test.cc Modified: compiler-rt/trunk/lib/asan/tests/asan_test.cc URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/asan/tests/asan_test.cc?rev=148695&r1=148694&r2=148695&view=diff ============================================================================== --- compiler-rt/trunk/lib/asan/tests/asan_test.cc (original) +++ compiler-rt/trunk/lib/asan/tests/asan_test.cc Mon Jan 23 04:06:14 2012 @@ -29,6 +29,8 @@ #ifndef __APPLE__ #include +#else +#include #endif // __APPLE__ #ifdef __APPLE__ @@ -1894,6 +1896,14 @@ pthread_join(th, NULL); pthread_key_delete(test_key); } + +// Test that CFStringCreateCopy does not copy constant strings. +TEST(AddressSanitizerMac, DISABLED_CFStringCreateCopy) { + CFStringRef str = CFSTR("Hello world!\n"); + CFStringRef str2 = CFStringCreateCopy(0, str); + EXPECT_EQ(str, str2); +} + #endif // __APPLE__ int main(int argc, char **argv) { From glider at google.com Mon Jan 23 04:09:54 2012 From: glider at google.com (Alexander Potapenko) Date: Mon, 23 Jan 2012 10:09:54 -0000 Subject: [llvm-commits] [compiler-rt] r148696 - in /compiler-rt/trunk/lib/asan: asan_interceptors.cc asan_mac.cc asan_mac.h tests/asan_test.cc Message-ID: <20120123100954.82BDD2A6C12C@llvm.org> Author: glider Date: Mon Jan 23 04:09:54 2012 New Revision: 148696 URL: http://llvm.org/viewvc/llvm-project?rev=148696&view=rev Log: Wrap CFStringCreateCopy to prevent copying constant CF strings. 
This should fix http://code.google.com/p/address-sanitizer/issues/detail?id=10 Modified: compiler-rt/trunk/lib/asan/asan_interceptors.cc compiler-rt/trunk/lib/asan/asan_mac.cc compiler-rt/trunk/lib/asan/asan_mac.h compiler-rt/trunk/lib/asan/tests/asan_test.cc Modified: compiler-rt/trunk/lib/asan/asan_interceptors.cc URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/asan/asan_interceptors.cc?rev=148696&r1=148695&r2=148696&view=diff ============================================================================== --- compiler-rt/trunk/lib/asan/asan_interceptors.cc (original) +++ compiler-rt/trunk/lib/asan/asan_interceptors.cc Mon Jan 23 04:09:54 2012 @@ -94,6 +94,7 @@ dispatch_barrier_async_f_f real_dispatch_barrier_async_f; dispatch_group_async_f_f real_dispatch_group_async_f; pthread_workqueue_additem_np_f real_pthread_workqueue_additem_np; +CFStringCreateCopy_f real_CFStringCreateCopy; #endif sigaction_f real_sigaction; @@ -648,6 +649,14 @@ if (FLAG_v >= 2) { INTERCEPT_FUNCTION(pthread_workqueue_additem_np); } + // Normally CFStringCreateCopy should not copy constant CF strings. + // Replacing the default CFAllocator causes constant strings to be copied + // rather than just returned, which leads to bugs in big applications like + // Chromium and WebKit, see + // http://code.google.com/p/address-sanitizer/issues/detail?id=10 + // Until this problem is fixed we need to check that the string is + // non-constant before calling CFStringCreateCopy. + INTERCEPT_FUNCTION(CFStringCreateCopy); #else // On Darwin siglongjmp tailcalls longjmp, so we don't want to intercept it // there. 
Modified: compiler-rt/trunk/lib/asan/asan_mac.cc URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/asan/asan_mac.cc?rev=148696&r1=148695&r2=148696&view=diff ============================================================================== --- compiler-rt/trunk/lib/asan/asan_mac.cc (original) +++ compiler-rt/trunk/lib/asan/asan_mac.cc Mon Jan 23 04:09:54 2012 @@ -42,6 +42,7 @@ extern dispatch_barrier_async_f_f real_dispatch_barrier_async_f; extern dispatch_group_async_f_f real_dispatch_group_async_f; extern pthread_workqueue_additem_np_f real_pthread_workqueue_additem_np; +extern CFStringCreateCopy_f real_CFStringCreateCopy; void GetPcSpBp(void *context, uintptr_t *pc, uintptr_t *sp, uintptr_t *bp) { ucontext_t *ucontext = (ucontext_t*)context; @@ -514,4 +515,45 @@ itemhandlep, gencountp); } +// CF_RC_BITS, the layout of CFRuntimeBase and __CFStrIsConstant are internal +// and subject to change in further CoreFoundation versions. Apple does not +// guarantee any binary compatibility from release to release. 
+ +// See http://opensource.apple.com/source/CF/CF-635.15/CFInternal.h +#if defined(__BIG_ENDIAN__) +#define CF_RC_BITS 0 +#endif + +#if defined(__LITTLE_ENDIAN__) +#define CF_RC_BITS 3 +#endif + +// See http://opensource.apple.com/source/CF/CF-635.15/CFRuntime.h +typedef struct __CFRuntimeBase { + uintptr_t _cfisa; + uint8_t _cfinfo[4]; +#if __LP64__ + uint32_t _rc; +#endif +} CFRuntimeBase; + +// See http://opensource.apple.com/source/CF/CF-635.15/CFString.c +int __CFStrIsConstant(CFStringRef str) { + CFRuntimeBase *base = (CFRuntimeBase*)str; +#if __LP64__ + return base->_rc == 0; +#else + return (base->_cfinfo[CF_RC_BITS]) == 0; +#endif +} + +extern "C" +CFStringRef WRAP(CFStringCreateCopy)(CFAllocatorRef alloc, CFStringRef str) { + if (__CFStrIsConstant(str)) { + return str; + } else { + return real_CFStringCreateCopy(alloc, str); + } +} + #endif // __APPLE__ Modified: compiler-rt/trunk/lib/asan/asan_mac.h URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/asan/asan_mac.h?rev=148696&r1=148695&r2=148696&view=diff ============================================================================== --- compiler-rt/trunk/lib/asan/asan_mac.h (original) +++ compiler-rt/trunk/lib/asan/asan_mac.h Mon Jan 23 04:09:54 2012 @@ -22,6 +22,7 @@ #include #include #include +#include typedef void* pthread_workqueue_t; typedef void* pthread_workitem_handle_t; @@ -44,7 +45,8 @@ typedef int (*pthread_workqueue_additem_np_f)(pthread_workqueue_t workq, void *(*workitem_func)(void *), void * workitem_arg, pthread_workitem_handle_t * itemhandlep, unsigned int *gencountp); - +typedef CFStringRef (*CFStringCreateCopy_f)(CFAllocatorRef alloc, + CFStringRef str); // A wrapper for the ObjC blocks used to support libdispatch. 
typedef struct { @@ -90,6 +92,7 @@ int WRAP(pthread_workqueue_additem_np)(pthread_workqueue_t workq, void *(*workitem_func)(void *), void * workitem_arg, pthread_workitem_handle_t * itemhandlep, unsigned int *gencountp); +CFStringRef WRAP(CFStringCreateCopy)(CFAllocatorRef alloc, CFStringRef str); } #endif // ASAN_MAC_H Modified: compiler-rt/trunk/lib/asan/tests/asan_test.cc URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/asan/tests/asan_test.cc?rev=148696&r1=148695&r2=148696&view=diff ============================================================================== --- compiler-rt/trunk/lib/asan/tests/asan_test.cc (original) +++ compiler-rt/trunk/lib/asan/tests/asan_test.cc Mon Jan 23 04:09:54 2012 @@ -1898,7 +1898,7 @@ } // Test that CFStringCreateCopy does not copy constant strings. -TEST(AddressSanitizerMac, DISABLED_CFStringCreateCopy) { +TEST(AddressSanitizerMac, CFStringCreateCopy) { CFStringRef str = CFSTR("Hello world!\n"); CFStringRef str2 = CFStringCreateCopy(0, str); EXPECT_EQ(str, str2); From eugeni.stepanov at gmail.com Mon Jan 23 04:14:48 2012 From: eugeni.stepanov at gmail.com (Evgeniy Stepanov) Date: Mon, 23 Jan 2012 14:14:48 +0400 Subject: [llvm-commits] [llvm] r148694 - /llvm/trunk/lib/Target/ARM/ARMAsmPrinter.cpp In-Reply-To: References: <20120123091442.EC64E2A6C12C@llvm.org> Message-ID: Yes, this is correct. Sorry for the trouble and thank you for the fix. On Mon, Jan 23, 2012 at 1:24 PM, NAKAMURA Takumi wrote: > Evgeniy, please confirm whether my fixup would be reasonable or not, thank you. > > ...Takumi > > 2012/1/23 NAKAMURA Takumi : >> Author: chapuni >> Date: Mon Jan 23 03:14:42 2012 >> New Revision: 148694 >> >> URL: http://llvm.org/viewvc/llvm-project?rev=148694&view=rev >> Log: >> ARMAsmPrinter.cpp: Try to fix up r148686. EnableARMEHABI was also here. >> >> Modified: >> ? 
llvm/trunk/lib/Target/ARM/ARMAsmPrinter.cpp >> >> Modified: llvm/trunk/lib/Target/ARM/ARMAsmPrinter.cpp >> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMAsmPrinter.cpp?rev=148694&r1=148693&r2=148694&view=diff >> ============================================================================== >> --- llvm/trunk/lib/Target/ARM/ARMAsmPrinter.cpp (original) >> +++ llvm/trunk/lib/Target/ARM/ARMAsmPrinter.cpp Mon Jan 23 03:14:42 2012 >> @@ -1192,7 +1192,7 @@ >>   } >>  } >> >> -extern cl::opt EnableARMEHABI; >> +extern cl::opt EnableARMEHABI; >> >>  // Simple pseudo-instructions have their lowering (with expansion to real >>  // instructions) auto-generated. >> @@ -1203,7 +1203,8 @@ >>     OutStreamer.EmitCodeRegion(); >> >>   // Emit unwinding stuff for frame-related instructions >> -  if (EnableARMEHABI && MI->getFlag(MachineInstr::FrameSetup)) >> +  if (EnableARMEHABI != ExceptionHandling::ARMEHABIDisabled && >> +      MI->getFlag(MachineInstr::FrameSetup)) >>     EmitUnwindingInstruction(MI); >> >>   // Do any auto-generated pseudo lowerings. 
>> >> >> _______________________________________________ >> llvm-commits mailing list >> llvm-commits at cs.uiuc.edu >> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits From glider at google.com Mon Jan 23 05:22:43 2012 From: glider at google.com (Alexander Potapenko) Date: Mon, 23 Jan 2012 11:22:43 -0000 Subject: [llvm-commits] [llvm] r148697 - /llvm/trunk/lib/Transforms/Instrumentation/AddressSanitizer.cpp Message-ID: <20120123112243.BE3122A6C12C@llvm.org> Author: glider Date: Mon Jan 23 05:22:43 2012 New Revision: 148697 URL: http://llvm.org/viewvc/llvm-project?rev=148697&view=rev Log: Implemented AddressSanitizer::getPassName() Modified: llvm/trunk/lib/Transforms/Instrumentation/AddressSanitizer.cpp Modified: llvm/trunk/lib/Transforms/Instrumentation/AddressSanitizer.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Instrumentation/AddressSanitizer.cpp?rev=148697&r1=148696&r2=148697&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/Instrumentation/AddressSanitizer.cpp (original) +++ llvm/trunk/lib/Transforms/Instrumentation/AddressSanitizer.cpp Mon Jan 23 05:22:43 2012 @@ -143,6 +143,7 @@ /// AddressSanitizer: instrument the code in module to find memory bugs. struct AddressSanitizer : public ModulePass { AddressSanitizer(); + virtual const char *getPassName() const; void instrumentMop(Instruction *I); void instrumentAddress(Instruction *OrigIns, IRBuilder<> &IRB, Value *Addr, uint32_t TypeSize, bool IsWrite); @@ -205,6 +206,10 @@ return new AddressSanitizer(); } +const char *AddressSanitizer::getPassName() const { + return "AddressSanitizer"; +} + // Create a constant for Str so that we can pass it to the run-time lib. 
static GlobalVariable *createPrivateGlobalForString(Module &M, StringRef Str) { Constant *StrConst = ConstantArray::get(M.getContext(), Str); From glider at google.com Mon Jan 23 05:40:13 2012 From: glider at google.com (Alexander Potapenko) Date: Mon, 23 Jan 2012 15:40:13 +0400 Subject: [llvm-commits] [llvm] r148691 - in /llvm/trunk/lib/VMCore: Constants.cpp ConstantsContext.h LLVMContextImpl.cpp LLVMContextImpl.h In-Reply-To: <20120123084239.1B8B62A6C12D@llvm.org> References: <20120123084239.1B8B62A6C12D@llvm.org> Message-ID: Hi Chris, I believe this patch has broken AddressSanitizer (and probably other patches). I get the following error message from clang r148696: clang: /usr/local/google/asan/asan-llvm-trunk/llvm/include/llvm/ADT/OwningPtr.h:34: llvm::OwningPtr::OwningPtr(const llvm::OwningPtr&) [with T = llvm::ConstantAggregateZero]: Assertion `RHS.Ptr == 0 && "Only null OwningPtr's are copyable!"' failed. 0 clang 0x0000000001dad27f 1 clang 0x0000000001daf4f2 2 libpthread.so.0 0x00007ff3b2e7b8f0 3 libc.so.6 0x00007ff3b216aa75 gsignal + 53 4 libc.so.6 0x00007ff3b216e5c0 abort + 384 5 libc.so.6 0x00007ff3b2163941 __assert_fail + 241 6 clang 0x0000000001c95949 7 clang 0x0000000001c90c15 llvm::ConstantAggregateZero::get(llvm::Type*) + 677 8 clang 0x0000000001c94548 llvm::ConstantStruct::get(llvm::StructType*, ...) 
+ 248 9 clang 0x00000000011d2a41 10 clang 0x00000000011d3d9c 11 clang 0x0000000001d4c5e1 llvm::MPPassManager::runOnModule(llvm::Module&) + 497 12 clang 0x0000000001d4c76b llvm::PassManagerImpl::run(llvm::Module&) + 187 13 clang 0x00000000007a8945 clang::EmitBackendOutput(clang::DiagnosticsEngine&, clang::CodeGenOptions const&, clang::TargetOptions const&, clang::LangOptions const&, llvm::Module*, clang::BackendAction, llvm::raw_ostream*) + 1525 14 clang 0x00000000007a5c01 15 clang 0x00000000008ffd2f clang::ParseAST(clang::Sema&, bool) + 511 16 clang 0x00000000007a47d4 clang::CodeGenAction::ExecuteAction() + 68 17 clang 0x000000000064b271 clang::CompilerInstance::ExecuteAction(clang::FrontendAction&) + 321 18 clang 0x0000000000634369 clang::ExecuteCompilerInvocation(clang::CompilerInstance*) + 1401 19 clang 0x000000000062a339 cc1_main(char const**, char const**, char const*, void*) + 745 20 clang 0x0000000000633137 main + 7207 21 libc.so.6 0x00007ff3b2155c4d __libc_start_main + 253 22 clang 0x0000000000627d49 Stack dump: 0. Program arguments: /usr/local/google/asan/asan-llvm-trunk/llvm/build/Release+Asserts/bin/clang -cc1 -triple i386-unknown-linux-gnu -emit-obj -disable-free -main-file-name asan_test.cc -mrelocation-model static -masm-verbose -mconstructor-aliases -target-cpu pentium4 -target-linker-version 2.20.1 -momit-leaf-frame-pointer -g -coverage-file bin_linux/asan_test32.o -resource-dir /usr/local/google/asan/asan-llvm-trunk/llvm/build/Release+Asserts/bin/../lib/clang/3.1 -D ASAN_UAR=0 -D ASAN_HAS_EXCEPTIONS=1 -D ASAN_NEEDS_SEGV=1 -D ASAN_HAS_BLACKLIST=1 -I third_party/googletest/include -I . 
-fmodule-cache-path /var/tmp/clang-module-cache -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/4.4/../../../../include/c++/4.4 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/4.4/../../../../include/c++/4.4/x86_64-linux-gnu/32 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/4.4/../../../../include/c++/4.4/backward -internal-isystem /usr/local/include -internal-isystem /usr/local/google/asan/asan-llvm-trunk/llvm/build/Release+Asserts/bin/../lib/clang/3.1/include -internal-externc-isystem /usr/include/i486-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wall -fdeprecated-macro -faddress-sanitizer -fdebug-compilation-dir /usr/local/google/asan/asan-llvm-trunk/llvm/projects/compiler-rt/lib/asan -ferror-limit 19 -fmessage-length 221 -fvisibility hidden -mstackrealign -fgnu-runtime -fobjc-runtime-has-arc -fobjc-runtime-has-weak -fobjc-fragile-abi -fcxx-exceptions -fexceptions -fdiagnostics-show-option -fcolor-diagnostics -mllvm -asan-blacklist=/usr/local/google/asan/asan-llvm-trunk/llvm/projects/compiler-rt/lib/asan/tests/asan_test.ignore -mllvm -asan-stack=1 -mllvm -asan-globals=1 -mllvm -asan-mapping-scale=0 -mllvm -asan-mapping-offset-log=-1 -mllvm -asan-use-after-return=0 test.ii 1. parser at end of file 2. Per-module optimization passes 3. Running pass 'AddressSanitizer' on module 'test.ii'. (I'll prepare a smaller reproducer and file a bug soon) From glider at google.com Mon Jan 23 05:55:07 2012 From: glider at google.com (Alexander Potapenko) Date: Mon, 23 Jan 2012 15:55:07 +0400 Subject: [llvm-commits] [llvm] r148691 - in /llvm/trunk/lib/VMCore: Constants.cpp ConstantsContext.h LLVMContextImpl.cpp LLVMContextImpl.h In-Reply-To: References: <20120123084239.1B8B62A6C12D@llvm.org> Message-ID: On Mon, Jan 23, 2012 at 3:40 PM, Alexander Potapenko wrote: > Hi Chris, > > I believe this patch has broken AddressSanitizer (and probably other patches). 
> I get the following error message from clang r148696: > s/patches/passes From geek4civic at gmail.com Mon Jan 23 06:59:32 2012 From: geek4civic at gmail.com (NAKAMURA Takumi) Date: Mon, 23 Jan 2012 21:59:32 +0900 Subject: [llvm-commits] [llvm] r148691 - in /llvm/trunk/lib/VMCore: Constants.cpp ConstantsContext.h LLVMContextImpl.cpp LLVMContextImpl.h In-Reply-To: <20120123084239.1B8B62A6C12D@llvm.org> References: <20120123084239.1B8B62A6C12D@llvm.org> Message-ID: 2012/1/23 Chris Lattner : > Author: lattner > Date: Mon Jan 23 02:42:38 2012 > New Revision: 148691 > > URL: http://llvm.org/viewvc/llvm-project?rev=148691&view=rev > Log: > Replace a use of ConstantUniqueMap for CAZ constants with a simple DenseMap. > Now that the type system rewrite has landed, there is no need for its > complexity and std::map'ness. > > Modified: > llvm/trunk/lib/VMCore/Constants.cpp > llvm/trunk/lib/VMCore/ConstantsContext.h > llvm/trunk/lib/VMCore/LLVMContextImpl.cpp > llvm/trunk/lib/VMCore/LLVMContextImpl.h Chris, it broke stage2 build. I am investigating. ...Takumi From clattner at apple.com Mon Jan 23 09:09:24 2012 From: clattner at apple.com (Chris Lattner) Date: Mon, 23 Jan 2012 07:09:24 -0800 Subject: [llvm-commits] [llvm] r148691 - in /llvm/trunk/lib/VMCore: Constants.cpp ConstantsContext.h LLVMContextImpl.cpp LLVMContextImpl.h In-Reply-To: References: <20120123084239.1B8B62A6C12D@llvm.org> Message-ID: <55ABD2F1-7522-485C-8140-62C80C2D8F78@apple.com> Ah, I forgot that dense map needs to resize at some point. Too bad we don't have move semantics to fix this with. I'll correct it, sorry for the breakage! -Chris On Jan 23, 2012, at 4:59 AM, NAKAMURA Takumi wrote: > 2012/1/23 Chris Lattner : >> Author: lattner >> Date: Mon Jan 23 02:42:38 2012 >> New Revision: 148691 >> >> URL: http://llvm.org/viewvc/llvm-project?rev=148691&view=rev >> Log: >> Replace a use of ConstantUniqueMap for CAZ constants with a simple DenseMap. 
>> Now that the type system rewrite has landed, there is no need for its >> complexity and std::map'ness. >> >> Modified: >> llvm/trunk/lib/VMCore/Constants.cpp >> llvm/trunk/lib/VMCore/ConstantsContext.h >> llvm/trunk/lib/VMCore/LLVMContextImpl.cpp >> llvm/trunk/lib/VMCore/LLVMContextImpl.h > > Chris, it broke stage2 build. I am investigating. > > ...Takumi > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits From glider at google.com Mon Jan 23 09:10:37 2012 From: glider at google.com (Alexander Potapenko) Date: Mon, 23 Jan 2012 19:10:37 +0400 Subject: [llvm-commits] [llvm] r148691 - in /llvm/trunk/lib/VMCore: Constants.cpp ConstantsContext.h LLVMContextImpl.cpp LLVMContextImpl.h In-Reply-To: References: <20120123084239.1B8B62A6C12D@llvm.org> Message-ID: Here's where I got today. run.sh is a script for multidelta that compiles test.ii with -faddress-sanitizer (see http://code.google.com/p/address-sanitizer/wiki/HowToBuild for build instructions) On Mon, Jan 23, 2012 at 4:59 PM, NAKAMURA Takumi wrote: > 2012/1/23 Chris Lattner : >> Author: lattner >> Date: Mon Jan 23 02:42:38 2012 >> New Revision: 148691 >> >> URL: http://llvm.org/viewvc/llvm-project?rev=148691&view=rev >> Log: >> Replace a use of ConstantUniqueMap for CAZ constants with a simple DenseMap. >> Now that the type system rewrite has landed, there is no need for its >> complexity and std::map'ness. >> >> Modified: >> ? ?llvm/trunk/lib/VMCore/Constants.cpp >> ? ?llvm/trunk/lib/VMCore/ConstantsContext.h >> ? ?llvm/trunk/lib/VMCore/LLVMContextImpl.cpp >> ? ?llvm/trunk/lib/VMCore/LLVMContextImpl.h > > Chris, it broke stage2 build. I am investigating. 
> > ...Takumi > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits -- Alexander Potapenko Software Engineer Google Moscow -------------- next part -------------- A non-text attachment was scrubbed... Name: run.sh Type: application/x-sh Size: 904 bytes Desc: not available Url : http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120123/634b7329/attachment.sh -------------- next part -------------- A non-text attachment was scrubbed... Name: test.ii Type: application/octet-stream Size: 12817 bytes Desc: not available Url : http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120123/634b7329/attachment.obj From sabre at nondot.org Mon Jan 23 09:09:45 2012 From: sabre at nondot.org (Chris Lattner) Date: Mon, 23 Jan 2012 15:09:45 -0000 Subject: [llvm-commits] [llvm] r148698 - in /llvm/trunk/lib/VMCore: Constants.cpp ConstantsContext.h LLVMContextImpl.cpp LLVMContextImpl.h Message-ID: <20120123150945.2C7C52A6C12C@llvm.org> Author: lattner Date: Mon Jan 23 09:09:44 2012 New Revision: 148698 URL: http://llvm.org/viewvc/llvm-project?rev=148698&view=rev Log: revert r148691 and 148693 Modified: llvm/trunk/lib/VMCore/Constants.cpp llvm/trunk/lib/VMCore/ConstantsContext.h llvm/trunk/lib/VMCore/LLVMContextImpl.cpp llvm/trunk/lib/VMCore/LLVMContextImpl.h Modified: llvm/trunk/lib/VMCore/Constants.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/Constants.cpp?rev=148698&r1=148697&r2=148698&view=diff ============================================================================== --- llvm/trunk/lib/VMCore/Constants.cpp (original) +++ llvm/trunk/lib/VMCore/Constants.cpp Mon Jan 23 09:09:44 2012 @@ -993,33 +993,18 @@ //===----------------------------------------------------------------------===// // Factory Function Implementation -ConstantAggregateZero *ConstantAggregateZero::get(Type *Ty) { +ConstantAggregateZero* 
ConstantAggregateZero::get(Type* Ty) { assert((Ty->isStructTy() || Ty->isArrayTy() || Ty->isVectorTy()) && "Cannot create an aggregate zero of non-aggregate type!"); - OwningPtr &Entry = - Ty->getContext().pImpl->CAZConstants[Ty]; - if (Entry == 0) - Entry.reset(new ConstantAggregateZero(Ty)); - - return Entry.get(); + LLVMContextImpl *pImpl = Ty->getContext().pImpl; + return pImpl->AggZeroConstants.getOrCreate(Ty, 0); } /// destroyConstant - Remove the constant from the constant table... /// void ConstantAggregateZero::destroyConstant() { - // Drop ownership of the CAZ object before removing the entry so that it - // doesn't get double deleted. - LLVMContextImpl::CAZMapTy &CAZConstants = getContext().pImpl->CAZConstants; - LLVMContextImpl::CAZMapTy::iterator I = CAZConstants.find(getType()); - assert(I != CAZConstants.end() && "CAZ object not in uniquing map"); - I->second.take(); - - // Actually remove the entry from the DenseMap now, which won't free the - // constant. - CAZConstants.erase(I); - - // Free the constant and any dangling references to it. + getType()->getContext().pImpl->AggZeroConstants.remove(this); destroyConstantImpl(); } @@ -1127,29 +1112,13 @@ // ConstantPointerNull *ConstantPointerNull::get(PointerType *Ty) { - OwningPtr &Entry = - Ty->getContext().pImpl->CPNConstants[Ty]; - if (Entry == 0) - Entry.reset(new ConstantPointerNull(Ty)); - - return Entry.get(); + return Ty->getContext().pImpl->NullPtrConstants.getOrCreate(Ty, 0); } // destroyConstant - Remove the constant from the constant table... // void ConstantPointerNull::destroyConstant() { - // Drop ownership of the CPN object before removing the entry so that it - // doesn't get double deleted. 
- LLVMContextImpl::CPNMapTy &CPNConstants = getContext().pImpl->CPNConstants; - LLVMContextImpl::CPNMapTy::iterator I = CPNConstants.find(getType()); - assert(I != CPNConstants.end() && "CPN object not in uniquing map"); - I->second.take(); - - // Actually remove the entry from the DenseMap now, which won't free the - // constant. - CPNConstants.erase(I); - - // Free the constant and any dangling references to it. + getType()->getContext().pImpl->NullPtrConstants.remove(this); destroyConstantImpl(); } @@ -1158,28 +1127,13 @@ // UndefValue *UndefValue::get(Type *Ty) { - OwningPtr &Entry = Ty->getContext().pImpl->UVConstants[Ty]; - if (Entry == 0) - Entry.reset(new UndefValue(Ty)); - - return Entry.get(); + return Ty->getContext().pImpl->UndefValueConstants.getOrCreate(Ty, 0); } // destroyConstant - Remove the constant from the constant table. // void UndefValue::destroyConstant() { - // Drop ownership of the object before removing the entry so that it - // doesn't get double deleted. - LLVMContextImpl::UVMapTy &UVConstants = getContext().pImpl->UVConstants; - LLVMContextImpl::UVMapTy::iterator I = UVConstants.find(getType()); - assert(I != UVConstants.end() && "UV object not in uniquing map"); - I->second.take(); - - // Actually remove the entry from the DenseMap now, which won't free the - // constant. - UVConstants.erase(I); - - // Free the constant and any dangling references to it. + getType()->getContext().pImpl->UndefValueConstants.remove(this); destroyConstantImpl(); } Modified: llvm/trunk/lib/VMCore/ConstantsContext.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/ConstantsContext.h?rev=148698&r1=148697&r2=148698&view=diff ============================================================================== --- llvm/trunk/lib/VMCore/ConstantsContext.h (original) +++ llvm/trunk/lib/VMCore/ConstantsContext.h Mon Jan 23 09:09:44 2012 @@ -477,6 +477,13 @@ } }; +// ConstantAggregateZero does not take extra "value" argument... 
+template +struct ConstantCreator { + static ConstantAggregateZero *create(Type *Ty, const ValType &V){ + return new ConstantAggregateZero(Ty); + } +}; template<> struct ConstantKeyData { @@ -491,6 +498,14 @@ }; template<> +struct ConstantKeyData { + typedef char ValType; + static ValType getValType(ConstantAggregateZero *C) { + return 0; + } +}; + +template<> struct ConstantKeyData { typedef std::vector ValType; static ValType getValType(ConstantArray *CA) { @@ -514,6 +529,37 @@ } }; +// ConstantPointerNull does not take extra "value" argument... +template +struct ConstantCreator { + static ConstantPointerNull *create(PointerType *Ty, const ValType &V){ + return new ConstantPointerNull(Ty); + } +}; + +template<> +struct ConstantKeyData { + typedef char ValType; + static ValType getValType(ConstantPointerNull *C) { + return 0; + } +}; + +// UndefValue does not take extra "value" argument... +template +struct ConstantCreator { + static UndefValue *create(Type *Ty, const ValType &V) { + return new UndefValue(Ty); + } +}; + +template<> +struct ConstantKeyData { + typedef char ValType; + static ValType getValType(UndefValue *C) { + return 0; + } +}; template<> struct ConstantCreator { Modified: llvm/trunk/lib/VMCore/LLVMContextImpl.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/LLVMContextImpl.cpp?rev=148698&r1=148697&r2=148698&view=diff ============================================================================== --- llvm/trunk/lib/VMCore/LLVMContextImpl.cpp (original) +++ llvm/trunk/lib/VMCore/LLVMContextImpl.cpp Mon Jan 23 09:09:44 2012 @@ -58,8 +58,6 @@ std::vector Modules(OwnedModules.begin(), OwnedModules.end()); DeleteContainerPointers(Modules); - // Free the constants. This is important to do here to ensure that they are - // freed before the LeakDetector is torn down. 
std::for_each(ExprConstants.map_begin(), ExprConstants.map_end(), DropReferences()); std::for_each(ArrayConstants.map_begin(), ArrayConstants.map_end(), @@ -72,9 +70,9 @@ ArrayConstants.freeConstants(); StructConstants.freeConstants(); VectorConstants.freeConstants(); - CAZConstants.clear(); - CPNConstants.clear(); - UVConstants.clear(); + AggZeroConstants.freeConstants(); + NullPtrConstants.freeConstants(); + UndefValueConstants.freeConstants(); InlineAsms.freeConstants(); DeleteContainerSeconds(IntConstants); DeleteContainerSeconds(FPConstants); Modified: llvm/trunk/lib/VMCore/LLVMContextImpl.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/LLVMContextImpl.h?rev=148698&r1=148697&r2=148698&view=diff ============================================================================== --- llvm/trunk/lib/VMCore/LLVMContextImpl.h (original) +++ llvm/trunk/lib/VMCore/LLVMContextImpl.h Mon Jan 23 09:09:44 2012 @@ -27,7 +27,6 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/FoldingSet.h" -#include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/StringMap.h" #include @@ -139,8 +138,7 @@ // on Context destruction. 
SmallPtrSet NonUniquedMDNodes; - typedef DenseMap > CAZMapTy; - CAZMapTy CAZConstants; + ConstantUniqueMap AggZeroConstants; typedef ConstantUniqueMap, ArrayRef, ArrayType, ConstantArray, true /*largekey*/> ArrayConstantsTy; @@ -154,11 +152,9 @@ VectorType, ConstantVector> VectorConstantsTy; VectorConstantsTy VectorConstants; - typedef DenseMap > CPNMapTy; - CPNMapTy CPNConstants; - - typedef DenseMap > UVMapTy; - UVMapTy UVConstants; + ConstantUniqueMap + NullPtrConstants; + ConstantUniqueMap UndefValueConstants; DenseMap , BlockAddress*> BlockAddresses; ConstantUniqueMap From sabre at nondot.org Mon Jan 23 09:10:41 2012 From: sabre at nondot.org (Chris Lattner) Date: Mon, 23 Jan 2012 15:10:41 -0000 Subject: [llvm-commits] [llvm] r148699 - /llvm/trunk/include/llvm/ADT/OwningPtr.h Message-ID: <20120123151041.82CFF2A6C12C@llvm.org> Author: lattner Date: Mon Jan 23 09:10:41 2012 New Revision: 148699 URL: http://llvm.org/viewvc/llvm-project?rev=148699&view=rev Log: revert r148688 too, this isn't safe for DenseMap use. When DenseMap resizes, it will need to copy around arbitrary pointers Modified: llvm/trunk/include/llvm/ADT/OwningPtr.h Modified: llvm/trunk/include/llvm/ADT/OwningPtr.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/ADT/OwningPtr.h?rev=148699&r1=148698&r2=148699&view=diff ============================================================================== --- llvm/trunk/include/llvm/ADT/OwningPtr.h (original) +++ llvm/trunk/include/llvm/ADT/OwningPtr.h Mon Jan 23 09:10:41 2012 @@ -25,15 +25,12 @@ /// pointee object can be taken away from OwningPtr by using the take method. 
template class OwningPtr { - OwningPtr &operator=(const OwningPtr &); // DO NOT IMPLEMENT + OwningPtr(OwningPtr const &); // DO NOT IMPLEMENT + OwningPtr &operator=(OwningPtr const &); // DO NOT IMPLEMENT T *Ptr; public: explicit OwningPtr(T *P = 0) : Ptr(P) {} - OwningPtr(const OwningPtr &RHS) : Ptr(0) { - assert(RHS.Ptr == 0 && "Only null OwningPtr's are copyable!"); - } - ~OwningPtr() { delete Ptr; } From sabre at nondot.org Mon Jan 23 09:20:12 2012 From: sabre at nondot.org (Chris Lattner) Date: Mon, 23 Jan 2012 15:20:12 -0000 Subject: [llvm-commits] [llvm] r148700 - in /llvm/trunk/lib/VMCore: Constants.cpp ConstantsContext.h LLVMContextImpl.cpp LLVMContextImpl.h Message-ID: <20120123152012.DE3322A6C12C@llvm.org> Author: lattner Date: Mon Jan 23 09:20:12 2012 New Revision: 148700 URL: http://llvm.org/viewvc/llvm-project?rev=148700&view=rev Log: convert CAZ, UndefValue, and CPN to use DenseMap's again, this time without using OwningPtr. OwningPtr would barf when the densemap had to reallocate, which doesn't appear to happen on the regression test suite, but obviously happens in real life :) Modified: llvm/trunk/lib/VMCore/Constants.cpp llvm/trunk/lib/VMCore/ConstantsContext.h llvm/trunk/lib/VMCore/LLVMContextImpl.cpp llvm/trunk/lib/VMCore/LLVMContextImpl.h Modified: llvm/trunk/lib/VMCore/Constants.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/Constants.cpp?rev=148700&r1=148699&r2=148700&view=diff ============================================================================== --- llvm/trunk/lib/VMCore/Constants.cpp (original) +++ llvm/trunk/lib/VMCore/Constants.cpp Mon Jan 23 09:20:12 2012 @@ -993,18 +993,21 @@ //===----------------------------------------------------------------------===// // Factory Function Implementation -ConstantAggregateZero* ConstantAggregateZero::get(Type* Ty) { +ConstantAggregateZero *ConstantAggregateZero::get(Type *Ty) { assert((Ty->isStructTy() || Ty->isArrayTy() || Ty->isVectorTy()) && "Cannot create an aggregate 
zero of non-aggregate type!"); - LLVMContextImpl *pImpl = Ty->getContext().pImpl; - return pImpl->AggZeroConstants.getOrCreate(Ty, 0); + ConstantAggregateZero *&Entry = Ty->getContext().pImpl->CAZConstants[Ty]; + if (Entry == 0) + Entry = new ConstantAggregateZero(Ty); + + return Entry; } /// destroyConstant - Remove the constant from the constant table... /// void ConstantAggregateZero::destroyConstant() { - getType()->getContext().pImpl->AggZeroConstants.remove(this); + getContext().pImpl->CAZConstants.erase(getType()); destroyConstantImpl(); } @@ -1112,13 +1115,18 @@ // ConstantPointerNull *ConstantPointerNull::get(PointerType *Ty) { - return Ty->getContext().pImpl->NullPtrConstants.getOrCreate(Ty, 0); + ConstantPointerNull *&Entry = Ty->getContext().pImpl->CPNConstants[Ty]; + if (Entry == 0) + Entry = new ConstantPointerNull(Ty); + + return Entry; } // destroyConstant - Remove the constant from the constant table... // void ConstantPointerNull::destroyConstant() { - getType()->getContext().pImpl->NullPtrConstants.remove(this); + getContext().pImpl->CPNConstants.erase(getType()); + // Free the constant and any dangling references to it. destroyConstantImpl(); } @@ -1127,13 +1135,18 @@ // UndefValue *UndefValue::get(Type *Ty) { - return Ty->getContext().pImpl->UndefValueConstants.getOrCreate(Ty, 0); + UndefValue *&Entry = Ty->getContext().pImpl->UVConstants[Ty]; + if (Entry == 0) + Entry = new UndefValue(Ty); + + return Entry; } // destroyConstant - Remove the constant from the constant table. // void UndefValue::destroyConstant() { - getType()->getContext().pImpl->UndefValueConstants.remove(this); + // Free the constant and any dangling references to it. 
+ getContext().pImpl->UVConstants.erase(getType()); destroyConstantImpl(); } Modified: llvm/trunk/lib/VMCore/ConstantsContext.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/ConstantsContext.h?rev=148700&r1=148699&r2=148700&view=diff ============================================================================== --- llvm/trunk/lib/VMCore/ConstantsContext.h (original) +++ llvm/trunk/lib/VMCore/ConstantsContext.h Mon Jan 23 09:20:12 2012 @@ -477,13 +477,6 @@ } }; -// ConstantAggregateZero does not take extra "value" argument... -template -struct ConstantCreator { - static ConstantAggregateZero *create(Type *Ty, const ValType &V){ - return new ConstantAggregateZero(Ty); - } -}; template<> struct ConstantKeyData { @@ -498,14 +491,6 @@ }; template<> -struct ConstantKeyData { - typedef char ValType; - static ValType getValType(ConstantAggregateZero *C) { - return 0; - } -}; - -template<> struct ConstantKeyData { typedef std::vector ValType; static ValType getValType(ConstantArray *CA) { @@ -529,37 +514,6 @@ } }; -// ConstantPointerNull does not take extra "value" argument... -template -struct ConstantCreator { - static ConstantPointerNull *create(PointerType *Ty, const ValType &V){ - return new ConstantPointerNull(Ty); - } -}; - -template<> -struct ConstantKeyData { - typedef char ValType; - static ValType getValType(ConstantPointerNull *C) { - return 0; - } -}; - -// UndefValue does not take extra "value" argument... 
-template -struct ConstantCreator { - static UndefValue *create(Type *Ty, const ValType &V) { - return new UndefValue(Ty); - } -}; - -template<> -struct ConstantKeyData { - typedef char ValType; - static ValType getValType(UndefValue *C) { - return 0; - } -}; template<> struct ConstantCreator { Modified: llvm/trunk/lib/VMCore/LLVMContextImpl.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/LLVMContextImpl.cpp?rev=148700&r1=148699&r2=148700&view=diff ============================================================================== --- llvm/trunk/lib/VMCore/LLVMContextImpl.cpp (original) +++ llvm/trunk/lib/VMCore/LLVMContextImpl.cpp Mon Jan 23 09:20:12 2012 @@ -58,6 +58,8 @@ std::vector Modules(OwnedModules.begin(), OwnedModules.end()); DeleteContainerPointers(Modules); + // Free the constants. This is important to do here to ensure that they are + // freed before the LeakDetector is torn down. std::for_each(ExprConstants.map_begin(), ExprConstants.map_end(), DropReferences()); std::for_each(ArrayConstants.map_begin(), ArrayConstants.map_end(), @@ -70,9 +72,9 @@ ArrayConstants.freeConstants(); StructConstants.freeConstants(); VectorConstants.freeConstants(); - AggZeroConstants.freeConstants(); - NullPtrConstants.freeConstants(); - UndefValueConstants.freeConstants(); + DeleteContainerSeconds(CAZConstants); + DeleteContainerSeconds(CPNConstants); + DeleteContainerSeconds(UVConstants); InlineAsms.freeConstants(); DeleteContainerSeconds(IntConstants); DeleteContainerSeconds(FPConstants); Modified: llvm/trunk/lib/VMCore/LLVMContextImpl.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/LLVMContextImpl.h?rev=148700&r1=148699&r2=148700&view=diff ============================================================================== --- llvm/trunk/lib/VMCore/LLVMContextImpl.h (original) +++ llvm/trunk/lib/VMCore/LLVMContextImpl.h Mon Jan 23 09:20:12 2012 @@ -138,7 +138,7 @@ // on Context destruction. 
SmallPtrSet NonUniquedMDNodes; - ConstantUniqueMap AggZeroConstants; + DenseMap CAZConstants; typedef ConstantUniqueMap, ArrayRef, ArrayType, ConstantArray, true /*largekey*/> ArrayConstantsTy; @@ -152,9 +152,9 @@ VectorType, ConstantVector> VectorConstantsTy; VectorConstantsTy VectorConstants; - ConstantUniqueMap - NullPtrConstants; - ConstantUniqueMap UndefValueConstants; + DenseMap CPNConstants; + + DenseMap UVConstants; DenseMap , BlockAddress*> BlockAddresses; ConstantUniqueMap From sabre at nondot.org Mon Jan 23 09:24:35 2012 From: sabre at nondot.org (Chris Lattner) Date: Mon, 23 Jan 2012 07:24:35 -0800 Subject: [llvm-commits] [llvm] r148691 - in /llvm/trunk/lib/VMCore: Constants.cpp ConstantsContext.h LLVMContextImpl.cpp LLVMContextImpl.h In-Reply-To: References: <20120123084239.1B8B62A6C12D@llvm.org> Message-ID: On Jan 23, 2012, at 7:10 AM, Alexander Potapenko wrote: > Here's where I got today. > run.sh is a script for multidelta that compiles test.ii with > -faddress-sanitizer (see > http://code.google.com/p/address-sanitizer/wiki/HowToBuild for build > instructions) Please try with r148700, thanks! -Chris > > > On Mon, Jan 23, 2012 at 4:59 PM, NAKAMURA Takumi wrote: >> 2012/1/23 Chris Lattner : >>> Author: lattner >>> Date: Mon Jan 23 02:42:38 2012 >>> New Revision: 148691 >>> >>> URL: http://llvm.org/viewvc/llvm-project?rev=148691&view=rev >>> Log: >>> Replace a use of ConstantUniqueMap for CAZ constants with a simple DenseMap. >>> Now that the type system rewrite has landed, there is no need for its >>> complexity and std::map'ness. >>> >>> Modified: >>> llvm/trunk/lib/VMCore/Constants.cpp >>> llvm/trunk/lib/VMCore/ConstantsContext.h >>> llvm/trunk/lib/VMCore/LLVMContextImpl.cpp >>> llvm/trunk/lib/VMCore/LLVMContextImpl.h >> >> Chris, it broke stage2 build. I am investigating. 
>> >> ...Takumi >> >> _______________________________________________ >> llvm-commits mailing list >> llvm-commits at cs.uiuc.edu >> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits > > > > -- > Alexander Potapenko > Software Engineer > Google Moscow > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits From eirc.lew at gmail.com Mon Jan 23 11:07:16 2012 From: eirc.lew at gmail.com (eirc.lew at gmail.com) Date: Mon, 23 Jan 2012 12:07:16 -0500 Subject: [llvm-commits] FW: static taint analysis in LLVM Message-ID: <4f1d93c6.5121e00a.7716.6f4a@mx.google.com> Hi all, I want to know whether LLVM supports static taint analysis. If it does, does it support interprocedural taint analysis? Thanks in advance! Xingjing Lu -------------- next part -------------- An HTML attachment was scrubbed... URL: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120123/85153e34/attachment.html From stoklund at 2pi.dk Mon Jan 23 12:35:32 2012 From: stoklund at 2pi.dk (Jakob Stoklund Olesen) Date: Mon, 23 Jan 2012 10:35:32 -0800 Subject: [llvm-commits] [llvm] r147286 - in /llvm/trunk: lib/Transforms/Utils/SimplifyCFG.cpp test/Transforms/SimplifyCFG/preserve-branchweights.ll In-Reply-To: <4F1CE13B.2090908@mxc.ca> References: <20111227043152.F2AE42A6C12C@llvm.org> <4F1CE13B.2090908@mxc.ca> Message-ID: <2D02DA62-8677-4481-8127-9952932D187C@2pi.dk> On Jan 22, 2012, at 8:25 PM, Nick Lewycky wrote: > So, I've tried to implement this and ended up with the patch attached. Any suggestions for improvement would be appreciated. Thanks, Nick. The patch looks good to me. BTW, are you actually benefiting from computing everything in APInt? We know the ranges of these values. > You still need the GCD stuff, otherwise you could end up with branch weights 'i32 5', 'i32 5'. That might be legal, but it's certainly not optimal. 
I don't think it actually hurts, but you get some really weird rounding behavior based on number theoretic happenstance. These weights really should have been floats, but we had to approximate with ints because we need reproducible results across platforms. They are approximations of real numbers, they are not intended as 'mathematical integers'. Let me put it this way: If the weights were floats, would you be computing the GCD of the mantissas? /jakob From dpatel at apple.com Mon Jan 23 12:31:58 2012 From: dpatel at apple.com (Devang Patel) Date: Mon, 23 Jan 2012 18:31:58 -0000 Subject: [llvm-commits] [llvm] r148712 - in /llvm/trunk: lib/Target/X86/AsmParser/X86AsmParser.cpp test/MC/X86/intel-syntax.s Message-ID: <20120123183159.09D092A6C12C@llvm.org> Author: dpatel Date: Mon Jan 23 12:31:58 2012 New Revision: 148712 URL: http://llvm.org/viewvc/llvm-project?rev=148712&view=rev Log: Intel syntax: Parse segment registers. Modified: llvm/trunk/lib/Target/X86/AsmParser/X86AsmParser.cpp llvm/trunk/test/MC/X86/intel-syntax.s Modified: llvm/trunk/lib/Target/X86/AsmParser/X86AsmParser.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/AsmParser/X86AsmParser.cpp?rev=148712&r1=148711&r2=148712&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/AsmParser/X86AsmParser.cpp (original) +++ llvm/trunk/lib/Target/X86/AsmParser/X86AsmParser.cpp Mon Jan 23 12:31:58 2012 @@ -54,7 +54,7 @@ X86Operand *ParseATTOperand(); X86Operand *ParseIntelOperand(); X86Operand *ParseIntelMemOperand(); - X86Operand *ParseIntelBracExpression(unsigned Size); + X86Operand *ParseIntelBracExpression(unsigned SegReg, unsigned Size); X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc); bool ParseDirectiveWord(unsigned Size, SMLoc L); @@ -593,8 +593,9 @@ return Size; } -X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned Size) { - unsigned SegReg = 0, BaseReg = 0, IndexReg = 0, Scale = 1; +X86Operand 
*X86AsmParser::ParseIntelBracExpression(unsigned SegReg, + unsigned Size) { + unsigned BaseReg = 0, IndexReg = 0, Scale = 1; SMLoc Start = Parser.getTok().getLoc(), End; const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext()); @@ -669,6 +670,7 @@ X86Operand *X86AsmParser::ParseIntelMemOperand() { const AsmToken &Tok = Parser.getTok(); SMLoc Start = Parser.getTok().getLoc(), End; + unsigned SegReg = 0; unsigned Size = getIntelMemOperandSize(Tok.getString()); if (Size) { @@ -678,7 +680,17 @@ } if (getLexer().is(AsmToken::LBrac)) - return ParseIntelBracExpression(Size); + return ParseIntelBracExpression(SegReg, Size); + + if (!ParseRegister(SegReg, Start, End)) { + // Handel SegReg : [ ... ] + if (getLexer().isNot(AsmToken::Colon)) + return ErrorOperand(Start, "Expected ':' token!"); + Parser.Lex(); // Eat : + if (getLexer().isNot(AsmToken::LBrac)) + return ErrorOperand(Start, "Expected '[' token!"); + return ParseIntelBracExpression(SegReg, Size); + } const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext()); if (getParser().ParseExpression(Disp, End)) return 0; Modified: llvm/trunk/test/MC/X86/intel-syntax.s URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/X86/intel-syntax.s?rev=148712&r1=148711&r2=148712&view=diff ============================================================================== --- llvm/trunk/test/MC/X86/intel-syntax.s (original) +++ llvm/trunk/test/MC/X86/intel-syntax.s Mon Jan 23 12:31:58 2012 @@ -55,4 +55,6 @@ and rax, -257 // CHECK: fld %st(0) fld ST(0) +// CHECK: movl %fs:(%rdi), %eax +mov EAX, DWORD PTR FS:[RDI] ret From grosbach at apple.com Mon Jan 23 12:37:41 2012 From: grosbach at apple.com (Jim Grosbach) Date: Mon, 23 Jan 2012 10:37:41 -0800 Subject: [llvm-commits] [llvm] r148653 - in /llvm/trunk: include/llvm/Object/ObjectFile.h include/llvm/Support/Endian.h lib/Object/ELFObjectFile.cpp In-Reply-To: <20120122090104.4CD0B2A6C12C@llvm.org> References: <20120122090104.4CD0B2A6C12C@llvm.org> 
Message-ID: <14B0BD87-2FF4-417D-B932-3F18E7E939B4@apple.com> Hi Eli, This patch uses std::vector quite a lot. Have you considered SmallVector? It seems likely that may be a better fit in at least some cases. -Jim On Jan 22, 2012, at 1:01 AM, Eli Bendersky wrote: > Author: eliben > Date: Sun Jan 22 03:01:03 2012 > New Revision: 148653 > > URL: http://llvm.org/viewvc/llvm-project?rev=148653&view=rev > Log: > Basic runtime dynamic loading capabilities added to ELFObjectFile, implemented > in a subclass named DyldELFObject. This class supports rebasing the object file > it represents by re-mapping section addresses to the actual memory addresses > the object was placed in. This is required for MC-JIT implementation on ELF with > debugging support. > > Patch reviewed on llvm-commits. > > Developed together with Ashok Thirumurthi and Andrew Kaylor. > > > Modified: > llvm/trunk/include/llvm/Object/ObjectFile.h > llvm/trunk/include/llvm/Support/Endian.h > llvm/trunk/lib/Object/ELFObjectFile.cpp > > Modified: llvm/trunk/include/llvm/Object/ObjectFile.h > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Object/ObjectFile.h?rev=148653&r1=148652&r2=148653&view=diff > ============================================================================== > --- llvm/trunk/include/llvm/Object/ObjectFile.h (original) > +++ llvm/trunk/include/llvm/Object/ObjectFile.h Sun Jan 22 03:01:03 2012 > @@ -20,6 +20,7 @@ > #include "llvm/Support/ErrorHandling.h" > #include "llvm/Support/MemoryBuffer.h" > #include > +#include > > namespace llvm { > namespace object { > @@ -337,7 +338,8 @@ > > public: > static ObjectFile *createCOFFObjectFile(MemoryBuffer *Object); > - static ObjectFile *createELFObjectFile(MemoryBuffer *Object); > + static ObjectFile *createELFObjectFile(MemoryBuffer *Object, > + bool doDyld = false, std::vector *MemoryMap = 0); > static ObjectFile *createMachOObjectFile(MemoryBuffer *Object); > }; > > > Modified: llvm/trunk/include/llvm/Support/Endian.h > URL: 
http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Support/Endian.h?rev=148653&r1=148652&r2=148653&view=diff > ============================================================================== > --- llvm/trunk/include/llvm/Support/Endian.h (original) > +++ llvm/trunk/include/llvm/Support/Endian.h Sun Jan 22 03:01:03 2012 > @@ -98,6 +98,9 @@ > operator value_type() const { > return endian::read_le(Value); > } > + void operator=(value_type newValue) { > + endian::write_le((void *)&Value, newValue); > + } > private: > uint8_t Value[sizeof(value_type)]; > }; > @@ -108,6 +111,9 @@ > operator value_type() const { > return endian::read_be(Value); > } > + void operator=(value_type newValue) { > + endian::write_be((void *)&Value, newValue); > + } > private: > uint8_t Value[sizeof(value_type)]; > }; > @@ -118,6 +124,9 @@ > operator value_type() const { > return endian::read_le(&Value); > } > + void operator=(value_type newValue) { > + endian::write_le((void *)&Value, newValue); > + } > private: > value_type Value; > }; > @@ -128,6 +137,9 @@ > operator value_type() const { > return endian::read_be(&Value); > } > + void operator=(value_type newValue) { > + endian::write_be((void *)&Value, newValue); > + } > private: > value_type Value; > }; > > Modified: llvm/trunk/lib/Object/ELFObjectFile.cpp > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Object/ELFObjectFile.cpp?rev=148653&r1=148652&r2=148653&view=diff > ============================================================================== > --- llvm/trunk/lib/Object/ELFObjectFile.cpp (original) > +++ llvm/trunk/lib/Object/ELFObjectFile.cpp Sun Jan 22 03:01:03 2012 > @@ -7,7 +7,7 @@ > // > //===----------------------------------------------------------------------===// > // > -// This file defines the ELFObjectFile class. > +// This file defines the ELFObjectFile and DyldELFObject classes. 
> // > //===----------------------------------------------------------------------===// > > @@ -16,6 +16,7 @@ > #include "llvm/ADT/Triple.h" > #include "llvm/ADT/DenseMap.h" > #include "llvm/Object/ObjectFile.h" > +#include "llvm/Support/Casting.h" > #include "llvm/Support/ELF.h" > #include "llvm/Support/Endian.h" > #include "llvm/Support/ErrorHandling.h" > @@ -53,20 +54,22 @@ > template > struct ELFDataTypeTypedefHelper > : ELFDataTypeTypedefHelperCommon { > + typedef uint32_t value_type; > typedef support::detail::packed_endian_specific_integral > - Elf_Addr; > + Elf_Addr; > typedef support::detail::packed_endian_specific_integral > - Elf_Off; > + Elf_Off; > }; > > /// ELF 64bit types. > template > struct ELFDataTypeTypedefHelper > : ELFDataTypeTypedefHelperCommon{ > + typedef uint64_t value_type; > typedef support::detail::packed_endian_specific_integral > - Elf_Addr; > + Elf_Addr; > typedef support::detail::packed_endian_specific_integral > - Elf_Off; > + Elf_Off; > }; > } > > @@ -263,6 +266,7 @@ > typedef Elf_Rel_Impl Elf_Rel; > typedef Elf_Rel_Impl Elf_Rela; > > +protected: > struct Elf_Ehdr { > unsigned char e_ident[ELF::EI_NIDENT]; // ELF Identification bytes > Elf_Half e_type; // Type of file (see ET_*) > @@ -285,7 +289,12 @@ > unsigned char getFileClass() const { return e_ident[ELF::EI_CLASS]; } > unsigned char getDataEncoding() const { return e_ident[ELF::EI_DATA]; } > }; > + // This flag is used for classof, to distinguish ELFObjectFile from > + // its subclass. If more subclasses will be created, this flag will > + // have to become an enum. 
> + bool isDyldELFObject; > > +private: > typedef SmallVector Sections_t; > typedef DenseMap IndexMap_t; > typedef DenseMap > RelocMap_t; > @@ -307,13 +316,11 @@ > return getSection(Rel.w.b); > } > > - void validateSymbol(DataRefImpl Symb) const; > bool isRelocationHasAddend(DataRefImpl Rel) const; > template > const T *getEntry(uint16_t Section, uint32_t Entry) const; > template > const T *getEntry(const Elf_Shdr *Section, uint32_t Entry) const; > - const Elf_Sym *getSymbol(DataRefImpl Symb) const; > const Elf_Shdr *getSection(DataRefImpl index) const; > const Elf_Shdr *getSection(uint32_t index) const; > const Elf_Rel *getRel(DataRefImpl Rel) const; > @@ -323,6 +330,10 @@ > error_code getSymbolName(const Elf_Sym *Symb, StringRef &Res) const; > > protected: > + const Elf_Sym *getSymbol(DataRefImpl Symb) const; // FIXME: Should be private? > + void validateSymbol(DataRefImpl Symb) const; > + > +protected: > virtual error_code getSymbolNext(DataRefImpl Symb, SymbolRef &Res) const; > virtual error_code getSymbolName(DataRefImpl Symb, StringRef &Res) const; > virtual error_code getSymbolFileOffset(DataRefImpl Symb, uint64_t &Res) const; > @@ -384,8 +395,10 @@ > ELF::Elf64_Word getSymbolTableIndex(const Elf_Sym *symb) const; > const Elf_Shdr *getSection(const Elf_Sym *symb) const; > > + // Methods for type inquiry through isa, cast, and dyn_cast > + bool isDyldType() const { return isDyldELFObject; } > static inline bool classof(const Binary *v) { > - return v->getType() == isELF; > + return v->getType() == Binary::isELF; > } > static inline bool classof(const ELFObjectFile *v) { return true; } > }; > @@ -471,7 +484,7 @@ > const Elf_Shdr *Section; > switch (getSymbolTableIndex(symb)) { > case ELF::SHN_COMMON: > - // Undefined symbols have no address yet. 
> + // Unintialized symbols have no offset in the object file > case ELF::SHN_UNDEF: > Result = UnknownAddressOrSize; > return object_error::success; > @@ -489,7 +502,7 @@ > case ELF::STT_OBJECT: > case ELF::STT_NOTYPE: > Result = symb->st_value + > - (Section ? Section->sh_offset - Section->sh_addr : 0); > + (Section ? Section->sh_offset : 0); > return object_error::success; > default: > Result = UnknownAddressOrSize; > @@ -506,7 +519,6 @@ > const Elf_Shdr *Section; > switch (getSymbolTableIndex(symb)) { > case ELF::SHN_COMMON: > - // Undefined symbols have no address yet. > case ELF::SHN_UNDEF: > Result = UnknownAddressOrSize; > return object_error::success; > @@ -523,7 +535,7 @@ > case ELF::STT_FUNC: > case ELF::STT_OBJECT: > case ELF::STT_NOTYPE: > - Result = symb->st_value; > + Result = symb->st_value + (Section ? Section->sh_addr : 0); > return object_error::success; > default: > Result = UnknownAddressOrSize; > @@ -1157,6 +1169,7 @@ > ELFObjectFile::ELFObjectFile(MemoryBuffer *Object > , error_code &ec) > : ObjectFile(Binary::isELF, Object, ec) > + , isDyldELFObject(false) > , SectionHeaderTable(0) > , dot_shstrtab_sec(0) > , dot_strtab_sec(0) { > @@ -1168,10 +1181,12 @@ > SectionHeaderTable = > reinterpret_cast(base() + Header->e_shoff); > uint64_t SectionTableSize = getNumSections() * Header->e_shentsize; > - if (!( (const uint8_t *)SectionHeaderTable + SectionTableSize > - <= base() + Data->getBufferSize())) > + > + if ((const uint8_t *)SectionHeaderTable + SectionTableSize > + > base() + Data->getBufferSize()) { > // FIXME: Proper error handling. > report_fatal_error("Section table goes past end of file!"); > + } > > > // To find the symbol tables we walk the section table to find SHT_SYMTAB. 
> @@ -1466,21 +1481,226 @@ > , (uint8_t)Object->getBufferStart()[ELF::EI_DATA]); > } > > + > +namespace { > + template > + class DyldELFObject : public ELFObjectFile { > + LLVM_ELF_IMPORT_TYPES(target_endianness, is64Bits) > + > + typedef Elf_Shdr_Impl Elf_Shdr; > + typedef Elf_Sym_Impl Elf_Sym; > + typedef Elf_Rel_Impl Elf_Rel; > + typedef Elf_Rel_Impl Elf_Rela; > + > + typedef typename ELFObjectFile:: > + Elf_Ehdr Elf_Ehdr; > + Elf_Ehdr *Header; > + > + // Update section headers according to the current location in memory > + virtual void rebaseObject(std::vector *MemoryMap); > + // Record memory addresses for cleanup > + virtual void saveAddress(std::vector *MemoryMap, uint8_t *addr); > + > + protected: > + virtual error_code getSymbolAddress(DataRefImpl Symb, uint64_t &Res) const; > + > + public: > + DyldELFObject(MemoryBuffer *Object, std::vector *MemoryMap, > + error_code &ec); > + > + // Methods for type inquiry through isa, cast, and dyn_cast > + static inline bool classof(const Binary *v) { > + return (isa >(v) > + && classof(cast >(v))); > + } > + static inline bool classof( > + const ELFObjectFile *v) { > + return v->isDyldType(); > + } > + static inline bool classof(const DyldELFObject *v) { > + return true; > + } > + }; > +} // end anonymous namespace > + > +template > +DyldELFObject::DyldELFObject(MemoryBuffer *Object, > + std::vector *MemoryMap, error_code &ec) > + : ELFObjectFile(Object, ec) > + , Header(0) { > + this->isDyldELFObject = true; > + Header = const_cast( > + reinterpret_cast(this->base())); > + if (Header->e_shoff == 0) > + return; > + > + // Mark the image as a dynamic shared library > + Header->e_type = ELF::ET_DYN; > + > + rebaseObject(MemoryMap); > +} > + > +// Walk through the ELF headers, updating virtual addresses to reflect where > +// the object is currently loaded in memory > +template > +void DyldELFObject::rebaseObject( > + std::vector *MemoryMap) { > + typedef typename ELFDataTypeTypedefHelper< > + target_endianness, 
is64Bits>::value_type addr_type; > + > + uint8_t *base_p = const_cast(this->base()); > + Elf_Shdr *sectionTable = > + reinterpret_cast(base_p + Header->e_shoff); > + uint64_t numSections = this->getNumSections(); > + > + // Allocate memory space for NOBITS sections (such as .bss), which only exist > + // in memory, but don't occupy space in the object file. > + // Update the address in the section headers to reflect this allocation. > + for (uint64_t index = 0; index < numSections; index++) { > + Elf_Shdr *sec = reinterpret_cast( > + reinterpret_cast(sectionTable) + index * Header->e_shentsize); > + > + // Only update sections that are meant to be present in program memory > + if (sec->sh_flags & ELF::SHF_ALLOC) { > + uint8_t *addr = base_p + sec->sh_offset; > + if (sec->sh_type == ELF::SHT_NOBITS) { > + addr = static_cast(calloc(sec->sh_size, 1)); > + saveAddress(MemoryMap, addr); > + } > + else { > + // FIXME: Currently memory with RWX permissions is allocated. In the > + // future, make sure that permissions are as necessary > + if (sec->sh_flags & ELF::SHF_WRITE) { > + // see FIXME above > + } > + if (sec->sh_flags & ELF::SHF_EXECINSTR) { > + // see FIXME above > + } > + } > + assert(sizeof(addr_type) == sizeof(intptr_t) && > + "Cross-architecture ELF dy-load is not supported!"); > + sec->sh_addr = static_cast(intptr_t(addr)); > + } > + } > + > + // Now allocate actual space for COMMON symbols, which also don't occupy > + // space in the object file. > + // We want to allocate space for all COMMON symbols at once, so the flow is: > + // 1. Go over all symbols, find those that are in COMMON. For each such > + // symbol, record its size and the value field in its symbol header in a > + // special vector. > + // 2. Allocate memory for all COMMON symbols in one fell swoop. > + // 3. Using the recorded information from (1), update the address fields in > + // the symbol headers of the COMMON symbols to reflect their allocated > + // address. 
> + uint64_t TotalSize = 0; > + std::vector > SymbAddrInfo; > + error_code ec = object_error::success; > + for (symbol_iterator si = this->begin_symbols(), > + se = this->end_symbols(); si != se; si.increment(ec)) { > + uint64_t Size = 0; > + ec = si->getSize(Size); > + Elf_Sym* symb = const_cast( > + this->getSymbol(si->getRawDataRefImpl())); > + if (ec == object_error::success && > + this->getSymbolTableIndex(symb) == ELF::SHN_COMMON && Size > 0) { > + SymbAddrInfo.push_back(std::make_pair(&(symb->st_value), Size)); > + TotalSize += Size; > + } > + } > + > + uint8_t* SectionPtr = (uint8_t *)calloc(TotalSize, 1); > + saveAddress(MemoryMap, SectionPtr); > + > + typedef typename std::vector >::iterator > + AddrInfoIterator; > + AddrInfoIterator EndIter = SymbAddrInfo.end(); > + for (AddrInfoIterator AddrIter = SymbAddrInfo.begin(); > + AddrIter != EndIter; ++AddrIter) { > + assert(sizeof(addr_type) == sizeof(intptr_t) && > + "Cross-architecture ELF dy-load is not supported!"); > + *(AddrIter->first) = static_cast(intptr_t(SectionPtr)); > + SectionPtr += AddrIter->second; > + } > +} > + > +// Record memory addresses for callers > +template > +void DyldELFObject::saveAddress( > + std::vector *MemoryMap, uint8_t* addr) { > + if (MemoryMap) > + MemoryMap->push_back(addr); > + else > + errs() << "WARNING: Memory leak - cannot record memory for ELF dyld."; > +} > + > +template > +error_code DyldELFObject::getSymbolAddress( > + DataRefImpl Symb, uint64_t &Result) const { > + this->validateSymbol(Symb); > + const Elf_Sym *symb = this->getSymbol(Symb); > + if (this->getSymbolTableIndex(symb) == ELF::SHN_COMMON) { > + Result = symb->st_value; > + return object_error::success; > + } > + else { > + return ELFObjectFile::getSymbolAddress( > + Symb, Result); > + } > +} > + > namespace llvm { > > - ObjectFile *ObjectFile::createELFObjectFile(MemoryBuffer *Object) { > + // Creates an in-memory object-file by default: createELFObjectFile(Buffer) > + // Set doDyld to true to create a 
live (executable/debug-worthy) image > + // If doDyld is true, any memory allocated for non-resident sections and > + // symbols is recorded in MemoryMap. > + ObjectFile *ObjectFile::createELFObjectFile(MemoryBuffer *Object, > + bool doDyld, std::vector *MemoryMap) { > std::pair Ident = getElfArchType(Object); > error_code ec; > + > + if (doDyld) { > + if (Ident.first == ELF::ELFCLASS32 && Ident.second == ELF::ELFDATA2LSB) > + return new DyldELFObject(Object, MemoryMap, ec); > + else if (Ident.first == ELF::ELFCLASS32 && Ident.second == ELF::ELFDATA2MSB) > + return new DyldELFObject(Object, MemoryMap, ec); > + else if (Ident.first == ELF::ELFCLASS64 && Ident.second == ELF::ELFDATA2MSB) > + return new DyldELFObject(Object, MemoryMap, ec); > + else if (Ident.first == ELF::ELFCLASS64 && Ident.second == ELF::ELFDATA2LSB) { > + DyldELFObject *result = > + new DyldELFObject(Object, MemoryMap, ec); > + > + // Unit testing for type inquiry > + bool isBinary = isa(result); > + bool isDyld = isa >(result); > + bool isFile = isa >(result); > + assert(isBinary && isDyld && isFile && > + "Type inquiry failed for ELF object!"); > + return result; > + } > + } > + > if (Ident.first == ELF::ELFCLASS32 && Ident.second == ELF::ELFDATA2LSB) > return new ELFObjectFile(Object, ec); > else if (Ident.first == ELF::ELFCLASS32 && Ident.second == ELF::ELFDATA2MSB) > return new ELFObjectFile(Object, ec); > - else if (Ident.first == ELF::ELFCLASS64 && Ident.second == ELF::ELFDATA2LSB) > - return new ELFObjectFile(Object, ec); > else if (Ident.first == ELF::ELFCLASS64 && Ident.second == ELF::ELFDATA2MSB) > return new ELFObjectFile(Object, ec); > - // FIXME: Proper error handling. 
> - report_fatal_error("Not an ELF object file!"); > + else if (Ident.first == ELF::ELFCLASS64 && Ident.second == ELF::ELFDATA2LSB) { > + ELFObjectFile *result = > + new ELFObjectFile(Object, ec); > + > + // Unit testing for type inquiry > + bool isBinary = isa(result); > + bool isDyld = isa >(result); > + bool isFile = isa >(result); > + assert(isBinary && isFile && !isDyld && > + "Type inquiry failed for ELF object!"); > + return result; > + } > + > + report_fatal_error("Buffer is not an ELF object file!"); > } > > } // end namespace llvm > > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits From kcc at google.com Mon Jan 23 12:44:35 2012 From: kcc at google.com (Kostya Serebryany) Date: Mon, 23 Jan 2012 18:44:35 -0000 Subject: [llvm-commits] [compiler-rt] r148714 - in /compiler-rt/trunk/lib/asan/tests: deep_tail_call.cc deep_tail_call.tmpl test_output.sh Message-ID: <20120123184435.432DE2A6C12C@llvm.org> Author: kcc Date: Mon Jan 23 12:44:34 2012 New Revision: 148714 URL: http://llvm.org/viewvc/llvm-project?rev=148714&view=rev Log: [asan] test that -fno-optimize-sibling-calls helps to get sane stack traces Added: compiler-rt/trunk/lib/asan/tests/deep_tail_call.cc compiler-rt/trunk/lib/asan/tests/deep_tail_call.tmpl Modified: compiler-rt/trunk/lib/asan/tests/test_output.sh Added: compiler-rt/trunk/lib/asan/tests/deep_tail_call.cc URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/asan/tests/deep_tail_call.cc?rev=148714&view=auto ============================================================================== --- compiler-rt/trunk/lib/asan/tests/deep_tail_call.cc (added) +++ compiler-rt/trunk/lib/asan/tests/deep_tail_call.cc Mon Jan 23 12:44:34 2012 @@ -0,0 +1,13 @@ +int global[10]; +__attribute__((noinline)) +void call4(int i) { global[i+10]++; } +__attribute__((noinline)) +void call3(int i) { call4(i); } +__attribute__((noinline)) +void 
call2(int i) { call3(i); } +__attribute__((noinline)) +void call1(int i) { call2(i); } +int main(int argc, char **argv) { + call1(argc); + return global[0]; +} Added: compiler-rt/trunk/lib/asan/tests/deep_tail_call.tmpl URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/asan/tests/deep_tail_call.tmpl?rev=148714&view=auto ============================================================================== --- compiler-rt/trunk/lib/asan/tests/deep_tail_call.tmpl (added) +++ compiler-rt/trunk/lib/asan/tests/deep_tail_call.tmpl Mon Jan 23 12:44:34 2012 @@ -0,0 +1,6 @@ +AddressSanitizer global-buffer-overflow + #0.*call4 + #1.*call3 + #2.*call2 + #3.*call1 + #4.*main Modified: compiler-rt/trunk/lib/asan/tests/test_output.sh URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/asan/tests/test_output.sh?rev=148714&r1=148713&r2=148714&view=diff ============================================================================== --- compiler-rt/trunk/lib/asan/tests/test_output.sh (original) +++ compiler-rt/trunk/lib/asan/tests/test_output.sh Mon Jan 23 12:44:34 2012 @@ -5,7 +5,7 @@ OS=`uname` CXX=$1 CC=$2 -CXXFLAGS="-mno-omit-leaf-frame-pointer -fno-omit-frame-pointer" +CXXFLAGS="-mno-omit-leaf-frame-pointer -fno-omit-frame-pointer -fno-optimize-sibling-calls" SYMBOLIZER=../scripts/asan_symbolize.py C_TEST=use-after-free From matthewbg at google.com Mon Jan 23 12:46:04 2012 From: matthewbg at google.com (Matt Beaumont-Gay) Date: Mon, 23 Jan 2012 18:46:04 -0000 Subject: [llvm-commits] [llvm] r148715 - /llvm/trunk/lib/Object/ELFObjectFile.cpp Message-ID: <20120123184604.9348B2A6C12C@llvm.org> Author: matthewbg Date: Mon Jan 23 12:46:04 2012 New Revision: 148715 URL: http://llvm.org/viewvc/llvm-project?rev=148715&view=rev Log: Silence warnings in -asserts build Modified: llvm/trunk/lib/Object/ELFObjectFile.cpp Modified: llvm/trunk/lib/Object/ELFObjectFile.cpp URL: 
http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Object/ELFObjectFile.cpp?rev=148715&r1=148714&r2=148715&view=diff ============================================================================== --- llvm/trunk/lib/Object/ELFObjectFile.cpp (original) +++ llvm/trunk/lib/Object/ELFObjectFile.cpp Mon Jan 23 12:46:04 2012 @@ -1675,6 +1675,9 @@ bool isBinary = isa(result); bool isDyld = isa >(result); bool isFile = isa >(result); + (void)isBinary; + (void)isDyld; + (void)isFile; assert(isBinary && isDyld && isFile && "Type inquiry failed for ELF object!"); return result; @@ -1695,6 +1698,9 @@ bool isBinary = isa(result); bool isDyld = isa >(result); bool isFile = isa >(result); + (void)isBinary; + (void)isDyld; + (void)isFile; assert(isBinary && isFile && !isDyld && "Type inquiry failed for ELF object!"); return result; From kcc at google.com Mon Jan 23 13:02:58 2012 From: kcc at google.com (Kostya Serebryany) Date: Mon, 23 Jan 2012 11:02:58 -0800 Subject: [llvm-commits] fix the MSVC warning in include/llvm-c/Core.h Message-ID: My previous change in include/llvm-c/Core.h that introduced 64-bit Attributes (r148553) caused a warning while building with MSVC. http://llvm.org/bugs/show_bug.cgi?id=11828 The following patch fixes the problem (use "static const uint64_t" instead of enum). Ok to commit? --kcc Index: include/llvm-c/Core.h =================================================================== --- include/llvm-c/Core.h (revision 148708) +++ include/llvm-c/Core.h (working copy) @@ -92,7 +92,7 @@ /** Used to get the users and usees of a Value. See the llvm::Use class. 
*/ typedef struct LLVMOpaqueUse *LLVMUseRef; -typedef enum { +static const uint64_t LLVMZExtAttribute = 1<<0, LLVMSExtAttribute = 1<<1, LLVMNoReturnAttribute = 1<<2, @@ -119,8 +119,8 @@ LLVMReturnsTwice = 1 << 29, LLVMUWTable = 1 << 30, LLVMNonLazyBind = 1U << 31, - LLVMAddressSafety = 1ULL << 32 -} LLVMAttribute; + LLVMAddressSafety = 1ULL << 32; +typedef uint64_t LLVMAttribute; typedef enum { /* Terminator Instructions */ -------------- next part -------------- An HTML attachment was scrubbed... URL: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120123/9b5ffec8/attachment.html From grosbach at apple.com Mon Jan 23 13:39:08 2012 From: grosbach at apple.com (Jim Grosbach) Date: Mon, 23 Jan 2012 19:39:08 -0000 Subject: [llvm-commits] [llvm] r148718 - in /llvm/trunk: lib/Target/ARM/ARMInstrFormats.td lib/Target/ARM/ARMInstrNEON.td lib/Target/ARM/AsmParser/ARMAsmParser.cpp test/MC/ARM/neon-vst-encoding.s Message-ID: <20120123193909.12F072A6C12C@llvm.org> Author: grosbach Date: Mon Jan 23 13:39:08 2012 New Revision: 148718 URL: http://llvm.org/viewvc/llvm-project?rev=148718&view=rev Log: Simplify some NEON assembly pseudo definitions. Let the generic token alias definitions handle the data subtype suffixes. We don't need explicit versions for each. Modified: llvm/trunk/lib/Target/ARM/ARMInstrFormats.td llvm/trunk/lib/Target/ARM/ARMInstrNEON.td llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp llvm/trunk/test/MC/ARM/neon-vst-encoding.s Modified: llvm/trunk/lib/Target/ARM/ARMInstrFormats.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrFormats.td?rev=148718&r1=148717&r2=148718&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrFormats.td (original) +++ llvm/trunk/lib/Target/ARM/ARMInstrFormats.td Mon Jan 23 13:39:08 2012 @@ -2029,75 +2029,6 @@ // for instalias defs. 
class NEONDataTypeAsmPseudoInst : AsmPseudoInst, Requires<[HasNEON]>; -multiclass NEONDT8ReqAsmPseudoInst { - def I8 : NEONDataTypeAsmPseudoInst; - def S8 : NEONDataTypeAsmPseudoInst; - def U8 : NEONDataTypeAsmPseudoInst; - def P8 : NEONDataTypeAsmPseudoInst; -} -// NEONDT8ReqAsmPseudoInst plus plain ".8" -multiclass NEONDT8AsmPseudoInst { - def _8 : NEONDataTypeAsmPseudoInst; - defm _ : NEONDT8ReqAsmPseudoInst; -} -multiclass NEONDT16ReqAsmPseudoInst { - def I16 : NEONDataTypeAsmPseudoInst; - def S16 : NEONDataTypeAsmPseudoInst; - def U16 : NEONDataTypeAsmPseudoInst; - def P16 : NEONDataTypeAsmPseudoInst; -} -// NEONDT16ReqAsmPseudoInst plus plain ".16" -multiclass NEONDT16AsmPseudoInst { - def _16 : NEONDataTypeAsmPseudoInst; - defm _ : NEONDT16ReqAsmPseudoInst; -} -multiclass NEONDT32ReqAsmPseudoInst { - def I32 : NEONDataTypeAsmPseudoInst; - def S32 : NEONDataTypeAsmPseudoInst; - def U32 : NEONDataTypeAsmPseudoInst; - def F32 : NEONDataTypeAsmPseudoInst; - def F : NEONDataTypeAsmPseudoInst; -} -// NEONDT32ReqAsmPseudoInst plus plain ".32" -multiclass NEONDT32AsmPseudoInst { - def _32 : NEONDataTypeAsmPseudoInst; - defm _ : NEONDT32ReqAsmPseudoInst; -} -multiclass NEONDT64ReqAsmPseudoInst { - def I64 : NEONDataTypeAsmPseudoInst; - def S64 : NEONDataTypeAsmPseudoInst; - def U64 : NEONDataTypeAsmPseudoInst; - def F64 : NEONDataTypeAsmPseudoInst; - def D : NEONDataTypeAsmPseudoInst; -} -// NEONDT64ReqAsmPseudoInst plus plain ".64" -multiclass NEONDT64AsmPseudoInst { - def _64 : NEONDataTypeAsmPseudoInst; - defm _ : NEONDT64ReqAsmPseudoInst; -} -multiclass NEONDT64NoF64ReqAsmPseudoInst { - def I64 : NEONDataTypeAsmPseudoInst; - def S64 : NEONDataTypeAsmPseudoInst; - def U64 : NEONDataTypeAsmPseudoInst; - def D : NEONDataTypeAsmPseudoInst; -} -// NEONDT64ReqAsmPseudoInst plus plain ".64" -multiclass NEONDT64NoF64AsmPseudoInst { - def _64 : NEONDataTypeAsmPseudoInst; - defm _ : NEONDT64ReqAsmPseudoInst; -} -multiclass NEONDTAnyAsmPseudoInst { - defm _ : 
NEONDT8AsmPseudoInst; - defm _ : NEONDT16AsmPseudoInst; - defm _ : NEONDT32AsmPseudoInst; - defm _ : NEONDT64AsmPseudoInst; -} -multiclass NEONDTAnyNoF64AsmPseudoInst { - defm _ : NEONDT8AsmPseudoInst; - defm _ : NEONDT16AsmPseudoInst; - defm _ : NEONDT32AsmPseudoInst; - defm _ : NEONDT64NoF64AsmPseudoInst; -} // Data type suffix token aliases. Implements Table A7-3 in the ARM ARM. def : TokenAlias<".s8", ".i8">; Modified: llvm/trunk/lib/Target/ARM/ARMInstrNEON.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrNEON.td?rev=148718&r1=148717&r2=148718&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrNEON.td (original) +++ llvm/trunk/lib/Target/ARM/ARMInstrNEON.td Mon Jan 23 13:39:08 2012 @@ -5753,150 +5753,167 @@ // VLD1 single-lane pseudo-instructions. These need special handling for // the lane index that an InstAlias can't handle, so we use these instead. -defm VLD1LNdAsm : NEONDT8AsmPseudoInst<"vld1${p}", "$list, $addr", +def VLD1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr", (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VLD1LNdAsm : NEONDT16AsmPseudoInst<"vld1${p}", "$list, $addr", +def VLD1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr", (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VLD1LNdAsm : NEONDT32AsmPseudoInst<"vld1${p}", "$list, $addr", +def VLD1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr", (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VLD1LNdWB_fixed_Asm : NEONDT8AsmPseudoInst<"vld1${p}", "$list, $addr!", +def VLD1LNdWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr!", (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VLD1LNdWB_fixed_Asm : NEONDT16AsmPseudoInst<"vld1${p}", "$list, $addr!", +def VLD1LNdWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld1${p}", 
".16", "$list, $addr!", (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VLD1LNdWB_fixed_Asm : NEONDT32AsmPseudoInst<"vld1${p}", "$list, $addr!", +def VLD1LNdWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr!", (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VLD1LNdWB_register_Asm : - NEONDT8AsmPseudoInst<"vld1${p}", "$list, $addr, $Rm", +def VLD1LNdWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr, $Rm", (ins VecListOneDByteIndexed:$list, addrmode6:$addr, rGPR:$Rm, pred:$p)>; -defm VLD1LNdWB_register_Asm : - NEONDT16AsmPseudoInst<"vld1${p}", "$list, $addr, $Rm", +def VLD1LNdWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr, $Rm", (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, rGPR:$Rm, pred:$p)>; -defm VLD1LNdWB_register_Asm : - NEONDT32AsmPseudoInst<"vld1${p}", "$list, $addr, $Rm", +def VLD1LNdWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr, $Rm", (ins VecListOneDWordIndexed:$list, addrmode6:$addr, rGPR:$Rm, pred:$p)>; // VST1 single-lane pseudo-instructions. These need special handling for // the lane index that an InstAlias can't handle, so we use these instead. 
-defm VST1LNdAsm : NEONDT8AsmPseudoInst<"vst1${p}", "$list, $addr", +def VST1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr", (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VST1LNdAsm : NEONDT16AsmPseudoInst<"vst1${p}", "$list, $addr", +def VST1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr", (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VST1LNdAsm : NEONDT32AsmPseudoInst<"vst1${p}", "$list, $addr", +def VST1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr", (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VST1LNdWB_fixed_Asm : NEONDT8AsmPseudoInst<"vst1${p}", "$list, $addr!", +def VST1LNdWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr!", (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VST1LNdWB_fixed_Asm : NEONDT16AsmPseudoInst<"vst1${p}", "$list, $addr!", +def VST1LNdWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr!", (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VST1LNdWB_fixed_Asm : NEONDT32AsmPseudoInst<"vst1${p}", "$list, $addr!", +def VST1LNdWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr!", (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VST1LNdWB_register_Asm : - NEONDT8AsmPseudoInst<"vst1${p}", "$list, $addr, $Rm", +def VST1LNdWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr, $Rm", (ins VecListOneDByteIndexed:$list, addrmode6:$addr, rGPR:$Rm, pred:$p)>; -defm VST1LNdWB_register_Asm : - NEONDT16AsmPseudoInst<"vst1${p}", "$list, $addr, $Rm", +def VST1LNdWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr, $Rm", (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, rGPR:$Rm, pred:$p)>; -defm VST1LNdWB_register_Asm : - NEONDT32AsmPseudoInst<"vst1${p}", "$list, $addr, $Rm", +def VST1LNdWB_register_Asm_32 : + 
NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr, $Rm", (ins VecListOneDWordIndexed:$list, addrmode6:$addr, rGPR:$Rm, pred:$p)>; // VLD2 single-lane pseudo-instructions. These need special handling for // the lane index that an InstAlias can't handle, so we use these instead. -defm VLD2LNdAsm : NEONDT8AsmPseudoInst<"vld2${p}", "$list, $addr", +def VLD2LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr", (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VLD2LNdAsm : NEONDT16AsmPseudoInst<"vld2${p}", "$list, $addr", +def VLD2LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr", (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VLD2LNdAsm : NEONDT32AsmPseudoInst<"vld2${p}", "$list, $addr", +def VLD2LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr", (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VLD2LNqAsm : NEONDT16AsmPseudoInst<"vld2${p}", "$list, $addr", +def VLD2LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr", (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VLD2LNqAsm : NEONDT32AsmPseudoInst<"vld2${p}", "$list, $addr", +def VLD2LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr", (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VLD2LNdWB_fixed_Asm : NEONDT8AsmPseudoInst<"vld2${p}", "$list, $addr!", +def VLD2LNdWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr!", (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VLD2LNdWB_fixed_Asm : NEONDT16AsmPseudoInst<"vld2${p}", "$list, $addr!", +def VLD2LNdWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!", (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VLD2LNdWB_fixed_Asm : NEONDT32AsmPseudoInst<"vld2${p}", "$list, $addr!", +def VLD2LNdWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!", 
(ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VLD2LNqWB_fixed_Asm : NEONDT16AsmPseudoInst<"vld2${p}", "$list, $addr!", +def VLD2LNqWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!", (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VLD2LNqWB_fixed_Asm : NEONDT32AsmPseudoInst<"vld2${p}", "$list, $addr!", +def VLD2LNqWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!", (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VLD2LNdWB_register_Asm : - NEONDT8AsmPseudoInst<"vld2${p}", "$list, $addr, $Rm", +def VLD2LNdWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr, $Rm", (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, rGPR:$Rm, pred:$p)>; -defm VLD2LNdWB_register_Asm : - NEONDT16AsmPseudoInst<"vld2${p}", "$list, $addr, $Rm", +def VLD2LNdWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm", (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, rGPR:$Rm, pred:$p)>; -defm VLD2LNdWB_register_Asm : - NEONDT32AsmPseudoInst<"vld2${p}", "$list, $addr, $Rm", +def VLD2LNdWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm", (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, rGPR:$Rm, pred:$p)>; -defm VLD2LNqWB_register_Asm : - NEONDT16AsmPseudoInst<"vld2${p}", "$list, $addr, $Rm", +def VLD2LNqWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm", (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, rGPR:$Rm, pred:$p)>; -defm VLD2LNqWB_register_Asm : - NEONDT32AsmPseudoInst<"vld2${p}", "$list, $addr, $Rm", +def VLD2LNqWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm", (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, rGPR:$Rm, pred:$p)>; // VST2 single-lane pseudo-instructions. These need special handling for // the lane index that an InstAlias can't handle, so we use these instead. 
-defm VST2LNdAsm : NEONDT8AsmPseudoInst<"vst2${p}", "$list, $addr", +def VST2LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr", (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VST2LNdAsm : NEONDT16AsmPseudoInst<"vst2${p}", "$list, $addr", +def VST2LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr", (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VST2LNdAsm : NEONDT32AsmPseudoInst<"vst2${p}", "$list, $addr", +def VST2LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr", (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VST2LNqAsm : NEONDT16AsmPseudoInst<"vst2${p}", "$list, $addr", +def VST2LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr", (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VST2LNqAsm : NEONDT32AsmPseudoInst<"vst2${p}", "$list, $addr", +def VST2LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr", (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VST2LNdWB_fixed_Asm : NEONDT8AsmPseudoInst<"vst2${p}", "$list, $addr!", +def VST2LNdWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr!", (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VST2LNdWB_fixed_Asm : NEONDT16AsmPseudoInst<"vst2${p}", "$list, $addr!", +def VST2LNdWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!", (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VST2LNdWB_fixed_Asm : NEONDT32AsmPseudoInst<"vst2${p}", "$list, $addr!", +def VST2LNdWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!", (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VST2LNqWB_fixed_Asm : NEONDT16AsmPseudoInst<"vst2${p}", "$list, $addr!", +def VST2LNqWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!", (ins VecListTwoQHWordIndexed:$list, 
addrmode6:$addr, pred:$p)>; -defm VST2LNqWB_fixed_Asm : NEONDT32AsmPseudoInst<"vst2${p}", "$list, $addr!", +def VST2LNqWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!", (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VST2LNdWB_register_Asm : - NEONDT8AsmPseudoInst<"vst2${p}", "$list, $addr, $Rm", +def VST2LNdWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr, $Rm", (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, rGPR:$Rm, pred:$p)>; -defm VST2LNdWB_register_Asm : - NEONDT16AsmPseudoInst<"vst2${p}", "$list, $addr, $Rm", +def VST2LNdWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst2${p}", ".16","$list, $addr, $Rm", (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, rGPR:$Rm, pred:$p)>; -defm VST2LNdWB_register_Asm : - NEONDT32AsmPseudoInst<"vst2${p}", "$list, $addr, $Rm", +def VST2LNdWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm", (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, rGPR:$Rm, pred:$p)>; -defm VST2LNqWB_register_Asm : - NEONDT16AsmPseudoInst<"vst2${p}", "$list, $addr, $Rm", +def VST2LNqWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst2${p}", ".16","$list, $addr, $Rm", (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, rGPR:$Rm, pred:$p)>; -defm VST2LNqWB_register_Asm : - NEONDT32AsmPseudoInst<"vst2${p}", "$list, $addr, $Rm", +def VST2LNqWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm", (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, rGPR:$Rm, pred:$p)>; + // VMOV takes an optional datatype suffix defm : VFPDTAnyInstAlias<"vmov${p}", "$Vd, $Vm", (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>; Modified: llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp?rev=148718&r1=148717&r2=148718&view=diff ============================================================================== --- 
llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp (original) +++ llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp Mon Jan 23 13:39:08 2012 @@ -5147,128 +5147,80 @@ switch(Opc) { default: assert(0 && "unexpected opcode!"); // VST1LN - case ARM::VST1LNdWB_fixed_Asm_8: case ARM::VST1LNdWB_fixed_Asm_P8: - case ARM::VST1LNdWB_fixed_Asm_I8: case ARM::VST1LNdWB_fixed_Asm_S8: - case ARM::VST1LNdWB_fixed_Asm_U8: + case ARM::VST1LNdWB_fixed_Asm_8: Spacing = 1; return ARM::VST1LNd8_UPD; - case ARM::VST1LNdWB_fixed_Asm_16: case ARM::VST1LNdWB_fixed_Asm_P16: - case ARM::VST1LNdWB_fixed_Asm_I16: case ARM::VST1LNdWB_fixed_Asm_S16: - case ARM::VST1LNdWB_fixed_Asm_U16: + case ARM::VST1LNdWB_fixed_Asm_16: Spacing = 1; return ARM::VST1LNd16_UPD; - case ARM::VST1LNdWB_fixed_Asm_32: case ARM::VST1LNdWB_fixed_Asm_F: - case ARM::VST1LNdWB_fixed_Asm_F32: case ARM::VST1LNdWB_fixed_Asm_I32: - case ARM::VST1LNdWB_fixed_Asm_S32: case ARM::VST1LNdWB_fixed_Asm_U32: + case ARM::VST1LNdWB_fixed_Asm_32: Spacing = 1; return ARM::VST1LNd32_UPD; - case ARM::VST1LNdWB_register_Asm_8: case ARM::VST1LNdWB_register_Asm_P8: - case ARM::VST1LNdWB_register_Asm_I8: case ARM::VST1LNdWB_register_Asm_S8: - case ARM::VST1LNdWB_register_Asm_U8: + case ARM::VST1LNdWB_register_Asm_8: Spacing = 1; return ARM::VST1LNd8_UPD; - case ARM::VST1LNdWB_register_Asm_16: case ARM::VST1LNdWB_register_Asm_P16: - case ARM::VST1LNdWB_register_Asm_I16: case ARM::VST1LNdWB_register_Asm_S16: - case ARM::VST1LNdWB_register_Asm_U16: + case ARM::VST1LNdWB_register_Asm_16: Spacing = 1; return ARM::VST1LNd16_UPD; - case ARM::VST1LNdWB_register_Asm_32: case ARM::VST1LNdWB_register_Asm_F: - case ARM::VST1LNdWB_register_Asm_F32: case ARM::VST1LNdWB_register_Asm_I32: - case ARM::VST1LNdWB_register_Asm_S32: case ARM::VST1LNdWB_register_Asm_U32: + case ARM::VST1LNdWB_register_Asm_32: Spacing = 1; return ARM::VST1LNd32_UPD; - case ARM::VST1LNdAsm_8: case ARM::VST1LNdAsm_P8: - case ARM::VST1LNdAsm_I8: case ARM::VST1LNdAsm_S8: - case 
ARM::VST1LNdAsm_U8: + case ARM::VST1LNdAsm_8: Spacing = 1; return ARM::VST1LNd8; - case ARM::VST1LNdAsm_16: case ARM::VST1LNdAsm_P16: - case ARM::VST1LNdAsm_I16: case ARM::VST1LNdAsm_S16: - case ARM::VST1LNdAsm_U16: + case ARM::VST1LNdAsm_16: Spacing = 1; return ARM::VST1LNd16; - case ARM::VST1LNdAsm_32: case ARM::VST1LNdAsm_F: - case ARM::VST1LNdAsm_F32: case ARM::VST1LNdAsm_I32: - case ARM::VST1LNdAsm_S32: case ARM::VST1LNdAsm_U32: + case ARM::VST1LNdAsm_32: Spacing = 1; return ARM::VST1LNd32; // VST2LN - case ARM::VST2LNdWB_fixed_Asm_8: case ARM::VST2LNdWB_fixed_Asm_P8: - case ARM::VST2LNdWB_fixed_Asm_I8: case ARM::VST2LNdWB_fixed_Asm_S8: - case ARM::VST2LNdWB_fixed_Asm_U8: + case ARM::VST2LNdWB_fixed_Asm_8: Spacing = 1; return ARM::VST2LNd8_UPD; - case ARM::VST2LNdWB_fixed_Asm_16: case ARM::VST2LNdWB_fixed_Asm_P16: - case ARM::VST2LNdWB_fixed_Asm_I16: case ARM::VST2LNdWB_fixed_Asm_S16: - case ARM::VST2LNdWB_fixed_Asm_U16: + case ARM::VST2LNdWB_fixed_Asm_16: Spacing = 1; return ARM::VST2LNd16_UPD; - case ARM::VST2LNdWB_fixed_Asm_32: case ARM::VST2LNdWB_fixed_Asm_F: - case ARM::VST2LNdWB_fixed_Asm_F32: case ARM::VST2LNdWB_fixed_Asm_I32: - case ARM::VST2LNdWB_fixed_Asm_S32: case ARM::VST2LNdWB_fixed_Asm_U32: + case ARM::VST2LNdWB_fixed_Asm_32: Spacing = 1; return ARM::VST2LNd32_UPD; - case ARM::VST2LNqWB_fixed_Asm_16: case ARM::VST2LNqWB_fixed_Asm_P16: - case ARM::VST2LNqWB_fixed_Asm_I16: case ARM::VST2LNqWB_fixed_Asm_S16: - case ARM::VST2LNqWB_fixed_Asm_U16: + case ARM::VST2LNqWB_fixed_Asm_16: Spacing = 2; return ARM::VST2LNq16_UPD; - case ARM::VST2LNqWB_fixed_Asm_32: case ARM::VST2LNqWB_fixed_Asm_F: - case ARM::VST2LNqWB_fixed_Asm_F32: case ARM::VST2LNqWB_fixed_Asm_I32: - case ARM::VST2LNqWB_fixed_Asm_S32: case ARM::VST2LNqWB_fixed_Asm_U32: + case ARM::VST2LNqWB_fixed_Asm_32: Spacing = 2; return ARM::VST2LNq32_UPD; - case ARM::VST2LNdWB_register_Asm_8: case ARM::VST2LNdWB_register_Asm_P8: - case ARM::VST2LNdWB_register_Asm_I8: case 
ARM::VST2LNdWB_register_Asm_S8: - case ARM::VST2LNdWB_register_Asm_U8: + case ARM::VST2LNdWB_register_Asm_8: Spacing = 1; return ARM::VST2LNd8_UPD; - case ARM::VST2LNdWB_register_Asm_16: case ARM::VST2LNdWB_register_Asm_P16: - case ARM::VST2LNdWB_register_Asm_I16: case ARM::VST2LNdWB_register_Asm_S16: - case ARM::VST2LNdWB_register_Asm_U16: + case ARM::VST2LNdWB_register_Asm_16: Spacing = 1; return ARM::VST2LNd16_UPD; - case ARM::VST2LNdWB_register_Asm_32: case ARM::VST2LNdWB_register_Asm_F: - case ARM::VST2LNdWB_register_Asm_F32: case ARM::VST2LNdWB_register_Asm_I32: - case ARM::VST2LNdWB_register_Asm_S32: case ARM::VST2LNdWB_register_Asm_U32: + case ARM::VST2LNdWB_register_Asm_32: Spacing = 1; return ARM::VST2LNd32_UPD; - case ARM::VST2LNqWB_register_Asm_16: case ARM::VST2LNqWB_register_Asm_P16: - case ARM::VST2LNqWB_register_Asm_I16: case ARM::VST2LNqWB_register_Asm_S16: - case ARM::VST2LNqWB_register_Asm_U16: + case ARM::VST2LNqWB_register_Asm_16: Spacing = 2; return ARM::VST2LNq16_UPD; - case ARM::VST2LNqWB_register_Asm_32: case ARM::VST2LNqWB_register_Asm_F: - case ARM::VST2LNqWB_register_Asm_F32: case ARM::VST2LNqWB_register_Asm_I32: - case ARM::VST2LNqWB_register_Asm_S32: case ARM::VST2LNqWB_register_Asm_U32: + case ARM::VST2LNqWB_register_Asm_32: Spacing = 2; return ARM::VST2LNq32_UPD; - case ARM::VST2LNdAsm_8: case ARM::VST2LNdAsm_P8: - case ARM::VST2LNdAsm_I8: case ARM::VST2LNdAsm_S8: - case ARM::VST2LNdAsm_U8: + case ARM::VST2LNdAsm_8: Spacing = 1; return ARM::VST2LNd8; - case ARM::VST2LNdAsm_16: case ARM::VST2LNdAsm_P16: - case ARM::VST2LNdAsm_I16: case ARM::VST2LNdAsm_S16: - case ARM::VST2LNdAsm_U16: + case ARM::VST2LNdAsm_16: Spacing = 1; return ARM::VST2LNd16; - case ARM::VST2LNdAsm_32: case ARM::VST2LNdAsm_F: - case ARM::VST2LNdAsm_F32: case ARM::VST2LNdAsm_I32: - case ARM::VST2LNdAsm_S32: case ARM::VST2LNdAsm_U32: + case ARM::VST2LNdAsm_32: Spacing = 1; return ARM::VST2LNd32; - case ARM::VST2LNqAsm_16: case ARM::VST2LNqAsm_P16: - case 
ARM::VST2LNqAsm_I16: case ARM::VST2LNqAsm_S16: - case ARM::VST2LNqAsm_U16: + case ARM::VST2LNqAsm_16: Spacing = 2; return ARM::VST2LNq16; - case ARM::VST2LNqAsm_32: case ARM::VST2LNqAsm_F: - case ARM::VST2LNqAsm_F32: case ARM::VST2LNqAsm_I32: - case ARM::VST2LNqAsm_S32: case ARM::VST2LNqAsm_U32: + case ARM::VST2LNqAsm_32: Spacing = 2; return ARM::VST2LNq32; } @@ -5278,126 +5230,78 @@ switch(Opc) { default: assert(0 && "unexpected opcode!"); // VLD1LN - case ARM::VLD1LNdWB_fixed_Asm_8: case ARM::VLD1LNdWB_fixed_Asm_P8: - case ARM::VLD1LNdWB_fixed_Asm_I8: case ARM::VLD1LNdWB_fixed_Asm_S8: - case ARM::VLD1LNdWB_fixed_Asm_U8: + case ARM::VLD1LNdWB_fixed_Asm_8: Spacing = 1; return ARM::VLD1LNd8_UPD; - case ARM::VLD1LNdWB_fixed_Asm_16: case ARM::VLD1LNdWB_fixed_Asm_P16: - case ARM::VLD1LNdWB_fixed_Asm_I16: case ARM::VLD1LNdWB_fixed_Asm_S16: - case ARM::VLD1LNdWB_fixed_Asm_U16: + case ARM::VLD1LNdWB_fixed_Asm_16: Spacing = 1; return ARM::VLD1LNd16_UPD; - case ARM::VLD1LNdWB_fixed_Asm_32: case ARM::VLD1LNdWB_fixed_Asm_F: - case ARM::VLD1LNdWB_fixed_Asm_F32: case ARM::VLD1LNdWB_fixed_Asm_I32: - case ARM::VLD1LNdWB_fixed_Asm_S32: case ARM::VLD1LNdWB_fixed_Asm_U32: + case ARM::VLD1LNdWB_fixed_Asm_32: Spacing = 1; return ARM::VLD1LNd32_UPD; - case ARM::VLD1LNdWB_register_Asm_8: case ARM::VLD1LNdWB_register_Asm_P8: - case ARM::VLD1LNdWB_register_Asm_I8: case ARM::VLD1LNdWB_register_Asm_S8: - case ARM::VLD1LNdWB_register_Asm_U8: + case ARM::VLD1LNdWB_register_Asm_8: Spacing = 1; return ARM::VLD1LNd8_UPD; - case ARM::VLD1LNdWB_register_Asm_16: case ARM::VLD1LNdWB_register_Asm_P16: - case ARM::VLD1LNdWB_register_Asm_I16: case ARM::VLD1LNdWB_register_Asm_S16: - case ARM::VLD1LNdWB_register_Asm_U16: + case ARM::VLD1LNdWB_register_Asm_16: Spacing = 1; return ARM::VLD1LNd16_UPD; - case ARM::VLD1LNdWB_register_Asm_32: case ARM::VLD1LNdWB_register_Asm_F: - case ARM::VLD1LNdWB_register_Asm_F32: case ARM::VLD1LNdWB_register_Asm_I32: - case ARM::VLD1LNdWB_register_Asm_S32: case 
ARM::VLD1LNdWB_register_Asm_U32: + case ARM::VLD1LNdWB_register_Asm_32: Spacing = 1; return ARM::VLD1LNd32_UPD; - case ARM::VLD1LNdAsm_8: case ARM::VLD1LNdAsm_P8: - case ARM::VLD1LNdAsm_I8: case ARM::VLD1LNdAsm_S8: - case ARM::VLD1LNdAsm_U8: + case ARM::VLD1LNdAsm_8: Spacing = 1; return ARM::VLD1LNd8; - case ARM::VLD1LNdAsm_16: case ARM::VLD1LNdAsm_P16: - case ARM::VLD1LNdAsm_I16: case ARM::VLD1LNdAsm_S16: - case ARM::VLD1LNdAsm_U16: + case ARM::VLD1LNdAsm_16: Spacing = 1; return ARM::VLD1LNd16; - case ARM::VLD1LNdAsm_32: case ARM::VLD1LNdAsm_F: - case ARM::VLD1LNdAsm_F32: case ARM::VLD1LNdAsm_I32: - case ARM::VLD1LNdAsm_S32: case ARM::VLD1LNdAsm_U32: + case ARM::VLD1LNdAsm_32: Spacing = 1; return ARM::VLD1LNd32; // VLD2LN - case ARM::VLD2LNdWB_fixed_Asm_8: case ARM::VLD2LNdWB_fixed_Asm_P8: - case ARM::VLD2LNdWB_fixed_Asm_I8: case ARM::VLD2LNdWB_fixed_Asm_S8: - case ARM::VLD2LNdWB_fixed_Asm_U8: + case ARM::VLD2LNdWB_fixed_Asm_8: Spacing = 1; return ARM::VLD2LNd8_UPD; - case ARM::VLD2LNdWB_fixed_Asm_16: case ARM::VLD2LNdWB_fixed_Asm_P16: - case ARM::VLD2LNdWB_fixed_Asm_I16: case ARM::VLD2LNdWB_fixed_Asm_S16: - case ARM::VLD2LNdWB_fixed_Asm_U16: + case ARM::VLD2LNdWB_fixed_Asm_16: Spacing = 1; return ARM::VLD2LNd16_UPD; - case ARM::VLD2LNdWB_fixed_Asm_32: case ARM::VLD2LNdWB_fixed_Asm_F: - case ARM::VLD2LNdWB_fixed_Asm_F32: case ARM::VLD2LNdWB_fixed_Asm_I32: - case ARM::VLD2LNdWB_fixed_Asm_S32: case ARM::VLD2LNdWB_fixed_Asm_U32: + case ARM::VLD2LNdWB_fixed_Asm_32: Spacing = 1; return ARM::VLD2LNd32_UPD; - case ARM::VLD2LNqWB_fixed_Asm_16: case ARM::VLD2LNqWB_fixed_Asm_P16: - case ARM::VLD2LNqWB_fixed_Asm_I16: case ARM::VLD2LNqWB_fixed_Asm_S16: - case ARM::VLD2LNqWB_fixed_Asm_U16: + case ARM::VLD2LNqWB_fixed_Asm_16: Spacing = 1; return ARM::VLD2LNq16_UPD; - case ARM::VLD2LNqWB_fixed_Asm_32: case ARM::VLD2LNqWB_fixed_Asm_F: - case ARM::VLD2LNqWB_fixed_Asm_F32: case ARM::VLD2LNqWB_fixed_Asm_I32: - case ARM::VLD2LNqWB_fixed_Asm_S32: case ARM::VLD2LNqWB_fixed_Asm_U32: + 
case ARM::VLD2LNqWB_fixed_Asm_32: Spacing = 2; return ARM::VLD2LNq32_UPD; - case ARM::VLD2LNdWB_register_Asm_8: case ARM::VLD2LNdWB_register_Asm_P8: - case ARM::VLD2LNdWB_register_Asm_I8: case ARM::VLD2LNdWB_register_Asm_S8: - case ARM::VLD2LNdWB_register_Asm_U8: + case ARM::VLD2LNdWB_register_Asm_8: Spacing = 1; return ARM::VLD2LNd8_UPD; - case ARM::VLD2LNdWB_register_Asm_16: case ARM::VLD2LNdWB_register_Asm_P16: - case ARM::VLD2LNdWB_register_Asm_I16: case ARM::VLD2LNdWB_register_Asm_S16: - case ARM::VLD2LNdWB_register_Asm_U16: + case ARM::VLD2LNdWB_register_Asm_16: Spacing = 1; return ARM::VLD2LNd16_UPD; - case ARM::VLD2LNdWB_register_Asm_32: case ARM::VLD2LNdWB_register_Asm_F: - case ARM::VLD2LNdWB_register_Asm_F32: case ARM::VLD2LNdWB_register_Asm_I32: - case ARM::VLD2LNdWB_register_Asm_S32: case ARM::VLD2LNdWB_register_Asm_U32: + case ARM::VLD2LNdWB_register_Asm_32: Spacing = 1; return ARM::VLD2LNd32_UPD; - case ARM::VLD2LNqWB_register_Asm_16: case ARM::VLD2LNqWB_register_Asm_P16: - case ARM::VLD2LNqWB_register_Asm_I16: case ARM::VLD2LNqWB_register_Asm_S16: - case ARM::VLD2LNqWB_register_Asm_U16: + case ARM::VLD2LNqWB_register_Asm_16: Spacing = 2; return ARM::VLD2LNq16_UPD; - case ARM::VLD2LNqWB_register_Asm_32: case ARM::VLD2LNqWB_register_Asm_F: - case ARM::VLD2LNqWB_register_Asm_F32: case ARM::VLD2LNqWB_register_Asm_I32: - case ARM::VLD2LNqWB_register_Asm_S32: case ARM::VLD2LNqWB_register_Asm_U32: + case ARM::VLD2LNqWB_register_Asm_32: Spacing = 2; return ARM::VLD2LNq32_UPD; - case ARM::VLD2LNdAsm_8: case ARM::VLD2LNdAsm_P8: - case ARM::VLD2LNdAsm_I8: case ARM::VLD2LNdAsm_S8: - case ARM::VLD2LNdAsm_U8: + case ARM::VLD2LNdAsm_8: Spacing = 1; return ARM::VLD2LNd8; - case ARM::VLD2LNdAsm_16: case ARM::VLD2LNdAsm_P16: - case ARM::VLD2LNdAsm_I16: case ARM::VLD2LNdAsm_S16: - case ARM::VLD2LNdAsm_U16: + case ARM::VLD2LNdAsm_16: Spacing = 1; return ARM::VLD2LNd16; - case ARM::VLD2LNdAsm_32: case ARM::VLD2LNdAsm_F: - case ARM::VLD2LNdAsm_F32: case 
ARM::VLD2LNdAsm_I32: - case ARM::VLD2LNdAsm_S32: case ARM::VLD2LNdAsm_U32: + case ARM::VLD2LNdAsm_32: Spacing = 1; return ARM::VLD2LNd32; - case ARM::VLD2LNqAsm_16: case ARM::VLD2LNqAsm_P16: - case ARM::VLD2LNqAsm_I16: case ARM::VLD2LNqAsm_S16: - case ARM::VLD2LNqAsm_U16: + case ARM::VLD2LNqAsm_16: Spacing = 2; return ARM::VLD2LNq16; - case ARM::VLD2LNqAsm_32: case ARM::VLD2LNqAsm_F: - case ARM::VLD2LNqAsm_F32: case ARM::VLD2LNqAsm_I32: - case ARM::VLD2LNqAsm_S32: case ARM::VLD2LNqAsm_U32: + case ARM::VLD2LNqAsm_32: Spacing = 2; return ARM::VLD2LNq32; } @@ -5424,14 +5328,9 @@ Inst.setOpcode(ARM::t2LDRSHpci); return true; // Handle NEON VST complex aliases. - case ARM::VST1LNdWB_register_Asm_8: case ARM::VST1LNdWB_register_Asm_P8: - case ARM::VST1LNdWB_register_Asm_I8: case ARM::VST1LNdWB_register_Asm_S8: - case ARM::VST1LNdWB_register_Asm_U8: case ARM::VST1LNdWB_register_Asm_16: - case ARM::VST1LNdWB_register_Asm_P16: case ARM::VST1LNdWB_register_Asm_I16: - case ARM::VST1LNdWB_register_Asm_S16: case ARM::VST1LNdWB_register_Asm_U16: - case ARM::VST1LNdWB_register_Asm_32: case ARM::VST1LNdWB_register_Asm_F: - case ARM::VST1LNdWB_register_Asm_F32: case ARM::VST1LNdWB_register_Asm_I32: - case ARM::VST1LNdWB_register_Asm_S32: case ARM::VST1LNdWB_register_Asm_U32: { + case ARM::VST1LNdWB_register_Asm_8: + case ARM::VST1LNdWB_register_Asm_16: + case ARM::VST1LNdWB_register_Asm_32: { MCInst TmpInst; // Shuffle the operands around so the lane index operand is in the // right place. 
@@ -5449,20 +5348,11 @@ return true; } - case ARM::VST2LNdWB_register_Asm_8: case ARM::VST2LNdWB_register_Asm_P8: - case ARM::VST2LNdWB_register_Asm_I8: case ARM::VST2LNdWB_register_Asm_S8: - case ARM::VST2LNdWB_register_Asm_U8: case ARM::VST2LNdWB_register_Asm_16: - case ARM::VST2LNdWB_register_Asm_P16: case ARM::VST2LNdWB_register_Asm_I16: - case ARM::VST2LNdWB_register_Asm_S16: case ARM::VST2LNdWB_register_Asm_U16: - case ARM::VST2LNdWB_register_Asm_32: case ARM::VST2LNdWB_register_Asm_F: - case ARM::VST2LNdWB_register_Asm_F32: case ARM::VST2LNdWB_register_Asm_I32: - case ARM::VST2LNdWB_register_Asm_S32: case ARM::VST2LNdWB_register_Asm_U32: - case ARM::VST2LNqWB_register_Asm_16: case ARM::VST2LNqWB_register_Asm_P16: - case ARM::VST2LNqWB_register_Asm_I16: case ARM::VST2LNqWB_register_Asm_S16: - case ARM::VST2LNqWB_register_Asm_U16: case ARM::VST2LNqWB_register_Asm_32: - case ARM::VST2LNqWB_register_Asm_F: case ARM::VST2LNqWB_register_Asm_F32: - case ARM::VST2LNqWB_register_Asm_I32: case ARM::VST2LNqWB_register_Asm_S32: - case ARM::VST2LNqWB_register_Asm_U32: { + case ARM::VST2LNdWB_register_Asm_8: + case ARM::VST2LNdWB_register_Asm_16: + case ARM::VST2LNdWB_register_Asm_32: + case ARM::VST2LNqWB_register_Asm_16: + case ARM::VST2LNqWB_register_Asm_32: { MCInst TmpInst; // Shuffle the operands around so the lane index operand is in the // right place. 
@@ -5481,14 +5371,9 @@ Inst = TmpInst; return true; } - case ARM::VST1LNdWB_fixed_Asm_8: case ARM::VST1LNdWB_fixed_Asm_P8: - case ARM::VST1LNdWB_fixed_Asm_I8: case ARM::VST1LNdWB_fixed_Asm_S8: - case ARM::VST1LNdWB_fixed_Asm_U8: case ARM::VST1LNdWB_fixed_Asm_16: - case ARM::VST1LNdWB_fixed_Asm_P16: case ARM::VST1LNdWB_fixed_Asm_I16: - case ARM::VST1LNdWB_fixed_Asm_S16: case ARM::VST1LNdWB_fixed_Asm_U16: - case ARM::VST1LNdWB_fixed_Asm_32: case ARM::VST1LNdWB_fixed_Asm_F: - case ARM::VST1LNdWB_fixed_Asm_F32: case ARM::VST1LNdWB_fixed_Asm_I32: - case ARM::VST1LNdWB_fixed_Asm_S32: case ARM::VST1LNdWB_fixed_Asm_U32: { + case ARM::VST1LNdWB_fixed_Asm_8: + case ARM::VST1LNdWB_fixed_Asm_16: + case ARM::VST1LNdWB_fixed_Asm_32: { MCInst TmpInst; // Shuffle the operands around so the lane index operand is in the // right place. @@ -5506,20 +5391,11 @@ return true; } - case ARM::VST2LNdWB_fixed_Asm_8: case ARM::VST2LNdWB_fixed_Asm_P8: - case ARM::VST2LNdWB_fixed_Asm_I8: case ARM::VST2LNdWB_fixed_Asm_S8: - case ARM::VST2LNdWB_fixed_Asm_U8: case ARM::VST2LNdWB_fixed_Asm_16: - case ARM::VST2LNdWB_fixed_Asm_P16: case ARM::VST2LNdWB_fixed_Asm_I16: - case ARM::VST2LNdWB_fixed_Asm_S16: case ARM::VST2LNdWB_fixed_Asm_U16: - case ARM::VST2LNdWB_fixed_Asm_32: case ARM::VST2LNdWB_fixed_Asm_F: - case ARM::VST2LNdWB_fixed_Asm_F32: case ARM::VST2LNdWB_fixed_Asm_I32: - case ARM::VST2LNdWB_fixed_Asm_S32: case ARM::VST2LNdWB_fixed_Asm_U32: - case ARM::VST2LNqWB_fixed_Asm_16: case ARM::VST2LNqWB_fixed_Asm_P16: - case ARM::VST2LNqWB_fixed_Asm_I16: case ARM::VST2LNqWB_fixed_Asm_S16: - case ARM::VST2LNqWB_fixed_Asm_U16: case ARM::VST2LNqWB_fixed_Asm_32: - case ARM::VST2LNqWB_fixed_Asm_F: case ARM::VST2LNqWB_fixed_Asm_F32: - case ARM::VST2LNqWB_fixed_Asm_I32: case ARM::VST2LNqWB_fixed_Asm_S32: - case ARM::VST2LNqWB_fixed_Asm_U32: { + case ARM::VST2LNdWB_fixed_Asm_8: + case ARM::VST2LNdWB_fixed_Asm_16: + case ARM::VST2LNdWB_fixed_Asm_32: + case ARM::VST2LNqWB_fixed_Asm_16: + case 
ARM::VST2LNqWB_fixed_Asm_32: { MCInst TmpInst; // Shuffle the operands around so the lane index operand is in the // right place. @@ -5538,12 +5414,9 @@ Inst = TmpInst; return true; } - case ARM::VST1LNdAsm_8: case ARM::VST1LNdAsm_P8: case ARM::VST1LNdAsm_I8: - case ARM::VST1LNdAsm_S8: case ARM::VST1LNdAsm_U8: case ARM::VST1LNdAsm_16: - case ARM::VST1LNdAsm_P16: case ARM::VST1LNdAsm_I16: case ARM::VST1LNdAsm_S16: - case ARM::VST1LNdAsm_U16: case ARM::VST1LNdAsm_32: case ARM::VST1LNdAsm_F: - case ARM::VST1LNdAsm_F32: case ARM::VST1LNdAsm_I32: case ARM::VST1LNdAsm_S32: - case ARM::VST1LNdAsm_U32: { + case ARM::VST1LNdAsm_8: + case ARM::VST1LNdAsm_16: + case ARM::VST1LNdAsm_32: { MCInst TmpInst; // Shuffle the operands around so the lane index operand is in the // right place. @@ -5559,15 +5432,11 @@ return true; } - case ARM::VST2LNdAsm_8: case ARM::VST2LNdAsm_P8: case ARM::VST2LNdAsm_I8: - case ARM::VST2LNdAsm_S8: case ARM::VST2LNdAsm_U8: case ARM::VST2LNdAsm_16: - case ARM::VST2LNdAsm_P16: case ARM::VST2LNdAsm_I16: case ARM::VST2LNdAsm_S16: - case ARM::VST2LNdAsm_U16: case ARM::VST2LNdAsm_32: case ARM::VST2LNdAsm_F: - case ARM::VST2LNdAsm_F32: case ARM::VST2LNdAsm_I32: case ARM::VST2LNdAsm_S32: - case ARM::VST2LNdAsm_U32: case ARM::VST2LNqAsm_16: case ARM::VST2LNqAsm_P16: - case ARM::VST2LNqAsm_I16: case ARM::VST2LNqAsm_S16: case ARM::VST2LNqAsm_U16: - case ARM::VST2LNqAsm_32: case ARM::VST2LNqAsm_F: case ARM::VST2LNqAsm_F32: - case ARM::VST2LNqAsm_I32: case ARM::VST2LNqAsm_S32: case ARM::VST2LNqAsm_U32:{ + case ARM::VST2LNdAsm_8: + case ARM::VST2LNdAsm_16: + case ARM::VST2LNdAsm_32: + case ARM::VST2LNqAsm_16: + case ARM::VST2LNqAsm_32: { MCInst TmpInst; // Shuffle the operands around so the lane index operand is in the // right place. @@ -5585,14 +5454,9 @@ return true; } // Handle NEON VLD complex aliases. 
- case ARM::VLD1LNdWB_register_Asm_8: case ARM::VLD1LNdWB_register_Asm_P8: - case ARM::VLD1LNdWB_register_Asm_I8: case ARM::VLD1LNdWB_register_Asm_S8: - case ARM::VLD1LNdWB_register_Asm_U8: case ARM::VLD1LNdWB_register_Asm_16: - case ARM::VLD1LNdWB_register_Asm_P16: case ARM::VLD1LNdWB_register_Asm_I16: - case ARM::VLD1LNdWB_register_Asm_S16: case ARM::VLD1LNdWB_register_Asm_U16: - case ARM::VLD1LNdWB_register_Asm_32: case ARM::VLD1LNdWB_register_Asm_F: - case ARM::VLD1LNdWB_register_Asm_F32: case ARM::VLD1LNdWB_register_Asm_I32: - case ARM::VLD1LNdWB_register_Asm_S32: case ARM::VLD1LNdWB_register_Asm_U32: { + case ARM::VLD1LNdWB_register_Asm_8: + case ARM::VLD1LNdWB_register_Asm_16: + case ARM::VLD1LNdWB_register_Asm_32: { MCInst TmpInst; // Shuffle the operands around so the lane index operand is in the // right place. @@ -5611,20 +5475,11 @@ return true; } - case ARM::VLD2LNdWB_register_Asm_8: case ARM::VLD2LNdWB_register_Asm_P8: - case ARM::VLD2LNdWB_register_Asm_I8: case ARM::VLD2LNdWB_register_Asm_S8: - case ARM::VLD2LNdWB_register_Asm_U8: case ARM::VLD2LNdWB_register_Asm_16: - case ARM::VLD2LNdWB_register_Asm_P16: case ARM::VLD2LNdWB_register_Asm_I16: - case ARM::VLD2LNdWB_register_Asm_S16: case ARM::VLD2LNdWB_register_Asm_U16: - case ARM::VLD2LNdWB_register_Asm_32: case ARM::VLD2LNdWB_register_Asm_F: - case ARM::VLD2LNdWB_register_Asm_F32: case ARM::VLD2LNdWB_register_Asm_I32: - case ARM::VLD2LNdWB_register_Asm_S32: case ARM::VLD2LNdWB_register_Asm_U32: - case ARM::VLD2LNqWB_register_Asm_16: case ARM::VLD2LNqWB_register_Asm_P16: - case ARM::VLD2LNqWB_register_Asm_I16: case ARM::VLD2LNqWB_register_Asm_S16: - case ARM::VLD2LNqWB_register_Asm_U16: case ARM::VLD2LNqWB_register_Asm_32: - case ARM::VLD2LNqWB_register_Asm_F: case ARM::VLD2LNqWB_register_Asm_F32: - case ARM::VLD2LNqWB_register_Asm_I32: case ARM::VLD2LNqWB_register_Asm_S32: - case ARM::VLD2LNqWB_register_Asm_U32: { + case ARM::VLD2LNdWB_register_Asm_8: + case ARM::VLD2LNdWB_register_Asm_16: + case 
ARM::VLD2LNdWB_register_Asm_32: + case ARM::VLD2LNqWB_register_Asm_16: + case ARM::VLD2LNqWB_register_Asm_32: { MCInst TmpInst; // Shuffle the operands around so the lane index operand is in the // right place. @@ -5647,14 +5502,9 @@ return true; } - case ARM::VLD1LNdWB_fixed_Asm_8: case ARM::VLD1LNdWB_fixed_Asm_P8: - case ARM::VLD1LNdWB_fixed_Asm_I8: case ARM::VLD1LNdWB_fixed_Asm_S8: - case ARM::VLD1LNdWB_fixed_Asm_U8: case ARM::VLD1LNdWB_fixed_Asm_16: - case ARM::VLD1LNdWB_fixed_Asm_P16: case ARM::VLD1LNdWB_fixed_Asm_I16: - case ARM::VLD1LNdWB_fixed_Asm_S16: case ARM::VLD1LNdWB_fixed_Asm_U16: - case ARM::VLD1LNdWB_fixed_Asm_32: case ARM::VLD1LNdWB_fixed_Asm_F: - case ARM::VLD1LNdWB_fixed_Asm_F32: case ARM::VLD1LNdWB_fixed_Asm_I32: - case ARM::VLD1LNdWB_fixed_Asm_S32: case ARM::VLD1LNdWB_fixed_Asm_U32: { + case ARM::VLD1LNdWB_fixed_Asm_8: + case ARM::VLD1LNdWB_fixed_Asm_16: + case ARM::VLD1LNdWB_fixed_Asm_32: { MCInst TmpInst; // Shuffle the operands around so the lane index operand is in the // right place. 
@@ -5673,20 +5523,11 @@ return true; } - case ARM::VLD2LNdWB_fixed_Asm_8: case ARM::VLD2LNdWB_fixed_Asm_P8: - case ARM::VLD2LNdWB_fixed_Asm_I8: case ARM::VLD2LNdWB_fixed_Asm_S8: - case ARM::VLD2LNdWB_fixed_Asm_U8: case ARM::VLD2LNdWB_fixed_Asm_16: - case ARM::VLD2LNdWB_fixed_Asm_P16: case ARM::VLD2LNdWB_fixed_Asm_I16: - case ARM::VLD2LNdWB_fixed_Asm_S16: case ARM::VLD2LNdWB_fixed_Asm_U16: - case ARM::VLD2LNdWB_fixed_Asm_32: case ARM::VLD2LNdWB_fixed_Asm_F: - case ARM::VLD2LNdWB_fixed_Asm_F32: case ARM::VLD2LNdWB_fixed_Asm_I32: - case ARM::VLD2LNdWB_fixed_Asm_S32: case ARM::VLD2LNdWB_fixed_Asm_U32: - case ARM::VLD2LNqWB_fixed_Asm_16: case ARM::VLD2LNqWB_fixed_Asm_P16: - case ARM::VLD2LNqWB_fixed_Asm_I16: case ARM::VLD2LNqWB_fixed_Asm_S16: - case ARM::VLD2LNqWB_fixed_Asm_U16: case ARM::VLD2LNqWB_fixed_Asm_32: - case ARM::VLD2LNqWB_fixed_Asm_F: case ARM::VLD2LNqWB_fixed_Asm_F32: - case ARM::VLD2LNqWB_fixed_Asm_I32: case ARM::VLD2LNqWB_fixed_Asm_S32: - case ARM::VLD2LNqWB_fixed_Asm_U32: { + case ARM::VLD2LNdWB_fixed_Asm_8: + case ARM::VLD2LNdWB_fixed_Asm_16: + case ARM::VLD2LNdWB_fixed_Asm_32: + case ARM::VLD2LNqWB_fixed_Asm_16: + case ARM::VLD2LNqWB_fixed_Asm_32: { MCInst TmpInst; // Shuffle the operands around so the lane index operand is in the // right place. @@ -5709,12 +5550,9 @@ return true; } - case ARM::VLD1LNdAsm_8: case ARM::VLD1LNdAsm_P8: case ARM::VLD1LNdAsm_I8: - case ARM::VLD1LNdAsm_S8: case ARM::VLD1LNdAsm_U8: case ARM::VLD1LNdAsm_16: - case ARM::VLD1LNdAsm_P16: case ARM::VLD1LNdAsm_I16: case ARM::VLD1LNdAsm_S16: - case ARM::VLD1LNdAsm_U16: case ARM::VLD1LNdAsm_32: case ARM::VLD1LNdAsm_F: - case ARM::VLD1LNdAsm_F32: case ARM::VLD1LNdAsm_I32: case ARM::VLD1LNdAsm_S32: - case ARM::VLD1LNdAsm_U32: { + case ARM::VLD1LNdAsm_8: + case ARM::VLD1LNdAsm_16: + case ARM::VLD1LNdAsm_32: { MCInst TmpInst; // Shuffle the operands around so the lane index operand is in the // right place. 
@@ -5731,16 +5569,11 @@ return true; } - case ARM::VLD2LNdAsm_8: case ARM::VLD2LNdAsm_P8: case ARM::VLD2LNdAsm_I8: - case ARM::VLD2LNdAsm_S8: case ARM::VLD2LNdAsm_U8: case ARM::VLD2LNdAsm_16: - case ARM::VLD2LNdAsm_P16: case ARM::VLD2LNdAsm_I16: case ARM::VLD2LNdAsm_S16: - case ARM::VLD2LNdAsm_U16: case ARM::VLD2LNdAsm_32: case ARM::VLD2LNdAsm_F: - case ARM::VLD2LNdAsm_F32: case ARM::VLD2LNdAsm_I32: case ARM::VLD2LNdAsm_S32: - case ARM::VLD2LNdAsm_U32: case ARM::VLD2LNqAsm_16: case ARM::VLD2LNqAsm_P16: - case ARM::VLD2LNqAsm_I16: case ARM::VLD2LNqAsm_S16: case ARM::VLD2LNqAsm_U16: - case ARM::VLD2LNqAsm_32: case ARM::VLD2LNqAsm_F: case ARM::VLD2LNqAsm_F32: - case ARM::VLD2LNqAsm_I32: case ARM::VLD2LNqAsm_S32: - case ARM::VLD2LNqAsm_U32: { + case ARM::VLD2LNdAsm_8: + case ARM::VLD2LNdAsm_16: + case ARM::VLD2LNdAsm_32: + case ARM::VLD2LNqAsm_16: + case ARM::VLD2LNqAsm_32: { MCInst TmpInst; // Shuffle the operands around so the lane index operand is in the // right place. Modified: llvm/trunk/test/MC/ARM/neon-vst-encoding.s URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/ARM/neon-vst-encoding.s?rev=148718&r1=148717&r2=148718&view=diff ============================================================================== --- llvm/trunk/test/MC/ARM/neon-vst-encoding.s (original) +++ llvm/trunk/test/MC/ARM/neon-vst-encoding.s Mon Jan 23 13:39:08 2012 @@ -99,21 +99,21 @@ vst2.8 {d16[1], d17[1]}, [r0, :16] - vst2.16 {d16[1], d17[1]}, [r0, :32] - vst2.32 {d16[1], d17[1]}, [r0] - vst2.16 {d17[1], d19[1]}, [r0] - vst2.32 {d17[0], d19[0]}, [r0, :64] + vst2.p16 {d16[1], d17[1]}, [r0, :32] + vst2.i32 {d16[1], d17[1]}, [r0] + vst2.u16 {d17[1], d19[1]}, [r0] + vst2.f32 {d17[0], d19[0]}, [r0, :64] vst2.8 {d2[4], d3[4]}, [r2], r3 - vst2.8 {d2[4], d3[4]}, [r2]! - vst2.8 {d2[4], d3[4]}, [r2] + vst2.u8 {d2[4], d3[4]}, [r2]! + vst2.p8 {d2[4], d3[4]}, [r2] vst2.16 {d17[1], d19[1]}, [r0] vst2.32 {d17[0], d19[0]}, [r0, :64] - vst2.16 {d7[1], d9[1]}, [r1]! 
+ vst2.i16 {d7[1], d9[1]}, [r1]! vst2.32 {d6[0], d8[0]}, [r2, :64]! vst2.16 {d2[1], d4[1]}, [r3], r5 - vst2.32 {d5[0], d7[0]}, [r4, :64], r7 + vst2.u32 {d5[0], d7[0]}, [r4, :64], r7 @ CHECK: vst2.8 {d16[1], d17[1]}, [r0, :16] @ encoding: [0x3f,0x01,0xc0,0xf4] @ CHECK: vst2.16 {d16[1], d17[1]}, [r0, :32] @ encoding: [0x5f,0x05,0xc0,0xf4] From eli.friedman at gmail.com Mon Jan 23 14:01:30 2012 From: eli.friedman at gmail.com (Eli Friedman) Date: Mon, 23 Jan 2012 12:01:30 -0800 Subject: [llvm-commits] fix the MSVC warning in include/llvm-c/Core.h In-Reply-To: References: Message-ID: On Mon, Jan 23, 2012 at 11:02 AM, Kostya Serebryany wrote: > My previous change in include/llvm-c/Core.h that introduced 64-bit > Attributes (r148553) caused a warning > while building with MSVC. http://llvm.org/bugs/show_bug.cgi?id=11828 > The following patch fixes the problem (use "static const uint64_t" instead > of enum). > Ok to commit? > > --kcc > > Index: include/llvm-c/Core.h > =================================================================== > --- include/llvm-c/Core.h (revision 148708) > +++ include/llvm-c/Core.h (working copy) > @@ -92,7 +92,7 @@ > /** Used to get the users and usees of a Value. See the llvm::Use class. */ > typedef struct LLVMOpaqueUse *LLVMUseRef; > > -typedef enum { > +static const uint64_t > LLVMZExtAttribute = 1<<0, > LLVMSExtAttribute = 1<<1, > LLVMNoReturnAttribute = 1<<2, > @@ -119,8 +119,8 @@ > LLVMReturnsTwice = 1 << 29, > LLVMUWTable = 1 << 30, > LLVMNonLazyBind = 1U << 31, > - LLVMAddressSafety = 1ULL << 32 > -} LLVMAttribute; > + LLVMAddressSafety = 1ULL << 32; > +typedef uint64_t LLVMAttribute; > > typedef enum { > /* Terminator Instructions */ Hmm... actually, I'm not sure this is okay; it's a non-binary-compatible change to the C API. 
-Eli From kcc at google.com Mon Jan 23 14:05:17 2012 From: kcc at google.com (Kostya Serebryany) Date: Mon, 23 Jan 2012 12:05:17 -0800 Subject: [llvm-commits] fix the MSVC warning in include/llvm-c/Core.h In-Reply-To: References: Message-ID: On Mon, Jan 23, 2012 at 12:01 PM, Eli Friedman wrote: > On Mon, Jan 23, 2012 at 11:02 AM, Kostya Serebryany > wrote: > > My previous change in include/llvm-c/Core.h that introduced 64-bit > > Attributes (r148553) caused a warning > > while building with MSVC. http://llvm.org/bugs/show_bug.cgi?id=11828 > > The following patch fixes the problem (use "static const uint64_t" > instead > > of enum). > > Ok to commit? > > > > --kcc > > > > Index: include/llvm-c/Core.h > > =================================================================== > > --- include/llvm-c/Core.h (revision 148708) > > +++ include/llvm-c/Core.h (working copy) > > @@ -92,7 +92,7 @@ > > /** Used to get the users and usees of a Value. See the llvm::Use > class. */ > > typedef struct LLVMOpaqueUse *LLVMUseRef; > > > > -typedef enum { > > +static const uint64_t > > LLVMZExtAttribute = 1<<0, > > LLVMSExtAttribute = 1<<1, > > LLVMNoReturnAttribute = 1<<2, > > @@ -119,8 +119,8 @@ > > LLVMReturnsTwice = 1 << 29, > > LLVMUWTable = 1 << 30, > > LLVMNonLazyBind = 1U << 31, > > - LLVMAddressSafety = 1ULL << 32 > > -} LLVMAttribute; > > + LLVMAddressSafety = 1ULL << 32; > > +typedef uint64_t LLVMAttribute; > > > > typedef enum { > > /* Terminator Instructions */ > > Hmm... actually, I'm not sure this is okay; it's a > non-binary-compatible change to the C API. > Any other suggestion? It's not easy to keep compatibility once the new (beyond 32-bits) attributes start getting used. Maybe add "enum LLVMAttribute2" for attributes in bits 33-64? --kcc > > -Eli > -------------- next part -------------- An HTML attachment was scrubbed... 
URL: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120123/e714d0d7/attachment-0001.html From dpatel at apple.com Mon Jan 23 14:20:06 2012 From: dpatel at apple.com (Devang Patel) Date: Mon, 23 Jan 2012 20:20:06 -0000 Subject: [llvm-commits] [llvm] r148721 - in /llvm/trunk: lib/Target/X86/AsmParser/X86AsmParser.cpp test/MC/X86/intel-syntax.s Message-ID: <20120123202006.80ABE2A6C12C@llvm.org> Author: dpatel Date: Mon Jan 23 14:20:06 2012 New Revision: 148721 URL: http://llvm.org/viewvc/llvm-project?rev=148721&view=rev Log: Intel syntax: Parse memory operand with empty base reg, e.g. DWORD PTR [4*RDI] Modified: llvm/trunk/lib/Target/X86/AsmParser/X86AsmParser.cpp llvm/trunk/test/MC/X86/intel-syntax.s Modified: llvm/trunk/lib/Target/X86/AsmParser/X86AsmParser.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/AsmParser/X86AsmParser.cpp?rev=148721&r1=148720&r2=148721&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/AsmParser/X86AsmParser.cpp (original) +++ llvm/trunk/lib/Target/X86/AsmParser/X86AsmParser.cpp Mon Jan 23 14:20:06 2012 @@ -618,13 +618,23 @@ return X86Operand::CreateMem(Disp, Start, End, Size); } } else if (getLexer().is(AsmToken::Integer)) { - // Handle '[' number ']' - const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext()); - if (getParser().ParseExpression(Disp, End)) return 0; - if (getLexer().isNot(AsmToken::RBrac)) - return ErrorOperand(Start, "Expected ']' token!"); + int64_t Val = Parser.getTok().getIntVal(); Parser.Lex(); - return X86Operand::CreateMem(Disp, Start, End, Size); + SMLoc Loc = Parser.getTok().getLoc(); + if (getLexer().is(AsmToken::RBrac)) { + // Handle '[' number ']' + Parser.Lex(); + return X86Operand::CreateMem(MCConstantExpr::Create(Val, getContext()), + Start, End, Size); + } else if (getLexer().is(AsmToken::Star)) { + // Handle '[' Scale*IndexReg ']' + Parser.Lex(); + SMLoc IdxRegLoc = 
Parser.getTok().getLoc(); + if (ParseRegister(IndexReg, IdxRegLoc, End)) + return ErrorOperand(IdxRegLoc, "Expected register"); + Scale = Val; + } else + return ErrorOperand(Loc, "Unepxeted token"); } if (getLexer().is(AsmToken::Plus) || getLexer().is(AsmToken::Minus)) { Modified: llvm/trunk/test/MC/X86/intel-syntax.s URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/X86/intel-syntax.s?rev=148721&r1=148720&r2=148721&view=diff ============================================================================== --- llvm/trunk/test/MC/X86/intel-syntax.s (original) +++ llvm/trunk/test/MC/X86/intel-syntax.s Mon Jan 23 14:20:06 2012 @@ -56,5 +56,7 @@ // CHECK: fld %st(0) fld ST(0) // CHECK: movl %fs:(%rdi), %eax -mov EAX, DWORD PTR FS:[RDI] +mov EAX, DWORD PTR FS:[RDI] +// CHECK: leal (,%rdi,4), %r8d +lea R8D, DWORD PTR [4*RDI] ret From stoklund at 2pi.dk Mon Jan 23 15:01:16 2012 From: stoklund at 2pi.dk (Jakob Stoklund Olesen) Date: Mon, 23 Jan 2012 21:01:16 -0000 Subject: [llvm-commits] [llvm] r148724 - in /llvm/trunk: lib/CodeGen/MachineLICM.cpp test/CodeGen/ARM/2012-01-23-PostRA-LICM.ll Message-ID: <20120123210116.1858B2A6C12D@llvm.org> Author: stoklund Date: Mon Jan 23 15:01:15 2012 New Revision: 148724 URL: http://llvm.org/viewvc/llvm-project?rev=148724&view=rev Log: Fix PR11829. PostRA LICM was too aggressive. This fixes a typo in r148589. Added: llvm/trunk/test/CodeGen/ARM/2012-01-23-PostRA-LICM.ll Modified: llvm/trunk/lib/CodeGen/MachineLICM.cpp Modified: llvm/trunk/lib/CodeGen/MachineLICM.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MachineLICM.cpp?rev=148724&r1=148723&r2=148724&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/MachineLICM.cpp (original) +++ llvm/trunk/lib/CodeGen/MachineLICM.cpp Mon Jan 23 15:01:15 2012 @@ -462,13 +462,13 @@ // register, then this is not safe. Two defs is indicated by setting a // PhysRegClobbers bit. 
for (const unsigned *AS = TRI->getOverlaps(Reg); *AS; ++AS) { - if (PhysRegDefs.test(Reg)) - PhysRegClobbers.set(Reg); - if (PhysRegClobbers.test(Reg)) + if (PhysRegDefs.test(*AS)) + PhysRegClobbers.set(*AS); + if (PhysRegClobbers.test(*AS)) // MI defined register is seen defined by another instruction in // the loop, it cannot be a LICM candidate. RuledOut = true; - PhysRegDefs.set(Reg); + PhysRegDefs.set(*AS); } } Added: llvm/trunk/test/CodeGen/ARM/2012-01-23-PostRA-LICM.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/2012-01-23-PostRA-LICM.ll?rev=148724&view=auto ============================================================================== --- llvm/trunk/test/CodeGen/ARM/2012-01-23-PostRA-LICM.ll (added) +++ llvm/trunk/test/CodeGen/ARM/2012-01-23-PostRA-LICM.ll Mon Jan 23 15:01:15 2012 @@ -0,0 +1,105 @@ +; RUN: llc < %s -mcpu=cortex-a8 -verify-machineinstrs +; PR11829 +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64" +target triple = "armv7-none-linux-gnueabi" + +define arm_aapcs_vfpcc void @foo(i8* nocapture %arg) nounwind uwtable align 2 { +bb: + br i1 undef, label %bb1, label %bb2 + +bb1: ; preds = %bb + unreachable + +bb2: ; preds = %bb + br label %bb3 + +bb3: ; preds = %bb4, %bb2 + %tmp = icmp slt i32 undef, undef + br i1 %tmp, label %bb4, label %bb67 + +bb4: ; preds = %bb3 + %tmp5 = load <4 x i32>* undef, align 16, !tbaa !0 + %tmp6 = and <4 x i32> %tmp5, + %tmp7 = or <4 x i32> %tmp6, + %tmp8 = bitcast <4 x i32> %tmp7 to <4 x float> + %tmp9 = fsub <4 x float> %tmp8, bitcast (i128 or (i128 shl (i128 zext (i64 trunc (i128 lshr (i128 bitcast (<4 x float> to i128), i128 64) to i64) to i128), i128 64), i128 zext (i64 trunc (i128 bitcast (<4 x float> to i128) to i64) to i128)) to <4 x float>) + %tmp10 = fmul <4 x float> undef, %tmp9 + %tmp11 = fadd <4 x float> undef, %tmp10 + %tmp12 = bitcast <4 x float> zeroinitializer to i128 + %tmp13 = lshr i128 
%tmp12, 64 + %tmp14 = trunc i128 %tmp13 to i64 + %tmp15 = insertvalue [2 x i64] undef, i64 %tmp14, 1 + %tmp16 = call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> %tmp11) nounwind + %tmp17 = call <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float> %tmp16, <4 x float> %tmp11) nounwind + %tmp18 = fmul <4 x float> %tmp17, %tmp16 + %tmp19 = call <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float> %tmp18, <4 x float> %tmp11) nounwind + %tmp20 = fmul <4 x float> %tmp19, %tmp18 + %tmp21 = fmul <4 x float> %tmp20, zeroinitializer + %tmp22 = call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %tmp21, <4 x float> undef) nounwind + call arm_aapcs_vfpcc void @bar(i8* null, i8* undef, <4 x i32>* undef, [2 x i64] zeroinitializer) nounwind + %tmp23 = bitcast <4 x float> %tmp22 to i128 + %tmp24 = trunc i128 %tmp23 to i64 + %tmp25 = insertvalue [2 x i64] undef, i64 %tmp24, 0 + %tmp26 = insertvalue [2 x i64] %tmp25, i64 0, 1 + %tmp27 = load float* undef, align 4, !tbaa !2 + %tmp28 = insertelement <4 x float> undef, float %tmp27, i32 3 + %tmp29 = load <4 x i32>* undef, align 16, !tbaa !0 + %tmp30 = and <4 x i32> %tmp29, + %tmp31 = or <4 x i32> %tmp30, + %tmp32 = bitcast <4 x i32> %tmp31 to <4 x float> + %tmp33 = fsub <4 x float> %tmp32, bitcast (i128 or (i128 shl (i128 zext (i64 trunc (i128 lshr (i128 bitcast (<4 x float> to i128), i128 64) to i64) to i128), i128 64), i128 zext (i64 trunc (i128 bitcast (<4 x float> to i128) to i64) to i128)) to <4 x float>) + %tmp34 = call <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float> undef, <4 x float> %tmp28) nounwind + %tmp35 = fmul <4 x float> %tmp34, undef + %tmp36 = fmul <4 x float> %tmp35, undef + %tmp37 = call arm_aapcs_vfpcc i8* undef(i8* undef) nounwind + %tmp38 = load float* undef, align 4, !tbaa !2 + %tmp39 = insertelement <2 x float> undef, float %tmp38, i32 0 + %tmp40 = call arm_aapcs_vfpcc i8* undef(i8* undef) nounwind + %tmp41 = load float* undef, align 4, !tbaa !2 + %tmp42 = insertelement <4 x float> undef, float 
%tmp41, i32 3 + %tmp43 = shufflevector <2 x float> %tmp39, <2 x float> undef, <4 x i32> zeroinitializer + %tmp44 = fmul <4 x float> %tmp33, %tmp43 + %tmp45 = fadd <4 x float> %tmp42, %tmp44 + %tmp46 = fsub <4 x float> %tmp45, undef + %tmp47 = fmul <4 x float> %tmp46, %tmp36 + %tmp48 = fadd <4 x float> undef, %tmp47 + %tmp49 = call arm_aapcs_vfpcc i8* undef(i8* undef) nounwind + %tmp50 = load float* undef, align 4, !tbaa !2 + %tmp51 = insertelement <4 x float> undef, float %tmp50, i32 3 + %tmp52 = call arm_aapcs_vfpcc float* null(i8* undef) nounwind + %tmp54 = load float* %tmp52, align 4, !tbaa !2 + %tmp55 = insertelement <4 x float> undef, float %tmp54, i32 3 + %tmp56 = fsub <4 x float> , %tmp22 + %tmp57 = call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %tmp56, <4 x float> %tmp55) nounwind + %tmp58 = fmul <4 x float> undef, %tmp57 + %tmp59 = fsub <4 x float> %tmp51, %tmp48 + %tmp60 = fsub <4 x float> , %tmp58 + %tmp61 = fmul <4 x float> %tmp59, %tmp60 + %tmp62 = fadd <4 x float> %tmp48, %tmp61 + call arm_aapcs_vfpcc void @baz(i8* undef, i8* undef, [2 x i64] %tmp26, <4 x i32>* undef) + %tmp63 = bitcast <4 x float> %tmp62 to i128 + %tmp64 = lshr i128 %tmp63, 64 + %tmp65 = trunc i128 %tmp64 to i64 + %tmp66 = insertvalue [2 x i64] zeroinitializer, i64 %tmp65, 1 + call arm_aapcs_vfpcc void @quux(i8* undef, i8* undef, [2 x i64] undef, i8* undef, [2 x i64] %tmp66, i8* undef, i8* undef, [2 x i64] %tmp26, [2 x i64] %tmp15, <4 x i32>* undef) + br label %bb3 + +bb67: ; preds = %bb3 + ret void +} + +declare arm_aapcs_vfpcc void @bar(i8*, i8*, <4 x i32>*, [2 x i64]) + +declare arm_aapcs_vfpcc void @baz(i8*, i8* nocapture, [2 x i64], <4 x i32>* nocapture) nounwind uwtable inlinehint align 2 + +declare arm_aapcs_vfpcc void @quux(i8*, i8*, [2 x i64], i8* nocapture, [2 x i64], i8* nocapture, i8* nocapture, [2 x i64], [2 x i64], <4 x i32>* nocapture) nounwind uwtable inlinehint align 2 + +declare <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float>, <4 x float>) nounwind 
readnone + +declare <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float>, <4 x float>) nounwind readnone + +declare <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float>) nounwind readnone + +!0 = metadata !{metadata !"omnipotent char", metadata !1} +!1 = metadata !{metadata !"Simple C/C++ TBAA", null} +!2 = metadata !{metadata !"float", metadata !0} From stoklund at 2pi.dk Mon Jan 23 15:01:12 2012 From: stoklund at 2pi.dk (Jakob Stoklund Olesen) Date: Mon, 23 Jan 2012 21:01:12 -0000 Subject: [llvm-commits] [llvm] r148723 - /llvm/trunk/lib/CodeGen/MachineLICM.cpp Message-ID: <20120123210112.383ED2A6C12C@llvm.org> Author: stoklund Date: Mon Jan 23 15:01:11 2012 New Revision: 148723 URL: http://llvm.org/viewvc/llvm-project?rev=148723&view=rev Log: Simplify debug output. Modified: llvm/trunk/lib/CodeGen/MachineLICM.cpp Modified: llvm/trunk/lib/CodeGen/MachineLICM.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MachineLICM.cpp?rev=148723&r1=148722&r2=148723&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/MachineLICM.cpp (original) +++ llvm/trunk/lib/CodeGen/MachineLICM.cpp Mon Jan 23 15:01:11 2012 @@ -582,16 +582,8 @@ // Now move the instructions to the predecessor, inserting it before any // terminator instructions. - DEBUG({ - dbgs() << "Hoisting " << *MI; - if (Preheader->getBasicBlock()) - dbgs() << " to MachineBasicBlock " - << Preheader->getName(); - if (MI->getParent()->getBasicBlock()) - dbgs() << " from MachineBasicBlock " - << MI->getParent()->getName(); - dbgs() << "\n"; - }); + DEBUG(dbgs() << "Hoisting to BB#" << Preheader->getNumber() << " from BB#" + << MI->getParent()->getNumber() << ": " << *MI); // Splice the instruction to the preheader. 
MachineBasicBlock *MBB = MI->getParent(); From lhames at gmail.com Mon Jan 23 15:15:02 2012 From: lhames at gmail.com (Lang Hames) Date: Mon, 23 Jan 2012 21:15:02 -0000 Subject: [llvm-commits] [llvm] r148725 - /llvm/trunk/lib/CodeGen/RegisterCoalescer.cpp Message-ID: <20120123211502.19FB62A6C12C@llvm.org> Author: lhames Date: Mon Jan 23 15:15:01 2012 New Revision: 148725 URL: http://llvm.org/viewvc/llvm-project?rev=148725&view=rev Log: copyImplicitOps is redundant here - the loop above already copies these ops. Modified: llvm/trunk/lib/CodeGen/RegisterCoalescer.cpp Modified: llvm/trunk/lib/CodeGen/RegisterCoalescer.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/RegisterCoalescer.cpp?rev=148725&r1=148724&r2=148725&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/RegisterCoalescer.cpp (original) +++ llvm/trunk/lib/CodeGen/RegisterCoalescer.cpp Mon Jan 23 15:15:01 2012 @@ -850,7 +850,6 @@ RemoveCopyFlag(MO.getReg(), CopyMI); } - NewMI->copyImplicitOps(CopyMI); LIS->ReplaceMachineInstrInMaps(CopyMI, NewMI); CopyMI->eraseFromParent(); ReMatCopies.insert(CopyMI); From kcc at google.com Mon Jan 23 15:20:05 2012 From: kcc at google.com (Kostya Serebryany) Date: Mon, 23 Jan 2012 21:20:05 -0000 Subject: [llvm-commits] [compiler-rt] r148726 - in /compiler-rt/trunk/lib/asan: asan_interceptors.cc asan_interceptors.h Message-ID: <20120123212005.D60AF2A6C12C@llvm.org> Author: kcc Date: Mon Jan 23 15:20:05 2012 New Revision: 148726 URL: http://llvm.org/viewvc/llvm-project?rev=148726&view=rev Log: [asan] use internal_strcmp before asan_init is done. 
*may* fix asan issue #30 Modified: compiler-rt/trunk/lib/asan/asan_interceptors.cc compiler-rt/trunk/lib/asan/asan_interceptors.h Modified: compiler-rt/trunk/lib/asan/asan_interceptors.cc URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/asan/asan_interceptors.cc?rev=148726&r1=148725&r2=148726&view=diff ============================================================================== --- compiler-rt/trunk/lib/asan/asan_interceptors.cc (original) +++ compiler-rt/trunk/lib/asan/asan_interceptors.cc Mon Jan 23 15:20:05 2012 @@ -235,6 +235,18 @@ return dst; } +int internal_strcmp(const char *s1, const char *s2) { + while (true) { + unsigned c1 = *s1; + unsigned c2 = *s2; + if (c1 != c2) return (c1 < c2) ? -1 : 1; + if (c1 == 0) break; + s1++; + s2++; + } + return 0; +} + } // namespace __asan // ---------------------- Wrappers ---------------- {{{1 @@ -490,10 +502,8 @@ extern "C" int WRAP(strcmp)(const char *s1, const char *s2) { - // strcmp is called from malloc_default_purgeable_zone() - // in __asan::ReplaceSystemAlloc() on Mac. 
- if (asan_init_is_running) { - return real_strcmp(s1, s2); + if (!asan_inited) { + return internal_strcmp(s1, s2); } unsigned char c1, c2; size_t i; Modified: compiler-rt/trunk/lib/asan/asan_interceptors.h URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/asan/asan_interceptors.h?rev=148726&r1=148725&r2=148726&view=diff ============================================================================== --- compiler-rt/trunk/lib/asan/asan_interceptors.h (original) +++ compiler-rt/trunk/lib/asan/asan_interceptors.h Mon Jan 23 15:20:05 2012 @@ -70,6 +70,7 @@ int internal_memcmp(const void* s1, const void* s2, size_t n); char *internal_strstr(const char *haystack, const char *needle); char *internal_strncat(char *dst, const char *src, size_t n); +int internal_strcmp(const char *s1, const char *s2); void InitializeAsanInterceptors(); From mcrosier at apple.com Mon Jan 23 15:26:53 2012 From: mcrosier at apple.com (Chad Rosier) Date: Mon, 23 Jan 2012 21:26:53 -0000 Subject: [llvm-commits] [test-suite] r148727 - /test-suite/trunk/MultiSource/Benchmarks/nbench/Makefile Message-ID: <20120123212653.83A842A6C12C@llvm.org> Author: mcrosier Date: Mon Jan 23 15:26:53 2012 New Revision: 148727 URL: http://llvm.org/viewvc/llvm-project?rev=148727&view=rev Log: nbench exceeds the 500s default, so bump it a bit. Modified: test-suite/trunk/MultiSource/Benchmarks/nbench/Makefile Modified: test-suite/trunk/MultiSource/Benchmarks/nbench/Makefile URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/nbench/Makefile?rev=148727&r1=148726&r2=148727&view=diff ============================================================================== --- test-suite/trunk/MultiSource/Benchmarks/nbench/Makefile (original) +++ test-suite/trunk/MultiSource/Benchmarks/nbench/Makefile Mon Jan 23 15:26:53 2012 @@ -6,6 +6,9 @@ include ../../Makefile.multisrc +# This test can take more than the default 500s timeout at -O0. 
+RUNTIMELIMIT:=750 + # Always copy NNET.DAT so it's available with SRCDIR != OBJDIR builds. # FIXME: Hack $(shell cp -n $(PROJ_SRC_DIR)/NNET.DAT .) From kcc at google.com Mon Jan 23 15:45:46 2012 From: kcc at google.com (Kostya Serebryany) Date: Mon, 23 Jan 2012 13:45:46 -0800 Subject: [llvm-commits] fix the MSVC warning in include/llvm-c/Core.h In-Reply-To: References: Message-ID: There are 8 functions dealing with LLVMAttribute in include/llvm-c/Core.h. Do you suggest to add 8 more functions that will deal with uint64_t? Like this? void LLVMAddFunctionAttr(LLVMValueRef Fn, LLVMAttribute PA); void LLVMAddFunctionAttr64(LLVMValueRef Fn, uint64_t PA); --kcc On Mon, Jan 23, 2012 at 1:40 PM, Paul Robinson wrote: > On Mon, Jan 23, 2012 at 12:05 PM, Kostya Serebryany > wrote: > > > > > > On Mon, Jan 23, 2012 at 12:01 PM, Eli Friedman > > wrote: > >> > >> On Mon, Jan 23, 2012 at 11:02 AM, Kostya Serebryany > >> wrote: > >> > My previous change in include/llvm-c/Core.h that introduced 64-bit > >> > Attributes (r148553) caused a warning > >> > while building with MSVC. http://llvm.org/bugs/show_bug.cgi?id=11828 > >> > The following patch fixes the problem (use "static const uint64_t" > >> > instead > >> > of enum). > >> > Ok to commit? > >> > > >> > --kcc > >> > > >> > Index: include/llvm-c/Core.h > >> > =================================================================== > >> > --- include/llvm-c/Core.h (revision 148708) > >> > +++ include/llvm-c/Core.h (working copy) > >> > @@ -92,7 +92,7 @@ > >> > /** Used to get the users and usees of a Value. See the llvm::Use > >> > class. 
*/ > >> > typedef struct LLVMOpaqueUse *LLVMUseRef; > >> > > >> > -typedef enum { > >> > +static const uint64_t > >> > LLVMZExtAttribute = 1<<0, > >> > LLVMSExtAttribute = 1<<1, > >> > LLVMNoReturnAttribute = 1<<2, > >> > @@ -119,8 +119,8 @@ > >> > LLVMReturnsTwice = 1 << 29, > >> > LLVMUWTable = 1 << 30, > >> > LLVMNonLazyBind = 1U << 31, > >> > - LLVMAddressSafety = 1ULL << 32 > >> > -} LLVMAttribute; > >> > + LLVMAddressSafety = 1ULL << 32; > >> > +typedef uint64_t LLVMAttribute; > >> > > >> > typedef enum { > >> > /* Terminator Instructions */ > >> > >> Hmm... actually, I'm not sure this is okay; it's a > >> non-binary-compatible change to the C API. > > > > > > Any other suggestion? > > It's not easy to keep compatibility once the new (beyond 32-bits) > attributes > > start getting used. > > Maybe add "enum LLVMAttribute2" for attributes in bits 33-64? > > > > --kcc > > > >> > >> > >> -Eli > > > > > > > > _______________________________________________ > > llvm-commits mailing list > > llvm-commits at cs.uiuc.edu > > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits > > > > I've run into this kind of problem before. > > A separate enum for attributes >= 32 is a royal pain. > It's an implementation artifact that has no relevance to the > semantics of the attributes. > You'd need to add a new API to take the upper attributes, > and the caller has to remember (or look up, every time) > which attributes go with which enum. :-P > > Defining a new 64-bit type that understands all attributes > is better, as all attributes can be handled the same way. > You still need a new API, but the caller does not have to > understand some arbitrary split between two different enums. > > Pogo > -------------- next part -------------- An HTML attachment was scrubbed... 
URL: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120123/40132b93/attachment.html From sabre at nondot.org Mon Jan 23 15:42:53 2012 From: sabre at nondot.org (Chris Lattner) Date: Mon, 23 Jan 2012 21:42:53 -0000 Subject: [llvm-commits] [llvm] r148732 - /llvm/trunk/include/llvm/ADT/StringMap.h Message-ID: <20120123214253.21E4E2A6C12C@llvm.org> Author: lattner Date: Mon Jan 23 15:42:52 2012 New Revision: 148732 URL: http://llvm.org/viewvc/llvm-project?rev=148732&view=rev Log: Various public StringMap methods take or return "MapEntryTy", make it public. Modified: llvm/trunk/include/llvm/ADT/StringMap.h Modified: llvm/trunk/include/llvm/ADT/StringMap.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/ADT/StringMap.h?rev=148732&r1=148731&r2=148732&view=diff ============================================================================== --- llvm/trunk/include/llvm/ADT/StringMap.h (original) +++ llvm/trunk/include/llvm/ADT/StringMap.h Mon Jan 23 15:42:52 2012 @@ -229,8 +229,9 @@ template class StringMap : public StringMapImpl { AllocatorTy Allocator; - typedef StringMapEntry MapEntryTy; public: + typedef StringMapEntry MapEntryTy; + StringMap() : StringMapImpl(static_cast(sizeof(MapEntryTy))) {} explicit StringMap(unsigned InitialSize) : StringMapImpl(InitialSize, static_cast(sizeof(MapEntryTy))) {} From pogo.work at gmail.com Mon Jan 23 15:40:47 2012 From: pogo.work at gmail.com (Paul Robinson) Date: Mon, 23 Jan 2012 13:40:47 -0800 Subject: [llvm-commits] fix the MSVC warning in include/llvm-c/Core.h In-Reply-To: References: Message-ID: On Mon, Jan 23, 2012 at 12:05 PM, Kostya Serebryany wrote: > > > On Mon, Jan 23, 2012 at 12:01 PM, Eli Friedman > wrote: >> >> On Mon, Jan 23, 2012 at 11:02 AM, Kostya Serebryany >> wrote: >> > My previous change in include/llvm-c/Core.h that introduced 64-bit >> > Attributes (r148553) caused a warning >> > while building with MSVC.?http://llvm.org/bugs/show_bug.cgi?id=11828 >> > The following patch fixes 
the problem (use "static const uint64_t" >> > instead >> > of enum). >> > Ok to commit? >> > >> > --kcc >> > >> > Index: include/llvm-c/Core.h >> > =================================================================== >> > --- include/llvm-c/Core.h (revision 148708) >> > +++ include/llvm-c/Core.h (working copy) >> > @@ -92,7 +92,7 @@ >> > /** Used to get the users and usees of a Value. See the llvm::Use >> > class. */ >> > typedef struct LLVMOpaqueUse *LLVMUseRef; >> > >> > -typedef enum { >> > +static const uint64_t >> > LLVMZExtAttribute = 1<<0, >> > LLVMSExtAttribute = 1<<1, >> > LLVMNoReturnAttribute = 1<<2, >> > @@ -119,8 +119,8 @@ >> > LLVMReturnsTwice = 1 << 29, >> > LLVMUWTable = 1 << 30, >> > LLVMNonLazyBind = 1U << 31, >> > - LLVMAddressSafety = 1ULL << 32 >> > -} LLVMAttribute; >> > + LLVMAddressSafety = 1ULL << 32; >> > +typedef uint64_t LLVMAttribute; >> > >> > typedef enum { >> > /* Terminator Instructions */ >> >> Hmm... actually, I'm not sure this is okay; it's a >> non-binary-compatible change to the C API. > > > Any other suggestion? > It's not easy to keep compatibility once the new (beyond 32-bits) attributes > start getting used. > Maybe add "enum LLVMAttribute2" for attributes in bits 33-64? > > --kcc > >> >> >> -Eli > > > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits > I've run into this kind of problem before. A separate enum for attributes >= 32 is a royal pain. It's an implementation artifact that has no relevance to the semantics of the attributes. You'd need to add a new API to take the upper attributes, and the caller has to remember (or look up, every time) which attributes go with which enum. :-P Defining a new 64-bit type that understands all attributes is better, as all attributes can be handled the same way. 
You still need a new API, but the caller does not have to understand some arbitrary split between two different enums. Pogo From rafael.espindola at gmail.com Mon Jan 23 15:51:52 2012 From: rafael.espindola at gmail.com (Rafael Espindola) Date: Mon, 23 Jan 2012 21:51:52 -0000 Subject: [llvm-commits] [llvm] r148733 - in /llvm/trunk: include/llvm/MC/MCDwarf.h include/llvm/MC/MCStreamer.h lib/MC/MCAsmStreamer.cpp lib/MC/MCDwarf.cpp lib/MC/MCParser/AsmParser.cpp lib/MC/MCStreamer.cpp test/MC/ELF/cfi-signal-frame.s Message-ID: <20120123215152.B15462A6C12C@llvm.org> Author: rafael Date: Mon Jan 23 15:51:52 2012 New Revision: 148733 URL: http://llvm.org/viewvc/llvm-project?rev=148733&view=rev Log: Add support for .cfi_signal_frame. Fixes pr11762. Added: llvm/trunk/test/MC/ELF/cfi-signal-frame.s Modified: llvm/trunk/include/llvm/MC/MCDwarf.h llvm/trunk/include/llvm/MC/MCStreamer.h llvm/trunk/lib/MC/MCAsmStreamer.cpp llvm/trunk/lib/MC/MCDwarf.cpp llvm/trunk/lib/MC/MCParser/AsmParser.cpp llvm/trunk/lib/MC/MCStreamer.cpp Modified: llvm/trunk/include/llvm/MC/MCDwarf.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/MC/MCDwarf.h?rev=148733&r1=148732&r2=148733&view=diff ============================================================================== --- llvm/trunk/include/llvm/MC/MCDwarf.h (original) +++ llvm/trunk/include/llvm/MC/MCDwarf.h Mon Jan 23 15:51:52 2012 @@ -312,7 +312,8 @@ struct MCDwarfFrameInfo { MCDwarfFrameInfo() : Begin(0), End(0), Personality(0), Lsda(0), Function(0), Instructions(), PersonalityEncoding(), - LsdaEncoding(0), CompactUnwindEncoding(0) {} + LsdaEncoding(0), CompactUnwindEncoding(0), + IsSignalFrame(false) {} MCSymbol *Begin; MCSymbol *End; const MCSymbol *Personality; @@ -322,6 +323,7 @@ unsigned PersonalityEncoding; unsigned LsdaEncoding; uint32_t CompactUnwindEncoding; + bool IsSignalFrame; }; class MCDwarfFrameEmitter { Modified: llvm/trunk/include/llvm/MC/MCStreamer.h URL: 
http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/MC/MCStreamer.h?rev=148733&r1=148732&r2=148733&view=diff ============================================================================== --- llvm/trunk/include/llvm/MC/MCStreamer.h (original) +++ llvm/trunk/include/llvm/MC/MCStreamer.h Mon Jan 23 15:51:52 2012 @@ -555,6 +555,7 @@ virtual void EmitCFIRelOffset(int64_t Register, int64_t Offset); virtual void EmitCFIAdjustCfaOffset(int64_t Adjustment); virtual void EmitCFIEscape(StringRef Values); + virtual void EmitCFISignalFrame(); virtual void EmitWin64EHStartProc(const MCSymbol *Symbol); virtual void EmitWin64EHEndProc(); Modified: llvm/trunk/lib/MC/MCAsmStreamer.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/MC/MCAsmStreamer.cpp?rev=148733&r1=148732&r2=148733&view=diff ============================================================================== --- llvm/trunk/lib/MC/MCAsmStreamer.cpp (original) +++ llvm/trunk/lib/MC/MCAsmStreamer.cpp Mon Jan 23 15:51:52 2012 @@ -222,6 +222,7 @@ virtual void EmitCFISameValue(int64_t Register); virtual void EmitCFIRelOffset(int64_t Register, int64_t Offset); virtual void EmitCFIAdjustCfaOffset(int64_t Adjustment); + virtual void EmitCFISignalFrame(); virtual void EmitWin64EHStartProc(const MCSymbol *Symbol); virtual void EmitWin64EHEndProc(); @@ -993,6 +994,16 @@ EmitEOL(); } +void MCAsmStreamer::EmitCFISignalFrame() { + MCStreamer::EmitCFISignalFrame(); + + if (!UseCFI) + return; + + OS << "\t.cif_signal_frame"; + EmitEOL(); +} + void MCAsmStreamer::EmitWin64EHStartProc(const MCSymbol *Symbol) { MCStreamer::EmitWin64EHStartProc(Symbol); Modified: llvm/trunk/lib/MC/MCDwarf.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/MC/MCDwarf.cpp?rev=148733&r1=148732&r2=148733&view=diff ============================================================================== --- llvm/trunk/lib/MC/MCDwarf.cpp (original) +++ llvm/trunk/lib/MC/MCDwarf.cpp Mon Jan 23 15:51:52 2012 @@ -840,6 +840,7 @@ const MCSymbol 
*personality, unsigned personalityEncoding, const MCSymbol *lsda, + bool IsSignalFrame, unsigned lsdaEncoding); MCSymbol *EmitFDE(MCStreamer &streamer, const MCSymbol &cieStart, @@ -1111,6 +1112,7 @@ const MCSymbol *personality, unsigned personalityEncoding, const MCSymbol *lsda, + bool IsSignalFrame, unsigned lsdaEncoding) { MCContext &context = streamer.getContext(); const MCRegisterInfo &MRI = context.getRegisterInfo(); @@ -1153,6 +1155,8 @@ if (lsda) Augmentation += "L"; Augmentation += "R"; + if (IsSignalFrame) + Augmentation += "S"; streamer.EmitBytes(Augmentation.str(), 0); } streamer.EmitIntValue(0, 1); @@ -1312,17 +1316,18 @@ namespace { struct CIEKey { - static const CIEKey getEmptyKey() { return CIEKey(0, 0, -1); } - static const CIEKey getTombstoneKey() { return CIEKey(0, -1, 0); } + static const CIEKey getEmptyKey() { return CIEKey(0, 0, -1, false); } + static const CIEKey getTombstoneKey() { return CIEKey(0, -1, 0, false); } CIEKey(const MCSymbol* Personality_, unsigned PersonalityEncoding_, - unsigned LsdaEncoding_) : Personality(Personality_), - PersonalityEncoding(PersonalityEncoding_), - LsdaEncoding(LsdaEncoding_) { + unsigned LsdaEncoding_, bool IsSignalFrame_) : + Personality(Personality_), PersonalityEncoding(PersonalityEncoding_), + LsdaEncoding(LsdaEncoding_), IsSignalFrame(IsSignalFrame_) { } const MCSymbol* Personality; unsigned PersonalityEncoding; unsigned LsdaEncoding; + bool IsSignalFrame; }; } @@ -1340,13 +1345,15 @@ ID.AddPointer(Key.Personality); ID.AddInteger(Key.PersonalityEncoding); ID.AddInteger(Key.LsdaEncoding); + ID.AddBoolean(Key.IsSignalFrame); return ID.ComputeHash(); } static bool isEqual(const CIEKey &LHS, const CIEKey &RHS) { return LHS.Personality == RHS.Personality && LHS.PersonalityEncoding == RHS.PersonalityEncoding && - LHS.LsdaEncoding == RHS.LsdaEncoding; + LHS.LsdaEncoding == RHS.LsdaEncoding && + LHS.IsSignalFrame == RHS.IsSignalFrame; } }; } @@ -1382,11 +1389,12 @@ for (unsigned i = 0, n = FrameArray.size(); i 
< n; ++i) { const MCDwarfFrameInfo &Frame = FrameArray[i]; CIEKey Key(Frame.Personality, Frame.PersonalityEncoding, - Frame.LsdaEncoding); + Frame.LsdaEncoding, Frame.IsSignalFrame); const MCSymbol *&CIEStart = IsEH ? CIEStarts[Key] : DummyDebugKey; if (!CIEStart) CIEStart = &Emitter.EmitCIE(Streamer, Frame.Personality, Frame.PersonalityEncoding, Frame.Lsda, + Frame.IsSignalFrame, Frame.LsdaEncoding); FDEEnd = Emitter.EmitFDE(Streamer, *CIEStart, Frame); Modified: llvm/trunk/lib/MC/MCParser/AsmParser.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/MC/MCParser/AsmParser.cpp?rev=148733&r1=148732&r2=148733&view=diff ============================================================================== --- llvm/trunk/lib/MC/MCParser/AsmParser.cpp (original) +++ llvm/trunk/lib/MC/MCParser/AsmParser.cpp Mon Jan 23 15:51:52 2012 @@ -306,6 +306,8 @@ &GenericAsmParser::ParseDirectiveCFIRestore>(".cfi_restore"); AddDirectiveHandler< &GenericAsmParser::ParseDirectiveCFIEscape>(".cfi_escape"); + AddDirectiveHandler< + &GenericAsmParser::ParseDirectiveCFISignalFrame>(".cfi_signal_frame"); // Macro directives. 
AddDirectiveHandler<&GenericAsmParser::ParseDirectiveMacrosOnOff>( @@ -341,6 +343,7 @@ bool ParseDirectiveCFISameValue(StringRef, SMLoc DirectiveLoc); bool ParseDirectiveCFIRestore(StringRef, SMLoc DirectiveLoc); bool ParseDirectiveCFIEscape(StringRef, SMLoc DirectiveLoc); + bool ParseDirectiveCFISignalFrame(StringRef, SMLoc DirectiveLoc); bool ParseDirectiveMacrosOnOff(StringRef, SMLoc DirectiveLoc); bool ParseDirectiveMacro(StringRef, SMLoc DirectiveLoc); @@ -2855,6 +2858,19 @@ return false; } +/// ParseDirectiveCFISignalFrame +/// ::= .cfi_signal_frame +bool GenericAsmParser::ParseDirectiveCFISignalFrame(StringRef Directive, + SMLoc DirectiveLoc) { + if (getLexer().isNot(AsmToken::EndOfStatement)) + return Error(getLexer().getLoc(), + "unexpected token in '" + Directive + "' directive"); + + getStreamer().EmitCFISignalFrame(); + + return false; +} + /// ParseDirectiveMacrosOnOff /// ::= .macros_on /// ::= .macros_off Modified: llvm/trunk/lib/MC/MCStreamer.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/MC/MCStreamer.cpp?rev=148733&r1=148732&r2=148733&view=diff ============================================================================== --- llvm/trunk/lib/MC/MCStreamer.cpp (original) +++ llvm/trunk/lib/MC/MCStreamer.cpp Mon Jan 23 15:51:52 2012 @@ -439,6 +439,12 @@ CurFrame->Instructions.push_back(Instruction); } +void MCStreamer::EmitCFISignalFrame() { + EnsureValidFrame(); + MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo(); + CurFrame->IsSignalFrame = true; +} + void MCStreamer::setCurrentW64UnwindInfo(MCWin64EHUnwindInfo *Frame) { W64UnwindInfos.push_back(Frame); CurrentW64UnwindInfo = W64UnwindInfos.back(); Added: llvm/trunk/test/MC/ELF/cfi-signal-frame.s URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/ELF/cfi-signal-frame.s?rev=148733&view=auto ============================================================================== --- llvm/trunk/test/MC/ELF/cfi-signal-frame.s (added) +++ llvm/trunk/test/MC/ELF/cfi-signal-frame.s Mon Jan 23 
15:51:52 2012 @@ -0,0 +1,23 @@ +// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump --dump-section-data | FileCheck %s + +f: + .cfi_startproc + .cfi_signal_frame + .cfi_endproc + +g: + .cfi_startproc + .cfi_endproc + +// CHECK: (('sh_name', 0x00000011) # '.eh_frame' +// CHECK-NEXT: ('sh_type', 0x00000001) +// CHECK-NEXT: ('sh_flags', 0x0000000000000002) +// CHECK-NEXT: ('sh_addr', 0x0000000000000000) +// CHECK-NEXT: ('sh_offset', 0x0000000000000040) +// CHECK-NEXT: ('sh_size', 0x0000000000000058) +// CHECK-NEXT: ('sh_link', 0x00000000) +// CHECK-NEXT: ('sh_info', 0x00000000) +// CHECK-NEXT: ('sh_addralign', 0x0000000000000008) +// CHECK-NEXT: ('sh_entsize', 0x0000000000000000) +// CHECK-NEXT: ('_section_data', '14000000 00000000 017a5253 00017810 011b0c07 08900100 10000000 1c000000 00000000 00000000 00000000 14000000 00000000 017a5200 01781001 1b0c0708 90010000 10000000 1c000000 00000000 00000000 00000000') +// CHECK-NEXT: ), From grosbach at apple.com Mon Jan 23 15:53:26 2012 From: grosbach at apple.com (Jim Grosbach) Date: Mon, 23 Jan 2012 21:53:26 -0000 Subject: [llvm-commits] [llvm] r148734 - in /llvm/trunk: lib/Target/ARM/ARMInstrNEON.td lib/Target/ARM/AsmParser/ARMAsmParser.cpp test/MC/ARM/neon-vst-encoding.s Message-ID: <20120123215326.837A02A6C12C@llvm.org> Author: grosbach Date: Mon Jan 23 15:53:26 2012 New Revision: 148734 URL: http://llvm.org/viewvc/llvm-project?rev=148734&view=rev Log: NEON VLD3 lane-indexed assembly parsing and encoding. 
Modified: llvm/trunk/lib/Target/ARM/ARMInstrNEON.td llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp llvm/trunk/test/MC/ARM/neon-vst-encoding.s Modified: llvm/trunk/lib/Target/ARM/ARMInstrNEON.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrNEON.td?rev=148734&r1=148733&r2=148734&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrNEON.td (original) +++ llvm/trunk/lib/Target/ARM/ARMInstrNEON.td Mon Jan 23 15:53:26 2012 @@ -184,6 +184,7 @@ let ParserMatchClass = VecListOneDWordIndexAsmOperand; let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); } + // Register list of two D registers with byte lane subscripting. def VecListTwoDByteIndexAsmOperand : AsmOperandClass { let Name = "VecListTwoDByteIndexed"; @@ -235,6 +236,59 @@ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); } + +// Register list of three D registers with byte lane subscripting. +def VecListThreeDByteIndexAsmOperand : AsmOperandClass { + let Name = "VecListThreeDByteIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListThreeDByteIndexed : Operand<i32> { + let ParserMatchClass = VecListThreeDByteIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} +// ...with half-word lane subscripting. +def VecListThreeDHWordIndexAsmOperand : AsmOperandClass { + let Name = "VecListThreeDHWordIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListThreeDHWordIndexed : Operand<i32> { + let ParserMatchClass = VecListThreeDHWordIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} +// ...with word lane subscripting. 
+def VecListThreeDWordIndexAsmOperand : AsmOperandClass { + let Name = "VecListThreeDWordIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListThreeDWordIndexed : Operand<i32> { + let ParserMatchClass = VecListThreeDWordIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} +// Register list of three Q registers with half-word lane subscripting. +def VecListThreeQHWordIndexAsmOperand : AsmOperandClass { + let Name = "VecListThreeQHWordIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListThreeQHWordIndexed : Operand<i32> { + let ParserMatchClass = VecListThreeQHWordIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} +// ...with word lane subscripting. +def VecListThreeQWordIndexAsmOperand : AsmOperandClass { + let Name = "VecListThreeQWordIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListThreeQWordIndexed : Operand<i32> { + let ParserMatchClass = VecListThreeQWordIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} + + //===----------------------------------------------------------------------===// // NEON-specific DAG Nodes. //===----------------------------------------------------------------------===// @@ -5914,6 +5968,55 @@ rGPR:$Rm, pred:$p)>; +// VLD3 single-lane pseudo-instructions. These need special handling for +// the lane index that an InstAlias can't handle, so we use these instead. 
+def VLD3LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr", + (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD3LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr", + (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD3LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr", + (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD3LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr", + (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD3LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr", + (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>; + +def VLD3LNdWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!", + (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD3LNdWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!", + (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD3LNdWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!", + (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD3LNqWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!", + (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD3LNqWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!", + (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD3LNdWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm", + (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD3LNdWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm", + (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD3LNdWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, 
$addr, $Rm", + (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD3LNqWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm", + (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD3LNqWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm", + (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; + // VMOV takes an optional datatype suffix defm : VFPDTAnyInstAlias<"vmov${p}", "$Vd, $Vm", (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>; Modified: llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp?rev=148734&r1=148733&r2=148734&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp (original) +++ llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp Mon Jan 23 15:53:26 2012 @@ -1168,6 +1168,31 @@ return VectorList.Count == 2 && VectorList.LaneIndex <= 1; } + bool isVecListThreeDByteIndexed() const { + if (!isSingleSpacedVectorIndexed()) return false; + return VectorList.Count == 3 && VectorList.LaneIndex <= 7; + } + + bool isVecListThreeDHWordIndexed() const { + if (!isSingleSpacedVectorIndexed()) return false; + return VectorList.Count == 3 && VectorList.LaneIndex <= 3; + } + + bool isVecListThreeQWordIndexed() const { + if (!isDoubleSpacedVectorIndexed()) return false; + return VectorList.Count == 3 && VectorList.LaneIndex <= 1; + } + + bool isVecListThreeQHWordIndexed() const { + if (!isDoubleSpacedVectorIndexed()) return false; + return VectorList.Count == 3 && VectorList.LaneIndex <= 3; + } + + bool isVecListThreeDWordIndexed() const { + if (!isSingleSpacedVectorIndexed()) return false; + return VectorList.Count == 3 && VectorList.LaneIndex <= 1; + } + bool isVectorIndex8() const { if (Kind != k_VectorIndex) return false; return 
VectorIndex.Val < 8; @@ -5304,6 +5329,53 @@ case ARM::VLD2LNqAsm_32: Spacing = 2; return ARM::VLD2LNq32; + + // VLD3LN + case ARM::VLD3LNdWB_fixed_Asm_8: + Spacing = 1; + return ARM::VLD3LNd8_UPD; + case ARM::VLD3LNdWB_fixed_Asm_16: + Spacing = 1; + return ARM::VLD3LNd16_UPD; + case ARM::VLD3LNdWB_fixed_Asm_32: + Spacing = 1; + return ARM::VLD3LNd32_UPD; + case ARM::VLD3LNqWB_fixed_Asm_16: + Spacing = 2; + return ARM::VLD3LNq16_UPD; + case ARM::VLD3LNqWB_fixed_Asm_32: + Spacing = 2; + return ARM::VLD3LNq32_UPD; + case ARM::VLD3LNdWB_register_Asm_8: + Spacing = 1; + return ARM::VLD3LNd8_UPD; + case ARM::VLD3LNdWB_register_Asm_16: + Spacing = 1; + return ARM::VLD3LNd16_UPD; + case ARM::VLD3LNdWB_register_Asm_32: + Spacing = 1; + return ARM::VLD3LNd32_UPD; + case ARM::VLD3LNqWB_register_Asm_16: + Spacing = 2; + return ARM::VLD3LNq16_UPD; + case ARM::VLD3LNqWB_register_Asm_32: + Spacing = 2; + return ARM::VLD3LNq32_UPD; + case ARM::VLD3LNdAsm_8: + Spacing = 1; + return ARM::VLD3LNd8; + case ARM::VLD3LNdAsm_16: + Spacing = 1; + return ARM::VLD3LNd16; + case ARM::VLD3LNdAsm_32: + Spacing = 1; + return ARM::VLD3LNd32; + case ARM::VLD3LNqAsm_16: + Spacing = 2; + return ARM::VLD3LNq16; + case ARM::VLD3LNqAsm_32: + Spacing = 2; + return ARM::VLD3LNq32; } } @@ -5502,6 +5574,37 @@ return true; } + case ARM::VLD3LNdWB_register_Asm_8: + case ARM::VLD3LNdWB_register_Asm_16: + case ARM::VLD3LNdWB_register_Asm_32: + case ARM::VLD3LNqWB_register_Asm_16: + case ARM::VLD3LNqWB_register_Asm_32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. 
+ unsigned Spacing; + TmpInst.setOpcode(getRealVLDLNOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(Inst.getOperand(4)); // Rm + TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd) + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(5)); // CondCode + TmpInst.addOperand(Inst.getOperand(6)); + Inst = TmpInst; + return true; + } + case ARM::VLD1LNdWB_fixed_Asm_8: case ARM::VLD1LNdWB_fixed_Asm_16: case ARM::VLD1LNdWB_fixed_Asm_32: { @@ -5550,6 +5653,37 @@ return true; } + case ARM::VLD3LNdWB_fixed_Asm_8: + case ARM::VLD3LNdWB_fixed_Asm_16: + case ARM::VLD3LNdWB_fixed_Asm_32: + case ARM::VLD3LNqWB_fixed_Asm_16: + case ARM::VLD3LNqWB_fixed_Asm_32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. 
+ unsigned Spacing; + TmpInst.setOpcode(getRealVLDLNOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm + TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd) + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + case ARM::VLD1LNdAsm_8: case ARM::VLD1LNdAsm_16: case ARM::VLD1LNdAsm_32: { @@ -5593,6 +5727,36 @@ Inst = TmpInst; return true; } + + case ARM::VLD3LNdAsm_8: + case ARM::VLD3LNdAsm_16: + case ARM::VLD3LNdAsm_32: + case ARM::VLD3LNqAsm_16: + case ARM::VLD3LNqAsm_32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. 
+ unsigned Spacing; + TmpInst.setOpcode(getRealVLDLNOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd) + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + // Handle the Thumb2 mode MOV complex aliases. case ARM::t2MOVsr: case ARM::t2MOVSsr: { Modified: llvm/trunk/test/MC/ARM/neon-vst-encoding.s URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/ARM/neon-vst-encoding.s?rev=148734&r1=148733&r2=148734&view=diff ============================================================================== --- llvm/trunk/test/MC/ARM/neon-vst-encoding.s (original) +++ llvm/trunk/test/MC/ARM/neon-vst-encoding.s Mon Jan 23 15:53:26 2012 @@ -133,17 +133,39 @@ @ CHECK: vst2.32 {d5[0], d7[0]}, [r4, :64], r7 @ encoding: [0x57,0x59,0x84,0xf4] -@ vst3.8 {d16[1], d17[1], d18[1]}, [r0] -@ vst3.16 {d16[1], d17[1], d18[1]}, [r0] -@ vst3.32 {d16[1], d17[1], d18[1]}, [r0] -@ vst3.16 {d17[2], d19[2], d21[2]}, [r0] -@ vst3.32 {d16[0], d18[0], d20[0]}, [r0] - -@ FIXME: vst3.8 {d16[1], d17[1], d18[1]}, [r0] @ encoding: [0x2f,0x02,0xc0,0xf4] -@ FIXME: vst3.16 {d16[1], d17[1], d18[1]}, [r0]@ encoding: [0x4f,0x06,0xc0,0xf4] -@ FIXME: vst3.32 {d16[1], d17[1], d18[1]}, [r0]@ encoding: [0x8f,0x0a,0xc0,0xf4] -@ FIXME: vst3.16 {d17[2], d19[2], d21[2]}, [r0]@ encoding: [0xaf,0x16,0xc0,0xf4] -@ FIXME: vst3.32 {d16[0], d18[0], d20[0]}, 
[r0]@ encoding: [0x4f,0x0a,0xc0,0xf4] + vld3.8 {d16[1], d17[1], d18[1]}, [r1] + vld3.16 {d6[1], d7[1], d8[1]}, [r2] + vld3.32 {d1[1], d2[1], d3[1]}, [r3] + vld3.u16 {d27[2], d29[2], d31[2]}, [r4] + vld3.i32 {d6[0], d8[0], d10[0]}, [r5] + + vld3.i8 {d12[3], d13[3], d14[3]}, [r6], r1 + vld3.i16 {d11[2], d12[2], d13[2]}, [r7], r2 + vld3.u32 {d2[1], d3[1], d4[1]}, [r8], r3 + vld3.u16 {d14[2], d16[2], d18[2]}, [r9], r4 + vld3.i32 {d16[0], d18[0], d20[0]}, [r10], r5 + + vld3.p8 {d6[6], d7[6], d8[6]}, [r8]! + vld3.16 {d9[2], d10[2], d11[2]}, [r7]! + vld3.f32 {d1[1], d2[1], d3[1]}, [r6]! + vld3.p16 {d20[2], d22[2], d24[2]}, [r5]! + vld3.32 {d5[0], d7[0], d9[0]}, [r4]! + +@ CHECK: vld3.8 {d16[1], d17[1], d18[1]}, [r1] @ encoding: [0x2f,0x02,0xe1,0xf4] +@ CHECK: vld3.16 {d6[1], d7[1], d8[1]}, [r2] @ encoding: [0x4f,0x66,0xa2,0xf4] +@ CHECK: vld3.32 {d1[1], d2[1], d3[1]}, [r3] @ encoding: [0x8f,0x1a,0xa3,0xf4] +@ CHECK: vld3.16 {d27[2], d29[2], d31[2]}, [r4] @ encoding: [0xaf,0xb6,0xe4,0xf4] +@ CHECK: vld3.32 {d6[0], d8[0], d10[0]}, [r5] @ encoding: [0x4f,0x6a,0xa5,0xf4] +@ CHECK: vld3.8 {d12[3], d13[3], d14[3]}, [r6], r1 @ encoding: [0x61,0xc2,0xa6,0xf4] +@ CHECK: vld3.16 {d11[2], d12[2], d13[2]}, [r7], r2 @ encoding: [0x82,0xb6,0xa7,0xf4] +@ CHECK: vld3.32 {d2[1], d3[1], d4[1]}, [r8], r3 @ encoding: [0x83,0x2a,0xa8,0xf4] +@ CHECK: vld3.16 {d14[2], d16[2], d18[2]}, [r9], r4 @ encoding: [0xa4,0xe6,0xa9,0xf4] +@ CHECK: vld3.32 {d16[0], d18[0], d20[0]}, [r10], r5 @ encoding: [0x45,0x0a,0xea,0xf4] +@ CHECK: vld3.8 {d6[6], d7[6], d8[6]}, [r8]! @ encoding: [0xcd,0x62,0xa8,0xf4] +@ CHECK: vld3.16 {d9[2], d10[2], d11[2]}, [r7]! @ encoding: [0x8d,0x96,0xa7,0xf4] +@ CHECK: vld3.32 {d1[1], d2[1], d3[1]}, [r6]! @ encoding: [0x8d,0x1a,0xa6,0xf4] +@ CHECK: vld3.16 {d20[2], d22[2], d24[2]}, [r5]! @ encoding: [0xad,0x46,0xe5,0xf4] +@ CHECK: vld3.32 {d5[0], d7[0], d9[0]}, [r4]! 
@ encoding: [0x4d,0x5a,0xa4,0xf4] @ vst4.8 {d16[1], d17[1], d18[1], d19[1]}, [r0, :32] From dblaikie at gmail.com Mon Jan 23 16:22:45 2012 From: dblaikie at gmail.com (David Blaikie) Date: Mon, 23 Jan 2012 22:22:45 -0000 Subject: [llvm-commits] [llvm] r148736 - /llvm/trunk/include/llvm/Support/CommandLine.h Message-ID: <20120123222245.222082A6C12C@llvm.org> Author: dblaikie Date: Mon Jan 23 16:22:44 2012 New Revision: 148736 URL: http://llvm.org/viewvc/llvm-project?rev=148736&view=rev Log: Simplify llvm::cl::Option by using a bit field instead of manual bit packing. This still preserves the same total layout. Previously it looked like: *** Dumping AST Record Layout 0 | class llvm::cl::Option 0 | (Option vtable pointer) 8 | int NumOccurrences 12 | int Flags 16 | unsigned int Position 20 | unsigned int AdditionalVals 24 | class llvm::cl::Option * NextRegistered 32 | const char * ArgStr 40 | const char * HelpStr 48 | const char * ValueStr sizeof=56, dsize=56, align=8 nvsize=56, nvalign=8 Now it looks like: *** Dumping AST Record Layout 0 | class llvm::cl::Option 0 | (Option vtable pointer) 8 | int NumOccurrences 12 | enum NumOccurrencesFlag Occurrences 12 | unsigned int Value 12 | enum OptionHidden HiddenFlag 12 | enum FormattingFlags Formatting 13 | unsigned int Misc 16 | unsigned int Position 20 | unsigned int AdditionalVals 24 | class llvm::cl::Option * NextRegistered 32 | const char * ArgStr 40 | const char * HelpStr 48 | const char * ValueStr sizeof=56, dsize=56, align=8 nvsize=56, nvalign=8 Modified: llvm/trunk/include/llvm/Support/CommandLine.h Modified: llvm/trunk/include/llvm/Support/CommandLine.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Support/CommandLine.h?rev=148736&r1=148735&r2=148736&view=diff ============================================================================== --- llvm/trunk/include/llvm/Support/CommandLine.h (original) +++ llvm/trunk/include/llvm/Support/CommandLine.h Mon Jan 23 16:22:44 2012 @@ -83,10 +83,10 @@ // 
enum NumOccurrencesFlag { // Flags for the number of occurrences allowed - Optional = 0x01, // Zero or One occurrence - ZeroOrMore = 0x02, // Zero or more occurrences allowed - Required = 0x03, // One occurrence required - OneOrMore = 0x04, // One or more occurrences required + Optional = 0x00, // Zero or One occurrence + ZeroOrMore = 0x01, // Zero or more occurrences allowed + Required = 0x02, // One occurrence required + OneOrMore = 0x03, // One or more occurrences required // ConsumeAfter - Indicates that this option is fed anything that follows the // last positional argument required by the application (it is an error if @@ -95,23 +95,20 @@ // found. Once a filename is found, all of the succeeding arguments are // passed, unprocessed, to the ConsumeAfter option. // - ConsumeAfter = 0x05, - - OccurrencesMask = 0x07 + ConsumeAfter = 0x04 }; enum ValueExpected { // Is a value required for the option? - ValueOptional = 0x08, // The value can appear... or not - ValueRequired = 0x10, // The value is required to appear! - ValueDisallowed = 0x18, // A value may not be specified (for flags) - ValueMask = 0x18 + // zero reserved for the unspecified value + ValueOptional = 0x01, // The value can appear... or not + ValueRequired = 0x02, // The value is required to appear! 
+ ValueDisallowed = 0x03 // A value may not be specified (for flags) }; enum OptionHidden { // Control whether -help shows this option - NotHidden = 0x20, // Option included in -help & -help-hidden - Hidden = 0x40, // -help doesn't, but -help-hidden does - ReallyHidden = 0x60, // Neither -help nor -help-hidden show this arg - HiddenMask = 0x60 + NotHidden = 0x00, // Option included in -help & -help-hidden + Hidden = 0x01, // -help doesn't, but -help-hidden does + ReallyHidden = 0x02 // Neither -help nor -help-hidden show this arg }; // Formatting flags - This controls special features that the option might have @@ -130,18 +127,16 @@ // enum FormattingFlags { - NormalFormatting = 0x000, // Nothing special - Positional = 0x080, // Is a positional argument, no '-' required - Prefix = 0x100, // Can this option directly prefix its value? - Grouping = 0x180, // Can this option group with other options? - FormattingMask = 0x180 // Union of the above flags. + NormalFormatting = 0x00, // Nothing special + Positional = 0x01, // Is a positional argument, no '-' required + Prefix = 0x02, // Can this option directly prefix its value? + Grouping = 0x03 // Can this option group with other options? }; enum MiscFlags { // Miscellaneous flags to adjust argument - CommaSeparated = 0x200, // Should this cl::list split between commas? - PositionalEatsArgs = 0x400, // Should this positional cl::list eat -args? - Sink = 0x800, // Should this cl::list eat all unknown options? - MiscMask = 0xE00 // Union of the above flags. + CommaSeparated = 0x01, // Should this cl::list split between commas? + PositionalEatsArgs = 0x02, // Should this positional cl::list eat -args? + Sink = 0x04 // Should this cl::list eat all unknown options? 
}; @@ -168,7 +163,13 @@ virtual void anchor(); int NumOccurrences; // The number of times specified - int Flags; // Flags for the argument + enum NumOccurrencesFlag Occurrences : 3; + // not using the enum type for 'Value' because zero is an implementation + // detail representing the non-value + unsigned Value : 2; + enum OptionHidden HiddenFlag : 2; + enum FormattingFlags Formatting : 2; + unsigned Misc : 3; unsigned Position; // Position of last occurrence of the option unsigned AdditionalVals;// Greater than 0 for multi-valued option. Option *NextRegistered; // Singly linked list of registered options. @@ -178,21 +179,20 @@ const char *ValueStr; // String describing what the value of this option is inline enum NumOccurrencesFlag getNumOccurrencesFlag() const { - return static_cast(Flags & OccurrencesMask); + return Occurrences; } inline enum ValueExpected getValueExpectedFlag() const { - int VE = Flags & ValueMask; - return VE ? static_cast(VE) + return Value ? static_cast(Value) : getValueExpectedFlagDefault(); } inline enum OptionHidden getOptionHiddenFlag() const { - return static_cast(Flags & HiddenMask); + return HiddenFlag; } inline enum FormattingFlags getFormattingFlag() const { - return static_cast(Flags & FormattingMask); + return Formatting; } inline unsigned getMiscFlags() const { - return Flags & MiscMask; + return Misc; } inline unsigned getPosition() const { return Position; } inline unsigned getNumAdditionalVals() const { return AdditionalVals; } @@ -206,27 +206,21 @@ void setArgStr(const char *S) { ArgStr = S; } void setDescription(const char *S) { HelpStr = S; } void setValueStr(const char *S) { ValueStr = S; } - - void setFlag(unsigned Flag, unsigned FlagMask) { - Flags &= ~FlagMask; - Flags |= Flag; - } - void setNumOccurrencesFlag(enum NumOccurrencesFlag Val) { - setFlag(Val, OccurrencesMask); + Occurrences = Val; } - void setValueExpectedFlag(enum ValueExpected Val) { setFlag(Val, ValueMask); } - void setHiddenFlag(enum OptionHidden Val) { 
setFlag(Val, HiddenMask); } - void setFormattingFlag(enum FormattingFlags V) { setFlag(V, FormattingMask); } - void setMiscFlag(enum MiscFlags M) { setFlag(M, M); } + void setValueExpectedFlag(enum ValueExpected Val) { Value = Val; } + void setHiddenFlag(enum OptionHidden Val) { HiddenFlag = Val; } + void setFormattingFlag(enum FormattingFlags V) { Formatting = V; } + void setMiscFlag(enum MiscFlags M) { Misc |= M; } void setPosition(unsigned pos) { Position = pos; } protected: - explicit Option(unsigned DefaultFlags) - : NumOccurrences(0), Flags(DefaultFlags | NormalFormatting), Position(0), + explicit Option(enum NumOccurrencesFlag Occurrences, + enum OptionHidden Hidden) + : NumOccurrences(0), Occurrences(Occurrences), HiddenFlag(Hidden), + Formatting(NormalFormatting), Position(0), AdditionalVals(0), NextRegistered(0), ArgStr(""), HelpStr(""), ValueStr("") { - assert(getNumOccurrencesFlag() != 0 && - getOptionHiddenFlag() != 0 && "Not all default flags specified!"); } inline void setNumAdditionalVals(unsigned n) { AdditionalVals = n; } @@ -1177,14 +1171,14 @@ // One option... template - explicit opt(const M0t &M0) : Option(Optional | NotHidden) { + explicit opt(const M0t &M0) : Option(Optional, NotHidden) { apply(M0, this); done(); } // Two options... template - opt(const M0t &M0, const M1t &M1) : Option(Optional | NotHidden) { + opt(const M0t &M0, const M1t &M1) : Option(Optional, NotHidden) { apply(M0, this); apply(M1, this); done(); } @@ -1192,21 +1186,21 @@ // Three options... template opt(const M0t &M0, const M1t &M1, - const M2t &M2) : Option(Optional | NotHidden) { + const M2t &M2) : Option(Optional, NotHidden) { apply(M0, this); apply(M1, this); apply(M2, this); done(); } // Four options... template opt(const M0t &M0, const M1t &M1, const M2t &M2, - const M3t &M3) : Option(Optional | NotHidden) { + const M3t &M3) : Option(Optional, NotHidden) { apply(M0, this); apply(M1, this); apply(M2, this); apply(M3, this); done(); } // Five options... 
template opt(const M0t &M0, const M1t &M1, const M2t &M2, const M3t &M3, - const M4t &M4) : Option(Optional | NotHidden) { + const M4t &M4) : Option(Optional, NotHidden) { apply(M0, this); apply(M1, this); apply(M2, this); apply(M3, this); apply(M4, this); done(); @@ -1215,7 +1209,7 @@ template opt(const M0t &M0, const M1t &M1, const M2t &M2, const M3t &M3, - const M4t &M4, const M5t &M5) : Option(Optional | NotHidden) { + const M4t &M4, const M5t &M5) : Option(Optional, NotHidden) { apply(M0, this); apply(M1, this); apply(M2, this); apply(M3, this); apply(M4, this); apply(M5, this); done(); @@ -1225,7 +1219,7 @@ class M4t, class M5t, class M6t> opt(const M0t &M0, const M1t &M1, const M2t &M2, const M3t &M3, const M4t &M4, const M5t &M5, - const M6t &M6) : Option(Optional | NotHidden) { + const M6t &M6) : Option(Optional, NotHidden) { apply(M0, this); apply(M1, this); apply(M2, this); apply(M3, this); apply(M4, this); apply(M5, this); apply(M6, this); done(); @@ -1235,7 +1229,7 @@ class M4t, class M5t, class M6t, class M7t> opt(const M0t &M0, const M1t &M1, const M2t &M2, const M3t &M3, const M4t &M4, const M5t &M5, const M6t &M6, - const M7t &M7) : Option(Optional | NotHidden) { + const M7t &M7) : Option(Optional, NotHidden) { apply(M0, this); apply(M1, this); apply(M2, this); apply(M3, this); apply(M4, this); apply(M5, this); apply(M6, this); apply(M7, this); done(); @@ -1344,34 +1338,34 @@ // One option... template - explicit list(const M0t &M0) : Option(ZeroOrMore | NotHidden) { + explicit list(const M0t &M0) : Option(ZeroOrMore, NotHidden) { apply(M0, this); done(); } // Two options... template - list(const M0t &M0, const M1t &M1) : Option(ZeroOrMore | NotHidden) { + list(const M0t &M0, const M1t &M1) : Option(ZeroOrMore, NotHidden) { apply(M0, this); apply(M1, this); done(); } // Three options... 
template list(const M0t &M0, const M1t &M1, const M2t &M2) - : Option(ZeroOrMore | NotHidden) { + : Option(ZeroOrMore, NotHidden) { apply(M0, this); apply(M1, this); apply(M2, this); done(); } // Four options... template list(const M0t &M0, const M1t &M1, const M2t &M2, const M3t &M3) - : Option(ZeroOrMore | NotHidden) { + : Option(ZeroOrMore, NotHidden) { apply(M0, this); apply(M1, this); apply(M2, this); apply(M3, this); done(); } // Five options... template list(const M0t &M0, const M1t &M1, const M2t &M2, const M3t &M3, - const M4t &M4) : Option(ZeroOrMore | NotHidden) { + const M4t &M4) : Option(ZeroOrMore, NotHidden) { apply(M0, this); apply(M1, this); apply(M2, this); apply(M3, this); apply(M4, this); done(); @@ -1380,7 +1374,7 @@ template list(const M0t &M0, const M1t &M1, const M2t &M2, const M3t &M3, - const M4t &M4, const M5t &M5) : Option(ZeroOrMore | NotHidden) { + const M4t &M4, const M5t &M5) : Option(ZeroOrMore, NotHidden) { apply(M0, this); apply(M1, this); apply(M2, this); apply(M3, this); apply(M4, this); apply(M5, this); done(); @@ -1390,7 +1384,7 @@ class M4t, class M5t, class M6t> list(const M0t &M0, const M1t &M1, const M2t &M2, const M3t &M3, const M4t &M4, const M5t &M5, const M6t &M6) - : Option(ZeroOrMore | NotHidden) { + : Option(ZeroOrMore, NotHidden) { apply(M0, this); apply(M1, this); apply(M2, this); apply(M3, this); apply(M4, this); apply(M5, this); apply(M6, this); done(); @@ -1400,7 +1394,7 @@ class M4t, class M5t, class M6t, class M7t> list(const M0t &M0, const M1t &M1, const M2t &M2, const M3t &M3, const M4t &M4, const M5t &M5, const M6t &M6, - const M7t &M7) : Option(ZeroOrMore | NotHidden) { + const M7t &M7) : Option(ZeroOrMore, NotHidden) { apply(M0, this); apply(M1, this); apply(M2, this); apply(M3, this); apply(M4, this); apply(M5, this); apply(M6, this); apply(M7, this); done(); @@ -1542,34 +1536,34 @@ // One option... 
template - explicit bits(const M0t &M0) : Option(ZeroOrMore | NotHidden) { + explicit bits(const M0t &M0) : Option(ZeroOrMore, NotHidden) { apply(M0, this); done(); } // Two options... template - bits(const M0t &M0, const M1t &M1) : Option(ZeroOrMore | NotHidden) { + bits(const M0t &M0, const M1t &M1) : Option(ZeroOrMore, NotHidden) { apply(M0, this); apply(M1, this); done(); } // Three options... template bits(const M0t &M0, const M1t &M1, const M2t &M2) - : Option(ZeroOrMore | NotHidden) { + : Option(ZeroOrMore, NotHidden) { apply(M0, this); apply(M1, this); apply(M2, this); done(); } // Four options... template bits(const M0t &M0, const M1t &M1, const M2t &M2, const M3t &M3) - : Option(ZeroOrMore | NotHidden) { + : Option(ZeroOrMore, NotHidden) { apply(M0, this); apply(M1, this); apply(M2, this); apply(M3, this); done(); } // Five options... template bits(const M0t &M0, const M1t &M1, const M2t &M2, const M3t &M3, - const M4t &M4) : Option(ZeroOrMore | NotHidden) { + const M4t &M4) : Option(ZeroOrMore, NotHidden) { apply(M0, this); apply(M1, this); apply(M2, this); apply(M3, this); apply(M4, this); done(); @@ -1578,7 +1572,7 @@ template bits(const M0t &M0, const M1t &M1, const M2t &M2, const M3t &M3, - const M4t &M4, const M5t &M5) : Option(ZeroOrMore | NotHidden) { + const M4t &M4, const M5t &M5) : Option(ZeroOrMore, NotHidden) { apply(M0, this); apply(M1, this); apply(M2, this); apply(M3, this); apply(M4, this); apply(M5, this); done(); @@ -1588,7 +1582,7 @@ class M4t, class M5t, class M6t> bits(const M0t &M0, const M1t &M1, const M2t &M2, const M3t &M3, const M4t &M4, const M5t &M5, const M6t &M6) - : Option(ZeroOrMore | NotHidden) { + : Option(ZeroOrMore, NotHidden) { apply(M0, this); apply(M1, this); apply(M2, this); apply(M3, this); apply(M4, this); apply(M5, this); apply(M6, this); done(); @@ -1598,7 +1592,7 @@ class M4t, class M5t, class M6t, class M7t> bits(const M0t &M0, const M1t &M1, const M2t &M2, const M3t &M3, const M4t &M4, const M5t &M5, const 
M6t &M6, - const M7t &M7) : Option(ZeroOrMore | NotHidden) { + const M7t &M7) : Option(ZeroOrMore, NotHidden) { apply(M0, this); apply(M1, this); apply(M2, this); apply(M3, this); apply(M4, this); apply(M5, this); apply(M6, this); apply(M7, this); done(); @@ -1638,27 +1632,27 @@ // One option... template - explicit alias(const M0t &M0) : Option(Optional | Hidden), AliasFor(0) { + explicit alias(const M0t &M0) : Option(Optional, Hidden), AliasFor(0) { apply(M0, this); done(); } // Two options... template - alias(const M0t &M0, const M1t &M1) : Option(Optional | Hidden), AliasFor(0) { + alias(const M0t &M0, const M1t &M1) : Option(Optional, Hidden), AliasFor(0) { apply(M0, this); apply(M1, this); done(); } // Three options... template alias(const M0t &M0, const M1t &M1, const M2t &M2) - : Option(Optional | Hidden), AliasFor(0) { + : Option(Optional, Hidden), AliasFor(0) { apply(M0, this); apply(M1, this); apply(M2, this); done(); } // Four options... template alias(const M0t &M0, const M1t &M1, const M2t &M2, const M3t &M3) - : Option(Optional | Hidden), AliasFor(0) { + : Option(Optional, Hidden), AliasFor(0) { apply(M0, this); apply(M1, this); apply(M2, this); apply(M3, this); done(); } From lenny at Colorado.EDU Mon Jan 23 16:30:55 2012 From: lenny at Colorado.EDU (Lenny Maiorani) Date: Mon, 23 Jan 2012 15:30:55 -0700 Subject: [llvm-commits] [cfe-commits] [PATCH][Review Request] EarlyCSE stack overflow - bugzilla 11794 In-Reply-To: References: <051207DF-F3D0-4E8B-A756-E6B193750483@colorado.edu> <184C997C-6E18-4BE8-A3F8-092102981B11@colorado.edu> <6663C373-1004-4FB2-9E39-F784587B4CDD@2pi.dk> Message-ID: <4AA7AFB1-51F3-48B4-B543-00141B738258@colorado.edu> On Jan 20, 2012, at 3:08 PM, Jakob Stoklund Olesen wrote: > On Jan 20, 2012, at 1:41 PM, Lenny Maiorani wrote: >> There is actually a real difference here. 
The difference is that when the std::stack or std::deque is being used, it is necessary to have 2 stacks or deques because there is no reverse iterator on DomTreeNode. I am trying to preserve the order of processing nodes so that there is not some case where the behavior changes. If the order does not need to be preserved, then clearly it can be further optimized. > > In this case, the child iteration order probably isn't important, but even if it were, you could still avoid the overhead with a vector like this: > > vec.resize(vec.size() + num_children) > std::copy(child_begin, child_end, vec.rbegin()) > > Anyway, the runtime differences you were seeing in your stack-test-1.cpp benchmark were probably dominated by the number of times you implicitly call free() in the inner loop. > > Note that DepthFirstIterator.h achieves the same thing using much less memory, and it has the stack that you need anyway in order to: > >> I still need to take a look at Jakob's other review comment. > > Thanks for working on this! > > /jakob Jakob, Thank you for the information about MachineCSE and for noticing the hash tables which are being generated at the top of EarlyCSE::processNode(). My latest implementation contains roughly the same implementation as MachineCSE. MachineCSE generates a stack of nodes to process using SmallVector. Since this data structure is so large, I think using SmallVector is not correct. Also, I implemented the solution using vec.resize() and std::copy(), but this doesn't significantly improve the performance of the vector solution. My numbers are not at all surprising to me. Vectors do not do as well as deques with many pushes and pops because deques manage their memory in slabs to prevent needing to do large reallocs and hence large copies on occasion. Using the DepthFirstIterator (df_iterator) would be nice, but I do not see an easy way to maintain the CurrentGeneration variable which gets modified on a per-tree-depth basis.
This means that there would still need to be some sort of stack kept separately from the iterator. This would be confusing and clutter the code. I looked a bit at the hash tables at the top of EarlyCSE::processNode() and I do not think they are nested. It looks like they are copy-constructed from some hash tables which are class members. These are then modified and, provided the order of node traversal does not change, the data in those member hash tables should be the same. The std::deque implementation is still the one I think is best. It is quite possible I am missing a subtlety of the copy-constructor or something else. How say you? -Lenny From dpatel at apple.com Mon Jan 23 16:35:25 2012 From: dpatel at apple.com (Devang Patel) Date: Mon, 23 Jan 2012 22:35:25 -0000 Subject: [llvm-commits] [llvm] r148737 - in /llvm/trunk: lib/Target/X86/AsmParser/X86AsmParser.cpp test/MC/X86/intel-syntax.s Message-ID: <20120123223525.B98832A6C12C@llvm.org> Author: dpatel Date: Mon Jan 23 16:35:25 2012 New Revision: 148737 URL: http://llvm.org/viewvc/llvm-project?rev=148737&view=rev Log: Intel syntax: Robustify parsing of memory operand's displacement expression. Modified: llvm/trunk/lib/Target/X86/AsmParser/X86AsmParser.cpp llvm/trunk/test/MC/X86/intel-syntax.s Modified: llvm/trunk/lib/Target/X86/AsmParser/X86AsmParser.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/AsmParser/X86AsmParser.cpp?rev=148737&r1=148736&r2=148737&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/AsmParser/X86AsmParser.cpp (original) +++ llvm/trunk/lib/Target/X86/AsmParser/X86AsmParser.cpp Mon Jan 23 16:35:25 2012 @@ -655,8 +655,13 @@ Disp = isPlus ?
ValExpr : MCConstantExpr::Create(0-Val, getContext()); } else return ErrorOperand(PlusLoc, "unexpected token after +"); - } else if (getLexer().is(AsmToken::Identifier)) - ParseRegister(IndexReg, Start, End); + } else if (getLexer().is(AsmToken::Identifier)) { + // This could be an index registor or a displacement expression. + End = Parser.getTok().getLoc(); + if (!IndexReg) + ParseRegister(IndexReg, Start, End); + else if (getParser().ParseExpression(Disp, End)) return 0; + } } if (getLexer().isNot(AsmToken::RBrac)) Modified: llvm/trunk/test/MC/X86/intel-syntax.s URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/X86/intel-syntax.s?rev=148737&r1=148736&r2=148737&view=diff ============================================================================== --- llvm/trunk/test/MC/X86/intel-syntax.s (original) +++ llvm/trunk/test/MC/X86/intel-syntax.s Mon Jan 23 16:35:25 2012 @@ -56,7 +56,9 @@ // CHECK: fld %st(0) fld ST(0) // CHECK: movl %fs:(%rdi), %eax -mov EAX, DWORD PTR FS:[RDI] + mov EAX, DWORD PTR FS:[RDI] // CHECK: leal (,%rdi,4), %r8d -lea R8D, DWORD PTR [4*RDI] + lea R8D, DWORD PTR [4*RDI] +// CHECK: movl _fnan(,%ecx,4), %ecx + mov ECX, DWORD PTR [4*ECX + _fnan] ret From dblaikie at gmail.com Mon Jan 23 16:37:11 2012 From: dblaikie at gmail.com (David Blaikie) Date: Mon, 23 Jan 2012 22:37:11 -0000 Subject: [llvm-commits] [llvm] r148738 - /llvm/trunk/lib/Support/CommandLine.cpp Message-ID: <20120123223711.8846C2A6C12C@llvm.org> Author: dblaikie Date: Mon Jan 23 16:37:11 2012 New Revision: 148738 URL: http://llvm.org/viewvc/llvm-project?rev=148738&view=rev Log: Remove dead default. 
Modified: llvm/trunk/lib/Support/CommandLine.cpp Modified: llvm/trunk/lib/Support/CommandLine.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/CommandLine.cpp?rev=148738&r1=148737&r2=148738&view=diff ============================================================================== --- llvm/trunk/lib/Support/CommandLine.cpp (original) +++ llvm/trunk/lib/Support/CommandLine.cpp Mon Jan 23 16:37:11 2012 @@ -882,7 +882,6 @@ case OneOrMore: case ZeroOrMore: case ConsumeAfter: break; - default: return error("bad num occurrences flag value!"); } return handleOccurrence(pos, ArgName, Value); From pogo.work at gmail.com Mon Jan 23 16:53:19 2012 From: pogo.work at gmail.com (Paul Robinson) Date: Mon, 23 Jan 2012 14:53:19 -0800 Subject: [llvm-commits] fix the MSVC warning in include/llvm-c/Core.h In-Reply-To: References: Message-ID: On Mon, Jan 23, 2012 at 1:45 PM, Kostya Serebryany wrote: > There are 8 functions dealing with LLVMAttribute in include/llvm-c/Core.h. > Do you suggest to add 8 more functions that will deal with uint64_t? > Like this? > > void LLVMAddFunctionAttr(LLVMValueRef Fn, LLVMAttribute PA); > void LLVMAddFunctionAttr64(LLVMValueRef Fn, uint64_t PA); > > --kcc With a more meaningful type name, e.g. LLVMAttribute64 or some such, yes; I prefer that to 8 more functions that take pairs of 32-bit enum values. void LLVMAddFunctionAttr(LLVMValueRef Fn, LLVMAttribute PA); void LLVMAddFunctionAttr2(LLVMValueRef Fn, LLVMAttribute PA, LLVMAttribute2 PA2); Or to 8 more functions that take the second set of enum values. void LLVMAddFunctionAttr(LLVMValueRef Fn, LLVMAttribute PA); void LLVMAddFunctionAttr2(LLVMValueRef Fn, LLVMAttribute2 PA); In all cases, the first function of each pair must be preserved for compatibility; the question is merely, what does the second function look like, and does the caller have to care which enum a given attribute belongs to.
With the approach I suggested, I can convert to the new type and function without regard to the implementation detail of the bit number being used for the attribute I care about at the moment. All I need to know is its name, which is as it should be. I can't see any way to both expand the attribute set, and preserve binary compatibility, without defining 8 new functions of SOME kind. In that case, I would rather have a full new set of 8, and deprecate the old set of 8, than have to contend with 16 functions all actively in use. Pogo > > On Mon, Jan 23, 2012 at 1:40 PM, Paul Robinson wrote: >> >> On Mon, Jan 23, 2012 at 12:05 PM, Kostya Serebryany >> wrote: >> > >> > >> > On Mon, Jan 23, 2012 at 12:01 PM, Eli Friedman >> > wrote: >> >> >> >> On Mon, Jan 23, 2012 at 11:02 AM, Kostya Serebryany >> >> wrote: >> >> > My previous change in include/llvm-c/Core.h that introduced 64-bit >> >> > Attributes (r148553) caused a warning >> >> > while building with MSVC. http://llvm.org/bugs/show_bug.cgi?id=11828 >> >> > The following patch fixes the problem (use "static const uint64_t" >> >> > instead >> >> > of enum). >> >> > Ok to commit? >> >> > >> >> > --kcc >> >> > >> >> > Index: include/llvm-c/Core.h >> >> > =================================================================== >> >> > --- include/llvm-c/Core.h  (revision 148708) >> >> > +++ include/llvm-c/Core.h  (working copy) >> >> > @@ -92,7 +92,7 @@ >> >> >  /** Used to get the users and usees of a Value. See the llvm::Use >> >> > class. */ >> >> >  typedef struct LLVMOpaqueUse *LLVMUseRef; >> >> > >> >> > -typedef enum { >> >> > +static const uint64_t >> >> >      LLVMZExtAttribute       = 1<<0, >> >> >      LLVMSExtAttribute       = 1<<1, >> >> >      LLVMNoReturnAttribute   = 1<<2, >> >> > @@ -119,8 +119,8 @@ >> >> >      LLVMReturnsTwice = 1 << 29, >> >> >      LLVMUWTable = 1 << 30, >> >> >      LLVMNonLazyBind = 1U << 31, >> >> > -    LLVMAddressSafety = 1ULL << 32 >> >> > -} LLVMAttribute; >> >> > +
   LLVMAddressSafety = 1ULL << 32; >> >> > +typedef uint64_t LLVMAttribute; >> >> > >> >> >  typedef enum { >> >> >    /* Terminator Instructions */ >> >> >> >> Hmm... actually, I'm not sure this is okay; it's a >> >> non-binary-compatible change to the C API. >> > >> > >> > Any other suggestion? >> > It's not easy to keep compatibility once the new (beyond 32-bits) >> > attributes >> > start getting used. >> > Maybe add "enum LLVMAttribute2" for attributes in bits 33-64? >> > >> > --kcc >> > >> >> >> >> >> >> -Eli >> > >> > >> > >> > _______________________________________________ >> > llvm-commits mailing list >> > llvm-commits at cs.uiuc.edu >> > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits >> > >> >> I've run into this kind of problem before. >> >> A separate enum for attributes >= 32 is a royal pain. >> It's an implementation artifact that has no relevance to the >> semantics of the attributes. >> You'd need to add a new API to take the upper attributes, >> and the caller has to remember (or look up, every time) >> which attributes go with which enum.  :-P >> >> Defining a new 64-bit type that understands all attributes >> is better, as all attributes can be handled the same way. >> You still need a new API, but the caller does not have to >> understand some arbitrary split between two different enums. >> >> Pogo > > From isanbard at gmail.com Mon Jan 23 16:55:02 2012 From: isanbard at gmail.com (Bill Wendling) Date: Mon, 23 Jan 2012 22:55:02 -0000 Subject: [llvm-commits] [llvm] r148740 - in /llvm/trunk: include/llvm/Support/JSONParser.h lib/CodeGen/AsmPrinter/DwarfAccelTable.h lib/ExecutionEngine/Interpreter/Interpreter.h Message-ID: <20120123225502.E963A2A6C12C@llvm.org> Author: void Date: Mon Jan 23 16:55:02 2012 New Revision: 148740 URL: http://llvm.org/viewvc/llvm-project?rev=148740&view=rev Log: Remove extraneous ';'s.
Modified: llvm/trunk/include/llvm/Support/JSONParser.h llvm/trunk/lib/CodeGen/AsmPrinter/DwarfAccelTable.h llvm/trunk/lib/ExecutionEngine/Interpreter/Interpreter.h Modified: llvm/trunk/include/llvm/Support/JSONParser.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Support/JSONParser.h?rev=148740&r1=148739&r2=148740&view=diff ============================================================================== --- llvm/trunk/include/llvm/Support/JSONParser.h (original) +++ llvm/trunk/include/llvm/Support/JSONParser.h Mon Jan 23 16:55:02 2012 @@ -174,7 +174,7 @@ return true; case JK_KeyValuePair: return false; - }; + } llvm_unreachable("Invalid JSONAtom kind"); } static bool classof(const JSONValue *Value) { return true; } @@ -190,7 +190,7 @@ /// /// This is the unescaped content of the JSON text. /// See http://www.ietf.org/rfc/rfc4627.txt for details. - StringRef getRawText() const { return RawText; }; + StringRef getRawText() const { return RawText; } private: JSONString(StringRef RawText) : JSONValue(JK_String), RawText(RawText) {} @@ -380,7 +380,7 @@ case JK_KeyValuePair: case JK_String: return false; - }; + } llvm_unreachable("Invalid JSONAtom kind"); } static bool classof(const JSONContainer *Container) { return true; } Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfAccelTable.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/DwarfAccelTable.h?rev=148740&r1=148739&r2=148740&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/DwarfAccelTable.h (original) +++ llvm/trunk/lib/CodeGen/AsmPrinter/DwarfAccelTable.h Mon Jan 23 16:55:02 2012 @@ -207,7 +207,7 @@ HashDataContents(DIE *D, char Flags) : Die(D), - Flags(Flags) { }; + Flags(Flags) { } #ifndef NDEBUG void print(raw_ostream &O) const { O << " Offset: " << Die->getOffset() << "\n"; Modified: llvm/trunk/lib/ExecutionEngine/Interpreter/Interpreter.h URL: 
http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/ExecutionEngine/Interpreter/Interpreter.h?rev=148740&r1=148739&r2=148740&view=diff ============================================================================== --- llvm/trunk/lib/ExecutionEngine/Interpreter/Interpreter.h (original) +++ llvm/trunk/lib/ExecutionEngine/Interpreter/Interpreter.h Mon Jan 23 16:55:02 2012 @@ -119,7 +119,7 @@ bool AbortOnFailure = true) { // FIXME: not implemented. return 0; - }; + } /// recompileAndRelinkFunction - For the interpreter, functions are always /// up-to-date. From sabre at nondot.org Mon Jan 23 16:57:10 2012 From: sabre at nondot.org (Chris Lattner) Date: Mon, 23 Jan 2012 22:57:10 -0000 Subject: [llvm-commits] [llvm] r148741 - in /llvm/trunk: include/llvm/Constants.h include/llvm/Value.h lib/VMCore/Constants.cpp lib/VMCore/LLVMContextImpl.cpp lib/VMCore/LLVMContextImpl.h Message-ID: <20120123225711.23E212A6C12C@llvm.org> Author: lattner Date: Mon Jan 23 16:57:10 2012 New Revision: 148741 URL: http://llvm.org/viewvc/llvm-project?rev=148741&view=rev Log: start the implementation of a new ConstantDataVector and ConstantDataArray classes, per PR1324. Not all of their helper functions are implemented, nothing creates them, and the rest of the compiler doesn't handle them yet. 
Modified: llvm/trunk/include/llvm/Constants.h llvm/trunk/include/llvm/Value.h llvm/trunk/lib/VMCore/Constants.cpp llvm/trunk/lib/VMCore/LLVMContextImpl.cpp llvm/trunk/lib/VMCore/LLVMContextImpl.h Modified: llvm/trunk/include/llvm/Constants.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Constants.h?rev=148741&r1=148740&r2=148741&view=diff ============================================================================== --- llvm/trunk/include/llvm/Constants.h (original) +++ llvm/trunk/include/llvm/Constants.h Mon Jan 23 16:57:10 2012 @@ -34,6 +34,7 @@ class StructType; class PointerType; class VectorType; +class SequentialType; template struct ConstantCreator; @@ -298,7 +299,6 @@ /// ConstantAggregateZero - All zero aggregate value /// class ConstantAggregateZero : public Constant { - friend struct ConstantCreator; void *operator new(size_t, unsigned); // DO NOT IMPLEMENT ConstantAggregateZero(const ConstantAggregateZero &); // DO NOT IMPLEMENT protected: @@ -503,7 +503,6 @@ /// ConstantPointerNull - a constant pointer value that points to null /// class ConstantPointerNull : public Constant { - friend struct ConstantCreator; void *operator new(size_t, unsigned); // DO NOT IMPLEMENT ConstantPointerNull(const ConstantPointerNull &); // DO NOT IMPLEMENT protected: @@ -535,6 +534,166 @@ return V->getValueID() == ConstantPointerNullVal; } }; + +//===----------------------------------------------------------------------===// +/// ConstantDataSequential - A vector or array of data that contains no +/// relocations, and whose element type is a simple 1/2/4/8-byte integer or +/// float/double. This is the common base class of ConstantDataArray and +/// ConstantDataVector. +/// +class ConstantDataSequential : public Constant { + friend class LLVMContextImpl; + /// DataElements - A pointer to the bytes underlying this constant (which is + /// owned by the uniquing StringMap). 
+ const char *DataElements; + + /// Next - This forms a link list of ConstantDataSequential nodes that have + /// the same value but different type. For example, 0,0,0,1 could be a 4 + /// element array of i8, or a 1-element array of i32. They'll both end up in + /// the same StringMap bucket, linked up. + ConstantDataSequential *Next; + void *operator new(size_t, unsigned); // DO NOT IMPLEMENT + ConstantDataSequential(const ConstantDataSequential &); // DO NOT IMPLEMENT +protected: + explicit ConstantDataSequential(Type *ty, ValueTy VT, const char *Data) + : Constant(ty, VT, 0, 0), DataElements(Data) {} + ~ConstantDataSequential() { delete Next; } + + static Constant *getImpl(StringRef Bytes, Type *Ty); + +protected: + // allocate space for exactly zero operands. + void *operator new(size_t s) { + return User::operator new(s, 0); + } +public: + + virtual void destroyConstant(); + + /// getElementAsInteger - If this is a sequential container of integers (of + /// any size), return the specified element in the low bits of a uint64_t. + uint64_t getElementAsInteger(unsigned i) const; + + /// getElementAsAPFloat - If this is a sequential container of floating point + /// type, return the specified element as an APFloat. + APFloat getElementAsAPFloat(unsigned i) const; + + /// getElementAsFloat - If this is an sequential container of floats, return + /// the specified element as a float. + float getElementAsFloat(unsigned i) const; + + /// getElementAsDouble - If this is an sequential container of doubles, return + /// the specified element as a float. + double getElementAsDouble(unsigned i) const; + + /// getElementAsConstant - Return a Constant for a specified index's element. + /// Note that this has to compute a new constant to return, so it isn't as + /// efficient as getElementAsInteger/Float/Double. 
+ Constant *getElementAsConstant(unsigned i) const; + + /// getType - Specialize the getType() method to always return a + /// SequentialType, which reduces the amount of casting needed in parts of the + /// compiler. + inline SequentialType *getType() const { + return reinterpret_cast(Value::getType()); + } + + /// Methods for support type inquiry through isa, cast, and dyn_cast: + /// + static bool classof(const ConstantDataSequential *) { return true; } + static bool classof(const Value *V) { + return V->getValueID() == ConstantDataArrayVal || + V->getValueID() == ConstantDataVectorVal; + } +}; + +//===----------------------------------------------------------------------===// +/// ConstantDataArray - An array of data that contains no relocations, and whose +/// element type is a simple 1/2/4/8-byte integer or float/double. +/// +class ConstantDataArray : public ConstantDataSequential { + void *operator new(size_t, unsigned); // DO NOT IMPLEMENT + ConstantDataArray(const ConstantDataArray &); // DO NOT IMPLEMENT + virtual void anchor(); + friend class ConstantDataSequential; + explicit ConstantDataArray(Type *ty, const char *Data) + : ConstantDataSequential(ty, ConstantDataArrayVal, Data) {} +protected: + // allocate space for exactly zero operands. + void *operator new(size_t s) { + return User::operator new(s, 0); + } +public: + + /// get() constructors - Return a constant with array type with an element + /// count and element type matching the ArrayRef passed in. Note that this + /// can return a ConstantAggregateZero object. 
+ static Constant *get(ArrayRef Elts, LLVMContext &Context); + static Constant *get(ArrayRef Elts, LLVMContext &Context); + static Constant *get(ArrayRef Elts, LLVMContext &Context); + static Constant *get(ArrayRef Elts, LLVMContext &Context); + static Constant *get(ArrayRef Elts, LLVMContext &Context); + static Constant *get(ArrayRef Elts, LLVMContext &Context); + + /// getType - Specialize the getType() method to always return an ArrayType, + /// which reduces the amount of casting needed in parts of the compiler. + /// + inline ArrayType *getType() const { + return reinterpret_cast(Value::getType()); + } + + /// Methods for support type inquiry through isa, cast, and dyn_cast: + /// + static bool classof(const ConstantDataArray *) { return true; } + static bool classof(const Value *V) { + return V->getValueID() == ConstantDataArrayVal; + } +}; + +//===----------------------------------------------------------------------===// +/// ConstantDataVector - A vector of data that contains no relocations, and +/// whose element type is a simple 1/2/4/8-byte integer or float/double. +/// +class ConstantDataVector : public ConstantDataSequential { + void *operator new(size_t, unsigned); // DO NOT IMPLEMENT + ConstantDataVector(const ConstantDataVector &); // DO NOT IMPLEMENT + virtual void anchor(); + friend class ConstantDataSequential; + explicit ConstantDataVector(Type *ty, const char *Data) + : ConstantDataSequential(ty, ConstantDataVectorVal, Data) {} +protected: + // allocate space for exactly zero operands. + void *operator new(size_t s) { + return User::operator new(s, 0); + } +public: + + /// get() constructors - Return a constant with vector type with an element + /// count and element type matching the ArrayRef passed in. Note that this + /// can return a ConstantAggregateZero object. 
+ static Constant *get(ArrayRef Elts, LLVMContext &Context); + static Constant *get(ArrayRef Elts, LLVMContext &Context); + static Constant *get(ArrayRef Elts, LLVMContext &Context); + static Constant *get(ArrayRef Elts, LLVMContext &Context); + static Constant *get(ArrayRef Elts, LLVMContext &Context); + static Constant *get(ArrayRef Elts, LLVMContext &Context); + + /// getType - Specialize the getType() method to always return a VectorType, + /// which reduces the amount of casting needed in parts of the compiler. + /// + inline VectorType *getType() const { + return reinterpret_cast(Value::getType()); + } + + /// Methods for support type inquiry through isa, cast, and dyn_cast: + /// + static bool classof(const ConstantDataVector *) { return true; } + static bool classof(const Value *V) { + return V->getValueID() == ConstantDataVectorVal; + } +}; + + /// BlockAddress - The address of a basic block. /// @@ -893,7 +1052,6 @@ /// LangRef.html#undefvalues for details. /// class UndefValue : public Constant { - friend struct ConstantCreator; void *operator new(size_t, unsigned); // DO NOT IMPLEMENT UndefValue(const UndefValue &); // DO NOT IMPLEMENT protected: Modified: llvm/trunk/include/llvm/Value.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Value.h?rev=148741&r1=148740&r2=148741&view=diff ============================================================================== --- llvm/trunk/include/llvm/Value.h (original) +++ llvm/trunk/include/llvm/Value.h Mon Jan 23 16:57:10 2012 @@ -193,6 +193,8 @@ BlockAddressVal, // This is an instance of BlockAddress ConstantExprVal, // This is an instance of ConstantExpr ConstantAggregateZeroVal, // This is an instance of ConstantAggregateZero + ConstantDataArrayVal, // This is an instance of ConstantDataArray + ConstantDataVectorVal, // This is an instance of ConstantDataVector ConstantIntVal, // This is an instance of ConstantInt ConstantFPVal, // This is an instance of ConstantFP ConstantArrayVal, // This is 
an instance of ConstantArray Modified: llvm/trunk/lib/VMCore/Constants.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/Constants.cpp?rev=148741&r1=148740&r2=148741&view=diff ============================================================================== --- llvm/trunk/lib/VMCore/Constants.cpp (original) +++ llvm/trunk/lib/VMCore/Constants.cpp Mon Jan 23 16:57:10 2012 @@ -1913,6 +1913,154 @@ OperandList[i+1] = IdxList[i]; } +//===----------------------------------------------------------------------===// +// ConstantData* implementations + +void ConstantDataArray::anchor() {} +void ConstantDataVector::anchor() {} + +/// isAllZeros - return true if the array is empty or all zeros. +static bool isAllZeros(StringRef Arr) { + for (StringRef::iterator I = Arr.begin(), E = Arr.end(); I != E; ++I) + if (*I != 0) + return false; + return true; +} +/// getImpl - This is the underlying implementation of all of the +/// ConstantDataSequential::get methods. They all thunk down to here, providing +/// the correct element type. We take the bytes in as an StringRef because +/// we *want* an underlying "char*" to avoid TBAA type punning violations. +Constant *ConstantDataSequential::getImpl(StringRef Elements, Type *Ty) { + // If the elements are all zero, return a CAZ, which is more dense. + if (isAllZeros(Elements)) + return ConstantAggregateZero::get(Ty); + + // Do a lookup to see if we have already formed one of these. + StringMap::MapEntryTy &Slot = + Ty->getContext().pImpl->CDSConstants.GetOrCreateValue(Elements); + + // The bucket can point to a linked list of different CDS's that have the same + // body but different types. For example, 0,0,0,1 could be a 4 element array + // of i8, or a 1-element array of i32. They'll both end up in the same + /// StringMap bucket, linked up by their Next pointers. Walk the list. 
+ ConstantDataSequential **Entry = &Slot.getValue(); + for (ConstantDataSequential *Node = *Entry; Node != 0; + Entry = &Node->Next, Node = *Entry) + if (Node->getType() == Ty) + return Node; + + // Okay, we didn't get a hit. Create a node of the right class, link it in, + // and return it. + if (isa(Ty)) + return *Entry = new ConstantDataArray(Ty, Slot.getKeyData()); + + assert(isa(Ty)); + return *Entry = new ConstantDataVector(Ty, Slot.getKeyData()); +} + +void ConstantDataSequential::destroyConstant() { + uint64_t ByteSize = + getType()->getElementType()->getPrimitiveSizeInBits()/8 * + getType()->getElementType()->getNumElements(); + + // Remove the constant from the StringMap. + StringMap &CDSConstants = + getType()->getContext().pImpl->CDSConstants; + + StringMap::iterator Slot = + CDSConstants.find(StringRef(DataElements, ByteSize)); + + assert(Slot != CDSConstants.end() && "CDS not found in uniquing table"); + + ConstantDataSequential **Entry = &Slot->getValue(); + + // Remove the entry from the hash table. + if ((*Entry)->Next == 0) { + // If there is only one value in the bucket (common case) it must be this + // entry, and removing the entry should remove the bucket completely. + assert((*Entry) == this && "Hash mismatch in ConstantDataSequential"); + getContext().pImpl->CDSConstants.erase(Slot); + } else { + // Otherwise, there are multiple entries linked off the bucket, unlink the + // node we care about but keep the bucket around. + for (ConstantDataSequential *Node = *Entry; ; + Entry = &Node->Next, Node = *Entry) { + assert(Node && "Didn't find entry in its uniquing hash table!"); + // If we found our entry, unlink it from the list and we're done. + if (Node == this) { + *Entry = Node->Next; + break; + } + } + } + + // If we were part of a list, make sure that we don't delete the list that is + // still owned by the uniquing map. + Next = 0; + + // Finally, actually delete it. 
+ destroyConstantImpl(); +} + +/// get() constructors - Return a constant with array type with an element +/// count and element type matching the ArrayRef passed in. Note that this +/// can return a ConstantAggregateZero object. +Constant *ConstantDataArray::get(ArrayRef Elts, LLVMContext &Context) { + Type *Ty = ArrayType::get(Type::getInt8Ty(Context), Elts.size()); + return getImpl(StringRef((char*)Elts.data(), Elts.size()*1), Ty); +} +Constant *ConstantDataArray::get(ArrayRef Elts, LLVMContext &Context){ + Type *Ty = ArrayType::get(Type::getInt16Ty(Context), Elts.size()); + return getImpl(StringRef((char*)Elts.data(), Elts.size()*2), Ty); +} +Constant *ConstantDataArray::get(ArrayRef Elts, LLVMContext &Context){ + Type *Ty = ArrayType::get(Type::getInt32Ty(Context), Elts.size()); + return getImpl(StringRef((char*)Elts.data(), Elts.size()*4), Ty); +} +Constant *ConstantDataArray::get(ArrayRef Elts, LLVMContext &Context){ + Type *Ty = ArrayType::get(Type::getInt64Ty(Context), Elts.size()); + return getImpl(StringRef((char*)Elts.data(), Elts.size()*8), Ty); +} +Constant *ConstantDataArray::get(ArrayRef Elts, LLVMContext &Context) { + Type *Ty = ArrayType::get(Type::getFloatTy(Context), Elts.size()); + return getImpl(StringRef((char*)Elts.data(), Elts.size()*4), Ty); +} +Constant *ConstantDataArray::get(ArrayRef Elts, LLVMContext &Context) { + Type *Ty = ArrayType::get(Type::getDoubleTy(Context), Elts.size()); + return getImpl(StringRef((char*)Elts.data(), Elts.size()*8), Ty); +} + + +/// get() constructors - Return a constant with vector type with an element +/// count and element type matching the ArrayRef passed in. Note that this +/// can return a ConstantAggregateZero object. 
+Constant *ConstantDataVector::get(ArrayRef Elts, LLVMContext &Context) { + Type *Ty = VectorType::get(Type::getInt8Ty(Context), Elts.size()); + return getImpl(StringRef((char*)Elts.data(), Elts.size()*1), Ty); +} +Constant *ConstantDataVector::get(ArrayRef Elts, LLVMContext &Context){ + Type *Ty = VectorType::get(Type::getInt16Ty(Context), Elts.size()); + return getImpl(StringRef((char*)Elts.data(), Elts.size()*2), Ty); +} +Constant *ConstantDataVector::get(ArrayRef Elts, LLVMContext &Context){ + Type *Ty = VectorType::get(Type::getInt32Ty(Context), Elts.size()); + return getImpl(StringRef((char*)Elts.data(), Elts.size()*4), Ty); +} +Constant *ConstantDataVector::get(ArrayRef Elts, LLVMContext &Context){ + Type *Ty = VectorType::get(Type::getInt64Ty(Context), Elts.size()); + return getImpl(StringRef((char*)Elts.data(), Elts.size()*8), Ty); +} +Constant *ConstantDataVector::get(ArrayRef Elts, LLVMContext &Context) { + Type *Ty = VectorType::get(Type::getFloatTy(Context), Elts.size()); + return getImpl(StringRef((char*)Elts.data(), Elts.size()*4), Ty); +} +Constant *ConstantDataVector::get(ArrayRef Elts, LLVMContext &Context) { + Type *Ty = VectorType::get(Type::getDoubleTy(Context), Elts.size()); + return getImpl(StringRef((char*)Elts.data(), Elts.size()*8), Ty); +} + + + //===----------------------------------------------------------------------===// // replaceUsesOfWithOnConstant implementations Modified: llvm/trunk/lib/VMCore/LLVMContextImpl.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/LLVMContextImpl.cpp?rev=148741&r1=148740&r2=148741&view=diff ============================================================================== --- llvm/trunk/lib/VMCore/LLVMContextImpl.cpp (original) +++ llvm/trunk/lib/VMCore/LLVMContextImpl.cpp Mon Jan 23 16:57:10 2012 @@ -79,6 +79,11 @@ DeleteContainerSeconds(IntConstants); DeleteContainerSeconds(FPConstants); + for (StringMap::iterator I = CDSConstants.begin(), + E = CDSConstants.end(); I != E; ++I) + delete 
I->second; + CDSConstants.clear(); + // Destroy MDNodes. ~MDNode can move and remove nodes between the MDNodeSet // and the NonUniquedMDNodes sets, so copy the values out first. SmallVector MDNodes; Modified: llvm/trunk/lib/VMCore/LLVMContextImpl.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/LLVMContextImpl.h?rev=148741&r1=148740&r2=148741&view=diff ============================================================================== --- llvm/trunk/lib/VMCore/LLVMContextImpl.h (original) +++ llvm/trunk/lib/VMCore/LLVMContextImpl.h Mon Jan 23 16:57:10 2012 @@ -156,6 +156,9 @@ DenseMap UVConstants; + StringMap CDSConstants; + + DenseMap , BlockAddress*> BlockAddresses; ConstantUniqueMap ExprConstants; From asl at math.spbu.ru Mon Jan 23 16:57:52 2012 From: asl at math.spbu.ru (Anton Korobeynikov) Date: Mon, 23 Jan 2012 22:57:52 -0000 Subject: [llvm-commits] [llvm] r148742 - in /llvm/trunk/lib/Target/ARM: ARMInstrInfo.td ARMInstrThumb2.td Message-ID: <20120123225752.975932A6C12C@llvm.org> Author: asl Date: Mon Jan 23 16:57:52 2012 New Revision: 148742 URL: http://llvm.org/viewvc/llvm-project?rev=148742&view=rev Log: Add missed mayStore flag to STREXD / t2STREXD Modified: llvm/trunk/lib/Target/ARM/ARMInstrInfo.td llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td Modified: llvm/trunk/lib/Target/ARM/ARMInstrInfo.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrInfo.td?rev=148742&r1=148741&r2=148742&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrInfo.td (original) +++ llvm/trunk/lib/Target/ARM/ARMInstrInfo.td Mon Jan 23 16:57:52 2012 @@ -4273,14 +4273,14 @@ NoItinerary, "strexh", "\t$Rd, $Rt, $addr", []>; def STREX : AIstrex<0b00, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr), NoItinerary, "strex", "\t$Rd, $Rt, $addr", []>; -} - -let hasExtraSrcRegAllocReq = 1, Constraints = "@earlyclobber $Rd" in +let hasExtraSrcRegAllocReq = 1 in def STREXD : 
AIstrex<0b01, (outs GPR:$Rd), (ins GPR:$Rt, GPR:$Rt2, addr_offset_none:$addr), NoItinerary, "strexd", "\t$Rd, $Rt, $Rt2, $addr", []> { let DecoderMethod = "DecodeDoubleRegStore"; } +} + def CLREX : AXI<(outs), (ins), MiscFrm, NoItinerary, "clrex", []>, Requires<[IsARM, HasV7]> { Modified: llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td?rev=148742&r1=148741&r2=148742&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td (original) +++ llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td Mon Jan 23 16:57:52 2012 @@ -3065,9 +3065,7 @@ let Inst{11-8} = Rd; let Inst{7-0} = addr{7-0}; } -} - -let hasExtraSrcRegAllocReq = 1, Constraints = "@earlyclobber $Rd" in +let hasExtraSrcRegAllocReq = 1 in def t2STREXD : T2I_strex<0b11, (outs rGPR:$Rd), (ins rGPR:$Rt, rGPR:$Rt2, addr_offset_none:$addr), AddrModeNone, 4, NoItinerary, @@ -3076,6 +3074,7 @@ bits<4> Rt2; let Inst{11-8} = Rt2; } +} def t2CLREX : T2I<(outs), (ins), NoItinerary, "clrex", "", []>, Requires<[IsThumb2, HasV7]> { From stoklund at 2pi.dk Mon Jan 23 17:03:59 2012 From: stoklund at 2pi.dk (Jakob Stoklund Olesen) Date: Mon, 23 Jan 2012 15:03:59 -0800 Subject: [llvm-commits] [cfe-commits] [PATCH][Review Request] EarlyCSE stack overflow - bugzilla 11794 In-Reply-To: <4AA7AFB1-51F3-48B4-B543-00141B738258@colorado.edu> References: <051207DF-F3D0-4E8B-A756-E6B193750483@colorado.edu> <184C997C-6E18-4BE8-A3F8-092102981B11@colorado.edu> <6663C373-1004-4FB2-9E39-F784587B4CDD@2pi.dk> <4AA7AFB1-51F3-48B4-B543-00141B738258@colorado.edu> Message-ID: On Jan 23, 2012, at 2:30 PM, Lenny Maiorani wrote: > Thank you for the information about MachineCSE and noticing the hash tables which are being generated at the top of EarlyCSE::processNode(). My latest implementation contains roughly the same implementation as MachineCSE. 
MachineCSE generates stack of nodes to process using SmallVector. Since this data structure is so large, I think using SmallVector is not correct. SmallVector is almost always a better choice than std::vector, which is essentially equivalent to SmallVector<T, 0>. std::vector only makes sense if you need to create many vector instances that will probably overflow the small size. > Also, I implemented the solution using vec.resize() and std::copy(), but this doesn't significantly improve the performance of the vector solution. My numbers are not at all surprising to me. Vectors do not do as well as deques with many pushes and pops because deques manage their memory in slabs to prevent needing to do large reallocs and hence large copies on occasion. Interesting. A vector only needs to copy its elements once (amortized). > Using the DepthFirstIterator (df_iterator) would be nice, but I do not see an easy way to maintain the CurrentGeneration variable which gets modified on a per-tree-depth. This means that there would still need to be some sort of stack kept separately from the iterator. This would be confusing and clutter the code. Yes, unfortunately df_iterator doesn't allow you to use its internal stack. That would have been convenient. > I looked a bit at the hash tables at the top of EarlyCSE::processNode() and I do not think they are nested. It looks like they are copy-constructed from some hash tables which are class members. These are then modified and provided the order of node traversal does not change, then the data in those member hash tables should be the same. Look again. The implementation is in ScopedHashTable.h. When processing a node, there must be a ScopedHashTableScope instantiated for every dominator tree level above the node. The nested scopes form a linked list.
/jakob From dblaikie at gmail.com Mon Jan 23 17:04:22 2012 From: dblaikie at gmail.com (David Blaikie) Date: Mon, 23 Jan 2012 15:04:22 -0800 Subject: [llvm-commits] fix the MSVC warning in include/llvm-c/Core.h In-Reply-To: References: Message-ID: On Mon, Jan 23, 2012 at 2:53 PM, Paul Robinson wrote: > On Mon, Jan 23, 2012 at 1:45 PM, Kostya Serebryany wrote: >> There are 8 functions dealing with LLVMAttribute in include/llvm-c/Core.h. >> Do you suggest to add 8 more functions that will deal with uint64_t? >> Like this? >> >> void LLVMAddFunctionAttr(LLVMValueRef Fn, LLVMAttribute PA); >> void LLVMAddFunctionAttr64(LLVMValueRef Fn, uint64_t PA); >> >> --kcc > > With a more meaningful type name, e.g. LLVMAttribute64 or some such, yes; Given the constraints of the stable C API (binary compatibility) there's no way we can make this forwards compatible, is there? (passing a struct we could add elements to in the future - that would break binary compat, yes?) > I prefer that to 8 more functions that take pairs of 32-bit enum values. > > void LLVMAddFunctionAttr(LLVMValueRef Fn, LLVMAttribute PA); > void LLVMAddFunctionAttr2(LLVMValueRef Fn, LLVMAttribute PA, > LLVMAttribute2 PA2); > > Or to 8 more functions that take the second set of enum values. > > void LLVMAddFunctionAttr(LLVMValueRef Fn, LLVMAttribute PA); > void LLVMAddFunctionAttr2(LLVMValueRef Fn, LLVMAttribute2 PA); > > In all cases, the first function of each pair must be preserved for > compatibility; > the question is merely, what does the second function look like, and does the > caller have to care which enum a given attribute belongs to. > With the approach I suggested, I can convert to the new type and function > without regard to the implementation detail of the bit number being used for > the attribute I care about at the moment.  All I need to know is its name, > which is as it should be.
> > I can't see any way to both expand the attribute set, and preserve binary > compatibility, without defining 8 new functions of SOME kind.  In that case, > I would rather have a full new set of 8, and deprecate the old set of 8, > than have to contend with 16 functions all actively in use. > > Pogo > >> >> On Mon, Jan 23, 2012 at 1:40 PM, Paul Robinson wrote: >>> >>> On Mon, Jan 23, 2012 at 12:05 PM, Kostya Serebryany >>> wrote: >>> > >>> > >>> > On Mon, Jan 23, 2012 at 12:01 PM, Eli Friedman >>> > wrote: >>> >> >>> >> On Mon, Jan 23, 2012 at 11:02 AM, Kostya Serebryany >>> >> wrote: >>> >> > My previous change in include/llvm-c/Core.h that introduced 64-bit >>> >> > Attributes (r148553) caused a warning >>> >> > while building with MSVC. http://llvm.org/bugs/show_bug.cgi?id=11828 >>> >> > The following patch fixes the problem (use "static const uint64_t" >>> >> > instead >>> >> > of enum). >>> >> > Ok to commit? >>> >> > >>> >> > --kcc >>> >> > >>> >> > Index: include/llvm-c/Core.h >>> >> > =================================================================== >>> >> > --- include/llvm-c/Core.h  (revision 148708) >>> >> > +++ include/llvm-c/Core.h  (working copy) >>> >> > @@ -92,7 +92,7 @@ >>> >> >  /** Used to get the users and usees of a Value. See the llvm::Use >>> >> > class. */ >>> >> >  typedef struct LLVMOpaqueUse *LLVMUseRef; >>> >> > >>> >> > -typedef enum { >>> >> > +static const uint64_t >>> >> >      LLVMZExtAttribute       = 1<<0, >>> >> >      LLVMSExtAttribute       = 1<<1, >>> >> >      LLVMNoReturnAttribute   = 1<<2, >>> >> > @@ -119,8 +119,8 @@ >>> >> >      LLVMReturnsTwice = 1 << 29, >>> >> >      LLVMUWTable = 1 << 30, >>> >> >      LLVMNonLazyBind = 1U << 31, >>> >> > -    LLVMAddressSafety = 1ULL << 32 >>> >> > -} LLVMAttribute; >>> >> > +    LLVMAddressSafety = 1ULL << 32; >>> >> > +typedef uint64_t LLVMAttribute; >>> >> > >>> >> >  typedef enum { >>> >> >    /* Terminator Instructions */ >>> >> >>> >> Hmm...
actually, I'm not sure this is okay; it's a >>> >> non-binary-compatible change to the C API. >>> > >>> > >>> > Any other suggestion? >>> > It's not easy to keep compatibility once the new (beyond 32-bits) >>> > attributes >>> > start getting used. >>> > Maybe add "enum LLVMAttribute2" for attributes in bits 33-64? >>> > >>> > --kcc >>> > >>> >> >>> >> >>> >> -Eli >>> > >>> > >>> > >>> > _______________________________________________ >>> > llvm-commits mailing list >>> > llvm-commits at cs.uiuc.edu >>> > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits >>> > >>> >>> I've run into this kind of problem before. >>> >>> A separate enum for attributes >= 32 is a royal pain. >>> It's an implementation artifact that has no relevance to the >>> semantics of the attributes. >>> You'd need to add a new API to take the upper attributes, >>> and the caller has to remember (or look up, every time) >>> which attributes go with which enum.  :-P >>> >>> Defining a new 64-bit type that understands all attributes >>> is better, as all attributes can be handled the same way. >>> You still need a new API, but the caller does not have to >>> understand some arbitrary split between two different enums. >>> >>> Pogo >> >> > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits From preston.gurd at intel.com Mon Jan 23 17:05:37 2012 From: preston.gurd at intel.com (Gurd, Preston) Date: Mon, 23 Jan 2012 23:05:37 +0000 Subject: [llvm-commits] [llvm][PATCH - REVISED][Review request] X86 Instruction scheduler for the Intel Atom Message-ID: Revision 2: Tests which were failing, when run on an Atom, due to the tests finding a schedule different from what was expected, have been changed to use "-mcpu=generic" in order to prevent the Atom scheduler from running, so that all "make check" tests pass.
From: Gurd, Preston Sent: Tuesday, January 17, 2012 4:29 PM To: Evan Cheng Cc: llvm-commits at cs.uiuc.edu Subject: [llvm-commits] [llvm][PATCH - REVISED][Review request] X86 Instruction scheduler for the Intel Atom The attached patch implements most of an instruction scheduler for the Intel Atom. It adds an instruction itinerary to all x86 instructions, giving each a default latency of 1, using the InstrItinClass IIC_DEFAULT. It sets specific latencies for Atom for the instructions in files X86InstrCMovSetCC.td, X86InstrArithmetic.td, X86InstrControl.td, and X86InstrShiftRotate.td. The Atom latencies for the remainder of the x86 instructions will be set in subsequent patches. It adds a test to verify that the scheduler is working. I realize that this patch is kind of large, but please consider that the vast majority of the changes consist only of adding an instruction itinerary class name to an instruction. Revision: the patch also changes the scheduling preference to "Hybrid" for i386 Atom, while leaving x86_64 as ILP. Please commit the patch if it seems acceptable. Preston From: Evan Cheng [mailto:evan.cheng at apple.com] Sent: Monday, January 16, 2012 12:01 PM To: Gurd, Preston Cc: llvm-commits at cs.uiuc.edu Subject: Re: [llvm-commits] [llvm][PATCH][Review request] X86 Instruction scheduler for the Intel Atom Very nice. One question, I noticed you haven't changed the scheduling preference so x86_64 is still using ILP scheduler while i386 is using register pressure reduction scheduler. Have you tried changing the preference to latency scheduler for Atom? Evan On Jan 13, 2012, at 3:26 PM, Gurd, Preston wrote: The attached patch implements most of an instruction scheduler for the Intel Atom. It adds an instruction itinerary to all x86 instructions, giving each a default latency of 1, using the InstrItinClass IIC_DEFAULT. 
It sets specific latencies for Atom for the instructions in files X86InstrCMovSetCC.td, X86InstrArithmetic.td, X86InstrControl.td, and X86InstrShiftRotate.td. The Atom latencies for the remainder of the x86 instructions will be set in subsequent patches. It adds a test to verify that the scheduler is working. I realize that this patch is kind of large, but please consider that the vast majority of the changes consist only of adding an instruction itinerary class name to an instruction. -- Preston Gurd > Intel Waterloo -------------- next part -------------- An HTML attachment was scrubbed... URL: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120123/38404fb7/attachment-0001.html -------------- next part -------------- A non-text attachment was scrubbed... Name: llvm-x86-scheduler.diff Type: application/octet-stream Size: 186012 bytes Desc: llvm-x86-scheduler.diff Url : http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120123/38404fb7/attachment-0001.obj From chandlerc at google.com Mon Jan 23 17:16:57 2012 From: chandlerc at google.com (Chandler Carruth) Date: Mon, 23 Jan 2012 15:16:57 -0800 Subject: [llvm-commits] fix the MSVC warning in include/llvm-c/Core.h In-Reply-To: References: Message-ID: On Mon, Jan 23, 2012 at 3:04 PM, David Blaikie wrote: > On Mon, Jan 23, 2012 at 2:53 PM, Paul Robinson > wrote: > > On Mon, Jan 23, 2012 at 1:45 PM, Kostya Serebryany > wrote: > >> There are 8 functions dealing with LLVMAttribute > in include/llvm-c/Core.h. > >> Do you suggest to add 8 more functions that will deal with uint64_t? > >> Like this? > >> > >> void LLVMAddFunctionAttr(LLVMValueRef Fn, LLVMAttribute PA); > >> void LLVMAddFunctionAttr64(LLVMValueRef Fn, uint64_t PA); > >> > >> --kcc > > > > With a more meaningful type name, e.g. LLVMAttribute64 or some such, yes; > > Given the constraints of the stable C API (binary compatibility) > there's no way we can make this forwards compatible, is there? 
> (passing a struct we could add elements to in the future - that would > break binary compat, yes?) > You simply have to make the struct opaque, and only manipulated through API calls. However that's pretty high cost. -------------- next part -------------- An HTML attachment was scrubbed... URL: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120123/947ce2e9/attachment.html From dblaikie at gmail.com Mon Jan 23 17:19:40 2012 From: dblaikie at gmail.com (David Blaikie) Date: Mon, 23 Jan 2012 15:19:40 -0800 Subject: [llvm-commits] fix the MSVC warning in include/llvm-c/Core.h In-Reply-To: References: Message-ID: On Mon, Jan 23, 2012 at 3:16 PM, Chandler Carruth wrote: > On Mon, Jan 23, 2012 at 3:04 PM, David Blaikie wrote: >> >> On Mon, Jan 23, 2012 at 2:53 PM, Paul Robinson >> wrote: >> > On Mon, Jan 23, 2012 at 1:45 PM, Kostya Serebryany >> > wrote: >> >> There are 8 functions dealing with?LLVMAttribute >> >> in?include/llvm-c/Core.h. >> >> Do you suggest to add 8 more functions that will deal with uint64_t? >> >> Like this? >> >> >> >> void LLVMAddFunctionAttr(LLVMValueRef Fn, LLVMAttribute PA); >> >> void LLVMAddFunctionAttr64(LLVMValueRef Fn, uint64_t PA); >> >> >> >> --kcc >> > >> > With a more meaningful type name, e.g. LLVMAttribute64 or some such, >> > yes; >> >> Given the constraints of the stable C API (binary compatibility) >> there's no way we can make this forwards compatible, is there? >> (passing a struct we could add elements to in the future - that would >> break binary compat, yes?) > > > You simply have to make the struct opaque, and only manipulated through API > calls. However that's pretty high cost. Yeah - I was starting to see that. Pity - so we just add a new generation of API calls every time we reach the limit. 
- David From grosbach at apple.com Mon Jan 23 17:20:46 2012 From: grosbach at apple.com (Jim Grosbach) Date: Mon, 23 Jan 2012 23:20:46 -0000 Subject: [llvm-commits] [llvm] r148745 - in /llvm/trunk: lib/Target/ARM/ARMInstrNEON.td lib/Target/ARM/AsmParser/ARMAsmParser.cpp lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp lib/Target/ARM/InstPrinter/ARMInstPrinter.h test/MC/ARM/neon-vld-encoding.s test/MC/ARM/neon-vst-encoding.s Message-ID: <20120123232046.BB0DC2A6C12C@llvm.org> Author: grosbach Date: Mon Jan 23 17:20:46 2012 New Revision: 148745 URL: http://llvm.org/viewvc/llvm-project?rev=148745&view=rev Log: NEON VLD3(multiple 3-element structures) assembly parsing. Modified: llvm/trunk/lib/Target/ARM/ARMInstrNEON.td llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp llvm/trunk/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp llvm/trunk/lib/Target/ARM/InstPrinter/ARMInstPrinter.h llvm/trunk/test/MC/ARM/neon-vld-encoding.s llvm/trunk/test/MC/ARM/neon-vst-encoding.s Modified: llvm/trunk/lib/Target/ARM/ARMInstrNEON.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrNEON.td?rev=148745&r1=148744&r2=148745&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrNEON.td (original) +++ llvm/trunk/lib/Target/ARM/ARMInstrNEON.td Mon Jan 23 17:20:46 2012 @@ -124,6 +124,15 @@ def VecListTwoQ : RegisterOperand { let ParserMatchClass = VecListTwoQAsmOperand; } +// Register list of three D registers spaced by 2 (three Q registers). +def VecListThreeQAsmOperand : AsmOperandClass { + let Name = "VecListThreeQ"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListOperands"; +} +def VecListThreeQ : RegisterOperand { + let ParserMatchClass = VecListThreeQAsmOperand; +} // Register list of one D register, with "all lanes" subscripting. 
def VecListOneDAllLanesAsmOperand : AsmOperandClass { @@ -6017,6 +6026,67 @@ (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, rGPR:$Rm, pred:$p)>; +// VLD3 multiple structurepseudo-instructions. These need special handling for +// the vector operands that the normal instructions don't yet model. +// FIXME: Remove these when the register classes and instructions are updated. +def VLD3dAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr", + (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; +def VLD3dAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr", + (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; +def VLD3dAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr", + (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; +def VLD3qAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr", + (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; +def VLD3qAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr", + (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; +def VLD3qAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr", + (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; + +def VLD3dWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!", + (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; +def VLD3dWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!", + (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; +def VLD3dWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!", + (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; +def VLD3qWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!", + (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; +def VLD3qWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!", + (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; +def VLD3qWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld3${p}", 
".32", "$list, $addr!", + (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; +def VLD3dWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm", + (ins VecListThreeD:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD3dWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm", + (ins VecListThreeD:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD3dWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm", + (ins VecListThreeD:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD3qWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm", + (ins VecListThreeQ:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD3qWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm", + (ins VecListThreeQ:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD3qWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm", + (ins VecListThreeQ:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; + + + // VMOV takes an optional datatype suffix defm : VFPDTAnyInstAlias<"vmov${p}", "$Vd, $Vm", (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>; Modified: llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp?rev=148745&r1=148744&r2=148745&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp (original) +++ llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp Mon Jan 23 17:20:46 2012 @@ -1101,6 +1101,11 @@ return VectorList.Count == 2; } + bool isVecListThreeQ() const { + if (!isDoubleSpacedVectorList()) return false; + return VectorList.Count == 3; + } + bool isSingleSpacedVectorAllLanes() const { return Kind == k_VectorListAllLanes && !VectorList.isDoubleSpaced; } @@ -5376,6 +5381,62 @@ case ARM::VLD3LNqAsm_32: Spacing = 2; 
return ARM::VLD3LNq32; + + // VLD3 + case ARM::VLD3dWB_fixed_Asm_8: + Spacing = 1; + return ARM::VLD3d8_UPD; + case ARM::VLD3dWB_fixed_Asm_16: + Spacing = 1; + return ARM::VLD3d16_UPD; + case ARM::VLD3dWB_fixed_Asm_32: + Spacing = 1; + return ARM::VLD3d32_UPD; + case ARM::VLD3qWB_fixed_Asm_8: + Spacing = 2; + return ARM::VLD3q8_UPD; + case ARM::VLD3qWB_fixed_Asm_16: + Spacing = 2; + return ARM::VLD3q16_UPD; + case ARM::VLD3qWB_fixed_Asm_32: + Spacing = 2; + return ARM::VLD3q32_UPD; + case ARM::VLD3dWB_register_Asm_8: + Spacing = 1; + return ARM::VLD3d8_UPD; + case ARM::VLD3dWB_register_Asm_16: + Spacing = 1; + return ARM::VLD3d16_UPD; + case ARM::VLD3dWB_register_Asm_32: + Spacing = 1; + return ARM::VLD3d32_UPD; + case ARM::VLD3qWB_register_Asm_8: + Spacing = 2; + return ARM::VLD3q8_UPD; + case ARM::VLD3qWB_register_Asm_16: + Spacing = 2; + return ARM::VLD3q16_UPD; + case ARM::VLD3qWB_register_Asm_32: + Spacing = 2; + return ARM::VLD3q32_UPD; + case ARM::VLD3dAsm_8: + Spacing = 1; + return ARM::VLD3d8; + case ARM::VLD3dAsm_16: + Spacing = 1; + return ARM::VLD3d16; + case ARM::VLD3dAsm_32: + Spacing = 1; + return ARM::VLD3d32; + case ARM::VLD3qAsm_8: + Spacing = 2; + return ARM::VLD3q8; + case ARM::VLD3qAsm_16: + Spacing = 2; + return ARM::VLD3q16; + case ARM::VLD3qAsm_32: + Spacing = 2; + return ARM::VLD3q32; } } @@ -5588,7 +5649,7 @@ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + Spacing)); TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + - Spacing)); + Spacing * 2)); TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb TmpInst.addOperand(Inst.getOperand(2)); // Rn TmpInst.addOperand(Inst.getOperand(3)); // alignment @@ -5597,7 +5658,7 @@ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + Spacing)); TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + - Spacing)); + Spacing * 2)); TmpInst.addOperand(Inst.getOperand(1)); // lane TmpInst.addOperand(Inst.getOperand(5)); // CondCode 
TmpInst.addOperand(Inst.getOperand(6)); @@ -5667,7 +5728,7 @@ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + Spacing)); TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + - Spacing)); + Spacing * 2)); TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb TmpInst.addOperand(Inst.getOperand(2)); // Rn TmpInst.addOperand(Inst.getOperand(3)); // alignment @@ -5676,7 +5737,7 @@ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + Spacing)); TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + - Spacing)); + Spacing * 2)); TmpInst.addOperand(Inst.getOperand(1)); // lane TmpInst.addOperand(Inst.getOperand(4)); // CondCode TmpInst.addOperand(Inst.getOperand(5)); @@ -5742,14 +5803,14 @@ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + Spacing)); TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + - Spacing)); + Spacing * 2)); TmpInst.addOperand(Inst.getOperand(2)); // Rn TmpInst.addOperand(Inst.getOperand(3)); // alignment TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd) TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + Spacing)); TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + - Spacing)); + Spacing * 2)); TmpInst.addOperand(Inst.getOperand(1)); // lane TmpInst.addOperand(Inst.getOperand(4)); // CondCode TmpInst.addOperand(Inst.getOperand(5)); @@ -5757,6 +5818,77 @@ return true; } + // VLD3 multiple 3-element structure instructions. 
+ case ARM::VLD3dAsm_8: + case ARM::VLD3dAsm_16: + case ARM::VLD3dAsm_32: + case ARM::VLD3qAsm_8: + case ARM::VLD3qAsm_16: + case ARM::VLD3qAsm_32: { + MCInst TmpInst; + unsigned Spacing; + TmpInst.setOpcode(getRealVLDLNOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(Inst.getOperand(1)); // Rn + TmpInst.addOperand(Inst.getOperand(2)); // alignment + TmpInst.addOperand(Inst.getOperand(3)); // CondCode + TmpInst.addOperand(Inst.getOperand(4)); + Inst = TmpInst; + return true; + } + + case ARM::VLD3dWB_fixed_Asm_8: + case ARM::VLD3dWB_fixed_Asm_16: + case ARM::VLD3dWB_fixed_Asm_32: + case ARM::VLD3qWB_fixed_Asm_8: + case ARM::VLD3qWB_fixed_Asm_16: + case ARM::VLD3qWB_fixed_Asm_32: { + MCInst TmpInst; + unsigned Spacing; + TmpInst.setOpcode(getRealVLDLNOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(Inst.getOperand(1)); // Rn + TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn + TmpInst.addOperand(Inst.getOperand(2)); // alignment + TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm + TmpInst.addOperand(Inst.getOperand(3)); // CondCode + TmpInst.addOperand(Inst.getOperand(4)); + Inst = TmpInst; + return true; + } + + case ARM::VLD3dWB_register_Asm_8: + case ARM::VLD3dWB_register_Asm_16: + case ARM::VLD3dWB_register_Asm_32: + case ARM::VLD3qWB_register_Asm_8: + case ARM::VLD3qWB_register_Asm_16: + case ARM::VLD3qWB_register_Asm_32: { + MCInst TmpInst; + unsigned Spacing; + TmpInst.setOpcode(getRealVLDLNOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + 
TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(Inst.getOperand(1)); // Rn + TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn + TmpInst.addOperand(Inst.getOperand(2)); // alignment + TmpInst.addOperand(Inst.getOperand(3)); // Rm + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + // Handle the Thumb2 mode MOV complex aliases. case ARM::t2MOVsr: case ARM::t2MOVSsr: { Modified: llvm/trunk/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp?rev=148745&r1=148744&r2=148745&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp (original) +++ llvm/trunk/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp Mon Jan 23 17:20:46 2012 @@ -1086,3 +1086,13 @@ << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "[]}"; } +void ARMInstPrinter::printVectorListThreeSpaced(const MCInst *MI, + unsigned OpNum, + raw_ostream &O) { + // Normally, it's not safe to use register enum values directly with + // addition to get the next register, but for VFP registers, the + // sort order is guaranteed because they're all of the form D. 
+ O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << ", " + << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << ", " + << getRegisterName(MI->getOperand(OpNum).getReg() + 4) << "}"; +} Modified: llvm/trunk/lib/Target/ARM/InstPrinter/ARMInstPrinter.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/InstPrinter/ARMInstPrinter.h?rev=148745&r1=148744&r2=148745&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/InstPrinter/ARMInstPrinter.h (original) +++ llvm/trunk/lib/Target/ARM/InstPrinter/ARMInstPrinter.h Mon Jan 23 17:20:46 2012 @@ -143,6 +143,8 @@ raw_ostream &O); void printVectorListTwoSpacedAllLanes(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printVectorListThreeSpaced(const MCInst *MI, unsigned OpNum, + raw_ostream &O); }; } // end namespace llvm Modified: llvm/trunk/test/MC/ARM/neon-vld-encoding.s URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/ARM/neon-vld-encoding.s?rev=148745&r1=148744&r2=148745&view=diff ============================================================================== --- llvm/trunk/test/MC/ARM/neon-vld-encoding.s (original) +++ llvm/trunk/test/MC/ARM/neon-vld-encoding.s Mon Jan 23 17:20:46 2012 @@ -154,25 +154,46 @@ @ CHECK: vld2.32 {d14, d15, d16, d17}, [r0, :256], r6 @ encoding: [0xb6,0xe3,0x20,0xf4] -@ vld3.8 {d16, d17, d18}, [r0, :64] -@ vld3.16 {d16, d17, d18}, [r0] -@ vld3.32 {d16, d17, d18}, [r0] -@ vld3.8 {d16, d18, d20}, [r0, :64]! -@ vld3.8 {d17, d19, d21}, [r0, :64]! -@ vld3.16 {d16, d18, d20}, [r0]! -@ vld3.16 {d17, d19, d21}, [r0]! -@ vld3.32 {d16, d18, d20}, [r0]! -@ vld3.32 {d17, d19, d21}, [r0]! - -@ FIXME: vld3.8 {d16, d17, d18}, [r0, :64] @ encoding: [0x1f,0x04,0x60,0xf4] -@ FIXME: vld3.16 {d16, d17, d18}, [r0] @ encoding: [0x4f,0x04,0x60,0xf4] -@ FIXME: vld3.32 {d16, d17, d18}, [r0] @ encoding: [0x8f,0x04,0x60,0xf4] -@ FIXME: vld3.8 {d16, d18, d20}, [r0, :64]! 
@ encoding: [0x1d,0x05,0x60,0xf4] -@ FIXME: vld3.8 {d17, d19, d21}, [r0, :64]! @ encoding: [0x1d,0x15,0x60,0xf4] -@ FIXME: vld3.16 {d16, d18, d20}, [r0]! @ encoding: [0x4d,0x05,0x60,0xf4] -@ FIXME: vld3.16 {d17, d19, d21}, [r0]! @ encoding: [0x4d,0x15,0x60,0xf4] -@ FIXME: vld3.32 {d16, d18, d20}, [r0]! @ encoding: [0x8d,0x05,0x60,0xf4] -@ FIXME: vld3.32 {d17, d19, d21}, [r0]! @ encoding: [0x8d,0x15,0x60,0xf4] + vld3.8 {d16, d17, d18}, [r1] + vld3.16 {d6, d7, d8}, [r2] + vld3.32 {d1, d2, d3}, [r3] + vld3.8 {d16, d18, d20}, [r0, :64] + vld3.u16 {d27, d29, d31}, [r4] + vld3.i32 {d6, d8, d10}, [r5] + + vld3.i8 {d12, d13, d14}, [r6], r1 + vld3.i16 {d11, d12, d13}, [r7], r2 + vld3.u32 {d2, d3, d4}, [r8], r3 + vld3.8 {d4, d6, d8}, [r9], r4 + vld3.u16 {d14, d16, d18}, [r9], r4 + vld3.i32 {d16, d18, d20}, [r10], r5 + + vld3.p8 {d6, d7, d8}, [r8]! + vld3.16 {d9, d10, d11}, [r7]! + vld3.f32 {d1, d2, d3}, [r6]! + vld3.8 {d16, d18, d20}, [r0, :64]! + vld3.p16 {d20, d22, d24}, [r5]! + vld3.32 {d5, d7, d9}, [r4]! + + +@ CHECK: vld3.8 {d16, d17, d18}, [r1] @ encoding: [0x0f,0x04,0x61,0xf4] +@ CHECK: vld3.16 {d6, d7, d8}, [r2] @ encoding: [0x4f,0x64,0x22,0xf4] +@ CHECK: vld3.32 {d1, d2, d3}, [r3] @ encoding: [0x8f,0x14,0x23,0xf4] +@ CHECK: vld3.8 {d16, d18, d20}, [r0, :64] @ encoding: [0x1f,0x05,0x60,0xf4] +@ CHECK: vld3.16 {d27, d29, d31}, [r4] @ encoding: [0x4f,0xb5,0x64,0xf4] +@ CHECK: vld3.32 {d6, d8, d10}, [r5] @ encoding: [0x8f,0x65,0x25,0xf4] +@ CHECK: vld3.8 {d12, d13, d14}, [r6], r1 @ encoding: [0x01,0xc4,0x26,0xf4] +@ CHECK: vld3.16 {d11, d12, d13}, [r7], r2 @ encoding: [0x42,0xb4,0x27,0xf4] +@ CHECK: vld3.32 {d2, d3, d4}, [r8], r3 @ encoding: [0x83,0x24,0x28,0xf4] +@ CHECK: vld3.8 {d4, d6, d8}, [r9], r4 @ encoding: [0x04,0x45,0x29,0xf4] +@ CHECK: vld3.16 {d14, d16, d18}, [r9], r4 @ encoding: [0x44,0xe5,0x29,0xf4] +@ CHECK: vld3.32 {d16, d18, d20}, [r10], r5 @ encoding: [0x85,0x05,0x6a,0xf4] +@ CHECK: vld3.8 {d6, d7, d8}, [r8]! 
@ encoding: [0x0d,0x64,0x28,0xf4] +@ CHECK: vld3.16 {d9, d10, d11}, [r7]! @ encoding: [0x4d,0x94,0x27,0xf4] +@ CHECK: vld3.32 {d1, d2, d3}, [r6]! @ encoding: [0x8d,0x14,0x26,0xf4] +@ CHECK: vld3.8 {d16, d18, d20}, [r0, :64]! @ encoding: [0x1d,0x05,0x60,0xf4] +@ CHECK: vld3.16 {d20, d22, d24}, [r5]! @ encoding: [0x4d,0x45,0x65,0xf4] +@ CHECK: vld3.32 {d5, d7, d9}, [r4]! @ encoding: [0x8d,0x55,0x24,0xf4] @ vld4.8 {d16, d17, d18, d19}, [r0, :64] @@ -260,17 +281,39 @@ @ CHECK: vld2.32 {d22[], d24[]}, [r6], r4 @ encoding: [0xa4,0x6d,0xe6,0xf4] -@ vld3.8 {d16[1], d17[1], d18[1]}, [r0] -@ vld3.16 {d16[1], d17[1], d18[1]}, [r0] -@ vld3.32 {d16[1], d17[1], d18[1]}, [r0] -@ vld3.16 {d16[1], d18[1], d20[1]}, [r0] -@ vld3.32 {d17[1], d19[1], d21[1]}, [r0] - -@ FIXME: vld3.8 {d16[1], d17[1], d18[1]}, [r0] @ encoding: [0x2f,0x02,0xe0,0xf4] -@ FIXME: vld3.16 {d16[1], d17[1], d18[1]}, [r0]@ encoding: [0x4f,0x06,0xe0,0xf4] -@ FIXME: vld3.32 {d16[1], d17[1], d18[1]}, [r0]@ encoding: [0x8f,0x0a,0xe0,0xf4] -@ FIXME: vld3.16 {d16[1], d18[1], d20[1]}, [r0]@ encoding: [0x6f,0x06,0xe0,0xf4] -@ FIXME: vld3.32 {d17[1], d19[1], d21[1]}, [r0]@ encoding: [0xcf,0x1a,0xe0,0xf4] + vld3.8 {d16[1], d17[1], d18[1]}, [r1] + vld3.16 {d6[1], d7[1], d8[1]}, [r2] + vld3.32 {d1[1], d2[1], d3[1]}, [r3] + vld3.u16 {d27[2], d29[2], d31[2]}, [r4] + vld3.i32 {d6[0], d8[0], d10[0]}, [r5] + + vld3.i8 {d12[3], d13[3], d14[3]}, [r6], r1 + vld3.i16 {d11[2], d12[2], d13[2]}, [r7], r2 + vld3.u32 {d2[1], d3[1], d4[1]}, [r8], r3 + vld3.u16 {d14[2], d16[2], d18[2]}, [r9], r4 + vld3.i32 {d16[0], d18[0], d20[0]}, [r10], r5 + + vld3.p8 {d6[6], d7[6], d8[6]}, [r8]! + vld3.16 {d9[2], d10[2], d11[2]}, [r7]! + vld3.f32 {d1[1], d2[1], d3[1]}, [r6]! + vld3.p16 {d20[2], d22[2], d24[2]}, [r5]! + vld3.32 {d5[0], d7[0], d9[0]}, [r4]! 
+ +@ CHECK: vld3.8 {d16[1], d17[1], d18[1]}, [r1] @ encoding: [0x2f,0x02,0xe1,0xf4] +@ CHECK: vld3.16 {d6[1], d7[1], d8[1]}, [r2] @ encoding: [0x4f,0x66,0xa2,0xf4] +@ CHECK: vld3.32 {d1[1], d2[1], d3[1]}, [r3] @ encoding: [0x8f,0x1a,0xa3,0xf4] +@ CHECK: vld3.16 {d27[2], d29[2], d31[2]}, [r4] @ encoding: [0xaf,0xb6,0xe4,0xf4] +@ CHECK: vld3.32 {d6[0], d8[0], d10[0]}, [r5] @ encoding: [0x4f,0x6a,0xa5,0xf4] +@ CHECK: vld3.8 {d12[3], d13[3], d14[3]}, [r6], r1 @ encoding: [0x61,0xc2,0xa6,0xf4] +@ CHECK: vld3.16 {d11[2], d12[2], d13[2]}, [r7], r2 @ encoding: [0x82,0xb6,0xa7,0xf4] +@ CHECK: vld3.32 {d2[1], d3[1], d4[1]}, [r8], r3 @ encoding: [0x83,0x2a,0xa8,0xf4] +@ CHECK: vld3.16 {d14[2], d16[2], d18[2]}, [r9], r4 @ encoding: [0xa4,0xe6,0xa9,0xf4] +@ CHECK: vld3.32 {d16[0], d18[0], d20[0]}, [r10], r5 @ encoding: [0x45,0x0a,0xea,0xf4] +@ CHECK: vld3.8 {d6[6], d7[6], d8[6]}, [r8]! @ encoding: [0xcd,0x62,0xa8,0xf4] +@ CHECK: vld3.16 {d9[2], d10[2], d11[2]}, [r7]! @ encoding: [0x8d,0x96,0xa7,0xf4] +@ CHECK: vld3.32 {d1[1], d2[1], d3[1]}, [r6]! @ encoding: [0x8d,0x1a,0xa6,0xf4] +@ CHECK: vld3.16 {d20[2], d21[2], d22[2]}, [r5]! @ encoding: [0xad,0x46,0xe5,0xf4] +@ CHECK: vld3.32 {d5[0], d7[0], d9[0]}, [r4]! 
@ encoding: [0x4d,0x5a,0xa4,0xf4] @ vld4.8 {d16[1], d17[1], d18[1], d19[1]}, [r0, :32] Modified: llvm/trunk/test/MC/ARM/neon-vst-encoding.s URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/ARM/neon-vst-encoding.s?rev=148745&r1=148744&r2=148745&view=diff ============================================================================== --- llvm/trunk/test/MC/ARM/neon-vst-encoding.s (original) +++ llvm/trunk/test/MC/ARM/neon-vst-encoding.s Mon Jan 23 17:20:46 2012 @@ -133,41 +133,6 @@ @ CHECK: vst2.32 {d5[0], d7[0]}, [r4, :64], r7 @ encoding: [0x57,0x59,0x84,0xf4] - vld3.8 {d16[1], d17[1], d18[1]}, [r1] - vld3.16 {d6[1], d7[1], d8[1]}, [r2] - vld3.32 {d1[1], d2[1], d3[1]}, [r3] - vld3.u16 {d27[2], d29[2], d31[2]}, [r4] - vld3.i32 {d6[0], d8[0], d10[0]}, [r5] - - vld3.i8 {d12[3], d13[3], d14[3]}, [r6], r1 - vld3.i16 {d11[2], d12[2], d13[2]}, [r7], r2 - vld3.u32 {d2[1], d3[1], d4[1]}, [r8], r3 - vld3.u16 {d14[2], d16[2], d18[2]}, [r9], r4 - vld3.i32 {d16[0], d18[0], d20[0]}, [r10], r5 - - vld3.p8 {d6[6], d7[6], d8[6]}, [r8]! - vld3.16 {d9[2], d10[2], d11[2]}, [r7]! - vld3.f32 {d1[1], d2[1], d3[1]}, [r6]! - vld3.p16 {d20[2], d22[2], d24[2]}, [r5]! - vld3.32 {d5[0], d7[0], d9[0]}, [r4]! 
- -@ CHECK: vld3.8 {d16[1], d17[1], d17[1]}, [r1] @ encoding: [0x2f,0x02,0xe1,0xf4] -@ CHECK: vld3.16 {d6[1], d7[1], d7[1]}, [r2] @ encoding: [0x4f,0x66,0xa2,0xf4] -@ CHECK: vld3.32 {d1[1], d2[1], d2[1]}, [r3] @ encoding: [0x8f,0x1a,0xa3,0xf4] -@ CHECK: vld3.16 {d27[2], d29[2], d29[2]}, [r4] @ encoding: [0xaf,0xb6,0xe4,0xf4] -@ CHECK: vld3.32 {d6[0], d8[0], d8[0]}, [r5] @ encoding: [0x4f,0x6a,0xa5,0xf4] -@ CHECK: vld3.8 {d12[3], d13[3], d13[3]}, [r6], r1 @ encoding: [0x61,0xc2,0xa6,0xf4] -@ CHECK: vld3.16 {d11[2], d12[2], d12[2]}, [r7], r2 @ encoding: [0x82,0xb6,0xa7,0xf4] -@ CHECK: vld3.32 {d2[1], d3[1], d3[1]}, [r8], r3 @ encoding: [0x83,0x2a,0xa8,0xf4] -@ CHECK: vld3.16 {d14[2], d16[2], d16[2]}, [r9], r4 @ encoding: [0xa4,0xe6,0xa9,0xf4] -@ CHECK: vld3.32 {d16[0], d18[0], d18[0]}, [r10], r5 @ encoding: [0x45,0x0a,0xea,0xf4] -@ CHECK: vld3.8 {d6[6], d7[6], d7[6]}, [r8]! @ encoding: [0xcd,0x62,0xa8,0xf4] -@ CHECK: vld3.16 {d9[2], d10[2], d10[2]}, [r7]! @ encoding: [0x8d,0x96,0xa7,0xf4] -@ CHECK: vld3.32 {d1[1], d2[1], d2[1]}, [r6]! @ encoding: [0x8d,0x1a,0xa6,0xf4] -@ CHECK: vld3.16 {d20[2], d21[2], d21[2]}, [r5]! @ encoding: [0xad,0x46,0xe5,0xf4] -@ CHECK: vld3.32 {d5[0], d7[0], d7[0]}, [r4]! 
@ encoding: [0x4d,0x5a,0xa4,0xf4] - - @ vst4.8 {d16[1], d17[1], d18[1], d19[1]}, [r0, :32] @ vst4.16 {d16[1], d17[1], d18[1], d19[1]}, [r0] @ vst4.32 {d16[1], d17[1], d18[1], d19[1]}, [r0, :128] From mcrosier at apple.com Mon Jan 23 17:30:56 2012 From: mcrosier at apple.com (Chad Rosier) Date: Mon, 23 Jan 2012 15:30:56 -0800 Subject: [llvm-commits] [llvm] r148737 - in /llvm/trunk: lib/Target/X86/AsmParser/X86AsmParser.cpp test/MC/X86/intel-syntax.s In-Reply-To: <20120123223525.B98832A6C12C@llvm.org> References: <20120123223525.B98832A6C12C@llvm.org> Message-ID: <50185F79-9B1C-408B-8960-A0782E13CA94@apple.com> Hi Devang, On Jan 23, 2012, at 2:35 PM, Devang Patel wrote: > Author: dpatel > Date: Mon Jan 23 16:35:25 2012 > New Revision: 148737 > > URL: http://llvm.org/viewvc/llvm-project?rev=148737&view=rev > Log: > Intel syntax: Robustify parsing of memory operand's displacement experssion. > > Modified: > llvm/trunk/lib/Target/X86/AsmParser/X86AsmParser.cpp > llvm/trunk/test/MC/X86/intel-syntax.s > > Modified: llvm/trunk/lib/Target/X86/AsmParser/X86AsmParser.cpp > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/AsmParser/X86AsmParser.cpp?rev=148737&r1=148736&r2=148737&view=diff > ============================================================================== > --- llvm/trunk/lib/Target/X86/AsmParser/X86AsmParser.cpp (original) > +++ llvm/trunk/lib/Target/X86/AsmParser/X86AsmParser.cpp Mon Jan 23 16:35:25 2012 > @@ -655,8 +655,13 @@ > Disp = isPlus ? ValExpr : MCConstantExpr::Create(0-Val, getContext()); > } else > return ErrorOperand(PlusLoc, "unexpected token after +"); > - } else if (getLexer().is(AsmToken::Identifier)) > - ParseRegister(IndexReg, Start, End); > + } else if (getLexer().is(AsmToken::Identifier)) { > + // This could be an index registor or a displacement expression. 
Typo: registor -> register Chad > + End = Parser.getTok().getLoc(); > + if (!IndexReg) > + ParseRegister(IndexReg, Start, End); > + else if (getParser().ParseExpression(Disp, End)) return 0; > + } > } > > if (getLexer().isNot(AsmToken::RBrac)) > > Modified: llvm/trunk/test/MC/X86/intel-syntax.s > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/X86/intel-syntax.s?rev=148737&r1=148736&r2=148737&view=diff > ============================================================================== > --- llvm/trunk/test/MC/X86/intel-syntax.s (original) > +++ llvm/trunk/test/MC/X86/intel-syntax.s Mon Jan 23 16:35:25 2012 > @@ -56,7 +56,9 @@ > // CHECK: fld %st(0) > fld ST(0) > // CHECK: movl %fs:(%rdi), %eax > -mov EAX, DWORD PTR FS:[RDI] > + mov EAX, DWORD PTR FS:[RDI] > // CHECK: leal (,%rdi,4), %r8d > -lea R8D, DWORD PTR [4*RDI] > + lea R8D, DWORD PTR [4*RDI] > +// CHECK: movl _fnan(,%ecx,4), %ecx > + mov ECX, DWORD PTR [4*ECX + _fnan] > ret > > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits From dblaikie at gmail.com Mon Jan 23 17:27:47 2012 From: dblaikie at gmail.com (David Blaikie) Date: Mon, 23 Jan 2012 23:27:47 -0000 Subject: [llvm-commits] [llvm] r148746 - /llvm/trunk/include/llvm/Support/CommandLine.h Message-ID: <20120123232747.8CF302A6C12C@llvm.org> Author: dblaikie Date: Mon Jan 23 17:27:47 2012 New Revision: 148746 URL: http://llvm.org/viewvc/llvm-project?rev=148746&view=rev Log: Changing bitfield enums to unsigned ints. This was suggested by Chandler Carruth on the basis of past experience with esoteric compilers/quirks relating to signed enums. 
Modified: llvm/trunk/include/llvm/Support/CommandLine.h Modified: llvm/trunk/include/llvm/Support/CommandLine.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Support/CommandLine.h?rev=148746&r1=148745&r2=148746&view=diff ============================================================================== --- llvm/trunk/include/llvm/Support/CommandLine.h (original) +++ llvm/trunk/include/llvm/Support/CommandLine.h Mon Jan 23 17:27:47 2012 @@ -163,12 +163,14 @@ virtual void anchor(); int NumOccurrences; // The number of times specified - enum NumOccurrencesFlag Occurrences : 3; + // Occurrences, HiddenFlag, and Formatting are all enum types but to avoid + // problems with signed enums in bitfields in MSVC we'll store them as unsigned + unsigned Occurrences : 3; // enum NumOccurrencesFlag // not using the enum type for 'Value' because zero is an implementation // detail representing the non-value unsigned Value : 2; - enum OptionHidden HiddenFlag : 2; - enum FormattingFlags Formatting : 2; + unsigned HiddenFlag : 2; // enum OptionHidden + unsigned Formatting : 2; // enum FormattingFlags unsigned Misc : 3; unsigned Position; // Position of last occurrence of the option unsigned AdditionalVals;// Greater than 0 for multi-valued option. @@ -179,17 +181,17 @@ const char *ValueStr; // String describing what the value of this option is inline enum NumOccurrencesFlag getNumOccurrencesFlag() const { - return Occurrences; + return (enum NumOccurrencesFlag)Occurrences; } inline enum ValueExpected getValueExpectedFlag() const { - return Value ? static_cast<enum ValueExpected>(Value) + return Value ? 
((enum ValueExpected)Value) : getValueExpectedFlagDefault(); } inline enum OptionHidden getOptionHiddenFlag() const { - return HiddenFlag; + return (enum OptionHidden)HiddenFlag; } inline enum FormattingFlags getFormattingFlag() const { - return Formatting; + return (enum FormattingFlags)Formatting; } inline unsigned getMiscFlags() const { return Misc; From grosbach at apple.com Mon Jan 23 17:45:44 2012 From: grosbach at apple.com (Jim Grosbach) Date: Mon, 23 Jan 2012 23:45:44 -0000 Subject: [llvm-commits] [llvm] r148748 - in /llvm/trunk: lib/Target/ARM/ARMInstrNEON.td lib/Target/ARM/AsmParser/ARMAsmParser.cpp test/MC/ARM/neon-vst-encoding.s Message-ID: <20120123234544.58CE12A6C12C@llvm.org> Author: grosbach Date: Mon Jan 23 17:45:44 2012 New Revision: 148748 URL: http://llvm.org/viewvc/llvm-project?rev=148748&view=rev Log: NEON VST3(multiple 3-element structures) assembly parsing. Modified: llvm/trunk/lib/Target/ARM/ARMInstrNEON.td llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp llvm/trunk/test/MC/ARM/neon-vst-encoding.s Modified: llvm/trunk/lib/Target/ARM/ARMInstrNEON.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrNEON.td?rev=148748&r1=148747&r2=148748&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrNEON.td (original) +++ llvm/trunk/lib/Target/ARM/ARMInstrNEON.td Mon Jan 23 17:45:44 2012 @@ -6085,6 +6085,65 @@ (ins VecListThreeQ:$list, addrmode6:$addr, rGPR:$Rm, pred:$p)>; +// VST3 multiple structure pseudo-instructions. These need special handling for +// the vector operands that the normal instructions don't yet model. +// FIXME: Remove these when the register classes and instructions are updated. 
+def VST3dAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr", + (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; +def VST3dAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr", + (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; +def VST3dAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr", + (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; +def VST3qAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr", + (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; +def VST3qAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr", + (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; +def VST3qAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr", + (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; + +def VST3dWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!", + (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; +def VST3dWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!", + (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; +def VST3dWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!", + (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; +def VST3qWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!", + (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; +def VST3qWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!", + (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; +def VST3qWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!", + (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; +def VST3dWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm", + (ins VecListThreeD:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST3dWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm", + (ins VecListThreeD:$list, 
addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST3dWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm", + (ins VecListThreeD:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST3qWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm", + (ins VecListThreeQ:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST3qWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm", + (ins VecListThreeQ:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST3qWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm", + (ins VecListThreeQ:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; + // VMOV takes an optional datatype suffix Modified: llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp?rev=148748&r1=148747&r2=148748&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp (original) +++ llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp Mon Jan 23 17:45:44 2012 @@ -5173,7 +5173,7 @@ return false; } -static unsigned getRealVSTLNOpcode(unsigned Opc, unsigned &Spacing) { +static unsigned getRealVSTOpcode(unsigned Opc, unsigned &Spacing) { switch(Opc) { default: assert(0 && "unexpected opcode!"); // VST1LN @@ -5253,10 +5253,66 @@ case ARM::VST2LNqAsm_32: Spacing = 2; return ARM::VST2LNq32; + + // VST3 + case ARM::VST3dWB_fixed_Asm_8: + Spacing = 1; + return ARM::VST3d8_UPD; + case ARM::VST3dWB_fixed_Asm_16: + Spacing = 1; + return ARM::VST3d16_UPD; + case ARM::VST3dWB_fixed_Asm_32: + Spacing = 1; + return ARM::VST3d32_UPD; + case ARM::VST3qWB_fixed_Asm_8: + Spacing = 2; + return ARM::VST3q8_UPD; + case ARM::VST3qWB_fixed_Asm_16: + Spacing = 2; + return ARM::VST3q16_UPD; + case ARM::VST3qWB_fixed_Asm_32: + Spacing = 2; + return ARM::VST3q32_UPD; + case 
ARM::VST3dWB_register_Asm_8: + Spacing = 1; + return ARM::VST3d8_UPD; + case ARM::VST3dWB_register_Asm_16: + Spacing = 1; + return ARM::VST3d16_UPD; + case ARM::VST3dWB_register_Asm_32: + Spacing = 1; + return ARM::VST3d32_UPD; + case ARM::VST3qWB_register_Asm_8: + Spacing = 2; + return ARM::VST3q8_UPD; + case ARM::VST3qWB_register_Asm_16: + Spacing = 2; + return ARM::VST3q16_UPD; + case ARM::VST3qWB_register_Asm_32: + Spacing = 2; + return ARM::VST3q32_UPD; + case ARM::VST3dAsm_8: + Spacing = 1; + return ARM::VST3d8; + case ARM::VST3dAsm_16: + Spacing = 1; + return ARM::VST3d16; + case ARM::VST3dAsm_32: + Spacing = 1; + return ARM::VST3d32; + case ARM::VST3qAsm_8: + Spacing = 2; + return ARM::VST3q8; + case ARM::VST3qAsm_16: + Spacing = 2; + return ARM::VST3q16; + case ARM::VST3qAsm_32: + Spacing = 2; + return ARM::VST3q32; } } -static unsigned getRealVLDLNOpcode(unsigned Opc, unsigned &Spacing) { +static unsigned getRealVLDOpcode(unsigned Opc, unsigned &Spacing) { switch(Opc) { default: assert(0 && "unexpected opcode!"); // VLD1LN @@ -5468,7 +5524,7 @@ // Shuffle the operands around so the lane index operand is in the // right place. unsigned Spacing; - TmpInst.setOpcode(getRealVSTLNOpcode(Inst.getOpcode(), Spacing)); + TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing)); TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb TmpInst.addOperand(Inst.getOperand(2)); // Rn TmpInst.addOperand(Inst.getOperand(3)); // alignment @@ -5490,7 +5546,7 @@ // Shuffle the operands around so the lane index operand is in the // right place. unsigned Spacing; - TmpInst.setOpcode(getRealVSTLNOpcode(Inst.getOpcode(), Spacing)); + TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing)); TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb TmpInst.addOperand(Inst.getOperand(2)); // Rn TmpInst.addOperand(Inst.getOperand(3)); // alignment @@ -5511,7 +5567,7 @@ // Shuffle the operands around so the lane index operand is in the // right place. 
unsigned Spacing; - TmpInst.setOpcode(getRealVSTLNOpcode(Inst.getOpcode(), Spacing)); + TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing)); TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb TmpInst.addOperand(Inst.getOperand(2)); // Rn TmpInst.addOperand(Inst.getOperand(3)); // alignment @@ -5533,7 +5589,7 @@ // Shuffle the operands around so the lane index operand is in the // right place. unsigned Spacing; - TmpInst.setOpcode(getRealVSTLNOpcode(Inst.getOpcode(), Spacing)); + TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing)); TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb TmpInst.addOperand(Inst.getOperand(2)); // Rn TmpInst.addOperand(Inst.getOperand(3)); // alignment @@ -5554,7 +5610,7 @@ // Shuffle the operands around so the lane index operand is in the // right place. unsigned Spacing; - TmpInst.setOpcode(getRealVSTLNOpcode(Inst.getOpcode(), Spacing)); + TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing)); TmpInst.addOperand(Inst.getOperand(2)); // Rn TmpInst.addOperand(Inst.getOperand(3)); // alignment TmpInst.addOperand(Inst.getOperand(0)); // Vd @@ -5574,7 +5630,7 @@ // Shuffle the operands around so the lane index operand is in the // right place. unsigned Spacing; - TmpInst.setOpcode(getRealVSTLNOpcode(Inst.getOpcode(), Spacing)); + TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing)); TmpInst.addOperand(Inst.getOperand(2)); // Rn TmpInst.addOperand(Inst.getOperand(3)); // alignment TmpInst.addOperand(Inst.getOperand(0)); // Vd @@ -5594,7 +5650,7 @@ // Shuffle the operands around so the lane index operand is in the // right place. 
unsigned Spacing; - TmpInst.setOpcode(getRealVLDLNOpcode(Inst.getOpcode(), Spacing)); + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); TmpInst.addOperand(Inst.getOperand(0)); // Vd TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb TmpInst.addOperand(Inst.getOperand(2)); // Rn @@ -5617,7 +5673,7 @@ // Shuffle the operands around so the lane index operand is in the // right place. unsigned Spacing; - TmpInst.setOpcode(getRealVLDLNOpcode(Inst.getOpcode(), Spacing)); + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); TmpInst.addOperand(Inst.getOperand(0)); // Vd TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + Spacing)); @@ -5644,7 +5700,7 @@ // Shuffle the operands around so the lane index operand is in the // right place. unsigned Spacing; - TmpInst.setOpcode(getRealVLDLNOpcode(Inst.getOpcode(), Spacing)); + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); TmpInst.addOperand(Inst.getOperand(0)); // Vd TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + Spacing)); @@ -5673,7 +5729,7 @@ // Shuffle the operands around so the lane index operand is in the // right place. unsigned Spacing; - TmpInst.setOpcode(getRealVLDLNOpcode(Inst.getOpcode(), Spacing)); + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); TmpInst.addOperand(Inst.getOperand(0)); // Vd TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb TmpInst.addOperand(Inst.getOperand(2)); // Rn @@ -5696,7 +5752,7 @@ // Shuffle the operands around so the lane index operand is in the // right place. unsigned Spacing; - TmpInst.setOpcode(getRealVLDLNOpcode(Inst.getOpcode(), Spacing)); + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); TmpInst.addOperand(Inst.getOperand(0)); // Vd TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + Spacing)); @@ -5723,7 +5779,7 @@ // Shuffle the operands around so the lane index operand is in the // right place. 
unsigned Spacing; - TmpInst.setOpcode(getRealVLDLNOpcode(Inst.getOpcode(), Spacing)); + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); TmpInst.addOperand(Inst.getOperand(0)); // Vd TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + Spacing)); @@ -5752,7 +5808,7 @@ // Shuffle the operands around so the lane index operand is in the // right place. unsigned Spacing; - TmpInst.setOpcode(getRealVLDLNOpcode(Inst.getOpcode(), Spacing)); + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); TmpInst.addOperand(Inst.getOperand(0)); // Vd TmpInst.addOperand(Inst.getOperand(2)); // Rn TmpInst.addOperand(Inst.getOperand(3)); // alignment @@ -5773,7 +5829,7 @@ // Shuffle the operands around so the lane index operand is in the // right place. unsigned Spacing; - TmpInst.setOpcode(getRealVLDLNOpcode(Inst.getOpcode(), Spacing)); + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); TmpInst.addOperand(Inst.getOperand(0)); // Vd TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + Spacing)); @@ -5798,7 +5854,7 @@ // Shuffle the operands around so the lane index operand is in the // right place. 
unsigned Spacing; - TmpInst.setOpcode(getRealVLDLNOpcode(Inst.getOpcode(), Spacing)); + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); TmpInst.addOperand(Inst.getOperand(0)); // Vd TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + Spacing)); @@ -5827,7 +5883,7 @@ case ARM::VLD3qAsm_32: { MCInst TmpInst; unsigned Spacing; - TmpInst.setOpcode(getRealVLDLNOpcode(Inst.getOpcode(), Spacing)); + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); TmpInst.addOperand(Inst.getOperand(0)); // Vd TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + Spacing)); @@ -5849,7 +5905,7 @@ case ARM::VLD3qWB_fixed_Asm_32: { MCInst TmpInst; unsigned Spacing; - TmpInst.setOpcode(getRealVLDLNOpcode(Inst.getOpcode(), Spacing)); + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); TmpInst.addOperand(Inst.getOperand(0)); // Vd TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + Spacing)); @@ -5873,16 +5929,87 @@ case ARM::VLD3qWB_register_Asm_32: { MCInst TmpInst; unsigned Spacing; - TmpInst.setOpcode(getRealVLDLNOpcode(Inst.getOpcode(), Spacing)); + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(Inst.getOperand(1)); // Rn + TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn + TmpInst.addOperand(Inst.getOperand(2)); // alignment + TmpInst.addOperand(Inst.getOperand(3)); // Rm + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + + // VST3 multiple 3-element structure instructions. 
+ case ARM::VST3dAsm_8: + case ARM::VST3dAsm_16: + case ARM::VST3dAsm_32: + case ARM::VST3qAsm_8: + case ARM::VST3qAsm_16: + case ARM::VST3qAsm_32: { + MCInst TmpInst; + unsigned Spacing; + TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(1)); // Rn + TmpInst.addOperand(Inst.getOperand(2)); // alignment + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(Inst.getOperand(3)); // CondCode + TmpInst.addOperand(Inst.getOperand(4)); + Inst = TmpInst; + return true; + } + + case ARM::VST3dWB_fixed_Asm_8: + case ARM::VST3dWB_fixed_Asm_16: + case ARM::VST3dWB_fixed_Asm_32: + case ARM::VST3qWB_fixed_Asm_8: + case ARM::VST3qWB_fixed_Asm_16: + case ARM::VST3qWB_fixed_Asm_32: { + MCInst TmpInst; + unsigned Spacing; + TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(1)); // Rn + TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn + TmpInst.addOperand(Inst.getOperand(2)); // alignment + TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm TmpInst.addOperand(Inst.getOperand(0)); // Vd TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + Spacing)); TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + Spacing * 2)); + TmpInst.addOperand(Inst.getOperand(3)); // CondCode + TmpInst.addOperand(Inst.getOperand(4)); + Inst = TmpInst; + return true; + } + + case ARM::VST3dWB_register_Asm_8: + case ARM::VST3dWB_register_Asm_16: + case ARM::VST3dWB_register_Asm_32: + case ARM::VST3qWB_register_Asm_8: + case ARM::VST3qWB_register_Asm_16: + case ARM::VST3qWB_register_Asm_32: { + MCInst TmpInst; + unsigned Spacing; + TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing)); TmpInst.addOperand(Inst.getOperand(1)); // Rn TmpInst.addOperand(Inst.getOperand(1)); 
// Rn_wb == tied Rn TmpInst.addOperand(Inst.getOperand(2)); // alignment TmpInst.addOperand(Inst.getOperand(3)); // Rm + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); TmpInst.addOperand(Inst.getOperand(4)); // CondCode TmpInst.addOperand(Inst.getOperand(5)); Inst = TmpInst; Modified: llvm/trunk/test/MC/ARM/neon-vst-encoding.s URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/ARM/neon-vst-encoding.s?rev=148748&r1=148747&r2=148748&view=diff ============================================================================== --- llvm/trunk/test/MC/ARM/neon-vst-encoding.s (original) +++ llvm/trunk/test/MC/ARM/neon-vst-encoding.s Mon Jan 23 17:45:44 2012 @@ -58,25 +58,45 @@ @ CHECK: vst2.32 {d8, d9, d10, d11}, [r0, :256]! @ encoding: [0xbd,0x83,0x00,0xf4] -@ vst3.8 {d16, d17, d18}, [r0, :64] -@ vst3.16 {d16, d17, d18}, [r0] -@ vst3.32 {d16, d17, d18}, [r0] -@ vst3.8 {d16, d18, d20}, [r0, :64]! -@ vst3.8 {d17, d19, d21}, [r0, :64]! -@ vst3.16 {d16, d18, d20}, [r0]! -@ vst3.16 {d17, d19, d21}, [r0]! -@ vst3.32 {d16, d18, d20}, [r0]! -@ vst3.32 {d17, d19, d21}, [r0]! - -@ FIXME: vst3.8 {d16, d17, d18}, [r0, :64] @ encoding: [0x1f,0x04,0x40,0xf4] -@ FIXME: vst3.16 {d16, d17, d18}, [r0] @ encoding: [0x4f,0x04,0x40,0xf4] -@ FIXME: vst3.32 {d16, d17, d18}, [r0] @ encoding: [0x8f,0x04,0x40,0xf4] -@ FIXME: vst3.8 {d16, d18, d20}, [r0, :64]! @ encoding: [0x1d,0x05,0x40,0xf4] -@ FIXME: vst3.8 {d17, d19, d21}, [r0, :64]! @ encoding: [0x1d,0x15,0x40,0xf4] -@ FIXME: vst3.16 {d16, d18, d20}, [r0]! @ encoding: [0x4d,0x05,0x40,0xf4] -@ FIXME: vst3.16 {d17, d19, d21}, [r0]! @ encoding: [0x4d,0x15,0x40,0xf4] -@ FIXME: vst3.32 {d16, d18, d20}, [r0]! @ encoding: [0x8d,0x05,0x40,0xf4] -@ FIXME: vst3.32 {d17, d19, d21}, [r0]! 
@ encoding: [0x8d,0x15,0x40,0xf4] + vst3.8 {d16, d17, d18}, [r1] + vst3.16 {d6, d7, d8}, [r2] + vst3.32 {d1, d2, d3}, [r3] + vst3.8 {d16, d18, d20}, [r0, :64] + vst3.u16 {d27, d29, d31}, [r4] + vst3.i32 {d6, d8, d10}, [r5] + + vst3.i8 {d12, d13, d14}, [r6], r1 + vst3.i16 {d11, d12, d13}, [r7], r2 + vst3.u32 {d2, d3, d4}, [r8], r3 + vst3.8 {d4, d6, d8}, [r9], r4 + vst3.u16 {d14, d16, d18}, [r9], r4 + vst3.i32 {d16, d18, d20}, [r10], r5 + + vst3.p8 {d6, d7, d8}, [r8]! + vst3.16 {d9, d10, d11}, [r7]! + vst3.f32 {d1, d2, d3}, [r6]! + vst3.8 {d16, d18, d20}, [r0, :64]! + vst3.p16 {d20, d22, d24}, [r5]! + vst3.32 {d5, d7, d9}, [r4]! + +@ CHECK: vst3.8 {d16, d17, d18}, [r1] @ encoding: [0x0f,0x04,0x41,0xf4] +@ CHECK: vst3.16 {d6, d7, d8}, [r2] @ encoding: [0x4f,0x64,0x02,0xf4] +@ CHECK: vst3.32 {d1, d2, d3}, [r3] @ encoding: [0x8f,0x14,0x03,0xf4] +@ CHECK: vst3.8 {d16, d18, d20}, [r0, :64] @ encoding: [0x1f,0x05,0x40,0xf4] +@ CHECK: vst3.16 {d27, d29, d31}, [r4] @ encoding: [0x4f,0xb5,0x44,0xf4] +@ CHECK: vst3.32 {d6, d8, d10}, [r5] @ encoding: [0x8f,0x65,0x05,0xf4] +@ CHECK: vst3.8 {d12, d13, d14}, [r6], r1 @ encoding: [0x01,0xc4,0x06,0xf4] +@ CHECK: vst3.16 {d11, d12, d13}, [r7], r2 @ encoding: [0x42,0xb4,0x07,0xf4] +@ CHECK: vst3.32 {d2, d3, d4}, [r8], r3 @ encoding: [0x83,0x24,0x08,0xf4] +@ CHECK: vst3.8 {d4, d6, d8}, [r9], r4 @ encoding: [0x04,0x45,0x09,0xf4] +@ CHECK: vst3.16 {d14, d16, d18}, [r9], r4 @ encoding: [0x44,0xe5,0x09,0xf4] +@ CHECK: vst3.32 {d16, d18, d20}, [r10], r5 @ encoding: [0x85,0x05,0x4a,0xf4] +@ CHECK: vst3.8 {d6, d7, d8}, [r8]! @ encoding: [0x0d,0x64,0x08,0xf4] +@ CHECK: vst3.16 {d9, d10, d11}, [r7]! @ encoding: [0x4d,0x94,0x07,0xf4] +@ CHECK: vst3.32 {d1, d2, d3}, [r6]! @ encoding: [0x8d,0x14,0x06,0xf4] +@ CHECK: vst3.8 {d16, d18, d20}, [r0, :64]! @ encoding: [0x1d,0x05,0x40,0xf4] +@ CHECK: vst3.16 {d20, d22, d24}, [r5]! @ encoding: [0x4d,0x45,0x45,0xf4] +@ CHECK: vst3.32 {d5, d7, d9}, [r4]! 
@ encoding: [0x8d,0x55,0x04,0xf4] @ vst4.8 {d16, d17, d18, d19}, [r0, :64] From dpatel at apple.com Mon Jan 23 17:56:33 2012 From: dpatel at apple.com (Devang Patel) Date: Mon, 23 Jan 2012 23:56:33 -0000 Subject: [llvm-commits] [llvm] r148751 - /llvm/trunk/lib/Target/X86/AsmParser/X86AsmParser.cpp Message-ID: <20120123235633.5D8492A6C12C@llvm.org> Author: dpatel Date: Mon Jan 23 17:56:33 2012 New Revision: 148751 URL: http://llvm.org/viewvc/llvm-project?rev=148751&view=rev Log: Fix typo. Modified: llvm/trunk/lib/Target/X86/AsmParser/X86AsmParser.cpp Modified: llvm/trunk/lib/Target/X86/AsmParser/X86AsmParser.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/AsmParser/X86AsmParser.cpp?rev=148751&r1=148750&r2=148751&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/AsmParser/X86AsmParser.cpp (original) +++ llvm/trunk/lib/Target/X86/AsmParser/X86AsmParser.cpp Mon Jan 23 17:56:33 2012 @@ -656,7 +656,7 @@ } else return ErrorOperand(PlusLoc, "unexpected token after +"); } else if (getLexer().is(AsmToken::Identifier)) { - // This could be an index registor or a displacement expression. + // This could be an index register or a displacement expression. End = Parser.getTok().getLoc(); if (!IndexReg) ParseRegister(IndexReg, Start, End); From kcc at google.com Mon Jan 23 18:02:11 2012 From: kcc at google.com (Kostya Serebryany) Date: Mon, 23 Jan 2012 16:02:11 -0800 Subject: [llvm-commits] fix the MSVC warning in include/llvm-c/Core.h In-Reply-To: References: Message-ID: On Mon, Jan 23, 2012 at 3:19 PM, David Blaikie wrote: > On Mon, Jan 23, 2012 at 3:16 PM, Chandler Carruth > wrote: > > On Mon, Jan 23, 2012 at 3:04 PM, David Blaikie > wrote: > >> > >> On Mon, Jan 23, 2012 at 2:53 PM, Paul Robinson > >> wrote: > >> > On Mon, Jan 23, 2012 at 1:45 PM, Kostya Serebryany > >> > wrote: > >> >> There are 8 functions dealing with LLVMAttribute > >> >> in include/llvm-c/Core.h. 
> >> >> Do you suggest to add 8 more functions that will deal with uint64_t? > >> >> Like this? > >> >> > >> >> void LLVMAddFunctionAttr(LLVMValueRef Fn, LLVMAttribute PA); > >> >> void LLVMAddFunctionAttr64(LLVMValueRef Fn, uint64_t PA); > >> >> > >> >> --kcc > >> > > >> > With a more meaningful type name, e.g. LLVMAttribute64 or some such, > >> > yes; > >> > >> Given the constraints of the stable C API (binary compatibility) > >> there's no way we can make this forwards compatible, is there? > >> (passing a struct we could add elements to in the future - that would > >> break binary compat, yes?) > > > > > > You simply have to make the struct opaque, and only manipulated through > API > > calls. However that's pretty high cost. > > Yeah - I was starting to see that. Pity - so we just add a new > generation of API calls every time we reach the limit. > So, do we go with void LLVMAddFunctionAttribute64(LLVMValueRef Fn, uint64_t PA); or with something more opaque like void LLVMAddFunctionAttribute(LLVMValueRef Fn, void *PAPtr); ? > > - David > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits > -------------- next part -------------- An HTML attachment was scrubbed... 
URL: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120123/68bf22d3/attachment.html From chandlerc at google.com Mon Jan 23 18:07:56 2012 From: chandlerc at google.com (Chandler Carruth) Date: Mon, 23 Jan 2012 16:07:56 -0800 Subject: [llvm-commits] fix the MSVC warning in include/llvm-c/Core.h In-Reply-To: References: Message-ID: On Mon, Jan 23, 2012 at 4:02 PM, Kostya Serebryany wrote: > > > On Mon, Jan 23, 2012 at 3:19 PM, David Blaikie wrote: > >> On Mon, Jan 23, 2012 at 3:16 PM, Chandler Carruth >> wrote: >> > On Mon, Jan 23, 2012 at 3:04 PM, David Blaikie >> wrote: >> >> >> >> On Mon, Jan 23, 2012 at 2:53 PM, Paul Robinson >> >> wrote: >> >> > On Mon, Jan 23, 2012 at 1:45 PM, Kostya Serebryany >> >> > wrote: >> >> >> There are 8 functions dealing with LLVMAttribute >> >> >> in include/llvm-c/Core.h. >> >> >> Do you suggest to add 8 more functions that will deal with uint64_t? >> >> >> Like this? >> >> >> >> >> >> void LLVMAddFunctionAttr(LLVMValueRef Fn, LLVMAttribute PA); >> >> >> void LLVMAddFunctionAttr64(LLVMValueRef Fn, uint64_t PA); >> >> >> >> >> >> --kcc >> >> > >> >> > With a more meaningful type name, e.g. LLVMAttribute64 or some such, >> >> > yes; >> >> >> >> Given the constraints of the stable C API (binary compatibility) >> >> there's no way we can make this forwards compatible, is there? >> >> (passing a struct we could add elements to in the future - that would >> >> break binary compat, yes?) >> > >> > >> > You simply have to make the struct opaque, and only manipulated through >> API >> > calls. However that's pretty high cost. >> >> Yeah - I was starting to see that. Pity - so we just add a new >> generation of API calls every time we reach the limit. 
>> > > So, do we go with > void LLVMAddFunctionAttribute64(LLVMValueRef Fn, uint64_t PA); > > or with something more opaque like > void LLVMAddFunctionAttribute(LLVMValueRef Fn, void *PAPtr); > Typically you would go with something in between: struct LLVMAttribute; void LLVMAddFOOBARFunctionAttribute(LLVMValueRef Fn, LLVMAttribute *Attributes); Then provide Add and Remove variations for all of the attributes, substituting the attribute name for FOOBAR. I really don't know that this (very heavyweight approach) is the right way to go. I'd rather someone who is maintaining the LLVM C-APIs chime in; it may be that just upgrading the interfaces to 64-bit integers is "enough" for now. -------------- next part -------------- An HTML attachment was scrubbed... URL: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120123/0a51c343/attachment.html From grosbach at apple.com Mon Jan 23 18:07:41 2012 From: grosbach at apple.com (Jim Grosbach) Date: Tue, 24 Jan 2012 00:07:41 -0000 Subject: [llvm-commits] [llvm] r148755 - in /llvm/trunk: lib/Target/ARM/ARMInstrNEON.td lib/Target/ARM/AsmParser/ARMAsmParser.cpp test/MC/ARM/neon-vst-encoding.s Message-ID: <20120124000742.025562A6C12C@llvm.org> Author: grosbach Date: Mon Jan 23 18:07:41 2012 New Revision: 148755 URL: http://llvm.org/viewvc/llvm-project?rev=148755&view=rev Log: NEON VST3(single element from one lane) assembly parsing. 
Modified: llvm/trunk/lib/Target/ARM/ARMInstrNEON.td llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp llvm/trunk/test/MC/ARM/neon-vst-encoding.s Modified: llvm/trunk/lib/Target/ARM/ARMInstrNEON.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrNEON.td?rev=148755&r1=148754&r2=148755&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrNEON.td (original) +++ llvm/trunk/lib/Target/ARM/ARMInstrNEON.td Mon Jan 23 18:07:41 2012 @@ -6085,6 +6085,56 @@ (ins VecListThreeQ:$list, addrmode6:$addr, rGPR:$Rm, pred:$p)>; +// VST3 single-lane pseudo-instructions. These need special handling for +// the lane index that an InstAlias can't handle, so we use these instead. +def VST3LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr", + (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST3LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr", + (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST3LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr", + (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST3LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr", + (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST3LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr", + (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>; + +def VST3LNdWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!", + (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST3LNdWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!", + (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST3LNdWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!", + (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>; 
+def VST3LNqWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!", + (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST3LNqWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!", + (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST3LNdWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm", + (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST3LNdWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm", + (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST3LNdWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm", + (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST3LNqWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm", + (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST3LNqWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm", + (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; + + // VST3 multiple structurepseudo-instructions. These need special handling for // the vector operands that the normal instructions don't yet model. // FIXME: Remove these when the register classes and instructions are updated. 
Modified: llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp?rev=148755&r1=148754&r2=148755&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp (original) +++ llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp Mon Jan 23 18:07:41 2012 @@ -5254,6 +5254,53 @@ Spacing = 2; return ARM::VST2LNq32; + // VST3LN + case ARM::VST3LNdWB_fixed_Asm_8: + Spacing = 1; + return ARM::VST3LNd8_UPD; + case ARM::VST3LNdWB_fixed_Asm_16: + Spacing = 1; + return ARM::VST3LNd16_UPD; + case ARM::VST3LNdWB_fixed_Asm_32: + Spacing = 1; + return ARM::VST3LNd32_UPD; + case ARM::VST3LNqWB_fixed_Asm_16: + Spacing = 1; + return ARM::VST3LNq16_UPD; + case ARM::VST3LNqWB_fixed_Asm_32: + Spacing = 2; + return ARM::VST3LNq32_UPD; + case ARM::VST3LNdWB_register_Asm_8: + Spacing = 1; + return ARM::VST3LNd8_UPD; + case ARM::VST3LNdWB_register_Asm_16: + Spacing = 1; + return ARM::VST3LNd16_UPD; + case ARM::VST3LNdWB_register_Asm_32: + Spacing = 1; + return ARM::VST3LNd32_UPD; + case ARM::VST3LNqWB_register_Asm_16: + Spacing = 2; + return ARM::VST3LNq16_UPD; + case ARM::VST3LNqWB_register_Asm_32: + Spacing = 2; + return ARM::VST3LNq32_UPD; + case ARM::VST3LNdAsm_8: + Spacing = 1; + return ARM::VST3LNd8; + case ARM::VST3LNdAsm_16: + Spacing = 1; + return ARM::VST3LNd16; + case ARM::VST3LNdAsm_32: + Spacing = 1; + return ARM::VST3LNd32; + case ARM::VST3LNqAsm_16: + Spacing = 2; + return ARM::VST3LNq16; + case ARM::VST3LNqAsm_32: + Spacing = 2; + return ARM::VST3LNq32; + // VST3 case ARM::VST3dWB_fixed_Asm_8: Spacing = 1; @@ -5560,6 +5607,33 @@ Inst = TmpInst; return true; } + + case ARM::VST3LNdWB_register_Asm_8: + case ARM::VST3LNdWB_register_Asm_16: + case ARM::VST3LNdWB_register_Asm_32: + case ARM::VST3LNqWB_register_Asm_16: + case ARM::VST3LNqWB_register_Asm_32: { + MCInst TmpInst; + // Shuffle the operands around 
so the lane index operand is in the + // right place. + unsigned Spacing; + TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(Inst.getOperand(4)); // Rm + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(5)); // CondCode + TmpInst.addOperand(Inst.getOperand(6)); + Inst = TmpInst; + return true; + } + case ARM::VST1LNdWB_fixed_Asm_8: case ARM::VST1LNdWB_fixed_Asm_16: case ARM::VST1LNdWB_fixed_Asm_32: { @@ -5603,6 +5677,33 @@ Inst = TmpInst; return true; } + + case ARM::VST3LNdWB_fixed_Asm_8: + case ARM::VST3LNdWB_fixed_Asm_16: + case ARM::VST3LNdWB_fixed_Asm_32: + case ARM::VST3LNqWB_fixed_Asm_16: + case ARM::VST3LNqWB_fixed_Asm_32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. 
+ unsigned Spacing; + TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + case ARM::VST1LNdAsm_8: case ARM::VST1LNdAsm_16: case ARM::VST1LNdAsm_32: { @@ -5642,6 +5743,31 @@ Inst = TmpInst; return true; } + + case ARM::VST3LNdAsm_8: + case ARM::VST3LNdAsm_16: + case ARM::VST3LNdAsm_32: + case ARM::VST3LNqAsm_16: + case ARM::VST3LNqAsm_32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. + unsigned Spacing; + TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + // Handle NEON VLD complex aliases. 
case ARM::VLD1LNdWB_register_Asm_8: case ARM::VLD1LNdWB_register_Asm_16: Modified: llvm/trunk/test/MC/ARM/neon-vst-encoding.s URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/ARM/neon-vst-encoding.s?rev=148755&r1=148754&r2=148755&view=diff ============================================================================== --- llvm/trunk/test/MC/ARM/neon-vst-encoding.s (original) +++ llvm/trunk/test/MC/ARM/neon-vst-encoding.s Mon Jan 23 18:07:41 2012 @@ -153,6 +153,41 @@ @ CHECK: vst2.32 {d5[0], d7[0]}, [r4, :64], r7 @ encoding: [0x57,0x59,0x84,0xf4] + vst3.8 {d16[1], d17[1], d18[1]}, [r1] + vst3.16 {d6[1], d7[1], d8[1]}, [r2] + vst3.32 {d1[1], d2[1], d3[1]}, [r3] + vst3.u16 {d27[1], d29[1], d31[1]}, [r4] + vst3.i32 {d6[1], d8[1], d10[1]}, [r5] + + vst3.i8 {d12[1], d13[1], d14[1]}, [r6], r1 + vst3.i16 {d11[1], d12[1], d13[1]}, [r7], r2 + vst3.u32 {d2[1], d3[1], d4[1]}, [r8], r3 + vst3.u16 {d14[1], d16[1], d18[1]}, [r9], r4 + vst3.i32 {d16[1], d18[1], d20[1]}, [r10], r5 + + vst3.p8 {d6[1], d7[1], d8[1]}, [r8]! + vst3.16 {d9[1], d10[1], d11[1]}, [r7]! + vst3.f32 {d1[1], d2[1], d3[1]}, [r6]! + vst3.p16 {d20[1], d22[1], d24[1]}, [r5]! + vst3.32 {d5[1], d7[1], d9[1]}, [r4]! 
+ +@ CHECK: vst3.8 {d16[1], d17[1], d18[1]}, [r1] @ encoding: [0x2f,0x02,0xc1,0xf4] +@ CHECK: vst3.16 {d6[1], d7[1], d8[1]}, [r2] @ encoding: [0x4f,0x66,0x82,0xf4] +@ CHECK: vst3.32 {d1[1], d2[1], d3[1]}, [r3] @ encoding: [0x8f,0x1a,0x83,0xf4] +@ CHECK: vst3.16 {d27[1], d29[1], d31[1]}, [r4] @ encoding: [0x6f,0xb6,0xc4,0xf4] +@ CHECK: vst3.32 {d6[1], d8[1], d10[1]}, [r5] @ encoding: [0xcf,0x6a,0x85,0xf4] +@ CHECK: vst3.8 {d12[1], d13[1], d14[1]}, [r6], r1 @ encoding: [0x21,0xc2,0x86,0xf4] +@ CHECK: vst3.16 {d11[1], d12[1], d13[1]}, [r7], r2 @ encoding: [0x42,0xb6,0x87,0xf4] +@ CHECK: vst3.32 {d2[1], d3[1], d4[1]}, [r8], r3 @ encoding: [0x83,0x2a,0x88,0xf4] +@ CHECK: vst3.16 {d14[1], d16[1], d18[1]}, [r9], r4 @ encoding: [0x64,0xe6,0x89,0xf4] +@ CHECK: vst3.32 {d16[1], d18[1], d20[1]}, [r10], r5 @ encoding: [0xc5,0x0a,0xca,0xf4] +@ CHECK: vst3.8 {d6[1], d7[1], d8[1]}, [r8]! @ encoding: [0x2d,0x62,0x88,0xf4] +@ CHECK: vst3.16 {d9[1], d10[1], d11[1]}, [r7]! @ encoding: [0x4d,0x96,0x87,0xf4] +@ CHECK: vst3.32 {d1[1], d2[1], d3[1]}, [r6]! @ encoding: [0x8d,0x1a,0x86,0xf4] +@ CHECK: vst3.16 {d20[1], d21[1], d22[1]}, [r5]! @ encoding: [0x6d,0x46,0xc5,0xf4] +@ CHECK: vst3.32 {d5[1], d7[1], d9[1]}, [r4]! @ encoding: [0xcd,0x5a,0x84,0xf4] + + @ vst4.8 {d16[1], d17[1], d18[1], d19[1]}, [r0, :32] @ vst4.16 {d16[1], d17[1], d18[1], d19[1]}, [r0] @ vst4.32 {d16[1], d17[1], d18[1], d19[1]}, [r0, :128] From grosbach at apple.com Mon Jan 23 18:12:39 2012 From: grosbach at apple.com (Jim Grosbach) Date: Tue, 24 Jan 2012 00:12:39 -0000 Subject: [llvm-commits] [llvm] r148757 - /llvm/trunk/lib/Target/ARM/ARMInstrNEON.td Message-ID: <20120124001239.42E6A2A6C12C@llvm.org> Author: grosbach Date: Mon Jan 23 18:12:39 2012 New Revision: 148757 URL: http://llvm.org/viewvc/llvm-project?rev=148757&view=rev Log: Fix typo. 
Modified: llvm/trunk/lib/Target/ARM/ARMInstrNEON.td Modified: llvm/trunk/lib/Target/ARM/ARMInstrNEON.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrNEON.td?rev=148757&r1=148756&r2=148757&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrNEON.td (original) +++ llvm/trunk/lib/Target/ARM/ARMInstrNEON.td Mon Jan 23 18:12:39 2012 @@ -6026,7 +6026,7 @@ (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, rGPR:$Rm, pred:$p)>; -// VLD3 multiple structurepseudo-instructions. These need special handling for +// VLD3 multiple structure pseudo-instructions. These need special handling for // the vector operands that the normal instructions don't yet model. // FIXME: Remove these when the register classes and instructions are updated. def VLD3dAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr", @@ -6135,7 +6135,7 @@ rGPR:$Rm, pred:$p)>; -// VST3 multiple structurepseudo-instructions. These need special handling for +// VST3 multiple structure pseudo-instructions. These need special handling for // the vector operands that the normal instructions don't yet model. // FIXME: Remove these when the register classes and instructions are updated. 
def VST3dAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr", From chandlerc at gmail.com Mon Jan 23 18:30:17 2012 From: chandlerc at gmail.com (Chandler Carruth) Date: Tue, 24 Jan 2012 00:30:17 -0000 Subject: [llvm-commits] [llvm] r148759 - in /llvm/trunk: include/llvm/MC/MCAsmInfo.h lib/CodeGen/AsmPrinter/ARMException.cpp lib/Target/ARM/ARMAsmPrinter.cpp lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp test/CodeGen/ARM/ehabi-unwind.ll Message-ID: <20120124003017.548252A6C12C@llvm.org> Author: chandlerc Date: Mon Jan 23 18:30:17 2012 New Revision: 148759 URL: http://llvm.org/viewvc/llvm-project?rev=148759&view=rev Log: Revert r148686 (and r148694, a fix to it) due to a serious layering violation -- MC cannot depend on CodeGen. Specifically, the MCTargetDesc component of each target is actually a subcomponent of the MC library. As such, it cannot depend on the target-independent code generator, because MC itself cannot depend on the target-independent code generator. This change moved a flag from the ARM MCTargetDesc file ARMMCAsmInfo.cpp to the CodeGen layer in ARMException.cpp, leaving behind an 'extern' to refer back to it. That layering order isn't viable given the constraints outlined above. Commandline flags are designed to be static specifically to avoid these types of bugs. Fixing this is likely going to require some non-trivial refactoring. 
Modified: llvm/trunk/include/llvm/MC/MCAsmInfo.h llvm/trunk/lib/CodeGen/AsmPrinter/ARMException.cpp llvm/trunk/lib/Target/ARM/ARMAsmPrinter.cpp llvm/trunk/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp llvm/trunk/test/CodeGen/ARM/ehabi-unwind.ll Modified: llvm/trunk/include/llvm/MC/MCAsmInfo.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/MC/MCAsmInfo.h?rev=148759&r1=148758&r2=148759&view=diff ============================================================================== --- llvm/trunk/include/llvm/MC/MCAsmInfo.h (original) +++ llvm/trunk/include/llvm/MC/MCAsmInfo.h Mon Jan 23 18:30:17 2012 @@ -30,7 +30,6 @@ namespace ExceptionHandling { enum ExceptionsType { None, DwarfCFI, SjLj, ARM, Win64 }; - enum ARMEHABIMode { ARMEHABIDisabled, ARMEHABIUnwind, ARMEHABIFull }; } namespace LCOMM { Modified: llvm/trunk/lib/CodeGen/AsmPrinter/ARMException.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/ARMException.cpp?rev=148759&r1=148758&r2=148759&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/ARMException.cpp (original) +++ llvm/trunk/lib/CodeGen/AsmPrinter/ARMException.cpp Mon Jan 23 18:30:17 2012 @@ -29,7 +29,6 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/FormattedStream.h" #include "llvm/ADT/SmallString.h" @@ -37,18 +36,6 @@ #include "llvm/ADT/Twine.h" using namespace llvm; -cl::opt -EnableARMEHABI("arm-enable-ehabi", cl::Hidden, - cl::desc("Generate ARM EHABI tables:"), - cl::values(clEnumValN(ExceptionHandling::ARMEHABIDisabled, "no", - "Do not generate ARM EHABI tables"), - clEnumValN(ExceptionHandling::ARMEHABIUnwind, "unwind", - "Emit unwinding instructions, but not descriptors"), - clEnumValN(ExceptionHandling::ARMEHABIFull, "full", - "Generate full ARM EHABI tables"), - 
clEnumValEnd)); - - ARMException::ARMException(AsmPrinter *A) : DwarfException(A), shouldEmitTable(false), shouldEmitMoves(false), shouldEmitTableModule(false) @@ -85,15 +72,13 @@ Asm->OutStreamer.EmitPersonality(PerSym); } - if (EnableARMEHABI == ExceptionHandling::ARMEHABIFull) { - // Map all labels and get rid of any dead landing pads. - MMI->TidyLandingPads(); + // Map all labels and get rid of any dead landing pads. + MMI->TidyLandingPads(); - Asm->OutStreamer.EmitHandlerData(); + Asm->OutStreamer.EmitHandlerData(); - // Emit actual exception table - EmitExceptionTable(); - } + // Emit actual exception table + EmitExceptionTable(); } Asm->OutStreamer.EmitFnEnd(); Modified: llvm/trunk/lib/Target/ARM/ARMAsmPrinter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMAsmPrinter.cpp?rev=148759&r1=148758&r2=148759&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMAsmPrinter.cpp (original) +++ llvm/trunk/lib/Target/ARM/ARMAsmPrinter.cpp Mon Jan 23 18:30:17 2012 @@ -1192,7 +1192,7 @@ } } -extern cl::opt EnableARMEHABI; +extern cl::opt EnableARMEHABI; // Simple pseudo-instructions have their lowering (with expansion to real // instructions) auto-generated. @@ -1203,8 +1203,7 @@ OutStreamer.EmitCodeRegion(); // Emit unwinding stuff for frame-related instructions - if (EnableARMEHABI != ExceptionHandling::ARMEHABIDisabled && - MI->getFlag(MachineInstr::FrameSetup)) + if (EnableARMEHABI && MI->getFlag(MachineInstr::FrameSetup)) EmitUnwindingInstruction(MI); // Do any auto-generated pseudo lowerings. 
Modified: llvm/trunk/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp?rev=148759&r1=148758&r2=148759&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp (original) +++ llvm/trunk/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp Mon Jan 23 18:30:17 2012 @@ -16,7 +16,10 @@ using namespace llvm; -extern cl::opt EnableARMEHABI; +cl::opt +EnableARMEHABI("arm-enable-ehabi", cl::Hidden, + cl::desc("Generate ARM EHABI tables"), + cl::init(false)); static const char *const arm_asm_table[] = { @@ -79,6 +82,6 @@ SupportsDebugInformation = true; // Exceptions handling - if (EnableARMEHABI != ExceptionHandling::ARMEHABIDisabled) + if (EnableARMEHABI) ExceptionsType = ExceptionHandling::ARM; } Modified: llvm/trunk/test/CodeGen/ARM/ehabi-unwind.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/ehabi-unwind.ll?rev=148759&r1=148758&r2=148759&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/ARM/ehabi-unwind.ll (original) +++ llvm/trunk/test/CodeGen/ARM/ehabi-unwind.ll Mon Jan 23 18:30:17 2012 @@ -1,8 +1,7 @@ ; Test that the EHABI unwind instruction generator does not encounter any ; unfamiliar instructions. 
-; RUN: llc < %s -mtriple=thumbv7 -arm-enable-ehabi=full -disable-fp-elim -; RUN: llc < %s -mtriple=thumbv7 -arm-enable-ehabi=full -; RUN: llc < %s -mtriple=thumbv7 -arm-enable-ehabi=unwind +; RUN: llc < %s -mtriple=thumbv7 -arm-enable-ehabi -disable-fp-elim +; RUN: llc < %s -mtriple=thumbv7 -arm-enable-ehabi define void @_Z1fv() nounwind { entry: From grosbach at apple.com Mon Jan 23 18:43:12 2012 From: grosbach at apple.com (Jim Grosbach) Date: Tue, 24 Jan 2012 00:43:12 -0000 Subject: [llvm-commits] [llvm] r148761 - /llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp Message-ID: <20120124004313.06E142A6C12C@llvm.org> Author: grosbach Date: Mon Jan 23 18:43:12 2012 New Revision: 148761 URL: http://llvm.org/viewvc/llvm-project?rev=148761&view=rev Log: Tidy up. Remove some vertical space for readability. Modified: llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp Modified: llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp?rev=148761&r1=148760&r2=148761&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp (original) +++ llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp Mon Jan 23 18:43:12 2012 @@ -1106,6 +1106,11 @@ return VectorList.Count == 3; } + bool isVecListFourQ() const { + if (!isDoubleSpacedVectorList()) return false; + return VectorList.Count == 4; + } + bool isSingleSpacedVectorAllLanes() const { return Kind == k_VectorListAllLanes && !VectorList.isDoubleSpaced; } @@ -5177,185 +5182,71 @@ switch(Opc) { default: assert(0 && "unexpected opcode!"); // VST1LN - case ARM::VST1LNdWB_fixed_Asm_8: - Spacing = 1; - return ARM::VST1LNd8_UPD; - case ARM::VST1LNdWB_fixed_Asm_16: - Spacing = 1; - return ARM::VST1LNd16_UPD; - case ARM::VST1LNdWB_fixed_Asm_32: - Spacing = 1; - return ARM::VST1LNd32_UPD; - case ARM::VST1LNdWB_register_Asm_8: - Spacing = 1; - return 
ARM::VST1LNd8_UPD; - case ARM::VST1LNdWB_register_Asm_16: - Spacing = 1; - return ARM::VST1LNd16_UPD; - case ARM::VST1LNdWB_register_Asm_32: - Spacing = 1; - return ARM::VST1LNd32_UPD; - case ARM::VST1LNdAsm_8: - Spacing = 1; - return ARM::VST1LNd8; - case ARM::VST1LNdAsm_16: - Spacing = 1; - return ARM::VST1LNd16; - case ARM::VST1LNdAsm_32: - Spacing = 1; - return ARM::VST1LNd32; + case ARM::VST1LNdWB_fixed_Asm_8: Spacing = 1; return ARM::VST1LNd8_UPD; + case ARM::VST1LNdWB_fixed_Asm_16: Spacing = 1; return ARM::VST1LNd16_UPD; + case ARM::VST1LNdWB_fixed_Asm_32: Spacing = 1; return ARM::VST1LNd32_UPD; + case ARM::VST1LNdWB_register_Asm_8: Spacing = 1; return ARM::VST1LNd8_UPD; + case ARM::VST1LNdWB_register_Asm_16: Spacing = 1; return ARM::VST1LNd16_UPD; + case ARM::VST1LNdWB_register_Asm_32: Spacing = 1; return ARM::VST1LNd32_UPD; + case ARM::VST1LNdAsm_8: Spacing = 1; return ARM::VST1LNd8; + case ARM::VST1LNdAsm_16: Spacing = 1; return ARM::VST1LNd16; + case ARM::VST1LNdAsm_32: Spacing = 1; return ARM::VST1LNd32; // VST2LN - case ARM::VST2LNdWB_fixed_Asm_8: - Spacing = 1; - return ARM::VST2LNd8_UPD; - case ARM::VST2LNdWB_fixed_Asm_16: - Spacing = 1; - return ARM::VST2LNd16_UPD; - case ARM::VST2LNdWB_fixed_Asm_32: - Spacing = 1; - return ARM::VST2LNd32_UPD; - case ARM::VST2LNqWB_fixed_Asm_16: - Spacing = 2; - return ARM::VST2LNq16_UPD; - case ARM::VST2LNqWB_fixed_Asm_32: - Spacing = 2; - return ARM::VST2LNq32_UPD; - - case ARM::VST2LNdWB_register_Asm_8: - Spacing = 1; - return ARM::VST2LNd8_UPD; - case ARM::VST2LNdWB_register_Asm_16: - Spacing = 1; - return ARM::VST2LNd16_UPD; - case ARM::VST2LNdWB_register_Asm_32: - Spacing = 1; - return ARM::VST2LNd32_UPD; - case ARM::VST2LNqWB_register_Asm_16: - Spacing = 2; - return ARM::VST2LNq16_UPD; - case ARM::VST2LNqWB_register_Asm_32: - Spacing = 2; - return ARM::VST2LNq32_UPD; - - case ARM::VST2LNdAsm_8: - Spacing = 1; - return ARM::VST2LNd8; - case ARM::VST2LNdAsm_16: - Spacing = 1; - return ARM::VST2LNd16; - case 
ARM::VST2LNdAsm_32: - Spacing = 1; - return ARM::VST2LNd32; - case ARM::VST2LNqAsm_16: - Spacing = 2; - return ARM::VST2LNq16; - case ARM::VST2LNqAsm_32: - Spacing = 2; - return ARM::VST2LNq32; + case ARM::VST2LNdWB_fixed_Asm_8: Spacing = 1; return ARM::VST2LNd8_UPD; + case ARM::VST2LNdWB_fixed_Asm_16: Spacing = 1; return ARM::VST2LNd16_UPD; + case ARM::VST2LNdWB_fixed_Asm_32: Spacing = 1; return ARM::VST2LNd32_UPD; + case ARM::VST2LNqWB_fixed_Asm_16: Spacing = 2; return ARM::VST2LNq16_UPD; + case ARM::VST2LNqWB_fixed_Asm_32: Spacing = 2; return ARM::VST2LNq32_UPD; + + case ARM::VST2LNdWB_register_Asm_8: Spacing = 1; return ARM::VST2LNd8_UPD; + case ARM::VST2LNdWB_register_Asm_16: Spacing = 1; return ARM::VST2LNd16_UPD; + case ARM::VST2LNdWB_register_Asm_32: Spacing = 1; return ARM::VST2LNd32_UPD; + case ARM::VST2LNqWB_register_Asm_16: Spacing = 2; return ARM::VST2LNq16_UPD; + case ARM::VST2LNqWB_register_Asm_32: Spacing = 2; return ARM::VST2LNq32_UPD; + + case ARM::VST2LNdAsm_8: Spacing = 1; return ARM::VST2LNd8; + case ARM::VST2LNdAsm_16: Spacing = 1; return ARM::VST2LNd16; + case ARM::VST2LNdAsm_32: Spacing = 1; return ARM::VST2LNd32; + case ARM::VST2LNqAsm_16: Spacing = 2; return ARM::VST2LNq16; + case ARM::VST2LNqAsm_32: Spacing = 2; return ARM::VST2LNq32; // VST3LN - case ARM::VST3LNdWB_fixed_Asm_8: - Spacing = 1; - return ARM::VST3LNd8_UPD; - case ARM::VST3LNdWB_fixed_Asm_16: - Spacing = 1; - return ARM::VST3LNd16_UPD; - case ARM::VST3LNdWB_fixed_Asm_32: - Spacing = 1; - return ARM::VST3LNd32_UPD; - case ARM::VST3LNqWB_fixed_Asm_16: - Spacing = 1; - return ARM::VST3LNq16_UPD; - case ARM::VST3LNqWB_fixed_Asm_32: - Spacing = 2; - return ARM::VST3LNq32_UPD; - case ARM::VST3LNdWB_register_Asm_8: - Spacing = 1; - return ARM::VST3LNd8_UPD; - case ARM::VST3LNdWB_register_Asm_16: - Spacing = 1; - return ARM::VST3LNd16_UPD; - case ARM::VST3LNdWB_register_Asm_32: - Spacing = 1; - return ARM::VST3LNd32_UPD; - case ARM::VST3LNqWB_register_Asm_16: - Spacing = 2; - return 
ARM::VST3LNq16_UPD; - case ARM::VST3LNqWB_register_Asm_32: - Spacing = 2; - return ARM::VST3LNq32_UPD; - case ARM::VST3LNdAsm_8: - Spacing = 1; - return ARM::VST3LNd8; - case ARM::VST3LNdAsm_16: - Spacing = 1; - return ARM::VST3LNd16; - case ARM::VST3LNdAsm_32: - Spacing = 1; - return ARM::VST3LNd32; - case ARM::VST3LNqAsm_16: - Spacing = 2; - return ARM::VST3LNq16; - case ARM::VST3LNqAsm_32: - Spacing = 2; - return ARM::VST3LNq32; + case ARM::VST3LNdWB_fixed_Asm_8: Spacing = 1; return ARM::VST3LNd8_UPD; + case ARM::VST3LNdWB_fixed_Asm_16: Spacing = 1; return ARM::VST3LNd16_UPD; + case ARM::VST3LNdWB_fixed_Asm_32: Spacing = 1; return ARM::VST3LNd32_UPD; + case ARM::VST3LNqWB_fixed_Asm_16: Spacing = 1; return ARM::VST3LNq16_UPD; + case ARM::VST3LNqWB_fixed_Asm_32: Spacing = 2; return ARM::VST3LNq32_UPD; + case ARM::VST3LNdWB_register_Asm_8: Spacing = 1; return ARM::VST3LNd8_UPD; + case ARM::VST3LNdWB_register_Asm_16: Spacing = 1; return ARM::VST3LNd16_UPD; + case ARM::VST3LNdWB_register_Asm_32: Spacing = 1; return ARM::VST3LNd32_UPD; + case ARM::VST3LNqWB_register_Asm_16: Spacing = 2; return ARM::VST3LNq16_UPD; + case ARM::VST3LNqWB_register_Asm_32: Spacing = 2; return ARM::VST3LNq32_UPD; + case ARM::VST3LNdAsm_8: Spacing = 1; return ARM::VST3LNd8; + case ARM::VST3LNdAsm_16: Spacing = 1; return ARM::VST3LNd16; + case ARM::VST3LNdAsm_32: Spacing = 1; return ARM::VST3LNd32; + case ARM::VST3LNqAsm_16: Spacing = 2; return ARM::VST3LNq16; + case ARM::VST3LNqAsm_32: Spacing = 2; return ARM::VST3LNq32; // VST3 - case ARM::VST3dWB_fixed_Asm_8: - Spacing = 1; - return ARM::VST3d8_UPD; - case ARM::VST3dWB_fixed_Asm_16: - Spacing = 1; - return ARM::VST3d16_UPD; - case ARM::VST3dWB_fixed_Asm_32: - Spacing = 1; - return ARM::VST3d32_UPD; - case ARM::VST3qWB_fixed_Asm_8: - Spacing = 2; - return ARM::VST3q8_UPD; - case ARM::VST3qWB_fixed_Asm_16: - Spacing = 2; - return ARM::VST3q16_UPD; - case ARM::VST3qWB_fixed_Asm_32: - Spacing = 2; - return ARM::VST3q32_UPD; - case 
ARM::VST3dWB_register_Asm_8: - Spacing = 1; - return ARM::VST3d8_UPD; - case ARM::VST3dWB_register_Asm_16: - Spacing = 1; - return ARM::VST3d16_UPD; - case ARM::VST3dWB_register_Asm_32: - Spacing = 1; - return ARM::VST3d32_UPD; - case ARM::VST3qWB_register_Asm_8: - Spacing = 2; - return ARM::VST3q8_UPD; - case ARM::VST3qWB_register_Asm_16: - Spacing = 2; - return ARM::VST3q16_UPD; - case ARM::VST3qWB_register_Asm_32: - Spacing = 2; - return ARM::VST3q32_UPD; - case ARM::VST3dAsm_8: - Spacing = 1; - return ARM::VST3d8; - case ARM::VST3dAsm_16: - Spacing = 1; - return ARM::VST3d16; - case ARM::VST3dAsm_32: - Spacing = 1; - return ARM::VST3d32; - case ARM::VST3qAsm_8: - Spacing = 2; - return ARM::VST3q8; - case ARM::VST3qAsm_16: - Spacing = 2; - return ARM::VST3q16; - case ARM::VST3qAsm_32: - Spacing = 2; - return ARM::VST3q32; + case ARM::VST3dWB_fixed_Asm_8: Spacing = 1; return ARM::VST3d8_UPD; + case ARM::VST3dWB_fixed_Asm_16: Spacing = 1; return ARM::VST3d16_UPD; + case ARM::VST3dWB_fixed_Asm_32: Spacing = 1; return ARM::VST3d32_UPD; + case ARM::VST3qWB_fixed_Asm_8: Spacing = 2; return ARM::VST3q8_UPD; + case ARM::VST3qWB_fixed_Asm_16: Spacing = 2; return ARM::VST3q16_UPD; + case ARM::VST3qWB_fixed_Asm_32: Spacing = 2; return ARM::VST3q32_UPD; + case ARM::VST3dWB_register_Asm_8: Spacing = 1; return ARM::VST3d8_UPD; + case ARM::VST3dWB_register_Asm_16: Spacing = 1; return ARM::VST3d16_UPD; + case ARM::VST3dWB_register_Asm_32: Spacing = 1; return ARM::VST3d32_UPD; + case ARM::VST3qWB_register_Asm_8: Spacing = 2; return ARM::VST3q8_UPD; + case ARM::VST3qWB_register_Asm_16: Spacing = 2; return ARM::VST3q16_UPD; + case ARM::VST3qWB_register_Asm_32: Spacing = 2; return ARM::VST3q32_UPD; + case ARM::VST3dAsm_8: Spacing = 1; return ARM::VST3d8; + case ARM::VST3dAsm_16: Spacing = 1; return ARM::VST3d16; + case ARM::VST3dAsm_32: Spacing = 1; return ARM::VST3d32; + case ARM::VST3qAsm_8: Spacing = 2; return ARM::VST3q8; + case ARM::VST3qAsm_16: Spacing = 2; return 
ARM::VST3q16; + case ARM::VST3qAsm_32: Spacing = 2; return ARM::VST3q32; } } @@ -5363,183 +5254,69 @@ switch(Opc) { default: assert(0 && "unexpected opcode!"); // VLD1LN - case ARM::VLD1LNdWB_fixed_Asm_8: - Spacing = 1; - return ARM::VLD1LNd8_UPD; - case ARM::VLD1LNdWB_fixed_Asm_16: - Spacing = 1; - return ARM::VLD1LNd16_UPD; - case ARM::VLD1LNdWB_fixed_Asm_32: - Spacing = 1; - return ARM::VLD1LNd32_UPD; - case ARM::VLD1LNdWB_register_Asm_8: - Spacing = 1; - return ARM::VLD1LNd8_UPD; - case ARM::VLD1LNdWB_register_Asm_16: - Spacing = 1; - return ARM::VLD1LNd16_UPD; - case ARM::VLD1LNdWB_register_Asm_32: - Spacing = 1; - return ARM::VLD1LNd32_UPD; - case ARM::VLD1LNdAsm_8: - Spacing = 1; - return ARM::VLD1LNd8; - case ARM::VLD1LNdAsm_16: - Spacing = 1; - return ARM::VLD1LNd16; - case ARM::VLD1LNdAsm_32: - Spacing = 1; - return ARM::VLD1LNd32; + case ARM::VLD1LNdWB_fixed_Asm_8: Spacing = 1; return ARM::VLD1LNd8_UPD; + case ARM::VLD1LNdWB_fixed_Asm_16: Spacing = 1; return ARM::VLD1LNd16_UPD; + case ARM::VLD1LNdWB_fixed_Asm_32: Spacing = 1; return ARM::VLD1LNd32_UPD; + case ARM::VLD1LNdWB_register_Asm_8: Spacing = 1; return ARM::VLD1LNd8_UPD; + case ARM::VLD1LNdWB_register_Asm_16: Spacing = 1; return ARM::VLD1LNd16_UPD; + case ARM::VLD1LNdWB_register_Asm_32: Spacing = 1; return ARM::VLD1LNd32_UPD; + case ARM::VLD1LNdAsm_8: Spacing = 1; return ARM::VLD1LNd8; + case ARM::VLD1LNdAsm_16: Spacing = 1; return ARM::VLD1LNd16; + case ARM::VLD1LNdAsm_32: Spacing = 1; return ARM::VLD1LNd32; // VLD2LN - case ARM::VLD2LNdWB_fixed_Asm_8: - Spacing = 1; - return ARM::VLD2LNd8_UPD; - case ARM::VLD2LNdWB_fixed_Asm_16: - Spacing = 1; - return ARM::VLD2LNd16_UPD; - case ARM::VLD2LNdWB_fixed_Asm_32: - Spacing = 1; - return ARM::VLD2LNd32_UPD; - case ARM::VLD2LNqWB_fixed_Asm_16: - Spacing = 1; - return ARM::VLD2LNq16_UPD; - case ARM::VLD2LNqWB_fixed_Asm_32: - Spacing = 2; - return ARM::VLD2LNq32_UPD; - case ARM::VLD2LNdWB_register_Asm_8: - Spacing = 1; - return ARM::VLD2LNd8_UPD; - case 
ARM::VLD2LNdWB_register_Asm_16: - Spacing = 1; - return ARM::VLD2LNd16_UPD; - case ARM::VLD2LNdWB_register_Asm_32: - Spacing = 1; - return ARM::VLD2LNd32_UPD; - case ARM::VLD2LNqWB_register_Asm_16: - Spacing = 2; - return ARM::VLD2LNq16_UPD; - case ARM::VLD2LNqWB_register_Asm_32: - Spacing = 2; - return ARM::VLD2LNq32_UPD; - case ARM::VLD2LNdAsm_8: - Spacing = 1; - return ARM::VLD2LNd8; - case ARM::VLD2LNdAsm_16: - Spacing = 1; - return ARM::VLD2LNd16; - case ARM::VLD2LNdAsm_32: - Spacing = 1; - return ARM::VLD2LNd32; - case ARM::VLD2LNqAsm_16: - Spacing = 2; - return ARM::VLD2LNq16; - case ARM::VLD2LNqAsm_32: - Spacing = 2; - return ARM::VLD2LNq32; + case ARM::VLD2LNdWB_fixed_Asm_8: Spacing = 1; return ARM::VLD2LNd8_UPD; + case ARM::VLD2LNdWB_fixed_Asm_16: Spacing = 1; return ARM::VLD2LNd16_UPD; + case ARM::VLD2LNdWB_fixed_Asm_32: Spacing = 1; return ARM::VLD2LNd32_UPD; + case ARM::VLD2LNqWB_fixed_Asm_16: Spacing = 1; return ARM::VLD2LNq16_UPD; + case ARM::VLD2LNqWB_fixed_Asm_32: Spacing = 2; return ARM::VLD2LNq32_UPD; + case ARM::VLD2LNdWB_register_Asm_8: Spacing = 1; return ARM::VLD2LNd8_UPD; + case ARM::VLD2LNdWB_register_Asm_16: Spacing = 1; return ARM::VLD2LNd16_UPD; + case ARM::VLD2LNdWB_register_Asm_32: Spacing = 1; return ARM::VLD2LNd32_UPD; + case ARM::VLD2LNqWB_register_Asm_16: Spacing = 2; return ARM::VLD2LNq16_UPD; + case ARM::VLD2LNqWB_register_Asm_32: Spacing = 2; return ARM::VLD2LNq32_UPD; + case ARM::VLD2LNdAsm_8: Spacing = 1; return ARM::VLD2LNd8; + case ARM::VLD2LNdAsm_16: Spacing = 1; return ARM::VLD2LNd16; + case ARM::VLD2LNdAsm_32: Spacing = 1; return ARM::VLD2LNd32; + case ARM::VLD2LNqAsm_16: Spacing = 2; return ARM::VLD2LNq16; + case ARM::VLD2LNqAsm_32: Spacing = 2; return ARM::VLD2LNq32; // VLD3LN - case ARM::VLD3LNdWB_fixed_Asm_8: - Spacing = 1; - return ARM::VLD3LNd8_UPD; - case ARM::VLD3LNdWB_fixed_Asm_16: - Spacing = 1; - return ARM::VLD3LNd16_UPD; - case ARM::VLD3LNdWB_fixed_Asm_32: - Spacing = 1; - return ARM::VLD3LNd32_UPD; - case 
ARM::VLD3LNqWB_fixed_Asm_16: - Spacing = 1; - return ARM::VLD3LNq16_UPD; - case ARM::VLD3LNqWB_fixed_Asm_32: - Spacing = 2; - return ARM::VLD3LNq32_UPD; - case ARM::VLD3LNdWB_register_Asm_8: - Spacing = 1; - return ARM::VLD3LNd8_UPD; - case ARM::VLD3LNdWB_register_Asm_16: - Spacing = 1; - return ARM::VLD3LNd16_UPD; - case ARM::VLD3LNdWB_register_Asm_32: - Spacing = 1; - return ARM::VLD3LNd32_UPD; - case ARM::VLD3LNqWB_register_Asm_16: - Spacing = 2; - return ARM::VLD3LNq16_UPD; - case ARM::VLD3LNqWB_register_Asm_32: - Spacing = 2; - return ARM::VLD3LNq32_UPD; - case ARM::VLD3LNdAsm_8: - Spacing = 1; - return ARM::VLD3LNd8; - case ARM::VLD3LNdAsm_16: - Spacing = 1; - return ARM::VLD3LNd16; - case ARM::VLD3LNdAsm_32: - Spacing = 1; - return ARM::VLD3LNd32; - case ARM::VLD3LNqAsm_16: - Spacing = 2; - return ARM::VLD3LNq16; - case ARM::VLD3LNqAsm_32: - Spacing = 2; - return ARM::VLD3LNq32; + case ARM::VLD3LNdWB_fixed_Asm_8: Spacing = 1; return ARM::VLD3LNd8_UPD; + case ARM::VLD3LNdWB_fixed_Asm_16: Spacing = 1; return ARM::VLD3LNd16_UPD; + case ARM::VLD3LNdWB_fixed_Asm_32: Spacing = 1; return ARM::VLD3LNd32_UPD; + case ARM::VLD3LNqWB_fixed_Asm_16: Spacing = 1; return ARM::VLD3LNq16_UPD; + case ARM::VLD3LNqWB_fixed_Asm_32: Spacing = 2; return ARM::VLD3LNq32_UPD; + case ARM::VLD3LNdWB_register_Asm_8: Spacing = 1; return ARM::VLD3LNd8_UPD; + case ARM::VLD3LNdWB_register_Asm_16: Spacing = 1; return ARM::VLD3LNd16_UPD; + case ARM::VLD3LNdWB_register_Asm_32: Spacing = 1; return ARM::VLD3LNd32_UPD; + case ARM::VLD3LNqWB_register_Asm_16: Spacing = 2; return ARM::VLD3LNq16_UPD; + case ARM::VLD3LNqWB_register_Asm_32: Spacing = 2; return ARM::VLD3LNq32_UPD; + case ARM::VLD3LNdAsm_8: Spacing = 1; return ARM::VLD3LNd8; + case ARM::VLD3LNdAsm_16: Spacing = 1; return ARM::VLD3LNd16; + case ARM::VLD3LNdAsm_32: Spacing = 1; return ARM::VLD3LNd32; + case ARM::VLD3LNqAsm_16: Spacing = 2; return ARM::VLD3LNq16; + case ARM::VLD3LNqAsm_32: Spacing = 2; return ARM::VLD3LNq32; // VLD3 - case 
ARM::VLD3dWB_fixed_Asm_8: - Spacing = 1; - return ARM::VLD3d8_UPD; - case ARM::VLD3dWB_fixed_Asm_16: - Spacing = 1; - return ARM::VLD3d16_UPD; - case ARM::VLD3dWB_fixed_Asm_32: - Spacing = 1; - return ARM::VLD3d32_UPD; - case ARM::VLD3qWB_fixed_Asm_8: - Spacing = 2; - return ARM::VLD3q8_UPD; - case ARM::VLD3qWB_fixed_Asm_16: - Spacing = 2; - return ARM::VLD3q16_UPD; - case ARM::VLD3qWB_fixed_Asm_32: - Spacing = 2; - return ARM::VLD3q32_UPD; - case ARM::VLD3dWB_register_Asm_8: - Spacing = 1; - return ARM::VLD3d8_UPD; - case ARM::VLD3dWB_register_Asm_16: - Spacing = 1; - return ARM::VLD3d16_UPD; - case ARM::VLD3dWB_register_Asm_32: - Spacing = 1; - return ARM::VLD3d32_UPD; - case ARM::VLD3qWB_register_Asm_8: - Spacing = 2; - return ARM::VLD3q8_UPD; - case ARM::VLD3qWB_register_Asm_16: - Spacing = 2; - return ARM::VLD3q16_UPD; - case ARM::VLD3qWB_register_Asm_32: - Spacing = 2; - return ARM::VLD3q32_UPD; - case ARM::VLD3dAsm_8: - Spacing = 1; - return ARM::VLD3d8; - case ARM::VLD3dAsm_16: - Spacing = 1; - return ARM::VLD3d16; - case ARM::VLD3dAsm_32: - Spacing = 1; - return ARM::VLD3d32; - case ARM::VLD3qAsm_8: - Spacing = 2; - return ARM::VLD3q8; - case ARM::VLD3qAsm_16: - Spacing = 2; - return ARM::VLD3q16; - case ARM::VLD3qAsm_32: - Spacing = 2; - return ARM::VLD3q32; + case ARM::VLD3dWB_fixed_Asm_8: Spacing = 1; return ARM::VLD3d8_UPD; + case ARM::VLD3dWB_fixed_Asm_16: Spacing = 1; return ARM::VLD3d16_UPD; + case ARM::VLD3dWB_fixed_Asm_32: Spacing = 1; return ARM::VLD3d32_UPD; + case ARM::VLD3qWB_fixed_Asm_8: Spacing = 2; return ARM::VLD3q8_UPD; + case ARM::VLD3qWB_fixed_Asm_16: Spacing = 2; return ARM::VLD3q16_UPD; + case ARM::VLD3qWB_fixed_Asm_32: Spacing = 2; return ARM::VLD3q32_UPD; + case ARM::VLD3dWB_register_Asm_8: Spacing = 1; return ARM::VLD3d8_UPD; + case ARM::VLD3dWB_register_Asm_16: Spacing = 1; return ARM::VLD3d16_UPD; + case ARM::VLD3dWB_register_Asm_32: Spacing = 1; return ARM::VLD3d32_UPD; + case ARM::VLD3qWB_register_Asm_8: Spacing = 2; return 
ARM::VLD3q8_UPD; + case ARM::VLD3qWB_register_Asm_16: Spacing = 2; return ARM::VLD3q16_UPD; + case ARM::VLD3qWB_register_Asm_32: Spacing = 2; return ARM::VLD3q32_UPD; + case ARM::VLD3dAsm_8: Spacing = 1; return ARM::VLD3d8; + case ARM::VLD3dAsm_16: Spacing = 1; return ARM::VLD3d16; + case ARM::VLD3dAsm_32: Spacing = 1; return ARM::VLD3d32; + case ARM::VLD3qAsm_8: Spacing = 2; return ARM::VLD3q8; + case ARM::VLD3qAsm_16: Spacing = 2; return ARM::VLD3q16; + case ARM::VLD3qAsm_32: Spacing = 2; return ARM::VLD3q32; } } From grosbach at apple.com Mon Jan 23 18:43:17 2012 From: grosbach at apple.com (Jim Grosbach) Date: Tue, 24 Jan 2012 00:43:17 -0000 Subject: [llvm-commits] [llvm] r148762 - in /llvm/trunk: lib/Target/ARM/ARMInstrNEON.td lib/Target/ARM/AsmParser/ARMAsmParser.cpp lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp lib/Target/ARM/InstPrinter/ARMInstPrinter.h test/MC/ARM/neon-vld-encoding.s Message-ID: <20120124004317.EAADE2A6C12C@llvm.org> Author: grosbach Date: Mon Jan 23 18:43:17 2012 New Revision: 148762 URL: http://llvm.org/viewvc/llvm-project?rev=148762&view=rev Log: NEON VLD4(multiple 4 element structures) assembly parsing. Modified: llvm/trunk/lib/Target/ARM/ARMInstrNEON.td llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp llvm/trunk/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp llvm/trunk/lib/Target/ARM/InstPrinter/ARMInstPrinter.h llvm/trunk/test/MC/ARM/neon-vld-encoding.s Modified: llvm/trunk/lib/Target/ARM/ARMInstrNEON.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrNEON.td?rev=148762&r1=148761&r2=148762&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrNEON.td (original) +++ llvm/trunk/lib/Target/ARM/ARMInstrNEON.td Mon Jan 23 18:43:17 2012 @@ -133,6 +133,15 @@ def VecListThreeQ : RegisterOperand { let ParserMatchClass = VecListThreeQAsmOperand; } +// Register list of three D registers spaced by 2 (three Q registers). 
+def VecListFourQAsmOperand : AsmOperandClass { + let Name = "VecListFourQ"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListOperands"; +} +def VecListFourQ : RegisterOperand { + let ParserMatchClass = VecListFourQAsmOperand; +} // Register list of one D register, with "all lanes" subscripting. def VecListOneDAllLanesAsmOperand : AsmOperandClass { @@ -6196,6 +6205,65 @@ +// VLD4 multiple structure pseudo-instructions. These need special handling for +// the vector operands that the normal instructions don't yet model. +// FIXME: Remove these when the register classes and instructions are updated. +def VLD4dAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr", + (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; +def VLD4dAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", + (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; +def VLD4dAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", + (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; +def VLD4qAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr", + (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; +def VLD4qAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", + (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; +def VLD4qAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", + (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; + +def VLD4dWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!", + (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; +def VLD4dWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!", + (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; +def VLD4dWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!", + (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; +def VLD4qWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!", + (ins VecListFourQ:$list, 
addrmode6:$addr, pred:$p)>; +def VLD4qWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!", + (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; +def VLD4qWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!", + (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; +def VLD4dWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm", + (ins VecListFourD:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD4dWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm", + (ins VecListFourD:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD4dWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm", + (ins VecListFourD:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD4qWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm", + (ins VecListFourQ:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD4qWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm", + (ins VecListFourQ:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD4qWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm", + (ins VecListFourQ:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; + // VMOV takes an optional datatype suffix defm : VFPDTAnyInstAlias<"vmov${p}", "$Vd, $Vm", (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>; Modified: llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp?rev=148762&r1=148761&r2=148762&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp (original) +++ llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp Mon Jan 23 18:43:17 2012 @@ -5317,6 +5317,26 @@ case ARM::VLD3qAsm_8: Spacing = 2; return ARM::VLD3q8; case ARM::VLD3qAsm_16: Spacing = 2; return 
ARM::VLD3q16; case ARM::VLD3qAsm_32: Spacing = 2; return ARM::VLD3q32; + + // VLD4 + case ARM::VLD4dWB_fixed_Asm_8: Spacing = 1; return ARM::VLD4d8_UPD; + case ARM::VLD4dWB_fixed_Asm_16: Spacing = 1; return ARM::VLD4d16_UPD; + case ARM::VLD4dWB_fixed_Asm_32: Spacing = 1; return ARM::VLD4d32_UPD; + case ARM::VLD4qWB_fixed_Asm_8: Spacing = 2; return ARM::VLD4q8_UPD; + case ARM::VLD4qWB_fixed_Asm_16: Spacing = 2; return ARM::VLD4q16_UPD; + case ARM::VLD4qWB_fixed_Asm_32: Spacing = 2; return ARM::VLD4q32_UPD; + case ARM::VLD4dWB_register_Asm_8: Spacing = 1; return ARM::VLD4d8_UPD; + case ARM::VLD4dWB_register_Asm_16: Spacing = 1; return ARM::VLD4d16_UPD; + case ARM::VLD4dWB_register_Asm_32: Spacing = 1; return ARM::VLD4d32_UPD; + case ARM::VLD4qWB_register_Asm_8: Spacing = 2; return ARM::VLD4q8_UPD; + case ARM::VLD4qWB_register_Asm_16: Spacing = 2; return ARM::VLD4q16_UPD; + case ARM::VLD4qWB_register_Asm_32: Spacing = 2; return ARM::VLD4q32_UPD; + case ARM::VLD4dAsm_8: Spacing = 1; return ARM::VLD4d8; + case ARM::VLD4dAsm_16: Spacing = 1; return ARM::VLD4d16; + case ARM::VLD4dAsm_32: Spacing = 1; return ARM::VLD4d32; + case ARM::VLD4qAsm_8: Spacing = 2; return ARM::VLD4q8; + case ARM::VLD4qAsm_16: Spacing = 2; return ARM::VLD4q16; + case ARM::VLD4qAsm_32: Spacing = 2; return ARM::VLD4q32; } } @@ -5848,6 +5868,83 @@ return true; } + // VLD4 multiple 3-element structure instructions. 
+ case ARM::VLD4dAsm_8: + case ARM::VLD4dAsm_16: + case ARM::VLD4dAsm_32: + case ARM::VLD4qAsm_8: + case ARM::VLD4qAsm_16: + case ARM::VLD4qAsm_32: { + MCInst TmpInst; + unsigned Spacing; + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 3)); + TmpInst.addOperand(Inst.getOperand(1)); // Rn + TmpInst.addOperand(Inst.getOperand(2)); // alignment + TmpInst.addOperand(Inst.getOperand(3)); // CondCode + TmpInst.addOperand(Inst.getOperand(4)); + Inst = TmpInst; + return true; + } + + case ARM::VLD4dWB_fixed_Asm_8: + case ARM::VLD4dWB_fixed_Asm_16: + case ARM::VLD4dWB_fixed_Asm_32: + case ARM::VLD4qWB_fixed_Asm_8: + case ARM::VLD4qWB_fixed_Asm_16: + case ARM::VLD4qWB_fixed_Asm_32: { + MCInst TmpInst; + unsigned Spacing; + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 3)); + TmpInst.addOperand(Inst.getOperand(1)); // Rn + TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn + TmpInst.addOperand(Inst.getOperand(2)); // alignment + TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm + TmpInst.addOperand(Inst.getOperand(3)); // CondCode + TmpInst.addOperand(Inst.getOperand(4)); + Inst = TmpInst; + return true; + } + + case ARM::VLD4dWB_register_Asm_8: + case ARM::VLD4dWB_register_Asm_16: + case ARM::VLD4dWB_register_Asm_32: + case ARM::VLD4qWB_register_Asm_8: + case ARM::VLD4qWB_register_Asm_16: + case ARM::VLD4qWB_register_Asm_32: { + 
MCInst TmpInst; + unsigned Spacing; + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 3)); + TmpInst.addOperand(Inst.getOperand(1)); // Rn + TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn + TmpInst.addOperand(Inst.getOperand(2)); // alignment + TmpInst.addOperand(Inst.getOperand(3)); // Rm + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + // VST3 multiple 3-element structure instructions. case ARM::VST3dAsm_8: case ARM::VST3dAsm_16: Modified: llvm/trunk/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp?rev=148762&r1=148761&r2=148762&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp (original) +++ llvm/trunk/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp Mon Jan 23 18:43:17 2012 @@ -1096,3 +1096,15 @@ << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << ", " << getRegisterName(MI->getOperand(OpNum).getReg() + 4) << "}"; } + +void ARMInstPrinter::printVectorListFourSpaced(const MCInst *MI, + unsigned OpNum, + raw_ostream &O) { + // Normally, it's not safe to use register enum values directly with + // addition to get the next register, but for VFP registers, the + // sort order is guaranteed because they're all of the form D. 
+ O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << ", " + << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << ", " + << getRegisterName(MI->getOperand(OpNum).getReg() + 4) << ", " + << getRegisterName(MI->getOperand(OpNum).getReg() + 6) << "}"; +} Modified: llvm/trunk/lib/Target/ARM/InstPrinter/ARMInstPrinter.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/InstPrinter/ARMInstPrinter.h?rev=148762&r1=148761&r2=148762&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/InstPrinter/ARMInstPrinter.h (original) +++ llvm/trunk/lib/Target/ARM/InstPrinter/ARMInstPrinter.h Mon Jan 23 18:43:17 2012 @@ -145,6 +145,8 @@ raw_ostream &O); void printVectorListThreeSpaced(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printVectorListFourSpaced(const MCInst *MI, unsigned OpNum, + raw_ostream &O); }; } // end namespace llvm Modified: llvm/trunk/test/MC/ARM/neon-vld-encoding.s URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/ARM/neon-vld-encoding.s?rev=148762&r1=148761&r2=148762&view=diff ============================================================================== --- llvm/trunk/test/MC/ARM/neon-vld-encoding.s (original) +++ llvm/trunk/test/MC/ARM/neon-vld-encoding.s Mon Jan 23 18:43:17 2012 @@ -196,25 +196,45 @@ @ CHECK: vld3.32 {d5, d7, d9}, [r4]! @ encoding: [0x8d,0x55,0x24,0xf4] -@ vld4.8 {d16, d17, d18, d19}, [r0, :64] -@ vld4.16 {d16, d17, d18, d19}, [r0, :128] -@ vld4.32 {d16, d17, d18, d19}, [r0, :256] -@ vld4.8 {d16, d18, d20, d22}, [r0, :256]! -@ vld4.8 {d17, d19, d21, d23}, [r0, :256]! -@ vld4.16 {d16, d18, d20, d22}, [r0]! -@ vld4.16 {d17, d19, d21, d23}, [r0]! -@ vld4.32 {d16, d18, d20, d22}, [r0]! -@ vld4.32 {d17, d19, d21, d23}, [r0]! 
- -@ FIXME: vld4.8 {d16, d17, d18, d19}, [r0, :64]@ encoding: [0x1f,0x00,0x60,0xf4] -@ FIXME: vld4.16 {d16, d17, d18, d19}, [r0,:128]@ encoding:[0x6f,0x00,0x60,0xf4] -@ FIXME: vld4.32 {d16, d17, d18, d19}, [r0,:256]@ encoding:[0xbf,0x00,0x60,0xf4] -@ FIXME: vld4.8 {d16, d18, d20, d22}, [r0,:256]!@ encoding:[0x3d,0x01,0x60,0xf4] -@ FIXME: vld4.8 {d17, d19, d21, d23}, [r0,:256]!@ encoding:[0x3d,0x11,0x60,0xf4] -@ FIXME: vld4.16 {d16, d18, d20, d22}, [r0]! @ encoding: [0x4d,0x01,0x60,0xf4] -@ FIXME: vld4.16 {d17, d19, d21, d23}, [r0]! @ encoding: [0x4d,0x11,0x60,0xf4] -@ FIXME: vld4.32 {d16, d18, d20, d22}, [r0]! @ encoding: [0x8d,0x01,0x60,0xf4] -@ FIXME: vld4.32 {d17, d19, d21, d23}, [r0]! @ encoding: [0x8d,0x11,0x60,0xf4] + vld4.8 {d16, d17, d18, d19}, [r1, :64] + vld4.16 {d16, d17, d18, d19}, [r2, :128] + vld4.32 {d16, d17, d18, d19}, [r3, :256] + vld4.8 {d17, d19, d21, d23}, [r5, :256] + vld4.16 {d17, d19, d21, d23}, [r7] + vld4.32 {d16, d18, d20, d22}, [r8] + + vld4.s8 {d16, d17, d18, d19}, [r1, :64]! + vld4.s16 {d16, d17, d18, d19}, [r2, :128]! + vld4.s32 {d16, d17, d18, d19}, [r3, :256]! + vld4.u8 {d17, d19, d21, d23}, [r5, :256]! + vld4.u16 {d17, d19, d21, d23}, [r7]! + vld4.u32 {d16, d18, d20, d22}, [r8]! 
+ + vld4.p8 {d16, d17, d18, d19}, [r1, :64], r8 + vld4.p16 {d16, d17, d18, d19}, [r2], r7 + vld4.f32 {d16, d17, d18, d19}, [r3, :64], r5 + vld4.i8 {d16, d18, d20, d22}, [r4, :256], r2 + vld4.i16 {d16, d18, d20, d22}, [r6], r3 + vld4.i32 {d17, d19, d21, d23}, [r9], r4 + +@ CHECK: vld4.8 {d16, d17, d18, d19}, [r1, :64] @ encoding: [0x1f,0x00,0x61,0xf4] +@ CHECK: vld4.16 {d16, d17, d18, d19}, [r2, :128] @ encoding: [0x6f,0x00,0x62,0xf4] +@ CHECK: vld4.32 {d16, d17, d18, d19}, [r3, :256] @ encoding: [0xbf,0x00,0x63,0xf4] +@ CHECK: vld4.8 {d17, d19, d21, d23}, [r5, :256] @ encoding: [0x3f,0x11,0x65,0xf4] +@ CHECK: vld4.16 {d17, d19, d21, d23}, [r7] @ encoding: [0x4f,0x11,0x67,0xf4] +@ CHECK: vld4.32 {d16, d18, d20, d22}, [r8] @ encoding: [0x8f,0x01,0x68,0xf4] +@ CHECK: vld4.8 {d16, d17, d18, d19}, [r1, :64]! @ encoding: [0x1d,0x00,0x61,0xf4] +@ CHECK: vld4.16 {d16, d17, d18, d19}, [r2, :128]! @ encoding: [0x6d,0x00,0x62,0xf4] +@ CHECK: vld4.32 {d16, d17, d18, d19}, [r3, :256]! @ encoding: [0xbd,0x00,0x63,0xf4] +@ CHECK: vld4.8 {d17, d19, d21, d23}, [r5, :256]! @ encoding: [0x3d,0x11,0x65,0xf4] +@ CHECK: vld4.16 {d17, d19, d21, d23}, [r7]! @ encoding: [0x4d,0x11,0x67,0xf4] +@ CHECK: vld4.32 {d16, d18, d20, d22}, [r8]! 
@ encoding: [0x8d,0x01,0x68,0xf4] +@ CHECK: vld4.8 {d16, d17, d18, d19}, [r1, :64], r8 @ encoding: [0x18,0x00,0x61,0xf4] +@ CHECK: vld4.16 {d16, d17, d18, d19}, [r2], r7 @ encoding: [0x47,0x00,0x62,0xf4] +@ CHECK: vld4.32 {d16, d17, d18, d19}, [r3, :64], r5 @ encoding: [0x95,0x00,0x63,0xf4] +@ CHECK: vld4.8 {d16, d18, d20, d22}, [r4, :256], r2 @ encoding: [0x32,0x01,0x64,0xf4] +@ CHECK: vld4.16 {d16, d18, d20, d22}, [r6], r3 @ encoding: [0x43,0x01,0x66,0xf4] +@ CHECK: vld4.32 {d17, d19, d21, d23}, [r9], r4 @ encoding: [0x84,0x11,0x69,0xf4] vld1.8 {d4[]}, [r1] From chandlerc at google.com Mon Jan 23 18:47:10 2012 From: chandlerc at google.com (Chandler Carruth) Date: Mon, 23 Jan 2012 16:47:10 -0800 Subject: [llvm-commits] [llvm] r148686 - in /llvm/trunk: include/llvm/MC/MCAsmInfo.h lib/CodeGen/AsmPrinter/ARMException.cpp lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp test/CodeGen/ARM/ehabi-unwind.ll In-Reply-To: <20120123075740.9CD762A6C12C@llvm.org> References: <20120123075740.9CD762A6C12C@llvm.org> Message-ID: On Sun, Jan 22, 2012 at 11:57 PM, Evgeniy Stepanov < eugeni.stepanov at gmail.com> wrote: > Author: eugenis > Date: Mon Jan 23 01:57:39 2012 > New Revision: 148686 > > URL: http://llvm.org/viewvc/llvm-project?rev=148686&view=rev > Log: > An option to selectively enable parts of ARM EHABI support. > > This change adds an new value to the --arm-enable-ehabi option that > disables emitting unwinding descriptors. This mode gives a working > backtrace() without the (currently broken) exception support. > I've had to revert this in r148759. It violates a very important layering constraint: the MC library (of which every target's MCTargetDesc sub-component is a part) must not depend on the CodeGen library. Moving this flag into CodeGen doesn't work. See my description for more details. 
Frankly, I think you stumbled on a much bigger layering problem: if ARMException.cpp (and related utilities in lib/CodeGen/AsmPrinter) are needed by various parts of the MC library, they should all be moved down into the MC library. That's the library responsible for asm printing these days. I've looped in the MC experts on this thread to provide more insights. -Chandler -------------- next part -------------- An HTML attachment was scrubbed... URL: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120123/3a4d3798/attachment.html From grosbach at apple.com Mon Jan 23 18:58:13 2012 From: grosbach at apple.com (Jim Grosbach) Date: Tue, 24 Jan 2012 00:58:13 -0000 Subject: [llvm-commits] [llvm] r148764 - in /llvm/trunk: lib/Target/ARM/ARMInstrNEON.td lib/Target/ARM/AsmParser/ARMAsmParser.cpp test/MC/ARM/neon-vst-encoding.s Message-ID: <20120124005813.652142A6C12C@llvm.org> Author: grosbach Date: Mon Jan 23 18:58:13 2012 New Revision: 148764 URL: http://llvm.org/viewvc/llvm-project?rev=148764&view=rev Log: NEON VST4(multiple 4 element structures) assembly parsing. Modified: llvm/trunk/lib/Target/ARM/ARMInstrNEON.td llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp llvm/trunk/test/MC/ARM/neon-vst-encoding.s Modified: llvm/trunk/lib/Target/ARM/ARMInstrNEON.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrNEON.td?rev=148764&r1=148763&r2=148764&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrNEON.td (original) +++ llvm/trunk/lib/Target/ARM/ARMInstrNEON.td Mon Jan 23 18:58:13 2012 @@ -6264,6 +6264,66 @@ (ins VecListFourQ:$list, addrmode6:$addr, rGPR:$Rm, pred:$p)>; + +// VST4 multiple structure pseudo-instructions. These need special handling for +// the vector operands that the normal instructions don't yet model. +// FIXME: Remove these when the register classes and instructions are updated. 
+def VST4dAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr", + (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; +def VST4dAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr", + (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; +def VST4dAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr", + (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; +def VST4qAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr", + (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; +def VST4qAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr", + (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; +def VST4qAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr", + (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; + +def VST4dWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!", + (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; +def VST4dWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!", + (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; +def VST4dWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!", + (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; +def VST4qWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!", + (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; +def VST4qWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!", + (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; +def VST4qWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!", + (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; +def VST4dWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm", + (ins VecListFourD:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST4dWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm", + (ins VecListFourD:$list, addrmode6:$addr, + 
rGPR:$Rm, pred:$p)>; +def VST4dWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm", + (ins VecListFourD:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST4qWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm", + (ins VecListFourQ:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST4qWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm", + (ins VecListFourQ:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST4qWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm", + (ins VecListFourQ:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; + // VMOV takes an optional datatype suffix defm : VFPDTAnyInstAlias<"vmov${p}", "$Vd, $Vm", (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>; Modified: llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp?rev=148764&r1=148763&r2=148764&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp (original) +++ llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp Mon Jan 23 18:58:13 2012 @@ -5247,6 +5247,26 @@ case ARM::VST3qAsm_8: Spacing = 2; return ARM::VST3q8; case ARM::VST3qAsm_16: Spacing = 2; return ARM::VST3q16; case ARM::VST3qAsm_32: Spacing = 2; return ARM::VST3q32; + + // VST4 + case ARM::VST4dWB_fixed_Asm_8: Spacing = 1; return ARM::VST4d8_UPD; + case ARM::VST4dWB_fixed_Asm_16: Spacing = 1; return ARM::VST4d16_UPD; + case ARM::VST4dWB_fixed_Asm_32: Spacing = 1; return ARM::VST4d32_UPD; + case ARM::VST4qWB_fixed_Asm_8: Spacing = 2; return ARM::VST4q8_UPD; + case ARM::VST4qWB_fixed_Asm_16: Spacing = 2; return ARM::VST4q16_UPD; + case ARM::VST4qWB_fixed_Asm_32: Spacing = 2; return ARM::VST4q32_UPD; + case ARM::VST4dWB_register_Asm_8: Spacing = 1; return ARM::VST4d8_UPD; + case ARM::VST4dWB_register_Asm_16: Spacing = 
1; return ARM::VST4d16_UPD; + case ARM::VST4dWB_register_Asm_32: Spacing = 1; return ARM::VST4d32_UPD; + case ARM::VST4qWB_register_Asm_8: Spacing = 2; return ARM::VST4q8_UPD; + case ARM::VST4qWB_register_Asm_16: Spacing = 2; return ARM::VST4q16_UPD; + case ARM::VST4qWB_register_Asm_32: Spacing = 2; return ARM::VST4q32_UPD; + case ARM::VST4dAsm_8: Spacing = 1; return ARM::VST4d8; + case ARM::VST4dAsm_16: Spacing = 1; return ARM::VST4d16; + case ARM::VST4dAsm_32: Spacing = 1; return ARM::VST4d32; + case ARM::VST4qAsm_8: Spacing = 2; return ARM::VST4q8; + case ARM::VST4qAsm_16: Spacing = 2; return ARM::VST4q16; + case ARM::VST4qAsm_32: Spacing = 2; return ARM::VST4q32; } } @@ -6016,6 +6036,83 @@ return true; } + // VST4 multiple 3-element structure instructions. + case ARM::VST4dAsm_8: + case ARM::VST4dAsm_16: + case ARM::VST4dAsm_32: + case ARM::VST4qAsm_8: + case ARM::VST4qAsm_16: + case ARM::VST4qAsm_32: { + MCInst TmpInst; + unsigned Spacing; + TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(1)); // Rn + TmpInst.addOperand(Inst.getOperand(2)); // alignment + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 3)); + TmpInst.addOperand(Inst.getOperand(3)); // CondCode + TmpInst.addOperand(Inst.getOperand(4)); + Inst = TmpInst; + return true; + } + + case ARM::VST4dWB_fixed_Asm_8: + case ARM::VST4dWB_fixed_Asm_16: + case ARM::VST4dWB_fixed_Asm_32: + case ARM::VST4qWB_fixed_Asm_8: + case ARM::VST4qWB_fixed_Asm_16: + case ARM::VST4qWB_fixed_Asm_32: { + MCInst TmpInst; + unsigned Spacing; + TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(1)); // Rn + TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn + 
TmpInst.addOperand(Inst.getOperand(2)); // alignment + TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 3)); + TmpInst.addOperand(Inst.getOperand(3)); // CondCode + TmpInst.addOperand(Inst.getOperand(4)); + Inst = TmpInst; + return true; + } + + case ARM::VST4dWB_register_Asm_8: + case ARM::VST4dWB_register_Asm_16: + case ARM::VST4dWB_register_Asm_32: + case ARM::VST4qWB_register_Asm_8: + case ARM::VST4qWB_register_Asm_16: + case ARM::VST4qWB_register_Asm_32: { + MCInst TmpInst; + unsigned Spacing; + TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(1)); // Rn + TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn + TmpInst.addOperand(Inst.getOperand(2)); // alignment + TmpInst.addOperand(Inst.getOperand(3)); // Rm + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 3)); + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + // Handle the Thumb2 mode MOV complex aliases. 
case ARM::t2MOVsr: case ARM::t2MOVSsr: { Modified: llvm/trunk/test/MC/ARM/neon-vst-encoding.s URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/ARM/neon-vst-encoding.s?rev=148764&r1=148763&r2=148764&view=diff ============================================================================== --- llvm/trunk/test/MC/ARM/neon-vst-encoding.s (original) +++ llvm/trunk/test/MC/ARM/neon-vst-encoding.s Mon Jan 23 18:58:13 2012 @@ -99,23 +99,45 @@ @ CHECK: vst3.32 {d5, d7, d9}, [r4]! @ encoding: [0x8d,0x55,0x04,0xf4] -@ vst4.8 {d16, d17, d18, d19}, [r0, :64] -@ vst4.16 {d16, d17, d18, d19}, [r0, :128] -@ vst4.8 {d16, d18, d20, d22}, [r0, :256]! -@ vst4.8 {d17, d19, d21, d23}, [r0, :256]! -@ vst4.16 {d16, d18, d20, d22}, [r0]! -@ vst4.16 {d17, d19, d21, d23}, [r0]! -@ vst4.32 {d16, d18, d20, d22}, [r0]! -@ vst4.32 {d17, d19, d21, d23}, [r0]! - -@ FIXME: vst4.8 {d16, d17, d18, d19}, [r0, :64] @ encoding: [0x1f,0x00,0x40,0xf4] -@ FIXME: vst4.16 {d16, d17, d18, d19}, [r0, :128] @ encoding: [0x6f,0x00,0x40,0xf4] -@ FIXME: vst4.8 {d16, d18, d20, d22}, [r0, :256]! @ encoding: [0x3d,0x01,0x40,0xf4] -@ FIXME: vst4.8 {d17, d19, d21, d23}, [r0, :256]! @ encoding: [0x3d,0x11,0x40,0xf4] -@ FIXME: vst4.16 {d16, d18, d20, d22}, [r0]! @ encoding: [0x4d,0x01,0x40,0xf4] -@ FIXME: vst4.16 {d17, d19, d21, d23}, [r0]! @ encoding: [0x4d,0x11,0x40,0xf4] -@ FIXME: vst4.32 {d16, d18, d20, d22}, [r0]! @ encoding: [0x8d,0x01,0x40,0xf4] -@ FIXME: vst4.32 {d17, d19, d21, d23}, [r0]! @ encoding: [0x8d,0x11,0x40,0xf4] + vst4.8 {d16, d17, d18, d19}, [r1, :64] + vst4.16 {d16, d17, d18, d19}, [r2, :128] + vst4.32 {d16, d17, d18, d19}, [r3, :256] + vst4.8 {d17, d19, d21, d23}, [r5, :256] + vst4.16 {d17, d19, d21, d23}, [r7] + vst4.32 {d16, d18, d20, d22}, [r8] + + vst4.s8 {d16, d17, d18, d19}, [r1, :64]! + vst4.s16 {d16, d17, d18, d19}, [r2, :128]! + vst4.s32 {d16, d17, d18, d19}, [r3, :256]! + vst4.u8 {d17, d19, d21, d23}, [r5, :256]! + vst4.u16 {d17, d19, d21, d23}, [r7]! 
+ vst4.u32 {d16, d18, d20, d22}, [r8]! + + vst4.p8 {d16, d17, d18, d19}, [r1, :64], r8 + vst4.p16 {d16, d17, d18, d19}, [r2], r7 + vst4.f32 {d16, d17, d18, d19}, [r3, :64], r5 + vst4.i8 {d16, d18, d20, d22}, [r4, :256], r2 + vst4.i16 {d16, d18, d20, d22}, [r6], r3 + vst4.i32 {d17, d19, d21, d23}, [r9], r4 + +@ CHECK: vst4.8 {d16, d17, d18, d19}, [r1, :64] @ encoding: [0x1f,0x00,0x41,0xf4] +@ CHECK: vst4.16 {d16, d17, d18, d19}, [r2, :128] @ encoding: [0x6f,0x00,0x42,0xf4] +@ CHECK: vst4.32 {d16, d17, d18, d19}, [r3, :256] @ encoding: [0xbf,0x00,0x43,0xf4] +@ CHECK: vst4.8 {d17, d19, d21, d23}, [r5, :256] @ encoding: [0x3f,0x11,0x45,0xf4] +@ CHECK: vst4.16 {d17, d19, d21, d23}, [r7] @ encoding: [0x4f,0x11,0x47,0xf4] +@ CHECK: vst4.32 {d16, d18, d20, d22}, [r8] @ encoding: [0x8f,0x01,0x48,0xf4] +@ CHECK: vst4.8 {d16, d17, d18, d19}, [r1, :64]! @ encoding: [0x1d,0x00,0x41,0xf4] +@ CHECK: vst4.16 {d16, d17, d18, d19}, [r2, :128]! @ encoding: [0x6d,0x00,0x42,0xf4] +@ CHECK: vst4.32 {d16, d17, d18, d19}, [r3, :256]! @ encoding: [0xbd,0x00,0x43,0xf4] +@ CHECK: vst4.8 {d17, d19, d21, d23}, [r5, :256]! @ encoding: [0x3d,0x11,0x45,0xf4] +@ CHECK: vst4.16 {d17, d19, d21, d23}, [r7]! @ encoding: [0x4d,0x11,0x47,0xf4] +@ CHECK: vst4.32 {d16, d18, d20, d22}, [r8]! 
@ encoding: [0x8d,0x01,0x48,0xf4] +@ CHECK: vst4.8 {d16, d17, d18, d19}, [r1, :64], r8 @ encoding: [0x18,0x00,0x41,0xf4] +@ CHECK: vst4.16 {d16, d17, d18, d19}, [r2], r7 @ encoding: [0x47,0x00,0x42,0xf4] +@ CHECK: vst4.32 {d16, d17, d18, d19}, [r3, :64], r5 @ encoding: [0x95,0x00,0x43,0xf4] +@ CHECK: vst4.8 {d16, d18, d20, d22}, [r4, :256], r2 @ encoding: [0x32,0x01,0x44,0xf4] +@ CHECK: vst4.16 {d16, d18, d20, d22}, [r6], r3 @ encoding: [0x43,0x01,0x46,0xf4] +@ CHECK: vst4.32 {d17, d19, d21, d23}, [r9], r4 @ encoding: [0x84,0x11,0x49,0xf4] vst2.8 {d16[1], d17[1]}, [r0, :16] From zinob at codeaurora.org Mon Jan 23 19:11:45 2012 From: zinob at codeaurora.org (Zino Benaissa) Date: Mon, 23 Jan 2012 17:11:45 -0800 Subject: [llvm-commits] FW: Tuning LLVM Greedy Register Allocator to optimize for code size when targeting ARM Thumb 2 instruction set Message-ID: <000001ccda35$239f0e10$6add2a30$@org> Description: This contribution extends LLVM greedy Register Allocator to optimize for code size when LLVM compiler targets ARM Thumb 2 instruction set. This heuristic favors assigning registers R0 through R7 to operands used in instructions that can be encoded in 16 bits (16-bit is allowed only if R0-7 are used). Operands that appear most frequently in a function (and in instructions that qualify) get an R0-7 register. This heuristic is turned on by default and has impact on generated code only if -mthumb compiler switch is used. To turn this heuristic off use -disable-favor-r0-7 feature flag. This patch modifies: 1) The LLVM greedy register allocator located in LLVM/CodeGen directory: To add the new code size heuristic. 2) The ARM-specific files located in LLVM/Target/ARM directory: To add the function that determines which instructions can be encoded in 16-bits and a fix to enable the compiler to emit CMN instruction in 16-bits encoding. 3) The LLVM test suite: fix test/CodeGen/Thumb2/thumb2-cmn.ll test. Performance impact: I focused on -Os and -mthumb flags. 
But observed similar improvement with -O3 and -mthumb. Runtime measured on Qualcomm 8660. Code size: - SPEC2000 benchmarks between 0 to 0.6% code size reduction (with no noticeable regression). - EEMBC benchmarks between 0 to 6% reduction (no noticeable regression). Automotive and Networking average about 1% code size reduction and Consumer about 0.5%. Runtime: - SPEC2000 between -1% and 6% speed up (Spec2k/ammp 6%) - EEMBC overall averages faster -1 to 5%. Modified: test/CodeGen/Thumb2/thumb2-cmn.ll include/llvm/Target/TargetInstrInfo.h include/llvm/CodeGen/LiveInterval.h lib/Target/ARM/Thumb2SizeReduction.cpp lib/Target/ARM/ARMBaseInstrInfo.cpp lib/Target/ARM/ARMBaseInstrInfo.h lib/CodeGen/RegAllocGreedy.cpp lib/CodeGen/CalcSpillWeights.cpp for details see RACodeSize.txt Testing: See ARMTestSuiteResult.txt and ARMSimple-Os-mthumb.txt Note -O3 is also completed on X86 and ARM CPUs -------------- next part -------------- An HTML attachment was scrubbed... URL: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120123/55b2ccc9/attachment-0001.html -------------- next part -------------- An embedded and charset-unspecified text was scrubbed... Name: RACodeSize.txt Url: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120123/55b2ccc9/attachment-0003.txt -------------- next part -------------- An embedded and charset-unspecified text was scrubbed... Name: ARMTestSuiteResult.txt Url: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120123/55b2ccc9/attachment-0004.txt -------------- next part -------------- An embedded and charset-unspecified text was scrubbed... 
Name: ARMsimple-Os-mthumb.txt Url: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120123/55b2ccc9/attachment-0005.txt From jcarter at mips.com Mon Jan 23 20:22:40 2012 From: jcarter at mips.com (Carter, Jack) Date: Tue, 24 Jan 2012 02:22:40 +0000 Subject: [llvm-commits] Resubmittal of Mips big endian changes Message-ID: <86AC779C188FE74F88F6494478B46332E95EBA@exchdb03.mips.com> Resubmittal for Mips big endian patch with minimal 64 bit support. The 64 bit support caused a rethink of how we constructed some of our objects and thus the inclusion. big_endian.patch These changes allow us to compile big endian from the commandline for 32 bit Mips targets. The biggest changes are to endianize the opcode and then the fixups. This has been tested for shared, non-shared executables through execution. They also allow for minimal Mips 64 bit object production. This has been tested through to execution for a series of simple tests for non-shared little endian. Neither big-endian nor 64 bit have gotten the extensive testing of 32 bit little endian. That will come soon. Contributors: Jack Carter ******************************************** # llvm/include/llvm/Support/ELF.h ******************************************** Some EF_MIPS_ARCH_ defines were missing and one was just wrong. This affects marking the ELF object file for target architecture. This is needed for 64 bit. ******************************************** # llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp ******************************************** In addition to the class vs. create issues described above this change also has some issues of note. In ApplyFixup the instruction (or data) stream is already endianized. Way too early for my taste, but such is it currently. I de-endianize the target object, usually a 32 bit instruction, apply the fixup, and then re-endianize it. This brings up 2 questions: 1: Why do I need to worry about the endianization here? I shouldn't. 
It should be handled later on right before output. 2: Why can't I use the "bits" field from: NumBytes = ((getFixupKindInfo(Kind).TargetSize-1)/8)+1; I get pointed at the whole object which has already been endianized. The fixup value is in platform endian which in our current case means little endian. In order to do the fixup the value needs to be found in the object and converted to the target endian. The full object is endianized putting the target fixup where we expect it and the fixup value is added (not inserted) to the value already in that location. Since we had to de-endianize the whole object, we now need to re-endianize it. I suppose that, since I know the object size from the relocation type, I could grab the exact number of bytes needed and de-endianize them, but then I would need to break the routine up into 16,32 and 64 bit variants, probably with templates. I will have to put this into my todo list for later. What I have now seems to work in our testing. ******************************************** # llvm/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp ******************************************** Put in 64 bit variant into MipsELFObjectWriter class parameters and passed to createELFObjectWriter() so the ELF header will get the setting. ******************************************** # llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp ******************************************** Put in big endian support. Note: I don't think it should be done this low, but it is how llvm is set up at this time. I also added a FIXME for a non-related switch case that I suspect needs to generate an assertion, but have not tested my hunch yet. ******************************************** # llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp ******************************************** I changed the TargetRegistry to invoke targets for endianness and 32/64. 
The most controversial part about this is that I decided to not split up the variants into subclasses and went instead with different "create" routines that changed the single class parameters. It just was less confusing to me. This hopefully will become moot as I want to instead pass a reference to the fully fleshed out SubtargetInfo class object so target information is available to me no matter which of the myriad Mips target variants I have to process and note. ******************************************** # llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h ******************************************** Declarations for .cpp changes above and some endian macros. Note: these macros should go away for the appropriate llvm variants. I didn't find ones that matched exactly what I wanted. I have a FIXME in the code for this. ******************************************** # modified: test/MC/Mips/elf-bigendian.ll <-- new file ******************************************** This is .ll instead of .s because Mips doesn't have its assembler llvm-mc port done yet. Check that the ELF header marks this as big endian Check that the text section header is present, readable and the type,flags and alignment is correct. The sh_entsize is NOT correct and I have marked that on my todo list. Check that the first words of the text section, GP prolog are correct. If the endianess is done wrong this test will fail. ******************************************** # modified: test/MC/Mips/test/MC/Mips/elf-64bitEL.ll <-- new file ******************************************** This is .ll instead of .s because Mips doesn't have its assembler llvm-mc port done yet. Check for ELF header marking this 64 bit Check that it is little endian Check that the text section header is present and the type,flags and alignment is correct. The sh_entsize is NOT correct and I have marked that on my todo list. Check that the first words of the text section, GP prolog are correct. 
Check that one of the sections/tables that will be affected by 64 bit is correct. I picked the symbol table since each of its entries will be twice as big as the 32 bit flavor. # ############################################################### Jack -------------- next part -------------- An HTML attachment was scrubbed... URL: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120124/1f52f455/attachment.html -------------- next part -------------- A non-text attachment was scrubbed... Name: big_endian.patch Type: text/x-patch Size: 19745 bytes Desc: big_endian.patch Url : http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120124/1f52f455/attachment.bin From apazos at codeaurora.org Mon Jan 23 20:35:42 2012 From: apazos at codeaurora.org (Ana Pazos) Date: Mon, 23 Jan 2012 18:35:42 -0800 Subject: [llvm-commits] LLVM patch to support ARM fused multiply add/subtract instructions In-Reply-To: References: <027a01cccb29$a4709100$ed51b300$@org> Message-ID: <003c01ccda40$dddd4610$9997d230$@org> Hi Anton, We have internal micro benchmarks that do matrix operations (multiply, transpose, etc.) and we see up to 60% improvement when using fused multiply add/sub instructions. Regarding accuracy, what I know is that Qualcomm provides the IEEE-754 2008 specified result. This means the multiply is performed without any loss of accuracy (i.e., no rounding) and then the add/subtract operation happens. The final result is rounded according to the configured rounding mode in the VFP unit. Thanks for integrating the change. Hope others find it useful. Ana. -----Original Message----- From: Anton Korobeynikov [mailto:anton at korobeynikov.info] Sent: Sunday, January 22, 2012 4:12 AM To: Ana Pazos Cc: llvm-commits at cs.uiuc.edu; rajav at codeaurora.org Subject: Re: [llvm-commits] LLVM patch to support ARM fused multiply add/subtract instructions Hi Ana, Committed as r148658. 
> Some ARMv7-A processor implementations (e.g, Qualcomm 8960, ARM Cortex-A5) > support fused multiply add/subtract instructions (VFMA/VFMS) which have > lower latency and greater accuracy than the chained multiply add/subtract > instructions (VMLA/VMLS). Just curious - what are the performance / accuracy wins for fma stuff here? -- With best regards, Anton Korobeynikov Faculty of Mathematics and Mechanics, Saint Petersburg State University From craig.topper at gmail.com Mon Jan 23 21:03:17 2012 From: craig.topper at gmail.com (Craig Topper) Date: Tue, 24 Jan 2012 03:03:17 -0000 Subject: [llvm-commits] [llvm] r148771 - /llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td Message-ID: <20120124030317.9C5292A6C12C@llvm.org> Author: ctopper Date: Mon Jan 23 21:03:17 2012 New Revision: 148771 URL: http://llvm.org/viewvc/llvm-project?rev=148771&view=rev Log: Add comments near load pattern fragments indicating that all integer vector loads are promoted to v2i64 or v4i64 so that no one tries to reintroduce pattern fragments for other types. 
Modified: llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td Modified: llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td?rev=148771&r1=148770&r2=148771&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td (original) +++ llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td Mon Jan 23 21:03:17 2012 @@ -176,11 +176,13 @@ //===----------------------------------------------------------------------===// // 128-bit load pattern fragments +// NOTE: all 128-bit integer vector loads are promoted to v2i64 def loadv4f32 : PatFrag<(ops node:$ptr), (v4f32 (load node:$ptr))>; def loadv2f64 : PatFrag<(ops node:$ptr), (v2f64 (load node:$ptr))>; def loadv2i64 : PatFrag<(ops node:$ptr), (v2i64 (load node:$ptr))>; // 256-bit load pattern fragments +// NOTE: all 256-bit integer vector loads are promoted to v4i64 def loadv8f32 : PatFrag<(ops node:$ptr), (v8f32 (load node:$ptr))>; def loadv4f64 : PatFrag<(ops node:$ptr), (v4f64 (load node:$ptr))>; def loadv4i64 : PatFrag<(ops node:$ptr), (v4i64 (load node:$ptr))>; @@ -213,6 +215,7 @@ (f64 (alignedload node:$ptr))>; // 128-bit aligned load pattern fragments +// NOTE: all 128-bit integer vector loads are promoted to v2i64 def alignedloadv4f32 : PatFrag<(ops node:$ptr), (v4f32 (alignedload node:$ptr))>; def alignedloadv2f64 : PatFrag<(ops node:$ptr), @@ -221,6 +224,7 @@ (v2i64 (alignedload node:$ptr))>; // 256-bit aligned load pattern fragments +// NOTE: all 256-bit integer vector loads are promoted to v4i64 def alignedloadv8f32 : PatFrag<(ops node:$ptr), (v8f32 (alignedload256 node:$ptr))>; def alignedloadv4f64 : PatFrag<(ops node:$ptr), @@ -243,11 +247,13 @@ def memopfsf64 : PatFrag<(ops node:$ptr), (f64 (memop node:$ptr))>; // 128-bit memop pattern fragments +// NOTE: all 128-bit integer vector loads are promoted to v2i64 def memopv4f32 : PatFrag<(ops 
node:$ptr), (v4f32 (memop node:$ptr))>; def memopv2f64 : PatFrag<(ops node:$ptr), (v2f64 (memop node:$ptr))>; def memopv2i64 : PatFrag<(ops node:$ptr), (v2i64 (memop node:$ptr))>; // 256-bit memop pattern fragments +// NOTE: all 256-bit integer vector loads are promoted to v4i64 def memopv8f32 : PatFrag<(ops node:$ptr), (v8f32 (memop node:$ptr))>; def memopv4f64 : PatFrag<(ops node:$ptr), (v4f64 (memop node:$ptr))>; def memopv4i64 : PatFrag<(ops node:$ptr), (v4i64 (memop node:$ptr))>; From stoklund at 2pi.dk Mon Jan 23 21:17:53 2012 From: stoklund at 2pi.dk (Jakob Stoklund Olesen) Date: Mon, 23 Jan 2012 19:17:53 -0800 Subject: [llvm-commits] Tuning LLVM Greedy Register Allocator to optimize for code size when targeting ARM Thumb 2 instruction set In-Reply-To: <000001ccda35$239f0e10$6add2a30$@org> References: <000001ccda35$239f0e10$6add2a30$@org> Message-ID: <0E37D7B5-BBF8-4D29-9679-5C4D22B32AEB@2pi.dk> On Jan 23, 2012, at 5:11 PM, Zino Benaissa wrote: > Description: > This contribution extends LLVM greedy Register Allocator to optimize for code size when LLVM compiler targets ARM Thumb 2 instruction set. This heuristic favors assigning register R0 through R7 to operands used in instruction that can be encoded in 16 bits (16-bit is allowed only if R0-7 are used). Operands that appear most frequently in a function (and in instructions that qualify) get R0-7 register. > This heuristic is turned on by default and has impact on generated code only if -mthumb compiler switch is used. To turn this heuristic off use -disable-favor-r0-7 feature flag. > > This patch modifies: > 1) The LLVM greedy register allocator located in LLVM/CodeGen directory: To add the new code size heuristic. > 2) The ARM-specific flies located in LLVM/Target/ARM directory: To add the function that determines which instruction can be encoded in 16-bits and a fix to enable the compiler to emit CMN instruction in 16-bits encoding. > 3) The LLVM test suite: fix test/CodeGen/Thumb2/thumb2-cmn.ll test. 
Hi Zino, Thanks for working on this interesting patch. Please submit the CMN-related changes as an independent patch. If you don't mind, I would like you to run a couple of experiments to better understand why this change improves some benchmarks. First of all, is the regHasSizeImpact() hook necessary? Do you get significantly different results if you pretend this function always returns 2? Second, what happens if you use a 'flatter' spill weight? Instead of your patch, in LiveIntervals::getSpillWeight replace this: float lc = std::pow(1 + (100.0 / (loopDepth + 10)), (double)loopDepth); with this: float lc = std::pow(1 + (100.0 / (loopDepth + 10))/5, (double)loopDepth); And in CalcSpillWeights.h, replace the number 25 in normalizeSpillWeight() with 250. Does that give you similar results? Thanks, /jakob -------------- next part -------------- An HTML attachment was scrubbed... URL: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120123/fc5f82d1/attachment.html From hfinkel at anl.gov Mon Jan 23 22:13:40 2012 From: hfinkel at anl.gov (Hal Finkel) Date: Mon, 23 Jan 2012 22:13:40 -0600 Subject: [llvm-commits] [LLVMdev] [PATCH] BasicBlock Autovectorization Pass In-Reply-To: References: <1319928991.23036.957.camel@sapling> <1320108633.23036.1266.camel@sapling> <1320172356.23036.1298.camel@sapling> <4EB0462C.5010209@grosser.es> <1320184739.23036.1334.camel@sapling> <1320191694.23036.1497.camel@sapling> <1320749109.19359.76.camel@sapling> <4EB90E98.4010805@grosser.es> <1320762963.19359.117.camel@sapling> <4EB98207.2070807@grosser.es> <1320791390.19359.262.camel@sapling> <4EBC4B0F.6010609@grosser.es> <1321050998.19359.539.camel@sapling> <4EBDA7F9.9080709@grosser.es> <1321053083.19359.550.camel@sapling> <4EBDB1BF.7090006@grosser.es> <1321400339.19359.782.camel@sapling> <1321486739.19359.1067.camel@sapling> <4EC504B5.2020408@grosser.es> <1321898108.2507.36.camel@sapling> <1321932161.2507.101.camel@sapling> <1322067157.2507.263.camel@sapling> 
<4ED8F7B0.8050309@grosser.es> <1323822351.590.1687.camel@sapling> <4EFC7291.9040808@grosser.es> <1325179929.13080.2839.camel@sapling> <4EFD7FD3.8040800@grosser.es> Message-ID: <1327378420.32397.1603.camel@sapling> On Tue, 2012-01-17 at 13:25 -0600, Sebastian Pop wrote: > Hi, > > On Fri, Dec 30, 2011 at 3:09 AM, Tobias Grosser wrote: > > As it seems my intuition is wrong, I am very eager to see and understand > > an example where a search limit of 4000 is really needed. > > > > To make the ball roll again, I attached a testcase that can be tuned > to understand the impact on compile time for different sizes of a > basic block. One can also set the number of iterations in the loop to > 1 to test the vectorizer with no loops around. > > Hal, could you please report the compile times with/without the > vectorizer for different basic block sizes? I've looked at your test case, and I am pleased to report a negligible compile-time increase! Also, there is no vectorization of the main loop :) Here's why: (as you know) the main part of the loop is essentially one long dependency chain, and so there is nothing to vectorize there. The only vectorization opportunities come from unrolling the loop. Using the default thresholds, the loop will not even partially unroll (because the body is too large). As a result, essentially nothing happens. I've prepared a reduced version of your test case (attached). Using -unroll-threshold=300 (along with -unroll-allow-partial), I can make the loop unroll partially (the reduced loop size is 110, so this allows unrolling 2 iterations). Once this is done, the vectorizer finds candidate pairs and vectorizes [as a practical matter, you need -basicaa too]. I think that even this is probably too big for a regression test. 
I don't think that the basic structure really adds anything over existing tests (although I need to make sure that alias-analysis use is otherwise covered), but I'll copy-and-paste a small portion into a regression test to cover the search limit logic (which is currently uncovered). We should probably discuss different situations that we'd like to see covered in the regression suite (perhaps post-commit). Thanks for working on this! I'll post an updated patch for review shortly. -Hal > > Once this parameter is tuned, could we get this code committed to llvm? > > Thanks, > Sebastian > > PS: this testcase is also a compile time hog for GCC at -O3 when the > loop vectorizer is running. > > -- > Qualcomm Innovation Center, Inc is a member of Code Aurora Forum -- Hal Finkel Postdoctoral Appointee Leadership Computing Facility Argonne National Laboratory -------------- next part -------------- ; ModuleID = '/homes/hfinkel/tmp/bbvect.c' target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" ; This test case is based on code by Sebastian Pop. 
define i32 @main() nounwind uwtable readnone { entry: %A = alloca [100 x i32], align 16 %B = alloca [100 x i32], align 16 %C = alloca [100 x i32], align 16 br label %for.body for.body: ; preds = %for.body, %entry %indvars.iv21247 = phi i64 [ 0, %entry ], [ %indvars.iv.next21248, %for.body ] %arrayidx = getelementptr inbounds [100 x i32]* %A, i64 0, i64 %indvars.iv21247 %0 = trunc i64 %indvars.iv21247 to i32 store i32 %0, i32* %arrayidx, align 4, !tbaa !0 %1 = add nsw i64 %indvars.iv21247, 42 %arrayidx2 = getelementptr inbounds [100 x i32]* %B, i64 0, i64 %indvars.iv21247 %2 = trunc i64 %1 to i32 store i32 %2, i32* %arrayidx2, align 4, !tbaa !0 %3 = shl nsw i64 %indvars.iv21247, 1 %arrayidx4 = getelementptr inbounds [100 x i32]* %C, i64 0, i64 %indvars.iv21247 %4 = trunc i64 %3 to i32 store i32 %4, i32* %arrayidx4, align 4, !tbaa !0 %indvars.iv.next21248 = add i64 %indvars.iv21247, 1 %lftr.wideiv21251 = trunc i64 %indvars.iv.next21248 to i32 %exitcond21252 = icmp eq i32 %lftr.wideiv21251, 100 br i1 %exitcond21252, label %do.body9, label %for.body for.cond21230.preheader: ; preds = %do.body9 %arrayidx21236 = getelementptr inbounds [100 x i32]* %B, i64 0, i64 99 %arrayidx21234 = getelementptr inbounds [100 x i32]* %A, i64 0, i64 99 %arrayidx21239 = getelementptr inbounds [100 x i32]* %C, i64 0, i64 99 %5 = load i32* %arrayidx21236, align 4, !tbaa !0 %6 = load i32* %arrayidx21234, align 4, !tbaa !0 %add21237 = add i32 %5, %6 %7 = load i32* %arrayidx21239, align 4, !tbaa !0 %add21240 = add i32 %add21237, %7 ret i32 %add21240 do.body9: ; preds = %for.body, %do.body9 %indvars.iv = phi i64 [ %indvars.iv.next, %do.body9 ], [ 0, %for.body ] %arrayidx11 = getelementptr inbounds [100 x i32]* %B, i64 0, i64 %indvars.iv %8 = load i32* %arrayidx11, align 4, !tbaa !0 %arrayidx13 = getelementptr inbounds [100 x i32]* %C, i64 0, i64 %indvars.iv %9 = load i32* %arrayidx13, align 4, !tbaa !0 %add14 = add nsw i32 %9, %8 %arrayidx16 = getelementptr inbounds [100 x i32]* %A, i64 0, i64 
%indvars.iv %mul21 = mul nsw i32 %add14, %9 %sub = sub nsw i32 %add14, %mul21 %mul41 = mul nsw i32 %add14, %sub %sub48 = sub nsw i32 %add14, %mul41 %mul62 = mul nsw i32 %add14, %sub48 %sub69 = sub nsw i32 %add14, %mul62 %mul83 = mul nsw i32 %add14, %sub69 %sub90 = sub nsw i32 %add14, %mul83 %mul104 = mul nsw i32 %add14, %sub90 %sub111 = sub nsw i32 %add14, %mul104 %mul125 = mul nsw i32 %add14, %sub111 %sub132 = sub nsw i32 %add14, %mul125 %mul146 = mul nsw i32 %add14, %sub132 %sub153 = sub nsw i32 %add14, %mul146 %mul167 = mul nsw i32 %add14, %sub153 %sub174 = sub nsw i32 %add14, %mul167 %mul188 = mul nsw i32 %add14, %sub174 %sub195 = sub nsw i32 %add14, %mul188 %mul209 = mul nsw i32 %add14, %sub195 %sub216 = sub nsw i32 %add14, %mul209 %mul231 = mul nsw i32 %add14, %sub216 %sub238 = sub nsw i32 %add14, %mul231 %mul252 = mul nsw i32 %add14, %sub238 %sub259 = sub nsw i32 %add14, %mul252 %mul273 = mul nsw i32 %add14, %sub259 %sub280 = sub nsw i32 %add14, %mul273 %mul294 = mul nsw i32 %add14, %sub280 %sub301 = sub nsw i32 %add14, %mul294 %mul315 = mul nsw i32 %add14, %sub301 %sub322 = sub nsw i32 %add14, %mul315 %mul336 = mul nsw i32 %add14, %sub322 %sub343 = sub nsw i32 %add14, %mul336 %mul357 = mul nsw i32 %add14, %sub343 %sub364 = sub nsw i32 %add14, %mul357 %mul378 = mul nsw i32 %add14, %sub364 %sub385 = sub nsw i32 %add14, %mul378 %mul399 = mul nsw i32 %add14, %sub385 %sub406 = sub nsw i32 %add14, %mul399 %mul420 = mul nsw i32 %add14, %sub406 %sub427 = sub nsw i32 %add14, %mul420 %mul443 = mul nsw i32 %add14, %sub427 %sub450 = sub nsw i32 %add14, %mul443 %mul464 = mul nsw i32 %add14, %sub450 %sub471 = sub nsw i32 %add14, %mul464 %mul485 = mul nsw i32 %add14, %sub471 %sub492 = sub nsw i32 %add14, %mul485 %mul506 = mul nsw i32 %add14, %sub492 %sub513 = sub nsw i32 %add14, %mul506 %mul527 = mul nsw i32 %add14, %sub513 %sub534 = sub nsw i32 %add14, %mul527 %mul548 = mul nsw i32 %add14, %sub534 %sub555 = sub nsw i32 %add14, %mul548 %mul569 = mul nsw i32 %add14, 
%sub555 %sub576 = sub nsw i32 %add14, %mul569 %mul590 = mul nsw i32 %add14, %sub576 %sub597 = sub nsw i32 %add14, %mul590 %mul611 = mul nsw i32 %add14, %sub597 %sub618 = sub nsw i32 %add14, %mul611 %mul632 = mul nsw i32 %add14, %sub618 %sub639 = sub nsw i32 %add14, %mul632 %mul655 = mul nsw i32 %add14, %sub639 %sub662 = sub nsw i32 %add14, %mul655 %mul676 = mul nsw i32 %add14, %sub662 %sub683 = sub nsw i32 %add14, %mul676 %mul697 = mul nsw i32 %add14, %sub683 %sub704 = sub nsw i32 %add14, %mul697 %mul718 = mul nsw i32 %add14, %sub704 %sub725 = sub nsw i32 %add14, %mul718 %mul739 = mul nsw i32 %add14, %sub725 %sub746 = sub nsw i32 %add14, %mul739 %mul760 = mul nsw i32 %add14, %sub746 %sub767 = sub nsw i32 %add14, %mul760 %mul781 = mul nsw i32 %add14, %sub767 %sub788 = sub nsw i32 %add14, %mul781 %mul802 = mul nsw i32 %add14, %sub788 %sub809 = sub nsw i32 %add14, %mul802 %mul823 = mul nsw i32 %add14, %sub809 %sub830 = sub nsw i32 %add14, %mul823 %mul844 = mul nsw i32 %add14, %sub830 %sub851 = sub nsw i32 %add14, %mul844 %mul867 = mul nsw i32 %add14, %sub851 %sub874 = sub nsw i32 %add14, %mul867 %mul888 = mul nsw i32 %add14, %sub874 %sub895 = sub nsw i32 %add14, %mul888 %mul909 = mul nsw i32 %add14, %sub895 %sub916 = sub nsw i32 %add14, %mul909 %mul930 = mul nsw i32 %add14, %sub916 %sub937 = sub nsw i32 %add14, %mul930 %mul951 = mul nsw i32 %add14, %sub937 %sub958 = sub nsw i32 %add14, %mul951 %mul972 = mul nsw i32 %add14, %sub958 %sub979 = sub nsw i32 %add14, %mul972 %mul993 = mul nsw i32 %add14, %sub979 %sub21179 = sub nsw i32 %add14, %mul993 %mul21193 = mul nsw i32 %add14, %sub21179 %sub21200 = sub nsw i32 %add14, %mul21193 store i32 %add14, i32* %arrayidx16, align 4, !tbaa !0 %mul21214 = mul nsw i32 %add14, %sub21200 store i32 %mul21214, i32* %arrayidx11, align 4, !tbaa !0 %sub21221 = sub nsw i32 %add14, %mul21214 store i32 %sub21221, i32* %arrayidx13, align 4, !tbaa !0 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 
%exitcond = icmp eq i32 %lftr.wideiv, 100 br i1 %exitcond, label %for.cond21230.preheader, label %do.body9 } !0 = metadata !{metadata !"int", metadata !1} !1 = metadata !{metadata !"omnipotent char", metadata !2} !2 = metadata !{metadata !"Simple C/C++ TBAA", null} From sabre at nondot.org Mon Jan 23 22:43:41 2012 From: sabre at nondot.org (Chris Lattner) Date: Tue, 24 Jan 2012 04:43:41 -0000 Subject: [llvm-commits] [llvm] r148778 - in /llvm/trunk: include/llvm/Constants.h lib/VMCore/Constants.cpp Message-ID: <20120124044341.8D1372A6C12C@llvm.org> Author: lattner Date: Mon Jan 23 22:43:41 2012 New Revision: 148778 URL: http://llvm.org/viewvc/llvm-project?rev=148778&view=rev Log: implement the ConstantDataSequential accessor methods. No need for 'getOperand' :) Modified: llvm/trunk/include/llvm/Constants.h llvm/trunk/lib/VMCore/Constants.cpp Modified: llvm/trunk/include/llvm/Constants.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Constants.h?rev=148778&r1=148777&r2=148778&view=diff ============================================================================== --- llvm/trunk/include/llvm/Constants.h (original) +++ llvm/trunk/include/llvm/Constants.h Mon Jan 23 22:43:41 2012 @@ -597,6 +597,10 @@ inline SequentialType *getType() const { return reinterpret_cast(Value::getType()); } + + /// getElementType - Return the element type of the array/vector. 
+ Type *getElementType() const; + /// Methods for support type inquiry through isa, cast, and dyn_cast: /// @@ -605,6 +609,9 @@ return V->getValueID() == ConstantDataArrayVal || V->getValueID() == ConstantDataVectorVal; } +private: + uint64_t getElementByteSize() const; + const char *getElementPointer(unsigned Elt) const; }; //===----------------------------------------------------------------------===// Modified: llvm/trunk/lib/VMCore/Constants.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/Constants.cpp?rev=148778&r1=148777&r2=148778&view=diff ============================================================================== --- llvm/trunk/lib/VMCore/Constants.cpp (original) +++ llvm/trunk/lib/VMCore/Constants.cpp Mon Jan 23 22:43:41 2012 @@ -1919,6 +1919,40 @@ void ConstantDataArray::anchor() {} void ConstantDataVector::anchor() {} +/// getElementType - Return the element type of the array/vector. +Type *ConstantDataSequential::getElementType() const { + return getType()->getElementType(); +} + +/// isElementTypeConstantDataCompatible - Return true if this type is valid for +/// a ConstantDataSequential. This is i8/i16/i32/i64/float/double. +static bool isElementTypeConstantDataCompatible(const Type *Ty) { + if (Ty->isFloatTy() || Ty->isDoubleTy()) return true; + if (const IntegerType *IT = dyn_cast(Ty)) { + switch (IT->getBitWidth()) { + case 8: + case 16: + case 32: + case 64: + return true; + default: break; + } + } + return false; +} + +/// getElementByteSize - Return the size in bytes of the elements in the data. +uint64_t ConstantDataSequential::getElementByteSize() const { + return getElementType()->getPrimitiveSizeInBits()/8; +} + +/// getElementPointer - Return the start of the specified element. 
+const char *ConstantDataSequential::getElementPointer(unsigned Elt) const { + assert(Elt < getElementType()->getNumElements() && "Invalid Elt"); + return DataElements+Elt*getElementByteSize(); +} + + /// isAllZeros - return true if the array is empty or all zeros. static bool isAllZeros(StringRef Arr) { for (StringRef::iterator I = Arr.begin(), E = Arr.end(); I != E; ++I) @@ -1931,6 +1965,8 @@ /// the correct element type. We take the bytes in as an StringRef because /// we *want* an underlying "char*" to avoid TBAA type punning violations. Constant *ConstantDataSequential::getImpl(StringRef Elements, Type *Ty) { + assert(isElementTypeConstantDataCompatible(cast(Ty)-> + getElementType())); // If the elements are all zero, return a CAZ, which is more dense. if (isAllZeros(Elements)) return ConstantAggregateZero::get(Ty); @@ -1959,9 +1995,7 @@ } void ConstantDataSequential::destroyConstant() { - uint64_t ByteSize = - getType()->getElementType()->getPrimitiveSizeInBits()/8 * - getType()->getElementType()->getNumElements(); + uint64_t ByteSize = getElementByteSize() * getElementType()->getNumElements(); // Remove the constant from the StringMap. StringMap &CDSConstants = @@ -2059,6 +2093,62 @@ return getImpl(StringRef((char*)Elts.data(), Elts.size()*8), Ty); } +/// getElementAsInteger - If this is a sequential container of integers (of +/// any size), return the specified element in the low bits of a uint64_t. +uint64_t ConstantDataSequential::getElementAsInteger(unsigned Elt) const { + assert(isa(getElementType()) && + "Accessor can only be used when element is an integer"); + const char *EltPtr = getElementPointer(Elt); + + // The data is stored in host byte order, make sure to cast back to the right + // type to load with the right endianness. 
+ switch (cast(getElementType())->getBitWidth()) { + default: assert(0 && "Invalid bitwidth for CDS"); + case 8: return *(uint8_t*)EltPtr; + case 16: return *(uint16_t*)EltPtr; + case 32: return *(uint32_t*)EltPtr; + case 64: return *(uint64_t*)EltPtr; + } +} + +/// getElementAsAPFloat - If this is a sequential container of floating point +/// type, return the specified element as an APFloat. +APFloat ConstantDataSequential::getElementAsAPFloat(unsigned Elt) const { + const char *EltPtr = getElementPointer(Elt); + + switch (getElementType()->getTypeID()) { + default: assert("Accessor can only be used when element is float/double!"); + case Type::FloatTyID: return APFloat(*(float*)EltPtr); + case Type::DoubleTyID: return APFloat(*(double*)EltPtr); + } +} + +/// getElementAsFloat - If this is an sequential container of floats, return +/// the specified element as a float. +float ConstantDataSequential::getElementAsFloat(unsigned Elt) const { + assert(getElementType()->isFloatTy() && + "Accessor can only be used when element is a 'float'"); + return *(float*)getElementPointer(Elt); +} + +/// getElementAsDouble - If this is an sequential container of doubles, return +/// the specified element as a float. +double ConstantDataSequential::getElementAsDouble(unsigned Elt) const { + assert(getElementType()->isDoubleTy() && + "Accessor can only be used when element is a 'float'"); + return *(double*)getElementPointer(Elt); +} + +/// getElementAsConstant - Return a Constant for a specified index's element. +/// Note that this has to compute a new constant to return, so it isn't as +/// efficient as getElementAsInteger/Float/Double. 
+Constant *ConstantDataSequential::getElementAsConstant(unsigned Elt) const { + if (getElementType()->isFloatTy() || getElementType()->isDoubleTy()) + return ConstantFP::get(getContext(), getElementAsAPFloat(Elt)); + + return ConstantInt::get(getElementType(), getElementAsInteger(Elt)); +} + From stoklund at 2pi.dk Mon Jan 23 22:44:01 2012 From: stoklund at 2pi.dk (Jakob Stoklund Olesen) Date: Tue, 24 Jan 2012 04:44:01 -0000 Subject: [llvm-commits] [llvm] r148779 - /llvm/trunk/lib/CodeGen/TwoAddressInstructionPass.cpp Message-ID: <20120124044401.6010F2A6C12C@llvm.org> Author: stoklund Date: Mon Jan 23 22:44:01 2012 New Revision: 148779 URL: http://llvm.org/viewvc/llvm-project?rev=148779&view=rev Log: Preserve flags in CoalesceExtSubRegs. This won't have an effect until EliminateRegSequences() starts setting the undef flags. Modified: llvm/trunk/lib/CodeGen/TwoAddressInstructionPass.cpp Modified: llvm/trunk/lib/CodeGen/TwoAddressInstructionPass.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/TwoAddressInstructionPass.cpp?rev=148779&r1=148778&r2=148779&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/TwoAddressInstructionPass.cpp (original) +++ llvm/trunk/lib/CodeGen/TwoAddressInstructionPass.cpp Mon Jan 23 22:44:01 2012 @@ -1728,8 +1728,10 @@ CanCoalesce = false; break; } - // Keep track of one of the uses. - SomeMI = UseMI; + // Keep track of one of the uses. Preferably the first one which has a + // flag. + if (!SomeMI || UseMI->getOperand(0).isUndef()) + SomeMI = UseMI; } if (!CanCoalesce) continue; @@ -1738,7 +1740,9 @@ MachineInstr *CopyMI = BuildMI(*SomeMI->getParent(), SomeMI, SomeMI->getDebugLoc(), TII->get(TargetOpcode::COPY)) - .addReg(DstReg, RegState::Define, NewDstSubIdx) + .addReg(DstReg, RegState::Define | + getUndefRegState(SomeMI->getOperand(0).isUndef()), + NewDstSubIdx) .addReg(SrcReg, 0, NewSrcSubIdx); // Remove all the old extract instructions. 
From asl at math.spbu.ru Mon Jan 23 22:58:56 2012 From: asl at math.spbu.ru (Anton Korobeynikov) Date: Tue, 24 Jan 2012 04:58:56 -0000 Subject: [llvm-commits] [llvm] r148782 - /llvm/trunk/lib/Target/ARM/ARMInstrInfo.td Message-ID: <20120124045856.E06EC2A6C12C@llvm.org> Author: asl Date: Mon Jan 23 22:58:56 2012 New Revision: 148782 URL: http://llvm.org/viewvc/llvm-project?rev=148782&view=rev Log: Use correct register class for am2offset register operands. This pacifies machine verifier Modified: llvm/trunk/lib/Target/ARM/ARMInstrInfo.td Modified: llvm/trunk/lib/Target/ARM/ARMInstrInfo.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrInfo.td?rev=148782&r1=148781&r2=148782&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrInfo.td (original) +++ llvm/trunk/lib/Target/ARM/ARMInstrInfo.td Mon Jan 23 22:58:56 2012 @@ -766,7 +766,7 @@ let PrintMethod = "printAddrMode2OffsetOperand"; // When using this for assembly, it's always as a post-index offset. let ParserMatchClass = PostIdxRegShiftedAsmOperand; - let MIOperandInfo = (ops GPR, i32imm); + let MIOperandInfo = (ops GPRnopc, i32imm); } // FIXME: am2offset_imm should only need the immediate, not the GPR. 
Having @@ -778,7 +778,7 @@ let EncoderMethod = "getAddrMode2OffsetOpValue"; let PrintMethod = "printAddrMode2OffsetOperand"; let ParserMatchClass = AM2OffsetImmAsmOperand; - let MIOperandInfo = (ops GPR, i32imm); + let MIOperandInfo = (ops GPRnopc, i32imm); } From anton at korobeynikov.info Mon Jan 23 23:06:36 2012 From: anton at korobeynikov.info (Anton Korobeynikov) Date: Tue, 24 Jan 2012 09:06:36 +0400 Subject: [llvm-commits] LLVM patch to support ARM fused multiply add/subtract instructions In-Reply-To: <003c01ccda40$dddd4610$9997d230$@org> References: <027a01cccb29$a4709100$ed51b300$@org> <003c01ccda40$dddd4610$9997d230$@org> Message-ID: Hi Ana, > Regarding accuracy, what I know is that Qualcomm provides IEEE-754 2008 specified result. This means the multiply is performed without any loss of accuracy (i.e., no rounding) and then the add/subtract operation happens. The final result is rounded according to the configured rounding mode in the VFP unit. In which case multiply is performed w/o loss of accuracy? FMA mode? Or usual VMLA? -- With best regards, Anton Korobeynikov Faculty of Mathematics and Mechanics, Saint Petersburg State University From anton at korobeynikov.info Mon Jan 23 23:10:58 2012 From: anton at korobeynikov.info (Anton Korobeynikov) Date: Tue, 24 Jan 2012 09:10:58 +0400 Subject: [llvm-commits] [llvm] r148686 - in /llvm/trunk: include/llvm/MC/MCAsmInfo.h lib/CodeGen/AsmPrinter/ARMException.cpp lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp test/CodeGen/ARM/ehabi-unwind.ll In-Reply-To: References: <20120123075740.9CD762A6C12C@llvm.org> Message-ID: Hi Chandler, > Frankly, I think you stumbled on a much bigger layering problem: if > ARMException.cpp (and related utilities in lib/CodeGen/AsmPrinter) are > needed by various parts of the MC library, they should all be moved down > into the MC library. That's the library responsible for asm printing these > days. I've looped in the MC experts on this thread to provide more insights. 
Thanks on checking this! I somehow missed the violation, my fault... Also, all the stuff inside ARMException is not needed by MC in any way. -- With best regards, Anton Korobeynikov Faculty of Mathematics and Mechanics, Saint Petersburg State University From nicholas at mxc.ca Mon Jan 23 23:19:27 2012 From: nicholas at mxc.ca (Nick Lewycky) Date: Mon, 23 Jan 2012 21:19:27 -0800 Subject: [llvm-commits] [llvm] r148715 - /llvm/trunk/lib/Object/ELFObjectFile.cpp In-Reply-To: <20120123184604.9348B2A6C12C@llvm.org> References: <20120123184604.9348B2A6C12C@llvm.org> Message-ID: <4F1E3F5F.5070909@mxc.ca> Matt Beaumont-Gay wrote: > Author: matthewbg > Date: Mon Jan 23 12:46:04 2012 > New Revision: 148715 > > URL: http://llvm.org/viewvc/llvm-project?rev=148715&view=rev > Log: > Silence warnings in -asserts build > > Modified: > llvm/trunk/lib/Object/ELFObjectFile.cpp > > Modified: llvm/trunk/lib/Object/ELFObjectFile.cpp > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Object/ELFObjectFile.cpp?rev=148715&r1=148714&r2=148715&view=diff > ============================================================================== > --- llvm/trunk/lib/Object/ELFObjectFile.cpp (original) > +++ llvm/trunk/lib/Object/ELFObjectFile.cpp Mon Jan 23 12:46:04 2012 > @@ -1675,6 +1675,9 @@ > bool isBinary = isa(result); > bool isDyld = isa >(result); > bool isFile = isa >(result); > + (void)isBinary; > + (void)isDyld; > + (void)isFile; > assert(isBinary&& isDyld&& isFile&& > "Type inquiry failed for ELF object!"); Please sink these values into the assert, even when it is this much code. (Or in this case, how about multiple asserts, one for each "type inquiry"?) 
Nick > return result; > @@ -1695,6 +1698,9 @@ > bool isBinary = isa(result); > bool isDyld = isa >(result); > bool isFile = isa >(result); > + (void)isBinary; > + (void)isDyld; > + (void)isFile; > assert(isBinary&& isFile&& !isDyld&& > "Type inquiry failed for ELF object!"); > return result; > > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits > From nicholas at mxc.ca Mon Jan 23 23:24:26 2012 From: nicholas at mxc.ca (Nick Lewycky) Date: Mon, 23 Jan 2012 21:24:26 -0800 Subject: [llvm-commits] [llvm] r147286 - in /llvm/trunk: lib/Transforms/Utils/SimplifyCFG.cpp test/Transforms/SimplifyCFG/preserve-branchweights.ll In-Reply-To: <2D02DA62-8677-4481-8127-9952932D187C@2pi.dk> References: <20111227043152.F2AE42A6C12C@llvm.org> <4F1CE13B.2090908@mxc.ca> <2D02DA62-8677-4481-8127-9952932D187C@2pi.dk> Message-ID: <4F1E408A.4000202@mxc.ca> Jakob Stoklund Olesen wrote: > > On Jan 22, 2012, at 8:25 PM, Nick Lewycky wrote: > >> So, I've tried to implement this and ended up with the patch attached. Any suggestions for improvement would be appreciated. > > Thanks, Nick. The patch looks good to me. Uh... patch looks terrible to me, but sure okay, I'll commit it. :) > BTW, are you actually benefiting from computing everything in APInt? We know the ranges of these values. Ah, hrm. You just pointed out a bug in my patch, it's possible for me to produce two values whose sums don't fit in 32-bits. As for APInt, it doesn't really matter. >> You still need the GCD stuff, otherwise you could end up with branch weights 'i32 5', 'i32 5'. That might be legal, but it's certainly not optimal. > > I don't think it actually hurts, but you get some really weird rounding behavior based on number theoretic happenstance. > > These weights really should have been floats, but we had to approximate with ints because we need reproducible results across platforms. 
They are approximations of real numbers, they are not intended as 'mathematical integers'. > > Let me put it this way: If the weights were floats, would you be computing the GCD of the mantissas? I don't care about taking the GCD for math reasons, I care about not storing different representations of the same data in the .bc files. Incidentally it'll also decrease the chance of the next user needing to deal with overflow in a slow-path, and that's good too, but not why I wrote it. Nick From sabre at nondot.org Mon Jan 23 23:42:11 2012 From: sabre at nondot.org (Chris Lattner) Date: Tue, 24 Jan 2012 05:42:11 -0000 Subject: [llvm-commits] [llvm] r148785 - in /llvm/trunk: include/llvm/Constants.h lib/VMCore/Constants.cpp Message-ID: <20120124054211.D7BC72A6C12C@llvm.org> Author: lattner Date: Mon Jan 23 23:42:11 2012 New Revision: 148785 URL: http://llvm.org/viewvc/llvm-project?rev=148785&view=rev Log: Add some accessor methods to CAZ and UndefValue that help simplify clients. Make some CDS methods public. Modified: llvm/trunk/include/llvm/Constants.h llvm/trunk/lib/VMCore/Constants.cpp Modified: llvm/trunk/include/llvm/Constants.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Constants.h?rev=148785&r1=148784&r2=148785&view=diff ============================================================================== --- llvm/trunk/include/llvm/Constants.h (original) +++ llvm/trunk/include/llvm/Constants.h Mon Jan 23 23:42:11 2012 @@ -310,10 +310,22 @@ return User::operator new(s, 0); } public: - static ConstantAggregateZero* get(Type *Ty); + static ConstantAggregateZero *get(Type *Ty); virtual void destroyConstant(); + /// getSequentialElement - If this CAZ has array or vector type, return a zero + /// with the right element type. + Constant *getSequentialElement(); + + /// getStructElement - If this CAZ has struct type, return a zero with the + /// right element type for the specified element. 
+ Constant *getStructElement(unsigned Elt); + + /// getElementValue - Return a zero of the right value for the specified GEP + /// index. + Constant *getElementValue(Constant *C); + /// Methods for support type inquiry through isa, cast, and dyn_cast: /// static bool classof(const ConstantAggregateZero *) { return true; } @@ -568,8 +580,12 @@ } public: - virtual void destroyConstant(); - + /// isElementTypeCompatible - Return true if a ConstantDataSequential can be + /// formed with a vector or array of the specified element type. + /// ConstantDataArray only works with normal float and int types that are + /// stored densely in memory, not with things like i42 or x86_f80. + static bool isElementTypeCompatible(const Type *Ty); + /// getElementAsInteger - If this is a sequential container of integers (of /// any size), return the specified element in the low bits of a uint64_t. uint64_t getElementAsInteger(unsigned i) const; @@ -601,7 +617,13 @@ /// getElementType - Return the element type of the array/vector. Type *getElementType() const; + /// getElementByteSize - Return the size (in bytes) of each element in the + /// array/vector. The size of the elements is known to be a multiple of one + /// byte. + uint64_t getElementByteSize() const; + virtual void destroyConstant(); + /// Methods for support type inquiry through isa, cast, and dyn_cast: /// static bool classof(const ConstantDataSequential *) { return true; } @@ -610,7 +632,6 @@ V->getValueID() == ConstantDataVectorVal; } private: - uint64_t getElementByteSize() const; const char *getElementPointer(unsigned Elt) const; }; @@ -1074,6 +1095,18 @@ /// static UndefValue *get(Type *T); + /// getSequentialElement - If this Undef has array or vector type, return a + /// undef with the right element type. + UndefValue *getSequentialElement(); + + /// getStructElement - If this undef has struct type, return a undef with the + /// right element type for the specified element. 
+ UndefValue *getStructElement(unsigned Elt); + + /// getElementValue - Return an undef of the right value for the specified GEP + /// index. + UndefValue *getElementValue(Constant *C); + virtual void destroyConstant(); /// Methods for support type inquiry through isa, cast, and dyn_cast: Modified: llvm/trunk/lib/VMCore/Constants.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/Constants.cpp?rev=148785&r1=148784&r2=148785&view=diff ============================================================================== --- llvm/trunk/lib/VMCore/Constants.cpp (original) +++ llvm/trunk/lib/VMCore/Constants.cpp Mon Jan 23 23:42:11 2012 @@ -599,6 +599,57 @@ } //===----------------------------------------------------------------------===// +// ConstantAggregateZero Implementation +//===----------------------------------------------------------------------===// + +/// getSequentialElement - If this CAZ has array or vector type, return a zero +/// with the right element type. +Constant *ConstantAggregateZero::getSequentialElement() { + return Constant::getNullValue( + cast(getType())->getElementType()); +} + +/// getStructElement - If this CAZ has struct type, return a zero with the +/// right element type for the specified element. +Constant *ConstantAggregateZero::getStructElement(unsigned Elt) { + return Constant::getNullValue( + cast(getType())->getElementType(Elt)); +} + +/// getElementValue - Return a zero of the right value for the specified GEP +/// index if we can, otherwise return null (e.g. if C is a ConstantExpr). 
+Constant *ConstantAggregateZero::getElementValue(Constant *C) { + if (isa(getType())) + return getSequentialElement(); + return getStructElement(cast(C)->getZExtValue()); +} + +//===----------------------------------------------------------------------===// +// UndefValue Implementation +//===----------------------------------------------------------------------===// + +/// getSequentialElement - If this undef has array or vector type, return an +/// undef with the right element type. +UndefValue *UndefValue::getSequentialElement() { + return UndefValue::get(cast(getType())->getElementType()); +} + +/// getStructElement - If this undef has struct type, return a zero with the +/// right element type for the specified element. +UndefValue *UndefValue::getStructElement(unsigned Elt) { + return UndefValue::get(cast(getType())->getElementType(Elt)); +} + +/// getElementValue - Return an undef of the right value for the specified GEP +/// index if we can, otherwise return null (e.g. if C is a ConstantExpr). +UndefValue *UndefValue::getElementValue(Constant *C) { + if (isa(getType())) + return getSequentialElement(); + return getStructElement(cast(C)->getZExtValue()); +} + + +//===----------------------------------------------------------------------===// // ConstantXXX Classes //===----------------------------------------------------------------------===// @@ -990,6 +1041,7 @@ } } + //===----------------------------------------------------------------------===// // Factory Function Implementation @@ -1004,7 +1056,7 @@ return Entry; } -/// destroyConstant - Remove the constant from the constant table... +/// destroyConstant - Remove the constant from the constant table. /// void ConstantAggregateZero::destroyConstant() { getContext().pImpl->CAZConstants.erase(getType()); @@ -1924,9 +1976,11 @@ return getType()->getElementType(); } -/// isElementTypeConstantDataCompatible - Return true if this type is valid for -/// a ConstantDataSequential. 
This is i8/i16/i32/i64/float/double. -static bool isElementTypeConstantDataCompatible(const Type *Ty) { +/// isElementTypeCompatible - Return true if a ConstantDataSequential can be +/// formed with a vector or array of the specified element type. +/// ConstantDataArray only works with normal float and int types that are +/// stored densely in memory, not with things like i42 or x86_f80. +bool ConstantDataSequential::isElementTypeCompatible(const Type *Ty) { if (Ty->isFloatTy() || Ty->isDoubleTy()) return true; if (const IntegerType *IT = dyn_cast(Ty)) { switch (IT->getBitWidth()) { @@ -1960,13 +2014,13 @@ return false; return true; } + /// getImpl - This is the underlying implementation of all of the /// ConstantDataSequential::get methods. They all thunk down to here, providing /// the correct element type. We take the bytes in as an StringRef because /// we *want* an underlying "char*" to avoid TBAA type punning violations. Constant *ConstantDataSequential::getImpl(StringRef Elements, Type *Ty) { - assert(isElementTypeConstantDataCompatible(cast(Ty)-> - getElementType())); + assert(isElementTypeCompatible(cast(Ty)->getElementType())); // If the elements are all zero, return a CAZ, which is more dense. if (isAllZeros(Elements)) return ConstantAggregateZero::get(Ty); From zinob at codeaurora.org Mon Jan 23 23:46:04 2012 From: zinob at codeaurora.org (Zino Benaissa) Date: Mon, 23 Jan 2012 21:46:04 -0800 Subject: [llvm-commits] Tuning LLVM Greedy Register Allocator to optimize for code size when targeting ARM Thumb 2 instruction set In-Reply-To: <0E37D7B5-BBF8-4D29-9679-5C4D22B32AEB@2pi.dk> References: <000001ccda35$239f0e10$6add2a30$@org> <0E37D7B5-BBF8-4D29-9679-5C4D22B32AEB@2pi.dk> Message-ID: <000c01ccda5b$76b208c0$64161a40$@org> Hi Jacob, Thanks for quick feedback. 
See my comments below, Cheers, Zino From: Jakob Stoklund Olesen [mailto:stoklund at 2pi.dk] Sent: Monday, January 23, 2012 7:18 PM To: Zino Benaissa Cc: llvm-commits at cs.uiuc.edu; rajav at codeaurora.org Subject: Re: [llvm-commits] Tuning LLVM Greedy Register Allocator to optimize for code size when targeting ARM Thumb 2 instruction set On Jan 23, 2012, at 5:11 PM, Zino Benaissa wrote: Description: This contribution extends LLVM greedy Register Allocator to optimize for code size when LLVM compiler targets ARM Thumb 2 instruction set. This heuristic favors assigning register R0 through R7 to operands used in instruction that can be encoded in 16 bits (16-bit is allowed only if R0-7 are used). Operands that appear most frequently in a function (and in instructions that qualify) get R0-7 register. This heuristic is turned on by default and has impact on generated code only if -mthumb compiler switch is used. To turn this heuristic off use -disable-favor-r0-7 feature flag. This patch modifies: 1) The LLVM greedy register allocator located in LLVM/CodeGen directory: To add the new code size heuristic. 2) The ARM-specific files located in LLVM/Target/ARM directory: To add the function that determines which instruction can be encoded in 16-bits and a fix to enable the compiler to emit CMN instruction in 16-bits encoding. 3) The LLVM test suite: fix test/CodeGen/Thumb2/thumb2-cmn.ll test. Hi Zino, Thanks for working on this interesting patch. Please submit the CMN-related changes as an independent patch. -> Sure, I can do that. If you don't mind, I would like you to run a couple of experiments to better understand why this change improves some benchmarks. -> Sure, please let me know what you find. First of all, is the regHasSizeImpact() hook necessary? Do you get significantly different results if you pretend this function always returns 2? -> For my experiments, precision is quite important to maximize code size gains. 
Second, what happens if you use a 'flatter' spill weight? Instead of your patch, in LiveIntervals::getSpillWeight replace this: float lc = std::pow(1 + (100.0 / (loopDepth + 10)), (double)loopDepth); with this: float lc = std::pow(1 + (100.0 / (loopDepth + 10))/5, (double)loopDepth); And in CalcSpillWeights.h, replace the number 25 in normalizeSpillWeight() with 250. Does that give you similar results? -> I did not try this experiment. However by doing so, aren't you increasing chances for spills to be inserted in hot blocks (like inner loops)? Thanks, /jakob -------------- next part -------------- An HTML attachment was scrubbed... URL: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120123/7aaa4bc1/attachment.html From sabre at nondot.org Mon Jan 23 23:43:50 2012 From: sabre at nondot.org (Chris Lattner) Date: Tue, 24 Jan 2012 05:43:50 -0000 Subject: [llvm-commits] [llvm] r148786 - in /llvm/trunk: include/llvm/Analysis/ConstantFolding.h lib/Analysis/ConstantFolding.cpp Message-ID: <20120124054350.58BD42A6C12C@llvm.org> Author: lattner Date: Mon Jan 23 23:43:50 2012 New Revision: 148786 URL: http://llvm.org/viewvc/llvm-project?rev=148786&view=rev Log: Split the interesting bits of ConstantFoldLoadThroughGEPConstantExpr out into a new ConstantFoldLoadThroughGEPIndices (more useful) function and rewrite it to be simpler, more efficient, and to handle the new ConstantDataSequential type. Enhance ConstantFoldLoadFromConstPtr to handle ConstantDataSequential. 
Modified: llvm/trunk/include/llvm/Analysis/ConstantFolding.h llvm/trunk/lib/Analysis/ConstantFolding.cpp Modified: llvm/trunk/include/llvm/Analysis/ConstantFolding.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/ConstantFolding.h?rev=148786&r1=148785&r2=148786&view=diff ============================================================================== --- llvm/trunk/include/llvm/Analysis/ConstantFolding.h (original) +++ llvm/trunk/include/llvm/Analysis/ConstantFolding.h Mon Jan 23 23:43:50 2012 @@ -81,7 +81,14 @@ /// getelementptr constantexpr, return the constant value being addressed by the /// constant expression, or null if something is funny and we can't decide. Constant *ConstantFoldLoadThroughGEPConstantExpr(Constant *C, ConstantExpr *CE); - + +/// ConstantFoldLoadThroughGEPIndices - Given a constant and getelementptr +/// indices (with an *implied* zero pointer index that is not in the list), +/// return the constant value being addressed by a virtual load, or null if +/// something is funny and we can't decide. +Constant *ConstantFoldLoadThroughGEPIndices(Constant *C, + ArrayRef Indices); + /// canConstantFoldCallTo - Return true if its even possible to fold a call to /// the specified function. 
bool canConstantFoldCallTo(const Function *F); Modified: llvm/trunk/lib/Analysis/ConstantFolding.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/ConstantFolding.cpp?rev=148786&r1=148785&r2=148786&view=diff ============================================================================== --- llvm/trunk/lib/Analysis/ConstantFolding.cpp (original) +++ llvm/trunk/lib/Analysis/ConstantFolding.cpp Mon Jan 23 23:43:50 2012 @@ -273,7 +273,7 @@ } return false; } - + if (ConstantStruct *CS = dyn_cast(C)) { const StructLayout *SL = TD.getStructLayout(CS->getType()); unsigned Index = SL->getElementContainingOffset(ByteOffset); @@ -347,6 +347,24 @@ return true; } + if (ConstantDataSequential *CDS = dyn_cast(C)) { + uint64_t EltSize = CDS->getElementByteSize(); + uint64_t Index = ByteOffset / EltSize; + uint64_t Offset = ByteOffset - Index * EltSize; + for (; Index != CDS->getType()->getNumElements(); ++Index) { + if (!ReadDataFromGlobal(CDS->getElementAsConstant(Index), Offset, CurPtr, + BytesLeft, TD)) + return false; + if (EltSize >= BytesLeft) + return true; + + Offset = 0; + BytesLeft -= EltSize; + CurPtr += EltSize; + } + return true; + } + if (ConstantExpr *CE = dyn_cast(C)) { if (CE->getOpcode() == Instruction::IntToPtr && CE->getOperand(0)->getType() == TD.getIntPtrType(CE->getContext())) @@ -990,56 +1008,51 @@ /// constant expression, or null if something is funny and we can't decide. Constant *llvm::ConstantFoldLoadThroughGEPConstantExpr(Constant *C, ConstantExpr *CE) { - if (CE->getOperand(1) != Constant::getNullValue(CE->getOperand(1)->getType())) + if (!CE->getOperand(1)->isNullValue()) return 0; // Do not allow stepping over the value! 
+ SmallVector Indices(CE->getNumOperands()-2); + for (unsigned i = 2, e = CE->getNumOperands(); i != e; ++i) + Indices[i-2] = CE->getOperand(i); + return ConstantFoldLoadThroughGEPIndices(C, Indices); +} + +/// ConstantFoldLoadThroughGEPIndices - Given a constant and getelementptr +/// indices (with an *implied* zero pointer index that is not in the list), +/// return the constant value being addressed by a virtual load, or null if +/// something is funny and we can't decide. +Constant *llvm::ConstantFoldLoadThroughGEPIndices(Constant *C, + ArrayRef Indices) { // Loop over all of the operands, tracking down which value we are - // addressing... - gep_type_iterator I = gep_type_begin(CE), E = gep_type_end(CE); - for (++I; I != E; ++I) - if (StructType *STy = dyn_cast(*I)) { - ConstantInt *CU = cast(I.getOperand()); - assert(CU->getZExtValue() < STy->getNumElements() && - "Struct index out of range!"); - unsigned El = (unsigned)CU->getZExtValue(); - if (ConstantStruct *CS = dyn_cast(C)) { - C = CS->getOperand(El); - } else if (isa(C)) { - C = Constant::getNullValue(STy->getElementType(El)); - } else if (isa(C)) { - C = UndefValue::get(STy->getElementType(El)); - } else { + // addressing. 
+ for (unsigned i = 0, e = Indices.size(); i != e; ++i) { + ConstantInt *Idx = dyn_cast(Indices[i]); + if (Idx == 0) return 0; + + uint64_t IdxVal = Idx->getZExtValue(); + + if (ConstantStruct *CS = dyn_cast(C)) { + C = CS->getOperand(IdxVal); + } else if (ConstantAggregateZero *CAZ = dyn_cast(C)){ + C = CAZ->getElementValue(Idx); + } else if (UndefValue *UV = dyn_cast(C)) { + C = UV->getElementValue(Idx); + } else if (ConstantArray *CA = dyn_cast(C)) { + if (IdxVal >= CA->getType()->getNumElements()) return 0; - } - } else if (ConstantInt *CI = dyn_cast(I.getOperand())) { - if (ArrayType *ATy = dyn_cast(*I)) { - if (CI->getZExtValue() >= ATy->getNumElements()) - return 0; - if (ConstantArray *CA = dyn_cast(C)) - C = CA->getOperand(CI->getZExtValue()); - else if (isa(C)) - C = Constant::getNullValue(ATy->getElementType()); - else if (isa(C)) - C = UndefValue::get(ATy->getElementType()); - else - return 0; - } else if (VectorType *VTy = dyn_cast(*I)) { - if (CI->getZExtValue() >= VTy->getNumElements()) - return 0; - if (ConstantVector *CP = dyn_cast(C)) - C = CP->getOperand(CI->getZExtValue()); - else if (isa(C)) - C = Constant::getNullValue(VTy->getElementType()); - else if (isa(C)) - C = UndefValue::get(VTy->getElementType()); - else - return 0; - } else { + C = CA->getOperand(IdxVal); + } else if (ConstantDataSequential *CDS=dyn_cast(C)){ + if (IdxVal >= CDS->getType()->getNumElements()) return 0; - } + C = CDS->getElementAsConstant(IdxVal); + } else if (ConstantVector *CV = dyn_cast(C)) { + if (IdxVal >= CV->getType()->getNumElements()) + return 0; + C = CV->getOperand(IdxVal); } else { return 0; } + } return C; } From sabre at nondot.org Mon Jan 23 23:49:25 2012 From: sabre at nondot.org (Chris Lattner) Date: Tue, 24 Jan 2012 05:49:25 -0000 Subject: [llvm-commits] [llvm] r148788 - /llvm/trunk/lib/Analysis/ScalarEvolution.cpp Message-ID: <20120124054925.26F472A6C12C@llvm.org> Author: lattner Date: Mon Jan 23 23:49:24 2012 New Revision: 148788 URL: 
http://llvm.org/viewvc/llvm-project?rev=148788&view=rev Log: switch SCEV to use the new ConstantFoldLoadThroughGEPIndices function instead of its own hard coded thing, allowing it to handle ConstantDataSequential and fixing some obscure bugs (e.g. it would previously crash on a CAZ of vector type). Modified: llvm/trunk/lib/Analysis/ScalarEvolution.cpp Modified: llvm/trunk/lib/Analysis/ScalarEvolution.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/ScalarEvolution.cpp?rev=148788&r1=148787&r2=148788&view=diff ============================================================================== --- llvm/trunk/lib/Analysis/ScalarEvolution.cpp (original) +++ llvm/trunk/lib/Analysis/ScalarEvolution.cpp Mon Jan 23 23:49:24 2012 @@ -4580,39 +4580,6 @@ return cast(Val)->getValue(); } -/// GetAddressedElementFromGlobal - Given a global variable with an initializer -/// and a GEP expression (missing the pointer index) indexing into it, return -/// the addressed element of the initializer or null if the index expression is -/// invalid. 
-static Constant * -GetAddressedElementFromGlobal(GlobalVariable *GV, - const std::vector &Indices) { - Constant *Init = GV->getInitializer(); - for (unsigned i = 0, e = Indices.size(); i != e; ++i) { - uint64_t Idx = Indices[i]->getZExtValue(); - if (ConstantStruct *CS = dyn_cast(Init)) { - assert(Idx < CS->getNumOperands() && "Bad struct index!"); - Init = cast(CS->getOperand(Idx)); - } else if (ConstantArray *CA = dyn_cast(Init)) { - if (Idx >= CA->getNumOperands()) return 0; // Bogus program - Init = cast(CA->getOperand(Idx)); - } else if (isa(Init)) { - if (StructType *STy = dyn_cast(Init->getType())) { - assert(Idx < STy->getNumElements() && "Bad struct index!"); - Init = Constant::getNullValue(STy->getElementType(Idx)); - } else if (ArrayType *ATy = dyn_cast(Init->getType())) { - if (Idx >= ATy->getNumElements()) return 0; // Bogus program - Init = Constant::getNullValue(ATy->getElementType()); - } else { - llvm_unreachable("Unknown constant aggregate type!"); - } - } else { - return 0; // Unknown initializer type - } - } - return Init; -} - /// ComputeLoadConstantCompareExitLimit - Given an exit condition of /// 'icmp op load X, cst', try to see if we can compute the backedge /// execution count. @@ -4640,7 +4607,7 @@ // Okay, we allow one non-constant index into the GEP instruction. Value *VarIdx = 0; - std::vector Indexes; + std::vector Indexes; unsigned VarIdxNum = 0; for (unsigned i = 2, e = GEP->getNumOperands(); i != e; ++i) if (ConstantInt *CI = dyn_cast(GEP->getOperand(i))) { @@ -4674,7 +4641,8 @@ // Form the GEP offset. Indexes[VarIdxNum] = Val; - Constant *Result = GetAddressedElementFromGlobal(GV, Indexes); + Constant *Result = ConstantFoldLoadThroughGEPIndices(GV->getInitializer(), + Indexes); if (Result == 0) break; // Cannot compute! // Evaluate the condition for this iteration. 
From eli.bendersky at intel.com Tue Jan 24 01:35:28 2012 From: eli.bendersky at intel.com (Bendersky, Eli) Date: Tue, 24 Jan 2012 07:35:28 +0000 Subject: [llvm-commits] [llvm] r148653 - in /llvm/trunk: include/llvm/Object/ObjectFile.h include/llvm/Support/Endian.h lib/Object/ELFObjectFile.cpp In-Reply-To: <14B0BD87-2FF4-417D-B932-3F18E7E939B4@apple.com> References: <20120122090104.4CD0B2A6C12C@llvm.org> <14B0BD87-2FF4-417D-B932-3F18E7E939B4@apple.com> Message-ID: <9BBE4537D1BAAB479E9E8F9D4234619D322E28@HASMSX103.ger.corp.intel.com> > -----Original Message----- > From: Jim Grosbach [mailto:grosbach at apple.com] > Sent: Monday, January 23, 2012 20:38 > To: Bendersky, Eli > Cc: llvm-commits at cs.uiuc.edu > Subject: Re: [llvm-commits] [llvm] r148653 - in /llvm/trunk: > include/llvm/Object/ObjectFile.h include/llvm/Support/Endian.h > lib/Object/ELFObjectFile.cpp > > Hi Eli, > > This patch uses std::vector quite a lot. Have you considered SmallVector? It > seems likely that may be a better fit in at least some cases. > > -Jim > Hi Jim, Thanks for taking the time to review the patch. We'll be happy for the code to conform to the LLVM coding philosophy, but would be happy for some advice. We did consider SmallVector, but eventually just went with std::vector. Here is the reasoning. Currently std::vector is used in 2 places in the added code: 1. It's being passed to the constructor of DyldELFObject as a simple method of tracking memory allocations. The same vector is being passed by reference to other methods of the object. This solution is probably temporary, since we plan to eventually roll a more sophisticated memory manager (Ashok Thirumurthi explained this intention in a separate discussion earlier). This vector is created once in the calling code, and the functions in ELFObjectFile just add pointers to it with push_back. 2. Another std::vector is created in the rebaseObject method to keep track of the addresses of COMMON symbols. 
Note that we don't know in advance how many such symbols there are. Also note that rebaseObject is just called once per JITted module. IIUC, SmallVector's chief advantage over std::vector is that it can allocate some elements on the stack without malloc, so when we have a good guess of the maximal amount of elements the vector will contain, SmallVector provides a way to avoid a malloc. In light of the above, which use(s) of std::vector would you recommend replacing with SmallVector? Thanks in advance, Eli --------------------------------------------------------------------- Intel Israel (74) Limited This e-mail and any attachments may contain confidential material for the sole use of the intended recipient(s). Any review or distribution by others is strictly prohibited. If you are not the intended recipient, please contact the sender and delete all copies. From sabre at nondot.org Tue Jan 24 01:54:10 2012 From: sabre at nondot.org (Chris Lattner) Date: Tue, 24 Jan 2012 07:54:10 -0000 Subject: [llvm-commits] [llvm] r148790 - in /llvm/trunk: include/llvm/Constants.h lib/Analysis/ValueTracking.cpp lib/VMCore/Constants.cpp Message-ID: <20120124075410.A49302A6C12C@llvm.org> Author: lattner Date: Tue Jan 24 01:54:10 2012 New Revision: 148790 URL: http://llvm.org/viewvc/llvm-project?rev=148790&view=rev Log: teach valuetracking about ConstantDataSequential Modified: llvm/trunk/include/llvm/Constants.h llvm/trunk/lib/Analysis/ValueTracking.cpp llvm/trunk/lib/VMCore/Constants.cpp Modified: llvm/trunk/include/llvm/Constants.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Constants.h?rev=148790&r1=148789&r2=148790&view=diff ============================================================================== --- llvm/trunk/include/llvm/Constants.h (original) +++ llvm/trunk/include/llvm/Constants.h Tue Jan 24 01:54:10 2012 @@ -325,7 +325,11 @@ /// getElementValue - Return a zero of the right value for the specified GEP /// index. 
Constant *getElementValue(Constant *C); - + + /// getElementValue - Return a zero of the right value for the specified GEP + /// index. + Constant *getElementValue(unsigned Idx); + /// Methods for support type inquiry through isa, cast, and dyn_cast: /// static bool classof(const ConstantAggregateZero *) { return true; } @@ -1106,7 +1110,11 @@ /// getElementValue - Return an undef of the right value for the specified GEP /// index. UndefValue *getElementValue(Constant *C); - + + /// getElementValue - Return an undef of the right value for the specified GEP + /// index. + UndefValue *getElementValue(unsigned Idx); + virtual void destroyConstant(); /// Methods for support type inquiry through isa, cast, and dyn_cast: Modified: llvm/trunk/lib/Analysis/ValueTracking.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/ValueTracking.cpp?rev=148790&r1=148789&r2=148790&view=diff ============================================================================== --- llvm/trunk/lib/Analysis/ValueTracking.cpp (original) +++ llvm/trunk/lib/Analysis/ValueTracking.cpp Tue Jan 24 01:54:10 2012 @@ -100,6 +100,19 @@ } return; } + if (ConstantDataSequential *CDS = dyn_cast(V)) { + // We know that CDS must be a vector of integers. Take the intersection of + // each element. + KnownZero.setAllBits(); KnownOne.setAllBits(); + APInt Elt(KnownZero.getBitWidth(), 0); + for (unsigned i = 0, e = CDS->getType()->getNumElements(); i != e; ++i) { + Elt = CDS->getElementAsInteger(i); + KnownZero &= ~Elt; + KnownOne &= Elt; + } + return; + } + // The address of an aligned GlobalValue has trailing zeros. if (GlobalValue *GV = dyn_cast(V)) { unsigned Align = GV->getAlignment(); @@ -1472,50 +1485,51 @@ Value *llvm::FindInsertedValue(Value *V, ArrayRef idx_range, Instruction *InsertBefore) { // Nothing to index? Just return V then (this is useful at the end of our - // recursion) + // recursion). 
if (idx_range.empty()) return V; - // We have indices, so V should have an indexable type - assert((V->getType()->isStructTy() || V->getType()->isArrayTy()) - && "Not looking at a struct or array?"); - assert(ExtractValueInst::getIndexedType(V->getType(), idx_range) - && "Invalid indices for type?"); + // We have indices, so V should have an indexable type. + assert((V->getType()->isStructTy() || V->getType()->isArrayTy()) && + "Not looking at a struct or array?"); + assert(ExtractValueInst::getIndexedType(V->getType(), idx_range) && + "Invalid indices for type?"); CompositeType *PTy = cast(V->getType()); if (isa(V)) - return UndefValue::get(ExtractValueInst::getIndexedType(PTy, - idx_range)); - else if (isa(V)) + return UndefValue::get(ExtractValueInst::getIndexedType(PTy, idx_range)); + if (isa(V)) return Constant::getNullValue(ExtractValueInst::getIndexedType(PTy, idx_range)); - else if (Constant *C = dyn_cast(V)) { - if (isa(C) || isa(C)) - // Recursively process this constant - return FindInsertedValue(C->getOperand(idx_range[0]), idx_range.slice(1), - InsertBefore); - } else if (InsertValueInst *I = dyn_cast(V)) { + if (isa(V) || isa(V)) + // Recursively process this constant + return FindInsertedValue(cast(V)->getOperand(idx_range[0]), + idx_range.slice(1), InsertBefore); + if (ConstantDataSequential *CDS = dyn_cast(V)) + return CDS->getElementAsConstant(idx_range[0]); + + if (InsertValueInst *I = dyn_cast(V)) { // Loop the indices for the insertvalue instruction in parallel with the // requested indices const unsigned *req_idx = idx_range.begin(); for (const unsigned *i = I->idx_begin(), *e = I->idx_end(); i != e; ++i, ++req_idx) { if (req_idx == idx_range.end()) { - if (InsertBefore) - // The requested index identifies a part of a nested aggregate. Handle - // this specially. 
For example, - // %A = insertvalue { i32, {i32, i32 } } undef, i32 10, 1, 0 - // %B = insertvalue { i32, {i32, i32 } } %A, i32 11, 1, 1 - // %C = extractvalue {i32, { i32, i32 } } %B, 1 - // This can be changed into - // %A = insertvalue {i32, i32 } undef, i32 10, 0 - // %C = insertvalue {i32, i32 } %A, i32 11, 1 - // which allows the unused 0,0 element from the nested struct to be - // removed. - return BuildSubAggregate(V, makeArrayRef(idx_range.begin(), req_idx), - InsertBefore); - else - // We can't handle this without inserting insertvalues + // We can't handle this without inserting insertvalues + if (!InsertBefore) return 0; + + // The requested index identifies a part of a nested aggregate. Handle + // this specially. For example, + // %A = insertvalue { i32, {i32, i32 } } undef, i32 10, 1, 0 + // %B = insertvalue { i32, {i32, i32 } } %A, i32 11, 1, 1 + // %C = extractvalue {i32, { i32, i32 } } %B, 1 + // This can be changed into + // %A = insertvalue {i32, i32 } undef, i32 10, 0 + // %C = insertvalue {i32, i32 } %A, i32 11, 1 + // which allows the unused 0,0 element from the nested struct to be + // removed. + return BuildSubAggregate(V, makeArrayRef(idx_range.begin(), req_idx), + InsertBefore); } // This insert value inserts something else than what we are looking for. @@ -1531,7 +1545,9 @@ return FindInsertedValue(I->getInsertedValueOperand(), makeArrayRef(req_idx, idx_range.end()), InsertBefore); - } else if (ExtractValueInst *I = dyn_cast(V)) { + } + + if (ExtractValueInst *I = dyn_cast(V)) { // If we're extracting a value from an aggregrate that was extracted from // something else, we can extract from that something else directly instead. // However, we will need to chain I's indices with the requested indices. 
Modified: llvm/trunk/lib/VMCore/Constants.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/Constants.cpp?rev=148790&r1=148789&r2=148790&view=diff ============================================================================== --- llvm/trunk/lib/VMCore/Constants.cpp (original) +++ llvm/trunk/lib/VMCore/Constants.cpp Tue Jan 24 01:54:10 2012 @@ -624,6 +624,15 @@ return getStructElement(cast(C)->getZExtValue()); } +/// getElementValue - Return a zero of the right value for the specified GEP +/// index. +Constant *ConstantAggregateZero::getElementValue(unsigned Idx) { + if (isa(getType())) + return getSequentialElement(); + return getStructElement(Idx); +} + + //===----------------------------------------------------------------------===// // UndefValue Implementation //===----------------------------------------------------------------------===// @@ -648,6 +657,15 @@ return getStructElement(cast(C)->getZExtValue()); } +/// getElementValue - Return an undef of the right value for the specified GEP +/// index. +UndefValue *UndefValue::getElementValue(unsigned Idx) { + if (isa(getType())) + return getSequentialElement(); + return getStructElement(Idx); +} + + //===----------------------------------------------------------------------===// // ConstantXXX Classes From james.molloy at arm.com Tue Jan 24 02:36:52 2012 From: james.molloy at arm.com (James Molloy) Date: Tue, 24 Jan 2012 08:36:52 -0000 Subject: [llvm-commits] [llvm] r148658 - in /llvm/trunk: lib/Target/ARM/ARM.td lib/Target/ARM/ARMAsmPrinter.cpp lib/Target/ARM/ARMInstrInfo.td lib/Target/ARM/ARMInstrNEON.td lib/Target/ARM/ARMInstrVFP.td lib/Target/ARM/ARMSchedule.td lib/Target/ARM/ARMSubtarge In-Reply-To: <20120122120733.DC7EF2A6C12C@llvm.org> References: <20120122120733.DC7EF2A6C12C@llvm.org> Message-ID: <000c01ccda73$52b639b0$f822ad10$@molloy@arm.com> Hi Anton, I'm not 100% happy with this patch. 
The implementation means that -mattr=+neon,+vfp4 does not enable neon-vfp4, which I think is rather confusing. I'd much prefer the predicate to be rewritten such that hasNEONVFP4() === hasNEONVFP4 || (hasVFP4 && hasNEON). Cheers, James -----Original Message----- From: llvm-commits-bounces at cs.uiuc.edu [mailto:llvm-commits-bounces at cs.uiuc.edu] On Behalf Of Anton Korobeynikov Sent: 22 January 2012 12:08 To: llvm-commits at cs.uiuc.edu Subject: [llvm-commits] [llvm] r148658 - in /llvm/trunk: lib/Target/ARM/ARM.td lib/Target/ARM/ARMAsmPrinter.cpp lib/Target/ARM/ARMInstrInfo.td lib/Target/ARM/ARMInstrNEON.td lib/Target/ARM/ARMInstrVFP.td lib/Target/ARM/ARMSchedule.td lib/Target/ARM/ARMSubtarget... Author: asl Date: Sun Jan 22 06:07:33 2012 New Revision: 148658 URL: http://llvm.org/viewvc/llvm-project?rev=148658&view=rev Log: Add fused multiple+add instructions from VFPv4. Patch by Ana Pazos! Added: llvm/trunk/test/CodeGen/ARM/fusedMAC.ll Modified: llvm/trunk/lib/Target/ARM/ARM.td llvm/trunk/lib/Target/ARM/ARMAsmPrinter.cpp llvm/trunk/lib/Target/ARM/ARMInstrInfo.td llvm/trunk/lib/Target/ARM/ARMInstrNEON.td llvm/trunk/lib/Target/ARM/ARMInstrVFP.td llvm/trunk/lib/Target/ARM/ARMSchedule.td llvm/trunk/lib/Target/ARM/ARMSubtarget.cpp llvm/trunk/lib/Target/ARM/ARMSubtarget.h Modified: llvm/trunk/lib/Target/ARM/ARM.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARM.td?rev=148 658&r1=148657&r2=148658&view=diff ============================================================================ == --- llvm/trunk/lib/Target/ARM/ARM.td (original) +++ llvm/trunk/lib/Target/ARM/ARM.td Sun Jan 22 06:07:33 2012 @@ -32,9 +32,15 @@ def FeatureVFP3 : SubtargetFeature<"vfp3", "HasVFPv3", "true", "Enable VFP3 instructions", [FeatureVFP2]>; +def FeatureVFP4 : SubtargetFeature<"vfp4", "HasVFPv4", "true", + "Enable VFP4 instructions", + [FeatureVFP3]>; def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true", "Enable NEON instructions", [FeatureVFP3]>; +def 
FeatureNEONVFP4 : SubtargetFeature<"neon-vfpv4", "HasNEONVFPv4", "true", + "Enable NEON-VFP4 instructions", + [FeatureVFP4, FeatureNEON]>; def FeatureThumb2 : SubtargetFeature<"thumb2", "HasThumb2", "true", "Enable Thumb2 instructions">; def FeatureNoARM : SubtargetFeature<"noarm", "NoARM", "true", Modified: llvm/trunk/lib/Target/ARM/ARMAsmPrinter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMAsmPrinter. cpp?rev=148658&r1=148657&r2=148658&view=diff ============================================================================ == --- llvm/trunk/lib/Target/ARM/ARMAsmPrinter.cpp (original) +++ llvm/trunk/lib/Target/ARM/ARMAsmPrinter.cpp Sun Jan 22 06:07:33 2012 @@ -719,15 +719,25 @@ if (Subtarget->hasNEON() && emitFPU) { /* NEON is not exactly a VFP architecture, but GAS emit one of - * neon/vfpv3/vfpv2 for .fpu parameters */ - AttrEmitter->EmitTextAttribute(ARMBuildAttrs::Advanced_SIMD_arch, "neon"); + * neon/neon-vfpv4/vfpv3/vfpv2 for .fpu parameters */ + if (Subtarget->hasNEONVFP4()) + AttrEmitter->EmitTextAttribute(ARMBuildAttrs::Advanced_SIMD_arch, "neon-vfpv4"); + else + AttrEmitter->EmitTextAttribute(ARMBuildAttrs::Advanced_SIMD_arch, "neon"); /* If emitted for NEON, omit from VFP below, since you can have both * NEON and VFP in build attributes but only one .fpu */ emitFPU = false; } + /* VFPv4 + .fpu */ + if (Subtarget->hasVFP4()) { + AttrEmitter->EmitAttribute(ARMBuildAttrs::VFP_arch, + ARMBuildAttrs::AllowFPv4A); + if (emitFPU) + AttrEmitter->EmitTextAttribute(ARMBuildAttrs::VFP_arch, "vfpv4"); + /* VFPv3 + .fpu */ - if (Subtarget->hasVFP3()) { + } else if (Subtarget->hasVFP3()) { AttrEmitter->EmitAttribute(ARMBuildAttrs::VFP_arch, ARMBuildAttrs::AllowFPv3A); if (emitFPU) Modified: llvm/trunk/lib/Target/ARM/ARMInstrInfo.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrInfo.t d?rev=148658&r1=148657&r2=148658&view=diff ============================================================================ == --- 
llvm/trunk/lib/Target/ARM/ARMInstrInfo.td (original) +++ llvm/trunk/lib/Target/ARM/ARMInstrInfo.td Sun Jan 22 06:07:33 2012 @@ -179,8 +179,14 @@ AssemblerPredicate<"FeatureVFP2">; def HasVFP3 : Predicate<"Subtarget->hasVFP3()">, AssemblerPredicate<"FeatureVFP3">; +def HasVFP4 : Predicate<"Subtarget->hasVFP4()">, + AssemblerPredicate<"FeatureVFP4">; +def NoVFP4 : Predicate<"!Subtarget->hasVFP4()">; def HasNEON : Predicate<"Subtarget->hasNEON()">, AssemblerPredicate<"FeatureNEON">; +def HasNEONVFP4 : Predicate<"Subtarget->hasNEONVFP4()">, + AssemblerPredicate<"FeatureNEONVFP4">; +def NoNEONVFP4 : Predicate<"!Subtarget->hasNEONVFP4()">; def HasFP16 : Predicate<"Subtarget->hasFP16()">, AssemblerPredicate<"FeatureFP16">; def HasDivide : Predicate<"Subtarget->hasDivide()">, Modified: llvm/trunk/lib/Target/ARM/ARMInstrNEON.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrNEON.t d?rev=148658&r1=148657&r2=148658&view=diff ============================================================================ == --- llvm/trunk/lib/Target/ARM/ARMInstrNEON.td (original) +++ llvm/trunk/lib/Target/ARM/ARMInstrNEON.td Sun Jan 22 06:07:33 2012 @@ -3897,10 +3897,10 @@ IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>; def VMLAfd : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32", v2f32, fmul_su, fadd_mlx>, - Requires<[HasNEON, UseFPVMLx]>; + Requires<[HasNEON, UseFPVMLx, NoNEONVFP4]>; def VMLAfq : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32", v4f32, fmul_su, fadd_mlx>, - Requires<[HasNEON, UseFPVMLx]>; + Requires<[HasNEON, UseFPVMLx, NoNEONVFP4]>; defm VMLAsl : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D, IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>; def VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32", @@ -3955,10 +3955,10 @@ IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>; def VMLSfd : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32", v2f32, fmul_su, fsub_mlx>, - Requires<[HasNEON, UseFPVMLx]>; + Requires<[HasNEON, 
UseFPVMLx, NoNEONVFP4]>; def VMLSfq : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32", v4f32, fmul_su, fsub_mlx>, - Requires<[HasNEON, UseFPVMLx]>; + Requires<[HasNEON, UseFPVMLx, NoNEONVFP4]>; defm VMLSsl : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D, IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>; def VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32", @@ -4007,6 +4007,24 @@ "vqdmlsl", "s", int_arm_neon_vqdmlsl>; defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b111, "vqdmlsl", "s", int_arm_neon_vqdmlsl>; + +// Fused Vector Multiply-Accumulate and Fused Multiply-Subtract Operations. +def VFMAfd : N3VDMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACD, "vfma", "f32", + v2f32, fmul_su, fadd_mlx>, + Requires<[HasNEONVFP4]>; + +def VFMAfq : N3VQMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACQ, "vfma", "f32", + v4f32, fmul_su, fadd_mlx>, + Requires<[HasNEONVFP4]>; + +// Fused Vector Multiply Subtract (floating-point) +def VFMSfd : N3VDMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACD, "vfms", "f32", + v2f32, fmul_su, fsub_mlx>, + Requires<[HasNEONVFP4]>; +def VFMSfq : N3VQMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACQ, "vfms", "f32", + v4f32, fmul_su, fsub_mlx>, + Requires<[HasNEONVFP4]>; + // Vector Subtract Operations. 
// VSUB : Vector Subtract (integer and floating-point) @@ -5358,9 +5376,13 @@ def : N3VSPat; def : N3VSPat; def : N3VSMulOpPat, - Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>; + Requires<[HasNEON, UseNEONForFP, UseFPVMLx, NoNEONVFP4]>; def : N3VSMulOpPat, - Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>; + Requires<[HasNEON, UseNEONForFP, UseFPVMLx, NoNEONVFP4]>; +def : N3VSMulOpPat, + Requires<[HasNEONVFP4, UseNEONForFP]>; +def : N3VSMulOpPat, + Requires<[HasNEONVFP4, UseNEONForFP]>; def : N2VSPat; def : N2VSPat; def : N3VSPat; Modified: llvm/trunk/lib/Target/ARM/ARMInstrVFP.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrVFP.td ?rev=148658&r1=148657&r2=148658&view=diff ============================================================================ == --- llvm/trunk/lib/Target/ARM/ARMInstrVFP.td (original) +++ llvm/trunk/lib/Target/ARM/ARMInstrVFP.td Sun Jan 22 06:07:33 2012 @@ -920,7 +920,7 @@ [(set DPR:$Dd, (fadd_mlx (fmul_su DPR:$Dn, DPR:$Dm), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP2,UseFPVMLx]>; + Requires<[HasVFP2,UseFPVMLx,NoVFP4]>; def VMLAS : ASbIn<0b11100, 0b00, 0, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), @@ -928,7 +928,7 @@ [(set SPR:$Sd, (fadd_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> { + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,NoVFP4]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. 
let D = VFPNeonA8Domain; @@ -936,10 +936,10 @@ def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))), (VMLAD DPR:$dstin, DPR:$a, DPR:$b)>, - Requires<[HasVFP2,UseFPVMLx]>; + Requires<[HasVFP2,UseFPVMLx,NoVFP4]>; def : Pat<(fadd_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)), (VMLAS SPR:$dstin, SPR:$a, SPR:$b)>, - Requires<[HasVFP2,DontUseNEONForFP, UseFPVMLx]>; + Requires<[HasVFP2,DontUseNEONForFP, UseFPVMLx,NoVFP4]>; def VMLSD : ADbI<0b11100, 0b00, 1, 0, (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm), @@ -947,7 +947,7 @@ [(set DPR:$Dd, (fadd_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP2,UseFPVMLx]>; + Requires<[HasVFP2,UseFPVMLx,NoVFP4]>; def VMLSS : ASbIn<0b11100, 0b00, 1, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), @@ -955,7 +955,7 @@ [(set SPR:$Sd, (fadd_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)), SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> { + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,NoVFP4]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. 
let D = VFPNeonA8Domain; @@ -963,10 +963,10 @@ def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))), (VMLSD DPR:$dstin, DPR:$a, DPR:$b)>, - Requires<[HasVFP2,UseFPVMLx]>; + Requires<[HasVFP2,UseFPVMLx,NoVFP4]>; def : Pat<(fsub_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)), (VMLSS SPR:$dstin, SPR:$a, SPR:$b)>, - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>; + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,NoVFP4]>; def VNMLAD : ADbI<0b11100, 0b01, 1, 0, (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm), @@ -974,7 +974,7 @@ [(set DPR:$Dd,(fsub_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP2,UseFPVMLx]>; + Requires<[HasVFP2,UseFPVMLx,NoVFP4]>; def VNMLAS : ASbI<0b11100, 0b01, 1, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), @@ -982,7 +982,7 @@ [(set SPR:$Sd, (fsub_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)), SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> { + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,NoVFP4]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. 
let D = VFPNeonA8Domain; @@ -990,10 +990,10 @@ def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin), (VNMLAD DPR:$dstin, DPR:$a, DPR:$b)>, - Requires<[HasVFP2,UseFPVMLx]>; + Requires<[HasVFP2,UseFPVMLx,NoVFP4]>; def : Pat<(fsub_mlx (fneg (fmul_su SPR:$a, SPR:$b)), SPR:$dstin), (VNMLAS SPR:$dstin, SPR:$a, SPR:$b)>, - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>; + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,NoVFP4]>; def VNMLSD : ADbI<0b11100, 0b01, 0, 0, (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm), @@ -1001,14 +1001,14 @@ [(set DPR:$Dd, (fsub_mlx (fmul_su DPR:$Dn, DPR:$Dm), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP2,UseFPVMLx]>; + Requires<[HasVFP2,UseFPVMLx,NoVFP4]>; def VNMLSS : ASbI<0b11100, 0b01, 0, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), IIC_fpMAC32, "vnmls", ".f32\t$Sd, $Sn, $Sm", [(set SPR:$Sd, (fsub_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> { + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,NoVFP4]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. let D = VFPNeonA8Domain; @@ -1016,11 +1016,116 @@ def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin), (VNMLSD DPR:$dstin, DPR:$a, DPR:$b)>, - Requires<[HasVFP2,UseFPVMLx]>; + Requires<[HasVFP2,UseFPVMLx,NoVFP4]>; def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin), (VNMLSS SPR:$dstin, SPR:$a, SPR:$b)>, - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>; + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,NoVFP4]>; +//===---------------------------------------------------------------------- ===// +// Fused FP Multiply-Accumulate Operations. 
+// +def VFMAD : ADbI<0b11101, 0b10, 0, 0, + (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm), + IIC_fpFMAC64, "vfma", ".f64\t$Dd, $Dn, $Dm", + [(set DPR:$Dd, (fadd_mlx (fmul_su DPR:$Dn, DPR:$Dm), + (f64 DPR:$Ddin)))]>, + RegConstraint<"$Ddin = $Dd">, + Requires<[HasVFP4]>; + +def VFMAS : ASbIn<0b11101, 0b10, 0, 0, + (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), + IIC_fpFMAC32, "vfma", ".f32\t$Sd, $Sn, $Sm", + [(set SPR:$Sd, (fadd_mlx (fmul_su SPR:$Sn, SPR:$Sm), + SPR:$Sdin))]>, + RegConstraint<"$Sdin = $Sd">, + Requires<[HasVFP4,DontUseNEONForFP]> { + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines. +} + +def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))), + (VFMAD DPR:$dstin, DPR:$a, DPR:$b)>, + Requires<[HasVFP4]>; +def : Pat<(fadd_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)), + (VFMAS SPR:$dstin, SPR:$a, SPR:$b)>, + Requires<[HasVFP4,DontUseNEONForFP]>; + +def VFMSD : ADbI<0b11101, 0b10, 1, 0, + (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm), + IIC_fpFMAC64, "vfms", ".f64\t$Dd, $Dn, $Dm", + [(set DPR:$Dd, (fadd_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)), + (f64 DPR:$Ddin)))]>, + RegConstraint<"$Ddin = $Dd">, + Requires<[HasVFP4]>; + +def VFMSS : ASbIn<0b11101, 0b10, 1, 0, + (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), + IIC_fpFMAC32, "vfms", ".f32\t$Sd, $Sn, $Sm", + [(set SPR:$Sd, (fadd_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)), + SPR:$Sdin))]>, + RegConstraint<"$Sdin = $Sd">, + Requires<[HasVFP4,DontUseNEONForFP]> { + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines. 
+} + +def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))), + (VFMSD DPR:$dstin, DPR:$a, DPR:$b)>, + Requires<[HasVFP4]>; +def : Pat<(fsub_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)), + (VFMSS SPR:$dstin, SPR:$a, SPR:$b)>, + Requires<[HasVFP4,DontUseNEONForFP]>; + +def VFNMAD : ADbI<0b11101, 0b01, 1, 0, + (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm), + IIC_fpFMAC64, "vfnma", ".f64\t$Dd, $Dn, $Dm", + [(set DPR:$Dd,(fsub_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)), + (f64 DPR:$Ddin)))]>, + RegConstraint<"$Ddin = $Dd">, + Requires<[HasVFP4]>; + +def VFNMAS : ASbI<0b11101, 0b01, 1, 0, + (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), + IIC_fpFMAC32, "vfnma", ".f32\t$Sd, $Sn, $Sm", + [(set SPR:$Sd, (fsub_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)), + SPR:$Sdin))]>, + RegConstraint<"$Sdin = $Sd">, + Requires<[HasVFP4,DontUseNEONForFP]> { + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines. +} + +def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin), + (VFNMAD DPR:$dstin, DPR:$a, DPR:$b)>, + Requires<[HasVFP4]>; +def : Pat<(fsub_mlx (fneg (fmul_su SPR:$a, SPR:$b)), SPR:$dstin), + (VFNMAS SPR:$dstin, SPR:$a, SPR:$b)>, + Requires<[HasVFP4,DontUseNEONForFP]>; + +def VFNMSD : ADbI<0b11101, 0b01, 0, 0, + (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm), + IIC_fpFMAC64, "vfnms", ".f64\t$Dd, $Dn, $Dm", + [(set DPR:$Dd, (fsub_mlx (fmul_su DPR:$Dn, DPR:$Dm), + (f64 DPR:$Ddin)))]>, + RegConstraint<"$Ddin = $Dd">, + Requires<[HasVFP4]>; + +def VFNMSS : ASbI<0b11101, 0b01, 0, 0, + (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), + IIC_fpFMAC32, "vfnms", ".f32\t$Sd, $Sn, $Sm", + [(set SPR:$Sd, (fsub_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>, + RegConstraint<"$Sdin = $Sd">, + Requires<[HasVFP4,DontUseNEONForFP]> { + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines. 
+} + +def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin), + (VFNMSD DPR:$dstin, DPR:$a, DPR:$b)>, + Requires<[HasVFP4]>; +def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin), + (VFNMSS SPR:$dstin, SPR:$a, SPR:$b)>, + Requires<[HasVFP4,DontUseNEONForFP]>; //===----------------------------------------------------------------------= ==// // FP Conditional moves. Modified: llvm/trunk/lib/Target/ARM/ARMSchedule.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMSchedule.td ?rev=148658&r1=148657&r2=148658&view=diff ============================================================================ == --- llvm/trunk/lib/Target/ARM/ARMSchedule.td (original) +++ llvm/trunk/lib/Target/ARM/ARMSchedule.td Sun Jan 22 06:07:33 2012 @@ -118,6 +118,8 @@ def IIC_fpMUL64 : InstrItinClass; def IIC_fpMAC32 : InstrItinClass; def IIC_fpMAC64 : InstrItinClass; +def IIC_fpFMAC32 : InstrItinClass; +def IIC_fpFMAC64 : InstrItinClass; def IIC_fpDIV32 : InstrItinClass; def IIC_fpDIV64 : InstrItinClass; def IIC_fpSQRT32 : InstrItinClass; @@ -208,6 +210,8 @@ def IIC_VPERMQ3 : InstrItinClass; def IIC_VMACD : InstrItinClass; def IIC_VMACQ : InstrItinClass; +def IIC_VFMACD : InstrItinClass; +def IIC_VFMACQ : InstrItinClass; def IIC_VRECSD : InstrItinClass; def IIC_VRECSQ : InstrItinClass; def IIC_VCNTiD : InstrItinClass; Modified: llvm/trunk/lib/Target/ARM/ARMSubtarget.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMSubtarget.c pp?rev=148658&r1=148657&r2=148658&view=diff ============================================================================ == --- llvm/trunk/lib/Target/ARM/ARMSubtarget.cpp (original) +++ llvm/trunk/lib/Target/ARM/ARMSubtarget.cpp Sun Jan 22 06:07:33 2012 @@ -47,7 +47,9 @@ , HasV7Ops(false) , HasVFPv2(false) , HasVFPv3(false) + , HasVFPv4(false) , HasNEON(false) + , HasNEONVFPv4(false) , UseNEONForSinglePrecisionFP(false) , SlowFPVMLx(false) , HasVMLxForwarding(false) Modified: 
llvm/trunk/lib/Target/ARM/ARMSubtarget.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMSubtarget.h?rev=148658&r1=148657&r2=148658&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMSubtarget.h (original) +++ llvm/trunk/lib/Target/ARM/ARMSubtarget.h Sun Jan 22 06:07:33 2012 @@ -45,11 +45,13 @@ bool HasV6T2Ops; bool HasV7Ops; - /// HasVFPv2, HasVFPv3, HasNEON - Specify what floating point ISAs are - /// supported. + /// HasVFPv2, HasVFPv3, HasVFPv4, HasNEON, HasNEONVFPv4 - Specify what + /// floating point ISAs are supported. bool HasVFPv2; bool HasVFPv3; + bool HasVFPv4; bool HasNEON; + bool HasNEONVFPv4; /// UseNEONForSinglePrecisionFP - if the NEONFP attribute has been /// specified. Use the method useNEONForSinglePrecisionFP() to @@ -197,7 +199,9 @@ bool hasVFP2() const { return HasVFPv2; } bool hasVFP3() const { return HasVFPv3; } + bool hasVFP4() const { return HasVFPv4; } bool hasNEON() const { return HasNEON; } + bool hasNEONVFP4() const { return HasNEONVFPv4; } bool useNEONForSinglePrecisionFP() const { return hasNEON() && UseNEONForSinglePrecisionFP; } Added: llvm/trunk/test/CodeGen/ARM/fusedMAC.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/fusedMAC.ll?rev=148658&view=auto ============================================================================== --- llvm/trunk/test/CodeGen/ARM/fusedMAC.ll (added) +++ llvm/trunk/test/CodeGen/ARM/fusedMAC.ll Sun Jan 22 06:07:33 2012 @@ -0,0 +1,68 @@ +; RUN: llc < %s -march=arm -mattr=+neon-vfpv4 | FileCheck %s +; Check generated fused MAC and MLS.
+ +define double @fusedMACTest1(double %d1, double %d2, double %d3) nounwind readnone noinline { +;CHECK: fusedMACTest1: +;CHECK: vfma.f64 + %1 = fmul double %d1, %d2 + %2 = fadd double %1, %d3 + ret double %2 +} + +define float @fusedMACTest2(float %f1, float %f2, float %f3) nounwind readnone noinline { +;CHECK: fusedMACTest2: +;CHECK: vfma.f32 + %1 = fmul float %f1, %f2 + %2 = fadd float %1, %f3 + ret float %2 +} + +define double @fusedMACTest3(double %d1, double %d2, double %d3) nounwind readnone noinline { +;CHECK: fusedMACTest3: +;CHECK: vfms.f64 + %1 = fmul double %d2, %d3 + %2 = fsub double %d1, %1 + ret double %2 +} + +define float @fusedMACTest4(float %f1, float %f2, float %f3) nounwind readnone noinline { +;CHECK: fusedMACTest4: +;CHECK: vfms.f32 + %1 = fmul float %f2, %f3 + %2 = fsub float %f1, %1 + ret float %2 +} + +define double @fusedMACTest5(double %d1, double %d2, double %d3) nounwind readnone noinline { +;CHECK: fusedMACTest5: +;CHECK: vfnma.f64 + %1 = fmul double %d1, %d2 + %2 = fsub double -0.0, %1 + %3 = fsub double %2, %d3 + ret double %3 +} + +define float @fusedMACTest6(float %f1, float %f2, float %f3) nounwind { +;CHECK: fusedMACTest6: +;CHECK: vfnma.f32 + %1 = fmul float %f1, %f2 + %2 = fsub float -0.0, %1 + %3 = fsub float %2, %f3 + ret float %3 +} + +define double @fusedMACTest7(double %d1, double %d2, double %d3) nounwind { +;CHECK: fusedMACTest7: +;CHECK: vfnms.f64 + %1 = fmul double %d1, %d2 + %2 = fsub double %1, %d3 + ret double %2 +} + +define float @fusedMACTest8(float %f1, float %f2, float %f3) nounwind { +;CHECK: fusedMACTest8: +;CHECK: vfnms.f32 + %1 = fmul float %f1, %f2 + %2 = fsub float %1, %f3 + ret float %2 +} _______________________________________________ llvm-commits mailing list llvm-commits at cs.uiuc.edu http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits From james.molloy at arm.com Tue Jan 24 02:38:06 2012 From: james.molloy at arm.com (James Molloy) Date: Tue, 24 Jan 2012 08:38:06 -0000 Subject: [llvm-commits] 
LLVM patch to support ARM fused multiply add/subtract instructions In-Reply-To: References: <027a01cccb29$a4709100$ed51b300$@org> Message-ID: <000d01ccda73$7e93a040$7bbae0c0$@molloy@arm.com> Hi Ana, [Forwarding my review comments to this thread because I originally replied to the commit itself] I'm not 100% happy with this patch. The implementation means that -mattr=+neon,+vfp4 does not enable neon-vfp4, which I think is rather confusing. I'd much prefer the predicate to be rewritten such that hasNEONVFP4() === hasNEONVFP4 || (hasVFP4 && hasNEON), what do you think? Cheers, James -----Original Message----- From: llvm-commits-bounces at cs.uiuc.edu [mailto:llvm-commits-bounces at cs.uiuc.edu] On Behalf Of Anton Korobeynikov Sent: 22 January 2012 12:12 To: Ana Pazos Cc: rajav at codeaurora.org; llvm-commits at cs.uiuc.edu Subject: Re: [llvm-commits] LLVM patch to support ARM fused multiply add/subtract instructions Hi Ana, Committed as r148658. > Some ARMv7-A processor implementations (e.g., Qualcomm 8960, ARM Cortex-A5) > support fused multiply add/subtract instructions (VFMA/VFMS) which have > lower latency and greater accuracy than the chained multiply add/subtract > instructions (VMLA/VMLS). Just curious - what are the performance / accuracy wins for fma stuff here?
-- With best regards, Anton Korobeynikov Faculty of Mathematics and Mechanics, Saint Petersburg State University _______________________________________________ llvm-commits mailing list llvm-commits at cs.uiuc.edu http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits From anton at korobeynikov.info Tue Jan 24 02:47:07 2012 From: anton at korobeynikov.info (Anton Korobeynikov) Date: Tue, 24 Jan 2012 12:47:07 +0400 Subject: [llvm-commits] LLVM patch to support ARM fused multiply add/subtract instructions In-Reply-To: <4f1e6df7.ce3ed80a.6ea5.383fSMTPIN_ADDED@mx.google.com> References: <027a01cccb29$a4709100$ed51b300$@org> <4f1e6df7.ce3ed80a.6ea5.383fSMTPIN_ADDED@mx.google.com> Message-ID: Hi James, > I'm not 100% happy with this patch. The implementation means that -mattr=+neon,+vfp4 does not enable neon-vfp4, which I think is rather confusing. > I'd much prefer the predicate to be rewritten such that hasNEONVFP4() === hasNEONVFP4 || (hasVFP4 && hasNEON), what do you think? Is it possible for a core to have vfpv4, but have "previous" neon? VFPv3-like? Or it's pretty tied situation? So, on VFPv4 core NEON will also always be VFPv4 ? 
-- With best regards, Anton Korobeynikov Faculty of Mathematics and Mechanics, Saint Petersburg State University From eugeni.stepanov at gmail.com Tue Jan 24 02:48:49 2012 From: eugeni.stepanov at gmail.com (Evgeniy Stepanov) Date: Tue, 24 Jan 2012 12:48:49 +0400 Subject: [llvm-commits] [llvm] r148686 - in /llvm/trunk: include/llvm/MC/MCAsmInfo.h lib/CodeGen/AsmPrinter/ARMException.cpp lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp test/CodeGen/ARM/ehabi-unwind.ll In-Reply-To: References: <20120123075740.9CD762A6C12C@llvm.org> Message-ID: On Tue, Jan 24, 2012 at 9:10 AM, Anton Korobeynikov wrote: > Hi Chandler, > >> Frankly, I think you stumbled on a much bigger layering problem: if >> ARMException.cpp (and related utilities in lib/CodeGen/AsmPrinter) are >> needed by various parts of the MC library, they should all be moved down >> into the MC library. That's the library responsible for asm printing these >> days. I've looped in the MC experts on this thread to provide more insights. > Thanks for checking this! I somehow missed the violation, my fault... > Also, all the stuff inside ARMException is not needed by MC in any way. The problem is, ARM-specific code in ARMException.cpp cannot depend on ARM-specific code in ARM/MCTargetDesc, because the former is, at the same time, a part of the platform-independent libLLVMAsmPrinter.a. So we cannot move the option back to MC, either. I think it would be better to return to the idea of 2 options: -arm-enable-ehabi and -arm-enable-ehabi-descriptors. Note that these are hidden options for partially implemented functionality, so the long and maybe cumbersome names do not really matter. WDYT?
From james.molloy at arm.com Tue Jan 24 03:01:15 2012 From: james.molloy at arm.com (James Molloy) Date: Tue, 24 Jan 2012 09:01:15 -0000 Subject: [llvm-commits] LLVM patch to support ARM fused multiply add/subtract instructions In-Reply-To: References: <027a01cccb29$a4709100$ed51b300$@org> <4f1e6df7.ce3ed80a.6ea5.383fSMTPIN_ADDED@mx.google.com> Message-ID: <000e01ccda76$ba8d99e0$2fa8cda0$@molloy@arm.com> Hi Anton, Had to look this up to confirm, but in the ARMARM (V7-AR, 1.4.1) it states: "if the implementation includes VFPv4 it must include Advanced SIMDv2." So my opinion on the predicates in LLVM is that they should reflect how the ARMARM describes the architecture. That is: * VFP4 does not imply NEON. * NEONv2 does not imply VFP4. * VFP4 && NEON implies NEON is V2. Refactoring the patch to reflect this would make it a lot easier for a user to map their feature selection to the description in the ARMARM, I think. Also, the patch doesn't hook the VMLAs up to "Requires<[UseFPVMLx]>" - is there any reason for this? I know that flag isn't really used but when we do hook VMLAs up to fast-math or disable-excess-fp-precision, it'd be nice to have all implementations orthogonal. Cheers, James -----Original Message----- From: Anton Korobeynikov [mailto:anton at korobeynikov.info] Sent: 24 January 2012 08:47 To: James Molloy Cc: Ana Pazos; rajav at codeaurora.org; llvm-commits at cs.uiuc.edu Subject: Re: [llvm-commits] LLVM patch to support ARM fused multiply add/subtract instructions Hi James, > I'm not 100% happy with this patch. The implementation means that -mattr=+neon,+vfp4 does not enable neon-vfp4, which I think is rather confusing. > I'd much prefer the predicate to be rewritten such that hasNEONVFP4() === hasNEONVFP4 || (hasVFP4 && hasNEON), what do you think? Is it possible for a core to have vfpv4, but have "previous" neon? VFPv3-like? Or it's pretty tied situation? So, on VFPv4 core NEON will also always be VFPv4 ? 
-- With best regards, Anton Korobeynikov Faculty of Mathematics and Mechanics, Saint Petersburg State University From sabre at nondot.org Tue Jan 24 02:58:58 2012 From: sabre at nondot.org (Chris Lattner) Date: Tue, 24 Jan 2012 08:58:58 -0000 Subject: [llvm-commits] [llvm] r148791 - /llvm/trunk/include/llvm/ADT/StringRef.h Message-ID: <20120124085858.1CAFC2A6C12C@llvm.org> Author: lattner Date: Tue Jan 24 02:58:57 2012 New Revision: 148791 URL: http://llvm.org/viewvc/llvm-project?rev=148791&view=rev Log: add ::drop_back() and ::drop_front() methods, which are like pop_front/pop_back on a vector, but a) aren't destructive to "this", and b) can take a # elements to drop. Modified: llvm/trunk/include/llvm/ADT/StringRef.h Modified: llvm/trunk/include/llvm/ADT/StringRef.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/ADT/StringRef.h?rev=148791&r1=148790&r2=148791&view=diff ============================================================================== --- llvm/trunk/include/llvm/ADT/StringRef.h (original) +++ llvm/trunk/include/llvm/ADT/StringRef.h Tue Jan 24 02:58:57 2012 @@ -353,6 +353,20 @@ Start = min(Start, Length); return StringRef(Data + Start, min(N, Length - Start)); } + + /// drop_front - Return a StringRef equal to 'this' but with the first + /// elements dropped. + StringRef drop_front(unsigned N = 1) const { + assert(size() >= N && "Dropping more elements than exist"); + return substr(N); + } + + /// drop_back - Return a StringRef equal to 'this' but with the last + /// elements dropped. + StringRef drop_back(unsigned N = 1) const { + assert(size() >= N && "Dropping more elements than exist"); + return substr(0, size()-N); + } /// slice - Return a reference to the substring from [Start, End). 
/// From sabre at nondot.org Tue Jan 24 03:01:07 2012 From: sabre at nondot.org (Chris Lattner) Date: Tue, 24 Jan 2012 09:01:07 -0000 Subject: [llvm-commits] [llvm] r148792 - in /llvm/trunk: include/llvm/Constants.h lib/VMCore/Constants.cpp Message-ID: <20120124090108.0A4DA2A6C12C@llvm.org> Author: lattner Date: Tue Jan 24 03:01:07 2012 New Revision: 148792 URL: http://llvm.org/viewvc/llvm-project?rev=148792&view=rev Log: Add various "string" methods to ConstantDataSequential, which have the same semantics as ConstantArray's but much more efficient because they don't have to return std::string's. The ConstantArray methods will eventually be removed. Modified: llvm/trunk/include/llvm/Constants.h llvm/trunk/lib/VMCore/Constants.cpp Modified: llvm/trunk/include/llvm/Constants.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Constants.h?rev=148792&r1=148791&r2=148792&view=diff ============================================================================== --- llvm/trunk/include/llvm/Constants.h (original) +++ llvm/trunk/include/llvm/Constants.h Tue Jan 24 03:01:07 2012 @@ -371,6 +371,9 @@ return reinterpret_cast(Value::getType()); } + // FIXME: String methods will eventually be removed. + + /// isString - This method returns true if the array is an array of i8 and /// the elements of the array are all ConstantInt's. bool isString() const; @@ -626,6 +629,30 @@ /// byte. uint64_t getElementByteSize() const; + + /// isString - This method returns true if this is an array of i8. + bool isString() const; + + /// isCString - This method returns true if the array "isString", ends with a + /// nul byte, and does not contains any other nul bytes. + bool isCString() const; + + /// getAsString - If this array is isString(), then this method returns the + /// array as a StringRef. Otherwise, it asserts out. 
+ /// + StringRef getAsString() const; + + /// getAsCString - If this array is isCString(), then this method returns the + /// array (without the trailing null byte) as a StringRef. Otherwise, it + /// asserts out. + /// + StringRef getAsCString() const { + assert(isCString() && "Isn't a C string"); + StringRef Str = getAsString(); + return Str.substr(0, Str.size()-1); + } + + virtual void destroyConstant(); /// Methods for support type inquiry through isa, cast, and dyn_cast: Modified: llvm/trunk/lib/VMCore/Constants.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/Constants.cpp?rev=148792&r1=148791&r2=148792&view=diff ============================================================================== --- llvm/trunk/lib/VMCore/Constants.cpp (original) +++ llvm/trunk/lib/VMCore/Constants.cpp Tue Jan 24 03:01:07 2012 @@ -2221,7 +2221,34 @@ return ConstantInt::get(getElementType(), getElementAsInteger(Elt)); } +/// isString - This method returns true if this is an array of i8. +bool ConstantDataSequential::isString() const { + return isa<ArrayType>(getType()) && getElementType()->isIntegerTy(8); +} + +/// getAsString - If this array is isString(), then this method returns the +/// array as a StringRef. Otherwise, it asserts out. +/// +StringRef ConstantDataSequential::getAsString() const { + assert(isString() && "Not a string"); + return StringRef(DataElements, getType()->getNumElements()); +} + +/// isCString - This method returns true if the array "isString", ends with a +/// nul byte, and does not contains any other nul bytes. +bool ConstantDataSequential::isCString() const { + if (!isString()) + return false; + + StringRef Str = getAsString(); + + // The last value must be nul. + if (Str.back() != 0) return false; + + // Other elements must be non-nul.
+ return Str.drop_back().find(0) == StringRef::npos; +} //===----------------------------------------------------------------------===// From baldrick at free.fr Tue Jan 24 03:06:00 2012 From: baldrick at free.fr (Duncan Sands) Date: Tue, 24 Jan 2012 10:06:00 +0100 Subject: [llvm-commits] [llvm] r148746 - /llvm/trunk/include/llvm/Support/CommandLine.h In-Reply-To: <20120123232747.8CF302A6C12C@llvm.org> References: <20120123232747.8CF302A6C12C@llvm.org> Message-ID: <4F1E7478.3080006@free.fr> Hi David, > --- llvm/trunk/include/llvm/Support/CommandLine.h (original) > +++ llvm/trunk/include/llvm/Support/CommandLine.h Mon Jan 23 17:27:47 2012 > @@ -163,12 +163,14 @@ > virtual void anchor(); > > int NumOccurrences; // The number of times specified > - enum NumOccurrencesFlag Occurrences : 3; > + // Occurrences, HiddenFlag, and Formatting are all enum types but to avoid to avoid -> to avoid problems > + // with signed enums in bitfields in MSVC we'll store them as unsigned I'm not sure it is fair to name and shame MSVC here since (AFAIK) it is being perfectly standards conformant in its behaviour. Ciao, Duncan. From anton at korobeynikov.info Tue Jan 24 03:10:05 2012 From: anton at korobeynikov.info (Anton Korobeynikov) Date: Tue, 24 Jan 2012 13:10:05 +0400 Subject: [llvm-commits] LLVM patch to support ARM fused multiply add/subtract instructions In-Reply-To: <4f1e7364.4fecd80a.2a66.ffff972eSMTPIN_ADDED@mx.google.com> References: <027a01cccb29$a4709100$ed51b300$@org> <4f1e6df7.ce3ed80a.6ea5.383fSMTPIN_ADDED@mx.google.com> <4f1e7364.4fecd80a.2a66.ffff972eSMTPIN_ADDED@mx.google.com> Message-ID: > Also, the patch doesn't hook the VMLAs up to "Requires<[UseFPVMLx]>" - is there any reason for this? I know that flag isn't really used but when we do hook VMLAs up to fast-math or disable-excess-fp-precision, it'd be nice to have all implementations orthogonal. I think we reached the point where we should have a clean set of features. Given the mess we have already.... 
So, let's summarize. We have the following set of target features: VFPvN {N=2,3,4} NEON (do we need NEONv2?) UseFPVMLx, flag to enable codegen in excess precision. Anything else here? -- With best regards, Anton Korobeynikov Faculty of Mathematics and Mechanics, Saint Petersburg State University From chandlerc at google.com Tue Jan 24 03:10:49 2012 From: chandlerc at google.com (Chandler Carruth) Date: Tue, 24 Jan 2012 01:10:49 -0800 Subject: [llvm-commits] [llvm] r148791 - /llvm/trunk/include/llvm/ADT/StringRef.h In-Reply-To: <20120124085858.1CAFC2A6C12C@llvm.org> References: <20120124085858.1CAFC2A6C12C@llvm.org> Message-ID: On Tue, Jan 24, 2012 at 12:58 AM, Chris Lattner wrote: > Author: lattner > Date: Tue Jan 24 02:58:57 2012 > New Revision: 148791 > > URL: http://llvm.org/viewvc/llvm-project?rev=148791&view=rev > Log: > add ::drop_back() and ::drop_front() methods, which are like > pop_front/pop_back on a vector, but a) aren't destructive to "this", and b) > can take a # elements to drop. > FWIW, the string_ref proposal for standardization provides pop_front and pop_back with an N that is the number of elements to pop... Would it be worth following that lead, and just copying the string ref before calling them? I'm particularly interested in the use case for returning the adjusted string in case we should add these methods to the standards proposal. We did propose a 'slice' method that might be more what you want, it works much like Python slicing, so you can do 's.slice(2, -2)' which would be equivalent to 's.drop_front(2).drop_back(2)' AFAICT... I don't recall exactly how the LLVM StringRef slice works. -------------- next part -------------- An HTML attachment was scrubbed... 
URL: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120124/8b4c3853/attachment.html From james.molloy at arm.com Tue Jan 24 03:23:09 2012 From: james.molloy at arm.com (James Molloy) Date: Tue, 24 Jan 2012 09:23:09 -0000 Subject: [llvm-commits] LLVM patch to support ARM fused multiply add/subtract instructions In-Reply-To: References: <027a01cccb29$a4709100$ed51b300$@org> <4f1e6df7.ce3ed80a.6ea5.383fSMTPIN_ADDED@mx.google.com> <4f1e7364.4fecd80a.2a66.ffff972eSMTPIN_ADDED@mx.google.com> Message-ID: <001001ccda79$c9bbd550$5d337ff0$@molloy@arm.com> Hi Anton, Both VFPv3 and NEONv1 can have the fp16 extension. Cheers, James -----Original Message----- From: Anton Korobeynikov [mailto:anton at korobeynikov.info] Sent: 24 January 2012 09:10 To: James Molloy Cc: Ana Pazos; rajav at codeaurora.org; llvm-commits at cs.uiuc.edu Subject: Re: [llvm-commits] LLVM patch to support ARM fused multiply add/subtract instructions > Also, the patch doesn't hook the VMLAs up to "Requires<[UseFPVMLx]>" - is there any reason for this? I know that flag isn't really used but when we do hook VMLAs up to fast-math or disable-excess-fp-precision, it'd be nice to have all implementations orthogonal. I think we reached the point where we should have a clean set of features. Given the mess we have already.... So, let's summarize. We have the following set of target features: VFPvN {N=2,3,4} NEON (do we need NEONv2?) UseFPVMLx, flag to enable codegen in excess precision. Anything else here? 
-- With best regards, Anton Korobeynikov Faculty of Mathematics and Mechanics, Saint Petersburg State University From anton at korobeynikov.info Tue Jan 24 03:26:43 2012 From: anton at korobeynikov.info (Anton Korobeynikov) Date: Tue, 24 Jan 2012 13:26:43 +0400 Subject: [llvm-commits] LLVM patch to support ARM fused multiply add/subtract instructions In-Reply-To: <4f1e7885.13e9d80a.3fb8.ffffb41dSMTPIN_ADDED@mx.google.com> References: <027a01cccb29$a4709100$ed51b300$@org> <4f1e6df7.ce3ed80a.6ea5.383fSMTPIN_ADDED@mx.google.com> <4f1e7364.4fecd80a.2a66.ffff972eSMTPIN_ADDED@mx.google.com> <4f1e7885.13e9d80a.3fb8.ffffb41dSMTPIN_ADDED@mx.google.com> Message-ID: > Both VFPv3 and NEONv1 can have the fp16 extension. What's about VFPv4? Is it possible to have "limited register" variant of VFPv4/NEONv2 ? -- With best regards, Anton Korobeynikov Faculty of Mathematics and Mechanics, Saint Petersburg State University From james.molloy at arm.com Tue Jan 24 03:33:19 2012 From: james.molloy at arm.com (James Molloy) Date: Tue, 24 Jan 2012 09:33:19 -0000 Subject: [llvm-commits] LLVM patch to support ARM fused multiply add/subtract instructions In-Reply-To: References: <027a01cccb29$a4709100$ed51b300$@org> <4f1e6df7.ce3ed80a.6ea5.383fSMTPIN_ADDED@mx.google.com> <4f1e7364.4fecd80a.2a66.ffff972eSMTPIN_ADDED@mx.google.com> <4f1e7885.13e9d80a.3fb8.ffffb41dSMTPIN_ADDED@mx.google.com> Message-ID: <001101ccda7b$35530e90$9ff92bb0$@molloy@arm.com> VFPv4 is a superset of VFPv3+fp16, same with NEONv2. "VFPv4 and VFPv4U add both the Half-precision Extension and the fused multiply-add instructions to the features of VFPv3." For the register set: "VFPv4 can be implemented with either thirty-two or sixteen doubleword registers" "Where necessary, these implementation options are distinguished using the terms: VFPv4-D32 or VFPv4-D16" "where the term VFPv4 is used it covers both options". So, VFP4 should imply VFPv4-D16, i.e. the smaller register file variant. 
There should be a way to optionally enable the 32 register variant. -----Original Message----- From: Anton Korobeynikov [mailto:anton at korobeynikov.info] Sent: 24 January 2012 09:27 To: James Molloy Cc: Ana Pazos; rajav at codeaurora.org; llvm-commits at cs.uiuc.edu Subject: Re: [llvm-commits] LLVM patch to support ARM fused multiply add/subtract instructions > Both VFPv3 and NEONv1 can have the fp16 extension. What's about VFPv4? Is it possible to have "limited register" variant of VFPv4/NEONv2 ? -- With best regards, Anton Korobeynikov Faculty of Mathematics and Mechanics, Saint Petersburg State University From sabre at nondot.org Tue Jan 24 03:31:43 2012 From: sabre at nondot.org (Chris Lattner) Date: Tue, 24 Jan 2012 09:31:43 -0000 Subject: [llvm-commits] [llvm] r148793 - in /llvm/trunk: include/llvm/Constants.h lib/CodeGen/AsmPrinter/AsmPrinter.cpp lib/VMCore/Constants.cpp Message-ID: <20120124093143.C57C82A6C12C@llvm.org> Author: lattner Date: Tue Jan 24 03:31:43 2012 New Revision: 148793 URL: http://llvm.org/viewvc/llvm-project?rev=148793&view=rev Log: Add AsmPrinter (aka MCLowering) support for ConstantDataSequential, and clean up some other misc stuff. Unlike ConstantArray, we will prefer to emit .fill directives for "String" arrays that all have the same value, since they are denser than emitting a .ascii Modified: llvm/trunk/include/llvm/Constants.h llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp llvm/trunk/lib/VMCore/Constants.cpp Modified: llvm/trunk/include/llvm/Constants.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Constants.h?rev=148793&r1=148792&r2=148793&view=diff ============================================================================== --- llvm/trunk/include/llvm/Constants.h (original) +++ llvm/trunk/include/llvm/Constants.h Tue Jan 24 03:31:43 2012 @@ -640,7 +640,10 @@ /// getAsString - If this array is isString(), then this method returns the /// array as a StringRef. Otherwise, it asserts out. 
/// - StringRef getAsString() const; + StringRef getAsString() const { + assert(isString() && "Not a string"); + return getRawDataValues(); + } /// getAsCString - If this array is isCString(), then this method returns the /// array (without the trailing null byte) as a StringRef. Otherwise, it @@ -652,6 +655,10 @@ return Str.substr(0, Str.size()-1); } + /// getRawDataValues - Return the raw, underlying, bytes of this data. Note + /// that this is an extremely tricky thing to work with, as it exposes the + /// host endianness of the data elements. + StringRef getRawDataValues() const; virtual void destroyConstant(); Modified: llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp?rev=148793&r1=148792&r2=148793&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp (original) +++ llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp Tue Jan 24 03:31:43 2012 @@ -1557,6 +1557,19 @@ /// isRepeatedByteSequence - Determine whether the given value is /// composed of a repeated sequence of identical bytes and return the /// byte value. If it is not a repeated sequence, return -1. +static int isRepeatedByteSequence(const ConstantDataSequential *V) { + StringRef Data = V->getRawDataValues(); + assert(!Data.empty() && "Empty aggregates should be CAZ node"); + char C = Data[0]; + for (unsigned i = 1, e = Data.size(); i != e; ++i) + if (Data[i] != C) return -1; + return C; +} + + +/// isRepeatedByteSequence - Determine whether the given value is +/// composed of a repeated sequence of identical bytes and return the +/// byte value. If it is not a repeated sequence, return -1. 
static int isRepeatedByteSequence(const Value *V, TargetMachine &TM) { if (const ConstantInt *CI = dyn_cast(V)) { @@ -1582,8 +1595,7 @@ if (const ConstantArray *CA = dyn_cast(V)) { // Make sure all array elements are sequences of the same repeated // byte. - if (CA->getNumOperands() == 0) return -1; - + assert(CA->getNumOperands() != 0 && "Should be a CAZ"); int Byte = isRepeatedByteSequence(CA->getOperand(0), TM); if (Byte == -1) return -1; @@ -1594,10 +1606,70 @@ } return Byte; } + + if (const ConstantDataSequential *CDS = dyn_cast(V)) + return isRepeatedByteSequence(CDS); return -1; } +static void EmitGlobalConstantDataSequential(const ConstantDataSequential *CDS, + unsigned AddrSpace,AsmPrinter &AP){ + + // See if we can aggregate this into a .fill, if so, emit it as such. + int Value = isRepeatedByteSequence(CDS, AP.TM); + if (Value != -1) { + uint64_t Bytes = AP.TM.getTargetData()->getTypeAllocSize(CDS->getType()); + return AP.OutStreamer.EmitFill(Bytes, Value, AddrSpace); + } + + // If this can be emitted with .ascii/.asciz, emit it as such. + if (CDS->isString()) + return AP.OutStreamer.EmitBytes(CDS->getAsString(), AddrSpace); + + // Otherwise, emit the values in successive locations. + unsigned ElementByteSize = CDS->getElementByteSize(); + if (isa(CDS->getElementType())) { + for (unsigned i = 0, e = CDS->getType()->getNumElements(); i != e; ++i) { + AP.OutStreamer.EmitIntValue(CDS->getElementAsInteger(i), + ElementByteSize, AddrSpace); + } + return; + } + + // FP Constants are printed as integer constants to avoid losing + // precision. 
+ assert(CDS->getElementType()->isFloatTy() || + CDS->getElementType()->isDoubleTy()); + + if (ElementByteSize == 4) { + for (unsigned i = 0, e = CDS->getType()->getNumElements(); i != e; ++i) { + union { + float F; + uint32_t I; + }; + + F = CDS->getElementAsFloat(i); + if (AP.isVerbose()) + AP.OutStreamer.GetCommentOS() << "float " << F << '\n'; + AP.OutStreamer.EmitIntValue(I, 4, AddrSpace); + } + return; + } + + for (unsigned i = 0, e = CDS->getType()->getNumElements(); i != e; ++i) { + union { + double F; + uint64_t I; + }; + + F = CDS->getElementAsDouble(i); + if (AP.isVerbose()) + AP.OutStreamer.GetCommentOS() << "double " << F << '\n'; + AP.OutStreamer.EmitIntValue(I, 8, AddrSpace); + } +} + static void EmitGlobalConstantArray(const ConstantArray *CA, unsigned AddrSpace, AsmPrinter &AP) { if (AddrSpace != 0 || !CA->isString()) { @@ -1640,28 +1712,6 @@ AP.OutStreamer.EmitZeros(Padding, AddrSpace); } -static void LowerVectorConstant(const Constant *CV, unsigned AddrSpace, - AsmPrinter &AP) { - // Look through bitcasts - if (const ConstantExpr *CE = dyn_cast(CV)) - if (CE->getOpcode() == Instruction::BitCast) - CV = CE->getOperand(0); - - if (const ConstantVector *V = dyn_cast(CV)) - return EmitGlobalConstantVector(V, AddrSpace, AP); - - // If we get here, we're stuck; report the problem to the user. - // FIXME: Are there any other useful tricks for vectors? - { - std::string S; - raw_string_ostream OS(S); - OS << "Unsupported vector expression in static initializer: "; - WriteAsOperand(OS, CV, /*PrintType=*/false, - !AP.MF ? 0 : AP.MF->getFunction()->getParent()); - report_fatal_error(OS.str()); - } -} - static void EmitGlobalConstantStruct(const ConstantStruct *CS, unsigned AddrSpace, AsmPrinter &AP) { // Print the fields in successive locations. Pad to align if needed! 
@@ -1812,12 +1862,6 @@ } } - if (const ConstantArray *CVA = dyn_cast(CV)) - return EmitGlobalConstantArray(CVA, AddrSpace, AP); - - if (const ConstantStruct *CVS = dyn_cast(CV)) - return EmitGlobalConstantStruct(CVS, AddrSpace, AP); - if (const ConstantFP *CFP = dyn_cast(CV)) return EmitGlobalConstantFP(CFP, AddrSpace, AP); @@ -1827,9 +1871,24 @@ return; } - if (CV->getType()->isVectorTy()) - return LowerVectorConstant(CV, AddrSpace, AP); + if (const ConstantDataSequential *CDS = dyn_cast(CV)) + return EmitGlobalConstantDataSequential(CDS, AddrSpace, AP); + + if (const ConstantArray *CVA = dyn_cast(CV)) + return EmitGlobalConstantArray(CVA, AddrSpace, AP); + if (const ConstantStruct *CVS = dyn_cast(CV)) + return EmitGlobalConstantStruct(CVS, AddrSpace, AP); + + // Look through bitcasts, which might not be able to be MCExpr'ized (e.g. of + // vectors). + if (const ConstantExpr *CE = dyn_cast(CV)) + if (CE->getOpcode() == Instruction::BitCast) + return EmitGlobalConstantImpl(CE->getOperand(0), AddrSpace, AP); + + if (const ConstantVector *V = dyn_cast(CV)) + return EmitGlobalConstantVector(V, AddrSpace, AP); + // Otherwise, it must be a ConstantExpr. Lower it to an MCExpr, then emit it // thread the streamer with EmitValue. 
AP.OutStreamer.EmitValue(LowerConstant(CV, AP), Modified: llvm/trunk/lib/VMCore/Constants.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/Constants.cpp?rev=148793&r1=148792&r2=148793&view=diff ============================================================================== --- llvm/trunk/lib/VMCore/Constants.cpp (original) +++ llvm/trunk/lib/VMCore/Constants.cpp Tue Jan 24 03:31:43 2012 @@ -1994,6 +1994,11 @@ return getType()->getElementType(); } +StringRef ConstantDataSequential::getRawDataValues() const { + return StringRef(DataElements, + getType()->getNumElements()*getElementByteSize()); +} + /// isElementTypeCompatible - Return true if a ConstantDataSequential can be /// formed with a vector or array of the specified element type. /// ConstantDataArray only works with normal float and int types that are @@ -2067,14 +2072,12 @@ } void ConstantDataSequential::destroyConstant() { - uint64_t ByteSize = getElementByteSize() * getElementType()->getNumElements(); - // Remove the constant from the StringMap. StringMap &CDSConstants = getType()->getContext().pImpl->CDSConstants; StringMap::iterator Slot = - CDSConstants.find(StringRef(DataElements, ByteSize)); + CDSConstants.find(getRawDataValues()); assert(Slot != CDSConstants.end() && "CDS not found in uniquing table"); @@ -2226,15 +2229,6 @@ return isa(getType()) && getElementType()->isIntegerTy(8); } -/// getAsString - If this array is isString(), then this method returns the -/// array as a StringRef. Otherwise, it asserts out. -/// -StringRef ConstantDataSequential::getAsString() const { - assert(isString() && "Not a string"); - return StringRef(DataElements, getType()->getNumElements()); -} - - /// isCString - This method returns true if the array "isString", ends with a /// nul byte, and does not contains any other nul bytes. 
bool ConstantDataSequential::isCString() const { From echristo at apple.com Tue Jan 24 03:43:28 2012 From: echristo at apple.com (Eric Christopher) Date: Tue, 24 Jan 2012 09:43:28 -0000 Subject: [llvm-commits] [llvm] r148794 - in /llvm/trunk/lib/CodeGen/AsmPrinter: DIE.cpp DIE.h DwarfDebug.cpp Message-ID: <20120124094328.F2B182A6C12C@llvm.org> Author: echristo Date: Tue Jan 24 03:43:28 2012 New Revision: 148794 URL: http://llvm.org/viewvc/llvm-project?rev=148794&view=rev Log: Remove generation of DW_AT_sibling. Nothing as far as I can tell uses it. Saves about 1.5% on debug info size. rdar://10278198 Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DIE.cpp llvm/trunk/lib/CodeGen/AsmPrinter/DIE.h llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DIE.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/DIE.cpp?rev=148794&r1=148793&r2=148794&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/DIE.cpp (original) +++ llvm/trunk/lib/CodeGen/AsmPrinter/DIE.cpp Tue Jan 24 03:43:28 2012 @@ -112,15 +112,6 @@ delete Children[i]; } -/// addSiblingOffset - Add a sibling offset field to the front of the DIE. 
-/// -DIEValue *DIE::addSiblingOffset(BumpPtrAllocator &A) { - DIEInteger *DI = new (A) DIEInteger(0); - Values.insert(Values.begin(), DI); - Abbrev.AddFirstAttribute(dwarf::DW_AT_sibling, dwarf::DW_FORM_ref4); - return DI; -} - #ifndef NDEBUG void DIE::print(raw_ostream &O, unsigned IncIndent) { IndentCount += IncIndent; Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DIE.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/DIE.h?rev=148794&r1=148793&r2=148794&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/DIE.h (original) +++ llvm/trunk/lib/CodeGen/AsmPrinter/DIE.h Tue Jan 24 03:43:28 2012 @@ -163,16 +163,6 @@ Values.push_back(Value); } - /// SiblingOffset - Return the offset of the debug information entry's - /// sibling. - unsigned getSiblingOffset() const { return Offset + Size; } - - /// addSiblingOffset - Add a sibling offset field to the front of the DIE. - /// The caller is responsible for deleting the return value at or after the - /// same time it destroys this DIE. - /// - DIEValue *addSiblingOffset(BumpPtrAllocator &A); - /// addChild - Add a child to the DIE. /// void addChild(DIE *Child) { Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp?rev=148794&r1=148793&r2=148794&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp (original) +++ llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp Tue Jan 24 03:43:28 2012 @@ -1488,10 +1488,6 @@ // Get the children. const std::vector &Children = Die->getChildren(); - // If not last sibling and has children then add sibling offset attribute. - if (!Last && !Children.empty()) - Die->addSiblingOffset(DIEValueAllocator); - // Record the abbreviation. 
assignAbbrevNumber(Die->getAbbrev()); @@ -1602,9 +1598,6 @@ Asm->OutStreamer.AddComment(dwarf::AttributeString(Attr)); switch (Attr) { - case dwarf::DW_AT_sibling: - Asm->EmitInt32(Die->getSiblingOffset()); - break; case dwarf::DW_AT_abstract_origin: { DIEEntry *E = cast(Values[i]); DIE *Origin = E->getEntry(); From echristo at apple.com Tue Jan 24 04:11:49 2012 From: echristo at apple.com (Eric Christopher) Date: Tue, 24 Jan 2012 10:11:49 -0000 Subject: [llvm-commits] [llvm] r148795 - in /llvm/trunk/test/DebugInfo: 2010-06-29-InlinedFnLocalVar.ll X86/DW_AT_specification.ll X86/concrete_out_of_line.ll X86/pr11300.ll Message-ID: <20120124101149.834F52A6C12C@llvm.org> Author: echristo Date: Tue Jan 24 04:11:49 2012 New Revision: 148795 URL: http://llvm.org/viewvc/llvm-project?rev=148795&view=rev Log: Fix the testcases for the previous patch. rdar://10278198 Modified: llvm/trunk/test/DebugInfo/2010-06-29-InlinedFnLocalVar.ll llvm/trunk/test/DebugInfo/X86/DW_AT_specification.ll llvm/trunk/test/DebugInfo/X86/concrete_out_of_line.ll llvm/trunk/test/DebugInfo/X86/pr11300.ll Modified: llvm/trunk/test/DebugInfo/2010-06-29-InlinedFnLocalVar.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/DebugInfo/2010-06-29-InlinedFnLocalVar.ll?rev=148795&r1=148794&r2=148795&view=diff ============================================================================== --- llvm/trunk/test/DebugInfo/2010-06-29-InlinedFnLocalVar.ll (original) +++ llvm/trunk/test/DebugInfo/2010-06-29-InlinedFnLocalVar.ll Tue Jan 24 04:11:49 2012 @@ -2,7 +2,6 @@ ; Check struct X for dead variable xyz from inlined function foo. 
; CHECK: DW_TAG_structure_type -; CHECK-NEXT: DW_AT_sibling ; CHECK-NEXT: DW_AT_name Modified: llvm/trunk/test/DebugInfo/X86/DW_AT_specification.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/DebugInfo/X86/DW_AT_specification.ll?rev=148795&r1=148794&r2=148795&view=diff ============================================================================== --- llvm/trunk/test/DebugInfo/X86/DW_AT_specification.ll (original) +++ llvm/trunk/test/DebugInfo/X86/DW_AT_specification.ll Tue Jan 24 04:11:49 2012 @@ -3,8 +3,8 @@ ; test that the DW_AT_specification is a back edge in the file. -; CHECK: 0x0000003c: DW_TAG_subprogram [5] * -; CHECK: 0x00000064: DW_AT_specification [DW_FORM_ref4] (cu + 0x003c => {0x0000003c}) +; CHECK: 0x00000038: DW_TAG_subprogram [5] * +; CHECK: 0x00000060: DW_AT_specification [DW_FORM_ref4] (cu + 0x0038 => {0x00000038}) @_ZZN3foo3barEvE1x = constant i32 0, align 4 Modified: llvm/trunk/test/DebugInfo/X86/concrete_out_of_line.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/DebugInfo/X86/concrete_out_of_line.ll?rev=148795&r1=148794&r2=148795&view=diff ============================================================================== --- llvm/trunk/test/DebugInfo/X86/concrete_out_of_line.ll (original) +++ llvm/trunk/test/DebugInfo/X86/concrete_out_of_line.ll Tue Jan 24 04:11:49 2012 @@ -7,8 +7,7 @@ ; first check that we have a TAG_subprogram at a given offset and it has ; AT_inline. -; CHECK: 0x0000014a: DW_TAG_subprogram -; CHECK-NEXT: DW_AT_sibling +; CHECK: 0x00000136: DW_TAG_subprogram ; CHECK-NEXT: DW_AT_MIPS_linkage_name ; CHECK-NEXT: DW_AT_specification ; CHECK-NEXT: DW_AT_inline @@ -16,8 +15,8 @@ ; and then that a TAG_subprogram refers to it with AT_abstract_origin. 
-; CHECK: 0x000001a6: DW_TAG_subprogram -; CHECK-NEXT: DW_AT_abstract_origin [DW_FORM_ref4] (cu + 0x014a => {0x0000014a}) +; CHECK: 0x00000186: DW_TAG_subprogram +; CHECK-NEXT: DW_AT_abstract_origin [DW_FORM_ref4] (cu + 0x0136 => {0x00000136}) define i32 @_ZN17nsAutoRefCnt7ReleaseEv() { entry: Modified: llvm/trunk/test/DebugInfo/X86/pr11300.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/DebugInfo/X86/pr11300.ll?rev=148795&r1=148794&r2=148795&view=diff ============================================================================== --- llvm/trunk/test/DebugInfo/X86/pr11300.ll (original) +++ llvm/trunk/test/DebugInfo/X86/pr11300.ll Tue Jan 24 04:11:49 2012 @@ -3,8 +3,8 @@ ; test that the DW_AT_specification is a back edge in the file. -; CHECK: 0x00000063: DW_TAG_subprogram [5] -; CHECK: 0x00000089: DW_AT_specification [DW_FORM_ref4] (cu + 0x0063 => {0x00000063}) +; CHECK: 0x0000005b: DW_TAG_subprogram [5] +; CHECK: 0x0000007d: DW_AT_specification [DW_FORM_ref4] (cu + 0x005b => {0x0000005b}) %struct.foo = type { i8 } From anton at korobeynikov.info Tue Jan 24 04:39:12 2012 From: anton at korobeynikov.info (Anton Korobeynikov) Date: Tue, 24 Jan 2012 13:39:12 +0300 Subject: [llvm-commits] LLVM patch to support ARM fused multiply add/subtract instructions In-Reply-To: <4f1e7ae7.7349b40a.46a0.3a96SMTPIN_ADDED@mx.google.com> References: <027a01cccb29$a4709100$ed51b300$@org> <4f1e6df7.ce3ed80a.6ea5.383fSMTPIN_ADDED@mx.google.com> <4f1e7364.4fecd80a.2a66.ffff972eSMTPIN_ADDED@mx.google.com> <4f1e7885.13e9d80a.3fb8.ffffb41dSMTPIN_ADDED@mx.google.com> <4f1e7ae7.7349b40a.46a0.3a96SMTPIN_ADDED@mx.google.com> Message-ID: Hi James, > VFPv4 is a superset of VFPv3+fp16, same with NEONv2. > > "VFPv4 and VFPv4U add both the Half-precision Extension and the fused multiply-add instructions to the features of VFPv3." 
> > For the register set: "VFPv4 can be implemented with either thirty-two or sixteen doubleword registers" > "Where necessary, these implementation options are distinguished using the terms: VFPv4-D32 or VFPv4-D16" > > "where the term VFPv4 is used it covers both options". > > So, VFP4 should imply VFPv4-D16, i.e. the smaller register file variant. There should be a way to optionally enable the 32 register variant. Ok... So, we have the following set of features: 1. VFPv2, VFPv3, VFPv4. Each is a superset of the former. 2. NEON, NEONv2. Each is a superset of the former. 3. Additionally we have the fp16 feature, which is available for VFPv3/NEON. VFPv4 implies fp16. 4. VFPv3/VFPv4 (and corresponding NEONs) might be D16 or D32. While by spec it seems that D16 should be the default, this is different from the current defaults. So, I'd suggest keeping D32 as the default. 5. MUL+ADD variant: separate, VMLA, VFMA. VFMA should be enabled for VFPv4/NEONv2 and only (?) if excess precision / fast-math is requested. The only problem I'm seeing is supporting "vfpv4 + neon implies neonv2", because it's based on a combination of features and having just neonv2 does not imply vfpv4. Is there anything I missed? -- With best regards, Anton Korobeynikov Faculty of Mathematics and Mechanics, Saint Petersburg State University From STPWORLD at narod.ru Tue Jan 24 04:41:40 2012 From: STPWORLD at narod.ru (Stepan Dyatkovskiy) Date: Tue, 24 Jan 2012 14:41:40 +0400 Subject: [llvm-commits] [LLVM] SwitchInst PATCH: Changes in semantics and usage. In-Reply-To: <124601327309857@web136.yandex.ru> References: <216291326628464@web57.yandex.ru> <188821326736101@web38.yandex.ru> <179511326997999@web49.yandex.ru> <4F1A789C.6040808@narod.ru> <124601327309857@web136.yandex.ru> Message-ID: <437101327401700@web45.yandex.ru> Hi all. I think that we need to refactor SwitchInst, since the current SwitchInst state blocks any further development in the PR1255 direction ("Should enhance LLVM switch instruction to take case ranges").
Please find the attached patch for review. I quoted my first post notes for this patch: [post] I refactored SwitchInst a little bit. The purpose of the refactoring is to hide operand roles from the SwitchInst user (programmer). If you want to play with operands directly, you will probably need lower level methods than the SwitchInst ones (TerminatorInst or maybe User). After this patch we can reorganize SwitchInst operands and successors as we want. What was done: 1. Changed semantics of index inside the getCaseValue method: getCaseValue(0) means "get first case", not a condition. Use getCondition() if you want to resolve the condition. I propose not mixing SwitchInst case indexing with low level indexing (TI successors indexing, User's operands indexing), since it may be dangerous. 2. For the same reason, findCaseValue(ConstantInt*) returns the actual case number of the case value. 0 means first case, not default. If there is no case with the given value, ErrorIndex will be returned. 3. Added getCaseSuccessor method. I propose to avoid usage of TerminatorInst::getSuccessor if you want to resolve the case successor BB. Use getCaseSuccessor instead, since the internal SwitchInst organization of operands/successors is hidden and may be changed at any moment. 4. Added resolveSuccessorIndex and resolveCaseIndex. The main purpose of these methods is to see how case successors are really mapped in TerminatorInst. 4.1 "resolveSuccessorIndex" was created for when you need to level down from SwitchInst to TerminatorInst. It returns TerminatorInst's successor index for the given case successor. 4.2 "resolveCaseIndex" converts a low level successor index to the case index that corresponds to the given successor. [/post] Thanks! -Stepan. 23.01.2012, 13:10, "Stepan Dyatkovskiy" : > Hi all. > I also updated serializing of SwitchInst in BitcodeWriter. Done the same like in other files - replaced low level operands usage with SwitchInst analogues. > Please find the updated patch in attachment for review. > > -Stepan.
> > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits -------------- next part -------------- A non-text attachment was scrubbed... Name: si-cleanup.patch Type: application/octet-stream Size: 40745 bytes Desc: not available Url : http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120124/0208b592/attachment-0001.obj From zwarich at apple.com Tue Jan 24 04:41:34 2012 From: zwarich at apple.com (Cameron Zwarich) Date: Tue, 24 Jan 2012 02:41:34 -0800 Subject: [llvm-commits] LLVM patch to support ARM fused multiply add/subtract instructions In-Reply-To: References: <027a01cccb29$a4709100$ed51b300$@org> <4f1e6df7.ce3ed80a.6ea5.383fSMTPIN_ADDED@mx.google.com> <4f1e7364.4fecd80a.2a66.ffff972eSMTPIN_ADDED@mx.google.com> Message-ID: On Jan 24, 2012, at 1:10 AM, Anton Korobeynikov wrote: >> Also, the patch doesn't hook the VMLAs up to "Requires<[UseFPVMLx]>" - is there any reason for this? I know that flag isn't really used but when we do hook VMLAs up to fast-math or disable-excess-fp-precision, it'd be nice to have all implementations orthogonal. > I think we reached the point where we should have a clean set of > features. Given the mess we have already.... > > So, let's summarize. We have the following set of target features: > > VFPvN {N=2,3,4} > NEON (do we need NEONv2?) > UseFPVMLx, flag to enable codegen in excess precision. UseFPVMLx was originally about the usage of instructions like VMLA that are just combined multiply-adds without excess precision. The reason for the flag is that ARM CPUs have had a lot of hazards for the use of the combined instructions, and it is difficult to use them for a performance win outside of hand-tuned code. On Cortex-A9 the situation is arguably different, hence the flag. There needs to be a new flag for the automatic use of excess precision, if that behavior is even desired. 
Cameron From anton at korobeynikov.info Tue Jan 24 04:44:56 2012 From: anton at korobeynikov.info (Anton Korobeynikov) Date: Tue, 24 Jan 2012 13:44:56 +0300 Subject: [llvm-commits] LLVM patch to support ARM fused multiply add/subtract instructions In-Reply-To: References: <027a01cccb29$a4709100$ed51b300$@org> <4f1e6df7.ce3ed80a.6ea5.383fSMTPIN_ADDED@mx.google.com> <4f1e7364.4fecd80a.2a66.ffff972eSMTPIN_ADDED@mx.google.com> Message-ID: Cameron, > There needs to be a new flag for the automatic use of excess precision, if that behavior is even desired. Right. Maybe -ffast-math should imply this flag as well. -- With best regards, Anton Korobeynikov Faculty of Mathematics and Mechanics, Saint Petersburg State University From james.molloy at arm.com Tue Jan 24 04:47:19 2012 From: james.molloy at arm.com (James Molloy) Date: Tue, 24 Jan 2012 10:47:19 -0000 Subject: [llvm-commits] LLVM patch to support ARM fused multiply add/subtract instructions In-Reply-To: References: <027a01cccb29$a4709100$ed51b300$@org> <4f1e6df7.ce3ed80a.6ea5.383fSMTPIN_ADDED@mx.google.com> <4f1e7364.4fecd80a.2a66.ffff972eSMTPIN_ADDED@mx.google.com> Message-ID: <001201ccda85$8b9ba280$a2d2e780$@molloy@arm.com> Hi Anton, I can't think of anything you missed from your list (apart from that VMLA is also in VFPv3). Granted NEONv2 without VFPv4 might be difficult to support, but I see that being a pretty unlikely combination anyway... Cheers, James -----Original Message----- From: Anton Korobeynikov [mailto:anton at korobeynikov.info] Sent: 24 January 2012 10:45 To: Cameron Zwarich Cc: James Molloy; rajav at codeaurora.org; llvm-commits at cs.uiuc.edu Subject: Re: [llvm-commits] LLVM patch to support ARM fused multiply add/subtract instructions Cameron, > There needs to be a new flag for the automatic use of excess precision, if that behavior is even desired. Right. Maybe -ffast-math should imply this flag as well. 
-- With best regards, Anton Korobeynikov Faculty of Mathematics and Mechanics, Saint Petersburg State University From glider at google.com Tue Jan 24 04:44:44 2012 From: glider at google.com (Alexander Potapenko) Date: Tue, 24 Jan 2012 10:44:44 -0000 Subject: [llvm-commits] [compiler-rt] r148798 - in /compiler-rt/trunk/lib/asan: scripts/asan_symbolize.py tests/test_output.sh Message-ID: <20120124104444.878032A6C12C@llvm.org> Author: glider Date: Tue Jan 24 04:44:44 2012 New Revision: 148798 URL: http://llvm.org/viewvc/llvm-project?rev=148798&view=rev Log: Tune asan_symbolize.py to symbolize 64-bit binaries correctly. Add the DEBUG flag to test_output.sh Modified: compiler-rt/trunk/lib/asan/scripts/asan_symbolize.py compiler-rt/trunk/lib/asan/tests/test_output.sh Modified: compiler-rt/trunk/lib/asan/scripts/asan_symbolize.py URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/asan/scripts/asan_symbolize.py?rev=148798&r1=148797&r2=148798&view=diff ============================================================================== --- compiler-rt/trunk/lib/asan/scripts/asan_symbolize.py (original) +++ compiler-rt/trunk/lib/asan/scripts/asan_symbolize.py Tue Jan 24 04:44:44 2012 @@ -72,15 +72,20 @@ addr = patch_address(frameno, addr) load_addr = int(addr, 16) - int(offset, 16) if not pipes.has_key(binary): - #print "atos -o %s -l %s" % (binary, hex(load_addr)) - pipes[binary] = subprocess.Popen(["atos", "-o", binary], + # Guess which arch we're running. 10 = len("0x") + 8 hex digits. + if len(addr) > 10: + arch = "x86_64" + else: + arch = "i386" + #print "atos -o %s -arch %s " % (binary, arch) + pipes[binary] = subprocess.Popen(["atos", "-o", binary, "-arch", arch], stdin=subprocess.PIPE, stdout=subprocess.PIPE,) p = pipes[binary] # TODO(glider): how to tell if the address is absolute? 
if ".app/" in binary and not ".framework" in binary: print >>p.stdin, "%s" % addr else: - print >>p.stdin, "%s" % offset + print >>p.stdin, "%s" % addr # TODO(glider): it's more efficient to make a batch atos run for each binary. p.stdin.close() atos_line = p.stdout.readline().rstrip() Modified: compiler-rt/trunk/lib/asan/tests/test_output.sh URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/asan/tests/test_output.sh?rev=148798&r1=148797&r2=148798&view=diff ============================================================================== --- compiler-rt/trunk/lib/asan/tests/test_output.sh (original) +++ compiler-rt/trunk/lib/asan/tests/test_output.sh Tue Jan 24 04:44:44 2012 @@ -27,7 +27,9 @@ exe=$c.$b.O$O so=$c.$b.O$O-so.so echo testing $exe - $CXX $CXXFLAGS -g -m$b -faddress-sanitizer -O$O $c.cc -o $exe + build_command="$CXX $CXXFLAGS -g -m$b -faddress-sanitizer -O$O $c.cc -o $exe" + [ "$DEBUG" == "1" ] && echo $build_command + $build_command [ -e "$c_so.cc" ] && $CXX $CXXFLAGS -g -m$b -faddress-sanitizer -O$O $c_so.cc -fPIC -shared -o $so # If there's an OS-specific template, use it. # Please minimize the use of OS-specific templates. From chandlerc at gmail.com Tue Jan 24 05:59:00 2012 From: chandlerc at gmail.com (Chandler Carruth) Date: Tue, 24 Jan 2012 03:59:00 -0800 Subject: [llvm-commits] PATCH: Add support for tracking and exposing the host triple Message-ID: Attached is a patch to add support for tracking and exposing the host triple. This is one of the many steps along the way to cleaning up the Clang driver. In many places within Clang, we need to reason about the *host*, not the target, in order to decide how to behave. For example, this is at issue when deciding where within the host machine we should look for the target's toolchain. While in some cases, there is a canonical location that the target *always* uses, in other cases both can be relevant. 
Consider the Debian multiarch case where we will often want to use '/usr/lib//gcc///crtbegin.o'. In other cases, we may need to check whether the host triple might contain a toolchain capable of targeting our target. This happens frequently when working in a bi-arch world, where the GCC installed under 'i386-pc-linux-gnu' is actually capable of targeting 'x86_64-pc-linux-gnu' thanks to the '-m64' flag. Some time back an effort was undertaken which simply blanket renamed 'Host' to 'Target' in many places. This didn't really address the fundamental problem, and in my mind made things a bit more confusing. Thus in most places in the driver, we treat the DefaultTargetTriple as the *host* triple, compute the HostInfo from it, and then *modify* it to produce the target triple. Ow. Other times, we modify it first, and assume that the post-modification triple is still a viable host triple. All of this gets simple if we simply have access to both triples the entire time. We can make rational decisions about when we should and should not use the host information or the target information. Also, it will become immediately clear from the code which is in fact in use. Again, this is just the first step, but it touches autoconf and a bunch of other stuff so I wanted someone else to glance at it before I check it in. I'll also need someone to regenerate the config.h and the configure scripts as I don't have easy access to the proper versions. If someone can send me those patched files I can submit them as one patch to keep the build bots happy. -------------- next part -------------- An HTML attachment was scrubbed... URL: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120124/9ddb0bca/attachment.html -------------- next part -------------- A non-text attachment was scrubbed...
Name: host-triple.diff Type: text/x-patch Size: 5519 bytes Desc: not available Url : http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120124/9ddb0bca/attachment.bin From benny.kra at googlemail.com Tue Jan 24 06:08:28 2012 From: benny.kra at googlemail.com (Benjamin Kramer) Date: Tue, 24 Jan 2012 12:08:28 -0000 Subject: [llvm-commits] [llvm] r148799 - /llvm/trunk/lib/CodeGen/AsmPrinter/DIE.h Message-ID: <20120124120828.A21D02A6C12C@llvm.org> Author: d0k Date: Tue Jan 24 06:08:28 2012 New Revision: 148799 URL: http://llvm.org/viewvc/llvm-project?rev=148799&view=rev Log: Bit pack DIE structures better. 16 bits are sufficient to store attributes, tags and forms. Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DIE.h Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DIE.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/DIE.h?rev=148799&r1=148798&r2=148799&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/DIE.h (original) +++ llvm/trunk/lib/CodeGen/AsmPrinter/DIE.h Tue Jan 24 06:08:28 2012 @@ -31,17 +31,17 @@ class DIEAbbrevData { /// Attribute - Dwarf attribute code. /// - unsigned Attribute; + uint16_t Attribute; /// Form - Dwarf form code. /// - unsigned Form; + uint16_t Form; public: - DIEAbbrevData(unsigned A, unsigned F) : Attribute(A), Form(F) {} + DIEAbbrevData(uint16_t A, uint16_t F) : Attribute(A), Form(F) {} // Accessors. - unsigned getAttribute() const { return Attribute; } - unsigned getForm() const { return Form; } + uint16_t getAttribute() const { return Attribute; } + uint16_t getForm() const { return Form; } /// Profile - Used to gather unique data for the abbreviation folding set. /// @@ -54,41 +54,41 @@ class DIEAbbrev : public FoldingSetNode { /// Tag - Dwarf tag code. /// - unsigned Tag; + uint16_t Tag; - /// Unique number for node. + /// ChildrenFlag - Dwarf children flag. 
/// - unsigned Number; + uint16_t ChildrenFlag; - /// ChildrenFlag - Dwarf children flag. + /// Unique number for node. /// - unsigned ChildrenFlag; + unsigned Number; /// Data - Raw data bytes for abbreviation. /// SmallVector Data; public: - DIEAbbrev(unsigned T, unsigned C) : Tag(T), ChildrenFlag(C), Data() {} + DIEAbbrev(uint16_t T, uint16_t C) : Tag(T), ChildrenFlag(C), Data() {} // Accessors. - unsigned getTag() const { return Tag; } + uint16_t getTag() const { return Tag; } unsigned getNumber() const { return Number; } - unsigned getChildrenFlag() const { return ChildrenFlag; } + uint16_t getChildrenFlag() const { return ChildrenFlag; } const SmallVector &getData() const { return Data; } - void setTag(unsigned T) { Tag = T; } - void setChildrenFlag(unsigned CF) { ChildrenFlag = CF; } + void setTag(uint16_t T) { Tag = T; } + void setChildrenFlag(uint16_t CF) { ChildrenFlag = CF; } void setNumber(unsigned N) { Number = N; } /// AddAttribute - Adds another set of attribute information to the /// abbreviation. - void AddAttribute(unsigned Attribute, unsigned Form) { + void AddAttribute(uint16_t Attribute, uint16_t Form) { Data.push_back(DIEAbbrevData(Attribute, Form)); } /// AddFirstAttribute - Adds a set of attribute information to the front /// of the abbreviation. - void AddFirstAttribute(unsigned Attribute, unsigned Form) { + void AddFirstAttribute(uint16_t Attribute, uint16_t Form) { Data.insert(Data.begin(), DIEAbbrevData(Attribute, Form)); } @@ -113,10 +113,6 @@ class DIE { protected: - /// Abbrev - Buffer for constructing abbreviation. - /// - DIEAbbrev Abbrev; - /// Offset - Offset in debug info section. /// unsigned Offset; @@ -125,6 +121,10 @@ /// unsigned Size; + /// Abbrev - Buffer for constructing abbreviation. + /// + DIEAbbrev Abbrev; + /// Children DIEs. 
/// std::vector Children; @@ -139,8 +139,8 @@ mutable unsigned IndentCount; public: explicit DIE(unsigned Tag) - : Abbrev(Tag, dwarf::DW_CHILDREN_no), Offset(0), - Size(0), Parent(0), IndentCount(0) {} + : Offset(0), Size(0), Abbrev(Tag, dwarf::DW_CHILDREN_no), Parent(0), + IndentCount(0) {} virtual ~DIE(); // Accessors. From jay.foad at gmail.com Tue Jan 24 06:32:55 2012 From: jay.foad at gmail.com (Jay Foad) Date: Tue, 24 Jan 2012 12:32:55 +0000 Subject: [llvm-commits] [llvm] r132581 - in /llvm/trunk: lib/CodeGen/AllocationOrder.cpp lib/CodeGen/AllocationOrder.h lib/CodeGen/RegAllocBase.h lib/CodeGen/RegAllocBasic.cpp lib/CodeGen/RegAllocGreedy.cpp test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll In-Reply-To: <20110603203454.2E8092A6C12D@llvm.org> References: <20110603203454.2E8092A6C12D@llvm.org> Message-ID: Hi Jakob, On 3 June 2011 21:34, Jakob Stoklund Olesen wrote: > Author: stoklund > Date: Fri Jun ?3 15:34:53 2011 > New Revision: 132581 > > URL: http://llvm.org/viewvc/llvm-project?rev=132581&view=rev > Log: > Switch AllocationOrder to using RegisterClassInfo instead of a BitVector > of reserved registers. > > Use RegisterClassInfo in RABasic as well. This slightly changes som > allocation orders because RegisterClassInfo puts CSR aliases last. > > Modified: > ? ?llvm/trunk/lib/CodeGen/AllocationOrder.cpp > ? ?llvm/trunk/lib/CodeGen/AllocationOrder.h > ? ?llvm/trunk/lib/CodeGen/RegAllocBase.h > ? ?llvm/trunk/lib/CodeGen/RegAllocBasic.cpp > ? ?llvm/trunk/lib/CodeGen/RegAllocGreedy.cpp > ? 
 llvm/trunk/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll > Modified: llvm/trunk/lib/CodeGen/AllocationOrder.h > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AllocationOrder.h?rev=132581&r1=132580&r2=132581&view=diff > ============================================================================== > --- llvm/trunk/lib/CodeGen/AllocationOrder.h (original) > +++ llvm/trunk/lib/CodeGen/AllocationOrder.h Fri Jun  3 15:34:53 2011 > @@ -19,14 +19,14 @@ > >  namespace llvm { > > -class BitVector; > +class RegisterClassInfo; >  class VirtRegMap; > >  class AllocationOrder { >   const unsigned *Begin; >   const unsigned *End; >   const unsigned *Pos; > -  const BitVector &Reserved; > +  const RegisterClassInfo &RCI; >   unsigned Hint; >  public: > > @@ -37,7 +37,7 @@ >   ///        TargetRegisterInfo::getReservedRegs(). >   AllocationOrder(unsigned VirtReg, >                   const VirtRegMap &VRM, > -                 const BitVector &ReservedRegs); > +                 const RegisterClassInfo &RegClassInfo); The comment above this function needs updating for this change. Thanks, Jay. From jay.foad at gmail.com Tue Jan 24 06:54:30 2012 From: jay.foad at gmail.com (Jay Foad) Date: Tue, 24 Jan 2012 12:54:30 +0000 Subject: [llvm-commits] [llvm] r146168 - in /llvm/trunk: include/llvm/CodeGen/MachineInstr.h lib/CodeGen/MachineInstr.cpp In-Reply-To: <20111208192310.977E82A6C12C@llvm.org> References: <20111208192310.977E82A6C12C@llvm.org> Message-ID: Hi Evan, On 8 December 2011 19:23, Evan Cheng wrote: > Author: evancheng > Date: Thu Dec  8 13:23:10 2011 > New Revision: 146168 > > URL: http://llvm.org/viewvc/llvm-project?rev=146168&view=rev > Log: > Make MachineInstr instruction property queries more flexible. This change all > clients to decide whether to look inside bundled instructions and whether > the query should return true if any / all bundled instructions have the > queried property. > > Modified: >   
 llvm/trunk/include/llvm/CodeGen/MachineInstr.h >    llvm/trunk/lib/CodeGen/MachineInstr.cpp > > Modified: llvm/trunk/include/llvm/CodeGen/MachineInstr.h > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/MachineInstr.h?rev=146168&r1=146167&r2=146168&view=diff > ============================================================================== > --- llvm/trunk/include/llvm/CodeGen/MachineInstr.h (original) > +++ llvm/trunk/include/llvm/CodeGen/MachineInstr.h Thu Dec  8 13:23:10 2011 > @@ -277,6 +277,12 @@ >   /// API for querying MachineInstr properties. They are the same as MCInstrDesc >   /// queries but they are bundle aware. > > +  enum QueryType { > +    IgnoreBundle,    // Ignore bundles > +    AnyInBundle,     // Return true if any instruction in bundle has property > +    AllInBundle      // Return true if all instructions in bundle have property > +  }; > + >   /// hasProperty - Return true if the instruction (or in the case of a bundle, >   /// the instructions inside the bundle) has the specified property. >   /// The first argument is the property being queried. > @@ -285,43 +291,42 @@ >   /// If the third argument is true, than the query can return true when *any* >   /// of the bundled instructions has the queried property. If it's false, then >   /// this can return true iff *all* of the instructions have the property. > -  bool hasProperty(unsigned Flag, > -                   bool PeekInBundle = true, bool IsOr = true) const; > +  bool hasProperty(unsigned Flag, QueryType Type = AnyInBundle) const; The comment still talks about a third argument. It needs updating for this change. Thanks, Jay. 
From eli.bendersky at intel.com Tue Jan 24 07:02:40 2012 From: eli.bendersky at intel.com (Bendersky, Eli) Date: Tue, 24 Jan 2012 13:02:40 +0000 Subject: [llvm-commits] [PATCH] enabling generation of ELF objects on Windows with the help of the triple Message-ID: <9BBE4537D1BAAB479E9E8F9D4234619D32305D@HASMSX103.ger.corp.intel.com> Hello, Earlier this month I initiated a llvmdev discussion on the possibility to make MC generate code into an ELF container on Windows (http://lists.cs.uiuc.edu/pipermail/llvmdev/2012-January/046583.html). Currently in several places in the code the decision is made based on the Triple's OS component. When it's Windows, a decision is made automatically to generate COFF, so a way is needed to let MC know that we still want ELF, even if we're on Windows. There are several approaches to this: 1. Add this information somewhere which isn't the Triple 2. Add this information into the Triple, making it a 5-tuple instead of 4-tuple - the 5th component being "container" or something like that 3. Add this information into the Triple, overlaying the "environment" component The attached patch takes approach (3) since this appears to make the minimal overall impact on the code. It adds an "ELF" option to the EnvironmentType enum. Since we're interested in ELF on Windows on x86, this environment option doesn't conflict with the others. In other words, it enables us to generate and run MCJIT-ted code on Windows, without interfering with other code in LLVM. Although approach (1) would perhaps be cleaner, it is not easy to see how to go about it, since in many places where the modification is required the triple is the only accessible piece of information about the compiler target. The decision to generate COFF on Windows is based on the Triple, not on something else. I'll be happy to hear about other options, or to get this patch reviewed so I can commit it. 
Thanks in advance, Eli --------------------------------------------------------------------- Intel Israel (74) Limited This e-mail and any attachments may contain confidential material for the sole use of the intended recipient(s). Any review or distribution by others is strictly prohibited. If you are not the intended recipient, please contact the sender and delete all copies. -------------- next part -------------- A non-text attachment was scrubbed... Name: windows_elf_triple.2.patch Type: application/octet-stream Size: 4996 bytes Desc: windows_elf_triple.2.patch Url : http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120124/e6227953/attachment.obj From eugeni.stepanov at gmail.com Tue Jan 24 07:05:34 2012 From: eugeni.stepanov at gmail.com (Evgeniy Stepanov) Date: Tue, 24 Jan 2012 13:05:34 -0000 Subject: [llvm-commits] [llvm] r148800 - in /llvm/trunk: lib/CodeGen/AsmPrinter/ARMException.cpp test/CodeGen/ARM/ehabi-unwind.ll Message-ID: <20120124130534.1D7402A6C12C@llvm.org> Author: eugenis Date: Tue Jan 24 07:05:33 2012 New Revision: 148800 URL: http://llvm.org/viewvc/llvm-project?rev=148800&view=rev Log: An option to selectively enable part of ARM EHABI support. This change adds a new option --arm-enable-ehabi-descriptors that enables emitting unwinding descriptors. This provides a mode with a working backtrace() without the (currently broken) exception support. 
Modified: llvm/trunk/lib/CodeGen/AsmPrinter/ARMException.cpp llvm/trunk/test/CodeGen/ARM/ehabi-unwind.ll Modified: llvm/trunk/lib/CodeGen/AsmPrinter/ARMException.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/ARMException.cpp?rev=148800&r1=148799&r2=148800&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/ARMException.cpp (original) +++ llvm/trunk/lib/CodeGen/AsmPrinter/ARMException.cpp Tue Jan 24 07:05:33 2012 @@ -29,6 +29,7 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/FormattedStream.h" #include "llvm/ADT/SmallString.h" @@ -36,6 +37,12 @@ #include "llvm/ADT/Twine.h" using namespace llvm; +cl::opt +EnableARMEHABIDescriptors("arm-enable-ehabi-descriptors", cl::Hidden, + cl::desc("Generate ARM EHABI tables with unwinding descriptors"), + cl::init(false)); + + ARMException::ARMException(AsmPrinter *A) : DwarfException(A), shouldEmitTable(false), shouldEmitMoves(false), shouldEmitTableModule(false) @@ -72,13 +79,15 @@ Asm->OutStreamer.EmitPersonality(PerSym); } - // Map all labels and get rid of any dead landing pads. - MMI->TidyLandingPads(); + if (EnableARMEHABIDescriptors) { + // Map all labels and get rid of any dead landing pads. 
+ MMI->TidyLandingPads(); - Asm->OutStreamer.EmitHandlerData(); + Asm->OutStreamer.EmitHandlerData(); - // Emit actual exception table - EmitExceptionTable(); + // Emit actual exception table + EmitExceptionTable(); + } } Asm->OutStreamer.EmitFnEnd(); Modified: llvm/trunk/test/CodeGen/ARM/ehabi-unwind.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/ehabi-unwind.ll?rev=148800&r1=148799&r2=148800&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/ARM/ehabi-unwind.ll (original) +++ llvm/trunk/test/CodeGen/ARM/ehabi-unwind.ll Tue Jan 24 07:05:33 2012 @@ -2,6 +2,7 @@ ; unfamiliar instructions. ; RUN: llc < %s -mtriple=thumbv7 -arm-enable-ehabi -disable-fp-elim ; RUN: llc < %s -mtriple=thumbv7 -arm-enable-ehabi +; RUN: llc < %s -mtriple=thumbv7 -arm-enable-ehabi -arm-enable-ehabi-descriptors define void @_Z1fv() nounwind { entry: From benny.kra at googlemail.com Tue Jan 24 07:21:05 2012 From: benny.kra at googlemail.com (Benjamin Kramer) Date: Tue, 24 Jan 2012 14:21:05 +0100 Subject: [llvm-commits] [test-suite] r148727 - /test-suite/trunk/MultiSource/Benchmarks/nbench/Makefile In-Reply-To: <20120123212653.83A842A6C12C@llvm.org> References: <20120123212653.83A842A6C12C@llvm.org> Message-ID: <02F378F9-7A42-4CCF-AC73-A2B346BF4314@googlemail.com> On 23.01.2012, at 22:26, Chad Rosier wrote: > Author: mcrosier > Date: Mon Jan 23 15:26:53 2012 > New Revision: 148727 > > URL: http://llvm.org/viewvc/llvm-project?rev=148727&view=rev > Log: > nbench exceeds the 500s default, so bump it a bit. If it runs too slowly on embedded machines it may be a good idea to reduce the BASE_ITERATIONS number to scale the number of iterations down. I tweaked it to take between 10 and 20 seconds on my test machines, which are intel boxes not older than a couple of years. 
- Ben > > Modified: > test-suite/trunk/MultiSource/Benchmarks/nbench/Makefile > > Modified: test-suite/trunk/MultiSource/Benchmarks/nbench/Makefile > URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/nbench/Makefile?rev=148727&r1=148726&r2=148727&view=diff > ============================================================================== > --- test-suite/trunk/MultiSource/Benchmarks/nbench/Makefile (original) > +++ test-suite/trunk/MultiSource/Benchmarks/nbench/Makefile Mon Jan 23 15:26:53 2012 > @@ -6,6 +6,9 @@ > > include ../../Makefile.multisrc > > +# This test can take more than the default 500s timeout at -O0. > +RUNTIMELIMIT:=750 > + > # Always copy NNET.DAT so it's available with SRCDIR != OBJDIR builds. > # FIXME: Hack > $(shell cp -n $(PROJ_SRC_DIR)/NNET.DAT .) > > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits From sabre at nondot.org Tue Jan 24 07:35:19 2012 From: sabre at nondot.org (Chris Lattner) Date: Tue, 24 Jan 2012 05:35:19 -0800 Subject: [llvm-commits] [llvm] r148791 - /llvm/trunk/include/llvm/ADT/StringRef.h In-Reply-To: References: <20120124085858.1CAFC2A6C12C@llvm.org> Message-ID: On Jan 24, 2012, at 1:10 AM, Chandler Carruth wrote: > On Tue, Jan 24, 2012 at 12:58 AM, Chris Lattner wrote: > Author: lattner > Date: Tue Jan 24 02:58:57 2012 > New Revision: 148791 > > URL: http://llvm.org/viewvc/llvm-project?rev=148791&view=rev > Log: > add ::drop_back() and ::drop_front() methods, which are like pop_front/pop_back on a vector, but a) aren't destructive to "this", and b) can take a # elements to drop. > > FWIW, the string_ref proposal for standardization provides pop_front and pop_back with an N that is the number of elements to pop... Would it be worth following that lead, and just copying the string ref before calling them? 
I'm particularly interested in the use case for returning the adjusted string in case we should add these methods to the standards proposal. I'd be fine with adding that as well if people would find it useful. -Chris -------------- next part -------------- An HTML attachment was scrubbed... URL: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120124/067ca2d9/attachment.html From sabre at nondot.org Tue Jan 24 07:41:11 2012 From: sabre at nondot.org (Chris Lattner) Date: Tue, 24 Jan 2012 13:41:11 -0000 Subject: [llvm-commits] [llvm] r148802 - in /llvm/trunk: include/llvm/Constants.h lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp lib/ExecutionEngine/ExecutionEngine.cpp lib/Linker/LinkModules.cpp lib/VMCore/Constants.cpp Message-ID: <20120124134111.A7D512A6C12C@llvm.org> Author: lattner Date: Tue Jan 24 07:41:11 2012 New Revision: 148802 URL: http://llvm.org/viewvc/llvm-project?rev=148802&view=rev Log: add more support for ConstantDataSequential Modified: llvm/trunk/include/llvm/Constants.h llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp llvm/trunk/lib/ExecutionEngine/ExecutionEngine.cpp llvm/trunk/lib/Linker/LinkModules.cpp llvm/trunk/lib/VMCore/Constants.cpp Modified: llvm/trunk/include/llvm/Constants.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Constants.h?rev=148802&r1=148801&r2=148802&view=diff ============================================================================== --- llvm/trunk/include/llvm/Constants.h (original) +++ llvm/trunk/include/llvm/Constants.h Tue Jan 24 07:41:11 2012 @@ -623,6 +623,9 @@ /// getElementType - Return the element type of the array/vector. Type *getElementType() const; + + /// getNumElements - Return the number of elements in the array or vector. + unsigned getNumElements() const; /// getElementByteSize - Return the size (in bytes) of each element in the /// array/vector. 
The size of the elements is known to be a multiple of one Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp?rev=148802&r1=148801&r2=148802&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (original) +++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp Tue Jan 24 07:41:11 2012 @@ -1055,6 +1055,23 @@ return DAG.getMergeValues(&Constants[0], Constants.size(), getCurDebugLoc()); } + + if (const ConstantDataSequential *CDS = + dyn_cast(C)) { + SmallVector Ops; + for (unsigned i = 0, e = CDS->getType()->getNumElements(); i != e; ++i) { + SDNode *Val = getValue(CDS->getElementAsConstant(i)).getNode(); + // Add each leaf value from the operand to the Constants list + // to form a flattened list of all the values. + for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i) + Ops.push_back(SDValue(Val, i)); + } + + if (isa(CDS->getType())) + return DAG.getMergeValues(&Ops[0], Ops.size(), getCurDebugLoc()); + return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(), + VT, &Ops[0], Ops.size()); + } if (C->getType()->isStructTy() || C->getType()->isArrayTy()) { assert((isa(C) || isa(C)) && @@ -1089,9 +1106,9 @@ // Now that we know the number and type of the elements, get that number of // elements into the Ops array based on what kind of constant it is. 
SmallVector Ops; - if (const ConstantVector *CP = dyn_cast(C)) { + if (const ConstantVector *CV = dyn_cast(C)) { for (unsigned i = 0; i != NumElements; ++i) - Ops.push_back(getValue(CP->getOperand(i))); + Ops.push_back(getValue(CV->getOperand(i))); } else { assert(isa(C) && "Unknown vector constant!"); EVT EltVT = TLI.getValueType(VecTy->getElementType()); Modified: llvm/trunk/lib/ExecutionEngine/ExecutionEngine.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/ExecutionEngine/ExecutionEngine.cpp?rev=148802&r1=148801&r2=148802&view=diff ============================================================================== --- llvm/trunk/lib/ExecutionEngine/ExecutionEngine.cpp (original) +++ llvm/trunk/lib/ExecutionEngine/ExecutionEngine.cpp Tue Jan 24 07:41:11 2012 @@ -307,13 +307,12 @@ // Should be an array of '{ i32, void ()* }' structs. The first value is // the init priority, which we ignore. - if (isa(GV->getInitializer())) + ConstantArray *InitList = dyn_cast(GV->getInitializer()); + if (InitList == 0) return; - ConstantArray *InitList = cast(GV->getInitializer()); for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) { - if (isa(InitList->getOperand(i))) - continue; - ConstantStruct *CS = cast(InitList->getOperand(i)); + ConstantStruct *CS = dyn_cast(InitList->getOperand(i)); + if (CS == 0) continue; Constant *FP = CS->getOperand(1); if (FP->isNullValue()) @@ -954,30 +953,47 @@ void ExecutionEngine::InitializeMemory(const Constant *Init, void *Addr) { DEBUG(dbgs() << "JIT: Initializing " << Addr << " "); DEBUG(Init->dump()); - if (isa(Init)) { + if (isa(Init)) return; - } else if (const ConstantVector *CP = dyn_cast(Init)) { + + if (const ConstantVector *CP = dyn_cast(Init)) { unsigned ElementSize = getTargetData()->getTypeAllocSize(CP->getType()->getElementType()); for (unsigned i = 0, e = CP->getNumOperands(); i != e; ++i) InitializeMemory(CP->getOperand(i), (char*)Addr+i*ElementSize); return; - } else if (isa(Init)) { + } + + if (isa(Init)) 
{ memset(Addr, 0, (size_t)getTargetData()->getTypeAllocSize(Init->getType())); return; - } else if (const ConstantArray *CPA = dyn_cast(Init)) { + } + + if (const ConstantArray *CPA = dyn_cast(Init)) { unsigned ElementSize = getTargetData()->getTypeAllocSize(CPA->getType()->getElementType()); for (unsigned i = 0, e = CPA->getNumOperands(); i != e; ++i) InitializeMemory(CPA->getOperand(i), (char*)Addr+i*ElementSize); return; - } else if (const ConstantStruct *CPS = dyn_cast(Init)) { + } + + if (const ConstantStruct *CPS = dyn_cast(Init)) { const StructLayout *SL = getTargetData()->getStructLayout(cast(CPS->getType())); for (unsigned i = 0, e = CPS->getNumOperands(); i != e; ++i) InitializeMemory(CPS->getOperand(i), (char*)Addr+SL->getElementOffset(i)); return; - } else if (Init->getType()->isFirstClassType()) { + } + + if (const ConstantDataSequential *CDS = + dyn_cast(Init)) { + // CDS is already laid out in host memory order. + StringRef Data = CDS->getRawDataValues(); + memcpy(Addr, Data.data(), Data.size()); + return; + } + + if (Init->getType()->isFirstClassType()) { GenericValue Val = getConstantValue(Init); StoreValueToMemory(Val, (GenericValue*)Addr, Init->getType()); return; Modified: llvm/trunk/lib/Linker/LinkModules.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Linker/LinkModules.cpp?rev=148802&r1=148801&r2=148802&view=diff ============================================================================== --- llvm/trunk/lib/Linker/LinkModules.cpp (original) +++ llvm/trunk/lib/Linker/LinkModules.cpp Tue Jan 24 07:41:11 2012 @@ -843,29 +843,32 @@ return false; } +static void getArrayElements(Constant *C, SmallVectorImpl &Dest) { + if (ConstantArray *I = dyn_cast(C)) { + for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) + Dest.push_back(I->getOperand(i)); + return; + } + + if (ConstantDataSequential *CDS = dyn_cast(C)) { + for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) + Dest.push_back(CDS->getElementAsConstant(i)); + return; 
+ } + + ConstantAggregateZero *CAZ = cast(C); + Dest.append(cast(C->getType())->getNumElements(), + CAZ->getSequentialElement()); +} + void ModuleLinker::linkAppendingVarInit(const AppendingVarInfo &AVI) { // Merge the initializer. SmallVector Elements; - if (ConstantArray *I = dyn_cast(AVI.DstInit)) { - for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) - Elements.push_back(I->getOperand(i)); - } else { - assert(isa(AVI.DstInit)); - ArrayType *DstAT = cast(AVI.DstInit->getType()); - Type *EltTy = DstAT->getElementType(); - Elements.append(DstAT->getNumElements(), Constant::getNullValue(EltTy)); - } + getArrayElements(AVI.DstInit, Elements); Constant *SrcInit = MapValue(AVI.SrcInit, ValueMap, RF_None, &TypeMap); - if (const ConstantArray *I = dyn_cast(SrcInit)) { - for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) - Elements.push_back(I->getOperand(i)); - } else { - assert(isa(SrcInit)); - ArrayType *SrcAT = cast(SrcInit->getType()); - Type *EltTy = SrcAT->getElementType(); - Elements.append(SrcAT->getNumElements(), Constant::getNullValue(EltTy)); - } + getArrayElements(SrcInit, Elements); + ArrayType *NewType = cast(AVI.NewGV->getType()->getElementType()); AVI.NewGV->setInitializer(ConstantArray::get(NewType, Elements)); } Modified: llvm/trunk/lib/VMCore/Constants.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/Constants.cpp?rev=148802&r1=148801&r2=148802&view=diff ============================================================================== --- llvm/trunk/lib/VMCore/Constants.cpp (original) +++ llvm/trunk/lib/VMCore/Constants.cpp Tue Jan 24 07:41:11 2012 @@ -1995,8 +1995,7 @@ } StringRef ConstantDataSequential::getRawDataValues() const { - return StringRef(DataElements, - getType()->getNumElements()*getElementByteSize()); + return StringRef(DataElements, getNumElements()*getElementByteSize()); } /// isElementTypeCompatible - Return true if a ConstantDataSequential can be @@ -2018,6 +2017,12 @@ return false; } +/// 
getNumElements - Return the number of elements in the array or vector. +unsigned ConstantDataSequential::getNumElements() const { + return getType()->getNumElements(); +} + + /// getElementByteSize - Return the size in bytes of the elements in the data. uint64_t ConstantDataSequential::getElementByteSize() const { return getElementType()->getPrimitiveSizeInBits()/8; @@ -2025,7 +2030,7 @@ /// getElementPointer - Return the start of the specified element. const char *ConstantDataSequential::getElementPointer(unsigned Elt) const { - assert(Elt < getElementType()->getNumElements() && "Invalid Elt"); + assert(Elt < getNumElements() && "Invalid Elt"); return DataElements+Elt*getElementByteSize(); } From elena.demikhovsky at intel.com Tue Jan 24 07:54:13 2012 From: elena.demikhovsky at intel.com (Elena Demikhovsky) Date: Tue, 24 Jan 2012 13:54:13 -0000 Subject: [llvm-commits] [llvm] r148803 - in /llvm/trunk: lib/Target/X86/X86ISelLowering.cpp test/CodeGen/X86/avx-zext.ll Message-ID: <20120124135413.EED592A6C12C@llvm.org> Author: delena Date: Tue Jan 24 07:54:13 2012 New Revision: 148803 URL: http://llvm.org/viewvc/llvm-project?rev=148803&view=rev Log: ZERO_EXTEND operation is optimized for AVX. v8i16 -> v8i32, v4i32 -> v4i64 - used vpunpck* instructions. 
Added: llvm/trunk/test/CodeGen/X86/avx-zext.ll (with props) Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=148803&r1=148802&r2=148803&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original) +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue Jan 24 07:54:13 2012 @@ -14349,7 +14349,8 @@ return SDValue(); } -static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG) { +static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG, + const X86Subtarget *Subtarget) { // (i32 zext (and (i8 x86isd::setcc_carry), 1)) -> // (and (i32 x86isd::setcc_carry), 1) // This eliminates the zext. This transformation is necessary because @@ -14357,6 +14358,8 @@ DebugLoc dl = N->getDebugLoc(); SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); + EVT OpVT = N0.getValueType(); + if (N0.getOpcode() == ISD::AND && N0.hasOneUse() && N0.getOperand(0).hasOneUse()) { @@ -14371,6 +14374,38 @@ N00.getOperand(0), N00.getOperand(1)), DAG.getConstant(1, VT)); } + // Optimize vectors in AVX mode: + // + // v8i16 -> v8i32 + // Use vpunpcklwd for 4 lower elements v8i16 -> v4i32. + // Use vpunpckhwd for 4 upper elements v8i16 -> v4i32. + // Concat upper and lower parts. + // + // v4i32 -> v4i64 + // Use vpunpckldq for 4 lower elements v4i32 -> v2i64. + // Use vpunpckhdq for 4 upper elements v4i32 -> v2i64. + // Concat upper and lower parts. 
+ // + if (Subtarget->hasAVX()) { + + if (((VT == MVT::v8i32) && (OpVT == MVT::v8i16)) || + ((VT == MVT::v4i64) && (OpVT == MVT::v4i32))) { + + SDValue ZeroVec = getZeroVector(OpVT, Subtarget->hasSSE2(), Subtarget->hasAVX2(), + DAG, dl); + SDValue OpLo = getTargetShuffleNode(X86ISD::UNPCKL, dl, OpVT, N0, ZeroVec, DAG); + SDValue OpHi = getTargetShuffleNode(X86ISD::UNPCKH, dl, OpVT, N0, ZeroVec, DAG); + + EVT HVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(), + VT.getVectorNumElements()/2); + + OpLo = DAG.getNode(ISD::BITCAST, dl, HVT, OpLo); + OpHi = DAG.getNode(ISD::BITCAST, dl, HVT, OpHi); + + return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi); + } + } + return SDValue(); } @@ -14558,7 +14593,7 @@ case X86ISD::FAND: return PerformFANDCombine(N, DAG); case X86ISD::BT: return PerformBTCombine(N, DAG, DCI); case X86ISD::VZEXT_MOVL: return PerformVZEXT_MOVLCombine(N, DAG); - case ISD::ZERO_EXTEND: return PerformZExtCombine(N, DAG); + case ISD::ZERO_EXTEND: return PerformZExtCombine(N, DAG, Subtarget); case X86ISD::SETCC: return PerformSETCCCombine(N, DAG); case X86ISD::SHUFP: // Handle all target specific shuffles case X86ISD::PALIGN: Added: llvm/trunk/test/CodeGen/X86/avx-zext.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-zext.ll?rev=148803&view=auto ============================================================================== --- llvm/trunk/test/CodeGen/X86/avx-zext.ll (added) +++ llvm/trunk/test/CodeGen/X86/avx-zext.ll Tue Jan 24 07:54:13 2012 @@ -0,0 +1,17 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s + +define <8 x i32> @zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp { +;CHECK: zext_8i16_to_8i32 +;CHECK: vpunpckhwd + + %B = zext <8 x i16> %A to <8 x i32> + ret <8 x i32>%B +} + +define <4 x i64> @zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp { +;CHECK: zext_4i32_to_4i64 +;CHECK: vpunpckhdq + + %B = zext <4 x i32> %A to <4 x i64> 
+ ret <4 x i64>%B +} Propchange: llvm/trunk/test/CodeGen/X86/avx-zext.ll ------------------------------------------------------------------------------ svn:executable = * From sabre at nondot.org Tue Jan 24 08:04:41 2012 From: sabre at nondot.org (Chris Lattner) Date: Tue, 24 Jan 2012 14:04:41 -0000 Subject: [llvm-commits] [llvm] r148804 - in /llvm/trunk: include/llvm/Constants.h lib/VMCore/Constants.cpp Message-ID: <20120124140441.2EA842A6C12C@llvm.org> Author: lattner Date: Tue Jan 24 08:04:40 2012 New Revision: 148804 URL: http://llvm.org/viewvc/llvm-project?rev=148804&view=rev Log: Rearrange argument order of ::get methods so that LLVMContext comes first, add a ConstantDataArray::getString method that corresponds to the (to be removed) StringRef version of ConstantArray::get, but is dramatically more efficient. Modified: llvm/trunk/include/llvm/Constants.h llvm/trunk/lib/VMCore/Constants.cpp Modified: llvm/trunk/include/llvm/Constants.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Constants.h?rev=148804&r1=148803&r2=148804&view=diff ============================================================================== --- llvm/trunk/include/llvm/Constants.h (original) +++ llvm/trunk/include/llvm/Constants.h Tue Jan 24 08:04:40 2012 @@ -358,6 +358,8 @@ /// of the array by one (you've been warned). However, in some situations /// this is not desired so if AddNull==false then the string is copied without /// null termination. + + // FIXME Remove this. static Constant *get(LLVMContext &Context, StringRef Initializer, bool AddNull = true); @@ -697,13 +699,21 @@ /// get() constructors - Return a constant with array type with an element /// count and element type matching the ArrayRef passed in. Note that this /// can return a ConstantAggregateZero object. 
- static Constant *get(ArrayRef Elts, LLVMContext &Context); - static Constant *get(ArrayRef Elts, LLVMContext &Context); - static Constant *get(ArrayRef Elts, LLVMContext &Context); - static Constant *get(ArrayRef Elts, LLVMContext &Context); - static Constant *get(ArrayRef Elts, LLVMContext &Context); - static Constant *get(ArrayRef Elts, LLVMContext &Context); - + static Constant *get(LLVMContext &Context, ArrayRef Elts); + static Constant *get(LLVMContext &Context, ArrayRef Elts); + static Constant *get(LLVMContext &Context, ArrayRef Elts); + static Constant *get(LLVMContext &Context, ArrayRef Elts); + static Constant *get(LLVMContext &Context, ArrayRef Elts); + static Constant *get(LLVMContext &Context, ArrayRef Elts); + + /// getString - This method constructs a CDS and initializes it with a text + /// string. The default behavior (AddNull==true) causes a null terminator to + /// be placed at the end of the array (increasing the length of the string by + /// one more than the StringRef would normally indicate. Pass AddNull=false + /// to disable this behavior. + static Constant *getString(LLVMContext &Context, StringRef Initializer, + bool AddNull = true); + /// getType - Specialize the getType() method to always return an ArrayType, /// which reduces the amount of casting needed in parts of the compiler. /// @@ -740,12 +750,12 @@ /// get() constructors - Return a constant with vector type with an element /// count and element type matching the ArrayRef passed in. Note that this /// can return a ConstantAggregateZero object. 
- static Constant *get(ArrayRef Elts, LLVMContext &Context); - static Constant *get(ArrayRef Elts, LLVMContext &Context); - static Constant *get(ArrayRef Elts, LLVMContext &Context); - static Constant *get(ArrayRef Elts, LLVMContext &Context); - static Constant *get(ArrayRef Elts, LLVMContext &Context); - static Constant *get(ArrayRef Elts, LLVMContext &Context); + static Constant *get(LLVMContext &Context, ArrayRef Elts); + static Constant *get(LLVMContext &Context, ArrayRef Elts); + static Constant *get(LLVMContext &Context, ArrayRef Elts); + static Constant *get(LLVMContext &Context, ArrayRef Elts); + static Constant *get(LLVMContext &Context, ArrayRef Elts); + static Constant *get(LLVMContext &Context, ArrayRef Elts); /// getType - Specialize the getType() method to always return a VectorType, /// which reduces the amount of casting needed in parts of the compiler. Modified: llvm/trunk/lib/VMCore/Constants.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/Constants.cpp?rev=148804&r1=148803&r2=148804&view=diff ============================================================================== --- llvm/trunk/lib/VMCore/Constants.cpp (original) +++ llvm/trunk/lib/VMCore/Constants.cpp Tue Jan 24 08:04:40 2012 @@ -718,9 +718,8 @@ ElementVals.push_back(ConstantInt::get(Type::getInt8Ty(Context), Str[i])); // Add a null terminator to the string... - if (AddNull) { + if (AddNull) ElementVals.push_back(ConstantInt::get(Type::getInt8Ty(Context), 0)); - } ArrayType *ATy = ArrayType::get(Type::getInt8Ty(Context), ElementVals.size()); return get(ATy, ElementVals); @@ -2119,56 +2118,71 @@ /// get() constructors - Return a constant with array type with an element /// count and element type matching the ArrayRef passed in. Note that this /// can return a ConstantAggregateZero object. 
-Constant *ConstantDataArray::get(ArrayRef Elts, LLVMContext &Context) { +Constant *ConstantDataArray::get(LLVMContext &Context, ArrayRef Elts) { Type *Ty = ArrayType::get(Type::getInt8Ty(Context), Elts.size()); return getImpl(StringRef((char*)Elts.data(), Elts.size()*1), Ty); } -Constant *ConstantDataArray::get(ArrayRef Elts, LLVMContext &Context){ +Constant *ConstantDataArray::get(LLVMContext &Context, ArrayRef Elts){ Type *Ty = ArrayType::get(Type::getInt16Ty(Context), Elts.size()); return getImpl(StringRef((char*)Elts.data(), Elts.size()*2), Ty); } -Constant *ConstantDataArray::get(ArrayRef Elts, LLVMContext &Context){ +Constant *ConstantDataArray::get(LLVMContext &Context, ArrayRef Elts){ Type *Ty = ArrayType::get(Type::getInt32Ty(Context), Elts.size()); return getImpl(StringRef((char*)Elts.data(), Elts.size()*4), Ty); } -Constant *ConstantDataArray::get(ArrayRef Elts, LLVMContext &Context){ +Constant *ConstantDataArray::get(LLVMContext &Context, ArrayRef Elts){ Type *Ty = ArrayType::get(Type::getInt64Ty(Context), Elts.size()); return getImpl(StringRef((char*)Elts.data(), Elts.size()*8), Ty); } -Constant *ConstantDataArray::get(ArrayRef Elts, LLVMContext &Context) { +Constant *ConstantDataArray::get(LLVMContext &Context, ArrayRef Elts) { Type *Ty = ArrayType::get(Type::getFloatTy(Context), Elts.size()); return getImpl(StringRef((char*)Elts.data(), Elts.size()*4), Ty); } -Constant *ConstantDataArray::get(ArrayRef Elts, LLVMContext &Context) { +Constant *ConstantDataArray::get(LLVMContext &Context, ArrayRef Elts) { Type *Ty = ArrayType::get(Type::getDoubleTy(Context), Elts.size()); return getImpl(StringRef((char*)Elts.data(), Elts.size()*8), Ty); } +/// getString - This method constructs a CDS and initializes it with a text +/// string. The default behavior (AddNull==true) causes a null terminator to +/// be placed at the end of the array (increasing the length of the string by +/// one more than the StringRef would normally indicate. 
Pass AddNull=false +/// to disable this behavior. +Constant *ConstantDataArray::getString(LLVMContext &Context, + StringRef Str, bool AddNull) { + if (!AddNull) + return get(Context, ArrayRef((uint8_t*)Str.data(), Str.size())); + + SmallVector ElementVals; + ElementVals.append(Str.begin(), Str.end()); + ElementVals.push_back(0); + return get(Context, ElementVals); +} /// get() constructors - Return a constant with vector type with an element /// count and element type matching the ArrayRef passed in. Note that this /// can return a ConstantAggregateZero object. -Constant *ConstantDataVector::get(ArrayRef Elts, LLVMContext &Context) { +Constant *ConstantDataVector::get(LLVMContext &Context, ArrayRef Elts){ Type *Ty = VectorType::get(Type::getInt8Ty(Context), Elts.size()); return getImpl(StringRef((char*)Elts.data(), Elts.size()*1), Ty); } -Constant *ConstantDataVector::get(ArrayRef Elts, LLVMContext &Context){ +Constant *ConstantDataVector::get(LLVMContext &Context, ArrayRef Elts){ Type *Ty = VectorType::get(Type::getInt16Ty(Context), Elts.size()); return getImpl(StringRef((char*)Elts.data(), Elts.size()*2), Ty); } -Constant *ConstantDataVector::get(ArrayRef Elts, LLVMContext &Context){ +Constant *ConstantDataVector::get(LLVMContext &Context, ArrayRef Elts){ Type *Ty = VectorType::get(Type::getInt32Ty(Context), Elts.size()); return getImpl(StringRef((char*)Elts.data(), Elts.size()*4), Ty); } -Constant *ConstantDataVector::get(ArrayRef Elts, LLVMContext &Context){ +Constant *ConstantDataVector::get(LLVMContext &Context, ArrayRef Elts){ Type *Ty = VectorType::get(Type::getInt64Ty(Context), Elts.size()); return getImpl(StringRef((char*)Elts.data(), Elts.size()*8), Ty); } -Constant *ConstantDataVector::get(ArrayRef Elts, LLVMContext &Context) { +Constant *ConstantDataVector::get(LLVMContext &Context, ArrayRef Elts) { Type *Ty = VectorType::get(Type::getFloatTy(Context), Elts.size()); return getImpl(StringRef((char*)Elts.data(), Elts.size()*4), Ty); } -Constant 
*ConstantDataVector::get(ArrayRef Elts, LLVMContext &Context) { +Constant *ConstantDataVector::get(LLVMContext &Context, ArrayRef Elts) { Type *Ty = VectorType::get(Type::getDoubleTy(Context), Elts.size()); return getImpl(StringRef((char*)Elts.data(), Elts.size()*8), Ty); } From sabre at nondot.org Tue Jan 24 08:17:05 2012 From: sabre at nondot.org (Chris Lattner) Date: Tue, 24 Jan 2012 14:17:05 -0000 Subject: [llvm-commits] [llvm] r148805 - in /llvm/trunk/lib: Target/CBackend/CBackend.cpp Target/CppBackend/CPPBackend.cpp Target/TargetLoweringObjectFile.cpp VMCore/Constants.cpp Message-ID: <20120124141705.AFDA72A6C12C@llvm.org> Author: lattner Date: Tue Jan 24 08:17:05 2012 New Revision: 148805 URL: http://llvm.org/viewvc/llvm-project?rev=148805&view=rev Log: C++, CBE, and TLOF support for ConstantDataSequential Modified: llvm/trunk/lib/Target/CBackend/CBackend.cpp llvm/trunk/lib/Target/CppBackend/CPPBackend.cpp llvm/trunk/lib/Target/TargetLoweringObjectFile.cpp llvm/trunk/lib/VMCore/Constants.cpp Modified: llvm/trunk/lib/Target/CBackend/CBackend.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/CBackend/CBackend.cpp?rev=148805&r1=148804&r2=148805&view=diff ============================================================================== --- llvm/trunk/lib/Target/CBackend/CBackend.cpp (original) +++ llvm/trunk/lib/Target/CBackend/CBackend.cpp Tue Jan 24 08:17:05 2012 @@ -215,6 +215,8 @@ bool printConstExprCast(const ConstantExpr *CE, bool Static); void printConstantArray(ConstantArray *CPA, bool Static); void printConstantVector(ConstantVector *CV, bool Static); + void printConstantDataSequential(ConstantDataSequential *CDS, bool Static); + /// isAddressExposed - Return true if the specified value's name needs to /// have its address taken in order to get a C value of the correct type. 
@@ -556,20 +558,10 @@ } void CWriter::printConstantArray(ConstantArray *CPA, bool Static) { - // As a special case, print the array as a string if it is an array of // ubytes or an array of sbytes with positive values. // - Type *ETy = CPA->getType()->getElementType(); - bool isString = (ETy == Type::getInt8Ty(CPA->getContext()) || - ETy == Type::getInt8Ty(CPA->getContext())); - - // Make sure the last character is a null char, as automatically added by C - if (isString && (CPA->getNumOperands() == 0 || - !cast(*(CPA->op_end()-1))->isNullValue())) - isString = false; - - if (isString) { + if (CPA->isCString()) { Out << '\"'; // Keep track of whether the last number was a hexadecimal escape. bool LastWasHex = false; @@ -637,6 +629,66 @@ Out << " }"; } +void CWriter::printConstantDataSequential(ConstantDataSequential *CDS, + bool Static) { + // As a special case, print the array as a string if it is an array of + // ubytes or an array of sbytes with positive values. + // + if (CDS->isCString()) { + Out << '\"'; + // Keep track of whether the last number was a hexadecimal escape. + bool LastWasHex = false; + + StringRef Bytes = CDS->getAsCString(); + + // Do not include the last character, which we know is null + for (unsigned i = 0, e = Bytes.size(); i != e; ++i) { + unsigned char C = Bytes[i]; + + // Print it out literally if it is a printable character. The only thing + // to be careful about is when the last letter output was a hex escape + // code, in which case we have to be careful not to print out hex digits + // explicitly (the C compiler thinks it is a continuation of the previous + // character, sheesh...) 
+ // + if (isprint(C) && (!LastWasHex || !isxdigit(C))) { + LastWasHex = false; + if (C == '"' || C == '\\') + Out << "\\" << (char)C; + else + Out << (char)C; + } else { + LastWasHex = false; + switch (C) { + case '\n': Out << "\\n"; break; + case '\t': Out << "\\t"; break; + case '\r': Out << "\\r"; break; + case '\v': Out << "\\v"; break; + case '\a': Out << "\\a"; break; + case '\"': Out << "\\\""; break; + case '\'': Out << "\\\'"; break; + default: + Out << "\\x"; + Out << (char)(( C/16 < 10) ? ( C/16 +'0') : ( C/16 -10+'A')); + Out << (char)(((C&15) < 10) ? ((C&15)+'0') : ((C&15)-10+'A')); + LastWasHex = true; + break; + } + } + } + Out << '\"'; + } else { + Out << "{ "; + printConstant(CDS->getElementAsConstant(0), Static); + for (unsigned i = 1, e = CDS->getNumOperands(); i != e; ++i) { + Out << ", "; + printConstant(CDS->getElementAsConstant(i), Static); + } + Out << " }"; + } +} + + // isFPCSafeToPrint - Returns true if we may assume that CFP may be written out // textually as a double (rather than as a reference to a stack-allocated // variable). We decide this by converting CFP to a string and back into a @@ -1024,6 +1076,9 @@ Out << "{ "; // Arrays are wrapped in struct types. 
if (ConstantArray *CA = dyn_cast(CPV)) { printConstantArray(CA, Static); + } else if (ConstantDataSequential *CDS = + dyn_cast(CPV)) { + printConstantDataSequential(CDS, Static); } else { assert(isa(CPV) || isa(CPV)); ArrayType *AT = cast(CPV->getType()); @@ -1051,6 +1106,9 @@ } if (ConstantVector *CV = dyn_cast(CPV)) { printConstantVector(CV, Static); + } else if (ConstantDataSequential *CDS = + dyn_cast(CPV)) { + printConstantDataSequential(CDS, Static); } else { assert(isa(CPV) || isa(CPV)); VectorType *VT = cast(CPV->getType()); Modified: llvm/trunk/lib/Target/CppBackend/CPPBackend.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/CppBackend/CPPBackend.cpp?rev=148805&r1=148804&r2=148805&view=diff ============================================================================== --- llvm/trunk/lib/Target/CppBackend/CPPBackend.cpp (original) +++ llvm/trunk/lib/Target/CppBackend/CPPBackend.cpp Tue Jan 24 08:17:05 2012 @@ -698,9 +698,7 @@ printCFP(CFP); Out << ";"; } else if (const ConstantArray *CA = dyn_cast(CV)) { - if (CA->isString() && - CA->getType()->getElementType() == - Type::getInt8Ty(CA->getContext())) { + if (CA->isString()) { Out << "Constant* " << constName << " = ConstantArray::get(mod->getContext(), \""; std::string tmp = CA->getAsString(); @@ -757,6 +755,41 @@ } else if (isa(CV)) { Out << "UndefValue* " << constName << " = UndefValue::get(" << typeName << ");"; + } else if (const ConstantDataSequential *CDS = + dyn_cast(CV)) { + if (CDS->isString()) { + Out << "Constant *" << constName << + " = ConstantDataArray::getString(mod->getContext(), \""; + StringRef Str = CA->getAsString(); + bool nullTerminate = false; + if (Str.back() == 0) { + Str = Str.drop_back(); + nullTerminate = true; + } + printEscapedString(Str); + // Determine if we want null termination or not. 
+ if (nullTerminate) + Out << "\", true);"; + else + Out << "\", false);";// No null terminator + } else { + // TODO: Could generate more efficient code generating CDS calls instead. + Out << "std::vector " << constName << "_elems;"; + nl(Out); + for (unsigned i = 0; i != CDS->getNumElements(); ++i) { + Constant *Elt = CDS->getElementAsConstant(i); + printConstant(Elt); + Out << constName << "_elems.push_back(" << getCppName(Elt) << ");"; + nl(Out); + } + Out << "Constant* " << constName; + + if (isa(CDS->getType())) + Out << " = ConstantArray::get("; + else + Out << " = ConstantVector::get("; + Out << typeName << ", " << constName << "_elems);"; + } } else if (const ConstantExpr *CE = dyn_cast(CV)) { if (CE->getOpcode() == Instruction::GetElementPtr) { Out << "std::vector " << constName << "_indices;"; Modified: llvm/trunk/lib/Target/TargetLoweringObjectFile.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/TargetLoweringObjectFile.cpp?rev=148805&r1=148804&r2=148805&view=diff ============================================================================== --- llvm/trunk/lib/Target/TargetLoweringObjectFile.cpp (original) +++ llvm/trunk/lib/Target/TargetLoweringObjectFile.cpp Tue Jan 24 08:17:05 2012 @@ -73,12 +73,12 @@ /// IsNullTerminatedString - Return true if the specified constant (which is /// known to have a type that is an array of 1/2/4 byte elements) ends with a -/// nul value and contains no other nuls in it. +/// nul value and contains no other nuls in it. Note that this is more general +/// than ConstantDataSequential::isString because we allow 2 & 4 byte strings. 
static bool IsNullTerminatedString(const Constant *C) { - ArrayType *ATy = cast(C->getType()); - - // First check: is we have constant array of i8 terminated with zero + // First check: is we have constant array terminated with zero if (const ConstantArray *CVA = dyn_cast(C)) { + ArrayType *ATy = cast(C->getType()); if (ATy->getNumElements() == 0) return false; ConstantInt *Null = @@ -94,10 +94,23 @@ return false; return true; } + if (const ConstantDataSequential *CDS = dyn_cast(C)) { + unsigned NumElts = CDS->getNumElements(); + assert(NumElts != 0 && "Can't have an empty CDS"); + + if (CDS->getElementAsInteger(NumElts-1) != 0) + return false; // Not null terminated. + + // Verify that the null doesn't occur anywhere else in the string. + for (unsigned i = 0; i != NumElts-1; ++i) + if (CDS->getElementAsInteger(i) == 0) + return false; + return true; + } // Another possibility: [1 x i8] zeroinitializer if (isa(C)) - return ATy->getNumElements() == 1; + return cast(C->getType())->getNumElements() == 1; return false; } Modified: llvm/trunk/lib/VMCore/Constants.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/Constants.cpp?rev=148805&r1=148804&r2=148805&view=diff ============================================================================== --- llvm/trunk/lib/VMCore/Constants.cpp (original) +++ llvm/trunk/lib/VMCore/Constants.cpp Tue Jan 24 08:17:05 2012 @@ -2048,7 +2048,8 @@ /// we *want* an underlying "char*" to avoid TBAA type punning violations. Constant *ConstantDataSequential::getImpl(StringRef Elements, Type *Ty) { assert(isElementTypeCompatible(cast(Ty)->getElementType())); - // If the elements are all zero, return a CAZ, which is more dense. + // If the elements are all zero or there are no elements, return a CAZ, which + // is more dense and canonical. 
if (isAllZeros(Elements)) return ConstantAggregateZero::get(Ty); From sabre at nondot.org Tue Jan 24 08:31:22 2012 From: sabre at nondot.org (Chris Lattner) Date: Tue, 24 Jan 2012 14:31:22 -0000 Subject: [llvm-commits] [llvm] r148806 - in /llvm/trunk/lib/Transforms/InstCombine: InstCombineSimplifyDemanded.cpp InstCombineVectorOps.cpp Message-ID: <20120124143122.C832F2A6C12C@llvm.org> Author: lattner Date: Tue Jan 24 08:31:22 2012 New Revision: 148806 URL: http://llvm.org/viewvc/llvm-project?rev=148806&view=rev Log: basic instcombine support for CDS. Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp llvm/trunk/lib/Transforms/InstCombine/InstCombineVectorOps.cpp Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp?rev=148806&r1=148805&r2=148806&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp (original) +++ llvm/trunk/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp Tue Jan 24 08:31:22 2012 @@ -855,23 +855,36 @@ return NewCP != CV ? NewCP : 0; } - if (isa(V)) { - // Simplify the CAZ to a ConstantVector where the non-demanded elements are - // set to undef. + if (ConstantDataVector *CDV = dyn_cast(V)) { + // Check if this is identity. If so, return 0 since we are not simplifying + // anything. + if (DemandedElts.isAllOnesValue()) + return 0; + + // Simplify to a ConstantVector where the non-demanded elements are undef. + Constant *Undef = UndefValue::get(CDV->getElementType()); + SmallVector Elts; + for (unsigned i = 0; i != VWidth; ++i) + Elts.push_back(DemandedElts[i] ? CDV->getElementAsConstant(i) : Undef); + UndefElts = DemandedElts ^ EltMask; + return ConstantVector::get(Elts); + + } + + if (ConstantAggregateZero *CAZ = dyn_cast(V)) { // Check if this is identity. 
If so, return 0 since we are not simplifying // anything. if (DemandedElts.isAllOnesValue()) return 0; - Type *EltTy = cast(V->getType())->getElementType(); - Constant *Zero = Constant::getNullValue(EltTy); - Constant *Undef = UndefValue::get(EltTy); - std::vector Elts; - for (unsigned i = 0; i != VWidth; ++i) { - Constant *Elt = DemandedElts[i] ? Zero : Undef; - Elts.push_back(Elt); - } + // Simplify the CAZ to a ConstantVector where the non-demanded elements are + // set to undef. + Constant *Zero = CAZ->getSequentialElement(); + Constant *Undef = UndefValue::get(Zero->getType()); + SmallVector Elts; + for (unsigned i = 0; i != VWidth; ++i) + Elts.push_back(DemandedElts[i] ? Zero : Undef); UndefElts = DemandedElts ^ EltMask; return ConstantVector::get(Elts); } Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineVectorOps.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineVectorOps.cpp?rev=148806&r1=148805&r2=148806&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/InstCombine/InstCombineVectorOps.cpp (original) +++ llvm/trunk/lib/Transforms/InstCombine/InstCombineVectorOps.cpp Tue Jan 24 08:31:22 2012 @@ -16,7 +16,8 @@ using namespace llvm; /// CheapToScalarize - Return true if the value is cheaper to scalarize than it -/// is to leave as a vector operation. +/// is to leave as a vector operation. isConstant indicates whether we're +/// extracting one known element. If false we're extracting a variable index. 
static bool CheapToScalarize(Value *V, bool isConstant) { if (isa(V)) return true; @@ -335,10 +336,14 @@ if (isa(V)) { Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext()))); return V; - } else if (isa(V)) { + } + + if (isa(V)) { Mask.assign(NumElts, ConstantInt::get(Type::getInt32Ty(V->getContext()),0)); return V; - } else if (InsertElementInst *IEI = dyn_cast(V)) { + } + + if (InsertElementInst *IEI = dyn_cast(V)) { // If this is an insert of an extract from some other vector, include it. Value *VecOp = IEI->getOperand(0); Value *ScalarOp = IEI->getOperand(1); From james.molloy at arm.com Tue Jan 24 08:46:21 2012 From: james.molloy at arm.com (James Molloy) Date: Tue, 24 Jan 2012 14:46:21 -0000 Subject: [llvm-commits] PATCH: Add support for tracking and exposing the host triple In-Reply-To: References: Message-ID: <001301ccdaa6$f0744600$d15cd200$@molloy@arm.com> Hi Chandler, The patch looks good to me, although I'm wondering why Clang needs to care what it was *built* to run on, as opposed to what it is actually running on at runtime. For example, I compile on RHE3 for RHE3, then run on RHE5. Clang should surely understand that it is hosted on RHE5 and act accordingly. So why is this information needed as a build time constant? Cheers, James From: llvm-commits-bounces at cs.uiuc.edu [mailto:llvm-commits-bounces at cs.uiuc.edu] On Behalf Of Chandler Carruth Sent: 24 January 2012 11:59 To: ; Eric Christopher Subject: [llvm-commits] PATCH: Add support for tracking and exposing the host triple Attached is a patch to add support for tracking and exposing the host triple. This is one of the many steps along the way to cleaning up the Clang driver. In many places within Clang, we need to reason about the *host*, not the target, in order to decide how to behave. For example, this is at issue when deciding where within the host machine we should look for the target's toolchain.
While in some cases, there is a canonical location that the target *always* uses, in other cases both can be relevant. Consider the Debian multiarch case where we will often want to use '/usr/lib//gcc///crtbegin.o'. In other cases, we may need to check whether the host triple might contain a toolchain capable of targeting our target. This happens frequently when working in a bi-arch world, where the GCC installed under 'i386-pc-linux-gnu' is actually capable of targeting 'x86_64-pc-linux-gnu' thanks to the '-m64' flag. Some time back an effort was undertaken which simply blanket renamed 'Host' to 'Target' in many places. This didn't really address the fundamental problem, and in my mind made things a bit more confusing. Thus in most places in the driver, we treat the DefaultTargetTriple as the *host* triple, compute the HostInfo from it, and then *modify* it to produce the target triple. Ow. Other times, we modify it first, and assume that the post-modification triple is still a viable host triple. All of this gets simple if we simply have access to both triples the entire time. We can make rational decisions about when we should and should not use the host information or the target information. Also, it will become immediately clear from the code which is in fact in use. Again, this is just the first step, but it touches autoconf and a bunch of other stuff so I wanted someone else to glance at it before I checked it in. I'll also need someone to regenerate the config.h and the configure scripts as I don't have easy access to the proper versions. If someone can send me those patched files i can submit as one patch to keep the build bots happy. -------------- next part -------------- An HTML attachment was scrubbed... 
URL: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120124/56a6348a/attachment.html From hfinkel at anl.gov Tue Jan 24 09:12:48 2012 From: hfinkel at anl.gov (Hal Finkel) Date: Tue, 24 Jan 2012 09:12:48 -0600 Subject: [llvm-commits] LLVM patch to support ARM fused multiply add/subtract instructions In-Reply-To: References: <027a01cccb29$a4709100$ed51b300$@org> <4f1e6df7.ce3ed80a.6ea5.383fSMTPIN_ADDED@mx.google.com> <4f1e7364.4fecd80a.2a66.ffff972eSMTPIN_ADDED@mx.google.com> Message-ID: <1327417968.11266.8.camel@sapling> On Tue, 2012-01-24 at 13:44 +0300, Anton Korobeynikov wrote: > Cameron, > > > There needs to be a new flag for the automatic use of excess precision, if that behavior is even desired. > Right. Maybe -ffast-math should imply this flag as well. I think that we already have a way for dealing with this (which is what PPC uses for its FMA patterns); see Target/TargetOptions.h which includes the following: /// LessPreciseFPMAD - This flag is enabled when the /// -enable-fp-mad is specified on the command line. When this flag is off /// (the default), the code generator is not allowed to generate mad /// (multiply add) if the result is "less precise" than doing those /// operations individually. unsigned LessPreciseFPMADOption : 1; ... /// NoExcessFPPrecision - This flag is enabled when the /// -disable-excess-fp-precision flag is specified on the command line. /// When this flag is off (the default), the code generator is allowed to /// produce results that are "more precise" than IEEE allows. This includes /// use of FMA-like operations and use of the X86 FP registers without /// rounding all over the place. unsigned NoExcessFPPrecision : 1; /// UnsafeFPMath - This flag is enabled when the /// -enable-unsafe-fp-math flag is specified on the command line. When /// this flag is off (the default), the code generator is not allowed to /// produce results that are "less precise" than IEEE allows. 
This includes /// use of X86 instructions like FSIN and FCOS instead of libcalls. /// UnsafeFPMath implies LessPreciseFPMAD. unsigned UnsafeFPMath : 1; So depending on whether your FMA instructions are more or less precise than the original instruction combination, you can predicate them on one of these flags as necessary. For example, the PPC backend defines: def FPContractions : Predicate<"!TM.Options.NoExcessFPPrecision">; and then uses this to restrict the application of the patterns for fpmadd and similar instructions. -Hal > -- Hal Finkel Postdoctoral Appointee Leadership Computing Facility Argonne National Laboratory From tobias at grosser.es Tue Jan 24 09:53:48 2012 From: tobias at grosser.es (Tobias Grosser) Date: Tue, 24 Jan 2012 16:53:48 +0100 Subject: [llvm-commits] [LLVMdev] [PATCH] BasicBlock Autovectorization Pass In-Reply-To: <1327378420.32397.1603.camel@sapling> References: <4EB0462C.5010209@grosser.es> <1320184739.23036.1334.camel@sapling> <1320191694.23036.1497.camel@sapling> <1320749109.19359.76.camel@sapling> <4EB90E98.4010805@grosser.es> <1320762963.19359.117.camel@sapling> <4EB98207.2070807@grosser.es> <1320791390.19359.262.camel@sapling> <4EBC4B0F.6010609@grosser.es> <1321050998.19359.539.camel@sapling> <4EBDA7F9.9080709@grosser.es> <1321053083.19359.550.camel@sapling> <4EBDB1BF.7090006@grosser.es> <1321400339.19359.782.camel@sapling> <1321486739.19359.1067.camel@sapling> <4EC504B5.2020408@grosser.es> <1321898108.2507.36.camel@sapling> <1321932161.2507.101.camel@sapling> <1322067157.2507.263.camel@sapling> <4ED8F7B0.8050309@grosser.es> <1323822351.590.1687.camel@sapling> <4EFC7291.9040808@grosser.es> <1325179929.13080.2839.camel@sapling> <4EFD7FD3.8040800@grosser.es> <1327378420.32397.1603.camel@sapling > Message-ID: <4F1ED40C.80306@grosser.es> On 01/24/2012 05:13 AM, Hal Finkel wrote: > On Tue, 2012-01-17 at 13:25 -0600, Sebastian Pop wrote: >> Hi, >> >> On Fri, Dec 30, 2011 at 3:09 AM, Tobias Grosser wrote: >>> As it seems my intuition is 
wrong, I am very eager to see and understand >>> an example where a search limit of 4000 is really needed. >>> >> >> To make the ball roll again, I attached a testcase that can be tuned >> to understand the impact on compile time for different sizes of a >> basic block. One can also set the number of iterations in the loop to >> 1 to test the vectorizer with no loops around. >> >> Hal, could you please report the compile times with/without the >> vectorizer for different basic block sizes? > > I've looked at your test case, and I am pleased to report a negligible > compile-time increase! That is nice. But does this example actually trigger the search limit of 4000? I think that is the case I am especially interested in. Cheers Tobi From hfinkel at anl.gov Tue Jan 24 10:17:29 2012 From: hfinkel at anl.gov (Hal Finkel) Date: Tue, 24 Jan 2012 10:17:29 -0600 Subject: [llvm-commits] [LLVMdev] [PATCH] BasicBlock Autovectorization Pass In-Reply-To: <4F1ED40C.80306@grosser.es> References: <4EB0462C.5010209@grosser.es> <1320184739.23036.1334.camel@sapling> <1320191694.23036.1497.camel@sapling> <1320749109.19359.76.camel@sapling> <4EB90E98.4010805@grosser.es> <1320762963.19359.117.camel@sapling> <4EB98207.2070807@grosser.es> <1320791390.19359.262.camel@sapling> <4EBC4B0F.6010609@grosser.es> <1321050998.19359.539.camel@sapling> <4EBDA7F9.9080709@grosser.es> <1321053083.19359.550.camel@sapling> <4EBDB1BF.7090006@grosser.es> <1321400339.19359.782.camel@sapling> <1321486739.19359.1067.camel@sapling> <4EC504B5.2020408@grosser.es> <1321898108.2507.36.camel@sapling> <1321932161.2507.101.camel@sapling> <1322067157.2507.263.camel@sapling> <4ED8F7B0.8050309@grosser.es> <1323822351.590.1687.camel@sapling> <4EFC7291.9040808@grosser.es> <1325179929.13080.2839.camel@sapling> <4EFD7FD3.8040800@grosser.es> <1327378420.32397.1603.camel@sapling > <4F1ED40C.80306@grosser.es> Message-ID: <1327421849.11266.69.camel@sapling> On Tue, 2012-01-24 at 16:53 +0100, Tobias Grosser wrote: > On 01/24/2012 
05:13 AM, Hal Finkel wrote: > > On Tue, 2012-01-17 at 13:25 -0600, Sebastian Pop wrote: > >> Hi, > >> > >> On Fri, Dec 30, 2011 at 3:09 AM, Tobias Grosser wrote: > >>> As it seems my intuition is wrong, I am very eager to see and understand > >>> an example where a search limit of 4000 is really needed. > >>> > >> > >> To make the ball roll again, I attached a testcase that can be tuned > >> to understand the impact on compile time for different sizes of a > >> basic block. One can also set the number of iterations in the loop to > >> 1 to test the vectorizer with no loops around. > >> > >> Hal, could you please report the compile times with/without the > >> vectorizer for different basic block sizes? > > > > I've looked at your test case, and I am pleased to report a negligible > > compile-time increase! > That is nice. But does this example actually trigger the search limit of > 4000? I think that is the case I am especially interested in. I know (and the answer is yes, it could, but not in an interesting way), but I reduced the default search limit to 400. I did this because, when used in combination with my load/store-reordering patch, such a high limit is no longer optimal. As I suspected, it appears that the high limit was compensating for the lack of the ability to schedule non-aliasing loads after stores. I would like to deal with the load/store reordering problem on its own merits (and have already submitted a patch that does this), and so I'll leave the lower default on the vectorizer search limit. In addition, Sebastian's test case highlights why, with the current implementation, having such a high search limit would be bad for compile times. A limit in the hundreds, not thousands, is necessary to provide reasonable compile times for unrolled loops with long dependency chains such as the ones in his example.
Thanks again, Hal > > Cheers > Tobi -- Hal Finkel Postdoctoral Appointee Leadership Computing Facility Argonne National Laboratory From dblaikie at gmail.com Tue Jan 24 10:18:30 2012 From: dblaikie at gmail.com (David Blaikie) Date: Tue, 24 Jan 2012 16:18:30 -0000 Subject: [llvm-commits] [llvm] r148809 - /llvm/trunk/include/llvm/Support/CommandLine.h Message-ID: <20120124161830.62A992A6C12C@llvm.org> Author: dblaikie Date: Tue Jan 24 10:18:30 2012 New Revision: 148809 URL: http://llvm.org/viewvc/llvm-project?rev=148809&view=rev Log: Reword comment based on feedback by Duncan Sands. Modified: llvm/trunk/include/llvm/Support/CommandLine.h Modified: llvm/trunk/include/llvm/Support/CommandLine.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Support/CommandLine.h?rev=148809&r1=148808&r2=148809&view=diff ============================================================================== --- llvm/trunk/include/llvm/Support/CommandLine.h (original) +++ llvm/trunk/include/llvm/Support/CommandLine.h Tue Jan 24 10:18:30 2012 @@ -164,7 +164,7 @@ int NumOccurrences; // The number of times specified // Occurrences, HiddenFlag, and Formatting are all enum types but to avoid - // with signed enums in bitfields in MSVC we'll store them as unsigned + // problems with signed enums in bitfields. 
unsigned Occurrences : 3; // enum NumOccurrencesFlag // not using the enum type for 'Value' because zero is an implementation // detail representing the non-value From dblaikie at gmail.com Tue Jan 24 10:24:04 2012 From: dblaikie at gmail.com (David Blaikie) Date: Tue, 24 Jan 2012 08:24:04 -0800 Subject: [llvm-commits] [llvm] r148746 - /llvm/trunk/include/llvm/Support/CommandLine.h In-Reply-To: <4F1E7478.3080006@free.fr> References: <20120123232747.8CF302A6C12C@llvm.org> <4F1E7478.3080006@free.fr> Message-ID: On Tue, Jan 24, 2012 at 1:06 AM, Duncan Sands wrote: > Hi David, > >> --- llvm/trunk/include/llvm/Support/CommandLine.h (original) >> +++ llvm/trunk/include/llvm/Support/CommandLine.h Mon Jan 23 17:27:47 2012 >> @@ -163,12 +163,14 @@ >> ? ? virtual void anchor(); >> >> ? ? int NumOccurrences; ? ? // The number of times specified >> - ?enum NumOccurrencesFlag Occurrences : 3; >> + ?// Occurrences, HiddenFlag, and Formatting are all enum types but to avoid > > to avoid -> to avoid problems Right >> + ?// with signed enums in bitfields in MSVC we'll store them as unsigned > > I'm not sure it is fair to name and shame MSVC here since (AFAIK) it is being > perfectly standards conformant in its behaviour. Agreed - when Chandler mentioned this it wasn't entirely clear whether the issue was intrinsic to signed enums or to MSVC's implementation of them. I'll reword the comment to give MSVC the benefit of the doubt. committed in r148809 Thanks, - David From joerg at britannica.bec.de Tue Jan 24 10:23:41 2012 From: joerg at britannica.bec.de (Joerg Sonnenberger) Date: Tue, 24 Jan 2012 17:23:41 +0100 Subject: [llvm-commits] PATCH: Add support for tracking and exposing the host triple In-Reply-To: References: Message-ID: <20120124162341.GA12631@britannica.bec.de> On Tue, Jan 24, 2012 at 03:59:00AM -0800, Chandler Carruth wrote: > Attached is a patch to add support for tracking and exposing the host > triple. 
This is one of the many steps along the way to cleaning up the > Clang driver. In many places within Clang, we need to reason about the > *host*, not the target, in order to decide how to behave. For example, this > is at issue when deciding where within the host machine we should look for > the target's toolchain. While in some cases, there is a canonical location > that the target *always* uses, in other cases both can be relevant. > Consider the Debian multiarch case where we will often want to use > '/usr/lib//gcc///crtbegin.o'. In other > cases, we may need to check whether the host triple might contain a > toolchain capable of targeting our target. This happens frequently when > working in a bi-arch world, where the GCC installed under > 'i386-pc-linux-gnu' is actually capable of targeting 'x86_64-pc-linux-gnu' > thanks to the '-m64' flag. I disagree with this. I don't think the host OS is involved in any of the decisions. Let me summarize how NetBSD handles this case, I think the same applies to other platforms and can be moved into the upper layers. When clang is invoked, it derives the base target either from program name, command line or configuration. During the NetBSD build, it is always the program name (e.g. x86_64--netbsd-clang). This base target is memorised and used for finding the tool chain, so that e.g. x86_64--netbsd-ld is used as linker. If the driver finds -m32 on the command line, three things change. The triple for the compiler invocation is changed to i386--netbsd. This changes the code generation only. Second, the library search path is changed from =/usr/lib to =/usr/lib/i386. That's where crt0.o and friends can be found. This differs from a "native" i386--netbsd compiler, since that expects the path correctly in =/usr/lib to match the native environment. Third, -m elf_i386 is passed to ld. I don't see how the Debian case is different. Use the compiler target triple (after -m32 / -m64 modifications) to look up the library path for crt*.o. 
Pick the correct ld emulation, if it differs from the base triple. Not sure how consistent the emulation list is on Linux to be able to always specify it. Joerg From grosser at fim.uni-passau.de Tue Jan 24 10:42:21 2012 From: grosser at fim.uni-passau.de (Tobias Grosser) Date: Tue, 24 Jan 2012 16:42:21 -0000 Subject: [llvm-commits] [polly] r148811 - /polly/trunk/lib/CodeGeneration.cpp Message-ID: <20120124164221.A71662A6C12D@llvm.org> Author: grosser Date: Tue Jan 24 10:42:21 2012 New Revision: 148811 URL: http://llvm.org/viewvc/llvm-project?rev=148811&view=rev Log: CodeGen: Separate declaration and definitions of BlockGenerator Modified: polly/trunk/lib/CodeGeneration.cpp Modified: polly/trunk/lib/CodeGeneration.cpp URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/CodeGeneration.cpp?rev=148811&r1=148810&r2=148811&view=diff ============================================================================== --- polly/trunk/lib/CodeGeneration.cpp (original) +++ polly/trunk/lib/CodeGeneration.cpp Tue Jan 24 10:42:21 2012 @@ -152,83 +152,21 @@ ValueMapT &VMap; VectorValueMapT &ValueMaps; Scop &S; - ScopStmt &statement; - isl_set *scatteringDomain; + ScopStmt &Statement; + isl_set *ScatteringDomain; public: BlockGenerator(IRBuilder<> &B, ValueMapT &vmap, VectorValueMapT &vmaps, - ScopStmt &Stmt, isl_set *domain) - : Builder(B), VMap(vmap), ValueMaps(vmaps), S(*Stmt.getParent()), - statement(Stmt), scatteringDomain(domain) {} - - const Region &getRegion() { - return S.getRegion(); - } - - Value *makeVectorOperand(Value *operand, int vectorWidth) { - if (operand->getType()->isVectorTy()) - return operand; + ScopStmt &Stmt, __isl_keep isl_set *domain); - VectorType *vectorType = VectorType::get(operand->getType(), vectorWidth); - Value *vector = UndefValue::get(vectorType); - vector = Builder.CreateInsertElement(vector, operand, Builder.getInt32(0)); + const Region &getRegion(); - std::vector splat; - - for (int i = 0; i < vectorWidth; i++) - splat.push_back 
(Builder.getInt32(0)); - - Constant *splatVector = ConstantVector::get(splat); - - return Builder.CreateShuffleVector(vector, vector, splatVector); - } + Value *makeVectorOperand(Value *operand, int vectorWidth); Value *getOperand(const Value *oldOperand, ValueMapT &BBMap, - ValueMapT *VectorMap = 0) { - const Instruction *OpInst = dyn_cast(oldOperand); - - if (!OpInst) - return const_cast(oldOperand); - - if (VectorMap && VectorMap->count(oldOperand)) - return (*VectorMap)[oldOperand]; + ValueMapT *VectorMap = 0); - // IVS and Parameters. - if (VMap.count(oldOperand)) { - Value *NewOperand = VMap[oldOperand]; - - // Insert a cast if types are different - if (oldOperand->getType()->getScalarSizeInBits() - < NewOperand->getType()->getScalarSizeInBits()) - NewOperand = Builder.CreateTruncOrBitCast(NewOperand, - oldOperand->getType()); - - return NewOperand; - } - - // Instructions calculated in the current BB. - if (BBMap.count(oldOperand)) { - return BBMap[oldOperand]; - } - - // Ignore instructions that are referencing ops in the old BB. These - // instructions are unused. They where replace by new ones during - // createIndependentBlocks(). 
- if (getRegion().contains(OpInst->getParent())) - return NULL; - - return const_cast(oldOperand); - } - - Type *getVectorPtrTy(const Value *V, int vectorWidth) { - PointerType *pointerType = dyn_cast(V->getType()); - assert(pointerType && "PointerType expected"); - - Type *scalarType = pointerType->getElementType(); - VectorType *vectorType = VectorType::get(scalarType, vectorWidth); - - return PointerType::getUnqual(vectorType); - } + Type *getVectorPtrTy(const Value *V, int vectorWidth); /// @brief Load a vector from a set of adjacent scalars /// @@ -239,19 +177,7 @@ /// %vec_full = load <4 x double>* %vector_ptr /// Value *generateStrideOneLoad(const LoadInst *load, ValueMapT &BBMap, - int size) { - const Value *pointer = load->getPointerOperand(); - Type *vectorPtrType = getVectorPtrTy(pointer, size); - Value *newPointer = getOperand(pointer, BBMap); - Value *VectorPtr = Builder.CreateBitCast(newPointer, vectorPtrType, - "vector_ptr"); - LoadInst *VecLoad = Builder.CreateLoad(VectorPtr, - load->getName() + "_p_vec_full"); - if (!Aligned) - VecLoad->setAlignment(8); - - return VecLoad; - } + int size); /// @brief Load a vector initialized from a single scalar in memory /// @@ -264,31 +190,7 @@ /// double> %splat_one, <4 x i32> zeroinitializer /// Value *generateStrideZeroLoad(const LoadInst *load, ValueMapT &BBMap, - int size) { - const Value *pointer = load->getPointerOperand(); - Type *vectorPtrType = getVectorPtrTy(pointer, 1); - Value *newPointer = getOperand(pointer, BBMap); - Value *vectorPtr = Builder.CreateBitCast(newPointer, vectorPtrType, - load->getName() + "_p_vec_p"); - LoadInst *scalarLoad= Builder.CreateLoad(vectorPtr, - load->getName() + "_p_splat_one"); - - if (!Aligned) - scalarLoad->setAlignment(8); - - std::vector splat; - - for (int i = 0; i < size; i++) - splat.push_back (Builder.getInt32(0)); - - Constant *splatVector = ConstantVector::get(splat); - - Value *vectorLoad = Builder.CreateShuffleVector(scalarLoad, scalarLoad, - splatVector, - 
load->getName() - + "_p_splat"); - return vectorLoad; - } + int size); /// @Load a vector from scalars distributed in memory /// @@ -302,369 +204,539 @@ /// %vec_2 = insertelement <2 x double> %vec_1, double %scalar_1, i32 1 /// Value *generateUnknownStrideLoad(const LoadInst *load, - VectorValueMapT &scalarMaps, - int size) { - const Value *pointer = load->getPointerOperand(); - VectorType *vectorType = VectorType::get( - dyn_cast(pointer->getType())->getElementType(), size); - - Value *vector = UndefValue::get(vectorType); - - for (int i = 0; i < size; i++) { - Value *newPointer = getOperand(pointer, scalarMaps[i]); - Value *scalarLoad = Builder.CreateLoad(newPointer, - load->getName() + "_p_scalar_"); - vector = Builder.CreateInsertElement(vector, scalarLoad, - Builder.getInt32(i), - load->getName() + "_p_vec_"); - } - - return vector; - } + VectorValueMapT &scalarMaps, int size); static Value* islAffToValue(__isl_take isl_aff *Aff, - IslPwAffUserInfo *UserInfo) { - assert(isl_aff_is_cst(Aff) && "Only constant access functions supported"); - - IRBuilder<> *Builder = UserInfo->Builder; - - isl_int OffsetIsl; - mpz_t OffsetMPZ; - - isl_int_init(OffsetIsl); - mpz_init(OffsetMPZ); - isl_aff_get_constant(Aff, &OffsetIsl); - isl_int_get_gmp(OffsetIsl, OffsetMPZ); - - Value *OffsetValue = NULL; - APInt Offset = APInt_from_MPZ(OffsetMPZ); - OffsetValue = ConstantInt::get(Builder->getContext(), Offset); - - mpz_clear(OffsetMPZ); - isl_int_clear(OffsetIsl); - isl_aff_free(Aff); - - return OffsetValue; - } + IslPwAffUserInfo *UserInfo); static int mergeIslAffValues(__isl_take isl_set *Set, - __isl_take isl_aff *Aff, void *User) { - IslPwAffUserInfo *UserInfo = (IslPwAffUserInfo *)User; + __isl_take isl_aff *Aff, void *User); - assert((UserInfo->Result == NULL) && "Result is already set." 
- "Currently only single isl_aff is supported"); - assert(isl_set_plain_is_universe(Set) - && "Code generation failed because the set is not universe"); - - UserInfo->Result = islAffToValue(Aff, UserInfo); - - isl_set_free(Set); - return 0; - } - - Value* islPwAffToValue(__isl_take isl_pw_aff *PwAff, Value *BaseAddress) { - IslPwAffUserInfo UserInfo; - UserInfo.BaseAddress = BaseAddress; - UserInfo.Result = NULL; - UserInfo.Builder = &Builder; - isl_pw_aff_foreach_piece(PwAff, mergeIslAffValues, &UserInfo); - assert(UserInfo.Result && "Code generation for isl_pw_aff failed"); - - isl_pw_aff_free(PwAff); - return UserInfo.Result; - } + Value* islPwAffToValue(__isl_take isl_pw_aff *PwAff, Value *BaseAddress); /// @brief Get the memory access offset to be added to the base address std::vector getMemoryAccessIndex(__isl_keep isl_map *AccessRelation, - Value *BaseAddress) { - assert((isl_map_dim(AccessRelation, isl_dim_out) == 1) - && "Only single dimensional access functions supported"); - - isl_pw_aff *PwAff = isl_map_dim_max(isl_map_copy(AccessRelation), 0); - Value *OffsetValue = islPwAffToValue(PwAff, BaseAddress); - - PointerType *BaseAddressType = dyn_cast( - BaseAddress->getType()); - Type *ArrayTy = BaseAddressType->getElementType(); - Type *ArrayElementType = dyn_cast(ArrayTy)->getElementType(); - OffsetValue = Builder.CreateSExtOrBitCast(OffsetValue, ArrayElementType); - - std::vector IndexArray; - Value *NullValue = Constant::getNullValue(ArrayElementType); - IndexArray.push_back(NullValue); - IndexArray.push_back(OffsetValue); - return IndexArray; - } + Value *BaseAddress); /// @brief Get the new operand address according to the changed access in /// JSCOP file. 
Value *getNewAccessOperand(__isl_keep isl_map *NewAccessRelation, Value *BaseAddress, const Value *OldOperand, - ValueMapT &BBMap) { - std::vector IndexArray = getMemoryAccessIndex(NewAccessRelation, - BaseAddress); - Value *NewOperand = Builder.CreateGEP(BaseAddress, IndexArray, - "p_newarrayidx_"); - return NewOperand; - } + ValueMapT &BBMap); /// @brief Generate the operand address Value *generateLocationAccessed(const Instruction *Inst, - const Value *Pointer, ValueMapT &BBMap ) { - MemoryAccess &Access = statement.getAccessFor(Inst); - isl_map *CurrentAccessRelation = Access.getAccessRelation(); - isl_map *NewAccessRelation = Access.getNewAccessRelation(); - - assert(isl_map_has_equal_space(CurrentAccessRelation, NewAccessRelation) - && "Current and new access function use different spaces"); + const Value *Pointer, ValueMapT &BBMap ); - Value *NewPointer; - - if (!NewAccessRelation) { - NewPointer = getOperand(Pointer, BBMap); - } else { - Value *BaseAddress = const_cast(Access.getBaseAddr()); - NewPointer = getNewAccessOperand(NewAccessRelation, BaseAddress, Pointer, - BBMap); - } - - isl_map_free(CurrentAccessRelation); - isl_map_free(NewAccessRelation); - return NewPointer; - } - - Value *generateScalarLoad(const LoadInst *load, ValueMapT &BBMap) { - const Value *pointer = load->getPointerOperand(); - const Instruction *Inst = dyn_cast(load); - Value *newPointer = generateLocationAccessed(Inst, pointer, BBMap); - Value *scalarLoad = Builder.CreateLoad(newPointer, - load->getName() + "_p_scalar_"); - return scalarLoad; - } + Value *generateScalarLoad(const LoadInst *load, ValueMapT &BBMap); /// @brief Load a value (or several values as a vector) from memory. 
void generateLoad(const LoadInst *load, ValueMapT &vectorMap, - VectorValueMapT &scalarMaps, int vectorWidth) { - if (scalarMaps.size() == 1) { - scalarMaps[0][load] = generateScalarLoad(load, scalarMaps[0]); - return; - } + VectorValueMapT &scalarMaps, int vectorWidth); - Value *newLoad; + void copyUnaryInst(const UnaryInstruction *Inst, ValueMapT &BBMap, + ValueMapT &VectorMap, int VectorDimension, + int VectorWidth); - MemoryAccess &Access = statement.getAccessFor(load); + void copyBinInst(const BinaryOperator *Inst, ValueMapT &BBMap, + ValueMapT &vectorMap, int vectorDimension, int vectorWidth); + + void copyVectorStore(const StoreInst *store, ValueMapT &BBMap, + ValueMapT &vectorMap, VectorValueMapT &scalarMaps, + int vectorDimension, int vectorWidth); - assert(scatteringDomain && "No scattering domain available"); + void copyInstScalar(const Instruction *Inst, ValueMapT &BBMap); - if (Access.isStrideZero(isl_set_copy(scatteringDomain))) - newLoad = generateStrideZeroLoad(load, scalarMaps[0], vectorWidth); - else if (Access.isStrideOne(isl_set_copy(scatteringDomain))) - newLoad = generateStrideOneLoad(load, scalarMaps[0], vectorWidth); - else - newLoad = generateUnknownStrideLoad(load, scalarMaps, vectorWidth); + bool hasVectorOperands(const Instruction *Inst, ValueMapT &VectorMap); + + int getVectorSize(); - vectorMap[load] = newLoad; + bool isVectorBlock(); + + void copyInstruction(const Instruction *Inst, ValueMapT &BBMap, + ValueMapT &vectorMap, VectorValueMapT &scalarMaps, + int vectorDimension, int vectorWidth); + + // Insert a copy of a basic block in the newly generated code. + // + // @param Builder The builder used to insert the code. It also specifies + // where to insert the code. + // @param BB The basic block to copy + // @param VMap A map returning for any old value its new equivalent. This + // is used to update the operands of the statements. + // For new statements a relation old->new is inserted in this + // map. 
+ void copyBB(BasicBlock *BB, DominatorTree *DT); +}; + +BlockGenerator::BlockGenerator(IRBuilder<> &B, ValueMapT &vmap, + VectorValueMapT &vmaps, ScopStmt &Stmt, + __isl_keep isl_set *domain) + : Builder(B), VMap(vmap), ValueMaps(vmaps), S(*Stmt.getParent()), + Statement(Stmt), ScatteringDomain(domain) {} + +const Region &BlockGenerator::getRegion() { + return S.getRegion(); +} + +Value *BlockGenerator::makeVectorOperand(Value *Operand, int VectorWidth) { + if (Operand->getType()->isVectorTy()) + return Operand; + + VectorType *VectorType = VectorType::get(Operand->getType(), VectorWidth); + Value *Vector = UndefValue::get(VectorType); + Vector = Builder.CreateInsertElement(Vector, Operand, Builder.getInt32(0)); + + std::vector Splat; + + for (int i = 0; i < VectorWidth; i++) + Splat.push_back (Builder.getInt32(0)); + + Constant *SplatVector = ConstantVector::get(Splat); + + return Builder.CreateShuffleVector(Vector, Vector, SplatVector); +} + +Value *BlockGenerator::getOperand(const Value *OldOperand, ValueMapT &BBMap, + ValueMapT *VectorMap) { + const Instruction *OpInst = dyn_cast(OldOperand); + + if (!OpInst) + return const_cast(OldOperand); + + if (VectorMap && VectorMap->count(OldOperand)) + return (*VectorMap)[OldOperand]; + + // IVS and Parameters. 
+ if (VMap.count(OldOperand)) { + Value *NewOperand = VMap[OldOperand]; + + // Insert a cast if types are different + if (OldOperand->getType()->getScalarSizeInBits() + < NewOperand->getType()->getScalarSizeInBits()) + NewOperand = Builder.CreateTruncOrBitCast(NewOperand, + OldOperand->getType()); + + return NewOperand; } - void copyUnaryInst(const UnaryInstruction *Inst, ValueMapT &BBMap, - ValueMapT &VectorMap, int VectorDimension, - int VectorWidth) { - Value *NewOperand = getOperand(Inst->getOperand(0), BBMap, &VectorMap); - NewOperand = makeVectorOperand(NewOperand, VectorWidth); - - if (const CastInst *Cast = dyn_cast(Inst)) { - VectorType *DestType = VectorType::get(Inst->getType(), VectorWidth); - VectorMap[Inst] = Builder.CreateCast(Cast->getOpcode(), NewOperand, - DestType); - } else - llvm_unreachable("Can not generate vector code for instruction"); - return; + // Instructions calculated in the current BB. + if (BBMap.count(OldOperand)) { + return BBMap[OldOperand]; } - void copyBinInst(const BinaryOperator *Inst, ValueMapT &BBMap, - ValueMapT &vectorMap, int vectorDimension, int vectorWidth) { - Value *opZero = Inst->getOperand(0); - Value *opOne = Inst->getOperand(1); - - Value *newOpZero, *newOpOne; - newOpZero = getOperand(opZero, BBMap, &vectorMap); - newOpOne = getOperand(opOne, BBMap, &vectorMap); - - newOpZero = makeVectorOperand(newOpZero, vectorWidth); - newOpOne = makeVectorOperand(newOpOne, vectorWidth); - - Value *newInst = Builder.CreateBinOp(Inst->getOpcode(), newOpZero, - newOpOne, - Inst->getName() + "p_vec"); - vectorMap[Inst] = newInst; + // Ignore instructions that are referencing ops in the old BB. These + // instructions are unused. They where replace by new ones during + // createIndependentBlocks(). 
+ if (getRegion().contains(OpInst->getParent())) + return NULL; - return; + return const_cast(OldOperand); +} + +Type *BlockGenerator::getVectorPtrTy(const Value *Val, int VectorWidth) { + PointerType *PointerTy = dyn_cast(Val->getType()); + assert(PointerTy && "PointerType expected"); + + Type *ScalarType = PointerTy->getElementType(); + VectorType *VectorType = VectorType::get(ScalarType, VectorWidth); + + return PointerType::getUnqual(VectorType); +} + +Value *BlockGenerator::generateStrideOneLoad(const LoadInst *Load, + ValueMapT &BBMap, int Size) { + const Value *Pointer = Load->getPointerOperand(); + Type *VectorPtrType = getVectorPtrTy(Pointer, Size); + Value *NewPointer = getOperand(Pointer, BBMap); + Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType, + "vector_ptr"); + LoadInst *VecLoad = Builder.CreateLoad(VectorPtr, + Load->getName() + "_p_vec_full"); + if (!Aligned) + VecLoad->setAlignment(8); + + return VecLoad; +} + +Value *BlockGenerator::generateStrideZeroLoad(const LoadInst *Load, + ValueMapT &BBMap, int Size) { + const Value *Pointer = Load->getPointerOperand(); + Type *VectorPtrType = getVectorPtrTy(Pointer, 1); + Value *NewPointer = getOperand(Pointer, BBMap); + Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType, + Load->getName() + "_p_vec_p"); + LoadInst *ScalarLoad= Builder.CreateLoad(VectorPtr, + Load->getName() + "_p_splat_one"); + + if (!Aligned) + ScalarLoad->setAlignment(8); + + std::vector Splat; + + for (int i = 0; i < Size; i++) + Splat.push_back (Builder.getInt32(0)); + + Constant *SplatVector = ConstantVector::get(Splat); + + Value *VectorLoad = Builder.CreateShuffleVector(ScalarLoad, ScalarLoad, + SplatVector, + Load->getName() + + "_p_splat"); + return VectorLoad; +} + +Value *BlockGenerator::generateUnknownStrideLoad(const LoadInst *Load, + VectorValueMapT &ScalarMaps, + int Size) { + const Value *Pointer = Load->getPointerOperand(); + VectorType *VectorType = VectorType::get( + 
dyn_cast(Pointer->getType())->getElementType(), Size); + + Value *Vector = UndefValue::get(VectorType); + + for (int i = 0; i < Size; i++) { + Value *NewPointer = getOperand(Pointer, ScalarMaps[i]); + Value *ScalarLoad = Builder.CreateLoad(NewPointer, + Load->getName() + "_p_scalar_"); + Vector = Builder.CreateInsertElement(Vector, ScalarLoad, + Builder.getInt32(i), + Load->getName() + "_p_vec_"); } - void copyVectorStore(const StoreInst *store, ValueMapT &BBMap, - ValueMapT &vectorMap, VectorValueMapT &scalarMaps, - int vectorDimension, int vectorWidth) { - // In vector mode we only generate a store for the first dimension. - if (vectorDimension > 0) - return; + return Vector; +} - MemoryAccess &Access = statement.getAccessFor(store); +Value *BlockGenerator::islAffToValue(__isl_take isl_aff *Aff, + IslPwAffUserInfo *UserInfo) { + assert(isl_aff_is_cst(Aff) && "Only constant access functions supported"); - assert(scatteringDomain && "No scattering domain available"); + IRBuilder<> *Builder = UserInfo->Builder; - const Value *pointer = store->getPointerOperand(); - Value *vector = getOperand(store->getValueOperand(), BBMap, &vectorMap); + isl_int OffsetIsl; + mpz_t OffsetMPZ; - if (Access.isStrideOne(isl_set_copy(scatteringDomain))) { - Type *vectorPtrType = getVectorPtrTy(pointer, vectorWidth); - Value *newPointer = getOperand(pointer, BBMap, &vectorMap); + isl_int_init(OffsetIsl); + mpz_init(OffsetMPZ); + isl_aff_get_constant(Aff, &OffsetIsl); + isl_int_get_gmp(OffsetIsl, OffsetMPZ); - Value *VectorPtr = Builder.CreateBitCast(newPointer, vectorPtrType, - "vector_ptr"); - StoreInst *Store = Builder.CreateStore(vector, VectorPtr); + Value *OffsetValue = NULL; + APInt Offset = APInt_from_MPZ(OffsetMPZ); + OffsetValue = ConstantInt::get(Builder->getContext(), Offset); - if (!Aligned) - Store->setAlignment(8); - } else { - for (unsigned i = 0; i < scalarMaps.size(); i++) { - Value *scalar = Builder.CreateExtractElement(vector, - Builder.getInt32(i)); - Value 
*newPointer = getOperand(pointer, scalarMaps[i]); - Builder.CreateStore(scalar, newPointer); - } - } + mpz_clear(OffsetMPZ); + isl_int_clear(OffsetIsl); + isl_aff_free(Aff); + + return OffsetValue; +} + +int BlockGenerator::mergeIslAffValues(__isl_take isl_set *Set, + __isl_take isl_aff *Aff, void *User) { + IslPwAffUserInfo *UserInfo = (IslPwAffUserInfo *)User; + + assert((UserInfo->Result == NULL) && "Result is already set." + "Currently only single isl_aff is supported"); + assert(isl_set_plain_is_universe(Set) + && "Code generation failed because the set is not universe"); + + UserInfo->Result = islAffToValue(Aff, UserInfo); + + isl_set_free(Set); + return 0; +} + +Value *BlockGenerator::islPwAffToValue(__isl_take isl_pw_aff *PwAff, + Value *BaseAddress) { + IslPwAffUserInfo UserInfo; + UserInfo.BaseAddress = BaseAddress; + UserInfo.Result = NULL; + UserInfo.Builder = &Builder; + isl_pw_aff_foreach_piece(PwAff, mergeIslAffValues, &UserInfo); + assert(UserInfo.Result && "Code generation for isl_pw_aff failed"); + + isl_pw_aff_free(PwAff); + return UserInfo.Result; +} +std::vector BlockGenerator::getMemoryAccessIndex( + __isl_keep isl_map *AccessRelation, Value *BaseAddress) { + assert((isl_map_dim(AccessRelation, isl_dim_out) == 1) + && "Only single dimensional access functions supported"); + + isl_pw_aff *PwAff = isl_map_dim_max(isl_map_copy(AccessRelation), 0); + Value *OffsetValue = islPwAffToValue(PwAff, BaseAddress); + + PointerType *BaseAddressType = dyn_cast( + BaseAddress->getType()); + Type *ArrayTy = BaseAddressType->getElementType(); + Type *ArrayElementType = dyn_cast(ArrayTy)->getElementType(); + OffsetValue = Builder.CreateSExtOrBitCast(OffsetValue, ArrayElementType); + + std::vector IndexArray; + Value *NullValue = Constant::getNullValue(ArrayElementType); + IndexArray.push_back(NullValue); + IndexArray.push_back(OffsetValue); + return IndexArray; +} + +Value *BlockGenerator::getNewAccessOperand( + __isl_keep isl_map *NewAccessRelation, Value 
*BaseAddress, const Value + *OldOperand, ValueMapT &BBMap) { + std::vector IndexArray = getMemoryAccessIndex(NewAccessRelation, + BaseAddress); + Value *NewOperand = Builder.CreateGEP(BaseAddress, IndexArray, + "p_newarrayidx_"); + return NewOperand; +} + +Value *BlockGenerator::generateLocationAccessed(const Instruction *Inst, + const Value *Pointer, + ValueMapT &BBMap ) { + MemoryAccess &Access = Statement.getAccessFor(Inst); + isl_map *CurrentAccessRelation = Access.getAccessRelation(); + isl_map *NewAccessRelation = Access.getNewAccessRelation(); + + assert(isl_map_has_equal_space(CurrentAccessRelation, NewAccessRelation) + && "Current and new access function use different spaces"); + + Value *NewPointer; + + if (!NewAccessRelation) { + NewPointer = getOperand(Pointer, BBMap); + } else { + Value *BaseAddress = const_cast(Access.getBaseAddr()); + NewPointer = getNewAccessOperand(NewAccessRelation, BaseAddress, Pointer, + BBMap); + } + + isl_map_free(CurrentAccessRelation); + isl_map_free(NewAccessRelation); + return NewPointer; +} + +Value *BlockGenerator::generateScalarLoad(const LoadInst *Load, + ValueMapT &BBMap) { + const Value *Pointer = Load->getPointerOperand(); + const Instruction *Inst = dyn_cast(Load); + Value *NewPointer = generateLocationAccessed(Inst, Pointer, BBMap); + Value *ScalarLoad = Builder.CreateLoad(NewPointer, + Load->getName() + "_p_scalar_"); + return ScalarLoad; +} + +void BlockGenerator::generateLoad(const LoadInst *Load, ValueMapT &VectorMap, + VectorValueMapT &ScalarMaps, + int VectorWidth) { + if (ScalarMaps.size() == 1) { + ScalarMaps[0][Load] = generateScalarLoad(Load, ScalarMaps[0]); return; } - void copyInstScalar(const Instruction *Inst, ValueMapT &BBMap) { - Instruction *NewInst = Inst->clone(); + Value *NewLoad; - // Replace old operands with the new ones. 
- for (Instruction::const_op_iterator OI = Inst->op_begin(), - OE = Inst->op_end(); OI != OE; ++OI) { - Value *OldOperand = *OI; - Value *NewOperand = getOperand(OldOperand, BBMap); - - if (!NewOperand) { - assert(!isa(NewInst) - && "Store instructions are always needed!"); - delete NewInst; - return; - } + MemoryAccess &Access = Statement.getAccessFor(Load); - NewInst->replaceUsesOfWith(OldOperand, NewOperand); - } + assert(ScatteringDomain && "No scattering domain available"); - Builder.Insert(NewInst); - BBMap[Inst] = NewInst; + if (Access.isStrideZero(isl_set_copy(ScatteringDomain))) + NewLoad = generateStrideZeroLoad(Load, ScalarMaps[0], VectorWidth); + else if (Access.isStrideOne(isl_set_copy(ScatteringDomain))) + NewLoad = generateStrideOneLoad(Load, ScalarMaps[0], VectorWidth); + else + NewLoad = generateUnknownStrideLoad(Load, ScalarMaps, VectorWidth); - if (!NewInst->getType()->isVoidTy()) - NewInst->setName("p_" + Inst->getName()); - } + VectorMap[Load] = NewLoad; +} - bool hasVectorOperands(const Instruction *Inst, ValueMapT &VectorMap) { - for (Instruction::const_op_iterator OI = Inst->op_begin(), - OE = Inst->op_end(); OI != OE; ++OI) - if (VectorMap.count(*OI)) - return true; - return false; - } +void BlockGenerator::copyUnaryInst(const UnaryInstruction *Inst, + ValueMapT &BBMap, ValueMapT &VectorMap, + int VectorDimension, int VectorWidth) { + Value *NewOperand = getOperand(Inst->getOperand(0), BBMap, &VectorMap); + NewOperand = makeVectorOperand(NewOperand, VectorWidth); + + assert(isa(Inst) && "Can not generate vector code for instruction"); + + const CastInst *Cast = dyn_cast(Inst); + VectorType *DestType = VectorType::get(Inst->getType(), VectorWidth); + VectorMap[Inst] = Builder.CreateCast(Cast->getOpcode(), NewOperand, DestType); +} - int getVectorSize() { - return ValueMaps.size(); - } +void BlockGenerator::copyBinInst(const BinaryOperator *Inst, ValueMapT &BBMap, + ValueMapT &VectorMap, int VectorDimension, + int VectorWidth) { + Value 
*OpZero = Inst->getOperand(0); + Value *OpOne = Inst->getOperand(1); + + Value *NewOpZero, *NewOpOne; + NewOpZero = getOperand(OpZero, BBMap, &VectorMap); + NewOpOne = getOperand(OpOne, BBMap, &VectorMap); + + NewOpZero = makeVectorOperand(NewOpZero, VectorWidth); + NewOpOne = makeVectorOperand(NewOpOne, VectorWidth); + + Value *NewInst = Builder.CreateBinOp(Inst->getOpcode(), NewOpZero, + NewOpOne, + Inst->getName() + "p_vec"); + VectorMap[Inst] = NewInst; +} - bool isVectorBlock() { - return getVectorSize() > 1; - } +void BlockGenerator::copyVectorStore(const StoreInst *Store, ValueMapT &BBMap, + ValueMapT &VectorMap, + VectorValueMapT &ScalarMaps, + int VectorDimension, int VectorWidth) { + // In vector mode we only generate a store for the first dimension. + if (VectorDimension > 0) + return; - void copyInstruction(const Instruction *Inst, ValueMapT &BBMap, - ValueMapT &vectorMap, VectorValueMapT &scalarMaps, - int vectorDimension, int vectorWidth) { - // Terminator instructions control the control flow. They are explicitally - // expressed in the clast and do not need to be copied. - if (Inst->isTerminator()) - return; + MemoryAccess &Access = Statement.getAccessFor(Store); + + assert(ScatteringDomain && "No scattering domain available"); + + const Value *Pointer = Store->getPointerOperand(); + Value *Vector = getOperand(Store->getValueOperand(), BBMap, &VectorMap); + + if (Access.isStrideOne(isl_set_copy(ScatteringDomain))) { + Type *VectorPtrType = getVectorPtrTy(Pointer, VectorWidth); + Value *NewPointer = getOperand(Pointer, BBMap, &VectorMap); - if (isVectorBlock()) { - // If this instruction is already in the vectorMap, a vector instruction - // was already issued, that calculates the values of all dimensions. No - // need to create any more instructions. 
- if (vectorMap.count(Inst)) - return; + Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType, + "vector_ptr"); + StoreInst *Store = Builder.CreateStore(Vector, VectorPtr); + + if (!Aligned) + Store->setAlignment(8); + } else { + for (unsigned i = 0; i < ScalarMaps.size(); i++) { + Value *Scalar = Builder.CreateExtractElement(Vector, + Builder.getInt32(i)); + Value *NewPointer = getOperand(Pointer, ScalarMaps[i]); + Builder.CreateStore(Scalar, NewPointer); } + } +} + +void BlockGenerator::copyInstScalar(const Instruction *Inst, ValueMapT &BBMap) { + Instruction *NewInst = Inst->clone(); - if (const LoadInst *load = dyn_cast(Inst)) { - generateLoad(load, vectorMap, scalarMaps, vectorWidth); + // Replace old operands with the new ones. + for (Instruction::const_op_iterator OI = Inst->op_begin(), + OE = Inst->op_end(); OI != OE; ++OI) { + Value *OldOperand = *OI; + Value *NewOperand = getOperand(OldOperand, BBMap); + + if (!NewOperand) { + assert(!isa(NewInst) + && "Store instructions are always needed!"); + delete NewInst; return; } - if (isVectorBlock() && hasVectorOperands(Inst, vectorMap)) { - if (const UnaryInstruction *UnaryInst = dyn_cast(Inst)) - copyUnaryInst(UnaryInst, BBMap, vectorMap, vectorDimension, - vectorWidth); - else if - (const BinaryOperator *binaryInst = dyn_cast(Inst)) - copyBinInst(binaryInst, BBMap, vectorMap, vectorDimension, vectorWidth); - else if (const StoreInst *store = dyn_cast(Inst)) - copyVectorStore(store, BBMap, vectorMap, scalarMaps, vectorDimension, - vectorWidth); - else - llvm_unreachable("Cannot issue vector code for this instruction"); + NewInst->replaceUsesOfWith(OldOperand, NewOperand); + } + Builder.Insert(NewInst); + BBMap[Inst] = NewInst; + + if (!NewInst->getType()->isVoidTy()) + NewInst->setName("p_" + Inst->getName()); +} + +bool BlockGenerator::hasVectorOperands(const Instruction *Inst, + ValueMapT &VectorMap) { + for (Instruction::const_op_iterator OI = Inst->op_begin(), + OE = Inst->op_end(); OI != OE; 
++OI) + if (VectorMap.count(*OI)) + return true; + return false; +} + +int BlockGenerator::getVectorSize() { + return ValueMaps.size(); +} + +bool BlockGenerator::isVectorBlock() { + return getVectorSize() > 1; +} + +void BlockGenerator::copyInstruction(const Instruction *Inst, ValueMapT &BBMap, + ValueMapT &VectorMap, + VectorValueMapT &ScalarMaps, + int VectorDimension, int VectorWidth) { + // Terminator instructions control the control flow. They are explicitally + // expressed in the clast and do not need to be copied. + if (Inst->isTerminator()) + return; + + if (isVectorBlock()) { + // If this instruction is already in the vectorMap, a vector instruction + // was already issued, that calculates the values of all dimensions. No + // need to create any more instructions. + if (VectorMap.count(Inst)) return; - } + } - copyInstScalar(Inst, BBMap); + if (const LoadInst *Load = dyn_cast<LoadInst>(Inst)) { + generateLoad(Load, VectorMap, ScalarMaps, VectorWidth); + return; } - // Insert a copy of a basic block in the newly generated code. - // - // @param Builder The builder used to insert the code. It also specifies - // where to insert the code. - // @param BB The basic block to copy - // @param VMap A map returning for any old value its new equivalent. This - // is used to update the operands of the statements. - // For new statements a relation old->new is inserted in this - // map. - void copyBB(BasicBlock *BB, DominatorTree *DT) { - Function *F = Builder.GetInsertBlock()->getParent(); - LLVMContext &Context = F->getContext(); - BasicBlock *CopyBB = BasicBlock::Create(Context, - "polly." + BB->getName() + ".stmt", - F); - Builder.CreateBr(CopyBB); - DT->addNewBlock(CopyBB, Builder.GetInsertBlock()); - Builder.SetInsertPoint(CopyBB); - - // Create two maps that store the mapping from the original instructions of - // the old basic block to their copies in the new basic block. Those maps - // are basic block local. 
- // - // As vector code generation is supported there is one map for scalar values - // and one for vector values. - // - // In case we just do scalar code generation, the vectorMap is not used and - // the scalarMap has just one dimension, which contains the mapping. - // - // In case vector code generation is done, an instruction may either appear - // in the vector map once (as it is calculating >vectorwidth< values at a - // time. Or (if the values are calculated using scalar operations), it - // appears once in every dimension of the scalarMap. - VectorValueMapT scalarBlockMap(getVectorSize()); - ValueMapT vectorBlockMap; - - for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end(); - II != IE; ++II) - for (int i = 0; i < getVectorSize(); i++) { - if (isVectorBlock()) - VMap = ValueMaps[i]; - copyInstruction(II, scalarBlockMap[i], vectorBlockMap, - scalarBlockMap, i, getVectorSize()); - } + if (isVectorBlock() && hasVectorOperands(Inst, VectorMap)) { + if (const UnaryInstruction *UnaryInst = dyn_cast<UnaryInstruction>(Inst)) + copyUnaryInst(UnaryInst, BBMap, VectorMap, VectorDimension, VectorWidth); + else if + (const BinaryOperator *BinaryInst = dyn_cast<BinaryOperator>(Inst)) + copyBinInst(BinaryInst, BBMap, VectorMap, VectorDimension, VectorWidth); + else if (const StoreInst *Store = dyn_cast<StoreInst>(Inst)) + copyVectorStore(Store, BBMap, VectorMap, ScalarMaps, VectorDimension, + VectorWidth); + else + llvm_unreachable("Cannot issue vector code for this instruction"); + + return; } -}; + + copyInstScalar(Inst, BBMap); +} + +void BlockGenerator::copyBB(BasicBlock *BB, DominatorTree *DT) { + Function *F = Builder.GetInsertBlock()->getParent(); + LLVMContext &Context = F->getContext(); + BasicBlock *CopyBB = BasicBlock::Create(Context, + "polly." 
+ BB->getName() + ".stmt", + F); + Builder.CreateBr(CopyBB); + DT->addNewBlock(CopyBB, Builder.GetInsertBlock()); + Builder.SetInsertPoint(CopyBB); + + // Create two maps that store the mapping from the original instructions of + // the old basic block to their copies in the new basic block. Those maps + // are basic block local. + // + // As vector code generation is supported there is one map for scalar values + // and one for vector values. + // + // In case we just do scalar code generation, the vectorMap is not used and + // the scalarMap has just one dimension, which contains the mapping. + // + // In case vector code generation is done, an instruction may either appear + // in the vector map once (as it is calculating >vectorwidth< values at a + // time. Or (if the values are calculated using scalar operations), it + // appears once in every dimension of the scalarMap. + VectorValueMapT ScalarBlockMap(getVectorSize()); + ValueMapT VectorBlockMap; + + for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end(); + II != IE; ++II) + for (int i = 0; i < getVectorSize(); i++) { + if (isVectorBlock()) + VMap = ValueMaps[i]; + + copyInstruction(II, ScalarBlockMap[i], VectorBlockMap, + ScalarBlockMap, i, getVectorSize()); + } +} /// Class to generate LLVM-IR that calculates the value of a clast_expr. 
class ClastExpCodeGen { From grosser at fim.uni-passau.de Tue Jan 24 10:42:25 2012 From: grosser at fim.uni-passau.de (Tobias Grosser) Date: Tue, 24 Jan 2012 16:42:25 -0000 Subject: [llvm-commits] [polly] r148812 - /polly/trunk/lib/CodeGeneration.cpp Message-ID: <20120124164225.7BB022A6C12D@llvm.org> Author: grosser Date: Tue Jan 24 10:42:25 2012 New Revision: 148812 URL: http://llvm.org/viewvc/llvm-project?rev=148812&view=rev Log: CodeGen: Use getNullValue to simplify some code Modified: polly/trunk/lib/CodeGeneration.cpp Modified: polly/trunk/lib/CodeGeneration.cpp URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/CodeGeneration.cpp?rev=148812&r1=148811&r2=148812&view=diff ============================================================================== --- polly/trunk/lib/CodeGeneration.cpp (original) +++ polly/trunk/lib/CodeGeneration.cpp Tue Jan 24 10:42:25 2012 @@ -372,12 +372,8 @@ if (!Aligned) ScalarLoad->setAlignment(8); - std::vector Splat; - - for (int i = 0; i < Size; i++) - Splat.push_back (Builder.getInt32(0)); - - Constant *SplatVector = ConstantVector::get(Splat); + Constant *SplatVector = + Constant::getNullValue(VectorType::get(Builder.getInt32Ty(), Size)); Value *VectorLoad = Builder.CreateShuffleVector(ScalarLoad, ScalarLoad, SplatVector, From grosser at fim.uni-passau.de Tue Jan 24 10:42:28 2012 From: grosser at fim.uni-passau.de (Tobias Grosser) Date: Tue, 24 Jan 2012 16:42:28 -0000 Subject: [llvm-commits] [polly] r148813 - /polly/trunk/lib/CodeGeneration.cpp Message-ID: <20120124164228.A73302A6C12D@llvm.org> Author: grosser Date: Tue Jan 24 10:42:28 2012 New Revision: 148813 URL: http://llvm.org/viewvc/llvm-project?rev=148813&view=rev Log: CodeGen: Separate declaration and definition of ClastExpCodeGen Modified: polly/trunk/lib/CodeGeneration.cpp Modified: polly/trunk/lib/CodeGeneration.cpp URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/CodeGeneration.cpp?rev=148813&r1=148812&r2=148813&view=diff 
============================================================================== --- polly/trunk/lib/CodeGeneration.cpp (original) +++ polly/trunk/lib/CodeGeneration.cpp Tue Jan 24 10:42:28 2012 @@ -739,146 +739,154 @@ IRBuilder<> &Builder; const CharMapT *IVS; - Value *codegen(const clast_name *e, Type *Ty) { - CharMapT::const_iterator I = IVS->find(e->name); + Value *codegen(const clast_name *e, Type *Ty); + Value *codegen(const clast_term *e, Type *Ty); + Value *codegen(const clast_binary *e, Type *Ty); + Value *codegen(const clast_reduction *r, Type *Ty); +public: - if (I != IVS->end()) - return Builder.CreateSExtOrBitCast(I->second, Ty); - else - llvm_unreachable("Clast name not found"); - } + // A generator for clast expressions. + // + // @param B The IRBuilder that defines where the code to calculate the + // clast expressions should be inserted. + // @param IVMAP A Map that translates strings describing the induction + // variables to the Values* that represent these variables + // on the LLVM side. + ClastExpCodeGen(IRBuilder<> &B, CharMapT *IVMap); - Value *codegen(const clast_term *e, Type *Ty) { - APInt a = APInt_from_MPZ(e->val); + // Generates code to calculate a given clast expression. + // + // @param e The expression to calculate. + // @return The Value that holds the result. + Value *codegen(const clast_expr *e, Type *Ty); - Value *ConstOne = ConstantInt::get(Builder.getContext(), a); - ConstOne = Builder.CreateSExtOrBitCast(ConstOne, Ty); + // @brief Reset the CharMap. + // + // This function is called to reset the CharMap to new one, while generating + // OpenMP code. 
+ void setIVS(CharMapT *IVSNew); +}; - if (e->var) { - Value *var = codegen(e->var, Ty); - return Builder.CreateMul(ConstOne, var); - } +Value *ClastExpCodeGen::codegen(const clast_name *e, Type *Ty) { + CharMapT::const_iterator I = IVS->find(e->name); + + assert(I != IVS->end() && "Clast name not found"); + + return Builder.CreateSExtOrBitCast(I->second, Ty); +} + +Value *ClastExpCodeGen::codegen(const clast_term *e, Type *Ty) { + APInt a = APInt_from_MPZ(e->val); + + Value *ConstOne = ConstantInt::get(Builder.getContext(), a); + ConstOne = Builder.CreateSExtOrBitCast(ConstOne, Ty); + if (!e->var) return ConstOne; - } - Value *codegen(const clast_binary *e, Type *Ty) { - Value *LHS = codegen(e->LHS, Ty); + Value *var = codegen(e->var, Ty); + return Builder.CreateMul(ConstOne, var); +} + +Value *ClastExpCodeGen::codegen(const clast_binary *e, Type *Ty) { + Value *LHS = codegen(e->LHS, Ty); + + APInt RHS_AP = APInt_from_MPZ(e->RHS); + + Value *RHS = ConstantInt::get(Builder.getContext(), RHS_AP); + RHS = Builder.CreateSExtOrBitCast(RHS, Ty); + + switch (e->type) { + case clast_bin_mod: + return Builder.CreateSRem(LHS, RHS); + case clast_bin_fdiv: + { + // floord(n,d) ((n < 0) ? (n - d + 1) : n) / d + Value *One = ConstantInt::get(Builder.getInt1Ty(), 1); + Value *Zero = ConstantInt::get(Builder.getInt1Ty(), 0); + One = Builder.CreateZExtOrBitCast(One, Ty); + Zero = Builder.CreateZExtOrBitCast(Zero, Ty); + Value *Sum1 = Builder.CreateSub(LHS, RHS); + Value *Sum2 = Builder.CreateAdd(Sum1, One); + Value *isNegative = Builder.CreateICmpSLT(LHS, Zero); + Value *Dividend = Builder.CreateSelect(isNegative, Sum2, LHS); + return Builder.CreateSDiv(Dividend, RHS); + } + case clast_bin_cdiv: + { + // ceild(n,d) ((n < 0) ? 
n : (n + d - 1)) / d + Value *One = ConstantInt::get(Builder.getInt1Ty(), 1); + Value *Zero = ConstantInt::get(Builder.getInt1Ty(), 0); + One = Builder.CreateZExtOrBitCast(One, Ty); + Zero = Builder.CreateZExtOrBitCast(Zero, Ty); + Value *Sum1 = Builder.CreateAdd(LHS, RHS); + Value *Sum2 = Builder.CreateSub(Sum1, One); + Value *isNegative = Builder.CreateICmpSLT(LHS, Zero); + Value *Dividend = Builder.CreateSelect(isNegative, LHS, Sum2); + return Builder.CreateSDiv(Dividend, RHS); + } + case clast_bin_div: + return Builder.CreateSDiv(LHS, RHS); + }; + + llvm_unreachable("Unknown clast binary expression type"); +} - APInt RHS_AP = APInt_from_MPZ(e->RHS); +Value *ClastExpCodeGen::codegen(const clast_reduction *r, Type *Ty) { + assert(( r->type == clast_red_min + || r->type == clast_red_max + || r->type == clast_red_sum) + && "Clast reduction type not supported"); + Value *old = codegen(r->elts[0], Ty); - Value *RHS = ConstantInt::get(Builder.getContext(), RHS_AP); - RHS = Builder.CreateSExtOrBitCast(RHS, Ty); + for (int i=1; i < r->n; ++i) { + Value *exprValue = codegen(r->elts[i], Ty); - switch (e->type) { - case clast_bin_mod: - return Builder.CreateSRem(LHS, RHS); - case clast_bin_fdiv: + switch (r->type) { + case clast_red_min: { - // floord(n,d) ((n < 0) ? (n - d + 1) : n) / d - Value *One = ConstantInt::get(Builder.getInt1Ty(), 1); - Value *Zero = ConstantInt::get(Builder.getInt1Ty(), 0); - One = Builder.CreateZExtOrBitCast(One, Ty); - Zero = Builder.CreateZExtOrBitCast(Zero, Ty); - Value *Sum1 = Builder.CreateSub(LHS, RHS); - Value *Sum2 = Builder.CreateAdd(Sum1, One); - Value *isNegative = Builder.CreateICmpSLT(LHS, Zero); - Value *Dividend = Builder.CreateSelect(isNegative, Sum2, LHS); - return Builder.CreateSDiv(Dividend, RHS); + Value *cmp = Builder.CreateICmpSLT(old, exprValue); + old = Builder.CreateSelect(cmp, old, exprValue); + break; } - case clast_bin_cdiv: + case clast_red_max: { - // ceild(n,d) ((n < 0) ? 
n : (n + d - 1)) / d - Value *One = ConstantInt::get(Builder.getInt1Ty(), 1); - Value *Zero = ConstantInt::get(Builder.getInt1Ty(), 0); - One = Builder.CreateZExtOrBitCast(One, Ty); - Zero = Builder.CreateZExtOrBitCast(Zero, Ty); - Value *Sum1 = Builder.CreateAdd(LHS, RHS); - Value *Sum2 = Builder.CreateSub(Sum1, One); - Value *isNegative = Builder.CreateICmpSLT(LHS, Zero); - Value *Dividend = Builder.CreateSelect(isNegative, LHS, Sum2); - return Builder.CreateSDiv(Dividend, RHS); - } - case clast_bin_div: - return Builder.CreateSDiv(LHS, RHS); - default: - llvm_unreachable("Unknown clast binary expression type"); - }; - } - - Value *codegen(const clast_reduction *r, Type *Ty) { - assert(( r->type == clast_red_min - || r->type == clast_red_max - || r->type == clast_red_sum) - && "Clast reduction type not supported"); - Value *old = codegen(r->elts[0], Ty); - - for (int i=1; i < r->n; ++i) { - Value *exprValue = codegen(r->elts[i], Ty); - - switch (r->type) { - case clast_red_min: - { - Value *cmp = Builder.CreateICmpSLT(old, exprValue); - old = Builder.CreateSelect(cmp, old, exprValue); - break; - } - case clast_red_max: - { - Value *cmp = Builder.CreateICmpSGT(old, exprValue); - old = Builder.CreateSelect(cmp, old, exprValue); - break; - } - case clast_red_sum: - old = Builder.CreateAdd(old, exprValue); + Value *cmp = Builder.CreateICmpSGT(old, exprValue); + old = Builder.CreateSelect(cmp, old, exprValue); break; - default: - llvm_unreachable("Clast unknown reduction type"); } + case clast_red_sum: + old = Builder.CreateAdd(old, exprValue); + break; + default: + llvm_unreachable("Clast unknown reduction type"); } - - return old; } -public: + return old; +} - // A generator for clast expressions. - // - // @param B The IRBuilder that defines where the code to calculate the - // clast expressions should be inserted. 
- // @param IVMAP A Map that translates strings describing the induction - // variables to the Values* that represent these variables - // on the LLVM side. - ClastExpCodeGen(IRBuilder<> &B, CharMapT *IVMap) : Builder(B), IVS(IVMap) {} +ClastExpCodeGen::ClastExpCodeGen(IRBuilder<> &B, CharMapT *IVMap) + : Builder(B), IVS(IVMap) {} - // Generates code to calculate a given clast expression. - // - // @param e The expression to calculate. - // @return The Value that holds the result. - Value *codegen(const clast_expr *e, Type *Ty) { - switch(e->type) { - case clast_expr_name: - return codegen((const clast_name *)e, Ty); - case clast_expr_term: - return codegen((const clast_term *)e, Ty); - case clast_expr_bin: - return codegen((const clast_binary *)e, Ty); - case clast_expr_red: - return codegen((const clast_reduction *)e, Ty); - default: - llvm_unreachable("Unknown clast expression!"); - } +Value *ClastExpCodeGen::codegen(const clast_expr *e, Type *Ty) { + switch(e->type) { + case clast_expr_name: + return codegen((const clast_name *)e, Ty); + case clast_expr_term: + return codegen((const clast_term *)e, Ty); + case clast_expr_bin: + return codegen((const clast_binary *)e, Ty); + case clast_expr_red: + return codegen((const clast_reduction *)e, Ty); } - // @brief Reset the CharMap. - // - // This function is called to reset the CharMap to new one, while generating - // OpenMP code. - void setIVS(CharMapT *IVSNew) { - IVS = IVSNew; - } + llvm_unreachable("Unknown clast expression!"); +} -}; +void ClastExpCodeGen::setIVS(CharMapT *IVSNew) { + IVS = IVSNew; +} class ClastStmtCodeGen { // The Scop we code generate. 
From grosser at fim.uni-passau.de Tue Jan 24 10:42:17 2012 From: grosser at fim.uni-passau.de (Tobias Grosser) Date: Tue, 24 Jan 2012 16:42:17 -0000 Subject: [llvm-commits] [polly] r148810 - in /polly/trunk: include/polly/ScopInfo.h lib/Analysis/ScopInfo.cpp lib/CodeGeneration.cpp Message-ID: <20120124164217.1EF502A6C12C@llvm.org> Author: grosser Date: Tue Jan 24 10:42:16 2012 New Revision: 148810 URL: http://llvm.org/viewvc/llvm-project?rev=148810&view=rev Log: ScopInfo: Add isStrideX to unify stride checking Modified: polly/trunk/include/polly/ScopInfo.h polly/trunk/lib/Analysis/ScopInfo.cpp polly/trunk/lib/CodeGeneration.cpp Modified: polly/trunk/include/polly/ScopInfo.h URL: http://llvm.org/viewvc/llvm-project/polly/trunk/include/polly/ScopInfo.h?rev=148810&r1=148809&r2=148810&view=diff ============================================================================== --- polly/trunk/include/polly/ScopInfo.h (original) +++ polly/trunk/include/polly/ScopInfo.h Tue Jan 24 10:42:16 2012 @@ -140,15 +140,18 @@ /// @brief Get the stride of this memory access in the specified domain /// subset. - isl_set *getStride(const isl_set *domainSubset) const; + isl_set *getStride(__isl_take const isl_set *domainSubset) const; + + /// @brief Is the stride of the access equal to a certain width. + bool isStrideX(__isl_take const isl_set *DomainSubset, int StrideWidth) const; /// @brief Is consecutive memory accessed for a given /// statement instance set? - bool isStrideOne(const isl_set *domainSubset) const; + bool isStrideOne(__isl_take const isl_set *domainSubset) const; /// @brief Is always the same memory accessed for a given /// statement instance set? - bool isStrideZero(const isl_set *domainSubset) const; + bool isStrideZero(__isl_take const isl_set *domainSubset) const; /// @brief Get the statement that contains this memory access. 
ScopStmt *getStatement() const { return statement; } Modified: polly/trunk/lib/Analysis/ScopInfo.cpp URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/Analysis/ScopInfo.cpp?rev=148810&r1=148809&r2=148810&view=diff ============================================================================== --- polly/trunk/lib/Analysis/ScopInfo.cpp (original) +++ polly/trunk/lib/Analysis/ScopInfo.cpp Tue Jan 24 10:42:16 2012 @@ -429,9 +429,9 @@ return isl_map_from_basic_map(bmap); } -isl_set *MemoryAccess::getStride(const isl_set *domainSubset) const { +isl_set *MemoryAccess::getStride(__isl_take const isl_set *domainSubset) const { isl_map *accessRelation = getAccessRelation(); - isl_set *scatteringDomain = isl_set_copy(const_cast<isl_set*>(domainSubset)); + isl_set *scatteringDomain = const_cast<isl_set*>(domainSubset); isl_map *scattering = getStatement()->getScattering(); scattering = isl_map_reverse(scattering); @@ -458,34 +458,28 @@ return isl_map_deltas(nextScatt); } -bool MemoryAccess::isStrideZero(const isl_set *DomainSubset) const { - isl_set *Stride, *StrideZero; - bool IsStrideZero; +bool MemoryAccess::isStrideX(__isl_take const isl_set *DomainSubset, + int StrideWidth) const { + isl_set *Stride, *StrideX; + bool IsStrideX; Stride = getStride(DomainSubset); - StrideZero = isl_set_universe(isl_set_get_space(Stride)); - StrideZero = isl_set_fix_si(StrideZero, isl_dim_set, 0, 0); - IsStrideZero = isl_set_is_equal(Stride, StrideZero); + StrideX = isl_set_universe(isl_set_get_space(Stride)); + StrideX = isl_set_fix_si(StrideX, isl_dim_set, 0, StrideWidth); + IsStrideX = isl_set_is_equal(Stride, StrideX); - isl_set_free(StrideZero); + isl_set_free(StrideX); isl_set_free(Stride); - return IsStrideZero; + return IsStrideX; } -bool MemoryAccess::isStrideOne(const isl_set *DomainSubset) const { - isl_set *Stride, *StrideOne; - bool IsStrideOne; - - Stride = getStride(DomainSubset); - StrideOne = isl_set_universe(isl_set_get_space(Stride)); - StrideOne = isl_set_fix_si(StrideOne, isl_dim_set, 
0, 1); - IsStrideOne = isl_set_is_equal(Stride, StrideOne); - - isl_set_free(StrideOne); - isl_set_free(Stride); +bool MemoryAccess::isStrideZero(const isl_set *DomainSubset) const { + return isStrideX(DomainSubset, 0); +} - return IsStrideOne; +bool MemoryAccess::isStrideOne(const isl_set *DomainSubset) const { + return isStrideX(DomainSubset, 1); } void MemoryAccess::setNewAccessRelation(isl_map *newAccess) { Modified: polly/trunk/lib/CodeGeneration.cpp URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/CodeGeneration.cpp?rev=148810&r1=148809&r2=148810&view=diff ============================================================================== --- polly/trunk/lib/CodeGeneration.cpp (original) +++ polly/trunk/lib/CodeGeneration.cpp Tue Jan 24 10:42:16 2012 @@ -456,9 +456,9 @@ assert(scatteringDomain && "No scattering domain available"); - if (Access.isStrideZero(scatteringDomain)) + if (Access.isStrideZero(isl_set_copy(scatteringDomain))) newLoad = generateStrideZeroLoad(load, scalarMaps[0], vectorWidth); - else if (Access.isStrideOne(scatteringDomain)) + else if (Access.isStrideOne(isl_set_copy(scatteringDomain))) newLoad = generateStrideOneLoad(load, scalarMaps[0], vectorWidth); else newLoad = generateUnknownStrideLoad(load, scalarMaps, vectorWidth); @@ -515,7 +515,7 @@ const Value *pointer = store->getPointerOperand(); Value *vector = getOperand(store->getValueOperand(), BBMap, &vectorMap); - if (Access.isStrideOne(scatteringDomain)) { + if (Access.isStrideOne(isl_set_copy(scatteringDomain))) { Type *vectorPtrType = getVectorPtrTy(pointer, vectorWidth); Value *newPointer = getOperand(pointer, BBMap, &vectorMap); @@ -1259,7 +1259,8 @@ for (int i = 1; i < vectorWidth; i++) IVS[i] = Builder.CreateAdd(IVS[i-1], StrideValue, "p_vector_iv"); - isl_set *scatteringDomain = isl_set_from_cloog_domain(f->domain); + isl_set *scatteringDomain = + isl_set_copy(isl_set_from_cloog_domain(f->domain)); // Add loop iv to symbols. 
(*clastVars)[f->iterator] = LB; @@ -1273,6 +1274,7 @@ } // Loop is finished, so remove its iv from the live symbols. + isl_set_free(scatteringDomain); clastVars->erase(f->iterator); } From grosser at fim.uni-passau.de Tue Jan 24 10:42:32 2012 From: grosser at fim.uni-passau.de (Tobias Grosser) Date: Tue, 24 Jan 2012 16:42:32 -0000 Subject: [llvm-commits] [polly] r148814 - /polly/trunk/lib/CodeGeneration.cpp Message-ID: <20120124164232.B0C202A6C12C@llvm.org> Author: grosser Date: Tue Jan 24 10:42:32 2012 New Revision: 148814 URL: http://llvm.org/viewvc/llvm-project?rev=148814&view=rev Log: CodeGen: Separate declaration and definition of ClastStmtCodeGen Modified: polly/trunk/lib/CodeGeneration.cpp Modified: polly/trunk/lib/CodeGeneration.cpp URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/CodeGeneration.cpp?rev=148814&r1=148813&r2=148814&view=diff ============================================================================== --- polly/trunk/lib/CodeGeneration.cpp (original) +++ polly/trunk/lib/CodeGeneration.cpp Tue Jan 24 10:42:32 2012 @@ -925,185 +925,42 @@ public: - const std::vector &getParallelLoops() { - return parallelLoops; - } + const std::vector &getParallelLoops(); protected: - void codegen(const clast_assignment *a) { - (*clastVars)[a->LHS] = ExpGen.codegen(a->RHS, - TD->getIntPtrType(Builder.getContext())); - } + void codegen(const clast_assignment *a); void codegen(const clast_assignment *a, ScopStmt *Statement, unsigned Dimension, int vectorDim, - std::vector *VectorVMap = 0) { - Value *RHS = ExpGen.codegen(a->RHS, - TD->getIntPtrType(Builder.getContext())); - - assert(!a->LHS && "Statement assignments do not have left hand side"); - const PHINode *PN; - PN = Statement->getInductionVariableForDimension(Dimension); - const Value *V = PN; - - if (VectorVMap) - (*VectorVMap)[vectorDim][V] = RHS; - - ValueMap[V] = RHS; - } + std::vector *VectorVMap = 0); void codegenSubstitutions(const clast_stmt *Assignment, ScopStmt *Statement, int vectorDim = 
0, - std::vector *VectorVMap = 0) { - int Dimension = 0; - - while (Assignment) { - assert(CLAST_STMT_IS_A(Assignment, stmt_ass) - && "Substitions are expected to be assignments"); - codegen((const clast_assignment *)Assignment, Statement, Dimension, - vectorDim, VectorVMap); - Assignment = Assignment->next; - Dimension++; - } - } + std::vector *VectorVMap = 0); void codegen(const clast_user_stmt *u, std::vector *IVS = NULL, - const char *iterator = NULL, isl_set *scatteringDomain = 0) { - ScopStmt *Statement = (ScopStmt *)u->statement->usr; - BasicBlock *BB = Statement->getBasicBlock(); - - if (u->substitutions) - codegenSubstitutions(u->substitutions, Statement); - - int vectorDimensions = IVS ? IVS->size() : 1; - - VectorValueMapT VectorValueMap(vectorDimensions); - - if (IVS) { - assert (u->substitutions && "Substitutions expected!"); - int i = 0; - for (std::vector::iterator II = IVS->begin(), IE = IVS->end(); - II != IE; ++II) { - (*clastVars)[iterator] = *II; - codegenSubstitutions(u->substitutions, Statement, i, &VectorValueMap); - i++; - } - } + const char *iterator = NULL, isl_set *scatteringDomain = 0); - BlockGenerator Generator(Builder, ValueMap, VectorValueMap, *Statement, - scatteringDomain); - Generator.copyBB(BB, DT); - } - - void codegen(const clast_block *b) { - if (b->body) - codegen(b->body); - } + void codegen(const clast_block *b); /// @brief Create a classical sequential loop. void codegenForSequential(const clast_for *f, Value *LowerBound = 0, - Value *UpperBound = 0) { - APInt Stride; - PHINode *IV; - Value *IncrementedIV; - BasicBlock *AfterBB, *HeaderBB, *LastBodyBB; - Type *IntPtrTy; - - Stride = APInt_from_MPZ(f->stride); - IntPtrTy = TD->getIntPtrType(Builder.getContext()); - - // The value of lowerbound and upperbound will be supplied, if this - // function is called while generating OpenMP code. Otherwise get - // the values. 
- assert(!!LowerBound == !!UpperBound && "Either give both bounds or none"); - - if (LowerBound == 0) { - LowerBound = ExpGen.codegen(f->LB, IntPtrTy); - UpperBound = ExpGen.codegen(f->UB, IntPtrTy); - } - - createLoop(&Builder, LowerBound, UpperBound, Stride, IV, AfterBB, - IncrementedIV, DT); - - // Add loop iv to symbols. - (*clastVars)[f->iterator] = IV; - - if (f->body) - codegen(f->body); - - // Loop is finished, so remove its iv from the live symbols. - clastVars->erase(f->iterator); - - HeaderBB = *pred_begin(AfterBB); - LastBodyBB = Builder.GetInsertBlock(); - Builder.CreateBr(HeaderBB); - IV->addIncoming(IncrementedIV, LastBodyBB); - Builder.SetInsertPoint(AfterBB); - } + Value *UpperBound = 0); /// @brief Add a new definition of an openmp subfunction. - Function *addOpenMPSubfunction(Module *M) { - Function *F = Builder.GetInsertBlock()->getParent(); - std::vector Arguments(1, Builder.getInt8PtrTy()); - FunctionType *FT = FunctionType::get(Builder.getVoidTy(), Arguments, false); - Function *FN = Function::Create(FT, Function::InternalLinkage, - F->getName() + ".omp_subfn", M); - // Do not run any polly pass on the new function. - SD->markFunctionAsInvalid(FN); - - Function::arg_iterator AI = FN->arg_begin(); - AI->setName("omp.userContext"); - - return FN; - } + Function *addOpenMPSubfunction(Module *M); /// @brief Add values to the OpenMP structure. /// /// Create the subfunction structure and add the values from the list. Value *addValuesToOpenMPStruct(SetVector OMPDataVals, - Function *SubFunction) { - std::vector structMembers; - - // Create the structure. - for (unsigned i = 0; i < OMPDataVals.size(); i++) - structMembers.push_back(OMPDataVals[i]->getType()); - - StructType *structTy = StructType::get(Builder.getContext(), - structMembers); - // Store the values into the structure. 
- Value *structData = Builder.CreateAlloca(structTy, 0, "omp.userContext"); - for (unsigned i = 0; i < OMPDataVals.size(); i++) { - Value *storeAddr = Builder.CreateStructGEP(structData, i); - Builder.CreateStore(OMPDataVals[i], storeAddr); - } - - return structData; - } + Function *SubFunction); /// @brief Create OpenMP structure values. /// /// Create a list of values that has to be stored into the subfuncition /// structure. - SetVector createOpenMPStructValues() { - SetVector OMPDataVals; - - // Push the clast variables available in the clastVars. - for (CharMapT::iterator I = clastVars->begin(), E = clastVars->end(); - I != E; I++) - OMPDataVals.insert(I->second); - - // Push the base addresses of memory references. - for (Scop::iterator SI = S->begin(), SE = S->end(); SI != SE; ++SI) { - ScopStmt *Stmt = *SI; - for (SmallVector::iterator I = Stmt->memacc_begin(), - E = Stmt->memacc_end(); I != E; ++I) { - Value *BaseAddr = const_cast((*I)->getBaseAddr()); - OMPDataVals.insert((BaseAddr)); - } - } - - return OMPDataVals; - } + SetVector createOpenMPStructValues(); /// @brief Extract the values from the subfunction parameter. /// @@ -1111,371 +968,574 @@ /// variables to point to the new values. void extractValuesFromOpenMPStruct(CharMapT *clastVarsOMP, SetVector OMPDataVals, - Value *userContext) { - // Extract the clast variables. - unsigned i = 0; - for (CharMapT::iterator I = clastVars->begin(), E = clastVars->end(); - I != E; I++) { - Value *loadAddr = Builder.CreateStructGEP(userContext, i); - (*clastVarsOMP)[I->first] = Builder.CreateLoad(loadAddr); - i++; - } - - // Extract the base addresses of memory references. - for (unsigned j = i; j < OMPDataVals.size(); j++) { - Value *loadAddr = Builder.CreateStructGEP(userContext, j); - Value *baseAddr = OMPDataVals[j]; - ValueMap[baseAddr] = Builder.CreateLoad(loadAddr); - } - - } + Value *userContext); /// @brief Add body to the subfunction. 
void addOpenMPSubfunctionBody(Function *FN, const clast_for *f, Value *structData, - SetVector OMPDataVals) { - Module *M = Builder.GetInsertBlock()->getParent()->getParent(); - LLVMContext &Context = FN->getContext(); - IntegerType *intPtrTy = TD->getIntPtrType(Context); - - // Store the previous basic block. - BasicBlock *PrevBB = Builder.GetInsertBlock(); - - // Create basic blocks. - BasicBlock *HeaderBB = BasicBlock::Create(Context, "omp.setup", FN); - BasicBlock *ExitBB = BasicBlock::Create(Context, "omp.exit", FN); - BasicBlock *checkNextBB = BasicBlock::Create(Context, "omp.checkNext", FN); - BasicBlock *loadIVBoundsBB = BasicBlock::Create(Context, "omp.loadIVBounds", - FN); - - DT->addNewBlock(HeaderBB, PrevBB); - DT->addNewBlock(ExitBB, HeaderBB); - DT->addNewBlock(checkNextBB, HeaderBB); - DT->addNewBlock(loadIVBoundsBB, HeaderBB); - - // Fill up basic block HeaderBB. - Builder.SetInsertPoint(HeaderBB); - Value *lowerBoundPtr = Builder.CreateAlloca(intPtrTy, 0, - "omp.lowerBoundPtr"); - Value *upperBoundPtr = Builder.CreateAlloca(intPtrTy, 0, - "omp.upperBoundPtr"); - Value *userContext = Builder.CreateBitCast(FN->arg_begin(), - structData->getType(), - "omp.userContext"); - - CharMapT clastVarsOMP; - extractValuesFromOpenMPStruct(&clastVarsOMP, OMPDataVals, userContext); - - Builder.CreateBr(checkNextBB); - - // Add code to check if another set of iterations will be executed. - Builder.SetInsertPoint(checkNextBB); - Function *runtimeNextFunction = M->getFunction("GOMP_loop_runtime_next"); - Value *ret1 = Builder.CreateCall2(runtimeNextFunction, - lowerBoundPtr, upperBoundPtr); - Value *hasNextSchedule = Builder.CreateTrunc(ret1, Builder.getInt1Ty(), - "omp.hasNextScheduleBlock"); - Builder.CreateCondBr(hasNextSchedule, loadIVBoundsBB, ExitBB); - - // Add code to to load the iv bounds for this set of iterations. 
- Builder.SetInsertPoint(loadIVBoundsBB); - Value *lowerBound = Builder.CreateLoad(lowerBoundPtr, "omp.lowerBound"); - Value *upperBound = Builder.CreateLoad(upperBoundPtr, "omp.upperBound"); - - // Subtract one as the upper bound provided by openmp is a < comparison - // whereas the codegenForSequential function creates a <= comparison. - upperBound = Builder.CreateSub(upperBound, ConstantInt::get(intPtrTy, 1), - "omp.upperBoundAdjusted"); - - // Use clastVarsOMP during code generation of the OpenMP subfunction. - CharMapT *oldClastVars = clastVars; - clastVars = &clastVarsOMP; - ExpGen.setIVS(&clastVarsOMP); - - codegenForSequential(f, lowerBound, upperBound); - - // Restore the old clastVars. - clastVars = oldClastVars; - ExpGen.setIVS(oldClastVars); - - Builder.CreateBr(checkNextBB); - - // Add code to terminate this openmp subfunction. - Builder.SetInsertPoint(ExitBB); - Function *endnowaitFunction = M->getFunction("GOMP_loop_end_nowait"); - Builder.CreateCall(endnowaitFunction); - Builder.CreateRetVoid(); - - // Restore the builder back to previous basic block. - Builder.SetInsertPoint(PrevBB); - } + SetVector OMPDataVals); /// @brief Create an OpenMP parallel for loop. /// /// This loop reflects a loop as if it would have been created by an OpenMP /// statement. - void codegenForOpenMP(const clast_for *f) { - Module *M = Builder.GetInsertBlock()->getParent()->getParent(); - IntegerType *intPtrTy = TD->getIntPtrType(Builder.getContext()); + void codegenForOpenMP(const clast_for *f); - Function *SubFunction = addOpenMPSubfunction(M); - SetVector OMPDataVals = createOpenMPStructValues(); - Value *structData = addValuesToOpenMPStruct(OMPDataVals, SubFunction); - - addOpenMPSubfunctionBody(SubFunction, f, structData, OMPDataVals); - - // Create call for GOMP_parallel_loop_runtime_start. 
- Value *subfunctionParam = Builder.CreateBitCast(structData, - Builder.getInt8PtrTy(), - "omp_data"); - - Value *numberOfThreads = Builder.getInt32(0); - Value *lowerBound = ExpGen.codegen(f->LB, intPtrTy); - Value *upperBound = ExpGen.codegen(f->UB, intPtrTy); - - // Add one as the upper bound provided by openmp is a < comparison - // whereas the codegenForSequential function creates a <= comparison. - upperBound = Builder.CreateAdd(upperBound, ConstantInt::get(intPtrTy, 1)); - APInt APStride = APInt_from_MPZ(f->stride); - Value *stride = ConstantInt::get(intPtrTy, - APStride.zext(intPtrTy->getBitWidth())); - - SmallVector Arguments; - Arguments.push_back(SubFunction); - Arguments.push_back(subfunctionParam); - Arguments.push_back(numberOfThreads); - Arguments.push_back(lowerBound); - Arguments.push_back(upperBound); - Arguments.push_back(stride); - - Function *parallelStartFunction = - M->getFunction("GOMP_parallel_loop_runtime_start"); - Builder.CreateCall(parallelStartFunction, Arguments); - - // Create call to the subfunction. - Builder.CreateCall(SubFunction, subfunctionParam); - - // Create call for GOMP_parallel_end. - Function *FN = M->getFunction("GOMP_parallel_end"); - Builder.CreateCall(FN); - } - - bool isInnermostLoop(const clast_for *f) { - const clast_stmt *stmt = f->body; - - while (stmt) { - if (!CLAST_STMT_IS_A(stmt, stmt_user)) - return false; + bool isInnermostLoop(const clast_for *f); - stmt = stmt->next; - } + /// @brief Get the number of loop iterations for this loop. + /// @param f The clast for loop to check. + int getNumberOfIterations(const clast_for *f); - return true; + /// @brief Create vector instructions for this loop. 
+ void codegenForVector(const clast_for *f); + + void codegen(const clast_for *f); + + Value *codegen(const clast_equation *eq); + + void codegen(const clast_guard *g); + + void codegen(const clast_stmt *stmt); + + void addParameters(const CloogNames *names); + + public: + void codegen(const clast_root *r); + + ClastStmtCodeGen(Scop *scop, ScalarEvolution &se, DominatorTree *dt, + ScopDetection *sd, Dependences *dp, TargetData *td, + IRBuilder<> &B); +}; +} + +const std::vector &ClastStmtCodeGen::getParallelLoops() { + return parallelLoops; +} + +void ClastStmtCodeGen::codegen(const clast_assignment *a) { + Value *V= ExpGen.codegen(a->RHS, TD->getIntPtrType(Builder.getContext())); + (*clastVars)[a->LHS] = V; +} + +void ClastStmtCodeGen::codegen(const clast_assignment *a, ScopStmt *Statement, + unsigned Dimension, int vectorDim, + std::vector *VectorVMap) { + Value *RHS = ExpGen.codegen(a->RHS, + TD->getIntPtrType(Builder.getContext())); + + assert(!a->LHS && "Statement assignments do not have left hand side"); + const PHINode *PN; + PN = Statement->getInductionVariableForDimension(Dimension); + const Value *V = PN; + + if (VectorVMap) + (*VectorVMap)[vectorDim][V] = RHS; + + ValueMap[V] = RHS; +} + +void ClastStmtCodeGen::codegenSubstitutions(const clast_stmt *Assignment, + ScopStmt *Statement, int vectorDim, + std::vector *VectorVMap) { + int Dimension = 0; + + while (Assignment) { + assert(CLAST_STMT_IS_A(Assignment, stmt_ass) + && "Substitions are expected to be assignments"); + codegen((const clast_assignment *)Assignment, Statement, Dimension, + vectorDim, VectorVMap); + Assignment = Assignment->next; + Dimension++; } +} - /// @brief Get the number of loop iterations for this loop. - /// @param f The clast for loop to check. 
- int getNumberOfIterations(const clast_for *f) { - isl_set *loopDomain = isl_set_copy(isl_set_from_cloog_domain(f->domain)); - isl_set *tmp = isl_set_copy(loopDomain); - - // Calculate a map similar to the identity map, but with the last input - // and output dimension not related. - // [i0, i1, i2, i3] -> [i0, i1, i2, o0] - isl_space *Space = isl_set_get_space(loopDomain); - Space = isl_space_drop_outputs(Space, - isl_set_dim(loopDomain, isl_dim_set) - 2, 1); - Space = isl_space_map_from_set(Space); - isl_map *identity = isl_map_identity(Space); - identity = isl_map_add_dims(identity, isl_dim_in, 1); - identity = isl_map_add_dims(identity, isl_dim_out, 1); - - isl_map *map = isl_map_from_domain_and_range(tmp, loopDomain); - map = isl_map_intersect(map, identity); - - isl_map *lexmax = isl_map_lexmax(isl_map_copy(map)); - isl_map *lexmin = isl_map_lexmin(map); - isl_map *sub = isl_map_sum(lexmax, isl_map_neg(lexmin)); - - isl_set *elements = isl_map_range(sub); - - if (!isl_set_is_singleton(elements)) { - isl_set_free(elements); - return -1; - } +void ClastStmtCodeGen::codegen(const clast_user_stmt *u, + std::vector *IVS , const char *iterator, + isl_set *scatteringDomain) { + ScopStmt *Statement = (ScopStmt *)u->statement->usr; + BasicBlock *BB = Statement->getBasicBlock(); + + if (u->substitutions) + codegenSubstitutions(u->substitutions, Statement); - isl_point *p = isl_set_sample_point(elements); + int vectorDimensions = IVS ? 
IVS->size() : 1; - isl_int v; - isl_int_init(v); - isl_point_get_coordinate(p, isl_dim_set, isl_set_n_dim(loopDomain) - 1, &v); - int numberIterations = isl_int_get_si(v); - isl_int_clear(v); - isl_point_free(p); + VectorValueMapT VectorValueMap(vectorDimensions); - return (numberIterations) / isl_int_get_si(f->stride) + 1; + if (IVS) { + assert (u->substitutions && "Substitutions expected!"); + int i = 0; + for (std::vector::iterator II = IVS->begin(), IE = IVS->end(); + II != IE; ++II) { + (*clastVars)[iterator] = *II; + codegenSubstitutions(u->substitutions, Statement, i, &VectorValueMap); + i++; + } } - /// @brief Create vector instructions for this loop. - void codegenForVector(const clast_for *f) { - DEBUG(dbgs() << "Vectorizing loop '" << f->iterator << "'\n";); - int vectorWidth = getNumberOfIterations(f); + BlockGenerator Generator(Builder, ValueMap, VectorValueMap, *Statement, + scatteringDomain); + Generator.copyBB(BB, DT); +} + +void ClastStmtCodeGen::codegen(const clast_block *b) { + if (b->body) + codegen(b->body); +} - Value *LB = ExpGen.codegen(f->LB, - TD->getIntPtrType(Builder.getContext())); +void ClastStmtCodeGen::codegenForSequential(const clast_for *f, + Value *LowerBound, + Value *UpperBound) { + APInt Stride; + PHINode *IV; + Value *IncrementedIV; + BasicBlock *AfterBB, *HeaderBB, *LastBodyBB; + Type *IntPtrTy; + + Stride = APInt_from_MPZ(f->stride); + IntPtrTy = TD->getIntPtrType(Builder.getContext()); + + // The value of lowerbound and upperbound will be supplied, if this + // function is called while generating OpenMP code. Otherwise get + // the values. + assert(!!LowerBound == !!UpperBound && "Either give both bounds or none"); + + if (LowerBound == 0) { + LowerBound = ExpGen.codegen(f->LB, IntPtrTy); + UpperBound = ExpGen.codegen(f->UB, IntPtrTy); + } + + createLoop(&Builder, LowerBound, UpperBound, Stride, IV, AfterBB, + IncrementedIV, DT); + + // Add loop iv to symbols. 
+ (*clastVars)[f->iterator] = IV; + + if (f->body) + codegen(f->body); + + // Loop is finished, so remove its iv from the live symbols. + clastVars->erase(f->iterator); + + HeaderBB = *pred_begin(AfterBB); + LastBodyBB = Builder.GetInsertBlock(); + Builder.CreateBr(HeaderBB); + IV->addIncoming(IncrementedIV, LastBodyBB); + Builder.SetInsertPoint(AfterBB); +} - APInt Stride = APInt_from_MPZ(f->stride); - IntegerType *LoopIVType = dyn_cast(LB->getType()); - Stride = Stride.zext(LoopIVType->getBitWidth()); - Value *StrideValue = ConstantInt::get(LoopIVType, Stride); +Function *ClastStmtCodeGen::addOpenMPSubfunction(Module *M) { + Function *F = Builder.GetInsertBlock()->getParent(); + std::vector Arguments(1, Builder.getInt8PtrTy()); + FunctionType *FT = FunctionType::get(Builder.getVoidTy(), Arguments, false); + Function *FN = Function::Create(FT, Function::InternalLinkage, + F->getName() + ".omp_subfn", M); + // Do not run any polly pass on the new function. + SD->markFunctionAsInvalid(FN); - std::vector IVS(vectorWidth); - IVS[0] = LB; + Function::arg_iterator AI = FN->arg_begin(); + AI->setName("omp.userContext"); - for (int i = 1; i < vectorWidth; i++) - IVS[i] = Builder.CreateAdd(IVS[i-1], StrideValue, "p_vector_iv"); + return FN; +} - isl_set *scatteringDomain = - isl_set_copy(isl_set_from_cloog_domain(f->domain)); +Value *ClastStmtCodeGen::addValuesToOpenMPStruct(SetVector OMPDataVals, + Function *SubFunction) { + std::vector structMembers; + + // Create the structure. + for (unsigned i = 0; i < OMPDataVals.size(); i++) + structMembers.push_back(OMPDataVals[i]->getType()); + + StructType *structTy = StructType::get(Builder.getContext(), + structMembers); + // Store the values into the structure. 
+ Value *structData = Builder.CreateAlloca(structTy, 0, "omp.userContext"); + for (unsigned i = 0; i < OMPDataVals.size(); i++) { + Value *storeAddr = Builder.CreateStructGEP(structData, i); + Builder.CreateStore(OMPDataVals[i], storeAddr); + } - // Add loop iv to symbols. - (*clastVars)[f->iterator] = LB; + return structData; +} - const clast_stmt *stmt = f->body; +SetVector ClastStmtCodeGen::createOpenMPStructValues() { + SetVector OMPDataVals; - while (stmt) { - codegen((const clast_user_stmt *)stmt, &IVS, f->iterator, - scatteringDomain); - stmt = stmt->next; - } + // Push the clast variables available in the clastVars. + for (CharMapT::iterator I = clastVars->begin(), E = clastVars->end(); + I != E; I++) + OMPDataVals.insert(I->second); - // Loop is finished, so remove its iv from the live symbols. - isl_set_free(scatteringDomain); - clastVars->erase(f->iterator); - } - - void codegen(const clast_for *f) { - if (Vector && isInnermostLoop(f) && DP->isParallelFor(f) - && (-1 != getNumberOfIterations(f)) - && (getNumberOfIterations(f) <= 16)) { - codegenForVector(f); - } else if (OpenMP && !parallelCodeGeneration && DP->isParallelFor(f)) { - parallelCodeGeneration = true; - parallelLoops.push_back(f->iterator); - codegenForOpenMP(f); - parallelCodeGeneration = false; - } else - codegenForSequential(f); - } - - Value *codegen(const clast_equation *eq) { - Value *LHS = ExpGen.codegen(eq->LHS, - TD->getIntPtrType(Builder.getContext())); - Value *RHS = ExpGen.codegen(eq->RHS, - TD->getIntPtrType(Builder.getContext())); - CmpInst::Predicate P; - - if (eq->sign == 0) - P = ICmpInst::ICMP_EQ; - else if (eq->sign > 0) - P = ICmpInst::ICMP_SGE; - else - P = ICmpInst::ICMP_SLE; - - return Builder.CreateICmp(P, LHS, RHS); - } - - void codegen(const clast_guard *g) { - Function *F = Builder.GetInsertBlock()->getParent(); - LLVMContext &Context = F->getContext(); - BasicBlock *ThenBB = BasicBlock::Create(Context, "polly.then", F); - BasicBlock *MergeBB = 
BasicBlock::Create(Context, "polly.merge", F); - DT->addNewBlock(ThenBB, Builder.GetInsertBlock()); - DT->addNewBlock(MergeBB, Builder.GetInsertBlock()); - - Value *Predicate = codegen(&(g->eq[0])); - - for (int i = 1; i < g->n; ++i) { - Value *TmpPredicate = codegen(&(g->eq[i])); - Predicate = Builder.CreateAnd(Predicate, TmpPredicate); + // Push the base addresses of memory references. + for (Scop::iterator SI = S->begin(), SE = S->end(); SI != SE; ++SI) { + ScopStmt *Stmt = *SI; + for (SmallVector::iterator I = Stmt->memacc_begin(), + E = Stmt->memacc_end(); I != E; ++I) { + Value *BaseAddr = const_cast((*I)->getBaseAddr()); + OMPDataVals.insert((BaseAddr)); } + } + + return OMPDataVals; +} - Builder.CreateCondBr(Predicate, ThenBB, MergeBB); - Builder.SetInsertPoint(ThenBB); +void ClastStmtCodeGen::extractValuesFromOpenMPStruct(CharMapT *clastVarsOMP, + SetVector OMPDataVals, Value *userContext) { + // Extract the clast variables. + unsigned i = 0; + for (CharMapT::iterator I = clastVars->begin(), E = clastVars->end(); + I != E; I++) { + Value *loadAddr = Builder.CreateStructGEP(userContext, i); + (*clastVarsOMP)[I->first] = Builder.CreateLoad(loadAddr); + i++; + } + + // Extract the base addresses of memory references. + for (unsigned j = i; j < OMPDataVals.size(); j++) { + Value *loadAddr = Builder.CreateStructGEP(userContext, j); + Value *baseAddr = OMPDataVals[j]; + ValueMap[baseAddr] = Builder.CreateLoad(loadAddr); + } +} - codegen(g->then); +void ClastStmtCodeGen::addOpenMPSubfunctionBody(Function *FN, + const clast_for *f, + Value *structData, + SetVector OMPDataVals) { + Module *M = Builder.GetInsertBlock()->getParent()->getParent(); + LLVMContext &Context = FN->getContext(); + IntegerType *intPtrTy = TD->getIntPtrType(Context); + + // Store the previous basic block. + BasicBlock *PrevBB = Builder.GetInsertBlock(); + + // Create basic blocks. 
+ BasicBlock *HeaderBB = BasicBlock::Create(Context, "omp.setup", FN); + BasicBlock *ExitBB = BasicBlock::Create(Context, "omp.exit", FN); + BasicBlock *checkNextBB = BasicBlock::Create(Context, "omp.checkNext", FN); + BasicBlock *loadIVBoundsBB = BasicBlock::Create(Context, "omp.loadIVBounds", + FN); + + DT->addNewBlock(HeaderBB, PrevBB); + DT->addNewBlock(ExitBB, HeaderBB); + DT->addNewBlock(checkNextBB, HeaderBB); + DT->addNewBlock(loadIVBoundsBB, HeaderBB); + + // Fill up basic block HeaderBB. + Builder.SetInsertPoint(HeaderBB); + Value *lowerBoundPtr = Builder.CreateAlloca(intPtrTy, 0, + "omp.lowerBoundPtr"); + Value *upperBoundPtr = Builder.CreateAlloca(intPtrTy, 0, + "omp.upperBoundPtr"); + Value *userContext = Builder.CreateBitCast(FN->arg_begin(), + structData->getType(), + "omp.userContext"); + + CharMapT clastVarsOMP; + extractValuesFromOpenMPStruct(&clastVarsOMP, OMPDataVals, userContext); + + Builder.CreateBr(checkNextBB); + + // Add code to check if another set of iterations will be executed. + Builder.SetInsertPoint(checkNextBB); + Function *runtimeNextFunction = M->getFunction("GOMP_loop_runtime_next"); + Value *ret1 = Builder.CreateCall2(runtimeNextFunction, + lowerBoundPtr, upperBoundPtr); + Value *hasNextSchedule = Builder.CreateTrunc(ret1, Builder.getInt1Ty(), + "omp.hasNextScheduleBlock"); + Builder.CreateCondBr(hasNextSchedule, loadIVBoundsBB, ExitBB); + + // Add code to to load the iv bounds for this set of iterations. + Builder.SetInsertPoint(loadIVBoundsBB); + Value *lowerBound = Builder.CreateLoad(lowerBoundPtr, "omp.lowerBound"); + Value *upperBound = Builder.CreateLoad(upperBoundPtr, "omp.upperBound"); + + // Subtract one as the upper bound provided by openmp is a < comparison + // whereas the codegenForSequential function creates a <= comparison. + upperBound = Builder.CreateSub(upperBound, ConstantInt::get(intPtrTy, 1), + "omp.upperBoundAdjusted"); + + // Use clastVarsOMP during code generation of the OpenMP subfunction. 
+ CharMapT *oldClastVars = clastVars; + clastVars = &clastVarsOMP; + ExpGen.setIVS(&clastVarsOMP); + + codegenForSequential(f, lowerBound, upperBound); + + // Restore the old clastVars. + clastVars = oldClastVars; + ExpGen.setIVS(oldClastVars); + + Builder.CreateBr(checkNextBB); + + // Add code to terminate this openmp subfunction. + Builder.SetInsertPoint(ExitBB); + Function *endnowaitFunction = M->getFunction("GOMP_loop_end_nowait"); + Builder.CreateCall(endnowaitFunction); + Builder.CreateRetVoid(); + + // Restore the builder back to previous basic block. + Builder.SetInsertPoint(PrevBB); +} + +void ClastStmtCodeGen::codegenForOpenMP(const clast_for *f) { + Module *M = Builder.GetInsertBlock()->getParent()->getParent(); + IntegerType *intPtrTy = TD->getIntPtrType(Builder.getContext()); + + Function *SubFunction = addOpenMPSubfunction(M); + SetVector OMPDataVals = createOpenMPStructValues(); + Value *structData = addValuesToOpenMPStruct(OMPDataVals, SubFunction); + + addOpenMPSubfunctionBody(SubFunction, f, structData, OMPDataVals); + + // Create call for GOMP_parallel_loop_runtime_start. + Value *subfunctionParam = Builder.CreateBitCast(structData, + Builder.getInt8PtrTy(), + "omp_data"); + + Value *numberOfThreads = Builder.getInt32(0); + Value *lowerBound = ExpGen.codegen(f->LB, intPtrTy); + Value *upperBound = ExpGen.codegen(f->UB, intPtrTy); + + // Add one as the upper bound provided by openmp is a < comparison + // whereas the codegenForSequential function creates a <= comparison. 
+ upperBound = Builder.CreateAdd(upperBound, ConstantInt::get(intPtrTy, 1)); + APInt APStride = APInt_from_MPZ(f->stride); + Value *stride = ConstantInt::get(intPtrTy, + APStride.zext(intPtrTy->getBitWidth())); + + SmallVector Arguments; + Arguments.push_back(SubFunction); + Arguments.push_back(subfunctionParam); + Arguments.push_back(numberOfThreads); + Arguments.push_back(lowerBound); + Arguments.push_back(upperBound); + Arguments.push_back(stride); + + Function *parallelStartFunction = + M->getFunction("GOMP_parallel_loop_runtime_start"); + Builder.CreateCall(parallelStartFunction, Arguments); + + // Create call to the subfunction. + Builder.CreateCall(SubFunction, subfunctionParam); + + // Create call for GOMP_parallel_end. + Function *FN = M->getFunction("GOMP_parallel_end"); + Builder.CreateCall(FN); +} - Builder.CreateBr(MergeBB); - Builder.SetInsertPoint(MergeBB); +bool ClastStmtCodeGen::isInnermostLoop(const clast_for *f) { + const clast_stmt *stmt = f->body; + + while (stmt) { + if (!CLAST_STMT_IS_A(stmt, stmt_user)) + return false; + + stmt = stmt->next; } - void codegen(const clast_stmt *stmt) { - if (CLAST_STMT_IS_A(stmt, stmt_root)) - assert(false && "No second root statement expected"); - else if (CLAST_STMT_IS_A(stmt, stmt_ass)) - codegen((const clast_assignment *)stmt); - else if (CLAST_STMT_IS_A(stmt, stmt_user)) - codegen((const clast_user_stmt *)stmt); - else if (CLAST_STMT_IS_A(stmt, stmt_block)) - codegen((const clast_block *)stmt); - else if (CLAST_STMT_IS_A(stmt, stmt_for)) - codegen((const clast_for *)stmt); - else if (CLAST_STMT_IS_A(stmt, stmt_guard)) - codegen((const clast_guard *)stmt); - - if (stmt->next) - codegen(stmt->next); - } - - void addParameters(const CloogNames *names) { - SCEVExpander Rewriter(SE, "polly"); - - // Create an instruction that specifies the location where the parameters - // are expanded. 
- CastInst::CreateIntegerCast(ConstantInt::getTrue(Builder.getContext()), - Builder.getInt16Ty(), false, "insertInst", - Builder.GetInsertBlock()); + return true; +} - int i = 0; - for (Scop::param_iterator PI = S->param_begin(), PE = S->param_end(); - PI != PE; ++PI) { - assert(i < names->nb_parameters && "Not enough parameter names"); - - const SCEV *Param = *PI; - Type *Ty = Param->getType(); - - Instruction *insertLocation = --(Builder.GetInsertBlock()->end()); - Value *V = Rewriter.expandCodeFor(Param, Ty, insertLocation); - (*clastVars)[names->parameters[i]] = V; +int ClastStmtCodeGen::getNumberOfIterations(const clast_for *f) { + isl_set *loopDomain = isl_set_copy(isl_set_from_cloog_domain(f->domain)); + isl_set *tmp = isl_set_copy(loopDomain); + + // Calculate a map similar to the identity map, but with the last input + // and output dimension not related. + // [i0, i1, i2, i3] -> [i0, i1, i2, o0] + isl_space *Space = isl_set_get_space(loopDomain); + Space = isl_space_drop_outputs(Space, + isl_set_dim(loopDomain, isl_dim_set) - 2, 1); + Space = isl_space_map_from_set(Space); + isl_map *identity = isl_map_identity(Space); + identity = isl_map_add_dims(identity, isl_dim_in, 1); + identity = isl_map_add_dims(identity, isl_dim_out, 1); + + isl_map *map = isl_map_from_domain_and_range(tmp, loopDomain); + map = isl_map_intersect(map, identity); + + isl_map *lexmax = isl_map_lexmax(isl_map_copy(map)); + isl_map *lexmin = isl_map_lexmin(map); + isl_map *sub = isl_map_sum(lexmax, isl_map_neg(lexmin)); + + isl_set *elements = isl_map_range(sub); + + if (!isl_set_is_singleton(elements)) { + isl_set_free(elements); + return -1; + } + + isl_point *p = isl_set_sample_point(elements); + + isl_int v; + isl_int_init(v); + isl_point_get_coordinate(p, isl_dim_set, isl_set_n_dim(loopDomain) - 1, &v); + int numberIterations = isl_int_get_si(v); + isl_int_clear(v); + isl_point_free(p); - ++i; - } + return (numberIterations) / isl_int_get_si(f->stride) + 1; +} + +void 
ClastStmtCodeGen::codegenForVector(const clast_for *f) { + DEBUG(dbgs() << "Vectorizing loop '" << f->iterator << "'\n";); + int vectorWidth = getNumberOfIterations(f); + + Value *LB = ExpGen.codegen(f->LB, + TD->getIntPtrType(Builder.getContext())); + + APInt Stride = APInt_from_MPZ(f->stride); + IntegerType *LoopIVType = dyn_cast(LB->getType()); + Stride = Stride.zext(LoopIVType->getBitWidth()); + Value *StrideValue = ConstantInt::get(LoopIVType, Stride); + + std::vector IVS(vectorWidth); + IVS[0] = LB; + + for (int i = 1; i < vectorWidth; i++) + IVS[i] = Builder.CreateAdd(IVS[i-1], StrideValue, "p_vector_iv"); + + isl_set *scatteringDomain = + isl_set_copy(isl_set_from_cloog_domain(f->domain)); + + // Add loop iv to symbols. + (*clastVars)[f->iterator] = LB; + + const clast_stmt *stmt = f->body; + + while (stmt) { + codegen((const clast_user_stmt *)stmt, &IVS, f->iterator, + scatteringDomain); + stmt = stmt->next; } - public: - void codegen(const clast_root *r) { - clastVars = new CharMapT(); - addParameters(r->names); - ExpGen.setIVS(clastVars); + // Loop is finished, so remove its iv from the live symbols. 
+ isl_set_free(scatteringDomain); + clastVars->erase(f->iterator); +} +void ClastStmtCodeGen::codegen(const clast_for *f) { + if (Vector && isInnermostLoop(f) && DP->isParallelFor(f) + && (-1 != getNumberOfIterations(f)) + && (getNumberOfIterations(f) <= 16)) { + codegenForVector(f); + } else if (OpenMP && !parallelCodeGeneration && DP->isParallelFor(f)) { + parallelCodeGeneration = true; + parallelLoops.push_back(f->iterator); + codegenForOpenMP(f); parallelCodeGeneration = false; + } else + codegenForSequential(f); +} + +Value *ClastStmtCodeGen::codegen(const clast_equation *eq) { + Value *LHS = ExpGen.codegen(eq->LHS, + TD->getIntPtrType(Builder.getContext())); + Value *RHS = ExpGen.codegen(eq->RHS, + TD->getIntPtrType(Builder.getContext())); + CmpInst::Predicate P; + + if (eq->sign == 0) + P = ICmpInst::ICMP_EQ; + else if (eq->sign > 0) + P = ICmpInst::ICMP_SGE; + else + P = ICmpInst::ICMP_SLE; + + return Builder.CreateICmp(P, LHS, RHS); +} - const clast_stmt *stmt = (const clast_stmt*) r; - if (stmt->next) - codegen(stmt->next); +void ClastStmtCodeGen::codegen(const clast_guard *g) { + Function *F = Builder.GetInsertBlock()->getParent(); + LLVMContext &Context = F->getContext(); + BasicBlock *ThenBB = BasicBlock::Create(Context, "polly.then", F); + BasicBlock *MergeBB = BasicBlock::Create(Context, "polly.merge", F); + DT->addNewBlock(ThenBB, Builder.GetInsertBlock()); + DT->addNewBlock(MergeBB, Builder.GetInsertBlock()); - delete clastVars; + Value *Predicate = codegen(&(g->eq[0])); + + for (int i = 1; i < g->n; ++i) { + Value *TmpPredicate = codegen(&(g->eq[i])); + Predicate = Builder.CreateAnd(Predicate, TmpPredicate); } - ClastStmtCodeGen(Scop *scop, ScalarEvolution &se, DominatorTree *dt, - ScopDetection *sd, Dependences *dp, TargetData *td, - IRBuilder<> &B) : - S(scop), SE(se), DT(dt), SD(sd), DP(dp), TD(td), Builder(B), - ExpGen(Builder, NULL) {} + Builder.CreateCondBr(Predicate, ThenBB, MergeBB); + Builder.SetInsertPoint(ThenBB); -}; + 
codegen(g->then); + + Builder.CreateBr(MergeBB); + Builder.SetInsertPoint(MergeBB); } +void ClastStmtCodeGen::codegen(const clast_stmt *stmt) { + if (CLAST_STMT_IS_A(stmt, stmt_root)) + assert(false && "No second root statement expected"); + else if (CLAST_STMT_IS_A(stmt, stmt_ass)) + codegen((const clast_assignment *)stmt); + else if (CLAST_STMT_IS_A(stmt, stmt_user)) + codegen((const clast_user_stmt *)stmt); + else if (CLAST_STMT_IS_A(stmt, stmt_block)) + codegen((const clast_block *)stmt); + else if (CLAST_STMT_IS_A(stmt, stmt_for)) + codegen((const clast_for *)stmt); + else if (CLAST_STMT_IS_A(stmt, stmt_guard)) + codegen((const clast_guard *)stmt); + + if (stmt->next) + codegen(stmt->next); +} + +void ClastStmtCodeGen::addParameters(const CloogNames *names) { + SCEVExpander Rewriter(SE, "polly"); + + // Create an instruction that specifies the location where the parameters + // are expanded. + CastInst::CreateIntegerCast(ConstantInt::getTrue(Builder.getContext()), + Builder.getInt16Ty(), false, "insertInst", + Builder.GetInsertBlock()); + + int i = 0; + for (Scop::param_iterator PI = S->param_begin(), PE = S->param_end(); + PI != PE; ++PI) { + assert(i < names->nb_parameters && "Not enough parameter names"); + + const SCEV *Param = *PI; + Type *Ty = Param->getType(); + + Instruction *insertLocation = --(Builder.GetInsertBlock()->end()); + Value *V = Rewriter.expandCodeFor(Param, Ty, insertLocation); + (*clastVars)[names->parameters[i]] = V; + + ++i; + } +} + +void ClastStmtCodeGen::codegen(const clast_root *r) { + clastVars = new CharMapT(); + addParameters(r->names); + ExpGen.setIVS(clastVars); + + parallelCodeGeneration = false; + + const clast_stmt *stmt = (const clast_stmt*) r; + if (stmt->next) + codegen(stmt->next); + + delete clastVars; +} + +ClastStmtCodeGen::ClastStmtCodeGen(Scop *scop, ScalarEvolution &se, + DominatorTree *dt, ScopDetection *sd, + Dependences *dp, TargetData *td, + IRBuilder<> &B) : + S(scop), SE(se), DT(dt), SD(sd), DP(dp), TD(td), 
Builder(B), + ExpGen(Builder, NULL) {} + namespace { class CodeGeneration : public ScopPass { Region *region; From grosbach at apple.com Tue Jan 24 11:23:29 2012 From: grosbach at apple.com (Jim Grosbach) Date: Tue, 24 Jan 2012 17:23:29 -0000 Subject: [llvm-commits] [llvm] r148815 - in /llvm/trunk/lib/Target/ARM: ARMInstrFormats.td ARMInstrNEON.td Message-ID: <20120124172329.CF8D12A6C12C@llvm.org> Author: grosbach Date: Tue Jan 24 11:23:29 2012 New Revision: 148815 URL: http://llvm.org/viewvc/llvm-project?rev=148815&view=rev Log: NEON add correct predicates for some asm aliases. Modified: llvm/trunk/lib/Target/ARM/ARMInstrFormats.td llvm/trunk/lib/Target/ARM/ARMInstrNEON.td Modified: llvm/trunk/lib/Target/ARM/ARMInstrFormats.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrFormats.td?rev=148815&r1=148814&r2=148815&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrFormats.td (original) +++ llvm/trunk/lib/Target/ARM/ARMInstrFormats.td Tue Jan 24 11:23:29 2012 @@ -2022,6 +2022,15 @@ def : VFPDataTypeInstAlias; } +multiclass NEONDTAnyInstAlias { + let Predicates = [HasNEON] in { + def : VFPDataTypeInstAlias; + def : VFPDataTypeInstAlias; + def : VFPDataTypeInstAlias; + def : VFPDataTypeInstAlias; +} +} + // The same alias classes using AsmPseudo instead, for the more complex // stuff in NEON that InstAlias can't quite handle. 
// Note that we can't use anonymous defm references here like we can Modified: llvm/trunk/lib/Target/ARM/ARMInstrNEON.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrNEON.td?rev=148815&r1=148814&r2=148815&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrNEON.td (original) +++ llvm/trunk/lib/Target/ARM/ARMInstrNEON.td Tue Jan 24 11:23:29 2012 @@ -5605,21 +5605,21 @@ (VADDWuv2i64 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>; // VAND/VBIC/VEOR/VORR accept but do not require a type suffix. -defm : VFPDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm", +defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm", (VANDd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; -defm : VFPDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm", +defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm", (VANDq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; -defm : VFPDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm", +defm : NEONDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm", (VBICd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; -defm : VFPDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm", +defm : NEONDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm", (VBICq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; -defm : VFPDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm", +defm : NEONDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm", (VEORd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; -defm : VFPDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm", +defm : NEONDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm", (VEORq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; -defm : VFPDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm", +defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm", (VORRd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; -defm : VFPDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm", +defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm", (VORRq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; // ... 
two-operand aliases def : NEONInstAlias<"vand${p} $Vdn, $Vm", @@ -5639,17 +5639,17 @@ def : NEONInstAlias<"vorr${p} $Vdn, $Vm", (VORRq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -defm : VFPDTAnyInstAlias<"vand${p}", "$Vdn, $Vm", +defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm", (VANDd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -defm : VFPDTAnyInstAlias<"vand${p}", "$Vdn, $Vm", +defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm", (VANDq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -defm : VFPDTAnyInstAlias<"veor${p}", "$Vdn, $Vm", +defm : NEONDTAnyInstAlias<"veor${p}", "$Vdn, $Vm", (VEORd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -defm : VFPDTAnyInstAlias<"veor${p}", "$Vdn, $Vm", +defm : NEONDTAnyInstAlias<"veor${p}", "$Vdn, $Vm", (VEORq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -defm : VFPDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm", +defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm", (VORRd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -defm : VFPDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm", +defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm", (VORRq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; // VMUL two-operand aliases. @@ -6325,9 +6325,9 @@ rGPR:$Rm, pred:$p)>; // VMOV takes an optional datatype suffix -defm : VFPDTAnyInstAlias<"vmov${p}", "$Vd, $Vm", +defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm", (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>; -defm : VFPDTAnyInstAlias<"vmov${p}", "$Vd, $Vm", +defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm", (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>; // VCLT (register) is an assembler alias for VCGT w/ the operands reversed. @@ -6495,9 +6495,9 @@ (VPADDf DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; // VSWP allows, but does not require, a type suffix. 
-defm : VFPDTAnyInstAlias<"vswp${p}", "$Vd, $Vm", +defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm", (VSWPd DPR:$Vd, DPR:$Vm, pred:$p)>; -defm : VFPDTAnyInstAlias<"vswp${p}", "$Vd, $Vm", +defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm", (VSWPq QPR:$Vd, QPR:$Vm, pred:$p)>; // "vmov Rd, #-imm" can be handled via "vmvn". From chandlerc at gmail.com Tue Jan 24 11:41:45 2012 From: chandlerc at gmail.com (Chandler Carruth) Date: Tue, 24 Jan 2012 09:41:45 -0800 Subject: [llvm-commits] PATCH: Add support for tracking and exposing the host triple In-Reply-To: <4f1ec506.8125ec0a.5f5a.3d52SMTPIN_ADDED@mx.google.com> References: <4f1ec506.8125ec0a.5f5a.3d52SMTPIN_ADDED@mx.google.com> Message-ID: On Tue, Jan 24, 2012 at 6:46 AM, James Molloy wrote: > Hi Chandler, > > The patch looks good to me, although I'm wondering why Clang needs to care > what it was **built** to run on, as opposed to what it is actually > running on at runtime. > > For example, I compile on RHE3 for RHE3, then run on RHE5. Clang should > surely understand that it is hosted on RHE5 and act accordingly. So why is > this information needed as a build time constant? > This was actually my biggest hesitation as well. It turns out that the only parts of the host triple we could actually use profitably are those which don't vary this much. We're mostly interested in linux vs. darwin vs. windows, and the architecture. However, I've thought more about this overnight, and I wonder if I can deduce more of what I need from the target itself. Essentially, whenever we are targeting an architecture which might be provided by a '-m32' or '-m64' flag to another target's toolchain, we need to try both triples to locate the toolchain. This is true even if neither is the host, so my original plan doesn't actually work. =/ I'll just commit the cleanup parts of this w/o extending anything. We may someday want to at least know the host OS, but that day isn't today. 
PS: The case I like to use as a thought exercise for the design here is an arm-hosted linux box with cross-toolchains for x86 and ppc64, but with the x86 cross toolchain capable of targeting x86-64 via '-m64', and the ppc64 cross toolchain being capable of targeting ppc32 with '-m32'. -------------- next part -------------- An HTML attachment was scrubbed... URL: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120124/6beff6a4/attachment.html From chandlerc at google.com Tue Jan 24 11:44:23 2012 From: chandlerc at google.com (Chandler Carruth) Date: Tue, 24 Jan 2012 09:44:23 -0800 Subject: [llvm-commits] PATCH: Add support for tracking and exposing the host triple In-Reply-To: <20120124162341.GA12631@britannica.bec.de> References: <20120124162341.GA12631@britannica.bec.de> Message-ID: On Tue, Jan 24, 2012 at 8:23 AM, Joerg Sonnenberger wrote: > I disagree with this. I don't the host OS is involved in any of the > decisions. Let me summarize how NetBSD handles this case, I think the > same applies to other platforms and can be moved into the upper layers. > Not really... I would look at the other platforms to understand why.... > When clang is invoked, it derives the base target either from program > name, command line or configuration. During the NetBSD build, it is > always the program name (e.g. x86_64--netbsd-clang). This base target is > memorised and used for finding the tool chain, so that e.g. > x86_64--netbsd-ld is used as linker. If the driver finds -m32 on the > command line, three things change. The triple for the compiler invocation > is changed to i386--netbsd. This changes the code generation only. > Second, the library search path is changed from =/usr/lib to > =/usr/lib/i386. > That's where crt0.o and friends can be found. This differs from a > "native" i386--netbsd compiler, since that expects the path correctly in > =/usr/lib to match the native environment. Third, -m elf_i386 is passed > to ld. 
> There are lots of other issues on non-netbsd systems such as where the GCC toolchain is installed. For some Linux distros there is a nice predictable location, but for others there isn't. For still others, there is a nice predictable location, but it isn't as strictly related to the target as I would like. Anyways, see my response to James... this isn't enough even if I wanted it to be, so I'm going to try a different approach anyways. -------------- next part -------------- An HTML attachment was scrubbed... URL: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120124/84a82876/attachment.html From grosbach at apple.com Tue Jan 24 11:46:54 2012 From: grosbach at apple.com (Jim Grosbach) Date: Tue, 24 Jan 2012 17:46:54 -0000 Subject: [llvm-commits] [llvm] r148817 - /llvm/trunk/test/MC/ARM/neon-shift-encoding.s Message-ID: <20120124174654.E79522A6C12C@llvm.org> Author: grosbach Date: Tue Jan 24 11:46:54 2012 New Revision: 148817 URL: http://llvm.org/viewvc/llvm-project?rev=148817&view=rev Log: Tidy up. 
Modified: llvm/trunk/test/MC/ARM/neon-shift-encoding.s Modified: llvm/trunk/test/MC/ARM/neon-shift-encoding.s URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/ARM/neon-shift-encoding.s?rev=148817&r1=148816&r2=148817&view=diff ============================================================================== --- llvm/trunk/test/MC/ARM/neon-shift-encoding.s (original) +++ llvm/trunk/test/MC/ARM/neon-shift-encoding.s Tue Jan 24 11:46:54 2012 @@ -105,70 +105,79 @@ @ CHECK: vshr.s32 q8, q8, #31 @ encoding: [0x70,0x00,0xe1,0xf2] @ CHECK: vshr.s64 q8, q8, #63 @ encoding: [0xf0,0x00,0xc1,0xf2] -@ CHECK: vsra.u8 d16, d16, #7 @ encoding: [0x30,0x01,0xc9,0xf3] vsra.u8 d16, d16, #7 -@ CHECK: vsra.u16 d16, d16, #15 @ encoding: [0x30,0x01,0xd1,0xf3] vsra.u16 d16, d16, #15 -@ CHECK: vsra.u32 d16, d16, #31 @ encoding: [0x30,0x01,0xe1,0xf3] vsra.u32 d16, d16, #31 -@ CHECK: vsra.u64 d16, d16, #63 @ encoding: [0xb0,0x01,0xc1,0xf3] vsra.u64 d16, d16, #63 -@ CHECK: vsra.u8 q8, q8, #7 @ encoding: [0x70,0x01,0xc9,0xf3] vsra.u8 q8, q8, #7 -@ CHECK: vsra.u16 q8, q8, #15 @ encoding: [0x70,0x01,0xd1,0xf3] vsra.u16 q8, q8, #15 -@ CHECK: vsra.u32 q8, q8, #31 @ encoding: [0x70,0x01,0xe1,0xf3] vsra.u32 q8, q8, #31 -@ CHECK: vsra.u64 q8, q8, #63 @ encoding: [0xf0,0x01,0xc1,0xf3] vsra.u64 q8, q8, #63 -@ CHECK: vsra.s8 d16, d16, #7 @ encoding: [0x30,0x01,0xc9,0xf2] vsra.s8 d16, d16, #7 -@ CHECK: vsra.s16 d16, d16, #15 @ encoding: [0x30,0x01,0xd1,0xf2] vsra.s16 d16, d16, #15 -@ CHECK: vsra.s32 d16, d16, #31 @ encoding: [0x30,0x01,0xe1,0xf2] vsra.s32 d16, d16, #31 -@ CHECK: vsra.s64 d16, d16, #63 @ encoding: [0xb0,0x01,0xc1,0xf2] vsra.s64 d16, d16, #63 -@ CHECK: vsra.s8 q8, q8, #7 @ encoding: [0x70,0x01,0xc9,0xf2] vsra.s8 q8, q8, #7 -@ CHECK: vsra.s16 q8, q8, #15 @ encoding: [0x70,0x01,0xd1,0xf2] vsra.s16 q8, q8, #15 -@ CHECK: vsra.s32 q8, q8, #31 @ encoding: [0x70,0x01,0xe1,0xf2] vsra.s32 q8, q8, #31 -@ CHECK: vsra.s64 q8, q8, #63 @ encoding: [0xf0,0x01,0xc1,0xf2] vsra.s64 q8, q8, #63 -@ 
CHECK: vsri.8 d16, d16, #7 @ encoding: [0x30,0x04,0xc9,0xf3] + +@ CHECK: vsra.u8 d16, d16, #7 @ encoding: [0x30,0x01,0xc9,0xf3] +@ CHECK: vsra.u16 d16, d16, #15 @ encoding: [0x30,0x01,0xd1,0xf3] +@ CHECK: vsra.u32 d16, d16, #31 @ encoding: [0x30,0x01,0xe1,0xf3] +@ CHECK: vsra.u64 d16, d16, #63 @ encoding: [0xb0,0x01,0xc1,0xf3] +@ CHECK: vsra.u8 q8, q8, #7 @ encoding: [0x70,0x01,0xc9,0xf3] +@ CHECK: vsra.u16 q8, q8, #15 @ encoding: [0x70,0x01,0xd1,0xf3] +@ CHECK: vsra.u32 q8, q8, #31 @ encoding: [0x70,0x01,0xe1,0xf3] +@ CHECK: vsra.u64 q8, q8, #63 @ encoding: [0xf0,0x01,0xc1,0xf3] +@ CHECK: vsra.s8 d16, d16, #7 @ encoding: [0x30,0x01,0xc9,0xf2] +@ CHECK: vsra.s16 d16, d16, #15 @ encoding: [0x30,0x01,0xd1,0xf2] +@ CHECK: vsra.s32 d16, d16, #31 @ encoding: [0x30,0x01,0xe1,0xf2] +@ CHECK: vsra.s64 d16, d16, #63 @ encoding: [0xb0,0x01,0xc1,0xf2] +@ CHECK: vsra.s8 q8, q8, #7 @ encoding: [0x70,0x01,0xc9,0xf2] +@ CHECK: vsra.s16 q8, q8, #15 @ encoding: [0x70,0x01,0xd1,0xf2] +@ CHECK: vsra.s32 q8, q8, #31 @ encoding: [0x70,0x01,0xe1,0xf2] +@ CHECK: vsra.s64 q8, q8, #63 @ encoding: [0xf0,0x01,0xc1,0xf2] + + vsri.8 d16, d16, #7 -@ CHECK: vsri.16 d16, d16, #15 @ encoding: [0x30,0x04,0xd1,0xf3] vsri.16 d16, d16, #15 -@ CHECK: vsri.32 d16, d16, #31 @ encoding: [0x30,0x04,0xe1,0xf3] vsri.32 d16, d16, #31 -@ CHECK: vsri.64 d16, d16, #63 @ encoding: [0xb0,0x04,0xc1,0xf3] vsri.64 d16, d16, #63 -@ CHECK: vsri.8 q8, q8, #7 @ encoding: [0x70,0x04,0xc9,0xf3] vsri.8 q8, q8, #7 -@ CHECK: vsri.16 q8, q8, #15 @ encoding: [0x70,0x04,0xd1,0xf3] vsri.16 q8, q8, #15 -@ CHECK: vsri.32 q8, q8, #31 @ encoding: [0x70,0x04,0xe1,0xf3] vsri.32 q8, q8, #31 -@ CHECK: vsri.64 q8, q8, #63 @ encoding: [0xf0,0x04,0xc1,0xf3] vsri.64 q8, q8, #63 -@ CHECK: vsli.8 d16, d16, #7 @ encoding: [0x30,0x05,0xcf,0xf3] + +@ CHECK: vsri.8 d16, d16, #7 @ encoding: [0x30,0x04,0xc9,0xf3] +@ CHECK: vsri.16 d16, d16, #15 @ encoding: [0x30,0x04,0xd1,0xf3] +@ CHECK: vsri.32 d16, d16, #31 @ encoding: [0x30,0x04,0xe1,0xf3] +@ 
CHECK: vsri.64 d16, d16, #63 @ encoding: [0xb0,0x04,0xc1,0xf3] +@ CHECK: vsri.8 q8, q8, #7 @ encoding: [0x70,0x04,0xc9,0xf3] +@ CHECK: vsri.16 q8, q8, #15 @ encoding: [0x70,0x04,0xd1,0xf3] +@ CHECK: vsri.32 q8, q8, #31 @ encoding: [0x70,0x04,0xe1,0xf3] +@ CHECK: vsri.64 q8, q8, #63 @ encoding: [0xf0,0x04,0xc1,0xf3] + + vsli.8 d16, d16, #7 -@ CHECK: vsli.16 d16, d16, #15 @ encoding: [0x30,0x05,0xdf,0xf3] vsli.16 d16, d16, #15 -@ CHECK: vsli.32 d16, d16, #31 @ encoding: [0x30,0x05,0xff,0xf3] vsli.32 d16, d16, #31 -@ CHECK: vsli.64 d16, d16, #63 @ encoding: [0xb0,0x05,0xff,0xf3] vsli.64 d16, d16, #63 -@ CHECK: vsli.8 q8, q8, #7 @ encoding: [0x70,0x05,0xcf,0xf3] vsli.8 q8, q8, #7 -@ CHECK: vsli.16 q8, q8, #15 @ encoding: [0x70,0x05,0xdf,0xf3] vsli.16 q8, q8, #15 -@ CHECK: vsli.32 q8, q8, #31 @ encoding: [0x70,0x05,0xff,0xf3] vsli.32 q8, q8, #31 -@ CHECK: vsli.64 q8, q8, #63 @ encoding: [0xf0,0x05,0xff,0xf3] vsli.64 q8, q8, #63 + +@ CHECK: vsli.8 d16, d16, #7 @ encoding: [0x30,0x05,0xcf,0xf3] +@ CHECK: vsli.16 d16, d16, #15 @ encoding: [0x30,0x05,0xdf,0xf3] +@ CHECK: vsli.32 d16, d16, #31 @ encoding: [0x30,0x05,0xff,0xf3] +@ CHECK: vsli.64 d16, d16, #63 @ encoding: [0xb0,0x05,0xff,0xf3] +@ CHECK: vsli.8 q8, q8, #7 @ encoding: [0x70,0x05,0xcf,0xf3] +@ CHECK: vsli.16 q8, q8, #15 @ encoding: [0x70,0x05,0xdf,0xf3] +@ CHECK: vsli.32 q8, q8, #31 @ encoding: [0x70,0x05,0xff,0xf3] +@ CHECK: vsli.64 q8, q8, #63 @ encoding: [0xf0,0x05,0xff,0xf3] + + @ CHECK: vshll.s8 q8, d16, #7 @ encoding: [0x30,0x0a,0xcf,0xf2] vshll.s8 q8, d16, #7 @ CHECK: vshll.s16 q8, d16, #15 @ encoding: [0x30,0x0a,0xdf,0xf2] From grosbach at apple.com Tue Jan 24 11:46:58 2012 From: grosbach at apple.com (Jim Grosbach) Date: Tue, 24 Jan 2012 17:46:58 -0000 Subject: [llvm-commits] [llvm] r148818 - in /llvm/trunk: lib/Target/ARM/ARMInstrNEON.td test/MC/ARM/neon-shift-encoding.s Message-ID: <20120124174658.92AB62A6C12C@llvm.org> Author: grosbach Date: Tue Jan 24 11:46:58 2012 New Revision: 148818 URL: 
http://llvm.org/viewvc/llvm-project?rev=148818&view=rev Log: NEON Two-operand assembly aliases for VSRI. Modified: llvm/trunk/lib/Target/ARM/ARMInstrNEON.td llvm/trunk/test/MC/ARM/neon-shift-encoding.s Modified: llvm/trunk/lib/Target/ARM/ARMInstrNEON.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrNEON.td?rev=148818&r1=148817&r2=148818&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrNEON.td (original) +++ llvm/trunk/lib/Target/ARM/ARMInstrNEON.td Tue Jan 24 11:46:58 2012 @@ -6494,6 +6494,25 @@ def : NEONInstAlias<"vpadd${p}.f32 $Vdn, $Vm", (VPADDf DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +// Two-operand variants for VSRI. +def : NEONInstAlias<"vsri${p}.8 $Vdm, $imm", + (VSRIv8i8 DPR:$Vdm, DPR:$Vdm, shr_imm8:$imm, pred:$p)>; +def : NEONInstAlias<"vsri${p}.16 $Vdm, $imm", + (VSRIv4i16 DPR:$Vdm, DPR:$Vdm, shr_imm16:$imm, pred:$p)>; +def : NEONInstAlias<"vsri${p}.32 $Vdm, $imm", + (VSRIv2i32 DPR:$Vdm, DPR:$Vdm, shr_imm32:$imm, pred:$p)>; +def : NEONInstAlias<"vsri${p}.64 $Vdm, $imm", + (VSRIv1i64 DPR:$Vdm, DPR:$Vdm, shr_imm64:$imm, pred:$p)>; + +def : NEONInstAlias<"vsri${p}.8 $Vdm, $imm", + (VSRIv16i8 QPR:$Vdm, QPR:$Vdm, shr_imm8:$imm, pred:$p)>; +def : NEONInstAlias<"vsri${p}.16 $Vdm, $imm", + (VSRIv8i16 QPR:$Vdm, QPR:$Vdm, shr_imm16:$imm, pred:$p)>; +def : NEONInstAlias<"vsri${p}.32 $Vdm, $imm", + (VSRIv4i32 QPR:$Vdm, QPR:$Vdm, shr_imm32:$imm, pred:$p)>; +def : NEONInstAlias<"vsri${p}.64 $Vdm, $imm", + (VSRIv2i64 QPR:$Vdm, QPR:$Vdm, shr_imm64:$imm, pred:$p)>; + // VSWP allows, but does not require, a type suffix. 
defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm", (VSWPd DPR:$Vd, DPR:$Vm, pred:$p)>; Modified: llvm/trunk/test/MC/ARM/neon-shift-encoding.s URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/ARM/neon-shift-encoding.s?rev=148818&r1=148817&r2=148818&view=diff ============================================================================== --- llvm/trunk/test/MC/ARM/neon-shift-encoding.s (original) +++ llvm/trunk/test/MC/ARM/neon-shift-encoding.s Tue Jan 24 11:46:58 2012 @@ -140,23 +140,40 @@ @ CHECK: vsra.s64 q8, q8, #63 @ encoding: [0xf0,0x01,0xc1,0xf2] - vsri.8 d16, d16, #7 - vsri.16 d16, d16, #15 - vsri.32 d16, d16, #31 - vsri.64 d16, d16, #63 - vsri.8 q8, q8, #7 - vsri.16 q8, q8, #15 - vsri.32 q8, q8, #31 - vsri.64 q8, q8, #63 - -@ CHECK: vsri.8 d16, d16, #7 @ encoding: [0x30,0x04,0xc9,0xf3] -@ CHECK: vsri.16 d16, d16, #15 @ encoding: [0x30,0x04,0xd1,0xf3] -@ CHECK: vsri.32 d16, d16, #31 @ encoding: [0x30,0x04,0xe1,0xf3] -@ CHECK: vsri.64 d16, d16, #63 @ encoding: [0xb0,0x04,0xc1,0xf3] -@ CHECK: vsri.8 q8, q8, #7 @ encoding: [0x70,0x04,0xc9,0xf3] -@ CHECK: vsri.16 q8, q8, #15 @ encoding: [0x70,0x04,0xd1,0xf3] -@ CHECK: vsri.32 q8, q8, #31 @ encoding: [0x70,0x04,0xe1,0xf3] -@ CHECK: vsri.64 q8, q8, #63 @ encoding: [0xf0,0x04,0xc1,0xf3] + vsri.8 d16, d6, #7 + vsri.16 d26, d18, #15 + vsri.32 d11, d10, #31 + vsri.64 d12, d19, #63 + vsri.8 q1, q8, #7 + vsri.16 q2, q7, #15 + vsri.32 q3, q6, #31 + vsri.64 q4, q5, #63 + + vsri.8 d16, #7 + vsri.16 d15, #15 + vsri.32 d14, #31 + vsri.64 d13, #63 + vsri.8 q4, #7 + vsri.16 q5, #15 + vsri.32 q6, #31 + vsri.64 q7, #63 + +@ CHECK: vsri.8 d16, d6, #7 @ encoding: [0x16,0x04,0xc9,0xf3] +@ CHECK: vsri.16 d26, d18, #15 @ encoding: [0x32,0xa4,0xd1,0xf3] +@ CHECK: vsri.32 d11, d10, #31 @ encoding: [0x1a,0xb4,0xa1,0xf3] +@ CHECK: vsri.64 d12, d19, #63 @ encoding: [0xb3,0xc4,0x81,0xf3] +@ CHECK: vsri.8 q1, q8, #7 @ encoding: [0x70,0x24,0x89,0xf3] +@ CHECK: vsri.16 q2, q7, #15 @ encoding: [0x5e,0x44,0x91,0xf3] +@ CHECK: vsri.32 
q3, q6, #31 @ encoding: [0x5c,0x64,0xa1,0xf3] +@ CHECK: vsri.64 q4, q5, #63 @ encoding: [0xda,0x84,0x81,0xf3] +@ CHECK: vsri.8 d16, d16, #7 @ encoding: [0x30,0x04,0xc9,0xf3] +@ CHECK: vsri.16 d15, d15, #15 @ encoding: [0x1f,0xf4,0x91,0xf3] +@ CHECK: vsri.32 d14, d14, #31 @ encoding: [0x1e,0xe4,0xa1,0xf3] +@ CHECK: vsri.64 d13, d13, #63 @ encoding: [0x9d,0xd4,0x81,0xf3] +@ CHECK: vsri.8 q4, q4, #7 @ encoding: [0x58,0x84,0x89,0xf3] +@ CHECK: vsri.16 q5, q5, #15 @ encoding: [0x5a,0xa4,0x91,0xf3] +@ CHECK: vsri.32 q6, q6, #31 @ encoding: [0x5c,0xc4,0xa1,0xf3] +@ CHECK: vsri.64 q7, q7, #63 @ encoding: [0xde,0xe4,0x81,0xf3] vsli.8 d16, d16, #7 From grosbach at apple.com Tue Jan 24 11:49:15 2012 From: grosbach at apple.com (Jim Grosbach) Date: Tue, 24 Jan 2012 17:49:15 -0000 Subject: [llvm-commits] [llvm] r148819 - in /llvm/trunk: lib/Target/ARM/ARMInstrNEON.td test/MC/ARM/neon-shift-encoding.s Message-ID: <20120124174915.7BF072A6C12C@llvm.org> Author: grosbach Date: Tue Jan 24 11:49:15 2012 New Revision: 148819 URL: http://llvm.org/viewvc/llvm-project?rev=148819&view=rev Log: NEON Two-operand assembly aliases for VSLI. Modified: llvm/trunk/lib/Target/ARM/ARMInstrNEON.td llvm/trunk/test/MC/ARM/neon-shift-encoding.s Modified: llvm/trunk/lib/Target/ARM/ARMInstrNEON.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrNEON.td?rev=148819&r1=148818&r2=148819&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrNEON.td (original) +++ llvm/trunk/lib/Target/ARM/ARMInstrNEON.td Tue Jan 24 11:49:15 2012 @@ -6513,6 +6513,25 @@ def : NEONInstAlias<"vsri${p}.64 $Vdm, $imm", (VSRIv2i64 QPR:$Vdm, QPR:$Vdm, shr_imm64:$imm, pred:$p)>; +// Two-operand variants for VSLI. 
+def : NEONInstAlias<"vsli${p}.8 $Vdm, $imm", + (VSLIv8i8 DPR:$Vdm, DPR:$Vdm, shr_imm8:$imm, pred:$p)>; +def : NEONInstAlias<"vsli${p}.16 $Vdm, $imm", + (VSLIv4i16 DPR:$Vdm, DPR:$Vdm, shr_imm16:$imm, pred:$p)>; +def : NEONInstAlias<"vsli${p}.32 $Vdm, $imm", + (VSLIv2i32 DPR:$Vdm, DPR:$Vdm, shr_imm32:$imm, pred:$p)>; +def : NEONInstAlias<"vsli${p}.64 $Vdm, $imm", + (VSLIv1i64 DPR:$Vdm, DPR:$Vdm, shr_imm64:$imm, pred:$p)>; + +def : NEONInstAlias<"vsli${p}.8 $Vdm, $imm", + (VSLIv16i8 QPR:$Vdm, QPR:$Vdm, shr_imm8:$imm, pred:$p)>; +def : NEONInstAlias<"vsli${p}.16 $Vdm, $imm", + (VSLIv8i16 QPR:$Vdm, QPR:$Vdm, shr_imm16:$imm, pred:$p)>; +def : NEONInstAlias<"vsli${p}.32 $Vdm, $imm", + (VSLIv4i32 QPR:$Vdm, QPR:$Vdm, shr_imm32:$imm, pred:$p)>; +def : NEONInstAlias<"vsli${p}.64 $Vdm, $imm", + (VSLIv2i64 QPR:$Vdm, QPR:$Vdm, shr_imm64:$imm, pred:$p)>; + // VSWP allows, but does not require, a type suffix. defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm", (VSWPd DPR:$Vd, DPR:$Vm, pred:$p)>; Modified: llvm/trunk/test/MC/ARM/neon-shift-encoding.s URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/ARM/neon-shift-encoding.s?rev=148819&r1=148818&r2=148819&view=diff ============================================================================== --- llvm/trunk/test/MC/ARM/neon-shift-encoding.s (original) +++ llvm/trunk/test/MC/ARM/neon-shift-encoding.s Tue Jan 24 11:49:15 2012 @@ -176,23 +176,40 @@ @ CHECK: vsri.64 q7, q7, #63 @ encoding: [0xde,0xe4,0x81,0xf3] - vsli.8 d16, d16, #7 - vsli.16 d16, d16, #15 - vsli.32 d16, d16, #31 - vsli.64 d16, d16, #63 - vsli.8 q8, q8, #7 - vsli.16 q8, q8, #15 - vsli.32 q8, q8, #31 - vsli.64 q8, q8, #63 - -@ CHECK: vsli.8 d16, d16, #7 @ encoding: [0x30,0x05,0xcf,0xf3] -@ CHECK: vsli.16 d16, d16, #15 @ encoding: [0x30,0x05,0xdf,0xf3] -@ CHECK: vsli.32 d16, d16, #31 @ encoding: [0x30,0x05,0xff,0xf3] -@ CHECK: vsli.64 d16, d16, #63 @ encoding: [0xb0,0x05,0xff,0xf3] -@ CHECK: vsli.8 q8, q8, #7 @ encoding: [0x70,0x05,0xcf,0xf3] -@ CHECK: vsli.16 
q8, q8, #15 @ encoding: [0x70,0x05,0xdf,0xf3] -@ CHECK: vsli.32 q8, q8, #31 @ encoding: [0x70,0x05,0xff,0xf3] -@ CHECK: vsli.64 q8, q8, #63 @ encoding: [0xf0,0x05,0xff,0xf3] + vsli.8 d16, d6, #7 + vsli.16 d26, d18, #15 + vsli.32 d11, d10, #31 + vsli.64 d12, d19, #63 + vsli.8 q1, q8, #7 + vsli.16 q2, q7, #15 + vsli.32 q3, q6, #31 + vsli.64 q4, q5, #63 + + vsli.8 d16, #7 + vsli.16 d15, #15 + vsli.32 d14, #31 + vsli.64 d13, #63 + vsli.8 q4, #7 + vsli.16 q5, #15 + vsli.32 q6, #31 + vsli.64 q7, #63 + +@ CHECK: vsli.8 d16, d6, #7 @ encoding: [0x16,0x05,0xcf,0xf3] +@ CHECK: vsli.16 d26, d18, #15 @ encoding: [0x32,0xa5,0xdf,0xf3] +@ CHECK: vsli.32 d11, d10, #31 @ encoding: [0x1a,0xb5,0xbf,0xf3] +@ CHECK: vsli.64 d12, d19, #63 @ encoding: [0xb3,0xc5,0xbf,0xf3] +@ CHECK: vsli.8 q1, q8, #7 @ encoding: [0x70,0x25,0x8f,0xf3] +@ CHECK: vsli.16 q2, q7, #15 @ encoding: [0x5e,0x45,0x9f,0xf3] +@ CHECK: vsli.32 q3, q6, #31 @ encoding: [0x5c,0x65,0xbf,0xf3] +@ CHECK: vsli.64 q4, q5, #63 @ encoding: [0xda,0x85,0xbf,0xf3] +@ CHECK: vsli.8 d16, d16, #7 @ encoding: [0x30,0x05,0xcf,0xf3] +@ CHECK: vsli.16 d15, d15, #15 @ encoding: [0x1f,0xf5,0x9f,0xf3] +@ CHECK: vsli.32 d14, d14, #31 @ encoding: [0x1e,0xe5,0xbf,0xf3] +@ CHECK: vsli.64 d13, d13, #63 @ encoding: [0x9d,0xd5,0xbf,0xf3] +@ CHECK: vsli.8 q4, q4, #7 @ encoding: [0x58,0x85,0x8f,0xf3] +@ CHECK: vsli.16 q5, q5, #15 @ encoding: [0x5a,0xa5,0x9f,0xf3] +@ CHECK: vsli.32 q6, q6, #31 @ encoding: [0x5c,0xc5,0xbf,0xf3] +@ CHECK: vsli.64 q7, q7, #63 @ encoding: [0xde,0xe5,0xbf,0xf3] @ CHECK: vshll.s8 q8, d16, #7 @ encoding: [0x30,0x0a,0xcf,0xf2] From grosbach at apple.com Tue Jan 24 11:55:33 2012 From: grosbach at apple.com (Jim Grosbach) Date: Tue, 24 Jan 2012 17:55:33 -0000 Subject: [llvm-commits] [llvm] r148820 - /llvm/trunk/test/MC/ARM/neon-shiftaccum-encoding.s Message-ID: <20120124175533.2AD272A6C12C@llvm.org> Author: grosbach Date: Tue Jan 24 11:55:32 2012 New Revision: 148820 URL: 
http://llvm.org/viewvc/llvm-project?rev=148820&view=rev Log: Remove redundant test file. Removed: llvm/trunk/test/MC/ARM/neon-shiftaccum-encoding.s Removed: llvm/trunk/test/MC/ARM/neon-shiftaccum-encoding.s URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/ARM/neon-shiftaccum-encoding.s?rev=148819&view=auto ============================================================================== --- llvm/trunk/test/MC/ARM/neon-shiftaccum-encoding.s (original) +++ llvm/trunk/test/MC/ARM/neon-shiftaccum-encoding.s (removed) @@ -1,98 +0,0 @@ -@ RUN: llvm-mc -mcpu=cortex-a8 -triple arm-unknown-unknown -show-encoding < %s | FileCheck %s - -@ CHECK: vsra.s8 d17, d16, #8 @ encoding: [0x30,0x11,0xc8,0xf2] - vsra.s8 d17, d16, #8 -@ CHECK: vsra.s16 d17, d16, #16 @ encoding: [0x30,0x11,0xd0,0xf2] - vsra.s16 d17, d16, #16 -@ CHECK: vsra.s32 d17, d16, #32 @ encoding: [0x30,0x11,0xe0,0xf2] - vsra.s32 d17, d16, #32 -@ CHECK: vsra.s64 d17, d16, #64 @ encoding: [0xb0,0x11,0xc0,0xf2] - vsra.s64 d17, d16, #64 -@ CHECK: vsra.s8 q8, q9, #8 @ encoding: [0x72,0x01,0xc8,0xf2] - vsra.s8 q8, q9, #8 -@ CHECK: vsra.s16 q8, q9, #16 @ encoding: [0x72,0x01,0xd0,0xf2] - vsra.s16 q8, q9, #16 -@ CHECK: vsra.s32 q8, q9, #32 @ encoding: [0x72,0x01,0xe0,0xf2] - vsra.s32 q8, q9, #32 -@ CHECK: vsra.s64 q8, q9, #64 @ encoding: [0xf2,0x01,0xc0,0xf2] - vsra.s64 q8, q9, #64 -@ CHECK: vsra.u8 d17, d16, #8 @ encoding: [0x30,0x11,0xc8,0xf3] - vsra.u8 d17, d16, #8 -@ CHECK: vsra.u16 d17, d16, #16 @ encoding: [0x30,0x11,0xd0,0xf3] - vsra.u16 d17, d16, #16 -@ CHECK: vsra.u32 d17, d16, #32 @ encoding: [0x30,0x11,0xe0,0xf3] - vsra.u32 d17, d16, #32 -@ CHECK: vsra.u64 d17, d16, #64 @ encoding: [0xb0,0x11,0xc0,0xf3] - vsra.u64 d17, d16, #64 -@ CHECK: vsra.u8 q8, q9, #8 @ encoding: [0x72,0x01,0xc8,0xf3] - vsra.u8 q8, q9, #8 -@ CHECK: vsra.u16 q8, q9, #16 @ encoding: [0x72,0x01,0xd0,0xf3] - vsra.u16 q8, q9, #16 -@ CHECK: vsra.u32 q8, q9, #32 @ encoding: [0x72,0x01,0xe0,0xf3] - vsra.u32 q8, q9, #32 -@ CHECK: vsra.u64 q8, 
q9, #64 @ encoding: [0xf2,0x01,0xc0,0xf3] - vsra.u64 q8, q9, #64 -@ CHECK: vrsra.s8 d17, d16, #8 @ encoding: [0x30,0x13,0xc8,0xf2] - vrsra.s8 d17, d16, #8 -@ CHECK: vrsra.s16 d17, d16, #16 @ encoding: [0x30,0x13,0xd0,0xf2] - vrsra.s16 d17, d16, #16 -@ CHECK: vrsra.s32 d17, d16, #32 @ encoding: [0x30,0x13,0xe0,0xf2] - vrsra.s32 d17, d16, #32 -@ CHECK: vrsra.s64 d17, d16, #64 @ encoding: [0xb0,0x13,0xc0,0xf2] - vrsra.s64 d17, d16, #64 -@ CHECK: vrsra.u8 d17, d16, #8 @ encoding: [0x30,0x13,0xc8,0xf3] - vrsra.u8 d17, d16, #8 -@ CHECK: vrsra.u16 d17, d16, #16 @ encoding: [0x30,0x13,0xd0,0xf3] - vrsra.u16 d17, d16, #16 -@ CHECK: vrsra.u32 d17, d16, #32 @ encoding: [0x30,0x13,0xe0,0xf3] - vrsra.u32 d17, d16, #32 -@ CHECK: vrsra.u64 d17, d16, #64 @ encoding: [0xb0,0x13,0xc0,0xf3] - vrsra.u64 d17, d16, #64 -@ CHECK: vrsra.s8 q8, q9, #8 @ encoding: [0x72,0x03,0xc8,0xf2] - vrsra.s8 q8, q9, #8 -@ CHECK: vrsra.s16 q8, q9, #16 @ encoding: [0x72,0x03,0xd0,0xf2] - vrsra.s16 q8, q9, #16 -@ CHECK: vrsra.s32 q8, q9, #32 @ encoding: [0x72,0x03,0xe0,0xf2] - vrsra.s32 q8, q9, #32 -@ CHECK: vrsra.s64 q8, q9, #64 @ encoding: [0xf2,0x03,0xc0,0xf2] - vrsra.s64 q8, q9, #64 -@ CHECK: vrsra.u8 q8, q9, #8 @ encoding: [0x72,0x03,0xc8,0xf3] - vrsra.u8 q8, q9, #8 -@ CHECK: vrsra.u16 q8, q9, #16 @ encoding: [0x72,0x03,0xd0,0xf3] - vrsra.u16 q8, q9, #16 -@ CHECK: vrsra.u32 q8, q9, #32 @ encoding: [0x72,0x03,0xe0,0xf3] - vrsra.u32 q8, q9, #32 -@ CHECK: vrsra.u64 q8, q9, #64 @ encoding: [0xf2,0x03,0xc0,0xf3] - vrsra.u64 q8, q9, #64 -@ CHECK: vsli.8 d17, d16, #7 @ encoding: [0x30,0x15,0xcf,0xf3] - vsli.8 d17, d16, #7 -@ CHECK: vsli.16 d17, d16, #15 @ encoding: [0x30,0x15,0xdf,0xf3] - vsli.16 d17, d16, #15 -@ CHECK: vsli.32 d17, d16, #31 @ encoding: [0x30,0x15,0xff,0xf3] - vsli.32 d17, d16, #31 -@ CHECK: vsli.64 d17, d16, #63 @ encoding: [0xb0,0x15,0xff,0xf3] - vsli.64 d17, d16, #63 -@ CHECK: vsli.8 q9, q8, #7 @ encoding: [0x70,0x25,0xcf,0xf3] - vsli.8 q9, q8, #7 -@ CHECK: vsli.16 q9, q8, #15 @ 
encoding: [0x70,0x25,0xdf,0xf3] - vsli.16 q9, q8, #15 -@ CHECK: vsli.32 q9, q8, #31 @ encoding: [0x70,0x25,0xff,0xf3] - vsli.32 q9, q8, #31 -@ CHECK: vsli.64 q9, q8, #63 @ encoding: [0xf0,0x25,0xff,0xf3] - vsli.64 q9, q8, #63 -@ CHECK: vsri.8 d17, d16, #8 @ encoding: [0x30,0x14,0xc8,0xf3] - vsri.8 d17, d16, #8 -@ CHECK: vsri.16 d17, d16, #16 @ encoding: [0x30,0x14,0xd0,0xf3] - vsri.16 d17, d16, #16 -@ CHECK: vsri.32 d17, d16, #32 @ encoding: [0x30,0x14,0xe0,0xf3] - vsri.32 d17, d16, #32 -@ CHECK: vsri.64 d17, d16, #64 @ encoding: [0xb0,0x14,0xc0,0xf3] - vsri.64 d17, d16, #64 -@ CHECK: vsri.8 q9, q8, #8 @ encoding: [0x70,0x24,0xc8,0xf3] - vsri.8 q9, q8, #8 -@ CHECK: vsri.16 q9, q8, #16 @ encoding: [0x70,0x24,0xd0,0xf3] - vsri.16 q9, q8, #16 -@ CHECK: vsri.32 q9, q8, #32 @ encoding: [0x70,0x24,0xe0,0xf3] - vsri.32 q9, q8, #32 -@ CHECK: vsri.64 q9, q8, #64 @ encoding: [0xf0,0x24,0xc0,0xf3] - vsri.64 q9, q8, #64 From grosbach at apple.com Tue Jan 24 11:55:37 2012 From: grosbach at apple.com (Jim Grosbach) Date: Tue, 24 Jan 2012 17:55:37 -0000 Subject: [llvm-commits] [llvm] r148821 - in /llvm/trunk: lib/Target/ARM/ARMInstrNEON.td test/MC/ARM/neon-shift-encoding.s Message-ID: <20120124175537.246502A6C12C@llvm.org> Author: grosbach Date: Tue Jan 24 11:55:36 2012 New Revision: 148821 URL: http://llvm.org/viewvc/llvm-project?rev=148821&view=rev Log: NEON Two-operand assembly aliases for VSRA. 
Modified: llvm/trunk/lib/Target/ARM/ARMInstrNEON.td llvm/trunk/test/MC/ARM/neon-shift-encoding.s Modified: llvm/trunk/lib/Target/ARM/ARMInstrNEON.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrNEON.td?rev=148821&r1=148820&r2=148821&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrNEON.td (original) +++ llvm/trunk/lib/Target/ARM/ARMInstrNEON.td Tue Jan 24 11:55:36 2012 @@ -6494,6 +6494,45 @@ def : NEONInstAlias<"vpadd${p}.f32 $Vdn, $Vm", (VPADDf DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +// Two-operand variants for VSRA. + // Signed. +def : NEONInstAlias<"vsra${p}.s8 $Vdm, $imm", + (VSRAsv8i8 DPR:$Vdm, DPR:$Vdm, shr_imm8:$imm, pred:$p)>; +def : NEONInstAlias<"vsra${p}.s16 $Vdm, $imm", + (VSRAsv4i16 DPR:$Vdm, DPR:$Vdm, shr_imm16:$imm, pred:$p)>; +def : NEONInstAlias<"vsra${p}.s32 $Vdm, $imm", + (VSRAsv2i32 DPR:$Vdm, DPR:$Vdm, shr_imm32:$imm, pred:$p)>; +def : NEONInstAlias<"vsra${p}.s64 $Vdm, $imm", + (VSRAsv1i64 DPR:$Vdm, DPR:$Vdm, shr_imm64:$imm, pred:$p)>; + +def : NEONInstAlias<"vsra${p}.s8 $Vdm, $imm", + (VSRAsv16i8 QPR:$Vdm, QPR:$Vdm, shr_imm8:$imm, pred:$p)>; +def : NEONInstAlias<"vsra${p}.s16 $Vdm, $imm", + (VSRAsv8i16 QPR:$Vdm, QPR:$Vdm, shr_imm16:$imm, pred:$p)>; +def : NEONInstAlias<"vsra${p}.s32 $Vdm, $imm", + (VSRAsv4i32 QPR:$Vdm, QPR:$Vdm, shr_imm32:$imm, pred:$p)>; +def : NEONInstAlias<"vsra${p}.s64 $Vdm, $imm", + (VSRAsv2i64 QPR:$Vdm, QPR:$Vdm, shr_imm64:$imm, pred:$p)>; + + // Unsigned. 
+def : NEONInstAlias<"vsra${p}.u8 $Vdm, $imm", + (VSRAuv8i8 DPR:$Vdm, DPR:$Vdm, shr_imm8:$imm, pred:$p)>; +def : NEONInstAlias<"vsra${p}.u16 $Vdm, $imm", + (VSRAuv4i16 DPR:$Vdm, DPR:$Vdm, shr_imm16:$imm, pred:$p)>; +def : NEONInstAlias<"vsra${p}.u32 $Vdm, $imm", + (VSRAuv2i32 DPR:$Vdm, DPR:$Vdm, shr_imm32:$imm, pred:$p)>; +def : NEONInstAlias<"vsra${p}.u64 $Vdm, $imm", + (VSRAuv1i64 DPR:$Vdm, DPR:$Vdm, shr_imm64:$imm, pred:$p)>; + +def : NEONInstAlias<"vsra${p}.u8 $Vdm, $imm", + (VSRAuv16i8 QPR:$Vdm, QPR:$Vdm, shr_imm8:$imm, pred:$p)>; +def : NEONInstAlias<"vsra${p}.u16 $Vdm, $imm", + (VSRAuv8i16 QPR:$Vdm, QPR:$Vdm, shr_imm16:$imm, pred:$p)>; +def : NEONInstAlias<"vsra${p}.u32 $Vdm, $imm", + (VSRAuv4i32 QPR:$Vdm, QPR:$Vdm, shr_imm32:$imm, pred:$p)>; +def : NEONInstAlias<"vsra${p}.u64 $Vdm, $imm", + (VSRAuv2i64 QPR:$Vdm, QPR:$Vdm, shr_imm64:$imm, pred:$p)>; + // Two-operand variants for VSRI. def : NEONInstAlias<"vsri${p}.8 $Vdm, $imm", (VSRIv8i8 DPR:$Vdm, DPR:$Vdm, shr_imm8:$imm, pred:$p)>; Modified: llvm/trunk/test/MC/ARM/neon-shift-encoding.s URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/ARM/neon-shift-encoding.s?rev=148821&r1=148820&r2=148821&view=diff ============================================================================== --- llvm/trunk/test/MC/ARM/neon-shift-encoding.s (original) +++ llvm/trunk/test/MC/ARM/neon-shift-encoding.s Tue Jan 24 11:55:36 2012 @@ -105,39 +105,77 @@ @ CHECK: vshr.s32 q8, q8, #31 @ encoding: [0x70,0x00,0xe1,0xf2] @ CHECK: vshr.s64 q8, q8, #63 @ encoding: [0xf0,0x00,0xc1,0xf2] - vsra.u8 d16, d16, #7 - vsra.u16 d16, d16, #15 - vsra.u32 d16, d16, #31 - vsra.u64 d16, d16, #63 - vsra.u8 q8, q8, #7 - vsra.u16 q8, q8, #15 - vsra.u32 q8, q8, #31 - vsra.u64 q8, q8, #63 - vsra.s8 d16, d16, #7 - vsra.s16 d16, d16, #15 - vsra.s32 d16, d16, #31 - vsra.s64 d16, d16, #63 - vsra.s8 q8, q8, #7 - vsra.s16 q8, q8, #15 - vsra.s32 q8, q8, #31 - vsra.s64 q8, q8, #63 - -@ CHECK: vsra.u8 d16, d16, #7 @ encoding: [0x30,0x01,0xc9,0xf3] -@ 
CHECK: vsra.u16 d16, d16, #15 @ encoding: [0x30,0x01,0xd1,0xf3] -@ CHECK: vsra.u32 d16, d16, #31 @ encoding: [0x30,0x01,0xe1,0xf3] -@ CHECK: vsra.u64 d16, d16, #63 @ encoding: [0xb0,0x01,0xc1,0xf3] -@ CHECK: vsra.u8 q8, q8, #7 @ encoding: [0x70,0x01,0xc9,0xf3] -@ CHECK: vsra.u16 q8, q8, #15 @ encoding: [0x70,0x01,0xd1,0xf3] -@ CHECK: vsra.u32 q8, q8, #31 @ encoding: [0x70,0x01,0xe1,0xf3] -@ CHECK: vsra.u64 q8, q8, #63 @ encoding: [0xf0,0x01,0xc1,0xf3] -@ CHECK: vsra.s8 d16, d16, #7 @ encoding: [0x30,0x01,0xc9,0xf2] -@ CHECK: vsra.s16 d16, d16, #15 @ encoding: [0x30,0x01,0xd1,0xf2] -@ CHECK: vsra.s32 d16, d16, #31 @ encoding: [0x30,0x01,0xe1,0xf2] -@ CHECK: vsra.s64 d16, d16, #63 @ encoding: [0xb0,0x01,0xc1,0xf2] -@ CHECK: vsra.s8 q8, q8, #7 @ encoding: [0x70,0x01,0xc9,0xf2] -@ CHECK: vsra.s16 q8, q8, #15 @ encoding: [0x70,0x01,0xd1,0xf2] -@ CHECK: vsra.s32 q8, q8, #31 @ encoding: [0x70,0x01,0xe1,0xf2] -@ CHECK: vsra.s64 q8, q8, #63 @ encoding: [0xf0,0x01,0xc1,0xf2] + + vsra.s8 d16, d6, #7 + vsra.s16 d26, d18, #15 + vsra.s32 d11, d10, #31 + vsra.s64 d12, d19, #63 + vsra.s8 q1, q8, #7 + vsra.s16 q2, q7, #15 + vsra.s32 q3, q6, #31 + vsra.s64 q4, q5, #63 + + vsra.s8 d16, #7 + vsra.s16 d15, #15 + vsra.s32 d14, #31 + vsra.s64 d13, #63 + vsra.s8 q4, #7 + vsra.s16 q5, #15 + vsra.s32 q6, #31 + vsra.s64 q7, #63 + +@ CHECK: vsra.s8 d16, d6, #7 @ encoding: [0x16,0x01,0xc9,0xf2] +@ CHECK: vsra.s16 d26, d18, #15 @ encoding: [0x32,0xa1,0xd1,0xf2] +@ CHECK: vsra.s32 d11, d10, #31 @ encoding: [0x1a,0xb1,0xa1,0xf2] +@ CHECK: vsra.s64 d12, d19, #63 @ encoding: [0xb3,0xc1,0x81,0xf2] +@ CHECK: vsra.s8 q1, q8, #7 @ encoding: [0x70,0x21,0x89,0xf2] +@ CHECK: vsra.s16 q2, q7, #15 @ encoding: [0x5e,0x41,0x91,0xf2] +@ CHECK: vsra.s32 q3, q6, #31 @ encoding: [0x5c,0x61,0xa1,0xf2] +@ CHECK: vsra.s64 q4, q5, #63 @ encoding: [0xda,0x81,0x81,0xf2] +@ CHECK: vsra.s8 d16, d16, #7 @ encoding: [0x30,0x01,0xc9,0xf2] +@ CHECK: vsra.s16 d15, d15, #15 @ encoding: [0x1f,0xf1,0x91,0xf2] +@ CHECK: vsra.s32 
d14, d14, #31 @ encoding: [0x1e,0xe1,0xa1,0xf2] +@ CHECK: vsra.s64 d13, d13, #63 @ encoding: [0x9d,0xd1,0x81,0xf2] +@ CHECK: vsra.s8 q4, q4, #7 @ encoding: [0x58,0x81,0x89,0xf2] +@ CHECK: vsra.s16 q5, q5, #15 @ encoding: [0x5a,0xa1,0x91,0xf2] +@ CHECK: vsra.s32 q6, q6, #31 @ encoding: [0x5c,0xc1,0xa1,0xf2] +@ CHECK: vsra.s64 q7, q7, #63 @ encoding: [0xde,0xe1,0x81,0xf2] + + + vsra.u8 d16, d6, #7 + vsra.u16 d26, d18, #15 + vsra.u32 d11, d10, #31 + vsra.u64 d12, d19, #63 + vsra.u8 q1, q8, #7 + vsra.u16 q2, q7, #15 + vsra.u32 q3, q6, #31 + vsra.u64 q4, q5, #63 + + vsra.u8 d16, #7 + vsra.u16 d15, #15 + vsra.u32 d14, #31 + vsra.u64 d13, #63 + vsra.u8 q4, #7 + vsra.u16 q5, #15 + vsra.u32 q6, #31 + vsra.u64 q7, #63 + +@ CHECK: vsra.u8 d16, d6, #7 @ encoding: [0x16,0x01,0xc9,0xf3] +@ CHECK: vsra.u16 d26, d18, #15 @ encoding: [0x32,0xa1,0xd1,0xf3] +@ CHECK: vsra.u32 d11, d10, #31 @ encoding: [0x1a,0xb1,0xa1,0xf3] +@ CHECK: vsra.u64 d12, d19, #63 @ encoding: [0xb3,0xc1,0x81,0xf3] +@ CHECK: vsra.u8 q1, q8, #7 @ encoding: [0x70,0x21,0x89,0xf3] +@ CHECK: vsra.u16 q2, q7, #15 @ encoding: [0x5e,0x41,0x91,0xf3] +@ CHECK: vsra.u32 q3, q6, #31 @ encoding: [0x5c,0x61,0xa1,0xf3] +@ CHECK: vsra.u64 q4, q5, #63 @ encoding: [0xda,0x81,0x81,0xf3] +@ CHECK: vsra.u8 d16, d16, #7 @ encoding: [0x30,0x01,0xc9,0xf3] +@ CHECK: vsra.u16 d15, d15, #15 @ encoding: [0x1f,0xf1,0x91,0xf3] +@ CHECK: vsra.u32 d14, d14, #31 @ encoding: [0x1e,0xe1,0xa1,0xf3] +@ CHECK: vsra.u64 d13, d13, #63 @ encoding: [0x9d,0xd1,0x81,0xf3] +@ CHECK: vsra.u8 q4, q4, #7 @ encoding: [0x58,0x81,0x89,0xf3] +@ CHECK: vsra.u16 q5, q5, #15 @ encoding: [0x5a,0xa1,0x91,0xf3] +@ CHECK: vsra.u32 q6, q6, #31 @ encoding: [0x5c,0xc1,0xa1,0xf3] +@ CHECK: vsra.u64 q7, q7, #63 @ encoding: [0xde,0xe1,0x81,0xf3] vsri.8 d16, d6, #7 From chandlerc at gmail.com Tue Jan 24 12:00:44 2012 From: chandlerc at gmail.com (Chandler Carruth) Date: Tue, 24 Jan 2012 18:00:44 -0000 Subject: [llvm-commits] [llvm] r148822 - in /llvm/trunk/cmake: 
config-ix.cmake modules/CMakeLists.txt modules/GetHostTriple.cmake modules/GetTargetTriple.cmake Message-ID: <20120124180044.CD5172A6C12C@llvm.org> Author: chandlerc Date: Tue Jan 24 12:00:44 2012 New Revision: 148822 URL: http://llvm.org/viewvc/llvm-project?rev=148822&view=rev Log: Undo an over zealous rename. This bit of the CMake build really is dealing in the host triple, be honest about it and document the decision to default the target triple to the host triple unless overridden. Added: llvm/trunk/cmake/modules/GetHostTriple.cmake - copied, changed from r148782, llvm/trunk/cmake/modules/GetTargetTriple.cmake Removed: llvm/trunk/cmake/modules/GetTargetTriple.cmake Modified: llvm/trunk/cmake/config-ix.cmake llvm/trunk/cmake/modules/CMakeLists.txt Modified: llvm/trunk/cmake/config-ix.cmake URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/cmake/config-ix.cmake?rev=148822&r1=148821&r2=148822&view=diff ============================================================================== --- llvm/trunk/cmake/config-ix.cmake (original) +++ llvm/trunk/cmake/config-ix.cmake Tue Jan 24 12:00:44 2012 @@ -287,15 +287,18 @@ check_cxx_compiler_flag("-Wno-variadic-macros" SUPPORTS_NO_VARIADIC_MACROS_FLAG) -include(GetTargetTriple) -get_target_triple(LLVM_DEFAULT_TARGET_TRIPLE) +include(GetHostTriple) +get_host_triple(LLVM_HOST_TRIPLE) +# By default, we target the host, but this can be overridden at CMake +# invocation time. +set(LLVM_DEFAULT_TARGET_TRIPLE "${LLVM_HOST_TRIPLE}") set(TARGET_TRIPLE "${LLVM_DEFAULT_TARGET_TRIPLE}") # Determine the native architecture. 
string(TOLOWER "${LLVM_TARGET_ARCH}" LLVM_NATIVE_ARCH) if( LLVM_NATIVE_ARCH STREQUAL "host" ) - string(REGEX MATCH "^[^-]*" LLVM_NATIVE_ARCH ${LLVM_DEFAULT_TARGET_TRIPLE}) + string(REGEX MATCH "^[^-]*" LLVM_NATIVE_ARCH ${LLVM_HOST_TRIPLE}) endif () if (LLVM_NATIVE_ARCH MATCHES "i[2-6]86") Modified: llvm/trunk/cmake/modules/CMakeLists.txt URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/cmake/modules/CMakeLists.txt?rev=148822&r1=148821&r2=148822&view=diff ============================================================================== --- llvm/trunk/cmake/modules/CMakeLists.txt (original) +++ llvm/trunk/cmake/modules/CMakeLists.txt Tue Jan 24 12:00:44 2012 @@ -32,6 +32,6 @@ PATTERN LLVMConfig.cmake EXCLUDE PATTERN LLVMConfigVersion.cmake EXCLUDE PATTERN LLVM-Config.cmake EXCLUDE - PATTERN GetTargetTriple.cmake EXCLUDE + PATTERN GetHostTriple.cmake EXCLUDE PATTERN VersionFromVCS.cmake EXCLUDE PATTERN CheckAtomic.cmake EXCLUDE) Copied: llvm/trunk/cmake/modules/GetHostTriple.cmake (from r148782, llvm/trunk/cmake/modules/GetTargetTriple.cmake) URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/cmake/modules/GetHostTriple.cmake?p2=llvm/trunk/cmake/modules/GetHostTriple.cmake&p1=llvm/trunk/cmake/modules/GetTargetTriple.cmake&r1=148782&r2=148822&rev=148822&view=diff ============================================================================== --- llvm/trunk/cmake/modules/GetTargetTriple.cmake (original) +++ llvm/trunk/cmake/modules/GetHostTriple.cmake Tue Jan 24 12:00:44 2012 @@ -1,7 +1,7 @@ # Returns the host triple. 
# Invokes config.guess -function( get_target_triple var ) +function( get_host_triple var ) if( MSVC ) if( CMAKE_CL_64 ) set( value "x86_64-pc-win32" ) @@ -27,4 +27,4 @@ endif( MSVC ) set( ${var} ${value} PARENT_SCOPE ) message(STATUS "Target triple: ${value}") -endfunction( get_target_triple var ) +endfunction( get_host_triple var ) Removed: llvm/trunk/cmake/modules/GetTargetTriple.cmake URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/cmake/modules/GetTargetTriple.cmake?rev=148821&view=auto ============================================================================== --- llvm/trunk/cmake/modules/GetTargetTriple.cmake (original) +++ llvm/trunk/cmake/modules/GetTargetTriple.cmake (removed) @@ -1,30 +0,0 @@ -# Returns the host triple. -# Invokes config.guess - -function( get_target_triple var ) - if( MSVC ) - if( CMAKE_CL_64 ) - set( value "x86_64-pc-win32" ) - else() - set( value "i686-pc-win32" ) - endif() - elseif( MINGW AND NOT MSYS ) - if( CMAKE_SIZEOF_VOID_P EQUAL 8 ) - set( value "x86_64-w64-mingw32" ) - else() - set( value "i686-pc-mingw32" ) - endif() - else( MSVC ) - set(config_guess ${LLVM_MAIN_SRC_DIR}/autoconf/config.guess) - execute_process(COMMAND sh ${config_guess} - RESULT_VARIABLE TT_RV - OUTPUT_VARIABLE TT_OUT - OUTPUT_STRIP_TRAILING_WHITESPACE) - if( NOT TT_RV EQUAL 0 ) - message(FATAL_ERROR "Failed to execute ${config_guess}") - endif( NOT TT_RV EQUAL 0 ) - set( value ${TT_OUT} ) - endif( MSVC ) - set( ${var} ${value} PARENT_SCOPE ) - message(STATUS "Target triple: ${value}") -endfunction( get_target_triple var ) From stoklund at 2pi.dk Tue Jan 24 12:06:05 2012 From: stoklund at 2pi.dk (Jakob Stoklund Olesen) Date: Tue, 24 Jan 2012 18:06:05 -0000 Subject: [llvm-commits] [llvm] r148824 - in /llvm/trunk: include/llvm/Target/Target.td test/TableGen/SetTheory.td utils/TableGen/SetTheory.cpp Message-ID: <20120124180606.025642A6C12C@llvm.org> Author: stoklund Date: Tue Jan 24 12:06:05 2012 New Revision: 148824 URL: 
http://llvm.org/viewvc/llvm-project?rev=148824&view=rev Log: Add an (interleave A, B, ...) SetTheory operator. This will interleave the elements from two or more lists. Modified: llvm/trunk/include/llvm/Target/Target.td llvm/trunk/test/TableGen/SetTheory.td llvm/trunk/utils/TableGen/SetTheory.cpp Modified: llvm/trunk/include/llvm/Target/Target.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/Target.td?rev=148824&r1=148823&r2=148824&view=diff ============================================================================== --- llvm/trunk/include/llvm/Target/Target.td (original) +++ llvm/trunk/include/llvm/Target/Target.td Tue Jan 24 12:06:05 2012 @@ -200,12 +200,15 @@ // // (decimate GPR, 2) - Pick every N'th element, starting with the first. // +// (interleave A, B, ...) - Interleave the elements from each argument list. +// // All of these operators work on ordered sets, not lists. That means // duplicates are removed from sub-expressions. // Set operators. The rest is defined in TargetSelectionDAG.td. def sequence; def decimate; +def interleave; // RegisterTuples - Automatically generate super-registers by forming tuples of // sub-registers. This is useful for modeling register sequence constraints Modified: llvm/trunk/test/TableGen/SetTheory.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/TableGen/SetTheory.td?rev=148824&r1=148823&r2=148824&view=diff ============================================================================== --- llvm/trunk/test/TableGen/SetTheory.td (original) +++ llvm/trunk/test/TableGen/SetTheory.td Tue Jan 24 12:06:05 2012 @@ -165,3 +165,10 @@ // CHECK: S9b = [ e7 e6 e5 e4 e3 ] // CHECK: S9c = [ e0 ] // CHECK: S9d = [ a b c d e0 e3 e6 e9 e4 e5 e7 ] + +// The 'interleave' operator is almost the inverse of 'decimate'. 
+def interleave; +def T0a : Set<(interleave S9a, S9b)>; +def T0b : Set<(interleave S8e, S8d)>; +// CHECK: T0a = [ e3 e7 e4 e6 e5 ] +// CHECK: T0b = [ e0 e1 e2 e3 e4 e5 e6 e7 e8 e9 ] Modified: llvm/trunk/utils/TableGen/SetTheory.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/TableGen/SetTheory.cpp?rev=148824&r1=148823&r2=148824&view=diff ============================================================================== --- llvm/trunk/utils/TableGen/SetTheory.cpp (original) +++ llvm/trunk/utils/TableGen/SetTheory.cpp Tue Jan 24 12:06:05 2012 @@ -139,6 +139,24 @@ } }; +// (interleave S1, S2, ...) Interleave elements of the arguments. +struct InterleaveOp : public SetTheory::Operator { + void apply(SetTheory &ST, DagInit *Expr, RecSet &Elts) { + // Evaluate the arguments individually. + SmallVector<RecSet, 4> Args(Expr->getNumArgs()); + unsigned MaxSize = 0; + for (unsigned i = 0, e = Expr->getNumArgs(); i != e; ++i) { + ST.evaluate(Expr->getArg(i), Args[i]); + MaxSize = std::max(MaxSize, unsigned(Args[i].size())); + } + // Interleave arguments into Elts. + for (unsigned n = 0; n != MaxSize; ++n) + for (unsigned i = 0, e = Expr->getNumArgs(); i != e; ++i) + if (n < Args[i].size()) + Elts.insert(Args[i][n]); + } +}; + // (sequence "Format", From, To) Generate a sequence of records by name. 
struct SequenceOp : public SetTheory::Operator { void apply(SetTheory &ST, DagInit *Expr, RecSet &Elts) { @@ -211,6 +229,7 @@ addOperator("rotl", new RotOp(false)); addOperator("rotr", new RotOp(true)); addOperator("decimate", new DecimateOp); + addOperator("interleave", new InterleaveOp); addOperator("sequence", new SequenceOp); } From grosbach at apple.com Tue Jan 24 12:10:10 2012 From: grosbach at apple.com (Jim Grosbach) Date: Tue, 24 Jan 2012 10:10:10 -0800 Subject: [llvm-commits] [llvm] r148658 - in /llvm/trunk: lib/Target/ARM/ARM.td lib/Target/ARM/ARMAsmPrinter.cpp lib/Target/ARM/ARMInstrInfo.td lib/Target/ARM/ARMInstrNEON.td lib/Target/ARM/ARMInstrVFP.td lib/Target/ARM/ARMSchedule.td lib/Target/ARM/ARMSubtarge In-Reply-To: <000c01ccda73$52b639b0$f822ad10$%molloy@arm.com> References: <20120122120733.DC7EF2A6C12C@llvm.org> <000c01ccda73$52b639b0$f822ad10$%molloy@arm.com> Message-ID: <27CD8E28-7069-4B26-B21A-850759D4E569@apple.com> Do we even need a separate subtarget feature for it at all? I.e., is it reasonable to suppose a target which has NEONVFP4 but not VFP4? That seems really odd to me. I would rather get rid of the "neon-vfpv4" attribute entirely. If we do keep it, it should be renamed to "neon-vfp4" rather than "neon-vfpv4" to be consistent with the rest of the attributes which reference vfp versions. -Jim On Jan 24, 2012, at 12:36 AM, James Molloy wrote: > Hi Anton, > > I'm not 100% happy with this patch. The implementation means that > -mattr=+neon,+vfp4 does not enable neon-vfp4, which I think is rather > confusing. > > I'd much prefer the predicate to be rewritten such that hasNEONVFP4() === > hasNEONVFP4 || (hasVFP4 && hasNEON). 
> > Cheers, > > James > > -----Original Message----- > From: llvm-commits-bounces at cs.uiuc.edu > [mailto:llvm-commits-bounces at cs.uiuc.edu] On Behalf Of Anton Korobeynikov > Sent: 22 January 2012 12:08 > To: llvm-commits at cs.uiuc.edu > Subject: [llvm-commits] [llvm] r148658 - in /llvm/trunk: > lib/Target/ARM/ARM.td lib/Target/ARM/ARMAsmPrinter.cpp > lib/Target/ARM/ARMInstrInfo.td lib/Target/ARM/ARMInstrNEON.td > lib/Target/ARM/ARMInstrVFP.td lib/Target/ARM/ARMSchedule.td > lib/Target/ARM/ARMSubtarget... > > Author: asl > Date: Sun Jan 22 06:07:33 2012 > New Revision: 148658 > > URL: http://llvm.org/viewvc/llvm-project?rev=148658&view=rev > Log: > Add fused multiple+add instructions from VFPv4. > Patch by Ana Pazos! > > Added: > llvm/trunk/test/CodeGen/ARM/fusedMAC.ll > Modified: > llvm/trunk/lib/Target/ARM/ARM.td > llvm/trunk/lib/Target/ARM/ARMAsmPrinter.cpp > llvm/trunk/lib/Target/ARM/ARMInstrInfo.td > llvm/trunk/lib/Target/ARM/ARMInstrNEON.td > llvm/trunk/lib/Target/ARM/ARMInstrVFP.td > llvm/trunk/lib/Target/ARM/ARMSchedule.td > llvm/trunk/lib/Target/ARM/ARMSubtarget.cpp > llvm/trunk/lib/Target/ARM/ARMSubtarget.h > > Modified: llvm/trunk/lib/Target/ARM/ARM.td > URL: > http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARM.td?rev=148 > 658&r1=148657&r2=148658&view=diff > ============================================================================ > == > --- llvm/trunk/lib/Target/ARM/ARM.td (original) > +++ llvm/trunk/lib/Target/ARM/ARM.td Sun Jan 22 06:07:33 2012 > @@ -32,9 +32,15 @@ > def FeatureVFP3 : SubtargetFeature<"vfp3", "HasVFPv3", "true", > "Enable VFP3 instructions", > [FeatureVFP2]>; > +def FeatureVFP4 : SubtargetFeature<"vfp4", "HasVFPv4", "true", > + "Enable VFP4 instructions", > + [FeatureVFP3]>; > def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true", > "Enable NEON instructions", > [FeatureVFP3]>; > +def FeatureNEONVFP4 : SubtargetFeature<"neon-vfpv4", "HasNEONVFPv4", > "true", > + "Enable NEON-VFP4 instructions", > + 
[FeatureVFP4, FeatureNEON]>; > def FeatureThumb2 : SubtargetFeature<"thumb2", "HasThumb2", "true", > "Enable Thumb2 instructions">; > def FeatureNoARM : SubtargetFeature<"noarm", "NoARM", "true", > > Modified: llvm/trunk/lib/Target/ARM/ARMAsmPrinter.cpp > URL: > http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMAsmPrinter. > cpp?rev=148658&r1=148657&r2=148658&view=diff > ============================================================================ > == > --- llvm/trunk/lib/Target/ARM/ARMAsmPrinter.cpp (original) > +++ llvm/trunk/lib/Target/ARM/ARMAsmPrinter.cpp Sun Jan 22 06:07:33 2012 > @@ -719,15 +719,25 @@ > > if (Subtarget->hasNEON() && emitFPU) { > /* NEON is not exactly a VFP architecture, but GAS emit one of > - * neon/vfpv3/vfpv2 for .fpu parameters */ > - AttrEmitter->EmitTextAttribute(ARMBuildAttrs::Advanced_SIMD_arch, > "neon"); > + * neon/neon-vfpv4/vfpv3/vfpv2 for .fpu parameters */ > + if (Subtarget->hasNEONVFP4()) > + AttrEmitter->EmitTextAttribute(ARMBuildAttrs::Advanced_SIMD_arch, > "neon-vfpv4"); > + else > + AttrEmitter->EmitTextAttribute(ARMBuildAttrs::Advanced_SIMD_arch, > "neon"); > /* If emitted for NEON, omit from VFP below, since you can have both > * NEON and VFP in build attributes but only one .fpu */ > emitFPU = false; > } > > + /* VFPv4 + .fpu */ > + if (Subtarget->hasVFP4()) { > + AttrEmitter->EmitAttribute(ARMBuildAttrs::VFP_arch, > + ARMBuildAttrs::AllowFPv4A); > + if (emitFPU) > + AttrEmitter->EmitTextAttribute(ARMBuildAttrs::VFP_arch, "vfpv4"); > + > /* VFPv3 + .fpu */ > - if (Subtarget->hasVFP3()) { > + } else if (Subtarget->hasVFP3()) { > AttrEmitter->EmitAttribute(ARMBuildAttrs::VFP_arch, > ARMBuildAttrs::AllowFPv3A); > if (emitFPU) > > Modified: llvm/trunk/lib/Target/ARM/ARMInstrInfo.td > URL: > http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrInfo.t > d?rev=148658&r1=148657&r2=148658&view=diff > ============================================================================ > == > --- 
llvm/trunk/lib/Target/ARM/ARMInstrInfo.td (original) > +++ llvm/trunk/lib/Target/ARM/ARMInstrInfo.td Sun Jan 22 06:07:33 2012 > @@ -179,8 +179,14 @@ > AssemblerPredicate<"FeatureVFP2">; > def HasVFP3 : Predicate<"Subtarget->hasVFP3()">, > AssemblerPredicate<"FeatureVFP3">; > +def HasVFP4 : Predicate<"Subtarget->hasVFP4()">, > + AssemblerPredicate<"FeatureVFP4">; > +def NoVFP4 : Predicate<"!Subtarget->hasVFP4()">; > def HasNEON : Predicate<"Subtarget->hasNEON()">, > AssemblerPredicate<"FeatureNEON">; > +def HasNEONVFP4 : Predicate<"Subtarget->hasNEONVFP4()">, > + AssemblerPredicate<"FeatureNEONVFP4">; > +def NoNEONVFP4 : Predicate<"!Subtarget->hasNEONVFP4()">; > def HasFP16 : Predicate<"Subtarget->hasFP16()">, > AssemblerPredicate<"FeatureFP16">; > def HasDivide : Predicate<"Subtarget->hasDivide()">, > > Modified: llvm/trunk/lib/Target/ARM/ARMInstrNEON.td > URL: > http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrNEON.t > d?rev=148658&r1=148657&r2=148658&view=diff > ============================================================================ > == > --- llvm/trunk/lib/Target/ARM/ARMInstrNEON.td (original) > +++ llvm/trunk/lib/Target/ARM/ARMInstrNEON.td Sun Jan 22 06:07:33 2012 > @@ -3897,10 +3897,10 @@ > IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>; > def VMLAfd : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32", > v2f32, fmul_su, fadd_mlx>, > - Requires<[HasNEON, UseFPVMLx]>; > + Requires<[HasNEON, UseFPVMLx, NoNEONVFP4]>; > def VMLAfq : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32", > v4f32, fmul_su, fadd_mlx>, > - Requires<[HasNEON, UseFPVMLx]>; > + Requires<[HasNEON, UseFPVMLx, NoNEONVFP4]>; > defm VMLAsl : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D, > IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", > add>; > def VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32", > @@ -3955,10 +3955,10 @@ > IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>; > def VMLSfd : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32", > 
v2f32, fmul_su, fsub_mlx>, > - Requires<[HasNEON, UseFPVMLx]>; > + Requires<[HasNEON, UseFPVMLx, NoNEONVFP4]>; > def VMLSfq : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32", > v4f32, fmul_su, fsub_mlx>, > - Requires<[HasNEON, UseFPVMLx]>; > + Requires<[HasNEON, UseFPVMLx, NoNEONVFP4]>; > defm VMLSsl : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D, > IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", > sub>; > def VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32", > @@ -4007,6 +4007,24 @@ > "vqdmlsl", "s", int_arm_neon_vqdmlsl>; > defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b111, "vqdmlsl", "s", > int_arm_neon_vqdmlsl>; > > + > +// Fused Vector Multiply-Accumulate and Fused Multiply-Subtract Operations. > +def VFMAfd : N3VDMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACD, "vfma", "f32", > + v2f32, fmul_su, fadd_mlx>, > + Requires<[HasNEONVFP4]>; > + > +def VFMAfq : N3VQMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACQ, "vfma", "f32", > + v4f32, fmul_su, fadd_mlx>, > + Requires<[HasNEONVFP4]>; > + > +// Fused Vector Multiply Subtract (floating-point) > +def VFMSfd : N3VDMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACD, "vfms", "f32", > + v2f32, fmul_su, fsub_mlx>, > + Requires<[HasNEONVFP4]>; > +def VFMSfq : N3VQMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACQ, "vfms", "f32", > + v4f32, fmul_su, fsub_mlx>, > + Requires<[HasNEONVFP4]>; > + > // Vector Subtract Operations. 
> > // VSUB : Vector Subtract (integer and floating-point) > @@ -5358,9 +5376,13 @@ > def : N3VSPat; > def : N3VSPat; > def : N3VSMulOpPat, > - Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>; > + Requires<[HasNEON, UseNEONForFP, UseFPVMLx, NoNEONVFP4]>; > def : N3VSMulOpPat, > - Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>; > + Requires<[HasNEON, UseNEONForFP, UseFPVMLx, NoNEONVFP4]>; > +def : N3VSMulOpPat, > + Requires<[HasNEONVFP4, UseNEONForFP]>; > +def : N3VSMulOpPat, > + Requires<[HasNEONVFP4, UseNEONForFP]>; > def : N2VSPat; > def : N2VSPat; > def : N3VSPat; > > Modified: llvm/trunk/lib/Target/ARM/ARMInstrVFP.td > URL: > http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrVFP.td > ?rev=148658&r1=148657&r2=148658&view=diff > ============================================================================ > == > --- llvm/trunk/lib/Target/ARM/ARMInstrVFP.td (original) > +++ llvm/trunk/lib/Target/ARM/ARMInstrVFP.td Sun Jan 22 06:07:33 2012 > @@ -920,7 +920,7 @@ > [(set DPR:$Dd, (fadd_mlx (fmul_su DPR:$Dn, DPR:$Dm), > (f64 DPR:$Ddin)))]>, > RegConstraint<"$Ddin = $Dd">, > - Requires<[HasVFP2,UseFPVMLx]>; > + Requires<[HasVFP2,UseFPVMLx,NoVFP4]>; > > def VMLAS : ASbIn<0b11100, 0b00, 0, 0, > (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), > @@ -928,7 +928,7 @@ > [(set SPR:$Sd, (fadd_mlx (fmul_su SPR:$Sn, SPR:$Sm), > SPR:$Sdin))]>, > RegConstraint<"$Sdin = $Sd">, > - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> { > + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,NoVFP4]> { > // Some single precision VFP instructions may be executed on both NEON > and > // VFP pipelines on A8. 
> let D = VFPNeonA8Domain; > @@ -936,10 +936,10 @@ > > def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))), > (VMLAD DPR:$dstin, DPR:$a, DPR:$b)>, > - Requires<[HasVFP2,UseFPVMLx]>; > + Requires<[HasVFP2,UseFPVMLx,NoVFP4]>; > def : Pat<(fadd_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)), > (VMLAS SPR:$dstin, SPR:$a, SPR:$b)>, > - Requires<[HasVFP2,DontUseNEONForFP, UseFPVMLx]>; > + Requires<[HasVFP2,DontUseNEONForFP, UseFPVMLx,NoVFP4]>; > > def VMLSD : ADbI<0b11100, 0b00, 1, 0, > (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm), > @@ -947,7 +947,7 @@ > [(set DPR:$Dd, (fadd_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)), > (f64 DPR:$Ddin)))]>, > RegConstraint<"$Ddin = $Dd">, > - Requires<[HasVFP2,UseFPVMLx]>; > + Requires<[HasVFP2,UseFPVMLx,NoVFP4]>; > > def VMLSS : ASbIn<0b11100, 0b00, 1, 0, > (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), > @@ -955,7 +955,7 @@ > [(set SPR:$Sd, (fadd_mlx (fneg (fmul_su SPR:$Sn, > SPR:$Sm)), > SPR:$Sdin))]>, > RegConstraint<"$Sdin = $Sd">, > - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> { > + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,NoVFP4]> { > // Some single precision VFP instructions may be executed on both NEON > and > // VFP pipelines on A8. 
> let D = VFPNeonA8Domain; > @@ -963,10 +963,10 @@ > > def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))), > (VMLSD DPR:$dstin, DPR:$a, DPR:$b)>, > - Requires<[HasVFP2,UseFPVMLx]>; > + Requires<[HasVFP2,UseFPVMLx,NoVFP4]>; > def : Pat<(fsub_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)), > (VMLSS SPR:$dstin, SPR:$a, SPR:$b)>, > - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>; > + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,NoVFP4]>; > > def VNMLAD : ADbI<0b11100, 0b01, 1, 0, > (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm), > @@ -974,7 +974,7 @@ > [(set DPR:$Dd,(fsub_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)), > (f64 DPR:$Ddin)))]>, > RegConstraint<"$Ddin = $Dd">, > - Requires<[HasVFP2,UseFPVMLx]>; > + Requires<[HasVFP2,UseFPVMLx,NoVFP4]>; > > def VNMLAS : ASbI<0b11100, 0b01, 1, 0, > (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), > @@ -982,7 +982,7 @@ > [(set SPR:$Sd, (fsub_mlx (fneg (fmul_su SPR:$Sn, > SPR:$Sm)), > SPR:$Sdin))]>, > RegConstraint<"$Sdin = $Sd">, > - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> { > + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,NoVFP4]> { > // Some single precision VFP instructions may be executed on both NEON > and > // VFP pipelines on A8. 
> let D = VFPNeonA8Domain; > @@ -990,10 +990,10 @@ > > def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin), > (VNMLAD DPR:$dstin, DPR:$a, DPR:$b)>, > - Requires<[HasVFP2,UseFPVMLx]>; > + Requires<[HasVFP2,UseFPVMLx,NoVFP4]>; > def : Pat<(fsub_mlx (fneg (fmul_su SPR:$a, SPR:$b)), SPR:$dstin), > (VNMLAS SPR:$dstin, SPR:$a, SPR:$b)>, > - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>; > + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,NoVFP4]>; > > def VNMLSD : ADbI<0b11100, 0b01, 0, 0, > (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm), > @@ -1001,14 +1001,14 @@ > [(set DPR:$Dd, (fsub_mlx (fmul_su DPR:$Dn, DPR:$Dm), > (f64 DPR:$Ddin)))]>, > RegConstraint<"$Ddin = $Dd">, > - Requires<[HasVFP2,UseFPVMLx]>; > + Requires<[HasVFP2,UseFPVMLx,NoVFP4]>; > > def VNMLSS : ASbI<0b11100, 0b01, 0, 0, > (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), > IIC_fpMAC32, "vnmls", ".f32\t$Sd, $Sn, $Sm", > [(set SPR:$Sd, (fsub_mlx (fmul_su SPR:$Sn, SPR:$Sm), > SPR:$Sdin))]>, > RegConstraint<"$Sdin = $Sd">, > - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> { > + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,NoVFP4]> { > // Some single precision VFP instructions may be executed on both NEON > and > // VFP pipelines on A8. > let D = VFPNeonA8Domain; > @@ -1016,11 +1016,116 @@ > > def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin), > (VNMLSD DPR:$dstin, DPR:$a, DPR:$b)>, > - Requires<[HasVFP2,UseFPVMLx]>; > + Requires<[HasVFP2,UseFPVMLx,NoVFP4]>; > def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin), > (VNMLSS SPR:$dstin, SPR:$a, SPR:$b)>, > - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>; > + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,NoVFP4]>; > > +//===---------------------------------------------------------------------- > ===// > +// Fused FP Multiply-Accumulate Operations. 
> +// > +def VFMAD : ADbI<0b11101, 0b10, 0, 0, > + (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm), > + IIC_fpFMAC64, "vfma", ".f64\t$Dd, $Dn, $Dm", > + [(set DPR:$Dd, (fadd_mlx (fmul_su DPR:$Dn, DPR:$Dm), > + (f64 DPR:$Ddin)))]>, > + RegConstraint<"$Ddin = $Dd">, > + Requires<[HasVFP4]>; > + > +def VFMAS : ASbIn<0b11101, 0b10, 0, 0, > + (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), > + IIC_fpFMAC32, "vfma", ".f32\t$Sd, $Sn, $Sm", > + [(set SPR:$Sd, (fadd_mlx (fmul_su SPR:$Sn, SPR:$Sm), > + SPR:$Sdin))]>, > + RegConstraint<"$Sdin = $Sd">, > + Requires<[HasVFP4,DontUseNEONForFP]> { > + // Some single precision VFP instructions may be executed on both NEON > and > + // VFP pipelines. > +} > + > +def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))), > + (VFMAD DPR:$dstin, DPR:$a, DPR:$b)>, > + Requires<[HasVFP4]>; > +def : Pat<(fadd_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)), > + (VFMAS SPR:$dstin, SPR:$a, SPR:$b)>, > + Requires<[HasVFP4,DontUseNEONForFP]>; > + > +def VFMSD : ADbI<0b11101, 0b10, 1, 0, > + (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm), > + IIC_fpFMAC64, "vfms", ".f64\t$Dd, $Dn, $Dm", > + [(set DPR:$Dd, (fadd_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)), > + (f64 DPR:$Ddin)))]>, > + RegConstraint<"$Ddin = $Dd">, > + Requires<[HasVFP4]>; > + > +def VFMSS : ASbIn<0b11101, 0b10, 1, 0, > + (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), > + IIC_fpFMAC32, "vfms", ".f32\t$Sd, $Sn, $Sm", > + [(set SPR:$Sd, (fadd_mlx (fneg (fmul_su SPR:$Sn, > SPR:$Sm)), > + SPR:$Sdin))]>, > + RegConstraint<"$Sdin = $Sd">, > + Requires<[HasVFP4,DontUseNEONForFP]> { > + // Some single precision VFP instructions may be executed on both NEON > and > + // VFP pipelines. 
> +} > + > +def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))), > + (VFMSD DPR:$dstin, DPR:$a, DPR:$b)>, > + Requires<[HasVFP4]>; > +def : Pat<(fsub_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)), > + (VFMSS SPR:$dstin, SPR:$a, SPR:$b)>, > + Requires<[HasVFP4,DontUseNEONForFP]>; > + > +def VFNMAD : ADbI<0b11101, 0b01, 1, 0, > + (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm), > + IIC_fpFMAC64, "vfnma", ".f64\t$Dd, $Dn, $Dm", > + [(set DPR:$Dd,(fsub_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)), > + (f64 DPR:$Ddin)))]>, > + RegConstraint<"$Ddin = $Dd">, > + Requires<[HasVFP4]>; > + > +def VFNMAS : ASbI<0b11101, 0b01, 1, 0, > + (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), > + IIC_fpFMAC32, "vfnma", ".f32\t$Sd, $Sn, $Sm", > + [(set SPR:$Sd, (fsub_mlx (fneg (fmul_su SPR:$Sn, > SPR:$Sm)), > + SPR:$Sdin))]>, > + RegConstraint<"$Sdin = $Sd">, > + Requires<[HasVFP4,DontUseNEONForFP]> { > + // Some single precision VFP instructions may be executed on both NEON > and > + // VFP pipelines. > +} > + > +def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin), > + (VFNMAD DPR:$dstin, DPR:$a, DPR:$b)>, > + Requires<[HasVFP4]>; > +def : Pat<(fsub_mlx (fneg (fmul_su SPR:$a, SPR:$b)), SPR:$dstin), > + (VFNMAS SPR:$dstin, SPR:$a, SPR:$b)>, > + Requires<[HasVFP4,DontUseNEONForFP]>; > + > +def VFNMSD : ADbI<0b11101, 0b01, 0, 0, > + (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm), > + IIC_fpFMAC64, "vfnms", ".f64\t$Dd, $Dn, $Dm", > + [(set DPR:$Dd, (fsub_mlx (fmul_su DPR:$Dn, DPR:$Dm), > + (f64 DPR:$Ddin)))]>, > + RegConstraint<"$Ddin = $Dd">, > + Requires<[HasVFP4]>; > + > +def VFNMSS : ASbI<0b11101, 0b01, 0, 0, > + (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), > + IIC_fpFMAC32, "vfnms", ".f32\t$Sd, $Sn, $Sm", > + [(set SPR:$Sd, (fsub_mlx (fmul_su SPR:$Sn, SPR:$Sm), > SPR:$Sdin))]>, > + RegConstraint<"$Sdin = $Sd">, > + Requires<[HasVFP4,DontUseNEONForFP]> { > + // Some single precision VFP instructions may be executed on both NEON > and > + // VFP 
pipelines. > +} > + > +def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin), > + (VFNMSD DPR:$dstin, DPR:$a, DPR:$b)>, > + Requires<[HasVFP4]>; > +def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin), > + (VFNMSS SPR:$dstin, SPR:$a, SPR:$b)>, > + Requires<[HasVFP4,DontUseNEONForFP]>; > > > //===----------------------------------------------------------------------= > ==// > // FP Conditional moves. > > Modified: llvm/trunk/lib/Target/ARM/ARMSchedule.td > URL: > http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMSchedule.td > ?rev=148658&r1=148657&r2=148658&view=diff > ============================================================================ > == > --- llvm/trunk/lib/Target/ARM/ARMSchedule.td (original) > +++ llvm/trunk/lib/Target/ARM/ARMSchedule.td Sun Jan 22 06:07:33 2012 > @@ -118,6 +118,8 @@ > def IIC_fpMUL64 : InstrItinClass; > def IIC_fpMAC32 : InstrItinClass; > def IIC_fpMAC64 : InstrItinClass; > +def IIC_fpFMAC32 : InstrItinClass; > +def IIC_fpFMAC64 : InstrItinClass; > def IIC_fpDIV32 : InstrItinClass; > def IIC_fpDIV64 : InstrItinClass; > def IIC_fpSQRT32 : InstrItinClass; > @@ -208,6 +210,8 @@ > def IIC_VPERMQ3 : InstrItinClass; > def IIC_VMACD : InstrItinClass; > def IIC_VMACQ : InstrItinClass; > +def IIC_VFMACD : InstrItinClass; > +def IIC_VFMACQ : InstrItinClass; > def IIC_VRECSD : InstrItinClass; > def IIC_VRECSQ : InstrItinClass; > def IIC_VCNTiD : InstrItinClass; > > Modified: llvm/trunk/lib/Target/ARM/ARMSubtarget.cpp > URL: > http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMSubtarget.c > pp?rev=148658&r1=148657&r2=148658&view=diff > ============================================================================ > == > --- llvm/trunk/lib/Target/ARM/ARMSubtarget.cpp (original) > +++ llvm/trunk/lib/Target/ARM/ARMSubtarget.cpp Sun Jan 22 06:07:33 2012 > @@ -47,7 +47,9 @@ > , HasV7Ops(false) > , HasVFPv2(false) > , HasVFPv3(false) > + , HasVFPv4(false) > , HasNEON(false) > + , HasNEONVFPv4(false) > , 
UseNEONForSinglePrecisionFP(false) > , SlowFPVMLx(false) > , HasVMLxForwarding(false) > > Modified: llvm/trunk/lib/Target/ARM/ARMSubtarget.h > URL: > http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMSubtarget.h > ?rev=148658&r1=148657&r2=148658&view=diff > ============================================================================ > == > --- llvm/trunk/lib/Target/ARM/ARMSubtarget.h (original) > +++ llvm/trunk/lib/Target/ARM/ARMSubtarget.h Sun Jan 22 06:07:33 2012 > @@ -45,11 +45,13 @@ > bool HasV6T2Ops; > bool HasV7Ops; > > - /// HasVFPv2, HasVFPv3, HasNEON - Specify what floating point ISAs are > - /// supported. > + /// HasVFPv2, HasVFPv3, HasVFPv4, HasNEON, HasNEONVFPv4 - Specify what > + /// floating point ISAs are supported. > bool HasVFPv2; > bool HasVFPv3; > + bool HasVFPv4; > bool HasNEON; > + bool HasNEONVFPv4; > > /// UseNEONForSinglePrecisionFP - if the NEONFP attribute has been > /// specified. Use the method useNEONForSinglePrecisionFP() to > @@ -197,7 +199,9 @@ > > bool hasVFP2() const { return HasVFPv2; } > bool hasVFP3() const { return HasVFPv3; } > + bool hasVFP4() const { return HasVFPv4; } > bool hasNEON() const { return HasNEON; } > + bool hasNEONVFP4() const { return HasNEONVFPv4; } > bool useNEONForSinglePrecisionFP() const { > return hasNEON() && UseNEONForSinglePrecisionFP; } > > > Added: llvm/trunk/test/CodeGen/ARM/fusedMAC.ll > URL: > http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/fusedMAC.ll? > rev=148658&view=auto > ============================================================================ > == > --- llvm/trunk/test/CodeGen/ARM/fusedMAC.ll (added) > +++ llvm/trunk/test/CodeGen/ARM/fusedMAC.ll Sun Jan 22 06:07:33 2012 > @@ -0,0 +1,68 @@ > +; RUN: llc < %s -march=arm -mattr=+neon-vfpv4 | FileCheck %s > +; Check generated fused MAC and MLS. 
> + > +define double @fusedMACTest1(double %d1, double %d2, double %d3) nounwind > readnone noinline { > +;CHECK: fusedMACTest1: > +;CHECK: vfma.f64 > + %1 = fmul double %d1, %d2 > + %2 = fadd double %1, %d3 > + ret double %2 > +} > + > +define float @fusedMACTest2(float %f1, float %f2, float %f3) nounwind > readnone noinline { > +;CHECK: fusedMACTest2: > +;CHECK: vfma.f32 > + %1 = fmul float %f1, %f2 > + %2 = fadd float %1, %f3 > + ret float %2 > +} > + > +define double @fusedMACTest3(double %d1, double %d2, double %d3) nounwind > readnone noinline { > +;CHECK: fusedMACTest3: > +;CHECK: vfms.f64 > + %1 = fmul double %d2, %d3 > + %2 = fsub double %d1, %1 > + ret double %2 > +} > + > +define float @fusedMACTest4(float %f1, float %f2, float %f3) nounwind > readnone noinline { > +;CHECK: fusedMACTest4: > +;CHECK: vfms.f32 > + %1 = fmul float %f2, %f3 > + %2 = fsub float %f1, %1 > + ret float %2 > +} > + > +define double @fusedMACTest5(double %d1, double %d2, double %d3) nounwind > readnone noinline { > +;CHECK: fusedMACTest5: > +;CHECK: vfnma.f64 > + %1 = fmul double %d1, %d2 > + %2 = fsub double -0.0, %1 > + %3 = fsub double %2, %d3 > + ret double %3 > +} > + > +define float @fusedMACTest6(float %f1, float %f2, float %f3) nounwind { > +;CHECK: fusedMACTest6: > +;CHECK: vfnma.f32 > + %1 = fmul float %f1, %f2 > + %2 = fsub float -0.0, %1 > + %3 = fsub float %2, %f3 > + ret float %3 > +} > + > +define double @fusedMACTest7(double %d1, double %d2, double %d3) nounwind { > +;CHECK: fusedMACTest7: > +;CHECK: vfnms.f64 > + %1 = fmul double %d1, %d2 > + %2 = fsub double %1, %d3 > + ret double %2 > +} > + > +define float @fusedMACTest8(float %f1, float %f2, float %f3) nounwind { > +;CHECK: fusedMACTest8: > +;CHECK: vfnms.f32 > + %1 = fmul float %f1, %f2 > + %2 = fsub float %1, %f3 > + ret float %2 > +} > > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits > > > > 
> > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits From stoklund at 2pi.dk Tue Jan 24 12:09:18 2012 From: stoklund at 2pi.dk (Jakob Stoklund Olesen) Date: Tue, 24 Jan 2012 18:09:18 -0000 Subject: [llvm-commits] [llvm] r148825 - /llvm/trunk/lib/CodeGen/AllocationOrder.h Message-ID: <20120124180918.951862A6C12C@llvm.org> Author: stoklund Date: Tue Jan 24 12:09:18 2012 New Revision: 148825 URL: http://llvm.org/viewvc/llvm-project?rev=148825&view=rev Log: Fix old doxygen comment. Modified: llvm/trunk/lib/CodeGen/AllocationOrder.h Modified: llvm/trunk/lib/CodeGen/AllocationOrder.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AllocationOrder.h?rev=148825&r1=148824&r2=148825&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/AllocationOrder.h (original) +++ llvm/trunk/lib/CodeGen/AllocationOrder.h Tue Jan 24 12:09:18 2012 @@ -34,8 +34,7 @@ /// AllocationOrder - Create a new AllocationOrder for VirtReg. /// @param VirtReg Virtual register to allocate for. /// @param VRM Virtual register map for function. - /// @param ReservedRegs Set of reserved registers as returned by - /// TargetRegisterInfo::getReservedRegs(). + /// @param RegClassInfo Information about reserved and allocatable registers. 
AllocationOrder(unsigned VirtReg, const VirtRegMap &VRM, const RegisterClassInfo &RegClassInfo); From stoklund at 2pi.dk Tue Jan 24 12:13:58 2012 From: stoklund at 2pi.dk (Jakob Stoklund Olesen) Date: Tue, 24 Jan 2012 10:13:58 -0800 Subject: [llvm-commits] [llvm] r132581 - in /llvm/trunk: lib/CodeGen/AllocationOrder.cpp lib/CodeGen/AllocationOrder.h lib/CodeGen/RegAllocBase.h lib/CodeGen/RegAllocBasic.cpp lib/CodeGen/RegAllocGreedy.cpp test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll In-Reply-To: References: <20110603203454.2E8092A6C12D@llvm.org> Message-ID: On Jan 24, 2012, at 4:32 AM, Jay Foad wrote: > Hi Jakob, > > On 3 June 2011 21:34, Jakob Stoklund Olesen wrote: >> Author: stoklund >> Date: Fri Jun 3 15:34:53 2011 >> New Revision: 132581 >> @@ -37,7 +37,7 @@ >> /// TargetRegisterInfo::getReservedRegs(). >> AllocationOrder(unsigned VirtReg, >> const VirtRegMap &VRM, >> - const BitVector &ReservedRegs); >> + const RegisterClassInfo &RegClassInfo); > > The comment above this function needs updating for this change. Better late than never. r148825. Thanks, /jakob From asl at math.spbu.ru Tue Jan 24 12:15:56 2012 From: asl at math.spbu.ru (Anton Korobeynikov) Date: Tue, 24 Jan 2012 22:15:56 +0400 Subject: [llvm-commits] [llvm] r148658 - in /llvm/trunk: lib/Target/ARM/ARM.td lib/Target/ARM/ARMAsmPrinter.cpp lib/Target/ARM/ARMInstrInfo.td lib/Target/ARM/ARMInstrNEON.td lib/Target/ARM/ARMInstrVFP.td lib/Target/ARM/ARMSchedule.td lib/Target/ARM/ARMSub Message-ID: Hi Jim, > If we do keep it, it should be renamed to "neon-vfp4" rather than "neon-vfpv4" to be consistent with the rest of the attributes which reference vfp versions. ARM docs mentions this as Advances SIMDv2. I just found the following link: http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.ddi0438c/CEGCDBHC.html So, it's not possible to have just NEONv2 w/o VFPv4. I will take care about necessary backend changes. 
-- With best regards, Anton Korobeynikov Faculty of Mathematics and Mechanics, Saint Petersburg State University From mcrosier at apple.com Tue Jan 24 12:25:08 2012 From: mcrosier at apple.com (Chad Rosier) Date: Tue, 24 Jan 2012 10:25:08 -0800 Subject: [llvm-commits] Tuning LLVM Greedy Register Allocator to optimize for code size when targeting ARM Thumb 2 instruction set In-Reply-To: <000c01ccda5b$76b208c0$64161a40$@org> References: <000001ccda35$239f0e10$6add2a30$@org> <0E37D7B5-BBF8-4D29-9679-5C4D22B32AEB@2pi.dk> <000c01ccda5b$76b208c0$64161a40$@org> Message-ID: <38F583CE-3516-421A-84C2-46978621E648@apple.com> Hi Zino, One comments below. On Jan 23, 2012, at 9:46 PM, Zino Benaissa wrote: > Hi Jacob, > > Thanks for quick feedback. See my comments below, > > Cheers, > > Zino > From: Jakob Stoklund Olesen [mailto:stoklund at 2pi.dk] > Sent: Monday, January 23, 2012 7:18 PM > To: Zino Benaissa > Cc: llvm-commits at cs.uiuc.edu; rajav at codeaurora.org > Subject: Re: [llvm-commits] Tuning LLVM Greedy Register Allocator to optimize for code size when targeting ARM Thumb 2 instruction set > > > On Jan 23, 2012, at 5:11 PM, Zino Benaissa wrote: > > > Description: > This contribution extends LLVM greedy Register Allocator to optimize for code size when LLVM compiler targets ARM Thumb 2 instruction set. This heuristic favors assigning register R0 through R7 to operands used in instruction that can be encoded in 16 bits (16-bit is allowed only if R0-7 are used). Operands that appear most frequently in a function (and in instructions that qualify) get R0-7 register. > This heuristic is turned on by default and has impact on generated code only if -mthumb compiler switch is used. To turn this heuristic off use -disable-favor-r0-7 feature flag. > > This patch modifies: > 1) The LLVM greedy register allocator located in LLVM/CodeGen directory: To add the new code size heuristic. 
> 2) The ARM-specific files located in LLVM/Target/ARM directory: To add the function that determines which instruction can be encoded in 16-bits and a fix to enable the compiler to emit CMN instruction in 16-bits encoding. > 3) The LLVM test suite: fix test/CodeGen/Thumb2/thumb2-cmn.ll test. > > Hi Zino, > > Thanks for working on this interesting patch. > > Please submit the CMN-related changes as an independent patch. > > - Sure, I can do that. > > If you don't mind, I would like you to run a couple of experiments to better understand why this change improves some benchmarks. > > - Sure, please let me know what you find. Just to be clear, I believe Jakob was suggesting *you* run the experiments. Chad > > First of all, is the regHasSizeImpact() hook necessary? Do you get significantly different results if you pretend this function always returns 2? > > - For my experiments, precision is quite important to maximize code size gains. > > Second, what happens if you use a 'flatter' spill weight? Instead of your patch, in LiveIntervals::getSpillWeight replace this: > > float lc = std::pow(1 + (100.0 / (loopDepth + 10)), (double)loopDepth); > > with this: > > float lc = std::pow(1 + (100.0 / (loopDepth + 10))/5, (double)loopDepth); > > And in CalcSpillWeights.h, replace the number 25 in normalizeSpillWeight() with 250. Does that give you similar results? > > - I did not try this experiment. However by doing so, aren't you increasing chances for spills to be inserted in hot blocks (like inner loops)? > > Thanks, > /jakob > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits -------------- next part -------------- An HTML attachment was scrubbed... 
URL: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120124/2761b13f/attachment.html From apazos at codeaurora.org Tue Jan 24 12:25:43 2012 From: apazos at codeaurora.org (Ana Pazos) Date: Tue, 24 Jan 2012 10:25:43 -0800 Subject: [llvm-commits] LLVM patch to support ARM fused multiply add/subtract instructions In-Reply-To: References: <027a01cccb29$a4709100$ed51b300$@org> <003c01ccda40$dddd4610$9997d230$@org> Message-ID: <009d01ccdac5$950ecf50$bf2c6df0$@org> Hi folks! Thanks for the feedback on the fused multiply add/sub patch. I will work with Anton on an updated patch. Anton, Reply to your question below: FMA mode. The reason that the fused operation (FMA) is more accurate than the chained (VMLA) is the chained operation performs a round after the multiply and before the add. This is only an error of +/-2^-24 (for single precision) but it can have a huge impact on the results returned. Ana. -----Original Message----- From: Anton Korobeynikov [mailto:anton at korobeynikov.info] Sent: Monday, January 23, 2012 9:07 PM To: Ana Pazos Cc: llvm-commits at cs.uiuc.edu; rajav at codeaurora.org Subject: Re: [llvm-commits] LLVM patch to support ARM fused multiply add/subtract instructions Hi Ana, > Regarding accuracy, what I know is that Qualcomm provides IEEE-754 2008 specified result. This means the multiply is performed without any loss of accuracy (i.e., no rounding) and then the add/subtract operation happens. The final result is rounded according to the configured rounding mode in the VFP unit. In which case multiply is performed w/o loss of accuracy? FMA mode? Or usual VMLA? -- With best regards, Anton Korobeynikov Faculty of Mathematics and Mechanics, Saint Petersburg State University -------------- next part -------------- An HTML attachment was scrubbed... 
URL: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120124/da637e43/attachment.html From James.Molloy at arm.com Tue Jan 24 12:35:10 2012 From: James.Molloy at arm.com (James Molloy) Date: Tue, 24 Jan 2012 18:35:10 +0000 Subject: [llvm-commits] [llvm] r148658 - in /llvm/trunk: lib/Target/ARM/ARM.td lib/Target/ARM/ARMAsmPrinter.cpp lib/Target/ARM/ARMInstrInfo.td lib/Target/ARM/ARMInstrNEON.td lib/Target/ARM/ARMInstrVFP.td lib/Target/ARM/ARMSchedule.td lib/Target/ARM/ARMSub In-Reply-To: References: Message-ID: Hi Anton, From my discussions with colleagues we come to a different conclusion - that it is indeed possible to have the NEONv2 vector integer unit without the floating point unit of VFPv4. We don't think it's possible to have VFPv4 with NEONv1 however. Cheers, James ________________________________________ From: anton at korobeynikov.info [anton at korobeynikov.info] On Behalf Of Anton Korobeynikov [asl at math.spbu.ru] Sent: 24 January 2012 18:15 To: Jim Grosbach Cc: James Molloy; llvm-commits at cs.uiuc.edu Subject: Re: [llvm-commits] [llvm] r148658 - in /llvm/trunk: lib/Target/ARM/ARM.td lib/Target/ARM/ARMAsmPrinter.cpp lib/Target/ARM/ARMInstrInfo.td lib/Target/ARM/ARMInstrNEON.td lib/Target/ARM/ARMInstrVFP.td lib/Target/ARM/ARMSchedule.td lib/Target/ARM/ARMSub Hi Jim, > If we do keep it, it should be renamed to "neon-vfp4" rather than "neon-vfpv4" to be consistent with the rest of the attributes which reference vfp versions. ARM docs mention this as Advanced SIMDv2. I just found the following link: http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.ddi0438c/CEGCDBHC.html So, it's not possible to have just NEONv2 w/o VFPv4. I will take care about necessary backend changes. -- With best regards, Anton Korobeynikov Faculty of Mathematics and Mechanics, Saint Petersburg State University -- IMPORTANT NOTICE: The contents of this email and any attachments are confidential and may also be privileged. 
If you are not the intended recipient, please notify the sender immediately and do not disclose the contents to any other person, use it for any purpose, or store or copy the information in any medium. Thank you. From grosbach at apple.com Tue Jan 24 12:37:25 2012 From: grosbach at apple.com (Jim Grosbach) Date: Tue, 24 Jan 2012 18:37:25 -0000 Subject: [llvm-commits] [llvm] r148832 - in /llvm/trunk: lib/Target/ARM/ARMInstrNEON.td lib/Target/ARM/AsmParser/ARMAsmParser.cpp test/MC/ARM/neon-vld-encoding.s Message-ID: <20120124183725.B7FDF2A6C12C@llvm.org> Author: grosbach Date: Tue Jan 24 12:37:25 2012 New Revision: 148832 URL: http://llvm.org/viewvc/llvm-project?rev=148832&view=rev Log: NEON VLD4(one lane) assembly parsing and encoding. Modified: llvm/trunk/lib/Target/ARM/ARMInstrNEON.td llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp llvm/trunk/test/MC/ARM/neon-vld-encoding.s Modified: llvm/trunk/lib/Target/ARM/ARMInstrNEON.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrNEON.td?rev=148832&r1=148831&r2=148832&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrNEON.td (original) +++ llvm/trunk/lib/Target/ARM/ARMInstrNEON.td Tue Jan 24 12:37:25 2012 @@ -306,6 +306,57 @@ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); } +// Register list of four D registers with byte lane subscripting. +def VecListFourDByteIndexAsmOperand : AsmOperandClass { + let Name = "VecListFourDByteIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListFourDByteIndexed : Operand { + let ParserMatchClass = VecListFourDByteIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} +// ...with half-word lane subscripting. 
+def VecListFourDHWordIndexAsmOperand : AsmOperandClass { + let Name = "VecListFourDHWordIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListFourDHWordIndexed : Operand { + let ParserMatchClass = VecListFourDHWordIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} +// ...with word lane subscripting. +def VecListFourDWordIndexAsmOperand : AsmOperandClass { + let Name = "VecListFourDWordIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListFourDWordIndexed : Operand { + let ParserMatchClass = VecListFourDWordIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} +// Register list of four Q registers with half-word lane subscripting. +def VecListFourQHWordIndexAsmOperand : AsmOperandClass { + let Name = "VecListFourQHWordIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListFourQHWordIndexed : Operand { + let ParserMatchClass = VecListFourQHWordIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} +// ...with word lane subscripting. +def VecListFourQWordIndexAsmOperand : AsmOperandClass { + let Name = "VecListFourQWordIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListFourQWordIndexed : Operand { + let ParserMatchClass = VecListFourQWordIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} + //===----------------------------------------------------------------------===// // NEON-specific DAG Nodes. @@ -6203,6 +6254,55 @@ (ins VecListThreeQ:$list, addrmode6:$addr, rGPR:$Rm, pred:$p)>; +// VLD4 single-lane pseudo-instructions. These need special handling for +// the lane index that an InstAlias can't handle, so we use these instead. 
+def VLD4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr", + (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD4LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", + (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD4LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", + (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD4LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", + (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD4LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", + (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>; + +def VLD4LNdWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!", + (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD4LNdWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!", + (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD4LNdWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!", + (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD4LNqWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!", + (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD4LNqWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!", + (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD4LNdWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm", + (ins VecListFourDByteIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD4LNdWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm", + (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD4LNdWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm", 
+ (ins VecListFourDWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD4LNqWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm", + (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD4LNqWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm", + (ins VecListFourQWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; + // VLD4 multiple structure pseudo-instructions. These need special handling for Modified: llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp?rev=148832&r1=148831&r2=148832&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp (original) +++ llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp Tue Jan 24 12:37:25 2012 @@ -1203,6 +1203,31 @@ return VectorList.Count == 3 && VectorList.LaneIndex <= 1; } + bool isVecListFourDByteIndexed() const { + if (!isSingleSpacedVectorIndexed()) return false; + return VectorList.Count == 4 && VectorList.LaneIndex <= 7; + } + + bool isVecListFourDHWordIndexed() const { + if (!isSingleSpacedVectorIndexed()) return false; + return VectorList.Count == 4 && VectorList.LaneIndex <= 3; + } + + bool isVecListFourQWordIndexed() const { + if (!isDoubleSpacedVectorIndexed()) return false; + return VectorList.Count == 4 && VectorList.LaneIndex <= 1; + } + + bool isVecListFourQHWordIndexed() const { + if (!isDoubleSpacedVectorIndexed()) return false; + return VectorList.Count == 4 && VectorList.LaneIndex <= 3; + } + + bool isVecListFourDWordIndexed() const { + if (!isSingleSpacedVectorIndexed()) return false; + return VectorList.Count == 4 && VectorList.LaneIndex <= 1; + } + bool isVectorIndex8() const { if (Kind != k_VectorIndex) return false; return VectorIndex.Val < 8; @@ -5338,6 +5363,23 @@ case ARM::VLD3qAsm_16: Spacing = 2; 
return ARM::VLD3q16; case ARM::VLD3qAsm_32: Spacing = 2; return ARM::VLD3q32; + // VLD4LN + case ARM::VLD4LNdWB_fixed_Asm_8: Spacing = 1; return ARM::VLD4LNd8_UPD; + case ARM::VLD4LNdWB_fixed_Asm_16: Spacing = 1; return ARM::VLD4LNd16_UPD; + case ARM::VLD4LNdWB_fixed_Asm_32: Spacing = 1; return ARM::VLD4LNd32_UPD; + case ARM::VLD4LNqWB_fixed_Asm_16: Spacing = 1; return ARM::VLD4LNq16_UPD; + case ARM::VLD4LNqWB_fixed_Asm_32: Spacing = 2; return ARM::VLD4LNq32_UPD; + case ARM::VLD4LNdWB_register_Asm_8: Spacing = 1; return ARM::VLD4LNd8_UPD; + case ARM::VLD4LNdWB_register_Asm_16: Spacing = 1; return ARM::VLD4LNd16_UPD; + case ARM::VLD4LNdWB_register_Asm_32: Spacing = 1; return ARM::VLD4LNd32_UPD; + case ARM::VLD4LNqWB_register_Asm_16: Spacing = 2; return ARM::VLD4LNq16_UPD; + case ARM::VLD4LNqWB_register_Asm_32: Spacing = 2; return ARM::VLD4LNq32_UPD; + case ARM::VLD4LNdAsm_8: Spacing = 1; return ARM::VLD4LNd8; + case ARM::VLD4LNdAsm_16: Spacing = 1; return ARM::VLD4LNd16; + case ARM::VLD4LNdAsm_32: Spacing = 1; return ARM::VLD4LNd32; + case ARM::VLD4LNqAsm_16: Spacing = 2; return ARM::VLD4LNq16; + case ARM::VLD4LNqAsm_32: Spacing = 2; return ARM::VLD4LNq32; + // VLD4 case ARM::VLD4dWB_fixed_Asm_8: Spacing = 1; return ARM::VLD4d8_UPD; case ARM::VLD4dWB_fixed_Asm_16: Spacing = 1; return ARM::VLD4d16_UPD; @@ -5665,6 +5707,41 @@ return true; } + case ARM::VLD4LNdWB_register_Asm_8: + case ARM::VLD4LNdWB_register_Asm_16: + case ARM::VLD4LNdWB_register_Asm_32: + case ARM::VLD4LNqWB_register_Asm_16: + case ARM::VLD4LNqWB_register_Asm_32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. 
+ unsigned Spacing; + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 3)); + TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(Inst.getOperand(4)); // Rm + TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd) + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 3)); + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(5)); // CondCode + TmpInst.addOperand(Inst.getOperand(6)); + Inst = TmpInst; + return true; + } + case ARM::VLD1LNdWB_fixed_Asm_8: case ARM::VLD1LNdWB_fixed_Asm_16: case ARM::VLD1LNdWB_fixed_Asm_32: { @@ -5744,6 +5821,41 @@ return true; } + case ARM::VLD4LNdWB_fixed_Asm_8: + case ARM::VLD4LNdWB_fixed_Asm_16: + case ARM::VLD4LNdWB_fixed_Asm_32: + case ARM::VLD4LNqWB_fixed_Asm_16: + case ARM::VLD4LNqWB_fixed_Asm_32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. 
+ unsigned Spacing; + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 3)); + TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm + TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd) + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 3)); + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + case ARM::VLD1LNdAsm_8: case ARM::VLD1LNdAsm_16: case ARM::VLD1LNdAsm_32: { @@ -5817,6 +5929,39 @@ return true; } + case ARM::VLD4LNdAsm_8: + case ARM::VLD4LNdAsm_16: + case ARM::VLD4LNdAsm_32: + case ARM::VLD4LNqAsm_16: + case ARM::VLD4LNqAsm_32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. 
+ unsigned Spacing; + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 3)); + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd) + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 3)); + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + // VLD3 multiple 3-element structure instructions. case ARM::VLD3dAsm_8: case ARM::VLD3dAsm_16: Modified: llvm/trunk/test/MC/ARM/neon-vld-encoding.s URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/ARM/neon-vld-encoding.s?rev=148832&r1=148831&r2=148832&view=diff ============================================================================== --- llvm/trunk/test/MC/ARM/neon-vld-encoding.s (original) +++ llvm/trunk/test/MC/ARM/neon-vld-encoding.s Tue Jan 24 12:37:25 2012 @@ -336,17 +336,39 @@ @ CHECK: vld3.32 {d5[0], d7[0], d9[0]}, [r4]! 
@ encoding: [0x4d,0x5a,0xa4,0xf4] -@ vld4.8 {d16[1], d17[1], d18[1], d19[1]}, [r0, :32] -@ vld4.16 {d16[1], d17[1], d18[1], d19[1]}, [r0] -@ vld4.32 {d16[1], d17[1], d18[1], d19[1]}, [r0, :128] -@ vld4.16 {d16[1], d18[1], d20[1], d22[1]}, [r0, :64] -@ vld4.32 {d17[0], d19[0], d21[0], d23[0]}, [r0] - -@ FIXME: vld4.8 {d16[1], d17[1], d18[1], d19[1]}, [r0, :32] @ encoding: [0x3f,0x03,0xe0,0xf4] -@ FIXME: vld4.16 {d16[1], d17[1], d18[1], d19[1]}, [r0] @ encoding: [0x4f,0x07,0xe0,0xf4] -@ FIXME: vld4.32 {d16[1], d17[1], d18[1], d19[1]}, [r0, :128] @ encoding: [0xaf,0x0b,0xe0,0xf4] -@ FIXME: vld4.16 {d16[1], d18[1], d20[1], d22[1]}, [r0, :64] @ encoding: [0x7f,0x07,0xe0,0xf4] -@ FIXME: vld4.32 {d17[0], d19[0], d21[0], d23[0]}, [r0] @ encoding: [0x4f,0x1b,0xe0,0xf4] + vld4.8 {d16[1], d17[1], d18[1], d19[1]}, [r1] + vld4.16 {d16[1], d17[1], d18[1], d19[1]}, [r2] + vld4.32 {d16[1], d17[1], d18[1], d19[1]}, [r3] + vld4.16 {d17[1], d19[1], d21[1], d23[1]}, [r7] + vld4.32 {d16[1], d18[1], d20[1], d22[1]}, [r8] + + vld4.s8 {d16[1], d17[1], d18[1], d19[1]}, [r1, :32]! + vld4.s16 {d16[1], d17[1], d18[1], d19[1]}, [r2, :64]! + vld4.s32 {d16[1], d17[1], d18[1], d19[1]}, [r3, :128]! + vld4.u16 {d17[1], d19[1], d21[1], d23[1]}, [r7]! + vld4.u32 {d16[1], d18[1], d20[1], d22[1]}, [r8]! 
+ + vld4.p8 {d16[1], d17[1], d18[1], d19[1]}, [r1, :32], r8 + vld4.p16 {d16[1], d17[1], d18[1], d19[1]}, [r2], r7 + vld4.f32 {d16[1], d17[1], d18[1], d19[1]}, [r3, :64], r5 + vld4.i16 {d16[1], d18[1], d20[1], d22[1]}, [r6], r3 + vld4.i32 {d17[1], d19[1], d21[1], d23[1]}, [r9], r4 + +@ CHECK: vld4.8 {d16[1], d17[1], d18[1], d19[1]}, [r1] @ encoding: [0x2f,0x03,0xe1,0xf4] +@ CHECK: vld4.16 {d16[1], d17[1], d18[1], d19[1]}, [r2] @ encoding: [0x4f,0x07,0xe2,0xf4] +@ CHECK: vld4.32 {d16[1], d17[1], d18[1], d19[1]}, [r3] @ encoding: [0x8f,0x0b,0xe3,0xf4] +@ CHECK: vld4.16 {d17[1], d19[1], d21[1], d23[1]}, [r7] @ encoding: [0x6f,0x17,0xe7,0xf4] +@ CHECK: vld4.32 {d16[1], d18[1], d20[1], d22[1]}, [r8] @ encoding: [0xcf,0x0b,0xe8,0xf4] +@ CHECK: vld4.8 {d16[1], d17[1], d18[1], d19[1]}, [r1, :32]! @ encoding: [0x3d,0x03,0xe1,0xf4] +@ CHECK: vld4.16 {d16[1], d17[1], d18[1], d19[1]}, [r2, :64]! @ encoding: [0x5d,0x07,0xe2,0xf4] +@ CHECK: vld4.32 {d16[1], d17[1], d18[1], d19[1]}, [r3, :128]! @ encoding: [0xad,0x0b,0xe3,0xf4] +@ CHECK: vld4.16 {d17[1], d18[1], d19[1], d20[1]}, [r7]! @ encoding: [0x6d,0x17,0xe7,0xf4] +@ CHECK: vld4.32 {d16[1], d18[1], d20[1], d22[1]}, [r8]! 
@ encoding: [0xcd,0x0b,0xe8,0xf4] +@ CHECK: vld4.8 {d16[1], d17[1], d18[1], d19[1]}, [r1, :32], r8 @ encoding: [0x38,0x03,0xe1,0xf4] +@ CHECK: vld4.16 {d16[1], d17[1], d18[1], d19[1]}, [r2], r7 @ encoding: [0x47,0x07,0xe2,0xf4] +@ CHECK: vld4.32 {d16[1], d17[1], d18[1], d19[1]}, [r3, :64], r5 @ encoding: [0x95,0x0b,0xe3,0xf4] +@ CHECK: vld4.16 {d16[1], d18[1], d20[1], d22[1]}, [r6], r3 @ encoding: [0x63,0x07,0xe6,0xf4] +@ CHECK: vld4.32 {d17[1], d19[1], d21[1], d23[1]}, [r9], r4 @ encoding: [0xc4,0x1b,0xe9,0xf4] @ Handle 'Q' registers in register lists as if the sub-reg D regs were From resistor at mac.com Tue Jan 24 12:37:30 2012 From: resistor at mac.com (Owen Anderson) Date: Tue, 24 Jan 2012 18:37:30 -0000 Subject: [llvm-commits] [llvm] r148833 - in /llvm/trunk: lib/Target/ARM/ARMCodeEmitter.cpp lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp lib/Target/MBlaze/MCTargetDesc/MBlazeMCCodeEmitter.cpp lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp lib/Target/Mips/MipsCodeEmitter.cpp lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp lib/Target/PowerPC/PPCCodeEmitter.cpp utils/TableGen/CodeEmitterGen.cpp Message-ID: <20120124183730.66B0C2A6C12C@llvm.org> Author: resistor Date: Tue Jan 24 12:37:29 2012 New Revision: 148833 URL: http://llvm.org/viewvc/llvm-project?rev=148833&view=rev Log: Widen the instruction encoder that TblGen emits to 64 bits, which should accommodate every target I can think of offhand.
Modified: llvm/trunk/lib/Target/ARM/ARMCodeEmitter.cpp llvm/trunk/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp llvm/trunk/lib/Target/MBlaze/MCTargetDesc/MBlazeMCCodeEmitter.cpp llvm/trunk/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp llvm/trunk/lib/Target/Mips/MipsCodeEmitter.cpp llvm/trunk/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp llvm/trunk/lib/Target/PowerPC/PPCCodeEmitter.cpp llvm/trunk/utils/TableGen/CodeEmitterGen.cpp Modified: llvm/trunk/lib/Target/ARM/ARMCodeEmitter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMCodeEmitter.cpp?rev=148833&r1=148832&r2=148833&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMCodeEmitter.cpp (original) +++ llvm/trunk/lib/Target/ARM/ARMCodeEmitter.cpp Tue Jan 24 12:37:29 2012 @@ -74,7 +74,7 @@ /// getBinaryCodeForInstr - This function, generated by the /// CodeEmitterGenerator using TableGen, produces the binary encoding for /// machine instructions. - unsigned getBinaryCodeForInstr(const MachineInstr &MI) const; + uint64_t getBinaryCodeForInstr(const MachineInstr &MI) const; bool runOnMachineFunction(MachineFunction &MF); Modified: llvm/trunk/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp?rev=148833&r1=148832&r2=148833&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp (original) +++ llvm/trunk/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp Tue Jan 24 12:37:29 2012 @@ -64,7 +64,7 @@ // getBinaryCodeForInstr - TableGen'erated function for getting the // binary encoding for an instruction. - unsigned getBinaryCodeForInstr(const MCInst &MI, + uint64_t getBinaryCodeForInstr(const MCInst &MI, SmallVectorImpl &Fixups) const; /// getMachineOpValue - Return binary encoding of operand. 
If the machine Modified: llvm/trunk/lib/Target/MBlaze/MCTargetDesc/MBlazeMCCodeEmitter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/MBlaze/MCTargetDesc/MBlazeMCCodeEmitter.cpp?rev=148833&r1=148832&r2=148833&view=diff ============================================================================== --- llvm/trunk/lib/Target/MBlaze/MCTargetDesc/MBlazeMCCodeEmitter.cpp (original) +++ llvm/trunk/lib/Target/MBlaze/MCTargetDesc/MBlazeMCCodeEmitter.cpp Tue Jan 24 12:37:29 2012 @@ -43,7 +43,7 @@ // getBinaryCodeForInstr - TableGen'erated function for getting the // binary encoding for an instruction. - unsigned getBinaryCodeForInstr(const MCInst &MI) const; + uint64_t getBinaryCodeForInstr(const MCInst &MI) const; /// getMachineOpValue - Return binary encoding of operand. If the machine /// operand requires relocation, record the relocation and return zero. Modified: llvm/trunk/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp?rev=148833&r1=148832&r2=148833&view=diff ============================================================================== --- llvm/trunk/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp (original) +++ llvm/trunk/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp Tue Jan 24 12:37:29 2012 @@ -58,7 +58,7 @@ // getBinaryCodeForInstr - TableGen'erated function for getting the // binary encoding for an instruction. 
- unsigned getBinaryCodeForInstr(const MCInst &MI, + uint64_t getBinaryCodeForInstr(const MCInst &MI, SmallVectorImpl &Fixups) const; // getBranchJumpOpValue - Return binary encoding of the jump Modified: llvm/trunk/lib/Target/Mips/MipsCodeEmitter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Mips/MipsCodeEmitter.cpp?rev=148833&r1=148832&r2=148833&view=diff ============================================================================== --- llvm/trunk/lib/Target/Mips/MipsCodeEmitter.cpp (original) +++ llvm/trunk/lib/Target/Mips/MipsCodeEmitter.cpp Tue Jan 24 12:37:29 2012 @@ -80,7 +80,7 @@ /// getBinaryCodeForInstr - This function, generated by the /// CodeEmitterGenerator using TableGen, produces the binary encoding for /// machine instructions. - unsigned getBinaryCodeForInstr(const MachineInstr &MI) const; + uint64_t getBinaryCodeForInstr(const MachineInstr &MI) const; void emitInstruction(const MachineInstr &MI); Modified: llvm/trunk/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp?rev=148833&r1=148832&r2=148833&view=diff ============================================================================== --- llvm/trunk/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp (original) +++ llvm/trunk/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp Tue Jan 24 12:37:29 2012 @@ -57,7 +57,7 @@ // getBinaryCodeForInstr - TableGen'erated function for getting the // binary encoding for an instruction. 
- unsigned getBinaryCodeForInstr(const MCInst &MI, + uint64_t getBinaryCodeForInstr(const MCInst &MI, SmallVectorImpl &Fixups) const; void EncodeInstruction(const MCInst &MI, raw_ostream &OS, SmallVectorImpl &Fixups) const { Modified: llvm/trunk/lib/Target/PowerPC/PPCCodeEmitter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCCodeEmitter.cpp?rev=148833&r1=148832&r2=148833&view=diff ============================================================================== --- llvm/trunk/lib/Target/PowerPC/PPCCodeEmitter.cpp (original) +++ llvm/trunk/lib/Target/PowerPC/PPCCodeEmitter.cpp Tue Jan 24 12:37:29 2012 @@ -50,7 +50,7 @@ /// getBinaryCodeForInstr - This function, generated by the /// CodeEmitterGenerator using TableGen, produces the binary encoding for /// machine instructions. - unsigned getBinaryCodeForInstr(const MachineInstr &MI) const; + uint64_t getBinaryCodeForInstr(const MachineInstr &MI) const; MachineRelocation GetRelocation(const MachineOperand &MO, Modified: llvm/trunk/utils/TableGen/CodeEmitterGen.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/TableGen/CodeEmitterGen.cpp?rev=148833&r1=148832&r2=148833&view=diff ============================================================================== --- llvm/trunk/utils/TableGen/CodeEmitterGen.cpp (original) +++ llvm/trunk/utils/TableGen/CodeEmitterGen.cpp Tue Jan 24 12:37:29 2012 @@ -169,13 +169,13 @@ opShift = beginInstBit - beginVarBit; if (opShift > 0) { - Case += " Value |= (op & " + utostr(opMask) + "U) << " + + Case += " Value |= (op & UINT64_C(" + utostr(opMask) + ")) << " + itostr(opShift) + ";\n"; } else if (opShift < 0) { - Case += " Value |= (op & " + utostr(opMask) + "U) >> " + + Case += " Value |= (op & UINT64_C(" + utostr(opMask) + ")) >> " + itostr(-opShift) + ";\n"; } else { - Case += " Value |= op & " + utostr(opMask) + "U;\n"; + Case += " Value |= op & UINT64_C(" + utostr(opMask) + ");\n"; } } } @@ -220,7 +220,7 @@ Target.getInstructionsByEnumValue(); // 
Emit function declaration - o << "unsigned " << Target.getName(); + o << "uint64_t " << Target.getName(); if (MCEmitter) o << "MCCodeEmitter::getBinaryCodeForInstr(const MCInst &MI,\n" << " SmallVectorImpl &Fixups) const {\n"; @@ -238,7 +238,7 @@ if (R->getValueAsString("Namespace") == "TargetOpcode" || R->getValueAsBit("isPseudo")) { - o << " 0U,\n"; + o << " UINT64_C(0),\n"; continue; } @@ -250,9 +250,9 @@ if (BitInit *B = dynamic_cast(BI->getBit(e-i-1))) Value |= B->getValue() << (e-i-1); } - o << " " << Value << "U," << '\t' << "// " << R->getName() << "\n"; + o << " UINT64_C(" << Value << ")," << '\t' << "// " << R->getName() << "\n"; } - o << " 0U\n };\n"; + o << " UINT64_C(0)\n };\n"; // Map to accumulate all the cases. std::map > CaseMap; From stoklund at 2pi.dk Tue Jan 24 12:47:34 2012 From: stoklund at 2pi.dk (Jakob Stoklund Olesen) Date: Tue, 24 Jan 2012 10:47:34 -0800 Subject: [llvm-commits] Tuning LLVM Greedy Register Allocator to optimize for code size when targeting ARM Thumb 2 instruction set In-Reply-To: <38F583CE-3516-421A-84C2-46978621E648@apple.com> References: <000001ccda35$239f0e10$6add2a30$@org> <0E37D7B5-BBF8-4D29-9679-5C4D22B32AEB@2pi.dk> <000c01ccda5b$76b208c0$64161a40$@org> <38F583CE-3516-421A-84C2-46978621E648@apple.com> Message-ID: <901B7A01-6E81-4807-A78F-2922C100117D@2pi.dk> On Jan 24, 2012, at 10:25 AM, Chad Rosier wrote: > On Jan 23, 2012, at 9:46 PM, Zino Benaissa wrote: > >> From: Jakob Stoklund Olesen [mailto:stoklund at 2pi.dk] If you don't mind, I would like you to run a couple of experiments to better understand why this change improves some benchmarks. >> >> ? Sure, please let me know what you find. > > Just to be clear, I believe Jakob was suggesting *you* run the experiments. Oh, sorry if that wasn't clear. >> First of all, is the regHasSizeImpact() hook necessary? Do you get significantly different results if you pretend this function always returns 2? >> >> ? 
For my experiments, precision is quite important to maximize code size gains. The thing is, the function is using information that isn't yet available at RA time. For example, you look at flags, but they will be changed by the post-RA scheduler moving instructions around. You look at load/store offsets, but they are not filled in until PEI runs. You can't really know which instructions can be converted to 2-address form until after RA etc. So basically, regHasSizeImpact() returns a guess, it has to. Another guess that is much faster to compute is '2'. I want to know which guess is better, because if there is only a small difference, we can leave out a lot of code and save compile time. /jakob -------------- next part -------------- An HTML attachment was scrubbed... URL: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120124/01034f6b/attachment.html From zinob at codeaurora.org Tue Jan 24 12:48:47 2012 From: zinob at codeaurora.org (Zino Benaissa) Date: Tue, 24 Jan 2012 10:48:47 -0800 Subject: [llvm-commits] Tuning LLVM Greedy Register Allocator to optimize for code size when targeting ARM Thumb 2 instruction set In-Reply-To: <38F583CE-3516-421A-84C2-46978621E648@apple.com> References: <000001ccda35$239f0e10$6add2a30$@org> <0E37D7B5-BBF8-4D29-9679-5C4D22B32AEB@2pi.dk> <000c01ccda5b$76b208c0$64161a40$@org> <38F583CE-3516-421A-84C2-46978621E648@apple.com> Message-ID: <001401ccdac8$ce05c220$6a114660$@org> Thanks Chad, I missed the *you* :-) Jacob, First experiment: Tried this over the course of development. I observed code size and performance regressions. Only a small subset of Thumb 2 instructions can be encoded in 16 bits. We want to reserve R0-R7 only for these instructions. My finding: precision is important. Second experiment: I am concerned that weakening the spill weight will introduce spills in hot blocks. Again this approach is indiscriminate in assigning R0-R7 to instructions that will be encoded in 32 bits anyway.
I am still interested in giving it a try. -Zino From: Chad Rosier [mailto:mcrosier at apple.com] Sent: Tuesday, January 24, 2012 10:25 AM To: Zino Benaissa Cc: 'Jakob Stoklund Olesen'; rajav at codeaurora.org; llvm-commits at cs.uiuc.edu Subject: Re: [llvm-commits] Tuning LLVM Greedy Register Allocator to optimize for code size when targeting ARM Thumb 2 instruction set Hi Zino, One comment below. On Jan 23, 2012, at 9:46 PM, Zino Benaissa wrote: Hi Jacob, Thanks for the quick feedback. See my comments below, Cheers, Zino From: Jakob Stoklund Olesen [mailto:stoklund at 2pi.dk] Sent: Monday, January 23, 2012 7:18 PM To: Zino Benaissa Cc: llvm-commits at cs.uiuc.edu; rajav at codeaurora.org Subject: Re: [llvm-commits] Tuning LLVM Greedy Register Allocator to optimize for code size when targeting ARM Thumb 2 instruction set On Jan 23, 2012, at 5:11 PM, Zino Benaissa wrote: Description: This contribution extends LLVM greedy Register Allocator to optimize for code size when LLVM compiler targets ARM Thumb 2 instruction set. This heuristic favors assigning registers R0 through R7 to operands used in instructions that can be encoded in 16 bits (16-bit is allowed only if R0-7 are used). Operands that appear most frequently in a function (and in instructions that qualify) get an R0-7 register. This heuristic is turned on by default and has impact on generated code only if -mthumb compiler switch is used. To turn this heuristic off use -disable-favor-r0-7 feature flag. This patch modifies: 1) The LLVM greedy register allocator located in LLVM/CodeGen directory: To add the new code size heuristic. 2) The ARM-specific files located in LLVM/Target/ARM directory: To add the function that determines which instructions can be encoded in 16 bits and a fix to enable the compiler to emit CMN instruction in 16-bit encoding. 3) The LLVM test suite: fix test/CodeGen/Thumb2/thumb2-cmn.ll test. Hi Zino, Thanks for working on this interesting patch.
Please submit the CMN-related changes as an independent patch. -> Sure, I can do that. If you don't mind, I would like you to run a couple of experiments to better understand why this change improves some benchmarks. -> Sure, please let me know what you find. Just to be clear, I believe Jakob was suggesting *you* run the experiments. Chad First of all, is the regHasSizeImpact() hook necessary? Do you get significantly different results if you pretend this function always returns 2? -> For my experiments, precision is quite important to maximize code size gains. Second, what happens if you use a 'flatter' spill weight? Instead of your patch, in LiveIntervals::getSpillWeight replace this: float lc = std::pow(1 + (100.0 / (loopDepth + 10)), (double)loopDepth); with this: float lc = std::pow(1 + (100.0 / (loopDepth + 10))/5, (double)loopDepth); And in CalcSpillWeights.h, replace the number 25 in normalizeSpillWeight() with 250. Does that give you similar results? -> I did not try this experiment. However, by doing so, aren't you increasing chances for spills to be inserted in hot blocks (like inner loops)? Thanks, /jakob _______________________________________________ llvm-commits mailing list llvm-commits at cs.uiuc.edu http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits -------------- next part -------------- An HTML attachment was scrubbed... URL: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120124/a616fbb7/attachment.html From grosbach at apple.com Tue Jan 24 12:53:14 2012 From: grosbach at apple.com (Jim Grosbach) Date: Tue, 24 Jan 2012 18:53:14 -0000 Subject: [llvm-commits] [llvm] r148836 - in /llvm/trunk: lib/Target/ARM/ARMInstrNEON.td lib/Target/ARM/AsmParser/ARMAsmParser.cpp test/MC/ARM/neon-vst-encoding.s Message-ID: <20120124185314.64D942A6C12C@llvm.org> Author: grosbach Date: Tue Jan 24 12:53:13 2012 New Revision: 148836 URL: http://llvm.org/viewvc/llvm-project?rev=148836&view=rev Log: NEON VST4(one lane) assembly parsing and encoding.
Modified: llvm/trunk/lib/Target/ARM/ARMInstrNEON.td llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp llvm/trunk/test/MC/ARM/neon-vst-encoding.s Modified: llvm/trunk/lib/Target/ARM/ARMInstrNEON.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrNEON.td?rev=148836&r1=148835&r2=148836&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrNEON.td (original) +++ llvm/trunk/lib/Target/ARM/ARMInstrNEON.td Tue Jan 24 12:53:13 2012 @@ -6364,6 +6364,55 @@ (ins VecListFourQ:$list, addrmode6:$addr, rGPR:$Rm, pred:$p)>; +// VST4 single-lane pseudo-instructions. These need special handling for +// the lane index that an InstAlias can't handle, so we use these instead. +def VST4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr", + (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST4LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr", + (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST4LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr", + (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST4LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr", + (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST4LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr", + (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>; + +def VST4LNdWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!", + (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST4LNdWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!", + (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST4LNdWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!", + (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def 
VST4LNqWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!", + (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST4LNqWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!", + (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST4LNdWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm", + (ins VecListFourDByteIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST4LNdWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm", + (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST4LNdWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm", + (ins VecListFourDWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST4LNqWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm", + (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST4LNqWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm", + (ins VecListFourQWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; + // VST4 multiple structure pseudo-instructions. These need special handling for // the vector operands that the normal instructions don't yet model. 
Modified: llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp?rev=148836&r1=148835&r2=148836&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp (original) +++ llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp Tue Jan 24 12:53:13 2012 @@ -5273,6 +5273,23 @@ case ARM::VST3qAsm_16: Spacing = 2; return ARM::VST3q16; case ARM::VST3qAsm_32: Spacing = 2; return ARM::VST3q32; + // VST4LN + case ARM::VST4LNdWB_fixed_Asm_8: Spacing = 1; return ARM::VST4LNd8_UPD; + case ARM::VST4LNdWB_fixed_Asm_16: Spacing = 1; return ARM::VST4LNd16_UPD; + case ARM::VST4LNdWB_fixed_Asm_32: Spacing = 1; return ARM::VST4LNd32_UPD; + case ARM::VST4LNqWB_fixed_Asm_16: Spacing = 1; return ARM::VST4LNq16_UPD; + case ARM::VST4LNqWB_fixed_Asm_32: Spacing = 2; return ARM::VST4LNq32_UPD; + case ARM::VST4LNdWB_register_Asm_8: Spacing = 1; return ARM::VST4LNd8_UPD; + case ARM::VST4LNdWB_register_Asm_16: Spacing = 1; return ARM::VST4LNd16_UPD; + case ARM::VST4LNdWB_register_Asm_32: Spacing = 1; return ARM::VST4LNd32_UPD; + case ARM::VST4LNqWB_register_Asm_16: Spacing = 2; return ARM::VST4LNq16_UPD; + case ARM::VST4LNqWB_register_Asm_32: Spacing = 2; return ARM::VST4LNq32_UPD; + case ARM::VST4LNdAsm_8: Spacing = 1; return ARM::VST4LNd8; + case ARM::VST4LNdAsm_16: Spacing = 1; return ARM::VST4LNd16; + case ARM::VST4LNdAsm_32: Spacing = 1; return ARM::VST4LNd32; + case ARM::VST4LNqAsm_16: Spacing = 2; return ARM::VST4LNq16; + case ARM::VST4LNqAsm_32: Spacing = 2; return ARM::VST4LNq32; + // VST4 case ARM::VST4dWB_fixed_Asm_8: Spacing = 1; return ARM::VST4d8_UPD; case ARM::VST4dWB_fixed_Asm_16: Spacing = 1; return ARM::VST4d16_UPD; @@ -5493,6 +5510,34 @@ return true; } + case ARM::VST4LNdWB_register_Asm_8: + case ARM::VST4LNdWB_register_Asm_16: + case ARM::VST4LNdWB_register_Asm_32: + case 
ARM::VST4LNqWB_register_Asm_16: + case ARM::VST4LNqWB_register_Asm_32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. + unsigned Spacing; + TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(Inst.getOperand(4)); // Rm + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 3)); + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(5)); // CondCode + TmpInst.addOperand(Inst.getOperand(6)); + Inst = TmpInst; + return true; + } + case ARM::VST1LNdWB_fixed_Asm_8: case ARM::VST1LNdWB_fixed_Asm_16: case ARM::VST1LNdWB_fixed_Asm_32: { @@ -5563,6 +5608,34 @@ return true; } + case ARM::VST4LNdWB_fixed_Asm_8: + case ARM::VST4LNdWB_fixed_Asm_16: + case ARM::VST4LNdWB_fixed_Asm_32: + case ARM::VST4LNqWB_fixed_Asm_16: + case ARM::VST4LNqWB_fixed_Asm_32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. 
+ unsigned Spacing; + TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 3)); + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + case ARM::VST1LNdAsm_8: case ARM::VST1LNdAsm_16: case ARM::VST1LNdAsm_32: { @@ -5627,6 +5700,32 @@ return true; } + case ARM::VST4LNdAsm_8: + case ARM::VST4LNdAsm_16: + case ARM::VST4LNdAsm_32: + case ARM::VST4LNqAsm_16: + case ARM::VST4LNqAsm_32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. + unsigned Spacing; + TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 3)); + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + // Handle NEON VLD complex aliases. 
case ARM::VLD1LNdWB_register_Asm_8: case ARM::VLD1LNdWB_register_Asm_16: Modified: llvm/trunk/test/MC/ARM/neon-vst-encoding.s URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/ARM/neon-vst-encoding.s?rev=148836&r1=148835&r2=148836&view=diff ============================================================================== --- llvm/trunk/test/MC/ARM/neon-vst-encoding.s (original) +++ llvm/trunk/test/MC/ARM/neon-vst-encoding.s Tue Jan 24 12:53:13 2012 @@ -210,17 +210,39 @@ @ CHECK: vst3.32 {d5[1], d7[1], d9[1]}, [r4]! @ encoding: [0xcd,0x5a,0x84,0xf4] -@ vst4.8 {d16[1], d17[1], d18[1], d19[1]}, [r0, :32] -@ vst4.16 {d16[1], d17[1], d18[1], d19[1]}, [r0] -@ vst4.32 {d16[1], d17[1], d18[1], d19[1]}, [r0, :128] -@ vst4.16 {d17[3], d19[3], d21[3], d23[3]}, [r0, :64] -@ vst4.32 {d17[0], d19[0], d21[0], d23[0]}, [r0] - -@ FIXME: vst4.8 {d16[1], d17[1], d18[1], d19[1]}, [r0, :32] @ encoding: [0x3f,0x03,0xc0,0xf4] -@ FIXME: vst4.16 {d16[1], d17[1], d18[1], d19[1]}, [r0] @ encoding: [0x4f,0x07,0xc0,0xf4] -@ FIXME: vst4.32 {d16[1], d17[1], d18[1], d19[1]}, [r0, :128] @ encoding: [0xaf,0x0b,0xc0,0xf4] -@ FIXME: vst4.16 {d17[3], d19[3], d21[3], d23[3]}, [r0, :64] @ encoding: [0xff,0x17,0xc0,0xf4] -@ FIXME: vst4.32 {d17[0], d19[0], d21[0], d23[0]}, [r0] @ encoding: [0x4f,0x1b,0xc0,0xf4] + vst4.8 {d16[1], d17[1], d18[1], d19[1]}, [r1] + vst4.16 {d16[1], d17[1], d18[1], d19[1]}, [r2] + vst4.32 {d16[1], d17[1], d18[1], d19[1]}, [r3] + vst4.16 {d17[1], d19[1], d21[1], d23[1]}, [r7] + vst4.32 {d16[1], d18[1], d20[1], d22[1]}, [r8] + + vst4.s8 {d16[1], d17[1], d18[1], d19[1]}, [r1, :32]! + vst4.s16 {d16[1], d17[1], d18[1], d19[1]}, [r2, :64]! + vst4.s32 {d16[1], d17[1], d18[1], d19[1]}, [r3, :128]! + vst4.u16 {d17[1], d19[1], d21[1], d23[1]}, [r7]! + vst4.u32 {d16[1], d18[1], d20[1], d22[1]}, [r8]! 
+ + vst4.p8 {d16[1], d17[1], d18[1], d19[1]}, [r1, :32], r8 + vst4.p16 {d16[1], d17[1], d18[1], d19[1]}, [r2], r7 + vst4.f32 {d16[1], d17[1], d18[1], d19[1]}, [r3, :64], r5 + vst4.i16 {d16[1], d18[1], d20[1], d22[1]}, [r6], r3 + vst4.i32 {d17[1], d19[1], d21[1], d23[1]}, [r9], r4 + +@ CHECK: vst4.8 {d16[1], d17[1], d18[1], d19[1]}, [r1] @ encoding: [0x2f,0x03,0xc1,0xf4] +@ CHECK: vst4.16 {d16[1], d17[1], d18[1], d19[1]}, [r2] @ encoding: [0x4f,0x07,0xc2,0xf4] +@ CHECK: vst4.32 {d16[1], d17[1], d18[1], d19[1]}, [r3] @ encoding: [0x8f,0x0b,0xc3,0xf4] +@ CHECK: vst4.16 {d17[1], d19[1], d21[1], d23[1]}, [r7] @ encoding: [0x6f,0x17,0xc7,0xf4] +@ CHECK: vst4.32 {d16[1], d18[1], d20[1], d22[1]}, [r8] @ encoding: [0xcf,0x0b,0xc8,0xf4] +@ CHECK: vst4.8 {d16[1], d17[1], d18[1], d19[1]}, [r1, :32]! @ encoding: [0x3d,0x03,0xc1,0xf4] +@ CHECK: vst4.16 {d16[1], d17[1], d18[1], d19[1]}, [r2, :64]! @ encoding: [0x5d,0x07,0xc2,0xf4] +@ CHECK: vst4.32 {d16[1], d17[1], d18[1], d19[1]}, [r3, :128]! @ encoding: [0xad,0x0b,0xc3,0xf4] +@ CHECK: vst4.16 {d17[1], d18[1], d19[1], d20[1]}, [r7]! @ encoding: [0x6d,0x17,0xc7,0xf4] +@ CHECK: vst4.32 {d16[1], d18[1], d20[1], d22[1]}, [r8]! @ encoding: [0xcd,0x0b,0xc8,0xf4] +@ CHECK: vst4.8 {d16[1], d17[1], d18[1], d19[1]}, [r1, :32], r8 @ encoding: [0x38,0x03,0xc1,0xf4] +@ CHECK: vst4.16 {d16[1], d17[1], d18[1], d19[1]}, [r2], r7 @ encoding: [0x47,0x07,0xc2,0xf4] +@ CHECK: vst4.32 {d16[1], d17[1], d18[1], d19[1]}, [r3, :64], r5 @ encoding: [0x95,0x0b,0xc3,0xf4] +@ CHECK: vst4.16 {d16[1], d18[1], d20[1], d22[1]}, [r6], r3 @ encoding: [0x63,0x07,0xc6,0xf4] +@ CHECK: vst4.32 {d17[1], d19[1], d21[1], d23[1]}, [r9], r4 @ encoding: [0xc4,0x1b,0xc9,0xf4] @ Spot-check additional size-suffix aliases. 
From zinob at codeaurora.org Tue Jan 24 13:24:09 2012 From: zinob at codeaurora.org (Zino Benaissa) Date: Tue, 24 Jan 2012 11:24:09 -0800 Subject: [llvm-commits] Tuning LLVM Greedy Register Allocator to optimize for code size when targeting ARM Thumb 2 instruction set In-Reply-To: <901B7A01-6E81-4807-A78F-2922C100117D@2pi.dk> References: <000001ccda35$239f0e10$6add2a30$@org> <0E37D7B5-BBF8-4D29-9679-5C4D22B32AEB@2pi.dk> <000c01ccda5b$76b208c0$64161a40$@org> <38F583CE-3516-421A-84C2-46978621E648@apple.com> <901B7A01-6E81-4807-A78F-2922C100117D@2pi.dk> Message-ID: <001c01ccdacd$befdd9c0$3cf98d40$@org> Hi Jacob, Hope this clarifies, see below From: Jakob Stoklund Olesen [mailto:stoklund at 2pi.dk] Sent: Tuesday, January 24, 2012 10:48 AM To: Chad Rosier Cc: Zino Benaissa; rajav at codeaurora.org; llvm-commits at cs.uiuc.edu Subject: Re: [llvm-commits] Tuning LLVM Greedy Register Allocator to optimize for code size when targeting ARM Thumb 2 instruction set On Jan 24, 2012, at 10:25 AM, Chad Rosier wrote: On Jan 23, 2012, at 9:46 PM, Zino Benaissa wrote: From: Jakob Stoklund Olesen [mailto:stoklund at 2pi.dk] If you don't mind, I would like you to run a couple of experiments to better understand why this change improves some benchmarks. ? Sure, please let me know what you find. Just to be clear, I believe Jakob was suggesting *you* run the experiments. Oh, sorry if that wasn't clear. First of all, is the regHasSizeImpact() hook necessary? Do you get significantly different results if you pretend this function always returns 2? ? From my experiments, precision is quite important to maximize code size gains. The thing is, the function is using information that isn't yet available at RA time. For example, you look at flags, but they will be changed by the post-RA scheduler moving instructions around. You look at load/store offsets, but they are not filled in until PEI runs. You can't really know which instructions can be converted to 2-address form until after RA etc. 
So basically, regHasSizeImpact() returns a guess, it has to. Another guess that is much faster to compute is '2'. Yes, this is a heuristic; by definition it is a guess. The way to look at it is the other way: 1. If the offset of load/store is too large then don't bother assigning R0-7 2. If both operands of ADD are not killed then don't bother assigning R0-7 3. If immediate of ADD is too large don't bother assigning R0-7 The goal is to eliminate as many candidates as possible that compete for R0-R7 so that the RA does a better assignment of R0-R7 (which ultimately increases 16-bit encoding). Returning 2 fails to do this. You may as well return 0 instead of 2 :-) 1. "you look at flags, but they will be changed by the post-RA scheduler moving instructions around": in the majority of cases the destination operand will reuse the register of the Source 1 operand, leading to a 16-bit encoding. 2. "You look at load/store offsets, but they are not filled in until PEI runs" Is it? What I want to know which guess is better, because if there is only a small difference, we can leave out a lot of code and save compile time. /jakob -------------- next part -------------- An HTML attachment was scrubbed... URL: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120124/50d421e3/attachment.html From mcrosier at apple.com Tue Jan 24 13:33:22 2012 From: mcrosier at apple.com (Chad Rosier) Date: Tue, 24 Jan 2012 19:33:22 -0000 Subject: [llvm-commits] [test-suite] r148845 - /test-suite/trunk/MultiSource/Benchmarks/nbench/Makefile Message-ID: <20120124193322.B166D2A6C12C@llvm.org> Author: mcrosier Date: Tue Jan 24 13:33:22 2012 New Revision: 148845 URL: http://llvm.org/viewvc/llvm-project?rev=148845&view=rev Log: nbench is still taking too long on embedded devices, so drop the number of iterations.
Modified: test-suite/trunk/MultiSource/Benchmarks/nbench/Makefile Modified: test-suite/trunk/MultiSource/Benchmarks/nbench/Makefile URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/nbench/Makefile?rev=148845&r1=148844&r2=148845&view=diff ============================================================================== --- test-suite/trunk/MultiSource/Benchmarks/nbench/Makefile (original) +++ test-suite/trunk/MultiSource/Benchmarks/nbench/Makefile Tue Jan 24 13:33:22 2012 @@ -1,7 +1,7 @@ LEVEL = ../../.. PROG = nbench -CPPFLAGS = -DBASE_ITERATIONS=50 +CPPFLAGS = -DBASE_ITERATIONS=25 LDFLAGS = include ../../Makefile.multisrc From kcc at google.com Tue Jan 24 13:34:43 2012 From: kcc at google.com (Kostya Serebryany) Date: Tue, 24 Jan 2012 19:34:43 -0000 Subject: [llvm-commits] [llvm] r148846 - in /llvm/trunk: lib/Transforms/Instrumentation/AddressSanitizer.cpp test/Instrumentation/AddressSanitizer/bug_11395.ll test/Instrumentation/AddressSanitizer/instrument_load_then_store.ll test/Instrumentation/AddressSanitizer/test64.ll Message-ID: <20120124193443.6EDE52A6C12C@llvm.org> Author: kcc Date: Tue Jan 24 13:34:43 2012 New Revision: 148846 URL: http://llvm.org/viewvc/llvm-project?rev=148846&view=rev Log: [asan] enable asan only for the functions that have Attribute::AddressSafety Modified: llvm/trunk/lib/Transforms/Instrumentation/AddressSanitizer.cpp llvm/trunk/test/Instrumentation/AddressSanitizer/bug_11395.ll llvm/trunk/test/Instrumentation/AddressSanitizer/instrument_load_then_store.ll llvm/trunk/test/Instrumentation/AddressSanitizer/test64.ll Modified: llvm/trunk/lib/Transforms/Instrumentation/AddressSanitizer.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Instrumentation/AddressSanitizer.cpp?rev=148846&r1=148845&r2=148846&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/Instrumentation/AddressSanitizer.cpp (original) +++ 
llvm/trunk/lib/Transforms/Instrumentation/AddressSanitizer.cpp Tue Jan 24 13:34:43 2012 @@ -608,6 +608,7 @@ bool AddressSanitizer::handleFunction(Module &M, Function &F) { if (BL->isIn(F)) return false; if (&F == AsanCtorFunction) return false; + if (!F.hasFnAttr(Attribute::AddressSafety)) return false; if (!ClDebugFunc.empty() && ClDebugFunc != F.getName()) return false; Modified: llvm/trunk/test/Instrumentation/AddressSanitizer/bug_11395.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Instrumentation/AddressSanitizer/bug_11395.ll?rev=148846&r1=148845&r2=148846&view=diff ============================================================================== --- llvm/trunk/test/Instrumentation/AddressSanitizer/bug_11395.ll (original) +++ llvm/trunk/test/Instrumentation/AddressSanitizer/bug_11395.ll Tue Jan 24 13:34:43 2012 @@ -36,14 +36,14 @@ @ff_mlp_firorder_7 = external global i8 @ff_mlp_firorder_8 = external global i8 -define void @ff_mlp_init_x86(%struct.DSPContext* nocapture %c, %struct.AVCodecContext* nocapture %avctx) nounwind { +define void @ff_mlp_init_x86(%struct.DSPContext* nocapture %c, %struct.AVCodecContext* nocapture %avctx) nounwind address_safety { entry: %mlp_filter_channel = getelementptr inbounds %struct.DSPContext* %c, i32 0, i32 131 store void (i32*, i32*, i32, i32, i32, i32, i32, i32*)* @mlp_filter_channel_x86, void (i32*, i32*, i32, i32, i32, i32, i32, i32*)** %mlp_filter_channel, align 4, !tbaa !0 ret void } -define internal void @mlp_filter_channel_x86(i32* %state, i32* %coeff, i32 %firorder, i32 %iirorder, i32 %filter_shift, i32 %mask, i32 %blocksize, i32* %sample_buffer) nounwind { +define internal void @mlp_filter_channel_x86(i32* %state, i32* %coeff, i32 %firorder, i32 %iirorder, i32 %filter_shift, i32 %mask, i32 %blocksize, i32* %sample_buffer) nounwind address_safety { entry: %filter_shift.addr = alloca i32, align 4 %mask.addr = alloca i32, align 4 Modified: 
llvm/trunk/test/Instrumentation/AddressSanitizer/instrument_load_then_store.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Instrumentation/AddressSanitizer/instrument_load_then_store.ll?rev=148846&r1=148845&r2=148846&view=diff ============================================================================== --- llvm/trunk/test/Instrumentation/AddressSanitizer/instrument_load_then_store.ll (original) +++ llvm/trunk/test/Instrumentation/AddressSanitizer/instrument_load_then_store.ll Tue Jan 24 13:34:43 2012 @@ -4,7 +4,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-unknown-linux-gnu" -define void @IncrementMe(i32* %a) { +define void @IncrementMe(i32* %a) address_safety { entry: %tmp1 = load i32* %a, align 4 %tmp2 = add i32 %tmp1, 1 Modified: llvm/trunk/test/Instrumentation/AddressSanitizer/test64.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Instrumentation/AddressSanitizer/test64.ll?rev=148846&r1=148845&r2=148846&view=diff ============================================================================== --- llvm/trunk/test/Instrumentation/AddressSanitizer/test64.ll (original) +++ llvm/trunk/test/Instrumentation/AddressSanitizer/test64.ll Tue Jan 24 13:34:43 2012 @@ -1,7 +1,7 @@ ; RUN: opt < %s -asan -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-unknown-linux-gnu" -define i32 @read_4_bytes(i32* %a) { +define i32 @read_4_bytes(i32* %a) address_safety { entry: %tmp1 = load i32* %a, align 4 ret i32 %tmp1 From mcrosier at apple.com Tue Jan 24 13:40:38 2012 From: mcrosier at apple.com (Chad Rosier) Date: Tue, 24 Jan 2012 11:40:38 -0800 Subject: [llvm-commits] [test-suite] r148727 - /test-suite/trunk/MultiSource/Benchmarks/nbench/Makefile In-Reply-To: 
<02F378F9-7A42-4CCF-AC73-A2B346BF4314@googlemail.com> References: <20120123212653.83A842A6C12C@llvm.org> <02F378F9-7A42-4CCF-AC73-A2B346BF4314@googlemail.com> Message-ID: <87B7585F-754C-43AF-B075-055838B18788@apple.com> On Jan 24, 2012, at 5:21 AM, Benjamin Kramer wrote: > > On 23.01.2012, at 22:26, Chad Rosier wrote: > >> Author: mcrosier >> Date: Mon Jan 23 15:26:53 2012 >> New Revision: 148727 >> >> URL: http://llvm.org/viewvc/llvm-project?rev=148727&view=rev >> Log: >> nbench exceeds the 500s default, so bump it a bit. > > If it runs too slowly on embedded machines it may be a good idea to reduce the BASE_ITERATIONS number to scale the number of iterations down. I tweaked it to take between 10 and 20 seconds on my test machines, which are intel boxes not older than a couple of years. > Good idea; committed revision 148845. > - Ben > >> >> Modified: >> test-suite/trunk/MultiSource/Benchmarks/nbench/Makefile >> >> Modified: test-suite/trunk/MultiSource/Benchmarks/nbench/Makefile >> URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/nbench/Makefile?rev=148727&r1=148726&r2=148727&view=diff >> ============================================================================== >> --- test-suite/trunk/MultiSource/Benchmarks/nbench/Makefile (original) >> +++ test-suite/trunk/MultiSource/Benchmarks/nbench/Makefile Mon Jan 23 15:26:53 2012 >> @@ -6,6 +6,9 @@ >> >> include ../../Makefile.multisrc >> >> +# This test can take more than the default 500s timeout at -O0. >> +RUNTIMELIMIT:=750 >> + >> # Always copy NNET.DAT so it's available with SRCDIR != OBJDIR builds. >> # FIXME: Hack >> $(shell cp -n $(PROJ_SRC_DIR)/NNET.DAT .) 
>> >> >> _______________________________________________ >> llvm-commits mailing list >> llvm-commits at cs.uiuc.edu >> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits > From matthewbg at google.com Tue Jan 24 13:43:30 2012 From: matthewbg at google.com (Matt Beaumont-Gay) Date: Tue, 24 Jan 2012 19:43:30 -0000 Subject: [llvm-commits] [llvm] r148849 - /llvm/trunk/lib/Object/ELFObjectFile.cpp Message-ID: <20120124194330.E53852A6C12C@llvm.org> Author: matthewbg Date: Tue Jan 24 13:43:30 2012 New Revision: 148849 URL: http://llvm.org/viewvc/llvm-project?rev=148849&view=rev Log: Sink assert-only variables into the asserts Modified: llvm/trunk/lib/Object/ELFObjectFile.cpp Modified: llvm/trunk/lib/Object/ELFObjectFile.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Object/ELFObjectFile.cpp?rev=148849&r1=148848&r2=148849&view=diff ============================================================================== --- llvm/trunk/lib/Object/ELFObjectFile.cpp (original) +++ llvm/trunk/lib/Object/ELFObjectFile.cpp Tue Jan 24 13:43:30 2012 @@ -1672,14 +1672,11 @@ new DyldELFObject(Object, MemoryMap, ec); // Unit testing for type inquiry - bool isBinary = isa(result); - bool isDyld = isa >(result); - bool isFile = isa >(result); - (void)isBinary; - (void)isDyld; - (void)isFile; - assert(isBinary && isDyld && isFile && - "Type inquiry failed for ELF object!"); + assert(isa(result) && "Type inquiry failed for ELF object!"); + assert((isa >(result)) && + "Type inquiry failed for ELF object!"); + assert((isa >(result)) && + "Type inquiry failed for ELF object!"); return result; } } @@ -1695,14 +1692,11 @@ new ELFObjectFile(Object, ec); // Unit testing for type inquiry - bool isBinary = isa(result); - bool isDyld = isa >(result); - bool isFile = isa >(result); - (void)isBinary; - (void)isDyld; - (void)isFile; - assert(isBinary && isFile && !isDyld && - "Type inquiry failed for ELF object!"); + assert(isa(result) && "Type inquiry failed for ELF object!"); + assert((!isa 
>(result)) && + "Type inquiry failed for ELF object!"); + assert((isa >(result)) && + "Type inquiry failed for ELF object!"); return result; } From matthewbg at google.com Tue Jan 24 13:47:35 2012 From: matthewbg at google.com (Matt Beaumont-Gay) Date: Tue, 24 Jan 2012 11:47:35 -0800 Subject: [llvm-commits] [llvm] r148715 - /llvm/trunk/lib/Object/ELFObjectFile.cpp In-Reply-To: <4F1E3F5F.5070909@mxc.ca> References: <20120123184604.9348B2A6C12C@llvm.org> <4F1E3F5F.5070909@mxc.ca> Message-ID: r148849 On Mon, Jan 23, 2012 at 21:19, Nick Lewycky wrote: > Matt Beaumont-Gay wrote: >> >> Author: matthewbg >> Date: Mon Jan 23 12:46:04 2012 >> New Revision: 148715 >> >> URL: http://llvm.org/viewvc/llvm-project?rev=148715&view=rev >> Log: >> Silence warnings in -asserts build >> >> Modified: >> ? ? llvm/trunk/lib/Object/ELFObjectFile.cpp >> >> Modified: llvm/trunk/lib/Object/ELFObjectFile.cpp >> URL: >> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Object/ELFObjectFile.cpp?rev=148715&r1=148714&r2=148715&view=diff >> >> ============================================================================== >> --- llvm/trunk/lib/Object/ELFObjectFile.cpp (original) >> +++ llvm/trunk/lib/Object/ELFObjectFile.cpp Mon Jan 23 12:46:04 2012 >> @@ -1675,6 +1675,9 @@ >> ? ? ? ? ?bool isBinary = isa(result); >> ? ? ? ? ?bool isDyld = isa >> ?>(result); >> ? ? ? ? ?bool isFile = isa >> ?>(result); >> + ? ? ? ?(void)isBinary; >> + ? ? ? ?(void)isDyld; >> + ? ? ? ?(void)isFile; >> ? ? ? ? ?assert(isBinary&& ?isDyld&& ?isFile&& >> ? ? ? ? ? ? ?"Type inquiry failed for ELF object!"); > > > Please sink these values into the assert, even when it is this much code. > (Or in this case, how about multiple asserts, one for each "type inquiry"?) > > Nick > > >> ? ? ? ? ?return result; >> @@ -1695,6 +1698,9 @@ >> ? ? ? ?bool isBinary = isa(result); >> ? ? ? ?bool isDyld = isa ?>(result); >> ? ? ? ?bool isFile = isa ?>(result); >> + ? ? ?(void)isBinary; >> + ? ? ?(void)isDyld; >> + ? ? 
?(void)isFile; >> ? ? ? ?assert(isBinary&& ?isFile&& ?!isDyld&& >> ? ? ? ? ? ?"Type inquiry failed for ELF object!"); >> ? ? ? ?return result; >> >> >> _______________________________________________ >> llvm-commits mailing list >> llvm-commits at cs.uiuc.edu >> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits >> > From glider at google.com Tue Jan 24 14:07:26 2012 From: glider at google.com (Alexander Potapenko) Date: Wed, 25 Jan 2012 00:07:26 +0400 Subject: [llvm-commits] [llvm] r148846 - in /llvm/trunk: lib/Transforms/Instrumentation/AddressSanitizer.cpp test/Instrumentation/AddressSanitizer/bug_11395.ll test/Instrumentation/AddressSanitizer/instrument_load_then_store.ll test/Instrumentation/Addres Message-ID: Is it convenient to have an attribute that you'll have to define for almost all the functions (IIUC)? From kai at redstar.de Tue Jan 24 14:24:38 2012 From: kai at redstar.de (Kai) Date: Tue, 24 Jan 2012 21:24:38 +0100 Subject: [llvm-commits] [PATCH] TLS support for Windows 32+64bit Message-ID: <4F1F1386.8020508@redstar.de> Hello, the attached patch adds TLS support for x86_64-pc-win32 and x86-pc-win32. Implemented is the implicit TLS model (__declspec(thread) in Visual C++). This is one of the missing pieces for the Win64 port of LDC (the LLVM based D compiler). It should also be useful for implementing the Microsoft specific TLS extension in Clang. Please review and commit if acceptable. Thank you very much! Regards Kai -------------- next part -------------- An embedded and charset-unspecified text was scrubbed... 
Name: tls-20120124.diff Url: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120124/0d8f9ee2/attachment.pl From asl at math.spbu.ru Tue Jan 24 14:34:04 2012 From: asl at math.spbu.ru (Anton Korobeynikov) Date: Wed, 25 Jan 2012 00:34:04 +0400 Subject: [llvm-commits] [llvm] r148658 - in /llvm/trunk: lib/Target/ARM/ARM.td lib/Target/ARM/ARMAsmPrinter.cpp lib/Target/ARM/ARMInstrInfo.td lib/Target/ARM/ARMInstrNEON.td lib/Target/ARM/ARMInstrVFP.td lib/Target/ARM/ARMSchedule.td lib/Target/ARM/ARMSub In-Reply-To: References: Message-ID: Hi James, > >From my discussions with colleagues we come to a different conclusion - that it is indeed possible to have the NEONv2 vector integer unit without the floating point unit of VFPv4. > We don't think it's possible to have VFPv4 with NEONv1 however. Well... this conflicts with that doc I mentioned. Where is the truth? :) -- With best regards, Anton Korobeynikov Faculty of Mathematics and Mechanics, Saint Petersburg State University From kcc at google.com Tue Jan 24 14:50:32 2012 From: kcc at google.com (Kostya Serebryany) Date: Tue, 24 Jan 2012 12:50:32 -0800 Subject: [llvm-commits] [llvm] r148846 - in /llvm/trunk: lib/Transforms/Instrumentation/AddressSanitizer.cpp test/Instrumentation/AddressSanitizer/bug_11395.ll test/Instrumentation/AddressSanitizer/instrument_load_then_store.ll test/Instrumentation/Addres In-Reply-To: References: Message-ID: On Tue, Jan 24, 2012 at 12:07 PM, Alexander Potapenko wrote: > Is it convenient to have an attribute that you'll have to define for > almost all the functions (IIUC)? > Looks like it is, why not? Clang sets this attribute when asan is on. --kcc -------------- next part -------------- An HTML attachment was scrubbed... 
URL: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120124/b457dbe0/attachment.html From hfinkel at anl.gov Tue Jan 24 15:01:47 2012 From: hfinkel at anl.gov (Hal Finkel) Date: Tue, 24 Jan 2012 15:01:47 -0600 Subject: [llvm-commits] [LLVMdev] [PATCH] BasicBlock Autovectorization Pass In-Reply-To: <1327421849.11266.69.camel@sapling> References: <4EB0462C.5010209@grosser.es> <1320184739.23036.1334.camel@sapling> <1320191694.23036.1497.camel@sapling> <1320749109.19359.76.camel@sapling> <4EB90E98.4010805@grosser.es> <1320762963.19359.117.camel@sapling> <4EB98207.2070807@grosser.es> <1320791390.19359.262.camel@sapling> <4EBC4B0F.6010609@grosser.es> <1321050998.19359.539.camel@sapling> <4EBDA7F9.9080709@grosser.es> <1321053083.19359.550.camel@sapling> <4EBDB1BF.7090006@grosser.es> <1321400339.19359.782.camel@sapling> <1321486739.19359.1067.camel@sapling> <4EC504B5.2020408@grosser.es> <1321898108.2507.36.camel@sapling> <1321932161.2507.101.camel@sapling> <1322067157.2507.263.camel@sapling> <4ED8F7B0.8050309@grosser.es> <1323822351.590.1687.camel@sapling> <4EFC7291.9040808@grosser.es> <1325179929.13080.2839.camel@sapling> <4EFD7FD3.8040800@grosser.es> <1327378420.32397.1603.camel@sapling > <4F1ED40C.80306@grosser.es> <1327421849.11266.69.camel@sapling> Message-ID: <1327438907.11266.134.camel@sapling> I have attached the latest version of my basic-block autovectorization pass. With regard to the non-trivial cycle checking I had mentioned previously, I implemented the "late abort" solution and made it the default for cases where the full cycle check would be expensive (for blocks that have many candidate pairs). For blocks with fewer candidate pairs, the full cycle check is used. I believe that I have addressed all concerns raised thus far (except for the container Value* -> Instruction* type changes, which Tobias said he would be okay with having changed post commit). If I receive no objections over the next few days, I'll commit. 
I would like to thank everyone who has provided feedback, many of the suggestions have proved quite valuable. Thanks again, Hal On Tue, 2012-01-24 at 10:17 -0600, Hal Finkel wrote: > On Tue, 2012-01-24 at 16:53 +0100, Tobias Grosser wrote: > > On 01/24/2012 05:13 AM, Hal Finkel wrote: > > > On Tue, 2012-01-17 at 13:25 -0600, Sebastian Pop wrote: > > >> Hi, > > >> > > >> On Fri, Dec 30, 2011 at 3:09 AM, Tobias Grosser wrote: > > >>> As it seems my intuition is wrong, I am very eager to see and understand > > >>> an example where a search limit of 4000 is really needed. > > >>> > > >> > > >> To make the ball roll again, I attached a testcase that can be tuned > > >> to understand the impact on compile time for different sizes of a > > >> basic block. One can also set the number of iterations in the loop to > > >> 1 to test the vectorizer with no loops around. > > >> > > >> Hal, could you please report the compile times with/without the > > >> vectorizer for different basic block sizes? > > > > > > I've looked at your test case, and I am pleased to report a negligible > > > compile-time increase! > > That is nice. But does this example actually trigger the search limit of > > 4000? I think that is the case I am especially interested in. > > I know (and the answer is yes, it could, but not in an interesting way), > but I reduced the default search limit to 400. I did this because, when > used in combination with my load/store-reordering patch, such a high > limit is no longer optimal. As I suspected, it appears that the high > limit was compensating for the lack of the ability to schedule > non-aliasing loads after stores. I would like to deal with the > load/store reordering problem on its own merits (and have already > submitted a patch that does this), and so I'll leave the lower default > on the vectorizer search limit.
> > In addition, Sebastian's test case highlights why, with the current > implementation, having such a high search limit would be bad for compile > times. A limit in the hundreds, not thousands, is necessary to provide > reasonable compile times for unrolled loops with long dependency chains > such as the ones in his example. > > Thanks again, > Hal > > > > > Cheers > > Tobi > -- Hal Finkel Postdoctoral Appointee Leadership Computing Facility Argonne National Laboratory -------------- next part -------------- A non-text attachment was scrubbed... Name: llvm_bb_vectorize-20120124-2.diff Type: text/x-patch Size: 128711 bytes Desc: not available Url : http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120124/bd809a52/attachment-0001.bin From grosbach at apple.com Tue Jan 24 15:06:59 2012 From: grosbach at apple.com (Jim Grosbach) Date: Tue, 24 Jan 2012 21:06:59 -0000 Subject: [llvm-commits] [llvm] r148856 - /llvm/trunk/utils/TableGen/AsmMatcherEmitter.cpp Message-ID: <20120124210659.C378C2A6C12C@llvm.org> Author: grosbach Date: Tue Jan 24 15:06:59 2012 New Revision: 148856 URL: http://llvm.org/viewvc/llvm-project?rev=148856&view=rev Log: Tidy up. Trailing whitespace. Modified: llvm/trunk/utils/TableGen/AsmMatcherEmitter.cpp Modified: llvm/trunk/utils/TableGen/AsmMatcherEmitter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/TableGen/AsmMatcherEmitter.cpp?rev=148856&r1=148855&r2=148856&view=diff ============================================================================== --- llvm/trunk/utils/TableGen/AsmMatcherEmitter.cpp (original) +++ llvm/trunk/utils/TableGen/AsmMatcherEmitter.cpp Tue Jan 24 15:06:59 2012 @@ -285,7 +285,7 @@ /// Register record if this token is singleton register. 
Record *SingletonReg; - explicit AsmOperand(StringRef T) : Token(T), Class(0), SubOpIdx(-1), + explicit AsmOperand(StringRef T) : Token(T), Class(0), SubOpIdx(-1), SingletonReg(0) {} }; @@ -408,24 +408,24 @@ std::string ConversionFnKind; MatchableInfo(const CodeGenInstruction &CGI) - : AsmVariantID(0), TheDef(CGI.TheDef), DefRec(&CGI), + : AsmVariantID(0), TheDef(CGI.TheDef), DefRec(&CGI), AsmString(CGI.AsmString) { } MatchableInfo(const CodeGenInstAlias *Alias) - : AsmVariantID(0), TheDef(Alias->TheDef), DefRec(Alias), + : AsmVariantID(0), TheDef(Alias->TheDef), DefRec(Alias), AsmString(Alias->AsmString) { } void Initialize(const AsmMatcherInfo &Info, - SmallPtrSet &SingletonRegisters, + SmallPtrSet &SingletonRegisters, int AsmVariantNo, std::string &RegisterPrefix); /// Validate - Return true if this matchable is a valid thing to match against /// and perform a bunch of validity checking. bool Validate(StringRef CommentDelimiter, bool Hack) const; - /// extractSingletonRegisterForAsmOperand - Extract singleton register, + /// extractSingletonRegisterForAsmOperand - Extract singleton register, /// if present, from specified token. void extractSingletonRegisterForAsmOperand(unsigned i, const AsmMatcherInfo &Info, @@ -652,7 +652,7 @@ SmallPtrSet &SingletonRegisters, int AsmVariantNo, std::string &RegisterPrefix) { AsmVariantID = AsmVariantNo; - AsmString = + AsmString = CodeGenInstruction::FlattenAsmStringVariants(AsmString, AsmVariantNo); TokenizeAsmString(Info); @@ -811,10 +811,10 @@ return true; } -/// extractSingletonRegisterForAsmOperand - Extract singleton register, +/// extractSingletonRegisterForAsmOperand - Extract singleton register, /// if present, from specified token. 
void MatchableInfo:: -extractSingletonRegisterForAsmOperand(unsigned OperandNo, +extractSingletonRegisterForAsmOperand(unsigned OperandNo, const AsmMatcherInfo &Info, std::string &RegisterPrefix) { StringRef Tok = AsmOperands[OperandNo].Token; @@ -823,7 +823,7 @@ if (const CodeGenRegister *Reg = Info.Target.getRegisterByName(LoweredTok)) AsmOperands[OperandNo].SingletonReg = Reg->TheDef; return; - } + } if (!Tok.startswith(RegisterPrefix)) return; @@ -1182,20 +1182,20 @@ std::string CommentDelimiter = AsmVariant->getValueAsString("CommentDelimiter"); std::string RegisterPrefix = AsmVariant->getValueAsString("RegisterPrefix"); int AsmVariantNo = AsmVariant->getValueAsInt("Variant"); - + for (CodeGenTarget::inst_iterator I = Target.inst_begin(), E = Target.inst_end(); I != E; ++I) { const CodeGenInstruction &CGI = **I; - + // If the tblgen -match-prefix option is specified (for tblgen hackers), // filter the set of instructions we consider. if (!StringRef(CGI.TheDef->getName()).startswith(MatchPrefix)) continue; - + // Ignore "codegen only" instructions. if (CGI.TheDef->getValueAsBit("isCodeGenOnly")) continue; - + // Validate the operand list to ensure we can handle this instruction. for (unsigned i = 0, e = CGI.Operands.size(); i != e; ++i) { const CGIOperandList::OperandInfo &OI = CGI.Operands[i]; @@ -1216,47 +1216,47 @@ } } } - + OwningPtr II(new MatchableInfo(CGI)); - + II->Initialize(*this, SingletonRegisters, AsmVariantNo, RegisterPrefix); - + // Ignore instructions which shouldn't be matched and diagnose invalid // instruction definitions with an error. if (!II->Validate(CommentDelimiter, true)) continue; - + // Ignore "Int_*" and "*_Int" instructions, which are internal aliases. // // FIXME: This is a total hack. if (StringRef(II->TheDef->getName()).startswith("Int_") || StringRef(II->TheDef->getName()).endswith("_Int")) continue; - + Matchables.push_back(II.take()); } - + // Parse all of the InstAlias definitions and stick them in the list of // matchables. 
std::vector AllInstAliases = Records.getAllDerivedDefinitions("InstAlias"); for (unsigned i = 0, e = AllInstAliases.size(); i != e; ++i) { CodeGenInstAlias *Alias = new CodeGenInstAlias(AllInstAliases[i], Target); - + // If the tblgen -match-prefix option is specified (for tblgen hackers), // filter the set of instruction aliases we consider, based on the target // instruction. if (!StringRef(Alias->ResultInst->TheDef->getName()).startswith( MatchPrefix)) continue; - + OwningPtr II(new MatchableInfo(Alias)); - + II->Initialize(*this, SingletonRegisters, AsmVariantNo, RegisterPrefix); - + // Validate the alias definitions. II->Validate(CommentDelimiter, false); - + Matchables.push_back(II.take()); } } From ahatanaka at mips.com Tue Jan 24 15:19:12 2012 From: ahatanaka at mips.com (Hatanaka, Akira) Date: Tue, 24 Jan 2012 21:19:12 +0000 Subject: [llvm-commits] [PATCH][Review request] IEEE quad software emulation library call In-Reply-To: References: <95DD8BA8AA50B14BBFB86A1D541FA3809EAB2E46@exchdb03.mips.com> <95DD8BA8AA50B14BBFB86A1D541FA3809EAB2E71@exchdb03.mips.com> <95DD8BA8AA50B14BBFB86A1D541FA3809EAB542D@exchdb03.mips.com> <95DD8BA8AA50B14BBFB86A1D541FA3809EAB5468@exchdb03.mips.com>, Message-ID: <95DD8BA8AA50B14BBFB86A1D541FA3809EAB583D@exchdb03.mips.com> Okay, if that doesn't work, I think I will have to legalize f128 during type legalization. Is the patch fine as it is now? Or does it need further changes? ________________________________________ From: Anton Korobeynikov [anton at korobeynikov.info] Sent: Saturday, January 21, 2012 12:11 PM To: Hatanaka, Akira Cc: Eli Friedman; baldrick at free.fr; llvm-commits at cs.uiuc.edu Subject: Re: [llvm-commits] [PATCH][Review request] IEEE quad software emulation library call Hi Akira, > Do you mean I define and add a new register class FGR128RegisterClass and make f128 a legal type? > addRegisterClass(MVT::f128, Mips::FGR128RegisterClass); Ah, probably it will require f128 to be a legal type, yes... 
On z/System there is hardware support for f128, so it was not a problem. Here most probably it won't work... -- With best regards, Anton Korobeynikov Faculty of Mathematics and Mechanics, Saint Petersburg State University From James.Molloy at arm.com Tue Jan 24 15:23:23 2012 From: James.Molloy at arm.com (James Molloy) Date: Tue, 24 Jan 2012 21:23:23 +0000 Subject: [llvm-commits] [llvm] r148658 - in /llvm/trunk: lib/Target/ARM/ARM.td lib/Target/ARM/ARMAsmPrinter.cpp lib/Target/ARM/ARMInstrInfo.td lib/Target/ARM/ARMInstrNEON.td lib/Target/ARM/ARMInstrVFP.td lib/Target/ARM/ARMSchedule.td lib/Target/ARM/ARMSub In-Reply-To: References: , Message-ID: Hi Anton, Unless I'm missing something, I can't see where the Cortex-A15 TRM mentions what legal combinations of NEON and VFP there are? It's just describing what the A15 implements, not what is theoretically possible. Cheers, James ________________________________________ From: anton at korobeynikov.info [anton at korobeynikov.info] On Behalf Of Anton Korobeynikov [asl at math.spbu.ru] Sent: 24 January 2012 20:34 To: James Molloy Cc: Jim Grosbach; llvm-commits at cs.uiuc.edu Subject: Re: [llvm-commits] [llvm] r148658 - in /llvm/trunk: lib/Target/ARM/ARM.td lib/Target/ARM/ARMAsmPrinter.cpp lib/Target/ARM/ARMInstrInfo.td lib/Target/ARM/ARMInstrNEON.td lib/Target/ARM/ARMInstrVFP.td lib/Target/ARM/ARMSchedule.td lib/Target/ARM/ARMSub Hi James, > >From my discussions with colleagues we come to a different conclusion - that it is indeed possible to have the NEONv2 vector integer unit without the floating point unit of VFPv4. > We don't think it's possible to have VFPv4 with NEONv1 however. Well... this conflicts with that doc I mentioned. Where is the truth? :) -- With best regards, Anton Korobeynikov Faculty of Mathematics and Mechanics, Saint Petersburg State University -- IMPORTANT NOTICE: The contents of this email and any attachments are confidential and may also be privileged. 
If you are not the intended recipient, please notify the sender immediately and do not disclose the contents to any other person, use it for any purpose, or store or copy the information in any medium. Thank you. From ahatanaka at mips.com Tue Jan 24 15:41:10 2012 From: ahatanaka at mips.com (Akira Hatanaka) Date: Tue, 24 Jan 2012 21:41:10 -0000 Subject: [llvm-commits] [llvm] r148862 - in /llvm/trunk: lib/Target/Mips/Mips64InstrInfo.td lib/Target/Mips/MipsInstrInfo.td test/CodeGen/Mips/2008-07-16-SignExtInReg.ll Message-ID: <20120124214110.1991D2A6C12C@llvm.org> Author: ahatanak Date: Tue Jan 24 15:41:09 2012 New Revision: 148862 URL: http://llvm.org/viewvc/llvm-project?rev=148862&view=rev Log: 64-bit sign extension in register instructions. Modified: llvm/trunk/lib/Target/Mips/Mips64InstrInfo.td llvm/trunk/lib/Target/Mips/MipsInstrInfo.td llvm/trunk/test/CodeGen/Mips/2008-07-16-SignExtInReg.ll Modified: llvm/trunk/lib/Target/Mips/Mips64InstrInfo.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Mips/Mips64InstrInfo.td?rev=148862&r1=148861&r2=148862&view=diff ============================================================================== --- llvm/trunk/lib/Target/Mips/Mips64InstrInfo.td (original) +++ llvm/trunk/lib/Target/Mips/Mips64InstrInfo.td Tue Jan 24 15:41:09 2012 @@ -178,6 +178,10 @@ def MFHI64 : MoveFromLOHI<0x10, "mfhi", CPU64Regs, [HI64]>; def MFLO64 : MoveFromLOHI<0x12, "mflo", CPU64Regs, [LO64]>; +/// Sign Ext In Register Instructions. 
+def SEB64 : SignExtInReg<0x10, "seb", i8, CPU64Regs>; +def SEH64 : SignExtInReg<0x18, "seh", i16, CPU64Regs>; + /// Count Leading def DCLZ : CountLeading0<0x24, "dclz", CPU64Regs>; def DCLO : CountLeading1<0x25, "dclo", CPU64Regs>; Modified: llvm/trunk/lib/Target/Mips/MipsInstrInfo.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Mips/MipsInstrInfo.td?rev=148862&r1=148861&r2=148862&view=diff ============================================================================== --- llvm/trunk/lib/Target/Mips/MipsInstrInfo.td (original) +++ llvm/trunk/lib/Target/Mips/MipsInstrInfo.td Tue Jan 24 15:41:09 2012 @@ -616,10 +616,11 @@ } // Sign Extend in Register. -class SignExtInReg sa, string instr_asm, ValueType vt>: - FR<0x1f, 0x20, (outs CPURegs:$rd), (ins CPURegs:$rt), +class SignExtInReg sa, string instr_asm, ValueType vt, + RegisterClass RC>: + FR<0x1f, 0x20, (outs RC:$rd), (ins RC:$rt), !strconcat(instr_asm, "\t$rd, $rt"), - [(set CPURegs:$rd, (sext_inreg CPURegs:$rt, vt))], NoItinerary> { + [(set RC:$rd, (sext_inreg RC:$rt, vt))], NoItinerary> { let rs = 0; let shamt = sa; let Predicates = [HasSEInReg]; @@ -889,8 +890,8 @@ def MFLO : MoveFromLOHI<0x12, "mflo", CPURegs, [LO]>; /// Sign Ext In Register Instructions. 
-def SEB : SignExtInReg<0x10, "seb", i8>; -def SEH : SignExtInReg<0x18, "seh", i16>; +def SEB : SignExtInReg<0x10, "seb", i8, CPURegs>; +def SEH : SignExtInReg<0x18, "seh", i16, CPURegs>; /// Count Leading def CLZ : CountLeading0<0x20, "clz", CPURegs>; Modified: llvm/trunk/test/CodeGen/Mips/2008-07-16-SignExtInReg.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Mips/2008-07-16-SignExtInReg.ll?rev=148862&r1=148861&r2=148862&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/Mips/2008-07-16-SignExtInReg.ll (original) +++ llvm/trunk/test/CodeGen/Mips/2008-07-16-SignExtInReg.ll Tue Jan 24 15:41:09 2012 @@ -1,20 +1,16 @@ -; DISABLED: llc < %s -march=mips -o %t -; DISABLED: grep seh %t | count 1 -; DISABLED: grep seb %t | count 1 -; RUN: false -; XFAIL: * - -target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" -target triple = "mipsallegrexel-unknown-psp-elf" +; RUN: llc < %s -march=mips -mcpu=mips32r2 | FileCheck %s +; RUN: llc < %s -march=mips64 -mcpu=mips64r2 | FileCheck %s define signext i8 @A(i8 %e.0, i8 signext %sum) nounwind { entry: +; CHECK: seb add i8 %sum, %e.0 ; :0 [#uses=1] ret i8 %0 } define signext i16 @B(i16 %e.0, i16 signext %sum) nounwind { entry: +; CHECK: seh add i16 %sum, %e.0 ; :0 [#uses=1] ret i16 %0 } From dpatel at apple.com Tue Jan 24 15:43:37 2012 From: dpatel at apple.com (Devang Patel) Date: Tue, 24 Jan 2012 21:43:37 -0000 Subject: [llvm-commits] [llvm] r148864 - in /llvm/trunk: lib/Target/X86/AsmParser/X86AsmParser.cpp test/MC/X86/intel-syntax-encoding.s Message-ID: <20120124214337.15B1C2A6C12C@llvm.org> Author: dpatel Date: Tue Jan 24 15:43:36 2012 New Revision: 148864 URL: http://llvm.org/viewvc/llvm-project?rev=148864&view=rev Log: Intel Syntax: Extend special hand coded logic, to recognize special instructions, for intel syntax. 
Modified: llvm/trunk/lib/Target/X86/AsmParser/X86AsmParser.cpp llvm/trunk/test/MC/X86/intel-syntax-encoding.s Modified: llvm/trunk/lib/Target/X86/AsmParser/X86AsmParser.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/AsmParser/X86AsmParser.cpp?rev=148864&r1=148863&r2=148864&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/AsmParser/X86AsmParser.cpp (original) +++ llvm/trunk/lib/Target/X86/AsmParser/X86AsmParser.cpp Tue Jan 24 15:43:36 2012 @@ -1168,11 +1168,21 @@ Name.startswith("rcl") || Name.startswith("rcr") || Name.startswith("rol") || Name.startswith("ror")) && Operands.size() == 3) { - X86Operand *Op1 = static_cast(Operands[1]); - if (Op1->isImm() && isa(Op1->getImm()) && - cast(Op1->getImm())->getValue() == 1) { - delete Operands[1]; - Operands.erase(Operands.begin() + 1); + if (getParser().getAssemblerDialect()) { + // Intel syntax + X86Operand *Op1 = static_cast(Operands[2]); + if (Op1->isImm() && isa(Op1->getImm()) && + cast(Op1->getImm())->getValue() == 1) { + delete Operands[2]; + Operands.pop_back(); + } + } else { + X86Operand *Op1 = static_cast(Operands[1]); + if (Op1->isImm() && isa(Op1->getImm()) && + cast(Op1->getImm())->getValue() == 1) { + delete Operands[1]; + Operands.erase(Operands.begin() + 1); + } } } Modified: llvm/trunk/test/MC/X86/intel-syntax-encoding.s URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/X86/intel-syntax-encoding.s?rev=148864&r1=148863&r2=148864&view=diff ============================================================================== --- llvm/trunk/test/MC/X86/intel-syntax-encoding.s (original) +++ llvm/trunk/test/MC/X86/intel-syntax-encoding.s Tue Jan 24 15:43:36 2012 @@ -36,3 +36,6 @@ jmp LBB0_3 // CHECK: encoding: [0xf2,0x0f,0x10,0x2c,0x25,0xf8,0xff,0xff,0xff] movsd XMM5, QWORD PTR [-8] + +// CHECK: encoding: [0xd1,0xe7] + shl EDI, 1 From grosbach at apple.com Tue Jan 24 15:45:25 2012 From: grosbach at apple.com (Jim 
Grosbach) Date: Tue, 24 Jan 2012 21:45:25 -0000 Subject: [llvm-commits] [llvm] r148865 - in /llvm/trunk: lib/MC/MachObjectWriter.cpp test/MC/MachO/ARM/no-subsections-reloc.s Message-ID: <20120124214525.630002A6C12C@llvm.org> Author: grosbach Date: Tue Jan 24 15:45:25 2012 New Revision: 148865 URL: http://llvm.org/viewvc/llvm-project?rev=148865&view=rev Log: ARM Darwin symbol ref differences w/o subsection-via-symbols. When not using subsections via symbols, the assembler can resolve symbol differences (including pcrel references) to non-local labels at assembly time, not just those in the same atom. Added: llvm/trunk/test/MC/MachO/ARM/no-subsections-reloc.s Modified: llvm/trunk/lib/MC/MachObjectWriter.cpp Modified: llvm/trunk/lib/MC/MachObjectWriter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/MC/MachObjectWriter.cpp?rev=148865&r1=148864&r2=148865&view=diff ============================================================================== --- llvm/trunk/lib/MC/MachObjectWriter.cpp (original) +++ llvm/trunk/lib/MC/MachObjectWriter.cpp Tue Jan 24 15:45:25 2012 @@ -592,7 +592,8 @@ if (!Asm.getBackend().hasReliableSymbolDifference()) { if (!SA.isInSection() || &SecA != &SecB || (!SA.isTemporary() && - FB.getAtom() != Asm.getSymbolData(SA).getFragment()->getAtom())) + FB.getAtom() != Asm.getSymbolData(SA).getFragment()->getAtom() && + Asm.getSubsectionsViaSymbols())) return false; return true; } Added: llvm/trunk/test/MC/MachO/ARM/no-subsections-reloc.s URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/MachO/ARM/no-subsections-reloc.s?rev=148865&view=auto ============================================================================== --- llvm/trunk/test/MC/MachO/ARM/no-subsections-reloc.s (added) +++ llvm/trunk/test/MC/MachO/ARM/no-subsections-reloc.s Tue Jan 24 15:45:25 2012 @@ -0,0 +1,18 @@ +@ RUN: llvm-mc -n -triple thumbv7-apple-darwin10 %s -filetype=obj -o %t.obj +@ RUN: macho-dump --dump-section-data < %t.obj > %t.dump +@ RUN: FileCheck < 
%t.dump %s + +@ When not using subsections-via-symbols, references to non-local symbols +@ in the same section can be resolved at assembly time w/o relocations. + + .syntax unified + .text + .thumb + .thumb_func _foo +_foo: + ldr r3, bar +bar: + .long 0 + +@ CHECK: 'num_reloc', 0 +@ CHECK: '_section_data', 'dff80030 00000000' From spop at codeaurora.org Tue Jan 24 16:08:07 2012 From: spop at codeaurora.org (Sebastian Pop) Date: Tue, 24 Jan 2012 16:08:07 -0600 Subject: [llvm-commits] [LLVMdev] [PATCH] BasicBlock Autovectorization Pass In-Reply-To: <1327378420.32397.1603.camel@sapling> References: <1319928991.23036.957.camel@sapling> <1320108633.23036.1266.camel@sapling> <1320172356.23036.1298.camel@sapling> <4EB0462C.5010209@grosser.es> <1320184739.23036.1334.camel@sapling> <1320191694.23036.1497.camel@sapling> <1320749109.19359.76.camel@sapling> <4EB90E98.4010805@grosser.es> <1320762963.19359.117.camel@sapling> <4EB98207.2070807@grosser.es> <1320791390.19359.262.camel@sapling> <4EBC4B0F.6010609@grosser.es> <1321050998.19359.539.camel@sapling> <4EBDA7F9.9080709@grosser.es> <1321053083.19359.550.camel@sapling> <4EBDB1BF.7090006@grosser.es> <1321400339.19359.782.camel@sapling> <1321486739.19359.1067.camel@sapling> <4EC504B5.2020408@grosser.es> <1321898108.2507.36.camel@sapling> <1321932161.2507.101.camel@sapling> <1322067157.2507.263.camel@sapling> <4ED8F7B0.8050309@grosser.es> <1323822351.590.1687.camel@sapling> <4EFC7291.9040808@grosser.es> <1325179929.13080.2839.camel@sapling> <4EFD7FD3.8040800@grosser.es> <1327378420.32397.1603.camel@sapling> Message-ID: On Mon, Jan 23, 2012 at 10:13 PM, Hal Finkel wrote: > On Tue, 2012-01-17 at 13:25 -0600, Sebastian Pop wrote: >> Hi, >> >> On Fri, Dec 30, 2011 at 3:09 AM, Tobias Grosser wrote: >> > As it seems my intuition is wrong, I am very eager to see and understand >> > an example where a search limit of 4000 is really needed. 
>> > >> >> To make the ball roll again, I attached a testcase that can be tuned >> to understand the impact on compile time for different sizes of a >> basic block. One can also set the number of iterations in the loop to >> 1 to test the vectorizer with no loops around. >> >> Hal, could you please report the compile times with/without the >> vectorizer for different basic block sizes? > > I've looked at your test case, and I am pleased to report a negligible > compile-time increase! Also, there is no vectorization of the main Good! > loop :) Here's why: (as you know) the main part of the loop is > essentially one long dependency chain, and so there is nothing to > vectorize there. The only vectorization opportunities come from > unrolling the loop. Using the default thresholds, the loop will not even > partially unroll (because the body is too large). As a result, > essentially nothing happens. > > I've prepared a reduced version of your test case (attached). Using > -unroll-threshold=300 (along with -unroll-allow-partial), I can make the > loop unroll partially (the reduced loop size is 110, so this allows > unrolling 2 iterations). Once this is done, the vectorizer finds > candidate pairs and vectorizes [as a practical matter, you need -basicaa > too]. > > I think that even this is probably too big for a regression test. I > don't think that the basic structure really adds anything over existing > tests (although I need to make sure that alias-analysis use is otherwise > covered), but I'll copy-and-paste a small portion into a regression test > to cover the search limit logic (which is currently uncovered). We > should probably discuss different situations that we'd like to see > covered in the regression suite (perhaps post-commit). > > Thanks for working on this! I'll post an updated patch for review > shortly. Thanks for the new patch. 
I will send you some more comments on the patch as I'm advancing through testing: I found some interesting benchmarks in which enabling vectorization gets the performance down by 80% on ARM. I will prepare a reduced testcase and try to find out the reason. As a first shot, I would say that this comes from the vectorization of code in a loop and the overhead of transfer between scalar and vector registers. I would like to not stop you from committing the patch just because of performance issues: let's address any further improvements once the patch is installed on tot. Thanks again, Sebastian -- Qualcomm Innovation Center, Inc is a member of Code Aurora Forum From ahatanaka at mips.com Tue Jan 24 16:05:25 2012 From: ahatanaka at mips.com (Akira Hatanaka) Date: Tue, 24 Jan 2012 22:05:25 -0000 Subject: [llvm-commits] [llvm] r148869 - in /llvm/trunk: lib/Target/Mips/MipsInstrFPU.td test/CodeGen/Mips/mips64ext.ll Message-ID: <20120124220525.9EFFF2A6C12C@llvm.org> Author: ahatanak Date: Tue Jan 24 16:05:25 2012 New Revision: 148869 URL: http://llvm.org/viewvc/llvm-project?rev=148869&view=rev Log: Pattern for f32 to i64 conversion. 
Modified: llvm/trunk/lib/Target/Mips/MipsInstrFPU.td llvm/trunk/test/CodeGen/Mips/mips64ext.ll Modified: llvm/trunk/lib/Target/Mips/MipsInstrFPU.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Mips/MipsInstrFPU.td?rev=148869&r1=148868&r2=148869&view=diff ============================================================================== --- llvm/trunk/lib/Target/Mips/MipsInstrFPU.td (original) +++ llvm/trunk/lib/Target/Mips/MipsInstrFPU.td Tue Jan 24 16:05:25 2012 @@ -337,6 +337,7 @@ (CVT_D64_L (DMTC1 CPU64Regs:$src))>; def : Pat<(i32 (fp_to_sint FGR64:$src)), (MFC1 (TRUNC_W_D64 FGR64:$src))>; + def : Pat<(i64 (fp_to_sint FGR32:$src)), (DMFC1 (TRUNC_L_S FGR32:$src))>; def : Pat<(i64 (fp_to_sint FGR64:$src)), (DMFC1 (TRUNC_L_D64 FGR64:$src))>; def : Pat<(f32 (fround FGR64:$src)), (CVT_S_D64 FGR64:$src)>; Modified: llvm/trunk/test/CodeGen/Mips/mips64ext.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Mips/mips64ext.ll?rev=148869&r1=148868&r2=148869&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/Mips/mips64ext.ll (original) +++ llvm/trunk/test/CodeGen/Mips/mips64ext.ll Tue Jan 24 16:05:25 2012 @@ -17,3 +17,10 @@ ret i64 %conv } +define i64 @i64_float(float %f) nounwind readnone { +entry: +; CHECK: trunc.l.s + %conv = fptosi float %f to i64 + ret i64 %conv +} + From ahatanaka at mips.com Tue Jan 24 16:07:37 2012 From: ahatanaka at mips.com (Akira Hatanaka) Date: Tue, 24 Jan 2012 22:07:37 -0000 Subject: [llvm-commits] [llvm] r148871 - /llvm/trunk/lib/Target/Mips/MipsISelLowering.cpp Message-ID: <20120124220737.2D45E2A6C12C@llvm.org> Author: ahatanak Date: Tue Jan 24 16:07:36 2012 New Revision: 148871 URL: http://llvm.org/viewvc/llvm-project?rev=148871&view=rev Log: Pass CCState by reference. 
Modified: llvm/trunk/lib/Target/Mips/MipsISelLowering.cpp Modified: llvm/trunk/lib/Target/Mips/MipsISelLowering.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Mips/MipsISelLowering.cpp?rev=148871&r1=148870&r2=148871&view=diff ============================================================================== --- llvm/trunk/lib/Target/Mips/MipsISelLowering.cpp (original) +++ llvm/trunk/lib/Target/Mips/MipsISelLowering.cpp Tue Jan 24 16:07:36 2012 @@ -1946,7 +1946,7 @@ #include "MipsGenCallingConv.inc" static void -AnalyzeMips64CallOperands(CCState CCInfo, +AnalyzeMips64CallOperands(CCState &CCInfo, const SmallVectorImpl &Outs) { unsigned NumOps = Outs.size(); for (unsigned i = 0; i != NumOps; ++i) { From ahatanaka at mips.com Tue Jan 24 17:18:43 2012 From: ahatanaka at mips.com (Akira Hatanaka) Date: Tue, 24 Jan 2012 23:18:43 -0000 Subject: [llvm-commits] [llvm] r148875 - /llvm/trunk/lib/Target/Mips/MipsISelLowering.cpp Message-ID: <20120124231843.6655A2A6C12C@llvm.org> Author: ahatanak Date: Tue Jan 24 17:18:43 2012 New Revision: 148875 URL: http://llvm.org/viewvc/llvm-project?rev=148875&view=rev Log: Sign-extend 32-bit integer arguments when they are passed in 64-bit registers, which is what N32/64 does. 
Modified: llvm/trunk/lib/Target/Mips/MipsISelLowering.cpp Modified: llvm/trunk/lib/Target/Mips/MipsISelLowering.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Mips/MipsISelLowering.cpp?rev=148875&r1=148874&r2=148875&view=diff ============================================================================== --- llvm/trunk/lib/Target/Mips/MipsISelLowering.cpp (original) +++ llvm/trunk/lib/Target/Mips/MipsISelLowering.cpp Tue Jan 24 17:18:43 2012 @@ -2299,7 +2299,10 @@ Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, LocVT, Arg); break; case CCValAssign::AExt: - Arg = DAG.getNode(ISD::ANY_EXTEND, dl, LocVT, Arg); + if (ValVT == MVT::i32) + Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, LocVT, Arg); + else + Arg = DAG.getNode(ISD::ANY_EXTEND, dl, LocVT, Arg); break; } From daniel at zuster.org Tue Jan 24 17:28:28 2012 From: daniel at zuster.org (Daniel Dunbar) Date: Tue, 24 Jan 2012 23:28:28 -0000 Subject: [llvm-commits] [zorg] r148877 - in /zorg/trunk/lnt/lnt/server/ui: templates/v4_graph.html views.py Message-ID: <20120124232828.D3AB62A6C12C@llvm.org> Author: ddunbar Date: Tue Jan 24 17:28:28 2012 New Revision: 148877 URL: http://llvm.org/viewvc/llvm-project?rev=148877&view=rev Log: [lnt/v0.4] Add some hacky code to autoconvert graphs from machines which report revisions as "" into a date format. The 2011 to 2012 jump otherwise makes graphs completely unreadable. 
Modified: zorg/trunk/lnt/lnt/server/ui/templates/v4_graph.html zorg/trunk/lnt/lnt/server/ui/views.py Modified: zorg/trunk/lnt/lnt/server/ui/templates/v4_graph.html URL: http://llvm.org/viewvc/llvm-project/zorg/trunk/lnt/lnt/server/ui/templates/v4_graph.html?rev=148877&r1=148876&r2=148877&view=diff ============================================================================== --- zorg/trunk/lnt/lnt/server/ui/templates/v4_graph.html (original) +++ zorg/trunk/lnt/lnt/server/ui/templates/v4_graph.html Tue Jan 24 17:28:28 2012 @@ -24,8 +24,13 @@ {% for plot in graph_plots %} {{ plot }} {% endfor %} - + +{% if use_day_axis %} + graph.xAxis.format = graph.xAxis.formats.day; +{% else %} graph.xAxis.format = graph.xAxis.formats.normal; +{% endif %} + graph.draw(); } {% endblock %} Modified: zorg/trunk/lnt/lnt/server/ui/views.py URL: http://llvm.org/viewvc/llvm-project/zorg/trunk/lnt/lnt/server/ui/views.py?rev=148877&r1=148876&r2=148877&view=diff ============================================================================== --- zorg/trunk/lnt/lnt/server/ui/views.py (original) +++ zorg/trunk/lnt/lnt/server/ui/views.py Tue Jan 24 17:28:28 2012 @@ -923,6 +923,7 @@ graph_plots = [] num_points = 0 num_plots = len(graph_tests) + use_day_axis = None for i,(test,field) in enumerate(graph_tests): # Determine the base plot color. col = list(util.makeDarkColor(float(i) / num_plots)) @@ -949,6 +950,30 @@ for v,r in q).items() data.sort() + # Infer whether or not we should use a day axis. This is a total hack to + # try and get graphs of machines which report in the %04Y%02M%02D format + # to look readable. + # + # We only do this detection for the first test. + if use_day_axis is None: + if data: + use_day_axis = (20000000 <= data[0][0] < 20990000) + else: + use_day_axis = False + + # If we are using a day axis, convert the keys into seconds since the + # epoch. 
+ if use_day_axis: + def convert((x,y)): + year = x//10000 + month = (x//100) % 100 + day = x % 100 + seconds = datetime.datetime + timestamp = time.mktime((year, month, day, + 0, 0, 0, 0, 0, 0)) + return (timestamp,y) + data = map(convert, data) + # Compute the graph points. errorbar_data = [] points_data = [] @@ -1037,4 +1062,5 @@ compare_to=compare_to, options=options, num_plots=num_plots, num_points=num_points, neighboring_runs=neighboring_runs, - graph_plots=graph_plots, legend=legend) + graph_plots=graph_plots, legend=legend, + use_day_axis=use_day_axis) From stoklund at 2pi.dk Tue Jan 24 17:28:38 2012 From: stoklund at 2pi.dk (Jakob Stoklund Olesen) Date: Tue, 24 Jan 2012 23:28:38 -0000 Subject: [llvm-commits] [llvm] r148878 - /llvm/trunk/lib/CodeGen/SlotIndexes.cpp Message-ID: <20120124232838.CF76C2A6C12C@llvm.org> Author: stoklund Date: Tue Jan 24 17:28:38 2012 New Revision: 148878 URL: http://llvm.org/viewvc/llvm-project?rev=148878&view=rev Log: Use the standard MachineFunction::print() after SlotIndexes. Modified: llvm/trunk/lib/CodeGen/SlotIndexes.cpp Modified: llvm/trunk/lib/CodeGen/SlotIndexes.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SlotIndexes.cpp?rev=148878&r1=148877&r2=148878&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SlotIndexes.cpp (original) +++ llvm/trunk/lib/CodeGen/SlotIndexes.cpp Tue Jan 24 17:28:38 2012 @@ -106,7 +106,7 @@ // Sort the Idx2MBBMap std::sort(idx2MBBMap.begin(), idx2MBBMap.end(), Idx2MBBCompare()); - DEBUG(dump()); + DEBUG(mf->print(dbgs(), this)); // And we're done! 
return false; From stoklund at 2pi.dk Tue Jan 24 17:28:42 2012 From: stoklund at 2pi.dk (Jakob Stoklund Olesen) Date: Tue, 24 Jan 2012 23:28:42 -0000 Subject: [llvm-commits] [llvm] r148879 - in /llvm/trunk: lib/CodeGen/TwoAddressInstructionPass.cpp test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll Message-ID: <20120124232842.AE4D92A6C12C@llvm.org> Author: stoklund Date: Tue Jan 24 17:28:42 2012 New Revision: 148879 URL: http://llvm.org/viewvc/llvm-project?rev=148879&view=rev Log: Set correct flags when lowering REG_SEQUENCE. A REG_SEQUENCE instruction is lowered into a sequence of partial defs: %vreg7:ssub_0 = COPY %vreg20:ssub_0 %vreg7:ssub_1 = COPY %vreg2 %vreg7:ssub_2 = COPY %vreg2 %vreg7:ssub_3 = COPY %vreg2 The first def needs an <undef> flag to indicate it is the beginning of the live range, while the other defs are read-modify-write. Previously, we depended on LiveIntervalAnalysis to notice and fix the missing <undef>, but that solution was never robust, it was causing problems with ProcessImplicitDefs and the lowering of chained REG_SEQUENCE instructions. This fixes PR11841. Added: llvm/trunk/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll Modified: llvm/trunk/lib/CodeGen/TwoAddressInstructionPass.cpp llvm/trunk/test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll Modified: llvm/trunk/lib/CodeGen/TwoAddressInstructionPass.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/TwoAddressInstructionPass.cpp?rev=148879&r1=148878&r2=148879&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/TwoAddressInstructionPass.cpp (original) +++ llvm/trunk/lib/CodeGen/TwoAddressInstructionPass.cpp Tue Jan 24 17:28:42 2012 @@ -1646,6 +1646,36 @@ } } +// Find the first def of Reg, assuming they are all in the same basic block. 
+static MachineInstr *findFirstDef(unsigned Reg, MachineRegisterInfo *MRI) { + SmallPtrSet Defs; + MachineInstr *First = 0; + for (MachineRegisterInfo::def_iterator RI = MRI->def_begin(Reg); + MachineInstr *MI = RI.skipInstruction(); Defs.insert(MI)) + First = MI; + if (!First) + return 0; + + MachineBasicBlock *MBB = First->getParent(); + MachineBasicBlock::iterator A = First, B = First; + bool Moving; + do { + Moving = false; + if (A != MBB->begin()) { + Moving = true; + --A; + if (Defs.erase(A)) First = A; + } + if (B != MBB->end()) { + Defs.erase(B); + ++B; + Moving = true; + } + } while (Moving && !Defs.empty()); + assert(Defs.empty() && "Instructions outside basic block!"); + return First; +} + /// CoalesceExtSubRegs - If a number of sources of the REG_SEQUENCE are /// EXTRACT_SUBREG from the same register and to the same virtual register /// with different sub-register indices, attempt to combine the @@ -1874,6 +1904,22 @@ UpdateRegSequenceSrcs(SrcReg, DstReg, SubIdx, MRI, *TRI); } + // Set flags on the first DstReg def in the basic block. + // It marks the beginning of the live range. All the other defs are + // read-modify-write. + if (MachineInstr *Def = findFirstDef(DstReg, MRI)) { + for (unsigned i = 0, e = Def->getNumOperands(); i != e; ++i) { + MachineOperand &MO = Def->getOperand(i); + if (MO.isReg() && MO.isDef() && MO.getReg() == DstReg) + MO.setIsUndef(); + } + // Make sure there is a full non-subreg imp-def operand on the + // instruction. This shouldn't be necessary, but it seems that at least + // RAFast requires it. 
+ Def->addRegisterDefined(DstReg, TRI); + DEBUG(dbgs() << "First def: " << *Def); + } + if (IsImpDef) { DEBUG(dbgs() << "Turned: " << *MI << " into an IMPLICIT_DEF"); MI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF)); Modified: llvm/trunk/test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll?rev=148879&r1=148878&r2=148879&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll (original) +++ llvm/trunk/test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll Tue Jan 24 17:28:42 2012 @@ -12,7 +12,7 @@ ; CHECK: vld1.64 {d16, d17}, [r{{.}}] ; CHECK-NOT: vld1.64 {d16, d17} -; CHECK: vmov.f64 d19, d16 +; CHECK: vmov.f64 define i32 @test(i8* %arg) nounwind { entry: Added: llvm/trunk/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll?rev=148879&view=auto ============================================================================== --- llvm/trunk/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll (added) +++ llvm/trunk/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll Tue Jan 24 17:28:42 2012 @@ -0,0 +1,43 @@ +; RUN: llc < %s -mcpu=cortex-a8 -verify-machineinstrs -verify-coalescing +; PR11841 +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64" +target triple = "armv7-none-linux-eabi" + +; This test case is exercising REG_SEQUENCE, and chains of REG_SEQUENCE. 
+define arm_aapcs_vfpcc void @foo(i8* nocapture %arg, i8* %arg1) nounwind align 2 { +bb: + %tmp = load <2 x float>* undef, align 8, !tbaa !0 + %tmp2 = extractelement <2 x float> %tmp, i32 0 + %tmp3 = insertelement <4 x float> undef, float %tmp2, i32 0 + %tmp4 = insertelement <4 x float> %tmp3, float 0.000000e+00, i32 1 + %tmp5 = insertelement <4 x float> %tmp4, float 0.000000e+00, i32 2 + %tmp6 = insertelement <4 x float> %tmp5, float 0.000000e+00, i32 3 + %tmp7 = extractelement <2 x float> %tmp, i32 1 + %tmp8 = insertelement <4 x float> %tmp3, float %tmp7, i32 1 + %tmp9 = insertelement <4 x float> %tmp8, float 0.000000e+00, i32 2 + %tmp10 = insertelement <4 x float> %tmp9, float 0.000000e+00, i32 3 + %tmp11 = bitcast <4 x float> %tmp6 to <2 x i64> + %tmp12 = shufflevector <2 x i64> %tmp11, <2 x i64> undef, <1 x i32> zeroinitializer + %tmp13 = bitcast <1 x i64> %tmp12 to <2 x float> + %tmp14 = shufflevector <2 x float> %tmp13, <2 x float> undef, <4 x i32> zeroinitializer + %tmp15 = bitcast <4 x float> %tmp14 to <2 x i64> + %tmp16 = shufflevector <2 x i64> %tmp15, <2 x i64> undef, <1 x i32> zeroinitializer + %tmp17 = bitcast <1 x i64> %tmp16 to <2 x float> + %tmp18 = extractelement <2 x float> %tmp17, i32 0 + tail call arm_aapcs_vfpcc void @bar(i8* undef, float %tmp18, float undef, float 0.000000e+00) nounwind + %tmp19 = bitcast <4 x float> %tmp10 to <2 x i64> + %tmp20 = shufflevector <2 x i64> %tmp19, <2 x i64> undef, <1 x i32> zeroinitializer + %tmp21 = bitcast <1 x i64> %tmp20 to <2 x float> + %tmp22 = shufflevector <2 x float> %tmp21, <2 x float> undef, <4 x i32> + %tmp23 = bitcast <4 x float> %tmp22 to <2 x i64> + %tmp24 = shufflevector <2 x i64> %tmp23, <2 x i64> undef, <1 x i32> zeroinitializer + %tmp25 = bitcast <1 x i64> %tmp24 to <2 x float> + %tmp26 = extractelement <2 x float> %tmp25, i32 0 + tail call arm_aapcs_vfpcc void @bar(i8* undef, float undef, float %tmp26, float 0.000000e+00) nounwind + ret void +} + +declare arm_aapcs_vfpcc void @bar(i8*, 
float, float, float) + +!0 = metadata !{metadata !"omnipotent char", metadata !1} +!1 = metadata !{metadata !"Simple C/C++ TBAA", null} From viridia at gmail.com Tue Jan 24 17:43:59 2012 From: viridia at gmail.com (Talin) Date: Tue, 24 Jan 2012 23:43:59 -0000 Subject: [llvm-commits] [llvm] r148881 - in /llvm/trunk: include/llvm/ADT/SmallString.h unittests/ADT/SmallStringTest.cpp Message-ID: <20120124234359.51DCF2A6C12C@llvm.org> Author: talin Date: Tue Jan 24 17:43:59 2012 New Revision: 148881 URL: http://llvm.org/viewvc/llvm-project?rev=148881&view=rev Log: Additional methods for SmallString. Modified: llvm/trunk/include/llvm/ADT/SmallString.h llvm/trunk/unittests/ADT/SmallStringTest.cpp Modified: llvm/trunk/include/llvm/ADT/SmallString.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/ADT/SmallString.h?rev=148881&r1=148880&r2=148881&view=diff ============================================================================== --- llvm/trunk/include/llvm/ADT/SmallString.h (original) +++ llvm/trunk/include/llvm/ADT/SmallString.h Tue Jan 24 17:43:59 2012 @@ -24,21 +24,244 @@ template class SmallString : public SmallVector { public: - // Default ctor - Initialize to empty. + /// Default ctor - Initialize to empty. SmallString() {} - // Initialize from a StringRef. + /// Initialize from a StringRef. SmallString(StringRef S) : SmallVector(S.begin(), S.end()) {} - // Initialize with a range. + /// Initialize with a range. template SmallString(ItTy S, ItTy E) : SmallVector(S, E) {} - // Copy ctor. + /// Copy ctor. SmallString(const SmallString &RHS) : SmallVector(RHS) {} + // Note that in order to add new overloads for append & assign, we have to + // duplicate the inherited versions so as not to inadvertently hide them. 
+ + /// @} + /// @name String Assignment + /// @{ + + /// Assign from a repeated element + void assign(unsigned NumElts, char Elt) { + this->SmallVectorImpl::assign(NumElts, Elt); + } + + /// Assign from an iterator pair + template + void assign(in_iter S, in_iter E) { + this->clear(); + SmallVectorImpl::append(S, E); + } + + /// Assign from a StringRef + void assign(StringRef RHS) { + this->clear(); + SmallVectorImpl::append(RHS.begin(), RHS.end()); + } + + /// Assign from a SmallVector + void assign(const SmallVectorImpl &RHS) { + this->clear(); + SmallVectorImpl::append(RHS.begin(), RHS.end()); + } + + /// @} + /// @name String Concatenation + /// @{ + + /// Append from an iterator pair + template + void append(in_iter S, in_iter E) { + SmallVectorImpl::append(S, E); + } + + /// Append from a StringRef + void append(StringRef RHS) { + SmallVectorImpl::append(RHS.begin(), RHS.end()); + } + + /// Append from a SmallVector + void append(const SmallVectorImpl &RHS) { + SmallVectorImpl::append(RHS.begin(), RHS.end()); + } + + /// @} + /// @name String Comparison + /// @{ + + /// equals - Check for string equality, this is more efficient than + /// compare() when the relative ordering of inequal strings isn't needed. + bool equals(StringRef RHS) const { + return str().equals(RHS); + } + + /// equals_lower - Check for string equality, ignoring case. + bool equals_lower(StringRef RHS) const { + return str().equals_lower(RHS); + } + + /// compare - Compare two strings; the result is -1, 0, or 1 if this string + /// is lexicographically less than, equal to, or greater than the \arg RHS. + int compare(StringRef RHS) const { + return str().compare(RHS); + } + + /// compare_lower - Compare two strings, ignoring case. + int compare_lower(StringRef RHS) const { + return str().compare_lower(RHS); + } + + /// compare_numeric - Compare two strings, treating sequences of digits as + /// numbers. 
+ int compare_numeric(StringRef RHS) const { + return str().compare_numeric(RHS); + } + + /// @} + /// @name String Predicates + /// @{ + + /// startswith - Check if this string starts with the given \arg Prefix. + bool startswith(StringRef Prefix) const { + return str().startswith(Prefix); + } + + /// endswith - Check if this string ends with the given \arg Suffix. + bool endswith(StringRef Suffix) const { + return str().endswith(Suffix); + } + + /// @} + /// @name String Searching + /// @{ + + /// find - Search for the first character \arg C in the string. + /// + /// \return - The index of the first occurrence of \arg C, or npos if not + /// found. + size_t find(char C, size_t From = 0) const { + return str().find(C, From); + } + + /// find - Search for the first string \arg Str in the string. + /// + /// \return - The index of the first occurrence of \arg Str, or npos if not + /// found. + size_t find(StringRef Str, size_t From = 0) const { + return str().find(Str, From); + } + + /// rfind - Search for the last character \arg C in the string. + /// + /// \return - The index of the last occurrence of \arg C, or npos if not + /// found. + size_t rfind(char C, size_t From = StringRef::npos) const { + return str().rfind(C, From); + } + + /// rfind - Search for the last string \arg Str in the string. + /// + /// \return - The index of the last occurrence of \arg Str, or npos if not + /// found. + size_t rfind(StringRef Str) const { + return str().rfind(Str); + } + + /// find_first_of - Find the first character in the string that is \arg C, + /// or npos if not found. Same as find. + size_t find_first_of(char C, size_t From = 0) const { + return str().find_first_of(C, From); + } + + /// find_first_of - Find the first character in the string that is in \arg + /// Chars, or npos if not found. 
+ /// + /// Note: O(size() + Chars.size()) + size_t find_first_of(StringRef Chars, size_t From = 0) const { + return str().find_first_of(Chars, From); + } + + /// find_first_not_of - Find the first character in the string that is not + /// \arg C or npos if not found. + size_t find_first_not_of(char C, size_t From = 0) const { + return str().find_first_not_of(C, From); + } + + /// find_first_not_of - Find the first character in the string that is not + /// in the string \arg Chars, or npos if not found. + /// + /// Note: O(size() + Chars.size()) + size_t find_first_not_of(StringRef Chars, size_t From = 0) const { + return str().find_first_not_of(Chars, From); + } + + /// find_last_of - Find the last character in the string that is \arg C, or + /// npos if not found. + size_t find_last_of(char C, size_t From = StringRef::npos) const { + return str().find_last_of(C, From); + } + + /// find_last_of - Find the last character in the string that is in \arg C, + /// or npos if not found. + /// + /// Note: O(size() + Chars.size()) + size_t find_last_of( + StringRef Chars, size_t From = StringRef::npos) const { + return str().find_last_of(Chars, From); + } + + /// @} + /// @name Helpful Algorithms + /// @{ + + /// count - Return the number of occurrences of \arg C in the string. + size_t count(char C) const { + return str().count(C); + } + + /// count - Return the number of non-overlapped occurrences of \arg Str in + /// the string. + size_t count(StringRef Str) const { + return str().count(Str); + } + + /// @} + /// @name Substring Operations + /// @{ + + /// substr - Return a reference to the substring from [Start, Start + N). + /// + /// \param Start - The index of the starting character in the substring; if + /// the index is npos or greater than the length of the string then the + /// empty substring will be returned. + /// + /// \param N - The number of characters to included in the substring. 
If N + /// exceeds the number of characters remaining in the string, the string + /// suffix (starting with \arg Start) will be returned. + StringRef substr(size_t Start, size_t N = StringRef::npos) const { + return str().substr(Start, N); + } + + /// slice - Return a reference to the substring from [Start, End). + /// + /// \param Start - The index of the starting character in the substring; if + /// the index is npos or greater than the length of the string then the + /// empty substring will be returned. + /// + /// \param End - The index following the last character to include in the + /// substring. If this is npos, or less than \arg Start, or exceeds the + /// number of characters remaining in the string, the string suffix + /// (starting with \arg Start) will be returned. + StringRef slice(size_t Start, size_t End) const { + return str().slice(Start, End); + } // Extra methods. + + /// Explicit conversion to StringRef StringRef str() const { return StringRef(this->begin(), this->size()); } // TODO: Make this const, if it's safe... @@ -48,7 +271,7 @@ return this->data(); } - // Implicit conversion to StringRef. + /// Implicit conversion to StringRef. operator StringRef() const { return str(); } // Extra operators. 
Modified: llvm/trunk/unittests/ADT/SmallStringTest.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/unittests/ADT/SmallStringTest.cpp?rev=148881&r1=148880&r2=148881&view=diff ============================================================================== --- llvm/trunk/unittests/ADT/SmallStringTest.cpp (original) +++ llvm/trunk/unittests/ADT/SmallStringTest.cpp Tue Jan 24 17:43:59 2012 @@ -44,5 +44,153 @@ EXPECT_TRUE(theString.rbegin() == theString.rend()); } +TEST_F(SmallStringTest, AssignRepeated) { + theString.assign(3, 'a'); + EXPECT_EQ(3u, theString.size()); + EXPECT_STREQ("aaa", theString.c_str()); } +TEST_F(SmallStringTest, AssignIterPair) { + StringRef abc = "abc"; + theString.assign(abc.begin(), abc.end()); + EXPECT_EQ(3u, theString.size()); + EXPECT_STREQ("abc", theString.c_str()); +} + +TEST_F(SmallStringTest, AssignStringRef) { + StringRef abc = "abc"; + theString.assign(abc); + EXPECT_EQ(3u, theString.size()); + EXPECT_STREQ("abc", theString.c_str()); +} + +TEST_F(SmallStringTest, AssignSmallVector) { + StringRef abc = "abc"; + SmallVector abcVec(abc.begin(), abc.end()); + theString.assign(abcVec); + EXPECT_EQ(3u, theString.size()); + EXPECT_STREQ("abc", theString.c_str()); +} + +TEST_F(SmallStringTest, AppendIterPair) { + StringRef abc = "abc"; + theString.append(abc.begin(), abc.end()); + theString.append(abc.begin(), abc.end()); + EXPECT_EQ(6u, theString.size()); + EXPECT_STREQ("abcabc", theString.c_str()); +} + +TEST_F(SmallStringTest, AppendStringRef) { + StringRef abc = "abc"; + theString.append(abc); + theString.append(abc); + EXPECT_EQ(6u, theString.size()); + EXPECT_STREQ("abcabc", theString.c_str()); +} + +TEST_F(SmallStringTest, AppendSmallVector) { + StringRef abc = "abc"; + SmallVector abcVec(abc.begin(), abc.end()); + theString.append(abcVec); + theString.append(abcVec); + EXPECT_EQ(6u, theString.size()); + EXPECT_STREQ("abcabc", theString.c_str()); +} + +TEST_F(SmallStringTest, Substr) { + theString = "hello"; + EXPECT_EQ("lo", 
theString.substr(3)); + EXPECT_EQ("", theString.substr(100)); + EXPECT_EQ("hello", theString.substr(0, 100)); + EXPECT_EQ("o", theString.substr(4, 10)); +} + +TEST_F(SmallStringTest, Slice) { + theString = "hello"; + EXPECT_EQ("l", theString.slice(2, 3)); + EXPECT_EQ("ell", theString.slice(1, 4)); + EXPECT_EQ("llo", theString.slice(2, 100)); + EXPECT_EQ("", theString.slice(2, 1)); + EXPECT_EQ("", theString.slice(10, 20)); +} + +TEST_F(SmallStringTest, Find) { + theString = "hello"; + EXPECT_EQ(2U, theString.find('l')); + EXPECT_EQ(StringRef::npos, theString.find('z')); + EXPECT_EQ(StringRef::npos, theString.find("helloworld")); + EXPECT_EQ(0U, theString.find("hello")); + EXPECT_EQ(1U, theString.find("ello")); + EXPECT_EQ(StringRef::npos, theString.find("zz")); + EXPECT_EQ(2U, theString.find("ll", 2)); + EXPECT_EQ(StringRef::npos, theString.find("ll", 3)); + EXPECT_EQ(0U, theString.find("")); + + EXPECT_EQ(3U, theString.rfind('l')); + EXPECT_EQ(StringRef::npos, theString.rfind('z')); + EXPECT_EQ(StringRef::npos, theString.rfind("helloworld")); + EXPECT_EQ(0U, theString.rfind("hello")); + EXPECT_EQ(1U, theString.rfind("ello")); + EXPECT_EQ(StringRef::npos, theString.rfind("zz")); + + EXPECT_EQ(2U, theString.find_first_of('l')); + EXPECT_EQ(1U, theString.find_first_of("el")); + EXPECT_EQ(StringRef::npos, theString.find_first_of("xyz")); + + EXPECT_EQ(1U, theString.find_first_not_of('h')); + EXPECT_EQ(4U, theString.find_first_not_of("hel")); + EXPECT_EQ(StringRef::npos, theString.find_first_not_of("hello")); + + theString = "hellx xello hell ello world foo bar hello"; + EXPECT_EQ(36U, theString.find("hello")); + EXPECT_EQ(28U, theString.find("foo")); + EXPECT_EQ(12U, theString.find("hell", 2)); + EXPECT_EQ(0U, theString.find("")); +} + +TEST_F(SmallStringTest, Count) { + theString = "hello"; + EXPECT_EQ(2U, theString.count('l')); + EXPECT_EQ(1U, theString.count('o')); + EXPECT_EQ(0U, theString.count('z')); + EXPECT_EQ(0U, theString.count("helloworld")); + EXPECT_EQ(1U, 
theString.count("hello")); + EXPECT_EQ(1U, theString.count("ello")); + EXPECT_EQ(0U, theString.count("zz")); +} + +TEST(StringRefTest, Comparisons) { + EXPECT_EQ(-1, SmallString<10>("aab").compare("aad")); + EXPECT_EQ( 0, SmallString<10>("aab").compare("aab")); + EXPECT_EQ( 1, SmallString<10>("aab").compare("aaa")); + EXPECT_EQ(-1, SmallString<10>("aab").compare("aabb")); + EXPECT_EQ( 1, SmallString<10>("aab").compare("aa")); + EXPECT_EQ( 1, SmallString<10>("\xFF").compare("\1")); + + EXPECT_EQ(-1, SmallString<10>("AaB").compare_lower("aAd")); + EXPECT_EQ( 0, SmallString<10>("AaB").compare_lower("aab")); + EXPECT_EQ( 1, SmallString<10>("AaB").compare_lower("AAA")); + EXPECT_EQ(-1, SmallString<10>("AaB").compare_lower("aaBb")); + EXPECT_EQ( 1, SmallString<10>("AaB").compare_lower("aA")); + EXPECT_EQ( 1, SmallString<10>("\xFF").compare_lower("\1")); + + EXPECT_EQ(-1, SmallString<10>("aab").compare_numeric("aad")); + EXPECT_EQ( 0, SmallString<10>("aab").compare_numeric("aab")); + EXPECT_EQ( 1, SmallString<10>("aab").compare_numeric("aaa")); + EXPECT_EQ(-1, SmallString<10>("aab").compare_numeric("aabb")); + EXPECT_EQ( 1, SmallString<10>("aab").compare_numeric("aa")); + EXPECT_EQ(-1, SmallString<10>("1").compare_numeric("10")); + EXPECT_EQ( 0, SmallString<10>("10").compare_numeric("10")); + EXPECT_EQ( 0, SmallString<10>("10a").compare_numeric("10a")); + EXPECT_EQ( 1, SmallString<10>("2").compare_numeric("1")); + EXPECT_EQ( 0, SmallString<10>("llvm_v1i64_ty").compare_numeric("llvm_v1i64_ty")); + EXPECT_EQ( 1, SmallString<10>("\xFF").compare_numeric("\1")); + EXPECT_EQ( 1, SmallString<10>("V16").compare_numeric("V1_q0")); + EXPECT_EQ(-1, SmallString<10>("V1_q0").compare_numeric("V16")); + EXPECT_EQ(-1, SmallString<10>("V8_q0").compare_numeric("V16")); + EXPECT_EQ( 1, SmallString<10>("V16").compare_numeric("V8_q0")); + EXPECT_EQ(-1, SmallString<10>("V1_q0").compare_numeric("V8_q0")); + EXPECT_EQ( 1, SmallString<10>("V8_q0").compare_numeric("V1_q0")); +} + +} From grosbach 
at apple.com Tue Jan 24 17:47:04 2012 From: grosbach at apple.com (Jim Grosbach) Date: Tue, 24 Jan 2012 23:47:04 -0000 Subject: [llvm-commits] [llvm] r148882 - in /llvm/trunk: lib/Target/ARM/ARMInstrNEON.td lib/Target/ARM/AsmParser/ARMAsmParser.cpp lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp lib/Target/ARM/InstPrinter/ARMInstPrinter.h test/MC/ARM/neon-vld-encoding.s Message-ID: <20120124234704.8ED362A6C12C@llvm.org> Author: grosbach Date: Tue Jan 24 17:47:04 2012 New Revision: 148882 URL: http://llvm.org/viewvc/llvm-project?rev=148882&view=rev Log: NEON VLD3(all lanes) assembly parsing and encoding. Modified: llvm/trunk/lib/Target/ARM/ARMInstrNEON.td llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp llvm/trunk/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp llvm/trunk/lib/Target/ARM/InstPrinter/ARMInstPrinter.h llvm/trunk/test/MC/ARM/neon-vld-encoding.s Modified: llvm/trunk/lib/Target/ARM/ARMInstrNEON.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrNEON.td?rev=148882&r1=148881&r2=148882&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrNEON.td (original) +++ llvm/trunk/lib/Target/ARM/ARMInstrNEON.td Tue Jan 24 17:47:04 2012 @@ -171,6 +171,27 @@ "printVectorListTwoSpacedAllLanes"> { let ParserMatchClass = VecListTwoQAllLanesAsmOperand; } +// Register list of three D registers, with "all lanes" subscripting. +def VecListThreeDAllLanesAsmOperand : AsmOperandClass { + let Name = "VecListThreeDAllLanes"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListOperands"; +} +def VecListThreeDAllLanes : RegisterOperand { + let ParserMatchClass = VecListThreeDAllLanesAsmOperand; +} +// Register list of three D registers spaced by 2 (three sequential Q regs). 
+def VecListThreeQAllLanesAsmOperand : AsmOperandClass { + let Name = "VecListThreeQAllLanes"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListOperands"; +} +def VecListThreeQAllLanes : RegisterOperand { + let ParserMatchClass = VecListThreeQAllLanesAsmOperand; +} + // Register list of one D register, with byte lane subscripting. def VecListOneDByteIndexAsmOperand : AsmOperandClass { @@ -1433,9 +1454,9 @@ def VLD3DUPd32Pseudo : VLDQQPseudo; // ...with double-spaced registers (not used for codegen): -def VLD3DUPd8x2 : VLD3DUP<{0,0,1,?}, "8">; -def VLD3DUPd16x2 : VLD3DUP<{0,1,1,?}, "16">; -def VLD3DUPd32x2 : VLD3DUP<{1,0,1,?}, "32">; +def VLD3DUPq8 : VLD3DUP<{0,0,1,?}, "8">; +def VLD3DUPq16 : VLD3DUP<{0,1,1,?}, "16">; +def VLD3DUPq32 : VLD3DUP<{1,0,1,?}, "32">; // ...with address register writeback: class VLD3DUPWB op7_4, string Dt> @@ -1451,9 +1472,9 @@ def VLD3DUPd16_UPD : VLD3DUPWB<{0,1,0,?}, "16">; def VLD3DUPd32_UPD : VLD3DUPWB<{1,0,0,?}, "32">; -def VLD3DUPd8x2_UPD : VLD3DUPWB<{0,0,1,0}, "8">; -def VLD3DUPd16x2_UPD : VLD3DUPWB<{0,1,1,?}, "16">; -def VLD3DUPd32x2_UPD : VLD3DUPWB<{1,0,1,?}, "32">; +def VLD3DUPq8_UPD : VLD3DUPWB<{0,0,1,0}, "8">; +def VLD3DUPq16_UPD : VLD3DUPWB<{0,1,1,?}, "16">; +def VLD3DUPq32_UPD : VLD3DUPWB<{1,0,1,?}, "32">; def VLD3DUPd8Pseudo_UPD : VLDQQWBPseudo; def VLD3DUPd16Pseudo_UPD : VLDQQWBPseudo; @@ -6036,6 +6057,64 @@ (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, rGPR:$Rm, pred:$p)>; +// VLD3 all-lanes pseudo-instructions. These need special handling for +// the lane index that an InstAlias can't handle, so we use these instead. 
+def VLD3DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr", + (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD3DUPdAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr", + (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD3DUPdAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr", + (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD3DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr", + (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD3DUPqAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr", + (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD3DUPqAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr", + (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>; + +def VLD3DUPdWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!", + (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD3DUPdWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!", + (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD3DUPdWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!", + (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD3DUPqWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!", + (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD3DUPqWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!", + (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD3DUPqWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!", + (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD3DUPdWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm", + (ins VecListThreeDAllLanes:$list, addrmode6:$addr, + rGPR:$Rm, 
pred:$p)>; +def VLD3DUPdWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm", + (ins VecListThreeDAllLanes:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD3DUPdWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm", + (ins VecListThreeDAllLanes:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD3DUPqWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm", + (ins VecListThreeQAllLanes:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD3DUPqWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm", + (ins VecListThreeQAllLanes:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD3DUPqWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm", + (ins VecListThreeQAllLanes:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; + // VLD3 single-lane pseudo-instructions. These need special handling for // the lane index that an InstAlias can't handle, so we use these instead. 
Modified: llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp?rev=148882&r1=148881&r2=148882&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp (original) +++ llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp Tue Jan 24 17:47:04 2012 @@ -1132,6 +1132,16 @@ return VectorList.Count == 2; } + bool isVecListThreeDAllLanes() const { + if (!isSingleSpacedVectorAllLanes()) return false; + return VectorList.Count == 3; + } + + bool isVecListThreeQAllLanes() const { + if (!isDoubleSpacedVectorAllLanes()) return false; + return VectorList.Count == 3; + } + bool isSingleSpacedVectorIndexed() const { return Kind == k_VectorListIndexed && !VectorList.isDoubleSpaced; } @@ -5343,6 +5353,26 @@ case ARM::VLD2LNqAsm_16: Spacing = 2; return ARM::VLD2LNq16; case ARM::VLD2LNqAsm_32: Spacing = 2; return ARM::VLD2LNq32; + // VLD3DUP + case ARM::VLD3DUPdWB_fixed_Asm_8: Spacing = 1; return ARM::VLD3DUPd8_UPD; + case ARM::VLD3DUPdWB_fixed_Asm_16: Spacing = 1; return ARM::VLD3DUPd16_UPD; + case ARM::VLD3DUPdWB_fixed_Asm_32: Spacing = 1; return ARM::VLD3DUPd32_UPD; + case ARM::VLD3DUPqWB_fixed_Asm_8: Spacing = 1; return ARM::VLD3DUPq8_UPD; + case ARM::VLD3DUPqWB_fixed_Asm_16: Spacing = 1; return ARM::VLD3DUPq16_UPD; + case ARM::VLD3DUPqWB_fixed_Asm_32: Spacing = 2; return ARM::VLD3DUPq32_UPD; + case ARM::VLD3DUPdWB_register_Asm_8: Spacing = 1; return ARM::VLD3DUPd8_UPD; + case ARM::VLD3DUPdWB_register_Asm_16: Spacing = 1; return ARM::VLD3DUPd16_UPD; + case ARM::VLD3DUPdWB_register_Asm_32: Spacing = 1; return ARM::VLD3DUPd32_UPD; + case ARM::VLD3DUPqWB_register_Asm_8: Spacing = 2; return ARM::VLD3DUPq8_UPD; + case ARM::VLD3DUPqWB_register_Asm_16: Spacing = 2; return ARM::VLD3DUPq16_UPD; + case ARM::VLD3DUPqWB_register_Asm_32: Spacing = 2; return ARM::VLD3DUPq32_UPD; + case ARM::VLD3DUPdAsm_8: Spacing = 
1; return ARM::VLD3DUPd8; + case ARM::VLD3DUPdAsm_16: Spacing = 1; return ARM::VLD3DUPd16; + case ARM::VLD3DUPdAsm_32: Spacing = 1; return ARM::VLD3DUPd32; + case ARM::VLD3DUPqAsm_8: Spacing = 2; return ARM::VLD3DUPq8; + case ARM::VLD3DUPqAsm_16: Spacing = 2; return ARM::VLD3DUPq16; + case ARM::VLD3DUPqAsm_32: Spacing = 2; return ARM::VLD3DUPq32; + // VLD3LN case ARM::VLD3LNdWB_fixed_Asm_8: Spacing = 1; return ARM::VLD3LNd8_UPD; case ARM::VLD3LNdWB_fixed_Asm_16: Spacing = 1; return ARM::VLD3LNd16_UPD; @@ -6061,6 +6091,77 @@ return true; } + // VLD3DUP single 3-element structure to all lanes instructions. + case ARM::VLD3DUPdAsm_8: + case ARM::VLD3DUPdAsm_16: + case ARM::VLD3DUPdAsm_32: + case ARM::VLD3DUPqAsm_8: + case ARM::VLD3DUPqAsm_16: + case ARM::VLD3DUPqAsm_32: { + MCInst TmpInst; + unsigned Spacing; + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(Inst.getOperand(1)); // Rn + TmpInst.addOperand(Inst.getOperand(2)); // alignment + TmpInst.addOperand(Inst.getOperand(3)); // CondCode + TmpInst.addOperand(Inst.getOperand(4)); + Inst = TmpInst; + return true; + } + + case ARM::VLD3DUPdWB_fixed_Asm_8: + case ARM::VLD3DUPdWB_fixed_Asm_16: + case ARM::VLD3DUPdWB_fixed_Asm_32: + case ARM::VLD3DUPqWB_fixed_Asm_8: + case ARM::VLD3DUPqWB_fixed_Asm_16: + case ARM::VLD3DUPqWB_fixed_Asm_32: { + MCInst TmpInst; + unsigned Spacing; + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(Inst.getOperand(1)); // Rn + TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied 
Rn + TmpInst.addOperand(Inst.getOperand(2)); // alignment + TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm + TmpInst.addOperand(Inst.getOperand(3)); // CondCode + TmpInst.addOperand(Inst.getOperand(4)); + Inst = TmpInst; + return true; + } + + case ARM::VLD3DUPdWB_register_Asm_8: + case ARM::VLD3DUPdWB_register_Asm_16: + case ARM::VLD3DUPdWB_register_Asm_32: + case ARM::VLD3DUPqWB_register_Asm_8: + case ARM::VLD3DUPqWB_register_Asm_16: + case ARM::VLD3DUPqWB_register_Asm_32: { + MCInst TmpInst; + unsigned Spacing; + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(Inst.getOperand(1)); // Rn + TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn + TmpInst.addOperand(Inst.getOperand(2)); // alignment + TmpInst.addOperand(Inst.getOperand(3)); // Rm + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + // VLD3 multiple 3-element structure instructions. 
case ARM::VLD3dAsm_8: case ARM::VLD3dAsm_16: Modified: llvm/trunk/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp?rev=148882&r1=148881&r2=148882&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp (original) +++ llvm/trunk/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp Tue Jan 24 17:47:04 2012 @@ -1067,6 +1067,17 @@ << getRegisterName(MI->getOperand(OpNum).getReg() + 1) << "[]}"; } +void ARMInstPrinter::printVectorListThreeAllLanes(const MCInst *MI, + unsigned OpNum, + raw_ostream &O) { + // Normally, it's not safe to use register enum values directly with + // addition to get the next register, but for VFP registers, the + // sort order is guaranteed because they're all of the form D<n>. + O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "[], " + << getRegisterName(MI->getOperand(OpNum).getReg() + 1) << "[], " + << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "[]}"; +} + void ARMInstPrinter::printVectorListTwoSpaced(const MCInst *MI, unsigned OpNum, raw_ostream &O) { // Normally, it's not safe to use register enum values directly with @@ -1086,6 +1097,17 @@ << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "[]}"; } +void ARMInstPrinter::printVectorListThreeSpacedAllLanes(const MCInst *MI, + unsigned OpNum, + raw_ostream &O) { + // Normally, it's not safe to use register enum values directly with + // addition to get the next register, but for VFP registers, the + // sort order is guaranteed because they're all of the form D<n>.
+ O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "[], " + << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "[], " + << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "[]}"; +} + void ARMInstPrinter::printVectorListThreeSpaced(const MCInst *MI, unsigned OpNum, raw_ostream &O) { Modified: llvm/trunk/lib/Target/ARM/InstPrinter/ARMInstPrinter.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/InstPrinter/ARMInstPrinter.h?rev=148882&r1=148881&r2=148882&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/InstPrinter/ARMInstPrinter.h (original) +++ llvm/trunk/lib/Target/ARM/InstPrinter/ARMInstPrinter.h Tue Jan 24 17:47:04 2012 @@ -139,10 +139,14 @@ raw_ostream &O); void printVectorListTwoAllLanes(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printVectorListThreeAllLanes(const MCInst *MI, unsigned OpNum, + raw_ostream &O); void printVectorListTwoSpaced(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printVectorListTwoSpacedAllLanes(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printVectorListThreeSpacedAllLanes(const MCInst *MI, unsigned OpNum, + raw_ostream &O); void printVectorListThreeSpaced(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printVectorListFourSpaced(const MCInst *MI, unsigned OpNum, Modified: llvm/trunk/test/MC/ARM/neon-vld-encoding.s URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/ARM/neon-vld-encoding.s?rev=148882&r1=148881&r2=148882&view=diff ============================================================================== --- llvm/trunk/test/MC/ARM/neon-vld-encoding.s (original) +++ llvm/trunk/test/MC/ARM/neon-vld-encoding.s Tue Jan 24 17:47:04 2012 @@ -336,6 +336,47 @@ @ CHECK: vld3.32 {d5[0], d7[0], d9[0]}, [r4]! 
@ encoding: [0x4d,0x5a,0xa4,0xf4] + vld3.8 {d16[], d17[], d18[]}, [r1] + vld3.16 {d16[], d17[], d18[]}, [r2] + vld3.32 {d16[], d17[], d18[]}, [r3] + vld3.8 {d17[], d19[], d21[]}, [r7] + vld3.16 {d17[], d19[], d21[]}, [r7] + vld3.32 {d16[], d18[], d20[]}, [r8] + + vld3.s8 {d16[], d17[], d18[]}, [r1]! + vld3.s16 {d16[], d17[], d18[]}, [r2]! + vld3.s32 {d16[], d17[], d18[]}, [r3]! + vld3.u8 {d17[], d19[], d21[]}, [r7]! + vld3.u16 {d17[], d19[], d21[]}, [r7]! + vld3.u32 {d16[], d18[], d20[]}, [r8]! + + vld3.p8 {d16[], d17[], d18[]}, [r1], r8 + vld3.p16 {d16[], d17[], d18[]}, [r2], r7 + vld3.f32 {d16[], d17[], d18[]}, [r3], r5 + vld3.i8 {d16[], d18[], d20[]}, [r6], r3 + vld3.i16 {d16[], d18[], d20[]}, [r6], r3 + vld3.i32 {d17[], d19[], d21[]}, [r9], r4 + +@ CHECK: vld3.8 {d16[], d17[], d18[]}, [r1] @ encoding: [0x0f,0x0e,0xe1,0xf4] +@ CHECK: vld3.16 {d16[], d17[], d18[]}, [r2] @ encoding: [0x4f,0x0e,0xe2,0xf4] +@ CHECK: vld3.32 {d16[], d17[], d18[]}, [r3] @ encoding: [0x8f,0x0e,0xe3,0xf4] +@ CHECK: vld3.8 {d17[], d19[], d21[]}, [r7] @ encoding: [0x2f,0x1e,0xe7,0xf4] +@ CHECK: vld3.16 {d17[], d19[], d21[]}, [r7] @ encoding: [0x6f,0x1e,0xe7,0xf4] +@ CHECK: vld3.32 {d16[], d18[], d20[]}, [r8] @ encoding: [0xaf,0x0e,0xe8,0xf4] +@ CHECK: vld3.8 {d16[], d17[], d18[]}, [r1]! @ encoding: [0x0d,0x0e,0xe1,0xf4] +@ CHECK: vld3.16 {d16[], d17[], d18[]}, [r2]! @ encoding: [0x4d,0x0e,0xe2,0xf4] +@ CHECK: vld3.32 {d16[], d17[], d18[]}, [r3]! @ encoding: [0x8d,0x0e,0xe3,0xf4] +@ CHECK: vld3.8 {d17[], d18[], d19[]}, [r7]! @ encoding: [0x2d,0x1e,0xe7,0xf4] +@ CHECK: vld3.16 {d17[], d18[], d19[]}, [r7]! @ encoding: [0x6d,0x1e,0xe7,0xf4] +@ CHECK: vld3.32 {d16[], d18[], d20[]}, [r8]! 
@ encoding: [0xad,0x0e,0xe8,0xf4] +@ CHECK: vld3.8 {d16[], d17[], d18[]}, [r1], r8 @ encoding: [0x08,0x0e,0xe1,0xf4] +@ CHECK: vld3.16 {d16[], d17[], d18[]}, [r2], r7 @ encoding: [0x47,0x0e,0xe2,0xf4] +@ CHECK: vld3.32 {d16[], d17[], d18[]}, [r3], r5 @ encoding: [0x85,0x0e,0xe3,0xf4] +@ CHECK: vld3.8 {d16[], d18[], d20[]}, [r6], r3 @ encoding: [0x23,0x0e,0xe6,0xf4] +@ CHECK: vld3.16 {d16[], d18[], d20[]}, [r6], r3 @ encoding: [0x63,0x0e,0xe6,0xf4] +@ CHECK: vld3.32 {d17[], d19[], d21[]}, [r9], r4 @ encoding: [0xa4,0x1e,0xe9,0xf4] + + vld4.8 {d16[1], d17[1], d18[1], d19[1]}, [r1] vld4.16 {d16[1], d17[1], d18[1], d19[1]}, [r2] vld4.32 {d16[1], d17[1], d18[1], d19[1]}, [r3] From grosbach at apple.com Tue Jan 24 17:47:07 2012 From: grosbach at apple.com (Jim Grosbach) Date: Tue, 24 Jan 2012 23:47:07 -0000 Subject: [llvm-commits] [llvm] r148883 - /llvm/trunk/lib/Target/ARM/ARMInstrNEON.td Message-ID: <20120124234707.D934D2A6C12C@llvm.org> Author: grosbach Date: Tue Jan 24 17:47:07 2012 New Revision: 148883 URL: http://llvm.org/viewvc/llvm-project?rev=148883&view=rev Log: Tidy up. Rename VLD4DUP patterns for consistency. 
Modified: llvm/trunk/lib/Target/ARM/ARMInstrNEON.td Modified: llvm/trunk/lib/Target/ARM/ARMInstrNEON.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrNEON.td?rev=148883&r1=148882&r2=148883&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrNEON.td (original) +++ llvm/trunk/lib/Target/ARM/ARMInstrNEON.td Tue Jan 24 17:47:07 2012 @@ -1500,9 +1500,9 @@ def VLD4DUPd32Pseudo : VLDQQPseudo; // ...with double-spaced registers (not used for codegen): -def VLD4DUPd8x2 : VLD4DUP<{0,0,1,?}, "8">; -def VLD4DUPd16x2 : VLD4DUP<{0,1,1,?}, "16">; -def VLD4DUPd32x2 : VLD4DUP<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; } +def VLD4DUPq8 : VLD4DUP<{0,0,1,?}, "8">; +def VLD4DUPq16 : VLD4DUP<{0,1,1,?}, "16">; +def VLD4DUPq32 : VLD4DUP<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; } // ...with address register writeback: class VLD4DUPWB op7_4, string Dt> @@ -1519,9 +1519,9 @@ def VLD4DUPd16_UPD : VLD4DUPWB<{0,1,0,?}, "16">; def VLD4DUPd32_UPD : VLD4DUPWB<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; } -def VLD4DUPd8x2_UPD : VLD4DUPWB<{0,0,1,0}, "8">; -def VLD4DUPd16x2_UPD : VLD4DUPWB<{0,1,1,?}, "16">; -def VLD4DUPd32x2_UPD : VLD4DUPWB<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; } +def VLD4DUPq8_UPD : VLD4DUPWB<{0,0,1,0}, "8">; +def VLD4DUPq16_UPD : VLD4DUPWB<{0,1,1,?}, "16">; +def VLD4DUPq32_UPD : VLD4DUPWB<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; } def VLD4DUPd8Pseudo_UPD : VLDQQWBPseudo; def VLD4DUPd16Pseudo_UPD : VLDQQWBPseudo; From grosbach at apple.com Tue Jan 24 18:01:08 2012 From: grosbach at apple.com (Jim Grosbach) Date: Wed, 25 Jan 2012 00:01:08 -0000 Subject: [llvm-commits] [llvm] r148884 - in /llvm/trunk: lib/Target/ARM/ARMInstrNEON.td lib/Target/ARM/AsmParser/ARMAsmParser.cpp lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp lib/Target/ARM/InstPrinter/ARMInstPrinter.h test/MC/ARM/neon-vld-encoding.s Message-ID: <20120125000109.1C0EF2A6C12C@llvm.org> Author: grosbach Date: Tue Jan 24 18:01:08 2012 
New Revision: 148884 URL: http://llvm.org/viewvc/llvm-project?rev=148884&view=rev Log: NEON VLD4(all lanes) assembly parsing and encoding. Modified: llvm/trunk/lib/Target/ARM/ARMInstrNEON.td llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp llvm/trunk/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp llvm/trunk/lib/Target/ARM/InstPrinter/ARMInstPrinter.h llvm/trunk/test/MC/ARM/neon-vld-encoding.s Modified: llvm/trunk/lib/Target/ARM/ARMInstrNEON.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrNEON.td?rev=148884&r1=148883&r2=148884&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrNEON.td (original) +++ llvm/trunk/lib/Target/ARM/ARMInstrNEON.td Tue Jan 24 18:01:08 2012 @@ -191,6 +191,25 @@ "printVectorListThreeSpacedAllLanes"> { let ParserMatchClass = VecListThreeQAllLanesAsmOperand; } +// Register list of four D registers, with "all lanes" subscripting. +def VecListFourDAllLanesAsmOperand : AsmOperandClass { + let Name = "VecListFourDAllLanes"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListOperands"; +} +def VecListFourDAllLanes : RegisterOperand { + let ParserMatchClass = VecListFourDAllLanesAsmOperand; +} +// Register list of four D registers spaced by 2 (four sequential Q regs). +def VecListFourQAllLanesAsmOperand : AsmOperandClass { + let Name = "VecListFourQAllLanes"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListOperands"; +} +def VecListFourQAllLanes : RegisterOperand { + let ParserMatchClass = VecListFourQAllLanesAsmOperand; +} // Register list of one D register, with byte lane subscripting. @@ -6333,6 +6352,65 @@ (ins VecListThreeQ:$list, addrmode6:$addr, rGPR:$Rm, pred:$p)>; +// VLD4 all-lanes pseudo-instructions. These need special handling for +// the lane index that an InstAlias can't handle, so we use these instead. 
+def VLD4DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr", + (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD4DUPdAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", + (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD4DUPdAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", + (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD4DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr", + (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD4DUPqAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", + (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD4DUPqAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", + (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>; + +def VLD4DUPdWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!", + (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD4DUPdWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!", + (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD4DUPdWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!", + (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD4DUPqWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!", + (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD4DUPqWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!", + (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD4DUPqWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!", + (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD4DUPdWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm", + (ins VecListFourDAllLanes:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def 
VLD4DUPdWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm", + (ins VecListFourDAllLanes:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD4DUPdWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm", + (ins VecListFourDAllLanes:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD4DUPqWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm", + (ins VecListFourQAllLanes:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD4DUPqWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm", + (ins VecListFourQAllLanes:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD4DUPqWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm", + (ins VecListFourQAllLanes:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; + + // VLD4 single-lane pseudo-instructions. These need special handling for // the lane index that an InstAlias can't handle, so we use these instead. 
def VLD4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr", Modified: llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp?rev=148884&r1=148883&r2=148884&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp (original) +++ llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp Tue Jan 24 18:01:08 2012 @@ -1142,6 +1142,16 @@ return VectorList.Count == 3; } + bool isVecListFourDAllLanes() const { + if (!isSingleSpacedVectorAllLanes()) return false; + return VectorList.Count == 4; + } + + bool isVecListFourQAllLanes() const { + if (!isDoubleSpacedVectorAllLanes()) return false; + return VectorList.Count == 4; + } + bool isSingleSpacedVectorIndexed() const { return Kind == k_VectorListIndexed && !VectorList.isDoubleSpaced; } @@ -5427,6 +5437,26 @@ case ARM::VLD4LNqAsm_16: Spacing = 2; return ARM::VLD4LNq16; case ARM::VLD4LNqAsm_32: Spacing = 2; return ARM::VLD4LNq32; + // VLD4DUP + case ARM::VLD4DUPdWB_fixed_Asm_8: Spacing = 1; return ARM::VLD4DUPd8_UPD; + case ARM::VLD4DUPdWB_fixed_Asm_16: Spacing = 1; return ARM::VLD4DUPd16_UPD; + case ARM::VLD4DUPdWB_fixed_Asm_32: Spacing = 1; return ARM::VLD4DUPd32_UPD; + case ARM::VLD4DUPqWB_fixed_Asm_8: Spacing = 1; return ARM::VLD4DUPq8_UPD; + case ARM::VLD4DUPqWB_fixed_Asm_16: Spacing = 1; return ARM::VLD4DUPq16_UPD; + case ARM::VLD4DUPqWB_fixed_Asm_32: Spacing = 2; return ARM::VLD4DUPq32_UPD; + case ARM::VLD4DUPdWB_register_Asm_8: Spacing = 1; return ARM::VLD4DUPd8_UPD; + case ARM::VLD4DUPdWB_register_Asm_16: Spacing = 1; return ARM::VLD4DUPd16_UPD; + case ARM::VLD4DUPdWB_register_Asm_32: Spacing = 1; return ARM::VLD4DUPd32_UPD; + case ARM::VLD4DUPqWB_register_Asm_8: Spacing = 2; return ARM::VLD4DUPq8_UPD; + case ARM::VLD4DUPqWB_register_Asm_16: Spacing = 2; return ARM::VLD4DUPq16_UPD; + case ARM::VLD4DUPqWB_register_Asm_32: 
Spacing = 2; return ARM::VLD4DUPq32_UPD; + case ARM::VLD4DUPdAsm_8: Spacing = 1; return ARM::VLD4DUPd8; + case ARM::VLD4DUPdAsm_16: Spacing = 1; return ARM::VLD4DUPd16; + case ARM::VLD4DUPdAsm_32: Spacing = 1; return ARM::VLD4DUPd32; + case ARM::VLD4DUPqAsm_8: Spacing = 2; return ARM::VLD4DUPq8; + case ARM::VLD4DUPqAsm_16: Spacing = 2; return ARM::VLD4DUPq16; + case ARM::VLD4DUPqAsm_32: Spacing = 2; return ARM::VLD4DUPq32; + // VLD4 case ARM::VLD4dWB_fixed_Asm_8: Spacing = 1; return ARM::VLD4d8_UPD; case ARM::VLD4dWB_fixed_Asm_16: Spacing = 1; return ARM::VLD4d16_UPD; @@ -6233,7 +6263,84 @@ return true; } - // VLD4 multiple 3-element structure instructions. + // VLD4DUP single 3-element structure to all lanes instructions. + case ARM::VLD4DUPdAsm_8: + case ARM::VLD4DUPdAsm_16: + case ARM::VLD4DUPdAsm_32: + case ARM::VLD4DUPqAsm_8: + case ARM::VLD4DUPqAsm_16: + case ARM::VLD4DUPqAsm_32: { + MCInst TmpInst; + unsigned Spacing; + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 3)); + TmpInst.addOperand(Inst.getOperand(1)); // Rn + TmpInst.addOperand(Inst.getOperand(2)); // alignment + TmpInst.addOperand(Inst.getOperand(3)); // CondCode + TmpInst.addOperand(Inst.getOperand(4)); + Inst = TmpInst; + return true; + } + + case ARM::VLD4DUPdWB_fixed_Asm_8: + case ARM::VLD4DUPdWB_fixed_Asm_16: + case ARM::VLD4DUPdWB_fixed_Asm_32: + case ARM::VLD4DUPqWB_fixed_Asm_8: + case ARM::VLD4DUPqWB_fixed_Asm_16: + case ARM::VLD4DUPqWB_fixed_Asm_32: { + MCInst TmpInst; + unsigned Spacing; + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + 
+ Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 3)); + TmpInst.addOperand(Inst.getOperand(1)); // Rn + TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn + TmpInst.addOperand(Inst.getOperand(2)); // alignment + TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm + TmpInst.addOperand(Inst.getOperand(3)); // CondCode + TmpInst.addOperand(Inst.getOperand(4)); + Inst = TmpInst; + return true; + } + + case ARM::VLD4DUPdWB_register_Asm_8: + case ARM::VLD4DUPdWB_register_Asm_16: + case ARM::VLD4DUPdWB_register_Asm_32: + case ARM::VLD4DUPqWB_register_Asm_8: + case ARM::VLD4DUPqWB_register_Asm_16: + case ARM::VLD4DUPqWB_register_Asm_32: { + MCInst TmpInst; + unsigned Spacing; + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 3)); + TmpInst.addOperand(Inst.getOperand(1)); // Rn + TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn + TmpInst.addOperand(Inst.getOperand(2)); // alignment + TmpInst.addOperand(Inst.getOperand(3)); // Rm + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + + // VLD4 multiple 4-element structure instructions. 
case ARM::VLD4dAsm_8: case ARM::VLD4dAsm_16: case ARM::VLD4dAsm_32: Modified: llvm/trunk/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp?rev=148884&r1=148883&r2=148884&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp (original) +++ llvm/trunk/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp Tue Jan 24 18:01:08 2012 @@ -1078,6 +1078,18 @@ << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "[]}"; } +void ARMInstPrinter::printVectorListFourAllLanes(const MCInst *MI, + unsigned OpNum, + raw_ostream &O) { + // Normally, it's not safe to use register enum values directly with + // addition to get the next register, but for VFP registers, the + // sort order is guaranteed because they're all of the form D. + O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "[], " + << getRegisterName(MI->getOperand(OpNum).getReg() + 1) << "[], " + << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "[], " + << getRegisterName(MI->getOperand(OpNum).getReg() + 3) << "[]}"; +} + void ARMInstPrinter::printVectorListTwoSpaced(const MCInst *MI, unsigned OpNum, raw_ostream &O) { // Normally, it's not safe to use register enum values directly with @@ -1105,7 +1117,19 @@ // sort order is guaranteed because they're all of the form D. 
O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "[], " << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "[], " - << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "[]}"; + << getRegisterName(MI->getOperand(OpNum).getReg() + 4) << "[]}"; +} + +void ARMInstPrinter::printVectorListFourSpacedAllLanes(const MCInst *MI, + unsigned OpNum, + raw_ostream &O) { + // Normally, it's not safe to use register enum values directly with + // addition to get the next register, but for VFP registers, the + // sort order is guaranteed because they're all of the form D. + O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "[], " + << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "[], " + << getRegisterName(MI->getOperand(OpNum).getReg() + 4) << "[], " + << getRegisterName(MI->getOperand(OpNum).getReg() + 6) << "[]}"; } void ARMInstPrinter::printVectorListThreeSpaced(const MCInst *MI, Modified: llvm/trunk/lib/Target/ARM/InstPrinter/ARMInstPrinter.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/InstPrinter/ARMInstPrinter.h?rev=148884&r1=148883&r2=148884&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/InstPrinter/ARMInstPrinter.h (original) +++ llvm/trunk/lib/Target/ARM/InstPrinter/ARMInstPrinter.h Tue Jan 24 18:01:08 2012 @@ -141,12 +141,16 @@ raw_ostream &O); void printVectorListThreeAllLanes(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printVectorListFourAllLanes(const MCInst *MI, unsigned OpNum, + raw_ostream &O); void printVectorListTwoSpaced(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printVectorListTwoSpacedAllLanes(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printVectorListThreeSpacedAllLanes(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printVectorListFourSpacedAllLanes(const MCInst *MI, unsigned OpNum, + raw_ostream &O); void printVectorListThreeSpaced(const MCInst *MI, unsigned OpNum, 
raw_ostream &O); void printVectorListFourSpaced(const MCInst *MI, unsigned OpNum, Modified: llvm/trunk/test/MC/ARM/neon-vld-encoding.s URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/ARM/neon-vld-encoding.s?rev=148884&r1=148883&r2=148884&view=diff ============================================================================== --- llvm/trunk/test/MC/ARM/neon-vld-encoding.s (original) +++ llvm/trunk/test/MC/ARM/neon-vld-encoding.s Tue Jan 24 18:01:08 2012 @@ -412,6 +412,46 @@ @ CHECK: vld4.32 {d17[1], d19[1], d21[1], d23[1]}, [r9], r4 @ encoding: [0xc4,0x1b,0xe9,0xf4] + vld4.8 {d16[], d17[], d18[], d19[]}, [r1] + vld4.16 {d16[], d17[], d18[], d19[]}, [r2] + vld4.32 {d16[], d17[], d18[], d19[]}, [r3] + vld4.8 {d17[], d19[], d21[], d23[]}, [r7] + vld4.16 {d17[], d19[], d21[], d23[]}, [r7] + vld4.32 {d16[], d18[], d20[], d22[]}, [r8] + + vld4.s8 {d16[], d17[], d18[], d19[]}, [r1]! + vld4.s16 {d16[], d17[], d18[], d19[]}, [r2]! + vld4.s32 {d16[], d17[], d18[], d19[]}, [r3]! + vld4.u8 {d17[], d19[], d21[], d23[]}, [r7]! + vld4.u16 {d17[], d19[], d21[], d23[]}, [r7]! + vld4.u32 {d16[], d18[], d20[], d22[]}, [r8]! 
+ + vld4.p8 {d16[], d17[], d18[], d19[]}, [r1], r8 + vld4.p16 {d16[], d17[], d18[], d19[]}, [r2], r7 + vld4.f32 {d16[], d17[], d18[], d19[]}, [r3], r5 + vld4.i8 {d16[], d18[], d20[], d22[]}, [r6], r3 + vld4.i16 {d16[], d18[], d20[], d22[]}, [r6], r3 + vld4.i32 {d17[], d19[], d21[], d23[]}, [r9], r4 + +@ CHECK: vld4.8 {d16[], d17[], d18[], d19[]}, [r1] @ encoding: [0x0f,0x0f,0xe1,0xf4] +@ CHECK: vld4.16 {d16[], d17[], d18[], d19[]}, [r2] @ encoding: [0x4f,0x0f,0xe2,0xf4] +@ CHECK: vld4.32 {d16[], d17[], d18[], d19[]}, [r3] @ encoding: [0x8f,0x0f,0xe3,0xf4] +@ CHECK: vld4.8 {d17[], d19[], d21[], d23[]}, [r7] @ encoding: [0x2f,0x1f,0xe7,0xf4] +@ CHECK: vld4.16 {d17[], d19[], d21[], d23[]}, [r7] @ encoding: [0x6f,0x1f,0xe7,0xf4] +@ CHECK: vld4.32 {d16[], d18[], d20[], d22[]}, [r8] @ encoding: [0xaf,0x0f,0xe8,0xf4] +@ CHECK: vld4.8 {d16[], d17[], d18[], d19[]}, [r1]! @ encoding: [0x0d,0x0f,0xe1,0xf4] +@ CHECK: vld4.16 {d16[], d17[], d18[], d19[]}, [r2]! @ encoding: [0x4d,0x0f,0xe2,0xf4] +@ CHECK: vld4.32 {d16[], d17[], d18[], d19[]}, [r3]! @ encoding: [0x8d,0x0f,0xe3,0xf4] +@ CHECK: vld4.8 {d17[], d18[], d19[], d20[]}, [r7]! @ encoding: [0x2d,0x1f,0xe7,0xf4] +@ CHECK: vld4.16 {d17[], d18[], d19[], d20[]}, [r7]! @ encoding: [0x6d,0x1f,0xe7,0xf4] +@ CHECK: vld4.32 {d16[], d18[], d20[], d22[]}, [r8]! @ encoding: [0xad,0x0f,0xe8,0xf4] +@ CHECK: vld4.8 {d16[], d17[], d18[], d19[]}, [r1], r8 @ encoding: [0x08,0x0f,0xe1,0xf4] +@ CHECK: vld4.16 {d16[], d17[], d18[], d19[]}, [r2], r7 @ encoding: [0x47,0x0f,0xe2,0xf4] +@ CHECK: vld4.32 {d16[], d17[], d18[], d19[]}, [r3], r5 @ encoding: [0x85,0x0f,0xe3,0xf4] +@ CHECK: vld4.8 {d16[], d18[], d20[], d22[]}, [r6], r3 @ encoding: [0x23,0x0f,0xe6,0xf4] +@ CHECK: vld4.16 {d16[], d18[], d20[], d22[]}, [r6], r3 @ encoding: [0x63,0x0f,0xe6,0xf4] +@ CHECK: vld4.32 {d17[], d19[], d21[], d23[]}, [r9], r4 @ encoding: [0xa4,0x1f,0xe9,0xf4] + @ Handle 'Q' registers in register lists as if the sub-reg D regs were @ specified instead. 
vld1.8 {q3}, [r9] From zinob at codeaurora.org Tue Jan 24 18:08:33 2012 From: zinob at codeaurora.org (Zino Benaissa) Date: Tue, 24 Jan 2012 16:08:33 -0800 Subject: [llvm-commits] Fix LLVM to emit 16-bit CMN when targeting ARM Thumb2 ISA Message-ID: Description: As per request from Jacob, I isolated the fix to CMN instruction so that LLVM emits 16-bits CMN instruction. This fix allows mapping t2CMNzrr to tCMNz. Modified: test/CodeGen/Thumb2/thumb2-cmn.ll lib/Target/ARM/Thumb2SizeReduction.cpp Modified: Index: test/CodeGen/Thumb2/thumb2-cmn.ll =================================================================== --- test/CodeGen/Thumb2/thumb2-cmn.ll (revision 148675) +++ test/CodeGen/Thumb2/thumb2-cmn.ll (working copy) @@ -9,7 +9,7 @@ ret i1 %tmp } ; CHECK: f1: -; CHECK: cmn.w r0, r1 +; CHECK: cmn r0, r1 define i1 @f2(i32 %a, i32 %b) { %nb = sub i32 0, %b @@ -17,7 +17,7 @@ ret i1 %tmp } ; CHECK: f2: -; CHECK: cmn.w r0, r1 +; CHECK: cmn r0, r1 define i1 @f3(i32 %a, i32 %b) { %nb = sub i32 0, %b @@ -25,7 +25,7 @@ ret i1 %tmp } ; CHECK: f3: -; CHECK: cmn.w r0, r1 +; CHECK: cmn r0, r1 define i1 @f4(i32 %a, i32 %b) { %nb = sub i32 0, %b @@ -33,7 +33,7 @@ ret i1 %tmp } ; CHECK: f4: -; CHECK: cmn.w r0, r1 +; CHECK: cmn r0, r1 define i1 @f5(i32 %a, i32 %b) { %tmp = shl i32 %b, 5 Index: lib/Target/ARM/Thumb2SizeReduction.cpp =================================================================== --- lib/Target/ARM/Thumb2SizeReduction.cpp (revision 148675) +++ lib/Target/ARM/Thumb2SizeReduction.cpp (working copy) @@ -67,6 +67,7 @@ { ARM::t2BICrr, 0, ARM::tBIC, 0, 0, 0, 1, 0,0, 1,0 }, //FIXME: Disable CMN, as CCodes are backwards from compare expectations //{ ARM::t2CMNrr, ARM::tCMN, 0, 0, 0, 1, 0, 2,0, 0,0 }, + { ARM::t2CMNzrr, ARM::tCMNz, 0, 0, 0, 1, 0, 2,0, 0,0 }, { ARM::t2CMPri, ARM::tCMPi8, 0, 8, 0, 1, 0, 2,0, 0,0 }, { ARM::t2CMPrr, ARM::tCMPhir, 0, 0, 0, 0, 0, 2,0, 0,1 }, { ARM::t2EORrr, 0, ARM::tEOR, 0, 0, 0, 1, 0,0, 1,0 }, -------------- next part -------------- An HTML 
attachment was scrubbed... URL: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120124/99c73eef/attachment.html From apazos at codeaurora.org Tue Jan 24 18:18:45 2012 From: apazos at codeaurora.org (Ana Pazos) Date: Tue, 24 Jan 2012 16:18:45 -0800 Subject: [llvm-commits] LLVM patch to support ARM fused multiply add/subtract instructions In-Reply-To: References: Message-ID: <00f601ccdaf6$e6cdf8d0$b469ea70$@org> Anton, Here is an updated patch for the ARM fused multiply add/sub feature with the relevant suggestions incorporated. In this update: - I assumed neon2 does not imply vfpv4, but neon and vfpv4 imply neon2. - I kept setting .fpu=neon-vfpv4 code attribute because that is what the assembler understands. The additional changes mentioned in the email discussions I think belong to a separate patch: - Associate VMLA/VMLS with LessPreciseFPMAD flag, and maybe with fast-math flag. - VFPv3/VFPv4/NEON/NEON2 associations with FeatureFP16/FeatureD16. - Support to set -mfpu=neon2 in clang. Do you want this?? Thanks, Ana. -----Original Message----- From: llvm-commits-bounces at cs.uiuc.edu [mailto:llvm-commits-bounces at cs.uiuc.edu] On Behalf Of Anton Korobeynikov Sent: Tuesday, January 24, 2012 12:34 PM To: James Molloy Cc: llvm-commits at cs.uiuc.edu Subject: Re: [llvm-commits] [llvm] r148658 - in /llvm/trunk: lib/Target/ARM/ARM.td lib/Target/ARM/ARMAsmPrinter.cpp lib/Target/ARM/ARMInstrInfo.td lib/Target/ARM/ARMInstrNEON.td lib/Target/ARM/ARMInstrVFP.td lib/Target/ARM/ARMSchedule.td lib/Target/ARM/ARMSub Hi James, > >From my discussions with colleagues we come to a different conclusion - that it is indeed possible to have the NEONv2 vector integer unit without the floating point unit of VFPv4. > We don't think it's possible to have VFPv4 with NEONv1 however. Well... this conflicts with that doc I mentioned. Where is the truth? 
:) -- With best regards, Anton Korobeynikov Faculty of Mathematics and Mechanics, Saint Petersburg State University _______________________________________________ llvm-commits mailing list llvm-commits at cs.uiuc.edu http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits -------------- next part -------------- A non-text attachment was scrubbed... Name: fusedMACpatch.diff Type: application/octet-stream Size: 14547 bytes Desc: not available Url : http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120124/a1bacef9/attachment.obj From hfinkel at anl.gov Tue Jan 24 18:41:32 2012 From: hfinkel at anl.gov (Hal Finkel) Date: Tue, 24 Jan 2012 18:41:32 -0600 Subject: [llvm-commits] [LLVMdev] [PATCH] BasicBlock Autovectorization Pass In-Reply-To: References: <1319928991.23036.957.camel@sapling> <1320108633.23036.1266.camel@sapling> <1320172356.23036.1298.camel@sapling> <4EB0462C.5010209@grosser.es> <1320184739.23036.1334.camel@sapling> <1320191694.23036.1497.camel@sapling> <1320749109.19359.76.camel@sapling> <4EB90E98.4010805@grosser.es> <1320762963.19359.117.camel@sapling> <4EB98207.2070807@grosser.es> <1320791390.19359.262.camel@sapling> <4EBC4B0F.6010609@grosser.es> <1321050998.19359.539.camel@sapling> <4EBDA7F9.9080709@grosser.es> <1321053083.19359.550.camel@sapling> <4EBDB1BF.7090006@grosser.es> <1321400339.19359.782.camel@sapling> <1321486739.19359.1067.camel@sapling> <4EC504B5.2020408@grosser.es> <1321898108.2507.36.camel@sapling> <1321932161.2507.101.camel@sapling> <1322067157.2507.263.camel@sapling> <4ED8F7B0.8050309@grosser.es> <1323822351.590.1687.camel@sapling> <4EFC7291.9040808@grosser.es> <1325179929.13080.2839.camel@sapling> <4EFD7FD3.8040800@grosser.es> <1327378420.32397.1603.camel@sapling> Message-ID: <1327452092.2489.29.camel@sapling> On Tue, 2012-01-24 at 16:08 -0600, Sebastian Pop wrote: > On Mon, Jan 23, 2012 at 10:13 PM, Hal Finkel wrote: > > On Tue, 2012-01-17 at 13:25 -0600, Sebastian Pop wrote: > >> Hi, > >> > >> On Fri, Dec 30, 2011 at 
3:09 AM, Tobias Grosser wrote: > >> > As it seems my intuition is wrong, I am very eager to see and understand > >> > an example where a search limit of 4000 is really needed. > >> > > >> > >> To make the ball roll again, I attached a testcase that can be tuned > >> to understand the impact on compile time for different sizes of a > >> basic block. One can also set the number of iterations in the loop to > >> 1 to test the vectorizer with no loops around. > >> > >> Hal, could you please report the compile times with/without the > >> vectorizer for different basic block sizes? > > > > I've looked at your test case, and I am pleased to report a negligible > > compile-time increase! Also, there is no vectorization of the main > > Good! > > > loop :) Here's why: (as you know) the main part of the loop is > > essentially one long dependency chain, and so there is nothing to > > vectorize there. The only vectorization opportunities come from > > unrolling the loop. Using the default thresholds, the loop will not even > > partially unroll (because the body is too large). As a result, > > essentially nothing happens. > > > > I've prepared a reduced version of your test case (attached). Using > > -unroll-threshold=300 (along with -unroll-allow-partial), I can make the > > loop unroll partially (the reduced loop size is 110, so this allows > > unrolling 2 iterations). Once this is done, the vectorizer finds > > candidate pairs and vectorizes [as a practical matter, you need -basicaa > > too]. > > > > I think that even this is probably too big for a regression test. I > > don't think that the basic structure really adds anything over existing > > tests (although I need to make sure that alias-analysis use is otherwise > > covered), but I'll copy-and-paste a small portion into a regression test > > to cover the search limit logic (which is currently uncovered). 
We > > should probably discuss different situations that we'd like to see > > covered in the regression suite (perhaps post-commit). > > > > Thanks for working on this! I'll post an updated patch for review > > shortly. > > Thanks for the new patch. > > I will send you some more comments on the patch as I'm advancing > through testing: I found some interesting benchmarks in which > enabling vectorization gets the performance down by 80% on ARM. > I will prepare a reduced testcase and try to find out the reason. > As a first shot, I would say that this comes from the vectorization of > code in a loop and the overhead of transfer between scalar and > vector registers. This is good; as has been pointed out, we'll need to develop a vectorization cost model for this kind of thing to really be successful, and so we should start thinking about that. The pass, as implemented, has a semi-implicit cost model which says that permutations followed by another vector operation are free, scalar -> vector transfers are free, and vectorizing a memory operation is just as good as vectorizing an arithmetic operation. Depending on the system, these may all be untrue (although on some systems they are true). If you can generate a test case that would be great, I'd like to look at it. > > I would like to not stop you from committing the patch just because > of performance issues: let's address any further improvements once > the patch is installed on tot. Sounds good to me. 
Thanks again, Hal > > Thanks again, > Sebastian > -- > Qualcomm Innovation Center, Inc is a member of Code Aurora Forum -- Hal Finkel Postdoctoral Appointee Leadership Computing Facility Argonne National Laboratory From sabre at nondot.org Tue Jan 24 19:27:21 2012 From: sabre at nondot.org (Chris Lattner) Date: Wed, 25 Jan 2012 01:27:21 -0000 Subject: [llvm-commits] [llvm] r148897 - in /llvm/trunk/lib: Analysis/ConstantFolding.cpp Analysis/ValueTracking.cpp CodeGen/AsmPrinter/AsmPrinter.cpp CodeGen/SelectionDAG/SelectionDAGBuilder.cpp Message-ID: <20120125012721.1ECD12A6C12C@llvm.org> Author: lattner Date: Tue Jan 24 19:27:20 2012 New Revision: 148897 URL: http://llvm.org/viewvc/llvm-project?rev=148897&view=rev Log: Use the right method to get the # elements in a CDS. Modified: llvm/trunk/lib/Analysis/ConstantFolding.cpp llvm/trunk/lib/Analysis/ValueTracking.cpp llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp Modified: llvm/trunk/lib/Analysis/ConstantFolding.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/ConstantFolding.cpp?rev=148897&r1=148896&r2=148897&view=diff ============================================================================== --- llvm/trunk/lib/Analysis/ConstantFolding.cpp (original) +++ llvm/trunk/lib/Analysis/ConstantFolding.cpp Tue Jan 24 19:27:20 2012 @@ -351,7 +351,7 @@ uint64_t EltSize = CDS->getElementByteSize(); uint64_t Index = ByteOffset / EltSize; uint64_t Offset = ByteOffset - Index * EltSize; - for (; Index != CDS->getType()->getNumElements(); ++Index) { + for (unsigned e = CDS->getNumElements(); Index != e; ++Index) { if (!ReadDataFromGlobal(CDS->getElementAsConstant(Index), Offset, CurPtr, BytesLeft, TD)) return false; @@ -1042,7 +1042,7 @@ return 0; C = CA->getOperand(IdxVal); } else if (ConstantDataSequential *CDS=dyn_cast(C)){ - if (IdxVal >= CDS->getType()->getNumElements()) + if (IdxVal >= CDS->getNumElements()) return 0; C = 
CDS->getElementAsConstant(IdxVal); } else if (ConstantVector *CV = dyn_cast(C)) { Modified: llvm/trunk/lib/Analysis/ValueTracking.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/ValueTracking.cpp?rev=148897&r1=148896&r2=148897&view=diff ============================================================================== --- llvm/trunk/lib/Analysis/ValueTracking.cpp (original) +++ llvm/trunk/lib/Analysis/ValueTracking.cpp Tue Jan 24 19:27:20 2012 @@ -105,7 +105,7 @@ // each element. KnownZero.setAllBits(); KnownOne.setAllBits(); APInt Elt(KnownZero.getBitWidth(), 0); - for (unsigned i = 0, e = CDS->getType()->getNumElements(); i != e; ++i) { + for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) { Elt = CDS->getElementAsInteger(i); KnownZero &= ~Elt; KnownOne &= Elt; Modified: llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp?rev=148897&r1=148896&r2=148897&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp (original) +++ llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp Tue Jan 24 19:27:20 2012 @@ -1630,7 +1630,7 @@ // Otherwise, emit the values in successive locations. 
unsigned ElementByteSize = CDS->getElementByteSize(); if (isa(CDS->getElementType())) { - for (unsigned i = 0, e = CDS->getType()->getNumElements(); i != e; ++i) { + for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) { AP.OutStreamer.EmitIntValue(CDS->getElementAsInteger(i), ElementByteSize, AddrSpace); } @@ -1643,7 +1643,7 @@ CDS->getElementType()->isDoubleTy()); if (ElementByteSize == 4) { - for (unsigned i = 0, e = CDS->getType()->getNumElements(); i != e; ++i) { + for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) { union { float F; uint32_t I; @@ -1657,7 +1657,7 @@ return; } - for (unsigned i = 0, e = CDS->getType()->getNumElements(); i != e; ++i) { + for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) { union { double F; uint64_t I; Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp?rev=148897&r1=148896&r2=148897&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (original) +++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp Tue Jan 24 19:27:20 2012 @@ -1059,7 +1059,7 @@ if (const ConstantDataSequential *CDS = dyn_cast(C)) { SmallVector Ops; - for (unsigned i = 0, e = CDS->getType()->getNumElements(); i != e; ++i) { + for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) { SDNode *Val = getValue(CDS->getElementAsConstant(i)).getNode(); // Add each leaf value from the operand to the Constants list // to form a flattened list of all the values. 
From sabre at nondot.org Tue Jan 24 19:32:59 2012 From: sabre at nondot.org (Chris Lattner) Date: Wed, 25 Jan 2012 01:32:59 -0000 Subject: [llvm-commits] [llvm] r148898 - in /llvm/trunk: include/llvm/Type.h lib/VMCore/Constants.cpp lib/VMCore/Instructions.cpp lib/VMCore/Type.cpp Message-ID: <20120125013259.85AD52A6C12C@llvm.org> Author: lattner Date: Tue Jan 24 19:32:59 2012 New Revision: 148898 URL: http://llvm.org/viewvc/llvm-project?rev=148898&view=rev Log: Remove the Type::getNumElements() method, which is only called in 4 places, did something extremely surprising, and shadowed actually useful implementations that had completely different behavior. Modified: llvm/trunk/include/llvm/Type.h llvm/trunk/lib/VMCore/Constants.cpp llvm/trunk/lib/VMCore/Instructions.cpp llvm/trunk/lib/VMCore/Type.cpp Modified: llvm/trunk/include/llvm/Type.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Type.h?rev=148898&r1=148897&r2=148898&view=diff ============================================================================== --- llvm/trunk/include/llvm/Type.h (original) +++ llvm/trunk/include/llvm/Type.h Tue Jan 24 19:32:59 2012 @@ -294,10 +294,6 @@ /// otherwise return 'this'. Type *getScalarType(); - /// getNumElements - If this is a vector type, return the number of elements, - /// otherwise return zero. - unsigned getNumElements(); - //===--------------------------------------------------------------------===// // Type Iteration support. 
// Modified: llvm/trunk/lib/VMCore/Constants.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/Constants.cpp?rev=148898&r1=148897&r2=148898&view=diff ============================================================================== --- llvm/trunk/lib/VMCore/Constants.cpp (original) +++ llvm/trunk/lib/VMCore/Constants.cpp Tue Jan 24 19:32:59 2012 @@ -1505,8 +1505,10 @@ "PtrToInt source must be pointer or pointer vector"); assert(DstTy->getScalarType()->isIntegerTy() && "PtrToInt destination must be integer or integer vector"); - assert(C->getType()->getNumElements() == DstTy->getNumElements() && - "Invalid cast between a different number of vector elements"); + assert(isa(C->getType()) == isa(DstTy)); + if (VectorType *VT = dyn_cast(C->getType())) + assert(VT->getNumElements() == cast(DstTy)->getNumElements() && + "Invalid cast between a different number of vector elements"); return getFoldedCast(Instruction::PtrToInt, C, DstTy); } @@ -1515,8 +1517,10 @@ "IntToPtr source must be integer or integer vector"); assert(DstTy->getScalarType()->isPointerTy() && "IntToPtr destination must be a pointer or pointer vector"); - assert(C->getType()->getNumElements() == DstTy->getNumElements() && - "Invalid cast between a different number of vector elements"); + assert(isa(C->getType()) == isa(DstTy)); + if (VectorType *VT = dyn_cast(C->getType())) + assert(VT->getNumElements() == cast(DstTy)->getNumElements() && + "Invalid cast between a different number of vector elements"); return getFoldedCast(Instruction::IntToPtr, C, DstTy); } @@ -2018,7 +2022,9 @@ /// getNumElements - Return the number of elements in the array or vector. 
unsigned ConstantDataSequential::getNumElements() const { - return getType()->getNumElements(); + if (ArrayType *AT = dyn_cast(getType())) + return AT->getNumElements(); + return cast(getType())->getNumElements(); } Modified: llvm/trunk/lib/VMCore/Instructions.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/Instructions.cpp?rev=148898&r1=148897&r2=148898&view=diff ============================================================================== --- llvm/trunk/lib/VMCore/Instructions.cpp (original) +++ llvm/trunk/lib/VMCore/Instructions.cpp Tue Jan 24 19:32:59 2012 @@ -2671,13 +2671,19 @@ return SrcTy->isFPOrFPVectorTy() && DstTy->isIntOrIntVectorTy() && SrcLength == DstLength; case Instruction::PtrToInt: - if (SrcTy->getNumElements() != DstTy->getNumElements()) + if (isa(SrcTy) != isa(DstTy)) return false; + if (VectorType *VT = dyn_cast(SrcTy)) + if (VT->getNumElements() != cast(DstTy)->getNumElements()) + return false; return SrcTy->getScalarType()->isPointerTy() && DstTy->getScalarType()->isIntegerTy(); case Instruction::IntToPtr: - if (SrcTy->getNumElements() != DstTy->getNumElements()) + if (isa(SrcTy) != isa(DstTy)) return false; + if (VectorType *VT = dyn_cast(SrcTy)) + if (VT->getNumElements() != cast(DstTy)->getNumElements()) + return false; return SrcTy->getScalarType()->isIntegerTy() && DstTy->getScalarType()->isPointerTy(); case Instruction::BitCast: Modified: llvm/trunk/lib/VMCore/Type.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/Type.cpp?rev=148898&r1=148897&r2=148898&view=diff ============================================================================== --- llvm/trunk/lib/VMCore/Type.cpp (original) +++ llvm/trunk/lib/VMCore/Type.cpp Tue Jan 24 19:32:59 2012 @@ -47,14 +47,6 @@ return this; } -/// getNumElements - If this is a vector type, return the number of elements, -/// otherwise return zero. 
-unsigned Type::getNumElements() { - if (VectorType *VTy = dyn_cast(this)) - return VTy->getNumElements(); - return 0; -} - /// isIntegerTy - Return true if this is an IntegerType of the specified width. bool Type::isIntegerTy(unsigned Bitwidth) const { return isIntegerTy() && cast(this)->getBitWidth() == Bitwidth; From ahatanaka at mips.com Tue Jan 24 19:43:37 2012 From: ahatanaka at mips.com (Akira Hatanaka) Date: Wed, 25 Jan 2012 01:43:37 -0000 Subject: [llvm-commits] [llvm] r148900 - in /llvm/trunk/lib/Target/Mips: MipsAnalyzeImmediate.cpp MipsAnalyzeImmediate.h Message-ID: <20120125014337.3F9712A6C12C@llvm.org> Author: ahatanak Date: Tue Jan 24 19:43:36 2012 New Revision: 148900 URL: http://llvm.org/viewvc/llvm-project?rev=148900&view=rev Log: Add class MipsAnalyzeImmediate which comes up with an instruction sequence to load an immediate. Added: llvm/trunk/lib/Target/Mips/MipsAnalyzeImmediate.cpp llvm/trunk/lib/Target/Mips/MipsAnalyzeImmediate.h Added: llvm/trunk/lib/Target/Mips/MipsAnalyzeImmediate.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Mips/MipsAnalyzeImmediate.cpp?rev=148900&view=auto ============================================================================== --- llvm/trunk/lib/Target/Mips/MipsAnalyzeImmediate.cpp (added) +++ llvm/trunk/lib/Target/Mips/MipsAnalyzeImmediate.cpp Tue Jan 24 19:43:36 2012 @@ -0,0 +1,153 @@ +//===-- MipsAnalyzeImmediate.cpp - Analyze immediates ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +#include "MipsAnalyzeImmediate.h" +#include "Mips.h" +#include "llvm/Support/MathExtras.h" + +using namespace llvm; + +MipsAnalyzeImmediate::Inst::Inst(unsigned O, unsigned I) : Opc(O), ImmOpnd(I) {} + +// Add I to the instruction sequences. 
+void MipsAnalyzeImmediate::AddInstr(InstSeqLs &SeqLs, const Inst &I) { + // Add an instruction sequence consisting of just I. + if (SeqLs.empty()) { + SeqLs.push_back(InstSeq(1, I)); + return; + } + + for (InstSeqLs::iterator Iter = SeqLs.begin(); Iter != SeqLs.end(); ++Iter) + Iter->push_back(I); +} + +void MipsAnalyzeImmediate::GetInstSeqLsADDiu(int64_t Imm, unsigned RemSize, + InstSeqLs &SeqLs) { + GetInstSeqLs((Imm + 0x8000) & ~0xffff, RemSize, SeqLs); + AddInstr(SeqLs, Inst(ADDiu, Imm & 0xffff)); +} + +void MipsAnalyzeImmediate::GetInstSeqLsORi(int64_t Imm, unsigned RemSize, + InstSeqLs &SeqLs) { + GetInstSeqLs(Imm & ~0xffff, RemSize, SeqLs); + AddInstr(SeqLs, Inst(ORi, Imm & 0xffff)); +} + +void MipsAnalyzeImmediate::GetInstSeqLsSLL(int64_t Imm, unsigned RemSize, + InstSeqLs &SeqLs) { + unsigned Shamt = CountTrailingZeros_64(Imm); + GetInstSeqLs(Imm >> Shamt, RemSize - Shamt, SeqLs); + AddInstr(SeqLs, Inst(SLL, Shamt)); +} + +void MipsAnalyzeImmediate::GetInstSeqLs(int64_t Imm, unsigned RemSize, + InstSeqLs &SeqLs) { + int64_t MaskedImm = Imm & (((uint64_t)-1) >> (64 - Size)); + + // Do nothing if Imm is 0. + if (!MaskedImm) + return; + + // A single ADDiu will do if RemSize <= 16. + if (RemSize <= 16) { + AddInstr(SeqLs, Inst(ADDiu, MaskedImm)); + return; + } + + // Shift if the lower 16-bit is cleared. + if (!(Imm & 0xffff)) { + GetInstSeqLsSLL(Imm, RemSize, SeqLs); + return; + } + + GetInstSeqLsADDiu(Imm, RemSize, SeqLs); + + // If bit 15 is cleared, it doesn't make a difference whether the last + // instruction is an ADDiu or ORi. In that case, do not call GetInstSeqLsORi. + if (Imm & 0x8000) { + InstSeqLs SeqLsORi; + GetInstSeqLsORi(Imm, RemSize, SeqLsORi); + SeqLs.insert(SeqLs.end(), SeqLsORi.begin(), SeqLsORi.end()); + } +} + +// Replace an ADDiu & SLL pair with a LUi. +// e.g.
the following two instructions +// ADDiu 0x0111 +// SLL 18 +// are replaced with +// LUi 0x444 +void MipsAnalyzeImmediate::ReplaceADDiuSLLWithLUi(InstSeq &Seq) { + // Check if the first two instructions are ADDiu and SLL and the shift amount + // is at least 16. + if ((Seq.size() < 2) || (Seq[0].Opc != ADDiu) || + (Seq[1].Opc != SLL) || (Seq[1].ImmOpnd < 16)) + return; + + // Sign-extend and shift operand of ADDiu and see if it still fits in 16-bit. + int64_t Imm = (((int64_t)Seq[0].ImmOpnd) << 48) >> 48; + int64_t ShiftedImm = Imm << (Seq[1].ImmOpnd - 16); + + if (!isInt<16>(ShiftedImm)) + return; + + // Replace the first instruction and erase the second. + Seq[0].Opc = LUi; + Seq[0].ImmOpnd = (unsigned)(ShiftedImm & 0xffff); + Seq.erase(Seq.begin() + 1); +} + +void MipsAnalyzeImmediate::GetShortestSeq(InstSeqLs &SeqLs, InstSeq &Insts) { + InstSeqLs::iterator ShortestSeq = SeqLs.end(); + // The length of an instruction sequence is at most 7. + unsigned ShortestLength = 8; + + for (InstSeqLs::iterator S = SeqLs.begin(); S != SeqLs.end(); ++S) { + ReplaceADDiuSLLWithLUi(*S); + assert(S->size() <= 7); + + if (S->size() < ShortestLength) { + ShortestSeq = S; + ShortestLength = S->size(); + } + } + + Insts.clear(); + Insts.append(ShortestSeq->begin(), ShortestSeq->end()); +} + +const MipsAnalyzeImmediate::InstSeq +&MipsAnalyzeImmediate::Analyze(int64_t Imm, unsigned Size, + bool LastInstrIsADDiu) { + this->Size = Size; + + if (Size == 32) { + ADDiu = Mips::ADDiu; + ORi = Mips::ORi; + SLL = Mips::SLL; + LUi = Mips::LUi; + } else { + ADDiu = Mips::DADDiu; + ORi = Mips::ORi64; + SLL = Mips::DSLL; + LUi = Mips::LUi64; + } + + InstSeqLs SeqLs; + + // Get the list of instruction sequences. + if (LastInstrIsADDiu | !Imm) + GetInstSeqLsADDiu(Imm, Size, SeqLs); + else + GetInstSeqLs(Imm, Size, SeqLs); + + // Set Insts to the shortest instruction sequence. 
+ GetShortestSeq(SeqLs, Insts); + + return Insts; +} Added: llvm/trunk/lib/Target/Mips/MipsAnalyzeImmediate.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Mips/MipsAnalyzeImmediate.h?rev=148900&view=auto ============================================================================== --- llvm/trunk/lib/Target/Mips/MipsAnalyzeImmediate.h (added) +++ llvm/trunk/lib/Target/Mips/MipsAnalyzeImmediate.h Tue Jan 24 19:43:36 2012 @@ -0,0 +1,62 @@ +//===-- MipsAnalyzeImmediate.h - Analyze immediates -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +#ifndef MIPS_ANALYZE_IMMEDIATE_H +#define MIPS_ANALYZE_IMMEDIATE_H + +#include "llvm/ADT/SmallVector.h" + +namespace llvm { + + class MipsAnalyzeImmediate { + public: + struct Inst { + unsigned Opc, ImmOpnd; + Inst(unsigned Opc, unsigned ImmOpnd); + }; + typedef SmallVector InstSeq; + + /// Analyze - Get an instruction sequence to load immediate Imm. The last + /// instruction in the sequence must be an ADDiu if LastInstrIsADDiu is + /// true; + const InstSeq &Analyze(int64_t Imm, unsigned Size, bool LastInstrIsADDiu); + private: + typedef SmallVector InstSeqLs; + + /// AddInstr - Add I to all instruction sequences in SeqLs.
+ void AddInstr(InstSeqLs &SeqLs, const Inst &I); + + /// GetInstSeqLsADDiu - Get instruction sequences which end with an ADDiu to + /// load immediate Imm + void GetInstSeqLsADDiu(int64_t Imm, unsigned RemSize, InstSeqLs &SeqLs); + + /// GetInstSeqLsORi - Get instruction sequences which end with an ORi to + /// load immediate Imm + void GetInstSeqLsORi(int64_t Imm, unsigned RemSize, InstSeqLs &SeqLs); + + /// GetInstSeqLsSLL - Get instruction sequences which end with a SLL to + /// load immediate Imm + void GetInstSeqLsSLL(int64_t Imm, unsigned RemSize, InstSeqLs &SeqLs); + + /// GetInstSeqLs - Get instruction sequences to load immediate Imm. + void GetInstSeqLs(int64_t Imm, unsigned RemSize, InstSeqLs &SeqLs); + + /// ReplaceADDiuSLLWithLUi - Replace an ADDiu & SLL pair with a LUi. + void ReplaceADDiuSLLWithLUi(InstSeq &Seq); + + /// GetShortestSeq - Find the shortest instruction sequence in SeqLs and + /// return it in Insts. + void GetShortestSeq(InstSeqLs &SeqLs, InstSeq &Insts); + + unsigned Size; + unsigned ADDiu, ORi, SLL, LUi; + InstSeq Insts; + }; +} + +#endif From sabre at nondot.org Tue Jan 24 19:53:59 2012 From: sabre at nondot.org (Chris Lattner) Date: Wed, 25 Jan 2012 01:53:59 -0000 Subject: [llvm-commits] [llvm] r148901 - in /llvm/trunk: include/llvm/Constants.h lib/VMCore/Constants.cpp Message-ID: <20120125015359.590F12A6C12C@llvm.org> Author: lattner Date: Tue Jan 24 19:53:58 2012 New Revision: 148901 URL: http://llvm.org/viewvc/llvm-project?rev=148901&view=rev Log: Introduce a new ConstantVector::getSplat constructor function to simplify a really common case.
Modified: llvm/trunk/include/llvm/Constants.h llvm/trunk/lib/VMCore/Constants.cpp Modified: llvm/trunk/include/llvm/Constants.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Constants.h?rev=148901&r1=148900&r2=148901&view=diff ============================================================================== --- llvm/trunk/include/llvm/Constants.h (original) +++ llvm/trunk/include/llvm/Constants.h Tue Jan 24 19:53:58 2012 @@ -489,6 +489,10 @@ // ConstantVector accessors static Constant *get(ArrayRef V); + /// getSplat - Return a ConstantVector with the specified constant in each + /// element. + static Constant *getSplat(unsigned NumElts, Constant *Elt); + /// Transparently provide more efficient getOperand methods. DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Constant); @@ -757,6 +761,11 @@ static Constant *get(LLVMContext &Context, ArrayRef Elts); static Constant *get(LLVMContext &Context, ArrayRef Elts); + /// getSplat - Return a ConstantVector with the specified constant in each + /// element. The specified constant has to be a of a compatible type (i8/i16/ + /// i32/i64/float/double) and must be a ConstantFP or ConstantInt. + static Constant *getSplat(unsigned NumElts, Constant *Elt); + /// getType - Specialize the getType() method to always return a VectorType, /// which reduces the amount of casting needed in parts of the compiler. /// Modified: llvm/trunk/lib/VMCore/Constants.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/Constants.cpp?rev=148901&r1=148900&r2=148901&view=diff ============================================================================== --- llvm/trunk/lib/VMCore/Constants.cpp (original) +++ llvm/trunk/lib/VMCore/Constants.cpp Tue Jan 24 19:53:58 2012 @@ -129,7 +129,7 @@ // Broadcast a scalar to a vector, if necessary. 
if (VectorType *VTy = dyn_cast(Ty)) - C = ConstantVector::get(std::vector(VTy->getNumElements(), C)); + C = ConstantVector::getSplat(VTy->getNumElements(), C); return C; } @@ -145,11 +145,9 @@ return ConstantFP::get(Ty->getContext(), FL); } - SmallVector Elts; VectorType *VTy = cast(Ty); - Elts.resize(VTy->getNumElements(), getAllOnesValue(VTy->getElementType())); - assert(Elts[0] && "Invalid AllOnes value!"); - return cast(ConstantVector::get(Elts)); + return ConstantVector::getSplat(VTy->getNumElements(), + getAllOnesValue(VTy->getElementType())); } void Constant::destroyConstantImpl() { @@ -394,9 +392,8 @@ } assert(VTy->getElementType()->isIntegerTy(1) && "True must be vector of i1 or i1."); - SmallVector Splat(VTy->getNumElements(), - ConstantInt::getTrue(Ty->getContext())); - return ConstantVector::get(Splat); + return ConstantVector::getSplat(VTy->getNumElements(), + ConstantInt::getTrue(Ty->getContext())); } Constant *ConstantInt::getFalse(Type *Ty) { @@ -407,9 +404,8 @@ } assert(VTy->getElementType()->isIntegerTy(1) && "False must be vector of i1 or i1."); - SmallVector Splat(VTy->getNumElements(), - ConstantInt::getFalse(Ty->getContext())); - return ConstantVector::get(Splat); + return ConstantVector::getSplat(VTy->getNumElements(), + ConstantInt::getFalse(Ty->getContext())); } @@ -433,8 +429,7 @@ // For vectors, broadcast the value. if (VectorType *VTy = dyn_cast(Ty)) - return ConstantVector::get(SmallVector(VTy->getNumElements(), C)); + return ConstantVector::getSplat(VTy->getNumElements(), C); return C; } @@ -459,8 +454,7 @@ // For vectors, broadcast the value. if (VectorType *VTy = dyn_cast(Ty)) - return ConstantVector::get( - SmallVector(VTy->getNumElements(), C)); + return ConstantVector::getSplat(VTy->getNumElements(), C); return C; } @@ -506,8 +500,7 @@ // For vectors, broadcast the value. 
if (VectorType *VTy = dyn_cast(Ty)) - return ConstantVector::get( - SmallVector(VTy->getNumElements(), C)); + return ConstantVector::getSplat(VTy->getNumElements(), C); return C; } @@ -521,8 +514,7 @@ // For vectors, broadcast the value. if (VectorType *VTy = dyn_cast(Ty)) - return ConstantVector::get( - SmallVector(VTy->getNumElements(), C)); + return ConstantVector::getSplat(VTy->getNumElements(), C); return C; } @@ -537,15 +529,12 @@ Constant *ConstantFP::getZeroValueForNegation(Type* Ty) { - if (VectorType *PTy = dyn_cast(Ty)) - if (PTy->getElementType()->isFloatingPointTy()) { - SmallVector zeros(PTy->getNumElements(), - getNegativeZero(PTy->getElementType())); - return ConstantVector::get(zeros); - } - - if (Ty->isFloatingPointTy()) - return getNegativeZero(Ty); + if (Ty->getScalarType()->isFloatingPointTy()) { + Constant *C = getNegativeZero(Ty); + if (VectorType *VTy = dyn_cast(Ty)) + return ConstantVector::getSplat(VTy->getNumElements(), C); + return C; + } return Constant::getNullValue(Ty); } @@ -818,6 +807,12 @@ return pImpl->VectorConstants.getOrCreate(T, V); } +Constant *ConstantVector::getSplat(unsigned NumElts, Constant *V) { + SmallVector Elts(NumElts, V); + return get(Elts); +} + + // Utility function for determining if a ConstantExpr is a CastOp or not. 
This // can't be inline because we don't want to #include Instruction.h into // Constant.h @@ -2194,6 +2189,38 @@ return getImpl(StringRef((char*)Elts.data(), Elts.size()*8), Ty); } +Constant *ConstantDataVector::getSplat(unsigned NumElts, Constant *V) { + assert(isElementTypeCompatible(V->getType()) && + "Element type not compatible with ConstantData"); + if (ConstantInt *CI = dyn_cast(V)) { + if (CI->getType()->isIntegerTy(8)) { + SmallVector Elts(NumElts, CI->getZExtValue()); + return get(V->getContext(), Elts); + } + if (CI->getType()->isIntegerTy(16)) { + SmallVector Elts(NumElts, CI->getZExtValue()); + return get(V->getContext(), Elts); + } + if (CI->getType()->isIntegerTy(32)) { + SmallVector Elts(NumElts, CI->getZExtValue()); + return get(V->getContext(), Elts); + } + assert(CI->getType()->isIntegerTy(64) && "Unsupported ConstantData type"); + SmallVector Elts(NumElts, CI->getZExtValue()); + return get(V->getContext(), Elts); + } + + ConstantFP *CFP = cast(V); + if (CFP->getType()->isFloatTy()) { + SmallVector Elts(NumElts, CFP->getValueAPF().convertToFloat()); + return get(V->getContext(), Elts); + } + assert(CFP->getType()->isDoubleTy() && "Unsupported ConstantData type"); + SmallVector Elts(NumElts, CFP->getValueAPF().convertToDouble()); + return get(V->getContext(), Elts); +} + + /// getElementAsInteger - If this is a sequential container of integers (of /// any size), return the specified element in the low bits of a uint64_t. uint64_t ConstantDataSequential::getElementAsInteger(unsigned Elt) const { From netcasper at gmail.com Tue Jan 24 20:01:57 2012 From: netcasper at gmail.com (Liang Wang) Date: Wed, 25 Jan 2012 10:01:57 +0800 Subject: [llvm-commits] fix egrep options for Darwin in utils/llvmgrep Message-ID: Hi, Currently, llvmgrep shows only filename since it uses -l as egrep option on Darwin. It should use -H as for Linux. The following patch fixes the problem. OK for trunk? Thanks, Liang. 
Index: utils/llvmgrep =================================================================== --- utils/llvmgrep (revision 148897) +++ utils/llvmgrep (working copy) @@ -29,7 +29,7 @@ cd $TOPDIR case `uname -s` in SunOS) grep_cmd="ggrep -H -n" ;; - Linux) grep_cmd="egrep -H -n" ;; + Linux|Darwin) grep_cmd="egrep -H -n" ;; *) grep_cmd="egrep -l -n" ;; esac ./utils/llvmdo -topdir "$TOPDIR" \ -------------- next part -------------- A non-text attachment was scrubbed... Name: egrep-options-for-darwin.patch Type: application/octet-stream Size: 415 bytes Desc: not available Url : http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120125/c73c858c/attachment.obj From akyrtzi at gmail.com Tue Jan 24 20:42:41 2012 From: akyrtzi at gmail.com (Argyrios Kyrtzidis) Date: Wed, 25 Jan 2012 02:42:41 -0000 Subject: [llvm-commits] [llvm] r148906 - in /llvm/trunk: include/llvm/Constants.h lib/VMCore/Constants.cpp Message-ID: <20120125024241.A2CC92A6C12C@llvm.org> Author: akirtzidis Date: Tue Jan 24 20:42:41 2012 New Revision: 148906 URL: http://llvm.org/viewvc/llvm-project?rev=148906&view=rev Log: Revert r148901 because it crashes llvm tests. Original log: Introduce a new ConstantVector::getSplat constructor function to simplify a really common case. Modified: llvm/trunk/include/llvm/Constants.h llvm/trunk/lib/VMCore/Constants.cpp Modified: llvm/trunk/include/llvm/Constants.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Constants.h?rev=148906&r1=148905&r2=148906&view=diff ============================================================================== --- llvm/trunk/include/llvm/Constants.h (original) +++ llvm/trunk/include/llvm/Constants.h Tue Jan 24 20:42:41 2012 @@ -489,10 +489,6 @@ // ConstantVector accessors static Constant *get(ArrayRef V); - /// getSplat - Return a ConstantVector with the specified constant in each - /// element. - static Constant *getSplat(unsigned NumElts, Constant *Elt); - /// Transparently provide more efficient getOperand methods. 
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Constant); @@ -761,11 +757,6 @@ static Constant *get(LLVMContext &Context, ArrayRef Elts); static Constant *get(LLVMContext &Context, ArrayRef Elts); - /// getSplat - Return a ConstantVector with the specified constant in each - /// element. The specified constant has to be a of a compatible type (i8/i16/ - /// i32/i64/float/double) and must be a ConstantFP or ConstantInt. - static Constant *getSplat(unsigned NumElts, Constant *Elt); - /// getType - Specialize the getType() method to always return a VectorType, /// which reduces the amount of casting needed in parts of the compiler. /// Modified: llvm/trunk/lib/VMCore/Constants.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/Constants.cpp?rev=148906&r1=148905&r2=148906&view=diff ============================================================================== --- llvm/trunk/lib/VMCore/Constants.cpp (original) +++ llvm/trunk/lib/VMCore/Constants.cpp Tue Jan 24 20:42:41 2012 @@ -129,7 +129,7 @@ // Broadcast a scalar to a vector, if necessary. 
if (VectorType *VTy = dyn_cast(Ty)) - C = ConstantVector::getSplat(VTy->getNumElements(), C); + C = ConstantVector::get(std::vector(VTy->getNumElements(), C)); return C; } @@ -145,9 +145,11 @@ return ConstantFP::get(Ty->getContext(), FL); } + SmallVector Elts; VectorType *VTy = cast(Ty); - return ConstantVector::getSplat(VTy->getNumElements(), - getAllOnesValue(VTy->getElementType())); + Elts.resize(VTy->getNumElements(), getAllOnesValue(VTy->getElementType())); + assert(Elts[0] && "Invalid AllOnes value!"); + return cast(ConstantVector::get(Elts)); } void Constant::destroyConstantImpl() { @@ -392,8 +394,9 @@ } assert(VTy->getElementType()->isIntegerTy(1) && "True must be vector of i1 or i1."); - return ConstantVector::getSplat(VTy->getNumElements(), - ConstantInt::getTrue(Ty->getContext())); + SmallVector Splat(VTy->getNumElements(), + ConstantInt::getTrue(Ty->getContext())); + return ConstantVector::get(Splat); } Constant *ConstantInt::getFalse(Type *Ty) { @@ -404,8 +407,9 @@ } assert(VTy->getElementType()->isIntegerTy(1) && "False must be vector of i1 or i1."); - return ConstantVector::getSplat(VTy->getNumElements(), - ConstantInt::getFalse(Ty->getContext())); + SmallVector Splat(VTy->getNumElements(), + ConstantInt::getFalse(Ty->getContext())); + return ConstantVector::get(Splat); } @@ -429,7 +433,8 @@ // For vectors, broadcast the value. if (VectorType *VTy = dyn_cast(Ty)) - return ConstantVector::getSplat(VTy->getNumElements(), C); + return ConstantVector::get(SmallVector(VTy->getNumElements(), C)); return C; } @@ -454,7 +459,8 @@ // For vectors, broadcast the value. if (VectorType *VTy = dyn_cast(Ty)) - return ConstantVector::getSplat(VTy->getNumElements(), C); + return ConstantVector::get( + SmallVector(VTy->getNumElements(), C)); return C; } @@ -500,7 +506,8 @@ // For vectors, broadcast the value. 
if (VectorType *VTy = dyn_cast(Ty)) - return ConstantVector::getSplat(VTy->getNumElements(), C); + return ConstantVector::get( + SmallVector(VTy->getNumElements(), C)); return C; } @@ -514,7 +521,8 @@ // For vectors, broadcast the value. if (VectorType *VTy = dyn_cast(Ty)) - return ConstantVector::getSplat(VTy->getNumElements(), C); + return ConstantVector::get( + SmallVector(VTy->getNumElements(), C)); return C; } @@ -529,12 +537,15 @@ Constant *ConstantFP::getZeroValueForNegation(Type* Ty) { - if (Ty->getScalarType()->isFloatingPointTy()) { - Constant *C = getNegativeZero(Ty); - if (VectorType *VTy = dyn_cast(Ty)) - return ConstantVector::getSplat(VTy->getNumElements(), C); - return C; - } + if (VectorType *PTy = dyn_cast(Ty)) + if (PTy->getElementType()->isFloatingPointTy()) { + SmallVector zeros(PTy->getNumElements(), + getNegativeZero(PTy->getElementType())); + return ConstantVector::get(zeros); + } + + if (Ty->isFloatingPointTy()) + return getNegativeZero(Ty); return Constant::getNullValue(Ty); } @@ -807,12 +818,6 @@ return pImpl->VectorConstants.getOrCreate(T, V); } -Constant *ConstantVector::getSplat(unsigned NumElts, Constant *V) { - SmallVector Elts(NumElts, V); - return get(Elts); -} - - // Utility function for determining if a ConstantExpr is a CastOp or not. 
This // can't be inline because we don't want to #include Instruction.h into // Constant.h @@ -2189,38 +2194,6 @@ return getImpl(StringRef((char*)Elts.data(), Elts.size()*8), Ty); } -Constant *ConstantDataVector::getSplat(unsigned NumElts, Constant *V) { - assert(isElementTypeCompatible(V->getType()) && - "Element type not compatible with ConstantData"); - if (ConstantInt *CI = dyn_cast(V)) { - if (CI->getType()->isIntegerTy(8)) { - SmallVector Elts(NumElts, CI->getZExtValue()); - return get(V->getContext(), Elts); - } - if (CI->getType()->isIntegerTy(16)) { - SmallVector Elts(NumElts, CI->getZExtValue()); - return get(V->getContext(), Elts); - } - if (CI->getType()->isIntegerTy(32)) { - SmallVector Elts(NumElts, CI->getZExtValue()); - return get(V->getContext(), Elts); - } - assert(CI->getType()->isIntegerTy(64) && "Unsupported ConstantData type"); - SmallVector Elts(NumElts, CI->getZExtValue()); - return get(V->getContext(), Elts); - } - - ConstantFP *CFP = cast(V); - if (CFP->getType()->isFloatTy()) { - SmallVector Elts(NumElts, CFP->getValueAPF().convertToFloat()); - return get(V->getContext(), Elts); - } - assert(CFP->getType()->isDoubleTy() && "Unsupported ConstantData type"); - SmallVector Elts(NumElts, CFP->getValueAPF().convertToDouble()); - return get(V->getContext(), Elts); -} - - /// getElementAsInteger - If this is a sequential container of integers (of /// any size), return the specified element in the low bits of a uint64_t. 
uint64_t ConstantDataSequential::getElementAsInteger(unsigned Elt) const { From kyrtzidis at apple.com Tue Jan 24 20:47:20 2012 From: kyrtzidis at apple.com (Argyrios Kyrtzidis) Date: Tue, 24 Jan 2012 18:47:20 -0800 Subject: [llvm-commits] [llvm] r148901 - in /llvm/trunk: include/llvm/Constants.h lib/VMCore/Constants.cpp In-Reply-To: <20120125015359.590F12A6C12C@llvm.org> References: <20120125015359.590F12A6C12C@llvm.org> Message-ID: I reverted it in r148906 because it crashes tests, see http://lab.llvm.org:8011/builders/llvm-x86_64-linux/builds/2635 -Argyrios On Jan 24, 2012, at 5:53 PM, Chris Lattner wrote: > Author: lattner > Date: Tue Jan 24 19:53:58 2012 > New Revision: 148901 > > URL: http://llvm.org/viewvc/llvm-project?rev=148901&view=rev > Log: > Introduce a new ConstantVector::getSplat constructor function to > simplify a really common case. > > Modified: > llvm/trunk/include/llvm/Constants.h > llvm/trunk/lib/VMCore/Constants.cpp > > Modified: llvm/trunk/include/llvm/Constants.h > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Constants.h?rev=148901&r1=148900&r2=148901&view=diff > ============================================================================== > --- llvm/trunk/include/llvm/Constants.h (original) > +++ llvm/trunk/include/llvm/Constants.h Tue Jan 24 19:53:58 2012 > @@ -489,6 +489,10 @@ > // ConstantVector accessors > static Constant *get(ArrayRef V); > > + /// getSplat - Return a ConstantVector with the specified constant in each > + /// element. > + static Constant *getSplat(unsigned NumElts, Constant *Elt); > + > /// Transparently provide more efficient getOperand methods. > DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Constant); > > @@ -757,6 +761,11 @@ > static Constant *get(LLVMContext &Context, ArrayRef Elts); > static Constant *get(LLVMContext &Context, ArrayRef Elts); > > + /// getSplat - Return a ConstantVector with the specified constant in each > + /// element. 
The specified constant has to be a of a compatible type (i8/i16/ > + /// i32/i64/float/double) and must be a ConstantFP or ConstantInt. > + static Constant *getSplat(unsigned NumElts, Constant *Elt); > + > /// getType - Specialize the getType() method to always return a VectorType, > /// which reduces the amount of casting needed in parts of the compiler. > /// > > Modified: llvm/trunk/lib/VMCore/Constants.cpp > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/Constants.cpp?rev=148901&r1=148900&r2=148901&view=diff > ============================================================================== > --- llvm/trunk/lib/VMCore/Constants.cpp (original) > +++ llvm/trunk/lib/VMCore/Constants.cpp Tue Jan 24 19:53:58 2012 > @@ -129,7 +129,7 @@ > > // Broadcast a scalar to a vector, if necessary. > if (VectorType *VTy = dyn_cast(Ty)) > - C = ConstantVector::get(std::vector(VTy->getNumElements(), C)); > + C = ConstantVector::getSplat(VTy->getNumElements(), C); > > return C; > } > @@ -145,11 +145,9 @@ > return ConstantFP::get(Ty->getContext(), FL); > } > > - SmallVector Elts; > VectorType *VTy = cast(Ty); > - Elts.resize(VTy->getNumElements(), getAllOnesValue(VTy->getElementType())); > - assert(Elts[0] && "Invalid AllOnes value!"); > - return cast(ConstantVector::get(Elts)); > + return ConstantVector::getSplat(VTy->getNumElements(), > + getAllOnesValue(VTy->getElementType())); > } > > void Constant::destroyConstantImpl() { > @@ -394,9 +392,8 @@ > } > assert(VTy->getElementType()->isIntegerTy(1) && > "True must be vector of i1 or i1."); > - SmallVector Splat(VTy->getNumElements(), > - ConstantInt::getTrue(Ty->getContext())); > - return ConstantVector::get(Splat); > + return ConstantVector::getSplat(VTy->getNumElements(), > + ConstantInt::getTrue(Ty->getContext())); > } > > Constant *ConstantInt::getFalse(Type *Ty) { > @@ -407,9 +404,8 @@ > } > assert(VTy->getElementType()->isIntegerTy(1) && > "False must be vector of i1 or i1."); > - SmallVector 
Splat(VTy->getNumElements(), > - ConstantInt::getFalse(Ty->getContext())); > - return ConstantVector::get(Splat); > + return ConstantVector::getSplat(VTy->getNumElements(), > + ConstantInt::getFalse(Ty->getContext())); > } > > > @@ -433,8 +429,7 @@ > > // For vectors, broadcast the value. > if (VectorType *VTy = dyn_cast(Ty)) > - return ConstantVector::get(SmallVector - 16>(VTy->getNumElements(), C)); > + return ConstantVector::getSplat(VTy->getNumElements(), C); > > return C; > } > @@ -459,8 +454,7 @@ > > // For vectors, broadcast the value. > if (VectorType *VTy = dyn_cast(Ty)) > - return ConstantVector::get( > - SmallVector(VTy->getNumElements(), C)); > + return ConstantVector::getSplat(VTy->getNumElements(), C); > > return C; > } > @@ -506,8 +500,7 @@ > > // For vectors, broadcast the value. > if (VectorType *VTy = dyn_cast(Ty)) > - return ConstantVector::get( > - SmallVector(VTy->getNumElements(), C)); > + return ConstantVector::getSplat(VTy->getNumElements(), C); > > return C; > } > @@ -521,8 +514,7 @@ > > // For vectors, broadcast the value. 
> if (VectorType *VTy = dyn_cast(Ty)) > - return ConstantVector::get( > - SmallVector(VTy->getNumElements(), C)); > + return ConstantVector::getSplat(VTy->getNumElements(), C); > > return C; > } > @@ -537,15 +529,12 @@ > > > Constant *ConstantFP::getZeroValueForNegation(Type* Ty) { > - if (VectorType *PTy = dyn_cast(Ty)) > - if (PTy->getElementType()->isFloatingPointTy()) { > - SmallVector zeros(PTy->getNumElements(), > - getNegativeZero(PTy->getElementType())); > - return ConstantVector::get(zeros); > - } > - > - if (Ty->isFloatingPointTy()) > - return getNegativeZero(Ty); > + if (Ty->getScalarType()->isFloatingPointTy()) { > + Constant *C = getNegativeZero(Ty); > + if (VectorType *VTy = dyn_cast(Ty)) > + return ConstantVector::getSplat(VTy->getNumElements(), C); > + return C; > + } > > return Constant::getNullValue(Ty); > } > @@ -818,6 +807,12 @@ > return pImpl->VectorConstants.getOrCreate(T, V); > } > > +Constant *ConstantVector::getSplat(unsigned NumElts, Constant *V) { > + SmallVector Elts(NumElts, V); > + return get(Elts); > +} > + > + > // Utility function for determining if a ConstantExpr is a CastOp or not. 
This > // can't be inline because we don't want to #include Instruction.h into > // Constant.h > @@ -2194,6 +2189,38 @@ > return getImpl(StringRef((char*)Elts.data(), Elts.size()*8), Ty); > } > > +Constant *ConstantDataVector::getSplat(unsigned NumElts, Constant *V) { > + assert(isElementTypeCompatible(V->getType()) && > + "Element type not compatible with ConstantData"); > + if (ConstantInt *CI = dyn_cast(V)) { > + if (CI->getType()->isIntegerTy(8)) { > + SmallVector Elts(NumElts, CI->getZExtValue()); > + return get(V->getContext(), Elts); > + } > + if (CI->getType()->isIntegerTy(16)) { > + SmallVector Elts(NumElts, CI->getZExtValue()); > + return get(V->getContext(), Elts); > + } > + if (CI->getType()->isIntegerTy(32)) { > + SmallVector Elts(NumElts, CI->getZExtValue()); > + return get(V->getContext(), Elts); > + } > + assert(CI->getType()->isIntegerTy(64) && "Unsupported ConstantData type"); > + SmallVector Elts(NumElts, CI->getZExtValue()); > + return get(V->getContext(), Elts); > + } > + > + ConstantFP *CFP = cast(V); > + if (CFP->getType()->isFloatTy()) { > + SmallVector Elts(NumElts, CFP->getValueAPF().convertToFloat()); > + return get(V->getContext(), Elts); > + } > + assert(CFP->getType()->isDoubleTy() && "Unsupported ConstantData type"); > + SmallVector Elts(NumElts, CFP->getValueAPF().convertToDouble()); > + return get(V->getContext(), Elts); > +} > + > + > /// getElementAsInteger - If this is a sequential container of integers (of > /// any size), return the specified element in the low bits of a uint64_t. 
> uint64_t ConstantDataSequential::getElementAsInteger(unsigned Elt) const { > > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits From ahatanaka at mips.com Tue Jan 24 21:01:35 2012 From: ahatanaka at mips.com (Akira Hatanaka) Date: Wed, 25 Jan 2012 03:01:35 -0000 Subject: [llvm-commits] [llvm] r148908 - in /llvm/trunk: lib/Target/Mips/Mips64InstrInfo.td lib/Target/Mips/MipsISelDAGToDAG.cpp test/CodeGen/Mips/mips64imm.ll Message-ID: <20120125030136.00FB32A6C12C@llvm.org> Author: ahatanak Date: Tue Jan 24 21:01:35 2012 New Revision: 148908 URL: http://llvm.org/viewvc/llvm-project?rev=148908&view=rev Log: Lower 64-bit immediates using MipsAnalyzeImmediate that has just been added. Add a test case to show fewer instructions are needed to load an immediate with the new way of loading immediates. Modified: llvm/trunk/lib/Target/Mips/Mips64InstrInfo.td llvm/trunk/lib/Target/Mips/MipsISelDAGToDAG.cpp llvm/trunk/test/CodeGen/Mips/mips64imm.ll Modified: llvm/trunk/lib/Target/Mips/Mips64InstrInfo.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Mips/Mips64InstrInfo.td?rev=148908&r1=148907&r2=148908&view=diff ============================================================================== --- llvm/trunk/lib/Target/Mips/Mips64InstrInfo.td (original) +++ llvm/trunk/lib/Target/Mips/Mips64InstrInfo.td Tue Jan 24 21:01:35 2012 @@ -31,19 +31,6 @@ // shamt must fit in 6 bits. def immZExt6 : ImmLeaf; -// Is a 32-bit int. -def immSExt32 : ImmLeaf(Imm);}]>; - -// Transformation Function - get the higher 16 bits. -def HIGHER : SDNodeXFormgetZExtValue() >> 32) & 0xFFFF); -}]>; - -// Transformation Function - get the highest 16 bits. 
-def HIGHEST : SDNodeXFormgetZExtValue() >> 48) & 0xFFFF); -}]>; - //===----------------------------------------------------------------------===// // Instructions specific format //===----------------------------------------------------------------------===// @@ -213,24 +200,6 @@ // Arbitrary patterns that map to one or more instructions //===----------------------------------------------------------------------===// -// Small immediates -def : Pat<(i64 immSExt16:$in), - (DADDiu ZERO_64, imm:$in)>; -def : Pat<(i64 immZExt16:$in), - (ORi64 ZERO_64, imm:$in)>; -def : Pat<(i64 immLow16Zero:$in), - (LUi64 (HI16 imm:$in))>; - -// 32-bit immediates -def : Pat<(i64 immSExt32:$imm), - (ORi64 (LUi64 (HI16 imm:$imm)), (LO16 imm:$imm))>; - -// Arbitrary immediates -def : Pat<(i64 imm:$imm), - (ORi64 (DSLL (ORi64 (DSLL (ORi64 (LUi64 (HIGHEST imm:$imm)), - (HIGHER imm:$imm)), 16), (HI16 imm:$imm)), 16), - (LO16 imm:$imm))>; - // extended loads let Predicates = [NotN64] in { def : Pat<(i64 (extloadi1 addr:$src)), (LB64 addr:$src)>; Modified: llvm/trunk/lib/Target/Mips/MipsISelDAGToDAG.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Mips/MipsISelDAGToDAG.cpp?rev=148908&r1=148907&r2=148908&view=diff ============================================================================== --- llvm/trunk/lib/Target/Mips/MipsISelDAGToDAG.cpp (original) +++ llvm/trunk/lib/Target/Mips/MipsISelDAGToDAG.cpp Tue Jan 24 21:01:35 2012 @@ -13,6 +13,7 @@ #define DEBUG_TYPE "mips-isel" #include "Mips.h" +#include "MipsAnalyzeImmediate.h" #include "MipsMachineFunction.h" #include "MipsRegisterInfo.h" #include "MipsSubtarget.h" @@ -317,6 +318,47 @@ break; } + case ISD::Constant: { + const ConstantSDNode *CN = dyn_cast(Node); + unsigned Size = CN->getValueSizeInBits(0); + + if (Size == 32) + break; + + MipsAnalyzeImmediate AnalyzeImm; + int64_t Imm = CN->getSExtValue(); + + const MipsAnalyzeImmediate::InstSeq &Seq = + AnalyzeImm.Analyze(Imm, Size, false); + + 
MipsAnalyzeImmediate::InstSeq::const_iterator Inst = Seq.begin(); + DebugLoc DL = CN->getDebugLoc(); + SDNode *RegOpnd; + SDValue ImmOpnd = CurDAG->getTargetConstant(SignExtend64<16>(Inst->ImmOpnd), + MVT::i64); + + // The first instruction can be a LUi which is different from other + // instructions (ADDiu, ORI and SLL) in that it does not have a register + // operand. + if (Inst->Opc == Mips::LUi64) + RegOpnd = CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64, ImmOpnd); + else + RegOpnd = + CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64, + CurDAG->getRegister(Mips::ZERO_64, MVT::i64), + ImmOpnd); + + // The remaining instructions in the sequence are handled here. + for (++Inst; Inst != Seq.end(); ++Inst) { + ImmOpnd = CurDAG->getTargetConstant(SignExtend64<16>(Inst->ImmOpnd), + MVT::i64); + RegOpnd = CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64, + SDValue(RegOpnd, 0), ImmOpnd); + } + + return RegOpnd; + } + case MipsISD::ThreadPointer: { EVT PtrVT = TLI.getPointerTy(); unsigned RdhwrOpc, SrcReg, DestReg; Modified: llvm/trunk/test/CodeGen/Mips/mips64imm.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Mips/mips64imm.ll?rev=148908&r1=148907&r2=148908&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/Mips/mips64imm.ll (original) +++ llvm/trunk/test/CodeGen/Mips/mips64imm.ll Tue Jan 24 21:01:35 2012 @@ -12,7 +12,7 @@ entry: ; CHECK: foo3 ; CHECK: lui $[[R0:[0-9]+]], 4660 -; CHECK: ori ${{[0-9]+}}, $[[R0]], 22136 +; CHECK: daddiu ${{[0-9]+}}, $[[R0]], 22136 ret i64 305419896 } @@ -33,11 +33,20 @@ define i64 @foo9() nounwind readnone { entry: ; CHECK: foo9 -; CHECK: lui $[[R0:[0-9]+]], 4660 -; CHECK: ori $[[R1:[0-9]+]], $[[R0]], 22136 -; CHECK: dsll $[[R2:[0-9]+]], $[[R1]], 16 -; CHECK: ori $[[R3:[0-9]+]], $[[R2]], 36882 -; CHECK: dsll $[[R4:[0-9]+]], $[[R3]], 16 -; CHECK: ori ${{[0-9]+}}, $[[R4]], 13398 +; CHECK: lui $[[R0:[0-9]+]], 583 +; CHECK: daddiu $[[R1:[0-9]+]], $[[R0]], 
-30001 +; CHECK: dsll $[[R2:[0-9]+]], $[[R1]], 18 +; CHECK: daddiu $[[R3:[0-9]+]], $[[R2]], 18441 +; CHECK: dsll $[[R4:[0-9]+]], $[[R3]], 17 +; CHECK: daddiu ${{[0-9]+}}, $[[R4]], 13398 ret i64 1311768467284833366 } + +define i64 @foo10() nounwind readnone { +entry: +; CHECK: foo10 +; CHECK: lui $[[R0:[0-9]+]], 34661 +; CHECK: daddiu ${{[0-9]+}}, $[[R0]], 17185 + ret i64 -8690466096928522240 +} + From geek4civic at gmail.com Tue Jan 24 21:15:47 2012 From: geek4civic at gmail.com (NAKAMURA Takumi) Date: Wed, 25 Jan 2012 03:15:47 -0000 Subject: [llvm-commits] [llvm] r148909 - /llvm/trunk/lib/Target/Mips/CMakeLists.txt Message-ID: <20120125031547.354B12A6C12C@llvm.org> Author: chapuni Date: Tue Jan 24 21:15:46 2012 New Revision: 148909 URL: http://llvm.org/viewvc/llvm-project?rev=148909&view=rev Log: Target/Mips: Unbreak CMake build. Modified: llvm/trunk/lib/Target/Mips/CMakeLists.txt Modified: llvm/trunk/lib/Target/Mips/CMakeLists.txt URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Mips/CMakeLists.txt?rev=148909&r1=148908&r2=148909&view=diff ============================================================================== --- llvm/trunk/lib/Target/Mips/CMakeLists.txt (original) +++ llvm/trunk/lib/Target/Mips/CMakeLists.txt Tue Jan 24 21:15:46 2012 @@ -11,6 +11,7 @@ add_public_tablegen_target(MipsCommonTableGen) add_llvm_target(MipsCodeGen + MipsAnalyzeImmediate.cpp MipsAsmPrinter.cpp MipsCodeEmitter.cpp MipsDelaySlotFiller.cpp From nicholas at mxc.ca Tue Jan 24 21:20:12 2012 From: nicholas at mxc.ca (Nick Lewycky) Date: Wed, 25 Jan 2012 03:20:12 -0000 Subject: [llvm-commits] [llvm] r148910 - /llvm/trunk/lib/VMCore/Constants.cpp Message-ID: <20120125032012.BDE292A6C12C@llvm.org> Author: nicholas Date: Tue Jan 24 21:20:12 2012 New Revision: 148910 URL: http://llvm.org/viewvc/llvm-project?rev=148910&view=rev Log: Fix assert("msg"). Fix unused-variable warnings complaining about VT used only in asserts. 
Modified: llvm/trunk/lib/VMCore/Constants.cpp Modified: llvm/trunk/lib/VMCore/Constants.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/Constants.cpp?rev=148910&r1=148909&r2=148910&view=diff ============================================================================== --- llvm/trunk/lib/VMCore/Constants.cpp (original) +++ llvm/trunk/lib/VMCore/Constants.cpp Tue Jan 24 21:20:12 2012 @@ -1506,8 +1506,9 @@ assert(DstTy->getScalarType()->isIntegerTy() && "PtrToInt destination must be integer or integer vector"); assert(isa(C->getType()) == isa(DstTy)); - if (VectorType *VT = dyn_cast(C->getType())) - assert(VT->getNumElements() == cast(DstTy)->getNumElements() && + if (isa(C->getType())) + assert(cast(C->getType())->getNumElements() == + cast(DstTy)->getNumElements() && "Invalid cast between a different number of vector elements"); return getFoldedCast(Instruction::PtrToInt, C, DstTy); } @@ -1518,8 +1519,9 @@ assert(DstTy->getScalarType()->isPointerTy() && "IntToPtr destination must be a pointer or pointer vector"); assert(isa(C->getType()) == isa(DstTy)); - if (VectorType *VT = dyn_cast(C->getType())) - assert(VT->getNumElements() == cast(DstTy)->getNumElements() && + if (isa(C->getType())) + assert(cast(C->getType())->getNumElements() == + cast(DstTy)->getNumElements() && "Invalid cast between a different number of vector elements"); return getFoldedCast(Instruction::IntToPtr, C, DstTy); } @@ -2218,7 +2220,8 @@ const char *EltPtr = getElementPointer(Elt); switch (getElementType()->getTypeID()) { - default: assert("Accessor can only be used when element is float/double!"); + default: + assert(0 && "Accessor can only be used when element is float/double!"); case Type::FloatTyID: return APFloat(*(float*)EltPtr); case Type::DoubleTyID: return APFloat(*(double*)EltPtr); } From geek4civic at gmail.com Tue Jan 24 21:34:41 2012 From: geek4civic at gmail.com (NAKAMURA Takumi) Date: Wed, 25 Jan 2012 03:34:41 -0000 Subject: [llvm-commits] [llvm] r148912 - 
/llvm/trunk/lib/Target/Mips/MipsAnalyzeImmediate.h Message-ID: <20120125033441.D60C22A6C12C@llvm.org> Author: chapuni Date: Tue Jan 24 21:34:41 2012 New Revision: 148912 URL: http://llvm.org/viewvc/llvm-project?rev=148912&view=rev Log: MipsAnalyzeImmediate.h: Fix to add DataTypes.h for msvc. inttypes.h is not supplied in msvc. Modified: llvm/trunk/lib/Target/Mips/MipsAnalyzeImmediate.h Modified: llvm/trunk/lib/Target/Mips/MipsAnalyzeImmediate.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Mips/MipsAnalyzeImmediate.h?rev=148912&r1=148911&r2=148912&view=diff ============================================================================== --- llvm/trunk/lib/Target/Mips/MipsAnalyzeImmediate.h (original) +++ llvm/trunk/lib/Target/Mips/MipsAnalyzeImmediate.h Tue Jan 24 21:34:41 2012 @@ -10,6 +10,7 @@ #define MIPS_ANALYZE_IMMEDIATE_H #include "llvm/ADT/SmallVector.h" +#include "llvm/Support/DataTypes.h" namespace llvm { From rafael.espindola at gmail.com Tue Jan 24 21:39:26 2012 From: rafael.espindola at gmail.com (Rafael Espindola) Date: Wed, 25 Jan 2012 03:39:26 -0000 Subject: [llvm-commits] [llvm] r148913 - /llvm/trunk/cmake/modules/HandleLLVMOptions.cmake Message-ID: <20120125033926.CC6E02A6C12C@llvm.org> Author: rafael Date: Tue Jan 24 21:39:26 2012 New Revision: 148913 URL: http://llvm.org/viewvc/llvm-project?rev=148913&view=rev Log: -fvisibility-inlines-hidden is a c++ only option. Thanks to Peter Collingbourne for noticing it. 
Modified: llvm/trunk/cmake/modules/HandleLLVMOptions.cmake Modified: llvm/trunk/cmake/modules/HandleLLVMOptions.cmake URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/cmake/modules/HandleLLVMOptions.cmake?rev=148913&r1=148912&r2=148913&view=diff ============================================================================== --- llvm/trunk/cmake/modules/HandleLLVMOptions.cmake (original) +++ llvm/trunk/cmake/modules/HandleLLVMOptions.cmake Tue Jan 24 21:39:26 2012 @@ -95,7 +95,6 @@ check_cxx_compiler_flag("-fvisibility-inlines-hidden" SUPPORTS_FVISIBILITY_INLINES_HIDDEN_FLAG) if( SUPPORTS_FVISIBILITY_INLINES_HIDDEN_FLAG ) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fvisibility-inlines-hidden") - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fvisibility-inlines-hidden") endif() endif() endif() From craig.topper at gmail.com Tue Jan 24 21:52:09 2012 From: craig.topper at gmail.com (Craig Topper) Date: Wed, 25 Jan 2012 03:52:09 -0000 Subject: [llvm-commits] [llvm] r148914 - /llvm/trunk/lib/Target/X86/X86InstrSSE.td Message-ID: <20120125035209.A53572A6C12C@llvm.org> Author: ctopper Date: Tue Jan 24 21:52:09 2012 New Revision: 148914 URL: http://llvm.org/viewvc/llvm-project?rev=148914&view=rev Log: Merge intrinsic pattern and no pattern versions of VCVTSD2SI instruction definitions. Matches non-AVX version of same instructions. 
Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=148914&r1=148913&r2=148914&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original) +++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Tue Jan 24 21:52:09 2012 @@ -1412,15 +1412,6 @@ [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))]>; } -multiclass sse12_cvt_s_np opc, RegisterClass SrcRC, RegisterClass DstRC, - X86MemOperand x86memop, string asm> { -let neverHasSideEffects = 1 in { - def rr : SI; - let mayLoad = 1 in - def rm : SI; -} // neverHasSideEffects = 1 -} - multiclass sse12_cvt_p opc, RegisterClass SrcRC, RegisterClass DstRC, SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag, string asm, Domain d> { @@ -1537,21 +1528,11 @@ [(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))]>; } -defm Int_VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si, - f128mem, load, "cvtsd2si">, XD, VEX; -defm Int_VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, - int_x86_sse2_cvtsd2si64, f128mem, load, "cvtsd2si">, - XD, VEX, VEX_W; - -// FIXME: The asm matcher has a hack to ignore instructions with _Int and Int_ -// Get rid of this hack or rename the intrinsics, there are several -// intructions that only match with the intrinsic form, why create duplicates -// to let them be recognized by the assembler? 
-defm VCVTSD2SI : sse12_cvt_s_np<0x2D, FR64, GR32, f64mem, - "cvtsd2si\t{$src, $dst|$dst, $src}">, XD, VEX, VEX_LIG; -defm VCVTSD2SI64 : sse12_cvt_s_np<0x2D, FR64, GR64, f64mem, - "cvtsd2si\t{$src, $dst|$dst, $src}">, XD, VEX, VEX_W, - VEX_LIG; +defm VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si, + f128mem, load, "cvtsd2si">, XD, VEX, VEX_LIG; +defm VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, + int_x86_sse2_cvtsd2si64, f128mem, load, "cvtsd2si">, + XD, VEX, VEX_W, VEX_LIG; defm CVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si, f128mem, load, "cvtsd2si{l}">, XD; From ahatanak at gmail.com Tue Jan 24 21:56:48 2012 From: ahatanak at gmail.com (Akira Hatanaka) Date: Tue, 24 Jan 2012 19:56:48 -0800 Subject: [llvm-commits] [llvm] r148909 - /llvm/trunk/lib/Target/Mips/CMakeLists.txt In-Reply-To: <20120125031547.354B12A6C12C@llvm.org> References: <20120125031547.354B12A6C12C@llvm.org> Message-ID: Thank you, I didn't remember to change CMakeLists. On Tue, Jan 24, 2012 at 7:15 PM, NAKAMURA Takumi wrote: > Author: chapuni > Date: Tue Jan 24 21:15:46 2012 > New Revision: 148909 > > URL: http://llvm.org/viewvc/llvm-project?rev=148909&view=rev > Log: > Target/Mips: Unbreak CMake build. > > Modified: > ? ?llvm/trunk/lib/Target/Mips/CMakeLists.txt > > Modified: llvm/trunk/lib/Target/Mips/CMakeLists.txt > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Mips/CMakeLists.txt?rev=148909&r1=148908&r2=148909&view=diff > ============================================================================== > --- llvm/trunk/lib/Target/Mips/CMakeLists.txt (original) > +++ llvm/trunk/lib/Target/Mips/CMakeLists.txt Tue Jan 24 21:15:46 2012 > @@ -11,6 +11,7 @@ > ?add_public_tablegen_target(MipsCommonTableGen) > > ?add_llvm_target(MipsCodeGen > + ?MipsAnalyzeImmediate.cpp > ? MipsAsmPrinter.cpp > ? MipsCodeEmitter.cpp > ? 
MipsDelaySlotFiller.cpp > > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits From ahatanaka at mips.com Tue Jan 24 21:55:10 2012 From: ahatanaka at mips.com (Akira Hatanaka) Date: Wed, 25 Jan 2012 03:55:10 -0000 Subject: [llvm-commits] [llvm] r148916 - /llvm/trunk/lib/Target/Mips/MipsRegisterInfo.cpp Message-ID: <20120125035510.B69DB2A6C12C@llvm.org> Author: ahatanak Date: Tue Jan 24 21:55:10 2012 New Revision: 148916 URL: http://llvm.org/viewvc/llvm-project?rev=148916&view=rev Log: Modify MipsRegisterInfo::eliminateFrameIndex to use MipsAnalyzeImmediate to expand offsets that do not fit in the 16-bit immediate field of load and store instructions. Also change the types of variables so that they are sufficiently large to handle 64-bit pointers. Modified: llvm/trunk/lib/Target/Mips/MipsRegisterInfo.cpp Modified: llvm/trunk/lib/Target/Mips/MipsRegisterInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Mips/MipsRegisterInfo.cpp?rev=148916&r1=148915&r2=148916&view=diff ============================================================================== --- llvm/trunk/lib/Target/Mips/MipsRegisterInfo.cpp (original) +++ llvm/trunk/lib/Target/Mips/MipsRegisterInfo.cpp Tue Jan 24 21:55:10 2012 @@ -14,6 +14,7 @@ #define DEBUG_TYPE "mips-reg-info" #include "Mips.h" +#include "MipsAnalyzeImmediate.h" #include "MipsSubtarget.h" #include "MipsRegisterInfo.h" #include "MipsMachineFunction.h" @@ -168,8 +169,8 @@ errs() << "<--------->\n" << MI); int FrameIndex = MI.getOperand(i).getIndex(); - int stackSize = MF.getFrameInfo()->getStackSize(); - int spOffset = MF.getFrameInfo()->getObjectOffset(FrameIndex); + uint64_t stackSize = MF.getFrameInfo()->getStackSize(); + int64_t spOffset = MF.getFrameInfo()->getObjectOffset(FrameIndex); DEBUG(errs() << "FrameIndex : " << FrameIndex << "\n" << "spOffset : " << spOffset << "\n" @@ -205,13 +206,13 @@ // - If 
the frame object is any of the following, its offset must be adjusted // by adding the size of the stack: // incoming argument, callee-saved register location or local variable. - int Offset; + int64_t Offset; if (MipsFI->isOutArgFI(FrameIndex) || MipsFI->isGPFI(FrameIndex) || MipsFI->isDynAllocFI(FrameIndex)) Offset = spOffset; else - Offset = spOffset + stackSize; + Offset = spOffset + (int64_t)stackSize; Offset += MI.getOperand(i+1).getImm(); @@ -219,20 +220,41 @@ // If MI is not a debug value, make sure Offset fits in the 16-bit immediate // field. - if (!MI.isDebugValue() && (Offset >= 0x8000 || Offset < -0x8000)) { + if (!MI.isDebugValue() && !isInt<16>(Offset)) { MachineBasicBlock &MBB = *MI.getParent(); DebugLoc DL = II->getDebugLoc(); - int ImmHi = (((unsigned)Offset & 0xffff0000) >> 16) + - ((Offset & 0x8000) != 0); + MipsAnalyzeImmediate AnalyzeImm; + unsigned Size = Subtarget.isABI_N64() ? 64 : 32; + unsigned LUi = Subtarget.isABI_N64() ? Mips::LUi64 : Mips::LUi; + unsigned ADDu = Subtarget.isABI_N64() ? Mips::DADDu : Mips::ADDu; + unsigned ZEROReg = Subtarget.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO; + unsigned ATReg = Subtarget.isABI_N64() ? Mips::AT_64 : Mips::AT; + const MipsAnalyzeImmediate::InstSeq &Seq = + AnalyzeImm.Analyze(Offset, Size, true /* LastInstrIsADDiu */); + MipsAnalyzeImmediate::InstSeq::const_iterator Inst = Seq.begin(); // FIXME: change this when mips goes MC". BuildMI(MBB, II, DL, TII.get(Mips::NOAT)); - BuildMI(MBB, II, DL, TII.get(Mips::LUi), Mips::AT).addImm(ImmHi); - BuildMI(MBB, II, DL, TII.get(Mips::ADDu), Mips::AT).addReg(FrameReg) - .addReg(Mips::AT); - FrameReg = Mips::AT; - Offset = (short)(Offset & 0xffff); + // The first instruction can be a LUi, which is different from other + // instructions (ADDiu, ORI and SLL) in that it does not have a register + // operand. 
+ if (Inst->Opc == LUi) + BuildMI(MBB, II, DL, TII.get(LUi), ATReg) + .addImm(SignExtend64<16>(Inst->ImmOpnd)); + else + BuildMI(MBB, II, DL, TII.get(Inst->Opc), ATReg).addReg(ZEROReg) + .addImm(SignExtend64<16>(Inst->ImmOpnd)); + + // Build the remaining instructions in Seq except for the last one. + for (++Inst; Inst != Seq.end() - 1; ++Inst) + BuildMI(MBB, II, DL, TII.get(Inst->Opc), ATReg).addReg(ATReg) + .addImm(SignExtend64<16>(Inst->ImmOpnd)); + + BuildMI(MBB, II, DL, TII.get(ADDu), ATReg).addReg(FrameReg).addReg(ATReg); + + FrameReg = ATReg; + Offset = SignExtend64<16>(Inst->ImmOpnd); BuildMI(MBB, ++II, MI.getDebugLoc(), TII.get(Mips::ATMACRO)); } From ahatanaka at mips.com Tue Jan 24 22:12:05 2012 From: ahatanaka at mips.com (Akira Hatanaka) Date: Wed, 25 Jan 2012 04:12:05 -0000 Subject: [llvm-commits] [llvm] r148917 - in /llvm/trunk: lib/Target/Mips/MipsFrameLowering.cpp test/CodeGen/Mips/largeimmprinting.ll Message-ID: <20120125041205.2E6DA2A6C12C@llvm.org> Author: ahatanak Date: Tue Jan 24 22:12:04 2012 New Revision: 148917 URL: http://llvm.org/viewvc/llvm-project?rev=148917&view=rev Log: Modify MipsFrameLowering::emitPrologue and emitEpilogue. - Use MipsAnalyzeImmediate to expand immediates that do not fit in 16-bit. - Change the types of variables so that they are sufficiently large to handle 64-bit pointers. - Emit instructions to set register $28 in a function prologue after instructions which store callee-saved registers have been emitted. 
Modified: llvm/trunk/lib/Target/Mips/MipsFrameLowering.cpp llvm/trunk/test/CodeGen/Mips/largeimmprinting.ll Modified: llvm/trunk/lib/Target/Mips/MipsFrameLowering.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Mips/MipsFrameLowering.cpp?rev=148917&r1=148916&r2=148917&view=diff ============================================================================== --- llvm/trunk/lib/Target/Mips/MipsFrameLowering.cpp (original) +++ llvm/trunk/lib/Target/Mips/MipsFrameLowering.cpp Tue Jan 24 22:12:04 2012 @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#include "MipsAnalyzeImmediate.h" #include "MipsFrameLowering.h" #include "MipsInstrInfo.h" #include "MipsMachineFunction.h" @@ -93,47 +94,40 @@ return true; } -static unsigned AlignOffset(unsigned Offset, unsigned Align) { - return (Offset + Align - 1) / Align * Align; -} - -// expand pair of register and immediate if the immediate doesn't fit in the -// 16-bit offset field. -// e.g. -// if OrigImm = 0x10000, OrigReg = $sp: -// generate the following sequence of instrs: -// lui $at, hi(0x10000) -// addu $at, $sp, $at -// -// (NewReg, NewImm) = ($at, lo(Ox10000)) -// return true -static bool expandRegLargeImmPair(unsigned OrigReg, int OrigImm, - unsigned& NewReg, int& NewImm, - MachineBasicBlock& MBB, - MachineBasicBlock::iterator I) { - // OrigImm fits in the 16-bit field - if (OrigImm < 0x8000 && OrigImm >= -0x8000) { - NewReg = OrigReg; - NewImm = OrigImm; - return false; - } - - MachineFunction* MF = MBB.getParent(); - const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); - DebugLoc DL = I->getDebugLoc(); - int ImmLo = (short)(OrigImm & 0xffff); - int ImmHi = (((unsigned)OrigImm & 0xffff0000) >> 16) + - ((OrigImm & 0x8000) != 0); +// Build an instruction sequence to load an immediate that is too large to fit +// in 16-bit and add the result to Reg. 
+static void expandLargeImm(unsigned Reg, int64_t Imm, bool IsN64, + const MipsInstrInfo &TII, MachineBasicBlock& MBB, + MachineBasicBlock::iterator II, DebugLoc DL) { + unsigned LUi = IsN64 ? Mips::LUi64 : Mips::LUi; + unsigned ADDu = IsN64 ? Mips::DADDu : Mips::ADDu; + unsigned ZEROReg = IsN64 ? Mips::ZERO_64 : Mips::ZERO; + unsigned ATReg = IsN64 ? Mips::AT_64 : Mips::AT; + MipsAnalyzeImmediate AnalyzeImm; + const MipsAnalyzeImmediate::InstSeq &Seq = + AnalyzeImm.Analyze(Imm, IsN64 ? 64 : 32, false /* LastInstrIsADDiu */); + MipsAnalyzeImmediate::InstSeq::const_iterator Inst = Seq.begin(); // FIXME: change this when mips goes MC". - BuildMI(MBB, I, DL, TII->get(Mips::NOAT)); - BuildMI(MBB, I, DL, TII->get(Mips::LUi), Mips::AT).addImm(ImmHi); - BuildMI(MBB, I, DL, TII->get(Mips::ADDu), Mips::AT).addReg(OrigReg) - .addReg(Mips::AT); - NewReg = Mips::AT; - NewImm = ImmLo; + BuildMI(MBB, II, DL, TII.get(Mips::NOAT)); - return true; + // The first instruction can be a LUi, which is different from other + // instructions (ADDiu, ORI and SLL) in that it does not have a register + // operand. + if (Inst->Opc == LUi) + BuildMI(MBB, II, DL, TII.get(LUi), ATReg) + .addImm(SignExtend64<16>(Inst->ImmOpnd)); + else + BuildMI(MBB, II, DL, TII.get(Inst->Opc), ATReg).addReg(ZEROReg) + .addImm(SignExtend64<16>(Inst->ImmOpnd)); + + // Build the remaining instructions in Seq. 
+ for (++Inst; Inst != Seq.end(); ++Inst) + BuildMI(MBB, II, DL, TII.get(Inst->Opc), ATReg).addReg(ATReg) + .addImm(SignExtend64<16>(Inst->ImmOpnd)); + + BuildMI(MBB, II, DL, TII.get(ADDu), Reg).addReg(Reg).addReg(ATReg); + BuildMI(MBB, II, DL, TII.get(Mips::ATMACRO)); } void MipsFrameLowering::emitPrologue(MachineFunction &MF) const { @@ -142,14 +136,12 @@ MipsFunctionInfo *MipsFI = MF.getInfo(); const MipsRegisterInfo *RegInfo = static_cast(MF.getTarget().getRegisterInfo()); + MachineRegisterInfo& MRI = MF.getRegInfo(); const MipsInstrInfo &TII = *static_cast(MF.getTarget().getInstrInfo()); MachineBasicBlock::iterator MBBI = MBB.begin(); DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); bool isPIC = (MF.getTarget().getRelocationModel() == Reloc::PIC_); - unsigned NewReg = 0; - int NewImm = 0; - bool ATUsed; unsigned GP = STI.isABI_N64() ? Mips::GP_64 : Mips::GP; unsigned T9 = STI.isABI_N64() ? Mips::T9_64 : Mips::T9; unsigned SP = STI.isABI_N64() ? Mips::SP_64 : Mips::SP; @@ -165,33 +157,23 @@ unsigned LocalVarAreaOffset = MipsFI->needGPSaveRestore() ? (MFI->getObjectOffset(MipsFI->getGPFI()) + RegSize) : MipsFI->getMaxCallFrameSize(); - unsigned StackSize = AlignOffset(LocalVarAreaOffset, StackAlign) + - AlignOffset(MFI->getStackSize(), StackAlign); + uint64_t StackSize = RoundUpToAlignment(LocalVarAreaOffset, StackAlign) + + RoundUpToAlignment(MFI->getStackSize(), StackAlign); // Update stack size MFI->setStackSize(StackSize); BuildMI(MBB, MBBI, dl, TII.get(Mips::NOREORDER)); - BuildMI(MBB, MBBI, dl, TII.get(Mips::NOMACRO)); // Emit instructions that set $gp using the the value of $t9. // O32 uses the directive .cpload while N32/64 requires three instructions to // do this. // TODO: Do not emit these instructions if no instructions use $gp. 
if (isPIC && STI.isABI_O32()) - BuildMI(MBB, llvm::prior(MBBI), dl, TII.get(Mips::CPLOAD)) + BuildMI(MBB, MBBI, dl, TII.get(Mips::CPLOAD)) .addReg(RegInfo->getPICCallReg()); - else if (STI.isABI_N64() || (isPIC && STI.isABI_N32())) { - // lui $28,%hi(%neg(%gp_rel(fname))) - // addu $28,$28,$25 - // addiu $28,$28,%lo(%neg(%gp_rel(fname))) - const GlobalValue *FName = MF.getFunction(); - BuildMI(MBB, MBBI, dl, TII.get(LUi), GP) - .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_HI); - BuildMI(MBB, MBBI, dl, TII.get(ADDu), GP).addReg(GP).addReg(T9); - BuildMI(MBB, MBBI, dl, TII.get(ADDiu), GP).addReg(GP) - .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO); - } + + BuildMI(MBB, MBBI, dl, TII.get(Mips::NOMACRO)); // No need to allocate space on the stack. if (StackSize == 0 && !MFI->adjustsStack()) return; @@ -200,13 +182,11 @@ std::vector &Moves = MMI.getFrameMoves(); MachineLocation DstML, SrcML; - // Adjust stack : addi sp, sp, (-imm) - ATUsed = expandRegLargeImmPair(SP, -StackSize, NewReg, NewImm, MBB, MBBI); - BuildMI(MBB, MBBI, dl, TII.get(ADDiu), SP).addReg(NewReg).addImm(NewImm); - - // FIXME: change this when mips goes MC". - if (ATUsed) - BuildMI(MBB, MBBI, dl, TII.get(Mips::ATMACRO)); + // Adjust stack. + if (isInt<16>(-StackSize)) // addi sp, sp, (-stacksize) + BuildMI(MBB, MBBI, dl, TII.get(ADDiu), SP).addReg(SP).addImm(-StackSize); + else // Expand immediate that doesn't fit in 16-bit. 
+ expandLargeImm(SP, -StackSize, STI.isABI_N64(), TII, MBB, MBBI, dl); // emit ".cfi_def_cfa_offset StackSize" MCSymbol *AdjustSPLabel = MMI.getContext().CreateTempSymbol(); @@ -259,6 +239,21 @@ } } + if ((STI.isABI_N64() || (isPIC && STI.isABI_N32())) && + MRI.isPhysRegUsed(GP)) { + // lui $28,%hi(%neg(%gp_rel(fname))) + // addu $28,$28,$25 + // addiu $28,$28,%lo(%neg(%gp_rel(fname))) + MachineBasicBlock::iterator InsPos = llvm::prior(MBBI); + const GlobalValue *FName = MF.getFunction(); + BuildMI(MBB, MBBI, dl, TII.get(LUi), GP) + .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_HI); + BuildMI(MBB, MBBI, dl, TII.get(ADDu), GP).addReg(GP).addReg(T9); + BuildMI(MBB, MBBI, dl, TII.get(ADDiu), GP).addReg(GP) + .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO); + MBBI = ++InsPos; + } + // if framepointer enabled, set it to point to the stack pointer. if (hasFP(MF)) { // Insert instruction "move $fp, $sp" at this location. @@ -298,13 +293,6 @@ unsigned ADDu = STI.isABI_N64() ? Mips::DADDu : Mips::ADDu; unsigned ADDiu = STI.isABI_N64() ? Mips::DADDiu : Mips::ADDiu; - // Get the number of bytes from FrameInfo - unsigned StackSize = MFI->getStackSize(); - - unsigned NewReg = 0; - int NewImm = 0; - bool ATUsed = false; - // if framepointer enabled, restore the stack pointer. if (hasFP(MF)) { // Find the first instruction that restores a callee-saved register. @@ -317,15 +305,17 @@ BuildMI(MBB, I, dl, TII.get(ADDu), SP).addReg(FP).addReg(ZERO); } - // adjust stack : insert addi sp, sp, (imm) - if (StackSize) { - ATUsed = expandRegLargeImmPair(SP, StackSize, NewReg, NewImm, MBB, MBBI); - BuildMI(MBB, MBBI, dl, TII.get(ADDiu), SP).addReg(NewReg).addImm(NewImm); - - // FIXME: change this when mips goes MC". - if (ATUsed) - BuildMI(MBB, MBBI, dl, TII.get(Mips::ATMACRO)); - } + // Get the number of bytes from FrameInfo + uint64_t StackSize = MFI->getStackSize(); + + if (!StackSize) + return; + + // Adjust stack. 
+ if (isInt<16>(StackSize)) // addi sp, sp, (-stacksize) + BuildMI(MBB, MBBI, dl, TII.get(ADDiu), SP).addReg(SP).addImm(StackSize); + else // Expand immediate that doesn't fit in 16-bit. + expandLargeImm(SP, StackSize, STI.isABI_N64(), TII, MBB, MBBI, dl); } void MipsFrameLowering:: Modified: llvm/trunk/test/CodeGen/Mips/largeimmprinting.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Mips/largeimmprinting.ll?rev=148917&r1=148916&r2=148917&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/Mips/largeimmprinting.ll (original) +++ llvm/trunk/test/CodeGen/Mips/largeimmprinting.ll Tue Jan 24 22:12:04 2012 @@ -7,8 +7,8 @@ define void @f() nounwind { entry: ; CHECK: lui $at, 65534 -; CHECK: addu $at, $sp, $at -; CHECK: addiu $sp, $at, -24 +; CHECK: addiu $at, $at, -24 +; CHECK: addu $sp, $sp, $at ; CHECK: .cprestore 65536 %agg.tmp = alloca %struct.S1, align 1 From ahatanaka at mips.com Tue Jan 24 22:19:23 2012 From: ahatanaka at mips.com (Akira Hatanaka) Date: Wed, 25 Jan 2012 04:19:23 -0000 Subject: [llvm-commits] [llvm] r148918 - /llvm/trunk/lib/Target/Mips/MipsFrameLowering.cpp Message-ID: <20120125041923.26A382A6C12C@llvm.org> Author: ahatanak Date: Tue Jan 24 22:19:22 2012 New Revision: 148918 URL: http://llvm.org/viewvc/llvm-project?rev=148918&view=rev Log: Mark 64-bit register RA_64 unused too. 
Modified: llvm/trunk/lib/Target/Mips/MipsFrameLowering.cpp Modified: llvm/trunk/lib/Target/Mips/MipsFrameLowering.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Mips/MipsFrameLowering.cpp?rev=148918&r1=148917&r2=148918&view=diff ============================================================================== --- llvm/trunk/lib/Target/Mips/MipsFrameLowering.cpp (original) +++ llvm/trunk/lib/Target/Mips/MipsFrameLowering.cpp Tue Jan 24 22:19:22 2012 @@ -322,7 +322,6 @@ processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS) const { MachineRegisterInfo& MRI = MF.getRegInfo(); - unsigned RA = STI.isABI_N64() ? Mips::RA_64 : Mips::RA; unsigned FP = STI.isABI_N64() ? Mips::FP_64 : Mips::FP; // FIXME: remove this code if register allocator can correctly mark @@ -338,7 +337,9 @@ // To correct this, $ra is explicitly marked unused if there is no // function call. if (MF.getFrameInfo()->hasCalls()) - MRI.setPhysRegUsed(RA); - else - MRI.setPhysRegUnused(RA); + MRI.setPhysRegUsed(Mips::RA); + else { + MRI.setPhysRegUnused(Mips::RA); + MRI.setPhysRegUnused(Mips::RA_64); + } } From craig.topper at gmail.com Tue Jan 24 22:42:03 2012 From: craig.topper at gmail.com (Craig Topper) Date: Wed, 25 Jan 2012 04:42:03 -0000 Subject: [llvm-commits] [llvm] r148922 - in /llvm/trunk: include/llvm/IntrinsicsX86.td lib/Target/X86/X86InstrSSE.td Message-ID: <20120125044203.CEFED2A6C12C@llvm.org> Author: ctopper Date: Tue Jan 24 22:42:03 2012 New Revision: 148922 URL: http://llvm.org/viewvc/llvm-project?rev=148922&view=rev Log: Remove AVX 256-bit unaligned load intrinsics. 128-bit versions had been removed a while ago. 
Modified: llvm/trunk/include/llvm/IntrinsicsX86.td llvm/trunk/lib/Target/X86/X86InstrSSE.td Modified: llvm/trunk/include/llvm/IntrinsicsX86.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IntrinsicsX86.td?rev=148922&r1=148921&r2=148922&view=diff ============================================================================== --- llvm/trunk/include/llvm/IntrinsicsX86.td (original) +++ llvm/trunk/include/llvm/IntrinsicsX86.td Tue Jan 24 22:42:03 2012 @@ -1300,12 +1300,6 @@ // SIMD load ops let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". - def int_x86_avx_loadu_pd_256 : GCCBuiltin<"__builtin_ia32_loadupd256">, - Intrinsic<[llvm_v4f64_ty], [llvm_ptr_ty], [IntrReadMem]>; - def int_x86_avx_loadu_ps_256 : GCCBuiltin<"__builtin_ia32_loadups256">, - Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty], [IntrReadMem]>; - def int_x86_avx_loadu_dq_256 : GCCBuiltin<"__builtin_ia32_loaddqu256">, - Intrinsic<[llvm_v32i8_ty], [llvm_ptr_ty], [IntrReadMem]>; def int_x86_avx_ldu_dq_256 : GCCBuiltin<"__builtin_ia32_lddqu256">, Intrinsic<[llvm_v32i8_ty], [llvm_ptr_ty], [IntrReadMem]>; } Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=148922&r1=148921&r2=148922&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original) +++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Tue Jan 24 22:42:03 2012 @@ -839,11 +839,8 @@ } -def : Pat<(int_x86_avx_loadu_ps_256 addr:$src), (VMOVUPSYrm addr:$src)>; def : Pat<(int_x86_avx_storeu_ps_256 addr:$dst, VR256:$src), (VMOVUPSYmr addr:$dst, VR256:$src)>; - -def : Pat<(int_x86_avx_loadu_pd_256 addr:$src), (VMOVUPDYrm addr:$src)>; def : Pat<(int_x86_avx_storeu_pd_256 addr:$dst, VR256:$src), (VMOVUPDYmr addr:$dst, VR256:$src)>; @@ -3461,7 +3458,6 @@ } // ExeDomain = SSEPackedInt let Predicates = [HasAVX] in { - def : Pat<(int_x86_avx_loadu_dq_256 addr:$src), 
(VMOVDQUYrm addr:$src)>; def : Pat<(int_x86_avx_storeu_dq_256 addr:$dst, VR256:$src), (VMOVDQUYmr addr:$dst, VR256:$src)>; } From sabre at nondot.org Tue Jan 24 23:07:56 2012 From: sabre at nondot.org (Chris Lattner) Date: Tue, 24 Jan 2012 21:07:56 -0800 Subject: [llvm-commits] [llvm] r148901 - in /llvm/trunk: include/llvm/Constants.h lib/VMCore/Constants.cpp In-Reply-To: References: <20120125015359.590F12A6C12C@llvm.org> Message-ID: <646D4BB4-3D0D-4362-A3BC-33A511BFCC80@nondot.org> Thanks Argyrios. Somehow I must have tested before rebuilding. Not sure how I could done that :) -Chris On Jan 24, 2012, at 6:47 PM, Argyrios Kyrtzidis wrote: > I reverted it in r148906 because it crashes tests, see > http://lab.llvm.org:8011/builders/llvm-x86_64-linux/builds/2635 > > -Argyrios > > On Jan 24, 2012, at 5:53 PM, Chris Lattner wrote: > >> Author: lattner >> Date: Tue Jan 24 19:53:58 2012 >> New Revision: 148901 >> >> URL: http://llvm.org/viewvc/llvm-project?rev=148901&view=rev >> Log: >> Introduce a new ConstantVector::getSplat constructor function to >> simplify a really common case. >> >> Modified: >> llvm/trunk/include/llvm/Constants.h >> llvm/trunk/lib/VMCore/Constants.cpp >> >> Modified: llvm/trunk/include/llvm/Constants.h >> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Constants.h?rev=148901&r1=148900&r2=148901&view=diff >> ============================================================================== >> --- llvm/trunk/include/llvm/Constants.h (original) >> +++ llvm/trunk/include/llvm/Constants.h Tue Jan 24 19:53:58 2012 >> @@ -489,6 +489,10 @@ >> // ConstantVector accessors >> static Constant *get(ArrayRef V); >> >> + /// getSplat - Return a ConstantVector with the specified constant in each >> + /// element. >> + static Constant *getSplat(unsigned NumElts, Constant *Elt); >> + >> /// Transparently provide more efficient getOperand methods. 
>> DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Constant); >> >> @@ -757,6 +761,11 @@ >> static Constant *get(LLVMContext &Context, ArrayRef Elts); >> static Constant *get(LLVMContext &Context, ArrayRef Elts); >> >> + /// getSplat - Return a ConstantVector with the specified constant in each >> + /// element. The specified constant has to be a of a compatible type (i8/i16/ >> + /// i32/i64/float/double) and must be a ConstantFP or ConstantInt. >> + static Constant *getSplat(unsigned NumElts, Constant *Elt); >> + >> /// getType - Specialize the getType() method to always return a VectorType, >> /// which reduces the amount of casting needed in parts of the compiler. >> /// >> >> Modified: llvm/trunk/lib/VMCore/Constants.cpp >> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/Constants.cpp?rev=148901&r1=148900&r2=148901&view=diff >> ============================================================================== >> --- llvm/trunk/lib/VMCore/Constants.cpp (original) >> +++ llvm/trunk/lib/VMCore/Constants.cpp Tue Jan 24 19:53:58 2012 >> @@ -129,7 +129,7 @@ >> >> // Broadcast a scalar to a vector, if necessary. 
>> if (VectorType *VTy = dyn_cast(Ty)) >> - C = ConstantVector::get(std::vector(VTy->getNumElements(), C)); >> + C = ConstantVector::getSplat(VTy->getNumElements(), C); >> >> return C; >> } >> @@ -145,11 +145,9 @@ >> return ConstantFP::get(Ty->getContext(), FL); >> } >> >> - SmallVector Elts; >> VectorType *VTy = cast(Ty); >> - Elts.resize(VTy->getNumElements(), getAllOnesValue(VTy->getElementType())); >> - assert(Elts[0] && "Invalid AllOnes value!"); >> - return cast(ConstantVector::get(Elts)); >> + return ConstantVector::getSplat(VTy->getNumElements(), >> + getAllOnesValue(VTy->getElementType())); >> } >> >> void Constant::destroyConstantImpl() { >> @@ -394,9 +392,8 @@ >> } >> assert(VTy->getElementType()->isIntegerTy(1) && >> "True must be vector of i1 or i1."); >> - SmallVector Splat(VTy->getNumElements(), >> - ConstantInt::getTrue(Ty->getContext())); >> - return ConstantVector::get(Splat); >> + return ConstantVector::getSplat(VTy->getNumElements(), >> + ConstantInt::getTrue(Ty->getContext())); >> } >> >> Constant *ConstantInt::getFalse(Type *Ty) { >> @@ -407,9 +404,8 @@ >> } >> assert(VTy->getElementType()->isIntegerTy(1) && >> "False must be vector of i1 or i1."); >> - SmallVector Splat(VTy->getNumElements(), >> - ConstantInt::getFalse(Ty->getContext())); >> - return ConstantVector::get(Splat); >> + return ConstantVector::getSplat(VTy->getNumElements(), >> + ConstantInt::getFalse(Ty->getContext())); >> } >> >> >> @@ -433,8 +429,7 @@ >> >> // For vectors, broadcast the value. >> if (VectorType *VTy = dyn_cast(Ty)) >> - return ConstantVector::get(SmallVector> - 16>(VTy->getNumElements(), C)); >> + return ConstantVector::getSplat(VTy->getNumElements(), C); >> >> return C; >> } >> @@ -459,8 +454,7 @@ >> >> // For vectors, broadcast the value. 
>> if (VectorType *VTy = dyn_cast(Ty)) >> - return ConstantVector::get( >> - SmallVector(VTy->getNumElements(), C)); >> + return ConstantVector::getSplat(VTy->getNumElements(), C); >> >> return C; >> } >> @@ -506,8 +500,7 @@ >> >> // For vectors, broadcast the value. >> if (VectorType *VTy = dyn_cast(Ty)) >> - return ConstantVector::get( >> - SmallVector(VTy->getNumElements(), C)); >> + return ConstantVector::getSplat(VTy->getNumElements(), C); >> >> return C; >> } >> @@ -521,8 +514,7 @@ >> >> // For vectors, broadcast the value. >> if (VectorType *VTy = dyn_cast(Ty)) >> - return ConstantVector::get( >> - SmallVector(VTy->getNumElements(), C)); >> + return ConstantVector::getSplat(VTy->getNumElements(), C); >> >> return C; >> } >> @@ -537,15 +529,12 @@ >> >> >> Constant *ConstantFP::getZeroValueForNegation(Type* Ty) { >> - if (VectorType *PTy = dyn_cast(Ty)) >> - if (PTy->getElementType()->isFloatingPointTy()) { >> - SmallVector zeros(PTy->getNumElements(), >> - getNegativeZero(PTy->getElementType())); >> - return ConstantVector::get(zeros); >> - } >> - >> - if (Ty->isFloatingPointTy()) >> - return getNegativeZero(Ty); >> + if (Ty->getScalarType()->isFloatingPointTy()) { >> + Constant *C = getNegativeZero(Ty); >> + if (VectorType *VTy = dyn_cast(Ty)) >> + return ConstantVector::getSplat(VTy->getNumElements(), C); >> + return C; >> + } >> >> return Constant::getNullValue(Ty); >> } >> @@ -818,6 +807,12 @@ >> return pImpl->VectorConstants.getOrCreate(T, V); >> } >> >> +Constant *ConstantVector::getSplat(unsigned NumElts, Constant *V) { >> + SmallVector Elts(NumElts, V); >> + return get(Elts); >> +} >> + >> + >> // Utility function for determining if a ConstantExpr is a CastOp or not. 
This >> // can't be inline because we don't want to #include Instruction.h into >> // Constant.h >> @@ -2194,6 +2189,38 @@ >> return getImpl(StringRef((char*)Elts.data(), Elts.size()*8), Ty); >> } >> >> +Constant *ConstantDataVector::getSplat(unsigned NumElts, Constant *V) { >> + assert(isElementTypeCompatible(V->getType()) && >> + "Element type not compatible with ConstantData"); >> + if (ConstantInt *CI = dyn_cast(V)) { >> + if (CI->getType()->isIntegerTy(8)) { >> + SmallVector Elts(NumElts, CI->getZExtValue()); >> + return get(V->getContext(), Elts); >> + } >> + if (CI->getType()->isIntegerTy(16)) { >> + SmallVector Elts(NumElts, CI->getZExtValue()); >> + return get(V->getContext(), Elts); >> + } >> + if (CI->getType()->isIntegerTy(32)) { >> + SmallVector Elts(NumElts, CI->getZExtValue()); >> + return get(V->getContext(), Elts); >> + } >> + assert(CI->getType()->isIntegerTy(64) && "Unsupported ConstantData type"); >> + SmallVector Elts(NumElts, CI->getZExtValue()); >> + return get(V->getContext(), Elts); >> + } >> + >> + ConstantFP *CFP = cast(V); >> + if (CFP->getType()->isFloatTy()) { >> + SmallVector Elts(NumElts, CFP->getValueAPF().convertToFloat()); >> + return get(V->getContext(), Elts); >> + } >> + assert(CFP->getType()->isDoubleTy() && "Unsupported ConstantData type"); >> + SmallVector Elts(NumElts, CFP->getValueAPF().convertToDouble()); >> + return get(V->getContext(), Elts); >> +} >> + >> + >> /// getElementAsInteger - If this is a sequential container of integers (of >> /// any size), return the specified element in the low bits of a uint64_t. 
>> uint64_t ConstantDataSequential::getElementAsInteger(unsigned Elt) const { >> >> >> _______________________________________________ >> llvm-commits mailing list >> llvm-commits at cs.uiuc.edu >> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits From sabre at nondot.org Tue Jan 24 23:19:54 2012 From: sabre at nondot.org (Chris Lattner) Date: Wed, 25 Jan 2012 05:19:54 -0000 Subject: [llvm-commits] [llvm] r148924 - in /llvm/trunk: include/llvm/Constants.h lib/VMCore/Constants.cpp Message-ID: <20120125051954.648192A6C12C@llvm.org> Author: lattner Date: Tue Jan 24 23:19:54 2012 New Revision: 148924 URL: http://llvm.org/viewvc/llvm-project?rev=148924&view=rev Log: reapply r148901 with a crucial fix. "Introduce a new ConstantVector::getSplat constructor function to simplify a really common case." Modified: llvm/trunk/include/llvm/Constants.h llvm/trunk/lib/VMCore/Constants.cpp Modified: llvm/trunk/include/llvm/Constants.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Constants.h?rev=148924&r1=148923&r2=148924&view=diff ============================================================================== --- llvm/trunk/include/llvm/Constants.h (original) +++ llvm/trunk/include/llvm/Constants.h Tue Jan 24 23:19:54 2012 @@ -489,6 +489,10 @@ // ConstantVector accessors static Constant *get(ArrayRef V); + /// getSplat - Return a ConstantVector with the specified constant in each + /// element. + static Constant *getSplat(unsigned NumElts, Constant *Elt); + /// Transparently provide more efficient getOperand methods. DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Constant); @@ -757,6 +761,11 @@ static Constant *get(LLVMContext &Context, ArrayRef Elts); static Constant *get(LLVMContext &Context, ArrayRef Elts); + /// getSplat - Return a ConstantVector with the specified constant in each + /// element. The specified constant has to be a of a compatible type (i8/i16/ + /// i32/i64/float/double) and must be a ConstantFP or ConstantInt. 
+ static Constant *getSplat(unsigned NumElts, Constant *Elt); + /// getType - Specialize the getType() method to always return a VectorType, /// which reduces the amount of casting needed in parts of the compiler. /// Modified: llvm/trunk/lib/VMCore/Constants.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/Constants.cpp?rev=148924&r1=148923&r2=148924&view=diff ============================================================================== --- llvm/trunk/lib/VMCore/Constants.cpp (original) +++ llvm/trunk/lib/VMCore/Constants.cpp Tue Jan 24 23:19:54 2012 @@ -129,7 +129,7 @@ // Broadcast a scalar to a vector, if necessary. if (VectorType *VTy = dyn_cast(Ty)) - C = ConstantVector::get(std::vector(VTy->getNumElements(), C)); + C = ConstantVector::getSplat(VTy->getNumElements(), C); return C; } @@ -145,11 +145,9 @@ return ConstantFP::get(Ty->getContext(), FL); } - SmallVector Elts; VectorType *VTy = cast(Ty); - Elts.resize(VTy->getNumElements(), getAllOnesValue(VTy->getElementType())); - assert(Elts[0] && "Invalid AllOnes value!"); - return cast(ConstantVector::get(Elts)); + return ConstantVector::getSplat(VTy->getNumElements(), + getAllOnesValue(VTy->getElementType())); } void Constant::destroyConstantImpl() { @@ -394,9 +392,8 @@ } assert(VTy->getElementType()->isIntegerTy(1) && "True must be vector of i1 or i1."); - SmallVector Splat(VTy->getNumElements(), - ConstantInt::getTrue(Ty->getContext())); - return ConstantVector::get(Splat); + return ConstantVector::getSplat(VTy->getNumElements(), + ConstantInt::getTrue(Ty->getContext())); } Constant *ConstantInt::getFalse(Type *Ty) { @@ -407,9 +404,8 @@ } assert(VTy->getElementType()->isIntegerTy(1) && "False must be vector of i1 or i1."); - SmallVector Splat(VTy->getNumElements(), - ConstantInt::getFalse(Ty->getContext())); - return ConstantVector::get(Splat); + return ConstantVector::getSplat(VTy->getNumElements(), + ConstantInt::getFalse(Ty->getContext())); } @@ -433,8 +429,7 @@ // For vectors, broadcast 
the value. if (VectorType *VTy = dyn_cast(Ty)) - return ConstantVector::get(SmallVector(VTy->getNumElements(), C)); + return ConstantVector::getSplat(VTy->getNumElements(), C); return C; } @@ -459,8 +454,7 @@ // For vectors, broadcast the value. if (VectorType *VTy = dyn_cast(Ty)) - return ConstantVector::get( - SmallVector(VTy->getNumElements(), C)); + return ConstantVector::getSplat(VTy->getNumElements(), C); return C; } @@ -506,8 +500,7 @@ // For vectors, broadcast the value. if (VectorType *VTy = dyn_cast(Ty)) - return ConstantVector::get( - SmallVector(VTy->getNumElements(), C)); + return ConstantVector::getSplat(VTy->getNumElements(), C); return C; } @@ -521,31 +514,28 @@ // For vectors, broadcast the value. if (VectorType *VTy = dyn_cast(Ty)) - return ConstantVector::get( - SmallVector(VTy->getNumElements(), C)); + return ConstantVector::getSplat(VTy->getNumElements(), C); return C; } -ConstantFP* ConstantFP::getNegativeZero(Type* Ty) { +ConstantFP *ConstantFP::getNegativeZero(Type *Ty) { LLVMContext &Context = Ty->getContext(); - APFloat apf = cast (Constant::getNullValue(Ty))->getValueAPF(); + APFloat apf = cast(Constant::getNullValue(Ty))->getValueAPF(); apf.changeSign(); return get(Context, apf); } -Constant *ConstantFP::getZeroValueForNegation(Type* Ty) { - if (VectorType *PTy = dyn_cast(Ty)) - if (PTy->getElementType()->isFloatingPointTy()) { - SmallVector zeros(PTy->getNumElements(), - getNegativeZero(PTy->getElementType())); - return ConstantVector::get(zeros); - } - - if (Ty->isFloatingPointTy()) - return getNegativeZero(Ty); +Constant *ConstantFP::getZeroValueForNegation(Type *Ty) { + Type *ScalarTy = Ty->getScalarType(); + if (ScalarTy->isFloatingPointTy()) { + Constant *C = getNegativeZero(ScalarTy); + if (VectorType *VTy = dyn_cast(Ty)) + return ConstantVector::getSplat(VTy->getNumElements(), C); + return C; + } return Constant::getNullValue(Ty); } @@ -818,6 +808,12 @@ return pImpl->VectorConstants.getOrCreate(T, V); } +Constant 
*ConstantVector::getSplat(unsigned NumElts, Constant *V) { + SmallVector Elts(NumElts, V); + return get(Elts); +} + + // Utility function for determining if a ConstantExpr is a CastOp or not. This // can't be inline because we don't want to #include Instruction.h into // Constant.h @@ -2196,6 +2192,38 @@ return getImpl(StringRef((char*)Elts.data(), Elts.size()*8), Ty); } +Constant *ConstantDataVector::getSplat(unsigned NumElts, Constant *V) { + assert(isElementTypeCompatible(V->getType()) && + "Element type not compatible with ConstantData"); + if (ConstantInt *CI = dyn_cast(V)) { + if (CI->getType()->isIntegerTy(8)) { + SmallVector Elts(NumElts, CI->getZExtValue()); + return get(V->getContext(), Elts); + } + if (CI->getType()->isIntegerTy(16)) { + SmallVector Elts(NumElts, CI->getZExtValue()); + return get(V->getContext(), Elts); + } + if (CI->getType()->isIntegerTy(32)) { + SmallVector Elts(NumElts, CI->getZExtValue()); + return get(V->getContext(), Elts); + } + assert(CI->getType()->isIntegerTy(64) && "Unsupported ConstantData type"); + SmallVector Elts(NumElts, CI->getZExtValue()); + return get(V->getContext(), Elts); + } + + ConstantFP *CFP = cast(V); + if (CFP->getType()->isFloatTy()) { + SmallVector Elts(NumElts, CFP->getValueAPF().convertToFloat()); + return get(V->getContext(), Elts); + } + assert(CFP->getType()->isDoubleTy() && "Unsupported ConstantData type"); + SmallVector Elts(NumElts, CFP->getValueAPF().convertToDouble()); + return get(V->getContext(), Elts); +} + + /// getElementAsInteger - If this is a sequential container of integers (of /// any size), return the specified element in the low bits of a uint64_t. 
uint64_t ConstantDataSequential::getElementAsInteger(unsigned Elt) const { From craig.topper at gmail.com Tue Jan 24 23:37:32 2012 From: craig.topper at gmail.com (Craig Topper) Date: Wed, 25 Jan 2012 05:37:32 -0000 Subject: [llvm-commits] [llvm] r148927 - in /llvm/trunk/lib/Target/X86: X86ISelLowering.cpp X86InstrInfo.cpp X86InstrSSE.td Message-ID: <20120125053733.05F7E2A6C12C@llvm.org> Author: ctopper Date: Tue Jan 24 23:37:32 2012 New Revision: 148927 URL: http://llvm.org/viewvc/llvm-project?rev=148927&view=rev Log: Custom lower phadd and phsub intrinsics to target specific nodes. Remove the patterns that are no longer necessary. Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp llvm/trunk/lib/Target/X86/X86InstrInfo.cpp llvm/trunk/lib/Target/X86/X86InstrSSE.td Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=148927&r1=148926&r2=148927&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original) +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue Jan 24 23:37:32 2012 @@ -9302,6 +9302,18 @@ case Intrinsic::x86_avx_hsub_pd_256: return DAG.getNode(X86ISD::FHSUB, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2)); + case Intrinsic::x86_ssse3_phadd_w_128: + case Intrinsic::x86_ssse3_phadd_d_128: + case Intrinsic::x86_avx2_phadd_w: + case Intrinsic::x86_avx2_phadd_d: + return DAG.getNode(X86ISD::HADD, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + case Intrinsic::x86_ssse3_phsub_w_128: + case Intrinsic::x86_ssse3_phsub_d_128: + case Intrinsic::x86_avx2_phsub_w: + case Intrinsic::x86_avx2_phsub_d: + return DAG.getNode(X86ISD::HSUB, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); case Intrinsic::x86_avx2_psllv_d: case Intrinsic::x86_avx2_psllv_q: case Intrinsic::x86_avx2_psllv_d_256: Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp URL: 
http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=148927&r1=148926&r2=148927&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original) +++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Tue Jan 24 23:37:32 2012 @@ -719,12 +719,12 @@ { X86::PCMPGTDrr, X86::PCMPGTDrm, TB_ALIGN_16 }, { X86::PCMPGTQrr, X86::PCMPGTQrm, TB_ALIGN_16 }, { X86::PCMPGTWrr, X86::PCMPGTWrm, TB_ALIGN_16 }, - { X86::PHADDDrr128, X86::PHADDDrm128, TB_ALIGN_16 }, - { X86::PHADDWrr128, X86::PHADDWrm128, TB_ALIGN_16 }, + { X86::PHADDDrr, X86::PHADDDrm, TB_ALIGN_16 }, + { X86::PHADDWrr, X86::PHADDWrm, TB_ALIGN_16 }, { X86::PHADDSWrr128, X86::PHADDSWrm128, TB_ALIGN_16 }, - { X86::PHSUBDrr128, X86::PHSUBDrm128, TB_ALIGN_16 }, + { X86::PHSUBDrr, X86::PHSUBDrm, TB_ALIGN_16 }, { X86::PHSUBSWrr128, X86::PHSUBSWrm128, TB_ALIGN_16 }, - { X86::PHSUBWrr128, X86::PHSUBWrm128, TB_ALIGN_16 }, + { X86::PHSUBWrr, X86::PHSUBWrm, TB_ALIGN_16 }, { X86::PINSRWrri, X86::PINSRWrmi, TB_ALIGN_16 }, { X86::PMADDUBSWrr128, X86::PMADDUBSWrm128, TB_ALIGN_16 }, { X86::PMADDWDrr, X86::PMADDWDrm, TB_ALIGN_16 }, @@ -903,12 +903,12 @@ { X86::VPCMPGTDrr, X86::VPCMPGTDrm, TB_ALIGN_16 }, { X86::VPCMPGTQrr, X86::VPCMPGTQrm, TB_ALIGN_16 }, { X86::VPCMPGTWrr, X86::VPCMPGTWrm, TB_ALIGN_16 }, - { X86::VPHADDDrr128, X86::VPHADDDrm128, TB_ALIGN_16 }, + { X86::VPHADDDrr, X86::VPHADDDrm, TB_ALIGN_16 }, { X86::VPHADDSWrr128, X86::VPHADDSWrm128, TB_ALIGN_16 }, - { X86::VPHADDWrr128, X86::VPHADDWrm128, TB_ALIGN_16 }, - { X86::VPHSUBDrr128, X86::VPHSUBDrm128, TB_ALIGN_16 }, + { X86::VPHADDWrr, X86::VPHADDWrm, TB_ALIGN_16 }, + { X86::VPHSUBDrr, X86::VPHSUBDrm, TB_ALIGN_16 }, { X86::VPHSUBSWrr128, X86::VPHSUBSWrm128, TB_ALIGN_16 }, - { X86::VPHSUBWrr128, X86::VPHSUBWrm128, TB_ALIGN_16 }, + { X86::VPHSUBWrr, X86::VPHSUBWrm, TB_ALIGN_16 }, { X86::VPERMILPDrr, X86::VPERMILPDrm, TB_ALIGN_16 }, { X86::VPERMILPSrr, X86::VPERMILPSrm, 
TB_ALIGN_16 }, { X86::VPINSRWrri, X86::VPINSRWrmi, TB_ALIGN_16 }, @@ -1047,12 +1047,12 @@ { X86::VPERMPDYrr, X86::VPERMPDYrm, TB_ALIGN_32 }, { X86::VPERMPSYrr, X86::VPERMPSYrm, TB_ALIGN_32 }, { X86::VPERMQYrr, X86::VPERMQYrm, TB_ALIGN_32 }, - { X86::VPHADDDrr256, X86::VPHADDDrm256, TB_ALIGN_32 }, + { X86::VPHADDDYrr, X86::VPHADDDYrm, TB_ALIGN_32 }, { X86::VPHADDSWrr256, X86::VPHADDSWrm256, TB_ALIGN_32 }, - { X86::VPHADDWrr256, X86::VPHADDWrm256, TB_ALIGN_32 }, - { X86::VPHSUBDrr256, X86::VPHSUBDrm256, TB_ALIGN_32 }, + { X86::VPHADDWYrr, X86::VPHADDWYrm, TB_ALIGN_32 }, + { X86::VPHSUBDYrr, X86::VPHSUBDYrm, TB_ALIGN_32 }, { X86::VPHSUBSWrr256, X86::VPHSUBSWrm256, TB_ALIGN_32 }, - { X86::VPHSUBWrr256, X86::VPHSUBWrm256, TB_ALIGN_32 }, + { X86::VPHSUBWYrr, X86::VPHSUBWYrm, TB_ALIGN_32 }, { X86::VPMADDUBSWrr256, X86::VPMADDUBSWrm256, TB_ALIGN_32 }, { X86::VPMADDWDYrr, X86::VPMADDWDYrm, TB_ALIGN_32 }, { X86::VPMAXSWYrr, X86::VPMAXSWYrm, TB_ALIGN_32 }, Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=148927&r1=148926&r2=148927&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original) +++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Tue Jan 24 23:37:32 2012 @@ -5079,6 +5079,28 @@ // SSSE3 - Packed Binary Operator Instructions //===---------------------------------------------------------------------===// +/// SS3I_binop_rm - Simple SSSE3 bin op +multiclass SS3I_binop_rm opc, string OpcodeStr, SDNode OpNode, + ValueType OpVT, RegisterClass RC, PatFrag memop_frag, + X86MemOperand x86memop, bit Is2Addr = 1> { + let isCommutable = 1 in + def rr : SS38I, + OpSize; + def rm : SS38I, OpSize; +} + /// SS3I_binop_rm_int - Simple SSSE3 bin op whose type can be v*{i8,i16,i32}. 
multiclass SS3I_binop_rm_int opc, string OpcodeStr, Intrinsic IntId128, bit Is2Addr = 1> { @@ -5118,16 +5140,16 @@ let ImmT = NoImm, Predicates = [HasAVX] in { let isCommutable = 0 in { - defm VPHADDW : SS3I_binop_rm_int<0x01, "vphaddw", - int_x86_ssse3_phadd_w_128, 0>, VEX_4V; - defm VPHADDD : SS3I_binop_rm_int<0x02, "vphaddd", - int_x86_ssse3_phadd_d_128, 0>, VEX_4V; + defm VPHADDW : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v8i16, VR128, + memopv2i64, i128mem, 0>, VEX_4V; + defm VPHADDD : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v4i32, VR128, + memopv2i64, i128mem, 0>, VEX_4V; + defm VPHSUBW : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v8i16, VR128, + memopv2i64, i128mem, 0>, VEX_4V; + defm VPHSUBD : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v4i32, VR128, + memopv2i64, i128mem, 0>, VEX_4V; defm VPHADDSW : SS3I_binop_rm_int<0x03, "vphaddsw", int_x86_ssse3_phadd_sw_128, 0>, VEX_4V; - defm VPHSUBW : SS3I_binop_rm_int<0x05, "vphsubw", - int_x86_ssse3_phsub_w_128, 0>, VEX_4V; - defm VPHSUBD : SS3I_binop_rm_int<0x06, "vphsubd", - int_x86_ssse3_phsub_d_128, 0>, VEX_4V; defm VPHSUBSW : SS3I_binop_rm_int<0x07, "vphsubsw", int_x86_ssse3_phsub_sw_128, 0>, VEX_4V; defm VPMADDUBSW : SS3I_binop_rm_int<0x04, "vpmaddubsw", @@ -5147,16 +5169,16 @@ let ImmT = NoImm, Predicates = [HasAVX2] in { let isCommutable = 0 in { - defm VPHADDW : SS3I_binop_rm_int_y<0x01, "vphaddw", - int_x86_avx2_phadd_w>, VEX_4V; - defm VPHADDD : SS3I_binop_rm_int_y<0x02, "vphaddd", - int_x86_avx2_phadd_d>, VEX_4V; + defm VPHADDWY : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v16i16, VR256, + memopv4i64, i256mem, 0>, VEX_4V; + defm VPHADDDY : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v8i32, VR256, + memopv4i64, i256mem, 0>, VEX_4V; + defm VPHSUBWY : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v16i16, VR256, + memopv4i64, i256mem, 0>, VEX_4V; + defm VPHSUBDY : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v8i32, VR256, + memopv4i64, i256mem, 0>, VEX_4V; defm VPHADDSW : SS3I_binop_rm_int_y<0x03, "vphaddsw", int_x86_avx2_phadd_sw>, 
VEX_4V; - defm VPHSUBW : SS3I_binop_rm_int_y<0x05, "vphsubw", - int_x86_avx2_phsub_w>, VEX_4V; - defm VPHSUBD : SS3I_binop_rm_int_y<0x06, "vphsubd", - int_x86_avx2_phsub_d>, VEX_4V; defm VPHSUBSW : SS3I_binop_rm_int_y<0x07, "vphsubsw", int_x86_avx2_phsub_sw>, VEX_4V; defm VPMADDUBSW : SS3I_binop_rm_int_y<0x04, "vpmaddubsw", @@ -5177,16 +5199,16 @@ // None of these have i8 immediate fields. let ImmT = NoImm, Constraints = "$src1 = $dst" in { let isCommutable = 0 in { - defm PHADDW : SS3I_binop_rm_int<0x01, "phaddw", - int_x86_ssse3_phadd_w_128>; - defm PHADDD : SS3I_binop_rm_int<0x02, "phaddd", - int_x86_ssse3_phadd_d_128>; + defm PHADDW : SS3I_binop_rm<0x01, "phaddw", X86hadd, v8i16, VR128, + memopv2i64, i128mem>; + defm PHADDD : SS3I_binop_rm<0x02, "phaddd", X86hadd, v4i32, VR128, + memopv2i64, i128mem>; + defm PHSUBW : SS3I_binop_rm<0x05, "phsubw", X86hsub, v8i16, VR128, + memopv2i64, i128mem>; + defm PHSUBD : SS3I_binop_rm<0x06, "phsubd", X86hsub, v4i32, VR128, + memopv2i64, i128mem>; defm PHADDSW : SS3I_binop_rm_int<0x03, "phaddsw", int_x86_ssse3_phadd_sw_128>; - defm PHSUBW : SS3I_binop_rm_int<0x05, "phsubw", - int_x86_ssse3_phsub_w_128>; - defm PHSUBD : SS3I_binop_rm_int<0x06, "phsubd", - int_x86_ssse3_phsub_d_128>; defm PHSUBSW : SS3I_binop_rm_int<0x07, "phsubsw", int_x86_ssse3_phsub_sw_128>; defm PMADDUBSW : SS3I_binop_rm_int<0x04, "pmaddubsw", @@ -5216,15 +5238,6 @@ (VPSIGNWrr128 VR128:$src1, VR128:$src2)>; def : Pat<(v4i32 (X86psign VR128:$src1, VR128:$src2)), (VPSIGNDrr128 VR128:$src1, VR128:$src2)>; - - def : Pat<(v8i16 (X86hadd VR128:$src1, VR128:$src2)), - (VPHADDWrr128 VR128:$src1, VR128:$src2)>; - def : Pat<(v4i32 (X86hadd VR128:$src1, VR128:$src2)), - (VPHADDDrr128 VR128:$src1, VR128:$src2)>; - def : Pat<(v8i16 (X86hsub VR128:$src1, VR128:$src2)), - (VPHSUBWrr128 VR128:$src1, VR128:$src2)>; - def : Pat<(v4i32 (X86hsub VR128:$src1, VR128:$src2)), - (VPHSUBDrr128 VR128:$src1, VR128:$src2)>; } let Predicates = [HasAVX2] in { @@ -5234,15 +5247,6 @@ 
(VPSIGNWrr256 VR256:$src1, VR256:$src2)>; def : Pat<(v8i32 (X86psign VR256:$src1, VR256:$src2)), (VPSIGNDrr256 VR256:$src1, VR256:$src2)>; - - def : Pat<(v16i16 (X86hadd VR256:$src1, VR256:$src2)), - (VPHADDWrr256 VR256:$src1, VR256:$src2)>; - def : Pat<(v8i32 (X86hadd VR256:$src1, VR256:$src2)), - (VPHADDDrr256 VR256:$src1, VR256:$src2)>; - def : Pat<(v16i16 (X86hsub VR256:$src1, VR256:$src2)), - (VPHSUBWrr256 VR256:$src1, VR256:$src2)>; - def : Pat<(v8i32 (X86hsub VR256:$src1, VR256:$src2)), - (VPHSUBDrr256 VR256:$src1, VR256:$src2)>; } let Predicates = [HasSSSE3] in { @@ -5257,15 +5261,6 @@ (PSIGNWrr128 VR128:$src1, VR128:$src2)>; def : Pat<(v4i32 (X86psign VR128:$src1, VR128:$src2)), (PSIGNDrr128 VR128:$src1, VR128:$src2)>; - - def : Pat<(v8i16 (X86hadd VR128:$src1, VR128:$src2)), - (PHADDWrr128 VR128:$src1, VR128:$src2)>; - def : Pat<(v4i32 (X86hadd VR128:$src1, VR128:$src2)), - (PHADDDrr128 VR128:$src1, VR128:$src2)>; - def : Pat<(v8i16 (X86hsub VR128:$src1, VR128:$src2)), - (PHSUBWrr128 VR128:$src1, VR128:$src2)>; - def : Pat<(v4i32 (X86hsub VR128:$src1, VR128:$src2)), - (PHSUBDrr128 VR128:$src1, VR128:$src2)>; } //===---------------------------------------------------------------------===// From sabre at nondot.org Wed Jan 25 00:02:56 2012 From: sabre at nondot.org (Chris Lattner) Date: Wed, 25 Jan 2012 06:02:56 -0000 Subject: [llvm-commits] [llvm] r148929 - in /llvm/trunk/lib: CodeGen/SelectionDAG/LegalizeDAG.cpp Target/X86/X86ISelLowering.cpp Transforms/InstCombine/InstCombineMulDivRem.cpp Transforms/InstCombine/InstCombineSimplifyDemanded.cpp Transforms/Scalar/ScalarReplAggregates.cpp VMCore/Instructions.cpp Message-ID: <20120125060257.1828F2A6C12C@llvm.org> Author: lattner Date: Wed Jan 25 00:02:56 2012 New Revision: 148929 URL: http://llvm.org/viewvc/llvm-project?rev=148929&view=rev Log: use ConstantVector::getSplat in a few places. 
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp llvm/trunk/lib/Target/X86/X86ISelLowering.cpp llvm/trunk/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp llvm/trunk/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp llvm/trunk/lib/VMCore/Instructions.cpp Modified: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp?rev=148929&r1=148928&r2=148929&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp (original) +++ llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp Wed Jan 25 00:02:56 2012 @@ -1699,7 +1699,7 @@ // If all elements are constants, create a load from the constant pool. if (isConstant) { - std::vector CV; + SmallVector CV; for (unsigned i = 0, e = NumElems; i != e; ++i) { if (ConstantFPSDNode *V = dyn_cast(Node->getOperand(i))) { Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=148929&r1=148928&r2=148929&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original) +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Wed Jan 25 00:02:56 2012 @@ -7591,12 +7591,8 @@ Constant *C0 = ConstantVector::get(CV0); SDValue CPIdx0 = DAG.getConstantPool(C0, getPointerTy(), 16); - SmallVector CV1; - CV1.push_back( - ConstantFP::get(*Context, APFloat(APInt(64, 0x4330000000000000ULL)))); - CV1.push_back( - ConstantFP::get(*Context, APFloat(APInt(64, 0x4530000000000000ULL)))); - Constant *C1 = ConstantVector::get(CV1); + Constant *C1 = ConstantVector::getSplat(2, + ConstantFP::get(*Context, APFloat(APInt(64, 0x4330000000000000ULL)))); SDValue CPIdx1 = DAG.getConstantPool(C1, getPointerTy(), 16); // Load the 64-bit value into an 
XMM register. @@ -7878,15 +7874,14 @@ EVT EltVT = VT; if (VT.isVector()) EltVT = VT.getVectorElementType(); - SmallVector CV; + Constant *C; if (EltVT == MVT::f64) { - Constant *C = ConstantFP::get(*Context, APFloat(APInt(64, ~(1ULL << 63)))); - CV.assign(2, C); + C = ConstantVector::getSplat(2, + ConstantFP::get(*Context, APFloat(APInt(64, ~(1ULL << 63))))); } else { - Constant *C = ConstantFP::get(*Context, APFloat(APInt(32, ~(1U << 31)))); - CV.assign(4, C); + C = ConstantVector::getSplat(4, + ConstantFP::get(*Context, APFloat(APInt(32, ~(1U << 31))))); } - Constant *C = ConstantVector::get(CV); SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16); SDValue Mask = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, MachinePointerInfo::getConstantPool(), @@ -7904,15 +7899,12 @@ EltVT = VT.getVectorElementType(); NumElts = VT.getVectorNumElements(); } - SmallVector CV; - if (EltVT == MVT::f64) { - Constant *C = ConstantFP::get(*Context, APFloat(APInt(64, 1ULL << 63))); - CV.assign(NumElts, C); - } else { - Constant *C = ConstantFP::get(*Context, APFloat(APInt(32, 1U << 31))); - CV.assign(NumElts, C); - } - Constant *C = ConstantVector::get(CV); + Constant *C; + if (EltVT == MVT::f64) + C = ConstantFP::get(*Context, APFloat(APInt(64, 1ULL << 63))); + else + C = ConstantFP::get(*Context, APFloat(APInt(32, 1U << 31))); + C = ConstantVector::getSplat(NumElts, C); SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16); SDValue Mask = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, MachinePointerInfo::getConstantPool(), @@ -10149,9 +10141,7 @@ DAG.getConstant(23, MVT::i32)); ConstantInt *CI = ConstantInt::get(*Context, APInt(32, 0x3f800000U)); - - std::vector CV(4, CI); - Constant *C = ConstantVector::get(CV); + Constant *C = ConstantVector::getSplat(4, CI); SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16); SDValue Addend = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, MachinePointerInfo::getConstantPool(), Modified: 
llvm/trunk/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp?rev=148929&r1=148928&r2=148929&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp (original) +++ llvm/trunk/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp Wed Jan 25 00:02:56 2012 @@ -698,7 +698,7 @@ hasNegative = true; if (hasNegative) { - std::vector Elts(VWidth); + SmallVector Elts(VWidth); for (unsigned i = 0; i != VWidth; ++i) { if (ConstantInt *RHS = dyn_cast(RHSV->getOperand(i))) { if (RHS->isNegative()) Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp?rev=148929&r1=148928&r2=148929&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp (original) +++ llvm/trunk/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp Wed Jan 25 00:02:56 2012 @@ -851,8 +851,8 @@ } // If we changed the constant, return it. - Constant *NewCP = ConstantVector::get(Elts); - return NewCP != CV ? NewCP : 0; + Constant *NewCV = ConstantVector::get(Elts); + return NewCV != CV ? NewCV : 0; } if (ConstantDataVector *CDV = dyn_cast(V)) { Modified: llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp?rev=148929&r1=148928&r2=148929&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp (original) +++ llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp Wed Jan 25 00:02:56 2012 @@ -2157,8 +2157,7 @@ // If the requested value was a vector constant, create it. 
if (EltTy->isVectorTy()) { unsigned NumElts = cast(EltTy)->getNumElements(); - SmallVector Elts(NumElts, StoreVal); - StoreVal = ConstantVector::get(Elts); + StoreVal = ConstantVector::getSplat(NumElts, StoreVal); } } new StoreInst(StoreVal, EltPtr, MI); Modified: llvm/trunk/lib/VMCore/Instructions.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/Instructions.cpp?rev=148929&r1=148928&r2=148929&view=diff ============================================================================== --- llvm/trunk/lib/VMCore/Instructions.cpp (original) +++ llvm/trunk/lib/VMCore/Instructions.cpp Wed Jan 25 00:02:56 2012 @@ -1873,46 +1873,27 @@ BinaryOperator *BinaryOperator::CreateFNeg(Value *Op, const Twine &Name, Instruction *InsertBefore) { Value *zero = ConstantFP::getZeroValueForNegation(Op->getType()); - return new BinaryOperator(Instruction::FSub, - zero, Op, + return new BinaryOperator(Instruction::FSub, zero, Op, Op->getType(), Name, InsertBefore); } BinaryOperator *BinaryOperator::CreateFNeg(Value *Op, const Twine &Name, BasicBlock *InsertAtEnd) { Value *zero = ConstantFP::getZeroValueForNegation(Op->getType()); - return new BinaryOperator(Instruction::FSub, - zero, Op, + return new BinaryOperator(Instruction::FSub, zero, Op, Op->getType(), Name, InsertAtEnd); } BinaryOperator *BinaryOperator::CreateNot(Value *Op, const Twine &Name, Instruction *InsertBefore) { - Constant *C; - if (VectorType *PTy = dyn_cast(Op->getType())) { - C = Constant::getAllOnesValue(PTy->getElementType()); - C = ConstantVector::get( - std::vector(PTy->getNumElements(), C)); - } else { - C = Constant::getAllOnesValue(Op->getType()); - } - + Constant *C = Constant::getAllOnesValue(Op->getType()); return new BinaryOperator(Instruction::Xor, Op, C, Op->getType(), Name, InsertBefore); } BinaryOperator *BinaryOperator::CreateNot(Value *Op, const Twine &Name, BasicBlock *InsertAtEnd) { - Constant *AllOnes; - if (VectorType *PTy = dyn_cast(Op->getType())) { - // Create a vector of all ones 
values. - Constant *Elt = Constant::getAllOnesValue(PTy->getElementType()); - AllOnes = ConstantVector::get( - std::vector(PTy->getNumElements(), Elt)); - } else { - AllOnes = Constant::getAllOnesValue(Op->getType()); - } - + Constant *AllOnes = Constant::getAllOnesValue(Op->getType()); return new BinaryOperator(Instruction::Xor, Op, AllOnes, Op->getType(), Name, InsertAtEnd); } From sabre at nondot.org Wed Jan 25 00:16:33 2012 From: sabre at nondot.org (Chris Lattner) Date: Wed, 25 Jan 2012 06:16:33 -0000 Subject: [llvm-commits] [llvm] r148931 - in /llvm/trunk: include/llvm/Constant.h include/llvm/Constants.h lib/VMCore/Constants.cpp Message-ID: <20120125061633.1AE782A6C12C@llvm.org> Author: lattner Date: Wed Jan 25 00:16:32 2012 New Revision: 148931 URL: http://llvm.org/viewvc/llvm-project?rev=148931&view=rev Log: constify some methods and add a new Constant::getAggregateElement helper method for the common operation of extracting an element out of a constant aggregate. Modified: llvm/trunk/include/llvm/Constant.h llvm/trunk/include/llvm/Constants.h llvm/trunk/lib/VMCore/Constants.cpp Modified: llvm/trunk/include/llvm/Constant.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Constant.h?rev=148931&r1=148930&r2=148931&view=diff ============================================================================== --- llvm/trunk/include/llvm/Constant.h (original) +++ llvm/trunk/include/llvm/Constant.h Wed Jan 25 00:16:32 2012 @@ -97,6 +97,13 @@ /// constant exprs and other cases we can't handle, we return an empty vector. void getVectorElements(SmallVectorImpl &Elts) const; + /// getAggregateElement - For aggregates (struct/array/vector) return the + /// constant that corresponds to the specified element if possible, or null if + /// not. This can return null if the element index is a ConstantExpr, or if + /// 'this' is a constant expr. 
+ Constant *getAggregateElement(unsigned Elt) const; + Constant *getAggregateElement(Constant *Elt) const; + /// destroyConstant - Called if some element of this constant is no longer /// valid. At this point only other constants may be on the use_list for this /// constant. Any constants on our Use list must also be destroy'd. The Modified: llvm/trunk/include/llvm/Constants.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Constants.h?rev=148931&r1=148930&r2=148931&view=diff ============================================================================== --- llvm/trunk/include/llvm/Constants.h (original) +++ llvm/trunk/include/llvm/Constants.h Wed Jan 25 00:16:32 2012 @@ -316,19 +316,19 @@ /// getSequentialElement - If this CAZ has array or vector type, return a zero /// with the right element type. - Constant *getSequentialElement(); + Constant *getSequentialElement() const; /// getStructElement - If this CAZ has struct type, return a zero with the /// right element type for the specified element. - Constant *getStructElement(unsigned Elt); + Constant *getStructElement(unsigned Elt) const; /// getElementValue - Return a zero of the right value for the specified GEP /// index. - Constant *getElementValue(Constant *C); + Constant *getElementValue(Constant *C) const; /// getElementValue - Return a zero of the right value for the specified GEP /// index. - Constant *getElementValue(unsigned Idx); + Constant *getElementValue(unsigned Idx) const; /// Methods for support type inquiry through isa, cast, and dyn_cast: /// @@ -1157,19 +1157,19 @@ /// getSequentialElement - If this Undef has array or vector type, return a /// undef with the right element type. - UndefValue *getSequentialElement(); + UndefValue *getSequentialElement() const; /// getStructElement - If this undef has struct type, return a undef with the /// right element type for the specified element. 
- UndefValue *getStructElement(unsigned Elt); + UndefValue *getStructElement(unsigned Elt) const; /// getElementValue - Return an undef of the right value for the specified GEP /// index. - UndefValue *getElementValue(Constant *C); + UndefValue *getElementValue(Constant *C) const; /// getElementValue - Return an undef of the right value for the specified GEP /// index. - UndefValue *getElementValue(unsigned Idx); + UndefValue *getElementValue(unsigned Idx) const; virtual void destroyConstant(); Modified: llvm/trunk/lib/VMCore/Constants.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/Constants.cpp?rev=148931&r1=148930&r2=148931&view=diff ============================================================================== --- llvm/trunk/lib/VMCore/Constants.cpp (original) +++ llvm/trunk/lib/VMCore/Constants.cpp Wed Jan 25 00:16:32 2012 @@ -150,6 +150,39 @@ getAllOnesValue(VTy->getElementType())); } +/// getAggregateElement - For aggregates (struct/array/vector) return the +/// constant that corresponds to the specified element if possible, or null if +/// not. This can return null if the element index is a ConstantExpr, or if +/// 'this' is a constant expr. +Constant *Constant::getAggregateElement(unsigned Elt) const { + if (const ConstantStruct *CS = dyn_cast(this)) + return Elt < CS->getNumOperands() ? CS->getOperand(Elt) : 0; + + if (const ConstantArray *CA = dyn_cast(this)) + return Elt < CA->getNumOperands() ? CA->getOperand(Elt) : 0; + + if (const ConstantVector *CV = dyn_cast(this)) + return Elt < CV->getNumOperands() ? 
CV->getOperand(Elt) : 0; + + if (const ConstantAggregateZero *CAZ =dyn_cast<ConstantAggregateZero>(this)) + return CAZ->getElementValue(Elt); + + if (const UndefValue *UV = dyn_cast<UndefValue>(this)) + return UV->getElementValue(Elt); + + if (const ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(this)) + return CDS->getElementAsConstant(Elt); + return 0; +} + +Constant *Constant::getAggregateElement(Constant *Elt) const { + assert(isa<IntegerType>(Elt->getType()) && "Index must be an integer"); + if (ConstantInt *CI = dyn_cast<ConstantInt>(Elt)) + return getAggregateElement(CI->getZExtValue()); + return 0; +} + + void Constant::destroyConstantImpl() { // When a Constant is destroyed, there may be lingering // references to the constant by other constants in the constant pool. These @@ -594,21 +627,21 @@ /// getSequentialElement - If this CAZ has array or vector type, return a zero /// with the right element type. -Constant *ConstantAggregateZero::getSequentialElement() { +Constant *ConstantAggregateZero::getSequentialElement() const { return Constant::getNullValue( cast<SequentialType>(getType())->getElementType()); } /// getStructElement - If this CAZ has struct type, return a zero with the /// right element type for the specified element. -Constant *ConstantAggregateZero::getStructElement(unsigned Elt) { +Constant *ConstantAggregateZero::getStructElement(unsigned Elt) const { return Constant::getNullValue( cast<StructType>(getType())->getElementType(Elt)); } /// getElementValue - Return a zero of the right value for the specified GEP /// index if we can, otherwise return null (e.g. if C is a ConstantExpr). -Constant *ConstantAggregateZero::getElementValue(Constant *C) { +Constant *ConstantAggregateZero::getElementValue(Constant *C) const { if (isa<SequentialType>(getType())) return getSequentialElement(); return getStructElement(cast<ConstantInt>(C)->getZExtValue()); @@ -616,7 +649,7 @@ /// getElementValue - Return a zero of the right value for the specified GEP /// index. 
-Constant *ConstantAggregateZero::getElementValue(unsigned Idx) { +Constant *ConstantAggregateZero::getElementValue(unsigned Idx) const { if (isa<SequentialType>(getType())) return getSequentialElement(); return getStructElement(Idx); @@ -629,19 +662,19 @@ /// getSequentialElement - If this undef has array or vector type, return an /// undef with the right element type. -UndefValue *UndefValue::getSequentialElement() { +UndefValue *UndefValue::getSequentialElement() const { return UndefValue::get(cast<SequentialType>(getType())->getElementType()); } /// getStructElement - If this undef has struct type, return a zero with the /// right element type for the specified element. -UndefValue *UndefValue::getStructElement(unsigned Elt) { +UndefValue *UndefValue::getStructElement(unsigned Elt) const { return UndefValue::get(cast<StructType>(getType())->getElementType(Elt)); } /// getElementValue - Return an undef of the right value for the specified GEP /// index if we can, otherwise return null (e.g. if C is a ConstantExpr). -UndefValue *UndefValue::getElementValue(Constant *C) { +UndefValue *UndefValue::getElementValue(Constant *C) const { if (isa<SequentialType>(getType())) return getSequentialElement(); return getStructElement(cast<ConstantInt>(C)->getZExtValue()); @@ -649,7 +682,7 @@ /// getElementValue - Return an undef of the right value for the specified GEP /// index. 
-UndefValue *UndefValue::getElementValue(unsigned Idx) { +UndefValue *UndefValue::getElementValue(unsigned Idx) const { if (isa(getType())) return getSequentialElement(); return getStructElement(Idx); From craig.topper at gmail.com Wed Jan 25 00:43:11 2012 From: craig.topper at gmail.com (Craig Topper) Date: Wed, 25 Jan 2012 06:43:11 -0000 Subject: [llvm-commits] [llvm] r148933 - in /llvm/trunk/lib/Target/X86: X86ISelLowering.cpp X86InstrFragmentsSIMD.td X86InstrInfo.cpp X86InstrSSE.td Message-ID: <20120125064312.0DA6D2A6C12C@llvm.org> Author: ctopper Date: Wed Jan 25 00:43:11 2012 New Revision: 148933 URL: http://llvm.org/viewvc/llvm-project?rev=148933&view=rev Log: Custom lower PSIGN and PSHUFB intrinsics to their corresponding target specific nodes so we can remove the isel patterns. Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td llvm/trunk/lib/Target/X86/X86InstrInfo.cpp llvm/trunk/lib/Target/X86/X86InstrSSE.td Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=148933&r1=148932&r2=148933&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original) +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Wed Jan 25 00:43:11 2012 @@ -9342,6 +9342,18 @@ case Intrinsic::x86_avx2_pcmpgt_q: return DAG.getNode(X86ISD::PCMPGT, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2)); + case Intrinsic::x86_ssse3_pshuf_b_128: + case Intrinsic::x86_avx2_pshuf_b: + return DAG.getNode(X86ISD::PSHUFB, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + case Intrinsic::x86_ssse3_psign_b_128: + case Intrinsic::x86_ssse3_psign_w_128: + case Intrinsic::x86_ssse3_psign_d_128: + case Intrinsic::x86_avx2_psign_b: + case Intrinsic::x86_avx2_psign_w: + case Intrinsic::x86_avx2_psign_d: + return DAG.getNode(X86ISD::PSIGN, dl, 
Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); // ptest and testp intrinsics. The intrinsic these come from are designed to // return an integer value, not just an instruction so lower it to the ptest Modified: llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td?rev=148933&r1=148932&r2=148933&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td (original) +++ llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td Wed Jan 25 00:43:11 2012 @@ -48,7 +48,7 @@ def X86cmpss : SDNode<"X86ISD::FSETCCss", SDTX86Cmpss>; def X86cmpsd : SDNode<"X86ISD::FSETCCsd", SDTX86Cmpsd>; def X86pshufb : SDNode<"X86ISD::PSHUFB", - SDTypeProfile<1, 2, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>, + SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>>; def X86andnp : SDNode<"X86ISD::ANDNP", SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=148933&r1=148932&r2=148933&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original) +++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Wed Jan 25 00:43:11 2012 @@ -741,10 +741,10 @@ { X86::PMULUDQrr, X86::PMULUDQrm, TB_ALIGN_16 }, { X86::PORrr, X86::PORrm, TB_ALIGN_16 }, { X86::PSADBWrr, X86::PSADBWrm, TB_ALIGN_16 }, - { X86::PSHUFBrr128, X86::PSHUFBrm128, TB_ALIGN_16 }, - { X86::PSIGNBrr128, X86::PSIGNBrm128, TB_ALIGN_16 }, - { X86::PSIGNWrr128, X86::PSIGNWrm128, TB_ALIGN_16 }, - { X86::PSIGNDrr128, X86::PSIGNDrm128, TB_ALIGN_16 }, + { X86::PSHUFBrr, X86::PSHUFBrm, TB_ALIGN_16 }, + { X86::PSIGNBrr, X86::PSIGNBrm, TB_ALIGN_16 }, + { X86::PSIGNWrr, X86::PSIGNWrm, TB_ALIGN_16 }, + { X86::PSIGNDrr, X86::PSIGNDrm, TB_ALIGN_16 }, { 
X86::PSLLDrr, X86::PSLLDrm, TB_ALIGN_16 }, { X86::PSLLQrr, X86::PSLLQrm, TB_ALIGN_16 }, { X86::PSLLWrr, X86::PSLLWrm, TB_ALIGN_16 }, @@ -927,10 +927,10 @@ { X86::VPMULUDQrr, X86::VPMULUDQrm, TB_ALIGN_16 }, { X86::VPORrr, X86::VPORrm, TB_ALIGN_16 }, { X86::VPSADBWrr, X86::VPSADBWrm, TB_ALIGN_16 }, - { X86::VPSHUFBrr128, X86::VPSHUFBrm128, TB_ALIGN_16 }, - { X86::VPSIGNBrr128, X86::VPSIGNBrm128, TB_ALIGN_16 }, - { X86::VPSIGNWrr128, X86::VPSIGNWrm128, TB_ALIGN_16 }, - { X86::VPSIGNDrr128, X86::VPSIGNDrm128, TB_ALIGN_16 }, + { X86::VPSHUFBrr, X86::VPSHUFBrm, TB_ALIGN_16 }, + { X86::VPSIGNBrr, X86::VPSIGNBrm, TB_ALIGN_16 }, + { X86::VPSIGNWrr, X86::VPSIGNWrm, TB_ALIGN_16 }, + { X86::VPSIGNDrr, X86::VPSIGNDrm, TB_ALIGN_16 }, { X86::VPSLLDrr, X86::VPSLLDrm, TB_ALIGN_16 }, { X86::VPSLLQrr, X86::VPSLLQrm, TB_ALIGN_16 }, { X86::VPSLLWrr, X86::VPSLLWrm, TB_ALIGN_16 }, @@ -1069,10 +1069,10 @@ { X86::VPMULUDQYrr, X86::VPMULUDQYrm, TB_ALIGN_32 }, { X86::VPORYrr, X86::VPORYrm, TB_ALIGN_32 }, { X86::VPSADBWYrr, X86::VPSADBWYrm, TB_ALIGN_32 }, - { X86::VPSHUFBrr256, X86::VPSHUFBrm256, TB_ALIGN_32 }, - { X86::VPSIGNBrr256, X86::VPSIGNBrm256, TB_ALIGN_32 }, - { X86::VPSIGNWrr256, X86::VPSIGNWrm256, TB_ALIGN_32 }, - { X86::VPSIGNDrr256, X86::VPSIGNDrm256, TB_ALIGN_32 }, + { X86::VPSHUFBYrr, X86::VPSHUFBYrm, TB_ALIGN_32 }, + { X86::VPSIGNBYrr, X86::VPSIGNBYrm, TB_ALIGN_32 }, + { X86::VPSIGNWYrr, X86::VPSIGNWYrm, TB_ALIGN_32 }, + { X86::VPSIGNDYrr, X86::VPSIGNDYrm, TB_ALIGN_32 }, { X86::VPSLLDYrr, X86::VPSLLDYrm, TB_ALIGN_16 }, { X86::VPSLLQYrr, X86::VPSLLQYrm, TB_ALIGN_16 }, { X86::VPSLLWYrr, X86::VPSLLWYrm, TB_ALIGN_16 }, Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=148933&r1=148932&r2=148933&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original) +++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Wed Jan 25 
00:43:11 2012 @@ -5148,20 +5148,20 @@ memopv2i64, i128mem, 0>, VEX_4V; defm VPHSUBD : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v4i32, VR128, memopv2i64, i128mem, 0>, VEX_4V; + defm VPSIGNB : SS3I_binop_rm<0x08, "vpsignb", X86psign, v16i8, VR128, + memopv2i64, i128mem, 0>, VEX_4V; + defm VPSIGNW : SS3I_binop_rm<0x09, "vpsignw", X86psign, v8i16, VR128, + memopv2i64, i128mem, 0>, VEX_4V; + defm VPSIGND : SS3I_binop_rm<0x0A, "vpsignd", X86psign, v4i32, VR128, + memopv2i64, i128mem, 0>, VEX_4V; + defm VPSHUFB : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v16i8, VR128, + memopv2i64, i128mem, 0>, VEX_4V; defm VPHADDSW : SS3I_binop_rm_int<0x03, "vphaddsw", int_x86_ssse3_phadd_sw_128, 0>, VEX_4V; defm VPHSUBSW : SS3I_binop_rm_int<0x07, "vphsubsw", int_x86_ssse3_phsub_sw_128, 0>, VEX_4V; defm VPMADDUBSW : SS3I_binop_rm_int<0x04, "vpmaddubsw", int_x86_ssse3_pmadd_ub_sw_128, 0>, VEX_4V; - defm VPSHUFB : SS3I_binop_rm_int<0x00, "vpshufb", - int_x86_ssse3_pshuf_b_128, 0>, VEX_4V; - defm VPSIGNB : SS3I_binop_rm_int<0x08, "vpsignb", - int_x86_ssse3_psign_b_128, 0>, VEX_4V; - defm VPSIGNW : SS3I_binop_rm_int<0x09, "vpsignw", - int_x86_ssse3_psign_w_128, 0>, VEX_4V; - defm VPSIGND : SS3I_binop_rm_int<0x0A, "vpsignd", - int_x86_ssse3_psign_d_128, 0>, VEX_4V; } defm VPMULHRSW : SS3I_binop_rm_int<0x0B, "vpmulhrsw", int_x86_ssse3_pmul_hr_sw_128, 0>, VEX_4V; @@ -5177,20 +5177,20 @@ memopv4i64, i256mem, 0>, VEX_4V; defm VPHSUBDY : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v8i32, VR256, memopv4i64, i256mem, 0>, VEX_4V; + defm VPSIGNBY : SS3I_binop_rm<0x08, "vpsignb", X86psign, v32i8, VR256, + memopv4i64, i256mem, 0>, VEX_4V; + defm VPSIGNWY : SS3I_binop_rm<0x09, "vpsignw", X86psign, v16i16, VR256, + memopv4i64, i256mem, 0>, VEX_4V; + defm VPSIGNDY : SS3I_binop_rm<0x0A, "vpsignd", X86psign, v8i32, VR256, + memopv4i64, i256mem, 0>, VEX_4V; + defm VPSHUFBY : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v32i8, VR256, + memopv4i64, i256mem, 0>, VEX_4V; defm VPHADDSW : SS3I_binop_rm_int_y<0x03, 
"vphaddsw", int_x86_avx2_phadd_sw>, VEX_4V; defm VPHSUBSW : SS3I_binop_rm_int_y<0x07, "vphsubsw", int_x86_avx2_phsub_sw>, VEX_4V; defm VPMADDUBSW : SS3I_binop_rm_int_y<0x04, "vpmaddubsw", int_x86_avx2_pmadd_ub_sw>, VEX_4V; - defm VPSHUFB : SS3I_binop_rm_int_y<0x00, "vpshufb", - int_x86_avx2_pshuf_b>, VEX_4V; - defm VPSIGNB : SS3I_binop_rm_int_y<0x08, "vpsignb", - int_x86_avx2_psign_b>, VEX_4V; - defm VPSIGNW : SS3I_binop_rm_int_y<0x09, "vpsignw", - int_x86_avx2_psign_w>, VEX_4V; - defm VPSIGND : SS3I_binop_rm_int_y<0x0A, "vpsignd", - int_x86_avx2_psign_d>, VEX_4V; } defm VPMULHRSW : SS3I_binop_rm_int_y<0x0B, "vpmulhrsw", int_x86_avx2_pmul_hr_sw>, VEX_4V; @@ -5207,62 +5207,25 @@ memopv2i64, i128mem>; defm PHSUBD : SS3I_binop_rm<0x06, "phsubd", X86hsub, v4i32, VR128, memopv2i64, i128mem>; + defm PSIGNB : SS3I_binop_rm<0x08, "psignb", X86psign, v16i8, VR128, + memopv2i64, i128mem>; + defm PSIGNW : SS3I_binop_rm<0x09, "psignw", X86psign, v8i16, VR128, + memopv2i64, i128mem>; + defm PSIGND : SS3I_binop_rm<0x0A, "psignd", X86psign, v4i32, VR128, + memopv2i64, i128mem>; + defm PSHUFB : SS3I_binop_rm<0x00, "pshufb", X86pshufb, v16i8, VR128, + memopv2i64, i128mem>; defm PHADDSW : SS3I_binop_rm_int<0x03, "phaddsw", int_x86_ssse3_phadd_sw_128>; defm PHSUBSW : SS3I_binop_rm_int<0x07, "phsubsw", int_x86_ssse3_phsub_sw_128>; defm PMADDUBSW : SS3I_binop_rm_int<0x04, "pmaddubsw", int_x86_ssse3_pmadd_ub_sw_128>; - defm PSHUFB : SS3I_binop_rm_int<0x00, "pshufb", - int_x86_ssse3_pshuf_b_128>; - defm PSIGNB : SS3I_binop_rm_int<0x08, "psignb", - int_x86_ssse3_psign_b_128>; - defm PSIGNW : SS3I_binop_rm_int<0x09, "psignw", - int_x86_ssse3_psign_w_128>; - defm PSIGND : SS3I_binop_rm_int<0x0A, "psignd", - int_x86_ssse3_psign_d_128>; } defm PMULHRSW : SS3I_binop_rm_int<0x0B, "pmulhrsw", int_x86_ssse3_pmul_hr_sw_128>; } -let Predicates = [HasAVX] in { - def : Pat<(X86pshufb VR128:$src, VR128:$mask), - (VPSHUFBrr128 VR128:$src, VR128:$mask)>; - def : Pat<(X86pshufb VR128:$src, (bc_v16i8 
(memopv2i64 addr:$mask))), - (VPSHUFBrm128 VR128:$src, addr:$mask)>; - - def : Pat<(v16i8 (X86psign VR128:$src1, VR128:$src2)), - (VPSIGNBrr128 VR128:$src1, VR128:$src2)>; - def : Pat<(v8i16 (X86psign VR128:$src1, VR128:$src2)), - (VPSIGNWrr128 VR128:$src1, VR128:$src2)>; - def : Pat<(v4i32 (X86psign VR128:$src1, VR128:$src2)), - (VPSIGNDrr128 VR128:$src1, VR128:$src2)>; -} - -let Predicates = [HasAVX2] in { - def : Pat<(v32i8 (X86psign VR256:$src1, VR256:$src2)), - (VPSIGNBrr256 VR256:$src1, VR256:$src2)>; - def : Pat<(v16i16 (X86psign VR256:$src1, VR256:$src2)), - (VPSIGNWrr256 VR256:$src1, VR256:$src2)>; - def : Pat<(v8i32 (X86psign VR256:$src1, VR256:$src2)), - (VPSIGNDrr256 VR256:$src1, VR256:$src2)>; -} - -let Predicates = [HasSSSE3] in { - def : Pat<(X86pshufb VR128:$src, VR128:$mask), - (PSHUFBrr128 VR128:$src, VR128:$mask)>; - def : Pat<(X86pshufb VR128:$src, (bc_v16i8 (memopv2i64 addr:$mask))), - (PSHUFBrm128 VR128:$src, addr:$mask)>; - - def : Pat<(v16i8 (X86psign VR128:$src1, VR128:$src2)), - (PSIGNBrr128 VR128:$src1, VR128:$src2)>; - def : Pat<(v8i16 (X86psign VR128:$src1, VR128:$src2)), - (PSIGNWrr128 VR128:$src1, VR128:$src2)>; - def : Pat<(v4i32 (X86psign VR128:$src1, VR128:$src2)), - (PSIGNDrr128 VR128:$src1, VR128:$src2)>; -} - //===---------------------------------------------------------------------===// // SSSE3 - Packed Align Instruction Patterns //===---------------------------------------------------------------------===// From sabre at nondot.org Wed Jan 25 00:48:06 2012 From: sabre at nondot.org (Chris Lattner) Date: Wed, 25 Jan 2012 06:48:06 -0000 Subject: [llvm-commits] [llvm] r148934 - in /llvm/trunk/lib: Analysis/ConstantFolding.cpp Analysis/ValueTracking.cpp Linker/LinkModules.cpp Transforms/IPO/GlobalOpt.cpp Transforms/InstCombine/InstCombineSimplifyDemanded.cpp Message-ID: <20120125064808.285D92A6C12C@llvm.org> Author: lattner Date: Wed Jan 25 00:48:06 2012 New Revision: 148934 URL: 
http://llvm.org/viewvc/llvm-project?rev=148934&view=rev Log: use Constant::getAggregateElement to simplify a bunch of code. Modified: llvm/trunk/lib/Analysis/ConstantFolding.cpp llvm/trunk/lib/Analysis/ValueTracking.cpp llvm/trunk/lib/Linker/LinkModules.cpp llvm/trunk/lib/Transforms/IPO/GlobalOpt.cpp llvm/trunk/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp Modified: llvm/trunk/lib/Analysis/ConstantFolding.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/ConstantFolding.cpp?rev=148934&r1=148933&r2=148934&view=diff ============================================================================== --- llvm/trunk/lib/Analysis/ConstantFolding.cpp (original) +++ llvm/trunk/lib/Analysis/ConstantFolding.cpp Wed Jan 25 00:48:06 2012 @@ -311,30 +311,20 @@ // not reached. } - if (ConstantArray *CA = dyn_cast(C)) { - uint64_t EltSize = TD.getTypeAllocSize(CA->getType()->getElementType()); + if (isa(C) || isa(C) || + isa(C)) { + Type *EltTy = cast(C->getType())->getElementType(); + uint64_t EltSize = TD.getTypeAllocSize(EltTy); uint64_t Index = ByteOffset / EltSize; uint64_t Offset = ByteOffset - Index * EltSize; - for (; Index != CA->getType()->getNumElements(); ++Index) { - if (!ReadDataFromGlobal(CA->getOperand(Index), Offset, CurPtr, - BytesLeft, TD)) - return false; - if (EltSize >= BytesLeft) - return true; - - Offset = 0; - BytesLeft -= EltSize; - CurPtr += EltSize; - } - return true; - } - - if (ConstantVector *CV = dyn_cast(C)) { - uint64_t EltSize = TD.getTypeAllocSize(CV->getType()->getElementType()); - uint64_t Index = ByteOffset / EltSize; - uint64_t Offset = ByteOffset - Index * EltSize; - for (; Index != CV->getType()->getNumElements(); ++Index) { - if (!ReadDataFromGlobal(CV->getOperand(Index), Offset, CurPtr, + uint64_t NumElts; + if (ArrayType *AT = dyn_cast(C->getType())) + NumElts = AT->getNumElements(); + else + NumElts = cast(C->getType())->getNumElements(); + + for (; Index != NumElts; ++Index) { + if 
(!ReadDataFromGlobal(C->getAggregateElement(Index), Offset, CurPtr, BytesLeft, TD)) return false; if (EltSize >= BytesLeft) @@ -346,30 +336,12 @@ } return true; } - - if (ConstantDataSequential *CDS = dyn_cast(C)) { - uint64_t EltSize = CDS->getElementByteSize(); - uint64_t Index = ByteOffset / EltSize; - uint64_t Offset = ByteOffset - Index * EltSize; - for (unsigned e = CDS->getNumElements(); Index != e; ++Index) { - if (!ReadDataFromGlobal(CDS->getElementAsConstant(Index), Offset, CurPtr, - BytesLeft, TD)) - return false; - if (EltSize >= BytesLeft) - return true; - Offset = 0; - BytesLeft -= EltSize; - CurPtr += EltSize; - } - return true; - } - if (ConstantExpr *CE = dyn_cast(C)) { if (CE->getOpcode() == Instruction::IntToPtr && CE->getOperand(0)->getType() == TD.getIntPtrType(CE->getContext())) - return ReadDataFromGlobal(CE->getOperand(0), ByteOffset, CurPtr, - BytesLeft, TD); + return ReadDataFromGlobal(CE->getOperand(0), ByteOffset, CurPtr, + BytesLeft, TD); } // Otherwise, unknown initializer type. @@ -1010,11 +982,14 @@ ConstantExpr *CE) { if (!CE->getOperand(1)->isNullValue()) return 0; // Do not allow stepping over the value! - - SmallVector Indices(CE->getNumOperands()-2); - for (unsigned i = 2, e = CE->getNumOperands(); i != e; ++i) - Indices[i-2] = CE->getOperand(i); - return ConstantFoldLoadThroughGEPIndices(C, Indices); + + // Loop over all of the operands, tracking down which value we are + // addressing. + for (unsigned i = 2, e = CE->getNumOperands(); i != e; ++i) { + C = C->getAggregateElement(CE->getOperand(i)); + if (C == 0) return 0; + } + return C; } /// ConstantFoldLoadThroughGEPIndices - Given a constant and getelementptr @@ -1026,32 +1001,8 @@ // Loop over all of the operands, tracking down which value we are // addressing. 
for (unsigned i = 0, e = Indices.size(); i != e; ++i) { - ConstantInt *Idx = dyn_cast(Indices[i]); - if (Idx == 0) return 0; - - uint64_t IdxVal = Idx->getZExtValue(); - - if (ConstantStruct *CS = dyn_cast(C)) { - C = CS->getOperand(IdxVal); - } else if (ConstantAggregateZero *CAZ = dyn_cast(C)){ - C = CAZ->getElementValue(Idx); - } else if (UndefValue *UV = dyn_cast(C)) { - C = UV->getElementValue(Idx); - } else if (ConstantArray *CA = dyn_cast(C)) { - if (IdxVal >= CA->getType()->getNumElements()) - return 0; - C = CA->getOperand(IdxVal); - } else if (ConstantDataSequential *CDS=dyn_cast(C)){ - if (IdxVal >= CDS->getNumElements()) - return 0; - C = CDS->getElementAsConstant(IdxVal); - } else if (ConstantVector *CV = dyn_cast(C)) { - if (IdxVal >= CV->getType()->getNumElements()) - return 0; - C = CV->getOperand(IdxVal); - } else { - return 0; - } + C = C->getAggregateElement(Indices[i]); + if (C == 0) return 0; } return C; } Modified: llvm/trunk/lib/Analysis/ValueTracking.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/ValueTracking.cpp?rev=148934&r1=148933&r2=148934&view=diff ============================================================================== --- llvm/trunk/lib/Analysis/ValueTracking.cpp (original) +++ llvm/trunk/lib/Analysis/ValueTracking.cpp Wed Jan 25 00:48:06 2012 @@ -1493,19 +1493,12 @@ "Not looking at a struct or array?"); assert(ExtractValueInst::getIndexedType(V->getType(), idx_range) && "Invalid indices for type?"); - CompositeType *PTy = cast(V->getType()); - if (isa(V)) - return UndefValue::get(ExtractValueInst::getIndexedType(PTy, idx_range)); - if (isa(V)) - return Constant::getNullValue(ExtractValueInst::getIndexedType(PTy, - idx_range)); - if (isa(V) || isa(V)) - // Recursively process this constant - return FindInsertedValue(cast(V)->getOperand(idx_range[0]), - idx_range.slice(1), InsertBefore); - if (ConstantDataSequential *CDS = dyn_cast(V)) - return CDS->getElementAsConstant(idx_range[0]); + if (Constant *C = 
dyn_cast(V)) { + C = C->getAggregateElement(idx_range[0]); + if (C == 0) return 0; + return FindInsertedValue(C, idx_range.slice(1), InsertBefore); + } if (InsertValueInst *I = dyn_cast(V)) { // Loop the indices for the insertvalue instruction in parallel with the Modified: llvm/trunk/lib/Linker/LinkModules.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Linker/LinkModules.cpp?rev=148934&r1=148933&r2=148934&view=diff ============================================================================== --- llvm/trunk/lib/Linker/LinkModules.cpp (original) +++ llvm/trunk/lib/Linker/LinkModules.cpp Wed Jan 25 00:48:06 2012 @@ -844,21 +844,10 @@ } static void getArrayElements(Constant *C, SmallVectorImpl &Dest) { - if (ConstantArray *I = dyn_cast(C)) { - for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) - Dest.push_back(I->getOperand(i)); - return; - } - - if (ConstantDataSequential *CDS = dyn_cast(C)) { - for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) - Dest.push_back(CDS->getElementAsConstant(i)); - return; - } - - ConstantAggregateZero *CAZ = cast(C); - Dest.append(cast(C->getType())->getNumElements(), - CAZ->getSequentialElement()); + unsigned NumElements = cast(C->getType())->getNumElements(); + + for (unsigned i = 0; i != NumElements; ++i) + Dest.push_back(C->getAggregateElement(i)); } void ModuleLinker::linkAppendingVarInit(const AppendingVarInfo &AVI) { Modified: llvm/trunk/lib/Transforms/IPO/GlobalOpt.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/IPO/GlobalOpt.cpp?rev=148934&r1=148933&r2=148934&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/IPO/GlobalOpt.cpp (original) +++ llvm/trunk/lib/Transforms/IPO/GlobalOpt.cpp Wed Jan 25 00:48:06 2012 @@ -276,38 +276,6 @@ return false; } -static Constant *getAggregateConstantElement(Constant *Agg, Constant *Idx) { - ConstantInt *CI = dyn_cast(Idx); - if (!CI) return 0; - unsigned IdxV = 
CI->getZExtValue(); - - if (ConstantStruct *CS = dyn_cast(Agg)) { - if (IdxV < CS->getNumOperands()) return CS->getOperand(IdxV); - } else if (ConstantArray *CA = dyn_cast(Agg)) { - if (IdxV < CA->getNumOperands()) return CA->getOperand(IdxV); - } else if (ConstantVector *CP = dyn_cast(Agg)) { - if (IdxV < CP->getNumOperands()) return CP->getOperand(IdxV); - } else if (isa(Agg)) { - if (StructType *STy = dyn_cast(Agg->getType())) { - if (IdxV < STy->getNumElements()) - return Constant::getNullValue(STy->getElementType(IdxV)); - } else if (SequentialType *STy = - dyn_cast(Agg->getType())) { - return Constant::getNullValue(STy->getElementType()); - } - } else if (isa(Agg)) { - if (StructType *STy = dyn_cast(Agg->getType())) { - if (IdxV < STy->getNumElements()) - return UndefValue::get(STy->getElementType(IdxV)); - } else if (SequentialType *STy = - dyn_cast(Agg->getType())) { - return UndefValue::get(STy->getElementType()); - } - } - return 0; -} - - /// CleanupConstantGlobalUsers - We just marked GV constant. Loop over all /// users of the global, cleaning up the obvious ones. This is largely just a /// quick scan over the use list to clean up the easy and obvious cruft. 
This @@ -520,8 +488,7 @@ NewGlobals.reserve(STy->getNumElements()); const StructLayout &Layout = *TD.getStructLayout(STy); for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { - Constant *In = getAggregateConstantElement(Init, - ConstantInt::get(Type::getInt32Ty(STy->getContext()), i)); + Constant *In = Init->getAggregateElement(i); assert(In && "Couldn't get element of initializer?"); GlobalVariable *NGV = new GlobalVariable(STy->getElementType(i), false, GlobalVariable::InternalLinkage, @@ -553,8 +520,7 @@ uint64_t EltSize = TD.getTypeAllocSize(STy->getElementType()); unsigned EltAlign = TD.getABITypeAlignment(STy->getElementType()); for (unsigned i = 0, e = NumElements; i != e; ++i) { - Constant *In = getAggregateConstantElement(Init, - ConstantInt::get(Type::getInt32Ty(Init->getContext()), i)); + Constant *In = Init->getAggregateElement(i); assert(In && "Couldn't get element of initializer?"); GlobalVariable *NGV = new GlobalVariable(STy->getElementType(), false, Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp?rev=148934&r1=148933&r2=148934&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp (original) +++ llvm/trunk/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp Wed Jan 25 00:48:06 2012 @@ -834,59 +834,39 @@ } UndefElts = 0; - if (ConstantVector *CV = dyn_cast(V)) { + + // Handle ConstantAggregateZero, ConstantVector, ConstantDataSequential. + if (Constant *C = dyn_cast(V)) { + // Check if this is identity. If so, return 0 since we are not simplifying + // anything. 
+ if (DemandedElts.isAllOnesValue()) + return 0; + Type *EltTy = cast(V->getType())->getElementType(); Constant *Undef = UndefValue::get(EltTy); - - std::vector Elts; - for (unsigned i = 0; i != VWidth; ++i) + + SmallVector Elts; + for (unsigned i = 0; i != VWidth; ++i) { if (!DemandedElts[i]) { // If not demanded, set to undef. Elts.push_back(Undef); UndefElts.setBit(i); - } else if (isa(CV->getOperand(i))) { // Already undef. + continue; + } + + Constant *Elt = C->getAggregateElement(i); + if (Elt == 0) return 0; + + if (isa(Elt)) { // Already undef. Elts.push_back(Undef); UndefElts.setBit(i); } else { // Otherwise, defined. - Elts.push_back(CV->getOperand(i)); + Elts.push_back(Elt); } - + } + // If we changed the constant, return it. Constant *NewCV = ConstantVector::get(Elts); - return NewCV != CV ? NewCV : 0; - } - - if (ConstantDataVector *CDV = dyn_cast(V)) { - // Check if this is identity. If so, return 0 since we are not simplifying - // anything. - if (DemandedElts.isAllOnesValue()) - return 0; - - // Simplify to a ConstantVector where the non-demanded elements are undef. - Constant *Undef = UndefValue::get(CDV->getElementType()); - - SmallVector Elts; - for (unsigned i = 0; i != VWidth; ++i) - Elts.push_back(DemandedElts[i] ? CDV->getElementAsConstant(i) : Undef); - UndefElts = DemandedElts ^ EltMask; - return ConstantVector::get(Elts); - - } - - if (ConstantAggregateZero *CAZ = dyn_cast(V)) { - // Check if this is identity. If so, return 0 since we are not simplifying - // anything. - if (DemandedElts.isAllOnesValue()) - return 0; - - // Simplify the CAZ to a ConstantVector where the non-demanded elements are - // set to undef. - Constant *Zero = CAZ->getSequentialElement(); - Constant *Undef = UndefValue::get(Zero->getType()); - SmallVector Elts; - for (unsigned i = 0; i != VWidth; ++i) - Elts.push_back(DemandedElts[i] ? Zero : Undef); - UndefElts = DemandedElts ^ EltMask; - return ConstantVector::get(Elts); + return NewCV != C ? 
NewCV : 0; } // Limit search depth. From evan.cheng at apple.com Wed Jan 25 00:53:33 2012 From: evan.cheng at apple.com (Evan Cheng) Date: Tue, 24 Jan 2012 22:53:33 -0800 Subject: [llvm-commits] Tuning LLVM Greedy Register Allocator to optimize for code size when targeting ARM Thumb 2 instruction set In-Reply-To: <000001ccda35$239f0e10$6add2a30$@org> References: <000001ccda35$239f0e10$6add2a30$@org> Message-ID: Can you confirm that this change is not predicated on OptimizeForSize and it's not designed to trade off speed for code size? I'm pretty sure that's what you mean but I want to be sure. Thanks, Evan On Jan 23, 2012, at 5:11 PM, Zino Benaissa wrote: > Description: > This contribution extends LLVM greedy Register Allocator to optimize for code size when LLVM compiler targets ARM Thumb 2 instruction set. This heuristic favors assigning register R0 through R7 to operands used in instruction that can be encoded in 16 bits (16-bit is allowed only if R0-7 are used). Operands that appear most frequently in a function (and in instructions that qualify) get R0-7 register. > This heuristic is turned on by default and has impact on generated code only if -mthumb compiler switch is used. To turn this heuristic off use -disable-favor-r0-7 feature flag. > > This patch modifies: > 1) The LLVM greedy register allocator located in LLVM/CodeGen directory: To add the new code size heuristic. > 2) The ARM-specific files located in LLVM/Target/ARM directory: To add the function that determines which instruction can be encoded in 16-bits and a fix to enable the compiler to emit CMN instruction in 16-bits encoding. > 3) The LLVM test suite: fix test/CodeGen/Thumb2/thumb2-cmn.ll test. > > Performance impact: > I focused on -Os and -mthumb flags. But observed similar improvement with -O3 and -mthumb. Runtime measured on Qualcomm 8660. > Code size: > - SPEC2000 benchmarks between 0 to 0.6% code size reduction (with no noticeable regression). 
> - EEMBC benchmarks between 0 to 6% reduction (no noticeable regression). Automotive and Networking average about 1% code size reduction and Consumer about 0.5%. > Runtime: > - SPEC2000 between -1% and 6% speed up (Spec2k/ammp 6%) > - EEMBC overall averages faster -1 to 5%. > Modified: > test/CodeGen/Thumb2/thumb2-cmn.ll > include/llvm/Target/TargetInstrInfo.h > include/llvm/CodeGen/LiveInterval.h > lib/Target/ARM/Thumb2SizeReduction.cpp > lib/Target/ARM/ARMBaseInstrInfo.cpp > lib/Target/ARM/ARMBaseInstrInfo.h > lib/CodeGen/RegAllocGreedy.cpp > lib/CodeGen/CalcSpillWeights.cpp > > for details see RACodeSize.txt > > Testing: > See ARMTestSuiteResult.txt and ARMSimple-Os-mthumb.txt > Note -O3 is also completed on X86 and ARM CPUs > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits -------------- next part -------------- An HTML attachment was scrubbed... URL: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120124/853e553a/attachment.html From evan.cheng at apple.com Wed Jan 25 01:24:20 2012 From: evan.cheng at apple.com (Evan Cheng) Date: Tue, 24 Jan 2012 23:24:20 -0800 Subject: [llvm-commits] [PATH] Properly support ctor / dtor priorities on the targets which support them In-Reply-To: References: Message-ID: <5D2A915E-E6F9-4422-9CA0-A361EBA7088E@apple.com> LGTM Evan On Jan 21, 2012, at 3:16 AM, Anton Korobeynikov wrote: > Hi Bill > >> Could you add the patch? :-) > It was in the original message. Attached now as well. 
> > -- > With best regards, Anton Korobeynikov > Faculty of Mathematics and Mechanics, Saint Petersburg State University > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits From evan.cheng at apple.com Wed Jan 25 01:29:30 2012 From: evan.cheng at apple.com (Evan Cheng) Date: Tue, 24 Jan 2012 23:29:30 -0800 Subject: [llvm-commits] [llvm][PATCH - REVISED][Review request] X86 Instruction scheduler for the Intel Atom In-Reply-To: References: Message-ID: <4B708DD1-00D6-43F2-BE8E-808E23604B82@apple.com> Andy, can you review the patch? Thanks, Evan On Jan 23, 2012, at 3:05 PM, Gurd, Preston wrote: > Revision 2: Tests which were failing, when run on an Atom, due to the tests finding a schedule different from what was expected, have been changed to use "-mcpu=generic" in order to prevent the Atom scheduler from running, so that all "make check" tests pass. > > From: Gurd, Preston > Sent: Tuesday, January 17, 2012 4:29 PM > To: Evan Cheng > Cc: llvm-commits at cs.uiuc.edu > Subject: [llvm-commits] [llvm][PATCH - REVISED][Review request] X86 Instruction scheduler for the Intel Atom > > The attached patch implements most of an instruction scheduler for the Intel Atom. > > It adds an instruction itinerary to all x86 instructions, giving each a default latency of 1, using the InstrItinClass IIC_DEFAULT. > > It sets specific latencies for Atom for the instructions in files X86InstrCMovSetCC.td, X86InstrArithmetic.td, X86InstrControl.td, and X86InstrShiftRotate.td. The Atom latencies for the remainder of the x86 instructions will be set in subsequent patches. > > It adds a test to verify that the scheduler is working. > > I realize that this patch is kind of large, but please consider that the vast majority of the changes consist only of adding an instruction itinerary class name to an instruction. 
> > Revision: the patch also changes the scheduling preference to "Hybrid" for i386 Atom, while leaving x86_64 as ILP. > > Please commit the patch if it seems acceptable. > > Preston > > > From: Evan Cheng [mailto:evan.cheng at apple.com] > Sent: Monday, January 16, 2012 12:01 PM > To: Gurd, Preston > Cc: llvm-commits at cs.uiuc.edu > Subject: Re: [llvm-commits] [llvm][PATCH][Review request] X86 Instruction scheduler for the Intel Atom > > Very nice. One question, I noticed you haven't changed the scheduling preference so x86_64 is still using ILP scheduler while i386 is using register pressure reduction scheduler. Have you tried changing the preference to latency scheduler for Atom? > > Evan > > On Jan 13, 2012, at 3:26 PM, Gurd, Preston wrote: > > > The attached patch implements most of an instruction scheduler for the Intel Atom. > > It adds an instruction itinerary to all x86 instructions, giving each a default latency of 1, using the InstrItinClass IIC_DEFAULT. > > It sets specific latencies for Atom for the instructions in files X86InstrCMovSetCC.td, X86InstrArithmetic.td, X86InstrControl.td, and X86InstrShiftRotate.td. The Atom latencies for the remainder of the x86 instructions will be set in subsequent patches. > > It adds a test to verify that the scheduler is working. > > I realize that this patch is kind of large, but please consider that the vast majority of the changes consist only of adding an instruction itinerary class name to an instruction. > > -- > Preston Gurd > Intel Waterloo > > > -------------- next part -------------- An HTML attachment was scrubbed... 
URL: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120124/e9208258/attachment-0001.html From eli.friedman at gmail.com Wed Jan 25 01:42:16 2012 From: eli.friedman at gmail.com (Eli Friedman) Date: Tue, 24 Jan 2012 23:42:16 -0800 Subject: [llvm-commits] [llvm] r148929 - in /llvm/trunk/lib: CodeGen/SelectionDAG/LegalizeDAG.cpp Target/X86/X86ISelLowering.cpp Transforms/InstCombine/InstCombineMulDivRem.cpp Transforms/InstCombine/InstCombineSimplifyDemanded.cpp Transforms/Scalar/Scala Message-ID: On Tue, Jan 24, 2012 at 10:02 PM, Chris Lattner wrote: > Author: lattner > Date: Wed Jan 25 00:02:56 2012 > New Revision: 148929 > > URL: http://llvm.org/viewvc/llvm-project?rev=148929&view=rev > Log: > use ConstantVector::getSplat in a few places. [...] > -  SmallVector CV1; > -  CV1.push_back( > -    ConstantFP::get(*Context, APFloat(APInt(64, 0x4330000000000000ULL)))); > -  CV1.push_back( > -    ConstantFP::get(*Context, APFloat(APInt(64, 0x4530000000000000ULL)))); > -  Constant *C1 = ConstantVector::get(CV1); > +  Constant *C1 = ConstantVector::getSplat(2, > +        ConstantFP::get(*Context, APFloat(APInt(64, 0x4330000000000000ULL)))); >   SDValue CPIdx1 = DAG.getConstantPool(C1, getPointerTy(), 16); This change doesn't appear to preserve the semantics. -Eli From evan.cheng at apple.com Wed Jan 25 01:38:48 2012 From: evan.cheng at apple.com (Evan Cheng) Date: Wed, 25 Jan 2012 07:38:48 -0000 Subject: [llvm-commits] [llvm] r148936 - /llvm/trunk/include/llvm/CodeGen/MachineInstr.h Message-ID: <20120125073848.7ED8C2A6C12C@llvm.org> Author: evancheng Date: Wed Jan 25 01:38:48 2012 New Revision: 148936 URL: http://llvm.org/viewvc/llvm-project?rev=148936&view=rev Log: Update hasProperty comment. 
Modified: llvm/trunk/include/llvm/CodeGen/MachineInstr.h Modified: llvm/trunk/include/llvm/CodeGen/MachineInstr.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/MachineInstr.h?rev=148936&r1=148935&r2=148936&view=diff ============================================================================== --- llvm/trunk/include/llvm/CodeGen/MachineInstr.h (original) +++ llvm/trunk/include/llvm/CodeGen/MachineInstr.h Wed Jan 25 01:38:48 2012 @@ -302,9 +302,6 @@ /// The first argument is the property being queried. /// The second argument indicates whether the query should look inside /// instruction bundles. - /// If the third argument is true, than the query can return true when *any* - /// of the bundled instructions has the queried property. If it's false, then - /// this can return true iff *all* of the instructions have the property. bool hasProperty(unsigned Flag, QueryType Type = AnyInBundle) const; /// isVariadic - Return true if this instruction can have a variable number of From chandlerc at gmail.com Wed Jan 25 01:40:15 2012 From: chandlerc at gmail.com (Chandler Carruth) Date: Wed, 25 Jan 2012 07:40:15 -0000 Subject: [llvm-commits] [llvm] r148937 - /llvm/trunk/include/llvm-c/Core.h Message-ID: <20120125074015.6279F2A6C12C@llvm.org> Author: chandlerc Date: Wed Jan 25 01:40:15 2012 New Revision: 148937 URL: http://llvm.org/viewvc/llvm-project?rev=148937&view=rev Log: Revert a tiny bit of r148553 which extended LLVM's function attributes to 64-bits, and added a new attribute in bit #32. Specifically, remove this new attribute from the enum used in the C API. It's not yet clear what the best approach is for exposing these new attributes in the C API, and several different proposals are on the table. Until then, we can simply not expose this bit in the API at all. Also, I've reverted a somewhat unrelated change in the same revision which switched from "1 << 31" to "1U << 31" for the top enum. 
While "1 << 31" is technically undefined behavior, implementations DTRT here. However, MS and -pedantic mode warn about non-'int' type enumerator values. If folks feel strongly about this I can put the 'U' back in, but it seemed best to wait for the proper solution. Modified: llvm/trunk/include/llvm-c/Core.h Modified: llvm/trunk/include/llvm-c/Core.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm-c/Core.h?rev=148937&r1=148936&r2=148937&view=diff ============================================================================== --- llvm/trunk/include/llvm-c/Core.h (original) +++ llvm/trunk/include/llvm-c/Core.h Wed Jan 25 01:40:15 2012 @@ -118,8 +118,12 @@ LLVMStackAlignment = 7<<26, LLVMReturnsTwice = 1 << 29, LLVMUWTable = 1 << 30, - LLVMNonLazyBind = 1U << 31, - LLVMAddressSafety = 1ULL << 32 + LLVMNonLazyBind = 1 << 31 + + // FIXME: This attribute is currently not included in the C API as + // a temporary measure until the API/ABI impact to the C API is understood + // and the path forward agreed upon. + //LLVMAddressSafety = 1ULL << 32 } LLVMAttribute; typedef enum { From chandlerc at google.com Wed Jan 25 01:47:25 2012 From: chandlerc at google.com (Chandler Carruth) Date: Tue, 24 Jan 2012 23:47:25 -0800 Subject: [llvm-commits] fix the MSVC warning in include/llvm-c/Core.h In-Reply-To: References: Message-ID: FYI, I think Nick wanted to chime in on this, but he's kinda busy, so I just wanted to update thread and put it on his radar. Kostya, as a temporary measure, and until we figure out the right long-term solution, in r148937 I've removed this attribute from the C API's enum, but only that enum. AFAICT, this should have no impact on your usage, and just cause trouble for folks mixing ASan and the C API, which seems very unlikely. 
On Mon, Jan 23, 2012 at 12:01 PM, Eli Friedman wrote: > On Mon, Jan 23, 2012 at 11:02 AM, Kostya Serebryany > wrote: > > My previous change in include/llvm-c/Core.h that introduced 64-bit > > Attributes (r148553) caused a warning > > while building with MSVC. http://llvm.org/bugs/show_bug.cgi?id=11828 > > The following patch fixes the problem (use "static const uint64_t" > instead > > of enum). > > Ok to commit? > > > > --kcc > > > > Index: include/llvm-c/Core.h > > =================================================================== > > --- include/llvm-c/Core.h (revision 148708) > > +++ include/llvm-c/Core.h (working copy) > > @@ -92,7 +92,7 @@ > > /** Used to get the users and usees of a Value. See the llvm::Use > class. */ > > typedef struct LLVMOpaqueUse *LLVMUseRef; > > > > -typedef enum { > > +static const uint64_t > > LLVMZExtAttribute = 1<<0, > > LLVMSExtAttribute = 1<<1, > > LLVMNoReturnAttribute = 1<<2, > > @@ -119,8 +119,8 @@ > > LLVMReturnsTwice = 1 << 29, > > LLVMUWTable = 1 << 30, > > LLVMNonLazyBind = 1U << 31, > > - LLVMAddressSafety = 1ULL << 32 > > -} LLVMAttribute; > > + LLVMAddressSafety = 1ULL << 32; > > +typedef uint64_t LLVMAttribute; > > > > typedef enum { > > /* Terminator Instructions */ > > Hmm... actually, I'm not sure this is okay; it's a > non-binary-compatible change to the C API. Nick seemed to think that such changes were inevitable and not a big deal. I'm not at all sure. I took the conservative move as I mentioned above and restored the C interface to its previous state as we don't need the functionality there anyways. We can figure out the right way to introduce it now w/o warnings or other noise. -------------- next part -------------- An HTML attachment was scrubbed... 
URL: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120124/6baebc91/attachment.html From nicholas at mxc.ca Wed Jan 25 02:27:40 2012 From: nicholas at mxc.ca (Nick Lewycky) Date: Wed, 25 Jan 2012 08:27:40 -0000 Subject: [llvm-commits] [llvm] r148941 - in /llvm/trunk: lib/Analysis/InlineCost.cpp test/Transforms/Inline/alloca-bonus.ll Message-ID: <20120125082740.C7BB02A6C12C@llvm.org> Author: nicholas Date: Wed Jan 25 02:27:40 2012 New Revision: 148941 URL: http://llvm.org/viewvc/llvm-project?rev=148941&view=rev Log: Support pointer comparisons against constants, when looking at the inline-cost savings from a pointer argument becoming an alloca. Sometimes callees will even compare a pointer to null and then branch to an otherwise unreachable block! Detect these cases and compute the number of saved instructions, instead of bailing out and reporting no savings. Modified: llvm/trunk/lib/Analysis/InlineCost.cpp llvm/trunk/test/Transforms/Inline/alloca-bonus.ll Modified: llvm/trunk/lib/Analysis/InlineCost.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/InlineCost.cpp?rev=148941&r1=148940&r2=148941&view=diff ============================================================================== --- llvm/trunk/lib/Analysis/InlineCost.cpp (original) +++ llvm/trunk/lib/Analysis/InlineCost.cpp Wed Jan 25 02:27:40 2012 @@ -222,6 +222,11 @@ if (!V->getType()->isPointerTy()) return 0; // Not a pointer unsigned Reduction = 0; + // Looking at ICmpInsts will never abort the analysis and return zero, and + // analyzing them is expensive, so save them for last so that we don't do + // extra work that we end up throwing out. + SmallVector ICmpInsts; + SmallVector Worklist; Worklist.push_back(V); do { @@ -271,10 +276,14 @@ case Intrinsic::memmove: case Intrinsic::lifetime_start: case Intrinsic::lifetime_end: - // SROA can usually chew through these intrinsics. + // SROA can usually chew through these intrinsics. 
Reduction += InlineConstants::InstrCost; break; } + } else if (ICmpInst *ICI = dyn_cast(I)) { + if (!isa(ICI->getOperand(1))) + return 0; + ICmpInsts.push_back(ICI); } else { // If there is some other strange instruction, we're not going to be // able to do much if we inline this. @@ -283,6 +292,51 @@ } } while (!Worklist.empty()); + while (!ICmpInsts.empty()) { + ICmpInst *ICI = ICmpInsts.pop_back_val(); + + // An icmp pred (alloca, C) becomes true if the predicate is true when + // equal and false otherwise. + bool Result = ICI->isTrueWhenEqual(); + + SmallVector Worklist; + Worklist.push_back(ICI); + do { + Instruction *U = Worklist.pop_back_val(); + Reduction += InlineConstants::InstrCost; + for (Value::use_iterator UI = U->use_begin(), UE = U->use_end(); + UI != UE; ++UI) { + Instruction *I = dyn_cast(*UI); + if (!I || I->mayHaveSideEffects()) continue; + if (I->getNumOperands() == 1) + Worklist.push_back(I); + if (BinaryOperator *BO = dyn_cast(I)) { + // If BO produces the same value as U, then the other operand is + // irrelevant and we can put it into the Worklist to continue + // deleting dead instructions. If BO produces the same value as the + // other operand, we can delete BO but that's it. + if (Result == true) { + if (BO->getOpcode() == Instruction::Or) + Worklist.push_back(I); + if (BO->getOpcode() == Instruction::And) + Reduction += InlineConstants::InstrCost; + } else { + if (BO->getOpcode() == Instruction::Or || + BO->getOpcode() == Instruction::Xor) + Reduction += InlineConstants::InstrCost; + if (BO->getOpcode() == Instruction::And) + Worklist.push_back(I); + } + } + if (BranchInst *BI = dyn_cast(I)) { + BasicBlock *BB = BI->getSuccessor(Result ? 
0 : 1); + if (BB->getSinglePredecessor()) + Reduction += InlineConstants::InstrCost * BB->size(); + } + } + } while (!Worklist.empty()); + } + return Reduction; } Modified: llvm/trunk/test/Transforms/Inline/alloca-bonus.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Inline/alloca-bonus.ll?rev=148941&r1=148940&r2=148941&view=diff ============================================================================== --- llvm/trunk/test/Transforms/Inline/alloca-bonus.ll (original) +++ llvm/trunk/test/Transforms/Inline/alloca-bonus.ll Wed Jan 25 02:27:40 2012 @@ -42,3 +42,42 @@ call void @llvm.lifetime.start(i64 0, i8* %E) ret void } + +define void @outer3() { +; CHECK: @outer3 +; CHECK-NOT: call void @inner3 + %ptr = alloca i32 + call void @inner3(i32* %ptr, i1 undef) + ret void +} + +define void @inner3(i32 *%ptr, i1 %x) { + %A = icmp eq i32* %ptr, null + %B = and i1 %x, %A + br i1 %A, label %bb.true, label %bb.false +bb.true: + ; This block musn't be counted in the inline cost. 
+ %t1 = load i32* %ptr + %t2 = add i32 %t1, 1 + %t3 = add i32 %t2, 1 + %t4 = add i32 %t3, 1 + %t5 = add i32 %t4, 1 + %t6 = add i32 %t5, 1 + %t7 = add i32 %t6, 1 + %t8 = add i32 %t7, 1 + %t9 = add i32 %t8, 1 + %t10 = add i32 %t9, 1 + %t11 = add i32 %t10, 1 + %t12 = add i32 %t11, 1 + %t13 = add i32 %t12, 1 + %t14 = add i32 %t13, 1 + %t15 = add i32 %t14, 1 + %t16 = add i32 %t15, 1 + %t17 = add i32 %t16, 1 + %t18 = add i32 %t17, 1 + %t19 = add i32 %t18, 1 + %t20 = add i32 %t19, 1 + ret void +bb.false: + ret void +} From eli.friedman at gmail.com Wed Jan 25 03:04:07 2012 From: eli.friedman at gmail.com (Eli Friedman) Date: Wed, 25 Jan 2012 01:04:07 -0800 Subject: [llvm-commits] [llvm] r148941 - in /llvm/trunk: lib/Analysis/InlineCost.cpp test/Transforms/Inline/alloca-bonus.ll In-Reply-To: <20120125082740.C7BB02A6C12C@llvm.org> References: <20120125082740.C7BB02A6C12C@llvm.org> Message-ID: On Wed, Jan 25, 2012 at 12:27 AM, Nick Lewycky wrote: > Author: nicholas > Date: Wed Jan 25 02:27:40 2012 > New Revision: 148941 > > URL: http://llvm.org/viewvc/llvm-project?rev=148941&view=rev > Log: > Support pointer comparisons against constants, when looking at the inline-cost > savings from a pointer argument becoming an alloca. Sometimes callees will even > compare a pointer to null and then branch to an otherwise unreachable block! > Detect these cases and compute the number of saved instructions, instead of > bailing out and reporting no savings. [Comments inlined.] > +      } else if (ICmpInst *ICI = dyn_cast(I)) { > +        if (!isa(ICI->getOperand(1))) > +          return 0; > +        ICmpInsts.push_back(ICI); You probably want to restrict this to equality comparisons; I don't think we fold relational comparisons between an alloca and a constant in general. > +        if (BranchInst *BI = dyn_cast(I)) { > +          BasicBlock *BB = BI->getSuccessor(Result ? 0 : 1); > +          if (BB->getSinglePredecessor()) > +           
Reduction += InlineConstants::InstrCost * BB->size(); Shouldn't you use NumBBInsts[BB] rather than BB->size()? -Eli From james.molloy at arm.com Wed Jan 25 03:27:18 2012 From: james.molloy at arm.com (James Molloy) Date: Wed, 25 Jan 2012 09:27:18 -0000 Subject: [llvm-commits] LLVM patch to support ARM fused multiply add/subtract instructions In-Reply-To: <00f601ccdaf6$e6cdf8d0$b469ea70$@org> References: <00f601ccdaf6$e6cdf8d0$b469ea70$@org> Message-ID: <000001ccdb43$88c93040$9a5b90c0$@molloy@arm.com> Hi Ana, For what it's worth, I've reviewed this and it looks good to me. Cheers, James -----Original Message----- From: llvm-commits-bounces at cs.uiuc.edu [mailto:llvm-commits-bounces at cs.uiuc.edu] On Behalf Of Ana Pazos Sent: 25 January 2012 00:19 To: 'Anton Korobeynikov' Cc: llvm-commits at cs.uiuc.edu Subject: Re: [llvm-commits] LLVM patch to support ARM fused multiply add/subtract instructions Anton, Here is an updated patch for the ARM fused multiply add/sub feature with the relevant suggestions incorporated. In this update: - I assumed neon2 does not imply vfpv4, but neon and vfpv4 imply neon2. - I kept setting .fpu=neon-vfpv4 code attribute because that is what the assembler understands. The additional changes mentioned in the email discussions I think belong to a separate patch: - Associate VMLA/VMLS with LessPreciseFPMAD flag, and maybe with fast-math flag. - VFPv3/VFPv4/NEON/NEON2 associations with FeatureFP16/FeatureD16. - Support to set -mfpu=neon2 in clang. Do you want this?? Thanks, Ana. 
-----Original Message----- From: llvm-commits-bounces at cs.uiuc.edu [mailto:llvm-commits-bounces at cs.uiuc.edu] On Behalf Of Anton Korobeynikov Sent: Tuesday, January 24, 2012 12:34 PM To: James Molloy Cc: llvm-commits at cs.uiuc.edu Subject: Re: [llvm-commits] [llvm] r148658 - in /llvm/trunk: lib/Target/ARM/ARM.td lib/Target/ARM/ARMAsmPrinter.cpp lib/Target/ARM/ARMInstrInfo.td lib/Target/ARM/ARMInstrNEON.td lib/Target/ARM/ARMInstrVFP.td lib/Target/ARM/ARMSchedule.td lib/Target/ARM/ARMSub Hi James, > >From my discussions with colleagues we come to a different conclusion - that it is indeed possible to have the NEONv2 vector integer unit without the floating point unit of VFPv4. > We don't think it's possible to have VFPv4 with NEONv1 however. Well... this conflicts with that doc I mentioned. Where is the truth? :) -- With best regards, Anton Korobeynikov Faculty of Mathematics and Mechanics, Saint Petersburg State University _______________________________________________ llvm-commits mailing list llvm-commits at cs.uiuc.edu http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits From nicholas at mxc.ca Wed Jan 25 03:43:14 2012 From: nicholas at mxc.ca (Nick Lewycky) Date: Wed, 25 Jan 2012 09:43:14 -0000 Subject: [llvm-commits] [llvm] r148946 - /llvm/trunk/lib/Transforms/Utils/SimplifyCFG.cpp Message-ID: <20120125094314.9A30F2A6C12C@llvm.org> Author: nicholas Date: Wed Jan 25 03:43:14 2012 New Revision: 148946 URL: http://llvm.org/viewvc/llvm-project?rev=148946&view=rev Log: Gracefully degrade precision in branch probability numbers. 
Modified: llvm/trunk/lib/Transforms/Utils/SimplifyCFG.cpp Modified: llvm/trunk/lib/Transforms/Utils/SimplifyCFG.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Utils/SimplifyCFG.cpp?rev=148946&r1=148945&r2=148946&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/Utils/SimplifyCFG.cpp (original) +++ llvm/trunk/lib/Transforms/Utils/SimplifyCFG.cpp Wed Jan 25 03:43:14 2012 @@ -1466,6 +1466,29 @@ return true; } +/// MultiplyAndLosePrecision - Multiplies A and B, then returns the result. In +/// the event of overflow, logically-shifts all four inputs right until the +/// multiply fits. +static APInt MultiplyAndLosePrecision(APInt &A, APInt &B, APInt &C, APInt &D, + unsigned &BitsLost) { + BitsLost = 0; + bool Overflow = false; + APInt Result = A.umul_ov(B, Overflow); + if (Overflow) { + APInt MaxB = APInt::getMaxValue(A.getBitWidth()).udiv(A); + do { + B = B.lshr(1); + ++BitsLost; + } while (B.ugt(MaxB)); + A = A.lshr(BitsLost); + C = C.lshr(BitsLost); + D = D.lshr(BitsLost); + Result = A * B; + } + return Result; +} + + /// FoldBranchToCommonDest - If this basic block is simple enough, and if a /// predecessor branches to us and one of our successors, fold the block into /// the predecessor and use logical operations to pick the right destination. @@ -1665,32 +1688,64 @@ // we get: // (a*c)% = A*C, (b+(a*d))% = A*D+B*C+B*D. 
- bool Overflow1 = false, Overflow2 = false, Overflow3 = false; - bool Overflow4 = false, Overflow5 = false, Overflow6 = false; - APInt ProbTrue = A.umul_ov(C, Overflow1); - - APInt Tmp1 = A.umul_ov(D, Overflow2); - APInt Tmp2 = B.umul_ov(C, Overflow3); - APInt Tmp3 = B.umul_ov(D, Overflow4); - APInt Tmp4 = Tmp1.uadd_ov(Tmp2, Overflow5); - APInt ProbFalse = Tmp4.uadd_ov(Tmp3, Overflow6); - - APInt GCD = APIntOps::GreatestCommonDivisor(ProbTrue, ProbFalse); - ProbTrue = ProbTrue.udiv(GCD); - ProbFalse = ProbFalse.udiv(GCD); - - if (Overflow1 || Overflow2 || Overflow3 || Overflow4 || Overflow5 || - Overflow6) { - DEBUG(dbgs() << "Overflow recomputing branch weight on: " << *PBI - << "when merging with: " << *BI); - PBI->setMetadata(LLVMContext::MD_prof, NULL); - } else { + // In the event of overflow, we want to drop the LSB of the input + // probabilities. + unsigned BitsLost; + + // Ignore overflow result on ProbTrue. + APInt ProbTrue = MultiplyAndLosePrecision(A, C, B, D, BitsLost); + + APInt Tmp1 = MultiplyAndLosePrecision(B, D, A, C, BitsLost); + if (BitsLost) { + ProbTrue = ProbTrue.lshr(BitsLost*2); + } + + APInt Tmp2 = MultiplyAndLosePrecision(A, D, C, B, BitsLost); + if (BitsLost) { + ProbTrue = ProbTrue.lshr(BitsLost*2); + Tmp1 = Tmp1.lshr(BitsLost*2); + } + + APInt Tmp3 = MultiplyAndLosePrecision(B, C, A, D, BitsLost); + if (BitsLost) { + ProbTrue = ProbTrue.lshr(BitsLost*2); + Tmp1 = Tmp1.lshr(BitsLost*2); + Tmp2 = Tmp2.lshr(BitsLost*2); + } + + bool Overflow1 = false, Overflow2 = false; + APInt Tmp4 = Tmp2.uadd_ov(Tmp3, Overflow1); + APInt ProbFalse = Tmp4.uadd_ov(Tmp1, Overflow2); + + if (Overflow1 || Overflow2) { + ProbTrue = ProbTrue.lshr(1); + Tmp1 = Tmp1.lshr(1); + Tmp2 = Tmp2.lshr(1); + Tmp3 = Tmp3.lshr(1); + Tmp4 = Tmp2 + Tmp3; + ProbFalse = Tmp4 + Tmp1; + } + + // The sum of branch weights must fit in 32-bits. 
+ if (ProbTrue.isNegative() && ProbFalse.isNegative()) { + ProbTrue = ProbTrue.lshr(1); + ProbFalse = ProbFalse.lshr(1); + } + + if (ProbTrue != ProbFalse) { + // Normalize the result. + APInt GCD = APIntOps::GreatestCommonDivisor(ProbTrue, ProbFalse); + ProbTrue = ProbTrue.udiv(GCD); + ProbFalse = ProbFalse.udiv(GCD); + LLVMContext &Context = BI->getContext(); Value *Ops[3]; Ops[0] = BI->getMetadata(LLVMContext::MD_prof)->getOperand(0); Ops[1] = ConstantInt::get(Context, ProbTrue); Ops[2] = ConstantInt::get(Context, ProbFalse); PBI->setMetadata(LLVMContext::MD_prof, MDNode::get(Context, Ops)); + } else { + PBI->setMetadata(LLVMContext::MD_prof, NULL); } } else { PBI->setMetadata(LLVMContext::MD_prof, NULL); From sabre at nondot.org Wed Jan 25 03:56:23 2012 From: sabre at nondot.org (Chris Lattner) Date: Wed, 25 Jan 2012 09:56:23 -0000 Subject: [llvm-commits] [llvm] r148947 - /llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Message-ID: <20120125095623.5E2AA2A6C12C@llvm.org> Author: lattner Date: Wed Jan 25 03:56:22 2012 New Revision: 148947 URL: http://llvm.org/viewvc/llvm-project?rev=148947&view=rev Log: fix a bug I introduced in r148929, this is not a splat! Thanks to Eli for noticing. 
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=148947&r1=148946&r2=148947&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original) +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Wed Jan 25 03:56:22 2012 @@ -7591,8 +7591,12 @@ Constant *C0 = ConstantVector::get(CV0); SDValue CPIdx0 = DAG.getConstantPool(C0, getPointerTy(), 16); - Constant *C1 = ConstantVector::getSplat(2, + SmallVector CV1; + CV1.push_back( ConstantFP::get(*Context, APFloat(APInt(64, 0x4330000000000000ULL)))); + CV1.push_back( + ConstantFP::get(*Context, APFloat(APInt(64, 0x4530000000000000ULL)))); + Constant *C1 = ConstantVector::get(CV1); SDValue CPIdx1 = DAG.getConstantPool(C1, getPointerTy(), 16); // Load the 64-bit value into an XMM register. From sabre at nondot.org Wed Jan 25 04:00:37 2012 From: sabre at nondot.org (Chris Lattner) Date: Wed, 25 Jan 2012 02:00:37 -0800 Subject: [llvm-commits] [llvm] r148929 - in /llvm/trunk/lib: CodeGen/SelectionDAG/LegalizeDAG.cpp Target/X86/X86ISelLowering.cpp Transforms/InstCombine/InstCombineMulDivRem.cpp Transforms/InstCombine/InstCombineSimplifyDemanded.cpp Transforms/Scalar/Scala In-Reply-To: References: Message-ID: <72BC3C41-9ABF-4982-85CC-95CD9663A236@nondot.org> On Jan 24, 2012, at 11:42 PM, Eli Friedman wrote: > On Tue, Jan 24, 2012 at 10:02 PM, Chris Lattner wrote: >> Author: lattner >> Date: Wed Jan 25 00:02:56 2012 >> New Revision: 148929 >> >> URL: http://llvm.org/viewvc/llvm-project?rev=148929&view=rev >> Log: >> use ConstantVector::getSplat in a few places. > > [...] 
> >> - SmallVector CV1; >> - CV1.push_back( >> - ConstantFP::get(*Context, APFloat(APInt(64, 0x4330000000000000ULL)))); >> - CV1.push_back( >> - ConstantFP::get(*Context, APFloat(APInt(64, 0x4530000000000000ULL)))); >> - Constant *C1 = ConstantVector::get(CV1); >> + Constant *C1 = ConstantVector::getSplat(2, >> + ConstantFP::get(*Context, APFloat(APInt(64, 0x4330000000000000ULL)))); >> SDValue CPIdx1 = DAG.getConstantPool(C1, getPointerTy(), 16); > > This change doesn't appear to preserve the semantics. Whoa, great catch! Fixed in r148947. Many thanks, -Chris From james.molloy at arm.com Wed Jan 25 04:11:49 2012 From: james.molloy at arm.com (James Molloy) Date: Wed, 25 Jan 2012 10:11:49 -0000 Subject: [llvm-commits] [PATCH] Fix assertion failure in MC Message-ID: <000101ccdb49$c0739e80$415adb80$@molloy@arm.com> Hi, The attached patch fixes an assertion failure in the MC when chains of aliases are used. This was discovered by PlumHall C++ and the testcase was reduced and anonymised from that. The main issue is if an alias points to another alias, there is no guarantee on the order that the MC layer will initialise the values in their MCSymbol representations. Because of this it is possible for an alias to purport to have a null Section field when it shouldn't, and an assertion fires in IsInSection(). The patch fixes this by changing IsInSection() to first follow aliases transitively before returning or asserting. Review requested. Cheers, James -------------- next part -------------- A non-text attachment was scrubbed... 
Name: isinsection.diff Type: application/octet-stream Size: 1538 bytes Desc: not available Url : http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120125/6b3039d7/attachment.obj From tobias at grosser.es Wed Jan 25 04:12:52 2012 From: tobias at grosser.es (Tobias Grosser) Date: Wed, 25 Jan 2012 11:12:52 +0100 Subject: [llvm-commits] [LLVMdev] [PATCH] BasicBlock Autovectorization Pass In-Reply-To: <1327438907.11266.134.camel@sapling> References: <1320749109.19359.76.camel@sapling> <4EB90E98.4010805@grosser.es> <1320762963.19359.117.camel@sapling> <4EB98207.2070807@grosser.es> <1320791390.19359.262.camel@sapling> <4EBC4B0F.6010609@grosser.es> <1321050998.19359.539.camel@sapling> <4EBDA7F9.9080709@grosser.es> <1321053083.19359.550.camel@sapling> <4EBDB1BF.7090006@grosser.es> <1321400339.19359.782.camel@sapling> <1321486739.19359.1067.camel@sapling> <4EC504B5.2020408@grosser.es> <1321898108.2507.36.camel@sapling> <1321932161.2507.101.camel@sapling> <1322067157.2507.263.camel@sapling> <4ED8F7B0.8050309@grosser.es> <1323822351.590.1687.camel@sapling> <4EFC7291.9040808@grosser.es> <1325179929.13080.2839.camel@sapling> <4EFD7FD3.8040800@grosser.es> <1327378420.32397.1603.camel@sapling > <4F1ED40C.80306@grosser.es> <1327421849.11266.69.camel@sapling> <1327438907.11266.134.camel@s apling> Message-ID: <4F1FD5A4.9040301@grosser.es> On 01/24/2012 10:01 PM, Hal Finkel wrote: > I have attached the latest version of my basic-block autovectorization > pass. Nice. > With regard to the non-trivial cycle checking I had mentioned > previously, I implemented the "late abort" solution and made it the > default for cases where the full cycle check would be expensive (for > blocks that have many candidate pairs). For blocks with fewer candidate > pairs, the full cycle check is used. Good. 
> I believe that I have addressed all concerns raised thus far (except for > the container Value* -> Instruction* type changes, which Tobias said he > would be okay with having changed post commit). If I receive no > objections over the next few days, I'll commit. Alright. > I would like to thank everyone who has provided feedback, many of the > suggestions have proved quite valuable. A final nitpick: > + if (CallInst *C = dyn_cast(I)) { > + if (!isVectorizableIntrinsic(C)) > + return false; > + } else if (LoadInst *L = dyn_cast(I)) { > + // Vectorize simple loads if possbile: > + IsSimpleLoadStore = L->isSimple(); > + if (!IsSimpleLoadStore || NoMemOps) > + return false; > + } else if (StoreInst *S = dyn_cast(I)) { > + // Vectorize simple stores if possbile: > + IsSimpleLoadStore = S->isSimple(); > + if (!IsSimpleLoadStore || NoMemOps) > + return false; > + } else if (CastInst *C = dyn_cast(I)) { > + // We can vectorize casts, but not casts of pointer types, etc. > + if (NoCasts) > + return false; > + > + Type *SrcTy = C->getSrcTy(); > + if (!SrcTy->isSingleValueType() || SrcTy->isPointerTy()) > + return false; > + > + Type *DestTy = C->getDestTy(); > + if (!DestTy->isSingleValueType() || DestTy->isPointerTy()) > + return false; > + } else if (!(I->isBinaryOp() || isa(I) || > + isa(I) || isa(I))) > + return false; You may want to add braces to a single statement branch, if the other branches have also braces. (I think I have seen this happening a couple of times). Cheers Tobi From victor.umansky at intel.com Wed Jan 25 04:19:33 2012 From: victor.umansky at intel.com (Umansky, Victor) Date: Wed, 25 Jan 2012 10:19:33 +0000 Subject: [llvm-commits] Bug fix in double-to-int conversions codegen in AVX: please review Message-ID: Hi The attached patch file includes a fix for the following bug in AVX codegen for double-to-int conversions: * "fptosi" and "fptoui" IR instructions are defined with round-to-zero rounding mode. 
* Currently for AVX mode for <4xdouble> and <8xdouble> the "VCVTPD2DQ.128" and "VCVTPD2DQ.256" instructions are selected (for "fp_to_sint" DAG node operation) by AVX codegen. However they use round-to-nearest-even rounding mode. * Consequently, the conversion produces incorrect numbers. The fix is to replace selection of VCVTPD2DQ instructions with VCVTTPD2DQ instructions. The latter use truncate (i.e. round-to-zero) rounding mode. As "fp_to_sint" DAG node operation is used only for lowering of "fptosi" and "fptoui" IR instructions, the fix in X86InstrSSE.td definition file doesn't have an impact on other LLVM flows. The patch includes changes in the .td file, LIT test for the changes and a fix in a legacy LIT test (which produced asm code conflicting with LLVM IR spec). I'd like to commit the fix to the LLVM trunk, and your feedback will be much appreciated. Thanks, Victor --------------------------------------------------------------------- Intel Israel (74) Limited This e-mail and any attachments may contain confidential material for the sole use of the intended recipient(s). Any review or distribution by others is strictly prohibited. If you are not the intended recipient, please contact the sender and delete all copies. -------------- next part -------------- An HTML attachment was scrubbed... URL: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120125/bb1ca7fc/attachment.html -------------- next part -------------- A non-text attachment was scrubbed... 
Name: fp_to_int.patch Type: application/octet-stream Size: 1933 bytes Desc: fp_to_int.patch Url : http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120125/bb1ca7fc/attachment.obj From baldrick at free.fr Wed Jan 25 06:50:12 2012 From: baldrick at free.fr (Duncan Sands) Date: Wed, 25 Jan 2012 12:50:12 -0000 Subject: [llvm-commits] [zorg] r148954 - /zorg/trunk/zorg/buildbot/builders/DragonEggBuilder.py Message-ID: <20120125125012.CF0BA2A6C12C@llvm.org> Author: baldrick Date: Wed Jan 25 06:50:12 2012 New Revision: 148954 URL: http://llvm.org/viewvc/llvm-project?rev=148954&view=rev Log: This builder is being run on a machine that doesn't have gcc-4.6 libraries on it. The result is that programs built with gcc-4.6 (which itself is built during the bootstrap) fail to run because those libraries can't be located. They exist of course: inside the just built gcc-4.6's install directory. So add the appropriate paths to LD_LIBRARY_PATH in the environment. Modified: zorg/trunk/zorg/buildbot/builders/DragonEggBuilder.py Modified: zorg/trunk/zorg/buildbot/builders/DragonEggBuilder.py URL: http://llvm.org/viewvc/llvm-project/zorg/trunk/zorg/buildbot/builders/DragonEggBuilder.py?rev=148954&r1=148953&r2=148954&view=diff ============================================================================== --- zorg/trunk/zorg/buildbot/builders/DragonEggBuilder.py (original) +++ zorg/trunk/zorg/buildbot/builders/DragonEggBuilder.py Wed Jan 25 06:50:12 2012 @@ -13,6 +13,14 @@ cc_settings += [WithProperties('CXX=' + gxx)] return cc_settings +def extractSearchPaths(rc, stdout, stderr): + mapping = {} + for l in stdout.split('\n'): + vals = l.split(': =', 1) + if len(vals) == 2: + mapping['gcc_' + vals[0]] = vals[1] + return mapping + def getDragonEggBootstrapFactory(triple, gcc_repository, extra_languages=[], extra_gcc_configure_args=[], @@ -31,19 +39,19 @@ command=['pwd'], property='builddir', description='set build dir', - workdir='.')) + workdir='.', env=env)) # Checkout LLVM 
sources. f.addStep(SVN(name='svn-llvm', mode='update', baseURL='http://llvm.org/svn/llvm-project/llvm/', defaultBranch='trunk', - workdir='llvm.src')) + workdir='llvm.src', env=env)) # Checkout DragonEgg sources. f.addStep(SVN(name='svn-dragonegg', mode='update', baseURL='http://llvm.org/svn/llvm-project/dragonegg/', defaultBranch='trunk', - workdir='dragonegg.src')) + workdir='dragonegg.src', env=env)) # Checkout GCC. This is usually a specific known good revision (supplied by # appending @revision to the URL). The SVN step can't handle that. As it @@ -53,9 +61,10 @@ f.addStep(ShellCommand(name='svn-gcc', command=svn_co, haltOnFailure=True, - workdir='.')) + workdir='.', env=env)) # Do the boostrap. + cur_env = env prev_gcc = None # C compiler built during the previous stage. prev_gxx = None # C++ compiler built during the previous stage. prev_plugin = None # Plugin built during the previous stage. @@ -71,26 +80,26 @@ command=['rm', '-rf', gcc_obj_dir], haltOnFailure = True, description=['rm build dir', 'gcc', stage], - workdir='.', env=env)) + workdir='.', env=cur_env)) f.addStep(Configure(name='configure.gcc.%s' % stage, command=(['../gcc.src/configure', WithProperties('--prefix=%%(builddir)s/%s' % gcc_install_dir)] + gcc_configure_args + getCCSetting(prev_gcc, prev_gxx)), haltOnFailure = True, description=['configure', 'gcc', stage], - workdir=gcc_obj_dir, env=env)) + workdir=gcc_obj_dir, env=cur_env)) f.addStep(WarningCountingShellCommand(name = 'compile.gcc.%s' % stage, command = ['nice', '-n', '10', 'make', WithProperties('-j%s' % jobs)], haltOnFailure = True, description=['compile', 'gcc', stage], - workdir=gcc_obj_dir, env=env)) + workdir=gcc_obj_dir, env=cur_env)) f.addStep(WarningCountingShellCommand(name = 'install.gcc.%s' % stage, command = ['nice', '-n', '10', 'make', 'install'], haltOnFailure = True, description=['install', 'gcc', stage], - workdir=gcc_obj_dir, env=env)) + workdir=gcc_obj_dir, env=cur_env)) # From this point on build everything using 
the just built GCC. prev_gcc = '%(builddir)s/'+gcc_install_dir+'/bin/gcc' @@ -99,11 +108,21 @@ prev_gcc += ' -fplugin=' + prev_plugin prev_gxx += ' -fplugin=' + prev_plugin -# FIXME: The built libstdc++ and libgcc may be more recent than the system versions. -# FIXME: Set the library path so that programs compiled with the just built GCC will -# FIXME: start successfully, rather than failing due to shared library dependencies. -# FIXME: export LD_LIBRARY_PATH=`$CC -print-search-dirs | grep "^libraries:" | \ -# FIXME: sed "s/^libraries: *=//"`:$LD_LIBRARY_PATH + # The built libstdc++ and libgcc may well be more recent than the system + # versions. Set the library path so that programs compiled with the just + # built GCC will start successfully, rather than failing due to missing + # shared library dependencies. + f.addStep(buildbot.steps.shell.SetProperty(name = 'gcc.search.paths.%s' % stage, + command=[WithProperties(prev_gcc), + '-print-search-dirs'], + extract_fn=extractSearchPaths, + description=['gcc', 'search paths', + stage], env=cur_env)) + cur_env = cur_env.copy(); + if 'LD_LIBRARY_PATH' in env: + cur_env['LD_LIBRARY_PATH'] = WithProperties('%(gcc_libraries)s'+':'+env['LD_LIBRARY_PATH']) + else: + cur_env['LD_LIBRARY_PATH'] = WithProperties('%(gcc_libraries)s') # Build LLVM with the just built GCC and install it. 
llvm_obj_dir = 'llvm.obj.%s' % stage @@ -115,26 +134,26 @@ command=['rm', '-rf', llvm_obj_dir], haltOnFailure = True, description=['rm build dir', 'llvm', stage], - workdir='.', env=env)) + workdir='.', env=cur_env)) f.addStep(Configure(name='configure.llvm.%s' % stage, command=(['../llvm.src/configure', WithProperties('--prefix=%%(builddir)s/%s' % llvm_install_dir)] + llvm_configure_args + getCCSetting(prev_gcc, prev_gxx)), haltOnFailure = True, description=['configure', 'llvm', stage], - workdir=llvm_obj_dir, env=env)) + workdir=llvm_obj_dir, env=cur_env)) f.addStep(WarningCountingShellCommand(name = 'compile.llvm.%s' % stage, command = ['nice', '-n', '10', 'make', WithProperties('-j%s' % jobs)], haltOnFailure = True, description=['compile', 'llvm', stage], - workdir=llvm_obj_dir, env=env)) + workdir=llvm_obj_dir, env=cur_env)) f.addStep(WarningCountingShellCommand(name = 'install.llvm.%s' % stage, command = ['nice', '-n', '10', 'make', 'install'], haltOnFailure = True, description=['install', 'llvm', stage], - workdir=llvm_obj_dir, env=env)) + workdir=llvm_obj_dir, env=cur_env)) # Build dragonegg with the just built LLVM and GCC. 
dragonegg_pre_obj_dir = 'dragonegg.obj.pre.%s' % stage @@ -143,7 +162,7 @@ command=['rm', '-rf', dragonegg_pre_obj_dir], haltOnFailure = True, description=['rm build dir', 'dragonegg pre', stage], - workdir='.', env=env)) + workdir='.', env=cur_env)) f.addStep(WarningCountingShellCommand( name = 'compile.dragonegg.pre.%s' % stage, command = ['nice', '-n', '10', @@ -155,7 +174,7 @@ ] + getCCSetting(prev_gcc, prev_gxx), haltOnFailure = True, description=['compile', 'dragonegg pre', stage], - workdir=dragonegg_pre_obj_dir, env=env)) + workdir=dragonegg_pre_obj_dir, env=cur_env)) prev_gcc = '%(builddir)s/'+gcc_install_dir+'/bin/gcc -fplugin=%(builddir)s/'+dragonegg_pre_obj_dir+'/dragonegg.so' prev_gxx = '%(builddir)s/'+gcc_install_dir+'/bin/g++ -fplugin=%(builddir)s/'+dragonegg_pre_obj_dir+'/dragonegg.so' @@ -166,7 +185,7 @@ command=['rm', '-rf', dragonegg_obj_dir], haltOnFailure = True, description=['rm build dir', 'dragonegg', stage], - workdir='.', env=env)) + workdir='.', env=cur_env)) f.addStep(WarningCountingShellCommand( name = 'compile.dragonegg.%s' % stage, command = ['nice', '-n', '10', @@ -179,7 +198,7 @@ ] + getCCSetting(prev_gcc, prev_gxx), haltOnFailure = True, description=['compile', 'dragonegg', stage], - workdir=dragonegg_obj_dir, env=env)) + workdir=dragonegg_obj_dir, env=cur_env)) # Ensure that the following stages use the just built plugin. 
prev_plugin = '%(builddir)s/'+dragonegg_obj_dir+'/dragonegg.so' @@ -195,6 +214,6 @@ 'done'], haltOnFailure = True, description=['compare', 'stages', '2', 'and', '3'], - workdir='dragonegg.obj.stage3', env=env)) + workdir='dragonegg.obj.stage3', env=cur_env)) return f From baldrick at free.fr Wed Jan 25 07:19:10 2012 From: baldrick at free.fr (Duncan Sands) Date: Wed, 25 Jan 2012 13:19:10 -0000 Subject: [llvm-commits] [zorg] r148955 - /zorg/trunk/zorg/buildbot/builders/DragonEggBuilder.py Message-ID: <20120125131910.0E36E2A6C12C@llvm.org> Author: baldrick Date: Wed Jan 25 07:19:09 2012 New Revision: 148955 URL: http://llvm.org/viewvc/llvm-project?rev=148955&view=rev Log: Make sure the path update doesn't fail in stage2 due to the plugin option being considered part of the gcc executable's name. Modified: zorg/trunk/zorg/buildbot/builders/DragonEggBuilder.py Modified: zorg/trunk/zorg/buildbot/builders/DragonEggBuilder.py URL: http://llvm.org/viewvc/llvm-project/zorg/trunk/zorg/buildbot/builders/DragonEggBuilder.py?rev=148955&r1=148954&r2=148955&view=diff ============================================================================== --- zorg/trunk/zorg/buildbot/builders/DragonEggBuilder.py (original) +++ zorg/trunk/zorg/buildbot/builders/DragonEggBuilder.py Wed Jan 25 07:19:09 2012 @@ -104,9 +104,6 @@ # From this point on build everything using the just built GCC. prev_gcc = '%(builddir)s/'+gcc_install_dir+'/bin/gcc' prev_gxx = '%(builddir)s/'+gcc_install_dir+'/bin/g++' - if prev_plugin is not None: - prev_gcc += ' -fplugin=' + prev_plugin - prev_gxx += ' -fplugin=' + prev_plugin # The built libstdc++ and libgcc may well be more recent than the system # versions. 
Set the library path so that programs compiled with the just @@ -116,6 +113,7 @@ command=[WithProperties(prev_gcc), '-print-search-dirs'], extract_fn=extractSearchPaths, + haltOnFailure = True, description=['gcc', 'search paths', stage], env=cur_env)) cur_env = cur_env.copy(); @@ -124,6 +122,11 @@ else: cur_env['LD_LIBRARY_PATH'] = WithProperties('%(gcc_libraries)s') + # Build everything using the DragonEgg plugin from the previous stage. + if prev_plugin is not None: + prev_gcc += ' -fplugin=' + prev_plugin + prev_gxx += ' -fplugin=' + prev_plugin + # Build LLVM with the just built GCC and install it. llvm_obj_dir = 'llvm.obj.%s' % stage llvm_install_dir = 'llvm.install' # Name is embedded in object files, so From asl at math.spbu.ru Wed Jan 25 07:42:05 2012 From: asl at math.spbu.ru (Anton Korobeynikov) Date: Wed, 25 Jan 2012 17:42:05 +0400 Subject: [llvm-commits] LLVM patch to support ARM fused multiply add/subtract instructions In-Reply-To: <00f601ccdaf6$e6cdf8d0$b469ea70$@org> References: <00f601ccdaf6$e6cdf8d0$b469ea70$@org> Message-ID: Hi Ana, > In this update: > - I assumed neon2 does not imply vfpv4, but neon and vfpv4 imply neon2. > - I kept setting .fpu=neon-vfpv4 code attribute because that is what the > assembler understands. Looks ok. > The additional changes mentioned in the email discussions I think belong to > a separate patch: > - Associate VMLA/VMLS with LessPreciseFPMAD flag, and maybe with fast-math > flag. They should definitely not be. They are not less precise! They are "exactly precise" as two separate ops. It's just FMA which has greater precision than usual thanks to 1 rounding. And it's FMA which needs to be associated with -ffast-math on VFPv2 > - VFPv3/VFPv4/NEON/NEON2 associations with FeatureFP16/FeatureD16. Right. But in a separate patch, please. > - Support to set -mfpu=neon2 in clang. Do you want this?? We should be compatible with gcc in this matter. What does it do? 
-- With best regards, Anton Korobeynikov Faculty of Mathematics and Mechanics, Saint Petersburg State University From baldrick at free.fr Wed Jan 25 07:39:35 2012 From: baldrick at free.fr (Duncan Sands) Date: Wed, 25 Jan 2012 13:39:35 -0000 Subject: [llvm-commits] [www-releases] r148956 - in /www-releases/trunk/3.0: dragonegg-3.0.tgz dragonegg-3.0.tgz.sig Message-ID: <20120125133935.560E92A6C12C@llvm.org> Author: baldrick Date: Wed Jan 25 07:39:35 2012 New Revision: 148956 URL: http://llvm.org/viewvc/llvm-project?rev=148956&view=rev Log: Some web-pages mention(ed) the release as being .tgz rather than .tar.gz. Ensure that people using .tgz can still get the release. Added: www-releases/trunk/3.0/dragonegg-3.0.tgz (with props) www-releases/trunk/3.0/dragonegg-3.0.tgz.sig (with props) Added: www-releases/trunk/3.0/dragonegg-3.0.tgz URL: http://llvm.org/viewvc/llvm-project/www-releases/trunk/3.0/dragonegg-3.0.tgz?rev=148956&view=auto ============================================================================== --- www-releases/trunk/3.0/dragonegg-3.0.tgz (added) +++ www-releases/trunk/3.0/dragonegg-3.0.tgz Wed Jan 25 07:39:35 2012 @@ -0,0 +1 @@ +link dragonegg-3.0.tar.gz \ No newline at end of file Propchange: www-releases/trunk/3.0/dragonegg-3.0.tgz ------------------------------------------------------------------------------ svn:special = * Added: www-releases/trunk/3.0/dragonegg-3.0.tgz.sig URL: http://llvm.org/viewvc/llvm-project/www-releases/trunk/3.0/dragonegg-3.0.tgz.sig?rev=148956&view=auto ============================================================================== --- www-releases/trunk/3.0/dragonegg-3.0.tgz.sig (added) +++ www-releases/trunk/3.0/dragonegg-3.0.tgz.sig Wed Jan 25 07:39:35 2012 @@ -0,0 +1 @@ +link dragonegg-3.0.tar.gz.sig \ No newline at end of file Propchange: www-releases/trunk/3.0/dragonegg-3.0.tgz.sig ------------------------------------------------------------------------------ svn:special = * From anton at korobeynikov.info Wed Jan 
25 07:46:03 2012 From: anton at korobeynikov.info (Anton Korobeynikov) Date: Wed, 25 Jan 2012 17:46:03 +0400 Subject: [llvm-commits] [PATCH][Review request] IEEE quad software emulation library call In-Reply-To: <95DD8BA8AA50B14BBFB86A1D541FA3809EAB583D@exchdb03.mips.com> References: <95DD8BA8AA50B14BBFB86A1D541FA3809EAB2E46@exchdb03.mips.com> <95DD8BA8AA50B14BBFB86A1D541FA3809EAB2E71@exchdb03.mips.com> <95DD8BA8AA50B14BBFB86A1D541FA3809EAB542D@exchdb03.mips.com> <95DD8BA8AA50B14BBFB86A1D541FA3809EAB5468@exchdb03.mips.com> <95DD8BA8AA50B14BBFB86A1D541FA3809EAB583D@exchdb03.mips.com> Message-ID: Akira, > Okay, if that doesn't work, I think I will have to legalize f128 during type legalization. > Is the patch fine as it is now? Or does it need further changes? I'm still thinking about better approach. It seems silly to legalize f128 to i128, and try to "recover" from this afterwards. Maybe we can somehow not legalize arguments to libcalls here... ? And use the common codegenerator code to do all necessary CC stuff. Do you have any ideas here? -- With best regards, Anton Korobeynikov Faculty of Mathematics and Mechanics, Saint Petersburg State University From hfinkel at anl.gov Wed Jan 25 09:07:44 2012 From: hfinkel at anl.gov (Hal Finkel) Date: Wed, 25 Jan 2012 09:07:44 -0600 Subject: [llvm-commits] LLVM patch to support ARM fused multiply add/subtract instructions In-Reply-To: References: <00f601ccdaf6$e6cdf8d0$b469ea70$@org> Message-ID: <1327504064.2489.110.camel@sapling> On Wed, 2012-01-25 at 17:42 +0400, Anton Korobeynikov wrote: > Hi Ana, > > > In this update: > > - I assumed neon2 does not imply vfpv4, but neon and vfpv4 imply neon2. > > - I kept setting .fpu=neon-vfpv4 code attribute because that is what the > > assembler understands. > Looks ok. > > > The additional changes mentioned in the email discussions I think belong to > > a separate patch: > > - Associate VMLA/VMLS with LessPreciseFPMAD flag, and maybe with fast-math > > flag. 
> They should definitely not be. They are not less precise! They are > "exactly precise" as two separate ops. It's just FMA which has greater > precision than usual thanks to 1 rounding. > And it's FMA which needs to be associated with -ffast-math on VFPv2 Just to be clear, are you advocating associating this with UnsafeFPMath or with !NoExcessFPPrecision? I think that it should be the latter, as that is what the PPC backend does (and that seems to match the intent of the TargetOptions API authors), but unlike -ffast-math (-enable-unsafe-fp-math), this will cause the patterns to be enabled by default. -Hal > > > - VFPv3/VFPv4/NEON/NEON2 associations with FeatureFP16/FeatureD16. > Right. But in a separate patch, please. > > > - Support to set -mfpu=neon2 in clang. Do you want this?? > We should be compatible with gcc in this matter. What does it do? > -- Hal Finkel Postdoctoral Appointee Leadership Computing Facility Argonne National Laboratory From asl at math.spbu.ru Wed Jan 25 09:12:40 2012 From: asl at math.spbu.ru (Anton Korobeynikov) Date: Wed, 25 Jan 2012 19:12:40 +0400 Subject: [llvm-commits] LLVM patch to support ARM fused multiply add/subtract instructions In-Reply-To: <1327504064.2489.110.camel@sapling> References: <00f601ccdaf6$e6cdf8d0$b469ea70$@org> <1327504064.2489.110.camel@sapling> Message-ID: Hal, > Just to be clear, are you advocating associating this with UnsafeFPMath > or with !NoExcessFPPrecision? I think that it should be the latter, as > that is what the PPC backend does (and that seems to match the intent of > the TargetOptions API authors), but unlike -ffast-math > (-enable-unsafe-fp-math), this will cause the patterns to be enabled by > default. In the patch Ana posted FMA patterns were associated with ~NoExcessFPPrecision. This is pretty much fine. But the "usual" VMLA patterns should not be "downgraded" to LessPreciseFPMAD, because it's not true. 
-- With best regards, Anton Korobeynikov Faculty of Mathematics and Mechanics, Saint Petersburg State University From scanon at apple.com Wed Jan 25 09:27:16 2012 From: scanon at apple.com (Stephen Canon) Date: Wed, 25 Jan 2012 10:27:16 -0500 Subject: [llvm-commits] LLVM patch to support ARM fused multiply add/subtract instructions In-Reply-To: <1327504064.2489.110.camel@sapling> References: <00f601ccdaf6$e6cdf8d0$b469ea70$@org> <1327504064.2489.110.camel@sapling> Message-ID: On Jan 25, 2012, at 10:07 AM, Hal Finkel wrote: > On Wed, 2012-01-25 at 17:42 +0400, Anton Korobeynikov wrote: >> Hi Ana, >> >>> In this update: >>> - I assumed neon2 does not imply vfpv4, but neon and vfpv4 imply neon2. >>> - I kept setting .fpu=neon-vfpv4 code attribute because that is what the >>> assembler understands. >> Looks ok. >> >>> The additional changes mentioned in the email discussions I think belong to >>> a separate patch: >>> - Associate VMLA/VMLS with LessPreciseFPMAD flag, and maybe with fast-math >>> flag. >> They should definitely not be. They are not less precise! They are >> "exactly precise" as two separate ops. It's just FMA which has greater >> precision than usual thanks to 1 rounding. >> And it's FMA which needs to be associated with -ffast-math on VFPv2 > > Just to be clear, are you advocating associating this with UnsafeFPMath > or with !NoExcessFPPrecision? I think that it should be the latter, as > that is what the PPC backend does (and that seems to match the intent of > the TargetOptions API authors), but unlike -ffast-math > (-enable-unsafe-fp-math), this will cause the patterns to be enabled by > default. Controlling contracting a*b + c to fma(a,b,c) is a thorny issue. Such contractions often give more accurate results, but they can also sabotage certain important calculations. As an example, consider squaring a complex number: double complex z = CMPLX(M_PI, M_PI); double complex w = z*z; Let's call the real and imaginary parts of z x and y, respectively. 
Then the real part of w is given by: double real_w = x*x - y*y; If evaluated without contraction, x*x and y*y are both rounded to the same value, so the subtraction cancels exactly and produces the correct result. If contraction is used, then we get something like: double real_w = fma(x, x, -y*y); Since no rounding occurs on the intermediate product x*x, the result is not exactly zero, but is instead the low 53 bits of the exact product. This sort of effect can introduce nasty asymmetries into certain calculations. It's fine for them to be enabled by default, but it should be possible to toggle them independent of other numerical controls. !NoExcessFPPrecision is pretty close to the right idea. -ffast-math seems wrong. I should point out that the C standard defines the FP_CONTRACT pragma for exactly this purpose (7.12.2). Off the top of my head, I'm not sure what other languages have to say on the subject. - Steve From james.molloy at arm.com Wed Jan 25 09:38:12 2012 From: james.molloy at arm.com (James Molloy) Date: Wed, 25 Jan 2012 15:38:12 -0000 Subject: [llvm-commits] LLVM patch to support ARM fused multiply add/subtract instructions In-Reply-To: References: <00f601ccdaf6$e6cdf8d0$b469ea70$@org> <1327504064.2489.110.camel@sapling> Message-ID: <000601ccdb77$58ee38a0$0acaa9e0$@molloy@arm.com> > I should point out that the C standard defines the FP_CONTRACT pragma for exactly this purpose (7.12.2). Off the top of my head, I'm not sure what other languages have to say on the subject. Incidentally we've been looking into how to support that pragma, and there is currently no way to control LLVM's behaviour on FMA on a per-block basis, just per-module. Perhaps an attribute that could be applied to fmuls, fadds and fsubs might be an interesting option? This all depends on just how important this pragma and being "fully standards compliant" in general is. GCC doesn't support it, for example (although defaults to no FMA unless -fast-math is specified). 
James -----Original Message----- From: llvm-commits-bounces at cs.uiuc.edu [mailto:llvm-commits-bounces at cs.uiuc.edu] On Behalf Of Stephen Canon Sent: 25 January 2012 15:27 To: Hal Finkel Cc: llvm-commits at cs.uiuc.edu; Anton Korobeynikov Subject: Re: [llvm-commits] LLVM patch to support ARM fused multiply add/subtract instructions On Jan 25, 2012, at 10:07 AM, Hal Finkel wrote: > On Wed, 2012-01-25 at 17:42 +0400, Anton Korobeynikov wrote: >> Hi Ana, >> >>> In this update: >>> - I assumed neon2 does not imply vfpv4, but neon and vfpv4 imply neon2. >>> - I kept setting .fpu=neon-vfpv4 code attribute because that is what the >>> assembler understands. >> Looks ok. >> >>> The additional changes mentioned in the email discussions I think belong to >>> a separate patch: >>> - Associate VMLA/VMLS with LessPreciseFPMAD flag, and maybe with fast-math >>> flag. >> They should definitely not be. They are not less precise! They are >> "exactly precise" as two separate ops. It's just FMA which has greater >> precision than usual thanks to 1 rounding. >> And it's FMA which needs to be associated with -ffast-math on VFPv2 > > Just to be clear, are you advocating associating this with UnsafeFPMath > or with !NoExcessFPPrecision? I think that it should be the latter, as > that is what the PPC backend does (and that seems to match the intent of > the TargetOptions API authors), but unlike -ffast-math > (-enable-unsafe-fp-math), this will cause the patterns to be enabled by > default. Controlling contracting a*b + c to fma(a,b,c) is a thorny issue. Such contractions often give more accurate results, but they can also sabotage certain important calculations. As an example, consider squaring a complex number: double complex z = CMPLX(M_PI, M_PI); double complex w = z*z; Let's call the real and imaginary parts of z x and y, respectively. 
Then the real part of w is given by: double real_w = x*x - y*y; If evaluated without contraction, x*x and y*y are both rounded to the same value, so the subtraction cancels exactly and produces the correct result. If contraction is used, then we get something like: double real_w = fma(x, x, -y*y); Since no rounding occurs on the intermediate product x*x, the result is not exactly zero, but is instead the low 53 bits of the exact product. This sort of effect can introduce nasty asymmetries into certain calculations. It's fine for them to be enabled by default, but it should be possible to toggle them independent of other numerical controls. !NoExcessFPPrecision is pretty close to the right idea. -ffast-math seems wrong. I should point out that the C standard defines the FP_CONTRACT pragma for exactly this purpose (7.12.2). Off the top of my head, I'm not sure what other languages have to say on the subject. - Steve _______________________________________________ llvm-commits mailing list llvm-commits at cs.uiuc.edu http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits From hfinkel at anl.gov Wed Jan 25 10:17:59 2012 From: hfinkel at anl.gov (Hal Finkel) Date: Wed, 25 Jan 2012 10:17:59 -0600 Subject: [llvm-commits] LLVM patch to support ARM fused multiply add/subtract instructions In-Reply-To: <000601ccdb77$58ee38a0$0acaa9e0$@molloy@arm.com> References: <00f601ccdaf6$e6cdf8d0$b469ea70$@org> <1327504064.2489.110.camel@sapling> <000601ccdb77$58ee38a0$0acaa9e0$@molloy@arm.com> Message-ID: <1327508279.2489.167.camel@sapling> On Wed, 2012-01-25 at 15:38 +0000, James Molloy wrote: > > I should point out that the C standard defines the FP_CONTRACT pragma for > exactly this purpose (7.12.2). This is an excellent point. > Off the top of my head, I'm not sure what > other languages have to say on the subject. 
> > Incidentally we've been looking into how to support that pragma, and there > is currently no way to control LLVM's behaviour on FMA on a per-block basis, > just per-module. Great! > > Perhaps an attribute that could be applied to fmuls, fadds and fsubs might > be an interesting option? Do you mean something like the nsw/nuw flags on add, etc. that means "don't compute me at excess precision"? > > This all depends on just how important this pragma and being "fully > standards compliant" in general is. GCC doesn't support it, for example > (although defaults to no FMA unless -fast-math is specified). GCC's lack of support for this is probably not something we should aim to emulate ;) -Hal > > James > > -----Original Message----- > From: llvm-commits-bounces at cs.uiuc.edu > [mailto:llvm-commits-bounces at cs.uiuc.edu] On Behalf Of Stephen Canon > Sent: 25 January 2012 15:27 > To: Hal Finkel > Cc: llvm-commits at cs.uiuc.edu; Anton Korobeynikov > Subject: Re: [llvm-commits] LLVM patch to support ARM fused multiply > add/subtract instructions > > On Jan 25, 2012, at 10:07 AM, Hal Finkel wrote: > > > On Wed, 2012-01-25 at 17:42 +0400, Anton Korobeynikov wrote: > >> Hi Ana, > >> > >>> In this update: > >>> - I assumed neon2 does not imply vfpv4, but neon and vfpv4 imply neon2. > >>> - I kept setting .fpu=neon-vfpv4 code attribute because that is what the > >>> assembler understands. > >> Looks ok. > >> > >>> The additional changes mentioned in the email discussions I think belong > to > >>> a separate patch: > >>> - Associate VMLA/VMLS with LessPreciseFPMAD flag, and maybe with > fast-math > >>> flag. > >> They should definitely not be. They are not less precise! They are > >> "exactly precise" as two separate ops. It's just FMA which has greater > >> precision than usual thanks to 1 rounding. 
> >> And it's FMA which needs to be associated with -ffast-math on VFPv2 > > > > Just to be clear, are you advocating associating this with UnsafeFPMath > > or with !NoExcessFPPrecision? I think that it should be the latter, as > > that is what the PPC backend does (and that seems to match the intent of > > the TargetOptions API authors), but unlike -ffast-math > > (-enable-unsafe-fp-math), this will cause the patterns to be enabled by > > default. > > Controlling contracting a*b + c to fma(a,b,c) is a thorny issue. Such > contractions often give more accurate results, but they can also sabotage > certain important calculations. As an example, consider squaring a complex > number: > > double complex z = CMPLX(M_PI, M_PI); > double complex w = z*z; > > Let's call the real and imaginary parts of z x and y, respectively. Then > the real part of w is given by: > > double real_w = x*x - y*y; > > If evaluated without contraction, x*x and y*y are both rounded to the same > value, so the subtraction cancels exactly and produces the correct result. > If contraction is used, then we get something like: > > double real_w = fma(x, x, -y*y); > > Since no rounding occurs on the intermediate product x*x, the result is not > exactly zero, but is instead the low 53 bits of the exact product. This > sort of effect can introduce nasty asymmetries into certain calculations. > It's fine for them to be enabled by default, but it should be possible to > toggle them independent of other numerical controls. !NoExcessFPPrecision > is pretty close to the right idea. -ffast-math seems wrong. > > I should point out that the C standard defines the FP_CONTRACT pragma for > exactly this purpose (7.12.2). Off the top of my head, I'm not sure what > other languages have to say on the subject. 
> > - Steve > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits > > > > -- Hal Finkel Postdoctoral Appointee Leadership Computing Facility Argonne National Laboratory From James.Molloy at arm.com Wed Jan 25 10:23:57 2012 From: James.Molloy at arm.com (James Molloy) Date: Wed, 25 Jan 2012 16:23:57 +0000 Subject: [llvm-commits] LLVM patch to support ARM fused multiply add/subtract instructions In-Reply-To: <1327508279.2489.167.camel@sapling> References: <00f601ccdaf6$e6cdf8d0$b469ea70$@org> <1327504064.2489.110.camel@sapling> <000601ccdb77$58ee38a0$0acaa9e0$@molloy@arm.com> <1327508279.2489.167.camel@sapling> Message-ID: > > > > Perhaps an attribute that could be applied to fmuls, fadds and fsubs might > > be an interesting option? > > Do you mean something like the nsw/nuw flags on add, etc. that means > "don't compute me at excess precision"? Yes, that's exactly my proposal. I wasn't sure how well received it might be however, given that we like to avoid adding extra attributes if possible. I also didn't think anyone cared enough, but this conversation has changed my mind slightly... -----Original Message----- From: Hal Finkel [mailto:hfinkel at anl.gov] Sent: 25 January 2012 16:18 To: James Molloy Cc: 'Stephen Canon'; llvm-commits at cs.uiuc.edu; Anton Korobeynikov Subject: RE: [llvm-commits] LLVM patch to support ARM fused multiply add/subtract instructions On Wed, 2012-01-25 at 15:38 +0000, James Molloy wrote: > > I should point out that the C standard defines the FP_CONTRACT pragma for > exactly this purpose (7.12.2). This is an excellent point. > Off the top of my head, I'm not sure what > other languages have to say on the subject. > > Incidentally we've been looking into how to support that pragma, and there > is currently no way to control LLVM's behaviour on FMA on a per-block basis, > just per-module. Great! 
> > Perhaps an attribute that could be applied to fmuls, fadds and fsubs might > be an interesting option? Do you mean something like the nsw/nuw flags on add, etc. that means "don't compute me at excess precision"? > > This all depends on just how important this pragma and being "fully > standards compliant" in general is. GCC doesn't support it, for example > (although defaults to no FMA unless -fast-math is specified). GCC's lack of support for this is probably not something we should aim to emulate ;) -Hal > > James > > -----Original Message----- > From: llvm-commits-bounces at cs.uiuc.edu > [mailto:llvm-commits-bounces at cs.uiuc.edu] On Behalf Of Stephen Canon > Sent: 25 January 2012 15:27 > To: Hal Finkel > Cc: llvm-commits at cs.uiuc.edu; Anton Korobeynikov > Subject: Re: [llvm-commits] LLVM patch to support ARM fused multiply > add/subtract instructions > > On Jan 25, 2012, at 10:07 AM, Hal Finkel wrote: > > > On Wed, 2012-01-25 at 17:42 +0400, Anton Korobeynikov wrote: > >> Hi Ana, > >> > >>> In this update: > >>> - I assumed neon2 does not imply vfpv4, but neon and vfpv4 imply neon2. > >>> - I kept setting .fpu=neon-vfpv4 code attribute because that is what the > >>> assembler understands. > >> Looks ok. > >> > >>> The additional changes mentioned in the email discussions I think belong > to > >>> a separate patch: > >>> - Associate VMLA/VMLS with LessPreciseFPMAD flag, and maybe with > fast-math > >>> flag. > >> They should definitely not be. They are not less precise! They are > >> "exactly precise" as two separate ops. It's just FMA which has greater > >> precision than usual thanks to 1 rounding. > >> And it's FMA which needs to be associated with -ffast-math on VFPv2 > > > > Just to be clear, are you advocating associating this with UnsafeFPMath > > or with !NoExcessFPPrecision? 
I think that it should be the latter, as > > that is what the PPC backend does (and that seems to match the intent of > > the TargetOptions API authors), but unlike -ffast-math > > (-enable-unsafe-fp-math), this will cause the patterns to be enabled by > > default. > > Controlling contracting a*b + c to fma(a,b,c) is a thorny issue. Such > contractions often give more accurate results, but they can also sabotage > certain important calculations. As an example, consider squaring a complex > number: > > double complex z = CMPLX(M_PI, M_PI); > double complex w = z*z; > > Let's call the real and imaginary parts of z x and y, respectively. Then > the real part of w is given by: > > double real_w = x*x - y*y; > > If evaluated without contraction, x*x and y*y are both rounded to the same > value, so the subtraction cancels exactly and produces the correct result. > If contraction is used, then we get something like: > > double real_w = fma(x, x, -y*y); > > Since no rounding occurs on the intermediate product x*x, the result is not > exactly zero, but is instead the low 53 bits of the exact product. This > sort of effect can introduce nasty asymmetries into certain calculations. > It's fine for them to be enabled by default, but it should be possible to > toggle them independent of other numerical controls. !NoExcessFPPrecision > is pretty close to the right idea. -ffast-math seems wrong. > > I should point out that the C standard defines the FP_CONTRACT pragma for > exactly this purpose (7.12.2). Off the top of my head, I'm not sure what > other languages have to say on the subject. > > - Steve > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits > > > > -- Hal Finkel Postdoctoral Appointee Leadership Computing Facility Argonne National Laboratory -- IMPORTANT NOTICE: The contents of this email and any attachments are confidential and may also be privileged. 
If you are not the intended recipient, please notify the sender immediately and do not disclose the contents to any other person, use it for any purpose, or store or copy the information in any medium. Thank you. From hfinkel at anl.gov Wed Jan 25 10:37:36 2012 From: hfinkel at anl.gov (Hal Finkel) Date: Wed, 25 Jan 2012 10:37:36 -0600 Subject: [llvm-commits] LLVM patch to support ARM fused multiply add/subtract instructions In-Reply-To: References: <00f601ccdaf6$e6cdf8d0$b469ea70$@org> <1327504064.2489.110.camel@sapling> <000601ccdb77$58ee38a0$0acaa9e0$@molloy@arm.com> <1327508279.2489.167.camel@sapling> Message-ID: <1327509456.2489.172.camel@sapling> On Wed, 2012-01-25 at 16:23 +0000, James Molloy wrote: > > > > > > Perhaps an attribute that could be applied to fmuls, fadds and fsubs might > > > be an interesting option? > > > > Do you mean something like the nsw/nuw flags on add, etc. that means > > "don't compute me at excess precision"? > > Yes, that's exactly my proposal. I wasn't sure how well received it might be however, given that we like to avoid adding extra attributes if possible. I also didn't think anyone cared enough, but this conversation has changed my mind slightly... > FWIW, I agree that this is important. Would you like to ask about this on llvm-dev? -Hal > -----Original Message----- > From: Hal Finkel [mailto:hfinkel at anl.gov] > Sent: 25 January 2012 16:18 > To: James Molloy > Cc: 'Stephen Canon'; llvm-commits at cs.uiuc.edu; Anton Korobeynikov > Subject: RE: [llvm-commits] LLVM patch to support ARM fused multiply add/subtract instructions > > On Wed, 2012-01-25 at 15:38 +0000, James Molloy wrote: > > > I should point out that the C standard defines the FP_CONTRACT pragma for > > exactly this purpose (7.12.2). > > This is an excellent point. > > > Off the top of my head, I'm not sure what > > other languages have to say on the subject. 
> > > > Incidentally we've been looking into how to support that pragma, and there > > is currently no way to control LLVM's behaviour on FMA on a per-block basis, > > just per-module. > > Great! > > > > > Perhaps an attribute that could be applied to fmuls, fadds and fsubs might > > be an interesting option? > > Do you mean something like the nsw/nuw flags on add, etc. that means > "don't compute me at excess precision"? > > > > > This all depends on just how important this pragma and being "fully > > standards compliant" in general is. GCC doesn't support it, for example > > (although defaults to no FMA unless -fast-math is specified). > > GCC's lack of support for this is probably not something we should aim > to emulate ;) > > -Hal > > > > > James > > > > -----Original Message----- > > From: llvm-commits-bounces at cs.uiuc.edu > > [mailto:llvm-commits-bounces at cs.uiuc.edu] On Behalf Of Stephen Canon > > Sent: 25 January 2012 15:27 > > To: Hal Finkel > > Cc: llvm-commits at cs.uiuc.edu; Anton Korobeynikov > > Subject: Re: [llvm-commits] LLVM patch to support ARM fused multiply > > add/subtract instructions > > > > On Jan 25, 2012, at 10:07 AM, Hal Finkel wrote: > > > > > On Wed, 2012-01-25 at 17:42 +0400, Anton Korobeynikov wrote: > > >> Hi Ana, > > >> > > >>> In this update: > > >>> - I assumed neon2 does not imply vfpv4, but neon and vfpv4 imply neon2. > > >>> - I kept setting .fpu=neon-vfpv4 code attribute because that is what the > > >>> assembler understands. > > >> Looks ok. > > >> > > >>> The additional changes mentioned in the email discussions I think belong > > to > > >>> a separate patch: > > >>> - Associate VMLA/VMLS with LessPreciseFPMAD flag, and maybe with > > fast-math > > >>> flag. > > >> They should definitely not be. They are not less precise! They are > > >> "exactly precise" as two separate ops. It's just FMA which has greater > > >> precision than usual thanks to 1 rounding. 
> > >> And it's FMA which needs to be associated with -ffast-math on VFPv2 > > > > > > Just to be clear, are you advocating associating this with UnsafeFPMath > > > or with !NoExcessFPPrecision? I think that it should be the latter, as > > > that is what the PPC backend does (and that seems to match the intent of > > > the TargetOptions API authors), but unlike -ffast-math > > > (-enable-unsafe-fp-math), this will cause the patterns to be enabled by > > > default. > > > > Controlling contracting a*b + c to fma(a,b,c) is a thorny issue. Such > > contractions often give more accurate results, but they can also sabotage > > certain important calculations. As an example, consider squaring a complex > > number: > > > > double complex z = CMPLX(M_PI, M_PI); > > double complex w = z*z; > > > > Let's call the real and imaginary parts of z x and y, respectively. Then > > the real part of w is given by: > > > > double real_w = x*x - y*y; > > > > If evaluated without contraction, x*x and y*y are both rounded to the same > > value, so the subtraction cancels exactly and produces the correct result. > > If contraction is used, then we get something like: > > > > double real_w = fma(x, x, -y*y); > > > > Since no rounding occurs on the intermediate product x*x, the result is not > > exactly zero, but is instead the low 53 bits of the exact product. This > > sort of effect can introduce nasty asymmetries into certain calculations. > > It's fine for them to be enabled by default, but it should be possible to > > toggle them independent of other numerical controls. !NoExcessFPPrecision > > is pretty close to the right idea. -ffast-math seems wrong. > > > > I should point out that the C standard defines the FP_CONTRACT pragma for > > exactly this purpose (7.12.2). Off the top of my head, I'm not sure what > > other languages have to say on the subject. 
> > > > - Steve > > _______________________________________________ > > llvm-commits mailing list > > llvm-commits at cs.uiuc.edu > > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits > > > > > > > > > > -- > Hal Finkel > Postdoctoral Appointee > Leadership Computing Facility > Argonne National Laboratory > > > > -- IMPORTANT NOTICE: The contents of this email and any attachments are confidential and may also be privileged. If you are not the intended recipient, please notify the sender immediately and do not disclose the contents to any other person, use it for any purpose, or store or copy the information in any medium. Thank you. -- Hal Finkel Postdoctoral Appointee Leadership Computing Facility Argonne National Laboratory From james.molloy at arm.com Wed Jan 25 10:40:09 2012 From: james.molloy at arm.com (James Molloy) Date: Wed, 25 Jan 2012 16:40:09 -0000 Subject: [llvm-commits] LLVM patch to support ARM fused multiply add/subtract instructions In-Reply-To: <1327509456.2489.172.camel@sapling> References: <00f601ccdaf6$e6cdf8d0$b469ea70$@org> <1327504064.2489.110.camel@sapling> <000601ccdb77$58ee38a0$0acaa9e0$@molloy@arm.com> <1327508279.2489.167.camel@sapling> <1327509456.2489.172.camel@sapling> Message-ID: <000701ccdb80$00c8d3c0$025a7b40$@molloy@arm.com> Yeah why not. I'll formulate a rational proposal and post to llvm-dev in the morning, but this is not a priority for me at the moment unfortunately. -----Original Message----- From: Hal Finkel [mailto:hfinkel at anl.gov] Sent: 25 January 2012 16:38 To: James Molloy Cc: 'Stephen Canon'; llvm-commits at cs.uiuc.edu; Anton Korobeynikov Subject: RE: [llvm-commits] LLVM patch to support ARM fused multiply add/subtract instructions On Wed, 2012-01-25 at 16:23 +0000, James Molloy wrote: > > > > > > Perhaps an attribute that could be applied to fmuls, fadds and fsubs might > > > be an interesting option? > > > > Do you mean something like the nsw/nuw flags on add, etc. 
that means > > "don't compute me at excess precision"? > > Yes, that's exactly my proposal. I wasn't sure how well received it might be however, given that we like to avoid adding extra attributes if possible. I also didn't think anyone cared enough, but this conversation has changed my mind slightly... > FWIW, I agree that this is important. Would you like to ask about this on llvm-dev? -Hal > -----Original Message----- > From: Hal Finkel [mailto:hfinkel at anl.gov] > Sent: 25 January 2012 16:18 > To: James Molloy > Cc: 'Stephen Canon'; llvm-commits at cs.uiuc.edu; Anton Korobeynikov > Subject: RE: [llvm-commits] LLVM patch to support ARM fused multiply add/subtract instructions > > On Wed, 2012-01-25 at 15:38 +0000, James Molloy wrote: > > > I should point out that the C standard defines the FP_CONTRACT pragma for > > exactly this purpose (7.12.2). > > This is an excellent point. > > > Off the top of my head, I'm not sure what > > other languages have to say on the subject. > > > > Incidentally we've been looking into how to support that pragma, and there > > is currently no way to control LLVM's behaviour on FMA on a per-block basis, > > just per-module. > > Great! > > > > > Perhaps an attribute that could be applied to fmuls, fadds and fsubs might > > be an interesting option? > > Do you mean something like the nsw/nuw flags on add, etc. that means > "don't compute me at excess precision"? > > > > > This all depends on just how important this pragma and being "fully > > standards compliant" in general is. GCC doesn't support it, for example > > (although defaults to no FMA unless -fast-math is specified). 
> > GCC's lack of support for this is probably not something we should aim > to emulate ;) > > -Hal > > > > > James > > > > -----Original Message----- > > From: llvm-commits-bounces at cs.uiuc.edu > > [mailto:llvm-commits-bounces at cs.uiuc.edu] On Behalf Of Stephen Canon > > Sent: 25 January 2012 15:27 > > To: Hal Finkel > > Cc: llvm-commits at cs.uiuc.edu; Anton Korobeynikov > > Subject: Re: [llvm-commits] LLVM patch to support ARM fused multiply > > add/subtract instructions > > > > On Jan 25, 2012, at 10:07 AM, Hal Finkel wrote: > > > > > On Wed, 2012-01-25 at 17:42 +0400, Anton Korobeynikov wrote: > > >> Hi Ana, > > >> > > >>> In this update: > > >>> - I assumed neon2 does not imply vfpv4, but neon and vfpv4 imply neon2. > > >>> - I kept setting .fpu=neon-vfpv4 code attribute because that is what the > > >>> assembler understands. > > >> Looks ok. > > >> > > >>> The additional changes mentioned in the email discussions I think belong > > to > > >>> a separate patch: > > >>> - Associate VMLA/VMLS with LessPreciseFPMAD flag, and maybe with > > fast-math > > >>> flag. > > >> They should definitely not be. They are not less precise! They are > > >> "exactly precise" as two separate ops. It's just FMA which has greater > > >> precision than usual thanks to 1 rounding. > > >> And it's FMA which needs to be associated with -ffast-math on VFPv2 > > > > > > Just to be clear, are you advocating associating this with UnsafeFPMath > > > or with !NoExcessFPPrecision? I think that it should be the latter, as > > > that is what the PPC backend does (and that seems to match the intent of > > > the TargetOptions API authors), but unlike -ffast-math > > > (-enable-unsafe-fp-math), this will cause the patterns to be enabled by > > > default. > > > > Controlling contracting a*b + c to fma(a,b,c) is a thorny issue. Such > > contractions often give more accurate results, but they can also sabotage > > certain important calculations. 
As an example, consider squaring a complex > > number: > > > > double complex z = CMPLX(M_PI, M_PI); > > double complex w = z*z; > > > > Let's call the real and imaginary parts of z x and y, respectively. Then > > the real part of w is given by: > > > > double real_w = x*x - y*y; > > > > If evaluated without contraction, x*x and y*y are both rounded to the same > > value, so the subtraction cancels exactly and produces the correct result. > > If contraction is used, then we get something like: > > > > double real_w = fma(x, x, -y*y); > > > > Since no rounding occurs on the intermediate product x*x, the result is not > > exactly zero, but is instead the low 53 bits of the exact product. This > > sort of effect can introduce nasty asymmetries into certain calculations. > > It's fine for them to be enabled by default, but it should be possible to > > toggle them independent of other numerical controls. !NoExcessFPPrecision > > is pretty close to the right idea. -ffast-math seems wrong. > > > > I should point out that the C standard defines the FP_CONTRACT pragma for > > exactly this purpose (7.12.2). Off the top of my head, I'm not sure what > > other languages have to say on the subject. > > > > - Steve > > _______________________________________________ > > llvm-commits mailing list > > llvm-commits at cs.uiuc.edu > > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits > > > > > > > > > > -- > Hal Finkel > Postdoctoral Appointee > Leadership Computing Facility > Argonne National Laboratory > > > > -- IMPORTANT NOTICE: The contents of this email and any attachments are confidential and may also be privileged. If you are not the intended recipient, please notify the sender immediately and do not disclose the contents to any other person, use it for any purpose, or store or copy the information in any medium. Thank you. 
-- Hal Finkel Postdoctoral Appointee Leadership Computing Facility Argonne National Laboratory From zinob at codeaurora.org Wed Jan 25 11:07:42 2012 From: zinob at codeaurora.org (Zino Benaissa) Date: Wed, 25 Jan 2012 09:07:42 -0800 Subject: [llvm-commits] Tuning LLVM Greedy Register Allocator to optimize for code size when targeting ARM Thumb 2 instruction set In-Reply-To: References: <000001ccda35$239f0e10$6add2a30$@org> Message-ID: <000701ccdb83$d9e14680$8da3d380$@org> Yes Evan, which is why this heuristic is also on when -O3 is used. There are more opportunity if we desire to pursue them (for -Os specifically). E.g., Use callee-save register, do less coalescing, do less re-materialization for large immediate, use flatter weight as Jacob proposed. This heuristic improves overall register allocator for Thumb 2 mode. Thanks, -Zino From: Evan Cheng [mailto:evan.cheng at apple.com] Sent: Tuesday, January 24, 2012 10:54 PM To: Zino Benaissa Cc: llvm-commits at cs.uiuc.edu; rajav at codeaurora.org Subject: Re: [llvm-commits] Tuning LLVM Greedy Register Allocator to optimize for code size when targeting ARM Thumb 2 instruction set Can you confirm that this change is not predicated on OptimizeForSize and it's not designed to trade off speed for code size? I'm pretty sure that's what you mean but I want to be sure. Thanks, Evan On Jan 23, 2012, at 5:11 PM, Zino Benaissa wrote: Description: This contribution extends LLVM greedy Register Allocator to optimize for code size when LLVM compiler targets ARM Thumb 2 instruction set. This heuristic favors assigning register R0 through R7 to operands used in instruction that can be encoded in 16 bits (16-bit is allowed only if R0-7 are used). Operands that appear most frequently in a function (and in instructions that qualify) get R0-7 register. This heuristic is turned on by default and has impact on generated code only if -mthumb compiler switch is used. To turn this heuristic off use -disable-favor-r0-7 feature flag. 
This patch modifies: 1) The LLVM greedy register allocator located in LLVM/CodeGen directory: To add the new code size heuristic. 2) The ARM-specific files located in LLVM/Target/ARM directory: To add the function that determines which instruction can be encoded in 16-bits and a fix to enable the compiler to emit CMN instruction in 16-bits encoding. 3) The LLVM test suite: fix test/CodeGen/Thumb2/thumb2-cmn.ll test. Performance impact: I focused on -Os and -mthumb flags. But observed similar improvement with -O3 and -mthumb. Runtime measured on Qualcomm 8660. Code size: - SPEC2000 benchmarks between 0 to 0.6% code size reduction (with no noticeable regression). - EEMBC benchmarks between 0 to 6% reduction (no noticeable regression). Automotive and Networking average about 1% code size reduction and Consumer about 0.5%. Runtime: - SPEC2000 between -1% and 6% speed up (Spec2k/ammp 6%) - EEMBC overall averages faster -1 to 5%. Modified: test/CodeGen/Thumb2/thumb2-cmn.ll include/llvm/Target/TargetInstrInfo.h include/llvm/CodeGen/LiveInterval.h lib/Target/ARM/Thumb2SizeReduction.cpp lib/Target/ARM/ARMBaseInstrInfo.cpp lib/Target/ARM/ARMBaseInstrInfo.h lib/CodeGen/RegAllocGreedy.cpp lib/CodeGen/CalcSpillWeights.cpp for details see RACodeSize.txt Testing: See ARMTestSuiteResult.txt and ARMSimple-Os-mthumb.txt Note -O3 is also completed on X86 and ARM CPUs ___________ ____________________________________ llvm-commits mailing list llvm-commits at cs.uiuc.edu http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits -------------- next part -------------- An HTML attachment was scrubbed... 
URL: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120125/ba54ea22/attachment.html From nicholas at mxc.ca Wed Jan 25 12:54:13 2012 From: nicholas at mxc.ca (Nick Lewycky) Date: Wed, 25 Jan 2012 18:54:13 -0000 Subject: [llvm-commits] [llvm] r148964 - /llvm/trunk/lib/Analysis/InlineCost.cpp Message-ID: <20120125185413.9165D2A6C12C@llvm.org> Author: nicholas Date: Wed Jan 25 12:54:13 2012 New Revision: 148964 URL: http://llvm.org/viewvc/llvm-project?rev=148964&view=rev Log: Use precomputed BB size instead of BB->size(). Modified: llvm/trunk/lib/Analysis/InlineCost.cpp Modified: llvm/trunk/lib/Analysis/InlineCost.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/InlineCost.cpp?rev=148964&r1=148963&r2=148964&view=diff ============================================================================== --- llvm/trunk/lib/Analysis/InlineCost.cpp (original) +++ llvm/trunk/lib/Analysis/InlineCost.cpp Wed Jan 25 12:54:13 2012 @@ -331,7 +331,7 @@ if (BranchInst *BI = dyn_cast<BranchInst>(I)) { BasicBlock *BB = BI->getSuccessor(Result ? 0 : 1); if (BB->getSinglePredecessor()) - Reduction += InlineConstants::InstrCost * BB->size(); + Reduction += InlineConstants::InstrCost * NumBBInsts[BB]; } } } while (!Worklist.empty()); From nicholas at mxc.ca Wed Jan 25 12:58:15 2012 From: nicholas at mxc.ca (Nick Lewycky) Date: Wed, 25 Jan 2012 10:58:15 -0800 Subject: [llvm-commits] [llvm] r148941 - in /llvm/trunk: lib/Analysis/InlineCost.cpp test/Transforms/Inline/alloca-bonus.ll In-Reply-To: References: <20120125082740.C7BB02A6C12C@llvm.org> Message-ID: <4F2050C7.1030009@mxc.ca> Eli Friedman wrote: > On Wed, Jan 25, 2012 at 12:27 AM, Nick Lewycky wrote: >> Author: nicholas >> Date: Wed Jan 25 02:27:40 2012 >> New Revision: 148941 >> >> URL: http://llvm.org/viewvc/llvm-project?rev=148941&view=rev >> Log: >> Support pointer comparisons against constants, when looking at the inline-cost >> savings from a pointer argument becoming an alloca. 
Sometimes callees will even >> compare a pointer to null and then branch to an otherwise unreachable block! >> Detect these cases and compute the number of saved instructions, instead of >> bailing out and reporting no savings. > > [Comments inlined.] > >> + } else if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) { >> + if (!isa<Constant>(ICI->getOperand(1))) >> + return 0; >> + ICmpInsts.push_back(ICI); > > You probably want to restrict this to equality comparisons; I don't > think we fold relational comparisons between an alloca and a constant > in general. We do. This fits in with the rule that you aren't allowed to guess an alloca. >> + if (BranchInst *BI = dyn_cast<BranchInst>(I)) { >> + BasicBlock *BB = BI->getSuccessor(Result ? 0 : 1); >> + if (BB->getSinglePredecessor()) >> + Reduction += InlineConstants::InstrCost * BB->size(); > > Shouldn't you use NumBBInsts[BB] rather than BB->size()? Good catch! Fixed in r148964. Nick From marina.yatsina at intel.com Wed Jan 25 05:56:48 2012 From: marina.yatsina at intel.com (Yatsina, Marina) Date: Wed, 25 Jan 2012 11:56:48 +0000 Subject: [llvm-commits] [llvm] r134741 - in /llvm/trunk/lib/Target/X86: MCTargetDesc/X86MCTargetDesc.cpp X86Subtarget.cpp References: <20110708211415.2D93E2A6C12C@llvm.org> <7DE70FDACDE4CD4887C4278C12A2E30506DE39@HASMSX104.ger.corp.intel.com> Message-ID: Hi, I have found a bug introduced by commit 134741. The commit added use of macros that are not defined on Windows and they are causing X86Subtarget to choose "generic" as the CPUName. I've opened Bug #11834 on the problem: http://www.llvm.org/bugs/show_bug.cgi?id=11834 I've also attached a fix to this mail and to the bug opened in bugzilla. Thank you, Marina. 
-----Original Message----- From: llvm-commits-bounces at cs.uiuc.edu [mailto:llvm-commits-bounces at cs.uiuc.edu] On Behalf Of Evan Cheng Sent: Saturday, July 09, 2011 00:14 To: llvm-commits at cs.uiuc.edu Subject: [llvm-commits] [llvm] r134741 - in /llvm/trunk/lib/Target/X86: MCTargetDesc/X86MCTargetDesc.cpp X86Subtarget.cpp Author: evancheng Date: Fri Jul 8 16:14:14 2011 New Revision: 134741 URL: http://llvm.org/viewvc/llvm-project?rev=134741&view=rev Log: For non-x86 host, used generic as CPU name. Modified: llvm/trunk/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp llvm/trunk/lib/Target/X86/X86Subtarget.cpp Modified: llvm/trunk/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp?rev=134741&r1=134740&r2=134741&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp (original) +++ llvm/trunk/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp Fri Jul 8 16:14:14 2011 @@ -140,8 +140,13 @@ } std::string CPUName = CPU; - if (CPUName.empty()) + if (CPUName.empty()) { +#if defined (__x86_64__) || defined(__i386__) CPUName = sys::getHostCPUName(); +#else + CPUName = "generic"; +#endif + } if (ArchFS.empty() && CPUName.empty() && hasX86_64()) // Auto-detect if host is 64-bit capable, it's the default if true. 
Modified: llvm/trunk/lib/Target/X86/X86Subtarget.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Subtarget.cpp?rev=134741&r1=134740&r2=134741&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86Subtarget.cpp (original) +++ llvm/trunk/lib/Target/X86/X86Subtarget.cpp Fri Jul 8 16:14:14 2011 @@ -258,12 +258,17 @@ ArchFS = FS; } - std::string CPUName = CPU; - if (CPUName.empty()) - CPUName = sys::getHostCPUName(); - // Determine default and user specified characteristics - if (!CPUName.empty() || !ArchFS.empty()) { + if (!ArchFS.empty()) { + std::string CPUName = CPU; + if (CPUName.empty()) { +#if defined (__x86_64__) || defined(__i386__) + CPUName = sys::getHostCPUName(); +#else + CPUName = "generic"; +#endif + } + // If feature string is not empty, parse features string. ParseSubtargetFeatures(CPUName, ArchFS); // All X86-64 CPUs also have SSE2, however user might request no SSE via _______________________________________________ llvm-commits mailing list llvm-commits at cs.uiuc.edu http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits --------------------------------------------------------------------- Intel Israel (74) Limited This e-mail and any attachments may contain confidential material for the sole use of the intended recipient(s). Any review or distribution by others is strictly prohibited. If you are not the intended recipient, please contact the sender and delete all copies. -------------- next part -------------- A non-text attachment was scrubbed... 
Name: ChooseCorrectCPUName.patch Type: application/octet-stream Size: 1218 bytes Desc: ChooseCorrectCPUName.patch Url : http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120125/a632d485/attachment.obj From eli.friedman at gmail.com Wed Jan 25 13:24:05 2012 From: eli.friedman at gmail.com (Eli Friedman) Date: Wed, 25 Jan 2012 11:24:05 -0800 Subject: [llvm-commits] [llvm] r148941 - in /llvm/trunk: lib/Analysis/InlineCost.cpp test/Transforms/Inline/alloca-bonus.ll In-Reply-To: <4F2050C7.1030009@mxc.ca> References: <20120125082740.C7BB02A6C12C@llvm.org> <4F2050C7.1030009@mxc.ca> Message-ID: On Wed, Jan 25, 2012 at 10:58 AM, Nick Lewycky wrote: > Eli Friedman wrote: >> >> On Wed, Jan 25, 2012 at 12:27 AM, Nick Lewycky wrote: >>> >>> Author: nicholas >>> Date: Wed Jan 25 02:27:40 2012 >>> New Revision: 148941 >>> >>> URL: http://llvm.org/viewvc/llvm-project?rev=148941&view=rev >>> Log: >>> Support pointer comparisons against constants, when looking at the >>> inline-cost >>> savings from a pointer argument becoming an alloca. Sometimes callees >>> will even >>> compare a pointer to null and then branch to an otherwise unreachable >>> block! >>> Detect these cases and compute the number of saved instructions, instead >>> of >>> bailing out and reporting no savings. >> >> >> [Comments inlined.] >> >>> + } else if (ICmpInst *ICI = dyn_cast(I)) { >>> + if (!isa(ICI->getOperand(1))) >>> + return 0; >>> + ICmpInsts.push_back(ICI); >> >> >> You probably want to restrict this to equality comparisons; I don't >> think we fold relational comparisons between an alloca and a constant >> in general. > > > We do. This fits in with the rule that you aren't allowed to guess an > alloca. Is that rule documented somewhere?
-Eli From eli.friedman at gmail.com Wed Jan 25 13:32:35 2012 From: eli.friedman at gmail.com (Eli Friedman) Date: Wed, 25 Jan 2012 11:32:35 -0800 Subject: [llvm-commits] Bug fix in double-to-int conversions codegen in AVX: please review In-Reply-To: References: Message-ID: On Wed, Jan 25, 2012 at 2:19 AM, Umansky, Victor wrote: > Hi > > The attached patch file includes a fix for the following bug in AVX codegen > for double-to-int conversions: > > "fptosi" and "fptoui" IR instructions are defined with round-to-zero > rounding mode. > Currently for AVX mode for <4xdouble> and <8xdouble> the "VCVTPD2DQ.128" > and "VCVTPD2DQ.256" instructions are selected (for "fp_to_sint" DAG node > operation ) by AVX codegen. However they use round-to-nearest-even rounding > mode. > Consequently, the conversion produces incorrect numbers. > > > The fix is to replace selection of VCVTPD2DQ instructions with VCVTTPD2DQ > instructions. The latter use truncate (i.e. round-to-zero) rounding mode. > As "fp_to_sint" DAG node operation is used only for lowering of "fptosi" > and "fptoui" IR instructions, the fix in X86InstrSSE.td definition file > doesn't have an impact on other LLVM flows. > > The patch includes changes in the .td file, LIT test for the changes and a > fix in a legacy LIT test (which produced asm code conflicting with LLVM IR > spec). > > I'd like to commit the fix to the LLVM trunk, and your feedback will be > mostly appreciated. --- test/CodeGen/X86/avx-fp2int.ll (revision 0) +++ test/CodeGen/X86/avx-fp2int.ll (revision 0) @@ -0,0 +1,19 @@ +;; Check that FP_TO_SINT and FP_TO_INT generate convert with truncate + +; RUN: llc < %s -mtriple=i386-apple-darwin10 -mcpu=corei7-avx -mattr=+avx | FileCheck %s I assume you mean FP_TO_UINT? Also, please make the RUN-line the first line of the file. Otherwise, patch looks good; please commit.
-Eli From grosbach at apple.com Wed Jan 25 13:52:01 2012 From: grosbach at apple.com (Jim Grosbach) Date: Wed, 25 Jan 2012 19:52:01 -0000 Subject: [llvm-commits] [llvm] r148969 - in /llvm/trunk: lib/Target/ARM/ARMInstrInfo.td lib/Target/ARM/AsmParser/ARMAsmParser.cpp test/MC/ARM/arm-it-block.s Message-ID: <20120125195201.A45BF2A6C12C@llvm.org> Author: grosbach Date: Wed Jan 25 13:52:01 2012 New Revision: 148969 URL: http://llvm.org/viewvc/llvm-project?rev=148969&view=rev Log: ARM assembly parsing and validation of IT instruction. "Although a Thumb2 instruction, the IT mnemonic shall be permitted in ARM mode, and the condition verified to match the condition code(s) on the following instruction(s)." PR11853 Added: llvm/trunk/test/MC/ARM/arm-it-block.s Modified: llvm/trunk/lib/Target/ARM/ARMInstrInfo.td llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp Modified: llvm/trunk/lib/Target/ARM/ARMInstrInfo.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrInfo.td?rev=148969&r1=148968&r2=148969&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrInfo.td (original) +++ llvm/trunk/lib/Target/ARM/ARMInstrInfo.td Wed Jan 25 13:52:01 2012 @@ -5179,3 +5179,7 @@ // "neg" is and alias for "rsb rd, rn, #0" def : ARMInstAlias<"neg${s}${p} $Rd, $Rm", (RSBri GPR:$Rd, GPR:$Rm, 0, pred:$p, cc_out:$s)>; + +// 'it' blocks in ARM mode just validate the predicates. The IT itself +// is discarded.
+def ITasm : ARMAsmPseudo<"it$mask $cc", (ins it_pred:$cc, it_mask:$mask)>; Modified: llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp?rev=148969&r1=148968&r2=148969&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp (original) +++ llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp Wed Jan 25 13:52:01 2012 @@ -5080,10 +5080,11 @@ const MCInstrDesc &MCID = getInstDesc(Inst.getOpcode()); SMLoc Loc = Operands[0]->getStartLoc(); // Check the IT block state first. - // NOTE: In Thumb mode, the BKPT instruction has the interesting property of - // being allowed in IT blocks, but not being predicable. It just always + // NOTE: BKPT instruction has the interesting property of being + // allowed in IT blocks, but not being predicable. It just always // executes. - if (inITBlock() && Inst.getOpcode() != ARM::tBKPT) { + if (inITBlock() && Inst.getOpcode() != ARM::tBKPT && + Inst.getOpcode() != ARM::BKPT) { unsigned bit = 1; if (ITState.FirstCond) ITState.FirstCond = false; @@ -7048,6 +7049,7 @@ } return false; } + case ARM::ITasm: case ARM::t2IT: { // The mask bits for all but the first condition are represented as // the low bit of the condition code value implies 't'. We currently @@ -7154,6 +7156,11 @@ // block. forwardITPosition(); + // ITasm is an ARM mode pseudo-instruction that just sets the ITblock and + // doesn't actually encode. 
+ if (Inst.getOpcode() == ARM::ITasm) + return false; + Out.EmitInstruction(Inst); return false; case Match_MissingFeature: Added: llvm/trunk/test/MC/ARM/arm-it-block.s URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/ARM/arm-it-block.s?rev=148969&view=auto ============================================================================== --- llvm/trunk/test/MC/ARM/arm-it-block.s (added) +++ llvm/trunk/test/MC/ARM/arm-it-block.s Wed Jan 25 13:52:01 2012 @@ -0,0 +1,11 @@ +@ RUN: llvm-mc -triple=armv7-apple-darwin -show-encoding < %s | FileCheck %s + .syntax unified + .globl _func + +_func: +@ CHECK: _func: + it eq + moveq r2, r3 +@ 'it' is parsed but not encoded. +@ CHECK-NOT: it +@ CHECK: moveq r2, r3 @ encoding: [0x03,0x20,0xa0,0x01] From dblaikie at gmail.com Wed Jan 25 13:57:38 2012 From: dblaikie at gmail.com (David Blaikie) Date: Wed, 25 Jan 2012 11:57:38 -0800 Subject: [llvm-commits] Twine/StringRef enhancements & usage In-Reply-To: References: Message-ID: Bumping just to get this update off my todo list. Applying this approach (passing Twine by default, using "TwineString" whenever a StringRef is required internally (to simplify the stringification of the twine) & avoiding it when we already have a std::string that needs to be populated, etc) has some issues: 1) inherently this would increase stack usage across LLVM - having to allocate the SmallString inside TwineString for every passed string. This might be a non-trivial tradeoff if we applied this universally (could be a counterargument for the "we should have a uniform way to pass strings rather than having callees have to make an informed decision about whether their callers are likely to pass concatenated expressions" - which is a pity. 
It'd be nice if we had a reliable, consistent approach) 2) currently (see the small clang patch for an example) since Twine is implicitly constructed from a StringRef reference and SmallString has an implicit conversion to StringRef - you can't just pass a SmallString where a Twine is required (it would require two user-defined conversions). We could flesh out Twine a bit to account for this (& the various other things StringRef can implicitly convert from) 3) I've added an extra constructor template to StringRef for arrays - so that we don't have to use strlen on all those string literals that get converted to StringRef. It has an assert (to catch the case where you pass a big buffer array in, not a simple string literal) which is no more costly than the original constructor such code would've called in the worst case of an asserts build anyway. Anyway - just some ideas. On Fri, Sep 2, 2011 at 12:38 AM, David Blaikie wrote: > [+Chris, since he seemed to take an interest in the design discussion > surrounding this originally] > > Bump/ping/etc. > > This has been around for a few weeks now. Is this the sort of stuff > that I'd be better off getting getting commit access for & just having > it post-commit reviewed? > > - David > > On Wed, Aug 24, 2011 at 9:40 PM, David Blaikie wrote: >> >> >> On Wed, Aug 24, 2011 at 3:38 PM, Jordy Rose wrote: >>> >>> A couple comments, though this certainly isn't an area of the code I'm >>> that familiar with. >> >> Thanks for looking >> >>> >>> - Instead of appendTo(string&) and assignTo(string&), why not just add >>> operator+=(string&, const Twine&) and operator=(string&, const Twine&) ? >>> Seems more C++ to me. >> >> You're right about appendTo, op+=(std::string&, const Twine&) could just be >> a friend function of Twine. I'll make that change. >> Unfortunately op=(std::string&, const Twine&) can't be done because op= must >> be a non-static member function (of std::string). 
This is why I hadn't done >> appendTo as op+= too - I'd assumed it had the same restriction, though that >> doesn't appear to be the case. >> >>> >>> - Re: toNullTerminatedStringRef: A StringRef created from a >>> null-terminated C string drops the null terminator, so you can't just "test >>> the last character" to see if it's a null. In fact, having the last >>> character of a StringRef be null is probably a bug. (Of course, you can't >>> test /past/ the last character either, because one byte past valid memory is >>> guaranteed to be a valid address but not guaranteed to be dereferenceable.) >> >> Agreed - pity, though. [I wonder if we could squeeze in a bit (the high bit >> of the length?) somewhere to store "is this null terminated" - it seems a >> pity to lose that so often/so easily when going into the StringRef domain] >> >>> >>> - You've got several copies of SafeBool.h in the file. I'm guessing this >>> is the result of reverting and then reapplying patches. (I do this all the >>> time too.) >> >> Hrm, thanks for that. I'll make a fresh diff. I've done that & manually >> inspected the diff file & I only see SafeBool.h listed once now. >> >>> >>> - TwineString definitely seems evil, but I haven't really thought about it >>> hard enough to give a good reason why. >> >> Oh, it is rather evil, just a moderately quick & dirty, but not utterly >> broken, solution. The most concrete reason I can come up with is that it >> muddies StringRef's semantics, mostly - TwineString is a StringRef, but it >> doesn't at all have the semantics of a StringRef, in fact it has the >> semantics of a string (mostly... some of the time... if it's not just >> actually a StringRef) >> To quote Chris from a previous email where this was discussed: >> >> "While it is kinda gross, a subclass of StringRef is probably the lowest >> friction path to do this." - >> http://lists.cs.uiuc.edu/pipermail/llvmdev/2011-July/041804.html >> Not to say that I'm not open to other ideas... 
>> >>> >>> I don't get why you're using a SmallVectorImpl instead of a SmallVector or >>> SmallString, though. >> >> I think I just used SmallVectorImpl because it's the type that toStringRef >> required - but SmallString would make more sense if it's got no additional >> overhead/quirks (or SmallVector, presumably it really doesn't have extra >> overhead). >> Settled on SmallString. >> - David >> -------------- next part -------------- A non-text attachment was scrubbed... Name: stringref_twine.diff Type: application/octet-stream Size: 388 bytes Desc: not available Url : http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120125/46427ce8/attachment.obj -------------- next part -------------- A non-text attachment was scrubbed... Name: stringref_twine_clang.diff Type: application/octet-stream Size: 666 bytes Desc: not available Url : http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120125/46427ce8/attachment-0001.obj From dblaikie at gmail.com Wed Jan 25 14:05:03 2012 From: dblaikie at gmail.com (David Blaikie) Date: Wed, 25 Jan 2012 12:05:03 -0800 Subject: [llvm-commits] Twine/StringRef enhancements & usage In-Reply-To: References: Message-ID: Now with real patches. On Wed, Jan 25, 2012 at 11:57 AM, David Blaikie wrote: > Bumping just to get this update off my todo list. > > Applying this approach (passing Twine by default, using "TwineString" > whenever a StringRef is required internally (to simplify the > stringification of the twine) & avoiding it when we already have a > std::string that needs to be populated, etc) has some issues: > > 1) inherently this would increase stack usage across LLVM - having to > allocate the SmallString inside TwineString for every passed string. 
> This might be a non-trivial tradeoff if we applied this universally > (could be a counterargument for the "we should have a uniform way to > pass strings rather than having callees have to make an informed > decision about whether their callers are likely to pass concatenated > expressions" - which is a pity. It'd be nice if we had a reliable, > consistent approach) > > 2) currently (see the small clang patch for an example) since Twine is > implicitly constructed from a StringRef reference and SmallString has > an implicit conversion to StringRef - you can't just pass a > SmallString where a Twine is required (it would require two > user-defined conversions). We could flesh out Twine a bit to account > for this (& the various other things StringRef can implicitly convert > from) > > 3) I've added an extra constructor template to StringRef for arrays - > so that we don't have to use strlen on all those string literals that > get converted to StringRef. It has an assert (to catch the case where > you pass a big buffer array in, not a simple string literal) which is > no more costly than the original constructor such code would've called > in the worst case of an asserts build anyway. > > Anyway - just some ideas. > > On Fri, Sep 2, 2011 at 12:38 AM, David Blaikie wrote: >> [+Chris, since he seemed to take an interest in the design discussion >> surrounding this originally] >> >> Bump/ping/etc. >> >> This has been around for a few weeks now. Is this the sort of stuff >> that I'd be better off getting getting commit access for & just having >> it post-commit reviewed? >> >> - David >> >> On Wed, Aug 24, 2011 at 9:40 PM, David Blaikie wrote: >>> >>> >>> On Wed, Aug 24, 2011 at 3:38 PM, Jordy Rose wrote: >>>> >>>> A couple comments, though this certainly isn't an area of the code I'm >>>> that familiar with. 
>>> >>> Thanks for looking >>> >>>> >>>> - Instead of appendTo(string&) and assignTo(string&), why not just add >>>> operator+=(string&, const Twine&) and operator=(string&, const Twine&) ? >>>> Seems more C++ to me. >>> >>> You're right about appendTo, op+=(std::string&, const Twine&) could just be >>> a friend function of Twine. I'll make that change. >>> Unfortunately op=(std::string&, const Twine&) can't be done because op= must >>> be a non-static member function (of std::string). This is why I hadn't done >>> appendTo as op+= too - I'd assumed it had the same restriction, though that >>> doesn't appear to be the case. >>> >>>> >>>> - Re: toNullTerminatedStringRef: A StringRef created from a >>>> null-terminated C string drops the null terminator, so you can't just "test >>>> the last character" to see if it's a null. In fact, having the last >>>> character of a StringRef be null is probably a bug. (Of course, you can't >>>> test /past/ the last character either, because one byte past valid memory is >>>> guaranteed to be a valid address but not guaranteed to be dereferenceable.) >>> >>> Agreed - pity, though. [I wonder if we could squeeze in a bit (the high bit >>> of the length?) somewhere to store "is this null terminated" - it seems a >>> pity to lose that so often/so easily when going into the StringRef domain] >>> >>>> >>>> - You've got several copies of SafeBool.h in the file. I'm guessing this >>>> is the result of reverting and then reapplying patches. (I do this all the >>>> time too.) >>> >>> Hrm, thanks for that. I'll make a fresh diff. I've done that & manually >>> inspected the diff file & I only see SafeBool.h listed once now. >>> >>>> >>>> - TwineString definitely seems evil, but I haven't really thought about it >>>> hard enough to give a good reason why. >>> >>> Oh, it is rather evil, just a moderately quick & dirty, but not utterly >>> broken, solution. 
The most concrete reason I can come up with is that it >>> muddies StringRef's semantics, mostly - TwineString is a StringRef, but it >>> doesn't at all have the semantics of a StringRef, in fact it has the >>> semantics of a string (mostly... some of the time... if it's not just >>> actually a StringRef) >>> To quote Chris from a previous email where this was discussed: >>> >>> "While it is kinda gross, a subclass of StringRef is probably the lowest >>> friction path to do this." - >>> http://lists.cs.uiuc.edu/pipermail/llvmdev/2011-July/041804.html >>> Not to say that I'm not open to other ideas... >>> >>>> >>>> I don't get why you're using a SmallVectorImpl instead of a SmallVector or >>>> SmallString, though. >>> >>> I think I just used SmallVectorImpl because it's the type that toStringRef >>> required - but SmallString would make more sense if it's got no additional >>> overhead/quirks (or SmallVector, presumably it really doesn't have extra >>> overhead). >>> Settled on SmallString. >>> - David >>> -------------- next part -------------- A non-text attachment was scrubbed... Name: stringref_twine.diff Type: application/octet-stream Size: 34500 bytes Desc: not available Url : http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120125/d47f51ea/attachment.obj -------------- next part -------------- A non-text attachment was scrubbed... 
Name: stringref_twine_clang.diff Type: application/octet-stream Size: 659 bytes Desc: not available Url : http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120125/d47f51ea/attachment-0001.obj From lhames at gmail.com Wed Jan 25 14:34:29 2012 From: lhames at gmail.com (Lang Hames) Date: Wed, 25 Jan 2012 12:34:29 -0800 Subject: [llvm-commits] [llvm] r148408 - /llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp In-Reply-To: <36787BD6-3935-408F-8D99-87B8D06A1F48@2pi.dk> References: <20120118194831.B1E472A6C12C@llvm.org> <520A0357-4C35-4667-8B34-3F832750CE41@apple.com> <36787BD6-3935-408F-8D99-87B8D06A1F48@2pi.dk> Message-ID: Oops - this slipped through the cracks. Sorry about the late reply. Jakob nailed it (mostly) - you'd only want to continue the loop to trigger the assert. The change is correct though. Beware the double negative: The original, "#ifndef NDEBUG", reads as "if not not debugging", so the break only went in in debug mode. :) - Lang. On Wed, Jan 18, 2012 at 5:46 PM, Jakob Stoklund Olesen wrote: > > On Jan 18, 2012, at 2:08 PM, Bill Wendling wrote: > > > On Jan 18, 2012, at 11:48 AM, Lang Hames wrote: > > > >> Author: lhames > >> Date: Wed Jan 18 13:48:31 2012 > >> New Revision: 148408 > >> > >> URL: http://llvm.org/viewvc/llvm-project?rev=148408&view=rev > >> Log: > >> Fixed macro condition. 
> >> > >> Modified: > >> llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp > >> > >> Modified: llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp > >> URL: > http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp?rev=148408&r1=148407&r2=148408&view=diff > >> > ============================================================================== > >> --- llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp (original) > >> +++ llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp Wed Jan 18 13:48:31 > 2012 > >> @@ -823,7 +823,7 @@ > >> assert(!RegOp && > >> "Can't rematerialize instruction with multiple register > operand!"); > >> RegOp = MO.getReg(); > >> -#ifndef NDEBUG > >> +#ifdef NDEBUG > >> break; > >> #endif > >> } > >> > > What's the purpose of having this 'break' statement only in release > mode? That would appear to cause possible different behavior between > release and debug modes... > > The only purpose of continuing the loop would be to fire the assertion > above, so I actually think Lang's change is wrong. > > It's pretty gross, particularly without a comment. > > /jakob > > -------------- next part -------------- An HTML attachment was scrubbed... URL: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120125/4e13c7bb/attachment-0001.html From mcrosier at apple.com Wed Jan 25 14:57:36 2012 From: mcrosier at apple.com (Chad Rosier) Date: Wed, 25 Jan 2012 12:57:36 -0800 Subject: [llvm-commits] [llvm] r148408 - /llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp In-Reply-To: References: <20120118194831.B1E472A6C12C@llvm.org> <520A0357-4C35-4667-8B34-3F832750CE41@apple.com> <36787BD6-3935-408F-8D99-87B8D06A1F48@2pi.dk> Message-ID: <0C19C77E-979A-44AD-8979-985AC90F5181@apple.com> Lang, I believe the change is correct. Per Jakob's comment, would you mind adding a comment to the source? Chad On Jan 25, 2012, at 12:34 PM, Lang Hames wrote: > Oops - this slipped through the cracks. Sorry about the late reply. 
> > Jakob nailed it (mostly) - you'd only want to continue the loop to trigger the assert. > > The change is correct though. Beware the double negative: The original, "#ifndef NDEBUG", reads as "if not not debugging", so the break only went in in debug mode. :) > > - Lang. > > On Wed, Jan 18, 2012 at 5:46 PM, Jakob Stoklund Olesen wrote: > > On Jan 18, 2012, at 2:08 PM, Bill Wendling wrote: > > > On Jan 18, 2012, at 11:48 AM, Lang Hames wrote: > > > >> Author: lhames > >> Date: Wed Jan 18 13:48:31 2012 > >> New Revision: 148408 > >> > >> URL: http://llvm.org/viewvc/llvm-project?rev=148408&view=rev > >> Log: > >> Fixed macro condition. > >> > >> Modified: > >> llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp > >> > >> Modified: llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp > >> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp?rev=148408&r1=148407&r2=148408&view=diff > >> ============================================================================== > >> --- llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp (original) > >> +++ llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp Wed Jan 18 13:48:31 2012 > >> @@ -823,7 +823,7 @@ > >> assert(!RegOp && > >> "Can't rematerialize instruction with multiple register operand!"); > >> RegOp = MO.getReg(); > >> -#ifndef NDEBUG > >> +#ifdef NDEBUG > >> break; > >> #endif > >> } > >> > > What's the purpose of having this 'break' statement only in release mode? That would appear to cause possible different behavior between release and debug modes... > > The only purpose of continuing the loop would be to fire the assertion above, so I actually think Lang's change is wrong. > > It's pretty gross, particularly without a comment. > > /jakob > > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits -------------- next part -------------- An HTML attachment was scrubbed... 
URL: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120125/f58cba53/attachment.html From baldrick at free.fr Wed Jan 25 15:16:45 2012 From: baldrick at free.fr (Duncan Sands) Date: Wed, 25 Jan 2012 21:16:45 -0000 Subject: [llvm-commits] [dragonegg] r148980 - /dragonegg/trunk/src/Backend.cpp Message-ID: <20120125211645.72D992A6C12C@llvm.org> Author: baldrick Date: Wed Jan 25 15:16:45 2012 New Revision: 148980 URL: http://llvm.org/viewvc/llvm-project?rev=148980&view=rev Log: On mingw, gcc decorates function names with @8 etc, then LLVM codegen does it again: @8 at 8. Tell gcc not to decorate and just let LLVM do it. Modified: dragonegg/trunk/src/Backend.cpp Modified: dragonegg/trunk/src/Backend.cpp URL: http://llvm.org/viewvc/llvm-project/dragonegg/trunk/src/Backend.cpp?rev=148980&r1=148979&r2=148980&view=diff ============================================================================== --- dragonegg/trunk/src/Backend.cpp (original) +++ dragonegg/trunk/src/Backend.cpp Wed Jan 25 15:16:45 2012 @@ -85,6 +85,9 @@ #error Unsupported GCC major version #endif +// TODO: In GCC, add targhooks.h to the list of plugin headers and remove this. +extern "C" tree default_mangle_decl_assembler_name (tree, tree); + // Non-zero if libcalls should not be simplified. int flag_no_simplify_libcalls; @@ -1406,6 +1409,10 @@ // Ensure that thunks are turned into functions rather than output directly // as assembler. targetm.asm_out.can_output_mi_thunk = no_target_thunks; + + // Ensure that GCC doesn't decorate stdcall and fastcall function names: + // LLVM codegen takes care of this, and we don't want them decorated twice. 
+ targetm.mangle_decl_assembler_name = default_mangle_decl_assembler_name; } From lhames at gmail.com Wed Jan 25 15:53:24 2012 From: lhames at gmail.com (Lang Hames) Date: Wed, 25 Jan 2012 21:53:24 -0000 Subject: [llvm-commits] [llvm] r148984 - /llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp Message-ID: <20120125215324.4238E2A6C12C@llvm.org> Author: lhames Date: Wed Jan 25 15:53:23 2012 New Revision: 148984 URL: http://llvm.org/viewvc/llvm-project?rev=148984&view=rev Log: Always break upon finding a vreg operand (in Release as well as +Asserts). Remove assertion which can no longer trigger. Modified: llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp Modified: llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp?rev=148984&r1=148983&r2=148984&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp (original) +++ llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp Wed Jan 25 15:53:23 2012 @@ -819,13 +819,8 @@ if (TargetRegisterInfo::isPhysicalRegister(Reg) && !allocatableRegs_[Reg]) continue; - // FIXME: For now, only remat MI with at most one register operand. - assert(!RegOp && - "Can't rematerialize instruction with multiple register operand!"); RegOp = MO.getReg(); -#ifdef NDEBUG - break; -#endif + break; // Found vreg operand - leave the loop. 
} return RegOp; } From lhames at gmail.com Wed Jan 25 15:58:00 2012 From: lhames at gmail.com (Lang Hames) Date: Wed, 25 Jan 2012 13:58:00 -0800 Subject: [llvm-commits] [llvm] r148408 - /llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp In-Reply-To: <0C19C77E-979A-44AD-8979-985AC90F5181@apple.com> References: <20120118194831.B1E472A6C12C@llvm.org> <520A0357-4C35-4667-8B34-3F832750CE41@apple.com> <36787BD6-3935-408F-8D99-87B8D06A1F48@2pi.dk> <0C19C77E-979A-44AD-8979-985AC90F5181@apple.com> Message-ID: On Jakob's suggestion I've made the break unconditional and removed the assert in r148984. - Lang. On Wed, Jan 25, 2012 at 12:57 PM, Chad Rosier wrote: > Lang, > I believe the change is correct. Per Jakob's comment, would you mind > adding a comment to the source? > > Chad > > On Jan 25, 2012, at 12:34 PM, Lang Hames wrote: > > Oops - this slipped through the cracks. Sorry about the late reply. > > Jakob nailed it (mostly) - you'd only want to continue the loop to trigger > the assert. > > The change is correct though. Beware the double negative: The original, > "#ifndef NDEBUG", reads as "if not not debugging", so the break only went > in in debug mode. :) > > - Lang. > > On Wed, Jan 18, 2012 at 5:46 PM, Jakob Stoklund Olesen wrote: > >> >> On Jan 18, 2012, at 2:08 PM, Bill Wendling wrote: >> >> > On Jan 18, 2012, at 11:48 AM, Lang Hames wrote: >> > >> >> Author: lhames >> >> Date: Wed Jan 18 13:48:31 2012 >> >> New Revision: 148408 >> >> >> >> URL: http://llvm.org/viewvc/llvm-project?rev=148408&view=rev >> >> Log: >> >> Fixed macro condition. 
>> >> >> >> Modified: >> >> llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp >> >> >> >> Modified: llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp >> >> URL: >> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp?rev=148408&r1=148407&r2=148408&view=diff >> >> >> ============================================================================== >> >> --- llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp (original) >> >> +++ llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp Wed Jan 18 >> 13:48:31 2012 >> >> @@ -823,7 +823,7 @@ >> >> assert(!RegOp && >> >> "Can't rematerialize instruction with multiple register >> operand!"); >> >> RegOp = MO.getReg(); >> >> -#ifndef NDEBUG >> >> +#ifdef NDEBUG >> >> break; >> >> #endif >> >> } >> >> >> > What's the purpose of having this 'break' statement only in release >> mode? That would appear to cause possible different behavior between >> release and debug modes... >> >> The only purpose of continuing the loop would be to fire the assertion >> above, so I actually think Lang's change is wrong. >> >> It's pretty gross, particularly without a comment. >> >> /jakob >> >> > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits > > > -------------- next part -------------- An HTML attachment was scrubbed... 
URL: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120125/efdd1a67/attachment.html From grosbach at apple.com Wed Jan 25 16:00:23 2012 From: grosbach at apple.com (Jim Grosbach) Date: Wed, 25 Jan 2012 22:00:23 -0000 Subject: [llvm-commits] [llvm] r148985 - in /llvm/trunk: bindings/ocaml/llvm/META.llvm.in docs/GettingStarted.html docs/GettingStartedVS.html docs/doxygen.header docs/doxygen.intro lib/Support/CommandLine.cpp llvm.spec.in Message-ID: <20120125220023.B7CAC2A6C12C@llvm.org> Author: grosbach Date: Wed Jan 25 16:00:23 2012 New Revision: 148985 URL: http://llvm.org/viewvc/llvm-project?rev=148985&view=rev Log: Tidy up. s/Low Level Virtual Machine/LLVM/. LLVM isn't an acronym anymore. Modified: llvm/trunk/bindings/ocaml/llvm/META.llvm.in llvm/trunk/docs/GettingStarted.html llvm/trunk/docs/GettingStartedVS.html llvm/trunk/docs/doxygen.header llvm/trunk/docs/doxygen.intro llvm/trunk/lib/Support/CommandLine.cpp llvm/trunk/llvm.spec.in Modified: llvm/trunk/bindings/ocaml/llvm/META.llvm.in URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/bindings/ocaml/llvm/META.llvm.in?rev=148985&r1=148984&r2=148985&view=diff ============================================================================== --- llvm/trunk/bindings/ocaml/llvm/META.llvm.in (original) +++ llvm/trunk/bindings/ocaml/llvm/META.llvm.in Wed Jan 25 16:00:23 2012 @@ -1,6 +1,6 @@ name = "llvm" version = "@PACKAGE_VERSION@" -description = "Low Level Virtual Machine OCaml bindings" +description = "LLVM OCaml bindings" archive(byte) = "llvm.cma" archive(native) = "llvm.cmxa" directory = "." Modified: llvm/trunk/docs/GettingStarted.html URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/docs/GettingStarted.html?rev=148985&r1=148984&r2=148985&view=diff ============================================================================== --- llvm/trunk/docs/GettingStarted.html (original) +++ llvm/trunk/docs/GettingStarted.html Wed Jan 25 16:00:23 2012 @@ -82,10 +82,9 @@

First, LLVM comes in three pieces. The first piece is the LLVM suite. This contains all of the tools, libraries, and header files -needed to use the low level virtual machine. It contains an -assembler, disassembler, bitcode analyzer and bitcode optimizer. It -also contains basic regression tests that can be used to test the LLVM -tools and the GCC front end.

+needed to use LLVM. It contains an assembler, disassembler, bitcode +analyzer and bitcode optimizer. It also contains basic regression tests that +can be used to test the LLVM tools and the GCC front end.

The second piece is the GCC front end. This component provides a version of GCC that compiles C and C++ code into LLVM bitcode. Currently, the GCC front Modified: llvm/trunk/docs/GettingStartedVS.html URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/docs/GettingStartedVS.html?rev=148985&r1=148984&r2=148985&view=diff ============================================================================== --- llvm/trunk/docs/GettingStartedVS.html (original) +++ llvm/trunk/docs/GettingStartedVS.html Wed Jan 25 16:00:23 2012 @@ -44,7 +44,7 @@

There are many different projects that compose LLVM. The first is the LLVM suite. This contains all of the tools, libraries, and header files needed to - use the low level virtual machine. It contains an assembler, disassembler, + use LLVM. It contains an assembler, disassembler, bitcode analyzer and bitcode optimizer. It also contains a test suite that can be used to test the LLVM tools.

Modified: llvm/trunk/docs/doxygen.header URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/docs/doxygen.header?rev=148985&r1=148984&r2=148985&view=diff ============================================================================== --- llvm/trunk/docs/doxygen.header (original) +++ llvm/trunk/docs/doxygen.header Wed Jan 25 16:00:23 2012 @@ -2,7 +2,7 @@ - + LLVM: $title Modified: llvm/trunk/docs/doxygen.intro URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/docs/doxygen.intro?rev=148985&r1=148984&r2=148985&view=diff ============================================================================== --- llvm/trunk/docs/doxygen.intro (original) +++ llvm/trunk/docs/doxygen.intro Wed Jan 25 16:00:23 2012 @@ -1,7 +1,7 @@ -/// @mainpage Low Level Virtual Machine +/// @mainpage LLVM /// /// @section main_intro Introduction -/// Welcome to the Low Level Virtual Machine (LLVM). +/// Welcome to LLVM. /// /// This documentation describes the @b internal software that makes /// up LLVM, not the @b external use of LLVM. 
There are no instructions Modified: llvm/trunk/lib/Support/CommandLine.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/CommandLine.cpp?rev=148985&r1=148984&r2=148985&view=diff ============================================================================== --- llvm/trunk/lib/Support/CommandLine.cpp (original) +++ llvm/trunk/lib/Support/CommandLine.cpp Wed Jan 25 16:00:23 2012 @@ -1345,7 +1345,7 @@ public: void print() { raw_ostream &OS = outs(); - OS << "Low Level Virtual Machine (http://llvm.org/):\n" + OS << "LLVM (http://llvm.org/):\n" << " " << PACKAGE_NAME << " version " << PACKAGE_VERSION; #ifdef LLVM_VERSION_INFO OS << LLVM_VERSION_INFO; Modified: llvm/trunk/llvm.spec.in URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/llvm.spec.in?rev=148985&r1=148984&r2=148985&view=diff ============================================================================== --- llvm/trunk/llvm.spec.in (original) +++ llvm/trunk/llvm.spec.in Wed Jan 25 16:00:23 2012 @@ -1,7 +1,7 @@ Name: @PACKAGE_NAME@ Version: @PACKAGE_VERSION@ Release: 0 -Summary: The Low Level Virtual Machine (An Optimizing Compiler Infrastructure) +Summary: LLVM (An Optimizing Compiler Infrastructure) License: University of Illinois/NCSA Open Source License Vendor: None (open source) Group: Development/Compilers From nlewycky at google.com Wed Jan 25 16:04:23 2012 From: nlewycky at google.com (Nick Lewycky) Date: Wed, 25 Jan 2012 14:04:23 -0800 Subject: [llvm-commits] Proposal/patch: Enable bitcode streaming In-Reply-To: References: <583C473A-9640-46A7-89FE-63BF0B97E6C6@apple.com> Message-ID: On 20 January 2012 11:55, Derek Schuff wrote: > And finally, the StreamingMemoryObject implementation, modified > BitcodeReader, and modifed llvm-dis.cpp using the streaming interface. > Please take a look > Overall this looks good. I'm especially happy with some of the refactoring inside BitcodeReader! 
Comments: --- a/include/llvm/Bitcode/ReaderWriter.h +++ b/include/llvm/Bitcode/ReaderWriter.h @@ -21,31 +21,41 @@ namespace llvm { class MemoryBuffer; class ModulePass; class BitstreamWriter; + class DataStreamer; class LLVMContext; class raw_ostream; I realize these were unsorted when you got here, but please alphabetize them. + /// If 'verify' is true, check that the file fits in the buffer. + static inline bool SkipBitcodeWrapperHeader(const unsigned char *&BufPtr, + const unsigned char *&BufEnd, + bool Verify) { I didn't really understand the comment. I think what you're doing is disabling the check that the buffer contained the whole header. Could you make it "bool SkipPastEnd" instead? +DataStreamer *getDataFileStreamer(const std::string &Filename, + std::string *Err); Please line up the argument to the (. --- a/include/llvm/Support/StreamableMemoryObject.h +++ b/include/llvm/Support/StreamableMemoryObject.h @@ -12,6 +12,9 @@ #define STREAMABLEMEMORYOBJECT_H_ #include "llvm/Support/MemoryObject.h" +#include +#include "llvm/ADT/OwningPtr.h" +#include "llvm/Support/DataStream.h" ADT, Support, then headers. See http://llvm.org/docs/CodingStandards.html#scf_includes . +/// StreamingMemoryObject - interface to data which is actually streamed from +/// at DataStreamer. In addition to inherited members, it has the +/// dropLeadingBytes and setKnownObjectSize methods which are not applicable +/// to non-streamed objects +class StreamingMemoryObject : public StreamableMemoryObject { I think that's a full sentence missing a period. It feels *awfully weird* to have StreamableMemoryObject and StreamingMemoryObject, and both of them are interfaces. The comment doesn't seem to sufficiently explain what's going on here. (The DataStreamer can stream from a Streaming but not with a Streamable? What?) + // fetch enough bytes such that Pos can be read or EOF is reached + // (i.e. BytesRead > Pos). Return true if Pos can be read. 
+ // Unlike most of the functions in BitcodeReader, returns true on success. + bool fetchToPos(size_t Pos) { Comment should start with a capital. + bool fetchToPos(size_t Pos) { + if (EOFReached) return Pos < ObjectSize; + while (Pos >= BytesRead) { + Bytes.resize(BytesRead + kChunkSize); + size_t bytes = Streamer->GetBytes(&Bytes[BytesRead + BytesSkipped], + kChunkSize); Why is kChunkSize so special? Why not ask for all the bytes up until Pos? The comment on DataStreamer::GetBytes doesn't give any reason not to ask for as many bytes as you want? +bool BitcodeReader::SuspendModuleParse() { + // save our current position + NextUnreadBit = Stream.GetCurrentBitNo(); + return false; +} What's up with that returning bool? + // ParseModule will parse the next body in the stream and set its + // position in the DeferredFunctionInfo map Sentence needs period. + unsigned char buf[16]; + if (Bytes->readBytes(0, 16, buf, NULL) == -1) + return Error("Bitcode stream must be at least 16 bytes in length"); + + if (!isBitcode(buf, buf + 16)) { + return Error("Invalid bitcode signature"); + } So, uh, braces or no braces around one-line return statements? :-) +Module *llvm::getStreamedBitcodeModule(const std::string &name, + DataStreamer *streamer, + LLVMContext &Context, + std::string *ErrMsg) { These args don't line up. --- /dev/null +++ b/lib/Support/DataStream.cpp @@ -0,0 +1,96 @@ +//===--- llvm/Support/DataStream.cpp - Lazy streamed Data -*- C++ -*-===// Don't include emacs mode markers (the -*- C++ -*- bit) on .cpp files, only on .h files. +// Very simple stream backed by a file. Mostly useful for stdin and debugging; +// actual file access is probably still best done with mmap +class DataFileStreamer : public DataStreamer { + int Fd; Sentence seeking full stop. +DataStreamer *getDataFileStreamer(const std::string &Filename, + std::string *StrError) { Line up to the ( again. 
+ if (e != success) { + *StrError = std::string() + "Could not open " + Filename + ": " + + e.message() + "\n"; + return NULL; + } Optional: std::string("Could not open ") + Filename, and also putting the + on the previous line instead of starting a line with the operator. --- a/lib/Support/StreamableMemoryObject.cpp +++ b/lib/Support/StreamableMemoryObject.cpp In this file you added some spurious blank lines. Please don't do that. +bool StreamingMemoryObject::isObjectEnd(uint64_t address) { + if (ObjectSize) return address == ObjectSize; + fetchToPos(address); + return address == BytesRead; +} Shouldn't that end with "return address == ObjectSize"? If the file is larger than 'address' bytes, won't this read up to address bytes, then stop, leaving BytesRead == address? ObjectSize on the other hand isn't set until EOF is reached. +int StreamingMemoryObject::readBytes(uint64_t address, + uint64_t size, + uint8_t* buf, + uint64_t* copied) { Misaligned. + //StreamableMemoryObject + Please remove. Nick > thanks, > -Derek > > > On Wed, Jan 18, 2012 at 4:05 PM, Derek Schuff wrote: > >> Hi Chris & Nick, >> Attached is a very slightly updated version of patch number 1, and patch >> number 2, with a new StreamableMemoryObject (derived from MemoryObject), >> suitable for streaming usage. >> One consequence of deriving from MemoryObject is that I had to make the >> getExtent and readByte/readBytes methods of MemoryObject no longer const, >> since they are definitely not const in the StreamableMemoryObject. This >> resulted in having to remove const in several usages of MemoryObject. It >> seemed less bad than adding a bunch of mutable data members in >> StreamableMemoryObject or creating a near-duplicate of MemoryObject. >> >> The third patch is nearly ready as well, please let me know what you >> think. 
>> thanks, >> -Derek >> >> >> On Thu, Jan 12, 2012 at 3:09 PM, Derek Schuff wrote: >> >>> >>>> Overall, my recommendation would be to split this into three patches: >>>> the first patch would just increase the abstraction level of the bitcode >>>> reader, by adding various predicates that you need and tidy things up. The >>>> second would extend MemoryObject as needed to add the functionality that >>>> you need. The third would actually switch the meat of the bitcode reader >>>> to be lazy-streaming, and switch a tool to use it. >>>> >>>> >>> >>> Attached is patch 1 as listed above: a refactor of BistreamCursor to use >>> an offset rather than raw pointers, and abstract the relevant operations >>> into functions. For now BitstreamReader is the same. I've tested it locally >>> and it works in isolation and is hopefully ready to apply. Patch 2, a >>> subclass of MemoryObject (StreamableMemoryObject) to replace the >>> BitstreamBytes class here, is upcoming next. >>> >>> thanks, >>> -Derek >>> >> >> > -------------- next part -------------- An HTML attachment was scrubbed... URL: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120125/bb5fe28b/attachment.html From stoklund at 2pi.dk Wed Jan 25 16:06:17 2012 From: stoklund at 2pi.dk (Jakob Stoklund Olesen) Date: Wed, 25 Jan 2012 14:06:17 -0800 Subject: [llvm-commits] [llvm] r148408 - /llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp In-Reply-To: References: <20120118194831.B1E472A6C12C@llvm.org> <520A0357-4C35-4667-8B34-3F832750CE41@apple.com> <36787BD6-3935-408F-8D99-87B8D06A1F48@2pi.dk> <0C19C77E-979A-44AD-8979-985AC90F5181@apple.com> Message-ID: <28905A1B-ADB6-43D6-9F05-B04C1F05580A@2pi.dk> On Jan 25, 2012, at 1:58 PM, Lang Hames wrote: > On Jakob's suggestion I've made the break unconditional and removed the assert in r148984. Thanks, Lang. Since this code is no longer used to perform rematerialization, I don't think the assertion is worth the required acrobatics. 
This function is only used to compute spill weights. /jakob From lhames at gmail.com Wed Jan 25 16:11:06 2012 From: lhames at gmail.com (Lang Hames) Date: Wed, 25 Jan 2012 22:11:06 -0000 Subject: [llvm-commits] [llvm] r148986 - /llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp Message-ID: <20120125221106.8386A2A6C12C@llvm.org> Author: lhames Date: Wed Jan 25 16:11:06 2012 New Revision: 148986 URL: http://llvm.org/viewvc/llvm-project?rev=148986&view=rev Log: Don't add live ranges for aliases of physregs that are live in to the function. They don't appear to be used, and are inconsistent with handling of other physreg intervals (i.e. intervals that are not live-in) where ranges are not inserted for aliases. Modified: llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp Modified: llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp?rev=148986&r1=148985&r2=148986&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp (original) +++ llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp Wed Jan 25 16:11:06 2012 @@ -582,11 +582,6 @@ for (MachineBasicBlock::livein_iterator LI = MBB->livein_begin(), LE = MBB->livein_end(); LI != LE; ++LI) { handleLiveInRegister(MBB, MIIndex, getOrCreateInterval(*LI)); - // Multiple live-ins can alias the same register. - for (const unsigned* AS = tri_->getSubRegisters(*LI); *AS; ++AS) - if (!hasInterval(*AS)) - handleLiveInRegister(MBB, MIIndex, getOrCreateInterval(*AS), - true); } // Skip over empty initial indices. 
From asl at math.spbu.ru Wed Jan 25 16:24:20 2012 From: asl at math.spbu.ru (Anton Korobeynikov) Date: Wed, 25 Jan 2012 22:24:20 -0000 Subject: [llvm-commits] [llvm] r148990 - in /llvm/trunk: include/llvm/CodeGen/AsmPrinter.h include/llvm/CodeGen/TargetLoweringObjectFileImpl.h include/llvm/MC/MCObjectFileInfo.h include/llvm/Target/TargetLoweringObjectFile.h lib/CodeGen/AsmPrinter/AsmPrinter.cpp lib/CodeGen/TargetLoweringObjectFileImpl.cpp lib/MC/MCObjectFileInfo.cpp lib/Target/ARM/ARMTargetObjectFile.cpp lib/Target/ARM/ARMTargetObjectFile.h test/CodeGen/ARM/ctor_order.ll test/CodeGen/X86/2011-08-29-InitOrder.ll Message-ID: <20120125222420.837842A6C12C@llvm.org> Author: asl Date: Wed Jan 25 16:24:19 2012 New Revision: 148990 URL: http://llvm.org/viewvc/llvm-project?rev=148990&view=rev Log: Properly emit ctors / dtors with priorities into desired sections and let linker handle the rest. This finally fixes PR5329 Modified: llvm/trunk/include/llvm/CodeGen/AsmPrinter.h llvm/trunk/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h llvm/trunk/include/llvm/MC/MCObjectFileInfo.h llvm/trunk/include/llvm/Target/TargetLoweringObjectFile.h llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp llvm/trunk/lib/CodeGen/TargetLoweringObjectFileImpl.cpp llvm/trunk/lib/MC/MCObjectFileInfo.cpp llvm/trunk/lib/Target/ARM/ARMTargetObjectFile.cpp llvm/trunk/lib/Target/ARM/ARMTargetObjectFile.h llvm/trunk/test/CodeGen/ARM/ctor_order.ll llvm/trunk/test/CodeGen/X86/2011-08-29-InitOrder.ll Modified: llvm/trunk/include/llvm/CodeGen/AsmPrinter.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/AsmPrinter.h?rev=148990&r1=148989&r2=148990&view=diff ============================================================================== --- llvm/trunk/include/llvm/CodeGen/AsmPrinter.h (original) +++ llvm/trunk/include/llvm/CodeGen/AsmPrinter.h Wed Jan 25 16:24:19 2012 @@ -471,7 +471,7 @@ const MachineBasicBlock *MBB, unsigned uid) const; void EmitLLVMUsedList(const Constant *List); - void 
EmitXXStructorList(const Constant *List); + void EmitXXStructorList(const Constant *List, bool isCtor); GCMetadataPrinter *GetOrCreateGCPrinter(GCStrategy *C); }; } Modified: llvm/trunk/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h?rev=148990&r1=148989&r2=148990&view=diff ============================================================================== --- llvm/trunk/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h (original) +++ llvm/trunk/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h Wed Jan 25 16:24:19 2012 @@ -65,6 +65,11 @@ virtual MCSymbol * getCFIPersonalitySymbol(const GlobalValue *GV, Mangler *Mang, MachineModuleInfo *MMI) const; + + virtual const MCSection * + getStaticCtorSection(unsigned Priority = 65535) const; + virtual const MCSection * + getStaticDtorSection(unsigned Priority = 65535) const; }; Modified: llvm/trunk/include/llvm/MC/MCObjectFileInfo.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/MC/MCObjectFileInfo.h?rev=148990&r1=148989&r2=148990&view=diff ============================================================================== --- llvm/trunk/include/llvm/MC/MCObjectFileInfo.h (original) +++ llvm/trunk/include/llvm/MC/MCObjectFileInfo.h Wed Jan 25 16:24:19 2012 @@ -22,10 +22,6 @@ class MCContext; class MCSection; class Triple; - - namespace Structors { - enum OutputOrder { None, PriorityOrder, ReversePriorityOrder }; - } class MCObjectFileInfo { protected: @@ -167,11 +163,6 @@ const MCSection *DrectveSection; const MCSection *PDataSection; const MCSection *XDataSection; - - /// StructorOutputOrder - Whether the static ctor/dtor list should be output - /// in no particular order, in order of increasing priority or the reverse: - /// in order of decreasing priority (the default). - Structors::OutputOrder StructorOutputOrder; // Default is reverse order. 
public: void InitMCObjectFileInfo(StringRef TT, Reloc::Model RM, CodeModel::Model CM, @@ -197,8 +188,6 @@ const MCSection *getTextSection() const { return TextSection; } const MCSection *getDataSection() const { return DataSection; } const MCSection *getBSSSection() const { return BSSSection; } - const MCSection *getStaticCtorSection() const { return StaticCtorSection; } - const MCSection *getStaticDtorSection() const { return StaticDtorSection; } const MCSection *getLSDASection() const { return LSDASection; } const MCSection *getCompactUnwindSection() const{ return CompactUnwindSection; @@ -300,10 +289,6 @@ return EHFrameSection; } - Structors::OutputOrder getStructorOutputOrder() const { - return StructorOutputOrder; - } - private: enum Environment { IsMachO, IsELF, IsCOFF }; Environment Env; Modified: llvm/trunk/include/llvm/Target/TargetLoweringObjectFile.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetLoweringObjectFile.h?rev=148990&r1=148989&r2=148990&view=diff ============================================================================== --- llvm/trunk/include/llvm/Target/TargetLoweringObjectFile.h (original) +++ llvm/trunk/include/llvm/Target/TargetLoweringObjectFile.h Wed Jan 25 16:24:19 2012 @@ -121,7 +121,18 @@ const MCExpr * getExprForDwarfReference(const MCSymbol *Sym, unsigned Encoding, MCStreamer &Streamer) const; - + + virtual const MCSection * + getStaticCtorSection(unsigned Priority = 65535) const { + (void)Priority; + return StaticCtorSection; + } + virtual const MCSection * + getStaticDtorSection(unsigned Priority = 65535) const { + (void)Priority; + return StaticDtorSection; + } + protected: virtual const MCSection * SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, Modified: llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp?rev=148990&r1=148989&r2=148990&view=diff 
============================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp (original) +++ llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp Wed Jan 25 16:24:19 2012 @@ -1207,12 +1207,8 @@ assert(GV->hasInitializer() && "Not a special LLVM global!"); - const TargetData *TD = TM.getTargetData(); - unsigned Align = Log2_32(TD->getPointerPrefAlignment()); if (GV->getName() == "llvm.global_ctors") { - OutStreamer.SwitchSection(getObjFileLowering().getStaticCtorSection()); - EmitAlignment(Align); - EmitXXStructorList(GV->getInitializer()); + EmitXXStructorList(GV->getInitializer(), /* isCtor */ true); if (TM.getRelocationModel() == Reloc::Static && MAI->hasStaticCtorDtorReferenceInStaticMode()) { @@ -1224,9 +1220,7 @@ } if (GV->getName() == "llvm.global_dtors") { - OutStreamer.SwitchSection(getObjFileLowering().getStaticDtorSection()); - EmitAlignment(Align); - EmitXXStructorList(GV->getInitializer()); + EmitXXStructorList(GV->getInitializer(), /* isCtor */ false); if (TM.getRelocationModel() == Reloc::Static && MAI->hasStaticCtorDtorReferenceInStaticMode()) { @@ -1256,7 +1250,7 @@ } } -typedef std::pair Structor; +typedef std::pair Structor; static bool priority_order(const Structor& lhs, const Structor& rhs) { return lhs.first < rhs.first; @@ -1264,7 +1258,7 @@ /// EmitXXStructorList - Emit the ctor or dtor list taking into account the init /// priority. -void AsmPrinter::EmitXXStructorList(const Constant *List) { +void AsmPrinter::EmitXXStructorList(const Constant *List, bool isCtor) { // Should be an array of '{ int, void ()* }' structs. The first value is the // init priority. if (!isa(List)) return; @@ -1290,19 +1284,20 @@ CS->getOperand(1))); } - // Emit the function pointers in reverse priority order. 
- switch (getObjFileLowering().getStructorOutputOrder()) { - case Structors::None: - break; - case Structors::PriorityOrder: - std::sort(Structors.begin(), Structors.end(), priority_order); - break; - case Structors::ReversePriorityOrder: - std::sort(Structors.rbegin(), Structors.rend(), priority_order); - break; - } - for (unsigned i = 0, e = Structors.size(); i != e; ++i) + // Emit the function pointers in the target-specific order + const TargetData *TD = TM.getTargetData(); + unsigned Align = Log2_32(TD->getPointerPrefAlignment()); + std::stable_sort(Structors.begin(), Structors.end(), priority_order); + for (unsigned i = 0, e = Structors.size(); i != e; ++i) { + const MCSection *OutputSection = + (isCtor ? + getObjFileLowering().getStaticCtorSection(Structors[i].first) : + getObjFileLowering().getStaticDtorSection(Structors[i].first)); + OutStreamer.SwitchSection(OutputSection); + if (OutStreamer.getCurrentSection() != OutStreamer.getPreviousSection()) + EmitAlignment(Align); EmitGlobalConstant(Structors[i].second); + } } //===--------------------------------------------------------------------===// Modified: llvm/trunk/lib/CodeGen/TargetLoweringObjectFileImpl.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/TargetLoweringObjectFileImpl.cpp?rev=148990&r1=148989&r2=148990&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/TargetLoweringObjectFileImpl.cpp (original) +++ llvm/trunk/lib/CodeGen/TargetLoweringObjectFileImpl.cpp Wed Jan 25 16:24:19 2012 @@ -340,6 +340,32 @@ getExprForDwarfGlobalReference(GV, Mang, MMI, Encoding, Streamer); } +const MCSection * +TargetLoweringObjectFileELF::getStaticCtorSection(unsigned Priority) const { + // The default scheme is .ctor / .dtor, so we have to invert the priority + // numbering. 
+ if (Priority == 65535) + return StaticCtorSection; + + std::string Name = std::string(".ctors.") + utostr(65535 - Priority); + return getContext().getELFSection(Name, ELF::SHT_PROGBITS, + ELF::SHF_ALLOC |ELF::SHF_WRITE, + SectionKind::getDataRel()); +} + +const MCSection * +TargetLoweringObjectFileELF::getStaticDtorSection(unsigned Priority) const { + // The default scheme is .ctor / .dtor, so we have to invert the priority + // numbering. + if (Priority == 65535) + return StaticDtorSection; + + std::string Name = std::string(".dtors.") + utostr(65535 - Priority); + return getContext().getELFSection(Name, ELF::SHT_PROGBITS, + ELF::SHF_ALLOC |ELF::SHF_WRITE, + SectionKind::getDataRel()); +} + //===----------------------------------------------------------------------===// // MachO //===----------------------------------------------------------------------===// Modified: llvm/trunk/lib/MC/MCObjectFileInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/MC/MCObjectFileInfo.cpp?rev=148990&r1=148989&r2=148990&view=diff ============================================================================== --- llvm/trunk/lib/MC/MCObjectFileInfo.cpp (original) +++ llvm/trunk/lib/MC/MCObjectFileInfo.cpp Wed Jan 25 16:24:19 2012 @@ -31,8 +31,6 @@ if (T.isMacOSX() && T.isMacOSXVersionLT(10, 5)) CommDirectiveSupportsAlignment = false; - StructorOutputOrder = Structors::PriorityOrder; - TextSection // .text = Ctx->getMachOSection("__TEXT", "__text", MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, @@ -260,12 +258,10 @@ } } - StructorOutputOrder = Structors::ReversePriorityOrder; - // ELF BSSSection = Ctx->getELFSection(".bss", ELF::SHT_NOBITS, - ELF::SHF_WRITE |ELF::SHF_ALLOC, + ELF::SHF_WRITE | ELF::SHF_ALLOC, SectionKind::getBSS()); TextSection = @@ -389,8 +385,6 @@ void MCObjectFileInfo::InitCOFFMCObjectFileInfo(Triple T) { // COFF - StructorOutputOrder = Structors::ReversePriorityOrder; - TextSection = Ctx->getCOFFSection(".text", COFF::IMAGE_SCN_CNT_CODE | Modified: 
llvm/trunk/lib/Target/ARM/ARMTargetObjectFile.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMTargetObjectFile.cpp?rev=148990&r1=148989&r2=148990&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMTargetObjectFile.cpp (original) +++ llvm/trunk/lib/Target/ARM/ARMTargetObjectFile.cpp Wed Jan 25 16:24:19 2012 @@ -14,6 +14,7 @@ #include "llvm/Support/Dwarf.h" #include "llvm/Support/ELF.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/ADT/StringExtras.h" using namespace llvm; using namespace dwarf; @@ -24,8 +25,9 @@ void ARMElfTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM) { TargetLoweringObjectFileELF::Initialize(Ctx, TM); + isAAPCS_ABI = TM.getSubtarget().isAAPCS_ABI(); - if (TM.getSubtarget().isAAPCS_ABI()) { + if (isAAPCS_ABI) { StaticCtorSection = getContext().getELFSection(".init_array", ELF::SHT_INIT_ARRAY, ELF::SHF_WRITE | @@ -36,7 +38,6 @@ ELF::SHF_WRITE | ELF::SHF_ALLOC, SectionKind::getDataRel()); - StructorOutputOrder = Structors::PriorityOrder; LSDASection = NULL; } @@ -46,3 +47,33 @@ 0, SectionKind::getMetadata()); } + +const MCSection * +ARMElfTargetObjectFile::getStaticCtorSection(unsigned Priority) const { + if (!isAAPCS_ABI) + return TargetLoweringObjectFileELF::getStaticCtorSection(Priority); + + if (Priority == 65535) + return StaticCtorSection; + + // Emit ctors in priority order. + std::string Name = std::string(".init_array.") + utostr(Priority); + return getContext().getELFSection(Name, ELF::SHT_INIT_ARRAY, + ELF::SHF_ALLOC | ELF::SHF_WRITE, + SectionKind::getDataRel()); +} + +const MCSection * +ARMElfTargetObjectFile::getStaticDtorSection(unsigned Priority) const { + if (!isAAPCS_ABI) + return TargetLoweringObjectFileELF::getStaticDtorSection(Priority); + + if (Priority == 65535) + return StaticDtorSection; + + // Emit dtors in priority order. 
+ std::string Name = std::string(".fini_array.") + utostr(Priority); + return getContext().getELFSection(Name, ELF::SHT_FINI_ARRAY, + ELF::SHF_ALLOC | ELF::SHF_WRITE, + SectionKind::getDataRel()); +} Modified: llvm/trunk/lib/Target/ARM/ARMTargetObjectFile.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMTargetObjectFile.h?rev=148990&r1=148989&r2=148990&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMTargetObjectFile.h (original) +++ llvm/trunk/lib/Target/ARM/ARMTargetObjectFile.h Wed Jan 25 16:24:19 2012 @@ -20,6 +20,7 @@ class ARMElfTargetObjectFile : public TargetLoweringObjectFileELF { protected: const MCSection *AttributesSection; + bool isAAPCS_ABI; public: ARMElfTargetObjectFile() : TargetLoweringObjectFileELF(), @@ -31,6 +32,9 @@ virtual const MCSection *getAttributesSection() const { return AttributesSection; } + + const MCSection * getStaticCtorSection(unsigned Priority) const; + const MCSection * getStaticDtorSection(unsigned Priority) const; }; } // end namespace llvm Modified: llvm/trunk/test/CodeGen/ARM/ctor_order.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/ctor_order.ll?rev=148990&r1=148989&r2=148990&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/ARM/ctor_order.ll (original) +++ llvm/trunk/test/CodeGen/ARM/ctor_order.ll Wed Jan 25 16:24:19 2012 @@ -6,13 +6,15 @@ ; DARWIN: .long _f151 ; DARWIN-NEXT: .long _f152 -; ELF: .section .ctors,"aw",%progbits +; ELF: .section .ctors.65384,"aw",%progbits +; ELF: .long f151 +; ELF: .section .ctors.65383,"aw",%progbits ; ELF: .long f152 -; ELF-NEXT: .long f151 -; GNUEABI: .section .init_array,"aw",%init_array +; GNUEABI: .section .init_array.151,"aw",%init_array ; GNUEABI: .long f151 -; GNUEABI-NEXT: .long f152 +; GNUEABI: .section .init_array.152,"aw",%init_array +; GNUEABI: .long f152 @llvm.global_ctors = appending global 
[2 x { i32, void ()* }] [ { i32, void ()* } { i32 151, void ()* @f151 }, { i32, void ()* } { i32 152, void ()* @f152 } ] Modified: llvm/trunk/test/CodeGen/X86/2011-08-29-InitOrder.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/2011-08-29-InitOrder.ll?rev=148990&r1=148989&r2=148990&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/X86/2011-08-29-InitOrder.ll (original) +++ llvm/trunk/test/CodeGen/X86/2011-08-29-InitOrder.ll Wed Jan 25 16:24:19 2012 @@ -3,22 +3,28 @@ ; PR5329 @llvm.global_ctors = appending global [3 x { i32, void ()* }] [{ i32, void ()* } { i32 2000, void ()* @construct_2 }, { i32, void ()* } { i32 3000, void ()* @construct_3 }, { i32, void ()* } { i32 1000, void ()* @construct_1 }] -; CHECK-DEFAULT: construct_3 -; CHECK-DEFAULT: construct_2 -; CHECK-DEFAULT: construct_1 +; CHECK-DEFAULT .section .ctors.64535,"aw", at progbits +; CHECK-DEFAULT: .long construct_1 +; CHECK-DEFAULT: .section .ctors.63535,"aw", at progbits +; CHECK-DEFAULT: .long construct_2 +; CHECK-DEFAULT: .section .ctors.62535,"aw", at progbits +; CHECK-DEFAULT: .long construct_3 -; CHECK-DARWIN: construct_1 -; CHECK-DARWIN: construct_2 -; CHECK-DARWIN: construct_3 +; CHECK-DARWIN: .long _construct_1 +; CHECK-DARWIN-NEXT: .long _construct_2 +; CHECK-DARWIN-NEXT: .long _construct_3 @llvm.global_dtors = appending global [3 x { i32, void ()* }] [{ i32, void ()* } { i32 2000, void ()* @destruct_2 }, { i32, void ()* } { i32 1000, void ()* @destruct_1 }, { i32, void ()* } { i32 3000, void ()* @destruct_3 }] -; CHECK-DEFAULT: destruct_3 -; CHECK-DEFAULT: destruct_2 -; CHECK-DEFAULT: destruct_1 +; CHECK-DEFAULT: .section .dtors.64535,"aw", at progbits +; CHECK-DEFAULT: .long destruct_1 +; CHECK-DEFAULT: .section .dtors.63535,"aw", at progbits +; CHECK-DEFAULT: .long destruct_2 +; CHECK-DEFAULT: .section .dtors.62535,"aw", at progbits +; CHECK-DEFAULT: .long destruct_3 -; CHECK-DARWIN: destruct_1 -; 
CHECK-DARWIN: destruct_2 -; CHECK-DARWIN: destruct_3 +; CHECK-DARWIN: .long _destruct_1 +; CHECK-DARWIN-NEXT: .long _destruct_2 +; CHECK-DARWIN-NEXT: .long _destruct_3 declare void @construct_1() declare void @construct_2() From stoklund at 2pi.dk Wed Jan 25 16:34:36 2012 From: stoklund at 2pi.dk (Jakob Stoklund Olesen) Date: Wed, 25 Jan 2012 14:34:36 -0800 Subject: [llvm-commits] Tuning LLVM Greedy Register Allocator to optimize for code size when targeting ARM Thumb 2 instruction set In-Reply-To: <001c01ccdacd$befdd9c0$3cf98d40$@org> References: <000001ccda35$239f0e10$6add2a30$@org> <0E37D7B5-BBF8-4D29-9679-5C4D22B32AEB@2pi.dk> <000c01ccda5b$76b208c0$64161a40$@org> <38F583CE-3516-421A-84C2-46978621E648@apple.com> <901B7A01-6E81-4807-A78F-2922C100117D@2pi.dk> <001c01ccdacd$befdd9c0$3cf98d40$@org> Message-ID: <226102FF-900A-4896-870C-F46C00B82BD4@2pi.dk> On Jan 24, 2012, at 11:24 AM, Zino Benaissa wrote: > Yes, this is heuristic by definition it is a guess. Nothing wrong with that. > The way to look at it is the other way: > 1. If the offset of load/store is too large then don't bother assigning R0-7 > 2. If both operands of ADD are not kill then don't bother assigning R0-7 > 3. If immediate of ADD is too large don't bother assigning R0-7 > ... > The goal is to eliminate as much as possible candidates that compete for R0-R7 so that the RA does a better assignment of R0-R7 (which ultimately increases 16-bits encoding). > Returning 2 fails to do this. You may as well return 0 instead of 2 :) Does this negative bias mean that VirtReg.bytes is 0 for most virtual registers? How many get VirtReg.bytes > 0? As I am reading your changes to the eviction policy, you are completely replacing spill weights with a code size metric for live ranges with VirtReg.bytes > 0. Is that the intention? 
/jakob From grosbach at apple.com Wed Jan 25 16:34:48 2012 From: grosbach at apple.com (Jim Grosbach) Date: Wed, 25 Jan 2012 14:34:48 -0800 Subject: [llvm-commits] [llvm] r148653 - in /llvm/trunk: include/llvm/Object/ObjectFile.h include/llvm/Support/Endian.h lib/Object/ELFObjectFile.cpp In-Reply-To: <9BBE4537D1BAAB479E9E8F9D4234619D322E28@HASMSX103.ger.corp.intel.com> References: <20120122090104.4CD0B2A6C12C@llvm.org> <14B0BD87-2FF4-417D-B932-3F18E7E939B4@apple.com> <9BBE4537D1BAAB479E9E8F9D4234619D322E28@HASMSX103.ger.corp.intel.com> Message-ID: <972604FC-7661-4379-98CC-310F83C4A943@apple.com> On Jan 23, 2012, at 11:35 PM, Bendersky, Eli wrote: >> -----Original Message----- >> From: Jim Grosbach [mailto:grosbach at apple.com] >> Sent: Monday, January 23, 2012 20:38 >> To: Bendersky, Eli >> Cc: llvm-commits at cs.uiuc.edu >> Subject: Re: [llvm-commits] [llvm] r148653 - in /llvm/trunk: >> include/llvm/Object/ObjectFile.h include/llvm/Support/Endian.h >> lib/Object/ELFObjectFile.cpp >> >> Hi Eli, >> >> This patch uses std::vector quite a lot. Have you considered SmallVector? It >> seems likely that may be a better fit in at least some cases. >> >> -Jim >> > > Hi Jim, > > Thanks for taking the time to review the patch. We'll be happy for the code to conform to the LLVM coding philosophy, but would be happy for some advice. We did consider SmallVector, but eventually just went with std::vector. Here is the reasoning. > > Currently std::vector is used in 2 places in the added code: > > 1. It's being passed to the constructor of DyldELFObject as a simple method of tracking memory allocations. The same vector is being passed by reference to other methods of the object. This solution is probably temporary, since we plan to eventually roll a more sophisticated memory manager (Ashok Thirumurthi explained this intention in a separate discussion earlier). 
This vector is created once in the calling code, and the functions in ELFObjectFile just add pointers to it with push_back. > 2. Another std::vector is created in the rebaseObject method to keep track of the addresses of COMMON symbols. Note that we don't know in advance how many such symbols there are. > > Also note that rebaseObject is just called once per JITted module. > > IIUC, SmallVector's chief advantage over std::vector is that it can allocate some elements on the stack without malloc, so when we have a good guess of the maximal amount of elements the vector will contain, SmallVector provides a way to avoid a malloc. > > In light of the above, which use(s) of std::vector would you recommend replacing with SmallVector? > > Thanks in advance, > Eli > > > Hi Eli, That's pretty similar to what the MachO side of things is doing. SmallVector is nearly equivalent to std::vector, so even just using it to optimize for a set of common cases w/ not too many objects in it can be very handy, especially when the contained objects are small like they are here. I wouldn't go so far to say that it's a requirement to use SmallVector<> here or anything like that, but it would be my personal choice barring seeing actual performance comparisons demonstrating otherwise. -Jim > > > > > --------------------------------------------------------------------- > Intel Israel (74) Limited > > This e-mail and any attachments may contain confidential material for > the sole use of the intended recipient(s). Any review or distribution > by others is strictly prohibited. If you are not the intended > recipient, please contact the sender and delete all copies. 
> From stoklund at 2pi.dk Wed Jan 25 17:36:27 2012 From: stoklund at 2pi.dk (Jakob Stoklund Olesen) Date: Wed, 25 Jan 2012 23:36:27 -0000 Subject: [llvm-commits] [llvm] r148996 - in /llvm/trunk: lib/CodeGen/ProcessImplicitDefs.cpp test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll Message-ID: <20120125233627.DC9FC2A6C12C@llvm.org> Author: stoklund Date: Wed Jan 25 17:36:27 2012 New Revision: 148996 URL: http://llvm.org/viewvc/llvm-project?rev=148996&view=rev Log: Improve sub-register def handling in ProcessImplicitDefs. This boils down to using MachineOperand::readsReg() more. This fixes PR11829 where a use ended up after the first def when lowering REG_SEQUENCE instructions involving IMPLICIT_DEFs. Modified: llvm/trunk/lib/CodeGen/ProcessImplicitDefs.cpp llvm/trunk/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll Modified: llvm/trunk/lib/CodeGen/ProcessImplicitDefs.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/ProcessImplicitDefs.cpp?rev=148996&r1=148995&r2=148996&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/ProcessImplicitDefs.cpp (original) +++ llvm/trunk/lib/CodeGen/ProcessImplicitDefs.cpp Wed Jan 25 17:36:27 2012 @@ -50,10 +50,10 @@ SmallSet &ImpDefRegs) { switch(OpIdx) { case 1: - return MI->isCopy() && (MI->getOperand(0).getSubReg() == 0 || + return MI->isCopy() && (!MI->getOperand(0).readsReg() || ImpDefRegs.count(MI->getOperand(0).getReg())); case 2: - return MI->isSubregToReg() && (MI->getOperand(0).getSubReg() == 0 || + return MI->isSubregToReg() && (!MI->getOperand(0).readsReg() || ImpDefRegs.count(MI->getOperand(0).getReg())); default: return false; } @@ -66,7 +66,7 @@ MachineOperand &MO1 = MI->getOperand(1); if (MO1.getReg() != Reg) return false; - if (!MO0.getSubReg() || ImpDefRegs.count(MO0.getReg())) + if (!MO0.readsReg() || ImpDefRegs.count(MO0.getReg())) return true; return false; } @@ -105,7 +105,9 @@ MachineInstr *MI = &*I; ++I; if 
(MI->isImplicitDef()) { - if (MI->getOperand(0).getSubReg()) + ImpDefMIs.push_back(MI); + // Is this a sub-register read-modify-write? + if (MI->getOperand(0).readsReg()) continue; unsigned Reg = MI->getOperand(0).getReg(); ImpDefRegs.insert(Reg); @@ -113,12 +115,11 @@ for (const unsigned *SS = TRI->getSubRegisters(Reg); *SS; ++SS) ImpDefRegs.insert(*SS); } - ImpDefMIs.push_back(MI); continue; } // Eliminate %reg1032:sub = COPY undef. - if (MI->isCopy() && MI->getOperand(0).getSubReg()) { + if (MI->isCopy() && MI->getOperand(0).readsReg()) { MachineOperand &MO = MI->getOperand(1); if (MO.isUndef() || ImpDefRegs.count(MO.getReg())) { if (MO.isKill()) { @@ -140,7 +141,7 @@ bool ChangedToImpDef = false; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand& MO = MI->getOperand(i); - if (!MO.isReg() || (MO.isDef() && !MO.getSubReg()) || MO.isUndef()) + if (!MO.isReg() || !MO.readsReg()) continue; unsigned Reg = MO.getReg(); if (!Reg) @@ -172,10 +173,10 @@ continue; } if (MO.isKill() || MI->isRegTiedToDefOperand(i)) { - // Make sure other uses of + // Make sure other reads of Reg are also marked . 
for (unsigned j = i+1; j != e; ++j) { MachineOperand &MOJ = MI->getOperand(j); - if (MOJ.isReg() && MOJ.isUse() && MOJ.getReg() == Reg) + if (MOJ.isReg() && MOJ.getReg() == Reg && MOJ.readsReg()) MOJ.setIsUndef(); } ImpDefRegs.erase(Reg); Modified: llvm/trunk/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll?rev=148996&r1=148995&r2=148996&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll (original) +++ llvm/trunk/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll Wed Jan 25 17:36:27 2012 @@ -1,5 +1,6 @@ ; RUN: llc < %s -mcpu=cortex-a8 -verify-machineinstrs -verify-coalescing ; PR11841 +; PR11829 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64" target triple = "armv7-none-linux-eabi" @@ -37,7 +38,30 @@ ret void } +define arm_aapcs_vfpcc void @foo2() nounwind uwtable { +entry: + br i1 undef, label %for.end, label %cond.end295 + +cond.end295: ; preds = %entry + %shuffle.i39.i.i1035 = shufflevector <2 x i64> undef, <2 x i64> undef, <1 x i32> zeroinitializer + %shuffle.i38.i.i1036 = shufflevector <2 x i64> zeroinitializer, <2 x i64> undef, <1 x i32> zeroinitializer + %shuffle.i37.i.i1037 = shufflevector <1 x i64> %shuffle.i39.i.i1035, <1 x i64> %shuffle.i38.i.i1036, <2 x i32> + %0 = bitcast <2 x i64> %shuffle.i37.i.i1037 to <4 x float> + %1 = bitcast <4 x float> undef to <2 x i64> + %shuffle.i36.i.i = shufflevector <2 x i64> %1, <2 x i64> undef, <1 x i32> zeroinitializer + %shuffle.i35.i.i = shufflevector <2 x i64> undef, <2 x i64> undef, <1 x i32> zeroinitializer + %shuffle.i34.i.i = shufflevector <1 x i64> %shuffle.i36.i.i, <1 x i64> %shuffle.i35.i.i, <2 x i32> + %2 = bitcast <2 x i64> %shuffle.i34.i.i to <4 x float> + tail call void @llvm.arm.neon.vst1.v4f32(i8* undef, <4 
x float> %0, i32 4) nounwind + tail call void @llvm.arm.neon.vst1.v4f32(i8* undef, <4 x float> %2, i32 4) nounwind + unreachable + +for.end: ; preds = %entry + ret void +} + declare arm_aapcs_vfpcc void @bar(i8*, float, float, float) +declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>, i32) nounwind !0 = metadata !{metadata !"omnipotent char", metadata !1} !1 = metadata !{metadata !"Simple C/C++ TBAA", null} From sabre at nondot.org Wed Jan 25 17:49:49 2012 From: sabre at nondot.org (Chris Lattner) Date: Wed, 25 Jan 2012 23:49:49 -0000 Subject: [llvm-commits] [llvm] r148998 - in /llvm/trunk: include/llvm/Instructions.h lib/VMCore/Instructions.cpp Message-ID: <20120125234949.664F32A6C12C@llvm.org> Author: lattner Date: Wed Jan 25 17:49:49 2012 New Revision: 148998 URL: http://llvm.org/viewvc/llvm-project?rev=148998&view=rev Log: add some helper methods to ShuffleVectorInst and enhance its "isValidOperands" and "getMaskValue" methods to allow ConstantDataSequential. Modified: llvm/trunk/include/llvm/Instructions.h llvm/trunk/lib/VMCore/Instructions.cpp Modified: llvm/trunk/include/llvm/Instructions.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Instructions.h?rev=148998&r1=148997&r2=148998&view=diff ============================================================================== --- llvm/trunk/include/llvm/Instructions.h (original) +++ llvm/trunk/include/llvm/Instructions.h Wed Jan 25 17:49:49 2012 @@ -1670,10 +1670,25 @@ /// Transparently provide more efficient getOperand methods. DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value); + Constant *getMask() const { + return reinterpret_cast(getOperand(2)); + } + /// getMaskValue - Return the index from the shuffle mask for the specified /// output result. This is either -1 if the element is undef or a number less /// than 2*numelements. 
int getMaskValue(unsigned i) const; + + /// getShuffleMask - Return the full mask for this instruction, where each + /// element is the element number and undef's are returned as -1. + void getShuffleMask(SmallVectorImpl &Mask) const; + + SmallVector getShuffleMask() const { + SmallVector Mask; + getShuffleMask(Mask); + return Mask; + } + // Methods for support type inquiry through isa, cast, and dyn_cast: static inline bool classof(const ShuffleVectorInst *) { return true; } Modified: llvm/trunk/lib/VMCore/Instructions.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/Instructions.cpp?rev=148998&r1=148997&r2=148998&view=diff ============================================================================== --- llvm/trunk/lib/VMCore/Instructions.cpp (original) +++ llvm/trunk/lib/VMCore/Instructions.cpp Wed Jan 25 17:49:49 2012 @@ -1576,53 +1576,84 @@ bool ShuffleVectorInst::isValidOperands(const Value *V1, const Value *V2, const Value *Mask) { + // V1 and V2 must be vectors of the same type. if (!V1->getType()->isVectorTy() || V1->getType() != V2->getType()) return false; + // Mask must be vector of i32. VectorType *MaskTy = dyn_cast(Mask->getType()); if (MaskTy == 0 || !MaskTy->getElementType()->isIntegerTy(32)) return false; // Check to see if Mask is valid. + if (isa(Mask) || isa(Mask)) + return true; + if (const ConstantVector *MV = dyn_cast(Mask)) { - VectorType *VTy = cast(V1->getType()); + unsigned V1Size = cast(V1->getType())->getNumElements(); for (unsigned i = 0, e = MV->getNumOperands(); i != e; ++i) { - if (ConstantInt* CI = dyn_cast(MV->getOperand(i))) { - if (CI->uge(VTy->getNumElements()*2)) + if (ConstantInt *CI = dyn_cast(MV->getOperand(i))) { + if (CI->uge(V1Size*2)) return false; } else if (!isa(MV->getOperand(i))) { return false; } } - } else if (!isa(Mask) && !isa(Mask)) { - // The bitcode reader can create a place holder for a forward reference - // used as the shuffle mask. 
When this occurs, the shuffle mask will - // fall into this case and fail. To avoid this error, do this bit of - // ugliness to allow such a mask pass. - if (const ConstantExpr* CE = dyn_cast(Mask)) { - if (CE->getOpcode() == Instruction::UserOp1) - return true; - } - return false; + return true; + } + + if (const ConstantDataSequential *CDS = + dyn_cast(Mask)) { + unsigned V1Size = cast(V1->getType())->getNumElements(); + for (unsigned i = 0, e = MaskTy->getNumElements(); i != e; ++i) + if (CDS->getElementAsInteger(i) >= V1Size*2) + return false; + return true; } - return true; + + // The bitcode reader can create a place holder for a forward reference + // used as the shuffle mask. When this occurs, the shuffle mask will + // fall into this case and fail. To avoid this error, do this bit of + // ugliness to allow such a mask pass. + if (const ConstantExpr *CE = dyn_cast(Mask)) + if (CE->getOpcode() == Instruction::UserOp1) + return true; + + return false; } /// getMaskValue - Return the index from the shuffle mask for the specified /// output result. This is either -1 if the element is undef or a number less /// than 2*numelements. int ShuffleVectorInst::getMaskValue(unsigned i) const { - const Constant *Mask = cast(getOperand(2)); - if (isa(Mask)) return -1; - if (isa(Mask)) return 0; - const ConstantVector *MaskCV = cast(Mask); - assert(i < MaskCV->getNumOperands() && "Index out of range"); - - if (isa(MaskCV->getOperand(i))) + assert(i < getType()->getNumElements() && "Index out of range"); + if (ConstantDataSequential *CDS =dyn_cast(getMask())) + return CDS->getElementAsInteger(i); + Constant *C = getMask()->getAggregateElement(i); + if (isa(C)) return -1; - return cast(MaskCV->getOperand(i))->getZExtValue(); + return cast(C)->getZExtValue(); } +/// getShuffleMask - Return the full mask for this instruction, where each +/// element is the element number and undef's are returned as -1. 
+void ShuffleVectorInst::getShuffleMask(SmallVectorImpl &Result) const { + unsigned NumElts = getType()->getNumElements(); + + if (ConstantDataSequential *CDS=dyn_cast(getMask())) { + for (unsigned i = 0; i != NumElts; ++i) + Result.push_back(CDS->getElementAsInteger(i)); + return; + } + Constant *Mask = getMask(); + for (unsigned i = 0; i != NumElts; ++i) { + Constant *C = Mask->getAggregateElement(i); + Result.push_back(isa(C) ? -1 : + cast(Mask)->getZExtValue()); + } +} + + //===----------------------------------------------------------------------===// // InsertValueInst Class //===----------------------------------------------------------------------===// @@ -3457,15 +3488,11 @@ } InsertElementInst *InsertElementInst::clone_impl() const { - return InsertElementInst::Create(getOperand(0), - getOperand(1), - getOperand(2)); + return InsertElementInst::Create(getOperand(0), getOperand(1), getOperand(2)); } ShuffleVectorInst *ShuffleVectorInst::clone_impl() const { - return new ShuffleVectorInst(getOperand(0), - getOperand(1), - getOperand(2)); + return new ShuffleVectorInst(getOperand(0), getOperand(1), getOperand(2)); } PHINode *PHINode::clone_impl() const { From apazos at codeaurora.org Wed Jan 25 17:55:32 2012 From: apazos at codeaurora.org (Ana Pazos) Date: Wed, 25 Jan 2012 15:55:32 -0800 Subject: [llvm-commits] LLVM patch to support ARM fused multiply add/subtract instructions In-Reply-To: References: <00f601ccdaf6$e6cdf8d0$b469ea70$@org> Message-ID: <016b01ccdbbc$d32677f0$797367d0$@org> Hi Anton, - I have updated fusedMAC.ll to include Neon fused multiply add/sub test cases. The patch is attached. - Regarding Neon2 flag, Code Sourcery GCC seems to support only these flags: -mfpu=vfpv4 -mfpu=vfpv4-d16 -mfpu=neon-vfpv4 -mfpu=vfpv3-d16-fp16 -mfpu=vfpv3-fp16 So I think it just relies on -mfpu=neon-vfpv4 to imply Neon2. Thanks, Ana. 
-----Original Message----- From: anton at korobeynikov.info [mailto:anton at korobeynikov.info] On Behalf Of Anton Korobeynikov Sent: Wednesday, January 25, 2012 5:42 AM To: Ana Pazos Cc: llvm-commits at cs.uiuc.edu Subject: Re: [llvm-commits] LLVM patch to support ARM fused multiply add/subtract instructions Hi Ana, > In this update: > - I assumed neon2 does not imply vfpv4, but neon and vfpv4 imply neon2. > - I kept setting .fpu=neon-vfpv4 code attribute because that is what the > assembler understands. Looks ok. > The additional changes mentioned in the email discussions I think belong to > a separate patch: > - Associate VMLA/VMLS with LessPreciseFPMAD flag, and maybe with fast-math > flag. They should definitely not be. They are not less precise! They are "exactly precise" as two separate ops. It's just FMA which has greater precision than usual thanks to 1 rounding. And it's FMA which needs to be associated with -ffast-math on VFPv2 > - VFPv3/VFPv4/NEON/NEON2 associations with FeatureFP16/FeatureD16. Right. But in a separate patch, please. > - Support to set -mfpu=neon2 in clang. Do you want this?? We should be compatible with gcc in this matter. What does it do? -- With best regards, Anton Korobeynikov Faculty of Mathematics and Mechanics, Saint Petersburg State University -------------- next part -------------- A non-text attachment was scrubbed... 
Name: fusedMACtestpatch.diff Type: application/octet-stream Size: 3287 bytes Desc: not available Url : http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120125/a5b05ae3/attachment.obj From sabre at nondot.org Wed Jan 25 18:01:10 2012 From: sabre at nondot.org (Chris Lattner) Date: Thu, 26 Jan 2012 00:01:10 -0000 Subject: [llvm-commits] [llvm] r148999 - in /llvm/trunk: include/llvm/Type.h lib/VMCore/Type.cpp Message-ID: <20120126000111.04E9B2A6C12C@llvm.org> Author: lattner Date: Wed Jan 25 18:01:10 2012 New Revision: 148999 URL: http://llvm.org/viewvc/llvm-project?rev=148999&view=rev Log: Ok, break down and add some cast<>'ing helper methods to the Type class to reduce the number of cast<>'s we have. This allows someone to use things like Ty->getVectorNumElements() instead of cast(Ty)->getNumElements() when you know that a type is a vector. It would be a great general cleanup to move the codebase to use these, I will do so in the code I'm touching. Modified: llvm/trunk/include/llvm/Type.h llvm/trunk/lib/VMCore/Type.cpp Modified: llvm/trunk/include/llvm/Type.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Type.h?rev=148999&r1=148998&r2=148999&view=diff ============================================================================== --- llvm/trunk/include/llvm/Type.h (original) +++ llvm/trunk/include/llvm/Type.h Wed Jan 25 18:01:10 2012 @@ -315,6 +315,32 @@ unsigned getNumContainedTypes() const { return NumContainedTys; } //===--------------------------------------------------------------------===// + // Helper methods corresponding to subclass methods. This forces a cast to + // the specified subclass and calls its accessor. "getVectorNumElements" (for + // example) is shorthand for cast(Ty)->getNumElements(). This is + // only intended to cover the core methods that are frequently used, helper + // methods should not be added here. 
+ + unsigned getIntegerBitWidth() const; + + Type *getFunctionParamType(unsigned i) const; + unsigned getFunctionNumParams() const; + bool isFunctionVarArg() const; + + // TODO: StructType + + Type *getSequentialElementType() const; + + uint64_t getArrayNumElements() const; + Type *getArrayElementType() const { return getSequentialElementType(); } + + unsigned getVectorNumElements() const; + Type *getVectorElementType() const { return getSequentialElementType(); } + + unsigned getPointerAddressSpace() const; + Type *getPointerElementType() const { return getSequentialElementType(); } + + //===--------------------------------------------------------------------===// // Static members exported by the Type class itself. Useful for getting // instances of Type. // Modified: llvm/trunk/lib/VMCore/Type.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/Type.cpp?rev=148999&r1=148998&r2=148999&view=diff ============================================================================== --- llvm/trunk/lib/VMCore/Type.cpp (original) +++ llvm/trunk/lib/VMCore/Type.cpp Wed Jan 25 18:01:10 2012 @@ -198,6 +198,45 @@ } //===----------------------------------------------------------------------===// +// Subclass Helper Methods +//===----------------------------------------------------------------------===// + +unsigned Type::getIntegerBitWidth() const { + return cast(this)->getBitWidth(); +} + +bool Type::isFunctionVarArg() const { + return cast(this)->isVarArg(); +} + +Type *Type::getFunctionParamType(unsigned i) const { + return cast(this)->getParamType(i); +} + +unsigned Type::getFunctionNumParams() const { + return cast(this)->getNumParams(); +} + +Type *Type::getSequentialElementType() const { + return cast(this)->getElementType(); +} + +uint64_t Type::getArrayNumElements() const { + return cast(this)->getNumElements(); +} + +unsigned Type::getVectorNumElements() const { + return cast(this)->getNumElements(); +} + +unsigned Type::getPointerAddressSpace() const { + 
return cast(this)->getAddressSpace(); +} + + + + +//===----------------------------------------------------------------------===// // Primitive 'Type' data //===----------------------------------------------------------------------===// From sabre at nondot.org Wed Jan 25 18:06:44 2012 From: sabre at nondot.org (Chris Lattner) Date: Thu, 26 Jan 2012 00:06:44 -0000 Subject: [llvm-commits] [llvm] r149000 - in /llvm/trunk: include/llvm/Type.h lib/VMCore/Type.cpp Message-ID: <20120126000644.77BC32A6C12C@llvm.org> Author: lattner Date: Wed Jan 25 18:06:44 2012 New Revision: 149000 URL: http://llvm.org/viewvc/llvm-project?rev=149000&view=rev Log: add StructType helpers too. Modified: llvm/trunk/include/llvm/Type.h llvm/trunk/lib/VMCore/Type.cpp Modified: llvm/trunk/include/llvm/Type.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Type.h?rev=149000&r1=148999&r2=149000&view=diff ============================================================================== --- llvm/trunk/include/llvm/Type.h (original) +++ llvm/trunk/include/llvm/Type.h Wed Jan 25 18:06:44 2012 @@ -26,6 +26,7 @@ class Module; class LLVMContext; class LLVMContextImpl; +class StringRef; template struct GraphTraits; /// The instances of the Type class are immutable: once they are created, @@ -327,7 +328,9 @@ unsigned getFunctionNumParams() const; bool isFunctionVarArg() const; - // TODO: StructType + StringRef getStructName() const; + unsigned getStructNumElements() const; + Type *getStructElementType(unsigned N) const; Type *getSequentialElementType() const; Modified: llvm/trunk/lib/VMCore/Type.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/Type.cpp?rev=149000&r1=148999&r2=149000&view=diff ============================================================================== --- llvm/trunk/lib/VMCore/Type.cpp (original) +++ llvm/trunk/lib/VMCore/Type.cpp Wed Jan 25 18:06:44 2012 @@ -217,6 +217,20 @@ return cast(this)->getNumParams(); } +StringRef Type::getStructName() const { + 
return cast(this)->getName(); +} + +unsigned Type::getStructNumElements() const { + return cast(this)->getNumElements(); +} + +Type *Type::getStructElementType(unsigned N) const { + return cast(this)->getElementType(N); +} + + + Type *Type::getSequentialElementType() const { return cast(this)->getElementType(); } From sabre at nondot.org Wed Jan 25 18:41:50 2012 From: sabre at nondot.org (Chris Lattner) Date: Thu, 26 Jan 2012 00:41:50 -0000 Subject: [llvm-commits] [llvm] r149005 - /llvm/trunk/lib/VMCore/Instructions.cpp Message-ID: <20120126004150.D3CA52A6C12C@llvm.org> Author: lattner Date: Wed Jan 25 18:41:50 2012 New Revision: 149005 URL: http://llvm.org/viewvc/llvm-project?rev=149005&view=rev Log: fix pasto in the new (and still unused) ShuffleVectorInst::getShuffleMask method. Modified: llvm/trunk/lib/VMCore/Instructions.cpp Modified: llvm/trunk/lib/VMCore/Instructions.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/Instructions.cpp?rev=149005&r1=149004&r2=149005&view=diff ============================================================================== --- llvm/trunk/lib/VMCore/Instructions.cpp (original) +++ llvm/trunk/lib/VMCore/Instructions.cpp Wed Jan 25 18:41:50 2012 @@ -1649,7 +1649,7 @@ for (unsigned i = 0; i != NumElts; ++i) { Constant *C = Mask->getAggregateElement(i); Result.push_back(isa(C) ? -1 : - cast(Mask)->getZExtValue()); + cast(C)->getZExtValue()); } } From sabre at nondot.org Wed Jan 25 18:42:35 2012 From: sabre at nondot.org (Chris Lattner) Date: Thu, 26 Jan 2012 00:42:35 -0000 Subject: [llvm-commits] [llvm] r149006 - in /llvm/trunk/lib: Transforms/InstCombine/InstCombineVectorOps.cpp VMCore/Constants.cpp Message-ID: <20120126004235.289EB2A6C12C@llvm.org> Author: lattner Date: Wed Jan 25 18:42:34 2012 New Revision: 149006 URL: http://llvm.org/viewvc/llvm-project?rev=149006&view=rev Log: some general cleanup, using new methods and tidying up old code. 
Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineVectorOps.cpp llvm/trunk/lib/VMCore/Constants.cpp Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineVectorOps.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineVectorOps.cpp?rev=149006&r1=149005&r2=149006&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/InstCombine/InstCombineVectorOps.cpp (original) +++ llvm/trunk/lib/Transforms/InstCombine/InstCombineVectorOps.cpp Wed Jan 25 18:42:34 2012 @@ -19,14 +19,13 @@ /// is to leave as a vector operation. isConstant indicates whether we're /// extracting one known element. If false we're extracting a variable index. static bool CheapToScalarize(Value *V, bool isConstant) { - if (isa(V)) - return true; - if (ConstantVector *C = dyn_cast(V)) { + if (Constant *C = dyn_cast(V)) { if (isConstant) return true; - // If all elts are the same, we can extract. - Constant *Op0 = C->getOperand(0); - for (unsigned i = 1; i < C->getNumOperands(); ++i) - if (C->getOperand(i) != Op0) + + // If all elts are the same, we can extract it and use any of the values. + Constant *Op0 = C->getAggregateElement(0U); + for (unsigned i = 1, e = V->getType()->getVectorNumElements(); i != e; ++i) + if (C->getAggregateElement(i) != Op0) return false; return true; } @@ -54,41 +53,18 @@ return false; } -/// getShuffleMask - Read and decode a shufflevector mask. -/// Turn undef elements into negative values. 
-static SmallVector getShuffleMask(const ShuffleVectorInst *SVI) { - unsigned NElts = SVI->getType()->getNumElements(); - if (isa(SVI->getOperand(2))) - return SmallVector(NElts, 0); - if (isa(SVI->getOperand(2))) - return SmallVector(NElts, -1); - - SmallVector Result; - const ConstantVector *CP = cast(SVI->getOperand(2)); - for (User::const_op_iterator i = CP->op_begin(), e = CP->op_end(); i!=e; ++i) - if (isa(*i)) - Result.push_back(-1); // undef - else - Result.push_back(cast(*i)->getZExtValue()); - return Result; -} - /// FindScalarElement - Given a vector and an element number, see if the scalar /// value is already around as a register, for example if it were inserted then /// extracted from the vector. static Value *FindScalarElement(Value *V, unsigned EltNo) { assert(V->getType()->isVectorTy() && "Not looking at a vector?"); - VectorType *PTy = cast(V->getType()); - unsigned Width = PTy->getNumElements(); + VectorType *VTy = cast(V->getType()); + unsigned Width = VTy->getNumElements(); if (EltNo >= Width) // Out of range access. - return UndefValue::get(PTy->getElementType()); + return UndefValue::get(VTy->getElementType()); - if (isa(V)) - return UndefValue::get(PTy->getElementType()); - if (isa(V)) - return Constant::getNullValue(PTy->getElementType()); - if (ConstantVector *CP = dyn_cast(V)) - return CP->getOperand(EltNo); + if (Constant *C = dyn_cast(V)) + return C->getAggregateElement(EltNo); if (InsertElementInst *III = dyn_cast(V)) { // If this is an insert to a variable element, we don't know what it is. 
@@ -107,11 +83,10 @@ } if (ShuffleVectorInst *SVI = dyn_cast(V)) { - unsigned LHSWidth = - cast(SVI->getOperand(0)->getType())->getNumElements(); + unsigned LHSWidth = SVI->getOperand(0)->getType()->getVectorNumElements(); int InEl = SVI->getMaskValue(EltNo); if (InEl < 0) - return UndefValue::get(PTy->getElementType()); + return UndefValue::get(VTy->getElementType()); if (InEl < (int)LHSWidth) return FindScalarElement(SVI->getOperand(0), InEl); return FindScalarElement(SVI->getOperand(1), InEl - LHSWidth); @@ -122,27 +97,11 @@ } Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { - // If vector val is undef, replace extract with scalar undef. - if (isa(EI.getOperand(0))) - return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType())); - - // If vector val is constant 0, replace extract with scalar 0. - if (isa(EI.getOperand(0))) - return ReplaceInstUsesWith(EI, Constant::getNullValue(EI.getType())); - - if (ConstantVector *C = dyn_cast(EI.getOperand(0))) { - // If vector val is constant with all elements the same, replace EI with - // that element. When the elements are not identical, we cannot replace yet - // (we do that below, but only when the index is constant). - Constant *op0 = C->getOperand(0); - for (unsigned i = 1; i != C->getNumOperands(); ++i) - if (C->getOperand(i) != op0) { - op0 = 0; - break; - } - if (op0) - return ReplaceInstUsesWith(EI, op0); - } + // If vector val is constant with all elements the same, replace EI with + // that element. We handle a known element # below. + if (Constant *C = dyn_cast(EI.getOperand(0))) + if (CheapToScalarize(C, false)) + return ReplaceInstUsesWith(EI, C->getAggregateElement(0U)); // If extracting a specified index from the vector, see if we can recursively // find a previously computed scalar that was inserted into the vector. @@ -176,8 +135,7 @@ // the same number of elements, see if we can find the source element from // it. In this case, we will end up needing to bitcast the scalars. 
if (BitCastInst *BCI = dyn_cast(EI.getOperand(0))) { - if (VectorType *VT = - dyn_cast(BCI->getOperand(0)->getType())) + if (VectorType *VT = dyn_cast(BCI->getOperand(0)->getType())) if (VT->getNumElements() == VectorWidth) if (Value *Elt = FindScalarElement(BCI->getOperand(0), IndexVal)) return new BitCastInst(Elt, EI.getType()); @@ -216,7 +174,7 @@ int SrcIdx = SVI->getMaskValue(Elt->getZExtValue()); Value *Src; unsigned LHSWidth = - cast(SVI->getOperand(0)->getType())->getNumElements(); + SVI->getOperand(0)->getType()->getVectorNumElements(); if (SrcIdx < 0) return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType())); @@ -452,7 +410,7 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { Value *LHS = SVI.getOperand(0); Value *RHS = SVI.getOperand(1); - SmallVector Mask = getShuffleMask(&SVI); + SmallVector Mask = SVI.getShuffleMask(); bool MadeChange = false; @@ -623,12 +581,11 @@ SmallVector LHSMask; SmallVector RHSMask; - if (newLHS != LHS) { - LHSMask = getShuffleMask(LHSShuffle); - } - if (RHSShuffle && newRHS != RHS) { - RHSMask = getShuffleMask(RHSShuffle); - } + if (newLHS != LHS) + LHSMask = LHSShuffle->getShuffleMask(); + if (RHSShuffle && newRHS != RHS) + RHSMask = RHSShuffle->getShuffleMask(); + unsigned newLHSWidth = (newLHS != LHS) ? 
LHSOp0Width : LHSWidth; SmallVector newMask; bool isSplat = true; Modified: llvm/trunk/lib/VMCore/Constants.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/Constants.cpp?rev=149006&r1=149005&r2=149006&view=diff ============================================================================== --- llvm/trunk/lib/VMCore/Constants.cpp (original) +++ llvm/trunk/lib/VMCore/Constants.cpp Wed Jan 25 18:42:34 2012 @@ -170,7 +170,7 @@ if (const UndefValue *UV = dyn_cast(this)) return UV->getElementValue(Elt); - if (const ConstantDataSequential *CDS = dyn_cast(this)) + if (const ConstantDataSequential *CDS =dyn_cast(this)) return CDS->getElementAsConstant(Elt); return 0; } @@ -201,8 +201,7 @@ } #endif assert(isa(V) && "References remain to Constant being destroyed"); - Constant *CV = cast(V); - CV->destroyConstant(); + cast(V)->destroyConstant(); // The constant should remove itself from our use list... assert((use_empty() || use_back() != V) && "Constant not removed!"); @@ -628,15 +627,13 @@ /// getSequentialElement - If this CAZ has array or vector type, return a zero /// with the right element type. Constant *ConstantAggregateZero::getSequentialElement() const { - return Constant::getNullValue( - cast(getType())->getElementType()); + return Constant::getNullValue(getType()->getSequentialElementType()); } /// getStructElement - If this CAZ has struct type, return a zero with the /// right element type for the specified element. Constant *ConstantAggregateZero::getStructElement(unsigned Elt) const { - return Constant::getNullValue( - cast(getType())->getElementType(Elt)); + return Constant::getNullValue(getType()->getStructElementType(Elt)); } /// getElementValue - Return a zero of the right value for the specified GEP @@ -663,13 +660,13 @@ /// getSequentialElement - If this undef has array or vector type, return an /// undef with the right element type. 
UndefValue *UndefValue::getSequentialElement() const { - return UndefValue::get(cast(getType())->getElementType()); + return UndefValue::get(getType()->getSequentialElementType()); } /// getStructElement - If this undef has struct type, return a zero with the /// right element type for the specified element. UndefValue *UndefValue::getStructElement(unsigned Elt) const { - return UndefValue::get(cast(getType())->getElementType(Elt)); + return UndefValue::get(getType()->getStructElementType(Elt)); } /// getElementValue - Return an undef of the right value for the specified GEP @@ -1020,8 +1017,8 @@ // isValueValidForType implementations bool ConstantInt::isValueValidForType(Type *Ty, uint64_t Val) { - unsigned NumBits = cast(Ty)->getBitWidth(); // assert okay - if (Ty == Type::getInt1Ty(Ty->getContext())) + unsigned NumBits = Ty->getIntegerBitWidth(); // assert okay + if (Ty->isIntegerTy(1)) return Val == 0 || Val == 1; if (NumBits >= 64) return true; // always true, has to fit in largest type @@ -1030,8 +1027,8 @@ } bool ConstantInt::isValueValidForType(Type *Ty, int64_t Val) { - unsigned NumBits = cast(Ty)->getBitWidth(); // assert okay - if (Ty == Type::getInt1Ty(Ty->getContext())) + unsigned NumBits = Ty->getIntegerBitWidth(); + if (Ty->isIntegerTy(1)) return Val == 0 || Val == 1 || Val == -1; if (NumBits >= 64) return true; // always true, has to fit in largest type @@ -1536,8 +1533,7 @@ "PtrToInt destination must be integer or integer vector"); assert(isa(C->getType()) == isa(DstTy)); if (isa(C->getType())) - assert(cast(C->getType())->getNumElements() == - cast(DstTy)->getNumElements() && + assert(C->getType()->getVectorNumElements()==DstTy->getVectorNumElements()&& "Invalid cast between a different number of vector elements"); return getFoldedCast(Instruction::PtrToInt, C, DstTy); } @@ -1549,8 +1545,7 @@ "IntToPtr destination must be a pointer or pointer vector"); assert(isa(C->getType()) == isa(DstTy)); if (isa(C->getType())) - 
assert(cast(C->getType())->getNumElements() == - cast(DstTy)->getNumElements() && + assert(C->getType()->getVectorNumElements()==DstTy->getVectorNumElements()&& "Invalid cast between a different number of vector elements"); return getFoldedCast(Instruction::IntToPtr, C, DstTy); } @@ -1731,7 +1726,7 @@ // Get the result type of the getelementptr! Type *Ty = GetElementPtrInst::getIndexedType(C->getType(), Idxs); assert(Ty && "GEP indices invalid!"); - unsigned AS = cast(C->getType())->getAddressSpace(); + unsigned AS = C->getType()->getPointerAddressSpace(); Type *ReqTy = Ty->getPointerTo(AS); assert(C->getType()->isPointerTy() && @@ -1811,7 +1806,7 @@ const ExprMapKeyType Key(Instruction::ExtractElement,ArgVec); LLVMContextImpl *pImpl = Val->getContext().pImpl; - Type *ReqTy = cast(Val->getType())->getElementType(); + Type *ReqTy = Val->getType()->getVectorElementType(); return pImpl->ExprConstants.getOrCreate(ReqTy, Key); } @@ -1819,8 +1814,8 @@ Constant *Idx) { assert(Val->getType()->isVectorTy() && "Tried to create insertelement operation on non-vector type!"); - assert(Elt->getType() == cast(Val->getType())->getElementType() - && "Insertelement types must match!"); + assert(Elt->getType() == Val->getType()->getVectorElementType() && + "Insertelement types must match!"); assert(Idx->getType()->isIntegerTy(32) && "Insertelement index must be i32 type!"); @@ -1844,8 +1839,8 @@ if (Constant *FC = ConstantFoldShuffleVectorInstruction(V1, V2, Mask)) return FC; // Fold a few common cases. 
- unsigned NElts = cast(Mask->getType())->getNumElements(); - Type *EltTy = cast(V1->getType())->getElementType(); + unsigned NElts = Mask->getType()->getVectorNumElements(); + Type *EltTy = V1->getType()->getVectorElementType(); Type *ShufTy = VectorType::get(EltTy, NElts); // Look up the constant in the table first to ensure uniqueness @@ -2055,7 +2050,7 @@ unsigned ConstantDataSequential::getNumElements() const { if (ArrayType *AT = dyn_cast(getType())) return AT->getNumElements(); - return cast(getType())->getNumElements(); + return getType()->getVectorNumElements(); } @@ -2084,7 +2079,7 @@ /// the correct element type. We take the bytes in as an StringRef because /// we *want* an underlying "char*" to avoid TBAA type punning violations. Constant *ConstantDataSequential::getImpl(StringRef Elements, Type *Ty) { - assert(isElementTypeCompatible(cast(Ty)->getElementType())); + assert(isElementTypeCompatible(Ty->getSequentialElementType())); // If the elements are all zero or there are no elements, return a CAZ, which // is more dense and canonical. if (isAllZeros(Elements)) @@ -2266,7 +2261,7 @@ // The data is stored in host byte order, make sure to cast back to the right // type to load with the right endianness. - switch (cast(getElementType())->getBitWidth()) { + switch (getElementType()->getIntegerBitWidth()) { default: assert(0 && "Invalid bitwidth for CDS"); case 8: return *(uint8_t*)EltPtr; case 16: return *(uint16_t*)EltPtr; From peter at pcc.me.uk Wed Jan 25 19:31:38 2012 From: peter at pcc.me.uk (Peter Collingbourne) Date: Thu, 26 Jan 2012 01:31:38 -0000 Subject: [llvm-commits] [llvm] r149010 - /llvm/trunk/tools/llvm-config/llvm-config.cpp Message-ID: <20120126013138.777632A6C12C@llvm.org> Author: pcc Date: Wed Jan 25 19:31:38 2012 New Revision: 149010 URL: http://llvm.org/viewvc/llvm-project?rev=149010&view=rev Log: llvm-config: Add support for CMake build trees in which the build mode does not form part of the path. 
Modified: llvm/trunk/tools/llvm-config/llvm-config.cpp Modified: llvm/trunk/tools/llvm-config/llvm-config.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-config/llvm-config.cpp?rev=149010&r1=149009&r2=149010&view=diff ============================================================================== --- llvm/trunk/tools/llvm-config/llvm-config.cpp (original) +++ llvm/trunk/tools/llvm-config/llvm-config.cpp Wed Jan 25 19:31:38 2012 @@ -169,7 +169,8 @@ // and from an installed path. We try and auto-detect which case we are in so // that we can report the correct information when run from a development // tree. - bool IsInDevelopmentTree, DevelopmentTreeLayoutIsCMakeStyle; + bool IsInDevelopmentTree; + enum { MakefileStyle, CMakeStyle, CMakeBuildModeStyle } DevelopmentTreeLayout; llvm::SmallString<256> CurrentPath(GetExecutablePath(argv[0]).str()); std::string CurrentExecPrefix; std::string ActiveObjRoot; @@ -185,7 +186,7 @@ // symbolic links, but is good enough. if (CurrentExecPrefix == std::string(LLVM_OBJ_ROOT) + "/" + LLVM_BUILDMODE) { IsInDevelopmentTree = true; - DevelopmentTreeLayoutIsCMakeStyle = false; + DevelopmentTreeLayout = MakefileStyle; // If we are in a development tree, then check if we are in a BuildTools // directory. This indicates we are built for the build triple, but we @@ -195,9 +196,13 @@ } else { ActiveObjRoot = LLVM_OBJ_ROOT; } + } else if (CurrentExecPrefix == std::string(LLVM_OBJ_ROOT)) { + IsInDevelopmentTree = true; + DevelopmentTreeLayout = CMakeStyle; + ActiveObjRoot = LLVM_OBJ_ROOT; } else if (CurrentExecPrefix == std::string(LLVM_OBJ_ROOT) + "/bin") { IsInDevelopmentTree = true; - DevelopmentTreeLayoutIsCMakeStyle = true; + DevelopmentTreeLayout = CMakeBuildModeStyle; ActiveObjRoot = LLVM_OBJ_ROOT; } else { IsInDevelopmentTree = false; @@ -213,12 +218,19 @@ // CMake organizes the products differently than a normal prefix style // layout. 
- if (DevelopmentTreeLayoutIsCMakeStyle) { - ActiveBinDir = ActiveObjRoot + "/bin/" + LLVM_BUILDMODE; - ActiveLibDir = ActiveObjRoot + "/lib/" + LLVM_BUILDMODE; - } else { + switch (DevelopmentTreeLayout) { + case MakefileStyle: ActiveBinDir = ActiveObjRoot + "/" + LLVM_BUILDMODE + "/bin"; ActiveLibDir = ActiveObjRoot + "/" + LLVM_BUILDMODE + "/lib"; + break; + case CMakeStyle: + ActiveBinDir = ActiveObjRoot + "/bin"; + ActiveLibDir = ActiveObjRoot + "/lib"; + break; + case CMakeBuildModeStyle: + ActiveBinDir = ActiveObjRoot + "/bin/" + LLVM_BUILDMODE; + ActiveLibDir = ActiveObjRoot + "/lib/" + LLVM_BUILDMODE; + break; } // We need to include files from both the source and object trees. From sabre at nondot.org Wed Jan 25 20:31:23 2012 From: sabre at nondot.org (Chris Lattner) Date: Thu, 26 Jan 2012 02:31:23 -0000 Subject: [llvm-commits] [llvm] r149021 - in /llvm/trunk: include/llvm/Constants.h lib/VMCore/Constants.cpp Message-ID: <20120126023123.2BF2F2A6C12D@llvm.org> Author: lattner Date: Wed Jan 25 20:31:22 2012 New Revision: 149021 URL: http://llvm.org/viewvc/llvm-project?rev=149021&view=rev Log: Add a ConstantDataVector::getSplatValue() method, for parity with ConstantVector. Fix some outright bugs in the implementation of ConstantArray and ConstantStruct, which would cause us to not make one big UndefValue when asking for an array/struct with all undef elements. Enhance Constant::isAllOnesValue to work with ConstantDataVector. Modified: llvm/trunk/include/llvm/Constants.h llvm/trunk/lib/VMCore/Constants.cpp Modified: llvm/trunk/include/llvm/Constants.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Constants.h?rev=149021&r1=149020&r2=149021&view=diff ============================================================================== --- llvm/trunk/include/llvm/Constants.h (original) +++ llvm/trunk/include/llvm/Constants.h Wed Jan 25 20:31:22 2012 @@ -766,6 +766,10 @@ /// i32/i64/float/double) and must be a ConstantFP or ConstantInt. 
static Constant *getSplat(unsigned NumElts, Constant *Elt); + /// getSplatValue - If this is a splat constant, meaning that all of the + /// elements have the same value, return that value. Otherwise return NULL. + Constant *getSplatValue() const; + /// getType - Specialize the getType() method to always return a VectorType, /// which reduces the amount of casting needed in parts of the compiler. /// Modified: llvm/trunk/lib/VMCore/Constants.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/Constants.cpp?rev=149021&r1=149020&r2=149021&view=diff ============================================================================== --- llvm/trunk/lib/VMCore/Constants.cpp (original) +++ llvm/trunk/lib/VMCore/Constants.cpp Wed Jan 25 20:31:22 2012 @@ -78,6 +78,11 @@ if (Constant *Splat = CV->getSplatValue()) return Splat->isAllOnesValue(); + // Check for constant vectors which are splats of -1 values. + if (const ConstantDataVector *CV = dyn_cast(this)) + if (Constant *Splat = CV->getSplatValue()) + return Splat->isAllOnesValue(); + return false; } @@ -711,17 +716,27 @@ } LLVMContextImpl *pImpl = Ty->getContext().pImpl; // If this is an all-zero array, return a ConstantAggregateZero object + bool isAllZero = true; + bool isUndef = false; if (!V.empty()) { Constant *C = V[0]; - if (!C->isNullValue()) - return pImpl->ArrayConstants.getOrCreate(Ty, V); - - for (unsigned i = 1, e = V.size(); i != e; ++i) - if (V[i] != C) - return pImpl->ArrayConstants.getOrCreate(Ty, V); + isAllZero = C->isNullValue(); + isUndef = isa(C); + + if (isAllZero || isUndef) + for (unsigned i = 1, e = V.size(); i != e; ++i) + if (V[i] != C) { + isAllZero = false; + isUndef = false; + break; + } } - - return ConstantAggregateZero::get(Ty); + + if (isAllZero) + return ConstantAggregateZero::get(Ty); + if (isUndef) + return UndefValue::get(Ty); + return pImpl->ArrayConstants.getOrCreate(Ty, V); } /// ConstantArray::get(const string&) - Return an array that is initialized to @@ -780,14 
+795,31 @@ // ConstantStruct accessors. Constant *ConstantStruct::get(StructType *ST, ArrayRef V) { - // Create a ConstantAggregateZero value if all elements are zeros. - for (unsigned i = 0, e = V.size(); i != e; ++i) - if (!V[i]->isNullValue()) - return ST->getContext().pImpl->StructConstants.getOrCreate(ST, V); - assert((ST->isOpaque() || ST->getNumElements() == V.size()) && "Incorrect # elements specified to ConstantStruct::get"); - return ConstantAggregateZero::get(ST); + + // Create a ConstantAggregateZero value if all elements are zeros. + bool isZero = true; + bool isUndef = false; + + if (!V.empty()) { + isUndef = isa(V[0]); + isZero = V[0]->isNullValue(); + if (isUndef || isZero) { + for (unsigned i = 0, e = V.size(); i != e; ++i) { + if (!V[i]->isNullValue()) + isZero = false; + if (!isa(V[i])) + isUndef = false; + } + } + } + if (isZero) + return ConstantAggregateZero::get(ST); + if (isUndef) + return UndefValue::get(ST); + + return ST->getContext().pImpl->StructConstants.getOrCreate(ST, V); } Constant *ConstantStruct::get(StructType *T, ...) { @@ -2329,6 +2361,20 @@ return Str.drop_back().find(0) == StringRef::npos; } +/// getSplatValue - If this is a splat constant, meaning that all of the +/// elements have the same value, return that value. Otherwise return NULL. +Constant *ConstantDataVector::getSplatValue() const { + const char *Base = getRawDataValues().data(); + + // Compare elements 1+ to the 0'th element. + unsigned EltSize = getElementByteSize(); + for (unsigned i = 1, e = getNumElements(); i != e; ++i) + if (memcmp(Base, Base+i*EltSize, EltSize)) + return 0; + + // If they're all the same, return the 0th one as a representative. + return getElementAsConstant(0); +} //===----------------------------------------------------------------------===// // replaceUsesOfWithOnConstant implementations @@ -2360,33 +2406,25 @@ // Fill values with the modified operands of the constant array. Also, // compute whether this turns into an all-zeros array. 
- bool isAllZeros = false; unsigned NumUpdated = 0; - if (!ToC->isNullValue()) { - for (Use *O = OperandList, *E = OperandList+getNumOperands(); O != E; ++O) { - Constant *Val = cast(O->get()); - if (Val == From) { - Val = ToC; - ++NumUpdated; - } - Values.push_back(Val); - } - } else { - isAllZeros = true; - for (Use *O = OperandList, *E = OperandList+getNumOperands();O != E; ++O) { - Constant *Val = cast(O->get()); - if (Val == From) { - Val = ToC; - ++NumUpdated; - } - Values.push_back(Val); - if (isAllZeros) isAllZeros = Val->isNullValue(); + + // Keep track of whether all the values in the array are "ToC". + bool AllSame = true; + for (Use *O = OperandList, *E = OperandList+getNumOperands(); O != E; ++O) { + Constant *Val = cast(O->get()); + if (Val == From) { + Val = ToC; + ++NumUpdated; } + Values.push_back(Val); + AllSame = Val == ToC; } Constant *Replacement = 0; - if (isAllZeros) { + if (AllSame && ToC->isNullValue()) { Replacement = ConstantAggregateZero::get(getType()); + } else if (AllSame && isa(ToC)) { + Replacement = UndefValue::get(getType()); } else { // Check to see if we have this array type already. bool Exists; @@ -2446,16 +2484,24 @@ // Fill values with the modified operands of the constant struct. Also, // compute whether this turns into an all-zeros struct. 
bool isAllZeros = false; - if (!ToC->isNullValue()) { - for (Use *O = OperandList, *E = OperandList + getNumOperands(); O != E; ++O) - Values.push_back(cast(O->get())); - } else { + bool isAllUndef = false; + if (ToC->isNullValue()) { isAllZeros = true; for (Use *O = OperandList, *E = OperandList+getNumOperands(); O != E; ++O) { Constant *Val = cast(O->get()); Values.push_back(Val); if (isAllZeros) isAllZeros = Val->isNullValue(); } + } else if (isa(ToC)) { + isAllUndef = true; + for (Use *O = OperandList, *E = OperandList+getNumOperands(); O != E; ++O) { + Constant *Val = cast(O->get()); + Values.push_back(Val); + if (isAllUndef) isAllUndef = isa(Val); + } + } else { + for (Use *O = OperandList, *E = OperandList + getNumOperands(); O != E; ++O) + Values.push_back(cast(O->get())); } Values[OperandToUpdate] = ToC; @@ -2464,6 +2510,8 @@ Constant *Replacement = 0; if (isAllZeros) { Replacement = ConstantAggregateZero::get(getType()); + } else if (isAllUndef) { + Replacement = UndefValue::get(getType()); } else { // Check to see if we have this struct type already. bool Exists; From sabre at nondot.org Wed Jan 25 20:32:05 2012 From: sabre at nondot.org (Chris Lattner) Date: Thu, 26 Jan 2012 02:32:05 -0000 Subject: [llvm-commits] [llvm] r149024 - in /llvm/trunk/lib: Transforms/IPO/GlobalOpt.cpp Transforms/InstCombine/InstructionCombining.cpp Transforms/Scalar/SCCP.cpp VMCore/AsmWriter.cpp VMCore/ConstantFold.cpp Message-ID: <20120126023205.26C722A6C12D@llvm.org> Author: lattner Date: Wed Jan 25 20:32:04 2012 New Revision: 149024 URL: http://llvm.org/viewvc/llvm-project?rev=149024&view=rev Log: Continue improving support for ConstantDataAggregate, and use the new methods recently added to (sometimes greatly!) simplify code. 
Modified: llvm/trunk/lib/Transforms/IPO/GlobalOpt.cpp llvm/trunk/lib/Transforms/InstCombine/InstructionCombining.cpp llvm/trunk/lib/Transforms/Scalar/SCCP.cpp llvm/trunk/lib/VMCore/AsmWriter.cpp llvm/trunk/lib/VMCore/ConstantFold.cpp Modified: llvm/trunk/lib/Transforms/IPO/GlobalOpt.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/IPO/GlobalOpt.cpp?rev=149024&r1=149023&r2=149024&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/IPO/GlobalOpt.cpp (original) +++ llvm/trunk/lib/Transforms/IPO/GlobalOpt.cpp Wed Jan 25 20:32:04 2012 @@ -2176,21 +2176,9 @@ std::vector Elts; if (StructType *STy = dyn_cast(Init->getType())) { - // Break up the constant into its elements. - if (ConstantStruct *CS = dyn_cast(Init)) { - for (User::op_iterator i = CS->op_begin(), e = CS->op_end(); i != e; ++i) - Elts.push_back(cast(*i)); - } else if (isa(Init)) { - for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) - Elts.push_back(Constant::getNullValue(STy->getElementType(i))); - } else if (isa(Init)) { - for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) - Elts.push_back(UndefValue::get(STy->getElementType(i))); - } else { - llvm_unreachable("This code is out of sync with " - " ConstantFoldLoadThroughGEPConstantExpr"); - } + for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) + Elts.push_back(Init->getAggregateElement(i)); // Replace the element that we are supposed to. ConstantInt *CU = cast(Addr->getOperand(OpNo)); @@ -2209,22 +2197,11 @@ if (ArrayType *ATy = dyn_cast(InitTy)) NumElts = ATy->getNumElements(); else - NumElts = cast(InitTy)->getNumElements(); + NumElts = InitTy->getVectorNumElements(); // Break up the array into elements. 
- if (ConstantArray *CA = dyn_cast(Init)) { - for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) - Elts.push_back(cast(*i)); - } else if (ConstantVector *CV = dyn_cast(Init)) { - for (User::op_iterator i = CV->op_begin(), e = CV->op_end(); i != e; ++i) - Elts.push_back(cast(*i)); - } else if (isa(Init)) { - Elts.assign(NumElts, Constant::getNullValue(InitTy->getElementType())); - } else { - assert(isa(Init) && "This code is out of sync with " - " ConstantFoldLoadThroughGEPConstantExpr"); - Elts.assign(NumElts, UndefValue::get(InitTy->getElementType())); - } + for (uint64_t i = 0, e = NumElts; i != e; ++i) + Elts.push_back(Init->getAggregateElement(i)); assert(CI->getZExtValue() < NumElts); Elts[CI->getZExtValue()] = Modified: llvm/trunk/lib/Transforms/InstCombine/InstructionCombining.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstructionCombining.cpp?rev=149024&r1=149023&r2=149024&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/InstCombine/InstructionCombining.cpp (original) +++ llvm/trunk/lib/Transforms/InstCombine/InstructionCombining.cpp Wed Jan 25 20:32:04 2012 @@ -1270,24 +1270,16 @@ return ReplaceInstUsesWith(EV, Agg); if (Constant *C = dyn_cast(Agg)) { - if (isa(C)) - return ReplaceInstUsesWith(EV, UndefValue::get(EV.getType())); - - if (isa(C)) - return ReplaceInstUsesWith(EV, Constant::getNullValue(EV.getType())); - - if (isa(C) || isa(C)) { - // Extract the element indexed by the first index out of the constant - Value *V = C->getOperand(*EV.idx_begin()); - if (EV.getNumIndices() > 1) - // Extract the remaining indices out of the constant indexed by the - // first index - return ExtractValueInst::Create(V, EV.getIndices().slice(1)); - else - return ReplaceInstUsesWith(EV, V); + if (Constant *C2 = C->getAggregateElement(*EV.idx_begin())) { + if (EV.getNumIndices() == 0) + return ReplaceInstUsesWith(EV, C2); + // Extract the 
remaining indices out of the constant indexed by the + // first index + return ExtractValueInst::Create(C2, EV.getIndices().slice(1)); } return 0; // Can't handle other constants - } + } + if (InsertValueInst *IV = dyn_cast(Agg)) { // We're extracting from an insertvalue instruction, compare the indices const unsigned *exti, *exte, *insi, *inse; Modified: llvm/trunk/lib/Transforms/Scalar/SCCP.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/SCCP.cpp?rev=149024&r1=149023&r2=149024&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/Scalar/SCCP.cpp (original) +++ llvm/trunk/lib/Transforms/Scalar/SCCP.cpp Wed Jan 25 20:32:04 2012 @@ -408,15 +408,14 @@ return LV; // Common case, already in the map. if (Constant *C = dyn_cast(V)) { - if (isa(C)) - ; // Undef values remain undefined. - else if (ConstantStruct *CS = dyn_cast(C)) - LV.markConstant(CS->getOperand(i)); // Constants are constant. - else if (isa(C)) { - Type *FieldTy = cast(V->getType())->getElementType(i); - LV.markConstant(Constant::getNullValue(FieldTy)); - } else + Constant *Elt = C->getAggregateElement(i); + + if (Elt == 0) LV.markOverdefined(); // Unknown sort of constant. + else if (isa(Elt)) + ; // Undef values remain undefined. + else + LV.markConstant(Elt); // Constants are constant. } // All others are underdefined by default. Modified: llvm/trunk/lib/VMCore/AsmWriter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/AsmWriter.cpp?rev=149024&r1=149023&r2=149024&view=diff ============================================================================== --- llvm/trunk/lib/VMCore/AsmWriter.cpp (original) +++ llvm/trunk/lib/VMCore/AsmWriter.cpp Wed Jan 25 20:32:04 2012 @@ -855,6 +855,37 @@ } return; } + + if (const ConstantDataArray *CA = dyn_cast(CV)) { + // As a special case, print the array as a string if it is an array of + // i8 with ConstantInt values. 
+ if (CA->isString()) { + Out << "c\""; + PrintEscapedString(CA->getAsString(), Out); + Out << '"'; + return; + } + + Type *ETy = CA->getType()->getElementType(); + Out << '['; + if (CA->getNumOperands()) { + TypePrinter.print(ETy, Out); + Out << ' '; + WriteAsOperandInternal(Out, CA->getElementAsConstant(0), + &TypePrinter, Machine, + Context); + for (unsigned i = 1, e = CA->getNumOperands(); i != e; ++i) { + Out << ", "; + TypePrinter.print(ETy, Out); + Out << ' '; + WriteAsOperandInternal(Out, CA->getElementAsConstant(i), &TypePrinter, + Machine, Context); + } + Out << ']'; + } + return; + } + if (const ConstantStruct *CS = dyn_cast(CV)) { if (CS->getType()->isPacked()) @@ -886,21 +917,19 @@ return; } - if (const ConstantVector *CP = dyn_cast(CV)) { - Type *ETy = CP->getType()->getElementType(); - assert(CP->getNumOperands() > 0 && - "Number of operands for a PackedConst must be > 0"); + if (isa(CV) || isa(CV)) { + Type *ETy = CV->getType()->getVectorElementType(); Out << '<'; TypePrinter.print(ETy, Out); Out << ' '; - WriteAsOperandInternal(Out, CP->getOperand(0), &TypePrinter, Machine, - Context); - for (unsigned i = 1, e = CP->getNumOperands(); i != e; ++i) { + WriteAsOperandInternal(Out, CV->getAggregateElement(0U), &TypePrinter, + Machine, Context); + for (unsigned i = 1, e = CV->getType()->getVectorNumElements(); i != e;++i){ Out << ", "; TypePrinter.print(ETy, Out); Out << ' '; - WriteAsOperandInternal(Out, CP->getOperand(i), &TypePrinter, Machine, - Context); + WriteAsOperandInternal(Out, CV->getAggregateElement(i), &TypePrinter, + Machine, Context); } Out << '>'; return; Modified: llvm/trunk/lib/VMCore/ConstantFold.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/ConstantFold.cpp?rev=149024&r1=149023&r2=149024&view=diff ============================================================================== --- llvm/trunk/lib/VMCore/ConstantFold.cpp (original) +++ llvm/trunk/lib/VMCore/ConstantFold.cpp Wed Jan 25 20:32:04 2012 @@ -38,11 +38,10 
@@ // ConstantFold*Instruction Implementations //===----------------------------------------------------------------------===// -/// BitCastConstantVector - Convert the specified ConstantVector node to the +/// BitCastConstantVector - Convert the specified vector Constant node to the /// specified vector type. At this point, we know that the elements of the /// input vector constant are all simple integer or FP values. -static Constant *BitCastConstantVector(ConstantVector *CV, - VectorType *DstTy) { +static Constant *BitCastConstantVector(Constant *CV, VectorType *DstTy) { if (CV->isAllOnesValue()) return Constant::getAllOnesValue(DstTy); if (CV->isNullValue()) return Constant::getNullValue(DstTy); @@ -51,22 +50,21 @@ // doing so requires endianness information. This should be handled by // Analysis/ConstantFolding.cpp unsigned NumElts = DstTy->getNumElements(); - if (NumElts != CV->getNumOperands()) + if (NumElts != CV->getType()->getVectorNumElements()) return 0; + + Type *DstEltTy = DstTy->getElementType(); // Check to verify that all elements of the input are simple. + SmallVector Result; for (unsigned i = 0; i != NumElts; ++i) { - if (!isa(CV->getOperand(i)) && - !isa(CV->getOperand(i))) - return 0; + Constant *C = CV->getAggregateElement(i); + if (C == 0) return 0; + C = ConstantExpr::getBitCast(C, DstEltTy); + if (isa(C)) return 0; + Result.push_back(C); } - // Bitcast each element now. - std::vector Result; - Type *DstEltTy = DstTy->getElementType(); - for (unsigned i = 0; i != NumElts; ++i) - Result.push_back(ConstantExpr::getBitCast(CV->getOperand(i), - DstEltTy)); return ConstantVector::get(Result); } @@ -142,8 +140,8 @@ if (isa(V)) return Constant::getNullValue(DestTy); - if (ConstantVector *CV = dyn_cast(V)) - return BitCastConstantVector(CV, DestPTy); + // Handle ConstantVector and ConstantAggregateVector. 
+ return BitCastConstantVector(V, DestPTy); } // Canonicalize scalar-to-vector bitcasts into vector-to-vector bitcasts @@ -692,42 +690,26 @@ Constant *llvm::ConstantFoldSelectInstruction(Constant *Cond, Constant *V1, Constant *V2) { - if (ConstantInt *CB = dyn_cast(Cond)) - return CB->getZExtValue() ? V1 : V2; - - // Check for zero aggregate and ConstantVector of zeros + // Check for i1 and vector true/false conditions. if (Cond->isNullValue()) return V2; + if (Cond->isAllOnesValue()) return V1; - if (ConstantVector* CondV = dyn_cast(Cond)) { - - if (CondV->isAllOnesValue()) return V1; - - VectorType *VTy = cast(V1->getType()); - ConstantVector *CP1 = dyn_cast(V1); - ConstantVector *CP2 = dyn_cast(V2); - - if ((CP1 || isa(V1)) && - (CP2 || isa(V2))) { - - // Find the element type of the returned vector - Type *EltTy = VTy->getElementType(); - unsigned NumElem = VTy->getNumElements(); - std::vector Res(NumElem); - - bool Valid = true; - for (unsigned i = 0; i < NumElem; ++i) { - ConstantInt* c = dyn_cast(CondV->getOperand(i)); - if (!c) { - Valid = false; - break; - } - Constant *C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy); - Constant *C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy); - Res[i] = c->getZExtValue() ? C1 : C2; - } - // If we were able to build the vector, return it - if (Valid) return ConstantVector::get(Res); + // FIXME: CDV Condition. + // If the condition is a vector constant, fold the result elementwise. + if (ConstantVector *CondV = dyn_cast(Cond)) { + SmallVector Result; + for (unsigned i = 0, e = V1->getType()->getVectorNumElements(); i != e;++i){ + ConstantInt *Cond = dyn_cast(CondV->getOperand(i)); + if (Cond == 0) break; + + Constant *Res = (Cond->getZExtValue() ? V2 : V1)->getAggregateElement(i); + if (Res == 0) break; + Result.push_back(Res); } + + // If we were able to build the vector, return it. 
+ if (Result.size() == V1->getType()->getVectorNumElements()) + return ConstantVector::get(Result); } @@ -781,70 +763,22 @@ Constant *Idx) { ConstantInt *CIdx = dyn_cast(Idx); if (!CIdx) return 0; - APInt idxVal = CIdx->getValue(); - if (isa(Val)) { - // Insertion of scalar constant into vector undef - // Optimize away insertion of undef - if (isa(Elt)) - return Val; - // Otherwise break the aggregate undef into multiple undefs and do - // the insertion - unsigned numOps = - cast(Val->getType())->getNumElements(); - std::vector Ops; - Ops.reserve(numOps); - for (unsigned i = 0; i < numOps; ++i) { - Constant *Op = - (idxVal == i) ? Elt : UndefValue::get(Elt->getType()); - Ops.push_back(Op); - } - return ConstantVector::get(Ops); - } - if (isa(Val)) { - // Insertion of scalar constant into vector aggregate zero - // Optimize away insertion of zero - if (Elt->isNullValue()) - return Val; - // Otherwise break the aggregate zero into multiple zeros and do - // the insertion - unsigned numOps = - cast(Val->getType())->getNumElements(); - std::vector Ops; - Ops.reserve(numOps); - for (unsigned i = 0; i < numOps; ++i) { - Constant *Op = - (idxVal == i) ? Elt : Constant::getNullValue(Elt->getType()); - Ops.push_back(Op); - } - return ConstantVector::get(Ops); - } - if (ConstantVector *CVal = dyn_cast(Val)) { - // Insertion of scalar constant into vector constant - std::vector Ops; - Ops.reserve(CVal->getNumOperands()); - for (unsigned i = 0; i < CVal->getNumOperands(); ++i) { - Constant *Op = - (idxVal == i) ? 
Elt : cast(CVal->getOperand(i)); - Ops.push_back(Op); + const APInt &IdxVal = CIdx->getValue(); + + SmallVector Result; + for (unsigned i = 0, e = Val->getType()->getVectorNumElements(); i != e; ++i){ + if (i == IdxVal) { + Result.push_back(Elt); + continue; } - return ConstantVector::get(Ops); + + if (Constant *C = Val->getAggregateElement(i)) + Result.push_back(C); + else + return 0; } - - return 0; -} - -/// GetVectorElement - If C is a ConstantVector, ConstantAggregateZero or Undef -/// return the specified element value. Otherwise return null. -static Constant *GetVectorElement(Constant *C, unsigned EltNo) { - if (ConstantVector *CV = dyn_cast(C)) - return CV->getOperand(EltNo); - - Type *EltTy = cast(C->getType())->getElementType(); - if (isa(C)) - return Constant::getNullValue(EltTy); - if (isa(C)) - return UndefValue::get(EltTy); - return 0; + + return ConstantVector::get(Result); } Constant *llvm::ConstantFoldShuffleVectorInstruction(Constant *V1, @@ -860,24 +794,21 @@ // Loop over the shuffle mask, evaluating each element. SmallVector Result; for (unsigned i = 0; i != MaskNumElts; ++i) { - Constant *InElt = GetVectorElement(Mask, i); + Constant *InElt = Mask->getAggregateElement(i); if (InElt == 0) return 0; - if (isa(InElt)) - InElt = UndefValue::get(EltTy); - else if (ConstantInt *CI = dyn_cast(InElt)) { - unsigned Elt = CI->getZExtValue(); - if (Elt >= SrcNumElts*2) - InElt = UndefValue::get(EltTy); - else if (Elt >= SrcNumElts) - InElt = GetVectorElement(V2, Elt - SrcNumElts); - else - InElt = GetVectorElement(V1, Elt); - if (InElt == 0) return 0; - } else { - // Unknown value. 
- return 0; + if (isa(InElt)) { + Result.push_back(UndefValue::get(EltTy)); + continue; } + unsigned Elt = cast(InElt)->getZExtValue(); + if (Elt >= SrcNumElts*2) + InElt = UndefValue::get(EltTy); + else if (Elt >= SrcNumElts) + InElt = V2->getAggregateElement(Elt - SrcNumElts); + else + InElt = V1->getAggregateElement(Elt); + if (InElt == 0) return 0; Result.push_back(InElt); } @@ -890,26 +821,10 @@ if (Idxs.empty()) return Agg; - if (isa(Agg)) // ev(undef, x) -> undef - return UndefValue::get(ExtractValueInst::getIndexedType(Agg->getType(), - Idxs)); - - if (isa(Agg)) // ev(0, x) -> 0 - return - Constant::getNullValue(ExtractValueInst::getIndexedType(Agg->getType(), - Idxs)); - - // Otherwise recurse. - if (ConstantStruct *CS = dyn_cast(Agg)) - return ConstantFoldExtractValueInstruction(CS->getOperand(Idxs[0]), - Idxs.slice(1)); - - if (ConstantArray *CA = dyn_cast(Agg)) - return ConstantFoldExtractValueInstruction(CA->getOperand(Idxs[0]), - Idxs.slice(1)); - ConstantVector *CV = cast(Agg); - return ConstantFoldExtractValueInstruction(CV->getOperand(Idxs[0]), - Idxs.slice(1)); + if (Constant *C = Agg->getAggregateElement(Idxs[0])) + return ConstantFoldExtractValueInstruction(C, Idxs.slice(1)); + + return 0; } Constant *llvm::ConstantFoldInsertValueInstruction(Constant *Agg, @@ -919,84 +834,30 @@ if (Idxs.empty()) return Val; - if (isa(Agg)) { - // Insertion of constant into aggregate undef - // Optimize away insertion of undef. - if (isa(Val)) - return Agg; - - // Otherwise break the aggregate undef into multiple undefs and do - // the insertion. - CompositeType *AggTy = cast(Agg->getType()); - unsigned numOps; - if (ArrayType *AR = dyn_cast(AggTy)) - numOps = AR->getNumElements(); - else - numOps = cast(AggTy)->getNumElements(); - - std::vector Ops(numOps); - for (unsigned i = 0; i < numOps; ++i) { - Type *MemberTy = AggTy->getTypeAtIndex(i); - Constant *Op = - (Idxs[0] == i) ? 
- ConstantFoldInsertValueInstruction(UndefValue::get(MemberTy), - Val, Idxs.slice(1)) : - UndefValue::get(MemberTy); - Ops[i] = Op; - } - - if (StructType* ST = dyn_cast(AggTy)) - return ConstantStruct::get(ST, Ops); - return ConstantArray::get(cast(AggTy), Ops); - } + unsigned NumElts; + if (StructType *ST = dyn_cast(Agg->getType())) + NumElts = ST->getNumElements(); + else if (ArrayType *AT = dyn_cast(Agg->getType())) + NumElts = AT->getNumElements(); + else + NumElts = AT->getVectorNumElements(); - if (isa(Agg)) { - // Insertion of constant into aggregate zero - // Optimize away insertion of zero. - if (Val->isNullValue()) - return Agg; - - // Otherwise break the aggregate zero into multiple zeros and do - // the insertion. - CompositeType *AggTy = cast(Agg->getType()); - unsigned numOps; - if (ArrayType *AR = dyn_cast(AggTy)) - numOps = AR->getNumElements(); - else - numOps = cast(AggTy)->getNumElements(); + SmallVector Result; + for (unsigned i = 0; i != NumElts; ++i) { + Constant *C = Agg->getAggregateElement(i); + if (C == 0) return 0; - std::vector Ops(numOps); - for (unsigned i = 0; i < numOps; ++i) { - Type *MemberTy = AggTy->getTypeAtIndex(i); - Constant *Op = - (Idxs[0] == i) ? - ConstantFoldInsertValueInstruction(Constant::getNullValue(MemberTy), - Val, Idxs.slice(1)) : - Constant::getNullValue(MemberTy); - Ops[i] = Op; - } + if (Idxs[0] == i) + C = ConstantFoldInsertValueInstruction(C, Val, Idxs.slice(1)); - if (StructType *ST = dyn_cast(AggTy)) - return ConstantStruct::get(ST, Ops); - return ConstantArray::get(cast(AggTy), Ops); + Result.push_back(C); } - if (isa(Agg) || isa(Agg)) { - // Insertion of constant into aggregate constant. 
- std::vector Ops(Agg->getNumOperands()); - for (unsigned i = 0; i < Agg->getNumOperands(); ++i) { - Constant *Op = cast(Agg->getOperand(i)); - if (Idxs[0] == i) - Op = ConstantFoldInsertValueInstruction(Op, Val, Idxs.slice(1)); - Ops[i] = Op; - } - - if (StructType* ST = dyn_cast(Agg->getType())) - return ConstantStruct::get(ST, Ops); - return ConstantArray::get(cast(Agg->getType()), Ops); - } - - return 0; + if (StructType *ST = dyn_cast(Agg->getType())) + return ConstantStruct::get(ST, Result); + if (ArrayType *AT = dyn_cast(Agg->getType())) + return ConstantArray::get(AT, Result); + return ConstantVector::get(Result); } @@ -1174,7 +1035,6 @@ // At this point we know neither constant is an UndefValue. if (ConstantInt *CI1 = dyn_cast(C1)) { if (ConstantInt *CI2 = dyn_cast(C2)) { - using namespace APIntOps; const APInt &C1V = CI1->getValue(); const APInt &C2V = CI2->getValue(); switch (Opcode) { @@ -1271,145 +1131,18 @@ } } } else if (VectorType *VTy = dyn_cast(C1->getType())) { - ConstantVector *CP1 = dyn_cast(C1); - ConstantVector *CP2 = dyn_cast(C2); - if ((CP1 != NULL || isa(C1)) && - (CP2 != NULL || isa(C2))) { - std::vector Res; - Type* EltTy = VTy->getElementType(); - Constant *C1 = 0; - Constant *C2 = 0; - switch (Opcode) { - default: - break; - case Instruction::Add: - for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) { - C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy); - C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy); - Res.push_back(ConstantExpr::getAdd(C1, C2)); - } - return ConstantVector::get(Res); - case Instruction::FAdd: - for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) { - C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy); - C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy); - Res.push_back(ConstantExpr::getFAdd(C1, C2)); - } - return ConstantVector::get(Res); - case Instruction::Sub: - for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) { - C1 = CP1 ? 
CP1->getOperand(i) : Constant::getNullValue(EltTy); - C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy); - Res.push_back(ConstantExpr::getSub(C1, C2)); - } - return ConstantVector::get(Res); - case Instruction::FSub: - for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) { - C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy); - C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy); - Res.push_back(ConstantExpr::getFSub(C1, C2)); - } - return ConstantVector::get(Res); - case Instruction::Mul: - for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) { - C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy); - C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy); - Res.push_back(ConstantExpr::getMul(C1, C2)); - } - return ConstantVector::get(Res); - case Instruction::FMul: - for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) { - C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy); - C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy); - Res.push_back(ConstantExpr::getFMul(C1, C2)); - } - return ConstantVector::get(Res); - case Instruction::UDiv: - for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) { - C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy); - C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy); - Res.push_back(ConstantExpr::getUDiv(C1, C2)); - } - return ConstantVector::get(Res); - case Instruction::SDiv: - for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) { - C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy); - C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy); - Res.push_back(ConstantExpr::getSDiv(C1, C2)); - } - return ConstantVector::get(Res); - case Instruction::FDiv: - for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) { - C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy); - C2 = CP2 ? 
CP2->getOperand(i) : Constant::getNullValue(EltTy); - Res.push_back(ConstantExpr::getFDiv(C1, C2)); - } - return ConstantVector::get(Res); - case Instruction::URem: - for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) { - C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy); - C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy); - Res.push_back(ConstantExpr::getURem(C1, C2)); - } - return ConstantVector::get(Res); - case Instruction::SRem: - for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) { - C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy); - C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy); - Res.push_back(ConstantExpr::getSRem(C1, C2)); - } - return ConstantVector::get(Res); - case Instruction::FRem: - for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) { - C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy); - C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy); - Res.push_back(ConstantExpr::getFRem(C1, C2)); - } - return ConstantVector::get(Res); - case Instruction::And: - for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) { - C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy); - C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy); - Res.push_back(ConstantExpr::getAnd(C1, C2)); - } - return ConstantVector::get(Res); - case Instruction::Or: - for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) { - C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy); - C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy); - Res.push_back(ConstantExpr::getOr(C1, C2)); - } - return ConstantVector::get(Res); - case Instruction::Xor: - for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) { - C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy); - C2 = CP2 ? 
CP2->getOperand(i) : Constant::getNullValue(EltTy); - Res.push_back(ConstantExpr::getXor(C1, C2)); - } - return ConstantVector::get(Res); - case Instruction::LShr: - for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) { - C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy); - C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy); - Res.push_back(ConstantExpr::getLShr(C1, C2)); - } - return ConstantVector::get(Res); - case Instruction::AShr: - for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) { - C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy); - C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy); - Res.push_back(ConstantExpr::getAShr(C1, C2)); - } - return ConstantVector::get(Res); - case Instruction::Shl: - for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) { - C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy); - C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy); - Res.push_back(ConstantExpr::getShl(C1, C2)); - } - return ConstantVector::get(Res); - } + // Perform elementwise folding. + SmallVector Result; + for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) { + Constant *LHS = C1->getAggregateElement(i); + Constant *RHS = C2->getAggregateElement(i); + if (LHS == 0 || RHS == 0) break; + + Result.push_back(ConstantExpr::get(Opcode, LHS, RHS)); } + + if (Result.size() == VTy->getNumElements()) + return ConstantVector::get(Result); } if (ConstantExpr *CE1 = dyn_cast(C1)) { From echristo at apple.com Wed Jan 25 20:33:53 2012 From: echristo at apple.com (Eric Christopher) Date: Thu, 26 Jan 2012 02:33:53 -0000 Subject: [llvm-commits] [debuginfo-tests] r149026 - /debuginfo-tests/trunk/sret.cpp Message-ID: <20120126023353.BF8032A6C12C@llvm.org> Author: echristo Date: Wed Jan 25 20:33:53 2012 New Revision: 149026 URL: http://llvm.org/viewvc/llvm-project?rev=149026&view=rev Log: Reverse the order that these are expected for clang change. 
Should be no functional change (the offsets in AT_data_member_location are still correct). If the gdb testsuite itself needs changing then I'll revert this and the clang patch. Modified: debuginfo-tests/trunk/sret.cpp Modified: debuginfo-tests/trunk/sret.cpp URL: http://llvm.org/viewvc/llvm-project/debuginfo-tests/trunk/sret.cpp?rev=149026&r1=149025&r2=149026&view=diff ============================================================================== --- debuginfo-tests/trunk/sret.cpp (original) +++ debuginfo-tests/trunk/sret.cpp Wed Jan 25 20:33:53 2012 @@ -6,8 +6,8 @@ // DEBUGGER: r // DEBUGGER: p a // CHECK: $1 = (A &) -// CHECK: _vptr$A = // CHECK: m_int = 12 +// CHECK: _vptr$A = class A { From sabre at nondot.org Wed Jan 25 20:51:13 2012 From: sabre at nondot.org (Chris Lattner) Date: Thu, 26 Jan 2012 02:51:13 -0000 Subject: [llvm-commits] [llvm] r149028 - in /llvm/trunk: include/llvm/Constant.h include/llvm/Instructions.h lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp lib/VMCore/ConstantFold.cpp lib/VMCore/Constants.cpp lib/VMCore/Instructions.cpp Message-ID: <20120126025113.F0EEF2A6C12C@llvm.org> Author: lattner Date: Wed Jan 25 20:51:13 2012 New Revision: 149028 URL: http://llvm.org/viewvc/llvm-project?rev=149028&view=rev Log: eliminate the Constant::getVectorElements method. There are better (and more robust) ways to do what it was doing now. Also, add static methods for decoding a ShuffleVector mask. 
Modified: llvm/trunk/include/llvm/Constant.h llvm/trunk/include/llvm/Instructions.h llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp llvm/trunk/lib/VMCore/ConstantFold.cpp llvm/trunk/lib/VMCore/Constants.cpp llvm/trunk/lib/VMCore/Instructions.cpp Modified: llvm/trunk/include/llvm/Constant.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Constant.h?rev=149028&r1=149027&r2=149028&view=diff ============================================================================== --- llvm/trunk/include/llvm/Constant.h (original) +++ llvm/trunk/include/llvm/Constant.h Wed Jan 25 20:51:13 2012 @@ -91,12 +91,6 @@ /// FIXME: This really should not be in VMCore. PossibleRelocationsTy getRelocationInfo() const; - /// getVectorElements - This method, which is only valid on constant of vector - /// type, returns the elements of the vector in the specified smallvector. - /// This handles breaking down a vector undef into undef elements, etc. For - /// constant exprs and other cases we can't handle, we return an empty vector. - void getVectorElements(SmallVectorImpl &Elts) const; - /// getAggregateElement - For aggregates (struct/array/vector) return the /// constant that corresponds to the specified element if possible, or null if /// not. This can return null if the element index is a ConstantExpr, or if Modified: llvm/trunk/include/llvm/Instructions.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Instructions.h?rev=149028&r1=149027&r2=149028&view=diff ============================================================================== --- llvm/trunk/include/llvm/Instructions.h (original) +++ llvm/trunk/include/llvm/Instructions.h Wed Jan 25 20:51:13 2012 @@ -1677,11 +1677,19 @@ /// getMaskValue - Return the index from the shuffle mask for the specified /// output result. This is either -1 if the element is undef or a number less /// than 2*numelements. 
- int getMaskValue(unsigned i) const; + static int getMaskValue(Constant *Mask, unsigned i); + + int getMaskValue(unsigned i) const { + return getMaskValue(getMask(), i); + } /// getShuffleMask - Return the full mask for this instruction, where each /// element is the element number and undef's are returned as -1. - void getShuffleMask(SmallVectorImpl &Mask) const; + static void getShuffleMask(Constant *Mask, SmallVectorImpl &Result); + + void getShuffleMask(SmallVectorImpl &Result) const { + return getShuffleMask(getMask(), Result); + } SmallVector getShuffleMask() const { SmallVector Mask; Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp?rev=149028&r1=149027&r2=149028&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (original) +++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp Wed Jan 25 20:51:13 2012 @@ -2806,22 +2806,13 @@ } void SelectionDAGBuilder::visitShuffleVector(const User &I) { - SmallVector Mask; SDValue Src1 = getValue(I.getOperand(0)); SDValue Src2 = getValue(I.getOperand(1)); - // Convert the ConstantVector mask operand into an array of ints, with -1 - // representing undef values. 
- SmallVector MaskElts; - cast(I.getOperand(2))->getVectorElements(MaskElts); - unsigned MaskNumElts = MaskElts.size(); - for (unsigned i = 0; i != MaskNumElts; ++i) { - if (isa(MaskElts[i])) - Mask.push_back(-1); - else - Mask.push_back(cast(MaskElts[i])->getSExtValue()); - } - + SmallVector Mask; + ShuffleVectorInst::getShuffleMask(cast(I.getOperand(2)), Mask); + unsigned MaskNumElts = Mask.size(); + EVT VT = TLI.getValueType(I.getType()); EVT SrcVT = Src1.getValueType(); unsigned SrcNumElts = SrcVT.getVectorNumElements(); Modified: llvm/trunk/lib/VMCore/ConstantFold.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/ConstantFold.cpp?rev=149028&r1=149027&r2=149028&view=diff ============================================================================== --- llvm/trunk/lib/VMCore/ConstantFold.cpp (original) +++ llvm/trunk/lib/VMCore/ConstantFold.cpp Wed Jan 25 20:51:13 2012 @@ -1697,20 +1697,20 @@ R==APFloat::cmpEqual); } } else if (C1->getType()->isVectorTy()) { - SmallVector C1Elts, C2Elts; - C1->getVectorElements(C1Elts); - C2->getVectorElements(C2Elts); - if (C1Elts.empty() || C2Elts.empty()) - return 0; - // If we can constant fold the comparison of each element, constant fold // the whole vector comparison. SmallVector ResElts; // Compare the elements, producing an i1 result or constant expr. 
- for (unsigned i = 0, e = C1Elts.size(); i != e; ++i) - ResElts.push_back(ConstantExpr::getCompare(pred, C1Elts[i], C2Elts[i])); - - return ConstantVector::get(ResElts); + for (unsigned i = 0, e = C1->getType()->getVectorNumElements(); i != e;++i){ + Constant *C1E = C1->getAggregateElement(i); + Constant *C2E = C2->getAggregateElement(i); + if (C1E == 0 || C2E == 0) break; + + ResElts.push_back(ConstantExpr::getCompare(pred, C1E, C2E)); + } + + if (ResElts.size() == C1->getType()->getVectorNumElements()) + return ConstantVector::get(ResElts); } if (C1->getType()->isFloatingPointTy()) { Modified: llvm/trunk/lib/VMCore/Constants.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/Constants.cpp?rev=149028&r1=149027&r2=149028&view=diff ============================================================================== --- llvm/trunk/lib/VMCore/Constants.cpp (original) +++ llvm/trunk/lib/VMCore/Constants.cpp Wed Jan 25 20:51:13 2012 @@ -312,36 +312,6 @@ return Result; } - -/// getVectorElements - This method, which is only valid on constant of vector -/// type, returns the elements of the vector in the specified smallvector. -/// This handles breaking down a vector undef into undef elements, etc. For -/// constant exprs and other cases we can't handle, we return an empty vector. -void Constant::getVectorElements(SmallVectorImpl &Elts) const { - assert(getType()->isVectorTy() && "Not a vector constant!"); - - if (const ConstantVector *CV = dyn_cast(this)) { - for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) - Elts.push_back(CV->getOperand(i)); - return; - } - - VectorType *VT = cast(getType()); - if (isa(this)) { - Elts.assign(VT->getNumElements(), - Constant::getNullValue(VT->getElementType())); - return; - } - - if (isa(this)) { - Elts.assign(VT->getNumElements(), UndefValue::get(VT->getElementType())); - return; - } - - // Unknown type, must be constant expr etc. 
-} - - /// removeDeadUsersOfConstant - If the specified constantexpr is dead, remove /// it. This involves recursively eliminating any dead users of the /// constantexpr. Modified: llvm/trunk/lib/VMCore/Instructions.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/Instructions.cpp?rev=149028&r1=149027&r2=149028&view=diff ============================================================================== --- llvm/trunk/lib/VMCore/Instructions.cpp (original) +++ llvm/trunk/lib/VMCore/Instructions.cpp Wed Jan 25 20:51:13 2012 @@ -1625,11 +1625,11 @@ /// getMaskValue - Return the index from the shuffle mask for the specified /// output result. This is either -1 if the element is undef or a number less /// than 2*numelements. -int ShuffleVectorInst::getMaskValue(unsigned i) const { - assert(i < getType()->getNumElements() && "Index out of range"); - if (ConstantDataSequential *CDS =dyn_cast(getMask())) +int ShuffleVectorInst::getMaskValue(Constant *Mask, unsigned i) { + assert(i < Mask->getType()->getVectorNumElements() && "Index out of range"); + if (ConstantDataSequential *CDS =dyn_cast(Mask)) return CDS->getElementAsInteger(i); - Constant *C = getMask()->getAggregateElement(i); + Constant *C = Mask->getAggregateElement(i); if (isa(C)) return -1; return cast(C)->getZExtValue(); @@ -1637,15 +1637,15 @@ /// getShuffleMask - Return the full mask for this instruction, where each /// element is the element number and undef's are returned as -1. 
-void ShuffleVectorInst::getShuffleMask(SmallVectorImpl &Result) const { - unsigned NumElts = getType()->getNumElements(); +void ShuffleVectorInst::getShuffleMask(Constant *Mask, + SmallVectorImpl &Result) { + unsigned NumElts = Mask->getType()->getVectorNumElements(); - if (ConstantDataSequential *CDS=dyn_cast(getMask())) { + if (ConstantDataSequential *CDS=dyn_cast(Mask)) { for (unsigned i = 0; i != NumElts; ++i) Result.push_back(CDS->getElementAsInteger(i)); return; } - Constant *Mask = getMask(); for (unsigned i = 0; i != NumElts; ++i) { Constant *C = Mask->getAggregateElement(i); Result.push_back(isa(C) ? -1 : From sabre at nondot.org Wed Jan 25 20:54:54 2012 From: sabre at nondot.org (Chris Lattner) Date: Thu, 26 Jan 2012 02:54:54 -0000 Subject: [llvm-commits] [llvm] r149029 - /llvm/trunk/lib/VMCore/ConstantFold.cpp Message-ID: <20120126025454.B99222A6C12C@llvm.org> Author: lattner Date: Wed Jan 25 20:54:54 2012 New Revision: 149029 URL: http://llvm.org/viewvc/llvm-project?rev=149029&view=rev Log: simplify by using ShuffleVectorInst::getMaskValue. Modified: llvm/trunk/lib/VMCore/ConstantFold.cpp Modified: llvm/trunk/lib/VMCore/ConstantFold.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/ConstantFold.cpp?rev=149029&r1=149028&r2=149029&view=diff ============================================================================== --- llvm/trunk/lib/VMCore/ConstantFold.cpp (original) +++ llvm/trunk/lib/VMCore/ConstantFold.cpp Wed Jan 25 20:54:54 2012 @@ -787,24 +787,22 @@ // Undefined shuffle mask -> undefined value. 
if (isa(Mask)) return UndefValue::get(V1->getType()); - unsigned MaskNumElts = cast(Mask->getType())->getNumElements(); - unsigned SrcNumElts = cast(V1->getType())->getNumElements(); - Type *EltTy = cast(V1->getType())->getElementType(); + unsigned MaskNumElts = Mask->getType()->getVectorNumElements(); + unsigned SrcNumElts = V1->getType()->getVectorNumElements(); + Type *EltTy = V1->getType()->getVectorElementType(); // Loop over the shuffle mask, evaluating each element. SmallVector Result; for (unsigned i = 0; i != MaskNumElts; ++i) { - Constant *InElt = Mask->getAggregateElement(i); - if (InElt == 0) return 0; - - if (isa(InElt)) { + int Elt = ShuffleVectorInst::getMaskValue(Mask, i); + if (Elt == -1) { Result.push_back(UndefValue::get(EltTy)); continue; } - unsigned Elt = cast(InElt)->getZExtValue(); - if (Elt >= SrcNumElts*2) + Constant *InElt; + if (unsigned(Elt) >= SrcNumElts*2) InElt = UndefValue::get(EltTy); - else if (Elt >= SrcNumElts) + else if (unsigned(Elt) >= SrcNumElts) InElt = V2->getAggregateElement(Elt - SrcNumElts); else InElt = V1->getAggregateElement(Elt); From sabre at nondot.org Wed Jan 25 21:10:45 2012 From: sabre at nondot.org (Chris Lattner) Date: Thu, 26 Jan 2012 03:10:45 -0000 Subject: [llvm-commits] [llvm] r149033 - /llvm/trunk/lib/VMCore/ConstantFold.cpp Message-ID: <20120126031045.BF55A2A6C12C@llvm.org> Author: lattner Date: Wed Jan 25 21:10:45 2012 New Revision: 149033 URL: http://llvm.org/viewvc/llvm-project?rev=149033&view=rev Log: unbreak test/Bitcode/shuffle.ll. 
Modified: llvm/trunk/lib/VMCore/ConstantFold.cpp Modified: llvm/trunk/lib/VMCore/ConstantFold.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/ConstantFold.cpp?rev=149033&r1=149032&r2=149033&view=diff ============================================================================== --- llvm/trunk/lib/VMCore/ConstantFold.cpp (original) +++ llvm/trunk/lib/VMCore/ConstantFold.cpp Wed Jan 25 21:10:45 2012 @@ -787,6 +787,9 @@ // Undefined shuffle mask -> undefined value. if (isa(Mask)) return UndefValue::get(V1->getType()); + // Don't break the bitcode reader hack. + if (isa(Mask)) return 0; + unsigned MaskNumElts = Mask->getType()->getVectorNumElements(); unsigned SrcNumElts = V1->getType()->getVectorNumElements(); Type *EltTy = V1->getType()->getVectorElementType(); From zinob at codeaurora.org Wed Jan 25 22:02:07 2012 From: zinob at codeaurora.org (Zino Benaissa) Date: Wed, 25 Jan 2012 20:02:07 -0800 Subject: [llvm-commits] Tuning LLVM Greedy Register Allocator to optimize for code size when targeting ARM Thumb 2 instruction set In-Reply-To: <226102FF-900A-4896-870C-F46C00B82BD4@2pi.dk> References: <000001ccda35$239f0e10$6add2a30$@org> <0E37D7B5-BBF8-4D29-9679-5C4D22B32AEB@2pi.dk> <000c01ccda5b$76b208c0$64161a40$@org> <38F583CE-3516-421A-84C2-46978621E648@apple.com> <901B7A01-6E81-4807-A78F-2922C100117D@2pi.dk> <001c01ccdacd$befdd9c0$3cf98d40$@org> <226102FF-900A-4896-870C-F46C00B82BD4@2pi.dk> Message-ID: <000c01ccdbdf$45cacb90$d16062b0$@org> >> The way to look at it is the other way: >> 1. If the offset of load/store is too large then don't bother assigning R0-7 >> 2. If both operands of ADD are not kill then don't bother assigning R0-7 >> 3. If immediate of ADD is too large don't bother assigning R0-7 >> . >> The goal is to eliminate as much as possible candidates that compete for R0-R7 so that the RA does a better assignment of R0-R7 (which ultimately increases 16-bits encoding). >> Returning 2 fails to do this. 
You may as well return 0 instead of 2 :-). >Does this negative bias mean that VirtReg.bytes is 0 for most virtual registers? How > many get VirtReg.bytes > 0? Of course it varies and depends on the compiled code. I have seen that VirtReg.bytes =0 (or close) while this VirtReg occurs frequently in the function (in which case its weight is high). At the same time I have seen candidates that have a very high VirtReg.bytes (and they were getting a costPerUse register before this heuristic). Some EEMBC benchmarks shrank by > 5% and are examples of this. >As I am reading your changes to the eviction policy, you are completely replacing >spill weights with a code size metric for live ranges with Virteg.bytes > 0. Is that >the intention? It depends on why the eviction is invoked. Currently there are three reasons for invoking eviction: enabling coalescing, preventing spill/split, preventing a costPerUse register. Note that all these evictions were already in place before my heuristic. 1) Both for coalescing and for preventing split/spill: VirtReg.bytes=0, the heuristic is ignored, and only the pair is considered. Whatever was put in place is still managing these types of evictions. 2) This heuristic is ON only when a candidate gets a register that has a CostPerUse. In this case, when the RA attempts to trade it for a register with no cost, it now has a metric to evaluate whether the trade is worth evicting for.
-Zino From zinob at codeaurora.org Wed Jan 25 22:14:15 2012 From: zinob at codeaurora.org (Zino Benaissa) Date: Wed, 25 Jan 2012 20:14:15 -0800 Subject: [llvm-commits] Tuning LLVM Greedy Register Allocator to optimize for code size when targeting ARM Thumb 2 instruction set References: <000001ccda35$239f0e10$6add2a30$@org> <0E37D7B5-BBF8-4D29-9679-5C4D22B32AEB@2pi.dk> <000c01ccda5b$76b208c0$64161a40$@org> <38F583CE-3516-421A-84C2-46978621E648@apple.com> <901B7A01-6E81-4807-A78F-2922C100117D@2pi.dk> <001c01ccdacd$befdd9c0$3cf98d40$@org> <226102FF-900A-4896-870C-F46C00B82BD4@2pi.dk> Message-ID: <000d01ccdbe0$f78c5c30$e6a51490$@org> A couple more things: as mentioned, both runtime and code size are improved. One peephole that got triggered more is: cmp r0, 0 beq L is peepholed to: cbz r0 (compare and branch) while r8-r13 stay as is: cmp r8, 0 beq L Last but not least, thank you for the in-depth review of this work. Cheers, -Zino -----Original Message----- From: Zino Benaissa [mailto:zinob at codeaurora.org] Sent: Wednesday, January 25, 2012 8:02 PM To: 'Jakob Stoklund Olesen' Cc: 'rajav at codeaurora.org'; 'llvm-commits at cs.uiuc.edu' Subject: RE: [llvm-commits] Tuning LLVM Greedy Register Allocator to optimize for code size when targeting ARM Thumb 2 instruction set >> The way to look at it is the other way: >> 1. If the offset of load/store is too large then don't bother assigning R0-7 >> 2. If both operands of ADD are not kill then don't bother assigning R0-7 >> 3. If immediate of ADD is too large don't bother assigning R0-7 >> . >> The goal is to eliminate as much as possible candidates that compete for R0-R7 so that the RA does a better assignment of R0-R7 (which ultimately increases 16-bits encoding). >> Returning 2 fails to do this. You may as well return 0 instead of 2 :-). >Does this negative bias mean that VirtReg.bytes is 0 for most virtual registers? How > many get VirtReg.bytes > 0? Of course it varies and depends on the compiled code.
I have seen that VirtReg.bytes =0 (or close) while this VirtReg occurs frequently in the function (in which case its weight is high). At the same time I have seen candidates that have a very high VirtReg.bytes (and they were getting a costPerUse register before this heuristic). Some EEMBC benchmarks shrank by > 5% and are examples of this. >As I am reading your changes to the eviction policy, you are completely replacing >spill weights with a code size metric for live ranges with Virteg.bytes > 0. Is that >the intention? It depends on why the eviction is invoked. Currently there are three reasons for invoking eviction: enabling coalescing, preventing spill/split, preventing a costPerUse register. Note that all these evictions were already in place before my heuristic. 1) Both for coalescing and for preventing split/spill: VirtReg.bytes=0, the heuristic is ignored, and only the pair is considered. Whatever was put in place is still managing these types of evictions. 2) This heuristic is ON only when a candidate gets a register that has a CostPerUse. In this case, when the RA attempts to trade it for a register with no cost, it now has a metric to evaluate whether the trade is worth evicting for. -Zino From geek4civic at gmail.com Wed Jan 25 22:44:48 2012 From: geek4civic at gmail.com (NAKAMURA Takumi) Date: Thu, 26 Jan 2012 13:44:48 +0900 Subject: [llvm-commits] [llvm] r141446 - in /llvm/trunk: lib/Target/ARM/ARMInstrFormats.td lib/Target/ARM/ARMInstrNEON.td lib/Target/ARM/AsmParser/ARMAsmParser.cpp lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp lib/Target/ARM/InstPrinter/ARMInstPrinter.h t Message-ID: Jim, excuse me for commenting on an old commit. ;) 2011/10/8 Jim Grosbach : > Author: grosbach > Date: Fri Oct ?7 18:56:00 2011 > New Revision: 141446 > > URL: http://llvm.org/viewvc/llvm-project?rev=141446&view=rev > Log: > ARM NEON assembly parsing and encoding for VDUP(scalar). > > Modified: > ? ?llvm/trunk/lib/Target/ARM/ARMInstrFormats.td > ?
?llvm/trunk/lib/Target/ARM/ARMInstrNEON.td > ? ?llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp > ? ?llvm/trunk/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp > ? ?llvm/trunk/lib/Target/ARM/InstPrinter/ARMInstPrinter.h > ? ?llvm/trunk/test/MC/ARM/neont2-dup-encoding.s > ? ?llvm/trunk/utils/TableGen/EDEmitter.cpp > --- llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp (original) > +++ llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp Fri Oct ?7 18:56:00 2011 > @@ -1815,6 +1903,37 @@ > ? ? Operands.push_back(ARMOperand::CreateToken(ExclaimTok.getString(), > ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?ExclaimTok.getLoc())); > ? ? Parser.Lex(); // Eat exclaim token > + ? ?return false; > + ?} > + > + ?// Also check for an index operand. This is only legal for vector registers, > + ?// but that'll get caught OK in operand matching, so we don't need to > + ?// explicitly filter everything else out here. > + ?if (Parser.getTok().is(AsmToken::LBrac)) { > + ? ?SMLoc SIdx = Parser.getTok().getLoc(); > + ? ?Parser.Lex(); // Eat left bracket token. > + > + ? ?const MCExpr *ImmVal; > + ? ?SMLoc ExprLoc = Parser.getTok().getLoc(); > + ? ?if (getParser().ParseExpression(ImmVal)) > + ? ? ?return MatchOperand_ParseFail; > + ? ?const MCConstantExpr *MCE = dyn_cast(ImmVal); > + ? ?if (!MCE) { > + ? ? ?TokError("immediate value expected for vector index"); > + ? ? ?return MatchOperand_ParseFail; > + ? ?} > + > + ? ?SMLoc E = Parser.getTok().getLoc(); > + ? ?if (Parser.getTok().isNot(AsmToken::RBrac)) { > + ? ? ?Error(E, "']' expected"); > + ? ? ?return MatchOperand_ParseFail; > + ? ?} > + > + ? ?Parser.Lex(); // Eat right bracket token. > + > + ? ?Operands.push_back(ARMOperand::CreateVectorIndex(MCE->getValue(), > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? SIdx, E, > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? getContext())); > ? } > > ? return false; I think "MatchOperand_ParseFail" would be incompatible against "bool". 
I have found ms cl.exe warns about it. ...Takumi From echristo at apple.com Thu Jan 26 01:01:33 2012 From: echristo at apple.com (Eric Christopher) Date: Thu, 26 Jan 2012 07:01:33 -0000 Subject: [llvm-commits] [debuginfo-tests] r149050 - /debuginfo-tests/trunk/sret.cpp Message-ID: <20120126070133.C638E2A6C12D@llvm.org> Author: echristo Date: Thu Jan 26 01:01:33 2012 New Revision: 149050 URL: http://llvm.org/viewvc/llvm-project?rev=149050&view=rev Log: Revert previous patch as the corresponding clang patch was reverted. gdb's tests are too dependent upon order in debug info. Modified: debuginfo-tests/trunk/sret.cpp Modified: debuginfo-tests/trunk/sret.cpp URL: http://llvm.org/viewvc/llvm-project/debuginfo-tests/trunk/sret.cpp?rev=149050&r1=149049&r2=149050&view=diff ============================================================================== --- debuginfo-tests/trunk/sret.cpp (original) +++ debuginfo-tests/trunk/sret.cpp Thu Jan 26 01:01:33 2012 @@ -6,8 +6,8 @@ // DEBUGGER: r // DEBUGGER: p a // CHECK: $1 = (A &) -// CHECK: m_int = 12 // CHECK: _vptr$A = +// CHECK: m_int = 12 class A { From craig.topper at gmail.com Thu Jan 26 01:18:04 2012 From: craig.topper at gmail.com (Craig Topper) Date: Thu, 26 Jan 2012 07:18:04 -0000 Subject: [llvm-commits] [llvm] r149052 - in /llvm/trunk/lib/Target/X86: X86ISelLowering.cpp X86InstrSSE.td Message-ID: <20120126071804.675742A6C12C@llvm.org> Author: ctopper Date: Thu Jan 26 01:18:03 2012 New Revision: 149052 URL: http://llvm.org/viewvc/llvm-project?rev=149052&view=rev Log: Remove some more patterns by custom lowering intrinsics to target specific nodes. 
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp llvm/trunk/lib/Target/X86/X86InstrSSE.td Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=149052&r1=149051&r2=149052&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original) +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Thu Jan 26 01:18:03 2012 @@ -9358,6 +9358,15 @@ case Intrinsic::x86_avx2_psign_d: return DAG.getNode(X86ISD::PSIGN, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2)); + case Intrinsic::x86_sse41_insertps: + return DAG.getNode(X86ISD::INSERTPS, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); + case Intrinsic::x86_avx_vperm2f128_ps_256: + case Intrinsic::x86_avx_vperm2f128_pd_256: + case Intrinsic::x86_avx_vperm2f128_si_256: + case Intrinsic::x86_avx2_vperm2i128: + return DAG.getNode(X86ISD::VPERM2X128, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); // ptest and testp intrinsics. 
The intrinsic these come from are designed to // return an integer value, not just an instruction so lower it to the ptest Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=149052&r1=149051&r2=149052&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original) +++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Thu Jan 26 01:18:03 2012 @@ -5860,13 +5860,6 @@ defm INSERTPS : SS41I_insertf32<0x21, "insertps">; } -def : Pat<(int_x86_sse41_insertps VR128:$src1, VR128:$src2, imm:$src3), - (VINSERTPSrr VR128:$src1, VR128:$src2, imm:$src3)>, - Requires<[HasAVX]>; -def : Pat<(int_x86_sse41_insertps VR128:$src1, VR128:$src2, imm:$src3), - (INSERTPSrr VR128:$src1, VR128:$src2, imm:$src3)>, - Requires<[HasSSE41]>; - //===----------------------------------------------------------------------===// // SSE4.1 - Round Instructions //===----------------------------------------------------------------------===// @@ -7179,19 +7172,8 @@ } let Predicates = [HasAVX] in { -def : Pat<(int_x86_avx_vperm2f128_ps_256 VR256:$src1, VR256:$src2, imm:$src3), - (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$src3)>; -def : Pat<(int_x86_avx_vperm2f128_pd_256 VR256:$src1, VR256:$src2, imm:$src3), - (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$src3)>; def : Pat<(int_x86_avx_vperm2f128_si_256 VR256:$src1, VR256:$src2, imm:$src3), (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$src3)>; - -def : Pat<(int_x86_avx_vperm2f128_ps_256 - VR256:$src1, (memopv8f32 addr:$src2), imm:$src3), - (VPERM2F128rm VR256:$src1, addr:$src2, imm:$src3)>; -def : Pat<(int_x86_avx_vperm2f128_pd_256 - VR256:$src1, (memopv4f64 addr:$src2), imm:$src3), - (VPERM2F128rm VR256:$src1, addr:$src2, imm:$src3)>; def : Pat<(int_x86_avx_vperm2f128_si_256 VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)), imm:$src3), (VPERM2F128rm VR256:$src1, addr:$src2, imm:$src3)>; @@ -7398,19 
+7380,17 @@ //===----------------------------------------------------------------------===// // VPERM2I128 - Permute Floating-Point Values in 128-bit chunks // +let neverHasSideEffects = 1 in { def VPERM2I128rr : AVX2AIi8<0x46, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src1, VR256:$src2, i8imm:$src3), "vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", - [(set VR256:$dst, - (int_x86_avx2_vperm2i128 VR256:$src1, VR256:$src2, imm:$src3))]>, - VEX_4V; + []>, VEX_4V; +let mayLoad = 1 in def VPERM2I128rm : AVX2AIi8<0x46, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, f256mem:$src2, i8imm:$src3), "vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", - [(set VR256:$dst, - (int_x86_avx2_vperm2i128 VR256:$src1, (memopv4i64 addr:$src2), - imm:$src3))]>, - VEX_4V; + []>, VEX_4V; +} let Predicates = [HasAVX2] in { def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), From craig.topper at gmail.com Thu Jan 26 01:31:30 2012 From: craig.topper at gmail.com (Craig Topper) Date: Thu, 26 Jan 2012 07:31:30 -0000 Subject: [llvm-commits] [llvm] r149053 - /llvm/trunk/lib/Target/X86/X86InstrSSE.td Message-ID: <20120126073130.B913D2A6C12C@llvm.org> Author: ctopper Date: Thu Jan 26 01:31:30 2012 New Revision: 149053 URL: http://llvm.org/viewvc/llvm-project?rev=149053&view=rev Log: Fix AVX vs SSE patterns ordering issue for VPCMPESTRM and VPCMPISTRM. 
Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=149053&r1=149052&r2=149053&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original) +++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Thu Jan 26 01:31:30 2012 @@ -6583,8 +6583,9 @@ } let Defs = [EFLAGS], usesCustomInserter = 1 in { + let AddedComplexity = 1 in + defm VPCMPISTRM128 : pseudo_pcmpistrm<"#VPCMPISTRM128">, Requires<[HasAVX]>; defm PCMPISTRM128 : pseudo_pcmpistrm<"#PCMPISTRM128">, Requires<[HasSSE42]>; - defm VPCMPISTRM128 : pseudo_pcmpistrm<"#VPCMPISTRM128">, Requires<[HasAVX]>; } let Defs = [XMM0, EFLAGS], neverHasSideEffects = 1, Predicates = [HasAVX] in { @@ -6620,8 +6621,9 @@ } let Defs = [EFLAGS], Uses = [EAX, EDX], usesCustomInserter = 1 in { + let AddedComplexity = 1 in + defm VPCMPESTRM128 : pseudo_pcmpestrm<"#VPCMPESTRM128">, Requires<[HasAVX]>; defm PCMPESTRM128 : pseudo_pcmpestrm<"#PCMPESTRM128">, Requires<[HasSSE42]>; - defm VPCMPESTRM128 : pseudo_pcmpestrm<"#VPCMPESTRM128">, Requires<[HasAVX]>; } let Predicates = [HasAVX], From craig.topper at gmail.com Thu Jan 26 01:51:56 2012 From: craig.topper at gmail.com (Craig Topper) Date: Thu, 26 Jan 2012 07:51:56 -0000 Subject: [llvm-commits] [llvm] r149054 - /llvm/trunk/lib/Target/X86/X86InstrXOP.td Message-ID: <20120126075156.37E9B2A6C12C@llvm.org> Author: ctopper Date: Thu Jan 26 01:51:55 2012 New Revision: 149054 URL: http://llvm.org/viewvc/llvm-project?rev=149054&view=rev Log: Add HasXOP predicate check covering a bunch of XOP intrinsic patterns. 
Modified: llvm/trunk/lib/Target/X86/X86InstrXOP.td Modified: llvm/trunk/lib/Target/X86/X86InstrXOP.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrXOP.td?rev=149054&r1=149053&r2=149054&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86InstrXOP.td (original) +++ llvm/trunk/lib/Target/X86/X86InstrXOP.td Thu Jan 26 01:51:55 2012 @@ -286,6 +286,7 @@ // XOP Intrinsics patterns +let Predicates = [HasXOP] in { // VPCOM EQ def : Pat<(int_x86_xop_vpcomeqw VR128:$src1, VR128:$src2), (VPCOMWri VR128:$src1, VR128:$src2, (i8 4))>; @@ -827,4 +828,5 @@ (bitconvert (memopv2i64 addr:$src2)), VR256:$src3), (VPCMOVmrY VR256:$src1, addr:$src2, VR256:$src3)>; +} // let Predicates = [HasXOP] From victor.umansky at intel.com Thu Jan 26 02:51:40 2012 From: victor.umansky at intel.com (Victor Umansky) Date: Thu, 26 Jan 2012 08:51:40 -0000 Subject: [llvm-commits] [llvm] r149056 - in /llvm/trunk: lib/Target/X86/X86InstrSSE.td test/CodeGen/X86/avx-cvt.ll test/CodeGen/X86/avx-fp2int.ll Message-ID: <20120126085140.539142A6C12C@llvm.org> Author: vumansky Date: Thu Jan 26 02:51:39 2012 New Revision: 149056 URL: http://llvm.org/viewvc/llvm-project?rev=149056&view=rev Log: Fix for the following bug in AVX codegen for double-to-int conversions: . "fptosi" and "fptoui" IR instructions are defined with round-to-zero rounding mode. . Currently for AVX mode for <4xdouble> and <8xdouble> the "VCVTPD2DQ.128" and "VCVTPD2DQ.256" instructions are selected (for .fp_to_sint. DAG node operation ) by AVX codegen. However they use round-to-nearest-even rounding mode. . Consequently, the conversion produces incorrect numbers. The fix is to replace selection of VCVTPD2DQ instructions with VCVTTPD2DQ instructions. The latter use truncate (i.e. round-to-zero) rounding mode. As .fp_to_sint. 
DAG node operation is used only for lowering of "fptosi" and "fptoui" IR instructions, the fix in X86InstrSSE.td definition file doesn't have an impact on other LLVM flows. The patch includes changes in the .td file, a LIT test for the changes and a fix in a legacy LIT test (which produced asm code conflicting with LLVM IR spec). Added: llvm/trunk/test/CodeGen/X86/avx-fp2int.ll (with props) Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td llvm/trunk/test/CodeGen/X86/avx-cvt.ll Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=149056&r1=149055&r2=149056&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original) +++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Thu Jan 26 02:51:39 2012 @@ -4693,9 +4693,9 @@ "cvtpd2dq\t{$src, $dst|$dst, $src}", []>; def : Pat<(v4i32 (fp_to_sint (v4f64 VR256:$src))), - (VCVTPD2DQYrr VR256:$src)>; + (VCVTTPD2DQYrr VR256:$src)>; def : Pat<(v4i32 (fp_to_sint (memopv4f64 addr:$src))), - (VCVTPD2DQYrm addr:$src)>; + (VCVTTPD2DQYrm addr:$src)>; // Convert Packed DW Integers to Packed Double FP let Predicates = [HasAVX] in { Modified: llvm/trunk/test/CodeGen/X86/avx-cvt.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-cvt.ll?rev=149056&r1=149055&r2=149056&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/X86/avx-cvt.ll (original) +++ llvm/trunk/test/CodeGen/X86/avx-cvt.ll Thu Jan 26 02:51:39 2012 @@ -18,7 +18,7 @@ ret <4 x double> %b } -; CHECK: vcvtpd2dqy %ymm +; CHECK: vcvttpd2dqy %ymm define <4 x i32> @fptosi01(<4 x double> %a) { %b = fptosi <4 x double> %a to <4 x i32> ret <4 x i32> %b Added: llvm/trunk/test/CodeGen/X86/avx-fp2int.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-fp2int.ll?rev=149056&view=auto
============================================================================== --- llvm/trunk/test/CodeGen/X86/avx-fp2int.ll (added) +++ llvm/trunk/test/CodeGen/X86/avx-fp2int.ll Thu Jan 26 02:51:39 2012 @@ -0,0 +1,19 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin10 -mcpu=corei7-avx -mattr=+avx | FileCheck %s + +;; Check that FP_TO_SINT and FP_TO_UINT generate convert with truncate + +; CHECK: test1: +; CHECK: vcvttpd2dqy +; CHECK: ret +; CHECK: test2: +; CHECK: vcvttpd2dqy +; CHECK: ret + +define <4 x i8> @test1(<4 x double> %d) { + %c = fptoui <4 x double> %d to <4 x i8> + ret <4 x i8> %c +} +define <4 x i8> @test2(<4 x double> %d) { + %c = fptosi <4 x double> %d to <4 x i8> + ret <4 x i8> %c +} Propchange: llvm/trunk/test/CodeGen/X86/avx-fp2int.ll ------------------------------------------------------------------------------ svn:executable = * From eli.bendersky at intel.com Thu Jan 26 03:09:59 2012 From: eli.bendersky at intel.com (Bendersky, Eli) Date: Thu, 26 Jan 2012 09:09:59 +0000 Subject: [llvm-commits] [llvm] r148653 - in /llvm/trunk: include/llvm/Object/ObjectFile.h include/llvm/Support/Endian.h lib/Object/ELFObjectFile.cpp In-Reply-To: <972604FC-7661-4379-98CC-310F83C4A943@apple.com> References: <20120122090104.4CD0B2A6C12C@llvm.org> <14B0BD87-2FF4-417D-B932-3F18E7E939B4@apple.com> <9BBE4537D1BAAB479E9E8F9D4234619D322E28@HASMSX103.ger.corp.intel.com> <972604FC-7661-4379-98CC-310F83C4A943@apple.com> Message-ID: <9BBE4537D1BAAB479E9E8F9D4234619D323D00@HASMSX103.ger.corp.intel.com> > > In light of the above, which use(s) of std::vector would you recommend > replacing with SmallVector? > > > > Thanks in advance, > > Eli > > > > > > > > Hi Eli, > > That's pretty similar to what the MachO side of things is doing. > SmallVector is nearly equivalent to std::vector, so even just using it to > optimize for a set of common cases w/ not too many objects in it can be very > handy, especially when the contained objects are small like they are here. 
> > I wouldn't go so far to say that it's a requirement to use SmallVector<> here > or anything like that, but it would be my personal choice barring seeing actual > performance comparisons demonstrating otherwise. > Hi Jim, We plan to replace this code pretty soon with a more sophisticated memory manager, so the vector of addresses will go away in its current form. In the new implementation we're going to submit, we'll be aiming to use SmallVector instead of vector wherever it's reasonable, and feel free to comment on that in the patch review. Does that sound acceptable? Eli --------------------------------------------------------------------- Intel Israel (74) Limited This e-mail and any attachments may contain confidential material for the sole use of the intended recipient(s). Any review or distribution by others is strictly prohibited. If you are not the intended recipient, please contact the sender and delete all copies. From nicholas at mxc.ca Thu Jan 26 03:27:07 2012 From: nicholas at mxc.ca (Nick Lewycky) Date: Thu, 26 Jan 2012 01:27:07 -0800 Subject: [llvm-commits] [llvm] r148941 - in /llvm/trunk: lib/Analysis/InlineCost.cpp test/Transforms/Inline/alloca-bonus.ll In-Reply-To: References: <20120125082740.C7BB02A6C12C@llvm.org> <4F2050C7.1030009@mxc.ca> Message-ID: <4F211C6B.2080709@mxc.ca> Eli Friedman wrote: > On Wed, Jan 25, 2012 at 10:58 AM, Nick Lewycky wrote: >> Eli Friedman wrote: >>> >>> On Wed, Jan 25, 2012 at 12:27 AM, Nick Lewycky wrote: >>>> >>>> Author: nicholas >>>> Date: Wed Jan 25 02:27:40 2012 >>>> New Revision: 148941 >>>> >>>> URL: http://llvm.org/viewvc/llvm-project?rev=148941&view=rev >>>> Log: >>>> Support pointer comparisons against constants, when looking at the >>>> inline-cost >>>> savings from a pointer argument becoming an alloca. Sometimes callees >>>> will even >>>> compare a pointer to null and then branch to an otherwise unreachable >>>> block! 
>>>> Detect these cases and compute the number of saved instructions, instead >>>> of >>>> bailing out and reporting no savings. >>> >>> >>> [Comments inlined.] >>> >>>> + } else if (ICmpInst *ICI = dyn_cast(I)) { >>>> + if (!isa(ICI->getOperand(1))) >>>> + return 0; >>>> + ICmpInsts.push_back(ICI); >>> >>> >>> You probably want to restrict this to equality comparisons; I don't >>> think we fold relational comparisons between an alloca and a constant >>> in general. >> >> >> We do. This fits in with the rule that you aren't allowed to guess an >> alloca. > > Is that rule documented somewhere? Nope. More generally, while we document the semantics of the 'alloca' instruction and the 'icmp' instruction, we don't document secondary rules like this which fall out of our as-if interpretation of those semantics. Cue Dan Gohman. ;) Nick From james.molloy at arm.com Thu Jan 26 03:25:43 2012 From: james.molloy at arm.com (James Molloy) Date: Thu, 26 Jan 2012 09:25:43 -0000 Subject: [llvm-commits] [llvm] r149057 - in /llvm/trunk: include/llvm/CodeGen/AsmPrinter.h include/llvm/MC/MCExpr.h lib/CodeGen/AsmPrinter/AsmPrinter.cpp lib/MC/MCExpr.cpp lib/Target/ARM/ARMAsmPrinter.cpp lib/Target/ARM/ARMAsmPrinter.h lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp test/MC/ARM/cxx-global-constructor.ll Message-ID: <20120126092543.8F0282A6C12C@llvm.org> Author: jamesm Date: Thu Jan 26 03:25:43 2012 New Revision: 149057 URL: http://llvm.org/viewvc/llvm-project?rev=149057&view=rev Log: Add support for the R_ARM_TARGET1 relocation, which should be given to relocations applied to all C++ constructors and destructors. This enables the linker to match concrete relocation types (absolute or relative) with whatever library or C++ support code is being linked against. 
Added: llvm/trunk/test/MC/ARM/cxx-global-constructor.ll (with props) Modified: llvm/trunk/include/llvm/CodeGen/AsmPrinter.h llvm/trunk/include/llvm/MC/MCExpr.h llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp llvm/trunk/lib/MC/MCExpr.cpp llvm/trunk/lib/Target/ARM/ARMAsmPrinter.cpp llvm/trunk/lib/Target/ARM/ARMAsmPrinter.h llvm/trunk/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp Modified: llvm/trunk/include/llvm/CodeGen/AsmPrinter.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/AsmPrinter.h?rev=149057&r1=149056&r2=149057&view=diff ============================================================================== --- llvm/trunk/include/llvm/CodeGen/AsmPrinter.h (original) +++ llvm/trunk/include/llvm/CodeGen/AsmPrinter.h Thu Jan 26 03:25:43 2012 @@ -268,6 +268,12 @@ virtual void EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV); + /// EmitXXStructor - Targets can override this to change how global constants + /// that are part of a C++ static/global constructor list are emitted. + virtual void EmitXXStructor(const Constant *CV) { + EmitGlobalConstant(CV); + } + /// isBlockOnlyReachableByFallthough - Return true if the basic block has /// exactly one predecessor and the control transfer mechanism between /// the predecessor and this block is a fall-through. 
Modified: llvm/trunk/include/llvm/MC/MCExpr.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/MC/MCExpr.h?rev=149057&r1=149056&r2=149057&view=diff ============================================================================== --- llvm/trunk/include/llvm/MC/MCExpr.h (original) +++ llvm/trunk/include/llvm/MC/MCExpr.h Thu Jan 26 03:25:43 2012 @@ -169,6 +169,7 @@ VK_ARM_GOTOFF, VK_ARM_TPOFF, VK_ARM_GOTTPOFF, + VK_ARM_TARGET1, VK_PPC_TOC, VK_PPC_DARWIN_HA16, // ha16(symbol) Modified: llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp?rev=149057&r1=149056&r2=149057&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp (original) +++ llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp Thu Jan 26 03:25:43 2012 @@ -1296,7 +1296,7 @@ OutStreamer.SwitchSection(OutputSection); if (OutStreamer.getCurrentSection() != OutStreamer.getPreviousSection()) EmitAlignment(Align); - EmitGlobalConstant(Structors[i].second); + EmitXXStructor(Structors[i].second); } } Modified: llvm/trunk/lib/MC/MCExpr.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/MC/MCExpr.cpp?rev=149057&r1=149056&r2=149057&view=diff ============================================================================== --- llvm/trunk/lib/MC/MCExpr.cpp (original) +++ llvm/trunk/lib/MC/MCExpr.cpp Thu Jan 26 03:25:43 2012 @@ -58,7 +58,8 @@ SRE.getKind() == MCSymbolRefExpr::VK_ARM_GOT || SRE.getKind() == MCSymbolRefExpr::VK_ARM_GOTOFF || SRE.getKind() == MCSymbolRefExpr::VK_ARM_TPOFF || - SRE.getKind() == MCSymbolRefExpr::VK_ARM_GOTTPOFF) + SRE.getKind() == MCSymbolRefExpr::VK_ARM_GOTTPOFF || + SRE.getKind() == MCSymbolRefExpr::VK_ARM_TARGET1) OS << MCSymbolRefExpr::getVariantKindName(SRE.getKind()); else if (SRE.getKind() != MCSymbolRefExpr::VK_None && SRE.getKind() != MCSymbolRefExpr::VK_PPC_DARWIN_HA16 && @@ -193,6 +194,7 @@ case 
VK_ARM_TPOFF: return "(tpoff)"; case VK_ARM_GOTTPOFF: return "(gottpoff)"; case VK_ARM_TLSGD: return "(tlsgd)"; + case VK_ARM_TARGET1: return "(target1)"; case VK_PPC_TOC: return "toc"; case VK_PPC_DARWIN_HA16: return "ha16"; case VK_PPC_DARWIN_LO16: return "lo16"; Modified: llvm/trunk/lib/Target/ARM/ARMAsmPrinter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMAsmPrinter.cpp?rev=149057&r1=149056&r2=149057&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMAsmPrinter.cpp (original) +++ llvm/trunk/lib/Target/ARM/ARMAsmPrinter.cpp Thu Jan 26 03:25:43 2012 @@ -299,6 +299,22 @@ OutStreamer.EmitLabel(CurrentFnSym); } +void ARMAsmPrinter::EmitXXStructor(const Constant *CV) { + uint64_t Size = TM.getTargetData()->getTypeAllocSize(CV->getType()); + assert(Size && "C++ constructor pointer had zero size!"); + + const GlobalValue *GV = dyn_cast(CV); + assert(GV && "C++ constructor pointer was not a GlobalValue!"); + + const MCExpr *E = MCSymbolRefExpr::Create(Mang->getSymbol(GV), + (Subtarget->isTargetDarwin() + ? MCSymbolRefExpr::VK_None + : MCSymbolRefExpr::VK_ARM_TARGET1), + OutContext); + + OutStreamer.EmitValue(E, Size); +} + /// runOnMachineFunction - This uses the EmitInstruction() /// method to print assembly for each instruction. /// Modified: llvm/trunk/lib/Target/ARM/ARMAsmPrinter.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMAsmPrinter.h?rev=149057&r1=149056&r2=149057&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMAsmPrinter.h (original) +++ llvm/trunk/lib/Target/ARM/ARMAsmPrinter.h Thu Jan 26 03:25:43 2012 @@ -73,6 +73,7 @@ virtual void EmitFunctionEntryLabel(); void EmitStartOfAsmFile(Module &M); void EmitEndOfAsmFile(Module &M); + void EmitXXStructor(const Constant *CV); // lowerOperand - Convert a MachineOperand into the equivalent MCOperand. 
bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp); Modified: llvm/trunk/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp?rev=149057&r1=149056&r2=149057&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp (original) +++ llvm/trunk/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp Thu Jan 26 03:25:43 2012 @@ -236,7 +236,10 @@ case MCSymbolRefExpr::VK_ARM_GOTOFF: Type = ELF::R_ARM_GOTOFF32; break; - } + case MCSymbolRefExpr::VK_ARM_TARGET1: + Type = ELF::R_ARM_TARGET1; + break; + } break; case ARM::fixup_arm_ldst_pcrel_12: case ARM::fixup_arm_pcrel_10: Added: llvm/trunk/test/MC/ARM/cxx-global-constructor.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/ARM/cxx-global-constructor.ll?rev=149057&view=auto ============================================================================== --- llvm/trunk/test/MC/ARM/cxx-global-constructor.ll (added) +++ llvm/trunk/test/MC/ARM/cxx-global-constructor.ll Thu Jan 26 03:25:43 2012 @@ -0,0 +1,12 @@ +; RUN: llc %s -mtriple=armv7-linux-gnueabi -relocation-model=pic \ +; RUN: -filetype=obj -o - | elf-dump --dump-section-data | FileCheck %s + + + at llvm.global_ctors = appending global [1 x { i32, void ()* }] [{ i32, void ()* } { i32 65535, void ()* @f }] + +define void @f() { + ret void +} + +; Check for a relocation of type R_ARM_TARGET1. 
+; CHECK: ('r_type', 0x26) Propchange: llvm/trunk/test/MC/ARM/cxx-global-constructor.ll ------------------------------------------------------------------------------ svn:eol-style = native From geek4civic at gmail.com Thu Jan 26 03:32:38 2012 From: geek4civic at gmail.com (NAKAMURA Takumi) Date: Thu, 26 Jan 2012 18:32:38 +0900 Subject: [llvm-commits] [PATCH][asan] Small formatting Message-ID: --- .../Instrumentation/AddressSanitizer.cpp | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) G->replaceAllUsesWith( ConstantExpr::getGetElementPtr(NewGlobal, Indices2, 2)); What is "2"? AFAIK 3rd argument of ConstantExpr::getGetElementPtr() assumes bool. ms cl.exe found it. ...Takumi -------------- next part -------------- A non-text attachment was scrubbed... Name: 0001-2-true.patch.txt Type: text/x-patch Size: 633 bytes Desc: not available Url : http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120126/fa058c61/attachment.bin From ggreif at gmail.com Thu Jan 26 04:28:58 2012 From: ggreif at gmail.com (Gabor Greif) Date: Thu, 26 Jan 2012 10:28:58 -0000 Subject: [llvm-commits] [llvm] r149060 - /llvm/trunk/autoconf/configure.ac Message-ID: <20120126102858.411CC2A6C12C@llvm.org> Author: ggreif Date: Thu Jan 26 04:28:58 2012 New Revision: 149060 URL: http://llvm.org/viewvc/llvm-project?rev=149060&view=rev Log: comment tweaks Modified: llvm/trunk/autoconf/configure.ac Modified: llvm/trunk/autoconf/configure.ac URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/autoconf/configure.ac?rev=149060&r1=149059&r2=149060&view=diff ============================================================================== --- llvm/trunk/autoconf/configure.ac (original) +++ llvm/trunk/autoconf/configure.ac Thu Jan 26 04:28:58 2012 @@ -373,7 +373,7 @@ AC_MSG_WARN([Configuring LLVM for an unknown target archicture]) fi -# Determine the LLVM native architecture for the target +dnl Determine the LLVM native architecture for the target case "$llvm_cv_target_arch" in x86) 
LLVM_NATIVE_ARCH="X86" ;; x86_64) LLVM_NATIVE_ARCH="X86" ;; @@ -386,7 +386,7 @@ dnl Check for the endianness of the target AC_C_BIGENDIAN(AC_SUBST([ENDIAN],[big]),AC_SUBST([ENDIAN],[little])) -dnl Check for build platform executable suffix if we're crosscompiling +dnl Check for build platform executable suffix if we're cross-compiling if test "$cross_compiling" = yes; then AC_SUBST(LLVM_CROSS_COMPILING, [1]) AC_BUILD_EXEEXT @@ -670,8 +670,8 @@ esac AC_SUBST(TARGETS_TO_BUILD,$TARGETS_TO_BUILD) -# Determine whether we are building LLVM support for the native architecture. -# If so, define LLVM_NATIVE_ARCH to that LLVM target. +dnl Determine whether we are building LLVM support for the native architecture. +dnl If so, define LLVM_NATIVE_ARCH to that LLVM target. for a_target in $TARGETS_TO_BUILD; do if test "$a_target" = "$LLVM_NATIVE_ARCH"; then AC_DEFINE_UNQUOTED(LLVM_NATIVE_ARCH, $LLVM_NATIVE_ARCH, @@ -698,8 +698,8 @@ fi done -# Build the LLVM_TARGET and LLVM_... macros for Targets.def and the individual -# target feature def files. +dnl Build the LLVM_TARGET and LLVM_... macros for Targets.def and the individual +dnl target feature def files. 
LLVM_ENUM_TARGETS="" LLVM_ENUM_ASM_PRINTERS="" LLVM_ENUM_ASM_PARSERS="" @@ -1435,9 +1435,9 @@ dnl checks found to the Makefiles so we can use it there too AC_SUBST(SHLIBPATH_VAR,$libltdl_cv_shlibpath_var) -# Translate the various configuration directories and other basic -# information into substitutions that will end up in Makefile.config.in -# that these configured values can be used by the makefiles +dnl Translate the various configuration directories and other basic +dnl information into substitutions that will end up in Makefile.config.in +dnl that these configured values can be used by the makefiles if test "${prefix}" = "NONE" ; then prefix="/usr/local" fi @@ -1462,8 +1462,8 @@ AC_SUBST(LLVM_MANDIR) AC_SUBST(LLVM_CONFIGTIME) -# Place the various directores into the config.h file as #defines so that we -# can know about the installation paths within LLVM. +dnl Place the various directories into the config.h file as #defines so that we +dnl can know about the installation paths within LLVM. AC_DEFINE_UNQUOTED(LLVM_PREFIX,"$LLVM_PREFIX", [Installation prefix directory]) AC_DEFINE_UNQUOTED(LLVM_BINDIR, "$LLVM_BINDIR", @@ -1487,7 +1487,7 @@ AC_DEFINE_UNQUOTED(LLVM_DEFAULT_TARGET_TRIPLE, "$target", [Target triple LLVM will generate code for by default]) -# Determine which bindings to build. +dnl Determine which bindings to build. if test "$BINDINGS_TO_BUILD" = auto ; then BINDINGS_TO_BUILD="" if test "x$OCAMLC" != x -a "x$OCAMLDEP" != x ; then @@ -1496,11 +1496,11 @@ fi AC_SUBST(BINDINGS_TO_BUILD,$BINDINGS_TO_BUILD) -# This isn't really configurey, but it avoids having to repeat the list in -# other files. +dnl This isn't really configurey, but it avoids having to repeat the list in +dnl other files. AC_SUBST(ALL_BINDINGS,ocaml) -# Do any work necessary to ensure that bindings have what they need. +dnl Do any work necessary to ensure that bindings have what they need. 
binding_prereqs_failed=0 for a_binding in $BINDINGS_TO_BUILD ; do case "$a_binding" in From 6yearold at gmail.com Thu Jan 26 06:36:23 2012 From: 6yearold at gmail.com (arrowdodger) Date: Thu, 26 Jan 2012 15:36:23 +0300 Subject: [llvm-commits] [PATCH][CMake] PR10050. In-Reply-To: References: Message-ID: On Thu, Jan 19, 2012 at 8:06 PM, arrowdodger <6yearold at gmail.com> wrote: > On Thu, Jan 12, 2012 at 6:28 PM, arrowdodger <6yearold at gmail.com> wrote: > >> On Sun, Jan 8, 2012 at 3:36 PM, arrowdodger <6yearold at gmail.com> wrote: >> >>> On Mon, Jan 2, 2012 at 9:36 PM, Chandler Carruth wrote: >>> >>>> Thanks, but there are some real problems with how this is structured. >>>> The first two are easy to fix, but the last may not be. >>>> >>>> 1) The macros are strangely formed in a few ways. First, please avoid >>>> magical arrays filled in with the results of your macro. They're very hard >>>> to understand. I'd much rather that each macro produces a new target which >>>> other things can then depend on, or populates an array the name of which is >>>> passed into the macro. Also, why make the user write a foreach loop around >>>> the files? It seems like you could have them provide a list of files rather >>>> than a single file, and do the looping inside the macro. Finally, please >>>> allow the user to specify the actual filename rather than the basename of >>>> the podfile. Compute the basename yourself, as this will make both the use >>>> of the macro easier to understand and it's implementation more clear. >>>> >>> >>> Does it looks better now? >>> >>> >>>> 2) Please use something more distinct such as 'llvm-docs-*' as the >>>> custom target names; 'llvm-ps' looks too much like it could be one of the >>>> actual LLVM tools. >>>> >>> >>> Done. >>> >>> >>>> 3) I don't understand why you can't use normal CMake dependencies to >>>> connect the install rules to the actual build steps. It seems like you >>>> could use install(FILES ${HTML_FILES} ...) 
or some equivalent construct. >>>> CMake should be handling the dependency computations and ensuring that >>>> those files are build for 'make install'. >>>> >>> >>> Yeah, it should. But it does not. I've even asked on StackOverflow about >>> this. >>> >>> >>>> If CMake cannot do this naturally with its own dependency information, >>>> then I fear it may not be worth adding this to the CMake builds, and they >>>> will be less functional than the autotools builds. I think re-executing the >>>> make tool is an unacceptable design wart. >>>> >>> >>> Why do you think so? Isn't that hack is what CMake should actually do - >>> just call `make ` during `make install`? If CMake fix it, this hack >>> can be removed by altering only 3 lines in AddLLVM.cmake. >>> >>> I also have trouble believing CMake can't represent this pattern, but I >>>> haven't tried my hand at it... >>>> >>> >>> If you find cleaner way to solve this, i would be happy to redo the >>> patch. >>> >> >> Bump. >> > > Another bump. > Chandler, you aren't interested in this? Should i stop spamming maillist? -------------- next part -------------- An HTML attachment was scrubbed... URL: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120126/0142fb4a/attachment-0001.html -------------- next part -------------- A non-text attachment was scrubbed... Name: clang.cmake.docs.patch Type: text/x-patch Size: 4063 bytes Desc: not available Url : http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120126/0142fb4a/attachment-0002.bin -------------- next part -------------- A non-text attachment was scrubbed... 
Name: llvm.cmake.docs.patch Type: text/x-patch Size: 8874 bytes Desc: not available Url : http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120126/0142fb4a/attachment-0003.bin From grosbach at apple.com Thu Jan 26 09:32:51 2012 From: grosbach at apple.com (Jim Grosbach) Date: Thu, 26 Jan 2012 07:32:51 -0800 Subject: [llvm-commits] [llvm] r148653 - in /llvm/trunk: include/llvm/Object/ObjectFile.h include/llvm/Support/Endian.h lib/Object/ELFObjectFile.cpp In-Reply-To: <9BBE4537D1BAAB479E9E8F9D4234619D323D00@HASMSX103.ger.corp.intel.com> References: <20120122090104.4CD0B2A6C12C@llvm.org> <14B0BD87-2FF4-417D-B932-3F18E7E939B4@apple.com> <9BBE4537D1BAAB479E9E8F9D4234619D322E28@HASMSX103.ger.corp.intel.com> <972604FC-7661-4379-98CC-310F83C4A943@apple.com> <9BBE4537D1BAAB479E9E8F9D4234619D323D00@HASMSX103.ger.corp.intel.com> Message-ID: <9B04BF69-9AC9-4738-8E1E-D94A2D9DD074@apple.com> On Jan 26, 2012, at 1:09 AM, Bendersky, Eli wrote: >>> In light of the above, which use(s) of std::vector would you recommend >> replacing with SmallVector? >>> >>> Thanks in advance, >>> Eli >>> >>> >>> >> >> Hi Eli, >> >> That's pretty similar to what the MachO side of things is doing. >> SmallVector is nearly equivalent to std::vector, so even just using it to >> optimize for a set of common cases w/ not too many objects in it can be very >> handy, especially when the contained objects are small like they are here. >> >> I wouldn't go so far to say that it's a requirement to use SmallVector<> here >> or anything like that, but it would be my personal choice barring seeing actual >> performance comparisons demonstrating otherwise. >> > > Hi Jim, > > We plan to replace this code pretty soon with a more sophisticated memory manager, so the vector of addresses will go away in its current form. In the new implementation we're going to submit, we'll be aiming to use SmallVector instead of vector wherever it's reasonable, and feel free to comment on that in the patch review. 
> Does that sound acceptable? Sure, that sounds great. I look forward to seeing the new implementation. -Jim > Eli > > --------------------------------------------------------------------- > Intel Israel (74) Limited > > This e-mail and any attachments may contain confidential material for > the sole use of the intended recipient(s). Any review or distribution > by others is strictly prohibited. If you are not the intended > recipient, please contact the sender and delete all copies. > From curdeius at gmail.com Thu Jan 26 09:49:35 2012 From: curdeius at gmail.com (Marcus Curdeius) Date: Thu, 26 Jan 2012 16:49:35 +0100 Subject: [llvm-commits] [PATCH] Some MSVC2010 /W4 level warnings removed In-Reply-To: References: Message-ID: I've created a patch to remove some annoying warnings. Rationale: when you have a bunch of unimportant warnings, you might miss one that is important. Tested only on Win7 MSVC2010 x64. All tests passed. Cheers, Marek -------------- next part -------------- An HTML attachment was scrubbed... URL: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120126/762b9778/attachment.html -------------- next part -------------- A non-text attachment was scrubbed... Name: llvm.warnings-removed.patch.zip Type: application/zip Size: 4256 bytes Desc: not available Url : http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120126/762b9778/attachment.zip From grosbach at apple.com Thu Jan 26 09:56:45 2012 From: grosbach at apple.com (Jim Grosbach) Date: Thu, 26 Jan 2012 15:56:45 -0000 Subject: [llvm-commits] [llvm] r149062 - /llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp Message-ID: <20120126155645.6388B2A6C12C@llvm.org> Author: grosbach Date: Thu Jan 26 09:56:45 2012 New Revision: 149062 URL: http://llvm.org/viewvc/llvm-project?rev=149062&view=rev Log: Tidy up. Fix mismatched return types for error handling.
Modified: llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp Modified: llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp?rev=149062&r1=149061&r2=149062&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp (original) +++ llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp Thu Jan 26 09:56:45 2012 @@ -2552,16 +2552,12 @@ if (getParser().ParseExpression(ImmVal)) return MatchOperand_ParseFail; const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(ImmVal); - if (!MCE) { - TokError("immediate value expected for vector index"); - return MatchOperand_ParseFail; - } + if (!MCE) + return TokError("immediate value expected for vector index"); SMLoc E = Parser.getTok().getLoc(); - if (Parser.getTok().isNot(AsmToken::RBrac)) { - Error(E, "']' expected"); - return MatchOperand_ParseFail; - } + if (Parser.getTok().isNot(AsmToken::RBrac)) + return Error(E, "']' expected"); Parser.Lex(); // Eat right bracket token. From grosbach at apple.com Thu Jan 26 10:00:53 2012 From: grosbach at apple.com (Jim Grosbach) Date: Thu, 26 Jan 2012 08:00:53 -0800 Subject: [llvm-commits] [llvm] r141446 - in /llvm/trunk: lib/Target/ARM/ARMInstrFormats.td lib/Target/ARM/ARMInstrNEON.td lib/Target/ARM/AsmParser/ARMAsmParser.cpp lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp lib/Target/ARM/InstPrinter/ARMInstPrinter.h t In-Reply-To: References: Message-ID: On Jan 25, 2012, at 8:44 PM, NAKAMURA Takumi wrote: > Jim, excuse me to comment to an old commit. ;) > No problem at all. > 2011/10/8 Jim Grosbach : >> Author: grosbach >> Date: Fri Oct 7 18:56:00 2011 >> New Revision: 141446 >> >> URL: http://llvm.org/viewvc/llvm-project?rev=141446&view=rev >> Log: >> ARM NEON assembly parsing and encoding for VDUP(scalar).
>> >> Modified: >> llvm/trunk/lib/Target/ARM/ARMInstrFormats.td >> llvm/trunk/lib/Target/ARM/ARMInstrNEON.td >> llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp >> llvm/trunk/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp >> llvm/trunk/lib/Target/ARM/InstPrinter/ARMInstPrinter.h >> llvm/trunk/test/MC/ARM/neont2-dup-encoding.s >> llvm/trunk/utils/TableGen/EDEmitter.cpp > >> --- llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp (original) >> +++ llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp Fri Oct 7 18:56:00 2011 >> @@ -1815,6 +1903,37 @@ >> Operands.push_back(ARMOperand::CreateToken(ExclaimTok.getString(), >> ExclaimTok.getLoc())); >> Parser.Lex(); // Eat exclaim token >> + return false; >> + } >> + >> + // Also check for an index operand. This is only legal for vector registers, >> + // but that'll get caught OK in operand matching, so we don't need to >> + // explicitly filter everything else out here. >> + if (Parser.getTok().is(AsmToken::LBrac)) { >> + SMLoc SIdx = Parser.getTok().getLoc(); >> + Parser.Lex(); // Eat left bracket token. >> + >> + const MCExpr *ImmVal; >> + SMLoc ExprLoc = Parser.getTok().getLoc(); >> + if (getParser().ParseExpression(ImmVal)) >> + return MatchOperand_ParseFail; >> + const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(ImmVal); >> + if (!MCE) { >> + TokError("immediate value expected for vector index"); >> + return MatchOperand_ParseFail; >> + } >> + >> + SMLoc E = Parser.getTok().getLoc(); >> + if (Parser.getTok().isNot(AsmToken::RBrac)) { >> + Error(E, "']' expected"); >> + return MatchOperand_ParseFail; >> + } >> + >> + Parser.Lex(); // Eat right bracket token. >> + >> + Operands.push_back(ARMOperand::CreateVectorIndex(MCE->getValue(), >> + SIdx, E, >> + getContext())); >> } >> >> return false; > > I think "MatchOperand_ParseFail" would be incompatible against "bool". > I have found ms cl.exe warns about it. > Ack! Absolutely right. Probably a copy/paste error. Fixed in r149062. Thanks for the review!
> ...Takumi From glider at google.com Thu Jan 26 11:01:20 2012 From: glider at google.com (Alexander Potapenko) Date: Thu, 26 Jan 2012 17:01:20 -0000 Subject: [llvm-commits] [compiler-rt] r149063 - in /compiler-rt/trunk/lib/asan: asan_mac.cc asan_procmaps.h Message-ID: <20120126170124.325F12A6C12C@llvm.org> Author: glider Date: Thu Jan 26 11:01:20 2012 New Revision: 149063 URL: http://llvm.org/viewvc/llvm-project?rev=149063&view=rev Log: Fix a bug in AsanProcMaps on Mac: on 64 bits the program was trying to read twice as many segment load commands as the binary actually contained. Modified: compiler-rt/trunk/lib/asan/asan_mac.cc compiler-rt/trunk/lib/asan/asan_procmaps.h Modified: compiler-rt/trunk/lib/asan/asan_mac.cc URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/asan/asan_mac.cc?rev=149063&r1=149062&r2=149063&view=diff ============================================================================== --- compiler-rt/trunk/lib/asan/asan_mac.cc (original) +++ compiler-rt/trunk/lib/asan/asan_mac.cc Thu Jan 26 11:01:20 2012 @@ -177,6 +177,7 @@ current_image_ = _dyld_image_count(); current_load_cmd_count_ = -1; current_load_cmd_addr_ = NULL; + current_magic_ = 0; } // Next and NextSegmentLoad were inspired by base/sysinfo.cc in @@ -202,6 +203,8 @@ real_strncpy(filename, _dyld_get_image_name(current_image_), filename_size); } + if (FLAG_v >= 4) + Report("LC_SEGMENT: %p--%p %s+%p\n", *start, *end, filename, *offset); return true; } return false; @@ -216,7 +219,8 @@ if (current_load_cmd_count_ < 0) { // Set up for this image; current_load_cmd_count_ = hdr->ncmds; - switch (hdr->magic) { + current_magic_ = hdr->magic; + switch (current_magic_) { #ifdef MH_MAGIC_64 case MH_MAGIC_64: { current_load_cmd_addr_ = (char*)hdr + sizeof(mach_header_64); @@ -233,18 +237,24 @@ } } - // We start with the next load command (we've already looked at this one). 
- for (current_load_cmd_count_--; - current_load_cmd_count_ >= 0; - current_load_cmd_count_--) { + for (; current_load_cmd_count_ >= 0; current_load_cmd_count_--) { + switch (current_magic_) { + // current_magic_ may be only one of MH_MAGIC, MH_MAGIC_64. #ifdef MH_MAGIC_64 - if (NextSegmentLoad( - start, end, offset, filename, filename_size)) - return true; + case MH_MAGIC_64: { + if (NextSegmentLoad( + start, end, offset, filename, filename_size)) + return true; + break; + } #endif - if (NextSegmentLoad( - start, end, offset, filename, filename_size)) - return true; + case MH_MAGIC: { + if (NextSegmentLoad( + start, end, offset, filename, filename_size)) + return true; + break; + } + } } // If we get here, no more load_cmd's in this image talk about // segments. Go on to the next image. Modified: compiler-rt/trunk/lib/asan/asan_procmaps.h URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/asan/asan_procmaps.h?rev=149063&r1=149062&r2=149063&view=diff ============================================================================== --- compiler-rt/trunk/lib/asan/asan_procmaps.h (original) +++ compiler-rt/trunk/lib/asan/asan_procmaps.h Thu Jan 26 11:01:20 2012 @@ -64,6 +64,7 @@ char *current_; #elif defined __APPLE__ int current_image_; + uint32_t current_magic_; int current_load_cmd_count_; char *current_load_cmd_addr_; #endif From glider at google.com Thu Jan 26 11:06:50 2012 From: glider at google.com (Alexander Potapenko) Date: Thu, 26 Jan 2012 17:06:50 -0000 Subject: [llvm-commits] [compiler-rt] r149064 - /compiler-rt/trunk/lib/asan/scripts/asan_symbolize.py Message-ID: <20120126170650.DB3622A6C12C@llvm.org> Author: glider Date: Thu Jan 26 11:06:50 2012 New Revision: 149064 URL: http://llvm.org/viewvc/llvm-project?rev=149064&view=rev Log: More accurate atos execution which depends on the file type (EXECUTE, DYLIB) of the binary. More Linux-like output on Mac (to match more output tests). 
Modified: compiler-rt/trunk/lib/asan/scripts/asan_symbolize.py Modified: compiler-rt/trunk/lib/asan/scripts/asan_symbolize.py URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/asan/scripts/asan_symbolize.py?rev=149064&r1=149063&r2=149064&view=diff ============================================================================== --- compiler-rt/trunk/lib/asan/scripts/asan_symbolize.py (original) +++ compiler-rt/trunk/lib/asan/scripts/asan_symbolize.py Thu Jan 26 11:06:50 2012 @@ -14,6 +14,8 @@ import subprocess pipes = {} +filetypes = {} +DEBUG=False def patch_address(frameno, addr_s): ''' Subtracts 1 or 2 from the top frame's address. @@ -30,6 +32,15 @@ return hex(addr) return addr_s + +def fix_filename(file_name): + for path_to_cut in sys.argv[1:]: + file_name = re.sub(".*" + path_to_cut, "", file_name) + file_name = re.sub(".*asan_[a-z_]*.cc:[0-9]*", "_asan_rtl_", file_name) + file_name = re.sub(".*crtstuff.c:0", "???:0", file_name) + return file_name + + # TODO(glider): need some refactoring here def symbolize_addr2line(line): #0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45) @@ -50,15 +61,25 @@ except: function_name = "" file_name = "" - for path_to_cut in sys.argv[1:]: - file_name = re.sub(".*" + path_to_cut, "", file_name) - file_name = re.sub(".*asan_[a-z_]*.cc:[0-9]*", "_asan_rtl_", file_name) - file_name = re.sub(".*crtstuff.c:0", "???:0", file_name) + file_name = fix_filename(file_name) print match.group(1), "in", function_name, file_name else: print line.rstrip() + +def get_macho_filetype(binary): + if not filetypes.has_key(binary): + otool_pipe = subprocess.Popen(["otool", "-Vh", binary], + stdin=subprocess.PIPE, stdout=subprocess.PIPE) + otool_line = "".join(otool_pipe.stdout.readlines()) + for t in ["DYLIB", "EXECUTE"]: + if t in otool_line: + filetypes[binary] = t + otool_pipe.stdin.close() + return filetypes[binary] + + def symbolize_atos(line): #0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45) match = re.match('^( *#([0-9]+) *)(0x[0-9a-f]+) 
*\((.*)\+(0x[0-9a-f]+)\)', line) @@ -66,32 +87,46 @@ #print line prefix = match.group(1) frameno = match.group(2) - addr = match.group(3) + orig_addr = match.group(3) binary = match.group(4) offset = match.group(5) - addr = patch_address(frameno, addr) - load_addr = int(addr, 16) - int(offset, 16) + addr = patch_address(frameno, orig_addr) + load_addr = hex(int(orig_addr, 16) - int(offset, 16)) + filetype = get_macho_filetype(binary) + if not pipes.has_key(binary): # Guess which arch we're running. 10 = len("0x") + 8 hex digits. if len(addr) > 10: arch = "x86_64" else: arch = "i386" - #print "atos -o %s -arch %s " % (binary, arch) - pipes[binary] = subprocess.Popen(["atos", "-o", binary, "-arch", arch], + + if filetype == "DYLIB": + load_addr = "0x0" + if DEBUG: + print "atos -o %s -arch %s -l %s" % (binary, arch, load_addr) + pipes[binary] = subprocess.Popen(["atos", "-o", binary, "-arch", arch, "-l", load_addr], stdin=subprocess.PIPE, stdout=subprocess.PIPE,) p = pipes[binary] - # TODO(glider): how to tell if the address is absolute? - if ".app/" in binary and not ".framework" in binary: - print >>p.stdin, "%s" % addr + if filetype == "DYLIB": + print >>p.stdin, "%s" % offset else: print >>p.stdin, "%s" % addr # TODO(glider): it's more efficient to make a batch atos run for each binary. 
p.stdin.close() atos_line = p.stdout.readline().rstrip() + # A well-formed atos response looks like this: + # foo(type1, type2) (in object.name) (filename.cc:80) + match = re.match('^(.*) \(in (.*)\) \((.*:\d*)\)$', atos_line) + #print "atos_line: ", atos_line + if match: + function_name = match.group(1) + function_name = re.sub("\(.*?\)", "", function_name) + file_name = fix_filename(match.group(3)) + print "%s%s in %s %s" % (prefix, addr, function_name, file_name) + else: + print "%s%s in %s" % (prefix, addr, atos_line) del pipes[binary] - - print "%s%s in %s" % (prefix, addr, atos_line) else: print line.rstrip() From glider at google.com Thu Jan 26 11:40:19 2012 From: glider at google.com (Alexander Potapenko) Date: Thu, 26 Jan 2012 17:40:19 -0000 Subject: [llvm-commits] [compiler-rt] r149068 - in /compiler-rt/trunk/lib/asan/tests: heap-overflow.tmpl.Darwin null_deref.tmpl.Darwin strncpy-overflow.tmpl.Darwin use-after-free.tmpl.Darwin Message-ID: <20120126174019.2FE062A6C12C@llvm.org> Author: glider Date: Thu Jan 26 11:40:18 2012 New Revision: 149068 URL: http://llvm.org/viewvc/llvm-project?rev=149068&view=rev Log: Darwin-specific templates for the output tests. These should fix the output tests on Mac. 
Added: compiler-rt/trunk/lib/asan/tests/null_deref.tmpl.Darwin compiler-rt/trunk/lib/asan/tests/strncpy-overflow.tmpl.Darwin compiler-rt/trunk/lib/asan/tests/use-after-free.tmpl.Darwin Modified: compiler-rt/trunk/lib/asan/tests/heap-overflow.tmpl.Darwin Modified: compiler-rt/trunk/lib/asan/tests/heap-overflow.tmpl.Darwin URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/asan/tests/heap-overflow.tmpl.Darwin?rev=149068&r1=149067&r2=149068&view=diff ============================================================================== --- compiler-rt/trunk/lib/asan/tests/heap-overflow.tmpl.Darwin (original) +++ compiler-rt/trunk/lib/asan/tests/heap-overflow.tmpl.Darwin Thu Jan 26 11:40:18 2012 @@ -5,4 +5,4 @@ #0 0x.* in .*mz_malloc.* _asan_rtl_ #1 0x.* in malloc_zone_malloc.* #2 0x.* in malloc.* - #3 0x.* in main heap-overflow.cc:4 + #3 0x.* in main heap-overflow.cc:[45] Added: compiler-rt/trunk/lib/asan/tests/null_deref.tmpl.Darwin URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/asan/tests/null_deref.tmpl.Darwin?rev=149068&view=auto ============================================================================== --- compiler-rt/trunk/lib/asan/tests/null_deref.tmpl.Darwin (added) +++ compiler-rt/trunk/lib/asan/tests/null_deref.tmpl.Darwin Thu Jan 26 11:40:18 2012 @@ -0,0 +1,5 @@ +.*ERROR: AddressSanitizer crashed on unknown address 0x0*00028 .*pc 0x.* +AddressSanitizer can not provide additional info. 
ABORTING +# atos cannot resolve the file:line info for frame 0 on the O1 level + #0 0x.* in NullDeref.* + #1 0x.* in main.*null_deref.cc:[67] Added: compiler-rt/trunk/lib/asan/tests/strncpy-overflow.tmpl.Darwin URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/asan/tests/strncpy-overflow.tmpl.Darwin?rev=149068&view=auto ============================================================================== --- compiler-rt/trunk/lib/asan/tests/strncpy-overflow.tmpl.Darwin (added) +++ compiler-rt/trunk/lib/asan/tests/strncpy-overflow.tmpl.Darwin Thu Jan 26 11:40:18 2012 @@ -0,0 +1,9 @@ +WRITE of size 1 at 0x.* thread T0 + #0 0x.* in wrap_strncpy + #1 0x.* in main .*strncpy-overflow.cc:[78] +0x.* is located 0 bytes to the right of 9-byte region +allocated by thread T0 here: + #0 0x.* in .*mz_malloc.* _asan_rtl_ + #1 0x.* in malloc_zone_malloc.* + #2 0x.* in malloc.* + #3 0x.* in main .*strncpy-overflow.cc:6 Added: compiler-rt/trunk/lib/asan/tests/use-after-free.tmpl.Darwin URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/asan/tests/use-after-free.tmpl.Darwin?rev=149068&view=auto ============================================================================== --- compiler-rt/trunk/lib/asan/tests/use-after-free.tmpl.Darwin (added) +++ compiler-rt/trunk/lib/asan/tests/use-after-free.tmpl.Darwin Thu Jan 26 11:40:18 2012 @@ -0,0 +1,14 @@ +.*ERROR: AddressSanitizer heap-use-after-free on address 0x.* at pc 0x.* bp 0x.* sp 0x.* +READ of size 1 at 0x.* thread T0 + #0 0x.* in main .*use-after-free.cc:5 +0x.* is located 5 bytes inside of 10-byte region .0x.*,0x.* +freed by thread T0 here: + #0 0x.* in .*mz_free.* _asan_rtl_ +# We override free() on Darwin, thus no malloc_zone_free + #1 0x.* in free + #2 0x.* in main .*use-after-free.cc:[45] +previously allocated by thread T0 here: + #0 0x.* in .*mz_malloc.* _asan_rtl_ + #1 0x.* in malloc_zone_malloc.* + #2 0x.* in malloc.* + #3 0x.* in main .*use-after-free.cc:3 From stoklund at 2pi.dk Thu Jan 26 11:52:15 2012 
From: stoklund at 2pi.dk (Jakob Stoklund Olesen) Date: Thu, 26 Jan 2012 17:52:15 -0000 Subject: [llvm-commits] [llvm] r149069 - in /llvm/trunk: include/llvm/CodeGen/MachineInstr.h lib/CodeGen/MachineCopyPropagation.cpp lib/CodeGen/MachineInstr.cpp test/CodeGen/ARM/2012-01-26-CopyPropKills.ll Message-ID: <20120126175215.7E5042A6C12C@llvm.org> Author: stoklund Date: Thu Jan 26 11:52:15 2012 New Revision: 149069 URL: http://llvm.org/viewvc/llvm-project?rev=149069&view=rev Log: Clear kill flags before propagating a copy. The live range of the source register may be extended when a redundant copy is eliminated. Make sure any kill flags between the two copies are cleared. This fixes PR11765. Added: llvm/trunk/test/CodeGen/ARM/2012-01-26-CopyPropKills.ll Modified: llvm/trunk/include/llvm/CodeGen/MachineInstr.h llvm/trunk/lib/CodeGen/MachineCopyPropagation.cpp llvm/trunk/lib/CodeGen/MachineInstr.cpp Modified: llvm/trunk/include/llvm/CodeGen/MachineInstr.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/MachineInstr.h?rev=149069&r1=149068&r2=149069&view=diff ============================================================================== --- llvm/trunk/include/llvm/CodeGen/MachineInstr.h (original) +++ llvm/trunk/include/llvm/CodeGen/MachineInstr.h Thu Jan 26 11:52:15 2012 @@ -773,6 +773,10 @@ const TargetRegisterInfo *RegInfo, bool AddIfNotFound = false); + /// clearRegisterKills - Clear all kill flags affecting Reg. If RegInfo is + /// provided, this includes super-register kills. + void clearRegisterKills(unsigned Reg, const TargetRegisterInfo *RegInfo); + /// addRegisterDead - We have determined MI defined a register without a use. /// Look for the operand that defines it and mark it as IsDead. If /// AddIfNotFound is true, add a implicit operand if it's not found. 
Returns Modified: llvm/trunk/lib/CodeGen/MachineCopyPropagation.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MachineCopyPropagation.cpp?rev=149069&r1=149068&r2=149069&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/MachineCopyPropagation.cpp (original) +++ llvm/trunk/lib/CodeGen/MachineCopyPropagation.cpp Thu Jan 26 11:52:15 2012 @@ -142,7 +142,12 @@ // %RSP = COPY %RAX // CALL // %RAX = COPY %RSP - CopyMI->getOperand(1).setIsKill(false); + + // Clear any kills of Def between CopyMI and MI. This extends the + // live range. + for (MachineBasicBlock::iterator I = CopyMI, E = MI; I != E; ++I) + I->clearRegisterKills(Def, TRI); + MI->eraseFromParent(); Changed = true; ++NumDeletes; Modified: llvm/trunk/lib/CodeGen/MachineInstr.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MachineInstr.cpp?rev=149069&r1=149068&r2=149069&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/MachineInstr.cpp (original) +++ llvm/trunk/lib/CodeGen/MachineInstr.cpp Thu Jan 26 11:52:15 2012 @@ -1707,6 +1707,20 @@ return Found; } +void MachineInstr::clearRegisterKills(unsigned Reg, + const TargetRegisterInfo *RegInfo) { + if (!TargetRegisterInfo::isPhysicalRegister(Reg)) + RegInfo = 0; + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + MachineOperand &MO = getOperand(i); + if (!MO.isReg() || !MO.isUse() || !MO.isKill()) + continue; + unsigned OpReg = MO.getReg(); + if (OpReg == Reg || (RegInfo && RegInfo->isSuperRegister(Reg, OpReg))) + MO.setIsKill(false); + } +} + bool MachineInstr::addRegisterDead(unsigned IncomingReg, const TargetRegisterInfo *RegInfo, bool AddIfNotFound) { Added: llvm/trunk/test/CodeGen/ARM/2012-01-26-CopyPropKills.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/2012-01-26-CopyPropKills.ll?rev=149069&view=auto 
============================================================================== --- llvm/trunk/test/CodeGen/ARM/2012-01-26-CopyPropKills.ll (added) +++ llvm/trunk/test/CodeGen/ARM/2012-01-26-CopyPropKills.ll Thu Jan 26 11:52:15 2012 @@ -0,0 +1,121 @@ +; RUN: llc < %s -mcpu=cortex-a9 -join-liveintervals=0 -verify-machineinstrs +; PR11765 +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64" +target triple = "armv7-none-linux-gnueabi" + +; This test case exercises the MachineCopyPropagation pass by disabling the +; RegisterCoalescer. + +define arm_aapcs_vfpcc void @foo(i8* %arg) nounwind uwtable align 2 { +bb: + br i1 undef, label %bb1, label %bb2 + +bb1: ; preds = %bb + unreachable + +bb2: ; preds = %bb + br i1 undef, label %bb92, label %bb3 + +bb3: ; preds = %bb2 + %tmp = or <4 x i32> undef, undef + %tmp4 = bitcast <4 x i32> %tmp to <4 x float> + %tmp5 = fsub <4 x float> , %tmp4 + %tmp6 = bitcast <4 x i32> zeroinitializer to <4 x float> + %tmp7 = fmul <4 x float> %tmp6, + %tmp8 = bitcast <4 x float> %tmp7 to <2 x i64> + %tmp9 = shufflevector <2 x i64> %tmp8, <2 x i64> undef, <1 x i32> zeroinitializer + %tmp10 = bitcast <1 x i64> %tmp9 to <2 x float> + %tmp11 = shufflevector <2 x i64> %tmp8, <2 x i64> undef, <1 x i32> + %tmp12 = bitcast <1 x i64> %tmp11 to <2 x float> + %tmp13 = shufflevector <2 x float> %tmp10, <2 x float> %tmp12, <2 x i32> + %tmp14 = shufflevector <2 x float> %tmp10, <2 x float> undef, <2 x i32> + %tmp15 = bitcast <2 x float> %tmp14 to <1 x i64> + %tmp16 = bitcast <4 x i32> zeroinitializer to <2 x i64> + %tmp17 = shufflevector <2 x i64> %tmp16, <2 x i64> undef, <1 x i32> zeroinitializer + %tmp18 = bitcast <1 x i64> %tmp17 to <2 x i32> + %tmp19 = and <2 x i32> %tmp18, + %tmp20 = bitcast <2 x float> %tmp13 to <2 x i32> + %tmp21 = and <2 x i32> %tmp20, + %tmp22 = or <2 x i32> %tmp19, %tmp21 + %tmp23 = bitcast <2 x i32> %tmp22 to <1 x i64> + %tmp24 = shufflevector <1 x 
i64> %tmp23, <1 x i64> undef, <2 x i32> + %tmp25 = bitcast <2 x i64> %tmp24 to <4 x float> + %tmp26 = shufflevector <2 x i64> %tmp16, <2 x i64> undef, <1 x i32> + %tmp27 = bitcast <1 x i64> %tmp26 to <2 x i32> + %tmp28 = and <2 x i32> %tmp27, + %tmp29 = and <2 x i32> undef, + %tmp30 = or <2 x i32> %tmp28, %tmp29 + %tmp31 = bitcast <2 x i32> %tmp30 to <1 x i64> + %tmp32 = insertelement <4 x float> %tmp25, float 0.000000e+00, i32 3 + %tmp33 = fmul <4 x float> undef, + %tmp34 = fadd <4 x float> %tmp33, %tmp32 + %tmp35 = fmul <4 x float> %tmp33, zeroinitializer + %tmp36 = fadd <4 x float> %tmp35, zeroinitializer + %tmp37 = fadd <4 x float> %tmp35, zeroinitializer + %tmp38 = bitcast <4 x float> %tmp34 to <2 x i64> + %tmp39 = shufflevector <2 x i64> %tmp38, <2 x i64> undef, <1 x i32> zeroinitializer + %tmp40 = bitcast <1 x i64> %tmp39 to <2 x float> + %tmp41 = shufflevector <2 x float> %tmp40, <2 x float> undef, <4 x i32> + %tmp42 = load <4 x float>* null, align 16, !tbaa !0 + %tmp43 = fmul <4 x float> %tmp42, %tmp41 + %tmp44 = load <4 x float>* undef, align 16, !tbaa !0 + %tmp45 = fadd <4 x float> undef, %tmp43 + %tmp46 = fadd <4 x float> undef, %tmp45 + %tmp47 = bitcast <4 x float> %tmp36 to <2 x i64> + %tmp48 = shufflevector <2 x i64> %tmp47, <2 x i64> undef, <1 x i32> zeroinitializer + %tmp49 = bitcast <1 x i64> %tmp48 to <2 x float> + %tmp50 = shufflevector <2 x float> %tmp49, <2 x float> undef, <4 x i32> + %tmp51 = fmul <4 x float> %tmp42, %tmp50 + %tmp52 = fmul <4 x float> %tmp44, undef + %tmp53 = fadd <4 x float> %tmp52, %tmp51 + %tmp54 = fadd <4 x float> undef, %tmp53 + %tmp55 = bitcast <4 x float> %tmp37 to <2 x i64> + %tmp56 = shufflevector <2 x i64> %tmp55, <2 x i64> undef, <1 x i32> + %tmp57 = bitcast <1 x i64> %tmp56 to <2 x float> + %tmp58 = shufflevector <2 x float> %tmp57, <2 x float> undef, <4 x i32> zeroinitializer + %tmp59 = fmul <4 x float> undef, %tmp58 + %tmp60 = fadd <4 x float> %tmp59, undef + %tmp61 = fadd <4 x float> %tmp60, zeroinitializer + 
%tmp62 = load void (i8*, i8*)** undef, align 4 + call arm_aapcs_vfpcc void %tmp62(i8* sret undef, i8* undef) nounwind + %tmp63 = bitcast <4 x float> %tmp46 to i128 + %tmp64 = bitcast <4 x float> %tmp54 to i128 + %tmp65 = bitcast <4 x float> %tmp61 to i128 + %tmp66 = lshr i128 %tmp63, 64 + %tmp67 = trunc i128 %tmp66 to i64 + %tmp68 = insertvalue [8 x i64] undef, i64 %tmp67, 1 + %tmp69 = insertvalue [8 x i64] %tmp68, i64 undef, 2 + %tmp70 = lshr i128 %tmp64, 64 + %tmp71 = trunc i128 %tmp70 to i64 + %tmp72 = insertvalue [8 x i64] %tmp69, i64 %tmp71, 3 + %tmp73 = trunc i128 %tmp65 to i64 + %tmp74 = insertvalue [8 x i64] %tmp72, i64 %tmp73, 4 + %tmp75 = insertvalue [8 x i64] %tmp74, i64 undef, 5 + %tmp76 = insertvalue [8 x i64] %tmp75, i64 undef, 6 + %tmp77 = insertvalue [8 x i64] %tmp76, i64 undef, 7 + call arm_aapcs_vfpcc void @bar(i8* sret null, [8 x i64] %tmp77) nounwind + %tmp78 = call arm_aapcs_vfpcc i8* null(i8* null) nounwind + %tmp79 = bitcast i8* %tmp78 to i512* + %tmp80 = load i512* %tmp79, align 16 + %tmp81 = lshr i512 %tmp80, 128 + %tmp82 = trunc i512 %tmp80 to i128 + %tmp83 = trunc i512 %tmp81 to i128 + %tmp84 = bitcast i128 %tmp83 to <4 x float> + %tmp85 = bitcast <4 x float> %tmp84 to <2 x i64> + %tmp86 = shufflevector <2 x i64> %tmp85, <2 x i64> undef, <1 x i32> + %tmp87 = bitcast <1 x i64> %tmp86 to <2 x float> + %tmp88 = shufflevector <2 x float> %tmp87, <2 x float> undef, <4 x i32> + %tmp89 = fmul <4 x float> undef, %tmp88 + %tmp90 = fadd <4 x float> %tmp89, undef + %tmp91 = fadd <4 x float> undef, %tmp90 + store <4 x float> %tmp91, <4 x float>* undef, align 16, !tbaa !0 + unreachable + +bb92: ; preds = %bb2 + ret void +} + +declare arm_aapcs_vfpcc void @bar(i8* noalias nocapture sret, [8 x i64]) nounwind uwtable inlinehint + +!0 = metadata !{metadata !"omnipotent char", metadata !1} +!1 = metadata !{metadata !"Simple C/C++ TBAA", null} From mcrosier at apple.com Thu Jan 26 12:24:25 2012 From: mcrosier at apple.com (Chad Rosier) Date: Thu, 26 Jan 
2012 18:24:25 -0000 Subject: [llvm-commits] [llvm] r149070 - in /llvm/trunk: lib/CodeGen/MachineBasicBlock.cpp test/CodeGen/ARM/tail-dup.ll Message-ID: <20120126182425.B2E072A6C12C@llvm.org> Author: mcrosier Date: Thu Jan 26 12:24:25 2012 New Revision: 149070 URL: http://llvm.org/viewvc/llvm-project?rev=149070&view=rev Log: Replace the use of isPredicable() with isPredicated() in MachineBasicBlock::canFallThrough(). We're interested in the state of the instruction (i.e., is this a barrier or not?), not if the instruction is predicable or not. rdar://10501092 Added: llvm/trunk/test/CodeGen/ARM/tail-dup.ll Modified: llvm/trunk/lib/CodeGen/MachineBasicBlock.cpp Modified: llvm/trunk/lib/CodeGen/MachineBasicBlock.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MachineBasicBlock.cpp?rev=149070&r1=149069&r2=149070&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/MachineBasicBlock.cpp (original) +++ llvm/trunk/lib/CodeGen/MachineBasicBlock.cpp Thu Jan 26 12:24:25 2012 @@ -535,13 +535,12 @@ if (TII->AnalyzeBranch(*this, TBB, FBB, Cond)) { // If we couldn't analyze the branch, examine the last instruction. // If the block doesn't end in a known control barrier, assume fallthrough - // is possible. The isPredicable check is needed because this code can be + // is possible. The isPredicated check is needed because this code can be // called during IfConversion, where an instruction which is normally a // Barrier is predicated and thus no longer an actual control barrier. This // is over-conservative though, because if an instruction isn't actually // predicated we could still treat it like a barrier. - return empty() || !back().isBarrier() || - back().isPredicable(); + return empty() || !back().isBarrier() || TII->isPredicated(&back()); } // If there is no branch, control always falls through. 
Added: llvm/trunk/test/CodeGen/ARM/tail-dup.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/tail-dup.ll?rev=149070&view=auto ============================================================================== --- llvm/trunk/test/CodeGen/ARM/tail-dup.ll (added) +++ llvm/trunk/test/CodeGen/ARM/tail-dup.ll Thu Jan 26 12:24:25 2012 @@ -0,0 +1,44 @@ +; RUN: llc < %s -mtriple=thumb-apple-darwin -relocation-model=dynamic-no-pic -mcpu=cortex-a8 -asm-verbose=false | FileCheck %s + +; We should be able to tail-duplicate the basic block containing the indirectbr +; into all of its predecessors. +; CHECK: fn: +; CHECK: mov pc +; CHECK: mov pc +; CHECK: mov pc + + at fn.codetable = internal unnamed_addr constant [3 x i8*] [i8* blockaddress(@fn, %RETURN), i8* blockaddress(@fn, %INCREMENT), i8* blockaddress(@fn, %DECREMENT)], align 4 + +define i32 @fn(i32* nocapture %opcodes) nounwind readonly ssp { +entry: + %0 = load i32* %opcodes, align 4, !tbaa !0 + %arrayidx = getelementptr inbounds [3 x i8*]* @fn.codetable, i32 0, i32 %0 + br label %indirectgoto + +INCREMENT: ; preds = %indirectgoto + %inc = add nsw i32 %result.0, 1 + %1 = load i32* %opcodes.addr.0, align 4, !tbaa !0 + %arrayidx2 = getelementptr inbounds [3 x i8*]* @fn.codetable, i32 0, i32 %1 + br label %indirectgoto + +DECREMENT: ; preds = %indirectgoto + %dec = add nsw i32 %result.0, -1 + %2 = load i32* %opcodes.addr.0, align 4, !tbaa !0 + %arrayidx4 = getelementptr inbounds [3 x i8*]* @fn.codetable, i32 0, i32 %2 + br label %indirectgoto + +indirectgoto: ; preds = %DECREMENT, %INCREMENT, %entry + %result.0 = phi i32 [ 0, %entry ], [ %dec, %DECREMENT ], [ %inc, %INCREMENT ] + %opcodes.pn = phi i32* [ %opcodes, %entry ], [ %opcodes.addr.0, %DECREMENT ], [ %opcodes.addr.0, %INCREMENT ] + %indirect.goto.dest.in = phi i8** [ %arrayidx, %entry ], [ %arrayidx4, %DECREMENT ], [ %arrayidx2, %INCREMENT ] + %opcodes.addr.0 = getelementptr inbounds i32* %opcodes.pn, i32 1 + %indirect.goto.dest = load i8** 
%indirect.goto.dest.in, align 4 + indirectbr i8* %indirect.goto.dest, [label %RETURN, label %INCREMENT, label %DECREMENT] + +RETURN: ; preds = %indirectgoto + ret i32 %result.0 +} + +!0 = metadata !{metadata !"int", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA", null} From dschuff at google.com Thu Jan 26 13:19:29 2012 From: dschuff at google.com (Derek Schuff) Date: Thu, 26 Jan 2012 11:19:29 -0800 Subject: [llvm-commits] Proposal/patch: Enable bitcode streaming In-Reply-To: References: <583C473A-9640-46A7-89FE-63BF0B97E6C6@apple.com> Message-ID: Thanks for the review. comments inline, and updated patches attached. On Wed, Jan 25, 2012 at 2:04 PM, Nick Lewycky wrote: > On 20 January 2012 11:55, Derek Schuff wrote: > >> And finally, the StreamingMemoryObject implementation, modified >> BitcodeReader, and modifed llvm-dis.cpp using the streaming interface. >> Please take a look >> > > Overall this looks good. I'm especially happy with some of the refactoring > inside BitcodeReader! Comments: > > --- a/include/llvm/Bitcode/ReaderWriter.h > +++ b/include/llvm/Bitcode/ReaderWriter.h > @@ -21,31 +21,41 @@ namespace llvm { > class MemoryBuffer; > class ModulePass; > class BitstreamWriter; > + class DataStreamer; > class LLVMContext; > class raw_ostream; > > I realize these were unsorted when you got here, but please alphabetize > them. > Done. > > + /// If 'verify' is true, check that the file fits in the buffer. > + static inline bool SkipBitcodeWrapperHeader(const unsigned char > *&BufPtr, > + const unsigned char > *&BufEnd, > + bool Verify) { > > I didn't really understand the comment. I think what you're doing is > disabling the check that the buffer contained the whole header. Could you > make it "bool SkipPastEnd" instead? > There 2 checks: the first checks that the buffer is large enough to contain the whole header. That always runs. 
The check that's conditionalized on 'verify' checks that the buffer is large enough to contain the whole bitcode. This doesn't work with the streaming implementation since it does not allocate a buffer that fits the whole file ahead of time. It also always skips past the end of the header if found (since this is how it returns the size of the buffer), so SkipPastEnd would be a bad name for the variable. I clarified the comment and renamed the variable VerifyBufferSize. (Also on this pass I found and deleted some trailing whitespace). > > +DataStreamer *getDataFileStreamer(const std::string &Filename, > + std::string *Err); > > Please line up the argument to the (. > Done. > > --- a/include/llvm/Support/StreamableMemoryObject.h > +++ b/include/llvm/Support/StreamableMemoryObject.h > @@ -12,6 +12,9 @@ > #define STREAMABLEMEMORYOBJECT_H_ > > #include "llvm/Support/MemoryObject.h" > +#include > +#include "llvm/ADT/OwningPtr.h" > +#include "llvm/Support/DataStream.h" > > ADT, Support, then headers. See > http://llvm.org/docs/CodingStandards.html#scf_includes . > > +/// StreamingMemoryObject - interface to data which is actually streamed > from > +/// at DataStreamer. In addition to inherited members, it has the > +/// dropLeadingBytes and setKnownObjectSize methods which are not > applicable > +/// to non-streamed objects > +class StreamingMemoryObject : public StreamableMemoryObject { > > I think that's a full sentence missing a period. It feels *awfully weird* > to have StreamableMemoryObject and StreamingMemoryObject, and both of them > are interfaces. The comment doesn't seem to sufficiently explain what's > going on here. (The DataStreamer can stream from a Streaming but not with a > Streamable? What?) > Yeah, this was kind of a tough naming problem. But there really does need to be these 2 different kinds of interfaces. 
The one I called StreamableMemoryObject (in which the data may or may not actually be streamed) needs to have extra methods over and above MemoryObject, which are directly due to the streamability: isValidAddress and isObjectEnd are needed because if we don't know the length of the stream ahead of time, then calling getExtent requires waiting until the entire stream is fetched. (getPointer is basically just there to support BLOBs, avoiding extra copies). Then you have RawMemoryObject, a non-streamed StreamableMemoryObject, and StreamingMemoryObject is an interface because there could be different implementations of StreamableMemoryObject (getting data from different sources). I'm open to ideas to simplify the situation. > > + // fetch enough bytes such that Pos can be read or EOF is reached > + // (i.e. BytesRead > Pos). Return true if Pos can be read. > + // Unlike most of the functions in BitcodeReader, returns true on > success. > + bool fetchToPos(size_t Pos) { > > Comment should start with a capital. > > + bool fetchToPos(size_t Pos) { > + if (EOFReached) return Pos < ObjectSize; > + while (Pos >= BytesRead) { > + Bytes.resize(BytesRead + kChunkSize); > + size_t bytes = Streamer->GetBytes(&Bytes[BytesRead + BytesSkipped], > + kChunkSize); > > Why is kChunkSize so special? Why not ask for all the bytes up until Pos? > The comment on DataStreamer::GetBytes doesn't give any reason not to ask > for as many bytes as you want? > The common case (actually the only case, currently) will actually be that the requested size is much smaller than the chunk size, and the chunk size just ensures that we batch them together rather than making a lot of potentially expensive requests into the streamer. The 'while' loop is just there instead of an 'if' to cover the corner case of a large request. 
I updated the comment > > +bool BitcodeReader::SuspendModuleParse() { > + // save our current position > + NextUnreadBit = Stream.GetCurrentBitNo(); > + return false; > +} > > What's up with that returning bool? > Originally it was going to check for error and use the same convention as the rest of the functions, but it ended up being simpler than I expected. I removed it entirely now. > > + // ParseModule will parse the next body in the stream and set its > + // position in the DeferredFunctionInfo map > > Sentence needs period. > Done. > > + unsigned char buf[16]; > + if (Bytes->readBytes(0, 16, buf, NULL) == -1) > + return Error("Bitcode stream must be at least 16 bytes in length"); > + > + if (!isBitcode(buf, buf + 16)) { > + return Error("Invalid bitcode signature"); > + } > > So, uh, braces or no braces around one-line return statements? :-) > LLVM style seems to be no braces, but Google style dies hard :) Fixed. > > +Module *llvm::getStreamedBitcodeModule(const std::string &name, > + DataStreamer *streamer, > + LLVMContext &Context, > + std::string *ErrMsg) { > > These args don't line up. > Done > > --- /dev/null > +++ b/lib/Support/DataStream.cpp > @@ -0,0 +1,96 @@ > +//===--- llvm/Support/DataStream.cpp - Lazy streamed Data -*- C++ -*-===// > > Don't include emacs mode markers (the -*- C++ -*- bit) on .cpp files, only > on .h files. > Done. > > +// Very simple stream backed by a file. Mostly useful for stdin and > debugging; > +// actual file access is probably still best done with mmap > +class DataFileStreamer : public DataStreamer { > + int Fd; > > Sentence seeking full stop. > > +DataStreamer *getDataFileStreamer(const std::string &Filename, > + std::string *StrError) { > > Line up to the ( again. > Done. 
> > + if (e != success) { > + *StrError = std::string() + "Could not open " + Filename + ": " > + + e.message() + "\n"; > + return NULL; > + } > > Optional: std::string("Could not open ") + Filename, and also putting the > + on the previous line instead of starting a line with the operator. > Done. > > --- a/lib/Support/StreamableMemoryObject.cpp > +++ b/lib/Support/StreamableMemoryObject.cpp > > In this file you added some spurious blank lines. Please don't do that. > I think I got them all. Also fixed some argument alignment. > > +bool StreamingMemoryObject::isObjectEnd(uint64_t address) { > + if (ObjectSize) return address == ObjectSize; > + fetchToPos(address); > + return address == BytesRead; > +} > > Shouldn't that end with "return address == ObjectSize"? If the file is > larger than 'address' bytes, won't this read up to address bytes, then > stop, leaving BytesRead == address? ObjectSize on the other hand isn't set > until EOF is reached. > Yes, it should, although in practice BytesRead is unlikely to actually equal address due to the chunk-fetching behavior. No doubt that's why this slipped through all the testing. Fixed (and covered the case where address == 0; it's never the end because 0 is an invalid stream size) > > +int StreamingMemoryObject::readBytes(uint64_t address, > + uint64_t size, > + uint8_t* buf, > + uint64_t* copied) { > > Misaligned. > Fixed. > > + //StreamableMemoryObject > + > > Please remove. > Done. -------------- next part -------------- An HTML attachment was scrubbed... URL: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120126/0b212b02/attachment-0001.html -------------- next part -------------- A non-text attachment was scrubbed... 
Name: StreamableMemoryObject.diff Type: text/x-patch Size: 24088 bytes Desc: not available Url : http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120126/0b212b02/attachment-0004.bin -------------- next part -------------- A non-text attachment was scrubbed... Name: BitcodeReader_streaming2.diff Type: text/x-patch Size: 35945 bytes Desc: not available Url : http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120126/0b212b02/attachment-0005.bin From grosser at fim.uni-passau.de Thu Jan 26 13:53:01 2012 From: grosser at fim.uni-passau.de (Tobias Grosser) Date: Thu, 26 Jan 2012 19:53:01 -0000 Subject: [llvm-commits] [polly] r149073 - /polly/trunk/lib/CodeGeneration.cpp Message-ID: <20120126195301.71D6E2A6C12C@llvm.org> Author: grosser Date: Thu Jan 26 13:53:01 2012 New Revision: 149073 URL: http://llvm.org/viewvc/llvm-project?rev=149073&view=rev Log: Remove unneeded default case This silences a clang warning. Modified: polly/trunk/lib/CodeGeneration.cpp Modified: polly/trunk/lib/CodeGeneration.cpp URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/CodeGeneration.cpp?rev=149073&r1=149072&r2=149073&view=diff ============================================================================== --- polly/trunk/lib/CodeGeneration.cpp (original) +++ polly/trunk/lib/CodeGeneration.cpp Thu Jan 26 13:53:01 2012 @@ -858,8 +858,6 @@ case clast_red_sum: old = Builder.CreateAdd(old, exprValue); break; - default: - llvm_unreachable("Clast unknown reduction type"); } } From mcrosier at apple.com Thu Jan 26 14:19:05 2012 From: mcrosier at apple.com (Chad Rosier) Date: Thu, 26 Jan 2012 20:19:05 -0000 Subject: [llvm-commits] [llvm] r149075 - /llvm/trunk/lib/CodeGen/MachineBasicBlock.cpp Message-ID: <20120126201905.ACF0E2A6C12C@llvm.org> Author: mcrosier Date: Thu Jan 26 14:19:05 2012 New Revision: 149075 URL: http://llvm.org/viewvc/llvm-project?rev=149075&view=rev Log: Update comment for r149070. 
Modified: llvm/trunk/lib/CodeGen/MachineBasicBlock.cpp Modified: llvm/trunk/lib/CodeGen/MachineBasicBlock.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MachineBasicBlock.cpp?rev=149075&r1=149074&r2=149075&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/MachineBasicBlock.cpp (original) +++ llvm/trunk/lib/CodeGen/MachineBasicBlock.cpp Thu Jan 26 14:19:05 2012 @@ -537,9 +537,7 @@ // If the block doesn't end in a known control barrier, assume fallthrough // is possible. The isPredicated check is needed because this code can be // called during IfConversion, where an instruction which is normally a - // Barrier is predicated and thus no longer an actual control barrier. This - // is over-conservative though, because if an instruction isn't actually - // predicated we could still treat it like a barrier. + // Barrier is predicated and thus no longer an actual control barrier. return empty() || !back().isBarrier() || TII->isPredicated(&back()); } From clattner at apple.com Thu Jan 26 14:31:19 2012 From: clattner at apple.com (Chris Lattner) Date: Thu, 26 Jan 2012 12:31:19 -0800 Subject: [llvm-commits] fix egrep options for Darwin in utils/llvmgrep In-Reply-To: References: Message-ID: <42F11728-26F5-47C0-B460-F7E1781D3ED3@apple.com> On Jan 24, 2012, at 6:01 PM, Liang Wang wrote: > Hi, > > Currently, llvmgrep shows only filename since it uses -l as egrep > option on Darwin. It should use -H as for Linux. > The following patch fixes the problem. OK for trunk? Sure, please commit. -Chris > > Thanks, > Liang. 
> > > Index: utils/llvmgrep > =================================================================== > --- utils/llvmgrep (revision 148897) > +++ utils/llvmgrep (working copy) > @@ -29,7 +29,7 @@ > cd $TOPDIR > case `uname -s` in > SunOS) grep_cmd="ggrep -H -n" ;; > - Linux) grep_cmd="egrep -H -n" ;; > + Linux|Darwin) grep_cmd="egrep -H -n" ;; > *) grep_cmd="egrep -l -n" ;; > esac > ./utils/llvmdo -topdir "$TOPDIR" \ > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits From spop at codeaurora.org Thu Jan 26 14:34:33 2012 From: spop at codeaurora.org (Sebastian Pop) Date: Thu, 26 Jan 2012 14:34:33 -0600 Subject: [llvm-commits] [LLVMdev] [PATCH] BasicBlock Autovectorization Pass In-Reply-To: <1327452092.2489.29.camel@sapling> References: <1319928991.23036.957.camel@sapling> <1320108633.23036.1266.camel@sapling> <1320172356.23036.1298.camel@sapling> <4EB0462C.5010209@grosser.es> <1320184739.23036.1334.camel@sapling> <1320191694.23036.1497.camel@sapling> <1320749109.19359.76.camel@sapling> <4EB90E98.4010805@grosser.es> <1320762963.19359.117.camel@sapling> <4EB98207.2070807@grosser.es> <1320791390.19359.262.camel@sapling> <4EBC4B0F.6010609@grosser.es> <1321050998.19359.539.camel@sapling> <4EBDA7F9.9080709@grosser.es> <1321053083.19359.550.camel@sapling> <4EBDB1BF.7090006@grosser.es> <1321400339.19359.782.camel@sapling> <1321486739.19359.1067.camel@sapling> <4EC504B5.2020408@grosser.es> <1321898108.2507.36.camel@sapling> <1321932161.2507.101.camel@sapling> <1322067157.2507.263.camel@sapling> <4ED8F7B0.8050309@grosser.es> <1323822351.590.1687.camel@sapling> <4EFC7291.9040808@grosser.es> <1325179929.13080.2839.camel@sapling> <4EFD7FD3.8040800@grosser.es> <1327378420.32397.1603.camel@sapling> <1327452092.2489.29.camel@sapling> Message-ID: On Tue, Jan 24, 2012 at 6:41 PM, Hal Finkel wrote: >> enabling vectorization gets the performance down by 80% on ARM. 
>> I will prepare a reduced testcase and try to find out the reason. >> As a first shot, I would say that this comes from the vectorization of >> code in a loop and the overhead of transfer between scalar and >> vector registers. > > This is good; as has been pointed out, we'll need to develop a > vectorization cost model for this kind of thing to really be successful, > and so we should start thinking about that. > > The pass, as implemented, has a semi-implicit cost model which says > that permutations followed by another vector operation are free, scalar > -> vector transfers are free, and vectorizing a memory operation is just > as good as vectorizing an arithmetic operation. Depending on the system, > these may all be untrue (although on some systems they are true). > > If you can generate a test case that would be great, I'd like to look at > it. Here is the testcase with calls to gettimeofday to measure time spent in the kernel and not in the ini/fini phases. On ARM I saw around 5 to 6x slowdown in the vector version. I haven't tried this on x86 yet but that should also produce slowdowns as the cost between scalar and vector regs is non null there as well. Sebastian -- Qualcomm Innovation Center, Inc is a member of Code Aurora Forum -------------- next part -------------- A non-text attachment was scrubbed... 
Name: test.c Type: text/x-csrc Size: 890 bytes Desc: not available Url : http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120126/08013f8f/attachment.bin From sabre at nondot.org Thu Jan 26 14:37:12 2012 From: sabre at nondot.org (Chris Lattner) Date: Thu, 26 Jan 2012 20:37:12 -0000 Subject: [llvm-commits] [llvm] r149076 - /llvm/trunk/lib/VMCore/Constants.cpp Message-ID: <20120126203712.10CB02A6C12C@llvm.org> Author: lattner Date: Thu Jan 26 14:37:11 2012 New Revision: 149076 URL: http://llvm.org/viewvc/llvm-project?rev=149076&view=rev Log: Reduce a lot of code duplication by implementing ConstantExpr::getWithOperandReplaced and ConstantExpr::replaceUsesOfWithOnConstant in terms of ConstantExpr::getWithOperands. While we're at it, make sure that ConstantExpr::getWithOperands covers all instructions: it was missing insert/extractvalue. Modified: llvm/trunk/lib/VMCore/Constants.cpp Modified: llvm/trunk/lib/VMCore/Constants.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/Constants.cpp?rev=149076&r1=149075&r2=149076&view=diff ============================================================================== --- llvm/trunk/lib/VMCore/Constants.cpp (original) +++ llvm/trunk/lib/VMCore/Constants.cpp Thu Jan 26 14:37:11 2012 @@ -904,66 +904,16 @@ /// one, but with the specified operand set to the specified value. Constant * ConstantExpr::getWithOperandReplaced(unsigned OpNo, Constant *Op) const { - assert(OpNo < getNumOperands() && "Operand num is out of range!"); assert(Op->getType() == getOperand(OpNo)->getType() && "Replacing operand with value of different type!"); if (getOperand(OpNo) == Op) return const_cast(this); + + SmallVector NewOps; + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) + NewOps.push_back(i == OpNo ? 
Op : getOperand(i)); - Constant *Op0, *Op1, *Op2; - switch (getOpcode()) { - case Instruction::Trunc: - case Instruction::ZExt: - case Instruction::SExt: - case Instruction::FPTrunc: - case Instruction::FPExt: - case Instruction::UIToFP: - case Instruction::SIToFP: - case Instruction::FPToUI: - case Instruction::FPToSI: - case Instruction::PtrToInt: - case Instruction::IntToPtr: - case Instruction::BitCast: - return ConstantExpr::getCast(getOpcode(), Op, getType()); - case Instruction::Select: - Op0 = (OpNo == 0) ? Op : getOperand(0); - Op1 = (OpNo == 1) ? Op : getOperand(1); - Op2 = (OpNo == 2) ? Op : getOperand(2); - return ConstantExpr::getSelect(Op0, Op1, Op2); - case Instruction::InsertElement: - Op0 = (OpNo == 0) ? Op : getOperand(0); - Op1 = (OpNo == 1) ? Op : getOperand(1); - Op2 = (OpNo == 2) ? Op : getOperand(2); - return ConstantExpr::getInsertElement(Op0, Op1, Op2); - case Instruction::ExtractElement: - Op0 = (OpNo == 0) ? Op : getOperand(0); - Op1 = (OpNo == 1) ? Op : getOperand(1); - return ConstantExpr::getExtractElement(Op0, Op1); - case Instruction::ShuffleVector: - Op0 = (OpNo == 0) ? Op : getOperand(0); - Op1 = (OpNo == 1) ? Op : getOperand(1); - Op2 = (OpNo == 2) ? Op : getOperand(2); - return ConstantExpr::getShuffleVector(Op0, Op1, Op2); - case Instruction::GetElementPtr: { - SmallVector Ops; - Ops.resize(getNumOperands()-1); - for (unsigned i = 1, e = getNumOperands(); i != e; ++i) - Ops[i-1] = getOperand(i); - if (OpNo == 0) - return - ConstantExpr::getGetElementPtr(Op, Ops, - cast(this)->isInBounds()); - Ops[OpNo-1] = Op; - return - ConstantExpr::getGetElementPtr(getOperand(0), Ops, - cast(this)->isInBounds()); - } - default: - assert(getNumOperands() == 2 && "Must be binary operator?"); - Op0 = (OpNo == 0) ? Op : getOperand(0); - Op1 = (OpNo == 1) ? 
Op : getOperand(1); - return ConstantExpr::get(getOpcode(), Op0, Op1, SubclassOptionalData); - } + return getWithOperands(NewOps); } /// getWithOperands - This returns the current constant expression with the @@ -999,12 +949,15 @@ return ConstantExpr::getInsertElement(Ops[0], Ops[1], Ops[2]); case Instruction::ExtractElement: return ConstantExpr::getExtractElement(Ops[0], Ops[1]); + case Instruction::InsertValue: + return ConstantExpr::getInsertValue(Ops[0], Ops[1], getIndices()); + case Instruction::ExtractValue: + return ConstantExpr::getExtractValue(Ops[0], getIndices()); case Instruction::ShuffleVector: return ConstantExpr::getShuffleVector(Ops[0], Ops[1], Ops[2]); case Instruction::GetElementPtr: - return - ConstantExpr::getGetElementPtr(Ops[0], Ops.slice(1), - cast(this)->isInBounds()); + return ConstantExpr::getGetElementPtr(Ops[0], Ops.slice(1), + cast(this)->isInBounds()); case Instruction::ICmp: case Instruction::FCmp: return ConstantExpr::getCompare(getPredicate(), Ops[0], Ops[1]); @@ -2539,88 +2492,13 @@ assert(isa(ToV) && "Cannot make Constant refer to non-constant!"); Constant *To = cast(ToV); - Constant *Replacement = 0; - if (getOpcode() == Instruction::GetElementPtr) { - SmallVector Indices; - Constant *Pointer = getOperand(0); - Indices.reserve(getNumOperands()-1); - if (Pointer == From) Pointer = To; - - for (unsigned i = 1, e = getNumOperands(); i != e; ++i) { - Constant *Val = getOperand(i); - if (Val == From) Val = To; - Indices.push_back(Val); - } - Replacement = ConstantExpr::getGetElementPtr(Pointer, Indices, - cast(this)->isInBounds()); - } else if (getOpcode() == Instruction::ExtractValue) { - Constant *Agg = getOperand(0); - if (Agg == From) Agg = To; - - ArrayRef Indices = getIndices(); - Replacement = ConstantExpr::getExtractValue(Agg, Indices); - } else if (getOpcode() == Instruction::InsertValue) { - Constant *Agg = getOperand(0); - Constant *Val = getOperand(1); - if (Agg == From) Agg = To; - if (Val == From) Val = To; - - ArrayRef 
Indices = getIndices(); - Replacement = ConstantExpr::getInsertValue(Agg, Val, Indices); - } else if (isCast()) { - assert(getOperand(0) == From && "Cast only has one use!"); - Replacement = ConstantExpr::getCast(getOpcode(), To, getType()); - } else if (getOpcode() == Instruction::Select) { - Constant *C1 = getOperand(0); - Constant *C2 = getOperand(1); - Constant *C3 = getOperand(2); - if (C1 == From) C1 = To; - if (C2 == From) C2 = To; - if (C3 == From) C3 = To; - Replacement = ConstantExpr::getSelect(C1, C2, C3); - } else if (getOpcode() == Instruction::ExtractElement) { - Constant *C1 = getOperand(0); - Constant *C2 = getOperand(1); - if (C1 == From) C1 = To; - if (C2 == From) C2 = To; - Replacement = ConstantExpr::getExtractElement(C1, C2); - } else if (getOpcode() == Instruction::InsertElement) { - Constant *C1 = getOperand(0); - Constant *C2 = getOperand(1); - Constant *C3 = getOperand(1); - if (C1 == From) C1 = To; - if (C2 == From) C2 = To; - if (C3 == From) C3 = To; - Replacement = ConstantExpr::getInsertElement(C1, C2, C3); - } else if (getOpcode() == Instruction::ShuffleVector) { - Constant *C1 = getOperand(0); - Constant *C2 = getOperand(1); - Constant *C3 = getOperand(2); - if (C1 == From) C1 = To; - if (C2 == From) C2 = To; - if (C3 == From) C3 = To; - Replacement = ConstantExpr::getShuffleVector(C1, C2, C3); - } else if (isCompare()) { - Constant *C1 = getOperand(0); - Constant *C2 = getOperand(1); - if (C1 == From) C1 = To; - if (C2 == From) C2 = To; - if (getOpcode() == Instruction::ICmp) - Replacement = ConstantExpr::getICmp(getPredicate(), C1, C2); - else { - assert(getOpcode() == Instruction::FCmp); - Replacement = ConstantExpr::getFCmp(getPredicate(), C1, C2); - } - } else if (getNumOperands() == 2) { - Constant *C1 = getOperand(0); - Constant *C2 = getOperand(1); - if (C1 == From) C1 = To; - if (C2 == From) C2 = To; - Replacement = ConstantExpr::get(getOpcode(), C1, C2, SubclassOptionalData); - } else { - llvm_unreachable("Unknown 
ConstantExpr type!"); + SmallVector NewOps; + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + Constant *Op = getOperand(i); + NewOps.push_back(Op == From ? To : Op); } + Constant *Replacement = getWithOperands(NewOps); assert(Replacement != this && "I didn't contain From!"); // Everyone using this now uses the replacement. From sabre at nondot.org Thu Jan 26 14:40:56 2012 From: sabre at nondot.org (Chris Lattner) Date: Thu, 26 Jan 2012 20:40:56 -0000 Subject: [llvm-commits] [llvm] r149077 - in /llvm/trunk/lib/VMCore: Constants.cpp ConstantsContext.h Message-ID: <20120126204056.F3D752A6C12C@llvm.org> Author: lattner Date: Thu Jan 26 14:40:56 2012 New Revision: 149077 URL: http://llvm.org/viewvc/llvm-project?rev=149077&view=rev Log: smallvectorize and ArrayRef'ize some stuff. Modified: llvm/trunk/lib/VMCore/Constants.cpp llvm/trunk/lib/VMCore/ConstantsContext.h Modified: llvm/trunk/lib/VMCore/Constants.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/Constants.cpp?rev=149077&r1=149076&r2=149077&view=diff ============================================================================== --- llvm/trunk/lib/VMCore/Constants.cpp (original) +++ llvm/trunk/lib/VMCore/Constants.cpp Thu Jan 26 14:40:56 2012 @@ -717,7 +717,7 @@ /// Constant *ConstantArray::get(LLVMContext &Context, StringRef Str, bool AddNull) { - std::vector ElementVals; + SmallVector ElementVals; ElementVals.reserve(Str.size() + size_t(AddNull)); for (unsigned i = 0; i < Str.size(); ++i) ElementVals.push_back(ConstantInt::get(Type::getInt8Ty(Context), Str[i])); @@ -1957,7 +1957,7 @@ GetElementPtrConstantExpr:: -GetElementPtrConstantExpr(Constant *C, const std::vector &IdxList, +GetElementPtrConstantExpr(Constant *C, ArrayRef IdxList, Type *DestTy) : ConstantExpr(DestTy, Instruction::GetElementPtr, OperandTraits::op_end(this) @@ -2469,7 +2469,7 @@ Use *U) { assert(isa(To) && "Cannot make Constant refer to non-constant!"); - std::vector Values; + SmallVector Values; 
Values.reserve(getNumOperands()); // Build replacement array... for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { Constant *Val = getOperand(i); Modified: llvm/trunk/lib/VMCore/ConstantsContext.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/ConstantsContext.h?rev=149077&r1=149076&r2=149077&view=diff ============================================================================== --- llvm/trunk/lib/VMCore/ConstantsContext.h (original) +++ llvm/trunk/lib/VMCore/ConstantsContext.h Thu Jan 26 14:40:56 2012 @@ -211,11 +211,11 @@ /// used behind the scenes to implement getelementpr constant exprs. class GetElementPtrConstantExpr : public ConstantExpr { virtual void anchor(); - GetElementPtrConstantExpr(Constant *C, const std::vector &IdxList, + GetElementPtrConstantExpr(Constant *C, ArrayRef IdxList, Type *DestTy); public: static GetElementPtrConstantExpr *Create(Constant *C, - const std::vector&IdxList, + ArrayRef IdxList, Type *DestTy, unsigned Flags) { GetElementPtrConstantExpr *Result = From sabre at nondot.org Thu Jan 26 14:44:57 2012 From: sabre at nondot.org (Chris Lattner) Date: Thu, 26 Jan 2012 20:44:57 -0000 Subject: [llvm-commits] [llvm] r149078 - in /llvm/trunk: include/llvm/CodeGen/AsmPrinter.h lib/CodeGen/AsmPrinter/DwarfDebug.h Message-ID: <20120126204457.E314C2A6C12C@llvm.org> Author: lattner Date: Thu Jan 26 14:44:57 2012 New Revision: 149078 URL: http://llvm.org/viewvc/llvm-project?rev=149078&view=rev Log: tidy up forward declarations. 
Modified: llvm/trunk/include/llvm/CodeGen/AsmPrinter.h llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h Modified: llvm/trunk/include/llvm/CodeGen/AsmPrinter.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/AsmPrinter.h?rev=149078&r1=149077&r2=149078&view=diff ============================================================================== --- llvm/trunk/include/llvm/CodeGen/AsmPrinter.h (original) +++ llvm/trunk/include/llvm/CodeGen/AsmPrinter.h Thu Jan 26 14:44:57 2012 @@ -23,11 +23,6 @@ class BlockAddress; class GCStrategy; class Constant; - class ConstantArray; - class ConstantFP; - class ConstantInt; - class ConstantStruct; - class ConstantVector; class GCMetadataPrinter; class GlobalValue; class GlobalVariable; @@ -37,8 +32,6 @@ class MachineLocation; class MachineLoopInfo; class MachineLoop; - class MachineConstantPool; - class MachineConstantPoolEntry; class MachineConstantPoolValue; class MachineJumpTableInfo; class MachineModuleInfo; @@ -268,8 +261,9 @@ virtual void EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV); - /// EmitXXStructor - Targets can override this to change how global constants - /// that are part of a C++ static/global constructor list are emitted. + /// EmitXXStructor - Targets can override this to change how global + /// constants that are part of a C++ static/global constructor list are + /// emitted. 
virtual void EmitXXStructor(const Constant *CV) { EmitGlobalConstant(CV); } Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h?rev=149078&r1=149077&r2=149078&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h (original) +++ llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h Thu Jan 26 14:44:57 2012 @@ -31,6 +31,8 @@ namespace llvm { class CompileUnit; +class ConstantInt; +class ConstantFP; class DbgVariable; class MachineFrameInfo; class MachineModuleInfo; From hfinkel at anl.gov Thu Jan 26 14:49:53 2012 From: hfinkel at anl.gov (Hal Finkel) Date: Thu, 26 Jan 2012 14:49:53 -0600 Subject: [llvm-commits] [LLVMdev] [PATCH] BasicBlock Autovectorization Pass In-Reply-To: References: <1319928991.23036.957.camel@sapling> <1320108633.23036.1266.camel@sapling> <1320172356.23036.1298.camel@sapling> <4EB0462C.5010209@grosser.es> <1320184739.23036.1334.camel@sapling> <1320191694.23036.1497.camel@sapling> <1320749109.19359.76.camel@sapling> <4EB90E98.4010805@grosser.es> <1320762963.19359.117.camel@sapling> <4EB98207.2070807@grosser.es> <1320791390.19359.262.camel@sapling> <4EBC4B0F.6010609@grosser.es> <1321050998.19359.539.camel@sapling> <4EBDA7F9.9080709@grosser.es> <1321053083.19359.550.camel@sapling> <4EBDB1BF.7090006@grosser.es> <1321400339.19359.782.camel@sapling> <1321486739.19359.1067.camel@sapling> <4EC504B5.2020408@grosser.es> <1321898108.2507.36.camel@sapling> <1321932161.2507.101.camel@sapling> <1322067157.2507.263.camel@sapling> <4ED8F7B0.8050309@grosser.es> <1323822351.590.1687.camel@sapling> <4EFC7291.9040808@grosser.es> <1325179929.13080.2839.camel@sapling> <4EFD7FD3.8040800@grosser.es> <1327378420.32397.1603.camel@sapling> <1327452092.2489.29.camel@sapling> Message-ID: <1327610993.2489.317.camel@sapling> On Thu, 2012-01-26 at 14:34 -0600, Sebastian Pop wrote: > On Tue, Jan 24, 2012 at 
6:41 PM, Hal Finkel wrote: > >> enabling vectorization gets the performance down by 80% on ARM. > >> I will prepare a reduced testcase and try to find out the reason. > >> As a first shot, I would say that this comes from the vectorization of > >> code in a loop and the overhead of transfer between scalar and > >> vector registers. > > > > This is good; as has been pointed out, we'll need to develop a > > vectorization cost model for this kind of thing to really be successful, > > and so we should start thinking about that. > > > > The pass, as implemented, has a semi-implicit cost model which says > > that permutations followed by another vector operation are free, scalar > > -> vector transfers are free, and vectorizing a memory operation is just > > as good as vectorizing an arithmetic operation. Depending on the system, > > these may all be untrue (although on some systems they are true). > > > > If you can generate a test case that would be great, I'd like to look at > > it. > > Here is the testcase with calls to gettimeofday to measure time spent > in the kernel and not in the ini/fini phases. > On ARM I saw around 5 to 6x slowdown in the vector version. > I haven't tried this on x86 yet but that should also produce slowdowns > as the cost between scalar and vector regs is non null there as well. Thanks! Did you compile with any non-default flags other than -mllvm -vectorize? 
-Hal > > Sebastian > -- > Qualcomm Innovation Center, Inc is a member of Code Aurora Forum -- Hal Finkel Postdoctoral Appointee Leadership Computing Facility Argonne National Laboratory From spop at codeaurora.org Thu Jan 26 15:12:19 2012 From: spop at codeaurora.org (Sebastian Pop) Date: Thu, 26 Jan 2012 15:12:19 -0600 Subject: [llvm-commits] [LLVMdev] [PATCH] BasicBlock Autovectorization Pass In-Reply-To: <1327610993.2489.317.camel@sapling> References: <1319928991.23036.957.camel@sapling> <1320108633.23036.1266.camel@sapling> <1320172356.23036.1298.camel@sapling> <4EB0462C.5010209@grosser.es> <1320184739.23036.1334.camel@sapling> <1320191694.23036.1497.camel@sapling> <1320749109.19359.76.camel@sapling> <4EB90E98.4010805@grosser.es> <1320762963.19359.117.camel@sapling> <4EB98207.2070807@grosser.es> <1320791390.19359.262.camel@sapling> <4EBC4B0F.6010609@grosser.es> <1321050998.19359.539.camel@sapling> <4EBDA7F9.9080709@grosser.es> <1321053083.19359.550.camel@sapling> <4EBDB1BF.7090006@grosser.es> <1321400339.19359.782.camel@sapling> <1321486739.19359.1067.camel@sapling> <4EC504B5.2020408@grosser.es> <1321898108.2507.36.camel@sapling> <1321932161.2507.101.camel@sapling> <1322067157.2507.263.camel@sapling> <4ED8F7B0.8050309@grosser.es> <1323822351.590.1687.camel@sapling> <4EFC7291.9040808@grosser.es> <1325179929.13080.2839.camel@sapling> <4EFD7FD3.8040800@grosser.es> <1327378420.32397.1603.camel@sapling> <1327452092.2489.29.camel@sapling> <1327610993.2489.317.camel@sapling> Message-ID: On Thu, Jan 26, 2012 at 2:49 PM, Hal Finkel wrote: > Thanks! Did you compile with any non-default flags other than -mllvm > -vectorize? I used -O3 and -vectorize, no other non-default flags. 
Sebastian -- Qualcomm Innovation Center, Inc is a member of Code Aurora Forum From hfinkel at anl.gov Thu Jan 26 15:19:34 2012 From: hfinkel at anl.gov (Hal Finkel) Date: Thu, 26 Jan 2012 15:19:34 -0600 Subject: [llvm-commits] [LLVMdev] [PATCH] BasicBlock Autovectorization Pass In-Reply-To: References: <1319928991.23036.957.camel@sapling> <1320108633.23036.1266.camel@sapling> <1320172356.23036.1298.camel@sapling> <4EB0462C.5010209@grosser.es> <1320184739.23036.1334.camel@sapling> <1320191694.23036.1497.camel@sapling> <1320749109.19359.76.camel@sapling> <4EB90E98.4010805@grosser.es> <1320762963.19359.117.camel@sapling> <4EB98207.2070807@grosser.es> <1320791390.19359.262.camel@sapling> <4EBC4B0F.6010609@grosser.es> <1321050998.19359.539.camel@sapling> <4EBDA7F9.9080709@grosser.es> <1321053083.19359.550.camel@sapling> <4EBDB1BF.7090006@grosser.es> <1321400339.19359.782.camel@sapling> <1321486739.19359.1067.camel@sapling> <4EC504B5.2020408@grosser.es> <1321898108.2507.36.camel@sapling> <1321932161.2507.101.camel@sapling> <1322067157.2507.263.camel@sapling> <4ED8F7B0.8050309@grosser.es> <1323822351.590.1687.camel@sapling> <4EFC7291.9040808@grosser.es> <1325179929.13080.2839.camel@sapling> <4EFD7FD3.8040800@grosser.es> <1327378420.32397.1603.camel@sapling> <1327452092.2489.29.camel@sapling> <1327610993.2489.317.camel@sapling> Message-ID: <1327612774.2489.324.camel@sapling> On Thu, 2012-01-26 at 15:12 -0600, Sebastian Pop wrote: > On Thu, Jan 26, 2012 at 2:49 PM, Hal Finkel wrote: > > Thanks! Did you compile with any non-default flags other than -mllvm > > -vectorize? > > I used -O3 and -vectorize, no other non-default flags. If I run clang -O3 -mllvm -vectorize -S -emit-llvm -o test.ll test.c then I get no vectorization at all (the output is identical to that without the -vectorize). What target triple is your clang targeting? If I include -mllvm -debug-only=bb-vectorize then the relevant output is: BBV: fusing loop #1 for entry in main... 
BBV: found 0 instructions with candidate pairs BBV: done! BBV: fusing loop #1 for for.body in main... BBV: found 0 instructions with candidate pairs BBV: done! BBV: fusing loop #1 for for.end in main... BBV: found 0 instructions with candidate pairs BBV: done! BBV: fusing loop #1 for for.cond7.preheader in main... BBV: found 0 instructions with candidate pairs BBV: done! BBV: fusing loop #1 for for.body10 in main... BBV: found 16 instructions with candidate pairs BBV: found 62 pair connections. BBV: selected 0 pairs. BBV: done! BBV: fusing loop #1 for for.inc45 in main... BBV: found 0 instructions with candidate pairs BBV: done! BBV: fusing loop #1 for for.end47 in main... BBV: found 3 instructions with candidate pairs BBV: found 0 pair connections. BBV: done! -Hal > > Sebastian > -- > Qualcomm Innovation Center, Inc is a member of Code Aurora Forum -- Hal Finkel Postdoctoral Appointee Leadership Computing Facility Argonne National Laboratory From anton at korobeynikov.info Thu Jan 26 15:30:05 2012 From: anton at korobeynikov.info (Anton Korobeynikov) Date: Fri, 27 Jan 2012 01:30:05 +0400 Subject: [llvm-commits] [LLVMdev] [PATCH] BasicBlock Autovectorization Pass In-Reply-To: <1327612774.2489.324.camel@sapling> References: <1319928991.23036.957.camel@sapling> <1320108633.23036.1266.camel@sapling> <1320172356.23036.1298.camel@sapling> <4EB0462C.5010209@grosser.es> <1320184739.23036.1334.camel@sapling> <1320191694.23036.1497.camel@sapling> <1320749109.19359.76.camel@sapling> <4EB90E98.4010805@grosser.es> <1320762963.19359.117.camel@sapling> <4EB98207.2070807@grosser.es> <1320791390.19359.262.camel@sapling> <4EBC4B0F.6010609@grosser.es> <1321050998.19359.539.camel@sapling> <4EBDA7F9.9080709@grosser.es> <1321053083.19359.550.camel@sapling> <4EBDB1BF.7090006@grosser.es> <1321400339.19359.782.camel@sapling> <1321486739.19359.1067.camel@sapling> <4EC504B5.2020408@grosser.es> <1321898108.2507.36.camel@sapling> <1321932161.2507.101.camel@sapling> 
<1322067157.2507.263.camel@sapling> <4ED8F7B0.8050309@grosser.es> <1323822351.590.1687.camel@sapling> <4EFC7291.9040808@grosser.es> <1325179929.13080.2839.camel@sapling> <4EFD7FD3.8040800@grosser.es> <1327378420.32397.1603.camel@sapling> <1327452092.2489.29.camel@sapling> <1327610993.2489.317.camel@sapling> <1327612774.2489.324.camel@sapling> Message-ID: > If I run clang -O3 -mllvm -vectorize -S -emit-llvm -o test.ll test.c > then I get no vectorization at all (the output is identical to that > without the -vectorize). What target triple is your clang targeting? Probably Sebastian can provide the .ll file. Then no clang will be necessary :) -- With best regards, Anton Korobeynikov Faculty of Mathematics and Mechanics, Saint Petersburg State University From netcasper at gmail.com Thu Jan 26 15:31:16 2012 From: netcasper at gmail.com (Liang Wang) Date: Fri, 27 Jan 2012 05:31:16 +0800 Subject: [llvm-commits] fix egrep options for Darwin in utils/llvmgrep In-Reply-To: <42F11728-26F5-47C0-B460-F7E1781D3ED3@apple.com> References: <42F11728-26F5-47C0-B460-F7E1781D3ED3@apple.com> Message-ID: On Fri, Jan 27, 2012 at 4:31 AM, Chris Lattner wrote: > On Jan 24, 2012, at 6:01 PM, Liang Wang wrote: >> Hi, >> >> Currently, llvmgrep shows only filename since it uses -l as egrep >> option on Darwin.  It should use -H as for Linux. >> The following patch fixes the problem.  OK for trunk? > > Sure, please commit. Thanks. I haven't got commit access yet. Could you please help commit this patch? Thanks, Liang. > > -Chris > >> >> Thanks, >> Liang. >> >> >> Index: utils/llvmgrep >> =================================================================== >> --- utils/llvmgrep    (revision 148897) >> +++ utils/llvmgrep    (working copy) >> @@ -29,7 +29,7 @@ >>   cd $TOPDIR >>   case `uname -s` in >>     SunOS) grep_cmd="ggrep -H -n" ;; >> -    Linux) grep_cmd="egrep -H -n" ;; >> +    Linux|Darwin) grep_cmd="egrep -H -n" ;; >>     *) grep_cmd="egrep -l -n" ;; >>   esac >>   
./utils/llvmdo -topdir "$TOPDIR" \ >> _______________________________________________ >> llvm-commits mailing list >> llvm-commits at cs.uiuc.edu >> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits > From spop at codeaurora.org Thu Jan 26 15:36:56 2012 From: spop at codeaurora.org (Sebastian Pop) Date: Thu, 26 Jan 2012 15:36:56 -0600 Subject: [llvm-commits] [LLVMdev] [PATCH] BasicBlock Autovectorization Pass In-Reply-To: <1327612774.2489.324.camel@sapling> References: <1319928991.23036.957.camel@sapling> <1320108633.23036.1266.camel@sapling> <1320172356.23036.1298.camel@sapling> <4EB0462C.5010209@grosser.es> <1320184739.23036.1334.camel@sapling> <1320191694.23036.1497.camel@sapling> <1320749109.19359.76.camel@sapling> <4EB90E98.4010805@grosser.es> <1320762963.19359.117.camel@sapling> <4EB98207.2070807@grosser.es> <1320791390.19359.262.camel@sapling> <4EBC4B0F.6010609@grosser.es> <1321050998.19359.539.camel@sapling> <4EBDA7F9.9080709@grosser.es> <1321053083.19359.550.camel@sapling> <4EBDB1BF.7090006@grosser.es> <1321400339.19359.782.camel@sapling> <1321486739.19359.1067.camel@sapling> <4EC504B5.2020408@grosser.es> <1321898108.2507.36.camel@sapling> <1321932161.2507.101.camel@sapling> <1322067157.2507.263.camel@sapling> <4ED8F7B0.8050309@grosser.es> <1323822351.590.1687.camel@sapling> <4EFC7291.9040808@grosser.es> <1325179929.13080.2839.camel@sapling> <4EFD7FD3.8040800@grosser.es> <1327378420.32397.1603.camel@sapling> <1327452092.2489.29.camel@sapling> <1327610993.2489.317.camel@sapling> <1327612774.2489.324.camel@sapling> Message-ID: On Thu, Jan 26, 2012 at 3:19 PM, Hal Finkel wrote: > On Thu, 2012-01-26 at 15:12 -0600, Sebastian Pop wrote: >> On Thu, Jan 26, 2012 at 2:49 PM, Hal Finkel wrote: >> > Thanks! Did you compile with any non-default flags other than -mllvm >> > -vectorize? >> >> I used -O3 and -vectorize, no other non-default flags. 
> > If I run clang -O3 -mllvm -vectorize -S -emit-llvm -o test.ll test.c > then I get no vectorization at all (the output is identical to that > without the -vectorize). What target triple is your clang targeting? > Target: arm-none-linux-gnueabi > If I include -mllvm -debug-only=bb-vectorize then the relevant output > is: > BBV: fusing loop #1 for entry in main... > BBV: found 0 instructions with candidate pairs > BBV: done! > BBV: fusing loop #1 for for.body in main... > BBV: found 0 instructions with candidate pairs > BBV: done! > BBV: fusing loop #1 for for.end in main... > BBV: found 0 instructions with candidate pairs > BBV: done! > BBV: fusing loop #1 for for.cond7.preheader in main... > BBV: found 0 instructions with candidate pairs > BBV: done! > BBV: fusing loop #1 for for.body10 in main... > BBV: found 16 instructions with candidate pairs > BBV: found 62 pair connections. > BBV: selected 0 pairs. > BBV: done! > BBV: fusing loop #1 for for.inc45 in main... > BBV: found 0 instructions with candidate pairs > BBV: done! > BBV: fusing loop #1 for for.end47 in main... > BBV: found 3 instructions with candidate pairs > BBV: found 0 pair connections. > BBV: done! > > ?-Hal > Here is my output: clang -O3 -mllvm -vectorize -S -emit-llvm -o test.ll test.c -mllvm -debug-only=bb-vectorize BBV: fusing loop #1 for entry in main... BBV: found 0 instructions with candidate pairs BBV: done! BBV: fusing loop #1 for for.body in main... BBV: found 0 instructions with candidate pairs BBV: done! BBV: fusing loop #1 for for.end in main... BBV: found 0 instructions with candidate pairs BBV: done! BBV: fusing loop #1 for for.cond7.preheader in main... BBV: found 0 instructions with candidate pairs BBV: done! BBV: fusing loop #1 for for.body10 in main... BBV: found 22 instructions with candidate pairs BBV: found 82 pair connections. 
BBV: selected pairs in the best tree for: %0 = load i8* %r.063, align 1, !tbaa !0 BBV: selected pair: %mul23 = mul nsw i32 %conv14, 234 <-> %mul35 = mul nsw i32 %conv15, 543 BBV: selected pair: %0 = load i8* %r.063, align 1, !tbaa !0 <-> %1 = load i8* %incdec.ptr11, align 1, !tbaa !0 BBV: selected pair: %conv14 = zext i8 %0 to i32 <-> %conv15 = zext i8 %1 to i32 BBV: selected pair: %add26 = add i32 %mul25, %mul23 <-> %add36 = add i32 %mul35, %mul33 BBV: selected pair: %mul = mul nsw i32 %conv14, 123 <-> %mul16 = mul nsw i32 %conv15, 321 BBV: selected pair: %conv30 = trunc i32 %add29 to i8 <-> %conv40 = trunc i32 %add39 to i8 BBV: selected pair: %mul25 = mul nsw i32 %conv15, 432 <-> %mul33 = mul nsw i32 %conv14, 345 BBV: selected pair: %add29 = add i32 %add26, %mul28 <-> %add39 = add i32 %add36, %mul38 BBV: selected pair: store i8 %conv30, i8* %incdec.ptr21, align 1, !tbaa !0 <-> store i8 %conv40, i8* %incdec.ptr31, align 1, !tbaa !0 BBV: selected pairs in the best tree for: %conv14 = zext i8 %0 to i32 BBV: selected pair: %mul23 = mul nsw i32 %conv14, 234 <-> %mul35 = mul nsw i32 %conv15, 543 BBV: selected pair: %conv14 = zext i8 %0 to i32 <-> %conv15 = zext i8 %1 to i32 BBV: selected pair: %mul = mul nsw i32 %conv14, 123 <-> %mul16 = mul nsw i32 %conv15, 321 BBV: selected pair: %add26 = add i32 %mul25, %mul23 <-> %add36 = add i32 %mul35, %mul33 BBV: selected pair: %conv30 = trunc i32 %add29 to i8 <-> %conv40 = trunc i32 %add39 to i8 BBV: selected pair: %mul25 = mul nsw i32 %conv15, 432 <-> %mul33 = mul nsw i32 %conv14, 345 BBV: selected pair: %add29 = add i32 %add26, %mul28 <-> %add39 = add i32 %add36, %mul38 BBV: selected pair: store i8 %conv30, i8* %incdec.ptr21, align 1, !tbaa !0 <-> store i8 %conv40, i8* %incdec.ptr31, align 1, !tbaa !0 BBV: selected 9 pairs. 
BBV: initial: for.body10: ; preds = %for.body10, %for.cond7.preheader %w.065 = phi i8* [ %call1, %for.cond7.preheader ], [ %incdec.ptr41, %for.body10 ] %i.164 = phi i32 [ 0, %for.cond7.preheader ], [ %inc43, %for.body10 ] %r.063 = phi i8* [ %call, %for.cond7.preheader ], [ %incdec.ptr13, %for.body10 ] %incdec.ptr11 = getelementptr inbounds i8* %r.063, i32 1 %0 = load i8* %r.063, align 1, !tbaa !0 %incdec.ptr12 = getelementptr inbounds i8* %r.063, i32 2 %1 = load i8* %incdec.ptr11, align 1, !tbaa !0 %incdec.ptr13 = getelementptr inbounds i8* %r.063, i32 3 %2 = load i8* %incdec.ptr12, align 1, !tbaa !0 %conv14 = zext i8 %0 to i32 %mul = mul nsw i32 %conv14, 123 %conv15 = zext i8 %1 to i32 %mul16 = mul nsw i32 %conv15, 321 %conv17 = zext i8 %2 to i32 %mul18 = mul nsw i32 %conv17, 567 %add = add i32 %mul16, %mul %add19 = add i32 %add, %mul18 %conv20 = trunc i32 %add19 to i8 %incdec.ptr21 = getelementptr inbounds i8* %w.065, i32 1 store i8 %conv20, i8* %w.065, align 1, !tbaa !0 %mul23 = mul nsw i32 %conv14, 234 %mul25 = mul nsw i32 %conv15, 432 %mul28 = mul nsw i32 %conv17, 987 %add26 = add i32 %mul25, %mul23 %add29 = add i32 %add26, %mul28 %conv30 = trunc i32 %add29 to i8 %incdec.ptr31 = getelementptr inbounds i8* %w.065, i32 2 store i8 %conv30, i8* %incdec.ptr21, align 1, !tbaa !0 %mul33 = mul nsw i32 %conv14, 345 %mul35 = mul nsw i32 %conv15, 543 %mul38 = mul nsw i32 %conv17, 789 %add36 = add i32 %mul35, %mul33 %add39 = add i32 %add36, %mul38 %conv40 = trunc i32 %add39 to i8 %incdec.ptr41 = getelementptr inbounds i8* %w.065, i32 3 store i8 %conv40, i8* %incdec.ptr31, align 1, !tbaa !0 %inc43 = add nsw i32 %i.164, 1 %exitcond = icmp eq i32 %inc43, 10000 br i1 %exitcond, label %for.inc45, label %for.body10 BBV: fusing: %0 = load i8* %r.063, align 1, !tbaa !0 <-> %1 = load i8* %incdec.ptr11, align 1, !tbaa !0 BBV: fusing: %conv14 = zext i8 %2 to i32 <-> %conv15 = zext i8 %3 to i32 BBV: moving: %mul = mul nsw i32 %5, 123 to after %conv14.v.r2 = extractelement <2 x i32> 
%conv14, i32 1 BBV: fusing: %mul = mul nsw i32 %conv14.v.r1, 123 <-> %mul16 = mul nsw i32 %conv14.v.r2, 321 BBV: fusing: %mul23 = mul nsw i32 %conv14.v.r1, 234 <-> %mul35 = mul nsw i32 %conv14.v.r2, 543 BBV: moving: %add26 = add i32 %mul25, %5 to after %mul23.v.r2 = extractelement <2 x i32> %mul23, i32 1 BBV: moving: %add29 = add i32 %add26, %mul28 to after %add26 = add i32 %mul25, %5 BBV: moving: %conv30 = trunc i32 %add29 to i8 to after %add29 = add i32 %add26, %mul28 BBV: moving: store i8 %conv30, i8* %incdec.ptr21, align 1, !tbaa !0 to after %conv30 = trunc i32 %add29 to i8 BBV: fusing: %mul25 = mul nsw i32 %conv14.v.r2, 432 <-> %mul33 = mul nsw i32 %conv14.v.r1, 345 BBV: fusing: %add26 = add i32 %mul25.v.r1, %mul23.v.r1 <-> %add36 = add i32 %mul23.v.r2, %mul25.v.r2 BBV: moving: %add29 = add i32 %5, %mul28 to after %add26.v.r2 = extractelement <2 x i32> %add26, i32 1 BBV: moving: %conv30 = trunc i32 %add29 to i8 to after %add29 = add i32 %5, %mul28 BBV: moving: store i8 %conv30, i8* %incdec.ptr21, align 1, !tbaa !0 to after %conv30 = trunc i32 %add29 to i8 BBV: fusing: %add29 = add i32 %add26.v.r1, %mul28 <-> %add39 = add i32 %add26.v.r2, %mul38 BBV: moving: %conv30 = trunc i32 %5 to i8 to after %add29.v.r2 = extractelement <2 x i32> %add29, i32 1 BBV: moving: store i8 %conv30, i8* %incdec.ptr21, align 1, !tbaa !0 to after %conv30 = trunc i32 %5 to i8 BBV: fusing: %conv30 = trunc i32 %add29.v.r1 to i8 <-> %conv40 = trunc i32 %add29.v.r2 to i8 BBV: moving: store i8 %5, i8* %incdec.ptr21, align 1, !tbaa !0 to after %conv30.v.r2 = extractelement <2 x i8> %conv30, i32 1 BBV: fusing: store i8 %conv30.v.r1, i8* %incdec.ptr21, align 1, !tbaa !0 <-> store i8 %conv30.v.r2, i8* %incdec.ptr31, align 1, !tbaa !0 BBV: final: for.body10: ; preds = %for.body10, %for.cond7.preheader %w.065 = phi i8* [ %call1, %for.cond7.preheader ], [ %incdec.ptr41, %for.body10 ] %i.164 = phi i32 [ 0, %for.cond7.preheader ], [ %inc43, %for.body10 ] %r.063 = phi i8* [ %call, 
%for.cond7.preheader ], [ %incdec.ptr13, %for.body10 ] %incdec.ptr11 = getelementptr inbounds i8* %r.063, i32 1 %0 = bitcast i8* %r.063 to <2 x i8>* %incdec.ptr12 = getelementptr inbounds i8* %r.063, i32 2 %1 = load <2 x i8>* %0, align 1, !tbaa !0 %2 = extractelement <2 x i8> %1, i32 0 %3 = extractelement <2 x i8> %1, i32 1 %incdec.ptr13 = getelementptr inbounds i8* %r.063, i32 3 %4 = load i8* %incdec.ptr12, align 1, !tbaa !0 %conv14 = zext <2 x i8> %1 to <2 x i32> %conv14.v.r1 = extractelement <2 x i32> %conv14, i32 0 %conv14.v.r2 = extractelement <2 x i32> %conv14, i32 1 %mul.v.i1.1 = insertelement <2 x i32> undef, i32 123, i32 0 %mul.v.i1.2 = insertelement <2 x i32> %mul.v.i1.1, i32 321, i32 1 %mul = mul nsw <2 x i32> %conv14, %mul.v.i1.2 %mul.v.r1 = extractelement <2 x i32> %mul, i32 0 %mul.v.r2 = extractelement <2 x i32> %mul, i32 1 %conv17 = zext i8 %4 to i32 %mul18 = mul nsw i32 %conv17, 567 %add = add i32 %mul.v.r2, %mul.v.r1 %add19 = add i32 %add, %mul18 %conv20 = trunc i32 %add19 to i8 %incdec.ptr21 = getelementptr inbounds i8* %w.065, i32 1 store i8 %conv20, i8* %w.065, align 1, !tbaa !0 %mul23.v.i1.1 = insertelement <2 x i32> undef, i32 234, i32 0 %mul25.v.i1.1 = insertelement <2 x i32> undef, i32 432, i32 0 %mul28 = mul nsw i32 %conv17, 987 %incdec.ptr31 = getelementptr inbounds i8* %w.065, i32 2 %mul25.v.i1.2 = insertelement <2 x i32> %mul25.v.i1.1, i32 345, i32 1 %mul25.v.i0 = shufflevector <2 x i32> %conv14, <2 x i32> undef, <2 x i32> %mul25 = mul nsw <2 x i32> %mul25.v.i0, %mul25.v.i1.2 %mul25.v.r1 = extractelement <2 x i32> %mul25, i32 0 %mul25.v.r2 = extractelement <2 x i32> %mul25, i32 1 %mul23.v.i1.2 = insertelement <2 x i32> %mul23.v.i1.1, i32 543, i32 1 %mul23 = mul nsw <2 x i32> %conv14, %mul23.v.i1.2 %mul23.v.r1 = extractelement <2 x i32> %mul23, i32 0 %mul23.v.r2 = extractelement <2 x i32> %mul23, i32 1 %mul38 = mul nsw i32 %conv17, 789 %add26.v.i1 = shufflevector <2 x i32> %mul23, <2 x i32> %mul25, <2 x i32> %add26.v.i0 = shufflevector <2 
x i32> %mul25, <2 x i32> %mul23, <2 x i32> %add26 = add <2 x i32> %add26.v.i0, %add26.v.i1 %add26.v.r1 = extractelement <2 x i32> %add26, i32 0 %add26.v.r2 = extractelement <2 x i32> %add26, i32 1 %add29.v.i1.1 = insertelement <2 x i32> undef, i32 %mul28, i32 0 %add29.v.i1.2 = insertelement <2 x i32> %add29.v.i1.1, i32 %mul38, i32 1 %add29 = add <2 x i32> %add26, %add29.v.i1.2 %add29.v.r1 = extractelement <2 x i32> %add29, i32 0 %add29.v.r2 = extractelement <2 x i32> %add29, i32 1 %conv30 = trunc <2 x i32> %add29 to <2 x i8> %conv30.v.r1 = extractelement <2 x i8> %conv30, i32 0 %conv30.v.r2 = extractelement <2 x i8> %conv30, i32 1 %5 = bitcast i8* %incdec.ptr21 to <2 x i8>* %incdec.ptr41 = getelementptr inbounds i8* %w.065, i32 3 store <2 x i8> %conv30, <2 x i8>* %5, align 1, !tbaa !0 %inc43 = add nsw i32 %i.164, 1 %exitcond = icmp eq i32 %inc43, 10000 br i1 %exitcond, label %for.inc45, label %for.body10 BBV: fusing loop #2 for for.body10 in main... BBV: found 27 instructions with candidate pairs BBV: found 33 pair connections. BBV: selected 0 pairs. BBV: done! BBV: fusing loop #1 for for.inc45 in main... BBV: found 0 instructions with candidate pairs BBV: done! BBV: fusing loop #1 for for.end47 in main... BBV: found 5 instructions with candidate pairs BBV: found 2 pair connections. BBV: selected 0 pairs. BBV: done! See also the attached test.ll (if that helps). Sebastian -- Qualcomm Innovation Center, Inc is a member of Code Aurora Forum -------------- next part -------------- A non-text attachment was scrubbed... 
Name: test.ll Type: application/octet-stream Size: 5982 bytes Desc: not available Url : http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120126/f91d93eb/attachment.obj From sabre at nondot.org Thu Jan 26 15:37:56 2012 From: sabre at nondot.org (Chris Lattner) Date: Thu, 26 Jan 2012 21:37:56 -0000 Subject: [llvm-commits] [llvm] r149082 - in /llvm/trunk: include/llvm/Support/PatternMatch.h lib/Analysis/ConstantFolding.cpp lib/Analysis/ValueTracking.cpp lib/CodeGen/MachineFunction.cpp Message-ID: <20120126213756.460862A6C12C@llvm.org> Author: lattner Date: Thu Jan 26 15:37:55 2012 New Revision: 149082 URL: http://llvm.org/viewvc/llvm-project?rev=149082&view=rev Log: progress making the world safe for ConstantDataVector. While we're at it, allow PatternMatch's "neg" pattern to match integer vector negations, and enhance ComputeNumSignBits to handle shl of vectors. Modified: llvm/trunk/include/llvm/Support/PatternMatch.h llvm/trunk/lib/Analysis/ConstantFolding.cpp llvm/trunk/lib/Analysis/ValueTracking.cpp llvm/trunk/lib/CodeGen/MachineFunction.cpp Modified: llvm/trunk/include/llvm/Support/PatternMatch.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Support/PatternMatch.h?rev=149082&r1=149081&r2=149082&view=diff ============================================================================== --- llvm/trunk/include/llvm/Support/PatternMatch.h (original) +++ llvm/trunk/include/llvm/Support/PatternMatch.h Thu Jan 26 15:37:55 2012 @@ -98,12 +98,19 @@ Res = &CI->getValue(); return true; } + // FIXME: Remove this. 
if (ConstantVector *CV = dyn_cast(V)) if (ConstantInt *CI = dyn_cast_or_null(CV->getSplatValue())) { Res = &CI->getValue(); return true; } + if (ConstantDataVector *CV = dyn_cast(V)) + if (ConstantInt *CI = + dyn_cast_or_null(CV->getSplatValue())) { + Res = &CI->getValue(); + return true; + } return false; } }; @@ -144,9 +151,13 @@ bool match(ITy *V) { if (const ConstantInt *CI = dyn_cast(V)) return this->isValue(CI->getValue()); + // FIXME: Remove this. if (const ConstantVector *CV = dyn_cast(V)) if (ConstantInt *CI = dyn_cast_or_null(CV->getSplatValue())) return this->isValue(CI->getValue()); + if (const ConstantDataVector *CV = dyn_cast(V)) + if (ConstantInt *CI = dyn_cast_or_null(CV->getSplatValue())) + return this->isValue(CI->getValue()); return false; } }; @@ -164,12 +175,22 @@ Res = &CI->getValue(); return true; } + + // FIXME: remove. if (const ConstantVector *CV = dyn_cast(V)) if (ConstantInt *CI = dyn_cast_or_null(CV->getSplatValue())) if (this->isValue(CI->getValue())) { Res = &CI->getValue(); return true; } + + if (const ConstantDataVector *CV = dyn_cast(V)) + if (ConstantInt *CI = dyn_cast_or_null(CV->getSplatValue())) + if (this->isValue(CI->getValue())) { + Res = &CI->getValue(); + return true; + } + return false; } }; @@ -611,11 +632,11 @@ } private: bool matchIfNot(Value *LHS, Value *RHS) { - if (ConstantInt *CI = dyn_cast(RHS)) - return CI->isAllOnesValue() && L.match(LHS); - if (ConstantVector *CV = dyn_cast(RHS)) - return CV->isAllOnesValue() && L.match(LHS); - return false; + return (isa(RHS) || isa(RHS) || + // FIXME: Remove CV. 
+ isa(RHS)) && + cast(RHS)->isAllOnesValue() && + L.match(LHS); } }; @@ -638,9 +659,9 @@ } private: bool matchIfNeg(Value *LHS, Value *RHS) { - if (ConstantInt *C = dyn_cast(LHS)) - return C->isZero() && L.match(RHS); - return false; + return ((isa(LHS) && cast(LHS)->isZero()) || + isa(LHS)) && + L.match(RHS); } }; Modified: llvm/trunk/lib/Analysis/ConstantFolding.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/ConstantFolding.cpp?rev=149082&r1=149081&r2=149082&view=diff ============================================================================== --- llvm/trunk/lib/Analysis/ConstantFolding.cpp (original) +++ llvm/trunk/lib/Analysis/ConstantFolding.cpp Thu Jan 26 15:37:55 2012 @@ -65,17 +65,17 @@ } // If this is a bitcast from constant vector -> vector, fold it. - ConstantVector *CV = dyn_cast(C); - if (CV == 0) + // FIXME: Remove ConstantVector support. + if (!isa(C) && !isa(C)) return ConstantExpr::getBitCast(C, DestTy); // If the element types match, VMCore can fold it. unsigned NumDstElt = DestVTy->getNumElements(); - unsigned NumSrcElt = CV->getNumOperands(); + unsigned NumSrcElt = C->getType()->getVectorNumElements(); if (NumDstElt == NumSrcElt) return ConstantExpr::getBitCast(C, DestTy); - Type *SrcEltTy = CV->getType()->getElementType(); + Type *SrcEltTy = C->getType()->getVectorElementType(); Type *DstEltTy = DestVTy->getElementType(); // Otherwise, we're changing the number of elements in a vector, which @@ -95,7 +95,6 @@ VectorType::get(IntegerType::get(C->getContext(), FPWidth), NumDstElt); // Recursively handle this integer conversion, if possible. C = FoldBitCast(C, DestIVTy, TD); - if (!C) return ConstantExpr::getBitCast(C, DestTy); // Finally, VMCore can handle this now that #elts line up. return ConstantExpr::getBitCast(C, DestTy); @@ -109,8 +108,9 @@ VectorType::get(IntegerType::get(C->getContext(), FPWidth), NumSrcElt); // Ask VMCore to do the conversion now that #elts line up. 
C = ConstantExpr::getBitCast(C, SrcIVTy); - CV = dyn_cast(C); - if (!CV) // If VMCore wasn't able to fold it, bail out. + // If VMCore wasn't able to fold it, bail out. + if (!isa(C) && // FIXME: Remove ConstantVector. + !isa(C)) return C; } @@ -132,7 +132,7 @@ Constant *Elt = Zero; unsigned ShiftAmt = isLittleEndian ? 0 : SrcBitSize*(Ratio-1); for (unsigned j = 0; j != Ratio; ++j) { - Constant *Src = dyn_cast(CV->getOperand(SrcElt++)); + Constant *Src =dyn_cast(C->getAggregateElement(SrcElt++)); if (!Src) // Reject constantexpr elements. return ConstantExpr::getBitCast(C, DestTy); @@ -149,28 +149,29 @@ } Result.push_back(Elt); } - } else { - // Handle: bitcast (<2 x i64> to <4 x i32>) - unsigned Ratio = NumDstElt/NumSrcElt; - unsigned DstBitSize = DstEltTy->getPrimitiveSizeInBits(); + return ConstantVector::get(Result); + } + + // Handle: bitcast (<2 x i64> to <4 x i32>) + unsigned Ratio = NumDstElt/NumSrcElt; + unsigned DstBitSize = DstEltTy->getPrimitiveSizeInBits(); + + // Loop over each source value, expanding into multiple results. + for (unsigned i = 0; i != NumSrcElt; ++i) { + Constant *Src = dyn_cast(C->getAggregateElement(i)); + if (!Src) // Reject constantexpr elements. + return ConstantExpr::getBitCast(C, DestTy); - // Loop over each source value, expanding into multiple results. - for (unsigned i = 0; i != NumSrcElt; ++i) { - Constant *Src = dyn_cast(CV->getOperand(i)); - if (!Src) // Reject constantexpr elements. - return ConstantExpr::getBitCast(C, DestTy); + unsigned ShiftAmt = isLittleEndian ? 0 : DstBitSize*(Ratio-1); + for (unsigned j = 0; j != Ratio; ++j) { + // Shift the piece of the value into the right place, depending on + // endianness. + Constant *Elt = ConstantExpr::getLShr(Src, + ConstantInt::get(Src->getType(), ShiftAmt)); + ShiftAmt += isLittleEndian ? DstBitSize : -DstBitSize; - unsigned ShiftAmt = isLittleEndian ? 
0 : DstBitSize*(Ratio-1); - for (unsigned j = 0; j != Ratio; ++j) { - // Shift the piece of the value into the right place, depending on - // endianness. - Constant *Elt = ConstantExpr::getLShr(Src, - ConstantInt::get(Src->getType(), ShiftAmt)); - ShiftAmt += isLittleEndian ? DstBitSize : -DstBitSize; - - // Truncate and remember this piece. - Result.push_back(ConstantExpr::getTrunc(Elt, DstEltTy)); - } + // Truncate and remember this piece. + Result.push_back(ConstantExpr::getTrunc(Elt, DstEltTy)); } } @@ -311,6 +312,7 @@ // not reached. } + // FIXME: Remove ConstantVector if (isa(C) || isa(C) || isa(C)) { Type *EltTy = cast(C->getType())->getElementType(); @@ -1115,11 +1117,8 @@ /// available for the result. Returns null if the conversion cannot be /// performed, otherwise returns the Constant value resulting from the /// conversion. -static Constant *ConstantFoldConvertToInt(ConstantFP *Op, bool roundTowardZero, - Type *Ty) { - assert(Op && "Called with NULL operand"); - APFloat Val(Op->getValueAPF()); - +static Constant *ConstantFoldConvertToInt(const APFloat &Val, + bool roundTowardZero, Type *Ty) { // All of these conversion intrinsics form an integer of at most 64bits. unsigned ResultWidth = cast(Ty)->getBitWidth(); assert(ResultWidth <= 64 && @@ -1271,24 +1270,31 @@ } } - if (ConstantVector *Op = dyn_cast(Operands[0])) { + // Support ConstantVector in case we have an Undef in the top. 
+ if (isa(Operands[0]) || + isa(Operands[0])) { + Constant *Op = cast(Operands[0]); switch (F->getIntrinsicID()) { default: break; case Intrinsic::x86_sse_cvtss2si: case Intrinsic::x86_sse_cvtss2si64: case Intrinsic::x86_sse2_cvtsd2si: case Intrinsic::x86_sse2_cvtsd2si64: - if (ConstantFP *FPOp = dyn_cast(Op->getOperand(0))) - return ConstantFoldConvertToInt(FPOp, /*roundTowardZero=*/false, Ty); + if (ConstantFP *FPOp = + dyn_cast_or_null(Op->getAggregateElement(0U))) + return ConstantFoldConvertToInt(FPOp->getValueAPF(), + /*roundTowardZero=*/false, Ty); case Intrinsic::x86_sse_cvttss2si: case Intrinsic::x86_sse_cvttss2si64: case Intrinsic::x86_sse2_cvttsd2si: case Intrinsic::x86_sse2_cvttsd2si64: - if (ConstantFP *FPOp = dyn_cast(Op->getOperand(0))) - return ConstantFoldConvertToInt(FPOp, /*roundTowardZero=*/true, Ty); + if (ConstantFP *FPOp = + dyn_cast_or_null(Op->getAggregateElement(0U))) + return ConstantFoldConvertToInt(FPOp->getValueAPF(), + /*roundTowardZero=*/true, Ty); } } - + if (isa(Operands[0])) { if (F->getIntrinsicID() == Intrinsic::bswap) return Operands[0]; Modified: llvm/trunk/lib/Analysis/ValueTracking.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/ValueTracking.cpp?rev=149082&r1=149081&r2=149082&view=diff ============================================================================== --- llvm/trunk/lib/Analysis/ValueTracking.cpp (original) +++ llvm/trunk/lib/Analysis/ValueTracking.cpp Thu Jan 26 15:37:55 2012 @@ -89,6 +89,7 @@ } // Handle a constant vector by taking the intersection of the known bits of // each element. + // FIXME: Remove. 
if (ConstantVector *CV = dyn_cast(V)) { KnownZero.setAllBits(); KnownOne.setAllBits(); for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) { @@ -1005,30 +1006,28 @@ Tmp = TyBits - U->getOperand(0)->getType()->getScalarSizeInBits(); return ComputeNumSignBits(U->getOperand(0), TD, Depth+1) + Tmp; - case Instruction::AShr: + case Instruction::AShr: { Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1); - // ashr X, C -> adds C sign bits. - if (ConstantInt *C = dyn_cast(U->getOperand(1))) { - Tmp += C->getZExtValue(); + // ashr X, C -> adds C sign bits. Vectors too. + const APInt *ShAmt; + if (match(U->getOperand(1), m_APInt(ShAmt))) { + Tmp += ShAmt->getZExtValue(); if (Tmp > TyBits) Tmp = TyBits; } - // vector ashr X, -> adds C sign bits - if (ConstantVector *C = dyn_cast(U->getOperand(1))) { - if (ConstantInt *CI = dyn_cast_or_null(C->getSplatValue())) { - Tmp += CI->getZExtValue(); - if (Tmp > TyBits) Tmp = TyBits; - } - } return Tmp; - case Instruction::Shl: - if (ConstantInt *C = dyn_cast(U->getOperand(1))) { + } + case Instruction::Shl: { + const APInt *ShAmt; + if (match(U->getOperand(1), m_APInt(ShAmt))) { // shl destroys sign bits. Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1); - if (C->getZExtValue() >= TyBits || // Bad shift. - C->getZExtValue() >= Tmp) break; // Shifted all sign bits out. - return Tmp - C->getZExtValue(); + Tmp2 = ShAmt->getZExtValue(); + if (Tmp2 >= TyBits || // Bad shift. + Tmp2 >= Tmp) break; // Shifted all sign bits out. + return Tmp - Tmp2; } break; + } case Instruction::And: case Instruction::Or: case Instruction::Xor: // NOT is handled here. 
Modified: llvm/trunk/lib/CodeGen/MachineFunction.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MachineFunction.cpp?rev=149082&r1=149081&r2=149082&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/MachineFunction.cpp (original) +++ llvm/trunk/lib/CodeGen/MachineFunction.cpp Thu Jan 26 15:37:55 2012 @@ -655,9 +655,12 @@ if (A->getType() == B->getType()) return false; // For now, only support constants with the same size. - if (TD->getTypeStoreSize(A->getType()) != TD->getTypeStoreSize(B->getType())) + uint64_t StoreSize = TD->getTypeStoreSize(A->getType()); + if (StoreSize != TD->getTypeStoreSize(B->getType()) || + StoreSize > 128) return false; + // If a floating-point value and an integer value have the same encoding, // they can share a constant-pool entry. if (const ConstantFP *AFP = dyn_cast(A)) From hfinkel at anl.gov Thu Jan 26 15:41:52 2012 From: hfinkel at anl.gov (Hal Finkel) Date: Thu, 26 Jan 2012 15:41:52 -0600 Subject: [llvm-commits] [LLVMdev] [PATCH] BasicBlock Autovectorization Pass In-Reply-To: References: <1319928991.23036.957.camel@sapling> <1320108633.23036.1266.camel@sapling> <1320172356.23036.1298.camel@sapling> <4EB0462C.5010209@grosser.es> <1320184739.23036.1334.camel@sapling> <1320191694.23036.1497.camel@sapling> <1320749109.19359.76.camel@sapling> <4EB90E98.4010805@grosser.es> <1320762963.19359.117.camel@sapling> <4EB98207.2070807@grosser.es> <1320791390.19359.262.camel@sapling> <4EBC4B0F.6010609@grosser.es> <1321050998.19359.539.camel@sapling> <4EBDA7F9.9080709@grosser.es> <1321053083.19359.550.camel@sapling> <4EBDB1BF.7090006@grosser.es> <1321400339.19359.782.camel@sapling> <1321486739.19359.1067.camel@sapling> <4EC504B5.2020408@grosser.es> <1321898108.2507.36.camel@sapling> <1321932161.2507.101.camel@sapling> <1322067157.2507.263.camel@sapling> <4ED8F7B0.8050309@grosser.es> <1323822351.590.1687.camel@sapling> <4EFC7291.9040808@grosser.es> 
<1325179929.13080.2839.camel@sapling> <4EFD7FD3.8040800@grosser.es> <1327378420.32397.1603.camel@sapling> <1327452092.2489.29.camel@sapling> <1327610993.2489.317.camel@sapling> <1327612774.2489.324.camel@sapling> Message-ID: <1327614112.2489.326.camel@sapling> On Thu, 2012-01-26 at 15:36 -0600, Sebastian Pop wrote: > arm-none-linux-gnueabi Indeed, adding -ccc-host-triple arm-none-linux-gnueabi I also get vectorization (even though I don't get vectorization when targeting x86_64). I'll let you know what I find. -Hal -- Hal Finkel Postdoctoral Appointee Leadership Computing Facility Argonne National Laboratory From spop at codeaurora.org Thu Jan 26 15:49:59 2012 From: spop at codeaurora.org (Sebastian Pop) Date: Thu, 26 Jan 2012 15:49:59 -0600 Subject: [llvm-commits] [LLVMdev] [PATCH] BasicBlock Autovectorization Pass In-Reply-To: <1327614112.2489.326.camel@sapling> References: <1319928991.23036.957.camel@sapling> <1320108633.23036.1266.camel@sapling> <1320172356.23036.1298.camel@sapling> <4EB0462C.5010209@grosser.es> <1320184739.23036.1334.camel@sapling> <1320191694.23036.1497.camel@sapling> <1320749109.19359.76.camel@sapling> <4EB90E98.4010805@grosser.es> <1320762963.19359.117.camel@sapling> <4EB98207.2070807@grosser.es> <1320791390.19359.262.camel@sapling> <4EBC4B0F.6010609@grosser.es> <1321050998.19359.539.camel@sapling> <4EBDA7F9.9080709@grosser.es> <1321053083.19359.550.camel@sapling> <4EBDB1BF.7090006@grosser.es> <1321400339.19359.782.camel@sapling> <1321486739.19359.1067.camel@sapling> <4EC504B5.2020408@grosser.es> <1321898108.2507.36.camel@sapling> <1321932161.2507.101.camel@sapling> <1322067157.2507.263.camel@sapling> <4ED8F7B0.8050309@grosser.es> <1323822351.590.1687.camel@sapling> <4EFC7291.9040808@grosser.es> <1325179929.13080.2839.camel@sapling> <4EFD7FD3.8040800@grosser.es> <1327378420.32397.1603.camel@sapling> <1327452092.2489.29.camel@sapling> <1327610993.2489.317.camel@sapling> <1327612774.2489.324.camel@sapling> <1327614112.2489.326.camel@sapling> 
Message-ID: On Thu, Jan 26, 2012 at 3:41 PM, Hal Finkel wrote: > On Thu, 2012-01-26 at 15:36 -0600, Sebastian Pop wrote: >> arm-none-linux-gnueabi > > Indeed, adding -ccc-host-triple arm-none-linux-gnueabi I also get Minor remark: please use -target instead of -ccc-host-triple, which is now deprecated. Thanks for looking at this testcase. Sebastian -- Qualcomm Innovation Center, Inc is a member of Code Aurora Forum From stoklund at 2pi.dk Thu Jan 26 15:54:11 2012 From: stoklund at 2pi.dk (Jakob Stoklund Olesen) Date: Thu, 26 Jan 2012 13:54:11 -0800 Subject: [llvm-commits] [llvm] r149070 - in /llvm/trunk: lib/CodeGen/MachineBasicBlock.cpp test/CodeGen/ARM/tail-dup.ll In-Reply-To: <20120126182425.B2E072A6C12C@llvm.org> References: <20120126182425.B2E072A6C12C@llvm.org> Message-ID: On Jan 26, 2012, at 10:24 AM, Chad Rosier wrote: > --- llvm/trunk/lib/CodeGen/MachineBasicBlock.cpp (original) > +++ llvm/trunk/lib/CodeGen/MachineBasicBlock.cpp Thu Jan 26 12:24:25 2012 > @@ -535,13 +535,12 @@ > if (TII->AnalyzeBranch(*this, TBB, FBB, Cond)) { > // If we couldn't analyze the branch, examine the last instruction. > // If the block doesn't end in a known control barrier, assume fallthrough > - // is possible. The isPredicable check is needed because this code can be > + // is possible. The isPredicated check is needed because this code can be > // called during IfConversion, where an instruction which is normally a > // Barrier is predicated and thus no longer an actual control barrier. This > // is over-conservative though, because if an instruction isn't actually > // predicated we could still treat it like a barrier. > - return empty() || !back().isBarrier() || > - back().isPredicable(); > + return empty() || !back().isBarrier() || TII->isPredicated(&back()); > } Is the comment still correct now? Is the check still over-conservative? 
/jakob From mcrosier at apple.com Thu Jan 26 15:58:51 2012 From: mcrosier at apple.com (Chad Rosier) Date: Thu, 26 Jan 2012 13:58:51 -0800 Subject: [llvm-commits] [llvm] r149070 - in /llvm/trunk: lib/CodeGen/MachineBasicBlock.cpp test/CodeGen/ARM/tail-dup.ll In-Reply-To: References: <20120126182425.B2E072A6C12C@llvm.org> Message-ID: <4BC99822-1255-4703-8284-F7DB53763D8C@apple.com> On Jan 26, 2012, at 1:54 PM, Jakob Stoklund Olesen wrote: > > On Jan 26, 2012, at 10:24 AM, Chad Rosier wrote: > >> --- llvm/trunk/lib/CodeGen/MachineBasicBlock.cpp (original) >> +++ llvm/trunk/lib/CodeGen/MachineBasicBlock.cpp Thu Jan 26 12:24:25 2012 >> @@ -535,13 +535,12 @@ >> if (TII->AnalyzeBranch(*this, TBB, FBB, Cond)) { >> // If we couldn't analyze the branch, examine the last instruction. >> // If the block doesn't end in a known control barrier, assume fallthrough >> - // is possible. The isPredicable check is needed because this code can be >> + // is possible. The isPredicated check is needed because this code can be >> // called during IfConversion, where an instruction which is normally a >> // Barrier is predicated and thus no longer an actual control barrier. This >> // is over-conservative though, because if an instruction isn't actually >> // predicated we could still treat it like a barrier. >> - return empty() || !back().isBarrier() || >> - back().isPredicable(); >> + return empty() || !back().isBarrier() || TII->isPredicated(&back()); >> } > > Is the comment still correct now? Is the check still over-conservative? No, I don't believe we're still being conservative. I've already updated the comment in r149075. 
:) Chad > /jakob > From echristo at apple.com Thu Jan 26 16:06:24 2012 From: echristo at apple.com (Eric Christopher) Date: Thu, 26 Jan 2012 22:06:24 -0000 Subject: [llvm-commits] [llvm] r149085 - /llvm/trunk/utils/llvmgrep Message-ID: <20120126220624.0D6CE2A6C12C@llvm.org> Author: echristo Date: Thu Jan 26 16:06:23 2012 New Revision: 149085 URL: http://llvm.org/viewvc/llvm-project?rev=149085&view=rev Log: Use -H on darwin as well. Patch by Liang Wang! Modified: llvm/trunk/utils/llvmgrep Modified: llvm/trunk/utils/llvmgrep URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/llvmgrep?rev=149085&r1=149084&r2=149085&view=diff ============================================================================== --- llvm/trunk/utils/llvmgrep (original) +++ llvm/trunk/utils/llvmgrep Thu Jan 26 16:06:23 2012 @@ -29,7 +29,7 @@ cd $TOPDIR case `uname -s` in SunOS) grep_cmd="ggrep -H -n" ;; - Linux) grep_cmd="egrep -H -n" ;; + Linux|Darwin) grep_cmd="egrep -H -n" ;; *) grep_cmd="egrep -l -n" ;; esac ./utils/llvmdo -topdir "$TOPDIR" \ From echristo at apple.com Thu Jan 26 16:10:37 2012 From: echristo at apple.com (Eric Christopher) Date: Thu, 26 Jan 2012 14:10:37 -0800 Subject: [llvm-commits] fix egrep options for Darwin in utils/llvmgrep In-Reply-To: References: <42F11728-26F5-47C0-B460-F7E1781D3ED3@apple.com> Message-ID: <0EC4A19A-4E23-48F7-9FAC-B48F6BBA781E@apple.com> On Jan 26, 2012, at 1:31 PM, Liang Wang wrote: > On Fri, Jan 27, 2012 at 4:31 AM, Chris Lattner wrote: >> On Jan 24, 2012, at 6:01 PM, Liang Wang wrote: >>> Hi, >>> >>> Currently, llvmgrep shows only filename since it uses -l as egrep >>> option on Darwin. It should use -H as for Linux. >>> The following patch fixes the problem. OK for trunk? >> >> Sure, please commit. > > Thanks. I haven't got commit access yet. Could you please help > commit this patch? Done in r149085. 
-eric From netcasper at gmail.com Thu Jan 26 16:25:51 2012 From: netcasper at gmail.com (Liang Wang) Date: Fri, 27 Jan 2012 06:25:51 +0800 Subject: [llvm-commits] fix egrep options for Darwin in utils/llvmgrep In-Reply-To: <0EC4A19A-4E23-48F7-9FAC-B48F6BBA781E@apple.com> References: <42F11728-26F5-47C0-B460-F7E1781D3ED3@apple.com> <0EC4A19A-4E23-48F7-9FAC-B48F6BBA781E@apple.com> Message-ID: On Fri, Jan 27, 2012 at 6:10 AM, Eric Christopher wrote: > > On Jan 26, 2012, at 1:31 PM, Liang Wang wrote: > >> On Fri, Jan 27, 2012 at 4:31 AM, Chris Lattner wrote: >>> On Jan 24, 2012, at 6:01 PM, Liang Wang wrote: >>>> Hi, >>>> >>>> Currently, llvmgrep shows only filename since it uses -l as egrep >>>> option on Darwin. It should use -H as for Linux. >>>> The following patch fixes the problem. OK for trunk? >>> >>> Sure, please commit. >> >> Thanks. I haven't got commit access yet. Could you please help >> commit this patch? > > Done in r149085. > Thank you. -liang > -eric From stoklund at 2pi.dk Thu Jan 26 16:59:28 2012 From: stoklund at 2pi.dk (Jakob Stoklund Olesen) Date: Thu, 26 Jan 2012 22:59:28 -0000 Subject: [llvm-commits] [llvm] r149088 - in /llvm/trunk: lib/Target/X86/X86InstrControl.td lib/Target/X86/X86RegisterInfo.td test/CodeGen/X86/avx-win64.ll Message-ID: <20120126225929.00F312A6C12C@llvm.org> Author: stoklund Date: Thu Jan 26 16:59:28 2012 New Revision: 149088 URL: http://llvm.org/viewvc/llvm-project?rev=149088&view=rev Log: Handle call-clobbered ymm registers on Win64. The Win64 calling convention has xmm6-15 as callee-saved while still clobbering all ymm registers. Add a YMM_HI_6_15 pseudo-register that aliases the clobbered part of the ymm registers, and mark that as call-clobbered. This allows live xmm registers across calls. This hack wouldn't be necessary with RegisterMask operands representing the call clobbers, but they are not quite operational yet. 
Added: llvm/trunk/test/CodeGen/X86/avx-win64.ll Modified: llvm/trunk/lib/Target/X86/X86InstrControl.td llvm/trunk/lib/Target/X86/X86RegisterInfo.td Modified: llvm/trunk/lib/Target/X86/X86InstrControl.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrControl.td?rev=149088&r1=149087&r2=149088&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86InstrControl.td (original) +++ llvm/trunk/lib/Target/X86/X86InstrControl.td Thu Jan 26 16:59:28 2012 @@ -249,7 +249,7 @@ let Defs = [RAX, RCX, RDX, R8, R9, R10, R11, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1, MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7, - XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, EFLAGS], + XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, YMM_HI_6_15, EFLAGS], Uses = [RSP] in { def WINCALL64pcrel32 : Ii32PCRel<0xE8, RawFrm, (outs), (ins i64i32imm_pcrel:$dst, variable_ops), Modified: llvm/trunk/lib/Target/X86/X86RegisterInfo.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86RegisterInfo.td?rev=149088&r1=149087&r2=149088&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86RegisterInfo.td (original) +++ llvm/trunk/lib/Target/X86/X86RegisterInfo.td Thu Jan 26 16:59:28 2012 @@ -206,6 +206,13 @@ def YMM15: RegisterWithSubRegs<"ymm15", [XMM15]>, DwarfRegAlias; } + // Pseudo-register that aliases the high part of ymm6-ymm15 that is clobbered + // by win64 calls. Doesn't alias the callee-saved xmm6-xmm15. 
+ def YMM_HI_6_15 : Register<"ymmhi-6-15"> { + let Aliases = [YMM6, YMM7, YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, + YMM15]; + } + class STRegister A> : Register { let Aliases = A; } Added: llvm/trunk/test/CodeGen/X86/avx-win64.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-win64.ll?rev=149088&view=auto ============================================================================== --- llvm/trunk/test/CodeGen/X86/avx-win64.ll (added) +++ llvm/trunk/test/CodeGen/X86/avx-win64.ll Thu Jan 26 16:59:28 2012 @@ -0,0 +1,48 @@ +; RUN: llc < %s -mcpu=corei7-avx -mattr=+avx | FileCheck %s +; PR11862 +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-pc-win32" + +; This function has live ymm registers across a win64 call. +; The ymm6-15 registers are still call-clobbered even if xmm6-15 are callee-saved. +; Verify that callee-saved registers are not being used. + +; CHECK: f___vyf +; CHECK: pushq %rbp +; CHECK-NOT: vmovaps{{.*}}(%r +; CHECK: vmovmsk +; CHECK: vmovaps %ymm{{.*}}(%r +; CHECK: vmovaps %ymm{{.*}}(%r +; CHECK: call +; Two reloads. It's OK if these get folded. 
+; CHECK: vmovaps {{.*\(%r.*}}, %ymm +; CHECK: vmovaps {{.*\(%r.*}}, %ymm +; CHECK: blend +define <8 x float> @f___vyf(<8 x float> %x, <8 x i32> %__mask) nounwind readnone { +allocas: + %bincmp = fcmp oeq <8 x float> %x, zeroinitializer + %val_to_boolvec32 = sext <8 x i1> %bincmp to <8 x i32> + %"~test" = xor <8 x i32> %val_to_boolvec32, + %"internal_mask&function_mask25" = and <8 x i32> %"~test", %__mask + %floatmask.i46 = bitcast <8 x i32> %"internal_mask&function_mask25" to <8 x float> + %v.i47 = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %floatmask.i46) nounwind readnone + %any_mm_cmp27 = icmp eq i32 %v.i47, 0 + br i1 %any_mm_cmp27, label %safe_if_after_false, label %safe_if_run_false + +safe_if_run_false: ; preds = %allocas + %binop = fadd <8 x float> %x, + %calltmp = call <8 x float> @f___vyf(<8 x float> %binop, <8 x i32> %"internal_mask&function_mask25") + %binop33 = fadd <8 x float> %calltmp, %x + %mask_as_float.i48 = bitcast <8 x i32> %"~test" to <8 x float> + %blend.i52 = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %x, <8 x float> %binop33, <8 x float> %mask_as_float.i48) nounwind + br label %safe_if_after_false + +safe_if_after_false: ; preds = %safe_if_run_false, %allocas + %0 = phi <8 x float> [ %x, %allocas ], [ %blend.i52, %safe_if_run_false ] + ret <8 x float> %0 +} + +declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>) nounwind readnone +declare <8 x float> @llvm.x86.avx.maskload.ps.256(i8*, <8 x float>) nounwind readonly +declare void @llvm.x86.avx.maskstore.ps.256(i8*, <8 x float>, <8 x float>) nounwind +declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone From hfinkel at anl.gov Thu Jan 26 17:20:49 2012 From: hfinkel at anl.gov (Hal Finkel) Date: Thu, 26 Jan 2012 17:20:49 -0600 Subject: [llvm-commits] [LLVMdev] [PATCH] BasicBlock Autovectorization Pass In-Reply-To: References: <1319928991.23036.957.camel@sapling> <1320108633.23036.1266.camel@sapling> 
<1320172356.23036.1298.camel@sapling> <4EB0462C.5010209@grosser.es> <1320184739.23036.1334.camel@sapling> <1320191694.23036.1497.camel@sapling> <1320749109.19359.76.camel@sapling> <4EB90E98.4010805@grosser.es> <1320762963.19359.117.camel@sapling> <4EB98207.2070807@grosser.es> <1320791390.19359.262.camel@sapling> <4EBC4B0F.6010609@grosser.es> <1321050998.19359.539.camel@sapling> <4EBDA7F9.9080709@grosser.es> <1321053083.19359.550.camel@sapling> <4EBDB1BF.7090006@grosser.es> <1321400339.19359.782.camel@sapling> <1321486739.19359.1067.camel@sapling> <4EC504B5.2020408@grosser.es> <1321898108.2507.36.camel@sapling> <1321932161.2507.101.camel@sapling> <1322067157.2507.263.camel@sapling> <4ED8F7B0.8050309@grosser.es> <1323822351.590.1687.camel@sapling> <4EFC7291.9040808@grosser.es> <1325179929.13080.2839.camel@sapling> <4EFD7FD3.8040800@grosser.es> <1327378420.32397.1603.camel@sapling> <1327452092.2489.29.camel@sapling> <1327610993.2489.317.camel@sapling> <1327612774.2489.324.camel@sapling> <1327614112.2489.326.camel@sapling> Message-ID: <1327620049.2489.347.camel@sapling> On Thu, 2012-01-26 at 15:49 -0600, Sebastian Pop wrote: > On Thu, Jan 26, 2012 at 3:41 PM, Hal Finkel wrote: > > On Thu, 2012-01-26 at 15:36 -0600, Sebastian Pop wrote: > >> arm-none-linux-gnueabi For what cpu are you compiling? I think this may be a case where not having information on exactly what can be vectorized on the backend may be hurting us. The LLVM output looks okay (attached), but it may be that the post-legalization optimizations are just not good enough to undo the damage done by an unfortunate selection of instructions to vectorize. 
The options available in the pass currently are fairly coarse, but please try setting them as appropriate for your cpu and see if that makes a difference: -bb-vectorize-aligned-only - Only generate aligned loads and stores -bb-vectorize-no-casts - Don't try to vectorize casting (conversion) operations -bb-vectorize-no-floats - Don't try to vectorize floating-point values -bb-vectorize-no-fma - Don't try to vectorize the fused-multiply-add intrinsic -bb-vectorize-no-ints - Don't try to vectorize integer values -bb-vectorize-no-math - Don't try to vectorize floating-point math intrinsics -bb-vectorize-no-mem-ops - Don't try to vectorize loads and stores -bb-vectorize-vector-bits= - The size of the native vector registers (128 is the default) > > > > Indeed, adding -ccc-host-triple arm-none-linux-gnueabi I also get > > Minor remark: please use -target instead of -ccc-host-triple that is > now deprecated. Correct, thanks! -Hal > > Thanks for looking at this testcase. > Sebastian > -- > Qualcomm Innovation Center, Inc is a member of Code Aurora Forum -- Hal Finkel Postdoctoral Appointee Leadership Computing Facility Argonne National Laboratory -------------- next part -------------- ; ModuleID = 'test-s-20120126.c' target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64" target triple = "armv4t-none-linux-gnueabi" %struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i32, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i32, i32, [40 x i8] } %struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 } %struct.timeval = type { i32, i32 } %struct.timezone = type { i32, i32 } @stdout = external global %struct._IO_FILE* @.str = private unnamed_addr constant [35 x i8] c"kernel execution time: %18.9f sec\0A\00", align 1 define i32 @main() nounwind { entry: %start = alloca %struct.timeval, align 4 %end = 
alloca %struct.timeval, align 4 %call = call noalias i8* @malloc(i32 30000) nounwind %call1 = call noalias i8* @malloc(i32 30000) nounwind br label %for.body for.body: ; preds = %for.body, %entry %i.068 = phi i32 [ 0, %entry ], [ %inc, %for.body ] %x.067 = phi i8* [ %call, %entry ], [ %incdec.ptr, %for.body ] %conv = trunc i32 %i.068 to i8 %incdec.ptr = getelementptr inbounds i8* %x.067, i32 1 store i8 %conv, i8* %x.067, align 1, !tbaa !0 %inc = add nsw i32 %i.068, 1 %exitcond70 = icmp eq i32 %inc, 30000 br i1 %exitcond70, label %for.end, label %for.body for.end: ; preds = %for.body %call2 = call i32 @gettimeofday(%struct.timeval* %start, %struct.timezone* null) nounwind br label %for.cond7.preheader for.cond7.preheader: ; preds = %for.inc45, %for.end %k.066 = phi i32 [ 0, %for.end ], [ %inc46, %for.inc45 ] br label %for.body10 for.body10: ; preds = %for.body10, %for.cond7.preheader %w.065 = phi i8* [ %call1, %for.cond7.preheader ], [ %incdec.ptr41, %for.body10 ] %i.164 = phi i32 [ 0, %for.cond7.preheader ], [ %inc43, %for.body10 ] %r.063 = phi i8* [ %call, %for.cond7.preheader ], [ %incdec.ptr13, %for.body10 ] %incdec.ptr11 = getelementptr inbounds i8* %r.063, i32 1 %0 = load i8* %r.063, align 1, !tbaa !0 %incdec.ptr12 = getelementptr inbounds i8* %r.063, i32 2 %1 = load i8* %incdec.ptr11, align 1, !tbaa !0 %incdec.ptr13 = getelementptr inbounds i8* %r.063, i32 3 %2 = load i8* %incdec.ptr12, align 1, !tbaa !0 %conv14 = zext i8 %0 to i32 %mul = mul nsw i32 %conv14, 123 %conv15 = zext i8 %1 to i32 %mul16 = mul nsw i32 %conv15, 321 %conv17 = zext i8 %2 to i32 %mul18 = mul nsw i32 %conv17, 567 %add = add i32 %mul16, %mul %add19 = add i32 %add, %mul18 %conv20 = trunc i32 %add19 to i8 %incdec.ptr21 = getelementptr inbounds i8* %w.065, i32 1 store i8 %conv20, i8* %w.065, align 1, !tbaa !0 %mul23 = mul nsw i32 %conv14, 234 %mul25 = mul nsw i32 %conv15, 432 %mul28 = mul nsw i32 %conv17, 987 %add26 = add i32 %mul25, %mul23 %add29 = add i32 %add26, %mul28 %conv30 = trunc i32 
%add29 to i8 %incdec.ptr31 = getelementptr inbounds i8* %w.065, i32 2 store i8 %conv30, i8* %incdec.ptr21, align 1, !tbaa !0 %mul33 = mul nsw i32 %conv14, 345 %mul35 = mul nsw i32 %conv15, 543 %mul38 = mul nsw i32 %conv17, 789 %add36 = add i32 %mul35, %mul33 %add39 = add i32 %add36, %mul38 %conv40 = trunc i32 %add39 to i8 %incdec.ptr41 = getelementptr inbounds i8* %w.065, i32 3 store i8 %conv40, i8* %incdec.ptr31, align 1, !tbaa !0 %inc43 = add nsw i32 %i.164, 1 %exitcond = icmp eq i32 %inc43, 10000 br i1 %exitcond, label %for.inc45, label %for.body10 for.inc45: ; preds = %for.body10 %inc46 = add nsw i32 %k.066, 1 %exitcond69 = icmp eq i32 %inc46, 10000 br i1 %exitcond69, label %for.end47, label %for.cond7.preheader for.end47: ; preds = %for.inc45 %call48 = call i32 @gettimeofday(%struct.timeval* %end, %struct.timezone* null) nounwind %tv_sec = getelementptr inbounds %struct.timeval* %end, i32 0, i32 0 %3 = load i32* %tv_sec, align 4, !tbaa !2 %tv_sec49 = getelementptr inbounds %struct.timeval* %start, i32 0, i32 0 %4 = load i32* %tv_sec49, align 4, !tbaa !2 %sub = sub nsw i32 %3, %4 %mul50 = mul nsw i32 %sub, 1000000 %conv51 = sext i32 %mul50 to i64 %tv_usec = getelementptr inbounds %struct.timeval* %end, i32 0, i32 1 %5 = load i32* %tv_usec, align 4, !tbaa !2 %tv_usec53 = getelementptr inbounds %struct.timeval* %start, i32 0, i32 1 %6 = load i32* %tv_usec53, align 4, !tbaa !2 %sub54 = sub nsw i32 %5, %6 %conv55 = sext i32 %sub54 to i64 %add56 = add i64 %conv55, %conv51 %7 = load %struct._IO_FILE** @stdout, align 4, !tbaa !3 %conv57 = uitofp i64 %add56 to double %div = fdiv double %conv57, 1.000000e+06 %call58 = call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %7, i8* getelementptr inbounds ([35 x i8]* @.str, i32 0, i32 0), double %div) nounwind %arrayidx = getelementptr inbounds i8* %call1, i32 12 %8 = load i8* %arrayidx, align 1, !tbaa !0 %conv59 = zext i8 %8 to i32 %arrayidx60 = getelementptr inbounds i8* %call1, i32 9988 %9 = load i8* 
%arrayidx60, align 1, !tbaa !0 %conv61 = zext i8 %9 to i32 %add62 = add nsw i32 %conv61, %conv59 ret i32 %add62 } declare noalias i8* @malloc(i32) nounwind declare i32 @gettimeofday(%struct.timeval*, %struct.timezone*) nounwind declare i32 @fprintf(%struct._IO_FILE* nocapture, i8* nocapture, ...) nounwind !0 = metadata !{metadata !"omnipotent char", metadata !1} !1 = metadata !{metadata !"Simple C/C++ TBAA", null} !2 = metadata !{metadata !"long", metadata !0} !3 = metadata !{metadata !"any pointer", metadata !0} -------------- next part -------------- ; ModuleID = 'test-s-20120126.c' target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64" target triple = "armv4t-none-linux-gnueabi" %struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i32, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i32, i32, [40 x i8] } %struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 } %struct.timeval = type { i32, i32 } %struct.timezone = type { i32, i32 } @stdout = external global %struct._IO_FILE* @.str = private unnamed_addr constant [35 x i8] c"kernel execution time: %18.9f sec\0A\00", align 1 define i32 @main() nounwind { entry: %start = alloca %struct.timeval, align 4 %end = alloca %struct.timeval, align 4 %call = call noalias i8* @malloc(i32 30000) nounwind %call1 = call noalias i8* @malloc(i32 30000) nounwind br label %for.body for.body: ; preds = %for.body, %entry %i.068 = phi i32 [ 0, %entry ], [ %inc, %for.body ] %x.067 = phi i8* [ %call, %entry ], [ %incdec.ptr, %for.body ] %conv = trunc i32 %i.068 to i8 %incdec.ptr = getelementptr inbounds i8* %x.067, i32 1 store i8 %conv, i8* %x.067, align 1, !tbaa !0 %inc = add nsw i32 %i.068, 1 %exitcond70 = icmp eq i32 %inc, 30000 br i1 %exitcond70, label %for.end, label %for.body for.end: ; preds = %for.body %call2 = call i32 @gettimeofday(%struct.timeval* 
%start, %struct.timezone* null) nounwind br label %for.cond7.preheader for.cond7.preheader: ; preds = %for.inc45, %for.end %k.066 = phi i32 [ 0, %for.end ], [ %inc46, %for.inc45 ] br label %for.body10 for.body10: ; preds = %for.body10, %for.cond7.preheader %w.065 = phi i8* [ %call1, %for.cond7.preheader ], [ %incdec.ptr41, %for.body10 ] %i.164 = phi i32 [ 0, %for.cond7.preheader ], [ %inc43, %for.body10 ] %r.063 = phi i8* [ %call, %for.cond7.preheader ], [ %incdec.ptr13, %for.body10 ] %0 = bitcast i8* %r.063 to <2 x i8>* %incdec.ptr12 = getelementptr inbounds i8* %r.063, i32 2 %1 = load <2 x i8>* %0, align 1, !tbaa !0 %incdec.ptr13 = getelementptr inbounds i8* %r.063, i32 3 %2 = load i8* %incdec.ptr12, align 1, !tbaa !0 %conv14 = zext <2 x i8> %1 to <2 x i32> %mul = mul nsw <2 x i32> %conv14, %mul.v.r1 = extractelement <2 x i32> %mul, i32 0 %mul.v.r2 = extractelement <2 x i32> %mul, i32 1 %conv17 = zext i8 %2 to i32 %mul18 = mul nsw i32 %conv17, 567 %add = add i32 %mul.v.r2, %mul.v.r1 %add19 = add i32 %add, %mul18 %conv20 = trunc i32 %add19 to i8 %incdec.ptr21 = getelementptr inbounds i8* %w.065, i32 1 store i8 %conv20, i8* %w.065, align 1, !tbaa !0 %mul28 = mul nsw i32 %conv17, 987 %mul25.v.i0 = shufflevector <2 x i32> %conv14, <2 x i32> undef, <2 x i32> %mul25 = mul nsw <2 x i32> %mul25.v.i0, %mul23 = mul nsw <2 x i32> %conv14, %mul38 = mul nsw i32 %conv17, 789 %add26.v.i1 = shufflevector <2 x i32> %mul23, <2 x i32> %mul25, <2 x i32> %add26.v.i0 = shufflevector <2 x i32> %mul25, <2 x i32> %mul23, <2 x i32> %add26 = add <2 x i32> %add26.v.i0, %add26.v.i1 %add29.v.i1.1 = insertelement <2 x i32> undef, i32 %mul28, i32 0 %add29.v.i1.2 = insertelement <2 x i32> %add29.v.i1.1, i32 %mul38, i32 1 %add29 = add <2 x i32> %add26, %add29.v.i1.2 %conv30 = trunc <2 x i32> %add29 to <2 x i8> %3 = bitcast i8* %incdec.ptr21 to <2 x i8>* %incdec.ptr41 = getelementptr inbounds i8* %w.065, i32 3 store <2 x i8> %conv30, <2 x i8>* %3, align 1, !tbaa !0 %inc43 = add nsw i32 %i.164, 1 
%exitcond = icmp eq i32 %inc43, 10000 br i1 %exitcond, label %for.inc45, label %for.body10 for.inc45: ; preds = %for.body10 %inc46 = add nsw i32 %k.066, 1 %exitcond69 = icmp eq i32 %inc46, 10000 br i1 %exitcond69, label %for.end47, label %for.cond7.preheader for.end47: ; preds = %for.inc45 %call48 = call i32 @gettimeofday(%struct.timeval* %end, %struct.timezone* null) nounwind %tv_sec = getelementptr inbounds %struct.timeval* %end, i32 0, i32 0 %4 = load i32* %tv_sec, align 4, !tbaa !2 %tv_sec49 = getelementptr inbounds %struct.timeval* %start, i32 0, i32 0 %5 = load i32* %tv_sec49, align 4, !tbaa !2 %sub = sub nsw i32 %4, %5 %mul50 = mul nsw i32 %sub, 1000000 %conv51 = sext i32 %mul50 to i64 %tv_usec = getelementptr inbounds %struct.timeval* %end, i32 0, i32 1 %6 = load i32* %tv_usec, align 4, !tbaa !2 %tv_usec53 = getelementptr inbounds %struct.timeval* %start, i32 0, i32 1 %7 = load i32* %tv_usec53, align 4, !tbaa !2 %sub54 = sub nsw i32 %6, %7 %conv55 = sext i32 %sub54 to i64 %add56 = add i64 %conv55, %conv51 %8 = load %struct._IO_FILE** @stdout, align 4, !tbaa !3 %conv57 = uitofp i64 %add56 to double %div = fdiv double %conv57, 1.000000e+06 %call58 = call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %8, i8* getelementptr inbounds ([35 x i8]* @.str, i32 0, i32 0), double %div) nounwind %arrayidx = getelementptr inbounds i8* %call1, i32 12 %9 = load i8* %arrayidx, align 1, !tbaa !0 %conv59 = zext i8 %9 to i32 %arrayidx60 = getelementptr inbounds i8* %call1, i32 9988 %10 = load i8* %arrayidx60, align 1, !tbaa !0 %conv61 = zext i8 %10 to i32 %add62 = add nsw i32 %conv61, %conv59 ret i32 %add62 } declare noalias i8* @malloc(i32) nounwind declare i32 @gettimeofday(%struct.timeval*, %struct.timezone*) nounwind declare i32 @fprintf(%struct._IO_FILE* nocapture, i8* nocapture, ...) 
nounwind !0 = metadata !{metadata !"omnipotent char", metadata !1} !1 = metadata !{metadata !"Simple C/C++ TBAA", null} !2 = metadata !{metadata !"long", metadata !0} !3 = metadata !{metadata !"any pointer", metadata !0} From grosbach at apple.com Thu Jan 26 17:20:05 2012 From: grosbach at apple.com (Jim Grosbach) Date: Thu, 26 Jan 2012 23:20:05 -0000 Subject: [llvm-commits] [llvm] r149090 - in /llvm/trunk: include/llvm/MC/MCContext.h lib/MC/MCContext.cpp Message-ID: <20120126232005.B30552A6C12C@llvm.org> Author: grosbach Date: Thu Jan 26 17:20:05 2012 New Revision: 149090 URL: http://llvm.org/viewvc/llvm-project?rev=149090&view=rev Log: Add SourceMgr to MCContext for backend diagnostics. Modified: llvm/trunk/include/llvm/MC/MCContext.h llvm/trunk/lib/MC/MCContext.cpp Modified: llvm/trunk/include/llvm/MC/MCContext.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/MC/MCContext.h?rev=149090&r1=149089&r2=149090&view=diff ============================================================================== --- llvm/trunk/include/llvm/MC/MCContext.h (original) +++ llvm/trunk/include/llvm/MC/MCContext.h Thu Jan 26 17:20:05 2012 @@ -43,6 +43,8 @@ public: typedef StringMap SymbolTable; private: + /// The SourceMgr for this object, if any. + const SourceMgr *SrcMgr; /// The MCAsmInfo for this target. 
const MCAsmInfo &MAI; @@ -137,9 +139,11 @@ public: explicit MCContext(const MCAsmInfo &MAI, const MCRegisterInfo &MRI, - const MCObjectFileInfo *MOFI); + const MCObjectFileInfo *MOFI, const SourceMgr *Mgr = 0); ~MCContext(); + const SourceMgr *getSourceManager() const { return SrcMgr; } + const MCAsmInfo &getAsmInfo() const { return MAI; } const MCRegisterInfo &getRegisterInfo() const { return MRI; } Modified: llvm/trunk/lib/MC/MCContext.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/MC/MCContext.cpp?rev=149090&r1=149089&r2=149090&view=diff ============================================================================== --- llvm/trunk/lib/MC/MCContext.cpp (original) +++ llvm/trunk/lib/MC/MCContext.cpp Thu Jan 26 17:20:05 2012 @@ -28,8 +28,8 @@ MCContext::MCContext(const MCAsmInfo &mai, const MCRegisterInfo &mri, - const MCObjectFileInfo *mofi) : - MAI(mai), MRI(mri), MOFI(mofi), + const MCObjectFileInfo *mofi, const SourceMgr *mgr) : + SrcMgr(mgr), MAI(mai), MRI(mri), MOFI(mofi), Allocator(), Symbols(Allocator), UsedNames(Allocator), NextUniqueID(0), CurrentDwarfLoc(0,0,0,DWARF2_FLAG_IS_STMT,0,0), From grosbach at apple.com Thu Jan 26 17:20:08 2012 From: grosbach at apple.com (Jim Grosbach) Date: Thu, 26 Jan 2012 23:20:08 -0000 Subject: [llvm-commits] [llvm] r149091 - /llvm/trunk/tools/llvm-mc/llvm-mc.cpp Message-ID: <20120126232008.400612A6C12C@llvm.org> Author: grosbach Date: Thu Jan 26 17:20:07 2012 New Revision: 149091 URL: http://llvm.org/viewvc/llvm-project?rev=149091&view=rev Log: llvm-mc pass through SourceMgr to MCContext. 
Modified: llvm/trunk/tools/llvm-mc/llvm-mc.cpp Modified: llvm/trunk/tools/llvm-mc/llvm-mc.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-mc/llvm-mc.cpp?rev=149091&r1=149090&r2=149091&view=diff ============================================================================== --- llvm/trunk/tools/llvm-mc/llvm-mc.cpp (original) +++ llvm/trunk/tools/llvm-mc/llvm-mc.cpp Thu Jan 26 17:20:07 2012 @@ -383,7 +383,7 @@ // FIXME: This is not pretty. MCContext has a ptr to MCObjectFileInfo and // MCObjectFileInfo needs a MCContext reference in order to initialize itself. OwningPtr<MCObjectFileInfo> MOFI(new MCObjectFileInfo()); - MCContext Ctx(*MAI, *MRI, MOFI.get()); + MCContext Ctx(*MAI, *MRI, MOFI.get(), &SrcMgr); MOFI->InitMCObjectFileInfo(TripleName, RelocModel, CMModel, Ctx); if (SaveTempLabels) From grosbach at apple.com Thu Jan 26 17:20:12 2012 From: grosbach at apple.com (Jim Grosbach) Date: Thu, 26 Jan 2012 23:20:12 -0000 Subject: [llvm-commits] [llvm] r149092 - in /llvm/trunk: include/llvm/MC/MCContext.h include/llvm/MC/MCFixup.h include/llvm/MC/MCInst.h lib/MC/MCContext.cpp Message-ID: <20120126232012.184272A6C12C@llvm.org> Author: grosbach Date: Thu Jan 26 17:20:11 2012 New Revision: 149092 URL: http://llvm.org/viewvc/llvm-project?rev=149092&view=rev Log: Add simple support for keeping MCFixup source information. Can be used to issue more user friendly diagnostics for faulty relocation constructs and such. 
Modified: llvm/trunk/include/llvm/MC/MCContext.h llvm/trunk/include/llvm/MC/MCFixup.h llvm/trunk/include/llvm/MC/MCInst.h llvm/trunk/lib/MC/MCContext.cpp Modified: llvm/trunk/include/llvm/MC/MCContext.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/MC/MCContext.h?rev=149092&r1=149091&r2=149092&view=diff ============================================================================== --- llvm/trunk/include/llvm/MC/MCContext.h (original) +++ llvm/trunk/include/llvm/MC/MCContext.h Thu Jan 26 17:20:11 2012 @@ -15,6 +15,8 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/StringMap.h" #include "llvm/Support/Allocator.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/SMLoc.h" #include "llvm/Support/raw_ostream.h" #include <vector> // FIXME: Shouldn't be needed. @@ -319,6 +321,11 @@ } void Deallocate(void *Ptr) { } + + // Unrecoverable error has occurred. Display the best diagnostic we can + // and bail via exit(1). For now, most MC backend errors are unrecoverable. + // FIXME: We should really do something about that. + LLVM_ATTRIBUTE_NORETURN void FatalError(SMLoc L, const Twine &Msg); }; } // end namespace llvm Modified: llvm/trunk/include/llvm/MC/MCFixup.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/MC/MCFixup.h?rev=149092&r1=149091&r2=149092&view=diff ============================================================================== --- llvm/trunk/include/llvm/MC/MCFixup.h (original) +++ llvm/trunk/include/llvm/MC/MCFixup.h Thu Jan 26 17:20:11 2012 @@ -11,6 +11,7 @@ #define LLVM_MC_MCFIXUP_H #include "llvm/Support/DataTypes.h" +#include "llvm/Support/SMLoc.h" #include <cassert> namespace llvm { @@ -69,14 +70,17 @@ /// determine how the operand value should be encoded into the instruction. unsigned Kind; + /// The source location which gave rise to the fixup, if any. 
+ SMLoc Loc; public: static MCFixup Create(uint32_t Offset, const MCExpr *Value, - MCFixupKind Kind) { + MCFixupKind Kind, SMLoc Loc = SMLoc()) { assert(unsigned(Kind) < MaxTargetFixupKind && "Kind out of range!"); MCFixup FI; FI.Value = Value; FI.Offset = Offset; FI.Kind = unsigned(Kind); + FI.Loc = Loc; return FI; } @@ -98,6 +102,8 @@ case 8: return isPCRel ? FK_PCRel_8 : FK_Data_8; } } + + SMLoc getLoc() const { return Loc; } }; } // End llvm namespace Modified: llvm/trunk/include/llvm/MC/MCInst.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/MC/MCInst.h?rev=149092&r1=149091&r2=149092&view=diff ============================================================================== --- llvm/trunk/include/llvm/MC/MCInst.h (original) +++ llvm/trunk/include/llvm/MC/MCInst.h Thu Jan 26 17:20:11 2012 @@ -19,6 +19,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/DataTypes.h" +#include "llvm/Support/SMLoc.h" namespace llvm { class raw_ostream; @@ -148,14 +149,17 @@ /// instruction. 
class MCInst { unsigned Opcode; + SMLoc Loc; SmallVector<MCOperand, 8> Operands; public: MCInst() : Opcode(0) {} void setOpcode(unsigned Op) { Opcode = Op; } - unsigned getOpcode() const { return Opcode; } + void setLoc(SMLoc loc) { Loc = loc; } + SMLoc getLoc() const { return Loc; } + const MCOperand &getOperand(unsigned i) const { return Operands[i]; } MCOperand &getOperand(unsigned i) { return Operands[i]; } unsigned getNumOperands() const { return Operands.size(); } Modified: llvm/trunk/lib/MC/MCContext.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/MC/MCContext.cpp?rev=149092&r1=149091&r2=149092&view=diff ============================================================================== --- llvm/trunk/lib/MC/MCContext.cpp (original) +++ llvm/trunk/lib/MC/MCContext.cpp Thu Jan 26 17:20:11 2012 @@ -20,6 +20,9 @@ #include "llvm/ADT/SmallString.h" #include "llvm/ADT/Twine.h" #include "llvm/Support/ELF.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/Signals.h" using namespace llvm; typedef StringMap<const MCSectionMachO*> MachOUniqueMapTy; @@ -321,3 +324,19 @@ return MCDwarfFiles[FileNumber] != 0; } + +void MCContext::FatalError(SMLoc Loc, const Twine &Msg) { + // If we have a source manager and a location, use it. Otherwise just + // use the generic report_fatal_error(). + if (!SrcMgr || Loc == SMLoc()) + report_fatal_error(Msg); + + // Use the source manager to print the message. + SrcMgr->PrintMessage(Loc, SourceMgr::DK_Error, Msg); + + // If we reached here, we are failing ungracefully. Run the interrupt handlers + // to make sure any special cleanups get done, in particular that we remove + // files registered with RemoveFileOnSignal. 
+ sys::RunInterruptHandlers(); + exit(1); +} From grosbach at apple.com Thu Jan 26 17:20:15 2012 From: grosbach at apple.com (Jim Grosbach) Date: Thu, 26 Jan 2012 23:20:15 -0000 Subject: [llvm-commits] [llvm] r149093 - in /llvm/trunk/lib/Target/ARM: AsmParser/ARMAsmParser.cpp MCTargetDesc/ARMMCCodeEmitter.cpp MCTargetDesc/ARMMachObjectWriter.cpp Message-ID: <20120126232015.CA7B32A6C12C@llvm.org> Author: grosbach Date: Thu Jan 26 17:20:15 2012 New Revision: 149093 URL: http://llvm.org/viewvc/llvm-project?rev=149093&view=rev Log: Keep source information, if available, around for ARM Fixups. Adjust an example MachObjectWriter diagnostic to use the information to issue a better message. Before: LLVM ERROR: unknown ARM fixup kind! After: x.s:6:5: error: unsupported relocation on symbol beq bar ^ rdar://9800182 Modified: llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp llvm/trunk/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp llvm/trunk/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp Modified: llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp?rev=149093&r1=149092&r2=149093&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp (original) +++ llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp Thu Jan 26 17:20:15 2012 @@ -7157,6 +7157,7 @@ if (Inst.getOpcode() == ARM::ITasm) return false; + Inst.setLoc(IDLoc); Out.EmitInstruction(Inst); return false; case Match_MissingFeature: Modified: llvm/trunk/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp?rev=149093&r1=149092&r2=149093&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp (original) +++ 
llvm/trunk/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp Thu Jan 26 17:20:15 2012 @@ -464,7 +464,7 @@ assert(MO.isExpr() && "Unexpected branch target type!"); const MCExpr *Expr = MO.getExpr(); MCFixupKind Kind = MCFixupKind(FixupKind); - Fixups.push_back(MCFixup::Create(0, Expr, Kind)); + Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc())); // All of the information is in the fixup. return 0; @@ -716,7 +716,7 @@ Kind = MCFixupKind(ARM::fixup_t2_ldst_pcrel_12); else Kind = MCFixupKind(ARM::fixup_arm_ldst_pcrel_12); - Fixups.push_back(MCFixup::Create(0, Expr, Kind)); + Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc())); ++MCNumCPRelocations; } else { @@ -791,7 +791,7 @@ assert(MO.isExpr() && "Unexpected machine operand type!"); const MCExpr *Expr = MO.getExpr(); MCFixupKind Kind = MCFixupKind(ARM::fixup_t2_pcrel_10); - Fixups.push_back(MCFixup::Create(0, Expr, Kind)); + Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc())); ++MCNumCPRelocations; } else @@ -878,7 +878,7 @@ : ARM::fixup_arm_movw_lo16); break; } - Fixups.push_back(MCFixup::Create(0, E, Kind)); + Fixups.push_back(MCFixup::Create(0, E, Kind, MI.getLoc())); return 0; }; @@ -999,7 +999,7 @@ assert(MO.isExpr() && "Unexpected machine operand type!"); const MCExpr *Expr = MO.getExpr(); MCFixupKind Kind = MCFixupKind(ARM::fixup_arm_pcrel_10_unscaled); - Fixups.push_back(MCFixup::Create(0, Expr, Kind)); + Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc())); ++MCNumCPRelocations; return (Rn << 9) | (1 << 13); @@ -1077,7 +1077,7 @@ Kind = MCFixupKind(ARM::fixup_t2_pcrel_10); else Kind = MCFixupKind(ARM::fixup_arm_pcrel_10); - Fixups.push_back(MCFixup::Create(0, Expr, Kind)); + Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc())); ++MCNumCPRelocations; } else { Modified: llvm/trunk/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp URL: 
http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp?rev=149093&r1=149092&r2=149093&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp (original) +++ llvm/trunk/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp Thu Jan 26 17:20:15 2012 @@ -13,6 +13,7 @@ #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCMachObjectWriter.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixup.h" #include "llvm/MC/MCFixupKindInfo.h" @@ -294,9 +295,13 @@ unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind()); unsigned Log2Size; unsigned RelocType = macho::RIT_Vanilla; - if (!getARMFixupKindMachOInfo(Fixup.getKind(), RelocType, Log2Size)) { - report_fatal_error("unknown ARM fixup kind!"); - } + if (!getARMFixupKindMachOInfo(Fixup.getKind(), RelocType, Log2Size)) + // If we failed to get fixup kind info, it's because there's no legal + // relocation type for the fixup kind. This happens when it's a fixup that's + // expected to always be resolvable at assembly time and not have any + // relocations needed. + Asm.getContext().FatalError(Fixup.getLoc(), + "unsupported relocation on symbol"); // If this is a difference or a defined symbol plus an offset, then we need a // scattered relocation entry. Differences always require scattered From grosbach at apple.com Thu Jan 26 17:47:45 2012 From: grosbach at apple.com (Jim Grosbach) Date: Thu, 26 Jan 2012 23:47:45 -0000 Subject: [llvm-commits] [llvm] r149096 - /llvm/trunk/lib/MC/MCObjectStreamer.cpp Message-ID: <20120126234745.A0E0B2A6C12C@llvm.org> Author: grosbach Date: Thu Jan 26 17:47:45 2012 New Revision: 149096 URL: http://llvm.org/viewvc/llvm-project?rev=149096&view=rev Log: Tidy up. 
Modified: llvm/trunk/lib/MC/MCObjectStreamer.cpp Modified: llvm/trunk/lib/MC/MCObjectStreamer.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/MC/MCObjectStreamer.cpp?rev=149096&r1=149095&r2=149096&view=diff ============================================================================== --- llvm/trunk/lib/MC/MCObjectStreamer.cpp (original) +++ llvm/trunk/lib/MC/MCObjectStreamer.cpp Thu Jan 26 17:47:45 2012 @@ -233,7 +233,7 @@ } void MCObjectStreamer::EmitValueToOffset(const MCExpr *Offset, - unsigned char Value) { + unsigned char Value) { int64_t Res; if (Offset->EvaluateAsAbsolute(Res, getAssembler())) { new MCOrgFragment(*Offset, Value, getCurrentSectionData()); From lhames at gmail.com Thu Jan 26 18:05:42 2012 From: lhames at gmail.com (Lang Hames) Date: Fri, 27 Jan 2012 00:05:42 -0000 Subject: [llvm-commits] [llvm] r149097 - in /llvm/trunk: lib/CodeGen/RegisterCoalescer.cpp test/CodeGen/ARM/2012-01-26-CoalescerBug.ll Message-ID: <20120127000542.773C92A6C12C@llvm.org> Author: lhames Date: Thu Jan 26 18:05:42 2012 New Revision: 149097 URL: http://llvm.org/viewvc/llvm-project?rev=149097&view=rev Log: Rewrite instruction operands in AdjustCopiesBackFrom. Fixes PR11861. Added: llvm/trunk/test/CodeGen/ARM/2012-01-26-CoalescerBug.ll Modified: llvm/trunk/lib/CodeGen/RegisterCoalescer.cpp Modified: llvm/trunk/lib/CodeGen/RegisterCoalescer.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/RegisterCoalescer.cpp?rev=149097&r1=149096&r2=149097&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/RegisterCoalescer.cpp (original) +++ llvm/trunk/lib/CodeGen/RegisterCoalescer.cpp Thu Jan 26 18:05:42 2012 @@ -553,10 +553,12 @@ if (UIdx != -1) { ValLREndInst->getOperand(UIdx).setIsKill(false); } - - // If the copy instruction was killing the destination register before the - // merge, find the last use and trim the live range. That will also add the - // isKill marker. 
+ + // Rewrite the copy. If the copy instruction was killing the destination + // register before the merge, find the last use and trim the live range. That + // will also add the isKill marker. + CopyMI->substituteRegister(IntA.reg, IntB.reg, CP.getSubIdx(), + *TRI); if (ALR->end == CopyIdx) LIS->shrinkToUses(&IntA); Added: llvm/trunk/test/CodeGen/ARM/2012-01-26-CoalescerBug.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/2012-01-26-CoalescerBug.ll?rev=149097&view=auto ============================================================================== --- llvm/trunk/test/CodeGen/ARM/2012-01-26-CoalescerBug.ll (added) +++ llvm/trunk/test/CodeGen/ARM/2012-01-26-CoalescerBug.ll Thu Jan 26 18:05:42 2012 @@ -0,0 +1,21 @@ +; RUN: llc < %s -verify-coalescing +; PR11861 +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64" +target triple = "armv7-none-linux-eabi" + +define arm_aapcs_vfpcc void @foo() nounwind uwtable align 2 { + br label %1 + +;