From craig.topper at gmail.com Mon Jan 9 00:38:55 2012 From: craig.topper at gmail.com (Craig Topper) Date: Mon, 09 Jan 2012 06:38:55 -0000 Subject: [llvm-commits] [llvm] r147766 - /llvm/trunk/lib/Target/X86/X86InstrSSE.td Message-ID: <20120109063856.0E9F02A6C12C@llvm.org> Author: ctopper Date: Mon Jan 9 00:38:55 2012 New Revision: 147766 URL: http://llvm.org/viewvc/llvm-project?rev=147766&view=rev Log: Mark MOVNTI as being supported in SSE2 OR AVX mode. This instruction has no AVX equivalent so we should use the SSE version. Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=147766&r1=147765&r2=147766&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original) +++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Mon Jan 9 00:38:55 2012 @@ -3304,11 +3304,11 @@ def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), "movnti{l}\t{$src, $dst|$dst, $src}", [(nontemporalstore (i32 GR32:$src), addr:$dst)]>, - TB, Requires<[HasSSE2]>; + TB, Requires<[HasXMMInt]>; def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), "movnti{q}\t{$src, $dst|$dst, $src}", [(nontemporalstore (i64 GR64:$src), addr:$dst)]>, - TB, Requires<[HasSSE2]>; + TB, Requires<[HasXMMInt]>; } //===----------------------------------------------------------------------===// From craig.topper at gmail.com Mon Jan 9 00:52:46 2012 From: craig.topper at gmail.com (Craig Topper) Date: Mon, 09 Jan 2012 06:52:46 -0000 Subject: [llvm-commits] [llvm] r147767 - in /llvm/trunk: lib/Target/X86/X86InstrSSE.td test/CodeGen/X86/avx2-nontemporal.ll Message-ID: <20120109065247.0E45C2A6C12C@llvm.org> Author: ctopper Date: Mon Jan 9 00:52:46 2012 New Revision: 147767 URL: http://llvm.org/viewvc/llvm-project?rev=147767&view=rev Log: Clean up patterns for MOVNT*. 
Not sure why there were floating point types on MOVNTPS and MOVNTDQ. And v4i64 was completely missing. Added: llvm/trunk/test/CodeGen/X86/avx2-nontemporal.ll Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=147767&r1=147766&r2=147767&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original) +++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Mon Jan 9 00:52:46 2012 @@ -3234,17 +3234,12 @@ "movntpd\t{$src, $dst|$dst, $src}", [(alignednontemporalstore (v2f64 VR128:$src), addr:$dst)]>, VEX; - def VMOVNTDQ_64mr : VPDI<0xE7, MRMDestMem, (outs), - (ins f128mem:$dst, VR128:$src), - "movntdq\t{$src, $dst|$dst, $src}", - [(alignednontemporalstore (v2f64 VR128:$src), - addr:$dst)]>, VEX; let ExeDomain = SSEPackedInt in def VMOVNTDQmr : VPDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), "movntdq\t{$src, $dst|$dst, $src}", - [(alignednontemporalstore (v4f32 VR128:$src), + [(alignednontemporalstore (v2i64 VR128:$src), addr:$dst)]>, VEX; def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst), @@ -3260,16 +3255,11 @@ "movntpd\t{$src, $dst|$dst, $src}", [(alignednontemporalstore (v4f64 VR256:$src), addr:$dst)]>, VEX; - def VMOVNTDQY_64mr : VPDI<0xE7, MRMDestMem, (outs), - (ins f256mem:$dst, VR256:$src), - "movntdq\t{$src, $dst|$dst, $src}", - [(alignednontemporalstore (v4f64 VR256:$src), - addr:$dst)]>, VEX; let ExeDomain = SSEPackedInt in def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src), "movntdq\t{$src, $dst|$dst, $src}", - [(alignednontemporalstore (v8f32 VR256:$src), + [(alignednontemporalstore (v4i64 VR256:$src), addr:$dst)]>, VEX; } @@ -3288,14 +3278,10 @@ "movntpd\t{$src, $dst|$dst, $src}", [(alignednontemporalstore(v2f64 VR128:$src), addr:$dst)]>; -def MOVNTDQ_64mr : PDI<0xE7, MRMDestMem, (outs), (ins 
f128mem:$dst, VR128:$src), - "movntdq\t{$src, $dst|$dst, $src}", - [(alignednontemporalstore (v2f64 VR128:$src), addr:$dst)]>; - let ExeDomain = SSEPackedInt in def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), "movntdq\t{$src, $dst|$dst, $src}", - [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>; + [(alignednontemporalstore (v2i64 VR128:$src), addr:$dst)]>; def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst), (MOVNTDQmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>; Added: llvm/trunk/test/CodeGen/X86/avx2-nontemporal.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx2-nontemporal.ll?rev=147767&view=auto ============================================================================== --- llvm/trunk/test/CodeGen/X86/avx2-nontemporal.ll (added) +++ llvm/trunk/test/CodeGen/X86/avx2-nontemporal.ll Mon Jan 9 00:52:46 2012 @@ -0,0 +1,22 @@ +; RUN: llc < %s -march=x86 -mattr=+avx2 | FileCheck %s + +define void @f(<8 x float> %A, i8* %B, <4 x double> %C, i32 %D, <4 x i64> %E) { +; CHECK: vmovntps + %cast = bitcast i8* %B to <8 x float>* + %A2 = fadd <8 x float> %A, + store <8 x float> %A2, <8 x float>* %cast, align 16, !nontemporal !0 +; CHECK: vmovntdq + %cast1 = bitcast i8* %B to <4 x i64>* + %E2 = add <4 x i64> %E, + store <4 x i64> %E2, <4 x i64>* %cast1, align 16, !nontemporal !0 +; CHECK: vmovntpd + %cast2 = bitcast i8* %B to <4 x double>* + %C2 = fadd <4 x double> %C, + store <4 x double> %C2, <4 x double>* %cast2, align 16, !nontemporal !0 +; CHECK: movnti + %cast3 = bitcast i8* %B to i32* + store i32 %D, i32* %cast3, align 16, !nontemporal !0 + ret void +} + +!0 = metadata !{i32 1} From clattner at apple.com Mon Jan 9 01:17:37 2012 From: clattner at apple.com (Chris Lattner) Date: Sun, 08 Jan 2012 23:17:37 -0800 Subject: [llvm-commits] [patch] Remove MCLoggingStreamer In-Reply-To: <4F07C105.6050103@gmail.com> References: <4F07C105.6050103@gmail.com> Message-ID: On Jan 6, 2012, at 7:50 PM, 
Rafael Ávila de Espíndola wrote: > It looks to me that MCLoggingStreamer is costing more than it is worth. > > It is an interesting debugging idea, but it doesn't add a lot more > information than the regular text asm printer. Especially in verbose asm > mode. > > The value of the information being logged is also reduced when we split > a method to provide a non-virtual interface. After the split, we are > logging an implementation detail. > > While the situation can be improved with cleaner separation of the > interface provided by MCStreamer and its implementation, it looks better > to just remove the logging streamer. Please do. Thanks Rafael, -Chris From craig.topper at gmail.com Mon Jan 9 02:10:39 2012 From: craig.topper at gmail.com (Craig Topper) Date: Mon, 09 Jan 2012 08:10:39 -0000 Subject: [llvm-commits] [llvm] r147768 - /llvm/trunk/lib/Target/X86/X86InstrSSE.td Message-ID: <20120109081039.1EBCE2A6C12C@llvm.org> Author: ctopper Date: Mon Jan 9 02:10:38 2012 New Revision: 147768 URL: http://llvm.org/viewvc/llvm-project?rev=147768&view=rev Log: Reorder a bunch of patterns to put the AVX version first thus giving it priority over the SSE version. Another step towards trying to remove the AVX hack that disables SSE from X86Subtarget. Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=147768&r1=147767&r2=147768&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original) +++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Mon Jan 9 02:10:38 2012 @@ -304,11 +304,11 @@ // X86MCInstLower does. 
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, isCodeGenOnly = 1, ExeDomain = SSEPackedInt in { - def V_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "", - [(set VR128:$dst, (v4i32 immAllOnesV))]>; let Predicates = [HasAVX] in def AVX_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "", [(set VR128:$dst, (v4i32 immAllOnesV))]>, VEX_4V; + def V_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "", + [(set VR128:$dst, (v4i32 immAllOnesV))]>; let Predicates = [HasAVX2] in def AVX2_SETALLONES : PDI<0x76, MRMInitReg, (outs VR256:$dst), (ins), "", [(set VR256:$dst, (v8i32 immAllOnesV))]>, VEX_4V; @@ -404,127 +404,6 @@ [(store FR64:$src, addr:$dst)]>; // Patterns -let Predicates = [HasSSE1] in { - let AddedComplexity = 15 in { - // Extract the low 32-bit value from one vector and insert it into another. - def : Pat<(v4f32 (movl VR128:$src1, VR128:$src2)), - (MOVSSrr (v4f32 VR128:$src1), - (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>; - def : Pat<(v4i32 (movl VR128:$src1, VR128:$src2)), - (MOVSSrr (v4i32 VR128:$src1), - (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_ss))>; - - // Move scalar to XMM zero-extended, zeroing a VR128 then do a - // MOVSS to the lower bits. - def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))), - (MOVSSrr (v4f32 (V_SET0)), FR32:$src)>; - def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))), - (MOVSSrr (v4f32 (V_SET0)), - (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)))>; - def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))), - (MOVSSrr (v4i32 (V_SET0)), - (EXTRACT_SUBREG (v4i32 VR128:$src), sub_ss))>; - } - - let AddedComplexity = 20 in { - // MOVSSrm zeros the high parts of the register; represent this - // with SUBREG_TO_REG. 
- def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))), - (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>; - def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))), - (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>; - def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))), - (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>; - } - - // Extract and store. - def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))), - addr:$dst), - (MOVSSmr addr:$dst, - (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>; - - // Shuffle with MOVSS - def : Pat<(v4f32 (X86Movss VR128:$src1, (scalar_to_vector FR32:$src2))), - (MOVSSrr VR128:$src1, FR32:$src2)>; - def : Pat<(v4i32 (X86Movss VR128:$src1, VR128:$src2)), - (MOVSSrr (v4i32 VR128:$src1), - (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_ss))>; - def : Pat<(v4f32 (X86Movss VR128:$src1, VR128:$src2)), - (MOVSSrr (v4f32 VR128:$src1), - (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>; -} - -let Predicates = [HasSSE2] in { - let AddedComplexity = 15 in { - // Extract the low 64-bit value from one vector and insert it into another. - def : Pat<(v2f64 (movl VR128:$src1, VR128:$src2)), - (MOVSDrr (v2f64 VR128:$src1), - (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>; - def : Pat<(v2i64 (movl VR128:$src1, VR128:$src2)), - (MOVSDrr (v2i64 VR128:$src1), - (EXTRACT_SUBREG (v2i64 VR128:$src2), sub_sd))>; - - // vector_shuffle v1, v2 <4, 5, 2, 3> using movsd - def : Pat<(v4f32 (movlp VR128:$src1, VR128:$src2)), - (MOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, sub_sd))>; - def : Pat<(v4i32 (movlp VR128:$src1, VR128:$src2)), - (MOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, sub_sd))>; - - // Move scalar to XMM zero-extended, zeroing a VR128 then do a - // MOVSD to the lower bits. 
- def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))), - (MOVSDrr (v2f64 (V_SET0)), FR64:$src)>; - } - - let AddedComplexity = 20 in { - // MOVSDrm zeros the high parts of the register; represent this - // with SUBREG_TO_REG. - def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))), - (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; - def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))), - (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; - def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))), - (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; - def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))), - (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; - def : Pat<(v2f64 (X86vzload addr:$src)), - (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; - } - - // Extract and store. - def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))), - addr:$dst), - (MOVSDmr addr:$dst, - (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>; - - // Shuffle with MOVSD - def : Pat<(v2f64 (X86Movsd VR128:$src1, (scalar_to_vector FR64:$src2))), - (MOVSDrr VR128:$src1, FR64:$src2)>; - def : Pat<(v2i64 (X86Movsd VR128:$src1, VR128:$src2)), - (MOVSDrr (v2i64 VR128:$src1), - (EXTRACT_SUBREG (v2i64 VR128:$src2), sub_sd))>; - def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)), - (MOVSDrr (v2f64 VR128:$src1), - (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>; - def : Pat<(v4f32 (X86Movsd VR128:$src1, VR128:$src2)), - (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2),sub_sd))>; - def : Pat<(v4i32 (X86Movsd VR128:$src1, VR128:$src2)), - (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2),sub_sd))>; - - // FIXME: Instead of a X86Movlps there should be a X86Movsd here, the problem - // is during lowering, where it's not possible to recognize the fold cause - // it has two uses through a bitcast. One use disappears at isel time and the - // fold opportunity reappears. 
- def : Pat<(v2f64 (X86Movlpd VR128:$src1, VR128:$src2)), - (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v2f64 VR128:$src2),sub_sd))>; - def : Pat<(v2i64 (X86Movlpd VR128:$src1, VR128:$src2)), - (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v2i64 VR128:$src2),sub_sd))>; - def : Pat<(v4f32 (X86Movlps VR128:$src1, VR128:$src2)), - (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2),sub_sd))>; - def : Pat<(v4i32 (X86Movlps VR128:$src1, VR128:$src2)), - (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2),sub_sd))>; -} - let Predicates = [HasAVX] in { let AddedComplexity = 15 in { // Extract the low 32-bit value from one vector and insert it into another. @@ -708,6 +587,127 @@ sub_sd))>; } +let Predicates = [HasSSE1] in { + let AddedComplexity = 15 in { + // Extract the low 32-bit value from one vector and insert it into another. + def : Pat<(v4f32 (movl VR128:$src1, VR128:$src2)), + (MOVSSrr (v4f32 VR128:$src1), + (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>; + def : Pat<(v4i32 (movl VR128:$src1, VR128:$src2)), + (MOVSSrr (v4i32 VR128:$src1), + (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_ss))>; + + // Move scalar to XMM zero-extended, zeroing a VR128 then do a + // MOVSS to the lower bits. + def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))), + (MOVSSrr (v4f32 (V_SET0)), FR32:$src)>; + def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))), + (MOVSSrr (v4f32 (V_SET0)), + (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)))>; + def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))), + (MOVSSrr (v4i32 (V_SET0)), + (EXTRACT_SUBREG (v4i32 VR128:$src), sub_ss))>; + } + + let AddedComplexity = 20 in { + // MOVSSrm zeros the high parts of the register; represent this + // with SUBREG_TO_REG. 
+ def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))), + (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>; + def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))), + (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>; + def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))), + (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>; + } + + // Extract and store. + def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))), + addr:$dst), + (MOVSSmr addr:$dst, + (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>; + + // Shuffle with MOVSS + def : Pat<(v4f32 (X86Movss VR128:$src1, (scalar_to_vector FR32:$src2))), + (MOVSSrr VR128:$src1, FR32:$src2)>; + def : Pat<(v4i32 (X86Movss VR128:$src1, VR128:$src2)), + (MOVSSrr (v4i32 VR128:$src1), + (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_ss))>; + def : Pat<(v4f32 (X86Movss VR128:$src1, VR128:$src2)), + (MOVSSrr (v4f32 VR128:$src1), + (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>; +} + +let Predicates = [HasSSE2] in { + let AddedComplexity = 15 in { + // Extract the low 64-bit value from one vector and insert it into another. + def : Pat<(v2f64 (movl VR128:$src1, VR128:$src2)), + (MOVSDrr (v2f64 VR128:$src1), + (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>; + def : Pat<(v2i64 (movl VR128:$src1, VR128:$src2)), + (MOVSDrr (v2i64 VR128:$src1), + (EXTRACT_SUBREG (v2i64 VR128:$src2), sub_sd))>; + + // vector_shuffle v1, v2 <4, 5, 2, 3> using movsd + def : Pat<(v4f32 (movlp VR128:$src1, VR128:$src2)), + (MOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, sub_sd))>; + def : Pat<(v4i32 (movlp VR128:$src1, VR128:$src2)), + (MOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, sub_sd))>; + + // Move scalar to XMM zero-extended, zeroing a VR128 then do a + // MOVSD to the lower bits. 
+ def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))), + (MOVSDrr (v2f64 (V_SET0)), FR64:$src)>; + } + + let AddedComplexity = 20 in { + // MOVSDrm zeros the high parts of the register; represent this + // with SUBREG_TO_REG. + def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))), + (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; + def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))), + (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; + def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))), + (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; + def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))), + (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; + def : Pat<(v2f64 (X86vzload addr:$src)), + (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; + } + + // Extract and store. + def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))), + addr:$dst), + (MOVSDmr addr:$dst, + (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>; + + // Shuffle with MOVSD + def : Pat<(v2f64 (X86Movsd VR128:$src1, (scalar_to_vector FR64:$src2))), + (MOVSDrr VR128:$src1, FR64:$src2)>; + def : Pat<(v2i64 (X86Movsd VR128:$src1, VR128:$src2)), + (MOVSDrr (v2i64 VR128:$src1), + (EXTRACT_SUBREG (v2i64 VR128:$src2), sub_sd))>; + def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)), + (MOVSDrr (v2f64 VR128:$src1), + (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>; + def : Pat<(v4f32 (X86Movsd VR128:$src1, VR128:$src2)), + (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2),sub_sd))>; + def : Pat<(v4i32 (X86Movsd VR128:$src1, VR128:$src2)), + (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2),sub_sd))>; + + // FIXME: Instead of a X86Movlps there should be a X86Movsd here, the problem + // is during lowering, where it's not possible to recognize the fold cause + // it has two uses through a bitcast. One use disappears at isel time and the + // fold opportunity reappears. 
+ def : Pat<(v2f64 (X86Movlpd VR128:$src1, VR128:$src2)), + (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v2f64 VR128:$src2),sub_sd))>; + def : Pat<(v2i64 (X86Movlpd VR128:$src1, VR128:$src2)), + (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v2i64 VR128:$src2),sub_sd))>; + def : Pat<(v4f32 (X86Movlps VR128:$src1, VR128:$src2)), + (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2),sub_sd))>; + def : Pat<(v4i32 (X86Movlps VR128:$src1, VR128:$src2)), + (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2),sub_sd))>; +} + //===----------------------------------------------------------------------===// // SSE 1 & 2 - Move Aligned/Unaligned FP Instructions //===----------------------------------------------------------------------===// @@ -867,37 +867,6 @@ def : Pat<(int_x86_sse2_storeu_pd addr:$dst, VR128:$src), (MOVUPDmr addr:$dst, VR128:$src)>; -// Use movaps / movups for SSE integer load / store (one byte shorter). -// The instructions selected below are then converted to MOVDQA/MOVDQU -// during the SSE domain pass. 
-let Predicates = [HasSSE1] in { - def : Pat<(alignedloadv4i32 addr:$src), - (MOVAPSrm addr:$src)>; - def : Pat<(loadv4i32 addr:$src), - (MOVUPSrm addr:$src)>; - def : Pat<(alignedloadv2i64 addr:$src), - (MOVAPSrm addr:$src)>; - def : Pat<(loadv2i64 addr:$src), - (MOVUPSrm addr:$src)>; - - def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst), - (MOVAPSmr addr:$dst, VR128:$src)>; - def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst), - (MOVAPSmr addr:$dst, VR128:$src)>; - def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst), - (MOVAPSmr addr:$dst, VR128:$src)>; - def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst), - (MOVAPSmr addr:$dst, VR128:$src)>; - def : Pat<(store (v2i64 VR128:$src), addr:$dst), - (MOVUPSmr addr:$dst, VR128:$src)>; - def : Pat<(store (v4i32 VR128:$src), addr:$dst), - (MOVUPSmr addr:$dst, VR128:$src)>; - def : Pat<(store (v8i16 VR128:$src), addr:$dst), - (MOVUPSmr addr:$dst, VR128:$src)>; - def : Pat<(store (v16i8 VR128:$src), addr:$dst), - (MOVUPSmr addr:$dst, VR128:$src)>; -} - // Use vmovaps/vmovups for AVX integer load/store. let Predicates = [HasAVX] in { // 128-bit load/store @@ -954,28 +923,53 @@ (VMOVUPSYmr addr:$dst, VR256:$src)>; } +// Use movaps / movups for SSE integer load / store (one byte shorter). +// The instructions selected below are then converted to MOVDQA/MOVDQU +// during the SSE domain pass. 
+let Predicates = [HasSSE1] in { + def : Pat<(alignedloadv4i32 addr:$src), + (MOVAPSrm addr:$src)>; + def : Pat<(loadv4i32 addr:$src), + (MOVUPSrm addr:$src)>; + def : Pat<(alignedloadv2i64 addr:$src), + (MOVAPSrm addr:$src)>; + def : Pat<(loadv2i64 addr:$src), + (MOVUPSrm addr:$src)>; + + def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst), + (MOVAPSmr addr:$dst, VR128:$src)>; + def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst), + (MOVAPSmr addr:$dst, VR128:$src)>; + def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst), + (MOVAPSmr addr:$dst, VR128:$src)>; + def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst), + (MOVAPSmr addr:$dst, VR128:$src)>; + def : Pat<(store (v2i64 VR128:$src), addr:$dst), + (MOVUPSmr addr:$dst, VR128:$src)>; + def : Pat<(store (v4i32 VR128:$src), addr:$dst), + (MOVUPSmr addr:$dst, VR128:$src)>; + def : Pat<(store (v8i16 VR128:$src), addr:$dst), + (MOVUPSmr addr:$dst, VR128:$src)>; + def : Pat<(store (v16i8 VR128:$src), addr:$dst), + (MOVUPSmr addr:$dst, VR128:$src)>; +} + // Alias instruction to do FR32 or FR64 reg-to-reg copy using movaps. Upper // bits are disregarded. FIXME: Set encoding to pseudo! let neverHasSideEffects = 1 in { -def FsMOVAPSrr : PSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src), - "movaps\t{$src, $dst|$dst, $src}", []>; -def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src), - "movapd\t{$src, $dst|$dst, $src}", []>; def FsVMOVAPSrr : VPSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src), "movaps\t{$src, $dst|$dst, $src}", []>, VEX; def FsVMOVAPDrr : VPDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src), "movapd\t{$src, $dst|$dst, $src}", []>, VEX; +def FsMOVAPSrr : PSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src), + "movaps\t{$src, $dst|$dst, $src}", []>; +def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src), + "movapd\t{$src, $dst|$dst, $src}", []>; } // Alias instruction to load FR32 or FR64 from f128mem using movaps. Upper // bits are disregarded. 
FIXME: Set encoding to pseudo! let canFoldAsLoad = 1, isReMaterializable = 1 in { -def FsMOVAPSrm : PSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src), - "movaps\t{$src, $dst|$dst, $src}", - [(set FR32:$dst, (alignedloadfsf32 addr:$src))]>; -def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src), - "movapd\t{$src, $dst|$dst, $src}", - [(set FR64:$dst, (alignedloadfsf64 addr:$src))]>; let isCodeGenOnly = 1 in { def FsVMOVAPSrm : VPSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src), "movaps\t{$src, $dst|$dst, $src}", @@ -984,6 +978,12 @@ "movapd\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (alignedloadfsf64 addr:$src))]>, VEX; } +def FsMOVAPSrm : PSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src), + "movaps\t{$src, $dst|$dst, $src}", + [(set FR32:$dst, (alignedloadfsf32 addr:$src))]>; +def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src), + "movapd\t{$src, $dst|$dst, $src}", + [(set FR64:$dst, (alignedloadfsf64 addr:$src))]>; } //===----------------------------------------------------------------------===// @@ -1236,7 +1236,8 @@ // Store patterns def : Pat<(store (f64 (vector_extract - (v2f64 (X86Unpckh VR128:$src, VR128:$src)), (iPTR 0))), addr:$dst), + (X86Unpckh (bc_v2f64 (v4f32 VR128:$src)), + (bc_v2f64 (v4f32 VR128:$src))), (iPTR 0))), addr:$dst), (VMOVHPSmr addr:$dst, VR128:$src)>; def : Pat<(store (f64 (vector_extract (v2f64 (X86Unpckh VR128:$src, VR128:$src)), (iPTR 0))), addr:$dst), @@ -1259,7 +1260,8 @@ // Store patterns def : Pat<(store (f64 (vector_extract - (v2f64 (X86Unpckh VR128:$src, VR128:$src)), (iPTR 0))), addr:$dst), + (X86Unpckh (bc_v2f64 (v4f32 VR128:$src)), + (bc_v2f64 (v4f32 VR128:$src))), (iPTR 0))), addr:$dst), (MOVHPSmr addr:$dst, VR128:$src)>; } @@ -2229,20 +2231,6 @@ SSEPackedDouble>, TB, OpSize; } -let Predicates = [HasSSE1] in { -def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), VR128:$src2, imm:$cc)), - (CMPPSrri (v4f32 VR128:$src1), (v4f32 VR128:$src2), imm:$cc)>; -def : 
Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), (memop addr:$src2), imm:$cc)), - (CMPPSrmi (v4f32 VR128:$src1), addr:$src2, imm:$cc)>; -} - -let Predicates = [HasSSE2] in { -def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), VR128:$src2, imm:$cc)), - (CMPPDrri VR128:$src1, VR128:$src2, imm:$cc)>; -def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), (memop addr:$src2), imm:$cc)), - (CMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>; -} - let Predicates = [HasAVX] in { def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), VR128:$src2, imm:$cc)), (VCMPPSrri (v4f32 VR128:$src1), (v4f32 VR128:$src2), imm:$cc)>; @@ -2263,6 +2251,20 @@ (VCMPPDYrmi VR256:$src1, addr:$src2, imm:$cc)>; } +let Predicates = [HasSSE1] in { +def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), VR128:$src2, imm:$cc)), + (CMPPSrri (v4f32 VR128:$src1), (v4f32 VR128:$src2), imm:$cc)>; +def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), (memop addr:$src2), imm:$cc)), + (CMPPSrmi (v4f32 VR128:$src1), addr:$src2, imm:$cc)>; +} + +let Predicates = [HasSSE2] in { +def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), VR128:$src2, imm:$cc)), + (CMPPDrri VR128:$src1, VR128:$src2, imm:$cc)>; +def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), (memop addr:$src2), imm:$cc)), + (CMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>; +} + //===----------------------------------------------------------------------===// // SSE 1 & 2 - Shuffle Instructions //===----------------------------------------------------------------------===// @@ -2305,61 +2307,6 @@ memopv2f64, SSEPackedDouble>, TB, OpSize; } -let Predicates = [HasSSE1] in { - def : Pat<(v4f32 (X86Shufp VR128:$src1, - (memopv4f32 addr:$src2), (i8 imm:$imm))), - (SHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>; - def : Pat<(v4f32 (X86Shufp VR128:$src1, VR128:$src2, (i8 imm:$imm))), - (SHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>; - def : Pat<(v4i32 (X86Shufp VR128:$src1, - (bc_v4i32 (memopv2i64 addr:$src2)), (i8 imm:$imm))), - (SHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>; - def : Pat<(v4i32 (X86Shufp 
VR128:$src1, VR128:$src2, (i8 imm:$imm))), - (SHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>; - // vector_shuffle v1, v2 <4, 5, 2, 3> using SHUFPSrri (we prefer movsd, but - // fall back to this for SSE1) - def : Pat<(v4f32 (movlp:$src3 VR128:$src1, (v4f32 VR128:$src2))), - (SHUFPSrri VR128:$src2, VR128:$src1, - (SHUFFLE_get_shuf_imm VR128:$src3))>; - // Special unary SHUFPSrri case. - def : Pat<(v4f32 (pshufd:$src3 VR128:$src1, (undef))), - (SHUFPSrri VR128:$src1, VR128:$src1, - (SHUFFLE_get_shuf_imm VR128:$src3))>; -} - -let Predicates = [HasSSE2] in { - // Special binary v4i32 shuffle cases with SHUFPS. - def : Pat<(v4i32 (shufp:$src3 VR128:$src1, (v4i32 VR128:$src2))), - (SHUFPSrri VR128:$src1, VR128:$src2, - (SHUFFLE_get_shuf_imm VR128:$src3))>; - def : Pat<(v4i32 (shufp:$src3 VR128:$src1, - (bc_v4i32 (memopv2i64 addr:$src2)))), - (SHUFPSrmi VR128:$src1, addr:$src2, - (SHUFFLE_get_shuf_imm VR128:$src3))>; - // Special unary SHUFPDrri cases. - def : Pat<(v2i64 (pshufd:$src3 VR128:$src1, (undef))), - (SHUFPDrri VR128:$src1, VR128:$src1, - (SHUFFLE_get_shuf_imm VR128:$src3))>; - def : Pat<(v2f64 (pshufd:$src3 VR128:$src1, (undef))), - (SHUFPDrri VR128:$src1, VR128:$src1, - (SHUFFLE_get_shuf_imm VR128:$src3))>; - // Special binary v2i64 shuffle cases using SHUFPDrri. 
- def : Pat<(v2i64 (shufp:$src3 VR128:$src1, VR128:$src2)), - (SHUFPDrri VR128:$src1, VR128:$src2, - (SHUFFLE_get_shuf_imm VR128:$src3))>; - // Generic SHUFPD patterns - def : Pat<(v2i64 (X86Shufp VR128:$src1, - (memopv2i64 addr:$src2), (i8 imm:$imm))), - (SHUFPDrmi VR128:$src1, addr:$src2, imm:$imm)>; - def : Pat<(v2f64 (X86Shufp VR128:$src1, - (memopv2f64 addr:$src2), (i8 imm:$imm))), - (SHUFPDrmi VR128:$src1, addr:$src2, imm:$imm)>; - def : Pat<(v2i64 (X86Shufp VR128:$src1, VR128:$src2, (i8 imm:$imm))), - (SHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>; - def : Pat<(v2f64 (X86Shufp VR128:$src1, VR128:$src2, (i8 imm:$imm))), - (SHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>; -} - let Predicates = [HasAVX] in { def : Pat<(v4f32 (X86Shufp VR128:$src1, (memopv4f32 addr:$src2), (i8 imm:$imm))), @@ -2437,6 +2384,61 @@ (VSHUFPDYrmi VR256:$src1, addr:$src2, imm:$imm)>; } +let Predicates = [HasSSE1] in { + def : Pat<(v4f32 (X86Shufp VR128:$src1, + (memopv4f32 addr:$src2), (i8 imm:$imm))), + (SHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>; + def : Pat<(v4f32 (X86Shufp VR128:$src1, VR128:$src2, (i8 imm:$imm))), + (SHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>; + def : Pat<(v4i32 (X86Shufp VR128:$src1, + (bc_v4i32 (memopv2i64 addr:$src2)), (i8 imm:$imm))), + (SHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>; + def : Pat<(v4i32 (X86Shufp VR128:$src1, VR128:$src2, (i8 imm:$imm))), + (SHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>; + // vector_shuffle v1, v2 <4, 5, 2, 3> using SHUFPSrri (we prefer movsd, but + // fall back to this for SSE1) + def : Pat<(v4f32 (movlp:$src3 VR128:$src1, (v4f32 VR128:$src2))), + (SHUFPSrri VR128:$src2, VR128:$src1, + (SHUFFLE_get_shuf_imm VR128:$src3))>; + // Special unary SHUFPSrri case. + def : Pat<(v4f32 (pshufd:$src3 VR128:$src1, (undef))), + (SHUFPSrri VR128:$src1, VR128:$src1, + (SHUFFLE_get_shuf_imm VR128:$src3))>; +} + +let Predicates = [HasSSE2] in { + // Special binary v4i32 shuffle cases with SHUFPS. 
+ def : Pat<(v4i32 (shufp:$src3 VR128:$src1, (v4i32 VR128:$src2))), + (SHUFPSrri VR128:$src1, VR128:$src2, + (SHUFFLE_get_shuf_imm VR128:$src3))>; + def : Pat<(v4i32 (shufp:$src3 VR128:$src1, + (bc_v4i32 (memopv2i64 addr:$src2)))), + (SHUFPSrmi VR128:$src1, addr:$src2, + (SHUFFLE_get_shuf_imm VR128:$src3))>; + // Special unary SHUFPDrri cases. + def : Pat<(v2i64 (pshufd:$src3 VR128:$src1, (undef))), + (SHUFPDrri VR128:$src1, VR128:$src1, + (SHUFFLE_get_shuf_imm VR128:$src3))>; + def : Pat<(v2f64 (pshufd:$src3 VR128:$src1, (undef))), + (SHUFPDrri VR128:$src1, VR128:$src1, + (SHUFFLE_get_shuf_imm VR128:$src3))>; + // Special binary v2i64 shuffle cases using SHUFPDrri. + def : Pat<(v2i64 (shufp:$src3 VR128:$src1, VR128:$src2)), + (SHUFPDrri VR128:$src1, VR128:$src2, + (SHUFFLE_get_shuf_imm VR128:$src3))>; + // Generic SHUFPD patterns + def : Pat<(v2i64 (X86Shufp VR128:$src1, + (memopv2i64 addr:$src2), (i8 imm:$imm))), + (SHUFPDrmi VR128:$src1, addr:$src2, imm:$imm)>; + def : Pat<(v2f64 (X86Shufp VR128:$src1, + (memopv2f64 addr:$src2), (i8 imm:$imm))), + (SHUFPDrmi VR128:$src1, addr:$src2, imm:$imm)>; + def : Pat<(v2i64 (X86Shufp VR128:$src1, VR128:$src2, (i8 imm:$imm))), + (SHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>; + def : Pat<(v2f64 (X86Shufp VR128:$src1, VR128:$src2, (i8 imm:$imm))), + (SHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>; +} + //===----------------------------------------------------------------------===// // SSE 1 & 2 - Unpack Instructions //===----------------------------------------------------------------------===// @@ -2595,24 +2597,6 @@ !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], d>, REX_W; } -defm MOVMSKPS : sse12_extr_sign_mask, TB; -defm MOVMSKPD : sse12_extr_sign_mask, TB, OpSize; - -def : Pat<(i32 (X86fgetsign FR32:$src)), - (MOVMSKPSrr32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, - sub_ss))>, Requires<[HasSSE1]>; -def : Pat<(i64 (X86fgetsign FR32:$src)), - (MOVMSKPSrr64 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, - 
sub_ss))>, Requires<[HasSSE1]>; -def : Pat<(i32 (X86fgetsign FR64:$src)), - (MOVMSKPDrr32 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, - sub_sd))>, Requires<[HasSSE2]>; -def : Pat<(i64 (X86fgetsign FR64:$src)), - (MOVMSKPDrr64 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, - sub_sd))>, Requires<[HasSSE2]>; - let Predicates = [HasAVX] in { defm VMOVMSKPS : sse12_extr_sign_mask, TB, VEX; @@ -2651,6 +2635,24 @@ OpSize, VEX; } +defm MOVMSKPS : sse12_extr_sign_mask, TB; +defm MOVMSKPD : sse12_extr_sign_mask, TB, OpSize; + +def : Pat<(i32 (X86fgetsign FR32:$src)), + (MOVMSKPSrr32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, + sub_ss))>, Requires<[HasSSE1]>; +def : Pat<(i64 (X86fgetsign FR32:$src)), + (MOVMSKPSrr64 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, + sub_ss))>, Requires<[HasSSE1]>; +def : Pat<(i32 (X86fgetsign FR64:$src)), + (MOVMSKPDrr32 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, + sub_sd))>, Requires<[HasSSE2]>; +def : Pat<(i64 (X86fgetsign FR64:$src)), + (MOVMSKPDrr64 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, + sub_sd))>, Requires<[HasSSE2]>; + //===---------------------------------------------------------------------===// // SSE2 - Packed Integer Logical Instructions //===---------------------------------------------------------------------===// @@ -4604,15 +4606,6 @@ (loadi32 addr:$src))))))]>; } -let Predicates = [HasSSE2], AddedComplexity = 20 in { - def : Pat<(v4i32 (X86vzmovl (loadv4i32 addr:$src))), - (MOVZDI2PDIrm addr:$src)>; - def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))), - (MOVZDI2PDIrm addr:$src)>; - def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))), - (MOVZDI2PDIrm addr:$src)>; -} - let Predicates = [HasAVX] in { // AVX 128-bit movd/movq instruction write zeros in the high 128-bit part. 
let AddedComplexity = 20 in { @@ -4632,6 +4625,15 @@ (SUBREG_TO_REG (i64 0), (VMOVZQI2PQIrr GR64:$src), sub_xmm)>; } +let Predicates = [HasSSE2], AddedComplexity = 20 in { + def : Pat<(v4i32 (X86vzmovl (loadv4i32 addr:$src))), + (MOVZDI2PDIrm addr:$src)>; + def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))), + (MOVZDI2PDIrm addr:$src)>; + def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))), + (MOVZDI2PDIrm addr:$src)>; +} + // These are the correct encodings of the instructions so that we know how to // read correct assembly, even though we continue to emit the wrong ones for // compatibility with Darwin's buggy assembler. @@ -4704,14 +4706,6 @@ (loadi64 addr:$src))))))]>, XS, Requires<[HasSSE2]>; -let Predicates = [HasSSE2], AddedComplexity = 20 in { - def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))), - (MOVZQI2PQIrm addr:$src)>; - def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4f32 addr:$src)))), - (MOVZQI2PQIrm addr:$src)>; - def : Pat<(v2i64 (X86vzload addr:$src)), (MOVZQI2PQIrm addr:$src)>; -} - let Predicates = [HasAVX], AddedComplexity = 20 in { def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))), (VMOVZQI2PQIrm addr:$src)>; @@ -4721,6 +4715,14 @@ (VMOVZQI2PQIrm addr:$src)>; } +let Predicates = [HasSSE2], AddedComplexity = 20 in { + def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))), + (MOVZQI2PQIrm addr:$src)>; + def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4f32 addr:$src)))), + (MOVZQI2PQIrm addr:$src)>; + def : Pat<(v2i64 (X86vzload addr:$src)), (MOVZQI2PQIrm addr:$src)>; +} + let Predicates = [HasAVX] in { def : Pat<(v4i64 (X86vzload addr:$src)), (SUBREG_TO_REG (i32 0), (VMOVAPSrm addr:$src), sub_xmm)>; @@ -4756,18 +4758,18 @@ } let AddedComplexity = 20 in { - let Predicates = [HasSSE2] in { - def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4i32 addr:$src)))), - (MOVZPQILo2PQIrm addr:$src)>; - def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))), - (MOVZPQILo2PQIrr VR128:$src)>; - } let Predicates = [HasAVX] in { def : Pat<(v2i64 
(X86vzmovl (bc_v2i64 (loadv4i32 addr:$src)))), (VMOVZPQILo2PQIrm addr:$src)>; def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))), (VMOVZPQILo2PQIrr VR128:$src)>; } + let Predicates = [HasSSE2] in { + def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4i32 addr:$src)))), + (MOVZPQILo2PQIrm addr:$src)>; + def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))), + (MOVZPQILo2PQIrr VR128:$src)>; + } } // Instructions to match in the assembler @@ -4876,28 +4878,17 @@ let Predicates = [HasAVX] in { defm VMOVSHDUP : sse3_replicate_sfp<0x16, X86Movshdup, "vmovshdup", v4f32, VR128, memopv4f32, f128mem>, VEX; - defm VMOVSLDUP : sse3_replicate_sfp<0x12, X86Movsldup, "vmovsldup", - v4f32, VR128, memopv4f32, f128mem>, VEX; - defm VMOVSHDUPY : sse3_replicate_sfp<0x16, X86Movshdup, "vmovshdup", - v8f32, VR256, memopv8f32, f256mem>, VEX; - defm VMOVSLDUPY : sse3_replicate_sfp<0x12, X86Movsldup, "vmovsldup", - v8f32, VR256, memopv8f32, f256mem>, VEX; -} -defm MOVSHDUP : sse3_replicate_sfp<0x16, X86Movshdup, "movshdup", v4f32, VR128, - memopv4f32, f128mem>; -defm MOVSLDUP : sse3_replicate_sfp<0x12, X86Movsldup, "movsldup", v4f32, VR128, - memopv4f32, f128mem>; - -let Predicates = [HasSSE3] in { - def : Pat<(v4i32 (X86Movshdup VR128:$src)), - (MOVSHDUPrr VR128:$src)>; - def : Pat<(v4i32 (X86Movshdup (bc_v4i32 (memopv2i64 addr:$src)))), - (MOVSHDUPrm addr:$src)>; - def : Pat<(v4i32 (X86Movsldup VR128:$src)), - (MOVSLDUPrr VR128:$src)>; - def : Pat<(v4i32 (X86Movsldup (bc_v4i32 (memopv2i64 addr:$src)))), - (MOVSLDUPrm addr:$src)>; + defm VMOVSLDUP : sse3_replicate_sfp<0x12, X86Movsldup, "vmovsldup", + v4f32, VR128, memopv4f32, f128mem>, VEX; + defm VMOVSHDUPY : sse3_replicate_sfp<0x16, X86Movshdup, "vmovshdup", + v8f32, VR256, memopv8f32, f256mem>, VEX; + defm VMOVSLDUPY : sse3_replicate_sfp<0x12, X86Movsldup, "vmovsldup", + v8f32, VR256, memopv8f32, f256mem>, VEX; } +defm MOVSHDUP : sse3_replicate_sfp<0x16, X86Movshdup, "movshdup", v4f32, VR128, + memopv4f32, f128mem>; +defm MOVSLDUP : 
sse3_replicate_sfp<0x12, X86Movsldup, "movsldup", v4f32, VR128, + memopv4f32, f128mem>; let Predicates = [HasAVX] in { def : Pat<(v4i32 (X86Movshdup VR128:$src)), @@ -4918,6 +4909,17 @@ (VMOVSLDUPYrm addr:$src)>; } +let Predicates = [HasSSE3] in { + def : Pat<(v4i32 (X86Movshdup VR128:$src)), + (MOVSHDUPrr VR128:$src)>; + def : Pat<(v4i32 (X86Movshdup (bc_v4i32 (memopv2i64 addr:$src)))), + (MOVSHDUPrm addr:$src)>; + def : Pat<(v4i32 (X86Movsldup VR128:$src)), + (MOVSLDUPrr VR128:$src)>; + def : Pat<(v4i32 (X86Movsldup (bc_v4i32 (memopv2i64 addr:$src)))), + (MOVSLDUPrm addr:$src)>; +} + //===---------------------------------------------------------------------===// // SSE3 - Replicate Double FP - MOVDDUP //===---------------------------------------------------------------------===// @@ -4949,31 +4951,6 @@ defm VMOVDDUP : sse3_replicate_dfp<"vmovddup">, VEX; defm VMOVDDUPY : sse3_replicate_dfp_y<"vmovddup">, VEX; -let Predicates = [HasSSE3] in { - def : Pat<(movddup (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src)))), - (undef)), - (MOVDDUPrm addr:$src)>; - let AddedComplexity = 5 in { - def : Pat<(movddup (memopv2f64 addr:$src), (undef)), (MOVDDUPrm addr:$src)>; - def : Pat<(movddup (bc_v4f32 (memopv2f64 addr:$src)), (undef)), - (MOVDDUPrm addr:$src)>; - def : Pat<(movddup (memopv2i64 addr:$src), (undef)), (MOVDDUPrm addr:$src)>; - def : Pat<(movddup (bc_v4i32 (memopv2i64 addr:$src)), (undef)), - (MOVDDUPrm addr:$src)>; - } - def : Pat<(X86Movddup (memopv2f64 addr:$src)), - (MOVDDUPrm addr:$src)>; - def : Pat<(X86Movddup (bc_v2f64 (memopv4f32 addr:$src))), - (MOVDDUPrm addr:$src)>; - def : Pat<(X86Movddup (bc_v2f64 (memopv2i64 addr:$src))), - (MOVDDUPrm addr:$src)>; - def : Pat<(X86Movddup (v2f64 (scalar_to_vector (loadf64 addr:$src)))), - (MOVDDUPrm addr:$src)>; - def : Pat<(X86Movddup (bc_v2f64 - (v2i64 (scalar_to_vector (loadi64 addr:$src))))), - (MOVDDUPrm addr:$src)>; -} - let Predicates = [HasAVX] in { def : Pat<(movddup (bc_v2f64 (v2i64 
(scalar_to_vector (loadi64 addr:$src)))), (undef)), @@ -5013,6 +4990,31 @@ (VMOVDDUPYrr VR256:$src)>; } +let Predicates = [HasSSE3] in { + def : Pat<(movddup (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src)))), + (undef)), + (MOVDDUPrm addr:$src)>; + let AddedComplexity = 5 in { + def : Pat<(movddup (memopv2f64 addr:$src), (undef)), (MOVDDUPrm addr:$src)>; + def : Pat<(movddup (bc_v4f32 (memopv2f64 addr:$src)), (undef)), + (MOVDDUPrm addr:$src)>; + def : Pat<(movddup (memopv2i64 addr:$src), (undef)), (MOVDDUPrm addr:$src)>; + def : Pat<(movddup (bc_v4i32 (memopv2i64 addr:$src)), (undef)), + (MOVDDUPrm addr:$src)>; + } + def : Pat<(X86Movddup (memopv2f64 addr:$src)), + (MOVDDUPrm addr:$src)>; + def : Pat<(X86Movddup (bc_v2f64 (memopv4f32 addr:$src))), + (MOVDDUPrm addr:$src)>; + def : Pat<(X86Movddup (bc_v2f64 (memopv2i64 addr:$src))), + (MOVDDUPrm addr:$src)>; + def : Pat<(X86Movddup (v2f64 (scalar_to_vector (loadf64 addr:$src)))), + (MOVDDUPrm addr:$src)>; + def : Pat<(X86Movddup (bc_v2f64 + (v2i64 (scalar_to_vector (loadi64 addr:$src))))), + (MOVDDUPrm addr:$src)>; +} + //===---------------------------------------------------------------------===// // SSE3 - Move Unaligned Integer //===---------------------------------------------------------------------===// @@ -5333,29 +5335,6 @@ int_x86_ssse3_pmul_hr_sw_128>; } -let Predicates = [HasSSSE3] in { - def : Pat<(X86pshufb VR128:$src, VR128:$mask), - (PSHUFBrr128 VR128:$src, VR128:$mask)>; - def : Pat<(X86pshufb VR128:$src, (bc_v16i8 (memopv2i64 addr:$mask))), - (PSHUFBrm128 VR128:$src, addr:$mask)>; - - def : Pat<(v16i8 (X86psign VR128:$src1, VR128:$src2)), - (PSIGNBrr128 VR128:$src1, VR128:$src2)>; - def : Pat<(v8i16 (X86psign VR128:$src1, VR128:$src2)), - (PSIGNWrr128 VR128:$src1, VR128:$src2)>; - def : Pat<(v4i32 (X86psign VR128:$src1, VR128:$src2)), - (PSIGNDrr128 VR128:$src1, VR128:$src2)>; - - def : Pat<(v8i16 (X86hadd VR128:$src1, VR128:$src2)), - (PHADDWrr128 VR128:$src1, VR128:$src2)>; - def : 
Pat<(v4i32 (X86hadd VR128:$src1, VR128:$src2)), - (PHADDDrr128 VR128:$src1, VR128:$src2)>; - def : Pat<(v8i16 (X86hsub VR128:$src1, VR128:$src2)), - (PHSUBWrr128 VR128:$src1, VR128:$src2)>; - def : Pat<(v4i32 (X86hsub VR128:$src1, VR128:$src2)), - (PHSUBDrr128 VR128:$src1, VR128:$src2)>; -} - let Predicates = [HasAVX] in { def : Pat<(X86pshufb VR128:$src, VR128:$mask), (VPSHUFBrr128 VR128:$src, VR128:$mask)>; @@ -5397,6 +5376,29 @@ (VPHSUBDrr256 VR256:$src1, VR256:$src2)>; } +let Predicates = [HasSSSE3] in { + def : Pat<(X86pshufb VR128:$src, VR128:$mask), + (PSHUFBrr128 VR128:$src, VR128:$mask)>; + def : Pat<(X86pshufb VR128:$src, (bc_v16i8 (memopv2i64 addr:$mask))), + (PSHUFBrm128 VR128:$src, addr:$mask)>; + + def : Pat<(v16i8 (X86psign VR128:$src1, VR128:$src2)), + (PSIGNBrr128 VR128:$src1, VR128:$src2)>; + def : Pat<(v8i16 (X86psign VR128:$src1, VR128:$src2)), + (PSIGNWrr128 VR128:$src1, VR128:$src2)>; + def : Pat<(v4i32 (X86psign VR128:$src1, VR128:$src2)), + (PSIGNDrr128 VR128:$src1, VR128:$src2)>; + + def : Pat<(v8i16 (X86hadd VR128:$src1, VR128:$src2)), + (PHADDWrr128 VR128:$src1, VR128:$src2)>; + def : Pat<(v4i32 (X86hadd VR128:$src1, VR128:$src2)), + (PHADDDrr128 VR128:$src1, VR128:$src2)>; + def : Pat<(v8i16 (X86hsub VR128:$src1, VR128:$src2)), + (PHSUBWrr128 VR128:$src1, VR128:$src2)>; + def : Pat<(v4i32 (X86hsub VR128:$src1, VR128:$src2)), + (PHSUBDrr128 VR128:$src1, VR128:$src2)>; +} + //===---------------------------------------------------------------------===// // SSSE3 - Packed Align Instruction Patterns //===---------------------------------------------------------------------===// @@ -5444,26 +5446,26 @@ let Constraints = "$src1 = $dst", Predicates = [HasSSSE3] in defm PALIGN : ssse3_palign<"palignr">; -let Predicates = [HasSSSE3] in { +let Predicates = [HasAVX] in { def : Pat<(v4i32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), - (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; + (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; 
def : Pat<(v4f32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), - (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; + (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; def : Pat<(v8i16 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), - (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; + (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; def : Pat<(v16i8 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), - (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; + (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; } -let Predicates = [HasAVX] in { +let Predicates = [HasSSSE3] in { def : Pat<(v4i32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), - (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; + (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; def : Pat<(v4f32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), - (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; + (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; def : Pat<(v8i16 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), - (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; + (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; def : Pat<(v16i8 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), - (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; + (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; } //===---------------------------------------------------------------------===// @@ -5558,70 +5560,70 @@ defm PMOVZXWD : SS41I_binop_rm_int8<0x33, "pmovzxwd", int_x86_sse41_pmovzxwd>; defm PMOVZXDQ : SS41I_binop_rm_int8<0x35, "pmovzxdq", int_x86_sse41_pmovzxdq>; -let Predicates = [HasSSE41] in { +let Predicates = [HasAVX] in { // Common patterns involving scalar load. 
def : Pat<(int_x86_sse41_pmovsxbw (vzmovl_v2i64 addr:$src)), - (PMOVSXBWrm addr:$src)>; + (VPMOVSXBWrm addr:$src)>; def : Pat<(int_x86_sse41_pmovsxbw (vzload_v2i64 addr:$src)), - (PMOVSXBWrm addr:$src)>; + (VPMOVSXBWrm addr:$src)>; def : Pat<(int_x86_sse41_pmovsxwd (vzmovl_v2i64 addr:$src)), - (PMOVSXWDrm addr:$src)>; + (VPMOVSXWDrm addr:$src)>; def : Pat<(int_x86_sse41_pmovsxwd (vzload_v2i64 addr:$src)), - (PMOVSXWDrm addr:$src)>; + (VPMOVSXWDrm addr:$src)>; def : Pat<(int_x86_sse41_pmovsxdq (vzmovl_v2i64 addr:$src)), - (PMOVSXDQrm addr:$src)>; + (VPMOVSXDQrm addr:$src)>; def : Pat<(int_x86_sse41_pmovsxdq (vzload_v2i64 addr:$src)), - (PMOVSXDQrm addr:$src)>; + (VPMOVSXDQrm addr:$src)>; def : Pat<(int_x86_sse41_pmovzxbw (vzmovl_v2i64 addr:$src)), - (PMOVZXBWrm addr:$src)>; + (VPMOVZXBWrm addr:$src)>; def : Pat<(int_x86_sse41_pmovzxbw (vzload_v2i64 addr:$src)), - (PMOVZXBWrm addr:$src)>; + (VPMOVZXBWrm addr:$src)>; def : Pat<(int_x86_sse41_pmovzxwd (vzmovl_v2i64 addr:$src)), - (PMOVZXWDrm addr:$src)>; + (VPMOVZXWDrm addr:$src)>; def : Pat<(int_x86_sse41_pmovzxwd (vzload_v2i64 addr:$src)), - (PMOVZXWDrm addr:$src)>; + (VPMOVZXWDrm addr:$src)>; def : Pat<(int_x86_sse41_pmovzxdq (vzmovl_v2i64 addr:$src)), - (PMOVZXDQrm addr:$src)>; + (VPMOVZXDQrm addr:$src)>; def : Pat<(int_x86_sse41_pmovzxdq (vzload_v2i64 addr:$src)), - (PMOVZXDQrm addr:$src)>; + (VPMOVZXDQrm addr:$src)>; } -let Predicates = [HasAVX] in { +let Predicates = [HasSSE41] in { // Common patterns involving scalar load. 
def : Pat<(int_x86_sse41_pmovsxbw (vzmovl_v2i64 addr:$src)), - (VPMOVSXBWrm addr:$src)>; + (PMOVSXBWrm addr:$src)>; def : Pat<(int_x86_sse41_pmovsxbw (vzload_v2i64 addr:$src)), - (VPMOVSXBWrm addr:$src)>; + (PMOVSXBWrm addr:$src)>; def : Pat<(int_x86_sse41_pmovsxwd (vzmovl_v2i64 addr:$src)), - (VPMOVSXWDrm addr:$src)>; + (PMOVSXWDrm addr:$src)>; def : Pat<(int_x86_sse41_pmovsxwd (vzload_v2i64 addr:$src)), - (VPMOVSXWDrm addr:$src)>; + (PMOVSXWDrm addr:$src)>; def : Pat<(int_x86_sse41_pmovsxdq (vzmovl_v2i64 addr:$src)), - (VPMOVSXDQrm addr:$src)>; + (PMOVSXDQrm addr:$src)>; def : Pat<(int_x86_sse41_pmovsxdq (vzload_v2i64 addr:$src)), - (VPMOVSXDQrm addr:$src)>; + (PMOVSXDQrm addr:$src)>; def : Pat<(int_x86_sse41_pmovzxbw (vzmovl_v2i64 addr:$src)), - (VPMOVZXBWrm addr:$src)>; + (PMOVZXBWrm addr:$src)>; def : Pat<(int_x86_sse41_pmovzxbw (vzload_v2i64 addr:$src)), - (VPMOVZXBWrm addr:$src)>; + (PMOVZXBWrm addr:$src)>; def : Pat<(int_x86_sse41_pmovzxwd (vzmovl_v2i64 addr:$src)), - (VPMOVZXWDrm addr:$src)>; + (PMOVZXWDrm addr:$src)>; def : Pat<(int_x86_sse41_pmovzxwd (vzload_v2i64 addr:$src)), - (VPMOVZXWDrm addr:$src)>; + (PMOVZXWDrm addr:$src)>; def : Pat<(int_x86_sse41_pmovzxdq (vzmovl_v2i64 addr:$src)), - (VPMOVZXDQrm addr:$src)>; + (PMOVZXDQrm addr:$src)>; def : Pat<(int_x86_sse41_pmovzxdq (vzload_v2i64 addr:$src)), - (VPMOVZXDQrm addr:$src)>; + (PMOVZXDQrm addr:$src)>; } @@ -5677,30 +5679,30 @@ defm PMOVZXBD : SS41I_binop_rm_int4<0x31, "pmovzxbd", int_x86_sse41_pmovzxbd>; defm PMOVZXWQ : SS41I_binop_rm_int4<0x34, "pmovzxwq", int_x86_sse41_pmovzxwq>; -let Predicates = [HasSSE41] in { +let Predicates = [HasAVX] in { // Common patterns involving scalar load def : Pat<(int_x86_sse41_pmovsxbd (vzmovl_v4i32 addr:$src)), - (PMOVSXBDrm addr:$src)>; + (VPMOVSXBDrm addr:$src)>; def : Pat<(int_x86_sse41_pmovsxwq (vzmovl_v4i32 addr:$src)), - (PMOVSXWQrm addr:$src)>; + (VPMOVSXWQrm addr:$src)>; def : Pat<(int_x86_sse41_pmovzxbd (vzmovl_v4i32 addr:$src)), - (PMOVZXBDrm 
addr:$src)>; + (VPMOVZXBDrm addr:$src)>; def : Pat<(int_x86_sse41_pmovzxwq (vzmovl_v4i32 addr:$src)), - (PMOVZXWQrm addr:$src)>; + (VPMOVZXWQrm addr:$src)>; } -let Predicates = [HasAVX] in { +let Predicates = [HasSSE41] in { // Common patterns involving scalar load def : Pat<(int_x86_sse41_pmovsxbd (vzmovl_v4i32 addr:$src)), - (VPMOVSXBDrm addr:$src)>; + (PMOVSXBDrm addr:$src)>; def : Pat<(int_x86_sse41_pmovsxwq (vzmovl_v4i32 addr:$src)), - (VPMOVSXWQrm addr:$src)>; + (PMOVSXWQrm addr:$src)>; def : Pat<(int_x86_sse41_pmovzxbd (vzmovl_v4i32 addr:$src)), - (VPMOVZXBDrm addr:$src)>; + (PMOVZXBDrm addr:$src)>; def : Pat<(int_x86_sse41_pmovzxwq (vzmovl_v4i32 addr:$src)), - (VPMOVZXWQrm addr:$src)>; + (PMOVZXWQrm addr:$src)>; } multiclass SS41I_binop_rm_int2 opc, string OpcodeStr, Intrinsic IntId> { @@ -5745,30 +5747,30 @@ defm PMOVSXBQ : SS41I_binop_rm_int2<0x22, "pmovsxbq", int_x86_sse41_pmovsxbq>; defm PMOVZXBQ : SS41I_binop_rm_int2<0x32, "pmovzxbq", int_x86_sse41_pmovzxbq>; -let Predicates = [HasSSE41] in { +let Predicates = [HasAVX] in { // Common patterns involving scalar load def : Pat<(int_x86_sse41_pmovsxbq (bitconvert (v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))))), - (PMOVSXBQrm addr:$src)>; + (VPMOVSXBQrm addr:$src)>; def : Pat<(int_x86_sse41_pmovzxbq (bitconvert (v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))))), - (PMOVZXBQrm addr:$src)>; + (VPMOVZXBQrm addr:$src)>; } -let Predicates = [HasAVX] in { +let Predicates = [HasSSE41] in { // Common patterns involving scalar load def : Pat<(int_x86_sse41_pmovsxbq (bitconvert (v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))))), - (VPMOVSXBQrm addr:$src)>; + (PMOVSXBQrm addr:$src)>; def : Pat<(int_x86_sse41_pmovzxbq (bitconvert (v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))))), - (VPMOVZXBQrm addr:$src)>; + (PMOVZXBQrm addr:$src)>; } //===----------------------------------------------------------------------===// @@ -5898,13 +5900,13 @@ def : 
Pat<(store (f32 (bitconvert (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2))), addr:$dst), - (EXTRACTPSmr addr:$dst, VR128:$src1, imm:$src2)>, - Requires<[HasSSE41]>; + (VEXTRACTPSmr addr:$dst, VR128:$src1, imm:$src2)>, + Requires<[HasAVX]>; def : Pat<(store (f32 (bitconvert (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2))), addr:$dst), - (VEXTRACTPSmr addr:$dst, VR128:$src1, imm:$src2)>, - Requires<[HasAVX]>; + (EXTRACTPSmr addr:$dst, VR128:$src1, imm:$src2)>, + Requires<[HasSSE41]>; //===----------------------------------------------------------------------===// // SSE4.1 - Insert Instructions @@ -6014,10 +6016,10 @@ } let ExeDomain = SSEPackedSingle in { - let Constraints = "$src1 = $dst" in - defm INSERTPS : SS41I_insertf32<0x21, "insertps">; let Predicates = [HasAVX] in defm VINSERTPS : SS41I_insertf32<0x21, "vinsertps", 0>, VEX_4V; + let Constraints = "$src1 = $dst" in + defm INSERTPS : SS41I_insertf32<0x21, "insertps">; } def : Pat<(int_x86_sse41_insertps VR128:$src1, VR128:$src2, imm:$src3), @@ -7125,31 +7127,31 @@ // Carry-less Multiplication instructions let neverHasSideEffects = 1 in { -let Constraints = "$src1 = $dst" in { -def PCLMULQDQrr : CLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst), +// AVX carry-less Multiplication instructions +def VPCLMULQDQrr : AVXCLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i8imm:$src3), - "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}", + "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>; let mayLoad = 1 in -def PCLMULQDQrm : CLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst), +def VPCLMULQDQrm : AVXCLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2, i8imm:$src3), - "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}", + "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>; -} -// AVX carry-less Multiplication instructions -def VPCLMULQDQrr : AVXCLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst), +let Constraints = "$src1 = 
$dst" in { +def PCLMULQDQrr : CLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i8imm:$src3), - "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", + "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}", []>; let mayLoad = 1 in -def VPCLMULQDQrm : AVXCLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst), +def PCLMULQDQrm : CLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2, i8imm:$src3), - "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", + "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}", []>; -} +} // Constraints = "$src1 = $dst" +} // neverHasSideEffects = 1 multiclass pclmul_alias { From craig.topper at gmail.com Mon Jan 9 02:34:00 2012 From: craig.topper at gmail.com (Craig Topper) Date: Mon, 09 Jan 2012 08:34:00 -0000 Subject: [llvm-commits] [llvm] r147769 - /llvm/trunk/lib/Target/X86/X86InstrSSE.td Message-ID: <20120109083400.D326C2A6C12E@llvm.org> Author: ctopper Date: Mon Jan 9 02:34:00 2012 New Revision: 147769 URL: http://llvm.org/viewvc/llvm-project?rev=147769&view=rev Log: Add HasAVX predicate to some of the AVX patterns. 
Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=147769&r1=147768&r2=147769&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original) +++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Mon Jan 9 02:34:00 2012 @@ -7223,6 +7223,7 @@ def VBROADCASTI128 : avx_broadcast<0x5A, "vbroadcasti128", VR256, i128mem, int_x86_avx2_vbroadcasti128>; +let Predicates = [HasAVX] in def : Pat<(int_x86_avx_vbroadcastf128_ps_256 addr:$src), (VBROADCASTF128 addr:$src)>; @@ -7242,12 +7243,14 @@ []>, VEX_4V; } +let Predicates = [HasAVX] in { def : Pat<(int_x86_avx_vinsertf128_pd_256 VR256:$src1, VR128:$src2, imm:$src3), (VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>; def : Pat<(int_x86_avx_vinsertf128_ps_256 VR256:$src1, VR128:$src2, imm:$src3), (VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>; def : Pat<(int_x86_avx_vinsertf128_si_256 VR256:$src1, VR128:$src2, imm:$src3), (VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>; +} //===----------------------------------------------------------------------===// // VEXTRACTF128 - Extract packed floating-point values @@ -7264,12 +7267,14 @@ []>, VEX; } +let Predicates = [HasAVX] in { def : Pat<(int_x86_avx_vextractf128_pd_256 VR256:$src1, imm:$src2), (VEXTRACTF128rr VR256:$src1, imm:$src2)>; def : Pat<(int_x86_avx_vextractf128_ps_256 VR256:$src1, imm:$src2), (VEXTRACTF128rr VR256:$src1, imm:$src2)>; def : Pat<(int_x86_avx_vextractf128_si_256 VR256:$src1, imm:$src2), (VEXTRACTF128rr VR256:$src1, imm:$src2)>; +} //===----------------------------------------------------------------------===// // VMASKMOV - Conditional SIMD Packed Loads and Stores @@ -7358,6 +7363,7 @@ int_x86_avx_vpermil_pd_256>; } +let Predicates = [HasAVX] in { def : Pat<(v8f32 (X86VPermilp VR256:$src1, (i8 imm:$imm))), (VPERMILPSYri VR256:$src1, 
imm:$imm)>; def : Pat<(v4f64 (X86VPermilp VR256:$src1, (i8 imm:$imm))), @@ -7375,6 +7381,7 @@ (VPERMILPSYmi addr:$src1, imm:$imm)>; def : Pat<(v4i64 (X86VPermilp (memopv4i64 addr:$src1), (i8 imm:$imm))), (VPERMILPDYmi addr:$src1, imm:$imm)>; +} //===----------------------------------------------------------------------===// // VPERM2F128 - Permute Floating-Point Values in 128-bit chunks @@ -7391,6 +7398,7 @@ []>, VEX_4V; } +let Predicates = [HasAVX] in { def : Pat<(int_x86_avx_vperm2f128_ps_256 VR256:$src1, VR256:$src2, imm:$src3), (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$src3)>; def : Pat<(int_x86_avx_vperm2f128_pd_256 VR256:$src1, VR256:$src2, imm:$src3), @@ -7407,6 +7415,7 @@ def : Pat<(int_x86_avx_vperm2f128_si_256 VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)), imm:$src3), (VPERM2F128rm VR256:$src1, addr:$src2, imm:$src3)>; +} //===----------------------------------------------------------------------===// // VZERO - Zero YMM registers @@ -7545,6 +7554,7 @@ } // AVX1 broadcast patterns +let Predicates = [HasAVX] in { def : Pat<(v8i32 (X86VBroadcast (loadi32 addr:$src))), (VBROADCASTSSYrm addr:$src)>; def : Pat<(v4i64 (X86VBroadcast (loadi64 addr:$src))), @@ -7558,6 +7568,7 @@ (VBROADCASTSSrm addr:$src)>; def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))), (VBROADCASTSSrm addr:$src)>; +} //===----------------------------------------------------------------------===// // VPERM - Permute instructions @@ -7646,6 +7657,7 @@ } // AVX1 patterns +let Predicates = [HasAVX] in { def : Pat<(v8f32 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), @@ -7677,6 +7689,7 @@ def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1, (bc_v16i16 (memopv4i64 addr:$src2)), (i8 imm:$imm))), (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>; +} //===----------------------------------------------------------------------===// @@ -7715,6 +7728,7 @@ } // AVX1 
patterns +let Predicates = [HasAVX] in { def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (v4f32 VR128:$src2), (i32 imm)), (VINSERTF128rr VR256:$src1, VR128:$src2, @@ -7739,6 +7753,7 @@ (i32 imm)), (VINSERTF128rr VR256:$src1, VR128:$src2, (INSERT_get_vinsertf128_imm VR256:$ins))>; +} //===----------------------------------------------------------------------===// // VEXTRACTI128 - Extract packed integer values @@ -7774,6 +7789,7 @@ } // AVX1 patterns +let Predicates = [HasAVX] in { def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), (v4f32 (VEXTRACTF128rr (v8f32 VR256:$src1), @@ -7798,6 +7814,7 @@ (v16i8 (VEXTRACTF128rr (v32i8 VR256:$src1), (EXTRACT_get_vextractf128_imm VR128:$ext)))>; +} //===----------------------------------------------------------------------===// // VPMASKMOV - Conditional SIMD Integer Packed Loads and Stores From STPWORLD at narod.ru Mon Jan 9 02:40:22 2012 From: STPWORLD at narod.ru (Stepan Dyatkovskiy) Date: Mon, 09 Jan 2012 12:40:22 +0400 Subject: [llvm-commits] [LLVM, SwitchInst, case ranges] Auxiliary patch #1 In-Reply-To: <790551325840553@web126.yandex.ru> References: <4EAA9B5D.802@narod.ru> <4EAA9DE8.80000@free.fr> <485181319805488@web67.yandex.ru> <4EAB079D.6000606@free.fr> <4EB18F12.6060409@narod.ru> <4EB7C319.1000709@narod.ru> <4EDE7D75.704@narod.ru> <4EDFD0F4.1040204@narod.ru> <4EE25B61.9070006@narod.ru> <4EE5C06C.3050705@narod.ru> <333531323974498@web57.yandex.ru> <4EEB9C52.1050301@narod.ru> <4EF37B6B.6000205@narod.ru> <4EFA0748.9080702@narod.ru> <4EFCAA6A.20203@narod.ru> <610731325498362@web6.yandex.ru> <200871325616668@web103.yandex.ru> <5633CF27-DD22-4595-A197-DFFCC56B6342@apple.com> <4F058C43.7020908@narod.ru> <6F0FECE5-F12D-4F5E-890B-385AADCB563B@apple.com> <989681325785425@web41.yandex.ru> <9DD2B823-1EC4-4A2B-BF52-73DDC8A6B923@apple.com> <30291325791059@web135.yandex.ru> <4DD01E8C-836C-43EA-BDE4-329A223E2EB3@apple.com> <790551325840553@web126.yandex.ru> Message-ID: <239211326098422@web147.yandex.ru> ping. 
-Stepan. 06.01.2012, 13:02, "Stepan Dyatkovskiy" : >> My half-baked thought was that we could store 1, 4...7, 12 as [1, <4, 7>, 12], just to distinguish ranges from pairs of scalar values. > > Maybe ConstantStruct? Something like this: > > struct { // 0-level struct represents set of values and ranges. >   unsigned v1 = 1; >   struct { // 1-level struct represents ranges, and must have two fields only: "low" and "high". >     unsigned low = 4; >     unsigned high = 7; >   } v2; >   unsigned v3 = 12; > } > >>>   CaseValue = SI->getOperand(SomeIndex); // We use User methods keeping SwitchInst format in mind. >>>   CaseSuccessor = SI->getSuccessor(SomeIndex2); // We use TerminatorInst methods keeping what each successor means in mind. >>>   with >>>   CaseValue = SI->getCaseValue(SomeCaseValueIndex); >>>   Successor = SI->getCaseSuccessor(SomeSuccessorIndex); >> I may not be understanding what you mean, but I think that getting rid of getOperand() (hiding it in SwitchInst) makes sense. getSuccessor() still needs to exist though. > > Currently the 0-case value is not a case value at all; it is the Condition. The same goes for successors: the zero-indexed successor is the default destination (not a case successor). We store items with different types and roles in a single collection. > I propose to totally separate these terms at the SwitchInst level: use getCondition() if you need the condition, and use getDefaultDest() for the default destination. Use getCaseValue only for resolving some case value. So getCaseValue(0) means that I need the first case value (not the condition) and getCaseSuccessor(0) means that I need the successor for the first case value (not the default dest). Ideally, a developer that uses SwitchInst should know nothing about the internal operators format. > > -Stepan.
From chandlerc at gmail.com Mon Jan 9 02:48:46 2012 From: chandlerc at gmail.com (Chandler Carruth) Date: Mon, 9 Jan 2012 00:48:46 -0800 Subject: [llvm-commits] PATCH: Teach the x86 backend to fold mask & shift patterns into an addressing mode computation In-Reply-To: <4F09A1A0.5040508@free.fr> References: <4F09A1A0.5040508@free.fr> Message-ID: I've attached an updated patch. This addresses your comments as well as a bunch of comments Owen made to me on IRC. There was some overlap. It also includes an extra test case that helped me catch some cases where it wasn't firing, and it factors all of the logic out into a helper function. This is used to apply the logic whether the AND or the SRL node comes first in the DAG; I've found inputs with it both ways around (hence the extra test case) and the transform itself is easily generalized to either ordering. Owen expressed some concerns about whether this is doing the correct thing WRT the topological sort of the DAG, and I'm now fairly confident in this part. The code inserts the new nodes, in the correct sequence, each one before the input node 'N', so they will be ordered correctly for the remaining ISel progression. The contract of ISel is slightly violated by assigning them all the same node ID number, so that the IDs are no longer unique, but there are several other transforms in the same section (x86 address mode matching) that do the same thing, so this appears to be fine in practice. My suspicion is that the IDs merely need to form a SWO (strict weak ordering) or some such, and that at least is satisfied. I could write code that would re-number all subsequent nodes in order to have unique numbers again, but I don't think it should be part of this patch considering the existing precedent. I'm also starting more extensive testing of this code to make sure it doesn't silently miscompile anything.
On Sun, Jan 8, 2012 at 6:01 AM, Duncan Sands wrote: > Hi Chandler, > > > --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp > > +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp > > @@ -4237,6 +4237,17 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { > > return SDValue(N, 0); // Return N so it doesn't get rechecked! > > } > > > > + // fold (zext (truncate (zextload x))) -> (zext (zextload x)) > > + if (LoadSDNode *Load = > dyn_cast<LoadSDNode>(N0.getOperand(0).getNode())) { > > + EVT TruncVT = N0.getValueType(), MemVT = Load->getMemoryVT(); > > + // This is safe as long as the truncate doesn't truncate any of > the bits > > + // loaded from memory and we zero extended the rest of the bits. > > + if (ISD::isZEXTLoad(Load) && TruncVT.bitsGE(MemVT) && 0) { > > + return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, > > + N0.getOperand(0)); > > + } > > + } > > this is a special case of a much more general transform: zext(trunc(x)) -> > zext(x) if the bits dropped by the trunc are known zero [in full generality > the result of the transform might be x if the zext is just restoring the > bits truncated, or trunc(x) (to a different size) if the zext does not > restore all of the truncated bits]. How about implementing the general > transform instead? > Done. > > > --- a/lib/Target/X86/X86ISelDAGToDAG.cpp > > +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp > > @@ -814,6 +814,106 @@ bool > X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, > > break; > > } > > > > + case ISD::SRL: { > > + // Try some heroics to detect shifts of masked values where the > mask can be > > + // replaced by extending the shift and undoing that in the > addressing mode > > + // scale. Patterns such as (shl (srl x, c1), c2) are canonicalized > into > > + // (and (srl x, SHIFT), MASK) by DAGCombines that don't know the > shl can be > > + // done in the addressing mode. > > How about an explicit example of what you want to achieve?
> Added, although it's a bit weird to have here instead of in the testcase. Still, maybe it helps as I agree this is a bit of a weird transform. > > > + > > + // Scale must not be used already. > > + if (AM.IndexReg.getNode() != 0 || AM.Scale != 1) break; > > + > > + if (N.getNumOperands() != 2) break; > > Can this check actually ever fire? Also, might this be loading a vector > value? > The check was dead, but I don't understand the comment about vector values? This may be about to load a vector, but the input shouldn't be. None of the existing code worries about vectors, likely because legalize has already precluded them from being inputs to this chain? > > + ConstantSDNode *ShiftAmtNode = > dyn_cast<ConstantSDNode>(N.getOperand(1)); > > + if (!ShiftAmtNode) break; > > + SDValue And = N.getOperand(0); > > + if (And.getOpcode() != ISD::AND || And.getNumOperands() != 2) break; > > Can an "and" really have more than two operands? > Nope, nuked. > > > + ConstantSDNode *MaskNode = > dyn_cast<ConstantSDNode>(And.getOperand(1)); > > + if (!MaskNode) break; > > + SDValue X = And.getOperand(0); > > + > > + // We only handle up to 64-bit values here for simplicity. > > + if (X.getValueSizeInBits() > 64) break; > > + > > + // The number of bits that would have to be shifted left is the > number of > > + // zero bits in the mask *after* it is shifted right. > > Are you sure about that? shl(shr x, N),M should result in M bits being > masked. > Thus I would have expected AMShiftAmt below to be MaskTZ (assuming > AMShiftAmt > is the amount the zapped shl would have shifted by, which is what your > comment > suggested to me; how about more explicit variable names or comments). > Sorry, this comment was stale, and highly misleading, as it dated from an earlier version of the code. I've removed it as there is a much better comment now directly attached to AMShiftAmt.
> > > + uint64_t Mask = MaskNode->getZExtValue(); > > + unsigned ShiftAmt = ShiftAmtNode->getZExtValue(); > > + unsigned MaskLZ = CountLeadingZeros_64(Mask); > > + unsigned MaskTZ = CountTrailingZeros_64(Mask); > > + int AMShiftAmt = MaskTZ - ShiftAmt; > > + > > + // There is nothing we can do here unless the mask is removing some > bits. > > + // Also, the addressing mode can only represent shifts of 1, 2, or > 3 bits. > > + if (AMShiftAmt <= 0 || AMShiftAmt > 3) break; > > + > > + // We also need to ensure that mask is a continuous run of bits. > > + if (CountTrailingOnes_64(Mask >> MaskTZ) + MaskTZ + MaskLZ != 64) > break; > > Is this the best way to check this? > Other code uses the same pattern to check this... If there is a better way, I'd love to know it? It seems... oddly elegant. -------------- next part -------------- An HTML attachment was scrubbed... URL: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120109/42cf42af/attachment-0001.html -------------- next part -------------- A non-text attachment was scrubbed... Name: crazy-addr-mode2.patch Type: text/x-patch Size: 9901 bytes Desc: not available Url : http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120109/42cf42af/attachment-0001.bin From chandlerc at gmail.com Mon Jan 9 02:57:33 2012 From: chandlerc at gmail.com (Chandler Carruth) Date: Mon, 9 Jan 2012 00:57:33 -0800 Subject: [llvm-commits] PATCH: Teach the x86 backend to fold mask & shift patterns into an addressing mode computation In-Reply-To: References: <4F09A1A0.5040508@free.fr> Message-ID: On Mon, Jan 9, 2012 at 12:48 AM, Chandler Carruth wrote: > I've attached an updated patch. > ... which still contained debugging code to test that my tests were actually checking the right thing. This time with feeling. -------------- next part -------------- An HTML attachment was scrubbed... 
URL: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120109/9f7a26a7/attachment.html -------------- next part -------------- A non-text attachment was scrubbed... Name: crazy-addr-mode2.patch Type: text/x-patch Size: 9885 bytes Desc: not available Url : http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120109/9f7a26a7/attachment.bin From craig.topper at gmail.com Mon Jan 9 03:02:13 2012 From: craig.topper at gmail.com (Craig Topper) Date: Mon, 09 Jan 2012 09:02:13 -0000 Subject: [llvm-commits] [llvm] r147770 - in /llvm/trunk/lib/Target/X86: X86.td X86Subtarget.cpp X86Subtarget.h Message-ID: <20120109090213.7508B2A6C12C@llvm.org> Author: ctopper Date: Mon Jan 9 03:02:13 2012 New Revision: 147770 URL: http://llvm.org/viewvc/llvm-project?rev=147770&view=rev Log: Remove AVX hack in X86Subtarget. AVX/AVX2 are now treated as an SSE level. Predicate functions have been altered to maintain previous names and behavior. Modified: llvm/trunk/lib/Target/X86/X86.td llvm/trunk/lib/Target/X86/X86Subtarget.cpp llvm/trunk/lib/Target/X86/X86Subtarget.h Modified: llvm/trunk/lib/Target/X86/X86.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86.td?rev=147770&r1=147769&r2=147770&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86.td (original) +++ llvm/trunk/lib/Target/X86/X86.td Mon Jan 9 03:02:13 2012 @@ -80,9 +80,10 @@ "Support SSE 4a instructions", [FeatureSSE3]>; -def FeatureAVX : SubtargetFeature<"avx", "HasAVX", "true", - "Enable AVX instructions">; -def FeatureAVX2 : SubtargetFeature<"avx2", "HasAVX2", "true", +def FeatureAVX : SubtargetFeature<"avx", "X86SSELevel", "AVX", + "Enable AVX instructions", + [FeatureSSE42]>; +def FeatureAVX2 : SubtargetFeature<"avx2", "X86SSELevel", "AVX2", "Enable AVX2 instructions", [FeatureAVX]>; def FeatureCLMUL : SubtargetFeature<"clmul", "HasCLMUL", "true", Modified: llvm/trunk/lib/Target/X86/X86Subtarget.cpp URL: 
http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Subtarget.cpp?rev=147770&r1=147769&r2=147770&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86Subtarget.cpp (original) +++ llvm/trunk/lib/Target/X86/X86Subtarget.cpp Mon Jan 9 03:02:13 2012 @@ -198,7 +198,7 @@ if ((ECX >> 19) & 1) { X86SSELevel = SSE41; ToggleFeature(X86::FeatureSSE41);} if ((ECX >> 20) & 1) { X86SSELevel = SSE42; ToggleFeature(X86::FeatureSSE42);} // FIXME: AVX codegen support is not ready. - //if ((ECX >> 28) & 1) { HasAVX = true; ToggleFeature(X86::FeatureAVX); } + //if ((ECX >> 28) & 1) { X86SSELevel = AVX; ToggleFeature(X86::FeatureAVX); } bool IsIntel = memcmp(text.c, "GenuineIntel", 12) == 0; bool IsAMD = !IsIntel && memcmp(text.c, "AuthenticAMD", 12) == 0; @@ -295,7 +295,7 @@ } // FIXME: AVX2 codegen support is not ready. //if ((EBX >> 5) & 0x1) { - // HasAVX2 = true; + // X86SSELevel = AVX2;; // ToggleFeature(X86::FeatureAVX2); //} if ((EBX >> 8) & 0x1) { @@ -317,8 +317,6 @@ , HasX86_64(false) , HasPOPCNT(false) , HasSSE4A(false) - , HasAVX(false) - , HasAVX2(false) , HasAES(false) , HasCLMUL(false) , HasFMA3(false) @@ -372,7 +370,7 @@ HasX86_64 = true; ToggleFeature(X86::Feature64Bit); HasCMov = true; ToggleFeature(X86::FeatureCMOV); - if (!HasAVX && X86SSELevel < SSE2) { + if (X86SSELevel < SSE2) { X86SSELevel = SSE2; ToggleFeature(X86::FeatureSSE1); ToggleFeature(X86::FeatureSSE2); @@ -385,9 +383,6 @@ if (In64BitMode) ToggleFeature(X86::Mode64Bit); - if (HasAVX) - X86SSELevel = MMX; - DEBUG(dbgs() << "Subtarget features: SSELevel " << X86SSELevel << ", 3DNowLevel " << X863DNowLevel << ", 64bit " << HasX86_64 << "\n"); Modified: llvm/trunk/lib/Target/X86/X86Subtarget.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Subtarget.h?rev=147770&r1=147769&r2=147770&view=diff ============================================================================== --- 
llvm/trunk/lib/Target/X86/X86Subtarget.h (original) +++ llvm/trunk/lib/Target/X86/X86Subtarget.h Mon Jan 9 03:02:13 2012 @@ -42,7 +42,7 @@ class X86Subtarget : public X86GenSubtargetInfo { protected: enum X86SSEEnum { - NoMMXSSE, MMX, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42 + NoMMXSSE, MMX, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, AVX, AVX2 }; enum X863DNowEnum { @@ -75,12 +75,6 @@ /// HasSSE4A - True if the processor supports SSE4A instructions. bool HasSSE4A; - /// HasAVX - Target has AVX instructions - bool HasAVX; - - /// HasAVX2 - Target has AVX2 instructions - bool HasAVX2; - /// HasAES - Target has AES instructions bool HasAES; @@ -179,24 +173,24 @@ bool hasCMov() const { return HasCMov; } bool hasMMX() const { return X86SSELevel >= MMX; } - bool hasSSE1() const { return X86SSELevel >= SSE1; } - bool hasSSE2() const { return X86SSELevel >= SSE2; } - bool hasSSE3() const { return X86SSELevel >= SSE3; } - bool hasSSSE3() const { return X86SSELevel >= SSSE3; } - bool hasSSE41() const { return X86SSELevel >= SSE41; } - bool hasSSE42() const { return X86SSELevel >= SSE42; } + bool hasSSE1() const { return X86SSELevel >= SSE1 && !hasAVX(); } + bool hasSSE2() const { return X86SSELevel >= SSE2 && !hasAVX(); } + bool hasSSE3() const { return X86SSELevel >= SSE3 && !hasAVX(); } + bool hasSSSE3() const { return X86SSELevel >= SSSE3 && !hasAVX(); } + bool hasSSE41() const { return X86SSELevel >= SSE41 && !hasAVX(); } + bool hasSSE42() const { return X86SSELevel >= SSE42 && !hasAVX(); } bool hasSSE4A() const { return HasSSE4A; } bool has3DNow() const { return X863DNowLevel >= ThreeDNow; } bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; } bool hasPOPCNT() const { return HasPOPCNT; } - bool hasAVX() const { return HasAVX; } - bool hasAVX2() const { return HasAVX2; } - bool hasXMM() const { return hasSSE1() || hasAVX(); } - bool hasXMMInt() const { return hasSSE2() || hasAVX(); } - bool hasSSE3orAVX() const { return hasSSE3() || hasAVX(); } - bool hasSSSE3orAVX() 
const { return hasSSSE3() || hasAVX(); } - bool hasSSE41orAVX() const { return hasSSE41() || hasAVX(); } - bool hasSSE42orAVX() const { return hasSSE42() || hasAVX(); } + bool hasAVX() const { return X86SSELevel >= AVX; } + bool hasAVX2() const { return X86SSELevel >= AVX2; } + bool hasXMM() const { return X86SSELevel >= SSE1; } + bool hasXMMInt() const { return X86SSELevel >= SSE2; } + bool hasSSE3orAVX() const { return X86SSELevel >= SSE3; } + bool hasSSSE3orAVX() const { return X86SSELevel >= SSSE3; } + bool hasSSE41orAVX() const { return X86SSELevel >= SSE41; } + bool hasSSE42orAVX() const { return X86SSELevel >= SSE42; } bool hasAES() const { return HasAES; } bool hasCLMUL() const { return HasCLMUL; } bool hasFMA3() const { return HasFMA3; } From nadav.rotem at intel.com Mon Jan 9 03:23:09 2012 From: nadav.rotem at intel.com (Rotem, Nadav) Date: Mon, 9 Jan 2012 09:23:09 +0000 Subject: [llvm-commits] PATCH: Teach the x86 backend to fold mask & shift patterns into an addressing mode computation In-Reply-To: References: <4F09A1A0.5040508@free.fr> Message-ID: <7DE70FDACDE4CD4887C4278C12A2E30505B406@HASMSX104.ger.corp.intel.com> I am not familiar with the addressing mode code but I have a few small comments. + // fold (zext (truncate x)) -> (zext x) or + // (zext (truncate x)) -> (truncate x) + // This is valid when the truncated bits of x are already zero. + // FIXME: We should extend this to work for vectors too. + if (N0.getOpcode() == ISD::TRUNCATE && !VT.isVector()) { + SDValue Op = N0.getOperand(0); + APInt TruncatedBits Why is this optimization disabled for vectors ? You can use getScalarSizeInBits and I think that it should work. +static bool FoldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N, + X86ISelAddressMode &AM) { + return true; Return true ? :) + // Scale must not be used already. 
+ if (AM.IndexReg.getNode() != 0 || AM.Scale != 1) return true; + + SDValue Shift = N; + SDValue And = N.getOperand(0); + if (N.getOpcode() != ISD::SRL) + std::swap(Shift, And); + if (Shift.getOpcode() != ISD::SRL || And.getOpcode() != ISD::AND || + !isa(Shift.getOperand(1)) || + !isa(And.getOperand(1))) + return true; + SDValue X = N == Shift ? And.getOperand(0) : Shift.getOperand(0); Can you add parenthesis here ? + unsigned ShiftAmt = Shift.getConstantOperandVal(1); + unsigned MaskLZ = CountLeadingZeros_64(Mask); + unsigned MaskTZ = CountTrailingZeros_64(Mask); + + // The amount of shift we're trying ot fit into the addressing mode is taken Typo ot -> to. + } + SDValue NewShiftAmt = DAG.getConstant(ShiftAmt + AMShiftAmt, MVT::i8); Please use getShiftAmountTy. --------------------------------------------------------------------- Intel Israel (74) Limited This e-mail and any attachments may contain confidential material for the sole use of the intended recipient(s). Any review or distribution by others is strictly prohibited. If you are not the intended recipient, please contact the sender and delete all copies. From chandlerc at gmail.com Mon Jan 9 03:44:26 2012 From: chandlerc at gmail.com (Chandler Carruth) Date: Mon, 09 Jan 2012 09:44:26 -0000 Subject: [llvm-commits] [llvm] r147772 - /llvm/trunk/test/CodeGen/X86/fold-and-shift.ll Message-ID: <20120109094426.ACC0D2A6C12C@llvm.org> Author: chandlerc Date: Mon Jan 9 03:44:26 2012 New Revision: 147772 URL: http://llvm.org/viewvc/llvm-project?rev=147772&view=rev Log: Cleanup and FileCheck-ize a test. 
Modified: llvm/trunk/test/CodeGen/X86/fold-and-shift.ll Modified: llvm/trunk/test/CodeGen/X86/fold-and-shift.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fold-and-shift.ll?rev=147772&r1=147771&r2=147772&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/X86/fold-and-shift.ll (original) +++ llvm/trunk/test/CodeGen/X86/fold-and-shift.ll Mon Jan 9 03:44:26 2012 @@ -1,21 +1,33 @@ -; RUN: llc < %s -march=x86 | not grep and +; RUN: llc < %s -march=x86 | FileCheck %s define i32 @t1(i8* %X, i32 %i) { +; CHECK: t1: +; CHECK-NOT: and +; CHECK: movzbl +; CHECK: movl (%{{...}},%{{...}},4), +; CHECK: ret + entry: - %tmp2 = shl i32 %i, 2 ; [#uses=1] - %tmp4 = and i32 %tmp2, 1020 ; [#uses=1] - %tmp7 = getelementptr i8* %X, i32 %tmp4 ; [#uses=1] - %tmp78 = bitcast i8* %tmp7 to i32* ; [#uses=1] - %tmp9 = load i32* %tmp78, align 4 ; [#uses=1] - ret i32 %tmp9 + %tmp2 = shl i32 %i, 2 + %tmp4 = and i32 %tmp2, 1020 + %tmp7 = getelementptr i8* %X, i32 %tmp4 + %tmp78 = bitcast i8* %tmp7 to i32* + %tmp9 = load i32* %tmp78 + ret i32 %tmp9 } define i32 @t2(i16* %X, i32 %i) { +; CHECK: t2: +; CHECK-NOT: and +; CHECK: movzwl +; CHECK: movl (%{{...}},%{{...}},4), +; CHECK: ret + entry: - %tmp2 = shl i32 %i, 1 ; [#uses=1] - %tmp4 = and i32 %tmp2, 131070 ; [#uses=1] - %tmp7 = getelementptr i16* %X, i32 %tmp4 ; [#uses=1] - %tmp78 = bitcast i16* %tmp7 to i32* ; [#uses=1] - %tmp9 = load i32* %tmp78, align 4 ; [#uses=1] - ret i32 %tmp9 + %tmp2 = shl i32 %i, 1 + %tmp4 = and i32 %tmp2, 131070 + %tmp7 = getelementptr i16* %X, i32 %tmp4 + %tmp78 = bitcast i16* %tmp7 to i32* + %tmp9 = load i32* %tmp78 + ret i32 %tmp9 } From chandlerc at gmail.com Mon Jan 9 03:47:25 2012 From: chandlerc at gmail.com (Chandler Carruth) Date: Mon, 09 Jan 2012 09:47:25 -0000 Subject: [llvm-commits] [llvm] r147773 - /llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp Message-ID: <20120109094725.C03592A6C12C@llvm.org> Author: chandlerc Date: 
Mon Jan 9 03:47:25 2012 New Revision: 147773 URL: http://llvm.org/viewvc/llvm-project?rev=147773&view=rev Log: Don't rely on the fact that shift values are never very large, and thus this subtraction will result in small negative numbers at worst which become very large positive numbers on assignment and are thus caught by the <=4 check on the next line. The >0 check was clearly intended to catch these as negative numbers. Spotted by inspection, and impossible to trigger given the shift widths that can be used. Modified: llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp Modified: llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp?rev=147773&r1=147772&r2=147773&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp (original) +++ llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp Mon Jan 9 03:47:25 2012 @@ -996,7 +996,7 @@ // allows us to convert the shift and and into an h-register extract and // a scaled index. if (Shift.getOpcode() == ISD::SRL && Shift.hasOneUse()) { - unsigned ScaleLog = 8 - C1->getZExtValue(); + int ScaleLog = 8 - C1->getZExtValue(); if (ScaleLog > 0 && ScaleLog < 4 && C2->getZExtValue() == (UINT64_C(0xff) << ScaleLog)) { SDValue Eight = CurDAG->getConstant(8, MVT::i8); From STPWORLD at narod.ru Mon Jan 9 05:58:02 2012 From: STPWORLD at narod.ru (Stepan Dyatkovskiy) Date: Mon, 09 Jan 2012 15:58:02 +0400 Subject: [llvm-commits] [LLVM, llc] PATCH for PR11691: llc assertion failure generating arm neon vst2 with pointer update Message-ID: <908661326110282@web69.yandex.ru> Hi all. It seems that I found where the bug lives. We want to expand VST2d16PseudoWB_fixed to VST2d8wb_fixed. Look at the first one: %vreg1 = VST2d16PseudoWB_fixed %vreg1, 0, %noreg, %vreg3, pred:14, pred:%noreg; mem:ST16[%lsr.iv1](align=2) GPR:%vreg1 QPR:%vreg3 Op #4 is a dummy here (%noreg).
Looking at the SelectVST method, I can conclude that Op4 is added to keep the format of all VST2XXXPseudoWB_XXXXX the same. What does Reg0 mean here, though? It looks like a NULL register. If I understand correctly, Op4 is the write-back operand, but it is not used in "fixed" vst instructions. So when we want to expand this instruction to VST2d8wb_fixed we should skip the write-back placeholder. Please find the attached patch. -Stepan. -------------- next part -------------- A non-text attachment was scrubbed... Name: 11691.patch Type: application/octet-stream Size: 597 bytes Desc: not available Url : http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120109/a6bc87ae/attachment.obj From chandlerc at gmail.com Mon Jan 9 06:20:53 2012 From: chandlerc at gmail.com (Chandler Carruth) Date: Mon, 9 Jan 2012 04:20:53 -0800 Subject: [llvm-commits] PATCH: Teach the x86 backend to fold mask & shift patterns into an addressing mode computation In-Reply-To: <7DE70FDACDE4CD4887C4278C12A2E30505B406@HASMSX104.ger.corp.intel.com> References: <4F09A1A0.5040508@free.fr> <7DE70FDACDE4CD4887C4278C12A2E30505B406@HASMSX104.ger.corp.intel.com> Message-ID: On Mon, Jan 9, 2012 at 1:23 AM, Rotem, Nadav wrote: > I am not familiar with the addressing mode code but I have a few small > comments. > Thanks for the feedback! > Why is this optimization disabled for vectors ? You can use > getScalarSizeInBits and I think that it should work. > I'm not familiar w/ the semantics of the masked bits computation, etc. I'm happy to add vector support to this, but I'd like to do it in a second patch with appropriate tests that it does the correct thing. Return true ? :) > Yea, as I posted to the list, the first patch had a debug line of code in it... > + SDValue X = N == Shift ? And.getOperand(0) : Shift.getOperand(0); > > Can you add parenthesis here ? > Done, I think. At least, the only precedence I find surprising is that of the entire conditional expression, not the condition itself...
> + // The amount of shift we're trying ot fit into the addressing mode is > taken > > Typo ot -> to. > Fixed. > + SDValue NewShiftAmt = DAG.getConstant(ShiftAmt + AMShiftAmt, MVT::i8); > > Please use getShiftAmountTy. > None of the other address mode computations use it. =/ It seems like goodness, but it requires threading a TargetLowering handle into this code. I've got 4 or 5 more patches queued up behind this that changing the interface in that way would make kind of annoying. I'd prefer to clean up all of the i8 shift types in one pass afterward. Does that sound good to you? I've attached an updated patch, but I made a significant change beyond your comments. The previous patch could cause unfortunate re-computations because we never re-computed the final value in a non-addressing-mode context. I've changed the code to do so, and to replace uses of the final value with that computation, while leaving the addressing mode able to select the simpler computation. Subsequent passes of ISel will trivially fold the left-shifts into addressing modes as well when possible. I've also disabled the transform when the pre-masked shifted value has multiple users as that will also force those users to re-compute the value. I'm actually a bit dubious on this one, as it seems like it might still be a win to do the transform, but I decided to start safer, and make this more aggressive in follow-ups if benchmarks indicate it is a good tradeoff. -------------- next part -------------- An HTML attachment was scrubbed... URL: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120109/73264d67/attachment.html -------------- next part -------------- A non-text attachment was scrubbed...
Name: crazy-addr-mode3.patch Type: text/x-patch Size: 10455 bytes Desc: not available Url : http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120109/73264d67/attachment.bin From jan_sjodin at yahoo.com Mon Jan 9 08:47:13 2012 From: jan_sjodin at yahoo.com (Jan Sjodin) Date: Mon, 9 Jan 2012 06:47:13 -0800 (PST) Subject: [llvm-commits] [llvm] r147770 - in /llvm/trunk/lib/Target/X86: X86.td X86Subtarget.cpp X86Subtarget.h In-Reply-To: <20120109090213.7508B2A6C12C@llvm.org> References: <20120109090213.7508B2A6C12C@llvm.org> Message-ID: <1326120433.43884.YahooMailNeo@web161501.mail.bf1.yahoo.com> What should be done with the other extensions: FMA3, FMA4 and XOP? Should they also be included in this list? - Jan >________________________________ > From: Craig Topper >To: llvm-commits at cs.uiuc.edu >Sent: Monday, January 9, 2012 4:02 AM >Subject: [llvm-commits] [llvm] r147770 - in /llvm/trunk/lib/Target/X86: X86.td X86Subtarget.cpp X86Subtarget.h > >Author: ctopper >Date: Mon Jan? 9 03:02:13 2012 >New Revision: 147770 > >URL: http://llvm.org/viewvc/llvm-project?rev=147770&view=rev >Log: >Remove AVX hack in X86Subtarget. AVX/AVX2 are now treated as an SSE level. Predicate functions have been altered to maintain previous names and behavior. > >Modified: >? ? llvm/trunk/lib/Target/X86/X86.td >? ? llvm/trunk/lib/Target/X86/X86Subtarget.cpp >? ? llvm/trunk/lib/Target/X86/X86Subtarget.h > >Modified: llvm/trunk/lib/Target/X86/X86.td >URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86.td?rev=147770&r1=147769&r2=147770&view=diff >============================================================================== >--- llvm/trunk/lib/Target/X86/X86.td (original) >+++ llvm/trunk/lib/Target/X86/X86.td Mon Jan? 9 03:02:13 2012 >@@ -80,9 +80,10 @@ >? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? "Support SSE 4a instructions", >? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? [FeatureSSE3]>; > >-def FeatureAVX? ? : SubtargetFeature<"avx", "HasAVX", "true", >-? ? ? ? ? ? ? ? ? ? ? ? 
? ? ? ? ? ? ? "Enable AVX instructions">; >-def FeatureAVX2? ? : SubtargetFeature<"avx2", "HasAVX2", "true", >+def FeatureAVX? ? : SubtargetFeature<"avx", "X86SSELevel", "AVX", >+? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? "Enable AVX instructions", >+? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? [FeatureSSE42]>; >+def FeatureAVX2? ? : SubtargetFeature<"avx2", "X86SSELevel", "AVX2", >? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? "Enable AVX2 instructions", >? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? [FeatureAVX]>; >def FeatureCLMUL? : SubtargetFeature<"clmul", "HasCLMUL", "true", > >Modified: llvm/trunk/lib/Target/X86/X86Subtarget.cpp >URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Subtarget.cpp?rev=147770&r1=147769&r2=147770&view=diff >============================================================================== >--- llvm/trunk/lib/Target/X86/X86Subtarget.cpp (original) >+++ llvm/trunk/lib/Target/X86/X86Subtarget.cpp Mon Jan? 9 03:02:13 2012 >@@ -198,7 +198,7 @@ >? if ((ECX >> 19) & 1) { X86SSELevel = SSE41; ToggleFeature(X86::FeatureSSE41);} >? if ((ECX >> 20) & 1) { X86SSELevel = SSE42; ToggleFeature(X86::FeatureSSE42);} >? // FIXME: AVX codegen support is not ready. >-? //if ((ECX >> 28) & 1) { HasAVX = true;? ToggleFeature(X86::FeatureAVX); } >+? //if ((ECX >> 28) & 1) { X86SSELevel = AVX;? ToggleFeature(X86::FeatureAVX); } > >? bool IsIntel = memcmp(text.c, "GenuineIntel", 12) == 0; >? bool IsAMD? = !IsIntel && memcmp(text.c, "AuthenticAMD", 12) == 0; >@@ -295,7 +295,7 @@ >? ? ? } >? ? ? // FIXME: AVX2 codegen support is not ready. >? ? ? //if ((EBX >> 5) & 0x1) { >-? ? ? //? HasAVX2 = true; >+? ? ? //? X86SSELevel = AVX2;; >? ? ? //? ToggleFeature(X86::FeatureAVX2); >? ? ? //} >? ? ? if ((EBX >> 8) & 0x1) { >@@ -317,8 +317,6 @@ >? , HasX86_64(false) >? , HasPOPCNT(false) >? , HasSSE4A(false) >-? , HasAVX(false) >-? , HasAVX2(false) >? , HasAES(false) >? , HasCLMUL(false) >? , HasFMA3(false) >@@ -372,7 +370,7 @@ >? ? ? 
HasX86_64 = true; ToggleFeature(X86::Feature64Bit); >? ? ? HasCMov = true;? ToggleFeature(X86::FeatureCMOV); > >-? ? ? if (!HasAVX && X86SSELevel < SSE2) { >+? ? ? if (X86SSELevel < SSE2) { >? ? ? ? X86SSELevel = SSE2; >? ? ? ? ToggleFeature(X86::FeatureSSE1); >? ? ? ? ToggleFeature(X86::FeatureSSE2); >@@ -385,9 +383,6 @@ >? if (In64BitMode) >? ? ToggleFeature(X86::Mode64Bit); > >-? if (HasAVX) >-? ? X86SSELevel = MMX; >-? ? >? DEBUG(dbgs() << "Subtarget features: SSELevel " << X86SSELevel >? ? ? ? ? ? ? ? << ", 3DNowLevel " << X863DNowLevel >? ? ? ? ? ? ? ? << ", 64bit " << HasX86_64 << "\n"); > >Modified: llvm/trunk/lib/Target/X86/X86Subtarget.h >URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Subtarget.h?rev=147770&r1=147769&r2=147770&view=diff >============================================================================== >--- llvm/trunk/lib/Target/X86/X86Subtarget.h (original) >+++ llvm/trunk/lib/Target/X86/X86Subtarget.h Mon Jan? 9 03:02:13 2012 >@@ -42,7 +42,7 @@ >class X86Subtarget : public X86GenSubtargetInfo { >protected: >? enum X86SSEEnum { >-? ? NoMMXSSE, MMX, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42 >+? ? NoMMXSSE, MMX, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, AVX, AVX2 >? }; > >? enum X863DNowEnum { >@@ -75,12 +75,6 @@ >? /// HasSSE4A - True if the processor supports SSE4A instructions. >? bool HasSSE4A; > >-? /// HasAVX - Target has AVX instructions >-? bool HasAVX; >- >-? /// HasAVX2 - Target has AVX2 instructions >-? bool HasAVX2; >- >? /// HasAES - Target has AES instructions >? bool HasAES; > >@@ -179,24 +173,24 @@ > >? bool hasCMov() const { return HasCMov; } >? bool hasMMX() const { return X86SSELevel >= MMX; } >-? bool hasSSE1() const { return X86SSELevel >= SSE1; } >-? bool hasSSE2() const { return X86SSELevel >= SSE2; } >-? bool hasSSE3() const { return X86SSELevel >= SSE3; } >-? bool hasSSSE3() const { return X86SSELevel >= SSSE3; } >-? bool hasSSE41() const { return X86SSELevel >= SSE41; } >-? 
bool hasSSE42() const { return X86SSELevel >= SSE42; } >+? bool hasSSE1() const { return X86SSELevel >= SSE1 && !hasAVX(); } >+? bool hasSSE2() const { return X86SSELevel >= SSE2 && !hasAVX(); } >+? bool hasSSE3() const { return X86SSELevel >= SSE3 && !hasAVX(); } >+? bool hasSSSE3() const { return X86SSELevel >= SSSE3 && !hasAVX(); } >+? bool hasSSE41() const { return X86SSELevel >= SSE41 && !hasAVX(); } >+? bool hasSSE42() const { return X86SSELevel >= SSE42 && !hasAVX(); } >? bool hasSSE4A() const { return HasSSE4A; } >? bool has3DNow() const { return X863DNowLevel >= ThreeDNow; } >? bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; } >? bool hasPOPCNT() const { return HasPOPCNT; } >-? bool hasAVX() const { return HasAVX; } >-? bool hasAVX2() const { return HasAVX2; } >-? bool hasXMM() const { return hasSSE1() || hasAVX(); } >-? bool hasXMMInt() const { return hasSSE2() || hasAVX(); } >-? bool hasSSE3orAVX() const { return hasSSE3() || hasAVX(); } >-? bool hasSSSE3orAVX() const { return hasSSSE3() || hasAVX(); } >-? bool hasSSE41orAVX() const { return hasSSE41() || hasAVX(); } >-? bool hasSSE42orAVX() const { return hasSSE42() || hasAVX(); } >+? bool hasAVX() const { return X86SSELevel >= AVX; } >+? bool hasAVX2() const { return X86SSELevel >= AVX2; } >+? bool hasXMM() const { return X86SSELevel >= SSE1; } >+? bool hasXMMInt() const { return X86SSELevel >= SSE2; } >+? bool hasSSE3orAVX() const { return X86SSELevel >= SSE3; } >+? bool hasSSSE3orAVX() const { return X86SSELevel >= SSSE3; } >+? bool hasSSE41orAVX() const { return X86SSELevel >= SSE41; } >+? bool hasSSE42orAVX() const { return X86SSELevel >= SSE42; } >? bool hasAES() const { return HasAES; } >? bool hasCLMUL() const { return HasCLMUL; } >? 
bool hasFMA3() const { return HasFMA3; } > > >_______________________________________________ >llvm-commits mailing list >llvm-commits at cs.uiuc.edu >http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits > > > -------------- next part -------------- An HTML attachment was scrubbed... URL: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120109/b3c1273c/attachment.html From eli.bendersky at intel.com Mon Jan 9 09:15:14 2012 From: eli.bendersky at intel.com (Bendersky, Eli) Date: Mon, 9 Jan 2012 15:15:14 +0000 Subject: [llvm-commits] [PATCH] ELFObjectFile with dynamic loading support Message-ID: <9BBE4537D1BAAB479E9E8F9D4234619D07D484@HASMSX103.ger.corp.intel.com> Hello, Following the email I sent to LLVMdev earlier today (http://lists.cs.uiuc.edu/pipermail/llvmdev/2012-January/046671.html), please find attached the first patch in the MCJIT/ELF series. It presents a subclass of ELFObjectFile, named DyldELFObject, which supports basic dynamic loading. This class is used by MCJIT/ELF to load an ELF image generated by MC into memory and executing it. Please note that there are no stand-alone tests for this class yet. It is being tested extensively in the ExecutionEngine tests run on MCJIT/ELF, which will be part of the next patch in the series, once this one is accepted and committed. Thanks in advance, Eli --------------------------------------------------------------------- Intel Israel (74) Limited This e-mail and any attachments may contain confidential material for the sole use of the intended recipient(s). Any review or distribution by others is strictly prohibited. If you are not the intended recipient, please contact the sender and delete all copies. -------------- next part -------------- An HTML attachment was scrubbed... URL: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120109/593ee087/attachment-0001.html -------------- next part -------------- A non-text attachment was scrubbed... 
Name: elfobjectfile.1.patch Type: application/octet-stream Size: 13381 bytes Desc: elfobjectfile.1.patch Url : http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120109/593ee087/attachment-0001.obj From craig.topper at gmail.com Mon Jan 9 10:21:22 2012 From: craig.topper at gmail.com (Craig Topper) Date: Mon, 9 Jan 2012 08:21:22 -0800 Subject: [llvm-commits] [llvm] r147770 - in /llvm/trunk/lib/Target/X86: X86.td X86Subtarget.cpp X86Subtarget.h In-Reply-To: <1326120433.43884.YahooMailNeo@web161501.mail.bf1.yahoo.com> References: <20120109090213.7508B2A6C12C@llvm.org> <1326120433.43884.YahooMailNeo@web161501.mail.bf1.yahoo.com> Message-ID: The SSELevel variable implies a linear progression which only works for SSE and AVX. FMA3/FMA4/XOP aren't linear since Intel only supports FMA3 and AMD supports FMA4/XOP and they all branch off from AVX1. On Mon, Jan 9, 2012 at 6:47 AM, Jan Sjodin wrote: > What should be done with the other extensions: FMA3, FMA4 and XOP? Should > they also be included in this list? > > - Jan > > ------------------------------ > *From:* Craig Topper > *To:* llvm-commits at cs.uiuc.edu > *Sent:* Monday, January 9, 2012 4:02 AM > *Subject:* [llvm-commits] [llvm] r147770 - in /llvm/trunk/lib/Target/X86: > X86.td X86Subtarget.cpp X86Subtarget.h > > Author: ctopper > Date: Mon Jan 9 03:02:13 2012 > New Revision: 147770 > > URL: http://llvm.org/viewvc/llvm-project?rev=147770&view=rev > Log: > Remove AVX hack in X86Subtarget. AVX/AVX2 are now treated as an SSE level. > Predicate functions have been altered to maintain previous names and > behavior. 
> > Modified: > llvm/trunk/lib/Target/X86/X86.td > llvm/trunk/lib/Target/X86/X86Subtarget.cpp > llvm/trunk/lib/Target/X86/X86Subtarget.h > > Modified: llvm/trunk/lib/Target/X86/X86.td > URL: > http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86.td?rev=147770&r1=147769&r2=147770&view=diff > > ============================================================================== > --- llvm/trunk/lib/Target/X86/X86.td (original) > +++ llvm/trunk/lib/Target/X86/X86.td Mon Jan 9 03:02:13 2012 > @@ -80,9 +80,10 @@ > "Support SSE 4a instructions", > [FeatureSSE3]>; > > -def FeatureAVX : SubtargetFeature<"avx", "HasAVX", "true", > - "Enable AVX instructions">; > -def FeatureAVX2 : SubtargetFeature<"avx2", "HasAVX2", "true", > +def FeatureAVX : SubtargetFeature<"avx", "X86SSELevel", "AVX", > + "Enable AVX instructions", > + [FeatureSSE42]>; > +def FeatureAVX2 : SubtargetFeature<"avx2", "X86SSELevel", "AVX2", > "Enable AVX2 instructions", > [FeatureAVX]>; > def FeatureCLMUL : SubtargetFeature<"clmul", "HasCLMUL", "true", > > Modified: llvm/trunk/lib/Target/X86/X86Subtarget.cpp > URL: > http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Subtarget.cpp?rev=147770&r1=147769&r2=147770&view=diff > > ============================================================================== > --- llvm/trunk/lib/Target/X86/X86Subtarget.cpp (original) > +++ llvm/trunk/lib/Target/X86/X86Subtarget.cpp Mon Jan 9 03:02:13 2012 > @@ -198,7 +198,7 @@ > if ((ECX >> 19) & 1) { X86SSELevel = SSE41; > ToggleFeature(X86::FeatureSSE41);} > if ((ECX >> 20) & 1) { X86SSELevel = SSE42; > ToggleFeature(X86::FeatureSSE42);} > // FIXME: AVX codegen support is not ready. 
> - //if ((ECX >> 28) & 1) { HasAVX = true; > ToggleFeature(X86::FeatureAVX); } > + //if ((ECX >> 28) & 1) { X86SSELevel = AVX; > ToggleFeature(X86::FeatureAVX); } > > bool IsIntel = memcmp(text.c, "GenuineIntel", 12) == 0; > bool IsAMD = !IsIntel && memcmp(text.c, "AuthenticAMD", 12) == 0; > @@ -295,7 +295,7 @@ > } > // FIXME: AVX2 codegen support is not ready. > //if ((EBX >> 5) & 0x1) { > - // HasAVX2 = true; > + // X86SSELevel = AVX2;; > // ToggleFeature(X86::FeatureAVX2); > //} > if ((EBX >> 8) & 0x1) { > @@ -317,8 +317,6 @@ > , HasX86_64(false) > , HasPOPCNT(false) > , HasSSE4A(false) > - , HasAVX(false) > - , HasAVX2(false) > , HasAES(false) > , HasCLMUL(false) > , HasFMA3(false) > @@ -372,7 +370,7 @@ > HasX86_64 = true; ToggleFeature(X86::Feature64Bit); > HasCMov = true; ToggleFeature(X86::FeatureCMOV); > > - if (!HasAVX && X86SSELevel < SSE2) { > + if (X86SSELevel < SSE2) { > X86SSELevel = SSE2; > ToggleFeature(X86::FeatureSSE1); > ToggleFeature(X86::FeatureSSE2); > @@ -385,9 +383,6 @@ > if (In64BitMode) > ToggleFeature(X86::Mode64Bit); > > - if (HasAVX) > - X86SSELevel = MMX; > - > DEBUG(dbgs() << "Subtarget features: SSELevel " << X86SSELevel > << ", 3DNowLevel " << X863DNowLevel > << ", 64bit " << HasX86_64 << "\n"); > > Modified: llvm/trunk/lib/Target/X86/X86Subtarget.h > URL: > http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Subtarget.h?rev=147770&r1=147769&r2=147770&view=diff > > ============================================================================== > --- llvm/trunk/lib/Target/X86/X86Subtarget.h (original) > +++ llvm/trunk/lib/Target/X86/X86Subtarget.h Mon Jan 9 03:02:13 2012 > @@ -42,7 +42,7 @@ > class X86Subtarget : public X86GenSubtargetInfo { > protected: > enum X86SSEEnum { > - NoMMXSSE, MMX, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42 > + NoMMXSSE, MMX, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, AVX, AVX2 > }; > > enum X863DNowEnum { > @@ -75,12 +75,6 @@ > /// HasSSE4A - True if the processor supports SSE4A instructions. 
> bool HasSSE4A; > > - /// HasAVX - Target has AVX instructions > - bool HasAVX; > - > - /// HasAVX2 - Target has AVX2 instructions > - bool HasAVX2; > - > /// HasAES - Target has AES instructions > bool HasAES; > > @@ -179,24 +173,24 @@ > > bool hasCMov() const { return HasCMov; } > bool hasMMX() const { return X86SSELevel >= MMX; } > - bool hasSSE1() const { return X86SSELevel >= SSE1; } > - bool hasSSE2() const { return X86SSELevel >= SSE2; } > - bool hasSSE3() const { return X86SSELevel >= SSE3; } > - bool hasSSSE3() const { return X86SSELevel >= SSSE3; } > - bool hasSSE41() const { return X86SSELevel >= SSE41; } > - bool hasSSE42() const { return X86SSELevel >= SSE42; } > + bool hasSSE1() const { return X86SSELevel >= SSE1 && !hasAVX(); } > + bool hasSSE2() const { return X86SSELevel >= SSE2 && !hasAVX(); } > + bool hasSSE3() const { return X86SSELevel >= SSE3 && !hasAVX(); } > + bool hasSSSE3() const { return X86SSELevel >= SSSE3 && !hasAVX(); } > + bool hasSSE41() const { return X86SSELevel >= SSE41 && !hasAVX(); } > + bool hasSSE42() const { return X86SSELevel >= SSE42 && !hasAVX(); } > bool hasSSE4A() const { return HasSSE4A; } > bool has3DNow() const { return X863DNowLevel >= ThreeDNow; } > bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; } > bool hasPOPCNT() const { return HasPOPCNT; } > - bool hasAVX() const { return HasAVX; } > - bool hasAVX2() const { return HasAVX2; } > - bool hasXMM() const { return hasSSE1() || hasAVX(); } > - bool hasXMMInt() const { return hasSSE2() || hasAVX(); } > - bool hasSSE3orAVX() const { return hasSSE3() || hasAVX(); } > - bool hasSSSE3orAVX() const { return hasSSSE3() || hasAVX(); } > - bool hasSSE41orAVX() const { return hasSSE41() || hasAVX(); } > - bool hasSSE42orAVX() const { return hasSSE42() || hasAVX(); } > + bool hasAVX() const { return X86SSELevel >= AVX; } > + bool hasAVX2() const { return X86SSELevel >= AVX2; } > + bool hasXMM() const { return X86SSELevel >= SSE1; } > + bool hasXMMInt() const { 
return X86SSELevel >= SSE2; } > + bool hasSSE3orAVX() const { return X86SSELevel >= SSE3; } > + bool hasSSSE3orAVX() const { return X86SSELevel >= SSSE3; } > + bool hasSSE41orAVX() const { return X86SSELevel >= SSE41; } > + bool hasSSE42orAVX() const { return X86SSELevel >= SSE42; } > bool hasAES() const { return HasAES; } > bool hasCLMUL() const { return HasCLMUL; } > bool hasFMA3() const { return HasFMA3; } > > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits > > > -- ~Craig -------------- next part -------------- An HTML attachment was scrubbed... URL: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120109/9fc67938/attachment.html From benny.kra at googlemail.com Mon Jan 9 11:23:27 2012 From: benny.kra at googlemail.com (Benjamin Kramer) Date: Mon, 09 Jan 2012 17:23:27 -0000 Subject: [llvm-commits] [llvm] r147777 - in /llvm/trunk: lib/Transforms/InstCombine/InstCombineAndOrXor.cpp test/Transforms/InstCombine/sign-test-and-or.ll Message-ID: <20120109172327.A6DA21BE003@llvm.org> Author: d0k Date: Mon Jan 9 11:23:27 2012 New Revision: 147777 URL: http://llvm.org/viewvc/llvm-project?rev=147777&view=rev Log: InstCombine: Teach foldLogOpOfMaskedICmpsHelper that sign bit tests are bit tests. This subsumes several other transforms while enabling us to catch more cases. 
Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp llvm/trunk/test/Transforms/InstCombine/sign-test-and-or.ll Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp?rev=147777&r1=147776&r2=147777&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp (original) +++ llvm/trunk/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp Mon Jan 9 11:23:27 2012 @@ -496,6 +496,38 @@ return result; } +/// decomposeBitTestICmp - Decompose an icmp into the form ((X & Y) pred Z) +/// if possible. The returned predicate is either == or !=. Returns false if +/// decomposition fails. +static bool decomposeBitTestICmp(const ICmpInst *I, ICmpInst::Predicate &Pred, + Value *&X, Value *&Y, Value *&Z) { + // X < 0 is equivalent to (X & SignBit) != 0. + if (I->getPredicate() == ICmpInst::ICMP_SLT) + if (ConstantInt *C = dyn_cast(I->getOperand(1))) + if (C->isZero()) { + X = I->getOperand(0); + Y = ConstantInt::get(I->getContext(), + APInt::getSignBit(C->getBitWidth())); + Pred = ICmpInst::ICMP_NE; + Z = C; + return true; + } + + // X > -1 is equivalent to (X & SignBit) == 0. 
+ if (I->getPredicate() == ICmpInst::ICMP_SGT) + if (ConstantInt *C = dyn_cast(I->getOperand(1))) + if (C->isAllOnesValue()) { + X = I->getOperand(0); + Y = ConstantInt::get(I->getContext(), + APInt::getSignBit(C->getBitWidth())); + Pred = ICmpInst::ICMP_EQ; + Z = ConstantInt::getNullValue(C->getType()); + return true; + } + + return false; +} + /// foldLogOpOfMaskedICmpsHelper: /// handle (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E) /// return the set of pattern classes (from MaskedICmpType) @@ -503,10 +535,9 @@ static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A, Value*& B, Value*& C, Value*& D, Value*& E, - ICmpInst *LHS, ICmpInst *RHS) { - ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate(); - if (LHSCC != ICmpInst::ICMP_EQ && LHSCC != ICmpInst::ICMP_NE) return 0; - if (RHSCC != ICmpInst::ICMP_EQ && RHSCC != ICmpInst::ICMP_NE) return 0; + ICmpInst *LHS, ICmpInst *RHS, + ICmpInst::Predicate &LHSCC, + ICmpInst::Predicate &RHSCC) { if (LHS->getOperand(0)->getType() != RHS->getOperand(0)->getType()) return 0; // vectors are not (yet?) supported if (LHS->getOperand(0)->getType()->isVectorTy()) return 0; @@ -520,40 +551,60 @@ Value *L1 = LHS->getOperand(0); Value *L2 = LHS->getOperand(1); Value *L11,*L12,*L21,*L22; - if (match(L1, m_And(m_Value(L11), m_Value(L12)))) { - if (!match(L2, m_And(m_Value(L21), m_Value(L22)))) + // Check whether the icmp can be decomposed into a bit test. + if (decomposeBitTestICmp(LHS, LHSCC, L11, L12, L2)) { + L21 = L22 = L1 = 0; + } else { + // Look for ANDs in the LHS icmp. + if (match(L1, m_And(m_Value(L11), m_Value(L12)))) { + if (!match(L2, m_And(m_Value(L21), m_Value(L22)))) + L21 = L22 = 0; + } else { + if (!match(L2, m_And(m_Value(L11), m_Value(L12)))) + return 0; + std::swap(L1, L2); L21 = L22 = 0; - } - else { - if (!match(L2, m_And(m_Value(L11), m_Value(L12)))) - return 0; - std::swap(L1, L2); - L21 = L22 = 0; + } } + // Bail if LHS was a icmp that can't be decomposed into an equality. 
+ if (!ICmpInst::isEquality(LHSCC)) + return 0; + Value *R1 = RHS->getOperand(0); Value *R2 = RHS->getOperand(1); Value *R11,*R12; bool ok = false; - if (match(R1, m_And(m_Value(R11), m_Value(R12)))) { - if (R11 != 0 && (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22)) { - A = R11; D = R12; E = R2; ok = true; + if (decomposeBitTestICmp(RHS, RHSCC, R11, R12, R2)) { + if (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22) { + A = R11; D = R12; + } else if (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22) { + A = R12; D = R11; + } else { + return 0; } - else - if (R12 != 0 && (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22)) { + E = R2; R1 = 0; ok = true; + } else if (match(R1, m_And(m_Value(R11), m_Value(R12)))) { + if (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22) { + A = R11; D = R12; E = R2; ok = true; + } else if (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22) { A = R12; D = R11; E = R2; ok = true; } } + + // Bail if RHS was a icmp that can't be decomposed into an equality. + if (!ICmpInst::isEquality(RHSCC)) + return 0; + + // Look for ANDs in on the right side of the RHS icmp. 
if (!ok && match(R2, m_And(m_Value(R11), m_Value(R12)))) { - if (R11 != 0 && (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22)) { - A = R11; D = R12; E = R1; ok = true; - } - else - if (R12 != 0 && (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22)) { + if (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22) { + A = R11; D = R12; E = R1; ok = true; + } else if (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22) { A = R12; D = R11; E = R1; ok = true; - } - else + } else { return 0; + } } if (!ok) return 0; @@ -582,7 +633,11 @@ ICmpInst::Predicate NEWCC, llvm::InstCombiner::BuilderTy* Builder) { Value *A = 0, *B = 0, *C = 0, *D = 0, *E = 0; - unsigned mask = foldLogOpOfMaskedICmpsHelper(A, B, C, D, E, LHS, RHS); + ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate(); + unsigned mask = foldLogOpOfMaskedICmpsHelper(A, B, C, D, E, LHS, RHS, + LHSCC, RHSCC); + assert(ICmpInst::isEquality(LHSCC) && ICmpInst::isEquality(RHSCC) && + "foldLogOpOfMaskedICmpsHelper must return an equality predicate."); if (mask == 0) return 0; if (NEWCC == ICmpInst::ICMP_NE) @@ -631,11 +686,11 @@ ConstantInt *CCst = dyn_cast(C); if (CCst == 0) return 0; - if (LHS->getPredicate() != NEWCC) + if (LHSCC != NEWCC) CCst = dyn_cast( ConstantExpr::getXor(BCst, CCst) ); ConstantInt *ECst = dyn_cast(E); if (ECst == 0) return 0; - if (RHS->getPredicate() != NEWCC) + if (RHSCC != NEWCC) ECst = dyn_cast( ConstantExpr::getXor(DCst, ECst) ); ConstantInt* MCst = dyn_cast( ConstantExpr::getAnd(ConstantExpr::getAnd(BCst, DCst), @@ -694,18 +749,6 @@ Value *NewOr = Builder->CreateOr(Val, Val2); return Builder->CreateICmp(LHSCC, NewOr, LHSCst); } - - // (icmp slt A, 0) & (icmp slt B, 0) --> (icmp slt (A&B), 0) - if (LHSCC == ICmpInst::ICMP_SLT && LHSCst->isZero()) { - Value *NewAnd = Builder->CreateAnd(Val, Val2); - return Builder->CreateICmp(LHSCC, NewAnd, LHSCst); - } - - // (icmp sgt A, -1) & (icmp sgt B, -1) --> (icmp sgt (A|B), -1) - if (LHSCC == ICmpInst::ICMP_SGT && 
LHSCst->isAllOnesValue()) { - Value *NewOr = Builder->CreateOr(Val, Val2); - return Builder->CreateICmp(LHSCC, NewOr, LHSCst); - } } // (trunc x) == C1 & (and x, CA) == C2 -> (and x, CA|CMAX) == C1|C2 @@ -744,21 +787,6 @@ } } - // (X & C) == 0 & X > -1 -> (X & (C | SignBit)) == 0 - if ((LHSCC == ICmpInst::ICMP_EQ && LHSCst->isZero() && - RHSCC == ICmpInst::ICMP_SGT && RHSCst->isAllOnesValue()) || - (RHSCC == ICmpInst::ICMP_EQ && RHSCst->isZero() && - LHSCC == ICmpInst::ICMP_SGT && LHSCst->isAllOnesValue())) { - ICmpInst *I = LHSCC == ICmpInst::ICMP_EQ ? LHS : RHS; - Value *X; ConstantInt *C; - if (I->hasOneUse() && - match(I->getOperand(0), m_OneUse(m_And(m_Value(X), m_ConstantInt(C))))){ - APInt New = C->getValue() | APInt::getSignBit(C->getBitWidth()); - return Builder->CreateICmpEQ(Builder->CreateAnd(X, Builder->getInt(New)), - I->getOperand(1)); - } - } - // From here on, we only handle: // (icmp1 A, C1) & (icmp2 A, C2) --> something simpler. if (Val != Val2) return 0; @@ -1443,33 +1471,6 @@ Value *NewOr = Builder->CreateOr(Val, Val2); return Builder->CreateICmp(LHSCC, NewOr, LHSCst); } - - // (icmp slt A, 0) | (icmp slt B, 0) --> (icmp slt (A|B), 0) - if (LHSCC == ICmpInst::ICMP_SLT && LHSCst->isZero()) { - Value *NewOr = Builder->CreateOr(Val, Val2); - return Builder->CreateICmp(LHSCC, NewOr, LHSCst); - } - - // (icmp sgt A, -1) | (icmp sgt B, -1) --> (icmp sgt (A&B), -1) - if (LHSCC == ICmpInst::ICMP_SGT && LHSCst->isAllOnesValue()) { - Value *NewAnd = Builder->CreateAnd(Val, Val2); - return Builder->CreateICmp(LHSCC, NewAnd, LHSCst); - } - } - - // (X & C) != 0 | X < 0 -> (X & (C | SignBit)) != 0 - if ((LHSCC == ICmpInst::ICMP_NE && LHSCst->isZero() && - RHSCC == ICmpInst::ICMP_SLT && RHSCst->isZero()) || - (RHSCC == ICmpInst::ICMP_NE && RHSCst->isZero() && - LHSCC == ICmpInst::ICMP_SLT && LHSCst->isZero())) { - ICmpInst *I = LHSCC == ICmpInst::ICMP_NE ? 
LHS : RHS; - Value *X; ConstantInt *C; - if (I->hasOneUse() && - match(I->getOperand(0), m_OneUse(m_And(m_Value(X), m_ConstantInt(C))))){ - APInt New = C->getValue() | APInt::getSignBit(C->getBitWidth()); - return Builder->CreateICmpNE(Builder->CreateAnd(X, Builder->getInt(New)), - I->getOperand(1)); - } } // (icmp ult (X + CA), C1) | (icmp eq X, C2) -> (icmp ule (X + CA), C1) Modified: llvm/trunk/test/Transforms/InstCombine/sign-test-and-or.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/sign-test-and-or.ll?rev=147777&r1=147776&r2=147777&view=diff ============================================================================== --- llvm/trunk/test/Transforms/InstCombine/sign-test-and-or.ll (original) +++ llvm/trunk/test/Transforms/InstCombine/sign-test-and-or.ll Mon Jan 9 11:23:27 2012 @@ -157,3 +157,23 @@ if.end: ret void } + +define void @test9(i32 %a) nounwind { + %1 = and i32 %a, 1073741824 + %2 = icmp ne i32 %1, 0 + %3 = icmp sgt i32 %a, -1 + %or.cond = and i1 %2, %3 + br i1 %or.cond, label %if.then, label %if.end + +; CHECK: @test9 +; CHECK-NEXT: %1 = and i32 %a, -1073741824 +; CHECK-NEXT: %2 = icmp eq i32 %1, 1073741824 +; CHECK-NEXT: br i1 %2, label %if.then, label %if.end + +if.then: + tail call void @foo() nounwind + ret void + +if.end: + ret void +} From resistor at mac.com Mon Jan 9 11:32:48 2012 From: resistor at mac.com (Owen Anderson) Date: Mon, 09 Jan 2012 09:32:48 -0800 Subject: [llvm-commits] PATCH: Teach the x86 backend to fold mask & shift patterns into an addressing mode computation In-Reply-To: References: <4F09A1A0.5040508@free.fr> <7DE70FDACDE4CD4887C4278C12A2E30505B406@HASMSX104.ger.corp.intel.com> Message-ID: <0AE9804F-C89F-4EBA-B1F2-61E5339BF683@mac.com> On Jan 9, 2012, at 4:20 AM, Chandler Carruth wrote: > > + SDValue NewShiftAmt = DAG.getConstant(ShiftAmt + AMShiftAmt, MVT::i8); > > Please use getShiftAmountTy. > > None of the other address mode computations use it. 
=/ It seems like goodness, but it requires threading a TargetLowering handle into this code. I've got 4 or 5 more patches queued up behind this that changing the interface in that way would make kind of annoying. I'd prefer to clean up all of the i8 shift types in one pass afterward. That sound good to you? This isn't necessary. This file is X86-specific (and isn't trying to handle vector shifts), so the shift type is fixed to MVT::i8. getShiftAmountTy() is only needed in target-independent parts of codegen where you don't know what the shift type will be. --Owen -------------- next part -------------- An HTML attachment was scrubbed... URL: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120109/e251f2e3/attachment.html From benny.kra at googlemail.com Mon Jan 9 11:33:30 2012 From: benny.kra at googlemail.com (Benjamin Kramer) Date: Mon, 9 Jan 2012 18:33:30 +0100 Subject: [llvm-commits] [llvm] r147749 - in /llvm/trunk: lib/Transforms/InstCombine/InstCombineAndOrXor.cpp test/Transforms/InstCombine/sign-test-and-or.ll In-Reply-To: <4F0A350D.4080800@gmail.com> References: <20120108183224.701CE2A6C12C@llvm.org> <4F0A350D.4080800@gmail.com> Message-ID: <6D2D8EF2-AD58-49D5-BC3D-2B8C9BA7BC2E@googlemail.com> On 09.01.2012, at 01:30, Dirk Steinke wrote: > On 01/08/2012 11:50 PM, Eli Friedman wrote: >> On Sun, Jan 8, 2012 at 10:32 AM, Benjamin Kramer >> wrote: >>> Author: d0k >>> Date: Sun Jan 8 12:32:24 2012 >>> New Revision: 147749 >>> >>> URL: http://llvm.org/viewvc/llvm-project?rev=147749&view=rev >>> Log: >>> InstCombine: If we have a bit test and a sign test anded/ored together, merge the sign bit into the bit test. > [snip] >> Err, shouldn't this be "(X & C) != 0 | X < 0"? It looks like the >> actual transformation is correct, just the comment is wrong. I fixed that in a followup, stupid copy&paste. >> On a more general note, could this code be combined with the code that >> transforms "(X& C1) == 0& (X& C2) == 0 -> (X& (C1|C2)) == 0"? 
>> It's essentially the same transformation; it's just that there's a >> more canonical way of writing "(X& SignBit) == 0". Should be pretty >> straightforward to write an "isAnyBitSetComparison()" helper. (Could >> also easily add some other potentially interesting transformations, >> like "(X> >> -Eli > > The code for "(X & C1) == 0 & (X & C2) == 0 -> (X & (C1|C2)) == 0" can actually combine a lot more bitpattern comparisons into a single icmp instruction (like (X & 5) == 0 & (X & 6) == 2 -> (X & 7) == 2). > And it handles the inversed case as well: > (X & 5) != 0 | (X & 6) != 2 -> (X & 7) != 2. > > It currently only accepts instructions of the form A & B == C, or A & B != C. If the function to check this (foldLogOpOfMaskedICmpsHelper) was appropriately adapted, the code should also be able to handle > other transformations, which are currently special cased, > like > (icmp sgt A, -1) & (icmp sgt B, -1) --> (icmp sgt (A|B), -1) > or > (trunc x) == C1 & (and x, CA) == C2 -> (and x, CA|CMAX) == C1|C2 > (I guess, a helper function for the output icmp would be in order, so that the code would not generate (icmp eq (X & signbit), 0), but (icmp sgt X,-1).) I extended foldLogOpOfMaskedICmpsHelper to handle (icmp sgt X, -1) and (icmp slt X, 0) in r147777. Adding more patterns should be straightforward now. - Ben > The bad thing about the current approach is, that the code only ever tries to combine icmps, which are direct parameters to the same "and" > (or "or") instruction. If we have something like (icmp A & icmp B) & icmp C, > A and C will never be combined, unless A and B could be combined first. 
From stoklund at 2pi.dk Mon Jan 9 11:36:46 2012 From: stoklund at 2pi.dk (Jakob Stoklund Olesen) Date: Mon, 09 Jan 2012 09:36:46 -0800 Subject: [llvm-commits] [llvm] r147765 - /llvm/trunk/lib/Target/X86/X86InstrSSE.td In-Reply-To: <20120109050702.026E22A6C12C@llvm.org> References: <20120109050702.026E22A6C12C@llvm.org> Message-ID: <1DE32319-5A30-405E-9781-E1350C36C8C9@2pi.dk> On Jan 8, 2012, at 9:07 PM, Craig Topper wrote: > Author: ctopper > Date: Sun Jan 8 23:07:01 2012 > New Revision: 147765 > > URL: http://llvm.org/viewvc/llvm-project?rev=147765&view=rev > Log: > Move SSE2 logical operations PAND/POR/PXOR/PANDN above SSE1 logical operations ANDPS/ORPS/XORPS/ANDNPS. This fixes a pattern ordering issue that meant that the SSE2 instructions could never be directly selected since the SSE1 patterns would always match first. This is largely moot with the ExeDepsFix pass, but I'm trying to audit for all such ordering issues. Ordering? The order of patterns in .td files is not significant, AFAIK. /jakob From benny.kra at googlemail.com Mon Jan 9 11:36:29 2012 From: benny.kra at googlemail.com (Benjamin Kramer) Date: Mon, 09 Jan 2012 17:36:29 -0000 Subject: [llvm-commits] [llvm] r147779 - /llvm/trunk/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp Message-ID: <20120109173629.7BE211BE003@llvm.org> Author: d0k Date: Mon Jan 9 11:36:29 2012 New Revision: 147779 URL: http://llvm.org/viewvc/llvm-project?rev=147779&view=rev Log: Move assert to the right place. 
Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp?rev=147779&r1=147778&r2=147779&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp (original) +++ llvm/trunk/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp Mon Jan 9 11:36:29 2012 @@ -636,9 +636,9 @@ ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate(); unsigned mask = foldLogOpOfMaskedICmpsHelper(A, B, C, D, E, LHS, RHS, LHSCC, RHSCC); + if (mask == 0) return 0; assert(ICmpInst::isEquality(LHSCC) && ICmpInst::isEquality(RHSCC) && "foldLogOpOfMaskedICmpsHelper must return an equality predicate."); - if (mask == 0) return 0; if (NEWCC == ICmpInst::ICMP_NE) mask >>= 1; // treat "Not"-states as normal states From craig.topper at gmail.com Mon Jan 9 11:44:09 2012 From: craig.topper at gmail.com (Craig Topper) Date: Mon, 9 Jan 2012 09:44:09 -0800 Subject: [llvm-commits] [llvm] r147765 - /llvm/trunk/lib/Target/X86/X86InstrSSE.td In-Reply-To: <1DE32319-5A30-405E-9781-E1350C36C8C9@2pi.dk> References: <20120109050702.026E22A6C12C@llvm.org> <1DE32319-5A30-405E-9781-E1350C36C8C9@2pi.dk> Message-ID: When patterns are collapsed the ordering of predicate checks is maintained. Not sure whether that's a feature or not, but it is what's making AVX2 broadcast instructions for integer vectors preferred over the FP versions. Maybe some other cases too. 
On Mon, Jan 9, 2012 at 9:36 AM, Jakob Stoklund Olesen wrote: > > On Jan 8, 2012, at 9:07 PM, Craig Topper wrote: > > > Author: ctopper > > Date: Sun Jan 8 23:07:01 2012 > > New Revision: 147765 > > > > URL: http://llvm.org/viewvc/llvm-project?rev=147765&view=rev > > Log: > > Move SSE2 logical operations PAND/POR/PXOR/PANDN above SSE1 logical > operations ANDPS/ORPS/XORPS/ANDNPS. This fixes a pattern ordering issue > that meant that the SSE2 instructions could never be directly selected > since the SSE1 patterns would always match first. This is largely moot with > the ExeDepsFix pass, but I'm trying to audit for all such ordering issues. > > Ordering? > > The order of patterns in .td files is not significant, AFAIK. > > /jakob > > -- ~Craig -------------- next part -------------- An HTML attachment was scrubbed... URL: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120109/4cd8173d/attachment.html From dpatel at apple.com Mon Jan 9 11:49:48 2012 From: dpatel at apple.com (Devang Patel) Date: Mon, 09 Jan 2012 17:49:48 -0000 Subject: [llvm-commits] [llvm] r147781 - /llvm/trunk/lib/Analysis/DIBuilder.cpp Message-ID: <20120109174948.247CE1BE003@llvm.org> Author: dpatel Date: Mon Jan 9 11:49:47 2012 New Revision: 147781 URL: http://llvm.org/viewvc/llvm-project?rev=147781&view=rev Log: Update language check. Do not ignore DW_LANG_Python. Patch by Joe Groff! 
Modified: llvm/trunk/lib/Analysis/DIBuilder.cpp Modified: llvm/trunk/lib/Analysis/DIBuilder.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/DIBuilder.cpp?rev=147781&r1=147780&r2=147781&view=diff ============================================================================== --- llvm/trunk/lib/Analysis/DIBuilder.cpp (original) +++ llvm/trunk/lib/Analysis/DIBuilder.cpp Mon Jan 9 11:49:47 2012 @@ -76,7 +76,8 @@ StringRef Directory, StringRef Producer, bool isOptimized, StringRef Flags, unsigned RunTimeVer) { - assert (Lang <= dwarf::DW_LANG_D && Lang >= dwarf::DW_LANG_C89 + assert ((Lang <= dwarf::DW_LANG_Python && Lang >= dwarf::DW_LANG_C89 + || Lang <= dwarf::DW_LANG_hi_user && Lang >= dwarf::DW_LANG_lo_user) && "Invalid Language tag"); assert (!Filename.empty() && "Unable to create compile unit without filename"); From dpatel at apple.com Mon Jan 9 11:53:40 2012 From: dpatel at apple.com (Devang Patel) Date: Mon, 09 Jan 2012 09:53:40 -0800 Subject: [llvm-commits] Path lib/Analysis/DIBuilder.cpp to admit Python, user dwarf language tags In-Reply-To: References: Message-ID: Looks good. I applied your patch. r147781. Thanks! - Devang On Jan 7, 2012, at 10:14 AM, Joe Groff wrote: > Hi everyone. In LLVM 3.0, DIBuilder incorrectly rejects DW_LANG_Python > and user DWARF tags. Attached is a patch against vanilla LLVM 3.0 that > fixes the problem. Let me know if anything needs to be changed. > Thanks! 
> > -Joe > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits From stoklund at 2pi.dk Mon Jan 9 11:54:48 2012 From: stoklund at 2pi.dk (Jakob Stoklund Olesen) Date: Mon, 09 Jan 2012 09:54:48 -0800 Subject: [llvm-commits] [llvm] r147765 - /llvm/trunk/lib/Target/X86/X86InstrSSE.td In-Reply-To: References: <20120109050702.026E22A6C12C@llvm.org> <1DE32319-5A30-405E-9781-E1350C36C8C9@2pi.dk> Message-ID: On Jan 9, 2012, at 9:44 AM, Craig Topper wrote: > When patterns are collapsed the ordering of predicate checks is maintained. Not sure whether that's a feature or not, but it is what's making AVX2 broadcast instructions for integer vectors preferred over the FP versions. Maybe some other cases too. TableGen orders records alphabetically: class RecordKeeper { std::map Classes, Defs; There is a source location in Record, but it is only used for error reporting AFAICT. It is possible that the names invented for anonymous defs are source order related, but you shouldn't depend on it. It is much too fragile. /jakob From jan_sjodin at yahoo.com Mon Jan 9 11:58:14 2012 From: jan_sjodin at yahoo.com (Jan Sjodin) Date: Mon, 9 Jan 2012 09:58:14 -0800 (PST) Subject: [llvm-commits] XOP Intrinsics patch In-Reply-To: References: <1325607159.50862.YahooMailNeo@web161503.mail.bf1.yahoo.com> <1325620848.29233.YahooMailNeo@web161506.mail.bf1.yahoo.com> Message-ID: <1326131894.43865.YahooMailNeo@web161504.mail.bf1.yahoo.com> Finally got back to fixing this. I fixed the memops and refactored most patterns to include the intrinsics, not sure how to do vpcom and vpcmov so I kept them as-is. Added memory access tests for each unique kind of instruction for the intrinsics. 
- Jan >________________________________ > From: Craig Topper >To: Jan Sjodin >Cc: "llvm-commits at cs.uiuc.edu" >Sent: Tuesday, January 3, 2012 4:23 PM >Subject: Re: [llvm-commits] XOP Intrinsics patch > > >On Tue, Jan 3, 2012 at 12:00 PM, Jan Sjodin wrote: > >>All integer vector loads in patterns need to be memopv2i64(128-bit) or memopv4i64(256-bit) and bitcasted to the correct type because all integer vector loads are promoted to v2i64 or v4i64. So for instance >> >>> >>>+def : Pat<(int_x86_xop_vphsubwd (alignedloadv8i16 addr:$src1)), >>>+          (VPHSUBWDrm addr:$src1)>; >>> >>>Needs to be >>> >>>+def : Pat<(int_x86_xop_vphsubwd (bc_v8i16 (memopv2i64 addr:$src1))), >>>+          (VPHSUBWDrm addr:$src1)>; >>> >> >>If the promotion always happens, maybe it would be best to remove patterns like: >> >>def memopv8i16 : PatFrag<(ops node:$ptr), (v8i16 (memop node:$ptr))>; >>def memopv16i8 : PatFrag<(ops node:$ptr), (v16i8 (memop node:$ptr))>; >> >> >>- Jan >> >> >I had meant to do that after I was sure they were all gone. I'll try to check on that tonight and see if they can be removed now. > >-- >~Craig > > > -------------- next part -------------- An HTML attachment was scrubbed... URL: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120109/a9fc603c/attachment-0001.html -------------- next part -------------- A non-text attachment was scrubbed... 
Name: 0061_xop_intrinsics_reworked.patch Type: application/octet-stream Size: 111356 bytes Desc: not available Url : http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120109/a9fc603c/attachment-0001.obj From dpatel at apple.com Mon Jan 9 11:59:27 2012 From: dpatel at apple.com (Devang Patel) Date: Mon, 09 Jan 2012 09:59:27 -0800 Subject: [llvm-commits] [llvm] r147751 - /llvm/trunk/lib/CodeGen/MachineCopyPropagation.cpp In-Reply-To: <20120108195228.59B3C2A6C12C@llvm.org> References: <20120108195228.59B3C2A6C12C@llvm.org> Message-ID: <07DBA048-7B68-482E-8EAA-75F13BDE064C@apple.com> On Jan 8, 2012, at 11:52 AM, Evan Cheng wrote: > Author: evancheng > Date: Sun Jan 8 13:52:28 2012 > New Revision: 147751 > > URL: http://llvm.org/viewvc/llvm-project?rev=147751&view=rev > Log: > Avoid eraseing copies from a reserved register unless the definition can be > safely proven not to have been clobbered. No small test case possible. > > Modified: > llvm/trunk/lib/CodeGen/MachineCopyPropagation.cpp > > Modified: llvm/trunk/lib/CodeGen/MachineCopyPropagation.cpp > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MachineCopyPropagation.cpp?rev=147751&r1=147750&r2=147751&view=diff > ============================================================================== > --- llvm/trunk/lib/CodeGen/MachineCopyPropagation.cpp (original) > +++ llvm/trunk/lib/CodeGen/MachineCopyPropagation.cpp Sun Jan 8 13:52:28 2012 > @@ -83,6 +83,25 @@ > } > } > > +static bool NoInterveningSideEffect(const MachineInstr *CopyMI, > + const MachineInstr *MI) { Please add a comment before the function. It'd be a good idea to follow the coding standard for a new pass and start function names with a lower-case letter. 
- Devang > + const MachineBasicBlock *MBB = CopyMI->getParent(); > + if (MI->getParent() != MBB) > + return false; > + MachineBasicBlock::const_iterator I = CopyMI; > + MachineBasicBlock::const_iterator E = MBB->end(); > + MachineBasicBlock::const_iterator E2 = MI; > + > + ++I; > + while (I != E && I != E2) { > + if (I->hasUnmodeledSideEffects() || I->isCall() || > + I->isTerminator()) > + return false; > + ++I; > + } > + return true; > +} > + > bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { > SmallSetVector MaybeDeadCopies; // Candidates for deletion > DenseMap AvailCopyMap; // Def -> available copies map > @@ -108,6 +127,7 @@ > MachineInstr *CopyMI = CI->second; > unsigned SrcSrc = CopyMI->getOperand(1).getReg(); > if (!ReservedRegs.test(Def) && > + (!ReservedRegs.test(Src) || NoInterveningSideEffect(CopyMI, MI)) && > (SrcSrc == Def || TRI->isSubRegister(SrcSrc, Def))) { > // The two copies cancel out and the source of the first copy > // hasn't been overridden, eliminate the second one. e.g. > @@ -116,6 +136,12 @@ > // %EAX = COPY %ECX > // => > // %ECX = COPY %EAX > + // > + // Also avoid eliminating a copy from reserved registers unless the > + // definition is proven not clobbered. e.g. 
> + // %RSP = COPY %RAX > + // CALL > + // %RAX = COPY %RSP > CopyMI->getOperand(1).setIsKill(false); > MI->eraseFromParent(); > Changed = true; > > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits From craig.topper at gmail.com Mon Jan 9 12:00:12 2012 From: craig.topper at gmail.com (Craig Topper) Date: Mon, 9 Jan 2012 10:00:12 -0800 Subject: [llvm-commits] [llvm] r147765 - /llvm/trunk/lib/Target/X86/X86InstrSSE.td In-Reply-To: References: <20120109050702.026E22A6C12C@llvm.org> <1DE32319-5A30-405E-9781-E1350C36C8C9@2pi.dk> Message-ID: Ordering seems to be maintained even for patterns declared with instructions and those wouldn't get an anonymous def would they? On Mon, Jan 9, 2012 at 9:54 AM, Jakob Stoklund Olesen wrote: > > On Jan 9, 2012, at 9:44 AM, Craig Topper wrote: > > > When patterns are collapsed the ordering of predicate checks is > maintained. Not sure whether that's a feature or not, but it is what's > making AVX2 broadcast instructions for integer vectors preferred over the > FP versions. Maybe some other cases too. > > TableGen orders records alphabetically: > > class RecordKeeper { > std::map Classes, Defs; > > There is a source location in Record, but it is only used for error > reporting AFAICT. > > It is possible that the names invented for anonymous defs are source order > related, but you shouldn't depend on it. It is much too fragile. > > /jakob > > -- ~Craig -------------- next part -------------- An HTML attachment was scrubbed... URL: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120109/41c4b06f/attachment.html From STPWORLD at narod.ru Mon Jan 9 12:00:54 2012 From: STPWORLD at narod.ru (Stepan Dyatkovskiy) Date: Mon, 09 Jan 2012 22:00:54 +0400 Subject: [llvm-commits] [LLVM, opt, LoopUnswitch] Compile-time improvements. 
In-Reply-To: <885601325664968@web89.yandex.ru> References: <4EFDAB2C.5000606@narod.ru> <640641325498007@web20.yandex.ru> <200741325616644@web103.yandex.ru> <885601325664968@web89.yandex.ru> Message-ID: <478721326132054@web78.yandex.ru> Ping. -Stepan. 04.01.2012, 12:16, "Stepan Dyatkovskiy" : > Ping. > -Stepan > > 03.01.2012, 22:50, "Stepan Dyatkovskiy" : > >> Ping. >> -Stepan. >> >> 02.01.2012, 13:53, "Stepan Dyatkovskiy" : >>> ping. >>> -Stepan. >>> >>> 30.12.2011, 16:14, "Stepan Dyatkovskiy" : >>>> Hi. I made some fixes that improve compile time: >>>> >>>> 1. Size heuristics changed. Now we calculate the number of unswitching >>>> branches only once per loop. >>>> 2. Some checks were moved from UnswitchIfProfitable to >>>> processCurrentLoop, since they do not change during a processCurrentLoop >>>> iteration. This allows deciding to skip some loops at an early stage. >>>> >>>> I checked the compile time on the test >>>> >>>> MultiSource/Benchmarks/Prolangs-C++/shapes/shapes >>>> (there was a compile-time regression after my previous patch). >>>> >>>> Relative to my previous patch the compile time improved by ~8.5%. Relative >>>> to old revisions (before r146578) the compile time improved by ~2%. >>>> >>>> Please find the patch attached for review. >>>> >>>> -Stepan. 
>>>> >>>> _______________________________________________ >>>> llvm-commits mailing list >>>> llvm-commits at cs.uiuc.edu >>>> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits From stoklund at 2pi.dk Mon Jan 9 12:15:32 2012 From: stoklund at 2pi.dk (Jakob Stoklund Olesen) Date: Mon, 09 Jan 2012 10:15:32 -0800 Subject: [llvm-commits] [llvm] r147765 - /llvm/trunk/lib/Target/X86/X86InstrSSE.td In-Reply-To: References: <20120109050702.026E22A6C12C@llvm.org> <1DE32319-5A30-405E-9781-E1350C36C8C9@2pi.dk> Message-ID: <41C4E4C6-B55C-4B27-ADBE-57FF894BED88@2pi.dk> On Jan 9, 2012, at 10:00 AM, Craig Topper wrote: > Ordering seems to be maintained even for patterns declared with instructions and those wouldn't get an anonymous def would they? See PatternSortingPredicate in DAGISelEmitter.cpp. The very last tie-breaker criterion is the Record ID, so that would be equivalent to source ordering. It is very fragile to depend on this for correctness; it only kicks in when all the other criteria are identical. I think it would be safer to use AddedComplexity. /jakob From xerxes at zafena.se Mon Jan 9 12:56:16 2012 From: xerxes at zafena.se (=?ISO-8859-1?Q?Xerxes_R=E5nby?=) Date: Mon, 09 Jan 2012 19:56:16 +0100 Subject: [llvm-commits] [PATCH] ELFObjectFile with dynamic loading support In-Reply-To: <9BBE4537D1BAAB479E9E8F9D4234619D07D484@HASMSX103.ger.corp.intel.com> References: <9BBE4537D1BAAB479E9E8F9D4234619D07D484@HASMSX103.ger.corp.intel.com> Message-ID: <4F0B3850.8010009@zafena.se> 2012-01-09 16:15, Bendersky, Eli skrev: > Hello, Following the email I sent to LLVMdev earlier today (http://lists.cs.uiuc.edu/pipermail/llvmdev/2012-January/046671.html), please find attached the first patch in the MCJIT/ELF series. It presents a subclass of ELFObjectFile, named DyldELFObject, which supports basic dynamic loading. This class is used by MCJIT/ELF to load an ELF image generated by MC into memory and execute it.
> > Please note that there are no stand-alone tests for this class yet. It is being tested extensively in the ExecutionEngine tests run on MCJIT/ELF, which will be part of the next patch in the series, once this one is accepted and committed. > > Thanks in advance, > > Eli Hi Eli! Thank you and your team for working on MCJIT/ELF support! +// Walk through the ELF headers, updating virtual addresses to reflect where +// the object is currently loaded in memory +template +void DyldELFObject::rebaseObject( ... + else { + if (sec->sh_flags & ELF::SHF_WRITE) { + // TODO: setRangeWritable + } + if (sec->sh_flags & ELF::SHF_EXECINSTR) { + // TODO: setRangeExecutable + } It would be nice if these two TODOs got replaced by report_fatal_error("Unimplemented DyldELFObject setRangeWritable"); and report_fatal_error("Unimplemented DyldELFObject setRangeExecutable"); to prevent more obscure errors to sneak in if someone expect the two flags to have effect. Cheers Xerxes From kcc at google.com Mon Jan 9 12:53:15 2012 From: kcc at google.com (Kostya Serebryany) Date: Mon, 09 Jan 2012 18:53:15 -0000 Subject: [llvm-commits] [compiler-rt] r147784 - in /compiler-rt/trunk/lib/asan: asan_interceptors.cc asan_interceptors.h asan_internal.h asan_linux.cc asan_mac.cc asan_rtl.cc Message-ID: <20120109185315.7D0551BE003@llvm.org> Author: kcc Date: Mon Jan 9 12:53:15 2012 New Revision: 147784 URL: http://llvm.org/viewvc/llvm-project?rev=147784&view=rev Log: [asan] refactoring: move all interceptors to a single file Modified: compiler-rt/trunk/lib/asan/asan_interceptors.cc compiler-rt/trunk/lib/asan/asan_interceptors.h compiler-rt/trunk/lib/asan/asan_internal.h compiler-rt/trunk/lib/asan/asan_linux.cc compiler-rt/trunk/lib/asan/asan_mac.cc compiler-rt/trunk/lib/asan/asan_rtl.cc Modified: compiler-rt/trunk/lib/asan/asan_interceptors.cc URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/asan/asan_interceptors.cc?rev=147784&r1=147783&r2=147784&view=diff 
============================================================================== --- compiler-rt/trunk/lib/asan/asan_interceptors.cc (original) +++ compiler-rt/trunk/lib/asan/asan_interceptors.cc Mon Jan 9 12:53:15 2012 @@ -19,7 +19,9 @@ #include "asan_mapping.h" #include "asan_stack.h" #include "asan_stats.h" +#include "asan_thread_registry.h" +#include #include #include #include @@ -27,6 +29,29 @@ namespace __asan { +typedef void (*longjmp_f)(void *env, int val); +typedef longjmp_f _longjmp_f; +typedef longjmp_f siglongjmp_f; +typedef void (*__cxa_throw_f)(void *, void *, void *); +typedef int (*pthread_create_f)(pthread_t *thread, const pthread_attr_t *attr, + void *(*start_routine) (void *), void *arg); +#ifdef __APPLE__ +dispatch_async_f_f real_dispatch_async_f; +dispatch_sync_f_f real_dispatch_sync_f; +dispatch_after_f_f real_dispatch_after_f; +dispatch_barrier_async_f_f real_dispatch_barrier_async_f; +dispatch_group_async_f_f real_dispatch_group_async_f; +pthread_workqueue_additem_np_f real_pthread_workqueue_additem_np; +#endif + +sigaction_f real_sigaction; +signal_f real_signal; +longjmp_f real_longjmp; +_longjmp_f real__longjmp; +siglongjmp_f real_siglongjmp; +__cxa_throw_f real___cxa_throw; +pthread_create_f real_pthread_create; + index_f real_index; memcmp_f real_memcmp; memcpy_f real_memcpy; @@ -156,6 +181,32 @@ INTERCEPT_FUNCTION(strncasecmp); INTERCEPT_FUNCTION(strncmp); INTERCEPT_FUNCTION(strncpy); + + INTERCEPT_FUNCTION(sigaction); + INTERCEPT_FUNCTION(signal); + INTERCEPT_FUNCTION(longjmp); + INTERCEPT_FUNCTION(_longjmp); + INTERCEPT_FUNCTION_IF_EXISTS(__cxa_throw); + INTERCEPT_FUNCTION(pthread_create); + +#ifdef __APPLE__ + INTERCEPT_FUNCTION(dispatch_async_f); + INTERCEPT_FUNCTION(dispatch_sync_f); + INTERCEPT_FUNCTION(dispatch_after_f); + INTERCEPT_FUNCTION(dispatch_barrier_async_f); + INTERCEPT_FUNCTION(dispatch_group_async_f); + // We don't need to intercept pthread_workqueue_additem_np() to support the + // libdispatch API, but it helps us to 
debug the unsupported functions. Let's + // intercept it only during verbose runs. + if (FLAG_v >= 2) { + INTERCEPT_FUNCTION(pthread_workqueue_additem_np); + } +#else + // On Darwin siglongjmp tailcalls longjmp, so we don't want to intercept it + // there. + INTERCEPT_FUNCTION(siglongjmp); +#endif + #ifndef __APPLE__ INTERCEPT_FUNCTION(strnlen); #endif @@ -169,6 +220,136 @@ // ---------------------- Wrappers ---------------- {{{1 using namespace __asan; // NOLINT +#define OPERATOR_NEW_BODY \ + GET_STACK_TRACE_HERE_FOR_MALLOC;\ + return asan_memalign(0, size, &stack); + +#ifdef ANDROID +void *operator new(size_t size) { OPERATOR_NEW_BODY; } +void *operator new[](size_t size) { OPERATOR_NEW_BODY; } +#else +void *operator new(size_t size) throw(std::bad_alloc) { OPERATOR_NEW_BODY; } +void *operator new[](size_t size) throw(std::bad_alloc) { OPERATOR_NEW_BODY; } +void *operator new(size_t size, std::nothrow_t const&) throw() +{ OPERATOR_NEW_BODY; } +void *operator new[](size_t size, std::nothrow_t const&) throw() +{ OPERATOR_NEW_BODY; } +#endif + +#define OPERATOR_DELETE_BODY \ + GET_STACK_TRACE_HERE_FOR_FREE(ptr);\ + asan_free(ptr, &stack); + +void operator delete(void *ptr) throw() { OPERATOR_DELETE_BODY; } +void operator delete[](void *ptr) throw() { OPERATOR_DELETE_BODY; } +void operator delete(void *ptr, std::nothrow_t const&) throw() +{ OPERATOR_DELETE_BODY; } +void operator delete[](void *ptr, std::nothrow_t const&) throw() +{ OPERATOR_DELETE_BODY;} + +static void *asan_thread_start(void *arg) { + AsanThread *t = (AsanThread*)arg; + asanThreadRegistry().SetCurrent(t); + return t->ThreadStart(); +} + +extern "C" +#ifndef __APPLE__ +__attribute__((visibility("default"))) +#endif +int WRAP(pthread_create)(pthread_t *thread, const pthread_attr_t *attr, + void *(*start_routine) (void *), void *arg) { + GET_STACK_TRACE_HERE(kStackTraceMax, /*fast_unwind*/false); + AsanThread *curr_thread = asanThreadRegistry().GetCurrent(); + CHECK(curr_thread || 
asanThreadRegistry().IsCurrentThreadDying()); + int current_tid = asanThreadRegistry().GetCurrentTidOrMinusOne(); + AsanThread *t = AsanThread::Create(current_tid, start_routine, arg); + asanThreadRegistry().RegisterThread(t, current_tid, &stack); + return real_pthread_create(thread, attr, asan_thread_start, t); +} + +extern "C" +void *WRAP(signal)(int signum, void *handler) { + if (!AsanInterceptsSignal(signum)) { + return real_signal(signum, handler); + } + return NULL; +} + +extern "C" +int WRAP(sigaction)(int signum, const struct sigaction *act, + struct sigaction *oldact) { + if (!AsanInterceptsSignal(signum)) { + return real_sigaction(signum, act, oldact); + } + return 0; +} + + +static void UnpoisonStackFromHereToTop() { + int local_stack; + AsanThread *curr_thread = asanThreadRegistry().GetCurrent(); + CHECK(curr_thread); + uintptr_t top = curr_thread->stack_top(); + uintptr_t bottom = ((uintptr_t)&local_stack - kPageSize) & ~(kPageSize-1); + PoisonShadow(bottom, top - bottom, 0); +} + +extern "C" void WRAP(longjmp)(void *env, int val) { + UnpoisonStackFromHereToTop(); + real_longjmp(env, val); +} + +extern "C" void WRAP(_longjmp)(void *env, int val) { + UnpoisonStackFromHereToTop(); + real__longjmp(env, val); +} + +extern "C" void WRAP(siglongjmp)(void *env, int val) { + UnpoisonStackFromHereToTop(); + real_siglongjmp(env, val); +} + +extern "C" void __cxa_throw(void *a, void *b, void *c); + +#if ASAN_HAS_EXCEPTIONS == 1 +extern "C" void WRAP(__cxa_throw)(void *a, void *b, void *c) { + CHECK(&real___cxa_throw); + UnpoisonStackFromHereToTop(); + real___cxa_throw(a, b, c); +} +#endif + +extern "C" { +// intercept mlock and friends. +// Since asan maps 16T of RAM, mlock is completely unfriendly to asan. +// All functions return 0 (success). 
+static void MlockIsUnsupported() { + static bool printed = 0; + if (printed) return; + printed = true; + Printf("INFO: AddressSanitizer ignores mlock/mlockall/munlock/munlockall\n"); +} +int mlock(const void *addr, size_t len) { + MlockIsUnsupported(); + return 0; +} +int munlock(const void *addr, size_t len) { + MlockIsUnsupported(); + return 0; +} +int mlockall(int flags) { + MlockIsUnsupported(); + return 0; +} +int munlockall(void) { + MlockIsUnsupported(); + return 0; +} +} // extern "C" + + + static inline int CharCmp(unsigned char c1, unsigned char c2) { return (c1 == c2) ? 0 : (c1 < c2) ? -1 : 1; } Modified: compiler-rt/trunk/lib/asan/asan_interceptors.h URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/asan/asan_interceptors.h?rev=147784&r1=147783&r2=147784&view=diff ============================================================================== --- compiler-rt/trunk/lib/asan/asan_interceptors.h (original) +++ compiler-rt/trunk/lib/asan/asan_interceptors.h Mon Jan 9 12:53:15 2012 @@ -101,6 +101,8 @@ typedef int (*strncmp_f)(const char *s1, const char *s2, size_t size); typedef char* (*strncpy_f)(char *to, const char *from, size_t size); typedef size_t (*strnlen_f)(const char *s, size_t maxlen); +typedef void *(*signal_f)(int signum, void *handler); +typedef int (*sigaction_f)(int signum, const void *act, void *oldact); // __asan::real_X() holds pointer to library implementation of X(). extern index_f real_index; @@ -119,6 +121,8 @@ extern strncmp_f real_strncmp; extern strncpy_f real_strncpy; extern strnlen_f real_strnlen; +extern signal_f real_signal; +extern sigaction_f real_sigaction; // __asan::internal_X() is the implementation of X() for use in RTL. 
size_t internal_strlen(const char *s); Modified: compiler-rt/trunk/lib/asan/asan_internal.h URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/asan/asan_internal.h?rev=147784&r1=147783&r2=147784&view=diff ============================================================================== --- compiler-rt/trunk/lib/asan/asan_internal.h (original) +++ compiler-rt/trunk/lib/asan/asan_internal.h Mon Jan 9 12:53:15 2012 @@ -106,6 +106,8 @@ ssize_t AsanWrite(int fd, const void *buf, size_t count); int AsanClose(int fd); +bool AsanInterceptsSignal(int signum); + // Opens the file 'file_name" and reads up to 'max_len' bytes. // The resulting buffer is mmaped and stored in '*buff'. // The size of the mmaped region is stored in '*buff_size', @@ -151,6 +153,7 @@ extern size_t FLAG_max_malloc_fill_size; extern int FLAG_exitcode; extern bool FLAG_allow_user_poisoning; +extern bool FLAG_handle_segv; extern int asan_inited; // Used to avoid infinite recursion in __asan_init(). Modified: compiler-rt/trunk/lib/asan/asan_linux.cc URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/asan/asan_linux.cc?rev=147784&r1=147783&r2=147784&view=diff ============================================================================== --- compiler-rt/trunk/lib/asan/asan_linux.cc (original) +++ compiler-rt/trunk/lib/asan/asan_linux.cc Mon Jan 9 12:53:15 2012 @@ -64,6 +64,10 @@ #endif } +bool AsanInterceptsSignal(int signum) { + return signum == SIGSEGV && FLAG_handle_segv; +} + static void *asan_mmap(void *addr, size_t length, int prot, int flags, int fd, uint64_t offset) { # if __WORDSIZE == 64 Modified: compiler-rt/trunk/lib/asan/asan_mac.cc URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/asan/asan_mac.cc?rev=147784&r1=147783&r2=147784&view=diff ============================================================================== --- compiler-rt/trunk/lib/asan/asan_mac.cc (original) +++ compiler-rt/trunk/lib/asan/asan_mac.cc Mon Jan 9 12:53:15 2012 @@ -56,6 +56,10 @@ return 
NULL; } +bool AsanInterceptsSignal(int signum) { + return (signum == SIGSEGV || signum == SIGBUS) && FLAG_handle_segv; +} + static void *asan_mmap(void *addr, size_t length, int prot, int flags, int fd, uint64_t offset) { return mmap(addr, length, prot, flags, fd, offset); Modified: compiler-rt/trunk/lib/asan/asan_rtl.cc URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/asan/asan_rtl.cc?rev=147784&r1=147783&r2=147784&view=diff ============================================================================== --- compiler-rt/trunk/lib/asan/asan_rtl.cc (original) +++ compiler-rt/trunk/lib/asan/asan_rtl.cc Mon Jan 9 12:53:15 2012 @@ -24,19 +24,9 @@ #include "asan_thread.h" #include "asan_thread_registry.h" -#include -#include -#include #include #include -#include -#include -#include -#include #include -#include -#include -// must not include on Linux namespace __asan { @@ -69,33 +59,6 @@ int asan_inited; bool asan_init_is_running; -// -------------------------- Interceptors ---------------- {{{1 -typedef int (*sigaction_f)(int signum, const struct sigaction *act, - struct sigaction *oldact); -typedef sig_t (*signal_f)(int signum, sig_t handler); -typedef void (*longjmp_f)(void *env, int val); -typedef longjmp_f _longjmp_f; -typedef longjmp_f siglongjmp_f; -typedef void (*__cxa_throw_f)(void *, void *, void *); -typedef int (*pthread_create_f)(pthread_t *thread, const pthread_attr_t *attr, - void *(*start_routine) (void *), void *arg); -#ifdef __APPLE__ -dispatch_async_f_f real_dispatch_async_f; -dispatch_sync_f_f real_dispatch_sync_f; -dispatch_after_f_f real_dispatch_after_f; -dispatch_barrier_async_f_f real_dispatch_barrier_async_f; -dispatch_group_async_f_f real_dispatch_group_async_f; -pthread_workqueue_additem_np_f real_pthread_workqueue_additem_np; -#endif - -sigaction_f real_sigaction; -signal_f real_signal; -longjmp_f real_longjmp; -_longjmp_f real__longjmp; -siglongjmp_f real_siglongjmp; -__cxa_throw_f real___cxa_throw; -pthread_create_f 
real_pthread_create; - // -------------------------- Misc ---------------- {{{1 void ShowStatsAndAbort() { __asan_print_accumulated_stats(); @@ -161,11 +124,15 @@ return NULL; // Not found. } -// ---------------------- Thread ------------------------- {{{1 -static void *asan_thread_start(void *arg) { - AsanThread *t= (AsanThread*)arg; - asanThreadRegistry().SetCurrent(t); - return t->ThreadStart(); +static void MaybeInstallSigaction(int signum, + void (*handler)(int, siginfo_t *, void *)) { + if (!AsanInterceptsSignal(signum)) + return; + struct sigaction sigact; + real_memset(&sigact, 0, sizeof(sigact)); + sigact.sa_sigaction = handler; + sigact.sa_flags = SA_SIGINFO; + CHECK(0 == real_sigaction(signum, &sigact, 0)); } // ---------------------- mmap -------------------- {{{1 @@ -359,151 +326,9 @@ } // namespace __asan -// -------------------------- Interceptors ------------------- {{{1 +// ---------------------- Interface ---------------- {{{1 using namespace __asan; // NOLINT -#define OPERATOR_NEW_BODY \ - GET_STACK_TRACE_HERE_FOR_MALLOC;\ - return asan_memalign(0, size, &stack); - -#ifdef ANDROID -void *operator new(size_t size) { OPERATOR_NEW_BODY; } -void *operator new[](size_t size) { OPERATOR_NEW_BODY; } -#else -void *operator new(size_t size) throw(std::bad_alloc) { OPERATOR_NEW_BODY; } -void *operator new[](size_t size) throw(std::bad_alloc) { OPERATOR_NEW_BODY; } -void *operator new(size_t size, std::nothrow_t const&) throw() -{ OPERATOR_NEW_BODY; } -void *operator new[](size_t size, std::nothrow_t const&) throw() -{ OPERATOR_NEW_BODY; } -#endif - -#define OPERATOR_DELETE_BODY \ - GET_STACK_TRACE_HERE_FOR_FREE(ptr);\ - asan_free(ptr, &stack); - -void operator delete(void *ptr) throw() { OPERATOR_DELETE_BODY; } -void operator delete[](void *ptr) throw() { OPERATOR_DELETE_BODY; } -void operator delete(void *ptr, std::nothrow_t const&) throw() -{ OPERATOR_DELETE_BODY; } -void operator delete[](void *ptr, std::nothrow_t const&) throw() -{ 
OPERATOR_DELETE_BODY;} - -extern "C" -#ifndef __APPLE__ -__attribute__((visibility("default"))) -#endif -int WRAP(pthread_create)(pthread_t *thread, const pthread_attr_t *attr, - void *(*start_routine) (void *), void *arg) { - GET_STACK_TRACE_HERE(kStackTraceMax, /*fast_unwind*/false); - AsanThread *curr_thread = asanThreadRegistry().GetCurrent(); - CHECK(curr_thread || asanThreadRegistry().IsCurrentThreadDying()); - int current_tid = asanThreadRegistry().GetCurrentTidOrMinusOne(); - AsanThread *t = AsanThread::Create(current_tid, start_routine, arg); - asanThreadRegistry().RegisterThread(t, current_tid, &stack); - return real_pthread_create(thread, attr, asan_thread_start, t); -} - -static bool MySignal(int signum) { - if (FLAG_handle_segv && signum == SIGSEGV) return true; -#ifdef __APPLE__ - if (FLAG_handle_segv && signum == SIGBUS) return true; -#endif - return false; -} - -static void MaybeInstallSigaction(int signum, - void (*handler)(int, siginfo_t *, void *)) { - if (!MySignal(signum)) - return; - struct sigaction sigact; - real_memset(&sigact, 0, sizeof(sigact)); - sigact.sa_sigaction = handler; - sigact.sa_flags = SA_SIGINFO; - CHECK(0 == real_sigaction(signum, &sigact, 0)); -} - -extern "C" -sig_t WRAP(signal)(int signum, sig_t handler) { - if (!MySignal(signum)) { - return real_signal(signum, handler); - } - return NULL; -} - -extern "C" -int WRAP(sigaction)(int signum, const struct sigaction *act, - struct sigaction *oldact) { - if (!MySignal(signum)) { - return real_sigaction(signum, act, oldact); - } - return 0; -} - - -static void UnpoisonStackFromHereToTop() { - int local_stack; - AsanThread *curr_thread = asanThreadRegistry().GetCurrent(); - CHECK(curr_thread); - uintptr_t top = curr_thread->stack_top(); - uintptr_t bottom = ((uintptr_t)&local_stack - kPageSize) & ~(kPageSize-1); - PoisonShadow(bottom, top - bottom, 0); -} - -extern "C" void WRAP(longjmp)(void *env, int val) { - UnpoisonStackFromHereToTop(); - real_longjmp(env, val); -} - -extern 
"C" void WRAP(_longjmp)(void *env, int val) { - UnpoisonStackFromHereToTop(); - real__longjmp(env, val); -} - -extern "C" void WRAP(siglongjmp)(void *env, int val) { - UnpoisonStackFromHereToTop(); - real_siglongjmp(env, val); -} - -extern "C" void __cxa_throw(void *a, void *b, void *c); - -#if ASAN_HAS_EXCEPTIONS == 1 -extern "C" void WRAP(__cxa_throw)(void *a, void *b, void *c) { - CHECK(&real___cxa_throw); - UnpoisonStackFromHereToTop(); - real___cxa_throw(a, b, c); -} -#endif - -extern "C" { -// intercept mlock and friends. -// Since asan maps 16T of RAM, mlock is completely unfriendly to asan. -// All functions return 0 (success). -static void MlockIsUnsupported() { - static bool printed = 0; - if (printed) return; - printed = true; - Printf("INFO: AddressSanitizer ignores mlock/mlockall/munlock/munlockall\n"); -} -int mlock(const void *addr, size_t len) { - MlockIsUnsupported(); - return 0; -} -int munlock(const void *addr, size_t len) { - MlockIsUnsupported(); - return 0; -} -int mlockall(int flags) { - MlockIsUnsupported(); - return 0; -} -int munlockall(void) { - MlockIsUnsupported(); - return 0; -} -} // extern "C" - -// ---------------------- Interface ---------------- {{{1 int __asan_set_error_exit_code(int exit_code) { int old = FLAG_exitcode; FLAG_exitcode = exit_code; @@ -655,30 +480,6 @@ ReplaceSystemMalloc(); - INTERCEPT_FUNCTION(sigaction); - INTERCEPT_FUNCTION(signal); - INTERCEPT_FUNCTION(longjmp); - INTERCEPT_FUNCTION(_longjmp); - INTERCEPT_FUNCTION_IF_EXISTS(__cxa_throw); - INTERCEPT_FUNCTION(pthread_create); -#ifdef __APPLE__ - INTERCEPT_FUNCTION(dispatch_async_f); - INTERCEPT_FUNCTION(dispatch_sync_f); - INTERCEPT_FUNCTION(dispatch_after_f); - INTERCEPT_FUNCTION(dispatch_barrier_async_f); - INTERCEPT_FUNCTION(dispatch_group_async_f); - // We don't need to intercept pthread_workqueue_additem_np() to support the - // libdispatch API, but it helps us to debug the unsupported functions. Let's - // intercept it only during verbose runs. 
- if (FLAG_v >= 2) { - INTERCEPT_FUNCTION(pthread_workqueue_additem_np); - } -#else - // On Darwin siglongjmp tailcalls longjmp, so we don't want to intercept it - // there. - INTERCEPT_FUNCTION(siglongjmp); -#endif - MaybeInstallSigaction(SIGSEGV, ASAN_OnSIGSEGV); MaybeInstallSigaction(SIGBUS, ASAN_OnSIGSEGV); From atrick at apple.com Mon Jan 9 12:58:16 2012 From: atrick at apple.com (Andrew Trick) Date: Mon, 09 Jan 2012 18:58:16 -0000 Subject: [llvm-commits] [llvm] r147785 - /llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp Message-ID: <20120109185816.C129B1BE003@llvm.org> Author: atrick Date: Mon Jan 9 12:58:16 2012 New Revision: 147785 URL: http://llvm.org/viewvc/llvm-project?rev=147785&view=rev Log: "Minor LSR debugging stuff" Modified: llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp Modified: llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp?rev=147785&r1=147784&r2=147785&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp (original) +++ llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp Mon Jan 9 12:58:16 2012 @@ -3357,7 +3357,7 @@ VisitedRegs.insert(F.ScaledReg ? F.ScaledReg : F.BaseRegs[0]); } else { DEBUG(dbgs() << "New best at "; NewCost.print(dbgs()); - dbgs() << ". 
Regs:"; + dbgs() << ".\n Regs:"; for (SmallPtrSet::const_iterator I = NewRegs.begin(), E = NewRegs.end(); I != E; ++I) dbgs() << ' ' << **I; @@ -3815,6 +3815,9 @@ SmallVector DeadInsts; SCEVExpander Rewriter(SE, "lsr"); +#ifndef NDEBUG + Rewriter.setDebugType(DEBUG_TYPE); +#endif Rewriter.disableCanonicalMode(); Rewriter.enableLSRMode(); Rewriter.setIVIncInsertPos(L, IVIncInsertPos); From dpatel at apple.com Mon Jan 9 13:13:28 2012 From: dpatel at apple.com (Devang Patel) Date: Mon, 09 Jan 2012 19:13:28 -0000 Subject: [llvm-commits] [llvm] r147787 - in /llvm/trunk: include/llvm/Target/Target.td lib/Target/X86/X86.td utils/TableGen/AsmMatcherEmitter.cpp utils/TableGen/CodeGenTarget.cpp utils/TableGen/CodeGenTarget.h Message-ID: <20120109191328.BACEA1BE003@llvm.org> Author: dpatel Date: Mon Jan 9 13:13:28 2012 New Revision: 147787 URL: http://llvm.org/viewvc/llvm-project?rev=147787&view=rev Log: Split AsmParser into two components - AsmParser and AsmParserVariant AsmParser holds info specific to target parser. AsmParserVariant holds info specific to asm variants supported by the target. Modified: llvm/trunk/include/llvm/Target/Target.td llvm/trunk/lib/Target/X86/X86.td llvm/trunk/utils/TableGen/AsmMatcherEmitter.cpp llvm/trunk/utils/TableGen/CodeGenTarget.cpp llvm/trunk/utils/TableGen/CodeGenTarget.h Modified: llvm/trunk/include/llvm/Target/Target.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/Target.td?rev=147787&r1=147786&r2=147787&view=diff ============================================================================== --- llvm/trunk/include/llvm/Target/Target.td (original) +++ llvm/trunk/include/llvm/Target/Target.td Mon Jan 9 13:13:28 2012 @@ -712,7 +712,15 @@ // function of the AsmParser class to call on every matched instruction. // This can be used to perform target specific instruction post-processing. 
string AsmParserInstCleanup = ""; +} +def DefaultAsmParser : AsmParser; +//===----------------------------------------------------------------------===// +// AsmParserVariant - Subtargets can have multiple different assembly parsers +// (e.g. AT&T vs Intel syntax on X86 for example). This class can be +// implemented by targets to describe such variants. +// +class AsmParserVariant { // Variant - AsmParsers can be of multiple different variants. Variants are // used to support targets that need to parser multiple formats for the // assembly language. @@ -729,7 +737,7 @@ // purposes of matching. string RegisterPrefix = ""; } -def DefaultAsmParser : AsmParser; +def DefaultAsmParserVariant : AsmParserVariant; /// AssemblerPredicate - This is a Predicate that can be used when the assembler /// matches instructions and aliases. @@ -840,6 +848,10 @@ // AssemblyParsers - The AsmParser instances available for this target. list AssemblyParsers = [DefaultAsmParser]; + /// AssemblyParserVariants - The AsmParserVariant instances available for + /// this target. + list AssemblyParserVariants = [DefaultAsmParserVariant]; + // AssemblyWriters - The AsmWriter instances available for this target. list AssemblyWriters = [DefaultAsmWriter]; } Modified: llvm/trunk/lib/Target/X86/X86.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86.td?rev=147787&r1=147786&r2=147787&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86.td (original) +++ llvm/trunk/lib/Target/X86/X86.td Mon Jan 9 13:13:28 2012 @@ -246,6 +246,9 @@ // Currently the X86 assembly parser only supports ATT syntax. def ATTAsmParser : AsmParser { string AsmParserClassName = "ATTAsmParser"; +} + +def ATTAsmParserVariant : AsmParserVariant { int Variant = 0; // Discard comments in assembly strings. @@ -275,8 +278,7 @@ def X86 : Target { // Information about the instructions... 
let InstructionSet = X86InstrInfo; - let AssemblyParsers = [ATTAsmParser]; - + let AssemblyParserVariants = [ATTAsmParserVariant]; let AssemblyWriters = [ATTAsmWriter, IntelAsmWriter]; } Modified: llvm/trunk/utils/TableGen/AsmMatcherEmitter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/TableGen/AsmMatcherEmitter.cpp?rev=147787&r1=147786&r2=147787&view=diff ============================================================================== --- llvm/trunk/utils/TableGen/AsmMatcherEmitter.cpp (original) +++ llvm/trunk/utils/TableGen/AsmMatcherEmitter.cpp Mon Jan 9 13:13:28 2012 @@ -1171,88 +1171,92 @@ assert(FeatureNo < 32 && "Too many subtarget features!"); } - std::string CommentDelimiter = AsmParser->getValueAsString("CommentDelimiter"); - std::string RegisterPrefix = AsmParser->getValueAsString("RegisterPrefix"); - int AsmVariantNo = AsmParser->getValueAsInt("Variant"); - // Parse the instructions; we need to do this first so that we can gather the // singleton register classes. SmallPtrSet SingletonRegisters; - for (CodeGenTarget::inst_iterator I = Target.inst_begin(), - E = Target.inst_end(); I != E; ++I) { - const CodeGenInstruction &CGI = **I; - - // If the tblgen -match-prefix option is specified (for tblgen hackers), - // filter the set of instructions we consider. - if (!StringRef(CGI.TheDef->getName()).startswith(MatchPrefix)) - continue; - - // Ignore "codegen only" instructions. - if (CGI.TheDef->getValueAsBit("isCodeGenOnly")) - continue; - - // Validate the operand list to ensure we can handle this instruction. - for (unsigned i = 0, e = CGI.Operands.size(); i != e; ++i) { - const CGIOperandList::OperandInfo &OI = CGI.Operands[i]; - - // Validate tied operands. - if (OI.getTiedRegister() != -1) { - // If we have a tied operand that consists of multiple MCOperands, - // reject it. We reject aliases and ignore instructions for now. - if (OI.MINumOperands != 1) { - // FIXME: Should reject these. 
The ARM backend hits this with $lane - // in a bunch of instructions. It is unclear what the right answer is. - DEBUG({ - errs() << "warning: '" << CGI.TheDef->getName() << "': " - << "ignoring instruction with multi-operand tied operand '" - << OI.Name << "'\n"; - }); - continue; - } + unsigned VariantCount = Target.getAsmParserVariantCount(); + for (unsigned VC = 0; VC != VariantCount; ++VC) { + Record *AsmVariant = Target.getAsmParserVariant(VC); + std::string CommentDelimiter = AsmVariant->getValueAsString("CommentDelimiter"); + std::string RegisterPrefix = AsmVariant->getValueAsString("RegisterPrefix"); + int AsmVariantNo = AsmVariant->getValueAsInt("Variant"); + + for (CodeGenTarget::inst_iterator I = Target.inst_begin(), + E = Target.inst_end(); I != E; ++I) { + const CodeGenInstruction &CGI = **I; + + // If the tblgen -match-prefix option is specified (for tblgen hackers), + // filter the set of instructions we consider. + if (!StringRef(CGI.TheDef->getName()).startswith(MatchPrefix)) + continue; + + // Ignore "codegen only" instructions. + if (CGI.TheDef->getValueAsBit("isCodeGenOnly")) + continue; + + // Validate the operand list to ensure we can handle this instruction. + for (unsigned i = 0, e = CGI.Operands.size(); i != e; ++i) { + const CGIOperandList::OperandInfo &OI = CGI.Operands[i]; + + // Validate tied operands. + if (OI.getTiedRegister() != -1) { + // If we have a tied operand that consists of multiple MCOperands, + // reject it. We reject aliases and ignore instructions for now. + if (OI.MINumOperands != 1) { + // FIXME: Should reject these. The ARM backend hits this with $lane + // in a bunch of instructions. It is unclear what the right answer is. 
+ DEBUG({ + errs() << "warning: '" << CGI.TheDef->getName() << "': " + << "ignoring instruction with multi-operand tied operand '" + << OI.Name << "'\n"; + }); + continue; + } + } } + + OwningPtr II(new MatchableInfo(CGI)); + + II->Initialize(*this, SingletonRegisters, AsmVariantNo, RegisterPrefix); + + // Ignore instructions which shouldn't be matched and diagnose invalid + // instruction definitions with an error. + if (!II->Validate(CommentDelimiter, true)) + continue; + + // Ignore "Int_*" and "*_Int" instructions, which are internal aliases. + // + // FIXME: This is a total hack. + if (StringRef(II->TheDef->getName()).startswith("Int_") || + StringRef(II->TheDef->getName()).endswith("_Int")) + continue; + + Matchables.push_back(II.take()); + } + + // Parse all of the InstAlias definitions and stick them in the list of + // matchables. + std::vector AllInstAliases = + Records.getAllDerivedDefinitions("InstAlias"); + for (unsigned i = 0, e = AllInstAliases.size(); i != e; ++i) { + CodeGenInstAlias *Alias = new CodeGenInstAlias(AllInstAliases[i], Target); + + // If the tblgen -match-prefix option is specified (for tblgen hackers), + // filter the set of instruction aliases we consider, based on the target + // instruction. + if (!StringRef(Alias->ResultInst->TheDef->getName()).startswith( + MatchPrefix)) + continue; + + OwningPtr II(new MatchableInfo(Alias)); + + II->Initialize(*this, SingletonRegisters, AsmVariantNo, RegisterPrefix); + + // Validate the alias definitions. + II->Validate(CommentDelimiter, false); + + Matchables.push_back(II.take()); } - - OwningPtr II(new MatchableInfo(CGI)); - - II->Initialize(*this, SingletonRegisters, AsmVariantNo, RegisterPrefix); - - // Ignore instructions which shouldn't be matched and diagnose invalid - // instruction definitions with an error. - if (!II->Validate(CommentDelimiter, true)) - continue; - - // Ignore "Int_*" and "*_Int" instructions, which are internal aliases. - // - // FIXME: This is a total hack. 
- if (StringRef(II->TheDef->getName()).startswith("Int_") || - StringRef(II->TheDef->getName()).endswith("_Int")) - continue; - - Matchables.push_back(II.take()); - } - - // Parse all of the InstAlias definitions and stick them in the list of - // matchables. - std::vector AllInstAliases = - Records.getAllDerivedDefinitions("InstAlias"); - for (unsigned i = 0, e = AllInstAliases.size(); i != e; ++i) { - CodeGenInstAlias *Alias = new CodeGenInstAlias(AllInstAliases[i], Target); - - // If the tblgen -match-prefix option is specified (for tblgen hackers), - // filter the set of instruction aliases we consider, based on the target - // instruction. - if (!StringRef(Alias->ResultInst->TheDef->getName()).startswith( - MatchPrefix)) - continue; - - OwningPtr II(new MatchableInfo(Alias)); - - II->Initialize(*this, SingletonRegisters, AsmVariantNo, RegisterPrefix); - - // Validate the alias definitions. - II->Validate(CommentDelimiter, false); - - Matchables.push_back(II.take()); } // Build info for the register classes. Modified: llvm/trunk/utils/TableGen/CodeGenTarget.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/TableGen/CodeGenTarget.cpp?rev=147787&r1=147786&r2=147787&view=diff ============================================================================== --- llvm/trunk/utils/TableGen/CodeGenTarget.cpp (original) +++ llvm/trunk/utils/TableGen/CodeGenTarget.cpp Mon Jan 9 13:13:28 2012 @@ -150,6 +150,26 @@ return LI[AsmParserNum]; } +/// getAsmParserVariant - Return the AssmblyParserVariant definition for +/// this target. +/// +Record *CodeGenTarget::getAsmParserVariant(unsigned i) const { + std::vector LI = + TargetRec->getValueAsListOfDefs("AssemblyParserVariants"); + if (i >= LI.size()) + throw "Target does not have an AsmParserVariant #" + utostr(i) + "!"; + return LI[i]; +} + +/// getAsmParserVariantCount - Return the AssmblyParserVariant definition +/// available for this target. 
+/// +unsigned CodeGenTarget::getAsmParserVariantCount() const { + std::vector LI = + TargetRec->getValueAsListOfDefs("AssemblyParserVariants"); + return LI.size(); +} + /// getAsmWriter - Return the AssemblyWriter definition for this target. /// Record *CodeGenTarget::getAsmWriter() const { Modified: llvm/trunk/utils/TableGen/CodeGenTarget.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/TableGen/CodeGenTarget.h?rev=147787&r1=147786&r2=147787&view=diff ============================================================================== --- llvm/trunk/utils/TableGen/CodeGenTarget.h (original) +++ llvm/trunk/utils/TableGen/CodeGenTarget.h Mon Jan 9 13:13:28 2012 @@ -91,6 +91,16 @@ /// Record *getAsmParser() const; + /// getAsmParserVariant - Return the AssmblyParserVariant definition for + /// this target. + /// + Record *getAsmParserVariant(unsigned i) const; + + /// getAsmParserVariantCount - Return the AssmblyParserVariant definition + /// available for this target. + /// + unsigned getAsmParserVariantCount() const; + /// getAsmWriter - Return the AssemblyWriter definition for this target. 
/// Record *getAsmWriter() const; From kcc at google.com Mon Jan 9 13:18:27 2012 From: kcc at google.com (Kostya Serebryany) Date: Mon, 09 Jan 2012 19:18:27 -0000 Subject: [llvm-commits] [compiler-rt] r147788 - in /compiler-rt/trunk/lib/asan: Makefile.old asan_internal.h asan_linux.cc asan_mac.cc asan_posix.cc asan_rtl.cc asan_thread.cc Message-ID: <20120109191827.C98CA1BE003@llvm.org> Author: kcc Date: Mon Jan 9 13:18:27 2012 New Revision: 147788 URL: http://llvm.org/viewvc/llvm-project?rev=147788&view=rev Log: [asan] refactoring: move some common linux/mac code to asan_posix.cc Added: compiler-rt/trunk/lib/asan/asan_posix.cc Modified: compiler-rt/trunk/lib/asan/Makefile.old compiler-rt/trunk/lib/asan/asan_internal.h compiler-rt/trunk/lib/asan/asan_linux.cc compiler-rt/trunk/lib/asan/asan_mac.cc compiler-rt/trunk/lib/asan/asan_rtl.cc compiler-rt/trunk/lib/asan/asan_thread.cc Modified: compiler-rt/trunk/lib/asan/Makefile.old URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/asan/Makefile.old?rev=147788&r1=147787&r2=147788&view=diff ============================================================================== --- compiler-rt/trunk/lib/asan/Makefile.old (original) +++ compiler-rt/trunk/lib/asan/Makefile.old Mon Jan 9 13:18:27 2012 @@ -191,6 +191,7 @@ $(BIN)/asan_malloc_linux$(SUFF).o \ $(BIN)/asan_malloc_mac$(SUFF).o \ $(BIN)/asan_poisoning$(SUFF).o \ + $(BIN)/asan_posix$(SUFF).o \ $(BIN)/asan_printf$(SUFF).o \ $(BIN)/asan_stack$(SUFF).o \ $(BIN)/asan_stats$(SUFF).o \ Modified: compiler-rt/trunk/lib/asan/asan_internal.h URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/asan/asan_internal.h?rev=147788&r1=147787&r2=147788&view=diff ============================================================================== --- compiler-rt/trunk/lib/asan/asan_internal.h (original) +++ compiler-rt/trunk/lib/asan/asan_internal.h Mon Jan 9 13:18:27 2012 @@ -107,6 +107,7 @@ int AsanClose(int fd); bool AsanInterceptsSignal(int signum); +void 
InstallSignalHandlers(); // Opens the file 'file_name" and reads up to 'max_len' bytes. // The resulting buffer is mmaped and stored in '*buff'. Modified: compiler-rt/trunk/lib/asan/asan_linux.cc URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/asan/asan_linux.cc?rev=147788&r1=147787&r2=147788&view=diff ============================================================================== --- compiler-rt/trunk/lib/asan/asan_linux.cc (original) +++ compiler-rt/trunk/lib/asan/asan_linux.cc Mon Jan 9 13:18:27 2012 @@ -276,13 +276,6 @@ CHECK(AddrIsInStack((uintptr_t)&attr)); } -void AsanDisableCoreDumper() { - struct rlimit nocore; - nocore.rlim_cur = 0; - nocore.rlim_max = 0; - setrlimit(RLIMIT_CORE, &nocore); -} - } // namespace __asan #endif // __linux__ Modified: compiler-rt/trunk/lib/asan/asan_mac.cc URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/asan/asan_mac.cc?rev=147788&r1=147787&r2=147788&view=diff ============================================================================== --- compiler-rt/trunk/lib/asan/asan_mac.cc (original) +++ compiler-rt/trunk/lib/asan/asan_mac.cc Mon Jan 9 13:18:27 2012 @@ -131,13 +131,6 @@ CHECK(AddrIsInStack((uintptr_t)&local)); } -void AsanDisableCoreDumper() { - struct rlimit nocore; - nocore.rlim_cur = 0; - nocore.rlim_max = 0; - setrlimit(RLIMIT_CORE, &nocore); -} - // Support for the following functions from libdispatch on Mac OS: // dispatch_async_f() // dispatch_async() Added: compiler-rt/trunk/lib/asan/asan_posix.cc URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/asan/asan_posix.cc?rev=147788&view=auto ============================================================================== --- compiler-rt/trunk/lib/asan/asan_posix.cc (added) +++ compiler-rt/trunk/lib/asan/asan_posix.cc Mon Jan 9 13:18:27 2012 @@ -0,0 +1,68 @@ +//===-- asan_linux.cc -----------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the 
University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file is a part of AddressSanitizer, an address sanity checker. +// +// Posix-specific details. +//===----------------------------------------------------------------------===// +#if defined(__linux__) || defined(__APPLE__) + +#include "asan_internal.h" +#include "asan_interceptors.h" +#include "asan_stack.h" +#include "asan_thread_registry.h" + +#include +#include +#include + +namespace __asan { + +static void MaybeInstallSigaction(int signum, + void (*handler)(int, siginfo_t *, void *)) { + if (!AsanInterceptsSignal(signum)) + return; + struct sigaction sigact; + real_memset(&sigact, 0, sizeof(sigact)); + sigact.sa_sigaction = handler; + sigact.sa_flags = SA_SIGINFO; + CHECK(0 == real_sigaction(signum, &sigact, 0)); +} + +static void ASAN_OnSIGSEGV(int, siginfo_t *siginfo, void *context) { + uintptr_t addr = (uintptr_t)siginfo->si_addr; + // Write the first message using the bullet-proof write. + if (13 != AsanWrite(2, "ASAN:SIGSEGV\n", 13)) ASAN_DIE; + uintptr_t pc, sp, bp; + GetPcSpBp(context, &pc, &sp, &bp); + Report("ERROR: AddressSanitizer crashed on unknown address %p" + " (pc %p sp %p bp %p T%d)\n", + addr, pc, sp, bp, + asanThreadRegistry().GetCurrentTidOrMinusOne()); + Printf("AddressSanitizer can not provide additional info. 
ABORTING\n"); + GET_STACK_TRACE_WITH_PC_AND_BP(kStackTraceMax, false, pc, bp); + stack.PrintStack(); + ShowStatsAndAbort(); +} + +void InstallSignalHandlers() { + MaybeInstallSigaction(SIGSEGV, ASAN_OnSIGSEGV); + MaybeInstallSigaction(SIGBUS, ASAN_OnSIGSEGV); +} + +void AsanDisableCoreDumper() { + struct rlimit nocore; + nocore.rlim_cur = 0; + nocore.rlim_max = 0; + setrlimit(RLIMIT_CORE, &nocore); +} + +} // namespace __asan + +#endif // __linux__ || __APPLE__ Modified: compiler-rt/trunk/lib/asan/asan_rtl.cc URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/asan/asan_rtl.cc?rev=147788&r1=147787&r2=147788&view=diff ============================================================================== --- compiler-rt/trunk/lib/asan/asan_rtl.cc (original) +++ compiler-rt/trunk/lib/asan/asan_rtl.cc Mon Jan 9 13:18:27 2012 @@ -24,8 +24,6 @@ #include "asan_thread.h" #include "asan_thread_registry.h" -#include -#include #include namespace __asan { @@ -45,7 +43,6 @@ int FLAG_report_globals; size_t FLAG_malloc_context_size = kMallocContextSize; uintptr_t FLAG_large_malloc; -bool FLAG_lazy_shadow; bool FLAG_handle_segv; bool FLAG_replace_str; bool FLAG_replace_intrin; @@ -124,17 +121,6 @@ return NULL; // Not found.
} -static void MaybeInstallSigaction(int signum, - void (*handler)(int, siginfo_t *, void *)) { - if (!AsanInterceptsSignal(signum)) - return; - struct sigaction sigact; - real_memset(&sigact, 0, sizeof(sigact)); - sigact.sa_sigaction = handler; - sigact.sa_flags = SA_SIGINFO; - CHECK(0 == real_sigaction(signum, &sigact, 0)); -} - // ---------------------- mmap -------------------- {{{1 void OutOfMemoryMessageAndDie(const char *mem_type, size_t size) { Report("ERROR: AddressSanitizer failed to allocate " @@ -235,30 +221,6 @@ } // -------------------------- Run-time entry ------------------- {{{1 -static void ASAN_OnSIGSEGV(int, siginfo_t *siginfo, void *context) { - uintptr_t addr = (uintptr_t)siginfo->si_addr; - if (AddrIsInShadow(addr) && FLAG_lazy_shadow) { - // We traped on access to a shadow address. Just map a large chunk around - // this address. - const uintptr_t chunk_size = kPageSize << 10; // 4M - uintptr_t chunk = addr & ~(chunk_size - 1); - AsanMmapFixedReserve(chunk, chunk_size); - return; - } - // Write the first message using the bullet-proof write. - if (13 != AsanWrite(2, "ASAN:SIGSEGV\n", 13)) ASAN_DIE; - uintptr_t pc, sp, bp; - GetPcSpBp(context, &pc, &sp, &bp); - Report("ERROR: AddressSanitizer crashed on unknown address %p" - " (pc %p sp %p bp %p T%d)\n", - addr, pc, sp, bp, - asanThreadRegistry().GetCurrentTidOrMinusOne()); - Printf("AddressSanitizer can not provide additional info. 
ABORTING\n"); - GET_STACK_TRACE_WITH_PC_AND_BP(kStackTraceMax, false, pc, bp); - stack.PrintStack(); - ShowStatsAndAbort(); -} - // exported functions #define ASAN_REPORT_ERROR(type, is_write, size) \ extern "C" void __asan_report_ ## type ## size(uintptr_t addr) \ @@ -318,8 +280,7 @@ } void CheckFailed(const char *cond, const char *file, int line) { - Report("CHECK failed: %s at %s:%d, pthread_self=%p\n", - cond, file, line, pthread_self()); + Report("CHECK failed: %s at %s:%d\n", cond, file, line); PRINT_CURRENT_STACK(); ShowStatsAndAbort(); } @@ -454,7 +415,6 @@ FLAG_atexit = IntFlagValue(options, "atexit=", 0); FLAG_poison_shadow = IntFlagValue(options, "poison_shadow=", 1); FLAG_report_globals = IntFlagValue(options, "report_globals=", 1); - FLAG_lazy_shadow = IntFlagValue(options, "lazy_shadow=", 0); FLAG_handle_segv = IntFlagValue(options, "handle_segv=", ASAN_NEEDS_SEGV); FLAG_symbolize = IntFlagValue(options, "symbolize=", 1); FLAG_demangle = IntFlagValue(options, "demangle=", 1); @@ -479,9 +439,7 @@ InitializeAsanInterceptors(); ReplaceSystemMalloc(); - - MaybeInstallSigaction(SIGSEGV, ASAN_OnSIGSEGV); - MaybeInstallSigaction(SIGBUS, ASAN_OnSIGSEGV); + InstallSignalHandlers(); if (FLAG_v) { Printf("|| `[%p, %p]` || HighMem ||\n", kHighMemBeg, kHighMemEnd); @@ -513,14 +471,12 @@ } { - if (!FLAG_lazy_shadow) { - if (kLowShadowBeg != kLowShadowEnd) { - // mmap the low shadow plus one page. - ReserveShadowMemoryRange(kLowShadowBeg - kPageSize, kLowShadowEnd); - } - // mmap the high shadow. - ReserveShadowMemoryRange(kHighShadowBeg, kHighShadowEnd); + if (kLowShadowBeg != kLowShadowEnd) { + // mmap the low shadow plus one page. + ReserveShadowMemoryRange(kLowShadowBeg - kPageSize, kLowShadowEnd); } + // mmap the high shadow. 
+ ReserveShadowMemoryRange(kHighShadowBeg, kHighShadowEnd); // protect the gap void *prot = AsanMprotect(kShadowGapBeg, kShadowGapEnd - kShadowGapBeg + 1); CHECK(prot == (void*)kShadowGapBeg); Modified: compiler-rt/trunk/lib/asan/asan_thread.cc URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/asan/asan_thread.cc?rev=147788&r1=147787&r2=147788&view=diff ============================================================================== --- compiler-rt/trunk/lib/asan/asan_thread.cc (original) +++ compiler-rt/trunk/lib/asan/asan_thread.cc Mon Jan 9 13:18:27 2012 @@ -17,10 +17,6 @@ #include "asan_thread.h" #include "asan_mapping.h" -#include -#include -#include - namespace __asan { AsanThread::AsanThread(LinkerInitialized x) @@ -58,9 +54,9 @@ fake_stack_.Init(stack_size()); if (FLAG_v >= 1) { int local = 0; - Report("T%d: stack [%p,%p) size 0x%lx; local=%p, pthread_self=%p\n", + Report("T%d: stack [%p,%p) size 0x%lx; local=%p\n", tid(), stack_bottom_, stack_top_, - stack_top_ - stack_bottom_, &local, pthread_self()); + stack_top_ - stack_bottom_, &local); } CHECK(AddrIsInMem(stack_bottom_)); From xerxes at zafena.se Mon Jan 9 13:33:08 2012 From: xerxes at zafena.se (=?ISO-8859-1?Q?Xerxes_R=E5nby?=) Date: Mon, 09 Jan 2012 20:33:08 +0100 Subject: [llvm-commits] [PATCH] ELFObjectFile with dynamic loading support In-Reply-To: <4F0B3850.8010009@zafena.se> References: <9BBE4537D1BAAB479E9E8F9D4234619D07D484@HASMSX103.ger.corp.intel.com> <4F0B3850.8010009@zafena.se> Message-ID: <4F0B40F4.1030406@zafena.se> 2012-01-09 19:56, Xerxes Rånby skrev: > 2012-01-09 16:15, Bendersky, Eli skrev: >> Hello, Following the email I sent to LLVMdev earlier today (http://lists.cs.uiuc.edu/pipermail/llvmdev/2012-January/046671.html), please find attached the first patch in the MCJIT/ELF series. It presents a subclass of ELFObjectFile, named DyldELFObject, which supports basic dynamic loading.
This class is used by MCJIT/ELF to load an ELF image generated by MC into memory and executing it. >> >> Please note that there are no stand-alone tests for this class yet. It is being tested extensively in the ExecutionEngine tests run on MCJIT/ELF, which will be part of the next patch in the series, once this one is accepted and committed. >> >> Thanks in advance, >> >> Eli > > Hi Eli! > > Thank you and your team for working on MCJIT/ELF support! > Clang on my system got picky while trying to resolve the getSymbolTableIndex when I compiled the patched ELFObjectFile. llvm[2]: Compiling ELFObjectFile.cpp for Debug+Asserts build /home/xranby/llvm/lib/Object/ELFObjectFile.cpp:1583:9: error: use of undeclared identifier 'getSymbolTableIndex' getSymbolTableIndex(symb) == ELF::SHN_COMMON && Size > 0) { ^ this-> /home/xranby/llvm/lib/Object/ELFObjectFile.cpp:1636:20: note: in instantiation of member function '::DyldELFObject<1, false>::rebaseObject' requested here return new DyldELFObject(Object, MemoryMap, ec); ^ /home/xranby/llvm/lib/Object/ELFObjectFile.cpp:388:19: note: must qualify identifier to find this declaration in dependent base class ELF::Elf64_Word getSymbolTableIndex(const Elf_Sym *symb) const; ^ /home/xranby/llvm/lib/Object/ELFObjectFile.cpp:1619:7: error: use of undeclared identifier 'getSymbolTableIndex' if (getSymbolTableIndex(symb) == ELF::SHN_COMMON) { ^ this-> /home/xranby/llvm/lib/Object/ELFObjectFile.cpp:1636:20: note: in instantiation of member function '::DyldELFObject<1, false>::getSymbolAddress' requested here return new DyldELFObject(Object, MemoryMap, ec); ^ /home/xranby/llvm/lib/Object/ELFObjectFile.cpp:388:19: note: must qualify identifier to find this declaration in dependent base class ELF::Elf64_Word getSymbolTableIndex(const Elf_Sym *symb) const; ^ /home/xranby/llvm/lib/Object/ELFObjectFile.cpp:1583:9: error: no member named 'getSymbolTableIndex' in '::DyldELFObject<0, false>' getSymbolTableIndex(symb) == ELF::SHN_COMMON && Size > 0) { 
^~~~~~~~~~~~~~~~~~~ tested using clang version 2.9 (tags/RELEASE_29/final) Target: i386-pc-linux-gnu Thread model: posix I will re test using clang 3.0. Cheers Xerxes From kcc at google.com Mon Jan 9 13:35:11 2012 From: kcc at google.com (Kostya Serebryany) Date: Mon, 09 Jan 2012 19:35:11 -0000 Subject: [llvm-commits] [compiler-rt] r147792 - in /compiler-rt/trunk/lib/asan: asan_interceptors.cc asan_interceptors.h Message-ID: <20120109193511.618251BE003@llvm.org> Author: kcc Date: Mon Jan 9 13:35:11 2012 New Revision: 147792 URL: http://llvm.org/viewvc/llvm-project?rev=147792&view=rev Log: [asan]: fix mac build Modified: compiler-rt/trunk/lib/asan/asan_interceptors.cc compiler-rt/trunk/lib/asan/asan_interceptors.h Modified: compiler-rt/trunk/lib/asan/asan_interceptors.cc URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/asan/asan_interceptors.cc?rev=147792&r1=147791&r2=147792&view=diff ============================================================================== --- compiler-rt/trunk/lib/asan/asan_interceptors.cc (original) +++ compiler-rt/trunk/lib/asan/asan_interceptors.cc Mon Jan 9 13:35:11 2012 @@ -16,6 +16,7 @@ #include "asan_allocator.h" #include "asan_interface.h" #include "asan_internal.h" +#include "asan_mac.h" #include "asan_mapping.h" #include "asan_stack.h" #include "asan_stats.h" @@ -161,59 +162,6 @@ return 0; } -void InitializeAsanInterceptors() { -#ifndef __APPLE__ - INTERCEPT_FUNCTION(index); -#else - OVERRIDE_FUNCTION(index, WRAP(strchr)); -#endif - INTERCEPT_FUNCTION(memcmp); - INTERCEPT_FUNCTION(memcpy); - INTERCEPT_FUNCTION(memmove); - INTERCEPT_FUNCTION(memset); - INTERCEPT_FUNCTION(strcasecmp); - INTERCEPT_FUNCTION(strcat); // NOLINT - INTERCEPT_FUNCTION(strchr); - INTERCEPT_FUNCTION(strcmp); - INTERCEPT_FUNCTION(strcpy); // NOLINT - INTERCEPT_FUNCTION(strdup); - INTERCEPT_FUNCTION(strlen); - INTERCEPT_FUNCTION(strncasecmp); - INTERCEPT_FUNCTION(strncmp); - INTERCEPT_FUNCTION(strncpy); - - INTERCEPT_FUNCTION(sigaction); - 
INTERCEPT_FUNCTION(signal); - INTERCEPT_FUNCTION(longjmp); - INTERCEPT_FUNCTION(_longjmp); - INTERCEPT_FUNCTION_IF_EXISTS(__cxa_throw); - INTERCEPT_FUNCTION(pthread_create); - -#ifdef __APPLE__ - INTERCEPT_FUNCTION(dispatch_async_f); - INTERCEPT_FUNCTION(dispatch_sync_f); - INTERCEPT_FUNCTION(dispatch_after_f); - INTERCEPT_FUNCTION(dispatch_barrier_async_f); - INTERCEPT_FUNCTION(dispatch_group_async_f); - // We don't need to intercept pthread_workqueue_additem_np() to support the - // libdispatch API, but it helps us to debug the unsupported functions. Let's - // intercept it only during verbose runs. - if (FLAG_v >= 2) { - INTERCEPT_FUNCTION(pthread_workqueue_additem_np); - } -#else - // On Darwin siglongjmp tailcalls longjmp, so we don't want to intercept it - // there. - INTERCEPT_FUNCTION(siglongjmp); -#endif - -#ifndef __APPLE__ - INTERCEPT_FUNCTION(strnlen); -#endif - if (FLAG_v > 0) { - Printf("AddressSanitizer: libc interceptors initialized\n"); - } -} } // namespace __asan @@ -277,8 +225,7 @@ } extern "C" -int WRAP(sigaction)(int signum, const struct sigaction *act, - struct sigaction *oldact) { +int WRAP(sigaction)(int signum, const void *act, void *oldact) { if (!AsanInterceptsSignal(signum)) { return real_sigaction(signum, act, oldact); } @@ -569,4 +516,63 @@ } return length; } + +// ---------------------- InitializeAsanInterceptors ---------------- {{{1 +namespace __asan { +void InitializeAsanInterceptors() { +#ifndef __APPLE__ + INTERCEPT_FUNCTION(index); +#else + OVERRIDE_FUNCTION(index, WRAP(strchr)); +#endif + INTERCEPT_FUNCTION(memcmp); + INTERCEPT_FUNCTION(memcpy); + INTERCEPT_FUNCTION(memmove); + INTERCEPT_FUNCTION(memset); + INTERCEPT_FUNCTION(strcasecmp); + INTERCEPT_FUNCTION(strcat); // NOLINT + INTERCEPT_FUNCTION(strchr); + INTERCEPT_FUNCTION(strcmp); + INTERCEPT_FUNCTION(strcpy); // NOLINT + INTERCEPT_FUNCTION(strdup); + INTERCEPT_FUNCTION(strlen); + INTERCEPT_FUNCTION(strncasecmp); + INTERCEPT_FUNCTION(strncmp); + 
INTERCEPT_FUNCTION(strncpy); + + INTERCEPT_FUNCTION(sigaction); + INTERCEPT_FUNCTION(signal); + INTERCEPT_FUNCTION(longjmp); + INTERCEPT_FUNCTION(_longjmp); + INTERCEPT_FUNCTION_IF_EXISTS(__cxa_throw); + INTERCEPT_FUNCTION(pthread_create); + +#ifdef __APPLE__ + INTERCEPT_FUNCTION(dispatch_async_f); + INTERCEPT_FUNCTION(dispatch_sync_f); + INTERCEPT_FUNCTION(dispatch_after_f); + INTERCEPT_FUNCTION(dispatch_barrier_async_f); + INTERCEPT_FUNCTION(dispatch_group_async_f); + // We don't need to intercept pthread_workqueue_additem_np() to support the + // libdispatch API, but it helps us to debug the unsupported functions. Let's + // intercept it only during verbose runs. + if (FLAG_v >= 2) { + INTERCEPT_FUNCTION(pthread_workqueue_additem_np); + } +#else + // On Darwin siglongjmp tailcalls longjmp, so we don't want to intercept it + // there. + INTERCEPT_FUNCTION(siglongjmp); +#endif + +#ifndef __APPLE__ + INTERCEPT_FUNCTION(strnlen); +#endif + if (FLAG_v > 0) { + Printf("AddressSanitizer: libc interceptors initialized\n"); + } +} + +} // namespace __asan + #endif Modified: compiler-rt/trunk/lib/asan/asan_interceptors.h URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/asan/asan_interceptors.h?rev=147792&r1=147791&r2=147792&view=diff ============================================================================== --- compiler-rt/trunk/lib/asan/asan_interceptors.h (original) +++ compiler-rt/trunk/lib/asan/asan_interceptors.h Mon Jan 9 13:35:11 2012 @@ -66,22 +66,6 @@ do { real_##func = (func##_f)dlsym(RTLD_NEXT, #func); } while (0) #endif -#ifdef __APPLE__ -int WRAP(memcmp)(const void *a1, const void *a2, size_t size); -void *WRAP(memcpy)(void *to, const void *from, size_t size); -void *WRAP(memmove)(void *to, const void *from, size_t size); -void *WRAP(memset)(void *block, int c, size_t size); -int WRAP(strcasecmp)(const char *s1, const char *s2); -char *WRAP(strcat)(char *to, const char *from); // NOLINT -char *WRAP(strchr)(const char *string, int c); -int 
WRAP(strcmp)(const char *s1, const char *s2); -char *WRAP(strcpy)(char *to, const char *from); // NOLINT -char *WRAP(strdup)(const char *s); -size_t WRAP(strlen)(const char *s); -int WRAP(strncasecmp)(const char *s1, const char *s2, size_t n); -int WRAP(strncmp)(const char *s1, const char *s2, size_t size); -char *WRAP(strncpy)(char *to, const char *from, size_t size); -#endif namespace __asan { From kcc at google.com Mon Jan 9 13:41:16 2012 From: kcc at google.com (Kostya Serebryany) Date: Mon, 09 Jan 2012 19:41:16 -0000 Subject: [llvm-commits] [compiler-rt] r147793 - /compiler-rt/trunk/lib/asan/asan_interceptors.cc Message-ID: <20120109194116.3C1B71BE003@llvm.org> Author: kcc Date: Mon Jan 9 13:41:15 2012 New Revision: 147793 URL: http://llvm.org/viewvc/llvm-project?rev=147793&view=rev Log: [asan]: fix typo from previous commit Modified: compiler-rt/trunk/lib/asan/asan_interceptors.cc Modified: compiler-rt/trunk/lib/asan/asan_interceptors.cc URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/asan/asan_interceptors.cc?rev=147793&r1=147792&r2=147793&view=diff ============================================================================== --- compiler-rt/trunk/lib/asan/asan_interceptors.cc (original) +++ compiler-rt/trunk/lib/asan/asan_interceptors.cc Mon Jan 9 13:41:15 2012 @@ -516,6 +516,7 @@ } return length; } +#endif // ---------------------- InitializeAsanInterceptors ---------------- {{{1 namespace __asan { @@ -574,5 +575,3 @@ } } // namespace __asan - -#endif From kcc at google.com Mon Jan 9 13:50:07 2012 From: kcc at google.com (Kostya Serebryany) Date: Mon, 09 Jan 2012 19:50:07 -0000 Subject: [llvm-commits] [compiler-rt] r147796 - in /compiler-rt/trunk/lib/asan: asan_interceptors.cc asan_stack.cc Message-ID: <20120109195007.1BF401BE003@llvm.org> Author: kcc Date: Mon Jan 9 13:50:06 2012 New Revision: 147796 URL: http://llvm.org/viewvc/llvm-project?rev=147796&view=rev Log: [asan] fix mac build once more Modified: 
compiler-rt/trunk/lib/asan/asan_interceptors.cc compiler-rt/trunk/lib/asan/asan_stack.cc Modified: compiler-rt/trunk/lib/asan/asan_interceptors.cc URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/asan/asan_interceptors.cc?rev=147796&r1=147795&r2=147796&view=diff ============================================================================== --- compiler-rt/trunk/lib/asan/asan_interceptors.cc (original) +++ compiler-rt/trunk/lib/asan/asan_interceptors.cc Mon Jan 9 13:50:06 2012 @@ -225,6 +225,9 @@ } extern "C" +extern int (sigaction)(int signum, const void *act, void *oldact); + +extern "C" int WRAP(sigaction)(int signum, const void *act, void *oldact) { if (!AsanInterceptsSignal(signum)) { return real_sigaction(signum, act, oldact); Modified: compiler-rt/trunk/lib/asan/asan_stack.cc URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/asan/asan_stack.cc?rev=147796&r1=147795&r2=147796&view=diff ============================================================================== --- compiler-rt/trunk/lib/asan/asan_stack.cc (original) +++ compiler-rt/trunk/lib/asan/asan_stack.cc Mon Jan 9 13:50:06 2012 @@ -18,8 +18,6 @@ #include "asan_thread.h" #include "asan_thread_registry.h" -#include - #if ASAN_USE_SYSINFO == 1 #include "sysinfo/sysinfo.h" #endif From atrick at apple.com Mon Jan 9 13:50:34 2012 From: atrick at apple.com (Andrew Trick) Date: Mon, 09 Jan 2012 19:50:34 -0000 Subject: [llvm-commits] [llvm] r147797 - /llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp Message-ID: <20120109195034.C202F1BE003@llvm.org> Author: atrick Date: Mon Jan 9 13:50:34 2012 New Revision: 147797 URL: http://llvm.org/viewvc/llvm-project?rev=147797&view=rev Log: Adding collection of IV chains to LSR. This collects a set of IV uses within the loop whose values can be computed relative to each other in a sequence. Following checkins will make use of this information. 
Modified: llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp Modified: llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp?rev=147797&r1=147796&r2=147797&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp (original) +++ llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp Mon Jan 9 13:50:34 2012 @@ -1345,6 +1345,36 @@ } }; +/// IVInc - An individual increment in a Chain of IV increments. +/// Relate an IV user to an expression that computes the IV it uses from the IV +/// used by the previous link in the Chain. +/// +/// For the head of a chain, IncExpr holds the absolute SCEV expression for the +/// original IVOperand. The head of the chain's IVOperand is only valid during +/// chain collection, before LSR replaces IV users. During chain generation, +/// IncExpr can be used to find the new IVOperand that computes the same +/// expression. +struct IVInc { + Instruction *UserInst; + Value* IVOperand; + const SCEV *IncExpr; + + IVInc(Instruction *U, Value *O, const SCEV *E): + UserInst(U), IVOperand(O), IncExpr(E) {} +}; + +// IVChain - The list of IV increments in program order. +// We typically add the head of a chain without finding subsequent links. +typedef SmallVector IVChain; + +/// ChainUsers - Helper for CollectChains to track multiple IV increment uses. +/// Distinguish between FarUsers that definitely cross IV increments and +/// NearUsers that may be used between IV increments. +struct ChainUsers { + SmallPtrSet FarUsers; + SmallPtrSet NearUsers; +}; + /// LSRInstance - This class holds state for the main loop strength reduction /// logic. class LSRInstance { @@ -1377,11 +1407,23 @@ /// RegUses - Track which uses use which register candidates. RegUseTracker RegUses; + // Limit the number of chains to avoid quadratic behavior. 
We don't expect to + // have more than a few IV increment chains in a loop. Missing a Chain falls + // back to normal LSR behavior for those uses. + static const unsigned MaxChains = 8; + + /// IVChainVec - IV users can form a chain of IV increments. + SmallVector IVChainVec; + void OptimizeShadowIV(); bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse); ICmpInst *OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse); void OptimizeLoopTermCond(); + void ChainInstruction(Instruction *UserInst, Instruction *IVOper, + SmallVectorImpl &ChainUsersVec); + void CollectChains(); + void CollectInterestingTypesAndFactors(); void CollectFixupsAndInitialFormulae(); @@ -2110,6 +2152,205 @@ DEBUG(print_factors_and_types(dbgs())); } +/// findIVOperand - Helper for CollectChains that finds an IV operand (computed +/// by an AddRec in this loop) within [OI,OE) or returns OE. If IVUsers mapped +/// Instructions to IVStrideUses, we could partially skip this. +static User::op_iterator +findIVOperand(User::op_iterator OI, User::op_iterator OE, + Loop *L, ScalarEvolution &SE) { + for(; OI != OE; ++OI) { + if (Instruction *Oper = dyn_cast(*OI)) { + if (!SE.isSCEVable(Oper->getType())) + continue; + + if (const SCEVAddRecExpr *AR = + dyn_cast(SE.getSCEV(Oper))) { + if (AR->getLoop() == L) + break; + } + } + } + return OI; +} + +/// getWideOperand - IVChain logic must consistenctly peek base TruncInst +/// operands, so wrap it in a convenient helper. +static Value *getWideOperand(Value *Oper) { + if (TruncInst *Trunc = dyn_cast(Oper)) + return Trunc->getOperand(0); + return Oper; +} + +/// isCompatibleIVType - Return true if we allow an IV chain to include both +/// types. +static bool isCompatibleIVType(Value *LVal, Value *RVal) { + Type *LType = LVal->getType(); + Type *RType = RVal->getType(); + return (LType == RType) || (LType->isPointerTy() && RType->isPointerTy()); +} + +/// ChainInstruction - Add this IV user to an existing chain or make it the head +/// of a new chain. 
+void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper, + SmallVectorImpl &ChainUsersVec) { + // When IVs are used as types of varying widths, they are generally converted + // to a wider type with some uses remaining narrow under a (free) trunc. + Value *NextIV = getWideOperand(IVOper); + + // Visit all existing chains. Check if its IVOper can be computed as a + // profitable loop invariant increment from the last link in the Chain. + unsigned ChainIdx = 0, NChains = IVChainVec.size(); + const SCEV *LastIncExpr = 0; + for (; ChainIdx < NChains; ++ChainIdx) { + Value *PrevIV = getWideOperand(IVChainVec[ChainIdx].back().IVOperand); + if (!isCompatibleIVType(PrevIV, NextIV)) + continue; + + // A phi nodes terminates a chain. + if (isa(UserInst) + && isa(IVChainVec[ChainIdx].back().UserInst)) + continue; + + const SCEV *IncExpr = SE.getMinusSCEV(SE.getSCEV(NextIV), + SE.getSCEV(PrevIV)); + if (SE.isLoopInvariant(IncExpr, L)) { + LastIncExpr = IncExpr; + break; + } + } + // If we haven't found a chain, create a new one, unless we hit the max. Don't + // bother for phi nodes, because they must be last in the chain. + if (ChainIdx == NChains) { + if (isa(UserInst)) + return; + if (NChains >= MaxChains) { + DEBUG(dbgs() << "IV Chain Limit\n"); + return; + } + ++NChains; + IVChainVec.resize(NChains); + ChainUsersVec.resize(NChains); + LastIncExpr = SE.getSCEV(NextIV); + assert(isa(LastIncExpr) && "expect recurrence at IV user"); + DEBUG(dbgs() << "IV Head: (" << *UserInst << ") IV=" << *LastIncExpr + << "\n"); + } + else + DEBUG(dbgs() << "IV Inc: (" << *UserInst << ") IV+" << *LastIncExpr + << "\n"); + + // Add this IV user to the end of the chain. + IVChainVec[ChainIdx].push_back(IVInc(UserInst, IVOper, LastIncExpr)); + + SmallPtrSet &NearUsers = ChainUsersVec[ChainIdx].NearUsers; + // This chain's NearUsers become FarUsers. 
+ if (!LastIncExpr->isZero()) { + ChainUsersVec[ChainIdx].FarUsers.insert(NearUsers.begin(), + NearUsers.end()); + NearUsers.clear(); + } + + // All other uses of IVOperand become near uses of the chain. + // We currently ignore intermediate values within SCEV expressions, assuming + // they will eventually be used be the current chain, or can be computed + // from one of the chain increments. To be more precise we could + // transitively follow its user and only add leaf IV users to the set. + for (Value::use_iterator UseIter = IVOper->use_begin(), + UseEnd = IVOper->use_end(); UseIter != UseEnd; ++UseIter) { + Instruction *OtherUse = dyn_cast(*UseIter); + if (SE.isSCEVable(OtherUse->getType()) + && !isa(SE.getSCEV(OtherUse)) + && IU.isIVUserOrOperand(OtherUse)) { + continue; + } + if (OtherUse && OtherUse != UserInst) + NearUsers.insert(OtherUse); + } + + // Since this user is part of the chain, it's no longer considered a use + // of the chain. + ChainUsersVec[ChainIdx].FarUsers.erase(UserInst); +} + +/// CollectChains - Populate the vector of Chains. +/// +/// This decreases ILP at the architecture level. Targets with ample registers, +/// multiple memory ports, and no register renaming probably don't want +/// this. However, such targets should probably disable LSR altogether. +/// +/// The job of LSR is to make a reasonable choice of induction variables across +/// the loop. Subsequent passes can easily "unchain" computation exposing more +/// ILP *within the loop* if the target wants it. +/// +/// Finding the best IV chain is potentially a scheduling problem. Since LSR +/// will not reorder memory operations, it will recognize this as a chain, but +/// will generate redundant IV increments. Ideally this would be corrected later +/// by a smart scheduler: +/// = A[i] +/// = A[i+x] +/// A[i] = +/// A[i+x] = +/// +/// TODO: Walk the entire domtree within this loop, not just the path to the +/// loop latch. 
This will discover chains on side paths, but requires +/// maintaining multiple copies of the Chains state. +void LSRInstance::CollectChains() { + SmallVector ChainUsersVec; + + SmallVector LatchPath; + BasicBlock *LoopHeader = L->getHeader(); + for (DomTreeNode *Rung = DT.getNode(L->getLoopLatch()); + Rung->getBlock() != LoopHeader; Rung = Rung->getIDom()) { + LatchPath.push_back(Rung->getBlock()); + } + LatchPath.push_back(LoopHeader); + + // Walk the instruction stream from the loop header to the loop latch. + for (SmallVectorImpl::reverse_iterator + BBIter = LatchPath.rbegin(), BBEnd = LatchPath.rend(); + BBIter != BBEnd; ++BBIter) { + for (BasicBlock::iterator I = (*BBIter)->begin(), E = (*BBIter)->end(); + I != E; ++I) { + // Skip instructions that weren't seen by IVUsers analysis. + if (isa(I) || !IU.isIVUserOrOperand(I)) + continue; + + // Ignore users that are part of a SCEV expression. This way we only + // consider leaf IV Users. This effectively rediscovers a portion of + // IVUsers analysis but in program order this time. + if (SE.isSCEVable(I->getType()) && !isa(SE.getSCEV(I))) + continue; + + // Remove this instruction from any NearUsers set it may be in. + for (unsigned ChainIdx = 0, NChains = IVChainVec.size(); + ChainIdx < NChains; ++ChainIdx) { + ChainUsersVec[ChainIdx].NearUsers.erase(I); + } + // Search for operands that can be chained. + SmallPtrSet UniqueOperands; + User::op_iterator IVOpEnd = I->op_end(); + User::op_iterator IVOpIter = findIVOperand(I->op_begin(), IVOpEnd, L, SE); + while (IVOpIter != IVOpEnd) { + Instruction *IVOpInst = cast(*IVOpIter); + if (UniqueOperands.insert(IVOpInst)) + ChainInstruction(I, IVOpInst, ChainUsersVec); + IVOpIter = findIVOperand(llvm::next(IVOpIter), IVOpEnd, L, SE); + } + } // Continue walking down the instructions. + } // Continue walking down the domtree. + // Visit phi backedges to determine if the chain can generate the IV postinc. 
+ for (BasicBlock::iterator I = L->getHeader()->begin(); + PHINode *PN = dyn_cast(I); ++I) { + if (!SE.isSCEVable(PN->getType())) + continue; + + Instruction *IncV = + dyn_cast(PN->getIncomingValueForBlock(L->getLoopLatch())); + if (IncV) + ChainInstruction(PN, IncV, ChainUsersVec); + } +} + void LSRInstance::CollectFixupsAndInitialFormulae() { for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI) { // Record the uses. @@ -3877,6 +4118,7 @@ } // Start collecting data and preparing for the solver. + CollectChains(); CollectInterestingTypesAndFactors(); CollectFixupsAndInitialFormulae(); CollectLoopInvariantFixupsAndFormulae(); From kledzik at apple.com Mon Jan 9 14:18:15 2012 From: kledzik at apple.com (Nick Kledzik) Date: Mon, 09 Jan 2012 20:18:15 -0000 Subject: [llvm-commits] [lld] r147799 - in /lld/trunk: include/lld/Core/SymbolTable.h lib/Core/SymbolTable.cpp lib/Core/YamlReader.cpp lib/Core/YamlWriter.cpp test/cstring-coalesce.objtxt Message-ID: <20120109201815.AEC0F1BE003@llvm.org> Author: kledzik Date: Mon Jan 9 14:18:15 2012 New Revision: 147799 URL: http://llvm.org/viewvc/llvm-project?rev=147799&view=rev Log: add initial support for coalescing by content (c-strings) with test case Added: lld/trunk/test/cstring-coalesce.objtxt Modified: lld/trunk/include/lld/Core/SymbolTable.h lld/trunk/lib/Core/SymbolTable.cpp lld/trunk/lib/Core/YamlReader.cpp lld/trunk/lib/Core/YamlWriter.cpp Modified: lld/trunk/include/lld/Core/SymbolTable.h URL: http://llvm.org/viewvc/llvm-project/lld/trunk/include/lld/Core/SymbolTable.h?rev=147799&r1=147798&r2=147799&view=diff ============================================================================== --- lld/trunk/include/lld/Core/SymbolTable.h (original) +++ lld/trunk/include/lld/Core/SymbolTable.h Mon Jan 9 14:18:15 2012 @@ -14,7 +14,9 @@ #include #include -namespace llvm { class StringRef; } +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/DenseSet.h" + namespace lld { @@ -52,12 +54,21 @@ private: typedef 
std::map NameToAtom; typedef std::map AtomToAtom; + struct MyMappingInfo { + static const Atom * getEmptyKey() { return NULL; } + static const Atom * getTombstoneKey() { return (Atom*)(-1); } + static unsigned getHashValue(const Atom * const Val); + static bool isEqual(const Atom * const LHS, const Atom * const RHS); + }; + typedef llvm::DenseSet AtomContentSet; void addByName(const Atom &); + void addByContent(const Atom &); Platform& _platform; AtomToAtom _replacedAtoms; NameToAtom _nameTable; + AtomContentSet _contentTable; }; } // namespace lld Modified: lld/trunk/lib/Core/SymbolTable.cpp URL: http://llvm.org/viewvc/llvm-project/lld/trunk/lib/Core/SymbolTable.cpp?rev=147799&r1=147798&r2=147799&view=diff ============================================================================== --- lld/trunk/lib/Core/SymbolTable.cpp (original) +++ lld/trunk/lib/Core/SymbolTable.cpp Mon Jan 9 14:18:15 2012 @@ -16,6 +16,8 @@ #include "lld/Platform/Platform.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/ADT/DenseMapInfo.h" +#include "llvm/ADT/ArrayRef.h" #include #include @@ -34,7 +36,7 @@ this->addByName(atom); } else if ( atom.mergeDuplicates() ) { - // TO DO: support constants merging + this->addByContent(atom); } } @@ -130,6 +132,58 @@ } } + +unsigned SymbolTable::MyMappingInfo::getHashValue(const Atom * const atom) { + unsigned hash = atom->size(); + if ( atom->contentType() != Atom::typeZeroFill ) { + llvm::ArrayRef content = atom->rawContent(); + for (unsigned int i=0; i < content.size(); ++i) { + hash = hash * 33 + content[i]; + } + } + hash &= 0x00FFFFFF; + hash |= ((unsigned)atom->contentType()) << 24; + //fprintf(stderr, "atom=%p, hash=0x%08X\n", atom, hash); + return hash; +} + + +bool SymbolTable::MyMappingInfo::isEqual(const Atom * const l, + const Atom * const r) { + if ( l == r ) + return true; + if ( l == getEmptyKey() ) + return false; + if ( r == getEmptyKey() ) + return false; + if ( l == getTombstoneKey() ) + return false; + if ( r == 
getTombstoneKey() ) + return false; + + if ( l->contentType() != r->contentType() ) + return false; + if ( l->size() != r->size() ) + return false; + llvm::ArrayRef lc = l->rawContent(); + llvm::ArrayRef rc = r->rawContent(); + return lc.equals(rc); +} + + +void SymbolTable::addByContent(const Atom & newAtom) { + AtomContentSet::iterator pos = _contentTable.find(&newAtom); + if ( pos == _contentTable.end() ) { + _contentTable.insert(&newAtom); + return; + } + const Atom* existing = *pos; + // New atom is not being used. Add it to replacement table. + _replacedAtoms[&newAtom] = existing; +} + + + const Atom *SymbolTable::findByName(llvm::StringRef sym) { NameToAtom::iterator pos = _nameTable.find(sym); if (pos == _nameTable.end()) Modified: lld/trunk/lib/Core/YamlReader.cpp URL: http://llvm.org/viewvc/llvm-project/lld/trunk/lib/Core/YamlReader.cpp?rev=147799&r1=147798&r2=147799&view=diff ============================================================================== --- lld/trunk/lib/Core/YamlReader.cpp (original) +++ lld/trunk/lib/Core/YamlReader.cpp Mon Jan 9 14:18:15 2012 @@ -16,6 +16,7 @@ #include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/Support/DataTypes.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MemoryBuffer.h" @@ -66,18 +67,21 @@ class YAML { public: struct Entry { - Entry(const char *k, const char *v, int d, bool bd, bool bs) + Entry(const char *k, const char *v, std::vector* vs, + int d, bool bd, bool bs) : key(strdup(k)) - , value(strdup(v)) + , value(v ? 
strdup(v) : NULL) + , valueSequenceBytes(vs) , depth(d) , beginSequence(bs) , beginDocument(bd) {} - const char *key; - const char *value; - int depth; - bool beginSequence; - bool beginDocument; + const char * key; + const char * value; + std::vector* valueSequenceBytes; + int depth; + bool beginSequence; + bool beginDocument; }; static void parse(llvm::MemoryBuffer *mb, std::vector&); @@ -107,6 +111,8 @@ int depth = 0; bool nextKeyIsStartOfDocument = false; bool nextKeyIsStartOfSequence = false; + std::vector* sequenceBytes = NULL; + unsigned contentByte = 0; for (const char *s = mb->getBufferStart(); s < mb->getBufferEnd(); ++s) { char c = *s; if (c == '\n') @@ -204,7 +210,7 @@ *p++ = c; state = inValue; } else if (c == '\n') { - entries.push_back(new Entry(key, "", depth, + entries.push_back(new Entry(key, "", NULL, depth, nextKeyIsStartOfDocument, nextKeyIsStartOfSequence)); nextKeyIsStartOfSequence = false; @@ -212,6 +218,8 @@ state = inDocument; depth = 0; } else if (c == '[') { + contentByte = 0; + sequenceBytes = new std::vector(); state = inValueSequence; } else if (c == ' ') { // eat space @@ -226,7 +234,7 @@ *p++ = c; } else if (c == '\n') { *p = '\0'; - entries.push_back(new Entry(key, value, depth, + entries.push_back(new Entry(key, value, NULL, depth, nextKeyIsStartOfDocument, nextKeyIsStartOfSequence)); nextKeyIsStartOfSequence = false; @@ -236,11 +244,33 @@ } break; case inValueSequence: - if (c == ']') + if (c == ']') { + sequenceBytes->push_back(contentByte); state = inValueSequenceEnd; + } + else if (c == ' ') { + // eat white space + } + else if (c == ',') { + sequenceBytes->push_back(contentByte); + } + else if ( isdigit(c) ) { + contentByte = (contentByte << 4) | (c-'0'); + } + else if ( ('a' <= tolower(c)) && (tolower(c) <= 'f') ) { + contentByte = (contentByte << 4) | (tolower(c)-'a'+10); + } + else { + llvm::report_fatal_error("non-hex digit found in content [ ]"); + } break; case inValueSequenceEnd: if (c == '\n') { + 
entries.push_back(new Entry(key, NULL, sequenceBytes, depth, + nextKeyIsStartOfDocument, + nextKeyIsStartOfSequence)); + nextKeyIsStartOfSequence = false; + nextKeyIsStartOfDocument = false; state = inDocument; depth = 0; } @@ -296,11 +326,13 @@ , YAMLFile& f , const char *n , const char* sn - , uint64_t sz) + , uint64_t sz + , std::vector* c) : Atom(ord, d, s, ct, sc, intn, md, ah, dsk, tb, al, a) , _file(f) , _name(n) , _sectionName(sn) + , _content(c) , _size(sz) , _refStartIndex(f._lastRefIndex) , _refEndIndex(f._references.size()) { @@ -320,7 +352,7 @@ } virtual llvm::StringRef customSectionName() const { - return _sectionName; + return (_sectionName ? _sectionName : llvm::StringRef()); } virtual uint64_t objectAddress() const { @@ -328,19 +360,26 @@ } virtual uint64_t size() const { - return _size; + return (_content ? _content->size() : _size); } - virtual void copyRawContent(uint8_t buffer[]) const { } + llvm::ArrayRef rawContent() const { + if ( _content != NULL ) + return llvm::ArrayRef(*_content); + else + return llvm::ArrayRef(); + } + virtual Reference::iterator referencesBegin() const; virtual Reference::iterator referencesEnd() const; private: - YAMLFile& _file; - const char * _name; - const char * _sectionName; - unsigned long _size; - unsigned int _refStartIndex; - unsigned int _refEndIndex; + YAMLFile& _file; + const char * _name; + const char * _sectionName; + std::vector* _content; + unsigned long _size; + unsigned int _refStartIndex; + unsigned int _refEndIndex; }; Reference::iterator YAMLAtom::referencesBegin() const { @@ -384,6 +423,7 @@ bool _alias; bool _autoHide; const char *_sectionName; + std::vector* _content; Reference _ref; }; @@ -395,13 +435,15 @@ , _type(KeyValues::contentTypeDefault) , _scope(KeyValues::scopeDefault) , _def(KeyValues::definitionDefault) + , _sectionChoice(KeyValues::sectionChoiceDefault) , _internalName(KeyValues::internalNameDefault) , _mergeDuplicates(KeyValues::mergeDuplicatesDefault) , 
_deadStrip(KeyValues::deadStripKindDefault) , _thumb(KeyValues::isThumbDefault) , _alias(KeyValues::isAliasDefault) , _autoHide(KeyValues::autoHideDefault) - , _sectionName(NULL) { + , _sectionName(NULL) + , _content(NULL) { _ref.target = NULL; _ref.addend = 0; _ref.offsetInAtom = 0; @@ -413,7 +455,7 @@ Atom *a = new YAMLAtom(_ordinal, _def, _scope, _type, _sectionChoice, _internalName, _mergeDuplicates, _autoHide, _deadStrip, _thumb, _alias, _align, f, - _name, _sectionName, _size); + _name, _sectionName, _size, _content); f._atoms.push_back(a); ++_ordinal; @@ -433,6 +475,7 @@ _alias = KeyValues::isAliasDefault; _autoHide = KeyValues::autoHideDefault; _sectionName = NULL; + _content = NULL; _ref.target = NULL; _ref.addend = 0; _ref.offsetInAtom = 0; @@ -593,7 +636,7 @@ haveAtom = true; } else if (strcmp(entry->key, KeyValues::contentKeyword) == 0) { - // TO DO: switch to content mode + atomState._content = entry->valueSequenceBytes; haveAtom = true; } else if (strcmp(entry->key, "align2") == 0) { Modified: lld/trunk/lib/Core/YamlWriter.cpp URL: http://llvm.org/viewvc/llvm-project/lld/trunk/lib/Core/YamlWriter.cpp?rev=147799&r1=147798&r2=147799&view=diff ============================================================================== --- lld/trunk/lib/Core/YamlWriter.cpp (original) +++ lld/trunk/lib/Core/YamlWriter.cpp Mon Jan 9 14:18:15 2012 @@ -15,6 +15,8 @@ #include "lld/Core/Reference.h" #include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/Support/DataTypes.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/system_error.h" @@ -141,6 +143,24 @@ } + if ( atom.contentType() != Atom::typeZeroFill ) { + _out << " " + << KeyValues::contentKeyword + << ":" + << spacePadding(KeyValues::contentKeyword) + << "[ "; + llvm::ArrayRef arr = atom.rawContent(); + bool needComma = false; + for (unsigned int i=0; i < arr.size(); ++i) { + if ( needComma ) + _out << ", "; + _out << hexdigit(arr[i] >> 4); + 
_out << hexdigit(arr[i] & 0x0F); + needComma = true; + } + _out << " ]\n"; + } + if (atom.referencesBegin() != atom.referencesEnd()) { _out << " fixups:\n"; for (Reference::iterator it = atom.referencesBegin(), @@ -160,7 +180,12 @@ return &spaces[strlen(key)]; } - + char hexdigit(uint8_t nibble) { + if ( nibble < 0x0A ) + return '0' + nibble; + else + return 'A' + nibble - 0x0A; + } llvm::raw_ostream& _out; bool _firstAtom; Added: lld/trunk/test/cstring-coalesce.objtxt URL: http://llvm.org/viewvc/llvm-project/lld/trunk/test/cstring-coalesce.objtxt?rev=147799&view=auto ============================================================================== --- lld/trunk/test/cstring-coalesce.objtxt (added) +++ lld/trunk/test/cstring-coalesce.objtxt Mon Jan 9 14:18:15 2012 @@ -0,0 +1,45 @@ +# RUN: lld-core %s | FileCheck %s + +# +# Test that duplicate c-strings are coalesced +# + +--- +atoms: + - name: L0 + internal-name: true + scope: hidden + type: c-string + merge-duplicates: true + content: [ 68, 65, 6c, 6c, 6f, 00 ] + + - name: L1 + internal-name: true + scope: hidden + type: c-string + merge-duplicates: true + content: [ 74, 68, 65, 72, 65, 00 ] +--- +atoms: + - name: L2 + internal-name: true + scope: hidden + type: c-string + merge-duplicates: true + content: [ 68, 65, 6c, 6c, 6f, 00 ] +--- +atoms: + - name: L2 + internal-name: true + scope: hidden + type: c-string + merge-duplicates: true + content: [ 74, 68, 65, 72, 65, 00 ] +... + +# CHECK: type: c-string +# CHECK: content: [ 68, 65, 6C, 6C, 6F, 00 ] +# CHECK: type: c-string +# CHECK: content: [ 74, 68, 65, 72, 65, 00 ] +# CHECK-NOT: name: +# CHECK: ... 
From atrick at apple.com Mon Jan 9 15:18:52 2012 From: atrick at apple.com (Andrew Trick) Date: Mon, 09 Jan 2012 21:18:52 -0000 Subject: [llvm-commits] [llvm] r147801 - in /llvm/trunk: lib/Transforms/Scalar/LoopStrengthReduce.cpp test/Transforms/LoopStrengthReduce/X86/ivchain-stress-X86.ll Message-ID: <20120109211852.52AF41BE003@llvm.org> Author: atrick Date: Mon Jan 9 15:18:52 2012 New Revision: 147801 URL: http://llvm.org/viewvc/llvm-project?rev=147801&view=rev Log: Adding IV chain generation to LSR. After collecting chains, check if any should be materialized. If so, hide the chained IV users from the LSR solver. LSR will only solve for the head of the chain. GenerateIVChains will then materialize the chained IV users by computing the IV relative to its previous value in the chain. In theory, chained IV users could be exposed to LSR's solver. This would be considerably complicated to implement and I'm not aware of a case where we need it. In practice it's more important to intelligently prune the search space of nontrivial loops before running the solver, otherwise the solver is often forced to prune the most optimal solutions. Hiding the chained users does this well, so that LSR is more likely to find the best IV for the chain as a whole. 
Added: llvm/trunk/test/Transforms/LoopStrengthReduce/X86/ivchain-stress-X86.ll Modified: llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp Modified: llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp?rev=147801&r1=147800&r2=147801&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp (original) +++ llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp Mon Jan 9 15:18:52 2012 @@ -91,6 +91,15 @@ "enable-lsr-phielim", cl::Hidden, cl::init(true), cl::desc("Enable LSR phi elimination")); +#ifndef NDEBUG +// Stress test IV chain generation. +static cl::opt StressIVChain( + "stress-ivchain", cl::Hidden, cl::init(false), + cl::desc("Stress test LSR IV chains")); +#else +static bool StressIVChain = false; +#endif + namespace { /// RegSortData - This class holds data which is used to order reuse candidates. @@ -1415,6 +1424,9 @@ /// IVChainVec - IV users can form a chain of IV increments. SmallVector IVChainVec; + /// IVIncSet - IV users that belong to profitable IVChains. + SmallPtrSet IVIncSet; + void OptimizeShadowIV(); bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse); ICmpInst *OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse); @@ -1422,7 +1434,10 @@ void ChainInstruction(Instruction *UserInst, Instruction *IVOper, SmallVectorImpl &ChainUsersVec); + void FinalizeChain(IVChain &Chain); void CollectChains(); + void GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter, + SmallVectorImpl &DeadInsts); void CollectInterestingTypesAndFactors(); void CollectFixupsAndInitialFormulae(); @@ -2189,6 +2204,48 @@ return (LType == RType) || (LType->isPointerTy() && RType->isPointerTy()); } +/// Return true if the chain increment is profitable to expand into a loop +/// invariant value, which may require its own register. 
A profitable chain +/// increment will be an offset relative to the same base. We allow such offsets +/// to potentially be used as chain increment as long as it's not obviously +/// expensive to expand using real instructions. +static const SCEV * +getProfitableChainIncrement(Value *NextIV, Value *PrevIV, + const IVChain &Chain, Loop *L, + ScalarEvolution &SE, const TargetLowering *TLI) { + const SCEV *IncExpr = SE.getMinusSCEV(SE.getSCEV(NextIV), SE.getSCEV(PrevIV)); + if (!SE.isLoopInvariant(IncExpr, L)) + return 0; + + // We are not able to expand an increment unless it is loop invariant, + // however, the following checks are purely for profitability. + if (StressIVChain) + return IncExpr; + + // Unimplemented + return 0; +} + +/// Return true if the number of registers needed for the chain is estimated to +/// be less than the number required for the individual IV users. First prohibit +/// any IV users that keep the IV live across increments (the Users set should +/// be empty). Next count the number and type of increments in the chain. +/// +/// Chaining IVs can lead to considerable code bloat if ISEL doesn't +/// effectively use postinc addressing modes. Only consider it profitable if the +/// increments can be computed in fewer registers when chained. +/// +/// TODO: Consider IVInc free if it's already used in another chain. +static bool +isProfitableChain(IVChain &Chain, SmallPtrSet &Users, + ScalarEvolution &SE, const TargetLowering *TLI) { + if (StressIVChain) + return true; + + // Unimplemented + return false; +} + +/// ChainInstruction - Add this IV user to an existing chain or make it the head +/// of a new chain.
void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper, @@ -2211,9 +2268,9 @@ && isa(IVChainVec[ChainIdx].back().UserInst)) continue; - const SCEV *IncExpr = SE.getMinusSCEV(SE.getSCEV(NextIV), - SE.getSCEV(PrevIV)); - if (SE.isLoopInvariant(IncExpr, L)) { + if (const SCEV *IncExpr = + getProfitableChainIncrement(NextIV, PrevIV, IVChainVec[ChainIdx], + L, SE, TLI)) { LastIncExpr = IncExpr; break; } @@ -2223,7 +2280,7 @@ if (ChainIdx == NChains) { if (isa(UserInst)) return; - if (NChains >= MaxChains) { + if (NChains >= MaxChains && !StressIVChain) { DEBUG(dbgs() << "IV Chain Limit\n"); return; } @@ -2349,13 +2406,173 @@ if (IncV) ChainInstruction(PN, IncV, ChainUsersVec); } + // Remove any unprofitable chains. + unsigned ChainIdx = 0; + for (unsigned UsersIdx = 0, NChains = IVChainVec.size(); + UsersIdx < NChains; ++UsersIdx) { + if (!isProfitableChain(IVChainVec[UsersIdx], + ChainUsersVec[UsersIdx].FarUsers, SE, TLI)) + continue; + // Preserve the chain at UsesIdx. + if (ChainIdx != UsersIdx) + IVChainVec[ChainIdx] = IVChainVec[UsersIdx]; + FinalizeChain(IVChainVec[ChainIdx]); + ++ChainIdx; + } + IVChainVec.resize(ChainIdx); +} + +void LSRInstance::FinalizeChain(IVChain &Chain) { + assert(!Chain.empty() && "empty IV chains are not allowed"); + DEBUG(dbgs() << "Final Chain: " << *Chain[0].UserInst << "\n"); + + for (IVChain::const_iterator I = llvm::next(Chain.begin()), E = Chain.end(); + I != E; ++I) { + DEBUG(dbgs() << " Inc: " << *I->UserInst << "\n"); + User::op_iterator UseI = + std::find(I->UserInst->op_begin(), I->UserInst->op_end(), I->IVOperand); + assert(UseI != I->UserInst->op_end() && "cannot find IV operand"); + IVIncSet.insert(UseI); + } +} + +/// Return true if the IVInc can be folded into an addressing mode. 
+static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst, + Value *Operand, const TargetLowering *TLI) { + const SCEVConstant *IncConst = dyn_cast(IncExpr); + if (!IncConst || !isAddressUse(UserInst, Operand)) + return false; + + if (IncConst->getValue()->getValue().getMinSignedBits() > 64) + return false; + + int64_t IncOffset = IncConst->getValue()->getSExtValue(); + if (!isAlwaysFoldable(IncOffset, /*BaseGV=*/0, /*HaseBaseReg=*/false, + LSRUse::Address, getAccessType(UserInst), TLI)) + return false; + + return true; +} + +/// GenerateIVChains - Generate an add or subtract for each IVInc in a chain to +/// materialize the IV user's operand from the previous IV user's operand. +void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter, + SmallVectorImpl &DeadInsts) { + // Find the new IVOperand for the head of the chain. It may have been replaced + // by LSR. + const IVInc &Head = Chain[0]; + User::op_iterator IVOpEnd = Head.UserInst->op_end(); + User::op_iterator IVOpIter = findIVOperand(Head.UserInst->op_begin(), + IVOpEnd, L, SE); + Value *IVSrc = 0; + while (IVOpIter != IVOpEnd) { + IVSrc = getWideOperand(*IVOpIter); + + // If this operand computes the expression that the chain needs, we may use + // it. (Check this after setting IVSrc which is used below.) + // + // Note that if Head.IncExpr is wider than IVSrc, then this phi is too + // narrow for the chain, so we can no longer use it. We do allow using a + // wider phi, assuming the LSR checked for free truncation. In that case we + // should already have a truncate on this operand such that + // getSCEV(IVSrc) == IncExpr. + if (SE.getSCEV(*IVOpIter) == Head.IncExpr + || SE.getSCEV(IVSrc) == Head.IncExpr) { + break; + } + IVOpIter = findIVOperand(llvm::next(IVOpIter), IVOpEnd, L, SE); + } + if (IVOpIter == IVOpEnd) { + // Gracefully give up on this chain. 
+ DEBUG(dbgs() << "Concealed chain head: " << *Head.UserInst << "\n"); + return; + } + + DEBUG(dbgs() << "Generate chain at: " << *IVSrc << "\n"); + Type *IVTy = IVSrc->getType(); + Type *IntTy = SE.getEffectiveSCEVType(IVTy); + const SCEV *LeftOverExpr = 0; + for (IVChain::const_iterator IncI = llvm::next(Chain.begin()), + IncE = Chain.end(); IncI != IncE; ++IncI) { + + Instruction *InsertPt = IncI->UserInst; + if (isa(InsertPt)) + InsertPt = L->getLoopLatch()->getTerminator(); + + // IVOper will replace the current IV User's operand. IVSrc is the IV + // value currently held in a register. + Value *IVOper = IVSrc; + if (!IncI->IncExpr->isZero()) { + // IncExpr was the result of subtraction of two narrow values, so must + // be signed. + const SCEV *IncExpr = SE.getNoopOrSignExtend(IncI->IncExpr, IntTy); + LeftOverExpr = LeftOverExpr ? + SE.getAddExpr(LeftOverExpr, IncExpr) : IncExpr; + } + if (LeftOverExpr && !LeftOverExpr->isZero()) { + // Expand the IV increment. + Rewriter.clearPostInc(); + Value *IncV = Rewriter.expandCodeFor(LeftOverExpr, IntTy, InsertPt); + const SCEV *IVOperExpr = SE.getAddExpr(SE.getUnknown(IVSrc), + SE.getUnknown(IncV)); + IVOper = Rewriter.expandCodeFor(IVOperExpr, IVTy, InsertPt); + + // If an IV increment can't be folded, use it as the next IV value. + if (!canFoldIVIncExpr(LeftOverExpr, IncI->UserInst, IncI->IVOperand, + TLI)) { + assert(IVTy == IVOper->getType() && "inconsistent IV increment type"); + IVSrc = IVOper; + LeftOverExpr = 0; + } + } + Type *OperTy = IncI->IVOperand->getType(); + if (IVTy != OperTy) { + assert(SE.getTypeSizeInBits(IVTy) >= SE.getTypeSizeInBits(OperTy) && + "cannot extend a chained IV"); + IRBuilder<> Builder(InsertPt); + IVOper = Builder.CreateTruncOrBitCast(IVOper, OperTy, "lsr.chain"); + } + IncI->UserInst->replaceUsesOfWith(IncI->IVOperand, IVOper); + DeadInsts.push_back(IncI->IVOperand); + } + // If LSR created a new, wider phi, we may also replace its postinc. 
We only + // do this if we also found a wide value for the head of the chain. + if (isa(Chain.back().UserInst)) { + for (BasicBlock::iterator I = L->getHeader()->begin(); + PHINode *Phi = dyn_cast(I); ++I) { + if (!isCompatibleIVType(Phi, IVSrc)) + continue; + Instruction *PostIncV = dyn_cast( + Phi->getIncomingValueForBlock(L->getLoopLatch())); + if (!PostIncV || (SE.getSCEV(PostIncV) != SE.getSCEV(IVSrc))) + continue; + Value *IVOper = IVSrc; + Type *PostIncTy = PostIncV->getType(); + if (IVTy != PostIncTy) { + assert(PostIncTy->isPointerTy() && "mixing int/ptr IV types"); + IRBuilder<> Builder(L->getLoopLatch()->getTerminator()); + Builder.SetCurrentDebugLocation(PostIncV->getDebugLoc()); + IVOper = Builder.CreatePointerCast(IVSrc, PostIncTy, "lsr.chain"); + } + Phi->replaceUsesOfWith(PostIncV, IVOper); + DeadInsts.push_back(PostIncV); + } + } } void LSRInstance::CollectFixupsAndInitialFormulae() { for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI) { + Instruction *UserInst = UI->getUser(); + // Skip IV users that are part of profitable IV Chains. + User::op_iterator UseI = std::find(UserInst->op_begin(), UserInst->op_end(), + UI->getOperandValToReplace()); + assert(UseI != UserInst->op_end() && "cannot find IV operand"); + if (IVIncSet.count(UseI)) + continue; + // Record the uses. LSRFixup &LF = getNewFixup(); - LF.UserInst = UI->getUser(); + LF.UserInst = UserInst; LF.OperandValToReplace = UI->getOperandValToReplace(); LF.PostIncLoops = UI->getPostIncLoops(); @@ -4073,6 +4290,11 @@ Changed = true; } + for (SmallVectorImpl::const_iterator ChainI = IVChainVec.begin(), + ChainE = IVChainVec.end(); ChainI != ChainE; ++ChainI) { + GenerateIVChain(*ChainI, Rewriter, DeadInsts); + Changed = true; + } // Clean up after ourselves. This must be done before deleting any // instructions. 
Rewriter.clear(); @@ -4123,6 +4345,7 @@ CollectFixupsAndInitialFormulae(); CollectLoopInvariantFixupsAndFormulae(); + assert(!Uses.empty() && "IVUsers reported at least one use"); DEBUG(dbgs() << "LSR found " << Uses.size() << " uses:\n"; print_uses(dbgs())); Added: llvm/trunk/test/Transforms/LoopStrengthReduce/X86/ivchain-stress-X86.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopStrengthReduce/X86/ivchain-stress-X86.ll?rev=147801&view=auto ============================================================================== --- llvm/trunk/test/Transforms/LoopStrengthReduce/X86/ivchain-stress-X86.ll (added) +++ llvm/trunk/test/Transforms/LoopStrengthReduce/X86/ivchain-stress-X86.ll Mon Jan 9 15:18:52 2012 @@ -0,0 +1,96 @@ +; REQUIRES: asserts +; RUN: llc < %s -O3 -march=x86-64 -mcpu=core2 -stress-ivchain | FileCheck %s -check-prefix=X64 +; RUN: llc < %s -O3 -march=x86 -mcpu=core2 -stress-ivchain | FileCheck %s -check-prefix=X32 + +; @sharedidx is an unrolled variant of this loop: +; for (unsigned long i = 0; i < len; i += s) { +; c[i] = a[i] + b[i]; +; } +; where 's' cannot be folded into the addressing mode. +; +; This is not quite profitable to chain. But with -stress-ivchain, we +; can form three address chains in place of the shared induction +; variable. 
+ +; X64: sharedidx: +; X64: %for.body.preheader +; X64-NOT: leal ({{.*}},4) +; X64: %for.body.1 + +; X32: sharedidx: +; X32: %for.body.2 +; X32: add +; X32: add +; X32: add +; X32: add +; X32: add +; X32: %for.body.3 +define void @sharedidx(i8* nocapture %a, i8* nocapture %b, i8* nocapture %c, i32 %s, i32 %len) nounwind ssp { +entry: + %cmp8 = icmp eq i32 %len, 0 + br i1 %cmp8, label %for.end, label %for.body + +for.body: ; preds = %entry, %for.body.3 + %i.09 = phi i32 [ %add5.3, %for.body.3 ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i8* %a, i32 %i.09 + %0 = load i8* %arrayidx, align 1 + %conv6 = zext i8 %0 to i32 + %arrayidx1 = getelementptr inbounds i8* %b, i32 %i.09 + %1 = load i8* %arrayidx1, align 1 + %conv27 = zext i8 %1 to i32 + %add = add nsw i32 %conv27, %conv6 + %conv3 = trunc i32 %add to i8 + %arrayidx4 = getelementptr inbounds i8* %c, i32 %i.09 + store i8 %conv3, i8* %arrayidx4, align 1 + %add5 = add i32 %i.09, %s + %cmp = icmp ult i32 %add5, %len + br i1 %cmp, label %for.body.1, label %for.end + +for.end: ; preds = %for.body, %for.body.1, %for.body.2, %for.body.3, %entry + ret void + +for.body.1: ; preds = %for.body + %arrayidx.1 = getelementptr inbounds i8* %a, i32 %add5 + %2 = load i8* %arrayidx.1, align 1 + %conv6.1 = zext i8 %2 to i32 + %arrayidx1.1 = getelementptr inbounds i8* %b, i32 %add5 + %3 = load i8* %arrayidx1.1, align 1 + %conv27.1 = zext i8 %3 to i32 + %add.1 = add nsw i32 %conv27.1, %conv6.1 + %conv3.1 = trunc i32 %add.1 to i8 + %arrayidx4.1 = getelementptr inbounds i8* %c, i32 %add5 + store i8 %conv3.1, i8* %arrayidx4.1, align 1 + %add5.1 = add i32 %add5, %s + %cmp.1 = icmp ult i32 %add5.1, %len + br i1 %cmp.1, label %for.body.2, label %for.end + +for.body.2: ; preds = %for.body.1 + %arrayidx.2 = getelementptr inbounds i8* %a, i32 %add5.1 + %4 = load i8* %arrayidx.2, align 1 + %conv6.2 = zext i8 %4 to i32 + %arrayidx1.2 = getelementptr inbounds i8* %b, i32 %add5.1 + %5 = load i8* %arrayidx1.2, align 1 + %conv27.2 = zext i8 
%5 to i32 + %add.2 = add nsw i32 %conv27.2, %conv6.2 + %conv3.2 = trunc i32 %add.2 to i8 + %arrayidx4.2 = getelementptr inbounds i8* %c, i32 %add5.1 + store i8 %conv3.2, i8* %arrayidx4.2, align 1 + %add5.2 = add i32 %add5.1, %s + %cmp.2 = icmp ult i32 %add5.2, %len + br i1 %cmp.2, label %for.body.3, label %for.end + +for.body.3: ; preds = %for.body.2 + %arrayidx.3 = getelementptr inbounds i8* %a, i32 %add5.2 + %6 = load i8* %arrayidx.3, align 1 + %conv6.3 = zext i8 %6 to i32 + %arrayidx1.3 = getelementptr inbounds i8* %b, i32 %add5.2 + %7 = load i8* %arrayidx1.3, align 1 + %conv27.3 = zext i8 %7 to i32 + %add.3 = add nsw i32 %conv27.3, %conv6.3 + %conv3.3 = trunc i32 %add.3 to i8 + %arrayidx4.3 = getelementptr inbounds i8* %c, i32 %add5.2 + store i8 %conv3.3, i8* %arrayidx4.3, align 1 + %add5.3 = add i32 %add5.2, %s + %cmp.3 = icmp ult i32 %add5.3, %len + br i1 %cmp.3, label %for.body, label %for.end +} From dpatel at apple.com Mon Jan 9 15:30:46 2012 From: dpatel at apple.com (Devang Patel) Date: Mon, 09 Jan 2012 21:30:46 -0000 Subject: [llvm-commits] [llvm] r147802 - /llvm/trunk/utils/TableGen/AsmMatcherEmitter.cpp Message-ID: <20120109213046.813711BE003@llvm.org> Author: dpatel Date: Mon Jan 9 15:30:46 2012 New Revision: 147802 URL: http://llvm.org/viewvc/llvm-project?rev=147802&view=rev Log: Use descriptive variable name and remove incorrect operand number check. 
Modified: llvm/trunk/utils/TableGen/AsmMatcherEmitter.cpp Modified: llvm/trunk/utils/TableGen/AsmMatcherEmitter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/TableGen/AsmMatcherEmitter.cpp?rev=147802&r1=147801&r2=147802&view=diff ============================================================================== --- llvm/trunk/utils/TableGen/AsmMatcherEmitter.cpp (original) +++ llvm/trunk/utils/TableGen/AsmMatcherEmitter.cpp Mon Jan 9 15:30:46 2012 @@ -806,20 +806,17 @@ return true; } -/// extractSingletonRegisterForAsmOperand - Extract singleton register, if present, -/// from specified token. +/// extractSingletonRegisterForAsmOperand - Extract singleton register, +/// if present, from specified token. void MatchableInfo:: -extractSingletonRegisterForAsmOperand(unsigned i, const AsmMatcherInfo &Info, +extractSingletonRegisterForAsmOperand(unsigned OperandNo, + const AsmMatcherInfo &Info, std::string &RegisterPrefix) { - StringRef Tok = AsmOperands[i].Token; + StringRef Tok = AsmOperands[OperandNo].Token; if (RegisterPrefix.empty()) { - if (i) { - std::string LoweredTok = Tok.lower(); - if (const CodeGenRegister *Reg = Info.Target.getRegisterByName(LoweredTok)) - AsmOperands[i].SingletonReg = Reg->TheDef; - } else - if (const CodeGenRegister *Reg = Info.Target.getRegisterByName(Tok)) - AsmOperands[i].SingletonReg = Reg->TheDef; + std::string LoweredTok = Tok.lower(); + if (const CodeGenRegister *Reg = Info.Target.getRegisterByName(LoweredTok)) + AsmOperands[OperandNo].SingletonReg = Reg->TheDef; return; } @@ -828,7 +825,7 @@ StringRef RegName = Tok.substr(RegisterPrefix.size()); if (const CodeGenRegister *Reg = Info.Target.getRegisterByName(RegName)) - AsmOperands[i].SingletonReg = Reg->TheDef; + AsmOperands[OperandNo].SingletonReg = Reg->TheDef; // If there is no register prefix (i.e. "%" in "%eax"), then this may // be some random non-register token, just ignore it. 
From dpatel at apple.com Mon Jan 9 15:32:02 2012 From: dpatel at apple.com (Devang Patel) Date: Mon, 09 Jan 2012 21:32:02 -0000 Subject: [llvm-commits] [llvm] r147805 - in /llvm/trunk/lib/Target/X86: X86InstrSystem.td X86InstrVMX.td Message-ID: <20120109213202.67F4C1BE003@llvm.org> Author: dpatel Date: Mon Jan 9 15:32:02 2012 New Revision: 147805 URL: http://llvm.org/viewvc/llvm-project?rev=147805&view=rev Log: Fix asm string wrt variants. Modified: llvm/trunk/lib/Target/X86/X86InstrSystem.td llvm/trunk/lib/Target/X86/X86InstrVMX.td Modified: llvm/trunk/lib/Target/X86/X86InstrSystem.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSystem.td?rev=147805&r1=147804&r2=147805&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86InstrSystem.td (original) +++ llvm/trunk/lib/Target/X86/X86InstrSystem.td Mon Jan 9 15:32:02 2012 @@ -214,18 +214,18 @@ def INVLPG : I<0x01, MRM7m, (outs), (ins i8mem:$addr), "invlpg\t$addr", []>, TB; def STR16r : I<0x00, MRM1r, (outs GR16:$dst), (ins), - "str{w}\t{$dst}", []>, TB, OpSize; + "str{w}\t$dst", []>, TB, OpSize; def STR32r : I<0x00, MRM1r, (outs GR32:$dst), (ins), - "str{l}\t{$dst}", []>, TB; + "str{l}\t$dst", []>, TB; def STR64r : RI<0x00, MRM1r, (outs GR64:$dst), (ins), - "str{q}\t{$dst}", []>, TB; + "str{q}\t$dst", []>, TB; def STRm : I<0x00, MRM1m, (outs i16mem:$dst), (ins), - "str{w}\t{$dst}", []>, TB; + "str{w}\t$dst", []>, TB; def LTRr : I<0x00, MRM3r, (outs), (ins GR16:$src), - "ltr{w}\t{$src}", []>, TB; + "ltr{w}\t$src", []>, TB; def LTRm : I<0x00, MRM3m, (outs), (ins i16mem:$src), - "ltr{w}\t{$src}", []>, TB; + "ltr{w}\t$src", []>, TB; def PUSHCS16 : I<0x0E, RawFrm, (outs), (ins), "push{w}\t{%cs|CS}", []>, Requires<[In32BitMode]>, OpSize; Modified: llvm/trunk/lib/Target/X86/X86InstrVMX.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrVMX.td?rev=147805&r1=147804&r2=147805&view=diff 
============================================================================== --- llvm/trunk/lib/Target/X86/X86InstrVMX.td (original) +++ llvm/trunk/lib/Target/X86/X86InstrVMX.td Mon Jan 9 15:32:02 2012 @@ -60,5 +60,5 @@ // 0F 01 C4 def VMXOFF : I<0x01, MRM_C4, (outs), (ins), "vmxoff", []>, TB; def VMXON : I<0xC7, MRM6m, (outs), (ins i64mem:$vmxon), - "vmxon\t{$vmxon}", []>, XS; + "vmxon\t$vmxon", []>, XS; From rafael.espindola at gmail.com Mon Jan 9 15:51:53 2012 From: rafael.espindola at gmail.com (Rafael Ávila de Espíndola) Date: Mon, 09 Jan 2012 16:51:53 -0500 Subject: [llvm-commits] [PATCH] Segmented stack fixes, support for Mac, Win32, FreeBSD In-Reply-To: References: Message-ID: <4F0B6179.2010308@gmail.com> > Thanks for the review, Anton. Attached are new patches with style fixes, including converting tabs to spaces. Some of the original comments still apply: On patch 01: What was failing? Do you have a testcase that can be added? On patch 05: Please name variables according to: http://llvm.org/docs/CodingStandards.html#ll_naming What is the "primary" argument for? Please add a comment On patch 06 (was 07) Is the assert assert(false && "Unhandled case in adjustForSegmentedStacks"); user visible? i.e., can I hit it running llc targeting an unsupported OS? Needs a test. Cheers, Rafael From stoklund at 2pi.dk Mon Jan 9 16:16:24 2012 From: stoklund at 2pi.dk (Jakob Stoklund Olesen) Date: Mon, 09 Jan 2012 22:16:24 -0000 Subject: [llvm-commits] [llvm] r147806 - /llvm/trunk/lib/Target/ARM/ARMConstantIslandPass.cpp Message-ID: <20120109221624.5C5C21BE003@llvm.org> Author: stoklund Date: Mon Jan 9 16:16:24 2012 New Revision: 147806 URL: http://llvm.org/viewvc/llvm-project?rev=147806&view=rev Log: Catch runaway ARMConstantIslandPass even in -Asserts builds. The pass is prone to looping, and it is better to crash than loop forever, even in a -Asserts build.
Modified: llvm/trunk/lib/Target/ARM/ARMConstantIslandPass.cpp Modified: llvm/trunk/lib/Target/ARM/ARMConstantIslandPass.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMConstantIslandPass.cpp?rev=147806&r1=147805&r2=147806&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMConstantIslandPass.cpp (original) +++ llvm/trunk/lib/Target/ARM/ARMConstantIslandPass.cpp Mon Jan 9 16:16:24 2012 @@ -435,7 +435,7 @@ for (unsigned i = 0, e = CPUsers.size(); i != e; ++i) CPChange |= HandleConstantPoolUser(i); if (CPChange && ++NoCPIters > 30) - llvm_unreachable("Constant Island pass failed to converge!"); + report_fatal_error("Constant Island pass failed to converge!"); DEBUG(dumpBBs()); // Clear NewWaterList now. If we split a block for branches, it should @@ -447,7 +447,7 @@ for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i) BRChange |= FixUpImmediateBr(ImmBranches[i]); if (BRChange && ++NoBRIters > 30) - llvm_unreachable("Branch Fix Up pass failed to converge!"); + report_fatal_error("Branch Fix Up pass failed to converge!"); DEBUG(dumpBBs()); if (!CPChange && !BRChange) From kcc at google.com Mon Jan 9 16:20:49 2012 From: kcc at google.com (Kostya Serebryany) Date: Mon, 09 Jan 2012 22:20:49 -0000 Subject: [llvm-commits] [compiler-rt] r147807 - in /compiler-rt/trunk/lib/asan: asan_interceptors.cc asan_interceptors.h asan_rtl.cc Message-ID: <20120109222049.AE01E1BE003@llvm.org> Author: kcc Date: Mon Jan 9 16:20:49 2012 New Revision: 147807 URL: http://llvm.org/viewvc/llvm-project?rev=147807&view=rev Log: [asan] don't use strstr/strncat from libc, use our own versions instead Modified: compiler-rt/trunk/lib/asan/asan_interceptors.cc compiler-rt/trunk/lib/asan/asan_interceptors.h compiler-rt/trunk/lib/asan/asan_rtl.cc Modified: compiler-rt/trunk/lib/asan/asan_interceptors.cc URL: 
http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/asan/asan_interceptors.cc?rev=147807&r1=147806&r2=147807&view=diff ============================================================================== --- compiler-rt/trunk/lib/asan/asan_interceptors.cc (original) +++ compiler-rt/trunk/lib/asan/asan_interceptors.cc Mon Jan 9 16:20:49 2012 @@ -162,6 +162,26 @@ return 0; } +char *internal_strstr(const char *haystack, const char *needle) { + // This is O(N^2), but we are not using it in hot places. + size_t len1 = internal_strlen(haystack); + size_t len2 = internal_strlen(needle); + if (len1 < len2) return 0; + for (size_t pos = 0; pos <= len1 - len2; pos++) { + if (internal_memcmp(haystack + pos, needle, len2) == 0) + return (char*)haystack + pos; + } + return 0; +} + +char *internal_strncat(char *dst, const char *src, size_t n) { + size_t len = internal_strlen(dst); + size_t i; + for (i = 0; i < n && src[i]; i++) + dst[len + i] = src[i]; + dst[len + i] = 0; + return dst; +} } // namespace __asan Modified: compiler-rt/trunk/lib/asan/asan_interceptors.h URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/asan/asan_interceptors.h?rev=147807&r1=147806&r2=147807&view=diff ============================================================================== --- compiler-rt/trunk/lib/asan/asan_interceptors.h (original) +++ compiler-rt/trunk/lib/asan/asan_interceptors.h Mon Jan 9 16:20:49 2012 @@ -113,6 +113,9 @@ size_t internal_strnlen(const char *s, size_t maxlen); void* internal_memchr(const void* s, int c, size_t n); int internal_memcmp(const void* s1, const void* s2, size_t n); +char *internal_strstr(const char *haystack, const char *needle); +char *internal_strncat(char *dst, const char *src, size_t n); + // Initializes pointers to str*/mem* functions. 
void InitializeAsanInterceptors(); Modified: compiler-rt/trunk/lib/asan/asan_rtl.cc URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/asan/asan_rtl.cc?rev=147807&r1=147806&r2=147807&view=diff ============================================================================== --- compiler-rt/trunk/lib/asan/asan_rtl.cc (original) +++ compiler-rt/trunk/lib/asan/asan_rtl.cc Mon Jan 9 16:20:49 2012 @@ -24,8 +24,6 @@ #include "asan_thread.h" #include "asan_thread_registry.h" -#include - namespace __asan { // -------------------------- Flags ------------------------- {{{1 @@ -172,8 +170,9 @@ const char *name_end = real_strchr(frame_descr, ' '); CHECK(name_end); buf[0] = 0; - strncat(buf, frame_descr, - Min(kBufSize, static_cast(name_end - frame_descr))); + internal_strncat(buf, frame_descr, + Min(kBufSize, + static_cast(name_end - frame_descr))); Printf("Address %p is located at offset %ld " "in frame <%s> of T%d's stack:\n", addr, offset, buf, t->tid()); @@ -196,7 +195,7 @@ } p++; buf[0] = 0; - strncat(buf, p, Min(kBufSize, len)); + internal_strncat(buf, p, Min(kBufSize, len)); p += len; Printf(" [%ld, %ld) '%s'\n", beg, beg + size, buf); } @@ -269,7 +268,7 @@ static int64_t IntFlagValue(const char *flags, const char *flag, int64_t default_val) { if (!flags) return default_val; - const char *str = strstr(flags, flag); + const char *str = internal_strstr(flags, flag); if (!str) return default_val; return atoll(str + internal_strlen(flag)); } From kcc at google.com Mon Jan 9 16:36:51 2012 From: kcc at google.com (Kostya Serebryany) Date: Mon, 09 Jan 2012 22:36:51 -0000 Subject: [llvm-commits] [compiler-rt] r147809 - /compiler-rt/trunk/lib/asan/asan_interceptors.cc Message-ID: <20120109223651.C52571BE003@llvm.org> Author: kcc Date: Mon Jan 9 16:36:51 2012 New Revision: 147809 URL: http://llvm.org/viewvc/llvm-project?rev=147809&view=rev Log: [asan] don't include string.h and strings.h Modified: compiler-rt/trunk/lib/asan/asan_interceptors.cc Modified: 
compiler-rt/trunk/lib/asan/asan_interceptors.cc URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/asan/asan_interceptors.cc?rev=147809&r1=147808&r2=147809&view=diff ============================================================================== --- compiler-rt/trunk/lib/asan/asan_interceptors.cc (original) +++ compiler-rt/trunk/lib/asan/asan_interceptors.cc Mon Jan 9 16:36:51 2012 @@ -25,8 +25,6 @@ #include #include #include -#include -#include namespace __asan { @@ -330,6 +328,7 @@ return c1_low - c2_low; } +extern "C" int WRAP(memcmp)(const void *a1, const void *a2, size_t size) { ENSURE_ASAN_INITED(); unsigned char c1 = 0, c2 = 0; @@ -346,6 +345,7 @@ return CharCmp(c1, c2); } +extern "C" void *WRAP(memcpy)(void *to, const void *from, size_t size) { // memcpy is called during __asan_init() from the internals // of printf(...). @@ -361,6 +361,7 @@ return real_memcpy(to, from, size); } +extern "C" void *WRAP(memmove)(void *to, const void *from, size_t size) { ENSURE_ASAN_INITED(); if (FLAG_replace_intrin) { @@ -370,6 +371,7 @@ return real_memmove(to, from, size); } +extern "C" void *WRAP(memset)(void *block, int c, size_t size) { // memset is called inside INTERCEPT_FUNCTION on Mac. if (asan_init_is_running) { @@ -382,15 +384,13 @@ return real_memset(block, c, size); } -// Note that on Linux index and strchr are definined differently depending on -// the compiler (gcc vs clang). 
-// see __CORRECT_ISO_CPP_STRING_H_PROTO in /usr/include/string.h - #ifndef __APPLE__ +extern "C" char *WRAP(index)(const char *str, int c) __attribute__((alias(WRAPPER_NAME(strchr)))); #endif +extern "C" char *WRAP(strchr)(const char *str, int c) { ENSURE_ASAN_INITED(); char *result = real_strchr(str, c); @@ -401,6 +401,7 @@ return result; } +extern "C" int WRAP(strcasecmp)(const char *s1, const char *s2) { ENSURE_ASAN_INITED(); unsigned char c1, c2; @@ -415,6 +416,7 @@ return CharCaseCmp(c1, c2); } +extern "C" char *WRAP(strcat)(char *to, const char *from) { // NOLINT ENSURE_ASAN_INITED(); if (FLAG_replace_str) { @@ -430,6 +432,7 @@ return real_strcat(to, from); } +extern "C" int WRAP(strcmp)(const char *s1, const char *s2) { // strcmp is called from malloc_default_purgeable_zone() // in __asan::ReplaceSystemAlloc() on Mac. @@ -448,6 +451,7 @@ return CharCmp(c1, c2); } +extern "C" char *WRAP(strcpy)(char *to, const char *from) { // NOLINT // strcpy is called from malloc_default_purgeable_zone() // in __asan::ReplaceSystemAlloc() on Mac. @@ -464,6 +468,7 @@ return real_strcpy(to, from); } +extern "C" char *WRAP(strdup)(const char *s) { ENSURE_ASAN_INITED(); if (FLAG_replace_str) { @@ -473,6 +478,7 @@ return real_strdup(s); } +extern "C" size_t WRAP(strlen)(const char *s) { // strlen is called from malloc_default_purgeable_zone() // in __asan::ReplaceSystemAlloc() on Mac. @@ -487,6 +493,7 @@ return length; } +extern "C" int WRAP(strncasecmp)(const char *s1, const char *s2, size_t size) { ENSURE_ASAN_INITED(); unsigned char c1 = 0, c2 = 0; @@ -501,6 +508,7 @@ return CharCaseCmp(c1, c2); } +extern "C" int WRAP(strncmp)(const char *s1, const char *s2, size_t size) { // strncmp is called from malloc_default_purgeable_zone() // in __asan::ReplaceSystemAlloc() on Mac. 
@@ -519,6 +527,7 @@ return CharCmp(c1, c2); } +extern "C" char *WRAP(strncpy)(char *to, const char *from, size_t size) { ENSURE_ASAN_INITED(); if (FLAG_replace_str) { @@ -531,6 +540,7 @@ } #ifndef __APPLE__ +extern "C" size_t WRAP(strnlen)(const char *s, size_t maxlen) { ENSURE_ASAN_INITED(); size_t length = real_strnlen(s, maxlen); From kcc at google.com Mon Jan 9 16:45:05 2012 From: kcc at google.com (Kostya Serebryany) Date: Mon, 09 Jan 2012 22:45:05 -0000 Subject: [llvm-commits] [compiler-rt] r147810 - /compiler-rt/trunk/lib/asan/asan_interceptors.cc Message-ID: <20120109224505.DA91A1BE003@llvm.org> Author: kcc Date: Mon Jan 9 16:45:05 2012 New Revision: 147810 URL: http://llvm.org/viewvc/llvm-project?rev=147810&view=rev Log: [asan] temporarily reinstate string.h/strings.h. Removal of those caused a Mac build failure which I failed to observe before the commit Modified: compiler-rt/trunk/lib/asan/asan_interceptors.cc Modified: compiler-rt/trunk/lib/asan/asan_interceptors.cc URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/asan/asan_interceptors.cc?rev=147810&r1=147809&r2=147810&view=diff ============================================================================== --- compiler-rt/trunk/lib/asan/asan_interceptors.cc (original) +++ compiler-rt/trunk/lib/asan/asan_interceptors.cc Mon Jan 9 16:45:05 2012 @@ -26,6 +26,9 @@ #include #include +#include +#include + namespace __asan { typedef void (*longjmp_f)(void *env, int val); From eli.friedman at gmail.com Mon Jan 9 16:58:41 2012 From: eli.friedman at gmail.com (Eli Friedman) Date: Mon, 9 Jan 2012 14:58:41 -0800 Subject: [llvm-commits] [llvm] r147749 - in /llvm/trunk: lib/Transforms/InstCombine/InstCombineAndOrXor.cpp test/Transforms/InstCombine/sign-test-and-or.ll In-Reply-To: <20120108183224.701CE2A6C12C@llvm.org> References: <20120108183224.701CE2A6C12C@llvm.org> Message-ID: On Sun, Jan 8, 2012 at 10:32 AM, Benjamin Kramer wrote: > Author: d0k > Date: Sun Jan  8 12:32:24 2012 > New Revision: 147749
> > URL: http://llvm.org/viewvc/llvm-project?rev=147749&view=rev > Log: > InstCombine: If we have a bit test and a sign test anded/ored together, merge the sign bit into the bit test. > > This is common in bit field code, e.g. checking if the first or the last bit of a bit field is set. > > Modified: >    llvm/trunk/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp >    llvm/trunk/test/Transforms/InstCombine/sign-test-and-or.ll > > Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp?rev=147749&r1=147748&r2=147749&view=diff > ============================================================================== > --- llvm/trunk/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp (original) > +++ llvm/trunk/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp Sun Jan  8 12:32:24 2012 > @@ -743,6 +743,22 @@ >       } >     } >   } > + > +  // (X & C) == 0 & X > -1  ->  (X & (C | SignBit)) == 0 > +  if (LHS->hasOneUse() && RHS->hasOneUse() && > +      ((LHSCC == ICmpInst::ICMP_EQ && LHSCst->isZero() && > +        RHSCC == ICmpInst::ICMP_SGT && RHSCst->isAllOnesValue()) || > +       (RHSCC == ICmpInst::ICMP_EQ && RHSCst->isZero() && > +        LHSCC == ICmpInst::ICMP_SGT && LHSCst->isAllOnesValue()))) { > +    BinaryOperator *BO = > +      dyn_cast(LHSCC == ICmpInst::ICMP_EQ ? Val : Val2); > +    ConstantInt *AndCst; > +    if (BO && match(BO, m_OneUse(m_And(m_Value(), m_ConstantInt(AndCst))))) { > +      APInt New = AndCst->getValue() | APInt::getSignBit(AndCst->getBitWidth()); > +      BO->setOperand(1, ConstantInt::get(AndCst->getContext(), New)); > +      return BO == Val ? LHS : RHS; > +    } > +  } There's a rather nasty mistake in this transform as written: it will transform "(X & C) == 0 & Y > -1  ->  (X & (C | SignBit)) == 0". I believe this is already fixed by r147777, but I figured I would note it here anyway.
-Eli From eli.friedman at gmail.com Mon Jan 9 17:05:03 2012 From: eli.friedman at gmail.com (Eli Friedman) Date: Mon, 9 Jan 2012 15:05:03 -0800 Subject: [llvm-commits] [llvm] r147777 - in /llvm/trunk: lib/Transforms/InstCombine/InstCombineAndOrXor.cpp test/Transforms/InstCombine/sign-test-and-or.ll In-Reply-To: <20120109172327.A6DA21BE003@llvm.org> References: <20120109172327.A6DA21BE003@llvm.org> Message-ID: On Mon, Jan 9, 2012 at 9:23 AM, Benjamin Kramer wrote: > Author: d0k > Date: Mon Jan ?9 11:23:27 2012 > New Revision: 147777 > > URL: http://llvm.org/viewvc/llvm-project?rev=147777&view=rev > Log: > InstCombine: Teach foldLogOpOfMaskedICmpsHelper that sign bit tests are bit tests. > > This subsumes several other transforms while enabling us to catch more cases. This commit appears to be causing failures on http://lab.llvm.org:8011/builders/llvm-arm-linux/builds/419 . Any ideas? -Eli > Modified: > ? ?llvm/trunk/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp > ? ?llvm/trunk/test/Transforms/InstCombine/sign-test-and-or.ll > > Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp?rev=147777&r1=147776&r2=147777&view=diff > ============================================================================== > --- llvm/trunk/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp (original) > +++ llvm/trunk/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp Mon Jan ?9 11:23:27 2012 > @@ -496,6 +496,38 @@ > ? return result; > ?} > > +/// decomposeBitTestICmp - Decompose an icmp into the form ((X & Y) pred Z) > +/// if possible. The returned predicate is either == or !=. Returns false if > +/// decomposition fails. > +static bool decomposeBitTestICmp(const ICmpInst *I, ICmpInst::Predicate &Pred, > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? Value *&X, Value *&Y, Value *&Z) { > + ?// X < 0 is equivalent to (X & SignBit) != 0. 
> + ?if (I->getPredicate() == ICmpInst::ICMP_SLT) > + ? ?if (ConstantInt *C = dyn_cast(I->getOperand(1))) > + ? ? ?if (C->isZero()) { > + ? ? ? ?X = I->getOperand(0); > + ? ? ? ?Y = ConstantInt::get(I->getContext(), > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? APInt::getSignBit(C->getBitWidth())); > + ? ? ? ?Pred = ICmpInst::ICMP_NE; > + ? ? ? ?Z = C; > + ? ? ? ?return true; > + ? ? ?} > + > + ?// X > -1 is equivalent to (X & SignBit) == 0. > + ?if (I->getPredicate() == ICmpInst::ICMP_SGT) > + ? ?if (ConstantInt *C = dyn_cast(I->getOperand(1))) > + ? ? ?if (C->isAllOnesValue()) { > + ? ? ? ?X = I->getOperand(0); > + ? ? ? ?Y = ConstantInt::get(I->getContext(), > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? APInt::getSignBit(C->getBitWidth())); > + ? ? ? ?Pred = ICmpInst::ICMP_EQ; > + ? ? ? ?Z = ConstantInt::getNullValue(C->getType()); > + ? ? ? ?return true; > + ? ? ?} > + > + ?return false; > +} > + > ?/// foldLogOpOfMaskedICmpsHelper: > ?/// handle (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E) > ?/// return the set of pattern classes (from MaskedICmpType) > @@ -503,10 +535,9 @@ > ?static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A, > ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?Value*& B, Value*& C, > ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?Value*& D, Value*& E, > - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ICmpInst *LHS, ICmpInst *RHS) { > - ?ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate(); > - ?if (LHSCC != ICmpInst::ICMP_EQ && LHSCC != ICmpInst::ICMP_NE) return 0; > - ?if (RHSCC != ICmpInst::ICMP_EQ && RHSCC != ICmpInst::ICMP_NE) return 0; > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ICmpInst *LHS, ICmpInst *RHS, > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ICmpInst::Predicate &LHSCC, > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ICmpInst::Predicate &RHSCC) { > ? if (LHS->getOperand(0)->getType() != RHS->getOperand(0)->getType()) return 0; > ? // vectors are not (yet?) supported > ? 
if (LHS->getOperand(0)->getType()->isVectorTy()) return 0; > @@ -520,40 +551,60 @@ > ? Value *L1 = LHS->getOperand(0); > ? Value *L2 = LHS->getOperand(1); > ? Value *L11,*L12,*L21,*L22; > - ?if (match(L1, m_And(m_Value(L11), m_Value(L12)))) { > - ? ?if (!match(L2, m_And(m_Value(L21), m_Value(L22)))) > + ?// Check whether the icmp can be decomposed into a bit test. > + ?if (decomposeBitTestICmp(LHS, LHSCC, L11, L12, L2)) { > + ? ?L21 = L22 = L1 = 0; > + ?} else { > + ? ?// Look for ANDs in the LHS icmp. > + ? ?if (match(L1, m_And(m_Value(L11), m_Value(L12)))) { > + ? ? ?if (!match(L2, m_And(m_Value(L21), m_Value(L22)))) > + ? ? ? ?L21 = L22 = 0; > + ? ?} else { > + ? ? ?if (!match(L2, m_And(m_Value(L11), m_Value(L12)))) > + ? ? ? ?return 0; > + ? ? ?std::swap(L1, L2); > ? ? ? L21 = L22 = 0; > - ?} > - ?else { > - ? ?if (!match(L2, m_And(m_Value(L11), m_Value(L12)))) > - ? ? ?return 0; > - ? ?std::swap(L1, L2); > - ? ?L21 = L22 = 0; > + ? ?} > ? } > > + ?// Bail if LHS was a icmp that can't be decomposed into an equality. > + ?if (!ICmpInst::isEquality(LHSCC)) > + ? ?return 0; > + > ? Value *R1 = RHS->getOperand(0); > ? Value *R2 = RHS->getOperand(1); > ? Value *R11,*R12; > ? bool ok = false; > - ?if (match(R1, m_And(m_Value(R11), m_Value(R12)))) { > - ? ?if (R11 != 0 && (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22)) { > - ? ? ?A = R11; D = R12; E = R2; ok = true; > + ?if (decomposeBitTestICmp(RHS, RHSCC, R11, R12, R2)) { > + ? ?if (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22) { > + ? ? ?A = R11; D = R12; > + ? ?} else if (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22) { > + ? ? ?A = R12; D = R11; > + ? ?} else { > + ? ? ?return 0; > ? ? } > - ? ?else > - ? ?if (R12 != 0 && (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22)) { > + ? ?E = R2; R1 = 0; ok = true; > + ?} else if (match(R1, m_And(m_Value(R11), m_Value(R12)))) { > + ? ?if (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22) { > + ? ? 
?A = R11; D = R12; E = R2; ok = true; > + ? ?} else if (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22) { > ? ? ? A = R12; D = R11; E = R2; ok = true; > ? ? } > ? } > + > + ?// Bail if RHS was a icmp that can't be decomposed into an equality. > + ?if (!ICmpInst::isEquality(RHSCC)) > + ? ?return 0; > + > + ?// Look for ANDs in on the right side of the RHS icmp. > ? if (!ok && match(R2, m_And(m_Value(R11), m_Value(R12)))) { > - ? ?if (R11 != 0 && (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22)) { > - ? ? ? A = R11; D = R12; E = R1; ok = true; > - ? ?} > - ? ?else > - ? ?if (R12 != 0 && (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22)) { > + ? ?if (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22) { > + ? ? ?A = R11; D = R12; E = R1; ok = true; > + ? ?} else if (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22) { > ? ? ? A = R12; D = R11; E = R1; ok = true; > - ? ?} > - ? ?else > + ? ?} else { > ? ? ? return 0; > + ? ?} > ? } > ? if (!ok) > ? ? return 0; > @@ -582,7 +633,11 @@ > ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?ICmpInst::Predicate NEWCC, > ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?llvm::InstCombiner::BuilderTy* Builder) { > ? Value *A = 0, *B = 0, *C = 0, *D = 0, *E = 0; > - ?unsigned mask = foldLogOpOfMaskedICmpsHelper(A, B, C, D, E, LHS, RHS); > + ?ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate(); > + ?unsigned mask = foldLogOpOfMaskedICmpsHelper(A, B, C, D, E, LHS, RHS, > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? LHSCC, RHSCC); > + ?assert(ICmpInst::isEquality(LHSCC) && ICmpInst::isEquality(RHSCC) && > + ? ? ? ? "foldLogOpOfMaskedICmpsHelper must return an equality predicate."); > ? if (mask == 0) return 0; > > ? if (NEWCC == ICmpInst::ICMP_NE) > @@ -631,11 +686,11 @@ > > ? ? ConstantInt *CCst = dyn_cast(C); > ? ? if (CCst == 0) return 0; > - ? ?if (LHS->getPredicate() != NEWCC) > + ? ?if (LHSCC != NEWCC) > ? ? ? CCst = dyn_cast( ConstantExpr::getXor(BCst, CCst) ); > ? ? 
ConstantInt *ECst = dyn_cast(E); > ? ? if (ECst == 0) return 0; > - ? ?if (RHS->getPredicate() != NEWCC) > + ? ?if (RHSCC != NEWCC) > ? ? ? ECst = dyn_cast( ConstantExpr::getXor(DCst, ECst) ); > ? ? ConstantInt* MCst = dyn_cast( > ? ? ? ConstantExpr::getAnd(ConstantExpr::getAnd(BCst, DCst), > @@ -694,18 +749,6 @@ > ? ? ? Value *NewOr = Builder->CreateOr(Val, Val2); > ? ? ? return Builder->CreateICmp(LHSCC, NewOr, LHSCst); > ? ? } > - > - ? ?// (icmp slt A, 0) & (icmp slt B, 0) --> (icmp slt (A&B), 0) > - ? ?if (LHSCC == ICmpInst::ICMP_SLT && LHSCst->isZero()) { > - ? ? ?Value *NewAnd = Builder->CreateAnd(Val, Val2); > - ? ? ?return Builder->CreateICmp(LHSCC, NewAnd, LHSCst); > - ? ?} > - > - ? ?// (icmp sgt A, -1) & (icmp sgt B, -1) --> (icmp sgt (A|B), -1) > - ? ?if (LHSCC == ICmpInst::ICMP_SGT && LHSCst->isAllOnesValue()) { > - ? ? ?Value *NewOr = Builder->CreateOr(Val, Val2); > - ? ? ?return Builder->CreateICmp(LHSCC, NewOr, LHSCst); > - ? ?} > ? } > > ? // (trunc x) == C1 & (and x, CA) == C2 -> (and x, CA|CMAX) == C1|C2 > @@ -744,21 +787,6 @@ > ? ? } > ? } > > - ?// (X & C) == 0 & X > -1 ?-> ?(X & (C | SignBit)) == 0 > - ?if ((LHSCC == ICmpInst::ICMP_EQ ?&& LHSCst->isZero() && > - ? ? ? RHSCC == ICmpInst::ICMP_SGT && RHSCst->isAllOnesValue()) || > - ? ? ?(RHSCC == ICmpInst::ICMP_EQ ?&& RHSCst->isZero() && > - ? ? ? LHSCC == ICmpInst::ICMP_SGT && LHSCst->isAllOnesValue())) { > - ? ?ICmpInst *I = LHSCC == ICmpInst::ICMP_EQ ? LHS : RHS; > - ? ?Value *X; ConstantInt *C; > - ? ?if (I->hasOneUse() && > - ? ? ? ?match(I->getOperand(0), m_OneUse(m_And(m_Value(X), m_ConstantInt(C))))){ > - ? ? ?APInt New = C->getValue() | APInt::getSignBit(C->getBitWidth()); > - ? ? ?return Builder->CreateICmpEQ(Builder->CreateAnd(X, Builder->getInt(New)), > - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? I->getOperand(1)); > - ? ?} > - ?} > - > ? // From here on, we only handle: > ? // ? ?(icmp1 A, C1) & (icmp2 A, C2) --> something simpler. > ? 
if (Val != Val2) return 0; > @@ -1443,33 +1471,6 @@ > ? ? ? Value *NewOr = Builder->CreateOr(Val, Val2); > ? ? ? return Builder->CreateICmp(LHSCC, NewOr, LHSCst); > ? ? } > - > - ? ?// (icmp slt A, 0) | (icmp slt B, 0) --> (icmp slt (A|B), 0) > - ? ?if (LHSCC == ICmpInst::ICMP_SLT && LHSCst->isZero()) { > - ? ? ?Value *NewOr = Builder->CreateOr(Val, Val2); > - ? ? ?return Builder->CreateICmp(LHSCC, NewOr, LHSCst); > - ? ?} > - > - ? ?// (icmp sgt A, -1) | (icmp sgt B, -1) --> (icmp sgt (A&B), -1) > - ? ?if (LHSCC == ICmpInst::ICMP_SGT && LHSCst->isAllOnesValue()) { > - ? ? ?Value *NewAnd = Builder->CreateAnd(Val, Val2); > - ? ? ?return Builder->CreateICmp(LHSCC, NewAnd, LHSCst); > - ? ?} > - ?} > - > - ?// (X & C) != 0 | X < 0 ?-> ?(X & (C | SignBit)) != 0 > - ?if ((LHSCC == ICmpInst::ICMP_NE ?&& LHSCst->isZero() && > - ? ? ? RHSCC == ICmpInst::ICMP_SLT && RHSCst->isZero()) || > - ? ? ?(RHSCC == ICmpInst::ICMP_NE ?&& RHSCst->isZero() && > - ? ? ? LHSCC == ICmpInst::ICMP_SLT && LHSCst->isZero())) { > - ? ?ICmpInst *I = LHSCC == ICmpInst::ICMP_NE ? LHS : RHS; > - ? ?Value *X; ConstantInt *C; > - ? ?if (I->hasOneUse() && > - ? ? ? ?match(I->getOperand(0), m_OneUse(m_And(m_Value(X), m_ConstantInt(C))))){ > - ? ? ?APInt New = C->getValue() | APInt::getSignBit(C->getBitWidth()); > - ? ? ?return Builder->CreateICmpNE(Builder->CreateAnd(X, Builder->getInt(New)), > - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? I->getOperand(1)); > - ? ?} > ? } > > ? 
// (icmp ult (X + CA), C1) | (icmp eq X, C2) -> (icmp ule (X + CA), C1) > > Modified: llvm/trunk/test/Transforms/InstCombine/sign-test-and-or.ll > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/sign-test-and-or.ll?rev=147777&r1=147776&r2=147777&view=diff > ============================================================================== > --- llvm/trunk/test/Transforms/InstCombine/sign-test-and-or.ll (original) > +++ llvm/trunk/test/Transforms/InstCombine/sign-test-and-or.ll Mon Jan ?9 11:23:27 2012 > @@ -157,3 +157,23 @@ > ?if.end: > ? ret void > ?} > + > +define void @test9(i32 %a) nounwind { > + ?%1 = and i32 %a, 1073741824 > + ?%2 = icmp ne i32 %1, 0 > + ?%3 = icmp sgt i32 %a, -1 > + ?%or.cond = and i1 %2, %3 > + ?br i1 %or.cond, label %if.then, label %if.end > + > +; CHECK: @test9 > +; CHECK-NEXT: %1 = and i32 %a, -1073741824 > +; CHECK-NEXT: %2 = icmp eq i32 %1, 1073741824 > +; CHECK-NEXT: br i1 %2, label %if.then, label %if.end > + > +if.then: > + ?tail call void @foo() nounwind > + ?ret void > + > +if.end: > + ?ret void > +} > > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits From kcc at google.com Mon Jan 9 17:11:26 2012 From: kcc at google.com (Kostya Serebryany) Date: Mon, 09 Jan 2012 23:11:26 -0000 Subject: [llvm-commits] [compiler-rt] r147811 - in /compiler-rt/trunk/lib/asan: asan_internal.h asan_linux.cc asan_mac.cc asan_posix.cc asan_printf.cc asan_rtl.cc Message-ID: <20120109231126.7E2FF1BE003@llvm.org> Author: kcc Date: Mon Jan 9 17:11:26 2012 New Revision: 147811 URL: http://llvm.org/viewvc/llvm-project?rev=147811&view=rev Log: [asan] don't include unistd.h in the headers Modified: compiler-rt/trunk/lib/asan/asan_internal.h compiler-rt/trunk/lib/asan/asan_linux.cc compiler-rt/trunk/lib/asan/asan_mac.cc compiler-rt/trunk/lib/asan/asan_posix.cc compiler-rt/trunk/lib/asan/asan_printf.cc 
compiler-rt/trunk/lib/asan/asan_rtl.cc Modified: compiler-rt/trunk/lib/asan/asan_internal.h URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/asan/asan_internal.h?rev=147811&r1=147810&r2=147811&view=diff ============================================================================== --- compiler-rt/trunk/lib/asan/asan_internal.h (original) +++ compiler-rt/trunk/lib/asan/asan_internal.h Mon Jan 9 17:11:26 2012 @@ -20,7 +20,6 @@ #include // for __WORDSIZE #include // for size_t -#include // for _exit // If __WORDSIZE was undefined by the platform, define it in terms of the // compiler built-in __LP64__. @@ -102,19 +101,20 @@ void AsanDisableCoreDumper(); void GetPcSpBp(void *context, uintptr_t *pc, uintptr_t *sp, uintptr_t *bp); -ssize_t AsanRead(int fd, void *buf, size_t count); -ssize_t AsanWrite(int fd, const void *buf, size_t count); +size_t AsanRead(int fd, void *buf, size_t count); +size_t AsanWrite(int fd, const void *buf, size_t count); int AsanClose(int fd); bool AsanInterceptsSignal(int signum); void InstallSignalHandlers(); +int GetPid(); // Opens the file 'file_name" and reads up to 'max_len' bytes. // The resulting buffer is mmaped and stored in '*buff'. // The size of the mmaped region is stored in '*buff_size', -// Returns the number of read bytes or -1 if file can not be opened. -ssize_t ReadFileToBuffer(const char *file_name, char **buff, - size_t *buff_size, size_t max_len); +// Returns the number of read bytes or 0 if file can not be opened. 
+size_t ReadFileToBuffer(const char *file_name, char **buff, + size_t *buff_size, size_t max_len); // asan_printf.cc void RawWrite(const char *buffer); @@ -162,9 +162,7 @@ enum LinkerInitialized { LINKER_INITIALIZED = 0 }; -#ifndef ASAN_DIE -#define ASAN_DIE _exit(FLAG_exitcode) -#endif // ASAN_DIE +void AsanDie(); #define CHECK(cond) do { if (!(cond)) { \ CheckFailed(#cond, __FILE__, __LINE__); \ @@ -173,7 +171,7 @@ #define RAW_CHECK_MSG(expr, msg) do { \ if (!(expr)) { \ RawWrite(msg); \ - ASAN_DIE; \ + AsanDie(); \ } \ } while (0) Modified: compiler-rt/trunk/lib/asan/asan_linux.cc URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/asan/asan_linux.cc?rev=147811&r1=147810&r2=147811&view=diff ============================================================================== --- compiler-rt/trunk/lib/asan/asan_linux.cc (original) +++ compiler-rt/trunk/lib/asan/asan_linux.cc Mon Jan 9 17:11:26 2012 @@ -114,20 +114,20 @@ int res = syscall(__NR_munmap, addr, size); if (res != 0) { Report("Failed to unmap\n"); - ASAN_DIE; + AsanDie(); } } -ssize_t AsanWrite(int fd, const void *buf, size_t count) { - return (ssize_t)syscall(__NR_write, fd, buf, count); +size_t AsanWrite(int fd, const void *buf, size_t count) { + return (size_t)syscall(__NR_write, fd, buf, count); } int AsanOpenReadonly(const char* filename) { return open(filename, O_RDONLY); } -ssize_t AsanRead(int fd, void *buf, size_t count) { - return (ssize_t)syscall(__NR_read, fd, buf, count); +size_t AsanRead(int fd, void *buf, size_t count) { + return (size_t)syscall(__NR_read, fd, buf, count); } int AsanClose(int fd) { @@ -202,8 +202,8 @@ if (count == 0) { // The first item (the main executable) does not have a so name, // but we can just read it from /proc/self/exe. 
- ssize_t path_len = readlink("/proc/self/exe", - data->filename, data->filename_size - 1); + size_t path_len = readlink("/proc/self/exe", + data->filename, data->filename_size - 1); data->filename[path_len] = 0; } else { CHECK(info->dlpi_name); Modified: compiler-rt/trunk/lib/asan/asan_mac.cc URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/asan/asan_mac.cc?rev=147811&r1=147810&r2=147811&view=diff ============================================================================== --- compiler-rt/trunk/lib/asan/asan_mac.cc (original) +++ compiler-rt/trunk/lib/asan/asan_mac.cc Mon Jan 9 17:11:26 2012 @@ -50,7 +50,6 @@ # endif // __WORDSIZE } - // No-op. Mac does not support static linkage anyway. void *AsanDoesNotSupportStaticLinkage() { return NULL; @@ -65,7 +64,7 @@ return mmap(addr, length, prot, flags, fd, offset); } -ssize_t AsanWrite(int fd, const void *buf, size_t count) { +size_t AsanWrite(int fd, const void *buf, size_t count) { return write(fd, buf, count); } @@ -106,7 +105,7 @@ int res = munmap(addr, size); if (res != 0) { Report("Failed to unmap\n"); - ASAN_DIE; + AsanDie(); } } @@ -114,7 +113,7 @@ return open(filename, O_RDONLY); } -ssize_t AsanRead(int fd, void *buf, size_t count) { +size_t AsanRead(int fd, void *buf, size_t count) { return read(fd, buf, count); } Modified: compiler-rt/trunk/lib/asan/asan_posix.cc URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/asan/asan_posix.cc?rev=147811&r1=147810&r2=147811&view=diff ============================================================================== --- compiler-rt/trunk/lib/asan/asan_posix.cc (original) +++ compiler-rt/trunk/lib/asan/asan_posix.cc Mon Jan 9 17:11:26 2012 @@ -21,6 +21,7 @@ #include #include #include +#include namespace __asan { @@ -38,7 +39,7 @@ static void ASAN_OnSIGSEGV(int, siginfo_t *siginfo, void *context) { uintptr_t addr = (uintptr_t)siginfo->si_addr; // Write the first message using the bullet-proof write. 
- if (13 != AsanWrite(2, "ASAN:SIGSEGV\n", 13)) ASAN_DIE; + if (13 != AsanWrite(2, "ASAN:SIGSEGV\n", 13)) AsanDie(); uintptr_t pc, sp, bp; GetPcSpBp(context, &pc, &sp, &bp); Report("ERROR: AddressSanitizer crashed on unknown address %p" @@ -63,6 +64,14 @@ setrlimit(RLIMIT_CORE, &nocore); } +void AsanDie() { + _exit(FLAG_exitcode); +} + +int GetPid() { + return getpid(); +} + } // namespace __asan #endif // __linux__ || __APPLE_ Modified: compiler-rt/trunk/lib/asan/asan_printf.cc URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/asan/asan_printf.cc?rev=147811&r1=147810&r2=147811&view=diff ============================================================================== --- compiler-rt/trunk/lib/asan/asan_printf.cc (original) +++ compiler-rt/trunk/lib/asan/asan_printf.cc Mon Jan 9 17:11:26 2012 @@ -24,10 +24,10 @@ void RawWrite(const char *buffer) { static const char *kRawWriteError = "RawWrite can't output requested buffer!"; - ssize_t length = (ssize_t)internal_strlen(buffer); + size_t length = (size_t)internal_strlen(buffer); if (length != AsanWrite(2, buffer, length)) { AsanWrite(2, kRawWriteError, internal_strlen(kRawWriteError)); - ASAN_DIE; + AsanDie(); } } @@ -168,7 +168,7 @@ void Report(const char *format, ...) 
{ const int kLen = 1024 * 4; char buffer[kLen]; - int needed_length = SNPrintf(buffer, kLen, "==%d== ", getpid()); + int needed_length = SNPrintf(buffer, kLen, "==%d== ", GetPid()); RAW_CHECK_MSG(needed_length < kLen, "Buffer in Report is too short!\n"); va_list args; va_start(args, format); Modified: compiler-rt/trunk/lib/asan/asan_rtl.cc URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/asan/asan_rtl.cc?rev=147811&r1=147810&r2=147811&view=diff ============================================================================== --- compiler-rt/trunk/lib/asan/asan_rtl.cc (original) +++ compiler-rt/trunk/lib/asan/asan_rtl.cc Mon Jan 9 17:11:26 2012 @@ -57,7 +57,7 @@ // -------------------------- Misc ---------------- {{{1 void ShowStatsAndAbort() { __asan_print_accumulated_stats(); - ASAN_DIE; + AsanDie(); } static void PrintBytes(const char *before, uintptr_t *a) { @@ -70,10 +70,10 @@ Printf("\n"); } -ssize_t ReadFileToBuffer(const char *file_name, char **buff, +size_t ReadFileToBuffer(const char *file_name, char **buff, size_t *buff_size, size_t max_len) { const size_t kMinFileLen = kPageSize; - ssize_t read_len = -1; + size_t read_len = 0; *buff = 0; *buff_size = 0; // The files we usually open are not seekable, so try different buffer sizes. 
@@ -386,7 +386,7 @@ PrintBytes(" ", (uintptr_t*)(aligned_shadow+2*kWordSize)); PrintBytes(" ", (uintptr_t*)(aligned_shadow+3*kWordSize)); PrintBytes(" ", (uintptr_t*)(aligned_shadow+4*kWordSize)); - ASAN_DIE; + AsanDie(); } void __asan_init() { From rafael.espindola at gmail.com Mon Jan 9 18:40:39 2012 From: rafael.espindola at gmail.com (Rafael Espindola) Date: Tue, 10 Jan 2012 00:40:39 -0000 Subject: [llvm-commits] [llvm] r147820 - in /llvm/trunk: include/llvm/MC/MCStreamer.h lib/CodeGen/LLVMTargetMachine.cpp lib/MC/CMakeLists.txt lib/MC/MCLoggingStreamer.cpp lib/Target/PTX/PTXTargetMachine.cpp tools/llvm-mc/llvm-mc.cpp Message-ID: <20120110004039.705E91BE003@llvm.org> Author: rafael Date: Mon Jan 9 18:40:39 2012 New Revision: 147820 URL: http://llvm.org/viewvc/llvm-project?rev=147820&view=rev Log: Remove the logging streamer. Removed: llvm/trunk/lib/MC/MCLoggingStreamer.cpp Modified: llvm/trunk/include/llvm/MC/MCStreamer.h llvm/trunk/lib/CodeGen/LLVMTargetMachine.cpp llvm/trunk/lib/MC/CMakeLists.txt llvm/trunk/lib/Target/PTX/PTXTargetMachine.cpp llvm/trunk/tools/llvm-mc/llvm-mc.cpp Modified: llvm/trunk/include/llvm/MC/MCStreamer.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/MC/MCStreamer.h?rev=147820&r1=147819&r2=147820&view=diff ============================================================================== --- llvm/trunk/include/llvm/MC/MCStreamer.h (original) +++ llvm/trunk/include/llvm/MC/MCStreamer.h Mon Jan 9 18:40:39 2012 @@ -657,12 +657,6 @@ raw_ostream &OS, MCCodeEmitter *CE, bool RelaxAll, bool NoExecStack); - /// createLoggingStreamer - Create a machine code streamer which just logs the - /// API calls and then dispatches to another streamer. - /// - /// The new streamer takes ownership of the \arg Child. - MCStreamer *createLoggingStreamer(MCStreamer *Child, raw_ostream &OS); - /// createPureStreamer - Create a machine code streamer which will generate /// "pure" MC object files, for use with MC-JIT and testing tools. 
/// Modified: llvm/trunk/lib/CodeGen/LLVMTargetMachine.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/LLVMTargetMachine.cpp?rev=147820&r1=147819&r2=147820&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/LLVMTargetMachine.cpp (original) +++ llvm/trunk/lib/CodeGen/LLVMTargetMachine.cpp Mon Jan 9 18:40:39 2012 @@ -82,8 +82,6 @@ cl::desc("Show encoding in .s output")); static cl::opt ShowMCInst("show-mc-inst", cl::Hidden, cl::desc("Show instruction structure in .s output")); -static cl::opt EnableMCLogging("enable-mc-api-logging", cl::Hidden, - cl::desc("Enable MC API logging")); static cl::opt VerifyMachineCode("verify-machineinstrs", cl::Hidden, cl::desc("Verify generated machine code"), cl::init(getenv("LLVM_VERIFY_MACHINEINSTRS")!=NULL)); @@ -191,9 +189,6 @@ break; } - if (EnableMCLogging) - AsmStreamer.reset(createLoggingStreamer(AsmStreamer.take(), errs())); - // Create the AsmPrinter, which takes ownership of AsmStreamer if successful. 
FunctionPass *Printer = getTarget().createAsmPrinter(*this, *AsmStreamer); if (Printer == 0) Modified: llvm/trunk/lib/MC/CMakeLists.txt URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/MC/CMakeLists.txt?rev=147820&r1=147819&r2=147820&view=diff ============================================================================== --- llvm/trunk/lib/MC/CMakeLists.txt (original) +++ llvm/trunk/lib/MC/CMakeLists.txt Mon Jan 9 18:40:39 2012 @@ -20,7 +20,6 @@ MCInstPrinter.cpp MCInstrAnalysis.cpp MCLabel.cpp - MCLoggingStreamer.cpp MCMachOStreamer.cpp MCMachObjectTargetWriter.cpp MCModule.cpp Removed: llvm/trunk/lib/MC/MCLoggingStreamer.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/MC/MCLoggingStreamer.cpp?rev=147819&view=auto ============================================================================== --- llvm/trunk/lib/MC/MCLoggingStreamer.cpp (original) +++ llvm/trunk/lib/MC/MCLoggingStreamer.cpp (removed) @@ -1,257 +0,0 @@ -//===- lib/MC/MCLoggingStreamer.cpp - API Logging Streamer ----------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// - -#include "llvm/MC/MCStreamer.h" -#include "llvm/ADT/OwningPtr.h" -#include "llvm/ADT/Twine.h" -#include "llvm/Support/raw_ostream.h" -using namespace llvm; - -namespace { - -class MCLoggingStreamer : public MCStreamer { - llvm::OwningPtr Child; - - raw_ostream &OS; - -public: - MCLoggingStreamer(MCStreamer *_Child, raw_ostream &_OS) - : MCStreamer(_Child->getContext()), Child(_Child), OS(_OS) {} - - void LogCall(const char *Function) { - OS << Function << "\n"; - } - - void LogCall(const char *Function, const Twine &Message) { - OS << Function << ": " << Message << "\n"; - } - - virtual bool isVerboseAsm() const { return Child->isVerboseAsm(); } - - virtual bool hasRawTextSupport() const { return Child->hasRawTextSupport(); } - - virtual raw_ostream &GetCommentOS() { return Child->GetCommentOS(); } - - virtual void AddComment(const Twine &T) { - LogCall("AddComment", T); - return Child->AddComment(T); - } - - virtual void AddBlankLine() { - LogCall("AddBlankLine"); - return Child->AddBlankLine(); - } - - virtual void ChangeSection(const MCSection *Section) { - LogCall("ChangeSection"); - return Child->ChangeSection(Section); - } - - virtual void InitSections() { - LogCall("InitSections"); - return Child->InitSections(); - } - - virtual void EmitLabel(MCSymbol *Symbol) { - LogCall("EmitLabel"); - return Child->EmitLabel(Symbol); - } - - virtual void EmitAssemblerFlag(MCAssemblerFlag Flag) { - LogCall("EmitAssemblerFlag"); - return Child->EmitAssemblerFlag(Flag); - } - - virtual void EmitThumbFunc(MCSymbol *Func) { - LogCall("EmitThumbFunc"); - return Child->EmitThumbFunc(Func); - } - - virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) { - LogCall("EmitAssignment"); - return Child->EmitAssignment(Symbol, Value); - } - - virtual void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol) { - LogCall("EmitWeakReference"); - return 
Child->EmitWeakReference(Alias, Symbol); - } - - virtual void EmitDwarfAdvanceLineAddr(int64_t LineDelta, - const MCSymbol *LastLabel, - const MCSymbol *Label, - unsigned PointerSize) { - LogCall("EmitDwarfAdvanceLineAddr"); - return Child->EmitDwarfAdvanceLineAddr(LineDelta, LastLabel, Label, - PointerSize); - } - - virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) { - LogCall("EmitSymbolAttribute"); - return Child->EmitSymbolAttribute(Symbol, Attribute); - } - - virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) { - LogCall("EmitSymbolDesc"); - return Child->EmitSymbolDesc(Symbol, DescValue); - } - - virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol) { - LogCall("BeginCOFFSymbolDef"); - return Child->BeginCOFFSymbolDef(Symbol); - } - - virtual void EmitCOFFSymbolStorageClass(int StorageClass) { - LogCall("EmitCOFFSymbolStorageClass"); - return Child->EmitCOFFSymbolStorageClass(StorageClass); - } - - virtual void EmitCOFFSymbolType(int Type) { - LogCall("EmitCOFFSymbolType"); - return Child->EmitCOFFSymbolType(Type); - } - - virtual void EndCOFFSymbolDef() { - LogCall("EndCOFFSymbolDef"); - return Child->EndCOFFSymbolDef(); - } - - virtual void EmitCOFFSecRel32(MCSymbol const *Symbol) { - LogCall("EmitCOFFSecRel32"); - return Child->EmitCOFFSecRel32(Symbol); - } - - virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) { - LogCall("EmitELFSize"); - return Child->EmitELFSize(Symbol, Value); - } - - virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, - unsigned ByteAlignment) { - LogCall("EmitCommonSymbol"); - return Child->EmitCommonSymbol(Symbol, Size, ByteAlignment); - } - - virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size, - unsigned ByteAlignment) { - LogCall("EmitLocalCommonSymbol"); - return Child->EmitLocalCommonSymbol(Symbol, Size, ByteAlignment); - } - - virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0, - unsigned Size = 0, unsigned ByteAlignment = 
0) { - LogCall("EmitZerofill"); - return Child->EmitZerofill(Section, Symbol, Size, ByteAlignment); - } - - virtual void EmitTBSSSymbol (const MCSection *Section, MCSymbol *Symbol, - uint64_t Size, unsigned ByteAlignment = 0) { - LogCall("EmitTBSSSymbol"); - return Child->EmitTBSSSymbol(Section, Symbol, Size, ByteAlignment); - } - - virtual void EmitBytes(StringRef Data, unsigned AddrSpace) { - LogCall("EmitBytes"); - return Child->EmitBytes(Data, AddrSpace); - } - - virtual void EmitValueImpl(const MCExpr *Value, unsigned Size, - unsigned AddrSpace){ - LogCall("EmitValue"); - return Child->EmitValueImpl(Value, Size, AddrSpace); - } - - virtual void EmitULEB128Value(const MCExpr *Value) { - LogCall("EmitULEB128Value"); - return Child->EmitULEB128Value(Value); - } - - virtual void EmitSLEB128Value(const MCExpr *Value) { - LogCall("EmitSLEB128Value"); - return Child->EmitSLEB128Value(Value); - } - - virtual void EmitGPRel32Value(const MCExpr *Value) { - LogCall("EmitGPRel32Value"); - return Child->EmitGPRel32Value(Value); - } - - virtual void EmitFill(uint64_t NumBytes, uint8_t FillValue, - unsigned AddrSpace) { - LogCall("EmitFill"); - return Child->EmitFill(NumBytes, FillValue, AddrSpace); - } - - virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0, - unsigned ValueSize = 1, - unsigned MaxBytesToEmit = 0) { - LogCall("EmitValueToAlignment"); - return Child->EmitValueToAlignment(ByteAlignment, Value, - ValueSize, MaxBytesToEmit); - } - - virtual void EmitCodeAlignment(unsigned ByteAlignment, - unsigned MaxBytesToEmit = 0) { - LogCall("EmitCodeAlignment"); - return Child->EmitCodeAlignment(ByteAlignment, MaxBytesToEmit); - } - - virtual void EmitValueToOffset(const MCExpr *Offset, - unsigned char Value = 0) { - LogCall("EmitValueToOffset"); - return Child->EmitValueToOffset(Offset, Value); - } - - virtual void EmitFileDirective(StringRef Filename) { - LogCall("EmitFileDirective", "FileName:" + Filename); - return 
Child->EmitFileDirective(Filename); - } - - virtual bool EmitDwarfFileDirective(unsigned FileNo, StringRef Directory, - StringRef Filename) { - LogCall("EmitDwarfFileDirective", - "FileNo:" + Twine(FileNo) + " Directory:" + Directory + - " Filename:" + Filename); - return Child->EmitDwarfFileDirective(FileNo, Directory, Filename); - } - - virtual void EmitDwarfLocDirective(unsigned FileNo, unsigned Line, - unsigned Column, unsigned Flags, - unsigned Isa, unsigned Discriminator, - StringRef FileName) { - LogCall("EmitDwarfLocDirective", - "FileNo:" + Twine(FileNo) + " Line:" + Twine(Line) + - " Column:" + Twine(Column) + " Flags:" + Twine(Flags) + - " Isa:" + Twine(Isa) + " Discriminator:" + Twine(Discriminator)); - return Child->EmitDwarfLocDirective(FileNo, Line, Column, Flags, - Isa, Discriminator, FileName); - } - - virtual void EmitInstruction(const MCInst &Inst) { - LogCall("EmitInstruction"); - return Child->EmitInstruction(Inst); - } - - virtual void EmitRawText(StringRef String) { - LogCall("EmitRawText", "\"" + String + "\""); - return Child->EmitRawText(String); - } - - virtual void FinishImpl() { - LogCall("FinishImpl"); - return Child->FinishImpl(); - } - -}; - -} // end anonymous namespace. - -MCStreamer *llvm::createLoggingStreamer(MCStreamer *Child, raw_ostream &OS) { - return new MCLoggingStreamer(Child, OS); -} Modified: llvm/trunk/lib/Target/PTX/PTXTargetMachine.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PTX/PTXTargetMachine.cpp?rev=147820&r1=147819&r2=147820&view=diff ============================================================================== --- llvm/trunk/lib/Target/PTX/PTXTargetMachine.cpp (original) +++ llvm/trunk/lib/Target/PTX/PTXTargetMachine.cpp Mon Jan 9 18:40:39 2012 @@ -166,9 +166,6 @@ break; } - // MC Logging - //AsmStreamer.reset(createLoggingStreamer(AsmStreamer.take(), errs())); - // Create the AsmPrinter, which takes ownership of AsmStreamer if successful. 
FunctionPass *Printer = getTarget().createAsmPrinter(*this, *AsmStreamer); if (Printer == 0) Modified: llvm/trunk/tools/llvm-mc/llvm-mc.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-mc/llvm-mc.cpp?rev=147820&r1=147819&r2=147820&view=diff ============================================================================== --- llvm/trunk/tools/llvm-mc/llvm-mc.cpp (original) +++ llvm/trunk/tools/llvm-mc/llvm-mc.cpp Mon Jan 9 18:40:39 2012 @@ -70,9 +70,6 @@ static cl::opt NoExecStack("mc-no-exec-stack", cl::desc("File doesn't need an exec stack")); -static cl::opt -EnableLogging("enable-api-logging", cl::desc("Enable MC API logging")); - enum OutputFileType { OFT_Null, OFT_AssemblyFile, @@ -443,10 +440,6 @@ NoExecStack)); } - if (EnableLogging) { - Str.reset(createLoggingStreamer(Str.take(), errs())); - } - OwningPtr Parser(createMCAsmParser(SrcMgr, Ctx, *Str.get(), *MAI)); OwningPtr TAP(TheTarget->createMCAsmParser(*STI, *Parser)); From bigcheesegs at gmail.com Mon Jan 9 19:04:52 2012 From: bigcheesegs at gmail.com (Michael Spencer) Date: Mon, 9 Jan 2012 17:04:52 -0800 Subject: [llvm-commits] [PATCH] ELFObjectFile with dynamic loading support In-Reply-To: <9BBE4537D1BAAB479E9E8F9D4234619D07D484@HASMSX103.ger.corp.intel.com> References: <9BBE4537D1BAAB479E9E8F9D4234619D07D484@HASMSX103.ger.corp.intel.com> Message-ID: On Mon, Jan 9, 2012 at 7:15 AM, Bendersky, Eli wrote: > Hello, > > > > Following the email I sent to LLVMdev earlier today > (http://lists.cs.uiuc.edu/pipermail/llvmdev/2012-January/046671.html), > please find attached the first patch in the MCJIT/ELF series. It presents a > subclass of ELFObjectFile, named DyldELFObject, which supports basic dynamic > loading. This class is used by MCJIT/ELF to load an ELF image generated by > MC into memory and executing it. > > > > Please note that there are no stand-alone tests for this class yet. 
It is > being tested extensively in the ExecutionEngine tests run on MCJIT/ELF, > which will be part of the next patch in the series, once this one is > accepted and committed. > > > > Thanks in advance, > > Eli I have a couple issues with this patch. They are inline below. > + DyldELFObject(MemoryBuffer *Object, std::vector *MemoryMap, > + error_code &ec); > + > + static inline bool classof(const Binary *v) { > + return v->getType() == Binary::isELF; This is incorrect. An ELFObjectFile is not a DyldELFObject. > + } > + static inline bool classof( > + const ELFObjectFile *v) { return true; } > + static inline bool classof(const DyldELFObject *v) { return true; } > + }; > + > + // Mark the image as a dynamic shared library > + const int32_t temp = ELF::ET_DYN; > + memcpy(&(Header->e_type), &temp, sizeof(Elf_Half)); This bypasses the purpose of packed_endian_specific_integral. Two things need to happen to fix this. operator =(IntegralT) needs to be overloaded for PESI. And DyldELFObject needs a way to get access to non-const versions of these structs. This also occurs other places. > + > + rebaseObject(MemoryMap); > +} I'm also not sure about the main interfaces. Although I can't really think of anything better. I would like Daniel Dunbar to take a look too. 
- Michael Spencer From fjahanian at apple.com Mon Jan 9 19:01:46 2012 From: fjahanian at apple.com (Fariborz Jahanian) Date: Tue, 10 Jan 2012 01:01:46 -0000 Subject: [llvm-commits] [test-suite] r147822 - /test-suite/trunk/SingleSource/UnitTests/ObjC++/property-reference-object.mm Message-ID: <20120110010146.B9C701BE003@llvm.org> Author: fjahanian Date: Mon Jan 9 19:01:46 2012 New Revision: 147822 URL: http://llvm.org/viewvc/llvm-project?rev=147822&view=rev Log: Fix the test case for // rdar://6137845 Modified: test-suite/trunk/SingleSource/UnitTests/ObjC++/property-reference-object.mm Modified: test-suite/trunk/SingleSource/UnitTests/ObjC++/property-reference-object.mm URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/SingleSource/UnitTests/ObjC%2B%2B/property-reference-object.mm?rev=147822&r1=147821&r2=147822&view=diff ============================================================================== --- test-suite/trunk/SingleSource/UnitTests/ObjC++/property-reference-object.mm (original) +++ test-suite/trunk/SingleSource/UnitTests/ObjC++/property-reference-object.mm Mon Jan 9 19:01:46 2012 @@ -48,7 +48,7 @@ } @property (assign, readwrite, nonatomic) const Foo& cppObjectNonAtomic; - at property (assign, readwrite) const Foo& cppObjectAtomic; + at property (nonatomic, assign, readwrite) const Foo& cppObjectAtomic; @property (assign, readwrite, nonatomic) const Foo& cppObjectDynamic; @end From jcarter at mips.com Mon Jan 9 19:12:45 2012 From: jcarter at mips.com (Carter, Jack) Date: Tue, 10 Jan 2012 01:12:45 +0000 Subject: [llvm-commits] [Mips] Direct object big endian review and submittal request In-Reply-To: <4F07CD3B.5030701@gmail.com> References: <86AC779C188FE74F88F6494478B46332E9156D@exchdb03.mips.com> <86AC779C188FE74F88F6494478B46332E91C5F@exchdb03.mips.com>, <4F07CD3B.5030701@gmail.com> Message-ID: <86AC779C188FE74F88F6494478B46332E92DE7@exchdb03.mips.com> Rafael, I'll resubmit with the requested test changes and the subclass suggestion. 
The byte swap issue is due to the size of the object. In this case the object is a 4 byte word that has a 16 bit fixup area. The whole word needs to be re-sexed rather than just the fixup area. Jack ________________________________________ From: Rafael Ávila de Espíndola [rafael.espindola at gmail.com] Sent: Friday, January 06, 2012 8:42 PM To: Carter, Jack Cc: bruno.cardoso at gmail.com; Hatanaka, Akira; Kotler, Reed Subject: Re: [Mips] Direct object big endian review and submittal request On 05/01/12 09:43 PM, Carter, Jack wrote: > Please use this attached patch instead of the previous one. > > In my initial patch I had 2 parameters reversed. Evidently I ran the > wrong llc on my final test. I have several source trees and got mixed up. I looked into it a bit more. The main comment is that it needs more tests. Make sure there is at least one relocation, so that the bits in it are tested. Can you make the test use llvm-mc instead of llc? That is a lot better for testing the MC infrastructure. It would be nice if you organized the code a bit more like the other architectures. Create subclasses of MipsAsmBackend and implement createObjectWriter in them. You probably just want NumBytes = (getFixupKindInfo(Kind).TargetSize+7)/8; No? I am not sure I understand why the byte swapping when you are reading and writing one byte at a time. Again, add some tests that check it. For reference, I played a bit with the code. The result is attached. > Thanks, > > Jack > Cheers, Rafael From stoklund at 2pi.dk Mon Jan 9 19:34:59 2012 From: stoklund at 2pi.dk (Jakob Stoklund Olesen) Date: Tue, 10 Jan 2012 01:34:59 -0000 Subject: [llvm-commits] [llvm] r147825 - /llvm/trunk/lib/Target/ARM/ARMConstantIslandPass.cpp Message-ID: <20120110013500.097761BE003@llvm.org> Author: stoklund Date: Mon Jan 9 19:34:59 2012 New Revision: 147825 URL: http://llvm.org/viewvc/llvm-project?rev=147825&view=rev Log: Accurately model hardware alignment rounding.
On Thumb, the displacement computation hardware uses the address of the current instruction rounded down to a multiple of 4. Include this rounding in the UserOffset we compute for each instruction. When inline asm is present, the instruction alignment may not be known. Constrain the maximum displacement instead in that case. This makes it possible for CreateNewWater() and OffsetIsInRange() to agree about the valid displacements. When they disagree, infinite looping happens. As always, test cases for this stuff are insane. Modified: llvm/trunk/lib/Target/ARM/ARMConstantIslandPass.cpp Modified: llvm/trunk/lib/Target/ARM/ARMConstantIslandPass.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMConstantIslandPass.cpp?rev=147825&r1=147824&r2=147825&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMConstantIslandPass.cpp (original) +++ llvm/trunk/lib/Target/ARM/ARMConstantIslandPass.cpp Mon Jan 9 19:34:59 2012 @@ -195,14 +195,23 @@ MachineInstr *MI; MachineInstr *CPEMI; MachineBasicBlock *HighWaterMark; + private: unsigned MaxDisp; + public: bool NegOk; bool IsSoImm; + bool KnownAlignment; CPUser(MachineInstr *mi, MachineInstr *cpemi, unsigned maxdisp, bool neg, bool soimm) - : MI(mi), CPEMI(cpemi), MaxDisp(maxdisp), NegOk(neg), IsSoImm(soimm) { + : MI(mi), CPEMI(cpemi), MaxDisp(maxdisp), NegOk(neg), IsSoImm(soimm), + KnownAlignment(false) { HighWaterMark = CPEMI->getParent(); } + /// getMaxDisp - Returns the maximum displacement supported by MI. + /// Correct for unknown alignment. + unsigned getMaxDisp() const { + return KnownAlignment ?
MaxDisp : MaxDisp - 2; + } }; /// CPUsers - Keep track of all of the machine instructions that use various @@ -309,6 +318,7 @@ void ComputeBlockSize(MachineBasicBlock *MBB); unsigned GetOffsetOf(MachineInstr *MI) const; + unsigned GetUserOffset(CPUser&) const; void dumpBBs(); void verify(); @@ -317,7 +327,7 @@ bool OffsetIsInRange(unsigned UserOffset, unsigned TrialOffset, const CPUser &U) { return OffsetIsInRange(UserOffset, TrialOffset, - U.MaxDisp, U.NegOk, U.IsSoImm); + U.getMaxDisp(), U.NegOk, U.IsSoImm); } }; char ARMConstantIslands::ID = 0; @@ -336,11 +346,11 @@ } for (unsigned i = 0, e = CPUsers.size(); i != e; ++i) { CPUser &U = CPUsers[i]; - unsigned UserOffset = GetOffsetOf(U.MI) + (isThumb ? 4 : 8); + unsigned UserOffset = GetUserOffset(U); unsigned CPEOffset = GetOffsetOf(U.CPEMI); unsigned Disp = UserOffset < CPEOffset ? CPEOffset - UserOffset : UserOffset - CPEOffset; - assert(Disp <= U.MaxDisp || "Constant pool entry out of range!"); + assert(Disp <= U.getMaxDisp() || "Constant pool entry out of range!"); } #endif } @@ -546,7 +556,8 @@ CPEs.push_back(CPEntry(CPEMI, i)); CPEntries.push_back(CPEs); ++NumCPEs; - DEBUG(dbgs() << "Moved CPI#" << i << " to end of function\n"); + DEBUG(dbgs() << "Moved CPI#" << i << " to end of function, size = " + << Size << ", align = " << Align <<'\n'); } DEBUG(BB->dump()); } @@ -924,19 +935,39 @@ return NewBB; } +/// GetUserOffset - Compute the offset of U.MI as seen by the hardware +/// displacement computation. Update U.KnownAlignment to match its current +/// basic block location. +unsigned ARMConstantIslands::GetUserOffset(CPUser &U) const { + unsigned UserOffset = GetOffsetOf(U.MI); + const BasicBlockInfo &BBI = BBInfo[U.MI->getParent()->getNumber()]; + unsigned KnownBits = BBI.internalKnownBits(); + + // The value read from PC is offset from the actual instruction address. + UserOffset += (isThumb ? 4 : 8); + + // Because of inline assembly, we may not know the alignment (mod 4) of U.MI. 
+ // Make sure U.getMaxDisp() returns a constrained range. + U.KnownAlignment = (KnownBits >= 2); + + // On Thumb, offsets==2 mod 4 are rounded down by the hardware for + // purposes of the displacement computation; compensate for that here. + // For unknown alignments, getMaxDisp() constrains the range instead. + if (isThumb && U.KnownAlignment) + UserOffset &= ~3u; + + return UserOffset; +} + /// OffsetIsInRange - Checks whether UserOffset (the location of a constant pool /// reference) is within MaxDisp of TrialOffset (a proposed location of a /// constant pool entry). +/// UserOffset is computed by GetUserOffset above to include PC adjustments. If +/// the mod 4 alignment of UserOffset is not known, the uncertainty must be +/// subtracted from MaxDisp instead. CPUser::getMaxDisp() does that. bool ARMConstantIslands::OffsetIsInRange(unsigned UserOffset, unsigned TrialOffset, unsigned MaxDisp, bool NegativeOK, bool IsSoImm) { - // On Thumb offsets==2 mod 4 are rounded down by the hardware for - // purposes of the displacement computation; compensate for that here. - // Effectively, the valid range of displacements is 2 bytes smaller for such - // references. - if (isThumb && UserOffset%4 !=0) - UserOffset -= 2; - if (UserOffset <= TrialOffset) { // User before the Trial. if (TrialOffset - UserOffset <= MaxDisp) @@ -1086,7 +1117,7 @@ MachineInstr *CPEMI = U.CPEMI; // Check to see if the CPE is already in-range. 
- if (CPEIsInRange(UserMI, UserOffset, CPEMI, U.MaxDisp, U.NegOk, true)) { + if (CPEIsInRange(UserMI, UserOffset, CPEMI, U.getMaxDisp(), U.NegOk, true)) { DEBUG(dbgs() << "In range\n"); return 1; } @@ -1101,7 +1132,8 @@ // Removing CPEs can leave empty entries, skip if (CPEs[i].CPEMI == NULL) continue; - if (CPEIsInRange(UserMI, UserOffset, CPEs[i].CPEMI, U.MaxDisp, U.NegOk)) { + if (CPEIsInRange(UserMI, UserOffset, CPEs[i].CPEMI, U.getMaxDisp(), + U.NegOk)) { DEBUG(dbgs() << "Replacing CPE#" << CPI << " with CPE#" << CPEs[i].CPI << "\n"); // Point the CPUser node to the replacement @@ -1202,8 +1234,7 @@ // If the block does not end in an unconditional branch already, and if the // end of the block is within range, make new water there. (The addition // below is for the unconditional branch we will be adding: 4 bytes on ARM + - // Thumb2, 2 on Thumb1. Possible Thumb1 alignment padding is allowed for - // inside OffsetIsInRange. + // Thumb2, 2 on Thumb1. if (BBHasFallthrough(UserMBB)) { // Size of branch to insert. unsigned Delta = isThumb1 ? 2 : 4; @@ -1256,7 +1287,7 @@ assert(LogAlign >= CPELogAlign && "Over-aligned constant pool entry"); unsigned KnownBits = UserBBI.internalKnownBits(); unsigned UPad = UnknownPadding(LogAlign, KnownBits); - unsigned BaseInsertOffset = UserOffset + U.MaxDisp; + unsigned BaseInsertOffset = UserOffset + U.getMaxDisp(); DEBUG(dbgs() << format("Split in middle of big block before %#x", BaseInsertOffset)); @@ -1337,9 +1368,8 @@ MachineInstr *CPEMI = U.CPEMI; unsigned CPI = CPEMI->getOperand(1).getIndex(); unsigned Size = CPEMI->getOperand(2).getImm(); - // Compute this only once, it's expensive. The 4 or 8 is the value the - // hardware keeps in the PC. - unsigned UserOffset = GetOffsetOf(UserMI) + (isThumb ? 4 : 8); + // Compute this only once, it's expensive. + unsigned UserOffset = GetUserOffset(U); // See if the current entry is within range, or there is a clone of it // in range. 
@@ -1677,8 +1707,13 @@ if (!NewOpc) continue; - unsigned UserOffset = GetOffsetOf(U.MI) + 4; + unsigned UserOffset = GetUserOffset(U); unsigned MaxOffs = ((1 << Bits) - 1) * Scale; + + // Be conservative with inline asm. + if (!U.KnownAlignment) + MaxOffs -= 2; + // FIXME: Check if offset is multiple of scale if scale is not 4. if (CPEIsInRange(U.MI, UserOffset, U.CPEMI, MaxOffs, false, true)) { U.MI->setDesc(TII->get(NewOpc)); From atrick at apple.com Mon Jan 9 19:45:09 2012 From: atrick at apple.com (Andrew Trick) Date: Tue, 10 Jan 2012 01:45:09 -0000 Subject: [llvm-commits] [llvm] r147826 - in /llvm/trunk: include/llvm/Analysis/ScalarEvolutionExpander.h lib/Analysis/ScalarEvolutionExpander.cpp lib/Transforms/Scalar/LoopStrengthReduce.cpp test/Transforms/LoopStrengthReduce/ARM/ test/Transforms/LoopStrengthReduce/ARM/dg.exp test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll Message-ID: <20120110014509.6BFF41BE003@llvm.org> Author: atrick Date: Mon Jan 9 19:45:08 2012 New Revision: 147826 URL: http://llvm.org/viewvc/llvm-project?rev=147826&view=rev Log: Enable LSR IV Chains with sufficient heuristics. These heuristics are sufficient for enabling IV chains by default. Performance analysis has been done for i386, x86_64, and thumbv7. The optimization is rarely important, but can significantly speed up certain cases by eliminating spill code within the loop. Unrolled loops are prime candidates for IV chains. In many cases, the final code could still be improved with more target specific optimization following LSR. The goal of this feature is for LSR to make the best choice of induction variables. Instruction selection may not completely take advantage of this feature yet. As a result, there could be cases of slight code size increase. Code size can be worse on x86 because it doesn't support postincrement addressing. 
In fact, when chains are formed, you may see redundant address plus stride addition in the addressing mode. GenerateIVChains tries to compensate for the common cases. On ARM, code size increase can be mitigated by using postincrement addressing, but downstream codegen currently misses some opportunities. Added: llvm/trunk/test/Transforms/LoopStrengthReduce/ARM/ llvm/trunk/test/Transforms/LoopStrengthReduce/ARM/dg.exp llvm/trunk/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll llvm/trunk/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll Modified: llvm/trunk/include/llvm/Analysis/ScalarEvolutionExpander.h llvm/trunk/lib/Analysis/ScalarEvolutionExpander.cpp llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp Modified: llvm/trunk/include/llvm/Analysis/ScalarEvolutionExpander.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/ScalarEvolutionExpander.h?rev=147826&r1=147825&r2=147826&view=diff ============================================================================== --- llvm/trunk/include/llvm/Analysis/ScalarEvolutionExpander.h (original) +++ llvm/trunk/include/llvm/Analysis/ScalarEvolutionExpander.h Mon Jan 9 19:45:08 2012 @@ -60,6 +60,9 @@ /// insert the IV increment at this position. Instruction *IVIncInsertPos; + /// Phis that complete an IV chain. Reuse + std::set > ChainedPhis; + /// CanonicalMode - When true, expressions are expanded in "canonical" /// form. In particular, addrecs are expanded as arithmetic based on /// a canonical induction variable. 
When false, expression are expanded @@ -102,6 +105,7 @@ InsertedExpressions.clear(); InsertedValues.clear(); InsertedPostIncValues.clear(); + ChainedPhis.clear(); } /// getOrInsertCanonicalInductionVariable - This method returns the @@ -164,6 +168,9 @@ void clearInsertPoint() { Builder.ClearInsertionPoint(); } + + void setChainedPhi(PHINode *PN) { ChainedPhis.insert(PN); } + private: LLVMContext &getContext() const { return SE.getContext(); } Modified: llvm/trunk/lib/Analysis/ScalarEvolutionExpander.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/ScalarEvolutionExpander.cpp?rev=147826&r1=147825&r2=147826&view=diff ============================================================================== --- llvm/trunk/lib/Analysis/ScalarEvolutionExpander.cpp (original) +++ llvm/trunk/lib/Analysis/ScalarEvolutionExpander.cpp Mon Jan 9 19:45:08 2012 @@ -874,6 +874,9 @@ /// expandAddtoGEP. bool SCEVExpander::isExpandedAddRecExprPHI(PHINode *PN, Instruction *IncV, const Loop *L) { + if (ChainedPhis.count(PN)) + return true; + switch (IncV->getOpcode()) { // Check for a simple Add/Sub or GEP of a loop invariant step. 
case Instruction::Add: @@ -1638,8 +1641,8 @@ const SCEV *TruncExpr = SE.getTruncateOrNoop(SE.getSCEV(OrigInc), IsomorphicInc->getType()); if (OrigInc != IsomorphicInc - && TruncExpr == SE.getSCEV(IsomorphicInc) && - hoistStep(OrigInc, IsomorphicInc, DT)) { + && TruncExpr == SE.getSCEV(IsomorphicInc) + && hoistStep(OrigInc, IsomorphicInc, DT)) { DEBUG_WITH_TYPE(DebugType, dbgs() << "INDVARS: Eliminated congruent iv.inc: " << *IsomorphicInc << '\n'); Modified: llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp?rev=147826&r1=147825&r2=147826&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp (original) +++ llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp Mon Jan 9 19:45:08 2012 @@ -658,6 +658,77 @@ return false; } +/// Check if expanding this expression is likely to incur significant cost. This +/// is tricky because SCEV doesn't track which expressions are actually computed +/// by the current IR. +/// +/// We currently allow expansion of IV increments that involve adds, +/// multiplication by constants, and AddRecs from existing phis. +/// +/// TODO: Allow UDivExpr if we can find an existing IV increment that is an +/// obvious multiple of the UDivExpr. 
+static bool isHighCostExpansion(const SCEV *S, + SmallPtrSet &Processed, + ScalarEvolution &SE) { + // Zero/One operand expressions + switch (S->getSCEVType()) { + case scUnknown: + case scConstant: + return false; + case scTruncate: + return isHighCostExpansion(cast(S)->getOperand(), + Processed, SE); + case scZeroExtend: + return isHighCostExpansion(cast(S)->getOperand(), + Processed, SE); + case scSignExtend: + return isHighCostExpansion(cast(S)->getOperand(), + Processed, SE); + } + + if (!Processed.insert(S)) + return false; + + if (const SCEVAddExpr *Add = dyn_cast(S)) { + for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end(); + I != E; ++I) { + if (isHighCostExpansion(*I, Processed, SE)) + return true; + } + return false; + } + + if (const SCEVMulExpr *Mul = dyn_cast(S)) { + if (Mul->getNumOperands() == 2) { + // Multiplication by a constant is ok + if (isa(Mul->getOperand(0))) + return isHighCostExpansion(Mul->getOperand(1), Processed, SE); + + // If we have the value of one operand, check if an existing + // multiplication already generates this expression. + if (const SCEVUnknown *U = dyn_cast(Mul->getOperand(1))) { + Value *UVal = U->getValue(); + for (Value::use_iterator UI = UVal->use_begin(), UE = UVal->use_end(); + UI != UE; ++UI) { + Instruction *User = cast(*UI); + if (User->getOpcode() == Instruction::Mul + && SE.isSCEVable(User->getType())) { + return SE.getSCEV(User) == Mul; + } + } + } + } + } + + if (const SCEVAddRecExpr *AR = dyn_cast(S)) { + if (isExistingPhi(AR, SE)) + return false; + } + + // For now, consider any other type of expression (div/mul/min/max) high cost. + return true; +} + /// DeleteTriviallyDeadInstructions - If any of the instructions in the /// specified set are trivially dead, delete them and see if this makes any of /// their operands subsequently dead.
@@ -2204,6 +2275,49 @@ return (LType == RType) || (LType->isPointerTy() && RType->isPointerTy()); } +/// getExprBase - Return an approximation of this SCEV expression's "base", or +/// NULL for any constant. Returning the expression itself is +/// conservative. Returning a deeper subexpression is more precise and valid as +/// long as it isn't less complex than another subexpression. For expressions +/// involving multiple unscaled values, we need to return the pointer-type +/// SCEVUnknown. This avoids forming chains across objects, such as: +/// PrevOper==a[i], IVOper==b[i], IVInc==b-a. +/// +/// Since SCEVUnknown is the rightmost type, and pointers are the rightmost +/// SCEVUnknown, we simply return the rightmost SCEV operand. +static const SCEV *getExprBase(const SCEV *S) { + switch (S->getSCEVType()) { + default: // including scUnknown. + return S; + case scConstant: + return 0; + case scTruncate: + return getExprBase(cast(S)->getOperand()); + case scZeroExtend: + return getExprBase(cast(S)->getOperand()); + case scSignExtend: + return getExprBase(cast(S)->getOperand()); + case scAddExpr: { + // Skip over scaled operands (scMulExpr) to follow add operands as long as + // there's nothing more complex. + // FIXME: not sure if we want to recognize negation. + const SCEVAddExpr *Add = cast(S); + for (std::reverse_iterator I(Add->op_end()), + E(Add->op_begin()); I != E; ++I) { + const SCEV *SubExpr = *I; + if (SubExpr->getSCEVType() == scAddExpr) + return getExprBase(SubExpr); + + if (SubExpr->getSCEVType() != scMulExpr) + return SubExpr; + } + return S; // all operands are scaled, be conservative. + } + case scAddRecExpr: + return getExprBase(cast(S)->getStart()); + } +} + /// Return true if the chain increment is profitable to expand into a loop /// invariant value, which may require its own register. A profitable chain /// increment will be an offset relative to the same base.
We allow such offsets @@ -2213,7 +2327,16 @@ getProfitableChainIncrement(Value *NextIV, Value *PrevIV, const IVChain &Chain, Loop *L, ScalarEvolution &SE, const TargetLowering *TLI) { - const SCEV *IncExpr = SE.getMinusSCEV(SE.getSCEV(NextIV), SE.getSCEV(PrevIV)); + // Prune the solution space aggressively by checking that both IV operands + // are expressions that operate on the same unscaled SCEVUnknown. This + // "base" will be canceled by the subsequent getMinusSCEV call. Checking first + // avoids creating extra SCEV expressions. + const SCEV *OperExpr = SE.getSCEV(NextIV); + const SCEV *PrevExpr = SE.getSCEV(PrevIV); + if (getExprBase(OperExpr) != getExprBase(PrevExpr) && !StressIVChain) + return 0; + + const SCEV *IncExpr = SE.getMinusSCEV(OperExpr, PrevExpr); if (!SE.isLoopInvariant(IncExpr, L)) return 0; @@ -2222,8 +2345,19 @@ if (StressIVChain) return IncExpr; - // Unimplemented - return 0; + // Do not replace a constant offset from IV head with a nonconstant IV + // increment. + if (!isa(IncExpr)) { + const SCEV *HeadExpr = SE.getSCEV(getWideOperand(Chain[0].IVOperand)); + if (isa(SE.getMinusSCEV(OperExpr, HeadExpr))) + return 0; + } + + SmallPtrSet Processed; + if (isHighCostExpansion(IncExpr, Processed, SE)) + return 0; + + return IncExpr; } /// Return true if the number of registers needed for the chain is estimated to @@ -2242,8 +2376,72 @@ if (StressIVChain) return true; - // Unimplemented - return false; + if (Chain.size() <= 2) + return false; + + if (!Users.empty()) { + DEBUG(dbgs() << "Chain: " << *Chain[0].UserInst << " users:\n"; + for (SmallPtrSet::const_iterator I = Users.begin(), + E = Users.end(); I != E; ++I) { + dbgs() << " " << **I << "\n"; + }); + return false; + } + assert(!Chain.empty() && "empty IV chains are not allowed"); + + // The chain itself may require a register, so initialize cost to 1. + int cost = 1; + + // A complete chain likely eliminates the need for keeping the original IV in + // a register.
LSR does not currently know how to form a complete chain unless + // the header phi already exists. + if (isa(Chain.back().UserInst) + && SE.getSCEV(Chain.back().UserInst) == Chain[0].IncExpr) { + --cost; + } + const SCEV *LastIncExpr = 0; + unsigned NumConstIncrements = 0; + unsigned NumVarIncrements = 0; + unsigned NumReusedIncrements = 0; + for (IVChain::const_iterator I = llvm::next(Chain.begin()), E = Chain.end(); + I != E; ++I) { + + if (I->IncExpr->isZero()) + continue; + + // Incrementing by zero or some constant is neutral. We assume constants can + // be folded into an addressing mode or an add's immediate operand. + if (isa(I->IncExpr)) { + ++NumConstIncrements; + continue; + } + + if (I->IncExpr == LastIncExpr) + ++NumReusedIncrements; + else + ++NumVarIncrements; + + LastIncExpr = I->IncExpr; + } + // An IV chain with a single increment is handled by LSR's postinc + // uses. However, a chain with multiple increments requires keeping the IV's + // value live longer than it needs to be if chained. + if (NumConstIncrements > 1) + --cost; + + // Materializing increment expressions in the preheader that didn't exist in + // the original code may cost a register. For example, sign-extended array + // indices can produce ridiculous increments like this: + // IV + ((sext i32 (2 * %s) to i64) + (-1 * (sext i32 %s to i64))) + cost += NumVarIncrements; + + // Reusing variable increments likely saves a register to hold the multiple of + // the stride. + cost -= NumReusedIncrements; + + DEBUG(dbgs() << "Chain: " << *Chain[0].UserInst << " Cost: " << cost << "\n"); + + return cost < 0; } /// ChainInstruction - Add this IV user to an existing chain or make it the head @@ -4280,6 +4478,13 @@ Rewriter.enableLSRMode(); Rewriter.setIVIncInsertPos(L, IVIncInsertPos); + // Mark phi nodes that terminate chains so the expander tries to reuse them. 
+ for (SmallVectorImpl::const_iterator ChainI = IVChainVec.begin(), + ChainE = IVChainVec.end(); ChainI != ChainE; ++ChainI) { + if (PHINode *PN = dyn_cast(ChainI->back().UserInst)) + Rewriter.setChainedPhi(PN); + } + // Expand the new value definitions and update the users. for (SmallVectorImpl::const_iterator I = Fixups.begin(), E = Fixups.end(); I != E; ++I) { Added: llvm/trunk/test/Transforms/LoopStrengthReduce/ARM/dg.exp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopStrengthReduce/ARM/dg.exp?rev=147826&view=auto ============================================================================== --- llvm/trunk/test/Transforms/LoopStrengthReduce/ARM/dg.exp (added) +++ llvm/trunk/test/Transforms/LoopStrengthReduce/ARM/dg.exp Mon Jan 9 19:45:08 2012 @@ -0,0 +1,5 @@ +load_lib llvm.exp + +if { [llvm_supports_target ARM] } { + RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll}]] +} Added: llvm/trunk/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll?rev=147826&view=auto ============================================================================== --- llvm/trunk/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll (added) +++ llvm/trunk/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll Mon Jan 9 19:45:08 2012 @@ -0,0 +1,292 @@ +; RUN: llc < %s -O3 -march=thumb -mcpu=cortex-a9 | FileCheck %s -check-prefix=A9 + +; @simple is the most basic chain of address induction variables. Chaining +; saves at least one register and avoids complex addressing and setup +; code. 
+; +; A9: @simple +; no expensive address computation in the preheader +; A9: lsl +; A9-NOT: lsl +; A9: %loop +; no complex address modes +; A9-NOT: lsl +define i32 @simple(i32* %a, i32* %b, i32 %x) nounwind { +entry: + br label %loop +loop: + %iv = phi i32* [ %a, %entry ], [ %iv4, %loop ] + %s = phi i32 [ 0, %entry ], [ %s4, %loop ] + %v = load i32* %iv + %iv1 = getelementptr inbounds i32* %iv, i32 %x + %v1 = load i32* %iv1 + %iv2 = getelementptr inbounds i32* %iv1, i32 %x + %v2 = load i32* %iv2 + %iv3 = getelementptr inbounds i32* %iv2, i32 %x + %v3 = load i32* %iv3 + %s1 = add i32 %s, %v + %s2 = add i32 %s1, %v1 + %s3 = add i32 %s2, %v2 + %s4 = add i32 %s3, %v3 + %iv4 = getelementptr inbounds i32* %iv3, i32 %x + %cmp = icmp eq i32* %iv4, %b + br i1 %cmp, label %exit, label %loop +exit: + ret i32 %s4 +} + +; @user is not currently chained because the IV is live across memory ops. +; +; A9: @user +; stride multiples computed in the preheader +; A9: lsl +; A9: lsl +; A9: %loop +; complex address modes +; A9: lsl +; A9: lsl +define i32 @user(i32* %a, i32* %b, i32 %x) nounwind { +entry: + br label %loop +loop: + %iv = phi i32* [ %a, %entry ], [ %iv4, %loop ] + %s = phi i32 [ 0, %entry ], [ %s4, %loop ] + %v = load i32* %iv + %iv1 = getelementptr inbounds i32* %iv, i32 %x + %v1 = load i32* %iv1 + %iv2 = getelementptr inbounds i32* %iv1, i32 %x + %v2 = load i32* %iv2 + %iv3 = getelementptr inbounds i32* %iv2, i32 %x + %v3 = load i32* %iv3 + %s1 = add i32 %s, %v + %s2 = add i32 %s1, %v1 + %s3 = add i32 %s2, %v2 + %s4 = add i32 %s3, %v3 + %iv4 = getelementptr inbounds i32* %iv3, i32 %x + store i32 %s4, i32* %iv + %cmp = icmp eq i32* %iv4, %b + br i1 %cmp, label %exit, label %loop +exit: + ret i32 %s4 +} + +; @extrastride is a slightly more interesting case of a single +; complete chain with multiple strides. The test case IR is what LSR +; used to do, and exactly what we don't want to do. LSR's new IV +; chaining feature should now undo the damage. 
+; +; A9: extrastride: +; no spills +; A9-NOT: str +; only one stride multiple in the preheader +; A9: lsl +; A9-NOT: {{str r|lsl}} +; A9: %for.body{{$}} +; no complex address modes or reloads +; A9-NOT: {{ldr .*[sp]|lsl}} +define void @extrastride(i8* nocapture %main, i32 %main_stride, i32* nocapture %res, i32 %x, i32 %y, i32 %z) nounwind { +entry: + %cmp8 = icmp eq i32 %z, 0 + br i1 %cmp8, label %for.end, label %for.body.lr.ph + +for.body.lr.ph: ; preds = %entry + %add.ptr.sum = shl i32 %main_stride, 1 ; s*2 + %add.ptr1.sum = add i32 %add.ptr.sum, %main_stride ; s*3 + %add.ptr2.sum = add i32 %x, %main_stride ; s + x + %add.ptr4.sum = shl i32 %main_stride, 2 ; s*4 + %add.ptr3.sum = add i32 %add.ptr2.sum, %add.ptr4.sum ; total IV stride = s*5+x + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %main.addr.011 = phi i8* [ %main, %for.body.lr.ph ], [ %add.ptr6, %for.body ] + %i.010 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ] + %res.addr.09 = phi i32* [ %res, %for.body.lr.ph ], [ %add.ptr7, %for.body ] + %0 = bitcast i8* %main.addr.011 to i32* + %1 = load i32* %0, align 4 + %add.ptr = getelementptr inbounds i8* %main.addr.011, i32 %main_stride + %2 = bitcast i8* %add.ptr to i32* + %3 = load i32* %2, align 4 + %add.ptr1 = getelementptr inbounds i8* %main.addr.011, i32 %add.ptr.sum + %4 = bitcast i8* %add.ptr1 to i32* + %5 = load i32* %4, align 4 + %add.ptr2 = getelementptr inbounds i8* %main.addr.011, i32 %add.ptr1.sum + %6 = bitcast i8* %add.ptr2 to i32* + %7 = load i32* %6, align 4 + %add.ptr3 = getelementptr inbounds i8* %main.addr.011, i32 %add.ptr4.sum + %8 = bitcast i8* %add.ptr3 to i32* + %9 = load i32* %8, align 4 + %add = add i32 %3, %1 + %add4 = add i32 %add, %5 + %add5 = add i32 %add4, %7 + %add6 = add i32 %add5, %9 + store i32 %add6, i32* %res.addr.09, align 4 + %add.ptr6 = getelementptr inbounds i8* %main.addr.011, i32 %add.ptr3.sum + %add.ptr7 = getelementptr inbounds i32* %res.addr.09, i32 %y + %inc = add i32 %i.010, 1 + 
%cmp = icmp eq i32 %inc, %z + br i1 %cmp, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +} + +; @foldedidx is an unrolled variant of this loop: +; for (unsigned long i = 0; i < len; i += s) { +; c[i] = a[i] + b[i]; +; } +; where 's' can be folded into the addressing mode. +; Consequently, we should *not* form any chains. +; +; A9: foldedidx: +; A9: ldrb.w {{r[0-9]|lr}}, [{{r[0-9]|lr}}, #3] +define void @foldedidx(i8* nocapture %a, i8* nocapture %b, i8* nocapture %c) nounwind ssp { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %i.07 = phi i32 [ 0, %entry ], [ %inc.3, %for.body ] + %arrayidx = getelementptr inbounds i8* %a, i32 %i.07 + %0 = load i8* %arrayidx, align 1 + %conv5 = zext i8 %0 to i32 + %arrayidx1 = getelementptr inbounds i8* %b, i32 %i.07 + %1 = load i8* %arrayidx1, align 1 + %conv26 = zext i8 %1 to i32 + %add = add nsw i32 %conv26, %conv5 + %conv3 = trunc i32 %add to i8 + %arrayidx4 = getelementptr inbounds i8* %c, i32 %i.07 + store i8 %conv3, i8* %arrayidx4, align 1 + %inc1 = or i32 %i.07, 1 + %arrayidx.1 = getelementptr inbounds i8* %a, i32 %inc1 + %2 = load i8* %arrayidx.1, align 1 + %conv5.1 = zext i8 %2 to i32 + %arrayidx1.1 = getelementptr inbounds i8* %b, i32 %inc1 + %3 = load i8* %arrayidx1.1, align 1 + %conv26.1 = zext i8 %3 to i32 + %add.1 = add nsw i32 %conv26.1, %conv5.1 + %conv3.1 = trunc i32 %add.1 to i8 + %arrayidx4.1 = getelementptr inbounds i8* %c, i32 %inc1 + store i8 %conv3.1, i8* %arrayidx4.1, align 1 + %inc.12 = or i32 %i.07, 2 + %arrayidx.2 = getelementptr inbounds i8* %a, i32 %inc.12 + %4 = load i8* %arrayidx.2, align 1 + %conv5.2 = zext i8 %4 to i32 + %arrayidx1.2 = getelementptr inbounds i8* %b, i32 %inc.12 + %5 = load i8* %arrayidx1.2, align 1 + %conv26.2 = zext i8 %5 to i32 + %add.2 = add nsw i32 %conv26.2, %conv5.2 + %conv3.2 = trunc i32 %add.2 to i8 + %arrayidx4.2 = getelementptr inbounds i8* %c, i32 %inc.12 + store i8 %conv3.2, i8* %arrayidx4.2, align 1 + 
%inc.23 = or i32 %i.07, 3 + %arrayidx.3 = getelementptr inbounds i8* %a, i32 %inc.23 + %6 = load i8* %arrayidx.3, align 1 + %conv5.3 = zext i8 %6 to i32 + %arrayidx1.3 = getelementptr inbounds i8* %b, i32 %inc.23 + %7 = load i8* %arrayidx1.3, align 1 + %conv26.3 = zext i8 %7 to i32 + %add.3 = add nsw i32 %conv26.3, %conv5.3 + %conv3.3 = trunc i32 %add.3 to i8 + %arrayidx4.3 = getelementptr inbounds i8* %c, i32 %inc.23 + store i8 %conv3.3, i8* %arrayidx4.3, align 1 + %inc.3 = add nsw i32 %i.07, 4 + %exitcond.3 = icmp eq i32 %inc.3, 400 + br i1 %exitcond.3, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret void +} + +; @testNeon is an important example of the need for ivchains. +; +; Currently we have three extra add.w's that keep the store address +; live past the next increment because ISEL is unfortunately undoing +; the store chain. ISEL also fails to convert the stores to +; post-increment addressing. However, the loads should use +; post-increment addressing, no add's or add.w's beyond the three +; mentioned. Most importantly, there should be no spills or reloads! +; +; CHECK: testNeon: +; CHECK: %.lr.ph +; CHECK-NOT: lsl.w +; CHECK-NOT: {{ldr|str|adds|add r}} +; CHECK: add.w r +; CHECK-NOT: {{ldr|str|adds|add r}} +; CHECK: add.w r +; CHECK-NOT: {{ldr|str|adds|add r}} +; CHECK: add.w r +; CHECK-NOT: {{ldr|str|adds|add r}} +; CHECK-NOT: add.w r +; CHECK: bne +define hidden void @testNeon(i8* %ref_data, i32 %ref_stride, i32 %limit, <16 x i8>* nocapture %data) nounwind optsize { + %1 = icmp sgt i32 %limit, 0 + br i1 %1, label %.lr.ph, label %45 + +.lr.ph: ; preds = %0 + %2 = shl nsw i32 %ref_stride, 1 + %3 = mul nsw i32 %ref_stride, 3 + %4 = shl nsw i32 %ref_stride, 2 + %5 = mul nsw i32 %ref_stride, 5 + %6 = mul nsw i32 %ref_stride, 6 + %7 = mul nsw i32 %ref_stride, 7 + %8 = shl nsw i32 %ref_stride, 3 + %9 = sub i32 0, %8 + %10 = mul i32 %limit, -64 + br label %11 + +;