From stoklund at 2pi.dk Mon Apr 27 00:07:08 2009 From: stoklund at 2pi.dk (Jakob Stoklund Olesen) Date: Mon, 27 Apr 2009 07:07:08 +0200 Subject: [llvm-commits] [PATCH] Add GR8_H register class to X86 backend In-Reply-To: References: <74F36AB3-29C6-4FB0-8956-6F3CA337D62C@2pi.dk> Message-ID: <31D5DF3C-DC5D-474F-BE7F-C68E32D2DA1B@2pi.dk> On 27/04/2009, at 02.37, Dan Gohman wrote: > Thanks for pointing this out. This looks right to me. I'll apply the > patch > soon. Do you happen to have a testcase which shows the extra copies? The coalescer is actually quite effective at eliminating the bad vregs, so I have not been able to find a difference when running normally. You can see the difference by disabling the coalescer: llc -join-liveintervals=0 < test/CodeGen/X86/h-registers-2.ll Before applying this patch, an extra "movb %ah, %al" is inserted by LowerSubregs. My other submitted patch, coalesce-phys-virt.patch, causes a bunch of test failures without this one. Thanks, /jakob -------------- next part -------------- A non-text attachment was scrubbed... Name: h-registers-3.ll Type: application/octet-stream Size: 317 bytes Desc: not available Url : http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20090427/89373946/attachment.obj -------------- next part -------------- From nicholas at mxc.ca Mon Apr 27 00:09:44 2009 From: nicholas at mxc.ca (Nick Lewycky) Date: Mon, 27 Apr 2009 05:09:44 -0000 Subject: [llvm-commits] [llvm] r70185 - /llvm/trunk/lib/ExecutionEngine/JIT/JITEmitter.cpp Message-ID: <200904270509.n3R59iZK008740@zion.cs.uiuc.edu> Author: nicholas Date: Mon Apr 27 00:09:44 2009 New Revision: 70185 URL: http://llvm.org/viewvc/llvm-project?rev=70185&view=rev Log: Under unusual circumstances (jitting a function that causes the creation of another stub, but then never calling the jitted function) can cause the JIT to leave a stub in place. 
Judging by the comments this is a known deficiency, so we're just not going to use AssertingVH for the StubToFunctionTy map. Also shorten some lines longer than 80 columns. This fixes the "make check" failure with ocaml on x86-64 linux. Modified: llvm/trunk/lib/ExecutionEngine/JIT/JITEmitter.cpp Modified: llvm/trunk/lib/ExecutionEngine/JIT/JITEmitter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/ExecutionEngine/JIT/JITEmitter.cpp?rev=70185&r1=70184&r2=70185&view=diff ============================================================================== --- llvm/trunk/lib/ExecutionEngine/JIT/JITEmitter.cpp (original) +++ llvm/trunk/lib/ExecutionEngine/JIT/JITEmitter.cpp Mon Apr 27 00:09:44 2009 @@ -55,32 +55,35 @@ // namespace { class JITResolverState { + public: + typedef std::map<AssertingVH<Function>, void*> FunctionToStubMapTy; + typedef std::map<void*, Function*> StubToFunctionMapTy; + typedef std::map<AssertingVH<GlobalValue>, void*> GlobalToIndirectSymMapTy; private: /// FunctionToStubMap - Keep track of the stub created for a particular /// function so that we can reuse them if necessary. - std::map<AssertingVH<Function>, void*> FunctionToStubMap; + FunctionToStubMapTy FunctionToStubMap; /// StubToFunctionMap - Keep track of the function that each stub /// corresponds to. - std::map<void*, AssertingVH<Function> > StubToFunctionMap; + StubToFunctionMapTy StubToFunctionMap; /// GlobalToIndirectSymMap - Keep track of the indirect symbol created for a /// particular GlobalVariable so that we can reuse them if necessary. 
- std::map<GlobalValue*, void*> GlobalToIndirectSymMap; + GlobalToIndirectSymMapTy GlobalToIndirectSymMap; public: - std::map<AssertingVH<Function>, void*>& getFunctionToStubMap(const MutexGuard& locked) { + FunctionToStubMapTy& getFunctionToStubMap(const MutexGuard& locked) { assert(locked.holds(TheJIT->lock)); return FunctionToStubMap; } - std::map<void*, AssertingVH<Function> >& getStubToFunctionMap(const MutexGuard& locked) { + StubToFunctionMapTy& getStubToFunctionMap(const MutexGuard& locked) { assert(locked.holds(TheJIT->lock)); return StubToFunctionMap; } - std::map<GlobalValue*, void*>& - getGlobalToIndirectSymMap(const MutexGuard& locked) { + GlobalToIndirectSymMapTy& getGlobalToIndirectSymMap(const MutexGuard& locked) { assert(locked.holds(TheJIT->lock)); return GlobalToIndirectSymMap; } @@ -89,6 +92,10 @@ /// JITResolver - Keep track of, and resolve, call sites for functions that /// have not yet been compiled. class JITResolver { + typedef JITResolverState::FunctionToStubMapTy FunctionToStubMapTy; + typedef JITResolverState::StubToFunctionMapTy StubToFunctionMapTy; + typedef JITResolverState::GlobalToIndirectSymMapTy GlobalToIndirectSymMapTy; + /// LazyResolverFn - The target lazy resolver function that we actually /// rewrite instructions to use. 
TargetJITInfo::LazyResolverFn LazyResolverFn; @@ -276,18 +283,17 @@ SmallVectorImpl<void*> &Ptrs) { MutexGuard locked(TheJIT->lock); - std::map<AssertingVH<Function>,void*> &FM =state.getFunctionToStubMap(locked); - std::map<GlobalValue*,void*> &GM = state.getGlobalToIndirectSymMap(locked); + FunctionToStubMapTy &FM = state.getFunctionToStubMap(locked); + GlobalToIndirectSymMapTy &GM = state.getGlobalToIndirectSymMap(locked); - for (std::map<AssertingVH<Function>,void*>::iterator i = FM.begin(), - e = FM.end(); i != e; ++i) { + for (FunctionToStubMapTy::iterator i = FM.begin(), e = FM.end(); i != e; ++i){ Function *F = i->first; if (F->isDeclaration() && F->hasExternalLinkage()) { GVs.push_back(i->first); Ptrs.push_back(i->second); } } - for (std::map<GlobalValue*,void*>::iterator i = GM.begin(), e = GM.end(); + for (GlobalToIndirectSymMapTy::iterator i = GM.begin(), e = GM.end(); i != e; ++i) { GVs.push_back(i->first); Ptrs.push_back(i->second); @@ -297,9 +303,9 @@ GlobalValue *JITResolver::invalidateStub(void *Stub) { MutexGuard locked(TheJIT->lock); - std::map<AssertingVH<Function>,void*> &FM =state.getFunctionToStubMap(locked); - std::map<void*,AssertingVH<Function> > &SM=state.getStubToFunctionMap(locked); - std::map<GlobalValue*,void*> &GM = state.getGlobalToIndirectSymMap(locked); + FunctionToStubMapTy &FM = state.getFunctionToStubMap(locked); + StubToFunctionMapTy &SM = state.getStubToFunctionMap(locked); + GlobalToIndirectSymMapTy &GM = state.getGlobalToIndirectSymMap(locked); // Look up the cheap way first, to see if it's a function stub we are // invalidating. If so, remove it from both the forward and reverse maps. @@ -311,7 +317,7 @@ } // Otherwise, it might be an indirect symbol stub. Find it and remove it. - for (std::map<GlobalValue*,void*>::iterator i = GM.begin(), e = GM.end(); + for (GlobalToIndirectSymMapTy::iterator i = GM.begin(), e = GM.end(); i != e; ++i) { if (i->second != Stub) continue; @@ -349,7 +355,7 @@ // The address given to us for the stub may not be exactly right, it might be // a little bit after the stub. As such, use upper_bound to find it. 
- std::map >::iterator I = + StubToFunctionMapTy::iterator I = JR.state.getStubToFunctionMap(locked).upper_bound(Stub); assert(I != JR.state.getStubToFunctionMap(locked).begin() && "This is not a known stub!"); From wangmp at apple.com Mon Apr 27 02:22:11 2009 From: wangmp at apple.com (Mon P Wang) Date: Mon, 27 Apr 2009 07:22:11 -0000 Subject: [llvm-commits] [llvm] r70197 - /llvm/trunk/lib/Target/X86/X86InstrInfo.td Message-ID: <200904270722.n3R7MCbB013330@zion.cs.uiuc.edu> Author: wangmp Date: Mon Apr 27 02:22:10 2009 New Revision: 70197 URL: http://llvm.org/viewvc/llvm-project?rev=70197&view=rev Log: Revised 68749 to allow matching of load/stores for address spaces < 256. Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.td Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.td?rev=70197&r1=70196&r2=70197&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86InstrInfo.td (original) +++ llvm/trunk/lib/Target/X86/X86InstrInfo.td Mon Apr 27 02:22:10 2009 @@ -286,7 +286,7 @@ LoadSDNode *LD = cast(N); if (const Value *Src = LD->getSrcValue()) if (const PointerType *PT = dyn_cast(Src->getType())) - if (PT->getAddressSpace() != 0) + if (PT->getAddressSpace() > 255) return false; ISD::LoadExtType ExtType = LD->getExtensionType(); if (ExtType == ISD::NON_EXTLOAD) @@ -300,7 +300,7 @@ LoadSDNode *LD = cast(N); if (const Value *Src = LD->getSrcValue()) if (const PointerType *PT = dyn_cast(Src->getType())) - if (PT->getAddressSpace() != 0) + if (PT->getAddressSpace() > 255) return false; ISD::LoadExtType ExtType = LD->getExtensionType(); if (ExtType == ISD::EXTLOAD) @@ -312,7 +312,7 @@ LoadSDNode *LD = cast(N); if (const Value *Src = LD->getSrcValue()) if (const PointerType *PT = dyn_cast(Src->getType())) - if (PT->getAddressSpace() != 0) + if (PT->getAddressSpace() > 255) return false; ISD::LoadExtType ExtType = 
LD->getExtensionType(); if (ExtType == ISD::NON_EXTLOAD) @@ -326,7 +326,7 @@ LoadSDNode *LD = cast(N); if (const Value *Src = LD->getSrcValue()) if (const PointerType *PT = dyn_cast(Src->getType())) - if (PT->getAddressSpace() != 0) + if (PT->getAddressSpace() > 255) return false; if (LD->isVolatile()) return false; @@ -348,14 +348,14 @@ def loadi8 : PatFrag<(ops node:$ptr), (i8 (load node:$ptr)), [{ if (const Value *Src = cast(N)->getSrcValue()) if (const PointerType *PT = dyn_cast(Src->getType())) - if (PT->getAddressSpace() != 0) + if (PT->getAddressSpace() > 255) return false; return true; }]>; def loadi64 : PatFrag<(ops node:$ptr), (i64 (load node:$ptr)), [{ if (const Value *Src = cast(N)->getSrcValue()) if (const PointerType *PT = dyn_cast(Src->getType())) - if (PT->getAddressSpace() != 0) + if (PT->getAddressSpace() > 255) return false; return true; }]>; @@ -363,21 +363,21 @@ def loadf32 : PatFrag<(ops node:$ptr), (f32 (load node:$ptr)), [{ if (const Value *Src = cast(N)->getSrcValue()) if (const PointerType *PT = dyn_cast(Src->getType())) - if (PT->getAddressSpace() != 0) + if (PT->getAddressSpace() > 255) return false; return true; }]>; def loadf64 : PatFrag<(ops node:$ptr), (f64 (load node:$ptr)), [{ if (const Value *Src = cast(N)->getSrcValue()) if (const PointerType *PT = dyn_cast(Src->getType())) - if (PT->getAddressSpace() != 0) + if (PT->getAddressSpace() > 255) return false; return true; }]>; def loadf80 : PatFrag<(ops node:$ptr), (f80 (load node:$ptr)), [{ if (const Value *Src = cast(N)->getSrcValue()) if (const PointerType *PT = dyn_cast(Src->getType())) - if (PT->getAddressSpace() != 0) + if (PT->getAddressSpace() > 255) return false; return true; }]>; From baldrick at free.fr Mon Apr 27 03:52:58 2009 From: baldrick at free.fr (Duncan Sands) Date: Mon, 27 Apr 2009 10:52:58 +0200 Subject: [llvm-commits] [llvm] r70165 - in /llvm/trunk: docs/BitCodeFormat.html include/llvm/Bitcode/BitCodes.h include/llvm/Bitcode/BitstreamReader.h 
tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp In-Reply-To: <200904262221.n3QMLx0a028970@zion.cs.uiuc.edu> References: <200904262221.n3QMLx0a028970@zion.cs.uiuc.edu> Message-ID: <200904271052.58445.baldrick@free.fr> > Add two new record types to the blockinfo block: > BLOCKNAME and SETRECORDNAME. This allows a bitcode > file to be self describing with pretty names for > records and blocks in addition to numbers. This > enhances llvm-bcanalyzer to use this to print prettily. Testcase? Ciao, Duncan. From baldrick at free.fr Mon Apr 27 06:21:46 2009 From: baldrick at free.fr (Duncan Sands) Date: Mon, 27 Apr 2009 11:21:46 -0000 Subject: [llvm-commits] [llvm] r70205 - /llvm/trunk/docs/GCCFEBuildInstrs.html Message-ID: <200904271121.n3RBLlrV032630@zion.cs.uiuc.edu> Author: baldrick Date: Mon Apr 27 06:21:35 2009 New Revision: 70205 URL: http://llvm.org/viewvc/llvm-project?rev=70205&view=rev Log: Fix some confusion in the Ada f-e build instructions between building without optimization and building with checking. Modified: llvm/trunk/docs/GCCFEBuildInstrs.html Modified: llvm/trunk/docs/GCCFEBuildInstrs.html URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/docs/GCCFEBuildInstrs.html?rev=70205&r1=70204&r2=70205&view=diff ============================================================================== --- llvm/trunk/docs/GCCFEBuildInstrs.html (original) +++ llvm/trunk/docs/GCCFEBuildInstrs.html Mon Apr 27 06:21:35 2009 @@ -157,29 +157,32 @@
  • Configure LLVM (here it is configured to install into /usr/local):

    -../llvm/configure --prefix=/usr/local
    +../llvm/configure --prefix=/usr/local --enable-optimized --enable-assertions
     

    If you have a multi-compiler setup and the C++ compiler is not the default, then you can configure like this:

    -CXX=PATH_TO_C++_COMPILER ../llvm/configure --prefix=/usr/local
    +CXX=PATH_TO_C++_COMPILER ../llvm/configure --prefix=/usr/local --enable-optimized --enable-assertions
     
    + +

    To compile without checking (not recommended), replace + --enable-assertions with --disable-assertions.

    +
  • -
  • Build LLVM with checking enabled (use ENABLE_OPTIMIZED=1 to - build without checking):

    +
  • Build LLVM:

    -make ENABLE_OPTIMIZED=0
    +make
     
  • Install LLVM (optional):

    -make ENABLE_OPTIMIZED=0 install
    +make install
     
  • @@ -195,7 +198,8 @@
  • Configure llvm-gcc (here it is configured to install into /usr/local). The --enable-checking flag turns on sanity checks inside the compiler. - If you omit it then LLVM should be built with make ENABLE_OPTIMIZED=1. + To turn off these checks (not recommended), replace --enable-checking + with --disable-checking. Additional languages can be appended to the --enable-languages switch, for example --enable-languages=ada,c,c++.

    From gohman at apple.com Mon Apr 27 10:08:45 2009 From: gohman at apple.com (Dan Gohman) Date: Mon, 27 Apr 2009 15:08:45 -0000 Subject: [llvm-commits] [llvm] r70208 - /llvm/trunk/test/CodeGen/X86/2009-04-24.ll Message-ID: <200904271508.n3RF8m98007543@zion.cs.uiuc.edu> Author: djg Date: Mon Apr 27 10:08:34 2009 New Revision: 70208 URL: http://llvm.org/viewvc/llvm-project?rev=70208&view=rev Log: Fix the syntax for a PR number in a test. Modified: llvm/trunk/test/CodeGen/X86/2009-04-24.ll Modified: llvm/trunk/test/CodeGen/X86/2009-04-24.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/2009-04-24.ll?rev=70208&r1=70207&r2=70208&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/X86/2009-04-24.ll (original) +++ llvm/trunk/test/CodeGen/X86/2009-04-24.ll Mon Apr 27 10:08:34 2009 @@ -2,7 +2,7 @@ ; RUN: grep {leal.*TLSGD.*___tls_get_addr} %t ; RUN: llvm-as < %s | llc -march=x86-64 -mtriple=x86_64-linux-gnu -regalloc=local -relocation-model=pic > %t2 ; RUN: grep {leaq.*TLSGD.*__tls_get_addr} %t2 -; PR/4004 +; PR4004 @i = thread_local global i32 15 From gohman at apple.com Mon Apr 27 10:13:35 2009 From: gohman at apple.com (Dan Gohman) Date: Mon, 27 Apr 2009 15:13:35 -0000 Subject: [llvm-commits] [llvm] r70209 - in /llvm/trunk/lib/Target/X86: X86Instr64bit.td X86InstrInfo.td Message-ID: <200904271513.n3RFDa7h007692@zion.cs.uiuc.edu> Author: djg Date: Mon Apr 27 10:13:28 2009 New Revision: 70209 URL: http://llvm.org/viewvc/llvm-project?rev=70209&view=rev Log: Break up long multi-mnemonic strings into separate lines for readability. 
Modified: llvm/trunk/lib/Target/X86/X86Instr64bit.td llvm/trunk/lib/Target/X86/X86InstrInfo.td Modified: llvm/trunk/lib/Target/X86/X86Instr64bit.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Instr64bit.td?rev=70209&r1=70208&r2=70209&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86Instr64bit.td (original) +++ llvm/trunk/lib/Target/X86/X86Instr64bit.td Mon Apr 27 10:13:28 2009 @@ -1313,7 +1313,11 @@ XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS], Uses = [RSP] in def TLS_addr64 : I<0, Pseudo, (outs), (ins i64imm:$sym), - ".byte\t0x66; leaq\t${sym:mem}(%rip), %rdi; .word\t0x6666; rex64;call\t__tls_get_addr@PLT", + ".byte\t0x66; " + "leaq\t${sym:mem}(%rip), %rdi; " + ".word\t0x6666; " + "rex64; " + "call\t__tls_get_addr@PLT", [(X86tlsaddr tglobaltlsaddr:$sym)]>, Requires<[In64BitMode]>; @@ -1328,14 +1332,16 @@ let Defs = [RAX, EFLAGS], Uses = [RAX] in { def LCMPXCHG64 : RI<0xB1, MRMDestMem, (outs), (ins i64mem:$ptr, GR64:$swap), - "lock\n\tcmpxchgq\t$swap,$ptr", + "lock\n\t" + "cmpxchgq\t$swap,$ptr", [(X86cas addr:$ptr, GR64:$swap, 8)]>, TB, LOCK; } let Constraints = "$val = $dst" in { let Defs = [EFLAGS] in def LXADD64 : RI<0xC1, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$ptr,GR64:$val), - "lock\n\txadd\t$val, $ptr", + "lock\n\t" + "xadd\t$val, $ptr", [(set GR64:$dst, (atomic_load_add_64 addr:$ptr, GR64:$val))]>, TB, LOCK; def XCHG64rm : RI<0x87, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$ptr,GR64:$val), Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.td?rev=70209&r1=70208&r2=70209&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86InstrInfo.td (original) +++ llvm/trunk/lib/Target/X86/X86InstrInfo.td Mon Apr 27 10:13:28 2009 @@ -469,7 +469,8 @@ // PIC base let neverHasSideEffects = 1, 
isNotDuplicable = 1, Uses = [ESP] in def MOVPC32r : Ii32<0xE8, Pseudo, (outs GR32:$reg), (ins piclabel:$label), - "call\t$label\n\tpop{l}\t$reg", []>; + "call\t$label\n\t" + "pop{l}\t$reg", []>; //===----------------------------------------------------------------------===// // Control Flow Instructions... @@ -2986,7 +2987,8 @@ XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS], Uses = [ESP, EBX] in def TLS_addr32 : I<0, Pseudo, (outs), (ins i32imm:$sym), - "leal\t${sym:mem}(,%ebx,1), %eax; call\t___tls_get_addr@PLT", + "leal\t${sym:mem}(,%ebx,1), %eax; " + "call\t___tls_get_addr@PLT", [(X86tlsaddr tglobaltlsaddr:$sym)]>, Requires<[In32BitMode]>; @@ -3038,38 +3040,45 @@ // Atomic compare and swap. let Defs = [EAX, EFLAGS], Uses = [EAX] in { def LCMPXCHG32 : I<0xB1, MRMDestMem, (outs), (ins i32mem:$ptr, GR32:$swap), - "lock\n\tcmpxchg{l}\t{$swap, $ptr|$ptr, $swap}", + "lock\n\t" + "cmpxchg{l}\t{$swap, $ptr|$ptr, $swap}", [(X86cas addr:$ptr, GR32:$swap, 4)]>, TB, LOCK; } let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX] in { def LCMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i32mem:$ptr), - "lock\n\tcmpxchg8b\t$ptr", + "lock\n\t" + "cmpxchg8b\t$ptr", [(X86cas8 addr:$ptr)]>, TB, LOCK; } let Defs = [AX, EFLAGS], Uses = [AX] in { def LCMPXCHG16 : I<0xB1, MRMDestMem, (outs), (ins i16mem:$ptr, GR16:$swap), - "lock\n\tcmpxchg{w}\t{$swap, $ptr|$ptr, $swap}", + "lock\n\t" + "cmpxchg{w}\t{$swap, $ptr|$ptr, $swap}", [(X86cas addr:$ptr, GR16:$swap, 2)]>, TB, OpSize, LOCK; } let Defs = [AL, EFLAGS], Uses = [AL] in { def LCMPXCHG8 : I<0xB0, MRMDestMem, (outs), (ins i8mem:$ptr, GR8:$swap), - "lock\n\tcmpxchg{b}\t{$swap, $ptr|$ptr, $swap}", + "lock\n\t" + "cmpxchg{b}\t{$swap, $ptr|$ptr, $swap}", [(X86cas addr:$ptr, GR8:$swap, 1)]>, TB, LOCK; } // Atomic exchange and add let Constraints = "$val = $dst", Defs = [EFLAGS] in { def LXADD32 : I<0xC1, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$ptr, GR32:$val), - "lock\n\txadd{l}\t{$val, $ptr|$ptr, $val}", + "lock\n\t" + 
"xadd{l}\t{$val, $ptr|$ptr, $val}", [(set GR32:$dst, (atomic_load_add_32 addr:$ptr, GR32:$val))]>, TB, LOCK; def LXADD16 : I<0xC1, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$ptr, GR16:$val), - "lock\n\txadd{w}\t{$val, $ptr|$ptr, $val}", + "lock\n\t" + "xadd{w}\t{$val, $ptr|$ptr, $val}", [(set GR16:$dst, (atomic_load_add_16 addr:$ptr, GR16:$val))]>, TB, OpSize, LOCK; def LXADD8 : I<0xC0, MRMSrcMem, (outs GR8:$dst), (ins i8mem:$ptr, GR8:$val), - "lock\n\txadd{b}\t{$val, $ptr|$ptr, $val}", + "lock\n\t" + "xadd{b}\t{$val, $ptr|$ptr, $val}", [(set GR8:$dst, (atomic_load_add_8 addr:$ptr, GR8:$val))]>, TB, LOCK; } From gohman at apple.com Mon Apr 27 11:33:27 2009 From: gohman at apple.com (Dan Gohman) Date: Mon, 27 Apr 2009 16:33:27 -0000 Subject: [llvm-commits] [llvm] r70210 - in /llvm/trunk/lib/Target/X86: X86FastISel.cpp X86Instr64bit.td X86InstrInfo.cpp X86InstrInfo.td X86RegisterInfo.td Message-ID: <200904271633.n3RGXU4M010445@zion.cs.uiuc.edu> Author: djg Date: Mon Apr 27 11:33:14 2009 New Revision: 70210 URL: http://llvm.org/viewvc/llvm-project?rev=70210&view=rev Log: Rename GR8_, GR16_, GR32_, and GR64_ to GR8_ABCD, GR16_ABCD, GR32_ABCD, and GR64_ABCD, respectively, to help describe them. Modified: llvm/trunk/lib/Target/X86/X86FastISel.cpp llvm/trunk/lib/Target/X86/X86Instr64bit.td llvm/trunk/lib/Target/X86/X86InstrInfo.cpp llvm/trunk/lib/Target/X86/X86InstrInfo.td llvm/trunk/lib/Target/X86/X86RegisterInfo.td Modified: llvm/trunk/lib/Target/X86/X86FastISel.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86FastISel.cpp?rev=70210&r1=70209&r2=70210&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86FastISel.cpp (original) +++ llvm/trunk/lib/Target/X86/X86FastISel.cpp Mon Apr 27 11:33:14 2009 @@ -996,10 +996,10 @@ // Unhandled operand. Halt "fast" selection and bail. return false; - // First issue a copy to GR16_ or GR32_. + // First issue a copy to GR16_ABCD or GR32_ABCD. 
unsigned CopyOpc = (SrcVT == MVT::i16) ? X86::MOV16rr : X86::MOV32rr; const TargetRegisterClass *CopyRC = (SrcVT == MVT::i16) - ? X86::GR16_RegisterClass : X86::GR32_RegisterClass; + ? X86::GR16_ABCDRegisterClass : X86::GR32_ABCDRegisterClass; unsigned CopyReg = createResultReg(CopyRC); BuildMI(MBB, DL, TII.get(CopyOpc), CopyReg).addReg(InputReg); Modified: llvm/trunk/lib/Target/X86/X86Instr64bit.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Instr64bit.td?rev=70210&r1=70209&r2=70210&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86Instr64bit.td (original) +++ llvm/trunk/lib/Target/X86/X86Instr64bit.td Mon Apr 27 11:33:14 2009 @@ -1594,18 +1594,18 @@ (SUBREG_TO_REG (i64 0), (MOVZX32_NOREXrr8 - (EXTRACT_SUBREG (COPY_TO_REGCLASS GR64:$src, GR64_), + (EXTRACT_SUBREG (COPY_TO_REGCLASS GR64:$src, GR64_ABCD), x86_subreg_8bit_hi)), x86_subreg_32bit)>; def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)), (MOVZX32_NOREXrr8 - (EXTRACT_SUBREG (COPY_TO_REGCLASS GR32:$src, GR32_), + (EXTRACT_SUBREG (COPY_TO_REGCLASS GR32:$src, GR32_ABCD), x86_subreg_8bit_hi))>, Requires<[In64BitMode]>; def : Pat<(srl_su GR16:$src, (i8 8)), (EXTRACT_SUBREG (MOVZX32_NOREXrr8 - (EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src, GR16_), + (EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src, GR16_ABCD), x86_subreg_8bit_hi)), x86_subreg_16bit)>, Requires<[In64BitMode]>; @@ -1614,18 +1614,18 @@ def : Pat<(store (i8 (trunc_su (srl_su GR64:$src, (i8 8)))), addr:$dst), (MOV8mr_NOREX addr:$dst, - (EXTRACT_SUBREG (COPY_TO_REGCLASS GR64:$src, GR64_), + (EXTRACT_SUBREG (COPY_TO_REGCLASS GR64:$src, GR64_ABCD), x86_subreg_8bit_hi))>; def : Pat<(store (i8 (trunc_su (srl_su GR32:$src, (i8 8)))), addr:$dst), (MOV8mr_NOREX addr:$dst, - (EXTRACT_SUBREG (COPY_TO_REGCLASS GR32:$src, GR32_), + (EXTRACT_SUBREG (COPY_TO_REGCLASS GR32:$src, GR32_ABCD), x86_subreg_8bit_hi))>, Requires<[In64BitMode]>; def : Pat<(store (i8 (trunc_su 
(srl_su GR16:$src, (i8 8)))), addr:$dst), (MOV8mr_NOREX addr:$dst, - (EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src, GR16_), + (EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src, GR16_ABCD), x86_subreg_8bit_hi))>, Requires<[In64BitMode]>; Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=70210&r1=70209&r2=70210&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original) +++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Mon Apr 27 11:33:14 2009 @@ -1681,13 +1681,13 @@ Opc = X86::MOV8rr_NOREX; else Opc = X86::MOV8rr; - } else if (CommonRC == &X86::GR64_RegClass) { + } else if (CommonRC == &X86::GR64_ABCDRegClass) { Opc = X86::MOV64rr; - } else if (CommonRC == &X86::GR32_RegClass) { + } else if (CommonRC == &X86::GR32_ABCDRegClass) { Opc = X86::MOV32rr; - } else if (CommonRC == &X86::GR16_RegClass) { + } else if (CommonRC == &X86::GR16_ABCDRegClass) { Opc = X86::MOV16rr; - } else if (CommonRC == &X86::GR8_RegClass) { + } else if (CommonRC == &X86::GR8_ABCDRegClass) { Opc = X86::MOV8rr; } else if (CommonRC == &X86::GR64_NOREXRegClass) { Opc = X86::MOV64rr; @@ -1802,13 +1802,13 @@ Opc = X86::MOV16mr; } else if (RC == &X86::GR8RegClass) { Opc = X86::MOV8mr; - } else if (RC == &X86::GR64_RegClass) { + } else if (RC == &X86::GR64_ABCDRegClass) { Opc = X86::MOV64mr; - } else if (RC == &X86::GR32_RegClass) { + } else if (RC == &X86::GR32_ABCDRegClass) { Opc = X86::MOV32mr; - } else if (RC == &X86::GR16_RegClass) { + } else if (RC == &X86::GR16_ABCDRegClass) { Opc = X86::MOV16mr; - } else if (RC == &X86::GR8_RegClass) { + } else if (RC == &X86::GR8_ABCDRegClass) { Opc = X86::MOV8mr; } else if (RC == &X86::GR64_NOREXRegClass) { Opc = X86::MOV64mr; @@ -1882,13 +1882,13 @@ Opc = X86::MOV16rm; } else if (RC == &X86::GR8RegClass) { Opc = X86::MOV8rm; - } else if (RC == &X86::GR64_RegClass) { + } else if (RC == 
&X86::GR64_ABCDRegClass) { Opc = X86::MOV64rm; - } else if (RC == &X86::GR32_RegClass) { + } else if (RC == &X86::GR32_ABCDRegClass) { Opc = X86::MOV32rm; - } else if (RC == &X86::GR16_RegClass) { + } else if (RC == &X86::GR16_ABCDRegClass) { Opc = X86::MOV16rm; - } else if (RC == &X86::GR8_RegClass) { + } else if (RC == &X86::GR8_ABCDRegClass) { Opc = X86::MOV8rm; } else if (RC == &X86::GR64_NOREXRegClass) { Opc = X86::MOV64rm; Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.td?rev=70210&r1=70209&r2=70210&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86InstrInfo.td (original) +++ llvm/trunk/lib/Target/X86/X86InstrInfo.td Mon Apr 27 11:33:14 2009 @@ -3372,12 +3372,12 @@ (MOVZX32rr16 (EXTRACT_SUBREG GR32:$src1, x86_subreg_16bit))>; // r & (2^8-1) ==> movz def : Pat<(and GR32:$src1, 0xff), - (MOVZX32rr8 (EXTRACT_SUBREG (COPY_TO_REGCLASS GR32:$src1, GR32_), + (MOVZX32rr8 (EXTRACT_SUBREG (COPY_TO_REGCLASS GR32:$src1, GR32_ABCD), x86_subreg_8bit))>, Requires<[In32BitMode]>; // r & (2^8-1) ==> movz def : Pat<(and GR16:$src1, 0xff), - (MOVZX16rr8 (EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src1, GR16_), + (MOVZX16rr8 (EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src1, GR16_ABCD), x86_subreg_8bit))>, Requires<[In32BitMode]>; @@ -3385,11 +3385,11 @@ def : Pat<(sext_inreg GR32:$src, i16), (MOVSX32rr16 (EXTRACT_SUBREG GR32:$src, x86_subreg_16bit))>; def : Pat<(sext_inreg GR32:$src, i8), - (MOVSX32rr8 (EXTRACT_SUBREG (COPY_TO_REGCLASS GR32:$src, GR32_), + (MOVSX32rr8 (EXTRACT_SUBREG (COPY_TO_REGCLASS GR32:$src, GR32_ABCD), x86_subreg_8bit))>, Requires<[In32BitMode]>; def : Pat<(sext_inreg GR16:$src, i8), - (MOVSX16rr8 (EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src, GR16_), + (MOVSX16rr8 (EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src, GR16_ABCD), x86_subreg_8bit))>, Requires<[In32BitMode]>; @@ -3397,32 +3397,32 @@ def : Pat<(i16 (trunc 
GR32:$src)), (EXTRACT_SUBREG GR32:$src, x86_subreg_16bit)>; def : Pat<(i8 (trunc GR32:$src)), - (EXTRACT_SUBREG (COPY_TO_REGCLASS GR32:$src, GR32_), + (EXTRACT_SUBREG (COPY_TO_REGCLASS GR32:$src, GR32_ABCD), x86_subreg_8bit)>, Requires<[In32BitMode]>; def : Pat<(i8 (trunc GR16:$src)), - (EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src, GR16_), + (EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src, GR16_ABCD), x86_subreg_8bit)>, Requires<[In32BitMode]>; // h-register tricks def : Pat<(i8 (trunc (srl_su GR16:$src, (i8 8)))), - (EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src, GR16_), + (EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src, GR16_ABCD), x86_subreg_8bit_hi)>, Requires<[In32BitMode]>; def : Pat<(i8 (trunc (srl_su GR32:$src, (i8 8)))), - (EXTRACT_SUBREG (COPY_TO_REGCLASS GR32:$src, GR32_), + (EXTRACT_SUBREG (COPY_TO_REGCLASS GR32:$src, GR32_ABCD), x86_subreg_8bit_hi)>, Requires<[In32BitMode]>; def : Pat<(srl_su GR16:$src, (i8 8)), (EXTRACT_SUBREG (MOVZX32rr8 - (EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src, GR16_), + (EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src, GR16_ABCD), x86_subreg_8bit_hi)), x86_subreg_16bit)>, Requires<[In32BitMode]>; def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)), - (MOVZX32rr8 (EXTRACT_SUBREG (COPY_TO_REGCLASS GR32:$src, GR32_), + (MOVZX32rr8 (EXTRACT_SUBREG (COPY_TO_REGCLASS GR32:$src, GR32_ABCD), x86_subreg_8bit_hi))>, Requires<[In32BitMode]>; Modified: llvm/trunk/lib/Target/X86/X86RegisterInfo.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86RegisterInfo.td?rev=70210&r1=70209&r2=70210&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86RegisterInfo.td (original) +++ llvm/trunk/lib/Target/X86/X86RegisterInfo.td Mon Apr 27 11:33:14 2009 @@ -461,21 +461,21 @@ } -// GR8_, GR16_, GR32_, GR64_ - Subclasses of GR8, GR16, GR32, and GR64 -// which contain just the "a" "b", "c", and "d" registers. 
On x86-32, -// GR16_ and GR32_ are classes for registers that support 8-bit subreg -// operations. On x86-64, GR16_, GR32_, and GR64_ are classes for registers -// that support 8-bit h-register operations. -def GR8_ : RegisterClass<"X86", [i8], 8, [AL, CL, DL, BL]> { +// GR8_ABCD, GR16_ABCD, GR32_ABCD, GR64_ABCD - Subclasses of GR8, GR16, GR32, +// and GR64 which contain just the "a" "b", "c", and "d" registers. On x86-32, +// GR16_ABCD and GR32_ABCD are classes for registers that support 8-bit subreg +// operations. On x86-64, GR16_ABCD, GR32_ABCD, and GR64_ABCD are classes for +// registers that support 8-bit h-register operations. +def GR8_ABCD : RegisterClass<"X86", [i8], 8, [AL, CL, DL, BL]> { } -def GR16_ : RegisterClass<"X86", [i16], 16, [AX, CX, DX, BX]> { - let SubRegClassList = [GR8_, GR8_]; +def GR16_ABCD : RegisterClass<"X86", [i16], 16, [AX, CX, DX, BX]> { + let SubRegClassList = [GR8_ABCD, GR8_ABCD]; } -def GR32_ : RegisterClass<"X86", [i32], 32, [EAX, ECX, EDX, EBX]> { - let SubRegClassList = [GR8_, GR8_, GR16_]; +def GR32_ABCD : RegisterClass<"X86", [i32], 32, [EAX, ECX, EDX, EBX]> { + let SubRegClassList = [GR8_ABCD, GR8_ABCD, GR16_ABCD]; } -def GR64_ : RegisterClass<"X86", [i64], 64, [RAX, RCX, RDX, RBX]> { - let SubRegClassList = [GR8_, GR8_, GR16_, GR32_]; +def GR64_ABCD : RegisterClass<"X86", [i64], 64, [RAX, RCX, RDX, RBX]> { + let SubRegClassList = [GR8_ABCD, GR8_ABCD, GR16_ABCD, GR32_ABCD]; } // GR8_NOREX, GR16_NOREX, GR32_NOREX, GR64_NOREX - Subclasses of From gohman at apple.com Mon Apr 27 11:41:37 2009 From: gohman at apple.com (Dan Gohman) Date: Mon, 27 Apr 2009 16:41:37 -0000 Subject: [llvm-commits] [llvm] r70211 - in /llvm/trunk/lib/Target/X86: X86InstrInfo.cpp X86InstrInfo.td X86RegisterInfo.td Message-ID: <200904271641.n3RGfbf9010832@zion.cs.uiuc.edu> Author: djg Date: Mon Apr 27 11:41:36 2009 New Revision: 70211 URL: http://llvm.org/viewvc/llvm-project?rev=70211&view=rev Log: Rename GR8_ABCD to GR8_ABCD_L and create GR8_ABCD_H, and 
use these to precisely describe the h-register subreg register classes. Thanks to Jakob Stoklund Olesen for spotting this and for the initial patch! Also, make getStoreRegOpcode and getLoadRegOpcode aware of the needs of h registers. Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp llvm/trunk/lib/Target/X86/X86InstrInfo.td llvm/trunk/lib/Target/X86/X86RegisterInfo.td Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=70211&r1=70210&r2=70211&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original) +++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Mon Apr 27 11:41:36 2009 @@ -1645,7 +1645,7 @@ /// isHReg - Test if the given register is a physical h register. static bool isHReg(unsigned Reg) { - return Reg == X86::AH || Reg == X86::BH || Reg == X86::CH || Reg == X86::DH; + return X86::GR8_ABCD_HRegClass.contains(Reg); } bool X86InstrInfo::copyRegToReg(MachineBasicBlock &MBB, @@ -1674,7 +1674,7 @@ } else if (CommonRC == &X86::GR16RegClass) { Opc = X86::MOV16rr; } else if (CommonRC == &X86::GR8RegClass) { - // Copying two or from a physical H register on x86-64 requires a NOREX + // Copying to or from a physical H register on x86-64 requires a NOREX // move. Otherwise use a normal move. 
if ((isHReg(DestReg) || isHReg(SrcReg)) && TM.getSubtarget().is64Bit()) @@ -1687,8 +1687,13 @@ Opc = X86::MOV32rr; } else if (CommonRC == &X86::GR16_ABCDRegClass) { Opc = X86::MOV16rr; - } else if (CommonRC == &X86::GR8_ABCDRegClass) { + } else if (CommonRC == &X86::GR8_ABCD_LRegClass) { Opc = X86::MOV8rr; + } else if (CommonRC == &X86::GR8_ABCD_HRegClass) { + if (TM.getSubtarget().is64Bit()) + Opc = X86::MOV8rr_NOREX; + else + Opc = X86::MOV8rr; } else if (CommonRC == &X86::GR64_NOREXRegClass) { Opc = X86::MOV64rr; } else if (CommonRC == &X86::GR32_NOREXRegClass) { @@ -1791,8 +1796,10 @@ return false; } -static unsigned getStoreRegOpcode(const TargetRegisterClass *RC, - bool isStackAligned) { +static unsigned getStoreRegOpcode(unsigned SrcReg, + const TargetRegisterClass *RC, + bool isStackAligned, + TargetMachine &TM) { unsigned Opc = 0; if (RC == &X86::GR64RegClass) { Opc = X86::MOV64mr; @@ -1801,15 +1808,26 @@ } else if (RC == &X86::GR16RegClass) { Opc = X86::MOV16mr; } else if (RC == &X86::GR8RegClass) { - Opc = X86::MOV8mr; + // Copying to or from a physical H register on x86-64 requires a NOREX + // move. Otherwise use a normal move. 
+ if (isHReg(SrcReg) && + TM.getSubtarget().is64Bit()) + Opc = X86::MOV8mr_NOREX; + else + Opc = X86::MOV8mr; } else if (RC == &X86::GR64_ABCDRegClass) { Opc = X86::MOV64mr; } else if (RC == &X86::GR32_ABCDRegClass) { Opc = X86::MOV32mr; } else if (RC == &X86::GR16_ABCDRegClass) { Opc = X86::MOV16mr; - } else if (RC == &X86::GR8_ABCDRegClass) { + } else if (RC == &X86::GR8_ABCD_LRegClass) { Opc = X86::MOV8mr; + } else if (RC == &X86::GR8_ABCD_HRegClass) { + if (TM.getSubtarget().is64Bit()) + Opc = X86::MOV8mr_NOREX; + else + Opc = X86::MOV8mr; } else if (RC == &X86::GR64_NOREXRegClass) { Opc = X86::MOV64mr; } else if (RC == &X86::GR32_NOREXRegClass) { @@ -1848,7 +1866,7 @@ const MachineFunction &MF = *MBB.getParent(); bool isAligned = (RI.getStackAlignment() >= 16) || RI.needsStackRealignment(MF); - unsigned Opc = getStoreRegOpcode(RC, isAligned); + unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM); DebugLoc DL = DebugLoc::getUnknownLoc(); if (MI != MBB.end()) DL = MI->getDebugLoc(); addFrameReference(BuildMI(MBB, MI, DL, get(Opc)), FrameIdx) @@ -1862,7 +1880,7 @@ SmallVectorImpl &NewMIs) const { bool isAligned = (RI.getStackAlignment() >= 16) || RI.needsStackRealignment(MF); - unsigned Opc = getStoreRegOpcode(RC, isAligned); + unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM); DebugLoc DL = DebugLoc::getUnknownLoc(); MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc)); for (unsigned i = 0, e = Addr.size(); i != e; ++i) @@ -1871,8 +1889,10 @@ NewMIs.push_back(MIB); } -static unsigned getLoadRegOpcode(const TargetRegisterClass *RC, - bool isStackAligned) { +static unsigned getLoadRegOpcode(unsigned DestReg, + const TargetRegisterClass *RC, + bool isStackAligned, + const TargetMachine &TM) { unsigned Opc = 0; if (RC == &X86::GR64RegClass) { Opc = X86::MOV64rm; @@ -1881,15 +1901,26 @@ } else if (RC == &X86::GR16RegClass) { Opc = X86::MOV16rm; } else if (RC == &X86::GR8RegClass) { - Opc = X86::MOV8rm; + // Copying to or from a physical H register on 
x86-64 requires a NOREX + // move. Otherwise use a normal move. + if (isHReg(DestReg) && + TM.getSubtarget().is64Bit()) + Opc = X86::MOV8rm_NOREX; + else + Opc = X86::MOV8rm; } else if (RC == &X86::GR64_ABCDRegClass) { Opc = X86::MOV64rm; } else if (RC == &X86::GR32_ABCDRegClass) { Opc = X86::MOV32rm; } else if (RC == &X86::GR16_ABCDRegClass) { Opc = X86::MOV16rm; - } else if (RC == &X86::GR8_ABCDRegClass) { + } else if (RC == &X86::GR8_ABCD_LRegClass) { Opc = X86::MOV8rm; + } else if (RC == &X86::GR8_ABCD_HRegClass) { + if (TM.getSubtarget().is64Bit()) + Opc = X86::MOV8rm_NOREX; + else + Opc = X86::MOV8rm; } else if (RC == &X86::GR64_NOREXRegClass) { Opc = X86::MOV64rm; } else if (RC == &X86::GR32_NOREXRegClass) { @@ -1928,7 +1959,7 @@ const MachineFunction &MF = *MBB.getParent(); bool isAligned = (RI.getStackAlignment() >= 16) || RI.needsStackRealignment(MF); - unsigned Opc = getLoadRegOpcode(RC, isAligned); + unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM); DebugLoc DL = DebugLoc::getUnknownLoc(); if (MI != MBB.end()) DL = MI->getDebugLoc(); addFrameReference(BuildMI(MBB, MI, DL, get(Opc), DestReg), FrameIdx); @@ -1940,7 +1971,7 @@ SmallVectorImpl &NewMIs) const { bool isAligned = (RI.getStackAlignment() >= 16) || RI.needsStackRealignment(MF); - unsigned Opc = getLoadRegOpcode(RC, isAligned); + unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM); DebugLoc DL = DebugLoc::getUnknownLoc(); MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg); for (unsigned i = 0, e = Addr.size(); i != e; ++i) @@ -2455,9 +2486,8 @@ MVT VT = *RC->vt_begin(); bool isAligned = (RI.getStackAlignment() >= 16) || RI.needsStackRealignment(MF); - Load = DAG.getTargetNode(getLoadRegOpcode(RC, isAligned), dl, - VT, MVT::Other, - &AddrOps[0], AddrOps.size()); + Load = DAG.getTargetNode(getLoadRegOpcode(0, RC, isAligned, TM), dl, + VT, MVT::Other, &AddrOps[0], AddrOps.size()); NewNodes.push_back(Load); } @@ -2489,8 +2519,10 @@ AddrOps.push_back(Chain); bool isAligned 
= (RI.getStackAlignment() >= 16) || RI.needsStackRealignment(MF); - SDNode *Store = DAG.getTargetNode(getStoreRegOpcode(DstRC, isAligned), dl, - MVT::Other, &AddrOps[0], AddrOps.size()); + SDNode *Store = DAG.getTargetNode(getStoreRegOpcode(0, DstRC, + isAligned, TM), + dl, MVT::Other, + &AddrOps[0], AddrOps.size()); NewNodes.push_back(Store); } Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.td?rev=70211&r1=70210&r2=70211&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86InstrInfo.td (original) +++ llvm/trunk/lib/Target/X86/X86InstrInfo.td Mon Apr 27 11:41:36 2009 @@ -784,9 +784,9 @@ "mov{l}\t{$src, $dst|$dst, $src}", [(store GR32:$src, addr:$dst)]>; -// Versions of MOV8rr and MOV8mr that use i8mem_NOREX and GR8_NOREX so that they -// can be used for copying and storing h registers, which can't be encoded when -// a REX prefix is present. +// Versions of MOV8rr, MOV8mr, and MOV8rm that use i8mem_NOREX and GR8_NOREX so +// that they can be used for copying and storing h registers, which can't be +// encoded when a REX prefix is present. let neverHasSideEffects = 1 in def MOV8rr_NOREX : I<0x88, MRMDestReg, (outs GR8_NOREX:$dst), (ins GR8_NOREX:$src), @@ -794,6 +794,10 @@ def MOV8mr_NOREX : I<0x88, MRMDestMem, (outs), (ins i8mem_NOREX:$dst, GR8_NOREX:$src), "mov{b}\t{$src, $dst|$dst, $src} # NOREX", []>; +let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in +def MOV8rm_NOREX : I<0x8A, MRMSrcMem, + (outs GR8_NOREX:$dst), (ins i8mem_NOREX:$src), + "mov{b}\t{$src, $dst|$dst, $src} # NOREX", []>; //===----------------------------------------------------------------------===// // Fixed-Register Multiplication and Division Instructions... 
Modified: llvm/trunk/lib/Target/X86/X86RegisterInfo.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86RegisterInfo.td?rev=70211&r1=70210&r2=70211&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86RegisterInfo.td (original) +++ llvm/trunk/lib/Target/X86/X86RegisterInfo.td Mon Apr 27 11:41:36 2009 @@ -461,21 +461,24 @@ } -// GR8_ABCD, GR16_ABCD, GR32_ABCD, GR64_ABCD - Subclasses of GR8, GR16, GR32, -// and GR64 which contain just the "a" "b", "c", and "d" registers. On x86-32, -// GR16_ABCD and GR32_ABCD are classes for registers that support 8-bit subreg -// operations. On x86-64, GR16_ABCD, GR32_ABCD, and GR64_ABCD are classes for -// registers that support 8-bit h-register operations. -def GR8_ABCD : RegisterClass<"X86", [i8], 8, [AL, CL, DL, BL]> { +// GR8_ABCD_L, GR8_ABCD_H, GR16_ABCD, GR32_ABCD, GR64_ABCD - Subclasses of +// GR8, GR16, GR32, and GR64 which contain just the "a" "b", "c", and "d" +// registers. On x86-32, GR16_ABCD and GR32_ABCD are classes for registers +// that support 8-bit subreg operations. On x86-64, GR16_ABCD, GR32_ABCD, +// and GR64_ABCD are classes for registers that support 8-bit h-register +// operations. 
+def GR8_ABCD_L : RegisterClass<"X86", [i8], 8, [AL, CL, DL, BL]> { +} +def GR8_ABCD_H : RegisterClass<"X86", [i8], 8, [AH, CH, DH, BH]> { } def GR16_ABCD : RegisterClass<"X86", [i16], 16, [AX, CX, DX, BX]> { - let SubRegClassList = [GR8_ABCD, GR8_ABCD]; + let SubRegClassList = [GR8_ABCD_L, GR8_ABCD_H]; } def GR32_ABCD : RegisterClass<"X86", [i32], 32, [EAX, ECX, EDX, EBX]> { - let SubRegClassList = [GR8_ABCD, GR8_ABCD, GR16_ABCD]; + let SubRegClassList = [GR8_ABCD_L, GR8_ABCD_H, GR16_ABCD]; } def GR64_ABCD : RegisterClass<"X86", [i64], 64, [RAX, RCX, RDX, RBX]> { - let SubRegClassList = [GR8_ABCD, GR8_ABCD, GR16_ABCD, GR32_ABCD]; + let SubRegClassList = [GR8_ABCD_L, GR8_ABCD_H, GR16_ABCD, GR32_ABCD]; } // GR8_NOREX, GR16_NOREX, GR32_NOREX, GR64_NOREX - Subclasses of From stoklund at 2pi.dk Mon Apr 27 12:01:43 2009 From: stoklund at 2pi.dk (Jakob Stoklund Olesen) Date: Mon, 27 Apr 2009 19:01:43 +0200 Subject: [llvm-commits] [PATCH] Annotate virtual registers with register class in debug output Message-ID: I am using this patch when working on the Blackfin backend. Now that X86 is gaining a decent set of register classes, it might be generally useful. It prints machine code like this: %reg1025GR32 = MOV32rr %ESI %reg1024GR64 = MOV64rr %RDI %reg1026GR32_ABCD = MOV32rr %reg1025GR32 %reg1027GR8_ABCD_H = EXTRACT_SUBREG %reg1026GR32_ABCD, 2 %reg1028GR32_NOREX = MOVZX32_NOREXrr8 %reg1027GR8_ABCD_H %EAX = MOV32rr %reg1028GR32_NOREX RET What do you think? Too much clutter? -------------- next part -------------- A non-text attachment was scrubbed... 
Name: regclass-print.patch Type: application/octet-stream Size: 3631 bytes Desc: not available Url : http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20090427/120db8e4/attachment.obj -------------- next part -------------- From evan.cheng at apple.com Mon Apr 27 12:35:21 2009 From: evan.cheng at apple.com (Evan Cheng) Date: Mon, 27 Apr 2009 17:35:21 -0000 Subject: [llvm-commits] [llvm] r70212 - /llvm/trunk/lib/CodeGen/LiveInterval.cpp Message-ID: <200904271735.n3RHZLX5013201@zion.cs.uiuc.edu> Author: evancheng Date: Mon Apr 27 12:35:19 2009 New Revision: 70212 URL: http://llvm.org/viewvc/llvm-project?rev=70212&view=rev Log: Also delete last unused val#. Modified: llvm/trunk/lib/CodeGen/LiveInterval.cpp Modified: llvm/trunk/lib/CodeGen/LiveInterval.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/LiveInterval.cpp?rev=70212&r1=70211&r2=70212&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/LiveInterval.cpp (original) +++ llvm/trunk/lib/CodeGen/LiveInterval.cpp Mon Apr 27 12:35:19 2009 @@ -629,6 +629,12 @@ UnusedValNo = 0; } } + + if (UnusedValNo) { + // Delete the last unused val#. + valnos.pop_back(); + UnusedValNo->~VNInfo(); + } } void LiveInterval::MergeInClobberRange(unsigned Start, unsigned End, From evan.cheng at apple.com Mon Apr 27 12:36:47 2009 From: evan.cheng at apple.com (Evan Cheng) Date: Mon, 27 Apr 2009 17:36:47 -0000 Subject: [llvm-commits] [llvm] r70213 - in /llvm/trunk: lib/CodeGen/LiveIntervalAnalysis.cpp test/CodeGen/X86/2009-04-27-LiveIntervalsAssert.ll Message-ID: <200904271736.n3RHamOw013268@zion.cs.uiuc.edu> Author: evancheng Date: Mon Apr 27 12:36:47 2009 New Revision: 70213 URL: http://llvm.org/viewvc/llvm-project?rev=70213&view=rev Log: Fix PR4056. It's possible a physical register def is dead if its implicit use is deleted by two-address pass. 
Added: llvm/trunk/test/CodeGen/X86/2009-04-27-LiveIntervalsAssert.ll Modified: llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp Modified: llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp?rev=70213&r1=70212&r2=70213&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp (original) +++ llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp Mon Apr 27 12:36:47 2009 @@ -627,8 +627,8 @@ // The only case we should have a dead physreg here without a killing or // instruction where we know it's dead is if it is live-in to the function - // and never used. - assert(!CopyMI && "physreg was not killed in defining block!"); + // and never used. Another possible case is the implicit use of the + // physical register has been deleted by two-address pass. end = start + 1; exit: Added: llvm/trunk/test/CodeGen/X86/2009-04-27-LiveIntervalsAssert.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/2009-04-27-LiveIntervalsAssert.ll?rev=70213&view=auto ============================================================================== --- llvm/trunk/test/CodeGen/X86/2009-04-27-LiveIntervalsAssert.ll (added) +++ llvm/trunk/test/CodeGen/X86/2009-04-27-LiveIntervalsAssert.ll Mon Apr 27 12:36:47 2009 @@ -0,0 +1,24 @@ +; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin9 +; PR4056 + +define void @int163(i32 %p_4, i32 %p_5) nounwind { +entry: + %0 = tail call i32 @bar(i32 1) nounwind ; [#uses=2] + %1 = icmp sgt i32 %0, 7 ; [#uses=1] + br i1 %1, label %foo.exit, label %bb.i + +bb.i: ; preds = %entry + %2 = lshr i32 1, %0 ; [#uses=1] + %3 = icmp eq i32 %2, 0 ; [#uses=1] + %4 = zext i1 %3 to i32 ; [#uses=1] + %.p_5 = shl i32 %p_5, %4 ; [#uses=1] + br label %foo.exit + +foo.exit: ; preds = %bb.i, %entry + %5 = phi i32 [ %.p_5, %bb.i ], [ %p_5, %entry ] ; [#uses=1] + %6 = icmp eq i32 %5, 0 ; [#uses=0] + %7 = tail call 
i32 @bar(i32 %p_5) nounwind ; [#uses=0] + ret void +} + +declare i32 @bar(i32) From sabre at nondot.org Mon Apr 27 12:59:34 2009 From: sabre at nondot.org (Chris Lattner) Date: Mon, 27 Apr 2009 17:59:34 -0000 Subject: [llvm-commits] [llvm] r70214 - /llvm/trunk/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp Message-ID: <200904271759.n3RHxYlY014086@zion.cs.uiuc.edu> Author: lattner Date: Mon Apr 27 12:59:34 2009 New Revision: 70214 URL: http://llvm.org/viewvc/llvm-project?rev=70214&view=rev Log: clean up the output of llvm-bcanalyzer by using printf instead of ostreams for formatting. Modified: llvm/trunk/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp Modified: llvm/trunk/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp?rev=70214&r1=70213&r2=70214&view=diff ============================================================================== --- llvm/trunk/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp (original) +++ llvm/trunk/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp Mon Apr 27 12:59:34 2009 @@ -245,6 +245,9 @@ } } +struct PerRecordStats { + unsigned NumInstances; +}; struct PerBlockIDStats { /// NumInstances - This the number of times this block ID has been seen. @@ -264,7 +267,7 @@ unsigned NumRecords, NumAbbreviatedRecords; /// CodeFreq - Keep track of the number of times we see each code. - std::vector CodeFreq; + std::vector CodeFreq; PerBlockIDStats() : NumInstances(0), NumBits(0), @@ -375,7 +378,7 @@ // Increment the # occurrences of this code. 
if (BlockStats.CodeFreq.size() <= Code) BlockStats.CodeFreq.resize(Code+1); - BlockStats.CodeFreq[Code]++; + BlockStats.CodeFreq[Code].NumInstances++; if (Dump) { std::cerr << Indent << " <"; @@ -419,7 +422,11 @@ } static void PrintSize(double Bits) { - std::cerr << Bits << "b/" << Bits/8 << "B/" << Bits/32 << "W"; + fprintf(stderr, "%.2f/%.2fB/%lluW", Bits, Bits/8,(unsigned long long)Bits/32); +} +static void PrintSize(uint64_t Bits) { + fprintf(stderr, "%llub/%.2fB/%lluW", (unsigned long long)Bits, + (double)Bits/8, (unsigned long long)Bits/32); } @@ -532,19 +539,21 @@ if (!NoHistogram && !Stats.CodeFreq.empty()) { std::vector > FreqPairs; // for (unsigned i = 0, e = Stats.CodeFreq.size(); i != e; ++i) - if (unsigned Freq = Stats.CodeFreq[i]) + if (unsigned Freq = Stats.CodeFreq[i].NumInstances) FreqPairs.push_back(std::make_pair(Freq, i)); std::stable_sort(FreqPairs.begin(), FreqPairs.end()); std::reverse(FreqPairs.begin(), FreqPairs.end()); - std::cerr << "\tCode Histogram:\n"; + std::cerr << "\tRecord Histogram:\n"; + fprintf(stderr, "\t\t Count Record Kind\n"); for (unsigned i = 0, e = FreqPairs.size(); i != e; ++i) { - std::cerr << "\t\t" << FreqPairs[i].first << "\t"; + fprintf(stderr, "\t\t%7d ", FreqPairs[i].first); + if (const char *CodeName = GetCodeName(FreqPairs[i].second, I->first, StreamFile)) - std::cerr << CodeName << "\n"; + fprintf(stderr, "%s\n", CodeName); else - std::cerr << "UnknownCode" << FreqPairs[i].second << "\n"; + fprintf(stderr, "UnknownCode%d\n", FreqPairs[i].second); } std::cerr << "\n"; From sabre at nondot.org Mon Apr 27 13:15:28 2009 From: sabre at nondot.org (Chris Lattner) Date: Mon, 27 Apr 2009 18:15:28 -0000 Subject: [llvm-commits] [llvm] r70215 - /llvm/trunk/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp Message-ID: <200904271815.n3RIFSmo014579@zion.cs.uiuc.edu> Author: lattner Date: Mon Apr 27 13:15:27 2009 New Revision: 70215 URL: http://llvm.org/viewvc/llvm-project?rev=70215&view=rev Log: Print statistics for each record kind 
saying the number of bits and % abbreviated. For example: Record Histogram: Count # Bits % Abv Record Kind 25738 3424174 100.00 SM_SLOC_INSTANTIATION_ENTRY 814 562079 100.00 SM_SLOC_FILE_ENTRY 798 34110 SM_HEADER_FILE_INFO 3 91104 100.00 SM_SLOC_BUFFER_BLOB 3 498 100.00 SM_SLOC_BUFFER_ENTRY 1 465 SM_LINE_TABLE Modified: llvm/trunk/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp Modified: llvm/trunk/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp?rev=70215&r1=70214&r2=70215&view=diff ============================================================================== --- llvm/trunk/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp (original) +++ llvm/trunk/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp Mon Apr 27 13:15:27 2009 @@ -247,6 +247,10 @@ struct PerRecordStats { unsigned NumInstances; + unsigned NumAbbrev; + uint64_t TotalBits; + + PerRecordStats() : NumInstances(0), NumAbbrev(0), TotalBits(0) {} }; struct PerBlockIDStats { @@ -332,6 +336,8 @@ if (Stream.AtEndOfStream()) return Error("Premature end of bitstream"); + uint64_t RecordStartBit = Stream.GetCurrentBitNo(); + // Read the code for this record. unsigned AbbrevID = Stream.ReadCode(); switch (AbbrevID) { @@ -375,11 +381,17 @@ unsigned BlobLen = 0; unsigned Code = Stream.ReadRecord(AbbrevID, Record, BlobStart, BlobLen); + + // Increment the # occurrences of this code. 
if (BlockStats.CodeFreq.size() <= Code) BlockStats.CodeFreq.resize(Code+1); BlockStats.CodeFreq[Code].NumInstances++; - + BlockStats.CodeFreq[Code].TotalBits += + Stream.GetCurrentBitNo()-RecordStartBit; + if (AbbrevID != bitc::UNABBREV_RECORD) + BlockStats.CodeFreq[Code].NumAbbrev++; + if (Dump) { std::cerr << Indent << " <"; if (const char *CodeName = @@ -545,9 +557,18 @@ std::reverse(FreqPairs.begin(), FreqPairs.end()); std::cerr << "\tRecord Histogram:\n"; - fprintf(stderr, "\t\t Count Record Kind\n"); + fprintf(stderr, "\t\t Count # Bits %% Abv Record Kind\n"); for (unsigned i = 0, e = FreqPairs.size(); i != e; ++i) { - fprintf(stderr, "\t\t%7d ", FreqPairs[i].first); + const PerRecordStats &RecStats = Stats.CodeFreq[FreqPairs[i].second]; + + fprintf(stderr, "\t\t%7d %9llu ", RecStats.NumInstances, + RecStats.TotalBits); + + if (RecStats.NumAbbrev) + fprintf(stderr, "%7.2f ", + (double)RecStats.NumAbbrev/RecStats.NumInstances*100); + else + fprintf(stderr, " "); if (const char *CodeName = GetCodeName(FreqPairs[i].second, I->first, StreamFile)) From isanbard at gmail.com Mon Apr 27 13:35:46 2009 From: isanbard at gmail.com (Bill Wendling) Date: Mon, 27 Apr 2009 18:35:46 -0000 Subject: [llvm-commits] [llvm] r70221 - /llvm/tags/Apple/llvmCore-2107.2/ Message-ID: <200904271835.n3RIZkxF015357@zion.cs.uiuc.edu> Author: void Date: Mon Apr 27 13:35:46 2009 New Revision: 70221 URL: http://llvm.org/viewvc/llvm-project?rev=70221&view=rev Log: Creating llvmCore-2107.2 from Dib. 
Added: llvm/tags/Apple/llvmCore-2107.2/ - copied from r70220, llvm/branches/Apple/Dib/ From isanbard at gmail.com Mon Apr 27 13:36:28 2009 From: isanbard at gmail.com (Bill Wendling) Date: Mon, 27 Apr 2009 18:36:28 -0000 Subject: [llvm-commits] [llvm-gcc-4.2] r70222 - /llvm-gcc-4.2/tags/Apple/llvmgcc42-2107.2/ Message-ID: <200904271836.n3RIaSTG015386@zion.cs.uiuc.edu> Author: void Date: Mon Apr 27 13:36:28 2009 New Revision: 70222 URL: http://llvm.org/viewvc/llvm-project?rev=70222&view=rev Log: Creating llvmgcc42-2107.2 from Dib. Added: llvm-gcc-4.2/tags/Apple/llvmgcc42-2107.2/ - copied from r70215, llvm-gcc-4.2/branches/Apple/Dib/ From natebegeman at mac.com Mon Apr 27 13:41:30 2009 From: natebegeman at mac.com (Nate Begeman) Date: Mon, 27 Apr 2009 18:41:30 -0000 Subject: [llvm-commits] [llvm] r70225 - in /llvm/trunk: include/llvm/CodeGen/ include/llvm/Target/ lib/CodeGen/SelectionDAG/ lib/Target/CellSPU/ lib/Target/PowerPC/ lib/Target/X86/ test/CodeGen/Generic/ test/CodeGen/X86/ utils/TableGen/ Message-ID: <200904271841.n3RIfW0N015595@zion.cs.uiuc.edu> Author: sampo Date: Mon Apr 27 13:41:29 2009 New Revision: 70225 URL: http://llvm.org/viewvc/llvm-project?rev=70225&view=rev Log: 2nd attempt, fixing SSE4.1 issues and implementing feedback from duncan. PR2957 ISD::VECTOR_SHUFFLE now stores an array of integers representing the shuffle mask internal to the node, rather than taking a BUILD_VECTOR of ConstantSDNodes as the shuffle mask. A value of -1 represents UNDEF. In addition to eliminating the creation of illegal BUILD_VECTORS just to represent shuffle masks, we are better about canonicalizing the shuffle mask, resulting in substantially better code for some classes of shuffles. 
Modified: llvm/trunk/include/llvm/CodeGen/SelectionDAG.h llvm/trunk/include/llvm/CodeGen/SelectionDAGNodes.h llvm/trunk/include/llvm/Target/TargetLowering.h llvm/trunk/include/llvm/Target/TargetSelectionDAG.td llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp llvm/trunk/lib/Target/CellSPU/SPUISelLowering.cpp llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h llvm/trunk/lib/Target/PowerPC/PPCInstrAltivec.td llvm/trunk/lib/Target/X86/X86ISelLowering.cpp llvm/trunk/lib/Target/X86/X86ISelLowering.h llvm/trunk/lib/Target/X86/X86InstrInfo.td llvm/trunk/lib/Target/X86/X86InstrMMX.td llvm/trunk/lib/Target/X86/X86InstrSSE.td llvm/trunk/test/CodeGen/Generic/vector-casts.ll llvm/trunk/test/CodeGen/X86/vec_clear.ll llvm/trunk/test/CodeGen/X86/vec_shuffle-10.ll llvm/trunk/test/CodeGen/X86/vec_shuffle-16.ll llvm/trunk/test/CodeGen/X86/vec_shuffle-30.ll llvm/trunk/test/CodeGen/X86/vec_shuffle-31.ll llvm/trunk/utils/TableGen/CodeGenDAGPatterns.cpp llvm/trunk/utils/TableGen/CodeGenDAGPatterns.h Modified: llvm/trunk/include/llvm/CodeGen/SelectionDAG.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/SelectionDAG.h?rev=70225&r1=70224&r2=70225&view=diff ============================================================================== --- llvm/trunk/include/llvm/CodeGen/SelectionDAG.h (original) +++ llvm/trunk/include/llvm/CodeGen/SelectionDAG.h Mon Apr 27 13:41:29 2009 @@ -353,6 +353,13 @@ SDValue getConvertRndSat(MVT VT, DebugLoc dl, SDValue Val, SDValue DTy, SDValue STy, SDValue Rnd, SDValue Sat, ISD::CvtCode Code); + + /// getVectorShuffle - Return an ISD::VECTOR_SHUFFLE node. The number of + /// elements in VT, which must be a vector type, must match the number of + /// mask elements NumElts. 
A negative integer mask element is treated as + /// undefined. + SDValue getVectorShuffle(MVT VT, DebugLoc dl, SDValue N1, SDValue N2, + const int *MaskElts); /// getZeroExtendInReg - Return the expression required to zero extend the Op /// value assuming it was the smaller SrcTy value. Modified: llvm/trunk/include/llvm/CodeGen/SelectionDAGNodes.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/SelectionDAGNodes.h?rev=70225&r1=70224&r2=70225&view=diff ============================================================================== --- llvm/trunk/include/llvm/CodeGen/SelectionDAGNodes.h (original) +++ llvm/trunk/include/llvm/CodeGen/SelectionDAGNodes.h Mon Apr 27 13:41:29 2009 @@ -24,6 +24,7 @@ #include "llvm/ADT/GraphTraits.h" #include "llvm/ADT/iterator.h" #include "llvm/ADT/ilist_node.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/CodeGen/MachineMemOperand.h" @@ -1703,6 +1704,41 @@ } }; +class ShuffleVectorSDNode : public SDNode { + SDUse Ops[2]; + int *Mask; +protected: + friend class SelectionDAG; + ShuffleVectorSDNode(MVT VT, DebugLoc dl, SDValue N1, SDValue N2, int *M) + : SDNode(ISD::VECTOR_SHUFFLE, dl, getSDVTList(VT)), Mask(M) { + InitOperands(Ops, N1, N2); + } +public: + + void getMask(SmallVectorImpl &M) const { + MVT VT = getValueType(0); + M.clear(); + for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) + M.push_back(Mask[i]); + } + int getMaskElt(unsigned Idx) const { + assert(Idx < getValueType(0).getVectorNumElements() && "Idx out of range!"); + return Mask[Idx]; + } + + bool isSplat() const { return isSplatMask(Mask, getValueType(0)); } + int getSplatIndex() const { + assert(isSplat() && "Cannot get splat index for non-splat!"); + return Mask[0]; + } + static bool isSplatMask(const int *Mask, MVT VT); + + static bool classof(const ShuffleVectorSDNode *) { return true; } + static bool classof(const SDNode *N) { + return N->getOpcode() == 
ISD::VECTOR_SHUFFLE; + } +}; + class ConstantSDNode : public SDNode { const ConstantInt *Value; friend class SelectionDAG; @@ -2084,7 +2120,7 @@ return N->getOpcode() == ISD::CONDCODE; } }; - + /// CvtRndSatSDNode - NOTE: avoid using this node as this may disappear in the /// future and most targets don't support it. class CvtRndSatSDNode : public SDNode { Modified: llvm/trunk/include/llvm/Target/TargetLowering.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetLowering.h?rev=70225&r1=70224&r2=70225&view=diff ============================================================================== --- llvm/trunk/include/llvm/Target/TargetLowering.h (original) +++ llvm/trunk/include/llvm/Target/TargetLowering.h Mon Apr 27 13:41:29 2009 @@ -28,6 +28,7 @@ #include "llvm/ADT/APFloat.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/DebugLoc.h" #include "llvm/Target/TargetMachine.h" @@ -328,7 +329,7 @@ /// support *some* VECTOR_SHUFFLE operations, those with specific masks. /// By default, if a target supports the VECTOR_SHUFFLE node, all mask values /// are assumed to be legal. - virtual bool isShuffleMaskLegal(SDValue Mask, MVT VT) const { + virtual bool isShuffleMaskLegal(SmallVectorImpl<int> &Mask, MVT VT) const { return true; } @@ -336,9 +337,7 @@ /// used by Targets can use this to indicate if there is a suitable /// VECTOR_SHUFFLE that can be used to replace a VAND with a constant /// pool entry. 
- virtual bool isVectorClearMaskLegal(const std::vector<SDValue> &BVOps, - MVT EVT, - SelectionDAG &DAG) const { + virtual bool isVectorClearMaskLegal(SmallVectorImpl<int> &M, MVT VT) const { return false; } Modified: llvm/trunk/include/llvm/Target/TargetSelectionDAG.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetSelectionDAG.td?rev=70225&r1=70224&r2=70225&view=diff ============================================================================== --- llvm/trunk/include/llvm/Target/TargetSelectionDAG.td (original) +++ llvm/trunk/include/llvm/Target/TargetSelectionDAG.td Mon Apr 27 13:41:29 2009 @@ -51,15 +51,6 @@ int BigOperandNum = BigOp; } -/// SDTCisIntVectorOfSameSize - This indicates that ThisOp and OtherOp are -/// vector types, and that ThisOp is the result of -/// MVT::getIntVectorWithNumElements with the number of elements -/// that ThisOp has. -class SDTCisIntVectorOfSameSize<int ThisOp, int OtherOp> - : SDTypeConstraint<ThisOp> { - int OtherOpNum = OtherOp; -} - /// SDTCisEltOfVec - This indicates that ThisOp is a scalar type of the same /// type as the element type of OtherOp, which is a vector type. 
class SDTCisEltOfVec @@ -175,8 +166,8 @@ SDTCisSameAs<0, 2>, SDTCisPtrTy<0>, SDTCisPtrTy<3> ]>; -def SDTVecShuffle : SDTypeProfile<1, 3, [ - SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisIntVectorOfSameSize<3, 0> +def SDTVecShuffle : SDTypeProfile<1, 2, [ + SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2> ]>; def SDTVecExtract : SDTypeProfile<1, 2, [ // vector extract SDTCisEltOfVec<0, 1>, SDTCisPtrTy<2> Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=70225&r1=70224&r2=70225&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original) +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Mon Apr 27 13:41:29 2009 @@ -5102,7 +5102,21 @@ return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), InVec.getValueType(), &Ops[0], Ops.size()); } + // If the invec is an UNDEF and if EltNo is a constant, create a new + // BUILD_VECTOR with undef elements and the inserted element. + if (!LegalOperations && InVec.getOpcode() == ISD::UNDEF && + isa(EltNo)) { + MVT VT = InVec.getValueType(); + MVT EVT = VT.getVectorElementType(); + unsigned NElts = VT.getVectorNumElements(); + SmallVector Ops(NElts, DAG.getUNDEF(EVT)); + unsigned Elt = cast(EltNo)->getZExtValue(); + if (Elt < Ops.size()) + Ops[Elt] = InVal; + return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), + InVec.getValueType(), &Ops[0], Ops.size()); + } return SDValue(); } @@ -5164,9 +5178,8 @@ // to examine the mask. if (BCNumEltsChanged) return SDValue(); - unsigned Idx = cast(InVec.getOperand(2). - getOperand(Elt))->getZExtValue(); - unsigned NumElems = InVec.getOperand(2).getNumOperands(); + int Idx = cast(InVec)->getMaskElt(Elt); + int NumElems = InVec.getValueType().getVectorNumElements(); InVec = (Idx < NumElems) ? 
InVec.getOperand(0) : InVec.getOperand(1); if (InVec.getOpcode() == ISD::BIT_CONVERT) InVec = InVec.getOperand(0); @@ -5213,7 +5226,6 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { unsigned NumInScalars = N->getNumOperands(); MVT VT = N->getValueType(0); - unsigned NumElts = VT.getVectorNumElements(); MVT EltType = VT.getVectorElementType(); // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT @@ -5256,56 +5268,36 @@ } // If everything is good, we can make a shuffle operation. - MVT IndexVT = MVT::i32; if (VecIn1.getNode()) { - SmallVector BuildVecIndices; + SmallVector Mask; for (unsigned i = 0; i != NumInScalars; ++i) { if (N->getOperand(i).getOpcode() == ISD::UNDEF) { - BuildVecIndices.push_back(DAG.getUNDEF(IndexVT)); + Mask.push_back(-1); continue; } - SDValue Extract = N->getOperand(i); - // If extracting from the first vector, just use the index directly. + SDValue Extract = N->getOperand(i); SDValue ExtVal = Extract.getOperand(1); if (Extract.getOperand(0) == VecIn1) { - if (ExtVal.getValueType() == IndexVT) - BuildVecIndices.push_back(ExtVal); - else { - unsigned Idx = cast(ExtVal)->getZExtValue(); - BuildVecIndices.push_back(DAG.getConstant(Idx, IndexVT)); - } + Mask.push_back(cast(ExtVal)->getZExtValue()); continue; } // Otherwise, use InIdx + VecSize unsigned Idx = cast(ExtVal)->getZExtValue(); - BuildVecIndices.push_back(DAG.getConstant(Idx+NumInScalars, IndexVT)); + Mask.push_back(Idx+NumInScalars); } // Add count and size info. - MVT BuildVecVT = MVT::getVectorVT(IndexVT, NumElts); - if (!TLI.isTypeLegal(BuildVecVT) && LegalTypes) + if (!TLI.isTypeLegal(VT) && LegalTypes) return SDValue(); // Return the new VECTOR_SHUFFLE node. - SDValue Ops[5]; + SDValue Ops[2]; Ops[0] = VecIn1; - if (VecIn2.getNode()) { - Ops[1] = VecIn2; - } else { - // Use an undef build_vector as input for the second operand. 
- std::vector UnOps(NumInScalars, - DAG.getUNDEF(EltType)); - Ops[1] = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT, - &UnOps[0], UnOps.size()); - AddToWorkList(Ops[1].getNode()); - } - - Ops[2] = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), BuildVecVT, - &BuildVecIndices[0], BuildVecIndices.size()); - return DAG.getNode(ISD::VECTOR_SHUFFLE, N->getDebugLoc(), VT, Ops, 3); + Ops[1] = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT); + return DAG.getVectorShuffle(VT, N->getDebugLoc(), Ops[0], Ops[1], &Mask[0]); } return SDValue(); @@ -5325,8 +5317,10 @@ } SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { - SDValue ShufMask = N->getOperand(2); - unsigned NumElts = ShufMask.getNumOperands(); + return SDValue(); + + MVT VT = N->getValueType(0); + unsigned NumElts = VT.getVectorNumElements(); SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -5334,60 +5328,13 @@ assert(N0.getValueType().getVectorNumElements() == NumElts && "Vector shuffle must be normalized in DAG"); - // If the shuffle mask is an identity operation on the LHS, return the LHS. - bool isIdentity = true; - for (unsigned i = 0; i != NumElts; ++i) { - if (ShufMask.getOperand(i).getOpcode() != ISD::UNDEF && - cast(ShufMask.getOperand(i))->getZExtValue() != i) { - isIdentity = false; - break; - } - } - if (isIdentity) return N->getOperand(0); - - // If the shuffle mask is an identity operation on the RHS, return the RHS. - isIdentity = true; - for (unsigned i = 0; i != NumElts; ++i) { - if (ShufMask.getOperand(i).getOpcode() != ISD::UNDEF && - cast(ShufMask.getOperand(i))->getZExtValue() != - i+NumElts) { - isIdentity = false; - break; - } - } - if (isIdentity) return N->getOperand(1); - - // Check if the shuffle is a unary shuffle, i.e. one of the vectors is not - // needed at all. 
- bool isUnary = true; - bool isSplat = true; - int VecNum = -1; - unsigned BaseIdx = 0; - for (unsigned i = 0; i != NumElts; ++i) - if (ShufMask.getOperand(i).getOpcode() != ISD::UNDEF) { - unsigned Idx=cast(ShufMask.getOperand(i))->getZExtValue(); - int V = (Idx < NumElts) ? 0 : 1; - if (VecNum == -1) { - VecNum = V; - BaseIdx = Idx; - } else { - if (BaseIdx != Idx) - isSplat = false; - if (VecNum != V) { - isUnary = false; - break; - } - } - } - - // Normalize unary shuffle so the RHS is undef. - if (isUnary && VecNum == 1) - std::swap(N0, N1); + // FIXME: implement canonicalizations from DAG.getVectorShuffle() // If it is a splat, check if the argument vector is a build_vector with // all scalar elements the same. - if (isSplat) { + if (cast(N)->isSplat()) { SDNode *V = N0.getNode(); + // If this is a bit convert that changes the element type of the vector but // not the number of vector elements, look through it. Be careful not to @@ -5401,6 +5348,7 @@ if (V->getOpcode() == ISD::BUILD_VECTOR) { unsigned NumElems = V->getNumOperands(); + unsigned BaseIdx = cast(N)->getSplatIndex(); if (NumElems > BaseIdx) { SDValue Base; bool AllSame = true; @@ -5425,38 +5373,6 @@ } } } - - // If it is a unary or the LHS and the RHS are the same node, turn the RHS - // into an undef. - if (isUnary || N0 == N1) { - // Check the SHUFFLE mask, mapping any inputs from the 2nd operand into the - // first operand. 
- SmallVector MappedOps; - - for (unsigned i = 0; i != NumElts; ++i) { - if (ShufMask.getOperand(i).getOpcode() == ISD::UNDEF || - cast(ShufMask.getOperand(i))->getZExtValue() < - NumElts) { - MappedOps.push_back(ShufMask.getOperand(i)); - } else { - unsigned NewIdx = - cast(ShufMask.getOperand(i))->getZExtValue() - - NumElts; - MappedOps.push_back(DAG.getConstant(NewIdx, - ShufMask.getOperand(i).getValueType())); - } - } - - ShufMask = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), - ShufMask.getValueType(), - &MappedOps[0], MappedOps.size()); - AddToWorkList(ShufMask.getNode()); - return DAG.getNode(ISD::VECTOR_SHUFFLE, N->getDebugLoc(), - N->getValueType(0), N0, - DAG.getUNDEF(N->getValueType(0)), - ShufMask); - } - return SDValue(); } @@ -5465,52 +5381,42 @@ /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==> /// vector_shuffle V, Zero, <0, 4, 2, 4> SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { + MVT VT = N->getValueType(0); + DebugLoc dl = N->getDebugLoc(); SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); if (N->getOpcode() == ISD::AND) { if (RHS.getOpcode() == ISD::BIT_CONVERT) RHS = RHS.getOperand(0); if (RHS.getOpcode() == ISD::BUILD_VECTOR) { - std::vector IdxOps; - unsigned NumOps = RHS.getNumOperands(); - unsigned NumElts = NumOps; + SmallVector Indices; + unsigned NumElts = RHS.getNumOperands(); for (unsigned i = 0; i != NumElts; ++i) { SDValue Elt = RHS.getOperand(i); if (!isa(Elt)) return SDValue(); else if (cast(Elt)->isAllOnesValue()) - IdxOps.push_back(DAG.getIntPtrConstant(i)); + Indices.push_back(i); else if (cast(Elt)->isNullValue()) - IdxOps.push_back(DAG.getIntPtrConstant(NumElts)); + Indices.push_back(NumElts); else return SDValue(); } // Let's see if the target supports this vector_shuffle. - if (!TLI.isVectorClearMaskLegal(IdxOps, TLI.getPointerTy(), DAG)) + MVT RVT = RHS.getValueType(); + if (!TLI.isVectorClearMaskLegal(Indices, RVT)) return SDValue(); // Return the new VECTOR_SHUFFLE node. 
- MVT EVT = RHS.getValueType().getVectorElementType(); - MVT VT = MVT::getVectorVT(EVT, NumElts); - MVT MaskVT = MVT::getVectorVT(TLI.getPointerTy(), NumElts); - std::vector Ops; - LHS = DAG.getNode(ISD::BIT_CONVERT, LHS.getDebugLoc(), VT, LHS); - Ops.push_back(LHS); - AddToWorkList(LHS.getNode()); - std::vector ZeroOps(NumElts, DAG.getConstant(0, EVT)); - Ops.push_back(DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), - VT, &ZeroOps[0], ZeroOps.size())); - Ops.push_back(DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), - MaskVT, &IdxOps[0], IdxOps.size())); - SDValue Result = DAG.getNode(ISD::VECTOR_SHUFFLE, N->getDebugLoc(), - VT, &Ops[0], Ops.size()); - - if (VT != N->getValueType(0)) - Result = DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), - N->getValueType(0), Result); - - return Result; + MVT EVT = RVT.getVectorElementType(); + SmallVector ZeroOps(RVT.getVectorNumElements(), + DAG.getConstant(0, EVT)); + SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), + RVT, &ZeroOps[0], ZeroOps.size()); + LHS = DAG.getNode(ISD::BIT_CONVERT, dl, RVT, LHS); + SDValue Shuf = DAG.getVectorShuffle(RVT, dl, LHS, Zero, &Indices[0]); + return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Shuf); } } Modified: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp?rev=70225&r1=70224&r2=70225&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp (original) +++ llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp Mon Apr 27 13:41:29 2009 @@ -267,16 +267,10 @@ bool isVolatile, SDValue ValOp, unsigned StWidth, DebugLoc dl); - /// isShuffleLegal - Return non-null if a vector shuffle is legal with the - /// specified mask and type. Targets can specify exactly which masks they - /// support and the code generator is tasked with not creating illegal masks. 
- /// - /// Note that this will also return true for shuffles that are promoted to a - /// different type. - /// - /// If this is a legal shuffle, this method returns the (possibly promoted) - /// build_vector Mask. If it's not a legal shuffle, it returns null. - SDNode *isShuffleLegal(MVT VT, SDValue Mask) const; + /// promoteShuffle - Promote a shuffle mask of a vector VT to perform the + /// same shuffle on a vector of NVT. Must not create an illegal shuffle mask. + SDValue promoteShuffle(MVT NVT, MVT VT, DebugLoc dl, SDValue N1, SDValue N2, + SmallVectorImpl &Mask) const; bool LegalizeAllNodesNotLeadingTo(SDNode *N, SDNode *Dest, SmallPtrSet &NodesLeadingTo); @@ -319,50 +313,35 @@ }; } -/// isVectorShuffleLegal - Return true if a vector shuffle is legal with the -/// specified mask and type. Targets can specify exactly which masks they -/// support and the code generator is tasked with not creating illegal masks. -/// -/// Note that this will also return true for shuffles that are promoted to a -/// different type. -SDNode *SelectionDAGLegalize::isShuffleLegal(MVT VT, SDValue Mask) const { - switch (TLI.getOperationAction(ISD::VECTOR_SHUFFLE, VT)) { - default: return 0; - case TargetLowering::Legal: - case TargetLowering::Custom: - break; - case TargetLowering::Promote: { - // If this is promoted to a different type, convert the shuffle mask and - // ask if it is legal in the promoted type! - MVT NVT = TLI.getTypeToPromoteTo(ISD::VECTOR_SHUFFLE, VT); - MVT EltVT = NVT.getVectorElementType(); - - // If we changed # elements, change the shuffle mask. - unsigned NumEltsGrowth = - NVT.getVectorNumElements() / VT.getVectorNumElements(); - assert(NumEltsGrowth && "Cannot promote to vector type with fewer elts!"); - if (NumEltsGrowth > 1) { - // Renumber the elements. 
- SmallVector Ops; - for (unsigned i = 0, e = Mask.getNumOperands(); i != e; ++i) { - SDValue InOp = Mask.getOperand(i); - for (unsigned j = 0; j != NumEltsGrowth; ++j) { - if (InOp.getOpcode() == ISD::UNDEF) - Ops.push_back(DAG.getUNDEF(EltVT)); - else { - unsigned InEltNo = cast(InOp)->getZExtValue(); - Ops.push_back(DAG.getConstant(InEltNo*NumEltsGrowth+j, EltVT)); - } - } - } - Mask = DAG.getNode(ISD::BUILD_VECTOR, Mask.getDebugLoc(), - NVT, &Ops[0], Ops.size()); +/// promoteShuffle - Promote a shuffle mask of a vector VT to perform the +/// same shuffle on a vector of NVT. Must not create an illegal shuffle mask. +/// e.g. <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3> +SDValue SelectionDAGLegalize::promoteShuffle(MVT NVT, MVT VT, DebugLoc dl, + SDValue N1, SDValue N2, + SmallVectorImpl &Mask) const { + MVT EltVT = NVT.getVectorElementType(); + int NumMaskElts = VT.getVectorNumElements(); + int NumDestElts = NVT.getVectorNumElements(); + unsigned NumEltsGrowth = NumDestElts / NumMaskElts; + + assert(NumEltsGrowth && "Cannot promote to vector type with fewer elts!"); + + if (NumEltsGrowth == 1) + return DAG.getVectorShuffle(NVT, dl, N1, N2, &Mask[0]); + + SmallVector NewMask; + for (int i = 0; i != NumMaskElts; ++i) { + int Idx = Mask[i]; + for (unsigned j = 0; j != NumEltsGrowth; ++j) { + if (Idx < 0) + NewMask.push_back(-1); + else + NewMask.push_back(Idx * NumEltsGrowth + j); } - VT = NVT; - break; - } } - return TLI.isShuffleMaskLegal(Mask, VT) ? 
Mask.getNode() : 0; + assert((int)NewMask.size() == NumDestElts && "Non-integer NumEltsGrowth?"); + assert(TLI.isShuffleMaskLegal(NewMask, NVT) && "Shuffle not legal?"); + return DAG.getVectorShuffle(NVT, dl, N1, N2, &NewMask[0]); } SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag, @@ -1652,25 +1631,15 @@ Tmp1.getValueType(), Tmp2); unsigned NumElts = Tmp1.getValueType().getVectorNumElements(); - MVT ShufMaskVT = - MVT::getIntVectorWithNumElements(NumElts); - MVT ShufMaskEltVT = ShufMaskVT.getVectorElementType(); - // We generate a shuffle of InVec and ScVec, so the shuffle mask // should be 0,1,2,3,4,5... with the appropriate element replaced with // elt 0 of the RHS. - SmallVector ShufOps; - for (unsigned i = 0; i != NumElts; ++i) { - if (i != InsertPos->getZExtValue()) - ShufOps.push_back(DAG.getConstant(i, ShufMaskEltVT)); - else - ShufOps.push_back(DAG.getConstant(NumElts, ShufMaskEltVT)); - } - SDValue ShufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, ShufMaskVT, - &ShufOps[0], ShufOps.size()); - - Result = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, Tmp1.getValueType(), - Tmp1, ScVec, ShufMask); + SmallVector ShufOps; + for (unsigned i = 0; i != NumElts; ++i) + ShufOps.push_back(i != InsertPos->getZExtValue() ? i : NumElts); + + Result = DAG.getVectorShuffle(Tmp1.getValueType(), dl, Tmp1, ScVec, + &ShufOps[0]); Result = LegalizeOp(Result); break; } @@ -1705,16 +1674,21 @@ break; } break; - case ISD::VECTOR_SHUFFLE: + case ISD::VECTOR_SHUFFLE: { Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the input vectors, Tmp2 = LegalizeOp(Node->getOperand(1)); // but not the shuffle mask. - Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, Node->getOperand(2)); + Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2); + MVT VT = Result.getValueType(); + + // Copy the Mask to a local SmallVector for use wi + SmallVector Mask; + cast(Result)->getMask(Mask); // Allow targets to custom lower the SHUFFLEs they support. 
- switch (TLI.getOperationAction(ISD::VECTOR_SHUFFLE, Result.getValueType())){ + switch (TLI.getOperationAction(ISD::VECTOR_SHUFFLE, VT)) { default: assert(0 && "Unknown operation action!"); case TargetLowering::Legal: - assert(isShuffleLegal(Result.getValueType(), Node->getOperand(2)) && + assert(TLI.isShuffleMaskLegal(Mask, VT) && "vector shuffle should not be created if not legal!"); break; case TargetLowering::Custom: @@ -1725,26 +1699,21 @@ } // FALLTHROUGH case TargetLowering::Expand: { - MVT VT = Node->getValueType(0); MVT EltVT = VT.getVectorElementType(); - MVT PtrVT = TLI.getPointerTy(); - SDValue Mask = Node->getOperand(2); - unsigned NumElems = Mask.getNumOperands(); + int NumElems = VT.getVectorNumElements(); SmallVector Ops; - for (unsigned i = 0; i != NumElems; ++i) { - SDValue Arg = Mask.getOperand(i); - if (Arg.getOpcode() == ISD::UNDEF) { + for (int i = 0; i != NumElems; ++i) { + if (Mask[i] < 0) { Ops.push_back(DAG.getUNDEF(EltVT)); - } else { - assert(isa(Arg) && "Invalid VECTOR_SHUFFLE mask!"); - unsigned Idx = cast(Arg)->getZExtValue(); - if (Idx < NumElems) - Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Tmp1, - DAG.getConstant(Idx, PtrVT))); - else - Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Tmp2, - DAG.getConstant(Idx - NumElems, PtrVT))); + continue; } + int Idx = Mask[i]; + if (Idx < NumElems) + Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Tmp1, + DAG.getIntPtrConstant(Idx))); + else + Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Tmp2, + DAG.getIntPtrConstant(Idx - NumElems))); } Result = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size()); break; @@ -1759,15 +1728,13 @@ Tmp2 = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, Tmp2); // Convert the shuffle mask to the right # elements. 
- Tmp3 = SDValue(isShuffleLegal(OVT, Node->getOperand(2)), 0); - assert(Tmp3.getNode() && "Shuffle not legal?"); - Result = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, NVT, Tmp1, Tmp2, Tmp3); + Result = promoteShuffle(NVT, OVT, dl, Tmp1, Tmp2, Mask); Result = DAG.getNode(ISD::BIT_CONVERT, dl, OVT, Result); break; } } break; - + } case ISD::EXTRACT_VECTOR_ELT: Tmp1 = Node->getOperand(0); Tmp2 = LegalizeOp(Node->getOperand(1)); @@ -5490,6 +5457,7 @@ // FIXME: it would be far nicer to change this into map // and use a bitmask instead of a list of elements. + // FIXME: this doesn't treat <0, u, 0, u> for example, as a splat. std::map > Values; Values[SplatValue].push_back(0); bool isConstant = true; @@ -5546,21 +5514,17 @@ if (SplatValue.getNode()) { // Splat of one value? // Build the shuffle constant vector: <0, 0, 0, 0> - MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems); - SDValue Zero = DAG.getConstant(0, MaskVT.getVectorElementType()); - std::vector ZeroVec(NumElems, Zero); - SDValue SplatMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, - &ZeroVec[0], ZeroVec.size()); + SmallVector ZeroVec(NumElems, 0); // If the target supports VECTOR_SHUFFLE and this shuffle mask, use it. - if (isShuffleLegal(VT, SplatMask)) { + if (TLI.isShuffleMaskLegal(ZeroVec, Node->getValueType(0))) { // Get the splatted value into the low element of a vector register. SDValue LowValVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, SplatValue); // Return shuffle(LowValVec, undef, <0,0,0,0>) - return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, LowValVec, - DAG.getUNDEF(VT), SplatMask); + return DAG.getVectorShuffle(VT, dl, LowValVec, DAG.getUNDEF(VT), + &ZeroVec[0]); } } @@ -5582,35 +5546,25 @@ std::swap(Val1, Val2); // Build the shuffle constant vector: e.g. 
<0, 4, 0, 4> - MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems); - MVT MaskEltVT = MaskVT.getVectorElementType(); - std::vector MaskVec(NumElems); + SmallVector ShuffleMask(NumElems, -1); // Set elements of the shuffle mask for Val1. std::vector &Val1Elts = Values[Val1]; for (unsigned i = 0, e = Val1Elts.size(); i != e; ++i) - MaskVec[Val1Elts[i]] = DAG.getConstant(0, MaskEltVT); + ShuffleMask[Val1Elts[i]] = 0; // Set elements of the shuffle mask for Val2. std::vector &Val2Elts = Values[Val2]; for (unsigned i = 0, e = Val2Elts.size(); i != e; ++i) if (Val2.getOpcode() != ISD::UNDEF) - MaskVec[Val2Elts[i]] = DAG.getConstant(NumElems, MaskEltVT); - else - MaskVec[Val2Elts[i]] = DAG.getUNDEF(MaskEltVT); - - SDValue ShuffleMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, - &MaskVec[0], MaskVec.size()); + ShuffleMask[Val2Elts[i]] = NumElems; // If the target supports SCALAR_TO_VECTOR and this shuffle mask, use it. if (TLI.isOperationLegalOrCustom(ISD::SCALAR_TO_VECTOR, VT) && - isShuffleLegal(VT, ShuffleMask)) { + TLI.isShuffleMaskLegal(ShuffleMask, VT)) { Val1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Val1); Val2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Val2); - SDValue Ops[] = { Val1, Val2, ShuffleMask }; - - // Return shuffle(LoValVec, HiValVec, <0,1,0,1>) - return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, Ops, 3); + return DAG.getVectorShuffle(VT, dl, Val1, Val2, &ShuffleMask[0]); } } @@ -8066,36 +8020,19 @@ case ISD::VECTOR_SHUFFLE: { SDValue Tmp1 = WidenVectorOp(Node->getOperand(0), WidenVT); SDValue Tmp2 = WidenVectorOp(Node->getOperand(1), WidenVT); - // VECTOR_SHUFFLE 3rd operand must be a constant build vector that is - // used as permutation array. We build the vector here instead of widening - // because we don't want to legalize and have it turned to something else. 
- SDValue PermOp = Node->getOperand(2); - SDValueVector NewOps; - MVT PVT = PermOp.getValueType().getVectorElementType(); + ShuffleVectorSDNode *SVOp = cast(Node); + SmallVector NewMask; for (unsigned i = 0; i < NumElts; ++i) { - if (PermOp.getOperand(i).getOpcode() == ISD::UNDEF) { - NewOps.push_back(PermOp.getOperand(i)); - } else { - unsigned Idx = - cast(PermOp.getOperand(i))->getZExtValue(); - if (Idx < NumElts) { - NewOps.push_back(PermOp.getOperand(i)); - } - else { - NewOps.push_back(DAG.getConstant(Idx + NewNumElts - NumElts, - PermOp.getOperand(i).getValueType())); - } - } - } - for (unsigned i = NumElts; i < NewNumElts; ++i) { - NewOps.push_back(DAG.getUNDEF(PVT)); + int Idx = SVOp->getMaskElt(i); + if (Idx < (int)NumElts) + NewMask.push_back(Idx); + else + NewMask.push_back(Idx + NewNumElts - NumElts); } - - SDValue Tmp3 = DAG.getNode(ISD::BUILD_VECTOR, dl, - MVT::getVectorVT(PVT, NewOps.size()), - &NewOps[0], NewOps.size()); - - Result = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, WidenVT, Tmp1, Tmp2, Tmp3); + for (unsigned i = NumElts; i < NewNumElts; ++i) + NewMask.push_back(-1); + + Result = DAG.getVectorShuffle(WidenVT, dl, Tmp1, Tmp2, &NewMask[0]); break; } case ISD::LOAD: { Modified: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp?rev=70225&r1=70224&r2=70225&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp (original) +++ llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp Mon Apr 27 13:41:29 2009 @@ -761,6 +761,7 @@ SDValue &Hi) { // The low and high parts of the original input give four input vectors. 
SDValue Inputs[4]; + ShuffleVectorSDNode *SVN = cast(N); DebugLoc dl = N->getDebugLoc(); GetSplitVector(N->getOperand(0), Inputs[0], Inputs[1]); GetSplitVector(N->getOperand(1), Inputs[2], Inputs[3]); @@ -772,10 +773,7 @@ // If Lo or Hi uses elements from at most two of the four input vectors, then // express it as a vector shuffle of those two inputs. Otherwise extract the // input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR. - SDValue Mask = N->getOperand(2); - MVT IdxVT = Mask.getValueType().getVectorElementType(); - SmallVector Ops; - Ops.reserve(NewElts); + SmallVector Ops; for (unsigned High = 0; High < 2; ++High) { SDValue &Output = High ? Hi : Lo; @@ -787,18 +785,15 @@ unsigned FirstMaskIdx = High * NewElts; bool useBuildVector = false; for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) { - SDValue Arg = Mask.getOperand(FirstMaskIdx + MaskOffset); - // The mask element. This indexes into the input. - unsigned Idx = Arg.getOpcode() == ISD::UNDEF ? - -1U : cast(Arg)->getZExtValue(); + int Idx = SVN->getMaskElt(FirstMaskIdx + MaskOffset); // The input vector this mask element indexes into. - unsigned Input = Idx / NewElts; + unsigned Input = (unsigned)Idx / NewElts; if (Input >= array_lengthof(Inputs)) { // The mask element does not index into any input vector. - Ops.push_back(DAG.getUNDEF(IdxVT)); + Ops.push_back(-1); continue; } @@ -826,27 +821,24 @@ } // Add the mask index for the new shuffle vector. - Ops.push_back(DAG.getConstant(Idx + OpNo * NewElts, IdxVT)); + Ops.push_back(Idx + OpNo * NewElts); } if (useBuildVector) { MVT EltVT = NewVT.getVectorElementType(); - Ops.clear(); + SmallVector SVOps; // Extract the input elements by hand. for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) { - SDValue Arg = Mask.getOperand(FirstMaskIdx + MaskOffset); - // The mask element. This indexes into the input. - unsigned Idx = Arg.getOpcode() == ISD::UNDEF ? 
- -1U : cast(Arg)->getZExtValue(); + int Idx = SVN->getMaskElt(FirstMaskIdx + MaskOffset); // The input vector this mask element indexes into. - unsigned Input = Idx / NewElts; + unsigned Input = (unsigned)Idx / NewElts; if (Input >= array_lengthof(Inputs)) { // The mask element is "undef" or indexes off the end of the input. - Ops.push_back(DAG.getUNDEF(EltVT)); + SVOps.push_back(DAG.getUNDEF(EltVT)); continue; } @@ -854,25 +846,22 @@ Idx -= Input * NewElts; // Extract the vector element by hand. - Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, - Inputs[Input], DAG.getIntPtrConstant(Idx))); + SVOps.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, + Inputs[Input], DAG.getIntPtrConstant(Idx))); } // Construct the Lo/Hi output using a BUILD_VECTOR. - Output = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT, &Ops[0], Ops.size()); + Output = DAG.getNode(ISD::BUILD_VECTOR,dl,NewVT, &SVOps[0], SVOps.size()); } else if (InputUsed[0] == -1U) { // No input vectors were used! The result is undefined. Output = DAG.getUNDEF(NewVT); } else { - // At least one input vector was used. Create a new shuffle vector. - SDValue NewMask = DAG.getNode(ISD::BUILD_VECTOR, dl, - MVT::getVectorVT(IdxVT, Ops.size()), - &Ops[0], Ops.size()); SDValue Op0 = Inputs[InputUsed[0]]; // If only one input was used, use an undefined vector for the other. SDValue Op1 = InputUsed[1] == -1U ? DAG.getUNDEF(NewVT) : Inputs[InputUsed[1]]; - Output = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, NewVT, Op0, Op1, NewMask); + // At least one input vector was used. Create a new shuffle vector. + Output = DAG.getVectorShuffle(NewVT, dl, Op0, Op1, &Ops[0]); } Ops.clear(); @@ -1473,18 +1462,15 @@ if (NumOperands == 2) { // Replace concat of two operands with a shuffle. 
- MVT PtrVT = TLI.getPointerTy(); - SmallVector MaskOps(WidenNumElts); + SmallVector MaskOps(WidenNumElts); for (unsigned i=0; i < WidenNumElts/2; ++i) { - MaskOps[i] = DAG.getConstant(i, PtrVT); - MaskOps[i+WidenNumElts/2] = DAG.getConstant(i+WidenNumElts, PtrVT); + MaskOps[i] = i; + MaskOps[i+WidenNumElts/2] = i+WidenNumElts; } - SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, - MVT::getVectorVT(PtrVT, WidenNumElts), - &MaskOps[0], WidenNumElts); - return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, WidenVT, - GetWidenedVector(N->getOperand(0)), - GetWidenedVector(N->getOperand(1)), Mask); + return DAG.getVectorShuffle(WidenVT, dl, + GetWidenedVector(N->getOperand(0)), + GetWidenedVector(N->getOperand(1)), + &MaskOps[0]); } } } @@ -1761,8 +1747,9 @@ } SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_SHUFFLE(SDNode *N) { + ShuffleVectorSDNode *SVN = cast(N); MVT VT = N->getValueType(0); - unsigned NumElts = VT.getVectorNumElements(); + int NumElts = VT.getVectorNumElements(); DebugLoc dl = N->getDebugLoc(); MVT WidenVT = TLI.getTypeToTransformTo(VT); @@ -1772,28 +1759,17 @@ SDValue InOp2 = GetWidenedVector(N->getOperand(1)); // Adjust mask based on new input vector length. 
- SDValue Mask = N->getOperand(2); - SmallVector MaskOps(WidenNumElts); - MVT IdxVT = Mask.getValueType().getVectorElementType(); - for (unsigned i = 0; i < NumElts; ++i) { - SDValue Arg = Mask.getOperand(i); - if (Arg.getOpcode() == ISD::UNDEF) - MaskOps[i] = Arg; - else { - unsigned Idx = cast(Arg)->getZExtValue(); - if (Idx < NumElts) - MaskOps[i] = Arg; - else - MaskOps[i] = DAG.getConstant(Idx - NumElts + WidenNumElts, IdxVT); - } + SmallVector NewMask; + for (int i = 0; i < NumElts; ++i) { + int Idx = SVN->getMaskElt(i); + if (Idx < NumElts) + NewMask.push_back(Idx); + else + NewMask.push_back(Idx - NumElts + WidenNumElts); } for (unsigned i = NumElts; i < WidenNumElts; ++i) - MaskOps[i] = DAG.getUNDEF(IdxVT); - SDValue NewMask = DAG.getNode(ISD::BUILD_VECTOR, dl, - MVT::getVectorVT(IdxVT, WidenNumElts), - &MaskOps[0], WidenNumElts); - - return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, WidenVT, InOp1, InOp2, NewMask); + NewMask.push_back(-1); + return DAG.getVectorShuffle(WidenVT, dl, InOp1, InOp2, &NewMask[0]); } SDValue DAGTypeLegalizer::WidenVecRes_VSETCC(SDNode *N) { Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp?rev=70225&r1=70224&r2=70225&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (original) +++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp Mon Apr 27 13:41:29 2009 @@ -456,6 +456,13 @@ ID.AddInteger(AT->getRawSubclassData()); break; } + case ISD::VECTOR_SHUFFLE: { + const ShuffleVectorSDNode *SVN = cast(N); + for (unsigned i = 0, e = N->getValueType(0).getVectorNumElements(); + i != e; ++i) + ID.AddInteger(SVN->getMaskElt(i)); + break; + } } // end switch (N->getOpcode()) } @@ -765,9 +772,9 @@ MVT EltVT = N->getValueType(0).getVectorElementType(); for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ++I) 
assert((I->getValueType() == EltVT || - (EltVT.isInteger() && I->getValueType().isInteger() && - EltVT.bitsLE(I->getValueType()))) && - "Wrong operand type!"); + (EltVT.isInteger() && I->getValueType().isInteger() && + EltVT.bitsLE(I->getValueType()))) && + "Wrong operand type!"); break; } } @@ -1126,6 +1133,110 @@ return SDValue(CondCodeNodes[Cond], 0); } +static void commuteShuffle(SDValue &N1, SDValue &N2, SmallVectorImpl &M) { + std::swap(N1, N2); + int NElts = M.size(); + for (int i = 0; i != NElts; ++i) { + if (M[i] >= NElts) + M[i] -= NElts; + else if (M[i] >= 0) + M[i] += NElts; + } +} + +SDValue SelectionDAG::getVectorShuffle(MVT VT, DebugLoc dl, SDValue N1, + SDValue N2, const int *Mask) { + assert(N1.getValueType() == N2.getValueType() && "Invalid VECTOR_SHUFFLE"); + assert(VT.isVector() && N1.getValueType().isVector() && + "Vector Shuffle VTs must be a vectors"); + assert(VT.getVectorElementType() == N1.getValueType().getVectorElementType() + && "Vector Shuffle VTs must have same element type"); + + // Canonicalize shuffle undef, undef -> undef + if (N1.getOpcode() == ISD::UNDEF && N2.getOpcode() == ISD::UNDEF) + return N1; + + // Validate that all indices in Mask are within the range of the elements + // input to the shuffle. + int NElts = VT.getVectorNumElements(); + SmallVector MaskVec; + for (int i = 0; i != NElts; ++i) { + if (Mask[i] >= (NElts * 2)) { + assert(0 && "Index out of range"); + return SDValue(); + } + MaskVec.push_back(Mask[i]); + } + + // Canonicalize shuffle v, v -> v, undef + if (N1 == N2) { + N2 = getUNDEF(VT); + for (int i = 0; i != NElts; ++i) + if (MaskVec[i] >= NElts) MaskVec[i] -= NElts; + } + + // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask. 
+ if (N1.getOpcode() == ISD::UNDEF) + commuteShuffle(N1, N2, MaskVec); + + // Canonicalize all index into lhs, -> shuffle lhs, undef + // Canonicalize all index into rhs, -> shuffle rhs, undef + bool AllLHS = true, AllRHS = true; + bool N2Undef = N2.getOpcode() == ISD::UNDEF; + for (int i = 0; i != NElts; ++i) { + if (MaskVec[i] >= NElts) { + if (N2Undef) + MaskVec[i] = -1; + else + AllLHS = false; + } else if (MaskVec[i] >= 0) { + AllRHS = false; + } + } + if (AllLHS && AllRHS) + return getUNDEF(VT); + if (AllLHS) + N2 = getUNDEF(VT); + if (AllRHS) { + N1 = getUNDEF(VT); + commuteShuffle(N1, N2, MaskVec); + } + + // If Identity shuffle, or all shuffle in to undef, return that node. + bool AllUndef = true; + bool Identity = true; + for (int i = 0; i < NElts; ++i) { + if (MaskVec[i] >= 0 && MaskVec[i] != i) Identity = false; + if (MaskVec[i] >= 0) AllUndef = false; + } + if (Identity) + return N1; + if (AllUndef) + return getUNDEF(VT); + + FoldingSetNodeID ID; + SDValue Ops[2] = { N1, N2 }; + AddNodeIDNode(ID, ISD::VECTOR_SHUFFLE, getVTList(VT), Ops, 2); + for (int i = 0; i != NElts; ++i) + ID.AddInteger(MaskVec[i]); + + void* IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + // Allocate the mask array for the node out of the BumpPtrAllocator, since + // SDNode doesn't have access to it. This memory will be "leaked" when + // the node is deallocated, but recovered when the NodeAllocator is released. 
+ int *MaskAlloc = OperandAllocator.Allocate(NElts); + memcpy(MaskAlloc, &MaskVec[0], NElts * sizeof(int)); + + ShuffleVectorSDNode *N = NodeAllocator.Allocate(); + new (N) ShuffleVectorSDNode(VT, dl, N1, N2, MaskAlloc); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + SDValue SelectionDAG::getConvertRndSat(MVT VT, DebugLoc dl, SDValue Val, SDValue DTy, SDValue STy, SDValue Rnd, SDValue Sat, @@ -2087,19 +2198,18 @@ SDValue SelectionDAG::getShuffleScalarElt(const SDNode *N, unsigned i) { MVT VT = N->getValueType(0); DebugLoc dl = N->getDebugLoc(); - SDValue PermMask = N->getOperand(2); - SDValue Idx = PermMask.getOperand(i); - if (Idx.getOpcode() == ISD::UNDEF) + const ShuffleVectorSDNode *SVN = cast(N); + int Index = SVN->getMaskElt(i); + if (Index < 0) return getUNDEF(VT.getVectorElementType()); - unsigned Index = cast(Idx)->getZExtValue(); - unsigned NumElems = PermMask.getNumOperands(); + int NumElems = VT.getVectorNumElements(); SDValue V = (Index < NumElems) ? N->getOperand(0) : N->getOperand(1); Index %= NumElems; if (V.getOpcode() == ISD::BIT_CONVERT) { V = V.getOperand(0); MVT VVT = V.getValueType(); - if (!VVT.isVector() || VVT.getVectorNumElements() != NumElems) + if (!VVT.isVector() || VVT.getVectorNumElements() != (unsigned)NumElems) return SDValue(); } if (V.getOpcode() == ISD::SCALAR_TO_VECTOR) @@ -2794,12 +2904,7 @@ } break; case ISD::VECTOR_SHUFFLE: - assert(N1.getValueType() == N2.getValueType() && - N1.getValueType().isVector() && - VT.isVector() && N3.getValueType().isVector() && - N3.getOpcode() == ISD::BUILD_VECTOR && - VT.getVectorNumElements() == N3.getNumOperands() && - "Illegal VECTOR_SHUFFLE node!"); + assert(0 && "should use getVectorShuffle constructor!"); break; case ISD::BIT_CONVERT: // Fold bit_convert nodes from a type to themselves. 
@@ -5323,14 +5428,15 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { if (!isTargetOpcode() && getOpcode() == ISD::VECTOR_SHUFFLE) { - SDNode *Mask = getOperand(2).getNode(); + const ShuffleVectorSDNode *SVN = cast(this); OS << "<"; - for (unsigned i = 0, e = Mask->getNumOperands(); i != e; ++i) { + for (unsigned i = 0, e = ValueList[0].getVectorNumElements(); i != e; ++i) { + int Idx = SVN->getMaskElt(i); if (i) OS << ","; - if (Mask->getOperand(i).getOpcode() == ISD::UNDEF) + if (Idx < 0) OS << "u"; else - OS << cast(Mask->getOperand(i))->getZExtValue(); + OS << Idx; } OS << ">"; } @@ -5611,3 +5717,13 @@ SplatBitSize = sz; return true; } + +bool ShuffleVectorSDNode::isSplatMask(const int *Mask, MVT VT) { + int Idx = -1; + for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) { + if (Idx < 0) Idx = Mask[i]; + if (Mask[i] >= 0 && Mask[i] != Idx) + return false; + } + return true; +} Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp?rev=70225&r1=70224&r2=70225&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp (original) +++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp Mon Apr 27 13:41:29 2009 @@ -870,8 +870,7 @@ if (ConstantFP *CFP = dyn_cast(C)) return N = DAG.getConstantFP(*CFP, VT); - if (isa(C) && !isa(V->getType()) && - !V->getType()->isAggregateType()) + if (isa(C) && !V->getType()->isAggregateType()) return N = DAG.getUNDEF(VT); if (ConstantExpr *CE = dyn_cast(C)) { @@ -925,14 +924,11 @@ for (unsigned i = 0; i != NumElements; ++i) Ops.push_back(getValue(CP->getOperand(i))); } else { - assert((isa(C) || isa(C)) && - "Unknown vector constant!"); + assert(isa(C) && "Unknown vector constant!"); MVT EltVT = TLI.getValueType(VecTy->getElementType()); SDValue Op; - if (isa(C)) - Op = 
DAG.getUNDEF(EltVT); - else if (EltVT.isFloatingPoint()) + if (EltVT.isFloatingPoint()) Op = DAG.getConstantFP(0, EltVT); else Op = DAG.getConstant(0, EltVT); @@ -2435,37 +2431,42 @@ // Utility for visitShuffleVector - Returns true if the mask is mask starting // from SIndx and increasing to the element length (undefs are allowed). -static bool SequentialMask(SDValue Mask, unsigned SIndx) { - unsigned MaskNumElts = Mask.getNumOperands(); - for (unsigned i = 0; i != MaskNumElts; ++i) { - if (Mask.getOperand(i).getOpcode() != ISD::UNDEF) { - unsigned Idx = cast(Mask.getOperand(i))->getZExtValue(); - if (Idx != i + SIndx) - return false; - } - } +static bool SequentialMask(SmallVectorImpl &Mask, int SIndx) { + int MaskNumElts = Mask.size(); + for (int i = 0; i != MaskNumElts; ++i) + if ((Mask[i] >= 0) && (Mask[i] != i + SIndx)) + return false; return true; } void SelectionDAGLowering::visitShuffleVector(User &I) { + SmallVector Mask; SDValue Src1 = getValue(I.getOperand(0)); SDValue Src2 = getValue(I.getOperand(1)); - SDValue Mask = getValue(I.getOperand(2)); + // Convert the ConstantVector mask operand into an array of ints, with -1 + // representing undef values. + SmallVector MaskElts; + cast(I.getOperand(2))->getVectorElements(MaskElts); + int MaskNumElts = MaskElts.size(); + for (int i = 0; i != MaskNumElts; ++i) { + if (isa(MaskElts[i])) + Mask.push_back(-1); + else + Mask.push_back(cast(MaskElts[i])->getSExtValue()); + } + MVT VT = TLI.getValueType(I.getType()); MVT SrcVT = Src1.getValueType(); - int MaskNumElts = Mask.getNumOperands(); int SrcNumElts = SrcVT.getVectorNumElements(); if (SrcNumElts == MaskNumElts) { - setValue(&I, DAG.getNode(ISD::VECTOR_SHUFFLE, getCurDebugLoc(), - VT, Src1, Src2, Mask)); + setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2, + &Mask[0])); return; } // Normalize the shuffle vector since mask and vector length don't match. 
- MVT MaskEltVT = Mask.getValueType().getVectorElementType(); - if (SrcNumElts < MaskNumElts && MaskNumElts % SrcNumElts == 0) { // Mask is longer than the source vectors and is a multiple of the source // vectors. We can use concatenate vector to make the mask and vectors @@ -2479,44 +2480,33 @@ // Pad both vectors with undefs to make them the same length as the mask. unsigned NumConcat = MaskNumElts / SrcNumElts; + bool Src1U = Src1.getOpcode() == ISD::UNDEF; + bool Src2U = Src2.getOpcode() == ISD::UNDEF; SDValue UndefVal = DAG.getUNDEF(SrcVT); - SDValue* MOps1 = new SDValue[NumConcat]; - SDValue* MOps2 = new SDValue[NumConcat]; + SmallVector MOps1(NumConcat, UndefVal); + SmallVector MOps2(NumConcat, UndefVal); MOps1[0] = Src1; MOps2[0] = Src2; - for (unsigned i = 1; i != NumConcat; ++i) { - MOps1[i] = UndefVal; - MOps2[i] = UndefVal; - } - Src1 = DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(), - VT, MOps1, NumConcat); - Src2 = DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(), - VT, MOps2, NumConcat); - - delete [] MOps1; - delete [] MOps2; + + Src1 = Src1U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS, + getCurDebugLoc(), VT, + &MOps1[0], NumConcat); + Src2 = Src2U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS, + getCurDebugLoc(), VT, + &MOps2[0], NumConcat); // Readjust mask for new input vector length. 
- SmallVector MappedOps; + SmallVector MappedOps; for (int i = 0; i != MaskNumElts; ++i) { - if (Mask.getOperand(i).getOpcode() == ISD::UNDEF) { - MappedOps.push_back(Mask.getOperand(i)); - } else { - int Idx = cast(Mask.getOperand(i))->getZExtValue(); - if (Idx < SrcNumElts) - MappedOps.push_back(DAG.getConstant(Idx, MaskEltVT)); - else - MappedOps.push_back(DAG.getConstant(Idx + MaskNumElts - SrcNumElts, - MaskEltVT)); - } + int Idx = Mask[i]; + if (Idx < SrcNumElts) + MappedOps.push_back(Idx); + else + MappedOps.push_back(Idx + MaskNumElts - SrcNumElts); } - Mask = DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(), - Mask.getValueType(), - &MappedOps[0], MappedOps.size()); - - setValue(&I, DAG.getNode(ISD::VECTOR_SHUFFLE, getCurDebugLoc(), - VT, Src1, Src2, Mask)); + setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2, + &MappedOps[0])); return; } @@ -2541,20 +2531,19 @@ int MaxRange[2] = {-1, -1}; for (int i = 0; i != MaskNumElts; ++i) { - SDValue Arg = Mask.getOperand(i); - if (Arg.getOpcode() != ISD::UNDEF) { - assert(isa(Arg) && "Invalid VECTOR_SHUFFLE mask!"); - int Idx = cast(Arg)->getZExtValue(); - int Input = 0; - if (Idx >= SrcNumElts) { - Input = 1; - Idx -= SrcNumElts; - } - if (Idx > MaxRange[Input]) - MaxRange[Input] = Idx; - if (Idx < MinRange[Input]) - MinRange[Input] = Idx; - } + int Idx = Mask[i]; + int Input = 0; + if (Idx < 0) + continue; + + if (Idx >= SrcNumElts) { + Input = 1; + Idx -= SrcNumElts; + } + if (Idx > MaxRange[Input]) + MaxRange[Input] = Idx; + if (Idx < MinRange[Input]) + MinRange[Input] = Idx; } // Check if the access is smaller than the vector size and can we find @@ -2596,26 +2585,18 @@ } } // Calculate new mask. 
- SmallVector MappedOps; + SmallVector MappedOps; for (int i = 0; i != MaskNumElts; ++i) { - SDValue Arg = Mask.getOperand(i); - if (Arg.getOpcode() == ISD::UNDEF) { - MappedOps.push_back(Arg); - } else { - int Idx = cast(Arg)->getZExtValue(); - if (Idx < SrcNumElts) - MappedOps.push_back(DAG.getConstant(Idx - StartIdx[0], MaskEltVT)); - else { - Idx = Idx - SrcNumElts - StartIdx[1] + MaskNumElts; - MappedOps.push_back(DAG.getConstant(Idx, MaskEltVT)); - } - } + int Idx = Mask[i]; + if (Idx < 0) + MappedOps.push_back(Idx); + else if (Idx < SrcNumElts) + MappedOps.push_back(Idx - StartIdx[0]); + else + MappedOps.push_back(Idx - SrcNumElts - StartIdx[1] + MaskNumElts); } - Mask = DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(), - Mask.getValueType(), - &MappedOps[0], MappedOps.size()); - setValue(&I, DAG.getNode(ISD::VECTOR_SHUFFLE, getCurDebugLoc(), - VT, Src1, Src2, Mask)); + setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2, + &MappedOps[0])); return; } } @@ -2627,12 +2608,10 @@ MVT PtrVT = TLI.getPointerTy(); SmallVector Ops; for (int i = 0; i != MaskNumElts; ++i) { - SDValue Arg = Mask.getOperand(i); - if (Arg.getOpcode() == ISD::UNDEF) { + if (Mask[i] < 0) { Ops.push_back(DAG.getUNDEF(EltVT)); } else { - assert(isa(Arg) && "Invalid VECTOR_SHUFFLE mask!"); - int Idx = cast(Arg)->getZExtValue(); + int Idx = Mask[i]; if (Idx < SrcNumElts) Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(), EltVT, Src1, DAG.getConstant(Idx, PtrVT))); Modified: llvm/trunk/lib/Target/CellSPU/SPUISelLowering.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/CellSPU/SPUISelLowering.cpp?rev=70225&r1=70224&r2=70225&view=diff ============================================================================== --- llvm/trunk/lib/Target/CellSPU/SPUISelLowering.cpp (original) +++ llvm/trunk/lib/Target/CellSPU/SPUISelLowering.cpp Mon Apr 27 13:41:29 2009 @@ -1670,9 +1670,9 @@ /// \note /// SPUISD::SHUFB is eventually selected as Cell's shufb 
instructions. static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { + const ShuffleVectorSDNode *SVN = cast(Op); SDValue V1 = Op.getOperand(0); SDValue V2 = Op.getOperand(1); - SDValue PermMask = Op.getOperand(2); DebugLoc dl = Op.getDebugLoc(); if (V2.getOpcode() == ISD::UNDEF) V2 = V1; @@ -1703,39 +1703,40 @@ } else assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE"); - for (unsigned i = 0; i != PermMask.getNumOperands(); ++i) { - if (PermMask.getOperand(i).getOpcode() != ISD::UNDEF) { - unsigned SrcElt = cast (PermMask.getOperand(i))->getZExtValue(); - - if (monotonic) { - if (SrcElt >= V2EltIdx0) { - if (1 >= (++EltsFromV2)) { - V2Elt = (V2EltIdx0 - SrcElt) << 2; - } - } else if (CurrElt != SrcElt) { - monotonic = false; + for (unsigned i = 0; i != MaxElts; ++i) { + if (SVN->getMaskElt(i) < 0) + continue; + + unsigned SrcElt = SVN->getMaskElt(i); + + if (monotonic) { + if (SrcElt >= V2EltIdx0) { + if (1 >= (++EltsFromV2)) { + V2Elt = (V2EltIdx0 - SrcElt) << 2; } - - ++CurrElt; + } else if (CurrElt != SrcElt) { + monotonic = false; } - if (rotate) { - if (PrevElt > 0 && SrcElt < MaxElts) { - if ((PrevElt == SrcElt - 1) - || (PrevElt == MaxElts - 1 && SrcElt == 0)) { - PrevElt = SrcElt; - if (SrcElt == 0) - V0Elt = i; - } else { - rotate = false; - } - } else if (PrevElt == 0) { - // First time through, need to keep track of previous element + ++CurrElt; + } + + if (rotate) { + if (PrevElt > 0 && SrcElt < MaxElts) { + if ((PrevElt == SrcElt - 1) + || (PrevElt == MaxElts - 1 && SrcElt == 0)) { PrevElt = SrcElt; + if (SrcElt == 0) + V0Elt = i; } else { - // This isn't a rotation, takes elements from vector 2 rotate = false; } + } else if (PrevElt == 0) { + // First time through, need to keep track of previous element + PrevElt = SrcElt; + } else { + // This isn't a rotation, takes elements from vector 2 + rotate = false; } } } @@ -1768,17 +1769,11 @@ unsigned BytesPerElement = EltVT.getSizeInBits()/8; SmallVector ResultMask; - for (unsigned i 
= 0, e = PermMask.getNumOperands(); i != e; ++i) { - unsigned SrcElt; - if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF) - SrcElt = 0; - else - SrcElt = cast(PermMask.getOperand(i))->getZExtValue(); - - for (unsigned j = 0; j < BytesPerElement; ++j) { - ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j, - MVT::i8)); - } + for (unsigned i = 0, e = MaxElts; i != e; ++i) { + unsigned SrcElt = SVN->getMaskElt(i) < 0 ? 0 : SVN->getMaskElt(i); + + for (unsigned j = 0; j < BytesPerElement; ++j) + ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,MVT::i8)); } SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8, Modified: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp?rev=70225&r1=70224&r2=70225&view=diff ============================================================================== --- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp (original) +++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp Mon Apr 27 13:41:29 2009 @@ -456,22 +456,21 @@ /// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return /// true if Op is undef or if it matches the specified value. -static bool isConstantOrUndef(SDValue Op, unsigned Val) { - return Op.getOpcode() == ISD::UNDEF || - cast(Op)->getZExtValue() == Val; +static bool isConstantOrUndef(int Op, int Val) { + return Op < 0 || Op == Val; } /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a /// VPKUHUM instruction. 
-bool PPC::isVPKUHUMShuffleMask(SDNode *N, bool isUnary) { +bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary) { if (!isUnary) { for (unsigned i = 0; i != 16; ++i) - if (!isConstantOrUndef(N->getOperand(i), i*2+1)) + if (!isConstantOrUndef(N->getMaskElt(i), i*2+1)) return false; } else { for (unsigned i = 0; i != 8; ++i) - if (!isConstantOrUndef(N->getOperand(i), i*2+1) || - !isConstantOrUndef(N->getOperand(i+8), i*2+1)) + if (!isConstantOrUndef(N->getMaskElt(i), i*2+1) || + !isConstantOrUndef(N->getMaskElt(i+8), i*2+1)) return false; } return true; @@ -479,18 +478,18 @@ /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a /// VPKUWUM instruction. -bool PPC::isVPKUWUMShuffleMask(SDNode *N, bool isUnary) { +bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary) { if (!isUnary) { for (unsigned i = 0; i != 16; i += 2) - if (!isConstantOrUndef(N->getOperand(i ), i*2+2) || - !isConstantOrUndef(N->getOperand(i+1), i*2+3)) + if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) || + !isConstantOrUndef(N->getMaskElt(i+1), i*2+3)) return false; } else { for (unsigned i = 0; i != 8; i += 2) - if (!isConstantOrUndef(N->getOperand(i ), i*2+2) || - !isConstantOrUndef(N->getOperand(i+1), i*2+3) || - !isConstantOrUndef(N->getOperand(i+8), i*2+2) || - !isConstantOrUndef(N->getOperand(i+9), i*2+3)) + if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) || + !isConstantOrUndef(N->getMaskElt(i+1), i*2+3) || + !isConstantOrUndef(N->getMaskElt(i+8), i*2+2) || + !isConstantOrUndef(N->getMaskElt(i+9), i*2+3)) return false; } return true; @@ -498,27 +497,28 @@ /// isVMerge - Common function, used to match vmrg* shuffles. 
/// -static bool isVMerge(SDNode *N, unsigned UnitSize, +static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned LHSStart, unsigned RHSStart) { - assert(N->getOpcode() == ISD::BUILD_VECTOR && - N->getNumOperands() == 16 && "PPC only supports shuffles by bytes!"); + assert(N->getValueType(0) == MVT::v16i8 && + "PPC only supports shuffles by bytes!"); assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) && "Unsupported merge size!"); for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit - if (!isConstantOrUndef(N->getOperand(i*UnitSize*2+j), + if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j), LHSStart+j+i*UnitSize) || - !isConstantOrUndef(N->getOperand(i*UnitSize*2+UnitSize+j), + !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j), RHSStart+j+i*UnitSize)) return false; } - return true; + return true; } /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for /// a VRGL* instruction with the specified unit size (1,2 or 4 bytes). -bool PPC::isVMRGLShuffleMask(SDNode *N, unsigned UnitSize, bool isUnary) { +bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, + bool isUnary) { if (!isUnary) return isVMerge(N, UnitSize, 8, 24); return isVMerge(N, UnitSize, 8, 8); @@ -526,7 +526,8 @@ /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for /// a VRGH* instruction with the specified unit size (1,2 or 4 bytes). -bool PPC::isVMRGHShuffleMask(SDNode *N, unsigned UnitSize, bool isUnary) { +bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, + bool isUnary) { if (!isUnary) return isVMerge(N, UnitSize, 0, 16); return isVMerge(N, UnitSize, 0, 0); @@ -536,91 +537,90 @@ /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift /// amount, otherwise return -1. 
int PPC::isVSLDOIShuffleMask(SDNode *N, bool isUnary) { - assert(N->getOpcode() == ISD::BUILD_VECTOR && - N->getNumOperands() == 16 && "PPC only supports shuffles by bytes!"); + assert(N->getValueType(0) == MVT::v16i8 && + "PPC only supports shuffles by bytes!"); + + ShuffleVectorSDNode *SVOp = cast(N); + // Find the first non-undef value in the shuffle mask. unsigned i; - for (i = 0; i != 16 && N->getOperand(i).getOpcode() == ISD::UNDEF; ++i) + for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i) /*search*/; if (i == 16) return -1; // all undef. - // Otherwise, check to see if the rest of the elements are consequtively + // Otherwise, check to see if the rest of the elements are consecutively // numbered from this value. - unsigned ShiftAmt = cast(N->getOperand(i))->getZExtValue(); + unsigned ShiftAmt = SVOp->getMaskElt(i); if (ShiftAmt < i) return -1; ShiftAmt -= i; if (!isUnary) { - // Check the rest of the elements to see if they are consequtive. + // Check the rest of the elements to see if they are consecutive. for (++i; i != 16; ++i) - if (!isConstantOrUndef(N->getOperand(i), ShiftAmt+i)) + if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i)) return -1; } else { - // Check the rest of the elements to see if they are consequtive. + // Check the rest of the elements to see if they are consecutive. for (++i; i != 16; ++i) - if (!isConstantOrUndef(N->getOperand(i), (ShiftAmt+i) & 15)) + if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15)) return -1; } - return ShiftAmt; } /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a splat of a single element that is suitable for input to /// VSPLTB/VSPLTH/VSPLTW. 
-bool PPC::isSplatShuffleMask(SDNode *N, unsigned EltSize) { - assert(N->getOpcode() == ISD::BUILD_VECTOR && - N->getNumOperands() == 16 && +bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) { + assert(N->getValueType(0) == MVT::v16i8 && (EltSize == 1 || EltSize == 2 || EltSize == 4)); // This is a splat operation if each element of the permute is the same, and // if the value doesn't reference the second vector. - unsigned ElementBase = 0; - SDValue Elt = N->getOperand(0); - if (ConstantSDNode *EltV = dyn_cast(Elt)) - ElementBase = EltV->getZExtValue(); - else - return false; // FIXME: Handle UNDEF elements too! - - if (cast(Elt)->getZExtValue() >= 16) + unsigned ElementBase = N->getMaskElt(0); + + // FIXME: Handle UNDEF elements too! + if (ElementBase >= 16) return false; - // Check that they are consequtive. - for (unsigned i = 1; i != EltSize; ++i) { - if (!isa(N->getOperand(i)) || - cast(N->getOperand(i))->getZExtValue() != i+ElementBase) + // Check that the indices are consecutive, in the case of a multi-byte element + // splatted with a v16i8 mask. + for (unsigned i = 1; i != EltSize; ++i) + if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase)) return false; - } - assert(isa(Elt) && "Invalid VECTOR_SHUFFLE mask!"); for (unsigned i = EltSize, e = 16; i != e; i += EltSize) { - if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue; - assert(isa(N->getOperand(i)) && - "Invalid VECTOR_SHUFFLE mask!"); + if (N->getMaskElt(i) < 0) continue; for (unsigned j = 0; j != EltSize; ++j) - if (N->getOperand(i+j) != N->getOperand(j)) + if (N->getMaskElt(i+j) != N->getMaskElt(j)) return false; } - return true; } /// isAllNegativeZeroVector - Returns true if all elements of build_vector /// are -0.0. 
bool PPC::isAllNegativeZeroVector(SDNode *N) { - assert(N->getOpcode() == ISD::BUILD_VECTOR); - if (PPC::isSplatShuffleMask(N, N->getNumOperands())) - if (ConstantFPSDNode *CFP = dyn_cast(N)) + BuildVectorSDNode *BV = cast(N); + + APInt APVal, APUndef; + unsigned BitSize; + bool HasAnyUndefs; + + if (BV->isConstantSplat(APVal, APUndef, BitSize, HasAnyUndefs, 32)) + if (ConstantFPSDNode *CFP = dyn_cast(N->getOperand(0))) return CFP->getValueAPF().isNegZero(); + return false; } /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the /// specified isSplatShuffleMask VECTOR_SHUFFLE mask. unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize) { - assert(isSplatShuffleMask(N, EltSize)); - return cast(N->getOperand(0))->getZExtValue() / EltSize; + ShuffleVectorSDNode *SVOp = cast(N); + assert(isSplatShuffleMask(SVOp, EltSize)); + return SVOp->getMaskElt(0) / EltSize; } /// get_VSPLTI_elt - If this is a build_vector of constants which can be formed @@ -3149,11 +3149,10 @@ LHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, LHS); RHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, RHS); - SDValue Ops[16]; + int Ops[16]; for (unsigned i = 0; i != 16; ++i) - Ops[i] = DAG.getConstant(i+Amt, MVT::i8); - SDValue T = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, MVT::v16i8, LHS, RHS, - DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8, Ops,16)); + Ops[i] = i + Amt; + SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops); return DAG.getNode(ISD::BIT_CONVERT, dl, VT, T); } @@ -3354,7 +3353,7 @@ OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl); OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl); - unsigned ShufIdxs[16]; + int ShufIdxs[16]; switch (OpNum) { default: assert(0 && "Unknown i32 permute!"); case OP_VMRGHW: @@ -3392,13 +3391,11 @@ case OP_VSLDOI12: return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl); } - SDValue Ops[16]; - for (unsigned i = 0; i != 16; ++i) - Ops[i] = 
DAG.getConstant(ShufIdxs[i], MVT::i8); - - return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, OpLHS.getValueType(), - OpLHS, OpRHS, - DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8, Ops, 16)); + MVT VT = OpLHS.getValueType(); + OpLHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, OpLHS); + OpRHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, OpRHS); + SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs); + return DAG.getNode(ISD::BIT_CONVERT, dl, VT, T); } /// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this @@ -3406,28 +3403,29 @@ /// return the code it can be lowered into. Worst case, it can always be /// lowered into a vperm. SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, - SelectionDAG &DAG) { + SelectionDAG &DAG) { DebugLoc dl = Op.getDebugLoc(); SDValue V1 = Op.getOperand(0); SDValue V2 = Op.getOperand(1); - SDValue PermMask = Op.getOperand(2); + ShuffleVectorSDNode *SVOp = cast(Op); + MVT VT = Op.getValueType(); // Cases that are handled by instructions that take permute immediates // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be // selected by the instruction selector. 
if (V2.getOpcode() == ISD::UNDEF) { - if (PPC::isSplatShuffleMask(PermMask.getNode(), 1) || - PPC::isSplatShuffleMask(PermMask.getNode(), 2) || - PPC::isSplatShuffleMask(PermMask.getNode(), 4) || - PPC::isVPKUWUMShuffleMask(PermMask.getNode(), true) || - PPC::isVPKUHUMShuffleMask(PermMask.getNode(), true) || - PPC::isVSLDOIShuffleMask(PermMask.getNode(), true) != -1 || - PPC::isVMRGLShuffleMask(PermMask.getNode(), 1, true) || - PPC::isVMRGLShuffleMask(PermMask.getNode(), 2, true) || - PPC::isVMRGLShuffleMask(PermMask.getNode(), 4, true) || - PPC::isVMRGHShuffleMask(PermMask.getNode(), 1, true) || - PPC::isVMRGHShuffleMask(PermMask.getNode(), 2, true) || - PPC::isVMRGHShuffleMask(PermMask.getNode(), 4, true)) { + if (PPC::isSplatShuffleMask(SVOp, 1) || + PPC::isSplatShuffleMask(SVOp, 2) || + PPC::isSplatShuffleMask(SVOp, 4) || + PPC::isVPKUWUMShuffleMask(SVOp, true) || + PPC::isVPKUHUMShuffleMask(SVOp, true) || + PPC::isVSLDOIShuffleMask(SVOp, true) != -1 || + PPC::isVMRGLShuffleMask(SVOp, 1, true) || + PPC::isVMRGLShuffleMask(SVOp, 2, true) || + PPC::isVMRGLShuffleMask(SVOp, 4, true) || + PPC::isVMRGHShuffleMask(SVOp, 1, true) || + PPC::isVMRGHShuffleMask(SVOp, 2, true) || + PPC::isVMRGHShuffleMask(SVOp, 4, true)) { return Op; } } @@ -3435,29 +3433,31 @@ // Altivec has a variety of "shuffle immediates" that take two vector inputs // and produce a fixed permutation. If any of these match, do not lower to // VPERM. 
- if (PPC::isVPKUWUMShuffleMask(PermMask.getNode(), false) || - PPC::isVPKUHUMShuffleMask(PermMask.getNode(), false) || - PPC::isVSLDOIShuffleMask(PermMask.getNode(), false) != -1 || - PPC::isVMRGLShuffleMask(PermMask.getNode(), 1, false) || - PPC::isVMRGLShuffleMask(PermMask.getNode(), 2, false) || - PPC::isVMRGLShuffleMask(PermMask.getNode(), 4, false) || - PPC::isVMRGHShuffleMask(PermMask.getNode(), 1, false) || - PPC::isVMRGHShuffleMask(PermMask.getNode(), 2, false) || - PPC::isVMRGHShuffleMask(PermMask.getNode(), 4, false)) + if (PPC::isVPKUWUMShuffleMask(SVOp, false) || + PPC::isVPKUHUMShuffleMask(SVOp, false) || + PPC::isVSLDOIShuffleMask(SVOp, false) != -1 || + PPC::isVMRGLShuffleMask(SVOp, 1, false) || + PPC::isVMRGLShuffleMask(SVOp, 2, false) || + PPC::isVMRGLShuffleMask(SVOp, 4, false) || + PPC::isVMRGHShuffleMask(SVOp, 1, false) || + PPC::isVMRGHShuffleMask(SVOp, 2, false) || + PPC::isVMRGHShuffleMask(SVOp, 4, false)) return Op; // Check to see if this is a shuffle of 4-byte values. If so, we can use our // perfect shuffle table to emit an optimal matching sequence. + SmallVector PermMask; + SVOp->getMask(PermMask); + unsigned PFIndexes[4]; bool isFourElementShuffle = true; for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number unsigned EltNo = 8; // Start out undef. for (unsigned j = 0; j != 4; ++j) { // Intra-element byte. - if (PermMask.getOperand(i*4+j).getOpcode() == ISD::UNDEF) + if (PermMask[i*4+j] < 0) continue; // Undef, ignore it. 
- unsigned ByteSource = - cast(PermMask.getOperand(i*4+j))->getZExtValue(); + unsigned ByteSource = PermMask[i*4+j]; if ((ByteSource & 3) != j) { isFourElementShuffle = false; break; @@ -3509,12 +3509,8 @@ unsigned BytesPerElement = EltVT.getSizeInBits()/8; SmallVector ResultMask; - for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) { - unsigned SrcElt; - if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF) - SrcElt = 0; - else - SrcElt = cast(PermMask.getOperand(i))->getZExtValue(); + for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) { + unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i]; for (unsigned j = 0; j != BytesPerElement; ++j) ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j, @@ -3704,13 +3700,12 @@ OddParts = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, OddParts); // Merge the results together. - SDValue Ops[16]; + int Ops[16]; for (unsigned i = 0; i != 8; ++i) { - Ops[i*2 ] = DAG.getConstant(2*i+1, MVT::i8); - Ops[i*2+1] = DAG.getConstant(2*i+1+16, MVT::i8); + Ops[i*2 ] = 2*i+1; + Ops[i*2+1] = 2*i+1+16; } - return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, MVT::v16i8, EvenParts, OddParts, - DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8, Ops, 16)); + return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops); } else { assert(0 && "Unknown mul to lower!"); abort(); Modified: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h?rev=70225&r1=70224&r2=70225&view=diff ============================================================================== --- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h (original) +++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h Mon Apr 27 13:41:29 2009 @@ -175,19 +175,21 @@ namespace PPC { /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a /// VPKUHUM instruction. 
- bool isVPKUHUMShuffleMask(SDNode *N, bool isUnary); + bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary); /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a /// VPKUWUM instruction. - bool isVPKUWUMShuffleMask(SDNode *N, bool isUnary); + bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary); /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for /// a VRGL* instruction with the specified unit size (1,2 or 4 bytes). - bool isVMRGLShuffleMask(SDNode *N, unsigned UnitSize, bool isUnary); + bool isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, + bool isUnary); /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for /// a VRGH* instruction with the specified unit size (1,2 or 4 bytes). - bool isVMRGHShuffleMask(SDNode *N, unsigned UnitSize, bool isUnary); + bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, + bool isUnary); /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift /// amount, otherwise return -1. @@ -196,7 +198,7 @@ /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a splat of a single element that is suitable for input to /// VSPLTB/VSPLTH/VSPLTW. - bool isSplatShuffleMask(SDNode *N, unsigned EltSize); + bool isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize); /// isAllNegativeZeroVector - Returns true if all elements of build_vector /// are -0.0. Modified: llvm/trunk/lib/Target/PowerPC/PPCInstrAltivec.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCInstrAltivec.td?rev=70225&r1=70224&r2=70225&view=diff ============================================================================== --- llvm/trunk/lib/Target/PowerPC/PPCInstrAltivec.td (original) +++ llvm/trunk/lib/Target/PowerPC/PPCInstrAltivec.td Mon Apr 27 13:41:29 2009 @@ -15,96 +15,118 @@ // Altivec transformation functions and pattern fragments. 
// -/// VPKUHUM_shuffle_mask/VPKUWUM_shuffle_mask - Return true if this is a valid -/// shuffle mask for the VPKUHUM or VPKUWUM instructions. -def VPKUHUM_shuffle_mask : PatLeaf<(build_vector), [{ - return PPC::isVPKUHUMShuffleMask(N, false); + +def vpkuhum_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVPKUHUMShuffleMask(cast(N), false); }]>; -def VPKUWUM_shuffle_mask : PatLeaf<(build_vector), [{ - return PPC::isVPKUWUMShuffleMask(N, false); +def vpkuwum_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVPKUWUMShuffleMask(cast(N), false); }]>; - -def VPKUHUM_unary_shuffle_mask : PatLeaf<(build_vector), [{ - return PPC::isVPKUHUMShuffleMask(N, true); +def vpkuhum_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVPKUHUMShuffleMask(cast(N), true); }]>; -def VPKUWUM_unary_shuffle_mask : PatLeaf<(build_vector), [{ - return PPC::isVPKUWUMShuffleMask(N, true); +def vpkuwum_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVPKUWUMShuffleMask(cast(N), true); }]>; -def VMRGLB_shuffle_mask : PatLeaf<(build_vector), [{ - return PPC::isVMRGLShuffleMask(N, 1, false); +def vmrglb_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVMRGLShuffleMask(cast(N), 1, false); }]>; -def VMRGLH_shuffle_mask : PatLeaf<(build_vector), [{ - return PPC::isVMRGLShuffleMask(N, 2, false); +def vmrglh_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVMRGLShuffleMask(cast(N), 2, false); }]>; -def VMRGLW_shuffle_mask : PatLeaf<(build_vector), [{ - return PPC::isVMRGLShuffleMask(N, 4, false); +def vmrglw_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVMRGLShuffleMask(cast(N), 4, false); }]>; -def 
VMRGHB_shuffle_mask : PatLeaf<(build_vector), [{ - return PPC::isVMRGHShuffleMask(N, 1, false); +def vmrghb_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVMRGHShuffleMask(cast(N), 1, false); }]>; -def VMRGHH_shuffle_mask : PatLeaf<(build_vector), [{ - return PPC::isVMRGHShuffleMask(N, 2, false); +def vmrghh_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVMRGHShuffleMask(cast(N), 2, false); }]>; -def VMRGHW_shuffle_mask : PatLeaf<(build_vector), [{ - return PPC::isVMRGHShuffleMask(N, 4, false); +def vmrghw_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVMRGHShuffleMask(cast(N), 4, false); }]>; -def VMRGLB_unary_shuffle_mask : PatLeaf<(build_vector), [{ - return PPC::isVMRGLShuffleMask(N, 1, true); + +def vmrglb_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVMRGLShuffleMask(cast(N), 1, true); }]>; -def VMRGLH_unary_shuffle_mask : PatLeaf<(build_vector), [{ - return PPC::isVMRGLShuffleMask(N, 2, true); +def vmrglh_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVMRGLShuffleMask(cast(N), 2, true); }]>; -def VMRGLW_unary_shuffle_mask : PatLeaf<(build_vector), [{ - return PPC::isVMRGLShuffleMask(N, 4, true); +def vmrglw_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVMRGLShuffleMask(cast(N), 4, true); }]>; -def VMRGHB_unary_shuffle_mask : PatLeaf<(build_vector), [{ - return PPC::isVMRGHShuffleMask(N, 1, true); +def vmrghb_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVMRGHShuffleMask(cast(N), 1, true); }]>; -def VMRGHH_unary_shuffle_mask : PatLeaf<(build_vector), [{ - return PPC::isVMRGHShuffleMask(N, 2, true); +def vmrghh_unary_shuffle : 
PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVMRGHShuffleMask(cast(N), 2, true); }]>; -def VMRGHW_unary_shuffle_mask : PatLeaf<(build_vector), [{ - return PPC::isVMRGHShuffleMask(N, 4, true); +def vmrghw_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVMRGHShuffleMask(cast(N), 4, true); }]>; -def VSLDOI_get_imm : SDNodeXForm; -def VSLDOI_shuffle_mask : PatLeaf<(build_vector), [{ +def vsldoi_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ return PPC::isVSLDOIShuffleMask(N, false) != -1; }], VSLDOI_get_imm>; + /// VSLDOI_unary* - These are used to match vsldoi(X,X), which is turned into /// vector_shuffle(X,undef,mask) by the dag combiner. -def VSLDOI_unary_get_imm : SDNodeXForm; -def VSLDOI_unary_shuffle_mask : PatLeaf<(build_vector), [{ +def vsldoi_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ return PPC::isVSLDOIShuffleMask(N, true) != -1; }], VSLDOI_unary_get_imm>; // VSPLT*_get_imm xform function: convert vector_shuffle mask to VSPLT* imm. 
-def VSPLTB_get_imm : SDNodeXForm; -def VSPLTB_shuffle_mask : PatLeaf<(build_vector), [{ - return PPC::isSplatShuffleMask(N, 1); +def vspltb_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isSplatShuffleMask(cast(N), 1); }], VSPLTB_get_imm>; -def VSPLTH_get_imm : SDNodeXForm; -def VSPLTH_shuffle_mask : PatLeaf<(build_vector), [{ - return PPC::isSplatShuffleMask(N, 2); +def vsplth_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isSplatShuffleMask(cast(N), 2); }], VSPLTH_get_imm>; -def VSPLTW_get_imm : SDNodeXForm; -def VSPLTW_shuffle_mask : PatLeaf<(build_vector), [{ - return PPC::isSplatShuffleMask(N, 4); +def vspltw_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isSplatShuffleMask(cast(N), 4); }], VSPLTW_get_imm>; @@ -268,8 +290,7 @@ def VSLDOI : VAForm_2<44, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB, u5imm:$SH), "vsldoi $vD, $vA, $vB, $SH", VecFP, [(set VRRC:$vD, - (vector_shuffle (v16i8 VRRC:$vA), VRRC:$vB, - VSLDOI_shuffle_mask:$SH))]>; + (vsldoi_shuffle:$SH (v16i8 VRRC:$vA), VRRC:$vB))]>; // VX-Form instructions. AltiVec arithmetic ops. 
def VADDFP : VXForm_1<10, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), @@ -345,28 +366,22 @@ def VMRGHB : VXForm_1< 12, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), "vmrghb $vD, $vA, $vB", VecFP, - [(set VRRC:$vD, (vector_shuffle (v16i8 VRRC:$vA), - VRRC:$vB, VMRGHB_shuffle_mask))]>; + [(set VRRC:$vD, (vmrghb_shuffle VRRC:$vA, VRRC:$vB))]>; def VMRGHH : VXForm_1< 76, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), "vmrghh $vD, $vA, $vB", VecFP, - [(set VRRC:$vD, (vector_shuffle (v16i8 VRRC:$vA), - VRRC:$vB, VMRGHH_shuffle_mask))]>; + [(set VRRC:$vD, (vmrghh_shuffle VRRC:$vA, VRRC:$vB))]>; def VMRGHW : VXForm_1<140, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), "vmrghw $vD, $vA, $vB", VecFP, - [(set VRRC:$vD, (vector_shuffle (v16i8 VRRC:$vA), - VRRC:$vB, VMRGHW_shuffle_mask))]>; + [(set VRRC:$vD, (vmrghw_shuffle VRRC:$vA, VRRC:$vB))]>; def VMRGLB : VXForm_1<268, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), "vmrglb $vD, $vA, $vB", VecFP, - [(set VRRC:$vD, (vector_shuffle (v16i8 VRRC:$vA), - VRRC:$vB, VMRGLB_shuffle_mask))]>; + [(set VRRC:$vD, (vmrglb_shuffle VRRC:$vA, VRRC:$vB))]>; def VMRGLH : VXForm_1<332, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), "vmrglh $vD, $vA, $vB", VecFP, - [(set VRRC:$vD, (vector_shuffle (v16i8 VRRC:$vA), - VRRC:$vB, VMRGLH_shuffle_mask))]>; + [(set VRRC:$vD, (vmrglh_shuffle VRRC:$vA, VRRC:$vB))]>; def VMRGLW : VXForm_1<396, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), "vmrglw $vD, $vA, $vB", VecFP, - [(set VRRC:$vD, (vector_shuffle (v16i8 VRRC:$vA), - VRRC:$vB, VMRGLW_shuffle_mask))]>; + [(set VRRC:$vD, (vmrglw_shuffle VRRC:$vA, VRRC:$vB))]>; def VMSUMMBM : VA1a_Int<37, "vmsummbm", int_ppc_altivec_vmsummbm>; def VMSUMSHM : VA1a_Int<40, "vmsumshm", int_ppc_altivec_vmsumshm>; @@ -440,16 +455,16 @@ def VSPLTB : VXForm_1<524, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB), "vspltb $vD, $vB, $UIMM", VecPerm, - [(set VRRC:$vD, (vector_shuffle (v16i8 VRRC:$vB), (undef), - VSPLTB_shuffle_mask:$UIMM))]>; + [(set VRRC:$vD, + (vspltb_shuffle:$UIMM (v16i8 VRRC:$vB), 
(undef)))]>; def VSPLTH : VXForm_1<588, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB), "vsplth $vD, $vB, $UIMM", VecPerm, - [(set VRRC:$vD, (vector_shuffle (v16i8 VRRC:$vB), (undef), - VSPLTH_shuffle_mask:$UIMM))]>; + [(set VRRC:$vD, + (vsplth_shuffle:$UIMM (v16i8 VRRC:$vB), (undef)))]>; def VSPLTW : VXForm_1<652, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB), "vspltw $vD, $vB, $UIMM", VecPerm, - [(set VRRC:$vD, (vector_shuffle (v16i8 VRRC:$vB), (undef), - VSPLTW_shuffle_mask:$UIMM))]>; + [(set VRRC:$vD, + (vspltw_shuffle:$UIMM (v16i8 VRRC:$vB), (undef)))]>; def VSR : VX1_Int< 708, "vsr" , int_ppc_altivec_vsr>; def VSRO : VX1_Int<1100, "vsro" , int_ppc_altivec_vsro>; @@ -479,13 +494,13 @@ def VPKSWUS : VX1_Int<334, "vpkswus", int_ppc_altivec_vpkswus>; def VPKUHUM : VXForm_1<14, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), "vpkuhum $vD, $vA, $vB", VecFP, - [(set VRRC:$vD, (vector_shuffle (v16i8 VRRC:$vA), - VRRC:$vB, VPKUHUM_shuffle_mask))]>; + [(set VRRC:$vD, + (vpkuhum_shuffle (v16i8 VRRC:$vA), VRRC:$vB))]>; def VPKUHUS : VX1_Int<142, "vpkuhus", int_ppc_altivec_vpkuhus>; def VPKUWUM : VXForm_1<78, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), "vpkuwum $vD, $vA, $vB", VecFP, - [(set VRRC:$vD, (vector_shuffle (v16i8 VRRC:$vA), - VRRC:$vB, VPKUWUM_shuffle_mask))]>; + [(set VRRC:$vD, + (vpkuwum_shuffle (v16i8 VRRC:$vA), VRRC:$vB))]>; def VPKUWUS : VX1_Int<206, "vpkuwus", int_ppc_altivec_vpkuwus>; // Vector Unpack. @@ -603,25 +618,25 @@ // Shuffles. 
// Match vsldoi(x,x), vpkuwum(x,x), vpkuhum(x,x) -def:Pat<(vector_shuffle (v16i8 VRRC:$vA), undef, VSLDOI_unary_shuffle_mask:$in), - (VSLDOI VRRC:$vA, VRRC:$vA, VSLDOI_unary_shuffle_mask:$in)>; -def:Pat<(vector_shuffle (v16i8 VRRC:$vA), undef,VPKUWUM_unary_shuffle_mask:$in), +def:Pat<(vsldoi_unary_shuffle:$in (v16i8 VRRC:$vA), undef), + (VSLDOI VRRC:$vA, VRRC:$vA, (VSLDOI_unary_get_imm VRRC:$in))>; +def:Pat<(vpkuwum_unary_shuffle (v16i8 VRRC:$vA), undef), (VPKUWUM VRRC:$vA, VRRC:$vA)>; -def:Pat<(vector_shuffle (v16i8 VRRC:$vA), undef,VPKUHUM_unary_shuffle_mask:$in), +def:Pat<(vpkuhum_unary_shuffle (v16i8 VRRC:$vA), undef), (VPKUHUM VRRC:$vA, VRRC:$vA)>; // Match vmrg*(x,x) -def:Pat<(vector_shuffle (v16i8 VRRC:$vA), undef, VMRGLB_unary_shuffle_mask:$in), +def:Pat<(vmrglb_unary_shuffle (v16i8 VRRC:$vA), undef), (VMRGLB VRRC:$vA, VRRC:$vA)>; -def:Pat<(vector_shuffle (v16i8 VRRC:$vA), undef, VMRGLH_unary_shuffle_mask:$in), +def:Pat<(vmrglh_unary_shuffle (v16i8 VRRC:$vA), undef), (VMRGLH VRRC:$vA, VRRC:$vA)>; -def:Pat<(vector_shuffle (v16i8 VRRC:$vA), undef, VMRGLW_unary_shuffle_mask:$in), +def:Pat<(vmrglw_unary_shuffle (v16i8 VRRC:$vA), undef), (VMRGLW VRRC:$vA, VRRC:$vA)>; -def:Pat<(vector_shuffle (v16i8 VRRC:$vA), undef, VMRGHB_unary_shuffle_mask:$in), +def:Pat<(vmrghb_unary_shuffle (v16i8 VRRC:$vA), undef), (VMRGHB VRRC:$vA, VRRC:$vA)>; -def:Pat<(vector_shuffle (v16i8 VRRC:$vA), undef, VMRGHH_unary_shuffle_mask:$in), +def:Pat<(vmrghh_unary_shuffle (v16i8 VRRC:$vA), undef), (VMRGHH VRRC:$vA, VRRC:$vA)>; -def:Pat<(vector_shuffle (v16i8 VRRC:$vA), undef, VMRGHW_unary_shuffle_mask:$in), +def:Pat<(vmrghw_unary_shuffle (v16i8 VRRC:$vA), undef), (VMRGHW VRRC:$vA, VRRC:$vA)>; // Logical Operations Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=70225&r1=70224&r2=70225&view=diff ============================================================================== --- 
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original) +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Mon Apr 27 13:41:29 2009 @@ -45,7 +45,8 @@ DisableMMX("disable-mmx", cl::Hidden, cl::desc("Disable use of MMX")); // Forward declarations. -static SDValue getMOVLMask(unsigned NumElems, SelectionDAG &DAG, DebugLoc dl); +static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, MVT VT, SDValue V1, + SDValue V2); X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) : TargetLowering(TM) { @@ -1667,9 +1668,7 @@ // Special case: passing MMX values in XMM registers. Arg = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i64, Arg); Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg); - Arg = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, MVT::v2i64, - DAG.getUNDEF(MVT::v2i64), Arg, - getMOVLMask(2, DAG, dl)); + Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg); break; } } @@ -2138,186 +2137,164 @@ } } -/// isUndefOrInRange - Op is either an undef node or a ConstantSDNode. Return -/// true if Op is undef or if its value falls within the specified range (L, H]. -static bool isUndefOrInRange(SDValue Op, unsigned Low, unsigned Hi) { - if (Op.getOpcode() == ISD::UNDEF) - return true; - - unsigned Val = cast(Op)->getZExtValue(); - return (Val >= Low && Val < Hi); +/// isUndefOrInRange - Return true if Val is undef or if its value falls within +/// the specified range (L, H]. +static bool isUndefOrInRange(int Val, int Low, int Hi) { + return (Val < 0) || (Val >= Low && Val < Hi); } -/// isUndefOrEqual - Op is either an undef node or a ConstantSDNode. Return -/// true if Op is undef or if its value equal to the specified value. -static bool isUndefOrEqual(SDValue Op, unsigned Val) { - if (Op.getOpcode() == ISD::UNDEF) +/// isUndefOrEqual - Val is either less than zero (undef) or equal to the +/// specified value. 
+static bool isUndefOrEqual(int Val, int CmpVal) { + if (Val < 0 || Val == CmpVal) return true; - return cast(Op)->getZExtValue() == Val; + return false; } -/// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand -/// specifies a shuffle of elements that is suitable for input to PSHUFD. -bool X86::isPSHUFDMask(SDNode *N) { - assert(N->getOpcode() == ISD::BUILD_VECTOR); - - if (N->getNumOperands() != 2 && N->getNumOperands() != 4) - return false; - - // Check if the value doesn't reference the second vector. - for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { - SDValue Arg = N->getOperand(i); - if (Arg.getOpcode() == ISD::UNDEF) continue; - assert(isa(Arg) && "Invalid VECTOR_SHUFFLE mask!"); - if (cast(Arg)->getZExtValue() >= e) - return false; - } - - return true; +/// isPSHUFDMask - Return true if the node specifies a shuffle of elements that +/// is suitable for input to PSHUFD or PSHUFW. That is, it doesn't reference +/// the second operand. +static bool isPSHUFDMask(SmallVectorImpl &Mask, MVT VT) { + if (VT == MVT::v4f32 || VT == MVT::v4i32 || VT == MVT::v4i16) + return (Mask[0] < 4 && Mask[1] < 4 && Mask[2] < 4 && Mask[3] < 4); + if (VT == MVT::v2f64 || VT == MVT::v2i64) + return (Mask[0] < 2 && Mask[1] < 2); + return false; } -/// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand -/// specifies a shuffle of elements that is suitable for input to PSHUFHW. -bool X86::isPSHUFHWMask(SDNode *N) { - assert(N->getOpcode() == ISD::BUILD_VECTOR); +bool X86::isPSHUFDMask(ShuffleVectorSDNode *N) { + SmallVector M; + N->getMask(M); + return ::isPSHUFDMask(M, N->getValueType(0)); +} - if (N->getNumOperands() != 8) +/// isPSHUFHWMask - Return true if the node specifies a shuffle of elements that +/// is suitable for input to PSHUFHW. +static bool isPSHUFHWMask(SmallVectorImpl &Mask, MVT VT) { + if (VT != MVT::v8i16) return false; - - // Lower quadword copied in order. 
- for (unsigned i = 0; i != 4; ++i) { - SDValue Arg = N->getOperand(i); - if (Arg.getOpcode() == ISD::UNDEF) continue; - assert(isa(Arg) && "Invalid VECTOR_SHUFFLE mask!"); - if (cast(Arg)->getZExtValue() != i) + + // Lower quadword copied in order or undef. + for (int i = 0; i != 4; ++i) + if (Mask[i] >= 0 && Mask[i] != i) return false; - } - + // Upper quadword shuffled. - for (unsigned i = 4; i != 8; ++i) { - SDValue Arg = N->getOperand(i); - if (Arg.getOpcode() == ISD::UNDEF) continue; - assert(isa(Arg) && "Invalid VECTOR_SHUFFLE mask!"); - unsigned Val = cast(Arg)->getZExtValue(); - if (Val < 4 || Val > 7) + for (int i = 4; i != 8; ++i) + if (Mask[i] >= 0 && (Mask[i] < 4 || Mask[i] > 7)) return false; - } - + return true; } -/// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand -/// specifies a shuffle of elements that is suitable for input to PSHUFLW. -bool X86::isPSHUFLWMask(SDNode *N) { - assert(N->getOpcode() == ISD::BUILD_VECTOR); +bool X86::isPSHUFHWMask(ShuffleVectorSDNode *N) { + SmallVector M; + N->getMask(M); + return ::isPSHUFHWMask(M, N->getValueType(0)); +} - if (N->getNumOperands() != 8) +/// isPSHUFLWMask - Return true if the node specifies a shuffle of elements that +/// is suitable for input to PSHUFLW. +static bool isPSHUFLWMask(SmallVectorImpl &Mask, MVT VT) { + if (VT != MVT::v8i16) return false; - + // Upper quadword copied in order. - for (unsigned i = 4; i != 8; ++i) - if (!isUndefOrEqual(N->getOperand(i), i)) + for (int i = 4; i != 8; ++i) + if (Mask[i] >= 0 && Mask[i] != i) return false; - + // Lower quadword shuffled. 
- for (unsigned i = 0; i != 4; ++i) - if (!isUndefOrInRange(N->getOperand(i), 0, 4)) + for (int i = 0; i != 4; ++i) + if (Mask[i] >= 4) return false; - + return true; } +bool X86::isPSHUFLWMask(ShuffleVectorSDNode *N) { + SmallVector M; + N->getMask(M); + return ::isPSHUFLWMask(M, N->getValueType(0)); +} + /// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to SHUFP*. -template -static bool isSHUFPMask(SDOperand *Elems, unsigned NumElems) { - if (NumElems != 2 && NumElems != 4) return false; - - unsigned Half = NumElems / 2; - for (unsigned i = 0; i < Half; ++i) - if (!isUndefOrInRange(Elems[i], 0, NumElems)) +static bool isSHUFPMask(SmallVectorImpl &Mask, MVT VT) { + int NumElems = VT.getVectorNumElements(); + if (NumElems != 2 && NumElems != 4) + return false; + + int Half = NumElems / 2; + for (int i = 0; i < Half; ++i) + if (!isUndefOrInRange(Mask[i], 0, NumElems)) return false; - for (unsigned i = Half; i < NumElems; ++i) - if (!isUndefOrInRange(Elems[i], NumElems, NumElems*2)) + for (int i = Half; i < NumElems; ++i) + if (!isUndefOrInRange(Mask[i], NumElems, NumElems*2)) return false; - + return true; } -bool X86::isSHUFPMask(SDNode *N) { - assert(N->getOpcode() == ISD::BUILD_VECTOR); - return ::isSHUFPMask(N->op_begin(), N->getNumOperands()); +bool X86::isSHUFPMask(ShuffleVectorSDNode *N) { + SmallVector M; + N->getMask(M); + return ::isSHUFPMask(M, N->getValueType(0)); } /// isCommutedSHUFP - Returns true if the shuffle mask is exactly /// the reverse of what x86 shuffles want. x86 shuffles requires the lower /// half elements to come from vector 1 (which would equal the dest.) and /// the upper half to come from vector 2. 
-template -static bool isCommutedSHUFP(SDOperand *Ops, unsigned NumOps) { - if (NumOps != 2 && NumOps != 4) return false; - - unsigned Half = NumOps / 2; - for (unsigned i = 0; i < Half; ++i) - if (!isUndefOrInRange(Ops[i], NumOps, NumOps*2)) +static bool isCommutedSHUFPMask(SmallVectorImpl &Mask, MVT VT) { + int NumElems = VT.getVectorNumElements(); + + if (NumElems != 2 && NumElems != 4) + return false; + + int Half = NumElems / 2; + for (int i = 0; i < Half; ++i) + if (!isUndefOrInRange(Mask[i], NumElems, NumElems*2)) return false; - for (unsigned i = Half; i < NumOps; ++i) - if (!isUndefOrInRange(Ops[i], 0, NumOps)) + for (int i = Half; i < NumElems; ++i) + if (!isUndefOrInRange(Mask[i], 0, NumElems)) return false; return true; } -static bool isCommutedSHUFP(SDNode *N) { - assert(N->getOpcode() == ISD::BUILD_VECTOR); - return isCommutedSHUFP(N->op_begin(), N->getNumOperands()); +static bool isCommutedSHUFP(ShuffleVectorSDNode *N) { + SmallVector M; + N->getMask(M); + return isCommutedSHUFPMask(M, N->getValueType(0)); } /// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVHLPS. -bool X86::isMOVHLPSMask(SDNode *N) { - assert(N->getOpcode() == ISD::BUILD_VECTOR); - - if (N->getNumOperands() != 4) +bool X86::isMOVHLPSMask(ShuffleVectorSDNode *N) { + if (N->getValueType(0).getVectorNumElements() != 4) return false; // Expect bit0 == 6, bit1 == 7, bit2 == 2, bit3 == 3 - return isUndefOrEqual(N->getOperand(0), 6) && - isUndefOrEqual(N->getOperand(1), 7) && - isUndefOrEqual(N->getOperand(2), 2) && - isUndefOrEqual(N->getOperand(3), 3); -} - -/// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form -/// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. 
vector_shuffle v, undef, -/// <2, 3, 2, 3> -bool X86::isMOVHLPS_v_undef_Mask(SDNode *N) { - assert(N->getOpcode() == ISD::BUILD_VECTOR); - - if (N->getNumOperands() != 4) - return false; - - // Expect bit0 == 2, bit1 == 3, bit2 == 2, bit3 == 3 - return isUndefOrEqual(N->getOperand(0), 2) && - isUndefOrEqual(N->getOperand(1), 3) && - isUndefOrEqual(N->getOperand(2), 2) && - isUndefOrEqual(N->getOperand(3), 3); + return isUndefOrEqual(N->getMaskElt(0), 6) && + isUndefOrEqual(N->getMaskElt(1), 7) && + isUndefOrEqual(N->getMaskElt(2), 2) && + isUndefOrEqual(N->getMaskElt(3), 3); } /// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}. -bool X86::isMOVLPMask(SDNode *N) { - assert(N->getOpcode() == ISD::BUILD_VECTOR); +bool X86::isMOVLPMask(ShuffleVectorSDNode *N) { + unsigned NumElems = N->getValueType(0).getVectorNumElements(); - unsigned NumElems = N->getNumOperands(); if (NumElems != 2 && NumElems != 4) return false; for (unsigned i = 0; i < NumElems/2; ++i) - if (!isUndefOrEqual(N->getOperand(i), i + NumElems)) + if (!isUndefOrEqual(N->getMaskElt(i), i + NumElems)) return false; for (unsigned i = NumElems/2; i < NumElems; ++i) - if (!isUndefOrEqual(N->getOperand(i), i)) + if (!isUndefOrEqual(N->getMaskElt(i), i)) return false; return true; @@ -2326,37 +2303,49 @@ /// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVHP{S|D} /// and MOVLHPS. 
-bool X86::isMOVHPMask(SDNode *N) { - assert(N->getOpcode() == ISD::BUILD_VECTOR); +bool X86::isMOVHPMask(ShuffleVectorSDNode *N) { + unsigned NumElems = N->getValueType(0).getVectorNumElements(); - unsigned NumElems = N->getNumOperands(); if (NumElems != 2 && NumElems != 4) return false; for (unsigned i = 0; i < NumElems/2; ++i) - if (!isUndefOrEqual(N->getOperand(i), i)) + if (!isUndefOrEqual(N->getMaskElt(i), i)) return false; - for (unsigned i = 0; i < NumElems/2; ++i) { - SDValue Arg = N->getOperand(i + NumElems/2); - if (!isUndefOrEqual(Arg, i + NumElems)) + for (unsigned i = 0; i < NumElems/2; ++i) + if (!isUndefOrEqual(N->getMaskElt(i + NumElems/2), i + NumElems)) return false; - } return true; } +/// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form +/// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef, +/// <2, 3, 2, 3> +bool X86::isMOVHLPS_v_undef_Mask(ShuffleVectorSDNode *N) { + unsigned NumElems = N->getValueType(0).getVectorNumElements(); + + if (NumElems != 4) + return false; + + return isUndefOrEqual(N->getMaskElt(0), 2) && + isUndefOrEqual(N->getMaskElt(1), 3) && + isUndefOrEqual(N->getMaskElt(2), 2) && + isUndefOrEqual(N->getMaskElt(3), 3); +} + /// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to UNPCKL. 
-template -bool static isUNPCKLMask(SDOperand *Elts, unsigned NumElts, +static bool isUNPCKLMask(SmallVectorImpl &Mask, MVT VT, bool V2IsSplat = false) { + int NumElts = VT.getVectorNumElements(); if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16) return false; - - for (unsigned i = 0, j = 0; i != NumElts; i += 2, ++j) { - SDValue BitI = Elts[i]; - SDValue BitI1 = Elts[i+1]; + + for (int i = 0, j = 0; i != NumElts; i += 2, ++j) { + int BitI = Mask[i]; + int BitI1 = Mask[i+1]; if (!isUndefOrEqual(BitI, j)) return false; if (V2IsSplat) { @@ -2367,26 +2356,26 @@ return false; } } - return true; } -bool X86::isUNPCKLMask(SDNode *N, bool V2IsSplat) { - assert(N->getOpcode() == ISD::BUILD_VECTOR); - return ::isUNPCKLMask(N->op_begin(), N->getNumOperands(), V2IsSplat); +bool X86::isUNPCKLMask(ShuffleVectorSDNode *N, bool V2IsSplat) { + SmallVector M; + N->getMask(M); + return ::isUNPCKLMask(M, N->getValueType(0), V2IsSplat); } /// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to UNPCKH. 
-template -bool static isUNPCKHMask(SDOperand *Elts, unsigned NumElts, +static bool isUNPCKHMask(SmallVectorImpl &Mask, MVT VT, bool V2IsSplat = false) { + int NumElts = VT.getVectorNumElements(); if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16) return false; - - for (unsigned i = 0, j = 0; i != NumElts; i += 2, ++j) { - SDValue BitI = Elts[i]; - SDValue BitI1 = Elts[i+1]; + + for (int i = 0, j = 0; i != NumElts; i += 2, ++j) { + int BitI = Mask[i]; + int BitI1 = Mask[i+1]; if (!isUndefOrEqual(BitI, j + NumElts/2)) return false; if (V2IsSplat) { @@ -2397,270 +2386,176 @@ return false; } } - return true; } -bool X86::isUNPCKHMask(SDNode *N, bool V2IsSplat) { - assert(N->getOpcode() == ISD::BUILD_VECTOR); - return ::isUNPCKHMask(N->op_begin(), N->getNumOperands(), V2IsSplat); +bool X86::isUNPCKHMask(ShuffleVectorSDNode *N, bool V2IsSplat) { + SmallVector M; + N->getMask(M); + return ::isUNPCKHMask(M, N->getValueType(0), V2IsSplat); } /// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form /// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. 
vector_shuffle v, undef, /// <0, 0, 1, 1> -bool X86::isUNPCKL_v_undef_Mask(SDNode *N) { - assert(N->getOpcode() == ISD::BUILD_VECTOR); - - unsigned NumElems = N->getNumOperands(); +static bool isUNPCKL_v_undef_Mask(SmallVectorImpl &Mask, MVT VT) { + int NumElems = VT.getVectorNumElements(); if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16) return false; - - for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) { - SDValue BitI = N->getOperand(i); - SDValue BitI1 = N->getOperand(i+1); - + + for (int i = 0, j = 0; i != NumElems; i += 2, ++j) { + int BitI = Mask[i]; + int BitI1 = Mask[i+1]; if (!isUndefOrEqual(BitI, j)) return false; if (!isUndefOrEqual(BitI1, j)) return false; } - return true; } +bool X86::isUNPCKL_v_undef_Mask(ShuffleVectorSDNode *N) { + SmallVector M; + N->getMask(M); + return ::isUNPCKL_v_undef_Mask(M, N->getValueType(0)); +} + /// isUNPCKH_v_undef_Mask - Special case of isUNPCKHMask for canonical form /// of vector_shuffle v, v, <2, 6, 3, 7>, i.e. 
vector_shuffle v, undef, /// <2, 2, 3, 3> -bool X86::isUNPCKH_v_undef_Mask(SDNode *N) { - assert(N->getOpcode() == ISD::BUILD_VECTOR); - - unsigned NumElems = N->getNumOperands(); +static bool isUNPCKH_v_undef_Mask(SmallVectorImpl &Mask, MVT VT) { + int NumElems = VT.getVectorNumElements(); if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16) return false; - - for (unsigned i = 0, j = NumElems / 2; i != NumElems; i += 2, ++j) { - SDValue BitI = N->getOperand(i); - SDValue BitI1 = N->getOperand(i + 1); - + + for (int i = 0, j = NumElems / 2; i != NumElems; i += 2, ++j) { + int BitI = Mask[i]; + int BitI1 = Mask[i+1]; if (!isUndefOrEqual(BitI, j)) return false; if (!isUndefOrEqual(BitI1, j)) return false; } - return true; } +bool X86::isUNPCKH_v_undef_Mask(ShuffleVectorSDNode *N) { + SmallVector M; + N->getMask(M); + return ::isUNPCKH_v_undef_Mask(M, N->getValueType(0)); +} + /// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVSS, /// MOVSD, and MOVD, i.e. setting the lowest element. -template -static bool isMOVLMask(SDOperand *Elts, unsigned NumElts) { +static bool isMOVLMask(SmallVectorImpl &Mask, MVT VT) { + int NumElts = VT.getVectorNumElements(); if (NumElts != 2 && NumElts != 4) return false; - - if (!isUndefOrEqual(Elts[0], NumElts)) + + if (!isUndefOrEqual(Mask[0], NumElts)) return false; - - for (unsigned i = 1; i < NumElts; ++i) { - if (!isUndefOrEqual(Elts[i], i)) + + for (int i = 1; i < NumElts; ++i) + if (!isUndefOrEqual(Mask[i], i)) return false; - } - + return true; } -bool X86::isMOVLMask(SDNode *N) { - assert(N->getOpcode() == ISD::BUILD_VECTOR); - return ::isMOVLMask(N->op_begin(), N->getNumOperands()); +bool X86::isMOVLMask(ShuffleVectorSDNode *N) { + SmallVector M; + N->getMask(M); + return ::isMOVLMask(M, N->getValueType(0)); } /// isCommutedMOVL - Returns true if the shuffle mask is except the reverse /// of what x86 movss want. 
X86 movs requires the lowest element to be lowest /// element of vector 2 and the other elements to come from vector 1 in order. -template -static bool isCommutedMOVL(SDOperand *Ops, unsigned NumOps, - bool V2IsSplat = false, - bool V2IsUndef = false) { +static bool isCommutedMOVLMask(SmallVectorImpl &Mask, MVT VT, + bool V2IsSplat = false, bool V2IsUndef = false) { + int NumOps = VT.getVectorNumElements(); if (NumOps != 2 && NumOps != 4 && NumOps != 8 && NumOps != 16) return false; - - if (!isUndefOrEqual(Ops[0], 0)) + + if (!isUndefOrEqual(Mask[0], 0)) return false; - - for (unsigned i = 1; i < NumOps; ++i) { - SDValue Arg = Ops[i]; - if (!(isUndefOrEqual(Arg, i+NumOps) || - (V2IsUndef && isUndefOrInRange(Arg, NumOps, NumOps*2)) || - (V2IsSplat && isUndefOrEqual(Arg, NumOps)))) + + for (int i = 1; i < NumOps; ++i) + if (!(isUndefOrEqual(Mask[i], i+NumOps) || + (V2IsUndef && isUndefOrInRange(Mask[i], NumOps, NumOps*2)) || + (V2IsSplat && isUndefOrEqual(Mask[i], NumOps)))) return false; - } - + return true; } -static bool isCommutedMOVL(SDNode *N, bool V2IsSplat = false, +static bool isCommutedMOVL(ShuffleVectorSDNode *N, bool V2IsSplat = false, bool V2IsUndef = false) { - assert(N->getOpcode() == ISD::BUILD_VECTOR); - return isCommutedMOVL(N->op_begin(), N->getNumOperands(), - V2IsSplat, V2IsUndef); + SmallVector M; + N->getMask(M); + return isCommutedMOVLMask(M, N->getValueType(0), V2IsSplat, V2IsUndef); } /// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVSHDUP. 
-bool X86::isMOVSHDUPMask(SDNode *N) { - assert(N->getOpcode() == ISD::BUILD_VECTOR); - - if (N->getNumOperands() != 4) +bool X86::isMOVSHDUPMask(ShuffleVectorSDNode *N) { + if (N->getValueType(0).getVectorNumElements() != 4) return false; // Expect 1, 1, 3, 3 for (unsigned i = 0; i < 2; ++i) { - SDValue Arg = N->getOperand(i); - if (Arg.getOpcode() == ISD::UNDEF) continue; - assert(isa(Arg) && "Invalid VECTOR_SHUFFLE mask!"); - unsigned Val = cast(Arg)->getZExtValue(); - if (Val != 1) return false; + int Elt = N->getMaskElt(i); + if (Elt >= 0 && Elt != 1) + return false; } bool HasHi = false; for (unsigned i = 2; i < 4; ++i) { - SDValue Arg = N->getOperand(i); - if (Arg.getOpcode() == ISD::UNDEF) continue; - assert(isa(Arg) && "Invalid VECTOR_SHUFFLE mask!"); - unsigned Val = cast(Arg)->getZExtValue(); - if (Val != 3) return false; - HasHi = true; + int Elt = N->getMaskElt(i); + if (Elt >= 0 && Elt != 3) + return false; + if (Elt == 3) + HasHi = true; } - // Don't use movshdup if it can be done with a shufps. + // FIXME: verify that matching u, u, 3, 3 is what we want. return HasHi; } /// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVSLDUP. 
-bool X86::isMOVSLDUPMask(SDNode *N) { - assert(N->getOpcode() == ISD::BUILD_VECTOR); - - if (N->getNumOperands() != 4) +bool X86::isMOVSLDUPMask(ShuffleVectorSDNode *N) { + if (N->getValueType(0).getVectorNumElements() != 4) return false; // Expect 0, 0, 2, 2 - for (unsigned i = 0; i < 2; ++i) { - SDValue Arg = N->getOperand(i); - if (Arg.getOpcode() == ISD::UNDEF) continue; - assert(isa(Arg) && "Invalid VECTOR_SHUFFLE mask!"); - unsigned Val = cast(Arg)->getZExtValue(); - if (Val != 0) return false; - } + for (unsigned i = 0; i < 2; ++i) + if (N->getMaskElt(i) > 0) + return false; bool HasHi = false; for (unsigned i = 2; i < 4; ++i) { - SDValue Arg = N->getOperand(i); - if (Arg.getOpcode() == ISD::UNDEF) continue; - assert(isa(Arg) && "Invalid VECTOR_SHUFFLE mask!"); - unsigned Val = cast(Arg)->getZExtValue(); - if (Val != 2) return false; - HasHi = true; - } - - // Don't use movshdup if it can be done with a shufps. - return HasHi; -} - -/// isIdentityMask - Return true if the specified VECTOR_SHUFFLE operand -/// specifies a identity operation on the LHS or RHS. -static bool isIdentityMask(SDNode *N, bool RHS = false) { - unsigned NumElems = N->getNumOperands(); - for (unsigned i = 0; i < NumElems; ++i) - if (!isUndefOrEqual(N->getOperand(i), i + (RHS ? NumElems : 0))) + int Elt = N->getMaskElt(i); + if (Elt >= 0 && Elt != 2) return false; - return true; -} - -/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies -/// a splat of a single element. -static bool isSplatMask(SDNode *N) { - assert(N->getOpcode() == ISD::BUILD_VECTOR); - - // This is a splat operation if each element of the permute is the same, and - // if the value doesn't reference the second vector. 
- unsigned NumElems = N->getNumOperands(); - SDValue ElementBase; - unsigned i = 0; - for (; i != NumElems; ++i) { - SDValue Elt = N->getOperand(i); - if (isa(Elt)) { - ElementBase = Elt; - break; - } - } - - if (!ElementBase.getNode()) - return false; - - for (; i != NumElems; ++i) { - SDValue Arg = N->getOperand(i); - if (Arg.getOpcode() == ISD::UNDEF) continue; - assert(isa(Arg) && "Invalid VECTOR_SHUFFLE mask!"); - if (Arg != ElementBase) return false; - } - - // Make sure it is a splat of the first vector operand. - return cast(ElementBase)->getZExtValue() < NumElems; -} - -/// getSplatMaskEltNo - Given a splat mask, return the index to the element -/// we want to splat. -static SDValue getSplatMaskEltNo(SDNode *N) { - assert(isSplatMask(N) && "Not a splat mask"); - unsigned NumElems = N->getNumOperands(); - SDValue ElementBase; - unsigned i = 0; - for (; i != NumElems; ++i) { - SDValue Elt = N->getOperand(i); - if (isa(Elt)) - return Elt; + if (Elt == 2) + HasHi = true; } - assert(0 && " No splat value found!"); - return SDValue(); -} - - -/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies -/// a splat of a single element and it's a 2 or 4 element mask. -bool X86::isSplatMask(SDNode *N) { - assert(N->getOpcode() == ISD::BUILD_VECTOR); - - // We can only splat 64-bit, and 32-bit quantities with a single instruction. - if (N->getNumOperands() != 4 && N->getNumOperands() != 2) - return false; - return ::isSplatMask(N); -} - -/// isSplatLoMask - Return true if the specified VECTOR_SHUFFLE operand -/// specifies a splat of zero element. -bool X86::isSplatLoMask(SDNode *N) { - assert(N->getOpcode() == ISD::BUILD_VECTOR); - - for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i) - if (!isUndefOrEqual(N->getOperand(i), 0)) - return false; - return true; + // Don't use movsldup if it can be done with a shufps. 
+ return HasHi; } /// isMOVDDUPMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVDDUP. -bool X86::isMOVDDUPMask(SDNode *N) { - assert(N->getOpcode() == ISD::BUILD_VECTOR); - - unsigned e = N->getNumOperands() / 2; - for (unsigned i = 0; i < e; ++i) - if (!isUndefOrEqual(N->getOperand(i), i)) +bool X86::isMOVDDUPMask(ShuffleVectorSDNode *N) { + int e = N->getValueType(0).getVectorNumElements() / 2; + + for (int i = 0; i < e; ++i) + if (!isUndefOrEqual(N->getMaskElt(i), i)) return false; - for (unsigned i = 0; i < e; ++i) - if (!isUndefOrEqual(N->getOperand(e+i), i)) + for (int i = 0; i < e; ++i) + if (!isUndefOrEqual(N->getMaskElt(e+i), i)) return false; return true; } @@ -2669,20 +2564,19 @@ /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP* /// instructions. unsigned X86::getShuffleSHUFImmediate(SDNode *N) { - unsigned NumOperands = N->getNumOperands(); + ShuffleVectorSDNode *SVOp = cast(N); + int NumOperands = SVOp->getValueType(0).getVectorNumElements(); + unsigned Shift = (NumOperands == 4) ? 2 : 1; unsigned Mask = 0; - for (unsigned i = 0; i < NumOperands; ++i) { - unsigned Val = 0; - SDValue Arg = N->getOperand(NumOperands-i-1); - if (Arg.getOpcode() != ISD::UNDEF) - Val = cast(Arg)->getZExtValue(); + for (int i = 0; i < NumOperands; ++i) { + int Val = SVOp->getMaskElt(NumOperands-i-1); + if (Val < 0) Val = 0; if (Val >= NumOperands) Val -= NumOperands; Mask |= Val; if (i != NumOperands - 1) Mask <<= Shift; } - return Mask; } @@ -2690,19 +2584,16 @@ /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFHW /// instructions. unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) { + ShuffleVectorSDNode *SVOp = cast(N); unsigned Mask = 0; // 8 nodes, but we only care about the last 4. 
for (unsigned i = 7; i >= 4; --i) { - unsigned Val = 0; - SDValue Arg = N->getOperand(i); - if (Arg.getOpcode() != ISD::UNDEF) { - Val = cast(Arg)->getZExtValue(); + int Val = SVOp->getMaskElt(i); + if (Val >= 0) Mask |= (Val - 4); - } if (i != 4) Mask <<= 2; } - return Mask; } @@ -2710,90 +2601,67 @@ /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFLW /// instructions. unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) { + ShuffleVectorSDNode *SVOp = cast(N); unsigned Mask = 0; // 8 nodes, but we only care about the first 4. for (int i = 3; i >= 0; --i) { - unsigned Val = 0; - SDValue Arg = N->getOperand(i); - if (Arg.getOpcode() != ISD::UNDEF) - Val = cast(Arg)->getZExtValue(); - Mask |= Val; + int Val = SVOp->getMaskElt(i); + if (Val >= 0) + Mask |= Val; if (i != 0) Mask <<= 2; } - return Mask; } -/// CommuteVectorShuffle - Swap vector_shuffle operands as well as -/// values in ther permute mask. -static SDValue CommuteVectorShuffle(SDValue Op, SDValue &V1, - SDValue &V2, SDValue &Mask, - SelectionDAG &DAG) { - MVT VT = Op.getValueType(); - MVT MaskVT = Mask.getValueType(); - MVT EltVT = MaskVT.getVectorElementType(); - unsigned NumElems = Mask.getNumOperands(); - SmallVector MaskVec; - DebugLoc dl = Op.getDebugLoc(); - - for (unsigned i = 0; i != NumElems; ++i) { - SDValue Arg = Mask.getOperand(i); - if (Arg.getOpcode() == ISD::UNDEF) { - MaskVec.push_back(DAG.getUNDEF(EltVT)); - continue; - } - assert(isa(Arg) && "Invalid VECTOR_SHUFFLE mask!"); - unsigned Val = cast(Arg)->getZExtValue(); - if (Val < NumElems) - MaskVec.push_back(DAG.getConstant(Val + NumElems, EltVT)); +/// CommuteVectorShuffle - Swap vector_shuffle operands as well as values in +/// their permute mask. 
+static SDValue CommuteVectorShuffle(ShuffleVectorSDNode *SVOp, + SelectionDAG &DAG) { + MVT VT = SVOp->getValueType(0); + int NumElems = VT.getVectorNumElements(); + SmallVector MaskVec; + + for (int i = 0; i != NumElems; ++i) { + int idx = SVOp->getMaskElt(i); + if (idx < 0) + MaskVec.push_back(idx); + else if (idx < NumElems) + MaskVec.push_back(idx + NumElems); else - MaskVec.push_back(DAG.getConstant(Val - NumElems, EltVT)); + MaskVec.push_back(idx - NumElems); } - - std::swap(V1, V2); - Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, &MaskVec[0], NumElems); - return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V2, Mask); + return DAG.getVectorShuffle(VT, SVOp->getDebugLoc(), SVOp->getOperand(1), + SVOp->getOperand(0), &MaskVec[0]); } /// CommuteVectorShuffleMask - Change values in a shuffle permute mask assuming /// the two vector operands have swapped position. -static -SDValue CommuteVectorShuffleMask(SDValue Mask, SelectionDAG &DAG, DebugLoc dl) { - MVT MaskVT = Mask.getValueType(); - MVT EltVT = MaskVT.getVectorElementType(); - unsigned NumElems = Mask.getNumOperands(); - SmallVector MaskVec; - for (unsigned i = 0; i != NumElems; ++i) { - SDValue Arg = Mask.getOperand(i); - if (Arg.getOpcode() == ISD::UNDEF) { - MaskVec.push_back(DAG.getUNDEF(EltVT)); +static void CommuteVectorShuffleMask(SmallVectorImpl &Mask, MVT VT) { + int NumElems = VT.getVectorNumElements(); + for (int i = 0; i != NumElems; ++i) { + int idx = Mask[i]; + if (idx < 0) continue; - } - assert(isa(Arg) && "Invalid VECTOR_SHUFFLE mask!"); - unsigned Val = cast(Arg)->getZExtValue(); - if (Val < NumElems) - MaskVec.push_back(DAG.getConstant(Val + NumElems, EltVT)); + else if (idx < NumElems) + Mask[i] = idx + NumElems; else - MaskVec.push_back(DAG.getConstant(Val - NumElems, EltVT)); + Mask[i] = idx - NumElems; } - return DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, &MaskVec[0], NumElems); } - /// ShouldXformToMOVHLPS - Return true if the node should be transformed to /// match movhlps. 
The lower half elements should come from upper half of /// V1 (and in order), and the upper half elements should come from the upper /// half of V2 (and in order). -static bool ShouldXformToMOVHLPS(SDNode *Mask) { - unsigned NumElems = Mask->getNumOperands(); - if (NumElems != 4) +static bool ShouldXformToMOVHLPS(ShuffleVectorSDNode *Op) { + if (Op->getValueType(0).getVectorNumElements() != 4) return false; for (unsigned i = 0, e = 2; i != e; ++i) - if (!isUndefOrEqual(Mask->getOperand(i), i+2)) + if (!isUndefOrEqual(Op->getMaskElt(i), i+2)) return false; for (unsigned i = 2; i != 4; ++i) - if (!isUndefOrEqual(Mask->getOperand(i), i+4)) + if (!isUndefOrEqual(Op->getMaskElt(i), i+4)) return false; return true; } @@ -2817,7 +2685,8 @@ /// V1 (and in order), and the upper half elements should come from the upper /// half of V2 (and in order). And since V1 will become the source of the /// MOVLP, it must be either a vector load or a scalar load to vector. -static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2, SDNode *Mask) { +static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2, + ShuffleVectorSDNode *Op) { if (!ISD::isNON_EXTLoad(V1) && !isScalarLoadToVector(V1)) return false; // Is V2 is a vector load, don't do this transformation. 
We will try to use @@ -2825,14 +2694,15 @@ if (ISD::isNON_EXTLoad(V2)) return false; - unsigned NumElems = Mask->getNumOperands(); + int NumElems = Op->getValueType(0).getVectorNumElements(); + if (NumElems != 2 && NumElems != 4) return false; - for (unsigned i = 0, e = NumElems/2; i != e; ++i) - if (!isUndefOrEqual(Mask->getOperand(i), i)) + for (int i = 0, e = NumElems/2; i != e; ++i) + if (!isUndefOrEqual(Op->getMaskElt(i), i)) return false; - for (unsigned i = NumElems/2; i != NumElems; ++i) - if (!isUndefOrEqual(Mask->getOperand(i), i+NumElems)) + for (int i = NumElems/2; i != NumElems; ++i) + if (!isUndefOrEqual(Op->getMaskElt(i), i+NumElems)) return false; return true; } @@ -2850,29 +2720,6 @@ return true; } -/// isUndefShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved -/// to an undef. -static bool isUndefShuffle(SDNode *N) { - if (N->getOpcode() != ISD::VECTOR_SHUFFLE) - return false; - - SDValue V1 = N->getOperand(0); - SDValue V2 = N->getOperand(1); - SDValue Mask = N->getOperand(2); - unsigned NumElems = Mask.getNumOperands(); - for (unsigned i = 0; i != NumElems; ++i) { - SDValue Arg = Mask.getOperand(i); - if (Arg.getOpcode() != ISD::UNDEF) { - unsigned Val = cast(Arg)->getZExtValue(); - if (Val < NumElems && V1.getOpcode() != ISD::UNDEF) - return false; - else if (Val >= NumElems && V2.getOpcode() != ISD::UNDEF) - return false; - } - } - return true; -} - /// isZeroNode - Returns true if Elt is a constant zero or a floating point /// constant +0.0. static inline bool isZeroNode(SDValue Elt) { @@ -2883,34 +2730,25 @@ } /// isZeroShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved -/// to an zero vector. -static bool isZeroShuffle(SDNode *N) { - if (N->getOpcode() != ISD::VECTOR_SHUFFLE) - return false; - +/// to an zero vector. +/// FIXME: move to dag combiner? 
+static bool isZeroShuffle(ShuffleVectorSDNode *N) { SDValue V1 = N->getOperand(0); SDValue V2 = N->getOperand(1); - SDValue Mask = N->getOperand(2); - unsigned NumElems = Mask.getNumOperands(); - for (unsigned i = 0; i != NumElems; ++i) { - SDValue Arg = Mask.getOperand(i); - if (Arg.getOpcode() == ISD::UNDEF) - continue; - - unsigned Idx = cast(Arg)->getZExtValue(); - if (Idx < NumElems) { - unsigned Opc = V1.getNode()->getOpcode(); - if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V1.getNode())) + int NumElems = N->getValueType(0).getVectorNumElements(); + for (int i = 0; i != NumElems; ++i) { + int Idx = N->getMaskElt(i); + if (Idx >= NumElems) { + unsigned Opc = V2.getOpcode(); + if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V2.getNode())) continue; - if (Opc != ISD::BUILD_VECTOR || - !isZeroNode(V1.getNode()->getOperand(Idx))) + if (Opc != ISD::BUILD_VECTOR || !isZeroNode(V2.getOperand(Idx-NumElems))) return false; - } else if (Idx >= NumElems) { - unsigned Opc = V2.getNode()->getOpcode(); - if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V2.getNode())) + } else if (Idx >= 0) { + unsigned Opc = V1.getOpcode(); + if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V1.getNode())) continue; - if (Opc != ISD::BUILD_VECTOR || - !isZeroNode(V2.getNode()->getOperand(Idx - NumElems))) + if (Opc != ISD::BUILD_VECTOR || !isZeroNode(V1.getOperand(Idx))) return false; } } @@ -2958,127 +2796,92 @@ /// NormalizeMask - V2 is a splat, modify the mask (if needed) so all elements /// that point to V2 points to its first element. 
-static SDValue NormalizeMask(SDValue Mask, SelectionDAG &DAG) { - assert(Mask.getOpcode() == ISD::BUILD_VECTOR); - +static SDValue NormalizeMask(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) { + MVT VT = SVOp->getValueType(0); + int NumElems = VT.getVectorNumElements(); + bool Changed = false; - SmallVector MaskVec; - unsigned NumElems = Mask.getNumOperands(); - for (unsigned i = 0; i != NumElems; ++i) { - SDValue Arg = Mask.getOperand(i); - if (Arg.getOpcode() != ISD::UNDEF) { - unsigned Val = cast(Arg)->getZExtValue(); - if (Val > NumElems) { - Arg = DAG.getConstant(NumElems, Arg.getValueType()); - Changed = true; - } + SmallVector MaskVec; + SVOp->getMask(MaskVec); + + for (int i = 0; i != NumElems; ++i) { + if (MaskVec[i] > NumElems) { + MaskVec[i] = NumElems; + Changed = true; } - MaskVec.push_back(Arg); } - if (Changed) - Mask = DAG.getNode(ISD::BUILD_VECTOR, Mask.getDebugLoc(), - Mask.getValueType(), - &MaskVec[0], MaskVec.size()); - return Mask; + return DAG.getVectorShuffle(VT, SVOp->getDebugLoc(), SVOp->getOperand(0), + SVOp->getOperand(1), &MaskVec[0]); + return SDValue(SVOp, 0); } /// getMOVLMask - Returns a vector_shuffle mask for an movs{s|d}, movd /// operation of specified width. 
-static SDValue getMOVLMask(unsigned NumElems, SelectionDAG &DAG, DebugLoc dl) { - MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems); - MVT BaseVT = MaskVT.getVectorElementType(); - - SmallVector MaskVec; - MaskVec.push_back(DAG.getConstant(NumElems, BaseVT)); +static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, MVT VT, SDValue V1, + SDValue V2) { + unsigned NumElems = VT.getVectorNumElements(); + SmallVector Mask; + Mask.push_back(NumElems); for (unsigned i = 1; i != NumElems; ++i) - MaskVec.push_back(DAG.getConstant(i, BaseVT)); - return DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, - &MaskVec[0], MaskVec.size()); + Mask.push_back(i); + return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]); } -/// getUnpacklMask - Returns a vector_shuffle mask for an unpackl operation -/// of specified width. -static SDValue getUnpacklMask(unsigned NumElems, SelectionDAG &DAG, - DebugLoc dl) { - MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems); - MVT BaseVT = MaskVT.getVectorElementType(); - SmallVector MaskVec; +/// getUnpackl - Returns a vector_shuffle node for an unpackl operation. +static SDValue getUnpackl(SelectionDAG &DAG, DebugLoc dl, MVT VT, SDValue V1, + SDValue V2) { + unsigned NumElems = VT.getVectorNumElements(); + SmallVector Mask; for (unsigned i = 0, e = NumElems/2; i != e; ++i) { - MaskVec.push_back(DAG.getConstant(i, BaseVT)); - MaskVec.push_back(DAG.getConstant(i + NumElems, BaseVT)); + Mask.push_back(i); + Mask.push_back(i + NumElems); } - return DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, - &MaskVec[0], MaskVec.size()); + return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]); } -/// getUnpackhMask - Returns a vector_shuffle mask for an unpackh operation -/// of specified width. -static SDValue getUnpackhMask(unsigned NumElems, SelectionDAG &DAG, - DebugLoc dl) { - MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems); - MVT BaseVT = MaskVT.getVectorElementType(); +/// getUnpackhMask - Returns a vector_shuffle node for an unpackh operation. 
+static SDValue getUnpackh(SelectionDAG &DAG, DebugLoc dl, MVT VT, SDValue V1, + SDValue V2) { + unsigned NumElems = VT.getVectorNumElements(); unsigned Half = NumElems/2; - SmallVector MaskVec; + SmallVector Mask; for (unsigned i = 0; i != Half; ++i) { - MaskVec.push_back(DAG.getConstant(i + Half, BaseVT)); - MaskVec.push_back(DAG.getConstant(i + NumElems + Half, BaseVT)); + Mask.push_back(i + Half); + Mask.push_back(i + NumElems + Half); } - return DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, - &MaskVec[0], MaskVec.size()); -} - -/// getSwapEltZeroMask - Returns a vector_shuffle mask for a shuffle that swaps -/// element #0 of a vector with the specified index, leaving the rest of the -/// elements in place. -static SDValue getSwapEltZeroMask(unsigned NumElems, unsigned DestElt, - SelectionDAG &DAG, DebugLoc dl) { - MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems); - MVT BaseVT = MaskVT.getVectorElementType(); - SmallVector MaskVec; - // Element #0 of the result gets the elt we are replacing. - MaskVec.push_back(DAG.getConstant(DestElt, BaseVT)); - for (unsigned i = 1; i != NumElems; ++i) - MaskVec.push_back(DAG.getConstant(i == DestElt ? 0 : i, BaseVT)); - return DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, - &MaskVec[0], MaskVec.size()); + return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]); } /// PromoteSplat - Promote a splat of v4f32, v8i16 or v16i8 to v4i32. -static SDValue PromoteSplat(SDValue Op, SelectionDAG &DAG, bool HasSSE2) { - MVT PVT = HasSSE2 ? MVT::v4i32 : MVT::v4f32; - MVT VT = Op.getValueType(); - if (PVT == VT) - return Op; - SDValue V1 = Op.getOperand(0); - SDValue Mask = Op.getOperand(2); - unsigned MaskNumElems = Mask.getNumOperands(); - unsigned NumElems = MaskNumElems; - DebugLoc dl = Op.getDebugLoc(); - // Special handling of v4f32 -> v4i32. - if (VT != MVT::v4f32) { - // Find which element we want to splat. 
- SDNode* EltNoNode = getSplatMaskEltNo(Mask.getNode()).getNode(); - unsigned EltNo = cast(EltNoNode)->getZExtValue(); - // unpack elements to the correct location - while (NumElems > 4) { - if (EltNo < NumElems/2) { - Mask = getUnpacklMask(MaskNumElems, DAG, dl); - } else { - Mask = getUnpackhMask(MaskNumElems, DAG, dl); - EltNo -= NumElems/2; - } - V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V1, Mask); - NumElems >>= 1; +static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG, + bool HasSSE2) { + if (SV->getValueType(0).getVectorNumElements() <= 4) + return SDValue(SV, 0); + + MVT PVT = MVT::v4f32; + MVT VT = SV->getValueType(0); + DebugLoc dl = SV->getDebugLoc(); + SDValue V1 = SV->getOperand(0); + int NumElems = VT.getVectorNumElements(); + int EltNo = SV->getSplatIndex(); + + // unpack elements to the correct location + while (NumElems > 4) { + if (EltNo < NumElems/2) { + V1 = getUnpackl(DAG, dl, VT, V1, V1); + } else { + V1 = getUnpackh(DAG, dl, VT, V1, V1); + EltNo -= NumElems/2; } - SDValue Cst = DAG.getConstant(EltNo, MVT::i32); - Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst); + NumElems >>= 1; } - + + // Perform the splat. + int SplatMask[4] = { EltNo, EltNo, EltNo, EltNo }; V1 = DAG.getNode(ISD::BIT_CONVERT, dl, PVT, V1); - SDValue Shuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, PVT, V1, - DAG.getUNDEF(PVT), Mask); - return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Shuffle); + V1 = DAG.getVectorShuffle(PVT, dl, V1, DAG.getUNDEF(PVT), &SplatMask[0]); + return DAG.getNode(ISD::BIT_CONVERT, dl, VT, V1); } /// isVectorLoad - Returns true if the node is a vector load, a scalar @@ -3095,32 +2898,28 @@ /// CanonicalizeMovddup - Cannonicalize movddup shuffle to v2f64. 
/// -static SDValue CanonicalizeMovddup(SDValue Op, SDValue V1, SDValue Mask, - SelectionDAG &DAG, bool HasSSE3) { +static SDValue CanonicalizeMovddup(ShuffleVectorSDNode *SV, SelectionDAG &DAG, + bool HasSSE3) { // If we have sse3 and shuffle has more than one use or input is a load, then // use movddup. Otherwise, use movlhps. - bool UseMovddup = HasSSE3 && (!Op.hasOneUse() || isVectorLoad(V1)); + SDValue V1 = SV->getOperand(0); + + bool UseMovddup = HasSSE3 && (!SV->hasOneUse() || isVectorLoad(V1)); MVT PVT = UseMovddup ? MVT::v2f64 : MVT::v4f32; - MVT VT = Op.getValueType(); + MVT VT = SV->getValueType(0); if (VT == PVT) - return Op; - DebugLoc dl = Op.getDebugLoc(); - unsigned NumElems = PVT.getVectorNumElements(); - if (NumElems == 2) { - SDValue Cst = DAG.getTargetConstant(0, MVT::i32); - Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i32, Cst, Cst); + return SDValue(SV, 0); + + DebugLoc dl = SV->getDebugLoc(); + V1 = DAG.getNode(ISD::BIT_CONVERT, dl, PVT, V1); + if (PVT.getVectorNumElements() == 2) { + int Mask[2] = { 0, 0 }; + V1 = DAG.getVectorShuffle(PVT, dl, V1, DAG.getUNDEF(PVT), Mask); } else { - assert(NumElems == 4); - SDValue Cst0 = DAG.getTargetConstant(0, MVT::i32); - SDValue Cst1 = DAG.getTargetConstant(1, MVT::i32); - Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, - Cst0, Cst1, Cst0, Cst1); + int Mask[4] = { 0, 1, 0, 1 }; + V1 = DAG.getVectorShuffle(PVT, dl, V1, DAG.getUNDEF(PVT), Mask); } - - V1 = DAG.getNode(ISD::BIT_CONVERT, dl, PVT, V1); - SDValue Shuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, PVT, V1, - DAG.getUNDEF(PVT), Mask); - return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Shuffle); + return DAG.getNode(ISD::BIT_CONVERT, dl, VT, V1); } /// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified @@ -3130,39 +2929,31 @@ static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx, bool isZero, bool HasSSE2, SelectionDAG &DAG) { - DebugLoc dl = V2.getDebugLoc(); MVT VT = V2.getValueType(); SDValue V1 = isZero 
- ? getZeroVector(VT, HasSSE2, DAG, dl) : DAG.getUNDEF(VT); - unsigned NumElems = V2.getValueType().getVectorNumElements(); - MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems); - MVT EVT = MaskVT.getVectorElementType(); - SmallVector MaskVec; + ? getZeroVector(VT, HasSSE2, DAG, V2.getDebugLoc()) : DAG.getUNDEF(VT); + unsigned NumElems = VT.getVectorNumElements(); + SmallVector MaskVec; for (unsigned i = 0; i != NumElems; ++i) - if (i == Idx) // If this is the insertion idx, put the low elt of V2 here. - MaskVec.push_back(DAG.getConstant(NumElems, EVT)); - else - MaskVec.push_back(DAG.getConstant(i, EVT)); - SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, - &MaskVec[0], MaskVec.size()); - return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V2, Mask); + // If this is the insertion idx, put the low elt of V2 here. + MaskVec.push_back(i == Idx ? NumElems : i); + return DAG.getVectorShuffle(VT, V2.getDebugLoc(), V1, V2, &MaskVec[0]); } /// getNumOfConsecutiveZeros - Return the number of elements in a result of /// a shuffle that is zero. static -unsigned getNumOfConsecutiveZeros(SDValue Op, SDValue Mask, - unsigned NumElems, bool Low, - SelectionDAG &DAG) { +unsigned getNumOfConsecutiveZeros(ShuffleVectorSDNode *SVOp, int NumElems, + bool Low, SelectionDAG &DAG) { unsigned NumZeros = 0; - for (unsigned i = 0; i < NumElems; ++i) { + for (int i = 0; i < NumElems; ++i) { unsigned Index = Low ? i : NumElems-i-1; - SDValue Idx = Mask.getOperand(Index); - if (Idx.getOpcode() == ISD::UNDEF) { + int Idx = SVOp->getMaskElt(Index); + if (Idx < 0) { ++NumZeros; continue; } - SDValue Elt = DAG.getShuffleScalarElt(Op.getNode(), Index); + SDValue Elt = DAG.getShuffleScalarElt(SVOp, Index); if (Elt.getNode() && isZeroNode(Elt)) ++NumZeros; else @@ -3173,40 +2964,39 @@ /// isVectorShift - Returns true if the shuffle can be implemented as a /// logical left or right shift of a vector. 
-static bool isVectorShift(SDValue Op, SDValue Mask, SelectionDAG &DAG, +/// FIXME: split into pslldqi, psrldqi, palignr variants. +static bool isVectorShift(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG, bool &isLeft, SDValue &ShVal, unsigned &ShAmt) { - unsigned NumElems = Mask.getNumOperands(); + int NumElems = SVOp->getValueType(0).getVectorNumElements(); isLeft = true; - unsigned NumZeros= getNumOfConsecutiveZeros(Op, Mask, NumElems, true, DAG); + unsigned NumZeros = getNumOfConsecutiveZeros(SVOp, NumElems, true, DAG); if (!NumZeros) { isLeft = false; - NumZeros = getNumOfConsecutiveZeros(Op, Mask, NumElems, false, DAG); + NumZeros = getNumOfConsecutiveZeros(SVOp, NumElems, false, DAG); if (!NumZeros) return false; } - bool SeenV1 = false; bool SeenV2 = false; - for (unsigned i = NumZeros; i < NumElems; ++i) { - unsigned Val = isLeft ? (i - NumZeros) : i; - SDValue Idx = Mask.getOperand(isLeft ? i : (i - NumZeros)); - if (Idx.getOpcode() == ISD::UNDEF) + for (int i = NumZeros; i < NumElems; ++i) { + int Val = isLeft ? (i - NumZeros) : i; + int Idx = SVOp->getMaskElt(isLeft ? i : (i - NumZeros)); + if (Idx < 0) continue; - unsigned Index = cast(Idx)->getZExtValue(); - if (Index < NumElems) + if (Idx < NumElems) SeenV1 = true; else { - Index -= NumElems; + Idx -= NumElems; SeenV2 = true; } - if (Index != Val) + if (Idx != Val) return false; } if (SeenV1 && SeenV2) return false; - ShVal = SeenV1 ? Op.getOperand(0) : Op.getOperand(1); + ShVal = SeenV1 ? SVOp->getOperand(0) : SVOp->getOperand(1); ShAmt = NumZeros; return true; } @@ -3291,8 +3081,8 @@ /// getVShift - Return a vector logical shift node. /// static SDValue getVShift(bool isLeft, MVT VT, SDValue SrcOp, - unsigned NumBits, SelectionDAG &DAG, - const TargetLowering &TLI, DebugLoc dl) { + unsigned NumBits, SelectionDAG &DAG, + const TargetLowering &TLI, DebugLoc dl) { bool isMMX = VT.getSizeInBits() == 64; MVT ShVT = isMMX ? MVT::v1i64 : MVT::v2i64; unsigned Opc = isLeft ? 
X86ISD::VSHL : X86ISD::VSRL; @@ -3377,11 +3167,13 @@ // Now we have our 32-bit value zero extended in the low element of // a vector. If Idx != 0, swizzle it into place. if (Idx != 0) { - SDValue Ops[] = { - Item, DAG.getUNDEF(Item.getValueType()), - getSwapEltZeroMask(VecElts, Idx, DAG, dl) - }; - Item = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VecVT, Ops, 3); + SmallVector Mask; + Mask.push_back(Idx); + for (unsigned i = 1; i != VecElts; ++i) + Mask.push_back(i); + Item = DAG.getVectorShuffle(VecVT, dl, Item, + DAG.getUNDEF(Item.getValueType()), + &Mask[0]); } return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Item); } @@ -3425,15 +3217,10 @@ // Turn it into a shuffle of zero and zero-extended scalar to vector. Item = getShuffleVectorZeroOrUndef(Item, 0, NumZero > 0, Subtarget->hasSSE2(), DAG); - MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems); - MVT MaskEVT = MaskVT.getVectorElementType(); - SmallVector MaskVec; + SmallVector MaskVec; for (unsigned i = 0; i < NumElems; i++) - MaskVec.push_back(DAG.getConstant((i == Idx) ? 0 : 1, MaskEVT)); - SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, - &MaskVec[0], MaskVec.size()); - return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, Item, - DAG.getUNDEF(VT), Mask); + MaskVec.push_back(i == Idx ? 0 : 1); + return DAG.getVectorShuffle(VT, dl, Item, DAG.getUNDEF(VT), &MaskVec[0]); } } @@ -3491,54 +3278,53 @@ V[i] = V[i*2]; // Must be a zero vector. 
break; case 1: - V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V[i*2+1], V[i*2], - getMOVLMask(NumElems, DAG, dl)); + V[i] = getMOVL(DAG, dl, VT, V[i*2+1], V[i*2]); break; case 2: - V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V[i*2], V[i*2+1], - getMOVLMask(NumElems, DAG, dl)); + V[i] = getMOVL(DAG, dl, VT, V[i*2], V[i*2+1]); break; case 3: - V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V[i*2], V[i*2+1], - getUnpacklMask(NumElems, DAG, dl)); + V[i] = getUnpackl(DAG, dl, VT, V[i*2], V[i*2+1]); break; } } - MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems); - MVT EVT = MaskVT.getVectorElementType(); - SmallVector MaskVec; + SmallVector MaskVec; bool Reverse = (NonZeros & 0x3) == 2; for (unsigned i = 0; i < 2; ++i) - if (Reverse) - MaskVec.push_back(DAG.getConstant(1-i, EVT)); - else - MaskVec.push_back(DAG.getConstant(i, EVT)); + MaskVec.push_back(Reverse ? 1-i : i); Reverse = ((NonZeros & (0x3 << 2)) >> 2) == 2; for (unsigned i = 0; i < 2; ++i) - if (Reverse) - MaskVec.push_back(DAG.getConstant(1-i+NumElems, EVT)); - else - MaskVec.push_back(DAG.getConstant(i+NumElems, EVT)); - SDValue ShufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, - &MaskVec[0], MaskVec.size()); - return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V[0], V[1], ShufMask); + MaskVec.push_back(Reverse ? 1-i+NumElems : i+NumElems); + return DAG.getVectorShuffle(VT, dl, V[0], V[1], &MaskVec[0]); } if (Values.size() > 2) { + // If we have SSE 4.1, Expand into a number of inserts unless the number of + // values to be inserted is equal to the number of elements, in which case + // use the unpack code below in the hopes of matching the consecutive elts + // load merge pattern for shuffles. + // FIXME: We could probably just check that here directly. 
+ if (Values.size() < NumElems && VT.getSizeInBits() == 128 && + getSubtarget()->hasSSE41()) { + V[0] = DAG.getUNDEF(VT); + for (unsigned i = 0; i < NumElems; ++i) + if (Op.getOperand(i).getOpcode() != ISD::UNDEF) + V[0] = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, V[0], + Op.getOperand(i), DAG.getIntPtrConstant(i)); + return V[0]; + } // Expand into a number of unpckl*. // e.g. for v4f32 // Step 1: unpcklps 0, 2 ==> X: // : unpcklps 1, 3 ==> Y: // Step 2: unpcklps X, Y ==> <3, 2, 1, 0> - SDValue UnpckMask = getUnpacklMask(NumElems, DAG, dl); for (unsigned i = 0; i < NumElems; ++i) V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(i)); NumElems >>= 1; while (NumElems != 0) { for (unsigned i = 0; i < NumElems; ++i) - V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V[i], V[i + NumElems], - UnpckMask); + V[i] = getUnpackl(DAG, dl, VT, V[i], V[i + NumElems]); NumElems >>= 1; } return V[0]; @@ -3553,11 +3339,11 @@ // 3. [ssse3] 2 x pshufb + 1 x por // 4. [all] mov + pshuflw + pshufhw + N x (pextrw + pinsrw) static -SDValue LowerVECTOR_SHUFFLEv8i16(SDValue V1, SDValue V2, - SDValue PermMask, SelectionDAG &DAG, - X86TargetLowering &TLI, DebugLoc dl) { - SmallVector MaskElts(PermMask.getNode()->op_begin(), - PermMask.getNode()->op_end()); +SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp, + SelectionDAG &DAG, X86TargetLowering &TLI) { + SDValue V1 = SVOp->getOperand(0); + SDValue V2 = SVOp->getOperand(1); + DebugLoc dl = SVOp->getDebugLoc(); SmallVector MaskVals; // Determine if more than 1 of the words in each of the low and high quadwords @@ -3568,9 +3354,7 @@ BitVector InputQuads(4); for (unsigned i = 0; i < 8; ++i) { SmallVectorImpl &Quad = i < 4 ? LoQuad : HiQuad; - SDValue Elt = MaskElts[i]; - int EltIdx = Elt.getOpcode() == ISD::UNDEF ? -1 : - cast(Elt)->getZExtValue(); + int EltIdx = SVOp->getMaskElt(i); MaskVals.push_back(EltIdx); if (EltIdx < 0) { ++Quad[0]; @@ -3623,14 +3407,12 @@ // words from all 4 input quadwords. 
SDValue NewV; if (BestLoQuad >= 0 || BestHiQuad >= 0) { - SmallVector MaskV; - MaskV.push_back(DAG.getConstant(BestLoQuad < 0 ? 0 : BestLoQuad, MVT::i64)); - MaskV.push_back(DAG.getConstant(BestHiQuad < 0 ? 1 : BestHiQuad, MVT::i64)); - SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, &MaskV[0], 2); - - NewV = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, MVT::v2i64, - DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64, V1), - DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64, V2), Mask); + SmallVector MaskV; + MaskV.push_back(BestLoQuad < 0 ? 0 : BestLoQuad); + MaskV.push_back(BestHiQuad < 0 ? 1 : BestHiQuad); + NewV = DAG.getVectorShuffle(MVT::v2i64, dl, + DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64, V1), + DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64, V2), &MaskV[0]); NewV = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, NewV); // Rewrite the MaskVals and assign NewV to V1 if NewV now contains all the @@ -3668,15 +3450,8 @@ // If we've eliminated the use of V2, and the new mask is a pshuflw or // pshufhw, that's as cheap as it gets. Return the new shuffle. if ((pshufhw && InOrder[0]) || (pshuflw && InOrder[1])) { - MaskV.clear(); - for (unsigned i = 0; i != 8; ++i) - MaskV.push_back((MaskVals[i] < 0) ? DAG.getUNDEF(MVT::i16) - : DAG.getConstant(MaskVals[i], - MVT::i16)); - return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, MVT::v8i16, NewV, - DAG.getUNDEF(MVT::v8i16), - DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i16, - &MaskV[0], 8)); + return DAG.getVectorShuffle(MVT::v8i16, dl, NewV, + DAG.getUNDEF(MVT::v8i16), &MaskVals[0]); } } @@ -3733,49 +3508,45 @@ // and update MaskVals with new element order. 
BitVector InOrder(8); if (BestLoQuad >= 0) { - SmallVector MaskV; + SmallVector MaskV; for (int i = 0; i != 4; ++i) { int idx = MaskVals[i]; if (idx < 0) { - MaskV.push_back(DAG.getUNDEF(MVT::i16)); + MaskV.push_back(-1); InOrder.set(i); } else if ((idx / 4) == BestLoQuad) { - MaskV.push_back(DAG.getConstant(idx & 3, MVT::i16)); + MaskV.push_back(idx & 3); InOrder.set(i); } else { - MaskV.push_back(DAG.getUNDEF(MVT::i16)); + MaskV.push_back(-1); } } for (unsigned i = 4; i != 8; ++i) - MaskV.push_back(DAG.getConstant(i, MVT::i16)); - NewV = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, MVT::v8i16, NewV, - DAG.getUNDEF(MVT::v8i16), - DAG.getNode(ISD::BUILD_VECTOR, dl, - MVT::v8i16, &MaskV[0], 8)); + MaskV.push_back(i); + NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV, DAG.getUNDEF(MVT::v8i16), + &MaskV[0]); } // If BestHi >= 0, generate a pshufhw to put the high elements in order, // and update MaskVals with the new element order. if (BestHiQuad >= 0) { - SmallVector MaskV; + SmallVector MaskV; for (unsigned i = 0; i != 4; ++i) - MaskV.push_back(DAG.getConstant(i, MVT::i16)); + MaskV.push_back(i); for (unsigned i = 4; i != 8; ++i) { int idx = MaskVals[i]; if (idx < 0) { - MaskV.push_back(DAG.getUNDEF(MVT::i16)); + MaskV.push_back(-1); InOrder.set(i); } else if ((idx / 4) == BestHiQuad) { - MaskV.push_back(DAG.getConstant((idx & 3) + 4, MVT::i16)); + MaskV.push_back((idx & 3) + 4); InOrder.set(i); } else { - MaskV.push_back(DAG.getUNDEF(MVT::i16)); + MaskV.push_back(-1); } } - NewV = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, MVT::v8i16, NewV, - DAG.getUNDEF(MVT::v8i16), - DAG.getNode(ISD::BUILD_VECTOR, dl, - MVT::v8i16, &MaskV[0], 8)); + NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV, DAG.getUNDEF(MVT::v8i16), + &MaskV[0]); } // In case BestHi & BestLo were both -1, which means each quadword has a word @@ -3811,12 +3582,13 @@ // 2. [ssse3] 2 x pshufb + 1 x por // 3. 
[all] v8i16 shuffle + N x pextrw + rotate + pinsrw static -SDValue LowerVECTOR_SHUFFLEv16i8(SDValue V1, SDValue V2, - SDValue PermMask, SelectionDAG &DAG, - X86TargetLowering &TLI, DebugLoc dl) { - SmallVector MaskElts(PermMask.getNode()->op_begin(), - PermMask.getNode()->op_end()); +SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp, + SelectionDAG &DAG, X86TargetLowering &TLI) { + SDValue V1 = SVOp->getOperand(0); + SDValue V2 = SVOp->getOperand(1); + DebugLoc dl = SVOp->getDebugLoc(); SmallVector MaskVals; + SVOp->getMask(MaskVals); // If we have SSSE3, case 1 is generated when all result bytes come from // one of the inputs. Otherwise, case 2 is generated. If no SSSE3 is @@ -3825,10 +3597,7 @@ bool V1Only = true; bool V2Only = true; for (unsigned i = 0; i < 16; ++i) { - SDValue Elt = MaskElts[i]; - int EltIdx = Elt.getOpcode() == ISD::UNDEF ? -1 : - cast(Elt)->getZExtValue(); - MaskVals.push_back(EltIdx); + int EltIdx = MaskVals[i]; if (EltIdx < 0) continue; if (EltIdx < 16) @@ -3958,11 +3727,13 @@ /// the right sequence. e.g. /// vector_shuffle <>, <>, < 3, 4, | 10, 11, | 0, 1, | 14, 15> static -SDValue RewriteAsNarrowerShuffle(SDValue V1, SDValue V2, - MVT VT, - SDValue PermMask, SelectionDAG &DAG, - TargetLowering &TLI, DebugLoc dl) { - unsigned NumElems = PermMask.getNumOperands(); +SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp, + SelectionDAG &DAG, + TargetLowering &TLI, DebugLoc dl) { + MVT VT = SVOp->getValueType(0); + SDValue V1 = SVOp->getOperand(0); + SDValue V2 = SVOp->getOperand(1); + unsigned NumElems = VT.getVectorNumElements(); unsigned NewWidth = (NumElems == 4) ? 
2 : 4; MVT MaskVT = MVT::getIntVectorWithNumElements(NewWidth); MVT MaskEltVT = MaskVT.getVectorElementType(); @@ -3981,38 +3752,35 @@ else NewVT = MVT::v2f64; } - unsigned Scale = NumElems / NewWidth; - SmallVector MaskVec; + int Scale = NumElems / NewWidth; + SmallVector MaskVec; for (unsigned i = 0; i < NumElems; i += Scale) { - unsigned StartIdx = ~0U; - for (unsigned j = 0; j < Scale; ++j) { - SDValue Elt = PermMask.getOperand(i+j); - if (Elt.getOpcode() == ISD::UNDEF) + int StartIdx = -1; + for (int j = 0; j < Scale; ++j) { + int EltIdx = SVOp->getMaskElt(i+j); + if (EltIdx < 0) continue; - unsigned EltIdx = cast(Elt)->getZExtValue(); - if (StartIdx == ~0U) + if (StartIdx == -1) StartIdx = EltIdx - (EltIdx % Scale); if (EltIdx != StartIdx + j) return SDValue(); } - if (StartIdx == ~0U) - MaskVec.push_back(DAG.getUNDEF(MaskEltVT)); + if (StartIdx == -1) + MaskVec.push_back(-1); else - MaskVec.push_back(DAG.getConstant(StartIdx / Scale, MaskEltVT)); + MaskVec.push_back(StartIdx / Scale); } V1 = DAG.getNode(ISD::BIT_CONVERT, dl, NewVT, V1); V2 = DAG.getNode(ISD::BIT_CONVERT, dl, NewVT, V2); - return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, NewVT, V1, V2, - DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, - &MaskVec[0], MaskVec.size())); + return DAG.getVectorShuffle(NewVT, dl, V1, V2, &MaskVec[0]); } /// getVZextMovL - Return a zero-extending vector move low node. /// static SDValue getVZextMovL(MVT VT, MVT OpVT, - SDValue SrcOp, SelectionDAG &DAG, - const X86Subtarget *Subtarget, DebugLoc dl) { + SDValue SrcOp, SelectionDAG &DAG, + const X86Subtarget *Subtarget, DebugLoc dl) { if (VT == MVT::v2f64 || VT == MVT::v4f32) { LoadSDNode *LD = NULL; if (!isScalarLoadToVector(SrcOp.getNode(), &LD)) @@ -4046,31 +3814,34 @@ /// LowerVECTOR_SHUFFLE_4wide - Handle all 4 wide cases with a number of /// shuffles. 
static SDValue -LowerVECTOR_SHUFFLE_4wide(SDValue V1, SDValue V2, - SDValue PermMask, MVT VT, SelectionDAG &DAG, - DebugLoc dl) { - MVT MaskVT = PermMask.getValueType(); - MVT MaskEVT = MaskVT.getVectorElementType(); +LowerVECTOR_SHUFFLE_4wide(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) { + SDValue V1 = SVOp->getOperand(0); + SDValue V2 = SVOp->getOperand(1); + DebugLoc dl = SVOp->getDebugLoc(); + MVT VT = SVOp->getValueType(0); + SmallVector, 8> Locs; Locs.resize(4); - SmallVector Mask1(4, DAG.getUNDEF(MaskEVT)); + SmallVector Mask1(4U, -1); + SmallVector PermMask; + SVOp->getMask(PermMask); + unsigned NumHi = 0; unsigned NumLo = 0; for (unsigned i = 0; i != 4; ++i) { - SDValue Elt = PermMask.getOperand(i); - if (Elt.getOpcode() == ISD::UNDEF) { + int Idx = PermMask[i]; + if (Idx < 0) { Locs[i] = std::make_pair(-1, -1); } else { - unsigned Val = cast(Elt)->getZExtValue(); - assert(Val < 8 && "Invalid VECTOR_SHUFFLE index!"); - if (Val < 4) { + assert(Idx < 8 && "Invalid VECTOR_SHUFFLE index!"); + if (Idx < 4) { Locs[i] = std::make_pair(0, NumLo); - Mask1[NumLo] = Elt; + Mask1[NumLo] = Idx; NumLo++; } else { Locs[i] = std::make_pair(1, NumHi); if (2+NumHi < 4) - Mask1[2+NumHi] = Elt; + Mask1[2+NumHi] = Idx; NumHi++; } } @@ -4081,24 +3852,21 @@ // implemented with two shuffles. First shuffle gather the elements. // The second shuffle, which takes the first shuffle as both of its // vector operands, put the elements into the right order. - V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V2, - DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, - &Mask1[0], Mask1.size())); + V1 = DAG.getVectorShuffle(VT, dl, V1, V2, &Mask1[0]); - SmallVector Mask2(4, DAG.getUNDEF(MaskEVT)); + SmallVector Mask2(4U, -1); + for (unsigned i = 0; i != 4; ++i) { if (Locs[i].first == -1) continue; else { unsigned Idx = (i < 2) ? 
0 : 4; Idx += Locs[i].first * 2 + Locs[i].second; - Mask2[i] = DAG.getConstant(Idx, MaskEVT); + Mask2[i] = Idx; } } - return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V1, - DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, - &Mask2[0], Mask2.size())); + return DAG.getVectorShuffle(VT, dl, V1, V1, &Mask2[0]); } else if (NumLo == 3 || NumHi == 3) { // Otherwise, we must have three elements from one vector, call it X, and // one element from the other, call it Y. First, use a shufps to build an @@ -4109,60 +3877,51 @@ // from X. if (NumHi == 3) { // Normalize it so the 3 elements come from V1. - PermMask = CommuteVectorShuffleMask(PermMask, DAG, dl); + CommuteVectorShuffleMask(PermMask, VT); std::swap(V1, V2); } // Find the element from V2. unsigned HiIndex; for (HiIndex = 0; HiIndex < 3; ++HiIndex) { - SDValue Elt = PermMask.getOperand(HiIndex); - if (Elt.getOpcode() == ISD::UNDEF) + int Val = PermMask[HiIndex]; + if (Val < 0) continue; - unsigned Val = cast(Elt)->getZExtValue(); if (Val >= 4) break; } - Mask1[0] = PermMask.getOperand(HiIndex); - Mask1[1] = DAG.getUNDEF(MaskEVT); - Mask1[2] = PermMask.getOperand(HiIndex^1); - Mask1[3] = DAG.getUNDEF(MaskEVT); - V2 = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V2, - DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, &Mask1[0], 4)); + Mask1[0] = PermMask[HiIndex]; + Mask1[1] = -1; + Mask1[2] = PermMask[HiIndex^1]; + Mask1[3] = -1; + V2 = DAG.getVectorShuffle(VT, dl, V1, V2, &Mask1[0]); if (HiIndex >= 2) { - Mask1[0] = PermMask.getOperand(0); - Mask1[1] = PermMask.getOperand(1); - Mask1[2] = DAG.getConstant(HiIndex & 1 ? 6 : 4, MaskEVT); - Mask1[3] = DAG.getConstant(HiIndex & 1 ? 4 : 6, MaskEVT); - return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V2, - DAG.getNode(ISD::BUILD_VECTOR, dl, - MaskVT, &Mask1[0], 4)); + Mask1[0] = PermMask[0]; + Mask1[1] = PermMask[1]; + Mask1[2] = HiIndex & 1 ? 6 : 4; + Mask1[3] = HiIndex & 1 ? 
4 : 6; + return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask1[0]); } else { - Mask1[0] = DAG.getConstant(HiIndex & 1 ? 2 : 0, MaskEVT); - Mask1[1] = DAG.getConstant(HiIndex & 1 ? 0 : 2, MaskEVT); - Mask1[2] = PermMask.getOperand(2); - Mask1[3] = PermMask.getOperand(3); - if (Mask1[2].getOpcode() != ISD::UNDEF) - Mask1[2] = - DAG.getConstant(cast(Mask1[2])->getZExtValue()+4, - MaskEVT); - if (Mask1[3].getOpcode() != ISD::UNDEF) - Mask1[3] = - DAG.getConstant(cast(Mask1[3])->getZExtValue()+4, - MaskEVT); - return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V2, V1, - DAG.getNode(ISD::BUILD_VECTOR, dl, - MaskVT, &Mask1[0], 4)); + Mask1[0] = HiIndex & 1 ? 2 : 0; + Mask1[1] = HiIndex & 1 ? 0 : 2; + Mask1[2] = PermMask[2]; + Mask1[3] = PermMask[3]; + if (Mask1[2] >= 0) + Mask1[2] += 4; + if (Mask1[3] >= 0) + Mask1[3] += 4; + return DAG.getVectorShuffle(VT, dl, V2, V1, &Mask1[0]); } } // Break it into (shuffle shuffle_hi, shuffle_lo). Locs.clear(); - SmallVector LoMask(4, DAG.getUNDEF(MaskEVT)); - SmallVector HiMask(4, DAG.getUNDEF(MaskEVT)); - SmallVector *MaskPtr = &LoMask; + SmallVector LoMask(4U, -1); + SmallVector HiMask(4U, -1); + + SmallVector *MaskPtr = &LoMask; unsigned MaskIdx = 0; unsigned LoIdx = 0; unsigned HiIdx = 2; @@ -4173,84 +3932,67 @@ LoIdx = 0; HiIdx = 2; } - SDValue Elt = PermMask.getOperand(i); - if (Elt.getOpcode() == ISD::UNDEF) { + int Idx = PermMask[i]; + if (Idx < 0) { Locs[i] = std::make_pair(-1, -1); - } else if (cast(Elt)->getZExtValue() < 4) { + } else if (Idx < 4) { Locs[i] = std::make_pair(MaskIdx, LoIdx); - (*MaskPtr)[LoIdx] = Elt; + (*MaskPtr)[LoIdx] = Idx; LoIdx++; } else { Locs[i] = std::make_pair(MaskIdx, HiIdx); - (*MaskPtr)[HiIdx] = Elt; + (*MaskPtr)[HiIdx] = Idx; HiIdx++; } } - SDValue LoShuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V2, - DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, - &LoMask[0], LoMask.size())); - SDValue HiShuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V2, - DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, - 
&HiMask[0], HiMask.size())); - SmallVector MaskOps; + SDValue LoShuffle = DAG.getVectorShuffle(VT, dl, V1, V2, &LoMask[0]); + SDValue HiShuffle = DAG.getVectorShuffle(VT, dl, V1, V2, &HiMask[0]); + SmallVector MaskOps; for (unsigned i = 0; i != 4; ++i) { if (Locs[i].first == -1) { - MaskOps.push_back(DAG.getUNDEF(MaskEVT)); + MaskOps.push_back(-1); } else { unsigned Idx = Locs[i].first * 4 + Locs[i].second; - MaskOps.push_back(DAG.getConstant(Idx, MaskEVT)); + MaskOps.push_back(Idx); } } - return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, LoShuffle, HiShuffle, - DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, - &MaskOps[0], MaskOps.size())); + return DAG.getVectorShuffle(VT, dl, LoShuffle, HiShuffle, &MaskOps[0]); } SDValue X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { + ShuffleVectorSDNode *SVOp = cast(Op); SDValue V1 = Op.getOperand(0); SDValue V2 = Op.getOperand(1); - SDValue PermMask = Op.getOperand(2); MVT VT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); - unsigned NumElems = PermMask.getNumOperands(); + unsigned NumElems = VT.getVectorNumElements(); bool isMMX = VT.getSizeInBits() == 64; bool V1IsUndef = V1.getOpcode() == ISD::UNDEF; bool V2IsUndef = V2.getOpcode() == ISD::UNDEF; bool V1IsSplat = false; bool V2IsSplat = false; - // FIXME: Check for legal shuffle and return? - - if (isUndefShuffle(Op.getNode())) - return DAG.getUNDEF(VT); - - if (isZeroShuffle(Op.getNode())) + if (isZeroShuffle(SVOp)) return getZeroVector(VT, Subtarget->hasSSE2(), DAG, dl); - if (isIdentityMask(PermMask.getNode())) - return V1; - else if (isIdentityMask(PermMask.getNode(), true)) - return V2; - // Canonicalize movddup shuffles. - if (V2IsUndef && Subtarget->hasSSE2() && - VT.getSizeInBits() == 128 && - X86::isMOVDDUPMask(PermMask.getNode())) - return CanonicalizeMovddup(Op, V1, PermMask, DAG, Subtarget->hasSSE3()); - - if (isSplatMask(PermMask.getNode())) { - if (isMMX || NumElems < 4) return Op; - // Promote it to a v4{if}32 splat. 
- return PromoteSplat(Op, DAG, Subtarget->hasSSE2()); + if (V2IsUndef && Subtarget->hasSSE2() && VT.getSizeInBits() == 128 && + X86::isMOVDDUPMask(SVOp)) + return CanonicalizeMovddup(SVOp, DAG, Subtarget->hasSSE3()); + + // Promote splats to v4f32. + if (SVOp->isSplat()) { + if (isMMX || NumElems < 4) + return Op; + return PromoteSplat(SVOp, DAG, Subtarget->hasSSE2()); } // If the shuffle can be profitably rewritten as a narrower shuffle, then // do it! if (VT == MVT::v8i16 || VT == MVT::v16i8) { - SDValue NewOp= RewriteAsNarrowerShuffle(V1, V2, VT, PermMask, DAG, - *this, dl); + SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, *this, dl); if (NewOp.getNode()) return DAG.getNode(ISD::BIT_CONVERT, dl, VT, LowerVECTOR_SHUFFLE(NewOp, DAG)); @@ -4258,32 +4000,29 @@ // FIXME: Figure out a cleaner way to do this. // Try to make use of movq to zero out the top part. if (ISD::isBuildVectorAllZeros(V2.getNode())) { - SDValue NewOp = RewriteAsNarrowerShuffle(V1, V2, VT, PermMask, - DAG, *this, dl); + SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, *this, dl); if (NewOp.getNode()) { - SDValue NewV1 = NewOp.getOperand(0); - SDValue NewV2 = NewOp.getOperand(1); - SDValue NewMask = NewOp.getOperand(2); - if (isCommutedMOVL(NewMask.getNode(), true, false)) { - NewOp = CommuteVectorShuffle(NewOp, NewV1, NewV2, NewMask, DAG); - return getVZextMovL(VT, NewOp.getValueType(), NewV2, DAG, Subtarget, - dl); - } + if (isCommutedMOVL(cast(NewOp), true, false)) + return getVZextMovL(VT, NewOp.getValueType(), NewOp.getOperand(0), + DAG, Subtarget, dl); } } else if (ISD::isBuildVectorAllZeros(V1.getNode())) { - SDValue NewOp= RewriteAsNarrowerShuffle(V1, V2, VT, PermMask, - DAG, *this, dl); - if (NewOp.getNode() && X86::isMOVLMask(NewOp.getOperand(2).getNode())) + SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, *this, dl); + if (NewOp.getNode() && X86::isMOVLMask(cast(NewOp))) return getVZextMovL(VT, NewOp.getValueType(), NewOp.getOperand(1), - DAG, Subtarget, dl); + DAG, 
Subtarget, dl); } } - + + if (X86::isPSHUFDMask(SVOp)) + return Op; + // Check if this can be converted into a logical shift. bool isLeft = false; unsigned ShAmt = 0; SDValue ShVal; - bool isShift = isVectorShift(Op, PermMask, DAG, isLeft, ShVal, ShAmt); + bool isShift = getSubtarget()->hasSSE2() && + isVectorShift(SVOp, DAG, isLeft, ShVal, ShAmt); if (isShift && ShVal.hasOneUse()) { // If the shifted value has multiple uses, it may be cheaper to use // v_set0 + movlhps or movhlps, etc. @@ -4291,8 +4030,8 @@ ShAmt *= EVT.getSizeInBits(); return getVShift(isLeft, VT, ShVal, ShAmt, DAG, *this, dl); } - - if (X86::isMOVLMask(PermMask.getNode())) { + + if (X86::isMOVLMask(SVOp)) { if (V1IsUndef) return V2; if (ISD::isBuildVectorAllZeros(V1.getNode())) @@ -4300,17 +4039,18 @@ if (!isMMX) return Op; } - - if (!isMMX && (X86::isMOVSHDUPMask(PermMask.getNode()) || - X86::isMOVSLDUPMask(PermMask.getNode()) || - X86::isMOVHLPSMask(PermMask.getNode()) || - X86::isMOVHPMask(PermMask.getNode()) || - X86::isMOVLPMask(PermMask.getNode()))) + + // FIXME: fold these into legal mask. + if (!isMMX && (X86::isMOVSHDUPMask(SVOp) || + X86::isMOVSLDUPMask(SVOp) || + X86::isMOVHLPSMask(SVOp) || + X86::isMOVHPMask(SVOp) || + X86::isMOVLPMask(SVOp))) return Op; - if (ShouldXformToMOVHLPS(PermMask.getNode()) || - ShouldXformToMOVLP(V1.getNode(), V2.getNode(), PermMask.getNode())) - return CommuteVectorShuffle(Op, V1, V2, PermMask, DAG); + if (ShouldXformToMOVHLPS(SVOp) || + ShouldXformToMOVLP(V1.getNode(), V2.getNode(), SVOp)) + return CommuteVectorShuffle(SVOp, DAG); if (isShift) { // No better options. Use a vshl / vsrl. @@ -4318,7 +4058,7 @@ ShAmt *= EVT.getSizeInBits(); return getVShift(isLeft, VT, ShVal, ShAmt, DAG, *this, dl); } - + bool Commuted = false; // FIXME: This should also accept a bitcast of a splat? Be careful, not // 1,1,1,1 -> v8i16 though. @@ -4327,115 +4067,86 @@ // Canonicalize the splat or undef, if present, to be on the RHS. 
if ((V1IsSplat || V1IsUndef) && !(V2IsSplat || V2IsUndef)) { - Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG); + Op = CommuteVectorShuffle(SVOp, DAG); + SVOp = cast(Op); + V1 = SVOp->getOperand(0); + V2 = SVOp->getOperand(1); std::swap(V1IsSplat, V2IsSplat); std::swap(V1IsUndef, V2IsUndef); Commuted = true; } - // FIXME: Figure out a cleaner way to do this. - if (isCommutedMOVL(PermMask.getNode(), V2IsSplat, V2IsUndef)) { - if (V2IsUndef) return V1; - Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG); - if (V2IsSplat) { - // V2 is a splat, so the mask may be malformed. That is, it may point - // to any V2 element. The instruction selectior won't like this. Get - // a corrected mask and commute to form a proper MOVS{S|D}. - SDValue NewMask = getMOVLMask(NumElems, DAG, dl); - if (NewMask.getNode() != PermMask.getNode()) - Op = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V2, NewMask); - } - return Op; + if (isCommutedMOVL(SVOp, V2IsSplat, V2IsUndef)) { + // Shuffling low element of v1 into undef, just return v1. + if (V2IsUndef) + return V1; + // If V2 is a splat, the mask may be malformed such as <4,3,3,3>, which + // the instruction selector will not match, so get a canonical MOVL with + // swapped operands to undo the commute. + return getMOVL(DAG, dl, VT, V2, V1); } - if (X86::isUNPCKL_v_undef_Mask(PermMask.getNode()) || - X86::isUNPCKH_v_undef_Mask(PermMask.getNode()) || - X86::isUNPCKLMask(PermMask.getNode()) || - X86::isUNPCKHMask(PermMask.getNode())) + if (X86::isUNPCKL_v_undef_Mask(SVOp) || + X86::isUNPCKH_v_undef_Mask(SVOp) || + X86::isUNPCKLMask(SVOp) || + X86::isUNPCKHMask(SVOp)) return Op; if (V2IsSplat) { // Normalize mask so all entries that point to V2 points to its first // element then try to match unpck{h|l} again. If match, return a // new vector_shuffle with the corrected mask. 
- SDValue NewMask = NormalizeMask(PermMask, DAG); - if (NewMask.getNode() != PermMask.getNode()) { - if (X86::isUNPCKLMask(NewMask.getNode(), true)) { - SDValue NewMask = getUnpacklMask(NumElems, DAG, dl); - return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V2, NewMask); - } else if (X86::isUNPCKHMask(NewMask.getNode(), true)) { - SDValue NewMask = getUnpackhMask(NumElems, DAG, dl); - return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V2, NewMask); + SDValue NewMask = NormalizeMask(SVOp, DAG); + ShuffleVectorSDNode *NSVOp = cast(NewMask); + if (NSVOp != SVOp) { + if (X86::isUNPCKLMask(NSVOp, true)) { + return NewMask; + } else if (X86::isUNPCKHMask(NSVOp, true)) { + return NewMask; } } } - // Normalize the node to match x86 shuffle ops if needed - if (V2.getOpcode() != ISD::UNDEF && isCommutedSHUFP(PermMask.getNode())) - Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG); - if (Commuted) { // Commute is back and try unpck* again. - Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG); - if (X86::isUNPCKL_v_undef_Mask(PermMask.getNode()) || - X86::isUNPCKH_v_undef_Mask(PermMask.getNode()) || - X86::isUNPCKLMask(PermMask.getNode()) || - X86::isUNPCKHMask(PermMask.getNode())) - return Op; + // FIXME: this seems wrong. + SDValue NewOp = CommuteVectorShuffle(SVOp, DAG); + ShuffleVectorSDNode *NewSVOp = cast(NewOp); + if (X86::isUNPCKL_v_undef_Mask(NewSVOp) || + X86::isUNPCKH_v_undef_Mask(NewSVOp) || + X86::isUNPCKLMask(NewSVOp) || + X86::isUNPCKHMask(NewSVOp)) + return NewOp; } // FIXME: for mmx, bitcast v2i32 to v4i16 for shuffle. - // Try PSHUF* first, then SHUFP*. - // MMX doesn't have PSHUFD but it does have PSHUFW. While it's theoretically - // possible to shuffle a v2i32 using PSHUFW, that's not yet implemented. 
- if (isMMX && NumElems == 4 && X86::isPSHUFDMask(PermMask.getNode())) { - if (V2.getOpcode() != ISD::UNDEF) - return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, - DAG.getUNDEF(VT), PermMask); - return Op; - } - - if (!isMMX) { - if (Subtarget->hasSSE2() && - (X86::isPSHUFDMask(PermMask.getNode()) || - X86::isPSHUFHWMask(PermMask.getNode()) || - X86::isPSHUFLWMask(PermMask.getNode()))) { - MVT RVT = VT; - if (VT == MVT::v4f32) { - RVT = MVT::v4i32; - Op = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, RVT, - DAG.getNode(ISD::BIT_CONVERT, dl, RVT, V1), - DAG.getUNDEF(RVT), PermMask); - } else if (V2.getOpcode() != ISD::UNDEF) - Op = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, RVT, V1, - DAG.getUNDEF(RVT), PermMask); - if (RVT != VT) - Op = DAG.getNode(ISD::BIT_CONVERT, dl, VT, Op); - return Op; - } - // Binary or unary shufps. - if (X86::isSHUFPMask(PermMask.getNode()) || - (V2.getOpcode() == ISD::UNDEF && X86::isPSHUFDMask(PermMask.getNode()))) - return Op; - } + // Normalize the node to match x86 shuffle ops if needed + if (!isMMX && V2.getOpcode() != ISD::UNDEF && isCommutedSHUFP(SVOp)) + return CommuteVectorShuffle(SVOp, DAG); + // Check for legal shuffle and return? + SmallVector PermMask; + SVOp->getMask(PermMask); + if (isShuffleMaskLegal(PermMask, VT)) + return Op; + // Handle v8i16 specifically since SSE can do byte extraction and insertion. if (VT == MVT::v8i16) { - SDValue NewOp = LowerVECTOR_SHUFFLEv8i16(V1, V2, PermMask, DAG, *this, dl); + SDValue NewOp = LowerVECTOR_SHUFFLEv8i16(SVOp, DAG, *this); if (NewOp.getNode()) return NewOp; } if (VT == MVT::v16i8) { - SDValue NewOp = LowerVECTOR_SHUFFLEv16i8(V1, V2, PermMask, DAG, *this, dl); + SDValue NewOp = LowerVECTOR_SHUFFLEv16i8(SVOp, DAG, *this); if (NewOp.getNode()) return NewOp; } // Handle all 4 wide cases with a number of shuffles except for MMX. 
if (NumElems == 4 && !isMMX) - return LowerVECTOR_SHUFFLE_4wide(V1, V2, PermMask, VT, DAG, dl); + return LowerVECTOR_SHUFFLE_4wide(SVOp, DAG); return SDValue(); } @@ -4529,22 +4240,12 @@ unsigned Idx = cast(Op.getOperand(1))->getZExtValue(); if (Idx == 0) return Op; + // SHUFPS the element to the lowest double word, then movss. - MVT MaskVT = MVT::getIntVectorWithNumElements(4); - SmallVector IdxVec; - IdxVec. - push_back(DAG.getConstant(Idx, MaskVT.getVectorElementType())); - IdxVec. - push_back(DAG.getUNDEF(MaskVT.getVectorElementType())); - IdxVec. - push_back(DAG.getUNDEF(MaskVT.getVectorElementType())); - IdxVec. - push_back(DAG.getUNDEF(MaskVT.getVectorElementType())); - SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, - &IdxVec[0], IdxVec.size()); - SDValue Vec = Op.getOperand(0); - Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, Vec.getValueType(), - Vec, DAG.getUNDEF(Vec.getValueType()), Mask); + int Mask[4] = { Idx, -1, -1, -1 }; + MVT VVT = Op.getOperand(0).getValueType(); + SDValue Vec = DAG.getVectorShuffle(VVT, dl, Op.getOperand(0), + DAG.getUNDEF(VVT), Mask); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec, DAG.getIntPtrConstant(0)); } else if (VT.getSizeInBits() == 64) { @@ -4558,17 +4259,10 @@ // UNPCKHPD the element to the lowest double word, then movsd. // Note if the lower 64 bits of the result of the UNPCKHPD is then stored // to a f64mem, the whole operation is folded into a single MOVHPDmr. - MVT MaskVT = MVT::getIntVectorWithNumElements(2); - SmallVector IdxVec; - IdxVec.push_back(DAG.getConstant(1, MaskVT.getVectorElementType())); - IdxVec. 
- push_back(DAG.getUNDEF(MaskVT.getVectorElementType())); - SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, - &IdxVec[0], IdxVec.size()); - SDValue Vec = Op.getOperand(0); - Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, Vec.getValueType(), - Vec, DAG.getUNDEF(Vec.getValueType()), - Mask); + int Mask[2] = { 1, -1 }; + MVT VVT = Op.getOperand(0).getValueType(); + SDValue Vec = DAG.getVectorShuffle(VVT, dl, Op.getOperand(0), + DAG.getUNDEF(VVT), Mask); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec, DAG.getIntPtrConstant(0)); } @@ -5049,19 +4743,6 @@ Constant *C1 = ConstantVector::get(CV1); SDValue CPIdx1 = DAG.getConstantPool(C1, getPointerTy(), 16); - SmallVector MaskVec; - MaskVec.push_back(DAG.getConstant(0, MVT::i32)); - MaskVec.push_back(DAG.getConstant(4, MVT::i32)); - MaskVec.push_back(DAG.getConstant(1, MVT::i32)); - MaskVec.push_back(DAG.getConstant(5, MVT::i32)); - SDValue UnpcklMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, - &MaskVec[0], MaskVec.size()); - SmallVector MaskVec2; - MaskVec2.push_back(DAG.getConstant(1, MVT::i32)); - MaskVec2.push_back(DAG.getConstant(0, MVT::i32)); - SDValue ShufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i32, - &MaskVec2[0], MaskVec2.size()); - SDValue XR1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op.getOperand(0), @@ -5070,13 +4751,11 @@ DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op.getOperand(0), DAG.getIntPtrConstant(0))); - SDValue Unpck1 = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, MVT::v4i32, - XR1, XR2, UnpcklMask); + SDValue Unpck1 = getUnpackl(DAG, dl, MVT::v4i32, XR1, XR2); SDValue CLod0 = DAG.getLoad(MVT::v4i32, dl, DAG.getEntryNode(), CPIdx0, PseudoSourceValue::getConstantPool(), 0, false, 16); - SDValue Unpck2 = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, MVT::v4i32, - Unpck1, CLod0, UnpcklMask); + SDValue Unpck2 = getUnpackl(DAG, dl, MVT::v4i32, Unpck1, CLod0); SDValue XR2F = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Unpck2); 
SDValue CLod1 = DAG.getLoad(MVT::v2f64, dl, CLod0.getValue(1), CPIdx1, PseudoSourceValue::getConstantPool(), 0, @@ -5084,8 +4763,9 @@ SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, XR2F, CLod1); // Add the halves; easiest way is to swap them into another reg first. - SDValue Shuf = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, MVT::v2f64, - Sub, Sub, ShufMask); + int ShufMask[2] = { 1, -1 }; + SDValue Shuf = DAG.getVectorShuffle(MVT::v2f64, dl, Sub, + DAG.getUNDEF(MVT::v2f64), ShufMask); SDValue Add = DAG.getNode(ISD::FADD, dl, MVT::v2f64, Shuf, Sub); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Add, DAG.getIntPtrConstant(0)); @@ -7237,34 +6917,37 @@ /// By default, if a target supports the VECTOR_SHUFFLE node, all mask values /// are assumed to be legal. bool -X86TargetLowering::isShuffleMaskLegal(SDValue Mask, MVT VT) const { +X86TargetLowering::isShuffleMaskLegal(SmallVectorImpl &M, MVT VT) const { // Only do shuffles on 128-bit vector types for now. - // FIXME: pshufb, blends - if (VT.getSizeInBits() == 64) return false; - return (Mask.getNode()->getNumOperands() <= 4 || - isIdentityMask(Mask.getNode()) || - isIdentityMask(Mask.getNode(), true) || - isSplatMask(Mask.getNode()) || - X86::isPSHUFHWMask(Mask.getNode()) || - X86::isPSHUFLWMask(Mask.getNode()) || - X86::isUNPCKLMask(Mask.getNode()) || - X86::isUNPCKHMask(Mask.getNode()) || - X86::isUNPCKL_v_undef_Mask(Mask.getNode()) || - X86::isUNPCKH_v_undef_Mask(Mask.getNode())); + if (VT.getSizeInBits() == 64) + return false; + + // FIXME: pshufb, blends, palignr, shifts. 
+ return (VT.getVectorNumElements() == 2 || + ShuffleVectorSDNode::isSplatMask(&M[0], VT) || + isMOVLMask(M, VT) || + isSHUFPMask(M, VT) || + isPSHUFDMask(M, VT) || + isPSHUFHWMask(M, VT) || + isPSHUFLWMask(M, VT) || + isUNPCKLMask(M, VT) || + isUNPCKHMask(M, VT) || + isUNPCKL_v_undef_Mask(M, VT) || + isUNPCKH_v_undef_Mask(M, VT)); } bool -X86TargetLowering::isVectorClearMaskLegal(const std::vector &BVOps, - MVT EVT, SelectionDAG &DAG) const { - unsigned NumElts = BVOps.size(); - // Only do shuffles on 128-bit vector types for now. - if (EVT.getSizeInBits() * NumElts == 64) return false; - if (NumElts == 2) return true; - if (NumElts == 4) { - return (isMOVLMask(&BVOps[0], 4) || - isCommutedMOVL(&BVOps[0], 4, true) || - isSHUFPMask(&BVOps[0], 4) || - isCommutedSHUFP(&BVOps[0], 4)); +X86TargetLowering::isVectorClearMaskLegal(SmallVectorImpl &Mask, + MVT VT) const { + unsigned NumElts = VT.getVectorNumElements(); + // FIXME: This collection of masks seems suspect. + if (NumElts == 2) + return true; + if (NumElts == 4 && VT.getSizeInBits() == 128) { + return (isMOVLMask(Mask, VT) || + isCommutedMOVLMask(Mask, VT, true) || + isSHUFPMask(Mask, VT) || + isCommutedSHUFPMask(Mask, VT)); } return false; } @@ -7999,15 +7682,13 @@ return false; } -static bool EltsFromConsecutiveLoads(SDNode *N, SDValue PermMask, - unsigned NumElems, MVT EVT, - SDNode *&Base, +static bool EltsFromConsecutiveLoads(ShuffleVectorSDNode *N, unsigned NumElems, + MVT EVT, SDNode *&Base, SelectionDAG &DAG, MachineFrameInfo *MFI, const TargetLowering &TLI) { Base = NULL; for (unsigned i = 0; i < NumElems; ++i) { - SDValue Idx = PermMask.getOperand(i); - if (Idx.getOpcode() == ISD::UNDEF) { + if (N->getMaskElt(i) < 0) { if (!Base) return false; continue; @@ -8040,12 +7721,12 @@ /// shuffle to be an appropriate build vector so it can take advantage of // performBuildVectorCombine. 
static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG, - const TargetLowering &TLI) { + const TargetLowering &TLI) { DebugLoc dl = N->getDebugLoc(); MVT VT = N->getValueType(0); MVT EVT = VT.getVectorElementType(); - SDValue PermMask = N->getOperand(2); - unsigned NumElems = PermMask.getNumOperands(); + ShuffleVectorSDNode *SVN = cast(N); + unsigned NumElems = VT.getVectorNumElements(); // For x86-32 machines, if we see an insert and then a shuffle in a v2i64 // where the upper half is 0, it is advantageous to rewrite it as a build @@ -8054,15 +7735,16 @@ SDValue In[2]; In[0] = N->getOperand(0); In[1] = N->getOperand(1); - unsigned Idx0 =cast(PermMask.getOperand(0))->getZExtValue(); - unsigned Idx1 =cast(PermMask.getOperand(1))->getZExtValue(); - if (In[0].getValueType().getVectorNumElements() == NumElems && + int Idx0 = SVN->getMaskElt(0); + int Idx1 = SVN->getMaskElt(1); + // FIXME: can we take advantage of undef index? + if (Idx0 >= 0 && Idx1 >= 0 && In[Idx0/2].getOpcode() == ISD::INSERT_VECTOR_ELT && In[Idx1/2].getOpcode() == ISD::BUILD_VECTOR) { ConstantSDNode* InsertVecIdx = dyn_cast(In[Idx0/2].getOperand(2)); if (InsertVecIdx && - InsertVecIdx->getZExtValue() == (Idx0 % 2) && + InsertVecIdx->getZExtValue() == (unsigned)(Idx0 % 2) && isZeroNode(In[Idx1/2].getOperand(Idx1 % 2))) { return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, In[Idx0/2].getOperand(1), @@ -8074,8 +7756,7 @@ // Try to combine a vector_shuffle into a 128-bit load. 
MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); SDNode *Base = NULL; - if (!EltsFromConsecutiveLoads(N, PermMask, NumElems, EVT, Base, - DAG, MFI, TLI)) + if (!EltsFromConsecutiveLoads(SVN, NumElems, EVT, Base, DAG, MFI, TLI)) return SDValue(); LoadSDNode *LD = cast(Base); @@ -8520,9 +8201,9 @@ } } } else if (ShAmtOp.getOpcode() == ISD::VECTOR_SHUFFLE && - isSplatMask(ShAmtOp.getOperand(2).getNode())) { - BaseShAmt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, ShAmtOp, - DAG.getIntPtrConstant(0)); + cast(ShAmtOp)->isSplat()) { + BaseShAmt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, ShAmtOp, + DAG.getIntPtrConstant(0)); } else return SDValue(); Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.h?rev=70225&r1=70224&r2=70225&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original) +++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Mon Apr 27 13:41:29 2009 @@ -230,7 +230,8 @@ // VSHL, VSRL - Vector logical left / right shift. VSHL, VSRL, - + + // CMPPD, CMPPS - Vector double/float comparison. // CMPPD, CMPPS - Vector double/float comparison. CMPPD, CMPPS, @@ -251,80 +252,72 @@ namespace X86 { /// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to PSHUFD. - bool isPSHUFDMask(SDNode *N); + bool isPSHUFDMask(ShuffleVectorSDNode *N); /// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to PSHUFD. - bool isPSHUFHWMask(SDNode *N); + bool isPSHUFHWMask(ShuffleVectorSDNode *N); /// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to PSHUFD. 
- bool isPSHUFLWMask(SDNode *N); + bool isPSHUFLWMask(ShuffleVectorSDNode *N); /// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to SHUFP*. - bool isSHUFPMask(SDNode *N); + bool isSHUFPMask(ShuffleVectorSDNode *N); /// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVHLPS. - bool isMOVHLPSMask(SDNode *N); + bool isMOVHLPSMask(ShuffleVectorSDNode *N); /// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form /// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef, /// <2, 3, 2, 3> - bool isMOVHLPS_v_undef_Mask(SDNode *N); + bool isMOVHLPS_v_undef_Mask(ShuffleVectorSDNode *N); /// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand - /// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}. - bool isMOVLPMask(SDNode *N); + /// specifies a shuffle of elements that is suitable for MOVLP{S|D}. + bool isMOVLPMask(ShuffleVectorSDNode *N); /// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand - /// specifies a shuffle of elements that is suitable for input to MOVHP{S|D} + /// specifies a shuffle of elements that is suitable for MOVHP{S|D}. /// as well as MOVLHPS. - bool isMOVHPMask(SDNode *N); + bool isMOVHPMask(ShuffleVectorSDNode *N); /// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to UNPCKL. - bool isUNPCKLMask(SDNode *N, bool V2IsSplat = false); + bool isUNPCKLMask(ShuffleVectorSDNode *N, bool V2IsSplat = false); /// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to UNPCKH. 
- bool isUNPCKHMask(SDNode *N, bool V2IsSplat = false); + bool isUNPCKHMask(ShuffleVectorSDNode *N, bool V2IsSplat = false); /// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form /// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef, /// <0, 0, 1, 1> - bool isUNPCKL_v_undef_Mask(SDNode *N); + bool isUNPCKL_v_undef_Mask(ShuffleVectorSDNode *N); /// isUNPCKH_v_undef_Mask - Special case of isUNPCKHMask for canonical form /// of vector_shuffle v, v, <2, 6, 3, 7>, i.e. vector_shuffle v, undef, /// <2, 2, 3, 3> - bool isUNPCKH_v_undef_Mask(SDNode *N); + bool isUNPCKH_v_undef_Mask(ShuffleVectorSDNode *N); /// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVSS, /// MOVSD, and MOVD, i.e. setting the lowest element. - bool isMOVLMask(SDNode *N); + bool isMOVLMask(ShuffleVectorSDNode *N); /// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVSHDUP. - bool isMOVSHDUPMask(SDNode *N); + bool isMOVSHDUPMask(ShuffleVectorSDNode *N); /// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVSLDUP. - bool isMOVSLDUPMask(SDNode *N); - - /// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand - /// specifies a splat of a single element. - bool isSplatMask(SDNode *N); - - /// isSplatLoMask - Return true if the specified VECTOR_SHUFFLE operand - /// specifies a splat of zero element. - bool isSplatLoMask(SDNode *N); + bool isMOVSLDUPMask(ShuffleVectorSDNode *N); /// isMOVDDUPMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVDDUP. 
- bool isMOVDDUPMask(SDNode *N); + bool isMOVDDUPMask(ShuffleVectorSDNode *N); /// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP* @@ -477,14 +470,13 @@ /// support *some* VECTOR_SHUFFLE operations, those with specific masks. /// By default, if a target supports the VECTOR_SHUFFLE node, all mask /// values are assumed to be legal. - virtual bool isShuffleMaskLegal(SDValue Mask, MVT VT) const; + virtual bool isShuffleMaskLegal(SmallVectorImpl &Mask, MVT VT) const; /// isVectorClearMaskLegal - Similar to isShuffleMaskLegal. This is /// used by Targets can use this to indicate if there is a suitable /// VECTOR_SHUFFLE that can be used to replace a VAND with a constant /// pool entry. - virtual bool isVectorClearMaskLegal(const std::vector &BVOps, - MVT EVT, SelectionDAG &DAG) const; + virtual bool isVectorClearMaskLegal(SmallVectorImpl &M, MVT VT) const; /// ShouldShrinkFPConstant - If true, then instruction selection should /// seek to shrink the FP constant of the specified type to a smaller type Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.td?rev=70225&r1=70224&r2=70225&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86InstrInfo.td (original) +++ llvm/trunk/lib/Target/X86/X86InstrInfo.td Mon Apr 27 13:41:29 2009 @@ -3821,6 +3821,7 @@ (implicit EFLAGS)), (DEC32m addr:$dst)>, Requires<[In32BitMode]>; + //===----------------------------------------------------------------------===// // Floating Point Stack Support //===----------------------------------------------------------------------===// Modified: llvm/trunk/lib/Target/X86/X86InstrMMX.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrMMX.td?rev=70225&r1=70224&r2=70225&view=diff 
============================================================================== --- llvm/trunk/lib/Target/X86/X86InstrMMX.td (original) +++ llvm/trunk/lib/Target/X86/X86InstrMMX.td Mon Apr 27 13:41:29 2009 @@ -30,33 +30,37 @@ // MMX_SHUFFLE_get_shuf_imm xform function: convert vector_shuffle mask to // PSHUFW imm. -def MMX_SHUFFLE_get_shuf_imm : SDNodeXForm; // Patterns for: vector_shuffle v1, v2, <2, 6, 3, 7, ...> -def MMX_UNPCKH_shuffle_mask : PatLeaf<(build_vector), [{ - return X86::isUNPCKHMask(N); +def mmx_unpckh : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isUNPCKHMask(cast(N)); }]>; // Patterns for: vector_shuffle v1, v2, <0, 4, 2, 5, ...> -def MMX_UNPCKL_shuffle_mask : PatLeaf<(build_vector), [{ - return X86::isUNPCKLMask(N); +def mmx_unpckl : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isUNPCKLMask(cast(N)); }]>; // Patterns for: vector_shuffle v1, , <0, 0, 1, 1, ...> -def MMX_UNPCKH_v_undef_shuffle_mask : PatLeaf<(build_vector), [{ - return X86::isUNPCKH_v_undef_Mask(N); +def mmx_unpckh_undef : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isUNPCKH_v_undef_Mask(cast(N)); }]>; // Patterns for: vector_shuffle v1, , <2, 2, 3, 3, ...> -def MMX_UNPCKL_v_undef_shuffle_mask : PatLeaf<(build_vector), [{ - return X86::isUNPCKL_v_undef_Mask(N); +def mmx_unpckl_undef : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isUNPCKL_v_undef_Mask(cast(N)); }]>; -// Patterns for shuffling. 
-def MMX_PSHUFW_shuffle_mask : PatLeaf<(build_vector), [{ - return X86::isPSHUFDMask(N); +def mmx_pshufw : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isPSHUFDMask(cast(N)); }], MMX_SHUFFLE_get_shuf_imm>; //===----------------------------------------------------------------------===// @@ -185,9 +189,8 @@ def MMX_MOVQ2DQrr : SSDIi8<0xD6, MRMDestMem, (outs VR128:$dst), (ins VR64:$src), "movq2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, - (v2i64 (vector_shuffle immAllZerosV, - (v2i64 (scalar_to_vector (i64 (bitconvert VR64:$src)))), - MOVL_shuffle_mask)))]>; + (movl immAllZerosV, + (v2i64 (scalar_to_vector (i64 (bitconvert VR64:$src))))))]>; let neverHasSideEffects = 1 in def MMX_MOVQ2FR64rr: SSDIi8<0xD6, MRMDestMem, (outs FR64:$dst), (ins VR64:$src), @@ -319,86 +322,74 @@ (outs VR64:$dst), (ins VR64:$src1, VR64:$src2), "punpckhbw\t{$src2, $dst|$dst, $src2}", [(set VR64:$dst, - (v8i8 (vector_shuffle VR64:$src1, VR64:$src2, - MMX_UNPCKH_shuffle_mask)))]>; + (v8i8 (mmx_unpckh VR64:$src1, VR64:$src2)))]>; def MMX_PUNPCKHBWrm : MMXI<0x68, MRMSrcMem, (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2), "punpckhbw\t{$src2, $dst|$dst, $src2}", [(set VR64:$dst, - (v8i8 (vector_shuffle VR64:$src1, - (bc_v8i8 (load_mmx addr:$src2)), - MMX_UNPCKH_shuffle_mask)))]>; + (v8i8 (mmx_unpckh VR64:$src1, + (bc_v8i8 (load_mmx addr:$src2)))))]>; def MMX_PUNPCKHWDrr : MMXI<0x69, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src1, VR64:$src2), "punpckhwd\t{$src2, $dst|$dst, $src2}", [(set VR64:$dst, - (v4i16 (vector_shuffle VR64:$src1, VR64:$src2, - MMX_UNPCKH_shuffle_mask)))]>; + (v4i16 (mmx_unpckh VR64:$src1, VR64:$src2)))]>; def MMX_PUNPCKHWDrm : MMXI<0x69, MRMSrcMem, (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2), "punpckhwd\t{$src2, $dst|$dst, $src2}", [(set VR64:$dst, - (v4i16 (vector_shuffle VR64:$src1, - (bc_v4i16 (load_mmx addr:$src2)), - MMX_UNPCKH_shuffle_mask)))]>; + (v4i16 (mmx_unpckh VR64:$src1, + (bc_v4i16 (load_mmx 
addr:$src2)))))]>; def MMX_PUNPCKHDQrr : MMXI<0x6A, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src1, VR64:$src2), "punpckhdq\t{$src2, $dst|$dst, $src2}", [(set VR64:$dst, - (v2i32 (vector_shuffle VR64:$src1, VR64:$src2, - MMX_UNPCKH_shuffle_mask)))]>; + (v2i32 (mmx_unpckh VR64:$src1, VR64:$src2)))]>; def MMX_PUNPCKHDQrm : MMXI<0x6A, MRMSrcMem, (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2), "punpckhdq\t{$src2, $dst|$dst, $src2}", [(set VR64:$dst, - (v2i32 (vector_shuffle VR64:$src1, - (bc_v2i32 (load_mmx addr:$src2)), - MMX_UNPCKH_shuffle_mask)))]>; + (v2i32 (mmx_unpckh VR64:$src1, + (bc_v2i32 (load_mmx addr:$src2)))))]>; // Unpack Low Packed Data Instructions def MMX_PUNPCKLBWrr : MMXI<0x60, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src1, VR64:$src2), "punpcklbw\t{$src2, $dst|$dst, $src2}", [(set VR64:$dst, - (v8i8 (vector_shuffle VR64:$src1, VR64:$src2, - MMX_UNPCKL_shuffle_mask)))]>; + (v8i8 (mmx_unpckl VR64:$src1, VR64:$src2)))]>; def MMX_PUNPCKLBWrm : MMXI<0x60, MRMSrcMem, (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2), "punpcklbw\t{$src2, $dst|$dst, $src2}", [(set VR64:$dst, - (v8i8 (vector_shuffle VR64:$src1, - (bc_v8i8 (load_mmx addr:$src2)), - MMX_UNPCKL_shuffle_mask)))]>; + (v8i8 (mmx_unpckl VR64:$src1, + (bc_v8i8 (load_mmx addr:$src2)))))]>; def MMX_PUNPCKLWDrr : MMXI<0x61, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src1, VR64:$src2), "punpcklwd\t{$src2, $dst|$dst, $src2}", [(set VR64:$dst, - (v4i16 (vector_shuffle VR64:$src1, VR64:$src2, - MMX_UNPCKL_shuffle_mask)))]>; + (v4i16 (mmx_unpckl VR64:$src1, VR64:$src2)))]>; def MMX_PUNPCKLWDrm : MMXI<0x61, MRMSrcMem, (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2), "punpcklwd\t{$src2, $dst|$dst, $src2}", [(set VR64:$dst, - (v4i16 (vector_shuffle VR64:$src1, - (bc_v4i16 (load_mmx addr:$src2)), - MMX_UNPCKL_shuffle_mask)))]>; + (v4i16 (mmx_unpckl VR64:$src1, + (bc_v4i16 (load_mmx addr:$src2)))))]>; def MMX_PUNPCKLDQrr : MMXI<0x62, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src1, VR64:$src2), "punpckldq\t{$src2, 
$dst|$dst, $src2}", [(set VR64:$dst, - (v2i32 (vector_shuffle VR64:$src1, VR64:$src2, - MMX_UNPCKL_shuffle_mask)))]>; + (v2i32 (mmx_unpckl VR64:$src1, VR64:$src2)))]>; def MMX_PUNPCKLDQrm : MMXI<0x62, MRMSrcMem, (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2), "punpckldq\t{$src2, $dst|$dst, $src2}", [(set VR64:$dst, - (v2i32 (vector_shuffle VR64:$src1, - (bc_v2i32 (load_mmx addr:$src2)), - MMX_UNPCKL_shuffle_mask)))]>; + (v2i32 (mmx_unpckl VR64:$src1, + (bc_v2i32 (load_mmx addr:$src2)))))]>; } // -- Pack Instructions @@ -411,17 +402,13 @@ (outs VR64:$dst), (ins VR64:$src1, i8imm:$src2), "pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR64:$dst, - (v4i16 (vector_shuffle - VR64:$src1, (undef), - MMX_PSHUFW_shuffle_mask:$src2)))]>; + (v4i16 (mmx_pshufw:$src2 VR64:$src1, (undef))))]>; def MMX_PSHUFWmi : MMXIi8<0x70, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src1, i8imm:$src2), "pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR64:$dst, - (v4i16 (vector_shuffle - (bc_v4i16 (load_mmx addr:$src1)), - (undef), - MMX_PSHUFW_shuffle_mask:$src2)))]>; + (mmx_pshufw:$src2 (bc_v4i16 (load_mmx addr:$src1)), + (undef)))]>; // -- Conversion Instructions let neverHasSideEffects = 1 in { @@ -627,34 +614,27 @@ // Patterns to perform canonical versions of vector shuffling. 
let AddedComplexity = 10 in { - def : Pat<(v8i8 (vector_shuffle VR64:$src, (undef), - MMX_UNPCKL_v_undef_shuffle_mask)), + def : Pat<(v8i8 (mmx_unpckl_undef VR64:$src, (undef))), (MMX_PUNPCKLBWrr VR64:$src, VR64:$src)>; - def : Pat<(v4i16 (vector_shuffle VR64:$src, (undef), - MMX_UNPCKL_v_undef_shuffle_mask)), + def : Pat<(v4i16 (mmx_unpckl_undef VR64:$src, (undef))), (MMX_PUNPCKLWDrr VR64:$src, VR64:$src)>; - def : Pat<(v2i32 (vector_shuffle VR64:$src, (undef), - MMX_UNPCKL_v_undef_shuffle_mask)), + def : Pat<(v2i32 (mmx_unpckl_undef VR64:$src, (undef))), (MMX_PUNPCKLDQrr VR64:$src, VR64:$src)>; } let AddedComplexity = 10 in { - def : Pat<(v8i8 (vector_shuffle VR64:$src, (undef), - MMX_UNPCKH_v_undef_shuffle_mask)), + def : Pat<(v8i8 (mmx_unpckh_undef VR64:$src, (undef))), (MMX_PUNPCKHBWrr VR64:$src, VR64:$src)>; - def : Pat<(v4i16 (vector_shuffle VR64:$src, (undef), - MMX_UNPCKH_v_undef_shuffle_mask)), + def : Pat<(v4i16 (mmx_unpckh_undef VR64:$src, (undef))), (MMX_PUNPCKHWDrr VR64:$src, VR64:$src)>; - def : Pat<(v2i32 (vector_shuffle VR64:$src, (undef), - MMX_UNPCKH_v_undef_shuffle_mask)), + def : Pat<(v2i32 (mmx_unpckh_undef VR64:$src, (undef))), (MMX_PUNPCKHDQrr VR64:$src, VR64:$src)>; } // Patterns to perform vector shuffling with a zeroed out vector. 
let AddedComplexity = 20 in { - def : Pat<(bc_v2i32 (vector_shuffle immAllZerosV, - (v2i32 (scalar_to_vector (load_mmx addr:$src))), - MMX_UNPCKL_shuffle_mask)), + def : Pat<(bc_v2i32 (mmx_unpckl immAllZerosV, + (v2i32 (scalar_to_vector (load_mmx addr:$src))))), (MMX_PUNPCKLDQrm VR64:$src, VR64:$src)>; } Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=70225&r1=70224&r2=70225&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original) +++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Mon Apr 27 13:41:29 2009 @@ -175,102 +175,107 @@ // SHUFFLE_get_shuf_imm xform function: convert vector_shuffle mask to PSHUF*, // SHUFP* etc. imm. -def SHUFFLE_get_shuf_imm : SDNodeXForm; // SHUFFLE_get_pshufhw_imm xform function: convert vector_shuffle mask to // PSHUFHW imm. -def SHUFFLE_get_pshufhw_imm : SDNodeXForm; // SHUFFLE_get_pshuflw_imm xform function: convert vector_shuffle mask to // PSHUFLW imm. 
-def SHUFFLE_get_pshuflw_imm : SDNodeXForm; -def SSE_splat_mask : PatLeaf<(build_vector), [{ - return X86::isSplatMask(N); -}], SHUFFLE_get_shuf_imm>; - -def SSE_splat_lo_mask : PatLeaf<(build_vector), [{ - return X86::isSplatLoMask(N); +def splat_lo : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + ShuffleVectorSDNode *SVOp = cast(N); + return SVOp->isSplat() && SVOp->getSplatIndex() == 0; }]>; -def MOVDDUP_shuffle_mask : PatLeaf<(build_vector), [{ - return X86::isMOVDDUPMask(N); +def movddup : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isMOVDDUPMask(cast(N)); }]>; -def MOVHLPS_shuffle_mask : PatLeaf<(build_vector), [{ - return X86::isMOVHLPSMask(N); +def movhlps : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isMOVHLPSMask(cast(N)); }]>; -def MOVHLPS_v_undef_shuffle_mask : PatLeaf<(build_vector), [{ - return X86::isMOVHLPS_v_undef_Mask(N); +def movhlps_undef : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isMOVHLPS_v_undef_Mask(cast(N)); }]>; -def MOVHP_shuffle_mask : PatLeaf<(build_vector), [{ - return X86::isMOVHPMask(N); +def movhp : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isMOVHPMask(cast(N)); }]>; -def MOVLP_shuffle_mask : PatLeaf<(build_vector), [{ - return X86::isMOVLPMask(N); +def movlp : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isMOVLPMask(cast(N)); }]>; -def MOVL_shuffle_mask : PatLeaf<(build_vector), [{ - return X86::isMOVLMask(N); +def movl : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isMOVLMask(cast(N)); }]>; -def MOVSHDUP_shuffle_mask : PatLeaf<(build_vector), [{ - return X86::isMOVSHDUPMask(N); +def movshdup : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return 
X86::isMOVSHDUPMask(cast(N)); }]>; -def MOVSLDUP_shuffle_mask : PatLeaf<(build_vector), [{ - return X86::isMOVSLDUPMask(N); +def movsldup : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isMOVSLDUPMask(cast(N)); }]>; -def UNPCKL_shuffle_mask : PatLeaf<(build_vector), [{ - return X86::isUNPCKLMask(N); +def unpckl : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isUNPCKLMask(cast(N)); }]>; -def UNPCKH_shuffle_mask : PatLeaf<(build_vector), [{ - return X86::isUNPCKHMask(N); +def unpckh : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isUNPCKHMask(cast(N)); }]>; -def UNPCKL_v_undef_shuffle_mask : PatLeaf<(build_vector), [{ - return X86::isUNPCKL_v_undef_Mask(N); +def unpckl_undef : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isUNPCKL_v_undef_Mask(cast(N)); }]>; -def UNPCKH_v_undef_shuffle_mask : PatLeaf<(build_vector), [{ - return X86::isUNPCKH_v_undef_Mask(N); +def unpckh_undef : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isUNPCKH_v_undef_Mask(cast(N)); }]>; -def PSHUFD_shuffle_mask : PatLeaf<(build_vector), [{ - return X86::isPSHUFDMask(N); -}], SHUFFLE_get_shuf_imm>; - -def PSHUFHW_shuffle_mask : PatLeaf<(build_vector), [{ - return X86::isPSHUFHWMask(N); -}], SHUFFLE_get_pshufhw_imm>; - -def PSHUFLW_shuffle_mask : PatLeaf<(build_vector), [{ - return X86::isPSHUFLWMask(N); -}], SHUFFLE_get_pshuflw_imm>; - -def SHUFP_unary_shuffle_mask : PatLeaf<(build_vector), [{ - return X86::isPSHUFDMask(N); +def pshufd : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isPSHUFDMask(cast(N)); }], SHUFFLE_get_shuf_imm>; -def SHUFP_shuffle_mask : PatLeaf<(build_vector), [{ - return X86::isSHUFPMask(N); +def shufp : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return 
X86::isSHUFPMask(cast(N)); }], SHUFFLE_get_shuf_imm>; -def PSHUFD_binary_shuffle_mask : PatLeaf<(build_vector), [{ - return X86::isSHUFPMask(N); -}], SHUFFLE_get_shuf_imm>; +def pshufhw : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isPSHUFHWMask(cast(N)); +}], SHUFFLE_get_pshufhw_imm>; +def pshuflw : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isPSHUFLWMask(cast(N)); +}], SHUFFLE_get_pshuflw_imm>; //===----------------------------------------------------------------------===// // SSE scalar FP Instructions @@ -704,16 +709,14 @@ (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2), "movlps\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v4f32 (vector_shuffle VR128:$src1, - (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2)))), - MOVLP_shuffle_mask)))]>; + (movlp VR128:$src1, + (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))]>; def MOVHPSrm : PSI<0x16, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2), "movhps\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v4f32 (vector_shuffle VR128:$src1, - (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2)))), - MOVHP_shuffle_mask)))]>; + (movhp VR128:$src1, + (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))]>; } // AddedComplexity } // Constraints = "$src1 = $dst" @@ -728,29 +731,25 @@ def MOVHPSmr : PSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), "movhps\t{$src, $dst|$dst, $src}", [(store (f64 (vector_extract - (v2f64 (vector_shuffle - (bc_v2f64 (v4f32 VR128:$src)), (undef), - UNPCKH_shuffle_mask)), (iPTR 0))), - addr:$dst)]>; + (unpckh (bc_v2f64 (v4f32 VR128:$src)), + (undef)), (iPTR 0))), addr:$dst)]>; let Constraints = "$src1 = $dst" in { let AddedComplexity = 20 in { def MOVLHPSrr : PSI<0x16, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "movlhps\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v4f32 (vector_shuffle VR128:$src1, VR128:$src2, - 
MOVHP_shuffle_mask)))]>; + (v4f32 (movhp VR128:$src1, VR128:$src2)))]>; def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "movhlps\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v4f32 (vector_shuffle VR128:$src1, VR128:$src2, - MOVHLPS_shuffle_mask)))]>; + (v4f32 (movhlps VR128:$src1, VR128:$src2)))]>; } // AddedComplexity } // Constraints = "$src1 = $dst" let AddedComplexity = 20 in -def : Pat<(v4f32 (vector_shuffle VR128:$src, (undef), MOVDDUP_shuffle_mask)), +def : Pat<(v4f32 (movddup VR128:$src, (undef))), (MOVLHPSrr VR128:$src, VR128:$src)>, Requires<[HasSSE1]>; @@ -908,51 +907,41 @@ let isConvertibleToThreeAddress = 1 in // Convert to pshufd def SHUFPSrri : PSIi8<0xC6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, - VR128:$src2, i32i8imm:$src3), + VR128:$src2, i8imm:$src3), "shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(set VR128:$dst, - (v4f32 (vector_shuffle - VR128:$src1, VR128:$src2, - SHUFP_shuffle_mask:$src3)))]>; + (v4f32 (shufp:$src3 VR128:$src1, VR128:$src2)))]>; def SHUFPSrmi : PSIi8<0xC6, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, - f128mem:$src2, i32i8imm:$src3), + f128mem:$src2, i8imm:$src3), "shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(set VR128:$dst, - (v4f32 (vector_shuffle - VR128:$src1, (memopv4f32 addr:$src2), - SHUFP_shuffle_mask:$src3)))]>; + (v4f32 (shufp:$src3 + VR128:$src1, (memopv4f32 addr:$src2))))]>; let AddedComplexity = 10 in { def UNPCKHPSrr : PSI<0x15, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "unpckhps\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v4f32 (vector_shuffle - VR128:$src1, VR128:$src2, - UNPCKH_shuffle_mask)))]>; + (v4f32 (unpckh VR128:$src1, VR128:$src2)))]>; def UNPCKHPSrm : PSI<0x15, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2), "unpckhps\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v4f32 (vector_shuffle - VR128:$src1, (memopv4f32 addr:$src2), - UNPCKH_shuffle_mask)))]>; + (v4f32 (unpckh VR128:$src1, + 
(memopv4f32 addr:$src2))))]>; def UNPCKLPSrr : PSI<0x14, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "unpcklps\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v4f32 (vector_shuffle - VR128:$src1, VR128:$src2, - UNPCKL_shuffle_mask)))]>; + (v4f32 (unpckl VR128:$src1, VR128:$src2)))]>; def UNPCKLPSrm : PSI<0x14, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2), "unpcklps\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v4f32 (vector_shuffle - VR128:$src1, (memopv4f32 addr:$src2), - UNPCKL_shuffle_mask)))]>; + (unpckl VR128:$src1, (memopv4f32 addr:$src2)))]>; } // AddedComplexity } // Constraints = "$src1 = $dst" @@ -1044,8 +1033,7 @@ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "movss\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v4f32 (vector_shuffle VR128:$src1, VR128:$src2, - MOVL_shuffle_mask)))]>; + (v4f32 (movl VR128:$src1, VR128:$src2)))]>; } // Move to lower bits of a VR128 and zeroing upper bits. @@ -1451,16 +1439,14 @@ (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2), "movlpd\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v2f64 (vector_shuffle VR128:$src1, - (scalar_to_vector (loadf64 addr:$src2)), - MOVLP_shuffle_mask)))]>; + (v2f64 (movlp VR128:$src1, + (scalar_to_vector (loadf64 addr:$src2)))))]>; def MOVHPDrm : PDI<0x16, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2), "movhpd\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v2f64 (vector_shuffle VR128:$src1, - (scalar_to_vector (loadf64 addr:$src2)), - MOVHP_shuffle_mask)))]>; + (v2f64 (movhp VR128:$src1, + (scalar_to_vector (loadf64 addr:$src2)))))]>; } // AddedComplexity } // Constraints = "$src1 = $dst" @@ -1474,9 +1460,8 @@ def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), "movhpd\t{$src, $dst|$dst, $src}", [(store (f64 (vector_extract - (v2f64 (vector_shuffle VR128:$src, (undef), - UNPCKH_shuffle_mask)), (iPTR 0))), - addr:$dst)]>; + (v2f64 (unpckh VR128:$src, (undef))), + (iPTR 0))), addr:$dst)]>; // SSE2 
instructions without OpSize prefix def Int_CVTDQ2PSrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), @@ -1744,48 +1729,39 @@ def SHUFPDrri : PDIi8<0xC6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i8imm:$src3), "shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}", - [(set VR128:$dst, (v2f64 (vector_shuffle - VR128:$src1, VR128:$src2, - SHUFP_shuffle_mask:$src3)))]>; + [(set VR128:$dst, + (v2f64 (shufp:$src3 VR128:$src1, VR128:$src2)))]>; def SHUFPDrmi : PDIi8<0xC6, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2, i8imm:$src3), "shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(set VR128:$dst, - (v2f64 (vector_shuffle - VR128:$src1, (memopv2f64 addr:$src2), - SHUFP_shuffle_mask:$src3)))]>; + (v2f64 (shufp:$src3 + VR128:$src1, (memopv2f64 addr:$src2))))]>; let AddedComplexity = 10 in { def UNPCKHPDrr : PDI<0x15, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "unpckhpd\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v2f64 (vector_shuffle - VR128:$src1, VR128:$src2, - UNPCKH_shuffle_mask)))]>; + (v2f64 (unpckh VR128:$src1, VR128:$src2)))]>; def UNPCKHPDrm : PDI<0x15, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2), "unpckhpd\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v2f64 (vector_shuffle - VR128:$src1, (memopv2f64 addr:$src2), - UNPCKH_shuffle_mask)))]>; + (v2f64 (unpckh VR128:$src1, + (memopv2f64 addr:$src2))))]>; def UNPCKLPDrr : PDI<0x14, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "unpcklpd\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v2f64 (vector_shuffle - VR128:$src1, VR128:$src2, - UNPCKL_shuffle_mask)))]>; + (v2f64 (unpckl VR128:$src1, VR128:$src2)))]>; def UNPCKLPDrm : PDI<0x14, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2), "unpcklpd\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v2f64 (vector_shuffle - VR128:$src1, (memopv2f64 addr:$src2), - UNPCKL_shuffle_mask)))]>; + (unpckl VR128:$src1, (memopv2f64 addr:$src2)))]>; } // 
AddedComplexity } // Constraints = "$src1 = $dst" @@ -2043,49 +2019,43 @@ def PSHUFDri : PDIi8<0x70, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2), "pshufd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR128:$dst, (v4i32 (vector_shuffle - VR128:$src1, (undef), - PSHUFD_shuffle_mask:$src2)))]>; + [(set VR128:$dst, (v4i32 (pshufd:$src2 + VR128:$src1, (undef))))]>; def PSHUFDmi : PDIi8<0x70, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2), "pshufd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR128:$dst, (v4i32 (vector_shuffle + [(set VR128:$dst, (v4i32 (pshufd:$src2 (bc_v4i32(memopv2i64 addr:$src1)), - (undef), - PSHUFD_shuffle_mask:$src2)))]>; + (undef))))]>; // SSE2 with ImmT == Imm8 and XS prefix. def PSHUFHWri : Ii8<0x70, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2), "pshufhw\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR128:$dst, (v8i16 (vector_shuffle - VR128:$src1, (undef), - PSHUFHW_shuffle_mask:$src2)))]>, + [(set VR128:$dst, (v8i16 (pshufhw:$src2 VR128:$src1, + (undef))))]>, XS, Requires<[HasSSE2]>; def PSHUFHWmi : Ii8<0x70, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2), "pshufhw\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR128:$dst, (v8i16 (vector_shuffle - (bc_v8i16 (memopv2i64 addr:$src1)), - (undef), - PSHUFHW_shuffle_mask:$src2)))]>, + [(set VR128:$dst, (v8i16 (pshufhw:$src2 + (bc_v8i16 (memopv2i64 addr:$src1)), + (undef))))]>, XS, Requires<[HasSSE2]>; // SSE2 with ImmT == Imm8 and XD prefix. 
def PSHUFLWri : Ii8<0x70, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2), + (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2), "pshuflw\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR128:$dst, (v8i16 (vector_shuffle - VR128:$src1, (undef), - PSHUFLW_shuffle_mask:$src2)))]>, + [(set VR128:$dst, (v8i16 (pshuflw:$src2 VR128:$src1, + (undef))))]>, XD, Requires<[HasSSE2]>; def PSHUFLWmi : Ii8<0x70, MRMSrcMem, - (outs VR128:$dst), (ins i128mem:$src1, i32i8imm:$src2), + (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2), "pshuflw\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR128:$dst, (v8i16 (vector_shuffle - (bc_v8i16 (memopv2i64 addr:$src1)), - (undef), - PSHUFLW_shuffle_mask:$src2)))]>, + [(set VR128:$dst, (v8i16 (pshuflw:$src2 + (bc_v8i16 (memopv2i64 addr:$src1)), + (undef))))]>, XD, Requires<[HasSSE2]>; @@ -2094,107 +2064,91 @@ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "punpcklbw\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v16i8 (vector_shuffle VR128:$src1, VR128:$src2, - UNPCKL_shuffle_mask)))]>; + (v16i8 (unpckl VR128:$src1, VR128:$src2)))]>; def PUNPCKLBWrm : PDI<0x60, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), "punpcklbw\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v16i8 (vector_shuffle VR128:$src1, - (bc_v16i8 (memopv2i64 addr:$src2)), - UNPCKL_shuffle_mask)))]>; + (unpckl VR128:$src1, + (bc_v16i8 (memopv2i64 addr:$src2))))]>; def PUNPCKLWDrr : PDI<0x61, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "punpcklwd\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v8i16 (vector_shuffle VR128:$src1, VR128:$src2, - UNPCKL_shuffle_mask)))]>; + (v8i16 (unpckl VR128:$src1, VR128:$src2)))]>; def PUNPCKLWDrm : PDI<0x61, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), "punpcklwd\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v8i16 (vector_shuffle VR128:$src1, - (bc_v8i16 (memopv2i64 addr:$src2)), - UNPCKL_shuffle_mask)))]>; + (unpckl VR128:$src1, + (bc_v8i16 
(memopv2i64 addr:$src2))))]>; def PUNPCKLDQrr : PDI<0x62, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "punpckldq\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v4i32 (vector_shuffle VR128:$src1, VR128:$src2, - UNPCKL_shuffle_mask)))]>; + (v4i32 (unpckl VR128:$src1, VR128:$src2)))]>; def PUNPCKLDQrm : PDI<0x62, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), "punpckldq\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v4i32 (vector_shuffle VR128:$src1, - (bc_v4i32 (memopv2i64 addr:$src2)), - UNPCKL_shuffle_mask)))]>; + (unpckl VR128:$src1, + (bc_v4i32 (memopv2i64 addr:$src2))))]>; def PUNPCKLQDQrr : PDI<0x6C, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "punpcklqdq\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v2i64 (vector_shuffle VR128:$src1, VR128:$src2, - UNPCKL_shuffle_mask)))]>; + (v2i64 (unpckl VR128:$src1, VR128:$src2)))]>; def PUNPCKLQDQrm : PDI<0x6C, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), "punpcklqdq\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v2i64 (vector_shuffle VR128:$src1, - (memopv2i64 addr:$src2), - UNPCKL_shuffle_mask)))]>; + (v2i64 (unpckl VR128:$src1, + (memopv2i64 addr:$src2))))]>; def PUNPCKHBWrr : PDI<0x68, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "punpckhbw\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v16i8 (vector_shuffle VR128:$src1, VR128:$src2, - UNPCKH_shuffle_mask)))]>; + (v16i8 (unpckh VR128:$src1, VR128:$src2)))]>; def PUNPCKHBWrm : PDI<0x68, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), "punpckhbw\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (v16i8 (vector_shuffle VR128:$src1, - (bc_v16i8 (memopv2i64 addr:$src2)), - UNPCKH_shuffle_mask)))]>; + [(set VR128:$dst, + (unpckh VR128:$src1, + (bc_v16i8 (memopv2i64 addr:$src2))))]>; def PUNPCKHWDrr : PDI<0x69, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "punpckhwd\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v8i16 (vector_shuffle 
VR128:$src1, VR128:$src2, - UNPCKH_shuffle_mask)))]>; + (v8i16 (unpckh VR128:$src1, VR128:$src2)))]>; def PUNPCKHWDrm : PDI<0x69, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), "punpckhwd\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v8i16 (vector_shuffle VR128:$src1, - (bc_v8i16 (memopv2i64 addr:$src2)), - UNPCKH_shuffle_mask)))]>; + (unpckh VR128:$src1, + (bc_v8i16 (memopv2i64 addr:$src2))))]>; def PUNPCKHDQrr : PDI<0x6A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "punpckhdq\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v4i32 (vector_shuffle VR128:$src1, VR128:$src2, - UNPCKH_shuffle_mask)))]>; + (v4i32 (unpckh VR128:$src1, VR128:$src2)))]>; def PUNPCKHDQrm : PDI<0x6A, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), "punpckhdq\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v4i32 (vector_shuffle VR128:$src1, - (bc_v4i32 (memopv2i64 addr:$src2)), - UNPCKH_shuffle_mask)))]>; + (unpckh VR128:$src1, + (bc_v4i32 (memopv2i64 addr:$src2))))]>; def PUNPCKHQDQrr : PDI<0x6D, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "punpckhqdq\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v2i64 (vector_shuffle VR128:$src1, VR128:$src2, - UNPCKH_shuffle_mask)))]>; + (v2i64 (unpckh VR128:$src1, VR128:$src2)))]>; def PUNPCKHQDQrm : PDI<0x6D, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), "punpckhqdq\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v2i64 (vector_shuffle VR128:$src1, - (memopv2i64 addr:$src2), - UNPCKH_shuffle_mask)))]>; + (v2i64 (unpckh VR128:$src1, + (memopv2i64 addr:$src2))))]>; } // Extract / Insert @@ -2357,8 +2311,7 @@ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "movsd\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v2f64 (vector_shuffle VR128:$src1, VR128:$src2, - MOVL_shuffle_mask)))]>; + (v2f64 (movl VR128:$src1, VR128:$src2)))]>; } // Store / copy lower 64-bits of a XMM register. 
@@ -2449,44 +2402,35 @@ // Move Instructions def MOVSHDUPrr : S3SI<0x16, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "movshdup\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (v4f32 (vector_shuffle - VR128:$src, (undef), - MOVSHDUP_shuffle_mask)))]>; + [(set VR128:$dst, (v4f32 (movshdup + VR128:$src, (undef))))]>; def MOVSHDUPrm : S3SI<0x16, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "movshdup\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (v4f32 (vector_shuffle - (memopv4f32 addr:$src), (undef), - MOVSHDUP_shuffle_mask)))]>; + [(set VR128:$dst, (movshdup + (memopv4f32 addr:$src), (undef)))]>; def MOVSLDUPrr : S3SI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "movsldup\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (v4f32 (vector_shuffle - VR128:$src, (undef), - MOVSLDUP_shuffle_mask)))]>; + [(set VR128:$dst, (v4f32 (movsldup + VR128:$src, (undef))))]>; def MOVSLDUPrm : S3SI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "movsldup\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (v4f32 (vector_shuffle - (memopv4f32 addr:$src), (undef), - MOVSLDUP_shuffle_mask)))]>; + [(set VR128:$dst, (movsldup + (memopv4f32 addr:$src), (undef)))]>; def MOVDDUPrr : S3DI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "movddup\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, - (v2f64 (vector_shuffle VR128:$src, (undef), - MOVDDUP_shuffle_mask)))]>; + [(set VR128:$dst,(v2f64 (movddup VR128:$src, (undef))))]>; def MOVDDUPrm : S3DI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), "movddup\t{$src, $dst|$dst, $src}", [(set VR128:$dst, - (v2f64 (vector_shuffle - (scalar_to_vector (loadf64 addr:$src)), - (undef), MOVDDUP_shuffle_mask)))]>; - -def : Pat<(vector_shuffle - (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src)))), - (undef), MOVDDUP_shuffle_mask), + (v2f64 (movddup (scalar_to_vector (loadf64 addr:$src)), + (undef))))]>; + +def : Pat<(movddup (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src)))), + (undef)), (MOVDDUPrm addr:$src)>, 
Requires<[HasSSE3]>; -def : Pat<(vector_shuffle - (memopv2f64 addr:$src), (undef), MOVDDUP_shuffle_mask), +def : Pat<(movddup (memopv2f64 addr:$src), (undef)), (MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>; @@ -2555,22 +2499,18 @@ // vector_shuffle v1, <1, 1, 3, 3> let AddedComplexity = 15 in -def : Pat<(v4i32 (vector_shuffle VR128:$src, (undef), - MOVSHDUP_shuffle_mask)), +def : Pat<(v4i32 (movshdup VR128:$src, (undef))), (MOVSHDUPrr VR128:$src)>, Requires<[HasSSE3]>; let AddedComplexity = 20 in -def : Pat<(v4i32 (vector_shuffle (bc_v4i32 (memopv2i64 addr:$src)), (undef), - MOVSHDUP_shuffle_mask)), +def : Pat<(v4i32 (movshdup (bc_v4i32 (memopv2i64 addr:$src)), (undef))), (MOVSHDUPrm addr:$src)>, Requires<[HasSSE3]>; // vector_shuffle v1, <0, 0, 2, 2> let AddedComplexity = 15 in - def : Pat<(v4i32 (vector_shuffle VR128:$src, (undef), - MOVSLDUP_shuffle_mask)), + def : Pat<(v4i32 (movsldup VR128:$src, (undef))), (MOVSLDUPrr VR128:$src)>, Requires<[HasSSE3]>; let AddedComplexity = 20 in - def : Pat<(v4i32 (vector_shuffle (bc_v4i32 (memopv2i64 addr:$src)), (undef), - MOVSLDUP_shuffle_mask)), + def : Pat<(v4i32 (movsldup (bc_v4i32 (memopv2i64 addr:$src)), (undef))), (MOVSLDUPrm addr:$src)>, Requires<[HasSSE3]>; //===----------------------------------------------------------------------===// @@ -2911,207 +2851,173 @@ // Splat v2f64 / v2i64 let AddedComplexity = 10 in { -def : Pat<(vector_shuffle (v2f64 VR128:$src), (undef), SSE_splat_lo_mask:$sm), +def : Pat<(splat_lo (v2f64 VR128:$src), (undef)), (UNPCKLPDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; -def : Pat<(vector_shuffle (v2f64 VR128:$src), (undef), UNPCKH_shuffle_mask:$sm), +def : Pat<(unpckh (v2f64 VR128:$src), (undef)), (UNPCKHPDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; -def : Pat<(vector_shuffle (v2i64 VR128:$src), (undef), SSE_splat_lo_mask:$sm), +def : Pat<(splat_lo (v2i64 VR128:$src), (undef)), (PUNPCKLQDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; -def : Pat<(vector_shuffle (v2i64 
VR128:$src), (undef), UNPCKH_shuffle_mask:$sm), +def : Pat<(unpckh (v2i64 VR128:$src), (undef)), (PUNPCKHQDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; } // Special unary SHUFPSrri case. -def : Pat<(v4f32 (vector_shuffle VR128:$src1, (undef), - SHUFP_unary_shuffle_mask:$sm)), - (SHUFPSrri VR128:$src1, VR128:$src1, SHUFP_unary_shuffle_mask:$sm)>, +def : Pat<(v4f32 (pshufd:$src3 VR128:$src1, (undef))), + (SHUFPSrri VR128:$src1, VR128:$src1, + (SHUFFLE_get_shuf_imm VR128:$src3))>, Requires<[HasSSE1]>; +let AddedComplexity = 5 in +def : Pat<(v4f32 (pshufd:$src2 VR128:$src1, (undef))), + (PSHUFDri VR128:$src1, (SHUFFLE_get_shuf_imm VR128:$src2))>, + Requires<[HasSSE2]>; +// Special unary SHUFPDrri case. +def : Pat<(v2i64 (pshufd:$src3 VR128:$src1, (undef))), + (SHUFPDrri VR128:$src1, VR128:$src1, + (SHUFFLE_get_shuf_imm VR128:$src3))>, + Requires<[HasSSE2]>; // Special unary SHUFPDrri case. -def : Pat<(v2f64 (vector_shuffle VR128:$src1, (undef), - SHUFP_unary_shuffle_mask:$sm)), - (SHUFPDrri VR128:$src1, VR128:$src1, SHUFP_unary_shuffle_mask:$sm)>, +def : Pat<(v2f64 (pshufd:$src3 VR128:$src1, (undef))), + (SHUFPDrri VR128:$src1, VR128:$src1, + (SHUFFLE_get_shuf_imm VR128:$src3))>, Requires<[HasSSE2]>; // Unary v4f32 shuffle with PSHUF* in order to fold a load. -def : Pat<(vector_shuffle (bc_v4i32 (memopv4f32 addr:$src1)), (undef), - SHUFP_unary_shuffle_mask:$sm), - (PSHUFDmi addr:$src1, SHUFP_unary_shuffle_mask:$sm)>, +def : Pat<(pshufd:$src2 (bc_v4i32 (memopv4f32 addr:$src1)), (undef)), + (PSHUFDmi addr:$src1, (SHUFFLE_get_shuf_imm VR128:$src2))>, Requires<[HasSSE2]>; // Special binary v4i32 shuffle cases with SHUFPS. 
-def : Pat<(v4i32 (vector_shuffle VR128:$src1, (v4i32 VR128:$src2), - PSHUFD_binary_shuffle_mask:$sm)), - (SHUFPSrri VR128:$src1, VR128:$src2, PSHUFD_binary_shuffle_mask:$sm)>, +def : Pat<(v4i32 (shufp:$src3 VR128:$src1, (v4i32 VR128:$src2))), + (SHUFPSrri VR128:$src1, VR128:$src2, + (SHUFFLE_get_shuf_imm VR128:$src3))>, Requires<[HasSSE2]>; -def : Pat<(v4i32 (vector_shuffle VR128:$src1, - (bc_v4i32 (memopv2i64 addr:$src2)), PSHUFD_binary_shuffle_mask:$sm)), - (SHUFPSrmi VR128:$src1, addr:$src2, PSHUFD_binary_shuffle_mask:$sm)>, +def : Pat<(v4i32 (shufp:$src3 VR128:$src1, (bc_v4i32 (memopv2i64 addr:$src2)))), + (SHUFPSrmi VR128:$src1, addr:$src2, + (SHUFFLE_get_shuf_imm VR128:$src3))>, Requires<[HasSSE2]>; // Special binary v2i64 shuffle cases using SHUFPDrri. -def : Pat<(v2i64 (vector_shuffle VR128:$src1, VR128:$src2, - SHUFP_shuffle_mask:$sm)), - (SHUFPDrri VR128:$src1, VR128:$src2, SHUFP_shuffle_mask:$sm)>, +def : Pat<(v2i64 (shufp:$src3 VR128:$src1, VR128:$src2)), + (SHUFPDrri VR128:$src1, VR128:$src2, + (SHUFFLE_get_shuf_imm VR128:$src3))>, Requires<[HasSSE2]>; -// Special unary SHUFPDrri case. 
-def : Pat<(v2i64 (vector_shuffle VR128:$src1, (undef), - SHUFP_unary_shuffle_mask:$sm)), - (SHUFPDrri VR128:$src1, VR128:$src1, SHUFP_unary_shuffle_mask:$sm)>, - Requires<[HasSSE2]>; // vector_shuffle v1, <undef>, <0, 0, 1, 1, ...> let AddedComplexity = 15 in { -def : Pat<(v4i32 (vector_shuffle VR128:$src, (undef), - UNPCKL_v_undef_shuffle_mask:$sm)), - (PSHUFDri VR128:$src, PSHUFD_shuffle_mask:$sm)>, +def : Pat<(v4i32 (unpckl_undef:$src2 VR128:$src, (undef))), + (PSHUFDri VR128:$src, (SHUFFLE_get_shuf_imm VR128:$src2))>, Requires<[OptForSpeed, HasSSE2]>; -def : Pat<(v4f32 (vector_shuffle VR128:$src, (undef), - UNPCKL_v_undef_shuffle_mask:$sm)), - (PSHUFDri VR128:$src, PSHUFD_shuffle_mask:$sm)>, +def : Pat<(v4f32 (unpckl_undef:$src2 VR128:$src, (undef))), + (PSHUFDri VR128:$src, (SHUFFLE_get_shuf_imm VR128:$src2))>, Requires<[OptForSpeed, HasSSE2]>; } let AddedComplexity = 10 in { -def : Pat<(v4f32 (vector_shuffle VR128:$src, (undef), - UNPCKL_v_undef_shuffle_mask)), +def : Pat<(v4f32 (unpckl_undef VR128:$src, (undef))), (UNPCKLPSrr VR128:$src, VR128:$src)>, Requires<[HasSSE1]>; -def : Pat<(v16i8 (vector_shuffle VR128:$src, (undef), - UNPCKL_v_undef_shuffle_mask)), +def : Pat<(v16i8 (unpckl_undef VR128:$src, (undef))), (PUNPCKLBWrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; -def : Pat<(v8i16 (vector_shuffle VR128:$src, (undef), - UNPCKL_v_undef_shuffle_mask)), +def : Pat<(v8i16 (unpckl_undef VR128:$src, (undef))), (PUNPCKLWDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; -def : Pat<(v4i32 (vector_shuffle VR128:$src, (undef), - UNPCKL_v_undef_shuffle_mask)), +def : Pat<(v4i32 (unpckl_undef VR128:$src, (undef))), (PUNPCKLDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; } // vector_shuffle v1, <undef>, <2, 2, 3, 3, ...> let AddedComplexity = 15 in { -def : Pat<(v4i32 (vector_shuffle VR128:$src, (undef), - UNPCKH_v_undef_shuffle_mask:$sm)), - (PSHUFDri VR128:$src, PSHUFD_shuffle_mask:$sm)>, +def : Pat<(v4i32 (unpckh_undef:$src2 VR128:$src, (undef))), + (PSHUFDri
VR128:$src, (SHUFFLE_get_shuf_imm VR128:$src2))>, Requires<[OptForSpeed, HasSSE2]>; -def : Pat<(v4f32 (vector_shuffle VR128:$src, (undef), - UNPCKH_v_undef_shuffle_mask:$sm)), - (PSHUFDri VR128:$src, PSHUFD_shuffle_mask:$sm)>, +def : Pat<(v4f32 (unpckh_undef:$src2 VR128:$src, (undef))), + (PSHUFDri VR128:$src, (SHUFFLE_get_shuf_imm VR128:$src2))>, Requires<[OptForSpeed, HasSSE2]>; } let AddedComplexity = 10 in { -def : Pat<(v4f32 (vector_shuffle VR128:$src, (undef), - UNPCKH_v_undef_shuffle_mask)), +def : Pat<(v4f32 (unpckh_undef VR128:$src, (undef))), (UNPCKHPSrr VR128:$src, VR128:$src)>, Requires<[HasSSE1]>; -def : Pat<(v16i8 (vector_shuffle VR128:$src, (undef), - UNPCKH_v_undef_shuffle_mask)), +def : Pat<(v16i8 (unpckh_undef VR128:$src, (undef))), (PUNPCKHBWrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; -def : Pat<(v8i16 (vector_shuffle VR128:$src, (undef), - UNPCKH_v_undef_shuffle_mask)), +def : Pat<(v8i16 (unpckh_undef VR128:$src, (undef))), (PUNPCKHWDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; -def : Pat<(v4i32 (vector_shuffle VR128:$src, (undef), - UNPCKH_v_undef_shuffle_mask)), +def : Pat<(v4i32 (unpckh_undef VR128:$src, (undef))), (PUNPCKHDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; } let AddedComplexity = 20 in { // vector_shuffle v1, v2 <0, 1, 4, 5> using MOVLHPS -def : Pat<(v4i32 (vector_shuffle VR128:$src1, VR128:$src2, - MOVHP_shuffle_mask)), +def : Pat<(v4i32 (movhp VR128:$src1, VR128:$src2)), (MOVLHPSrr VR128:$src1, VR128:$src2)>; // vector_shuffle v1, v2 <6, 7, 2, 3> using MOVHLPS -def : Pat<(v4i32 (vector_shuffle VR128:$src1, VR128:$src2, - MOVHLPS_shuffle_mask)), +def : Pat<(v4i32 (movhlps VR128:$src1, VR128:$src2)), (MOVHLPSrr VR128:$src1, VR128:$src2)>; // vector_shuffle v1, undef <2, ?, ?, ?> using MOVHLPS -def : Pat<(v4f32 (vector_shuffle VR128:$src1, (undef), - MOVHLPS_v_undef_shuffle_mask)), +def : Pat<(v4f32 (movhlps_undef VR128:$src1, (undef))), (MOVHLPSrr VR128:$src1, VR128:$src1)>; -def : Pat<(v4i32 (vector_shuffle 
VR128:$src1, (undef), - MOVHLPS_v_undef_shuffle_mask)), +def : Pat<(v4i32 (movhlps_undef VR128:$src1, (undef))), (MOVHLPSrr VR128:$src1, VR128:$src1)>; } let AddedComplexity = 20 in { // vector_shuffle v1, (load v2) <4, 5, 2, 3> using MOVLPS // vector_shuffle v1, (load v2) <0, 1, 4, 5> using MOVHPS -def : Pat<(v4f32 (vector_shuffle VR128:$src1, (load addr:$src2), - MOVLP_shuffle_mask)), +def : Pat<(v4f32 (movlp VR128:$src1, (load addr:$src2))), (MOVLPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>; -def : Pat<(v2f64 (vector_shuffle VR128:$src1, (load addr:$src2), - MOVLP_shuffle_mask)), +def : Pat<(v2f64 (movlp VR128:$src1, (load addr:$src2))), (MOVLPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>; -def : Pat<(v4f32 (vector_shuffle VR128:$src1, (load addr:$src2), - MOVHP_shuffle_mask)), +def : Pat<(v4f32 (movhp VR128:$src1, (load addr:$src2))), (MOVHPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>; -def : Pat<(v2f64 (vector_shuffle VR128:$src1, (load addr:$src2), - MOVHP_shuffle_mask)), +def : Pat<(v2f64 (movhp VR128:$src1, (load addr:$src2))), (MOVHPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>; -def : Pat<(v4i32 (vector_shuffle VR128:$src1, (load addr:$src2), - MOVLP_shuffle_mask)), +def : Pat<(v4i32 (movlp VR128:$src1, (load addr:$src2))), (MOVLPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>; -def : Pat<(v2i64 (vector_shuffle VR128:$src1, (load addr:$src2), - MOVLP_shuffle_mask)), +def : Pat<(v2i64 (movlp VR128:$src1, (load addr:$src2))), (MOVLPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>; -def : Pat<(v4i32 (vector_shuffle VR128:$src1, (load addr:$src2), - MOVHP_shuffle_mask)), +def : Pat<(v4i32 (movhp VR128:$src1, (load addr:$src2))), (MOVHPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>; -def : Pat<(v2i64 (vector_shuffle VR128:$src1, (load addr:$src2), - MOVHP_shuffle_mask)), +def : Pat<(v2i64 (movhp VR128:$src1, (load addr:$src2))), (MOVHPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>; } // (store (vector_shuffle (load addr), 
v2, <4, 5, 2, 3>), addr) using MOVLPS // (store (vector_shuffle (load addr), v2, <0, 1, 4, 5>), addr) using MOVHPS -def : Pat<(store (v4f32 (vector_shuffle (load addr:$src1), VR128:$src2, - MOVLP_shuffle_mask)), addr:$src1), +def : Pat<(store (v4f32 (movlp (load addr:$src1), VR128:$src2)), addr:$src1), (MOVLPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>; -def : Pat<(store (v2f64 (vector_shuffle (load addr:$src1), VR128:$src2, - MOVLP_shuffle_mask)), addr:$src1), +def : Pat<(store (v2f64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1), (MOVLPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>; -def : Pat<(store (v4f32 (vector_shuffle (load addr:$src1), VR128:$src2, - MOVHP_shuffle_mask)), addr:$src1), +def : Pat<(store (v4f32 (movhp (load addr:$src1), VR128:$src2)), addr:$src1), (MOVHPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>; -def : Pat<(store (v2f64 (vector_shuffle (load addr:$src1), VR128:$src2, - MOVHP_shuffle_mask)), addr:$src1), +def : Pat<(store (v2f64 (movhp (load addr:$src1), VR128:$src2)), addr:$src1), (MOVHPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>; -def : Pat<(store (v4i32 (vector_shuffle - (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2, - MOVLP_shuffle_mask)), addr:$src1), +def : Pat<(store (v4i32 (movlp (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)), + addr:$src1), (MOVLPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>; -def : Pat<(store (v2i64 (vector_shuffle (load addr:$src1), VR128:$src2, - MOVLP_shuffle_mask)), addr:$src1), +def : Pat<(store (v2i64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1), (MOVLPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>; -def : Pat<(store (v4i32 (vector_shuffle - (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2, - MOVHP_shuffle_mask)), addr:$src1), +def : Pat<(store (v4i32 (movhp (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)), + addr:$src1), (MOVHPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>; -def : Pat<(store (v2i64 (vector_shuffle (load addr:$src1), VR128:$src2, - 
MOVHP_shuffle_mask)), addr:$src1), +def : Pat<(store (v2i64 (movhp (load addr:$src1), VR128:$src2)), addr:$src1), (MOVHPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>; let AddedComplexity = 15 in { // Setting the lowest element in the vector. -def : Pat<(v4i32 (vector_shuffle VR128:$src1, VR128:$src2, - MOVL_shuffle_mask)), +def : Pat<(v4i32 (movl VR128:$src1, VR128:$src2)), (MOVLPSrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>; -def : Pat<(v2i64 (vector_shuffle VR128:$src1, VR128:$src2, - MOVL_shuffle_mask)), +def : Pat<(v2i64 (movl VR128:$src1, VR128:$src2)), (MOVLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>; // vector_shuffle v1, v2 <4, 5, 2, 3> using MOVLPDrr (movsd) -def : Pat<(v4f32 (vector_shuffle VR128:$src1, VR128:$src2, - MOVLP_shuffle_mask)), +def : Pat<(v4f32 (movlp VR128:$src1, VR128:$src2)), (MOVLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>; -def : Pat<(v4i32 (vector_shuffle VR128:$src1, VR128:$src2, - MOVLP_shuffle_mask)), +def : Pat<(v4i32 (movlp VR128:$src1, VR128:$src2)), (MOVLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>; } // Set lowest element and zero upper elements. 
let AddedComplexity = 15 in -def : Pat<(v2f64 (vector_shuffle immAllZerosV_bc, VR128:$src, - MOVL_shuffle_mask)), +def : Pat<(v2f64 (movl immAllZerosV_bc, VR128:$src)), (MOVZPQILo2PQIrr VR128:$src)>, Requires<[HasSSE2]>; def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))), (MOVZPQILo2PQIrr VR128:$src)>, Requires<[HasSSE2]>; Modified: llvm/trunk/test/CodeGen/Generic/vector-casts.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Generic/vector-casts.ll?rev=70225&r1=70224&r2=70225&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/Generic/vector-casts.ll (original) +++ llvm/trunk/test/CodeGen/Generic/vector-casts.ll Mon Apr 27 13:41:29 2009 @@ -1,45 +1,9 @@ ; RUN: llvm-as < %s | llc ; PR2671 -define void @a(<2 x double>* %p, <2 x i8>* %q) { - %t = load <2 x double>* %p - %r = fptosi <2 x double> %t to <2 x i8> - store <2 x i8> %r, <2 x i8>* %q - ret void -} -define void @b(<2 x double>* %p, <2 x i8>* %q) { - %t = load <2 x double>* %p - %r = fptoui <2 x double> %t to <2 x i8> - store <2 x i8> %r, <2 x i8>* %q - ret void -} -define void @c(<2 x i8>* %p, <2 x double>* %q) { - %t = load <2 x i8>* %p - %r = sitofp <2 x i8> %t to <2 x double> - store <2 x double> %r, <2 x double>* %q - ret void -} -define void @d(<2 x i8>* %p, <2 x double>* %q) { - %t = load <2 x i8>* %p - %r = uitofp <2 x i8> %t to <2 x double> - store <2 x double> %r, <2 x double>* %q - ret void -} -define void @e(<2 x i8>* %p, <2 x i16>* %q) { - %t = load <2 x i8>* %p - %r = sext <2 x i8> %t to <2 x i16> - store <2 x i16> %r, <2 x i16>* %q - ret void -} -define void @f(<2 x i8>* %p, <2 x i16>* %q) { - %t = load <2 x i8>* %p - %r = zext <2 x i8> %t to <2 x i16> - store <2 x i16> %r, <2 x i16>* %q - ret void -} define void @g(<2 x i16>* %p, <2 x i8>* %q) { - %t = load <2 x i16>* %p - %r = trunc <2 x i16> %t to <2 x i8> - store <2 x i8> %r, <2 x i8>* %q - ret void + %t = load <2 x i16>* %p + %r = trunc <2 x i16> %t to <2 x i8> 
+ store <2 x i8> %r, <2 x i8>* %q + ret void } Modified: llvm/trunk/test/CodeGen/X86/vec_clear.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_clear.ll?rev=70225&r1=70224&r2=70225&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/X86/vec_clear.ll (original) +++ llvm/trunk/test/CodeGen/X86/vec_clear.ll Mon Apr 27 13:41:29 2009 @@ -1,5 +1,7 @@ -; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -mtriple=i386-apple-darwin | not grep and -; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -mtriple=i386-apple-darwin | grep psrldq +; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -mtriple=i386-apple-darwin -o %t -f +; RUN: not grep and %t +; RUN: not grep psrldq %t +; RUN: grep xorps %t define <4 x float> @test(<4 x float>* %v1) nounwind { %tmp = load <4 x float>* %v1 ; <<4 x float>> [#uses=1] Modified: llvm/trunk/test/CodeGen/X86/vec_shuffle-10.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_shuffle-10.ll?rev=70225&r1=70224&r2=70225&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/X86/vec_shuffle-10.ll (original) +++ llvm/trunk/test/CodeGen/X86/vec_shuffle-10.ll Mon Apr 27 13:41:29 2009 @@ -1,9 +1,7 @@ -; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | \ -; RUN: grep unpcklps | count 1 -; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | \ -; RUN: grep unpckhps | count 1 -; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | \ -; RUN: not grep {sub.*esp} +; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -o %t -f +; RUN: grep unpcklps %t | count 1 +; RUN: grep pshufd %t | count 1 +; RUN: not grep {sub.*esp} %t define void @test(<4 x float>* %res, <4 x float>* %A, <4 x float>* %B) { %tmp = load <4 x float>* %B ; <<4 x float>> [#uses=2] Modified: llvm/trunk/test/CodeGen/X86/vec_shuffle-16.ll URL: 
http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_shuffle-16.ll?rev=70225&r1=70224&r2=70225&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/X86/vec_shuffle-16.ll (original) +++ llvm/trunk/test/CodeGen/X86/vec_shuffle-16.ll Mon Apr 27 13:41:29 2009 @@ -1,8 +1,10 @@ -; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse,-sse2 | grep shufps | count 4 -; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse,-sse2 -mtriple=i386-apple-darwin | grep mov | count 2 -; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep pshufd | count 4 -; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep shufps -; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -mtriple=i386-apple-darwin | not grep mov +; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse,-sse2 -mtriple=i386-apple-darwin -o %t -f +; RUN: grep shufps %t | count 4 +; RUN: grep movaps %t | count 2 +; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -mtriple=i386-apple-darwin -o %t -f +; RUN: grep pshufd %t | count 4 +; RUN: not grep shufps %t +; RUN: not grep mov %t define <4 x float> @t1(<4 x float> %a, <4 x float> %b) nounwind { %tmp1 = shufflevector <4 x float> %b, <4 x float> undef, <4 x i32> zeroinitializer Modified: llvm/trunk/test/CodeGen/X86/vec_shuffle-30.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_shuffle-30.ll?rev=70225&r1=70224&r2=70225&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/X86/vec_shuffle-30.ll (original) +++ llvm/trunk/test/CodeGen/X86/vec_shuffle-30.ll Mon Apr 27 13:41:29 2009 @@ -1,8 +1,7 @@ ; RUN: llvm-as < %s | llc -march=x86 -mattr=sse41 -disable-mmx -o %t -f ; RUN: grep pshufhw %t | grep 161 | count 1 -; RUN: grep pslldq %t | count 1 - - +; RUN: grep shufps %t | count 1 +; RUN: not grep pslldq %t ; Test case when creating pshufhw, we incorrectly set the higher order bit ; for an undef, @@ -20,4 +19,4 @@ %0 = 
shufflevector <4 x i32> %in, <4 x i32> , <4 x i32> < i32 undef, i32 5, i32 undef, i32 2> store <4 x i32> %0, <4 x i32>* %dest ret void -} \ No newline at end of file +} Modified: llvm/trunk/test/CodeGen/X86/vec_shuffle-31.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_shuffle-31.ll?rev=70225&r1=70224&r2=70225&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/X86/vec_shuffle-31.ll (original) +++ llvm/trunk/test/CodeGen/X86/vec_shuffle-31.ll Mon Apr 27 13:41:29 2009 @@ -1,6 +1,6 @@ ; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah -o %t -f ; RUN: grep pextrw %t | count 1 -; RUN: grep punpcklqdq %t | count 1 +; RUN: grep movlhps %t | count 1 ; RUN: grep pshufhw %t | count 1 ; RUN: grep pinsrw %t | count 1 ; RUN: llvm-as < %s | llc -march=x86 -mcpu=core2 -o %t -f Modified: llvm/trunk/utils/TableGen/CodeGenDAGPatterns.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/TableGen/CodeGenDAGPatterns.cpp?rev=70225&r1=70224&r2=70225&view=diff ============================================================================== --- llvm/trunk/utils/TableGen/CodeGenDAGPatterns.cpp (original) +++ llvm/trunk/utils/TableGen/CodeGenDAGPatterns.cpp Mon Apr 27 13:41:29 2009 @@ -194,10 +194,6 @@ ConstraintType = SDTCisOpSmallerThanOp; x.SDTCisOpSmallerThanOp_Info.BigOperandNum = R->getValueAsInt("BigOperandNum"); - } else if (R->isSubClassOf("SDTCisIntVectorOfSameSize")) { - ConstraintType = SDTCisIntVectorOfSameSize; - x.SDTCisIntVectorOfSameSize_Info.OtherOperandNum = - R->getValueAsInt("OtherOpNum"); } else if (R->isSubClassOf("SDTCisEltOfVec")) { ConstraintType = SDTCisEltOfVec; x.SDTCisEltOfVec_Info.OtherOperandNum = @@ -365,23 +361,9 @@ } return MadeChange; } - case SDTCisIntVectorOfSameSize: { - TreePatternNode *OtherOperand = - getOperandNum(x.SDTCisIntVectorOfSameSize_Info.OtherOperandNum, - N, NumResults); - if (OtherOperand->hasTypeSet()) { - if 
(!isVector(OtherOperand->getTypeNum(0))) - TP.error(N->getOperator()->getName() + " VT operand must be a vector!"); - MVT IVT = OtherOperand->getTypeNum(0); - unsigned NumElements = IVT.getVectorNumElements(); - IVT = MVT::getIntVectorWithNumElements(NumElements); - return NodeToApply->UpdateNodeType(IVT.getSimpleVT(), TP); - } - return false; - } case SDTCisEltOfVec: { TreePatternNode *OtherOperand = - getOperandNum(x.SDTCisIntVectorOfSameSize_Info.OtherOperandNum, + getOperandNum(x.SDTCisEltOfVec_Info.OtherOperandNum, N, NumResults); if (OtherOperand->hasTypeSet()) { if (!isVector(OtherOperand->getTypeNum(0))) @@ -925,25 +907,6 @@ if (NI.getNumResults() == 0) MadeChange |= UpdateNodeType(MVT::isVoid, TP); - // If this is a vector_shuffle operation, apply types to the build_vector - // operation. The types of the integers don't matter, but this ensures they - // won't get checked. - if (getOperator()->getName() == "vector_shuffle" && - getChild(2)->getOperator()->getName() == "build_vector") { - TreePatternNode *BV = getChild(2); - const std::vector<MVT::SimpleValueType> &LegalVTs - = CDP.getTargetInfo().getLegalValueTypes(); - MVT::SimpleValueType LegalIntVT = MVT::Other; - for (unsigned i = 0, e = LegalVTs.size(); i != e; ++i) - if (isInteger(LegalVTs[i]) && !isVector(LegalVTs[i])) { - LegalIntVT = LegalVTs[i]; - break; - } - assert(LegalIntVT != MVT::Other && "No legal integer VT?"); - - for (unsigned i = 0, e = BV->getNumChildren(); i != e; ++i) - MadeChange |= BV->getChild(i)->UpdateNodeType(LegalIntVT, TP); - } return MadeChange; } else if (getOperator()->isSubClassOf("Instruction")) { const DAGInstruction &Inst = CDP.getInstruction(getOperator()); @@ -2086,7 +2049,7 @@ IterateInference |= Result->getTree(0)-> UpdateNodeType(Pattern->getTree(0)->getExtTypes(), *Result); } while (IterateInference); - + // Verify that we inferred enough types that we can do something with the // pattern and result. If these fire the user has to add type casts.
if (!InferredAllPatternTypes) Modified: llvm/trunk/utils/TableGen/CodeGenDAGPatterns.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/TableGen/CodeGenDAGPatterns.h?rev=70225&r1=70224&r2=70225&view=diff ============================================================================== --- llvm/trunk/utils/TableGen/CodeGenDAGPatterns.h (original) +++ llvm/trunk/utils/TableGen/CodeGenDAGPatterns.h Mon Apr 27 13:41:29 2009 @@ -62,8 +62,7 @@ unsigned OperandNo; // The operand # this constraint applies to. enum { SDTCisVT, SDTCisPtrTy, SDTCisInt, SDTCisFP, SDTCisSameAs, - SDTCisVTSmallerThanOp, SDTCisOpSmallerThanOp, SDTCisIntVectorOfSameSize, - SDTCisEltOfVec + SDTCisVTSmallerThanOp, SDTCisOpSmallerThanOp, SDTCisEltOfVec } ConstraintType; union { // The discriminated union. @@ -81,9 +80,6 @@ } SDTCisOpSmallerThanOp_Info; struct { unsigned OtherOperandNum; - } SDTCisIntVectorOfSameSize_Info; - struct { - unsigned OtherOperandNum; } SDTCisEltOfVec_Info; } x; From natebegeman at mac.com Mon Apr 27 13:42:40 2009 From: natebegeman at mac.com (Nate Begeman) Date: Mon, 27 Apr 2009 18:42:40 -0000 Subject: [llvm-commits] [llvm] r70226 - /llvm/trunk/test/CodeGen/Generic/vector-casts.ll Message-ID: <200904271842.n3RIgfiN015634@zion.cs.uiuc.edu> Author: sampo Date: Mon Apr 27 13:42:40 2009 New Revision: 70226 URL: http://llvm.org/viewvc/llvm-project?rev=70226&view=rev Log: Revert accidental testcase reduction Modified: llvm/trunk/test/CodeGen/Generic/vector-casts.ll Modified: llvm/trunk/test/CodeGen/Generic/vector-casts.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Generic/vector-casts.ll?rev=70226&r1=70225&r2=70226&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/Generic/vector-casts.ll (original) +++ llvm/trunk/test/CodeGen/Generic/vector-casts.ll Mon Apr 27 13:42:40 2009 @@ -1,9 +1,45 @@ ; RUN: llvm-as < %s | llc ; PR2671 +define void @a(<2 x double>* %p, <2 x i8>* %q) { + %t = load 
<2 x double>* %p + %r = fptosi <2 x double> %t to <2 x i8> + store <2 x i8> %r, <2 x i8>* %q + ret void +} +define void @b(<2 x double>* %p, <2 x i8>* %q) { + %t = load <2 x double>* %p + %r = fptoui <2 x double> %t to <2 x i8> + store <2 x i8> %r, <2 x i8>* %q + ret void +} +define void @c(<2 x i8>* %p, <2 x double>* %q) { + %t = load <2 x i8>* %p + %r = sitofp <2 x i8> %t to <2 x double> + store <2 x double> %r, <2 x double>* %q + ret void +} +define void @d(<2 x i8>* %p, <2 x double>* %q) { + %t = load <2 x i8>* %p + %r = uitofp <2 x i8> %t to <2 x double> + store <2 x double> %r, <2 x double>* %q + ret void +} +define void @e(<2 x i8>* %p, <2 x i16>* %q) { + %t = load <2 x i8>* %p + %r = sext <2 x i8> %t to <2 x i16> + store <2 x i16> %r, <2 x i16>* %q + ret void +} +define void @f(<2 x i8>* %p, <2 x i16>* %q) { + %t = load <2 x i8>* %p + %r = zext <2 x i8> %t to <2 x i16> + store <2 x i16> %r, <2 x i16>* %q + ret void +} define void @g(<2 x i16>* %p, <2 x i8>* %q) { - %t = load <2 x i16>* %p - %r = trunc <2 x i16> %t to <2 x i8> - store <2 x i8> %r, <2 x i8>* %q - ret void + %t = load <2 x i16>* %p + %r = trunc <2 x i16> %t to <2 x i8> + store <2 x i8> %r, <2 x i8>* %q + ret void } From dalej at apple.com Mon Apr 27 14:13:44 2009 From: dalej at apple.com (Dale Johannesen) Date: Mon, 27 Apr 2009 19:13:44 -0000 Subject: [llvm-commits] [llvm-gcc-4.2] r70231 - /llvm-gcc-4.2/trunk/gcc/objc/objc-act.c Message-ID: <200904271913.n3RJDic7016626@zion.cs.uiuc.edu> Author: johannes Date: Mon Apr 27 14:13:43 2009 New Revision: 70231 URL: http://llvm.org/viewvc/llvm-project?rev=70231&view=rev Log: Fix a layout difference with gcc. 6817146. 
Modified: llvm-gcc-4.2/trunk/gcc/objc/objc-act.c Modified: llvm-gcc-4.2/trunk/gcc/objc/objc-act.c URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/gcc/objc/objc-act.c?rev=70231&r1=70230&r2=70231&view=diff ============================================================================== --- llvm-gcc-4.2/trunk/gcc/objc/objc-act.c (original) +++ llvm-gcc-4.2/trunk/gcc/objc/objc-act.c Mon Apr 27 14:13:43 2009 @@ -1018,6 +1018,23 @@ objc_method_optional_flag = 0; } +#ifdef ENABLE_LLVM +/* Return size in bits this class occupies when used as a base class. */ +static int realClassSize(tree class) +{ + unsigned int instanceSize = 0; + tree field = TYPE_FIELDS (class); + while (field && TREE_CHAIN (field) + && TREE_CODE (TREE_CHAIN (field)) == FIELD_DECL) + field = TREE_CHAIN (field); + + if (field && TREE_CODE (field) == FIELD_DECL) + instanceSize = int_byte_position (field) * BITS_PER_UNIT + + tree_low_cst (DECL_SIZE (field), 0); + return instanceSize; +} +#endif + void objc_start_class_implementation (tree class, tree super_class) { @@ -1044,18 +1061,24 @@ /* If we have an embedded base class, and its size doesn't match the size in the field node, that's because ivars were added to the base class after the field node was built. We need to update the field - node and re-layout the outer record. */ + node and re-layout the outer record. + Note that we can't rely on the size in the TYPE_SIZE node of + the embedded base class type, it is wrong for some cases + involving bitfields (!) 
*/ if (DECL_ARTIFICIAL (field) && !DECL_NAME (field) && TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE - && DECL_SIZE (field) && TYPE_SIZE(TREE_TYPE(field)) - && TREE_CODE (DECL_SIZE (field)) == INTEGER_CST - && TREE_CODE (TYPE_SIZE (TREE_TYPE (field))) == INTEGER_CST - && TREE_INT_CST_LOW (DECL_SIZE (field)) - != TREE_INT_CST_LOW (TYPE_SIZE (TREE_TYPE (field)))) + && DECL_SIZE (field) + && TREE_CODE (DECL_SIZE (field)) == INTEGER_CST) { - DECL_SIZE (field) = TYPE_SIZE (TREE_TYPE (field)); - DECL_SIZE_UNIT (field) = TYPE_SIZE_UNIT (TREE_TYPE (field)); - changed = true; + unsigned int realSize = realClassSize(TREE_TYPE(field)); + if (realSize && + TREE_INT_CST_LOW (DECL_SIZE (field)) != realSize) + { + DECL_SIZE (field) = build_int_cst(bitsizetype, realSize); + DECL_SIZE_UNIT (field) = + build_int_cst(sizetype, realSize / BITS_PER_UNIT); + changed = true; + } } } if (changed) From dalej at apple.com Mon Apr 27 14:15:09 2009 From: dalej at apple.com (Dale Johannesen) Date: Mon, 27 Apr 2009 19:15:09 -0000 Subject: [llvm-commits] [llvm] r70233 - /llvm/trunk/test/FrontendObjC/2009-04-27-bitfield-vs-ivar.m Message-ID: <200904271915.n3RJF9Zb016689@zion.cs.uiuc.edu> Author: johannes Date: Mon Apr 27 14:15:09 2009 New Revision: 70233 URL: http://llvm.org/viewvc/llvm-project?rev=70233&view=rev Log: Test for (llvm-gcc) 70231. 
Added: llvm/trunk/test/FrontendObjC/2009-04-27-bitfield-vs-ivar.m Added: llvm/trunk/test/FrontendObjC/2009-04-27-bitfield-vs-ivar.m URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/FrontendObjC/2009-04-27-bitfield-vs-ivar.m?rev=70233&view=auto ============================================================================== --- llvm/trunk/test/FrontendObjC/2009-04-27-bitfield-vs-ivar.m (added) +++ llvm/trunk/test/FrontendObjC/2009-04-27-bitfield-vs-ivar.m Mon Apr 27 14:15:09 2009 @@ -0,0 +1,44 @@ +// RUN: %llvmgcc -S -x objective-c -m64 -fobjc-abi-version=2 %s -o %t +// RUN: grep {OBJC_CLASS_RO_\\\$_I4} %t | grep {i32 0, i32 1, i32 2, i32 0} +// RUN: grep {OBJC_CLASS_RO_\\\$_I2} %t | grep {i32 0, i32 1, i32 1, i32 0} +// RUN: grep {OBJC_CLASS_RO_\\\$_I5} %t | grep {i32 0, i32 0, i32 0, i32 0} +// XTARGETS: darwin + +// Test instance variable sizing when base class ends in bitfield + at interface I3 { + unsigned int _iv2 :1; +} + at end + + at interface I4 : I3 { + char _iv4; +} + at end + +// Test case with no instance variables in derived class + at interface I1 { + unsigned int _iv2 :1; +} + at end + + at interface I2 : I1 { +} + at end + +// Test case with no instance variables anywhere + at interface I6 { +} + at end + + at interface I5 : I6 { +} + at end + + at implementation I4 + at end + + at implementation I2 + at end + + at implementation I5 + at end From baldrick at free.fr Mon Apr 27 14:33:05 2009 From: baldrick at free.fr (Duncan Sands) Date: Mon, 27 Apr 2009 19:33:05 -0000 Subject: [llvm-commits] [llvm] r70234 - in /llvm/trunk/lib/CodeGen/SelectionDAG: LegalizeDAG.cpp LegalizeTypes.cpp LegalizeTypes.h LegalizeVectorTypes.cpp Message-ID: <200904271933.n3RJX59P017484@zion.cs.uiuc.edu> Author: baldrick Date: Mon Apr 27 14:33:03 2009 New Revision: 70234 URL: http://llvm.org/viewvc/llvm-project?rev=70234&view=rev Log: Now that PR2957 is resolved, remove a bunch of no-longer needed workarounds. 
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeTypes.h llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp Modified: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp?rev=70234&r1=70233&r2=70234&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp (original) +++ llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp Mon Apr 27 14:33:03 2009 @@ -489,10 +489,8 @@ // If the type legalizer was run then we should never see any illegal result // types here except for target constants (the type legalizer does not touch // those) or for build vector used as a mask for a vector shuffle. - // FIXME: We can removed the BUILD_VECTOR case when we fix PR2957. assert((TypesNeedLegalizing || getTypeAction(VT) == Legal || - IsLegalizingCallArgs || Op.getOpcode() == ISD::TargetConstant || - Op.getOpcode() == ISD::BUILD_VECTOR) && + IsLegalizingCallArgs || Op.getOpcode() == ISD::TargetConstant) && "Illegal type introduced after type legalization?"); switch (getTypeAction(VT)) { default: assert(0 && "Bad type action!"); Modified: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp?rev=70234&r1=70233&r2=70234&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp (original) +++ llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp Mon Apr 27 14:33:03 2009 @@ -116,11 +116,8 @@ cerr << "Unprocessed value in a map!"; Failed = true; } - } else if (isTypeLegal(Res.getValueType()) || IgnoreNodeResults(I)) { - // FIXME: Because of PR2957, the build vector can be placed on this - // list but if the associated 
vector shuffle is split, the build vector - // can also be split so we allow this to go through for now. - if (Mapped > 1 && Res.getOpcode() != ISD::BUILD_VECTOR) { + } else if (isTypeLegal(Res.getValueType())) { + if (Mapped > 1) { cerr << "Value with legal type was transformed!"; Failed = true; } @@ -266,13 +263,6 @@ if (IgnoreNodeResults(N->getOperand(i).getNode())) continue; - if (N->getOpcode() == ISD::VECTOR_SHUFFLE && i == 2) { - // The shuffle mask doesn't need to be a legal vector type. - // FIXME: We can remove this once we fix PR2957. - SetIgnoredNodeResult(N->getOperand(2).getNode()); - continue; - } - MVT OpVT = N->getOperand(i).getValueType(); switch (getTypeAction(OpVT)) { default: @@ -842,10 +832,6 @@ OpEntry = Result; } -// Set to ignore result -void DAGTypeLegalizer::SetIgnoredNodeResult(SDNode* N) { - IgnoredNodesResultsSet.insert(N); -} //===----------------------------------------------------------------------===// // Utilities. Modified: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeTypes.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeTypes.h?rev=70234&r1=70233&r2=70234&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeTypes.h (original) +++ llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeTypes.h Mon Apr 27 14:33:03 2009 @@ -114,16 +114,10 @@ } /// IgnoreNodeResults - Pretend all of this node's results are legal. - /// FIXME: Remove once PR2957 is done. bool IgnoreNodeResults(SDNode *N) const { - return N->getOpcode() == ISD::TargetConstant || - IgnoredNodesResultsSet.count(N); + return N->getOpcode() == ISD::TargetConstant; } - /// IgnoredNode - Set of nodes whose result don't need to be legal. - /// FIXME: Remove once PR2957 is done. - DenseSet IgnoredNodesResultsSet; - /// PromotedIntegers - For integer nodes that are below legal width, this map /// indicates what promoted value to use. 
DenseMap PromotedIntegers; @@ -202,7 +196,6 @@ SDValue PromoteTargetBoolean(SDValue Bool, MVT VT); void ReplaceValueWith(SDValue From, SDValue To); void ReplaceValueWithHelper(SDValue From, SDValue To); - void SetIgnoredNodeResult(SDNode* N); void SplitInteger(SDValue Op, SDValue &Lo, SDValue &Hi); void SplitInteger(SDValue Op, MVT LoVT, MVT HiVT, SDValue &Lo, SDValue &Hi); @@ -577,7 +570,6 @@ SDValue SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N); SDValue SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N); SDValue SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo); - SDValue SplitVecOp_VECTOR_SHUFFLE(SDNode *N, unsigned OpNo); //===--------------------------------------------------------------------===// // Vector Widening Support: LegalizeVectorTypes.cpp Modified: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp?rev=70234&r1=70233&r2=70234&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp (original) +++ llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp Mon Apr 27 14:33:03 2009 @@ -910,7 +910,6 @@ case ISD::EXTRACT_VECTOR_ELT:Res = SplitVecOp_EXTRACT_VECTOR_ELT(N); break; case ISD::STORE: Res = SplitVecOp_STORE(cast(N), OpNo); break; - case ISD::VECTOR_SHUFFLE: Res = SplitVecOp_VECTOR_SHUFFLE(N, OpNo);break; case ISD::CTTZ: case ISD::CTLZ: @@ -1073,67 +1072,6 @@ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); } -SDValue DAGTypeLegalizer::SplitVecOp_VECTOR_SHUFFLE(SDNode *N, unsigned OpNo) { - assert(OpNo == 2 && "Shuffle source type differs from result type?"); - SDValue Mask = N->getOperand(2); - DebugLoc dl = N->getDebugLoc(); - unsigned MaskLength = Mask.getValueType().getVectorNumElements(); - unsigned LargestMaskEntryPlusOne = 2 * MaskLength; - unsigned MinimumBitWidth = Log2_32_Ceil(LargestMaskEntryPlusOne); - - // Look for a legal vector type 
to place the mask values in. - // Note that there may not be *any* legal vector-of-integer - // type for which the element type is legal! - for (MVT::SimpleValueType EltVT = MVT::FIRST_INTEGER_VALUETYPE; - EltVT <= MVT::LAST_INTEGER_VALUETYPE; - // Integer values types are consecutively numbered. Exploit this. - EltVT = MVT::SimpleValueType(EltVT + 1)) { - - // Is the element type big enough to hold the values? - if (MVT(EltVT).getSizeInBits() < MinimumBitWidth) - // Nope. - continue; - - // Is the vector type legal? - MVT VecVT = MVT::getVectorVT(EltVT, MaskLength); - if (!isTypeLegal(VecVT)) - // Nope. - continue; - - // If the element type is not legal, find a larger legal type to use for - // the BUILD_VECTOR operands. This is an ugly hack, but seems to work! - // FIXME: The real solution is to change VECTOR_SHUFFLE into a variadic - // node where the shuffle mask is a list of integer operands, #2 .. #2+n. - for (MVT::SimpleValueType OpVT = EltVT; OpVT <= MVT::LAST_INTEGER_VALUETYPE; - // Integer values types are consecutively numbered. Exploit this. - OpVT = MVT::SimpleValueType(OpVT + 1)) { - if (!isTypeLegal(OpVT)) - continue; - - // Success! Rebuild the vector using the legal types. - SmallVector Ops(MaskLength); - for (unsigned i = 0; i < MaskLength; ++i) { - SDValue Arg = Mask.getOperand(i); - if (Arg.getOpcode() == ISD::UNDEF) { - Ops[i] = DAG.getUNDEF(OpVT); - } else { - uint64_t Idx = cast(Arg)->getZExtValue(); - Ops[i] = DAG.getConstant(Idx, OpVT); - } - } - return DAG.UpdateNodeOperands(SDValue(N,0), - N->getOperand(0), N->getOperand(1), - DAG.getNode(ISD::BUILD_VECTOR, dl, - VecVT, &Ops[0], Ops.size())); - } - - // Continuing is pointless - failure is certain. 
- break; - } - assert(false && "Failed to find an appropriate mask type!"); - return SDValue(N, 0); -} - //===----------------------------------------------------------------------===// // Result Vector Widening From resistor at mac.com Mon Apr 27 14:55:47 2009 From: resistor at mac.com (Owen Anderson) Date: Mon, 27 Apr 2009 19:55:47 -0000 Subject: [llvm-commits] [llvm] r70238 - /llvm/trunk/lib/CodeGen/SimpleRegisterCoalescing.cpp Message-ID: <200904271955.n3RJtl0i018527@zion.cs.uiuc.edu> Author: resistor Date: Mon Apr 27 14:55:47 2009 New Revision: 70238 URL: http://llvm.org/viewvc/llvm-project?rev=70238&view=rev Log: Don't skip the CopyMI when removing kill markers. This should have no effect on generated code, but makes the intermediate state of the coalescer more sane. Modified: llvm/trunk/lib/CodeGen/SimpleRegisterCoalescing.cpp Modified: llvm/trunk/lib/CodeGen/SimpleRegisterCoalescing.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SimpleRegisterCoalescing.cpp?rev=70238&r1=70237&r2=70238&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SimpleRegisterCoalescing.cpp (original) +++ llvm/trunk/lib/CodeGen/SimpleRegisterCoalescing.cpp Mon Apr 27 14:55:47 2009 @@ -772,8 +772,6 @@ if (UseMO.isKill()) { MachineInstr *UseMI = UseMO.getParent(); unsigned UseIdx = li_->getUseIndex(li_->getInstructionIndex(UseMI)); - if (JoinedCopies.count(UseMI)) - continue; const LiveRange *UI = LI.getLiveRangeContaining(UseIdx); if (!UI || !LI.isKill(UI->valno, UseIdx+1)) UseMO.setIsKill(false); From sabre at nondot.org Mon Apr 27 15:04:09 2009 From: sabre at nondot.org (Chris Lattner) Date: Mon, 27 Apr 2009 20:04:09 -0000 Subject: [llvm-commits] [llvm] r70239 - in /llvm/trunk: include/llvm/Bitcode/BitstreamReader.h tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp Message-ID: <200904272004.n3RK495X018801@zion.cs.uiuc.edu> Author: lattner Date: Mon Apr 27 15:04:08 2009 New Revision: 70239 URL: 
http://llvm.org/viewvc/llvm-project?rev=70239&view=rev Log: give bitstreamreader an API to ignore names for blocks/records, only llvm-bcanalyzer wants this info. Modified: llvm/trunk/include/llvm/Bitcode/BitstreamReader.h llvm/trunk/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp Modified: llvm/trunk/include/llvm/Bitcode/BitstreamReader.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Bitcode/BitstreamReader.h?rev=70239&r1=70238&r2=70239&view=diff ============================================================================== --- llvm/trunk/include/llvm/Bitcode/BitstreamReader.h (original) +++ llvm/trunk/include/llvm/Bitcode/BitstreamReader.h Mon Apr 27 15:04:08 2009 @@ -41,11 +41,19 @@ std::vector BlockInfoRecords; + /// IgnoreBlockInfoNames - This is set to true if we don't care about the + /// block/record name information in the BlockInfo block. Only llvm-bcanalyzer + /// uses this. + bool IgnoreBlockInfoNames; + + BitstreamReader(const BitstreamReader&); // NOT IMPLEMENTED + void operator=(const BitstreamReader&); // NOT IMPLEMENTED public: - BitstreamReader() : FirstChar(0), LastChar(0) { + BitstreamReader() : FirstChar(0), LastChar(0), IgnoreBlockInfoNames(true) { } BitstreamReader(const unsigned char *Start, const unsigned char *End) { + IgnoreBlockInfoNames = true; init(Start, End); } @@ -70,6 +78,11 @@ const unsigned char *getFirstChar() const { return FirstChar; } const unsigned char *getLastChar() const { return LastChar; } + /// CollectBlockInfoNames - This is called by clients that want block/record + /// name information. 
+ void CollectBlockInfoNames() { IgnoreBlockInfoNames = false; } + bool isIgnoringBlockInfoNames() { return IgnoreBlockInfoNames; } + //===--------------------------------------------------------------------===// // Block Manipulation //===--------------------------------------------------------------------===// @@ -598,6 +611,7 @@ break; case bitc::BLOCKINFO_CODE_BLOCKNAME: { if (!CurBlockInfo) return true; + if (BitStream->isIgnoringBlockInfoNames()) break; // Ignore name. std::string Name; for (unsigned i = 0, e = Record.size(); i != e; ++i) Name += (char)Record[i]; @@ -606,6 +620,7 @@ } case bitc::BLOCKINFO_CODE_SETRECORDNAME: { if (!CurBlockInfo) return true; + if (BitStream->isIgnoringBlockInfoNames()) break; // Ignore name. std::string Name; for (unsigned i = 1, e = Record.size(); i != e; ++i) Name += (char)Record[i]; Modified: llvm/trunk/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp?rev=70239&r1=70238&r2=70239&view=diff ============================================================================== --- llvm/trunk/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp (original) +++ llvm/trunk/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp Mon Apr 27 15:04:08 2009 @@ -464,6 +464,7 @@ BitstreamReader StreamFile(BufPtr, EndBufPtr); BitstreamCursor Stream(StreamFile); + StreamFile.CollectBlockInfoNames(); // Read the stream signature. 
char Signature[6]; From evan.cheng at apple.com Mon Apr 27 15:14:52 2009 From: evan.cheng at apple.com (Evan Cheng) Date: Mon, 27 Apr 2009 13:14:52 -0700 Subject: [llvm-commits] [llvm] r70238 - /llvm/trunk/lib/CodeGen/SimpleRegisterCoalescing.cpp In-Reply-To: <200904271955.n3RJtl0i018527@zion.cs.uiuc.edu> References: <200904271955.n3RJtl0i018527@zion.cs.uiuc.edu> Message-ID: <64806CD5-0FAE-4E0B-A1D4-E2AD98440A0B@apple.com> On Apr 27, 2009, at 12:55 PM, Owen Anderson wrote: > Author: resistor > Date: Mon Apr 27 14:55:47 2009 > New Revision: 70238 > > URL: http://llvm.org/viewvc/llvm-project?rev=70238&view=rev > Log: > Don't skip the CopyMI when removing kill markers. > This should have no effect on generated code, but makes the > intermediate state > of the coalescer more sane. Please be sure about this. I assume you ran lots of tests? One possible fallout of something like this is that it breaks ARM (which uses the scavenger). It would be nice if we could just run it as a verification pass. 
Evan > > > Modified: > llvm/trunk/lib/CodeGen/SimpleRegisterCoalescing.cpp > > Modified: llvm/trunk/lib/CodeGen/SimpleRegisterCoalescing.cpp > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SimpleRegisterCoalescing.cpp?rev=70238&r1=70237&r2=70238&view=diff > > ============================================================================== > --- llvm/trunk/lib/CodeGen/SimpleRegisterCoalescing.cpp (original) > +++ llvm/trunk/lib/CodeGen/SimpleRegisterCoalescing.cpp Mon Apr 27 14:55:47 2009 > @@ -772,8 +772,6 @@ > if (UseMO.isKill()) { > MachineInstr *UseMI = UseMO.getParent(); > unsigned UseIdx = li_->getUseIndex(li_->getInstructionIndex(UseMI)); > - if (JoinedCopies.count(UseMI)) > - continue; > const LiveRange *UI = LI.getLiveRangeContaining(UseIdx); > if (!UI || !LI.isKill(UI->valno, UseIdx+1)) > UseMO.setIsKill(false); > > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits From evan.cheng at apple.com Mon Apr 27 15:15:16 2009 From: evan.cheng at apple.com (Evan Cheng) Date: Mon, 27 Apr 2009 13:15:16 -0700 Subject: [llvm-commits] [PATCH] Annotate virtual registers with register class in debug output In-Reply-To: References: Message-ID: I think it's too much clutter. :-( Evan On Apr 27, 2009, at 10:01 AM, Jakob Stoklund Olesen wrote: > I am using this patch when working on the Blackfin backend. Now that > X86 is gaining a decent set of register classes, it might be > generally useful. > > It prints machine code like this: > > %reg1025GR32 = MOV32rr %ESI > %reg1024GR64 = MOV64rr %RDI > %reg1026GR32_ABCD = MOV32rr %reg1025GR32 > %reg1027GR8_ABCD_H = EXTRACT_SUBREG %reg1026GR32_ABCD, 2 > %reg1028GR32_NOREX = MOVZX32_NOREXrr8 %reg1027GR8_ABCD_H > %EAX = MOV32rr %reg1028GR32_NOREX > RET > > What do you think? Too much clutter? 
> > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits From gohman at apple.com Mon Apr 27 15:16:15 2009 From: gohman at apple.com (Dan Gohman) Date: Mon, 27 Apr 2009 20:16:15 -0000 Subject: [llvm-commits] [llvm] r70241 - in /llvm/trunk: lib/Analysis/ScalarEvolution.cpp test/CodeGen/X86/masked-iv-safe.ll test/CodeGen/X86/masked-iv-unsafe.ll test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-1.ll Message-ID: <200904272016.n3RKGFWt019147@zion.cs.uiuc.edu> Author: djg Date: Mon Apr 27 15:16:15 2009 New Revision: 70241 URL: http://llvm.org/viewvc/llvm-project?rev=70241&view=rev Log: Teach getZeroExtendExpr and getSignExtendExpr to use trip-count information to simplify [sz]ext({a,+,b}) to {zext(a),+,[zs]ext(b)}, as appropriate. These functions and the trip count code each call into the other, so this requires careful handling to avoid infinite recursion. During the initial trip count computation, conservative SCEVs are used, which are subsequently discarded once the trip count is actually known. Among other benefits, this change lets LSR automatically eliminate some unnecessary zext-inreg and sext-inreg operations where the operand is an induction variable. 
Added: llvm/trunk/test/CodeGen/X86/masked-iv-safe.ll llvm/trunk/test/CodeGen/X86/masked-iv-unsafe.ll Modified: llvm/trunk/lib/Analysis/ScalarEvolution.cpp llvm/trunk/test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-1.ll Modified: llvm/trunk/lib/Analysis/ScalarEvolution.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/ScalarEvolution.cpp?rev=70241&r1=70240&r2=70241&view=diff ============================================================================== --- llvm/trunk/lib/Analysis/ScalarEvolution.cpp (original) +++ llvm/trunk/lib/Analysis/ScalarEvolution.cpp Mon Apr 27 15:16:15 2009 @@ -701,17 +701,81 @@ if (SCEVZeroExtendExpr *SZ = dyn_cast(Op)) return getZeroExtendExpr(SZ->getOperand(), Ty); - // FIXME: If the input value is a chrec scev, and we can prove that the value + // If the input value is a chrec scev, and we can prove that the value // did not overflow the old, smaller, value, we can zero extend all of the - // operands (often constants). This would allow analysis of something like + // operands (often constants). This allows analysis of something like // this: for (unsigned char X = 0; X < 100; ++X) { int Y = X; } + if (SCEVAddRecExpr *AR = dyn_cast(Op)) + if (AR->isAffine()) { + // Check whether the backedge-taken count is SCEVCouldNotCompute. + // Note that this serves two purposes: It filters out loops that are + // simply not analyzable, and it covers the case where this code is + // being called from within backedge-taken count analysis, such that + // attempting to ask for the backedge-taken count would likely result + // in infinite recursion. In the later case, the analysis code will + // cope with a conservative value, and it will take care to purge + // that value once it has finished. + SCEVHandle BECount = getBackedgeTakenCount(AR->getLoop()); + if (!isa(BECount)) { + // Compute the extent of AR and divide it by the step value. This is + // used to determine if it's safe to extend the stride value. 
+ SCEVHandle Start = AR->getStart(); + SCEVHandle Step = AR->getStepRecurrence(*this); + + // Check whether the backedge-taken count can be losslessly casted to + // the addrec's type. The count is always unsigned. + SCEVHandle CastedBECount = + getTruncateOrZeroExtend(BECount, Start->getType()); + if (BECount == + getTruncateOrZeroExtend(CastedBECount, BECount->getType())) { + const Type *WideTy = + IntegerType::get(getTypeSizeInBits(Start->getType()) * 2); + SCEVHandle ZMul = + getMulExpr(CastedBECount, + getTruncateOrZeroExtend(Step, Start->getType())); + // Check whether Start+Step*BECount has no unsigned overflow. + if (getZeroExtendExpr(ZMul, WideTy) == + getMulExpr(getZeroExtendExpr(CastedBECount, WideTy), + getZeroExtendExpr(Step, WideTy))) { + SCEVHandle Add = getAddExpr(Start, ZMul); + if (getZeroExtendExpr(Add, WideTy) == + getAddExpr(getZeroExtendExpr(Start, WideTy), + getZeroExtendExpr(ZMul, WideTy))) + // Return the expression with the addrec on the outside. + return getAddRecExpr(getZeroExtendExpr(Start, Ty), + getZeroExtendExpr(Step, Ty), + AR->getLoop()); + } + + // Similar to above, only this time treat the step value as signed. + // This covers loops that count down. + SCEVHandle SMul = + getMulExpr(CastedBECount, + getTruncateOrSignExtend(Step, Start->getType())); + // Check whether Start+Step*BECount has no unsigned overflow. + if (getSignExtendExpr(SMul, WideTy) == + getMulExpr(getZeroExtendExpr(CastedBECount, WideTy), + getSignExtendExpr(Step, WideTy))) { + SCEVHandle Add = getAddExpr(Start, SMul); + if (getZeroExtendExpr(Add, WideTy) == + getAddExpr(getZeroExtendExpr(Start, WideTy), + getSignExtendExpr(SMul, WideTy))) + // Return the expression with the addrec on the outside. 
+ return getAddRecExpr(getZeroExtendExpr(Start, Ty), + getSignExtendExpr(Step, Ty), + AR->getLoop()); + } + } + } + } SCEVZeroExtendExpr *&Result = (*SCEVZeroExtends)[std::make_pair(Op, Ty)]; if (Result == 0) Result = new SCEVZeroExtendExpr(Op, Ty); return Result; } -SCEVHandle ScalarEvolution::getSignExtendExpr(const SCEVHandle &Op, const Type *Ty) { +SCEVHandle ScalarEvolution::getSignExtendExpr(const SCEVHandle &Op, + const Type *Ty) { assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) && "This is not an extending conversion!"); @@ -726,10 +790,54 @@ if (SCEVSignExtendExpr *SS = dyn_cast(Op)) return getSignExtendExpr(SS->getOperand(), Ty); - // FIXME: If the input value is a chrec scev, and we can prove that the value + // If the input value is a chrec scev, and we can prove that the value // did not overflow the old, smaller, value, we can sign extend all of the - // operands (often constants). This would allow analysis of something like + // operands (often constants). This allows analysis of something like // this: for (signed char X = 0; X < 100; ++X) { int Y = X; } + if (SCEVAddRecExpr *AR = dyn_cast(Op)) + if (AR->isAffine()) { + // Check whether the backedge-taken count is SCEVCouldNotCompute. + // Note that this serves two purposes: It filters out loops that are + // simply not analyzable, and it covers the case where this code is + // being called from within backedge-taken count analysis, such that + // attempting to ask for the backedge-taken count would likely result + // in infinite recursion. In the later case, the analysis code will + // cope with a conservative value, and it will take care to purge + // that value once it has finished. + SCEVHandle BECount = getBackedgeTakenCount(AR->getLoop()); + if (!isa(BECount)) { + // Compute the extent of AR and divide it by the step value. This is + // used to determine if it's safe to extend the stride value. 
+ SCEVHandle Start = AR->getStart(); + SCEVHandle Step = AR->getStepRecurrence(*this); + + // Check whether the backedge-taken count can be losslessly casted to + // the addrec's type. The count is always unsigned. + SCEVHandle CastedBECount = + getTruncateOrZeroExtend(BECount, Start->getType()); + if (BECount == + getTruncateOrZeroExtend(CastedBECount, BECount->getType())) { + const Type *WideTy = + IntegerType::get(getTypeSizeInBits(Start->getType()) * 2); + SCEVHandle SMul = + getMulExpr(CastedBECount, + getTruncateOrSignExtend(Step, Start->getType())); + // Check whether Start+Step*BECount has no signed overflow. + if (getSignExtendExpr(SMul, WideTy) == + getMulExpr(getSignExtendExpr(CastedBECount, WideTy), + getSignExtendExpr(Step, WideTy))) { + SCEVHandle Add = getAddExpr(Start, SMul); + if (getSignExtendExpr(Add, WideTy) == + getAddExpr(getSignExtendExpr(Start, WideTy), + getSignExtendExpr(SMul, WideTy))) + // Return the expression with the addrec on the outside. + return getAddRecExpr(getSignExtendExpr(Start, Ty), + getSignExtendExpr(Step, Ty), + AR->getLoop()); + } + } + } + } SCEVSignExtendExpr *&Result = (*SCEVSignExtends)[std::make_pair(Op, Ty)]; if (Result == 0) Result = new SCEVSignExtendExpr(Op, Ty); @@ -1962,20 +2070,36 @@ /// hasLoopInvariantBackedgeTakenCount). /// SCEVHandle ScalarEvolution::getBackedgeTakenCount(const Loop *L) { - std::map::iterator I = BackedgeTakenCounts.find(L); - if (I == BackedgeTakenCounts.end()) { + // Initially insert a CouldNotCompute for this loop. If the insertion + // succeeds, procede to actually compute a backedge-taken count and + // update the value. The temporary CouldNotCompute value tells SCEV + // code elsewhere that it shouldn't attempt to request a new + // backedge-taken count, which could result in infinite recursion. 
+ std::pair::iterator, bool> Pair = + BackedgeTakenCounts.insert(std::make_pair(L, getCouldNotCompute())); + if (Pair.second) { SCEVHandle ItCount = ComputeBackedgeTakenCount(L); - I = BackedgeTakenCounts.insert(std::make_pair(L, ItCount)).first; if (ItCount != UnknownValue) { assert(ItCount->isLoopInvariant(L) && "Computed trip count isn't loop invariant for loop!"); ++NumTripCountsComputed; + + // Now that we know the trip count for this loop, forget any + // existing SCEV values for PHI nodes in this loop since they + // are only conservative estimates made without the benefit + // of trip count information. + for (BasicBlock::iterator I = L->getHeader()->begin(); + PHINode *PN = dyn_cast(I); ++I) + deleteValueFromRecords(PN); + + // Update the value in the map. + Pair.first->second = ItCount; } else if (isa(L->getHeader()->begin())) { // Only count loops that have phi nodes as not being computable. ++NumTripCountsNotComputed; } } - return I->second; + return Pair.first->second; } /// forgetLoopBackedgeTakenCount - This method should be called by the Added: llvm/trunk/test/CodeGen/X86/masked-iv-safe.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/masked-iv-safe.ll?rev=70241&view=auto ============================================================================== --- llvm/trunk/test/CodeGen/X86/masked-iv-safe.ll (added) +++ llvm/trunk/test/CodeGen/X86/masked-iv-safe.ll Mon Apr 27 15:16:15 2009 @@ -0,0 +1,244 @@ +; RUN: llvm-as < %s | llc -march=x86-64 > %t +; RUN: not grep and %t +; RUN: not grep movz %t +; RUN: not grep sar %t +; RUN: not grep shl %t +; RUN: grep add %t | count 6 +; RUN: grep inc %t | count 4 +; RUN: grep dec %t | count 2 +; RUN: grep lea %t | count 2 + +; Optimize away zext-inreg and sext-inreg on the loop induction +; variable using trip-count information. 
+ +define void @count_up(double* %d, i64 %n) nounwind { +entry: + br label %loop + +loop: + %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %loop ] + %indvar.i8 = and i64 %indvar, 255 + %t0 = getelementptr double* %d, i64 %indvar.i8 + %t1 = load double* %t0 + %t2 = mul double %t1, 0.1 + store double %t2, double* %t0 + %indvar.i24 = and i64 %indvar, 16777215 + %t3 = getelementptr double* %d, i64 %indvar.i24 + %t4 = load double* %t3 + %t5 = mul double %t4, 2.3 + store double %t5, double* %t3 + %t6 = getelementptr double* %d, i64 %indvar + %t7 = load double* %t6 + %t8 = mul double %t7, 4.5 + store double %t8, double* %t6 + %indvar.next = add i64 %indvar, 1 + %exitcond = icmp eq i64 %indvar.next, 10 + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +define void @count_down(double* %d, i64 %n) nounwind { +entry: + br label %loop + +loop: + %indvar = phi i64 [ 10, %entry ], [ %indvar.next, %loop ] + %indvar.i8 = and i64 %indvar, 255 + %t0 = getelementptr double* %d, i64 %indvar.i8 + %t1 = load double* %t0 + %t2 = mul double %t1, 0.1 + store double %t2, double* %t0 + %indvar.i24 = and i64 %indvar, 16777215 + %t3 = getelementptr double* %d, i64 %indvar.i24 + %t4 = load double* %t3 + %t5 = mul double %t4, 2.3 + store double %t5, double* %t3 + %t6 = getelementptr double* %d, i64 %indvar + %t7 = load double* %t6 + %t8 = mul double %t7, 4.5 + store double %t8, double* %t6 + %indvar.next = sub i64 %indvar, 1 + %exitcond = icmp eq i64 %indvar.next, 0 + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +define void @count_up_signed(double* %d, i64 %n) nounwind { +entry: + br label %loop + +loop: + %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %loop ] + %s0 = shl i64 %indvar, 8 + %indvar.i8 = ashr i64 %s0, 8 + %t0 = getelementptr double* %d, i64 %indvar.i8 + %t1 = load double* %t0 + %t2 = mul double %t1, 0.1 + store double %t2, double* %t0 + %s1 = shl i64 %indvar, 24 + %indvar.i24 = ashr i64 %s1, 24 + %t3 = getelementptr double* %d, 
i64 %indvar.i24 + %t4 = load double* %t3 + %t5 = mul double %t4, 2.3 + store double %t5, double* %t3 + %t6 = getelementptr double* %d, i64 %indvar + %t7 = load double* %t6 + %t8 = mul double %t7, 4.5 + store double %t8, double* %t6 + %indvar.next = add i64 %indvar, 1 + %exitcond = icmp eq i64 %indvar.next, 10 + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +define void @count_down_signed(double* %d, i64 %n) nounwind { +entry: + br label %loop + +loop: + %indvar = phi i64 [ 10, %entry ], [ %indvar.next, %loop ] + %s0 = shl i64 %indvar, 8 + %indvar.i8 = ashr i64 %s0, 8 + %t0 = getelementptr double* %d, i64 %indvar.i8 + %t1 = load double* %t0 + %t2 = mul double %t1, 0.1 + store double %t2, double* %t0 + %s1 = shl i64 %indvar, 24 + %indvar.i24 = ashr i64 %s1, 24 + %t3 = getelementptr double* %d, i64 %indvar.i24 + %t4 = load double* %t3 + %t5 = mul double %t4, 2.3 + store double %t5, double* %t3 + %t6 = getelementptr double* %d, i64 %indvar + %t7 = load double* %t6 + %t8 = mul double %t7, 4.5 + store double %t8, double* %t6 + %indvar.next = sub i64 %indvar, 1 + %exitcond = icmp eq i64 %indvar.next, 0 + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +define void @another_count_up(double* %d, i64 %n) nounwind { +entry: + br label %loop + +loop: + %indvar = phi i64 [ 18446744073709551615, %entry ], [ %indvar.next, %loop ] + %indvar.i8 = and i64 %indvar, 255 + %t0 = getelementptr double* %d, i64 %indvar.i8 + %t1 = load double* %t0 + %t2 = mul double %t1, 0.1 + store double %t2, double* %t0 + %indvar.i24 = and i64 %indvar, 16777215 + %t3 = getelementptr double* %d, i64 %indvar.i24 + %t4 = load double* %t3 + %t5 = mul double %t4, 2.3 + store double %t5, double* %t3 + %t6 = getelementptr double* %d, i64 %indvar + %t7 = load double* %t6 + %t8 = mul double %t7, 4.5 + store double %t8, double* %t6 + %indvar.next = add i64 %indvar, 1 + %exitcond = icmp eq i64 %indvar.next, 0 + br i1 %exitcond, label %return, label %loop + +return: + 
ret void +} + +define void @another_count_down(double* %d, i64 %n) nounwind { +entry: + br label %loop + +loop: + %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %loop ] + %indvar.i8 = and i64 %indvar, 255 + %t0 = getelementptr double* %d, i64 %indvar.i8 + %t1 = load double* %t0 + %t2 = mul double %t1, 0.1 + store double %t2, double* %t0 + %indvar.i24 = and i64 %indvar, 16777215 + %t3 = getelementptr double* %d, i64 %indvar.i24 + %t4 = load double* %t3 + %t5 = mul double %t4, 2.3 + store double %t5, double* %t3 + %t6 = getelementptr double* %d, i64 %indvar + %t7 = load double* %t6 + %t8 = mul double %t7, 4.5 + store double %t8, double* %t6 + %indvar.next = sub i64 %indvar, 1 + %exitcond = icmp eq i64 %indvar.next, 18446744073709551615 + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +define void @another_count_up_signed(double* %d, i64 %n) nounwind { +entry: + br label %loop + +loop: + %indvar = phi i64 [ 18446744073709551615, %entry ], [ %indvar.next, %loop ] + %s0 = shl i64 %indvar, 8 + %indvar.i8 = ashr i64 %s0, 8 + %t0 = getelementptr double* %d, i64 %indvar.i8 + %t1 = load double* %t0 + %t2 = mul double %t1, 0.1 + store double %t2, double* %t0 + %s1 = shl i64 %indvar, 24 + %indvar.i24 = ashr i64 %s1, 24 + %t3 = getelementptr double* %d, i64 %indvar.i24 + %t4 = load double* %t3 + %t5 = mul double %t4, 2.3 + store double %t5, double* %t3 + %t6 = getelementptr double* %d, i64 %indvar + %t7 = load double* %t6 + %t8 = mul double %t7, 4.5 + store double %t8, double* %t6 + %indvar.next = add i64 %indvar, 1 + %exitcond = icmp eq i64 %indvar.next, 0 + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +define void @another_count_down_signed(double* %d, i64 %n) nounwind { +entry: + br label %loop + +loop: + %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %loop ] + %s0 = shl i64 %indvar, 8 + %indvar.i8 = ashr i64 %s0, 8 + %t0 = getelementptr double* %d, i64 %indvar.i8 + %t1 = load double* %t0 + %t2 = mul double %t1, 0.1 + 
store double %t2, double* %t0 + %s1 = shl i64 %indvar, 24 + %indvar.i24 = ashr i64 %s1, 24 + %t3 = getelementptr double* %d, i64 %indvar.i24 + %t4 = load double* %t3 + %t5 = mul double %t4, 2.3 + store double %t5, double* %t3 + %t6 = getelementptr double* %d, i64 %indvar + %t7 = load double* %t6 + %t8 = mul double %t7, 4.5 + store double %t8, double* %t6 + %indvar.next = sub i64 %indvar, 1 + %exitcond = icmp eq i64 %indvar.next, 18446744073709551615 + br i1 %exitcond, label %return, label %loop + +return: + ret void +} Added: llvm/trunk/test/CodeGen/X86/masked-iv-unsafe.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/masked-iv-unsafe.ll?rev=70241&view=auto ============================================================================== --- llvm/trunk/test/CodeGen/X86/masked-iv-unsafe.ll (added) +++ llvm/trunk/test/CodeGen/X86/masked-iv-unsafe.ll Mon Apr 27 15:16:15 2009 @@ -0,0 +1,386 @@ +; RUN: llvm-as < %s | llc -march=x86-64 > %t +; RUN: grep and %t | count 6 +; RUN: grep movzb %t | count 6 +; RUN: grep sar %t | count 12 + +; Don't optimize away zext-inreg and sext-inreg on the loop induction +; variable, because it isn't safe to do so in these cases. 
+ +define void @count_up(double* %d, i64 %n) nounwind { +entry: + br label %loop + +loop: + %indvar = phi i64 [ 10, %entry ], [ %indvar.next, %loop ] + %indvar.i8 = and i64 %indvar, 255 + %t0 = getelementptr double* %d, i64 %indvar.i8 + %t1 = load double* %t0 + %t2 = mul double %t1, 0.1 + store double %t2, double* %t0 + %indvar.i24 = and i64 %indvar, 16777215 + %t3 = getelementptr double* %d, i64 %indvar.i24 + %t4 = load double* %t3 + %t5 = mul double %t4, 2.3 + store double %t5, double* %t3 + %t6 = getelementptr double* %d, i64 %indvar + %t7 = load double* %t6 + %t8 = mul double %t7, 4.5 + store double %t8, double* %t6 + %indvar.next = add i64 %indvar, 1 + %exitcond = icmp eq i64 %indvar.next, 0 + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +define void @count_down(double* %d, i64 %n) nounwind { +entry: + br label %loop + +loop: + %indvar = phi i64 [ 10, %entry ], [ %indvar.next, %loop ] + %indvar.i8 = and i64 %indvar, 255 + %t0 = getelementptr double* %d, i64 %indvar.i8 + %t1 = load double* %t0 + %t2 = mul double %t1, 0.1 + store double %t2, double* %t0 + %indvar.i24 = and i64 %indvar, 16777215 + %t3 = getelementptr double* %d, i64 %indvar.i24 + %t4 = load double* %t3 + %t5 = mul double %t4, 2.3 + store double %t5, double* %t3 + %t6 = getelementptr double* %d, i64 %indvar + %t7 = load double* %t6 + %t8 = mul double %t7, 4.5 + store double %t8, double* %t6 + %indvar.next = sub i64 %indvar, 1 + %exitcond = icmp eq i64 %indvar.next, 20 + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +define void @count_up_signed(double* %d, i64 %n) nounwind { +entry: + br label %loop + +loop: + %indvar = phi i64 [ 10, %entry ], [ %indvar.next, %loop ] + %s0 = shl i64 %indvar, 8 + %indvar.i8 = ashr i64 %s0, 8 + %t0 = getelementptr double* %d, i64 %indvar.i8 + %t1 = load double* %t0 + %t2 = mul double %t1, 0.1 + store double %t2, double* %t0 + %s1 = shl i64 %indvar, 24 + %indvar.i24 = ashr i64 %s1, 24 + %t3 = getelementptr double* 
%d, i64 %indvar.i24 + %t4 = load double* %t3 + %t5 = mul double %t4, 2.3 + store double %t5, double* %t3 + %t6 = getelementptr double* %d, i64 %indvar + %t7 = load double* %t6 + %t8 = mul double %t7, 4.5 + store double %t8, double* %t6 + %indvar.next = add i64 %indvar, 1 + %exitcond = icmp eq i64 %indvar.next, 0 + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +define void @count_down_signed(double* %d, i64 %n) nounwind { +entry: + br label %loop + +loop: + %indvar = phi i64 [ 10, %entry ], [ %indvar.next, %loop ] + %s0 = shl i64 %indvar, 8 + %indvar.i8 = ashr i64 %s0, 8 + %t0 = getelementptr double* %d, i64 %indvar.i8 + %t1 = load double* %t0 + %t2 = mul double %t1, 0.1 + store double %t2, double* %t0 + %s1 = shl i64 %indvar, 24 + %indvar.i24 = ashr i64 %s1, 24 + %t3 = getelementptr double* %d, i64 %indvar.i24 + %t4 = load double* %t3 + %t5 = mul double %t4, 2.3 + store double %t5, double* %t3 + %t6 = getelementptr double* %d, i64 %indvar + %t7 = load double* %t6 + %t8 = mul double %t7, 4.5 + store double %t8, double* %t6 + %indvar.next = sub i64 %indvar, 1 + %exitcond = icmp eq i64 %indvar.next, 20 + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +define void @another_count_up(double* %d, i64 %n) nounwind { +entry: + br label %loop + +loop: + %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %loop ] + %indvar.i8 = and i64 %indvar, 255 + %t0 = getelementptr double* %d, i64 %indvar.i8 + %t1 = load double* %t0 + %t2 = mul double %t1, 0.1 + store double %t2, double* %t0 + %indvar.i24 = and i64 %indvar, 16777215 + %t3 = getelementptr double* %d, i64 %indvar.i24 + %t4 = load double* %t3 + %t5 = mul double %t4, 2.3 + store double %t5, double* %t3 + %t6 = getelementptr double* %d, i64 %indvar + %t7 = load double* %t6 + %t8 = mul double %t7, 4.5 + store double %t8, double* %t6 + %indvar.next = add i64 %indvar, 1 + %exitcond = icmp eq i64 %indvar.next, %n + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + 
+define void @another_count_down(double* %d, i64 %n) nounwind { +entry: + br label %loop + +loop: + %indvar = phi i64 [ %n, %entry ], [ %indvar.next, %loop ] + %indvar.i8 = and i64 %indvar, 255 + %t0 = getelementptr double* %d, i64 %indvar.i8 + %t1 = load double* %t0 + %t2 = mul double %t1, 0.1 + store double %t2, double* %t0 + %indvar.i24 = and i64 %indvar, 16777215 + %t3 = getelementptr double* %d, i64 %indvar.i24 + %t4 = load double* %t3 + %t5 = mul double %t4, 2.3 + store double %t5, double* %t3 + %t6 = getelementptr double* %d, i64 %indvar + %t7 = load double* %t6 + %t8 = mul double %t7, 4.5 + store double %t8, double* %t6 + %indvar.next = sub i64 %indvar, 1 + %exitcond = icmp eq i64 %indvar.next, 10 + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +define void @another_count_up_signed(double* %d, i64 %n) nounwind { +entry: + br label %loop + +loop: + %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %loop ] + %s0 = shl i64 %indvar, 8 + %indvar.i8 = ashr i64 %s0, 8 + %t0 = getelementptr double* %d, i64 %indvar.i8 + %t1 = load double* %t0 + %t2 = mul double %t1, 0.1 + store double %t2, double* %t0 + %s1 = shl i64 %indvar, 24 + %indvar.i24 = ashr i64 %s1, 24 + %t3 = getelementptr double* %d, i64 %indvar.i24 + %t4 = load double* %t3 + %t5 = mul double %t4, 2.3 + store double %t5, double* %t3 + %t6 = getelementptr double* %d, i64 %indvar + %t7 = load double* %t6 + %t8 = mul double %t7, 4.5 + store double %t8, double* %t6 + %indvar.next = add i64 %indvar, 1 + %exitcond = icmp eq i64 %indvar.next, %n + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +define void @another_count_down_signed(double* %d, i64 %n) nounwind { +entry: + br label %loop + +loop: + %indvar = phi i64 [ %n, %entry ], [ %indvar.next, %loop ] + %s0 = shl i64 %indvar, 8 + %indvar.i8 = ashr i64 %s0, 8 + %t0 = getelementptr double* %d, i64 %indvar.i8 + %t1 = load double* %t0 + %t2 = mul double %t1, 0.1 + store double %t2, double* %t0 + %s1 = shl i64 
%indvar, 24 + %indvar.i24 = ashr i64 %s1, 24 + %t3 = getelementptr double* %d, i64 %indvar.i24 + %t4 = load double* %t3 + %t5 = mul double %t4, 2.3 + store double %t5, double* %t3 + %t6 = getelementptr double* %d, i64 %indvar + %t7 = load double* %t6 + %t8 = mul double %t7, 4.5 + store double %t8, double* %t6 + %indvar.next = sub i64 %indvar, 1 + %exitcond = icmp eq i64 %indvar.next, 10 + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +define void @yet_another_count_down(double* %d, i64 %n) nounwind { +entry: + br label %loop + +loop: + %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %loop ] + %indvar.i8 = and i64 %indvar, 255 + %t0 = getelementptr double* %d, i64 %indvar.i8 + %t1 = load double* %t0 + %t2 = mul double %t1, 0.1 + store double %t2, double* %t0 + %indvar.i24 = and i64 %indvar, 16777215 + %t3 = getelementptr double* %d, i64 %indvar.i24 + %t4 = load double* %t3 + %t5 = mul double %t4, 2.3 + store double %t5, double* %t3 + %t6 = getelementptr double* %d, i64 %indvar + %t7 = load double* %t6 + %t8 = mul double %t7, 4.5 + store double %t8, double* %t6 + %indvar.next = sub i64 %indvar, 1 + %exitcond = icmp eq i64 %indvar.next, 18446744073709551615 + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +define void @yet_another_count_up(double* %d, i64 %n) nounwind { +entry: + br label %loop + +loop: + %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %loop ] + %indvar.i8 = and i64 %indvar, 255 + %t0 = getelementptr double* %d, i64 %indvar.i8 + %t1 = load double* %t0 + %t2 = mul double %t1, 0.1 + store double %t2, double* %t0 + %indvar.i24 = and i64 %indvar, 16777215 + %t3 = getelementptr double* %d, i64 %indvar.i24 + %t4 = load double* %t3 + %t5 = mul double %t4, 2.3 + store double %t5, double* %t3 + %t6 = getelementptr double* %d, i64 %indvar + %t7 = load double* %t6 + %t8 = mul double %t7, 4.5 + store double %t8, double* %t6 + %indvar.next = add i64 %indvar, 3 + %exitcond = icmp eq i64 %indvar.next, 10 + br i1 
%exitcond, label %return, label %loop + +return: + ret void +} + +define void @still_another_count_down(double* %d, i64 %n) nounwind { +entry: + br label %loop + +loop: + %indvar = phi i64 [ 10, %entry ], [ %indvar.next, %loop ] + %indvar.i8 = and i64 %indvar, 255 + %t0 = getelementptr double* %d, i64 %indvar.i8 + %t1 = load double* %t0 + %t2 = mul double %t1, 0.1 + store double %t2, double* %t0 + %indvar.i24 = and i64 %indvar, 16777215 + %t3 = getelementptr double* %d, i64 %indvar.i24 + %t4 = load double* %t3 + %t5 = mul double %t4, 2.3 + store double %t5, double* %t3 + %t6 = getelementptr double* %d, i64 %indvar + %t7 = load double* %t6 + %t8 = mul double %t7, 4.5 + store double %t8, double* %t6 + %indvar.next = sub i64 %indvar, 3 + %exitcond = icmp eq i64 %indvar.next, 0 + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +define void @yet_another_count_up_signed(double* %d, i64 %n) nounwind { +entry: + br label %loop + +loop: + %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %loop ] + %s0 = shl i64 %indvar, 8 + %indvar.i8 = ashr i64 %s0, 8 + %t0 = getelementptr double* %d, i64 %indvar.i8 + %t1 = load double* %t0 + %t2 = mul double %t1, 0.1 + store double %t2, double* %t0 + %s1 = shl i64 %indvar, 24 + %indvar.i24 = ashr i64 %s1, 24 + %t3 = getelementptr double* %d, i64 %indvar.i24 + %t4 = load double* %t3 + %t5 = mul double %t4, 2.3 + store double %t5, double* %t3 + %t6 = getelementptr double* %d, i64 %indvar + %t7 = load double* %t6 + %t8 = mul double %t7, 4.5 + store double %t8, double* %t6 + %indvar.next = add i64 %indvar, 3 + %exitcond = icmp eq i64 %indvar.next, 10 + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +define void @yet_another_count_down_signed(double* %d, i64 %n) nounwind { +entry: + br label %loop + +loop: + %indvar = phi i64 [ 10, %entry ], [ %indvar.next, %loop ] + %s0 = shl i64 %indvar, 8 + %indvar.i8 = ashr i64 %s0, 8 + %t0 = getelementptr double* %d, i64 %indvar.i8 + %t1 = load double* %t0 + 
%t2 = mul double %t1, 0.1 + store double %t2, double* %t0 + %s1 = shl i64 %indvar, 24 + %indvar.i24 = ashr i64 %s1, 24 + %t3 = getelementptr double* %d, i64 %indvar.i24 + %t4 = load double* %t3 + %t5 = mul double %t4, 2.3 + store double %t5, double* %t3 + %t6 = getelementptr double* %d, i64 %indvar + %t7 = load double* %t6 + %t8 = mul double %t7, 4.5 + store double %t8, double* %t6 + %indvar.next = sub i64 %indvar, 3 + %exitcond = icmp eq i64 %indvar.next, 0 + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + + + Modified: llvm/trunk/test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-1.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-1.ll?rev=70241&r1=70240&r2=70241&view=diff ============================================================================== --- llvm/trunk/test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-1.ll (original) +++ llvm/trunk/test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-1.ll Mon Apr 27 15:16:15 2009 @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc --x86-asm-syntax=att | grep {cmpl \$8} +; RUN: llvm-as < %s | llc --x86-asm-syntax=att | grep {cmpq \$8} target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" target triple = "x86_64-apple-darwin9" @@ -6,7 +6,7 @@ ; happens after the relevant use, so the comparison stride can be ; easily changed. 
-define void @foo() { +define void @foo() nounwind { entry: br label %loop @@ -14,9 +14,11 @@ %indvar = phi i32 [ 0, %entry ], [ %i.2.0.us1534, %loop ] ; [#uses=1] %i.2.0.us1534 = add i32 %indvar, 1 ; [#uses=3] %tmp628.us1540 = shl i32 %i.2.0.us1534, 1 ; [#uses=1] - %tmp645646647.us1547 = sext i32 %tmp628.us1540 to i64 ; [#uses=0] + %tmp645646647.us1547 = sext i32 %tmp628.us1540 to i64 ; [#uses=1] + store i64 %tmp645646647.us1547, i64* null %tmp611.us1535 = icmp eq i32 %i.2.0.us1534, 4 ; [#uses=2] - %tmp623.us1538 = select i1 %tmp611.us1535, i32 6, i32 0 ; [#uses=0] + %tmp623.us1538 = select i1 %tmp611.us1535, i32 6, i32 0 ; [#uses=1] + store i32 %tmp623.us1538, i32* null br i1 %tmp611.us1535, label %exit, label %loop exit: From evan.cheng at apple.com Mon Apr 27 15:17:30 2009 From: evan.cheng at apple.com (Evan Cheng) Date: Mon, 27 Apr 2009 13:17:30 -0700 Subject: [llvm-commits] [PATCH] Prevent illegal joins of physical and virtual registers In-Reply-To: References: Message-ID: Looks good. Please commit. Thanks, Evan On Apr 26, 2009, at 8:21 AM, Jakob Stoklund Olesen wrote: > This is my second attempt at verifying register classes when > coalescing physical and virtual registers. The first attempt > completely missed the issue of subregisters being joined. > > I have attached two patches: > > targetregclass.patch: Change the static function > getSubRegisterRegClass in ScheduleDagSDNodesEmit.cpp to a method on > TargetRegisterClass. The method is needed by the next patch. > > coalesce-phys-virt.patch: Verify register classes when joining > physical and virtual registers. This time take into account all the > weird subreg combinations that are possible. > > Please note that this patch causes a number of failures in the > CodeGen/X86 test suite. That is because X86RegisterInfo.td contains > inconsistent information about subregister classes. I am submitting > a patch to the X86 backend that fixes this. > > With the X86 patch, this patch passes "make check". 
> > subreg.patch>_______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits From jyasskin at google.com Mon Apr 27 15:32:07 2009 From: jyasskin at google.com (Jeffrey Yasskin) Date: Mon, 27 Apr 2009 20:32:07 -0000 Subject: [llvm-commits] [llvm] r70243 - in /llvm/trunk: include/llvm/Support/ValueHandle.h unittests/Support/ValueHandleTest.cpp Message-ID: <200904272032.n3RKW8tn019785@zion.cs.uiuc.edu> Author: jyasskin Date: Mon Apr 27 15:32:07 2009 New Revision: 70243 URL: http://llvm.org/viewvc/llvm-project?rev=70243&view=rev Log: Add tests for WeakVH and AssertingVH. These pointed out that the overloads for the comparison operators were not only unnecessary in the presence of the implicit conversion; they caused ambiguous overload errors. So I deleted them. Added: llvm/trunk/unittests/Support/ValueHandleTest.cpp Modified: llvm/trunk/include/llvm/Support/ValueHandle.h Modified: llvm/trunk/include/llvm/Support/ValueHandle.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Support/ValueHandle.h?rev=70243&r1=70242&r2=70243&view=diff ============================================================================== --- llvm/trunk/include/llvm/Support/ValueHandle.h (original) +++ llvm/trunk/include/llvm/Support/ValueHandle.h Mon Apr 27 15:32:07 2009 @@ -39,7 +39,7 @@ class ValueHandleBase { friend class Value; protected: - /// HandleBaseKind - This indicates what base class the handle actually is. + /// HandleBaseKind - This indicates what sub class the handle actually is. /// This is to avoid having a vtable for the light-weight handle pointers. The /// fully generally Callback version does have a vtable. 
enum HandleBaseKind { @@ -87,20 +87,7 @@ Value *operator->() const { return getValPtr(); } Value &operator*() const { return *getValPtr(); } - - bool operator==(const Value *RHS) const { return VP == RHS; } - bool operator==(const ValueHandleBase &RHS) const { return VP == RHS.VP; } - bool operator!=(const Value *RHS) const { return VP != RHS; } - bool operator!=(const ValueHandleBase &RHS) const { return VP != RHS.VP; } - bool operator<(const Value *RHS) const { return VP < RHS; } - bool operator<(const ValueHandleBase &RHS) const { return VP < RHS.VP; } - bool operator>(const Value *RHS) const { return VP > RHS; } - bool operator>(const ValueHandleBase &RHS) const { return VP > RHS.VP; } - bool operator<=(const Value *RHS) const { return VP <= RHS; } - bool operator<=(const ValueHandleBase &RHS) const { return VP <= RHS.VP; } - bool operator>=(const Value *RHS) const { return VP >= RHS; } - bool operator>=(const ValueHandleBase &RHS) const { return VP >= RHS.VP; } - + protected: Value *getValPtr() const { return VP; } private: @@ -198,33 +185,6 @@ ValueTy *operator->() const { return getValPtr(); } ValueTy &operator*() const { return *getValPtr(); } - - // Duplicate these from the base class so that they work when assertions are - // off. 
- bool operator==(const Value *RHS) const { return getValPtr() == RHS; } - bool operator!=(const Value *RHS) const { return getValPtr() != RHS; } - bool operator<(const Value *RHS) const { return getValPtr() < RHS; } - bool operator>(const Value *RHS) const { return getValPtr() > RHS; } - bool operator<=(const Value *RHS) const { return getValPtr() <= RHS; } - bool operator>=(const Value *RHS) const { return getValPtr() >= RHS; } - bool operator==(const AssertingVH &RHS) const { - return getValPtr() == RHS.getValPtr(); - } - bool operator!=(const AssertingVH &RHS) const { - return getValPtr() != RHS.getValPtr(); - } - bool operator<(const AssertingVH &RHS) const { - return getValPtr() < RHS.getValPtr(); - } - bool operator>(const AssertingVH &RHS) const { - return getValPtr() > RHS.getValPtr(); - } - bool operator<=(const AssertingVH &RHS) const { - return getValPtr() <= RHS.getValPtr(); - } - bool operator>=(const AssertingVH &RHS) const { - return getValPtr() >= RHS.getValPtr(); - } }; } // End llvm namespace Added: llvm/trunk/unittests/Support/ValueHandleTest.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/unittests/Support/ValueHandleTest.cpp?rev=70243&view=auto ============================================================================== --- llvm/trunk/unittests/Support/ValueHandleTest.cpp (added) +++ llvm/trunk/unittests/Support/ValueHandleTest.cpp Mon Apr 27 15:32:07 2009 @@ -0,0 +1,181 @@ +//===- llvm/unittest/Support/ValueHandleTest.cpp - ValueHandle tests --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/ValueHandle.h" + +#include "llvm/Constants.h" +#include "llvm/Instructions.h" + +#include "gtest/gtest.h" + +#include + +using namespace llvm; + +namespace { + +class ValueHandle : public testing::Test { +protected: + Constant *ConstantV; + std::auto_ptr BitcastV; + + ValueHandle() : ConstantV(ConstantInt::get(Type::Int32Ty, 0)), + BitcastV(new BitCastInst(ConstantV, Type::Int32Ty)) { + } +}; + +TEST_F(ValueHandle, WeakVH_BasicOperation) { + WeakVH WVH(BitcastV.get()); + EXPECT_EQ(BitcastV.get(), WVH); + WVH = ConstantV; + EXPECT_EQ(ConstantV, WVH); + + // Make sure I can call a method on the underlying Value. It + // doesn't matter which method. + EXPECT_EQ(Type::Int32Ty, WVH->getType()); + EXPECT_EQ(Type::Int32Ty, (*WVH).getType()); +} + +TEST_F(ValueHandle, WeakVH_Comparisons) { + WeakVH BitcastWVH(BitcastV.get()); + WeakVH ConstantWVH(ConstantV); + + EXPECT_TRUE(BitcastWVH == BitcastWVH); + EXPECT_TRUE(BitcastV.get() == BitcastWVH); + EXPECT_TRUE(BitcastWVH == BitcastV.get()); + EXPECT_FALSE(BitcastWVH == ConstantWVH); + + EXPECT_TRUE(BitcastWVH != ConstantWVH); + EXPECT_TRUE(BitcastV.get() != ConstantWVH); + EXPECT_TRUE(BitcastWVH != ConstantV); + EXPECT_FALSE(BitcastWVH != BitcastWVH); + + // Cast to Value* so comparisons work. 
+ Value *BV = BitcastV.get(); + Value *CV = ConstantV; + EXPECT_EQ(BV < CV, BitcastWVH < ConstantWVH); + EXPECT_EQ(BV <= CV, BitcastWVH <= ConstantWVH); + EXPECT_EQ(BV > CV, BitcastWVH > ConstantWVH); + EXPECT_EQ(BV >= CV, BitcastWVH >= ConstantWVH); + + EXPECT_EQ(BV < CV, BitcastV.get() < ConstantWVH); + EXPECT_EQ(BV <= CV, BitcastV.get() <= ConstantWVH); + EXPECT_EQ(BV > CV, BitcastV.get() > ConstantWVH); + EXPECT_EQ(BV >= CV, BitcastV.get() >= ConstantWVH); + + EXPECT_EQ(BV < CV, BitcastWVH < ConstantV); + EXPECT_EQ(BV <= CV, BitcastWVH <= ConstantV); + EXPECT_EQ(BV > CV, BitcastWVH > ConstantV); + EXPECT_EQ(BV >= CV, BitcastWVH >= ConstantV); +} + +TEST_F(ValueHandle, WeakVH_FollowsRAUW) { + WeakVH WVH(BitcastV.get()); + WeakVH WVH_Copy(WVH); + WeakVH WVH_Recreated(BitcastV.get()); + BitcastV->replaceAllUsesWith(ConstantV); + EXPECT_EQ(ConstantV, WVH); + EXPECT_EQ(ConstantV, WVH_Copy); + EXPECT_EQ(ConstantV, WVH_Recreated); +} + +TEST_F(ValueHandle, WeakVH_NullOnDeletion) { + WeakVH WVH(BitcastV.get()); + WeakVH WVH_Copy(WVH); + WeakVH WVH_Recreated(BitcastV.get()); + BitcastV.reset(); + Value *null_value = NULL; + EXPECT_EQ(null_value, WVH); + EXPECT_EQ(null_value, WVH_Copy); + EXPECT_EQ(null_value, WVH_Recreated); +} + + +TEST_F(ValueHandle, AssertingVH_BasicOperation) { + AssertingVH AVH(BitcastV.get()); + CastInst *implicit_to_exact_type = AVH; + implicit_to_exact_type = implicit_to_exact_type; // Avoid warning. + + AssertingVH GenericAVH(BitcastV.get()); + EXPECT_EQ(BitcastV.get(), GenericAVH); + GenericAVH = ConstantV; + EXPECT_EQ(ConstantV, GenericAVH); + + // Make sure I can call a method on the underlying CastInst. It + // doesn't matter which method. 
+ EXPECT_FALSE(AVH->mayWriteToMemory()); + EXPECT_FALSE((*AVH).mayWriteToMemory()); +} + +TEST_F(ValueHandle, AssertingVH_Comparisons) { + AssertingVH BitcastAVH(BitcastV.get()); + AssertingVH ConstantAVH(ConstantV); + + EXPECT_TRUE(BitcastAVH == BitcastAVH); + EXPECT_TRUE(BitcastV.get() == BitcastAVH); + EXPECT_TRUE(BitcastAVH == BitcastV.get()); + EXPECT_FALSE(BitcastAVH == ConstantAVH); + + EXPECT_TRUE(BitcastAVH != ConstantAVH); + EXPECT_TRUE(BitcastV.get() != ConstantAVH); + EXPECT_TRUE(BitcastAVH != ConstantV); + EXPECT_FALSE(BitcastAVH != BitcastAVH); + + // Cast to Value* so comparisons work. + Value *BV = BitcastV.get(); + Value *CV = ConstantV; + EXPECT_EQ(BV < CV, BitcastAVH < ConstantAVH); + EXPECT_EQ(BV <= CV, BitcastAVH <= ConstantAVH); + EXPECT_EQ(BV > CV, BitcastAVH > ConstantAVH); + EXPECT_EQ(BV >= CV, BitcastAVH >= ConstantAVH); + + EXPECT_EQ(BV < CV, BitcastV.get() < ConstantAVH); + EXPECT_EQ(BV <= CV, BitcastV.get() <= ConstantAVH); + EXPECT_EQ(BV > CV, BitcastV.get() > ConstantAVH); + EXPECT_EQ(BV >= CV, BitcastV.get() >= ConstantAVH); + + EXPECT_EQ(BV < CV, BitcastAVH < ConstantV); + EXPECT_EQ(BV <= CV, BitcastAVH <= ConstantV); + EXPECT_EQ(BV > CV, BitcastAVH > ConstantV); + EXPECT_EQ(BV >= CV, BitcastAVH >= ConstantV); +} + +TEST_F(ValueHandle, AssertingVH_DoesNotFollowRAUW) { + AssertingVH AVH(BitcastV.get()); + BitcastV->replaceAllUsesWith(ConstantV); + EXPECT_EQ(BitcastV.get(), AVH); +} + +#ifdef NDEBUG + +TEST_F(ValueHandle, AssertingVH_ReducesToPointer) { + EXPECT_EQ(sizeof(CastInst *), sizeof(AssertingVH)); +} + +#else // !NDEBUG + +#ifdef GTEST_HAS_DEATH_TEST + +TEST_F(ValueHandle, AssertingVH_Asserts) { + AssertingVH AVH(BitcastV.get()); + EXPECT_DEATH({BitcastV.reset();}, + "An asserting value handle still pointed to this value!"); + AssertingVH Copy(AVH); + AVH = NULL; + EXPECT_DEATH({BitcastV.reset();}, + "An asserting value handle still pointed to this value!"); + Copy = NULL; + BitcastV.reset(); +} + +#endif // 
GTEST_HAS_DEATH_TEST + +#endif // NDEBUG + +} From jyasskin at google.com Mon Apr 27 15:33:54 2009 From: jyasskin at google.com (Jeffrey Yasskin) Date: Mon, 27 Apr 2009 13:33:54 -0700 Subject: [llvm-commits] Tests for WeakVH and AssertingVH In-Reply-To: <19F86ABC-29C0-406C-A6A6-EC781E54887C@apple.com> References: <19F86ABC-29C0-406C-A6A6-EC781E54887C@apple.com> Message-ID: On Sun, Apr 26, 2009 at 9:36 PM, Chris Lattner wrote: > > On Apr 26, 2009, at 9:31 PM, Jeffrey Yasskin wrote: > >> In preparation for writing CallbackVH, I figured it'd be nice for >> WeakVH and AssertingVH to have tests. These pointed out that the >> overloads for the comparison operators were not only unnecessary in >> the presence of the implicit conversion; they caused ambiguous >> overload errors. So I deleted them. > > Looks good, please commit! Thanks! Committed as r70243: http://lists.cs.uiuc.edu/pipermail/llvm-commits/Week-of-Mon-20090427/076774.html. From jyasskin at google.com Mon Apr 27 15:35:07 2009 From: jyasskin at google.com (Jeffrey Yasskin) Date: Mon, 27 Apr 2009 13:35:07 -0700 Subject: [llvm-commits] TypeBuilder helper class In-Reply-To: References: <5F5A9B7F-1967-4D07-9D74-20466088C123@apple.com> Message-ID: On Sat, Apr 25, 2009 at 3:15 PM, Chris Lattner wrote: > > On Apr 12, 2009, at 5:16 PM, Jeffrey Yasskin wrote: > >> On Thu, Apr 9, 2009 at 11:38 PM, Chris Lattner >> wrote: >>> >>> On Apr 9, 2009, at 9:40 AM, Jeffrey Yasskin wrote: >>> >>>> >>>> I think I see how to define TypeBuilder>>> restrict_to_cross_compilable_types> so that TypeBuilder>>> true> >>>> fails to compile but TypeBuilder, false> succeeds, >>>> without duplicating code. Could you confirm that that's along the >>>> lines of what you want implemented before I waste time implementing >>>> it? Also, what's the right spelling for "sometype"? I'd vote for >>>> llvm::types::i or something similar. >>>> >>> >>> Sounds great to me! >> >> Ok, here's the new version. 
I'm not entirely happy with using a bool >> for this decision since it makes user code harder to read. I could use >> an enum or distinct subtypes, but then I'd need names. "Host" seems to >> work for the version that accepts types like 'size_t', but >> "CrossCompilable" seems too long for the version that doesn't. >> Thoughts? > > This looks great! Can you please add some mention of this to the > programmer's manual? > http://llvm.org/docs/ProgrammersManual.html > > Perhaps a "how to create types" entry in the "Helpful Hints for Common > Operations" section. Will do, probably on Wednesday. > Applied here: > http://lists.cs.uiuc.edu/pipermail/llvm-commits/Week-of-Mon-20090420/076704.html > > Thanks Jeffrey! From gohman at apple.com Mon Apr 27 15:35:33 2009 From: gohman at apple.com (Dan Gohman) Date: Mon, 27 Apr 2009 20:35:33 -0000 Subject: [llvm-commits] [llvm] r70244 - in /llvm/trunk: lib/Transforms/Scalar/LoopStrengthReduce.cpp test/CodeGen/X86/change-compare-stride-0.ll test/CodeGen/X86/change-compare-stride-1.ll Message-ID: <200904272035.n3RKZXPK019887@zion.cs.uiuc.edu> Author: djg Date: Mon Apr 27 15:35:32 2009 New Revision: 70244 URL: http://llvm.org/viewvc/llvm-project?rev=70244&view=rev Log: Permit ChangeCompareStride to rewrite a comparison when the factor between the comparison's iv stride and the candidate stride is exactly -1. 
Added: llvm/trunk/test/CodeGen/X86/change-compare-stride-0.ll llvm/trunk/test/CodeGen/X86/change-compare-stride-1.ll Modified: llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp Modified: llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp?rev=70244&r1=70243&r2=70244&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp (original) +++ llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp Mon Apr 27 15:35:32 2009 @@ -2028,7 +2028,9 @@ if (!isa(SI->first)) continue; int64_t SSInt = cast(SI->first)->getValue()->getSExtValue(); - if (abs(SSInt) <= abs(CmpSSInt) || (SSInt % CmpSSInt) != 0) + if (SSInt == CmpSSInt || + abs(SSInt) < abs(CmpSSInt) || + (SSInt % CmpSSInt) != 0) continue; Scale = SSInt / CmpSSInt; Added: llvm/trunk/test/CodeGen/X86/change-compare-stride-0.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/change-compare-stride-0.ll?rev=70244&view=auto ============================================================================== --- llvm/trunk/test/CodeGen/X86/change-compare-stride-0.ll (added) +++ llvm/trunk/test/CodeGen/X86/change-compare-stride-0.ll Mon Apr 27 15:35:32 2009 @@ -0,0 +1,77 @@ +; RUN: llvm-as < %s | llc -march=x86 > %t +; RUN: grep {cmpl \$4294966818,} %t +; RUN: not grep inc %t +; RUN: not grep {leal 1(} %t +; RUN: not grep {leal -1(} %t +; RUN: grep dec %t | count 1 + +define void @borf(i8* nocapture %in, i8* nocapture %out) nounwind { +bb4.thread: + br label %bb2.outer + +bb2.outer: ; preds = %bb4, %bb4.thread + %indvar18 = phi i32 [ 0, %bb4.thread ], [ %indvar.next28, %bb4 ] ; [#uses=3] + %tmp34 = mul i32 %indvar18, 65535 ; [#uses=1] + %i.0.reg2mem.0.ph = add i32 %tmp34, 639 ; [#uses=1] + %0 = and i32 %i.0.reg2mem.0.ph, 65535 ; [#uses=1] + %1 = mul i32 %0, 480 ; [#uses=1] + %tmp20 = mul i32 %indvar18, -478 ; [#uses=1] 
+ br label %bb2 + +bb2: ; preds = %bb2, %bb2.outer + %indvar = phi i32 [ 0, %bb2.outer ], [ %indvar.next, %bb2 ] ; [#uses=3] + %ctg2 = getelementptr i8* %out, i32 %tmp20 ; [#uses=1] + %tmp21 = ptrtoint i8* %ctg2 to i32 ; [#uses=1] + %tmp23 = sub i32 %tmp21, %indvar ; [#uses=1] + %out_addr.0.reg2mem.0 = inttoptr i32 %tmp23 to i8* ; [#uses=1] + %tmp25 = mul i32 %indvar, 65535 ; [#uses=1] + %j.0.reg2mem.0 = add i32 %tmp25, 479 ; [#uses=1] + %2 = and i32 %j.0.reg2mem.0, 65535 ; [#uses=1] + %3 = add i32 %1, %2 ; [#uses=9] + %4 = add i32 %3, -481 ; [#uses=1] + %5 = getelementptr i8* %in, i32 %4 ; [#uses=1] + %6 = load i8* %5, align 1 ; [#uses=1] + %7 = add i32 %3, -480 ; [#uses=1] + %8 = getelementptr i8* %in, i32 %7 ; [#uses=1] + %9 = load i8* %8, align 1 ; [#uses=1] + %10 = add i32 %3, -479 ; [#uses=1] + %11 = getelementptr i8* %in, i32 %10 ; [#uses=1] + %12 = load i8* %11, align 1 ; [#uses=1] + %13 = add i32 %3, -1 ; [#uses=1] + %14 = getelementptr i8* %in, i32 %13 ; [#uses=1] + %15 = load i8* %14, align 1 ; [#uses=1] + %16 = getelementptr i8* %in, i32 %3 ; [#uses=1] + %17 = load i8* %16, align 1 ; [#uses=1] + %18 = add i32 %3, 1 ; [#uses=1] + %19 = getelementptr i8* %in, i32 %18 ; [#uses=1] + %20 = load i8* %19, align 1 ; [#uses=1] + %21 = add i32 %3, 481 ; [#uses=1] + %22 = getelementptr i8* %in, i32 %21 ; [#uses=1] + %23 = load i8* %22, align 1 ; [#uses=1] + %24 = add i32 %3, 480 ; [#uses=1] + %25 = getelementptr i8* %in, i32 %24 ; [#uses=1] + %26 = load i8* %25, align 1 ; [#uses=1] + %27 = add i32 %3, 479 ; [#uses=1] + %28 = getelementptr i8* %in, i32 %27 ; [#uses=1] + %29 = load i8* %28, align 1 ; [#uses=1] + %30 = add i8 %9, %6 ; [#uses=1] + %31 = add i8 %30, %12 ; [#uses=1] + %32 = add i8 %31, %15 ; [#uses=1] + %33 = add i8 %32, %17 ; [#uses=1] + %34 = add i8 %33, %20 ; [#uses=1] + %35 = add i8 %34, %23 ; [#uses=1] + %36 = add i8 %35, %26 ; [#uses=1] + %37 = add i8 %36, %29 ; [#uses=1] + store i8 %37, i8* %out_addr.0.reg2mem.0, align 1 + %indvar.next = add i32 
%indvar, 1 ; [#uses=2] + %exitcond = icmp eq i32 %indvar.next, 478 ; [#uses=1] + br i1 %exitcond, label %bb4, label %bb2 + +bb4: ; preds = %bb2 + %indvar.next28 = add i32 %indvar18, 1 ; [#uses=2] + %exitcond29 = icmp eq i32 %indvar.next28, 638 ; [#uses=1] + br i1 %exitcond29, label %return, label %bb2.outer + +return: ; preds = %bb4 + ret void +} Added: llvm/trunk/test/CodeGen/X86/change-compare-stride-1.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/change-compare-stride-1.ll?rev=70244&view=auto ============================================================================== --- llvm/trunk/test/CodeGen/X86/change-compare-stride-1.ll (added) +++ llvm/trunk/test/CodeGen/X86/change-compare-stride-1.ll Mon Apr 27 15:35:32 2009 @@ -0,0 +1,86 @@ +; RUN: llvm-as < %s | llc -march=x86-64 > %t +; RUN: grep {cmpq \$-478,} %t +; RUN: not grep inc %t +; RUN: not grep {leal 1(} %t +; RUN: not grep {leal -1(} %t +; RUN: grep dec %t | count 1 + +define void @borf(i8* nocapture %in, i8* nocapture %out) nounwind { +bb4.thread: + br label %bb2.outer + +bb2.outer: ; preds = %bb4, %bb4.thread + %indvar19 = phi i64 [ 0, %bb4.thread ], [ %indvar.next29, %bb4 ] ; [#uses=3] + %indvar31 = trunc i64 %indvar19 to i16 ; [#uses=1] + %i.0.reg2mem.0.ph = sub i16 639, %indvar31 ; [#uses=1] + %0 = zext i16 %i.0.reg2mem.0.ph to i32 ; [#uses=1] + %1 = mul i32 %0, 480 ; [#uses=1] + %tmp21 = mul i64 %indvar19, -478 ; [#uses=1] + br label %bb2 + +bb2: ; preds = %bb2, %bb2.outer + %indvar = phi i64 [ 0, %bb2.outer ], [ %indvar.next, %bb2 ] ; [#uses=3] + %indvar16 = trunc i64 %indvar to i16 ; [#uses=1] + %ctg2 = getelementptr i8* %out, i64 %tmp21 ; [#uses=1] + %tmp22 = ptrtoint i8* %ctg2 to i64 ; [#uses=1] + %tmp24 = sub i64 %tmp22, %indvar ; [#uses=1] + %out_addr.0.reg2mem.0 = inttoptr i64 %tmp24 to i8* ; [#uses=1] + %j.0.reg2mem.0 = sub i16 479, %indvar16 ; [#uses=1] + %2 = zext i16 %j.0.reg2mem.0 to i32 ; [#uses=1] + %3 = add i32 %1, %2 ; [#uses=9] + %4 = add i32 %3, -481 ; 
[#uses=1] + %5 = zext i32 %4 to i64 ; [#uses=1] + %6 = getelementptr i8* %in, i64 %5 ; [#uses=1] + %7 = load i8* %6, align 1 ; [#uses=1] + %8 = add i32 %3, -480 ; [#uses=1] + %9 = zext i32 %8 to i64 ; [#uses=1] + %10 = getelementptr i8* %in, i64 %9 ; [#uses=1] + %11 = load i8* %10, align 1 ; [#uses=1] + %12 = add i32 %3, -479 ; [#uses=1] + %13 = zext i32 %12 to i64 ; [#uses=1] + %14 = getelementptr i8* %in, i64 %13 ; [#uses=1] + %15 = load i8* %14, align 1 ; [#uses=1] + %16 = add i32 %3, -1 ; [#uses=1] + %17 = zext i32 %16 to i64 ; [#uses=1] + %18 = getelementptr i8* %in, i64 %17 ; [#uses=1] + %19 = load i8* %18, align 1 ; [#uses=1] + %20 = zext i32 %3 to i64 ; [#uses=1] + %21 = getelementptr i8* %in, i64 %20 ; [#uses=1] + %22 = load i8* %21, align 1 ; [#uses=1] + %23 = add i32 %3, 1 ; [#uses=1] + %24 = zext i32 %23 to i64 ; [#uses=1] + %25 = getelementptr i8* %in, i64 %24 ; [#uses=1] + %26 = load i8* %25, align 1 ; [#uses=1] + %27 = add i32 %3, 481 ; [#uses=1] + %28 = zext i32 %27 to i64 ; [#uses=1] + %29 = getelementptr i8* %in, i64 %28 ; [#uses=1] + %30 = load i8* %29, align 1 ; [#uses=1] + %31 = add i32 %3, 480 ; [#uses=1] + %32 = zext i32 %31 to i64 ; [#uses=1] + %33 = getelementptr i8* %in, i64 %32 ; [#uses=1] + %34 = load i8* %33, align 1 ; [#uses=1] + %35 = add i32 %3, 479 ; [#uses=1] + %36 = zext i32 %35 to i64 ; [#uses=1] + %37 = getelementptr i8* %in, i64 %36 ; [#uses=1] + %38 = load i8* %37, align 1 ; [#uses=1] + %39 = add i8 %11, %7 ; [#uses=1] + %40 = add i8 %39, %15 ; [#uses=1] + %41 = add i8 %40, %19 ; [#uses=1] + %42 = add i8 %41, %22 ; [#uses=1] + %43 = add i8 %42, %26 ; [#uses=1] + %44 = add i8 %43, %30 ; [#uses=1] + %45 = add i8 %44, %34 ; [#uses=1] + %46 = add i8 %45, %38 ; [#uses=1] + store i8 %46, i8* %out_addr.0.reg2mem.0, align 1 + %indvar.next = add i64 %indvar, 1 ; [#uses=2] + %exitcond = icmp eq i64 %indvar.next, 478 ; [#uses=1] + br i1 %exitcond, label %bb4, label %bb2 + +bb4: ; preds = %bb2 + %indvar.next29 = add i64 %indvar19, 1 ; 
[#uses=2] + %exitcond30 = icmp eq i64 %indvar.next29, 638 ; [#uses=1] + br i1 %exitcond30, label %return, label %bb2.outer + +return: ; preds = %bb4 + ret void +} From evan.cheng at apple.com Mon Apr 27 15:42:46 2009 From: evan.cheng at apple.com (Evan Cheng) Date: Mon, 27 Apr 2009 20:42:46 -0000 Subject: [llvm-commits] [llvm] r70245 - in /llvm/trunk: lib/CodeGen/LiveIntervalAnalysis.cpp test/CodeGen/X86/2009-04-27-LiveIntervalsBug.ll Message-ID: <200904272042.n3RKgk0P020110@zion.cs.uiuc.edu> Author: evancheng Date: Mon Apr 27 15:42:46 2009 New Revision: 70245 URL: http://llvm.org/viewvc/llvm-project?rev=70245&view=rev Log: Fix PR4076. Correctly create live interval of physical register with two-address update. Added: llvm/trunk/test/CodeGen/X86/2009-04-27-LiveIntervalsBug.ll Modified: llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp Modified: llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp?rev=70245&r1=70244&r2=70245&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp (original) +++ llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp Mon Apr 27 15:42:46 2009 @@ -612,14 +612,24 @@ DOUT << " killed"; end = getUseIndex(baseIndex) + 1; goto exit; - } else if (mi->modifiesRegister(interval.reg, tri_)) { - // Another instruction redefines the register before it is ever read. - // Then the register is essentially dead at the instruction that defines - // it. Hence its interval is: - // [defSlot(def), defSlot(def)+1) - DOUT << " dead"; - end = start + 1; - goto exit; + } else { + int DefIdx = mi->findRegisterDefOperandIdx(interval.reg, false, tri_); + if (DefIdx != -1) { + if (mi->isRegTiedToUseOperand(DefIdx)) { + // Two-address instruction. 
+ end = getDefIndex(baseIndex); + if (mi->getOperand(DefIdx).isEarlyClobber()) + end = getUseIndex(baseIndex); + } else { + // Another instruction redefines the register before it is ever read. + // Then the register is essentially dead at the instruction that defines + // it. Hence its interval is: + // [defSlot(def), defSlot(def)+1) + DOUT << " dead"; + end = start + 1; + } + goto exit; + } } baseIndex += InstrSlots::NUM; @@ -663,14 +673,14 @@ MI->getOpcode() == TargetInstrInfo::SUBREG_TO_REG || tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubReg, DstSubReg)) CopyMI = MI; - handlePhysicalRegisterDef(MBB, MI, MIIdx, MO, + handlePhysicalRegisterDef(MBB, MI, MIIdx, MO, getOrCreateInterval(MO.getReg()), CopyMI); // Def of a register also defines its sub-registers. for (const unsigned* AS = tri_->getSubRegisters(MO.getReg()); *AS; ++AS) // If MI also modifies the sub-register explicitly, avoid processing it // more than once. Do not pass in TRI here so it checks for exact match. if (!MI->modifiesRegister(*AS)) - handlePhysicalRegisterDef(MBB, MI, MIIdx, MO, + handlePhysicalRegisterDef(MBB, MI, MIIdx, MO, getOrCreateInterval(*AS), 0); } } Added: llvm/trunk/test/CodeGen/X86/2009-04-27-LiveIntervalsBug.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/2009-04-27-LiveIntervalsBug.ll?rev=70245&view=auto ============================================================================== --- llvm/trunk/test/CodeGen/X86/2009-04-27-LiveIntervalsBug.ll (added) +++ llvm/trunk/test/CodeGen/X86/2009-04-27-LiveIntervalsBug.ll Mon Apr 27 15:42:46 2009 @@ -0,0 +1,165 @@ +; RUN: llvm-as < %s | llc -mtriple=i386-unknown-linux-gnu | grep cmpxchgl | not grep eax +; PR4076 + + type { i8, i8, i8 } ; type %0 + type { i32, i8** } ; type %1 + type { %3* } ; type %2 + type { %4 } ; type %3 + type { %5 } ; type %4 + type { %6, i32, %7 } ; type %5 + type { i8* } ; type %6 + type { i32, [12 x i8] } ; type %7 + type { %9 } ; type %8 + type { %10, %11*, i8 } ; type %9 + type { %11* } 
; type %10 + type { i32, %6, i8*, %12, %13*, i8, i32, %28, %29, i32, %30, i32, i32, i32, i8*, i8*, i8, i8 } ; type %11 + type { %13* } ; type %12 + type { %14, i32, %13*, %21 } ; type %13 + type { %15, %16 } ; type %14 + type { i32 (...)** } ; type %15 + type { %17, i8* (i32)*, void (i8*)*, i8 } ; type %16 + type { i32 (...)**, i8*, i8*, i8*, i8*, i8*, i8*, %18 } ; type %17 + type { %19* } ; type %18 + type { i32, %20**, i32, %20**, i8** } ; type %19 + type { i32 (...)**, i32 } ; type %20 + type { %22, %25*, i8, i8, %17*, %26*, %27*, %27* } ; type %21 + type { i32 (...)**, i32, i32, i32, i32, i32, %23*, %24, [8 x %24], i32, %24*, %18 } ; type %22 + type { %23*, void (i32, %22*, i32)*, i32, i32 } ; type %23 + type { i8*, i32 } ; type %24 + type { i32 (...)**, %21 } ; type %25 + type { %20, i32*, i8, i32*, i32*, i16*, i8, [256 x i8], [256 x i8], i8 } ; type %26 + type { %20 } ; type %27 + type { void (%9*)*, i32 } ; type %28 + type { %15* } ; type %29 + type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8* } ; type %30 + at AtomicOps_Internalx86CPUFeatures = external global %0 ; <%0*> [#uses=1] +internal constant [19 x i8] c"xxxxxxxxxxxxxxxxxx\00" ; <[19 x i8]*>:0 [#uses=1] +internal constant [47 x i8] c"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\00" ; <[47 x i8]*>:1 [#uses=1] + +define i8** @func6(i8 zeroext, i32, i32, %1*) nounwind { +;