From stoklund at 2pi.dk Mon Apr 5 00:07:43 2010 From: stoklund at 2pi.dk (Jakob Stoklund Olesen) Date: Sun, 4 Apr 2010 22:07:43 -0700 Subject: [llvm-commits] [llvm] r100233 - in /llvm/trunk: ./ include/llvm/Support/ lib/Target/ARM/ lib/Target/ARM/Disassembler/ test/MC/Disassembler/ utils/TableGen/ In-Reply-To: References: <20100402222738.E3E3D2A6C12C@llvm.org> <2582BF8C-8287-40FA-B090-C46C078D5973@apple.com> Message-ID: <9769FBBB-5D35-461E-BA7E-01A4FDA7B5AD@2pi.dk> On Apr 4, 2010, at 6:59 PM, Jakob Stoklund Olesen wrote: > > On Apr 4, 2010, at 6:48 PM, Johnny Chen wrote: > >> Hi Jakob, >> >> Thanks. You're welcome to modify the MathExtras.h stuff. >> Right now, arm disassembler is the only client of SignExtend32(int32_t x). >> I originally use a bit-field hack but Chris thinks it is non-portable. > > How about this? > > return x | -(x & (1 << (B - 1))); In two operations, assuming the high bits in x are 0: return (x ^ (1 << (B - 1))) - (1 << (B - 1)); >From Henry S. Warren, Jr., "Hacker's Delight". From clattner at apple.com Mon Apr 5 00:10:59 2010 From: clattner at apple.com (Chris Lattner) Date: Sun, 4 Apr 2010 22:10:59 -0700 Subject: [llvm-commits] [llvm] r100233 - in /llvm/trunk: ./ include/llvm/Support/ lib/Target/ARM/ lib/Target/ARM/Disassembler/ test/MC/Disassembler/ utils/TableGen/ In-Reply-To: <6B4F5068-C32F-47F3-8E25-BFEF3E201CC3@2pi.dk> References: <20100402222738.E3E3D2A6C12C@llvm.org> <2582BF8C-8287-40FA-B090-C46C078D5973@apple.com> <0F76E611-D66F-4456-949D-9487D4E46D94@apple.com> <1270439298.1443.5.camel@aspire> <1270440180.1443.10.camel@aspire> <808FF74B-0D21-4F22-A064-53B27ABCFF14@apple.com> <6B4F5068-C32F-47F3-8E25-BFEF3E201CC3@2pi.dk> Message-ID: On Apr 4, 2010, at 9:30 PM, Jakob Stoklund Olesen wrote: >>> If you left-shift a positive number until it goes negative, that's an >>> overflow. At least that's how I understand it. >> >> You can shift left an N bit number between 0 and N-1 bits safely without overflow. > > Only an unsigned number. 
From the C++0X working draft: LLVM makes this assumption all over the place, just like it assumes a two's complement representation and that 'unsigned' is 32-bits. It is safe to rely on. -Chris From sabre at nondot.org Mon Apr 5 00:11:15 2010 From: sabre at nondot.org (Chris Lattner) Date: Mon, 05 Apr 2010 05:11:15 -0000 Subject: [llvm-commits] [llvm] r100405 - in /llvm/trunk: include/llvm/CodeGen/AsmPrinter.h lib/CodeGen/AsmPrinter/AsmPrinter.cpp lib/CodeGen/AsmPrinter/DwarfDebug.cpp lib/CodeGen/AsmPrinter/DwarfDebug.h lib/CodeGen/AsmPrinter/DwarfWriter.cpp Message-ID: <20100405051115.AB7212A6C12C@llvm.org> Author: lattner Date: Mon Apr 5 00:11:15 2010 New Revision: 100405 URL: http://llvm.org/viewvc/llvm-project?rev=100405&view=rev Log: change AsmPrinter to use DwarfDebug/DwarfException directly instead of going through DwarfWriter. Modified: llvm/trunk/include/llvm/CodeGen/AsmPrinter.h llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h llvm/trunk/lib/CodeGen/AsmPrinter/DwarfWriter.cpp Modified: llvm/trunk/include/llvm/CodeGen/AsmPrinter.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/AsmPrinter.h?rev=100405&r1=100404&r2=100405&view=diff ============================================================================== --- llvm/trunk/include/llvm/CodeGen/AsmPrinter.h (original) +++ llvm/trunk/include/llvm/CodeGen/AsmPrinter.h Mon Apr 5 00:11:15 2010 @@ -42,14 +42,15 @@ class MachineJumpTableInfo; class MachineModuleInfo; class MachineMove; + class MCAsmInfo; class MCInst; class MCContext; class MCSection; class MCStreamer; class MCSymbol; - class DwarfWriter; + class DwarfDebug; + class DwarfException; class Mangler; - class MCAsmInfo; class TargetLoweringObjectFile; class TargetData; class Twine; @@ -59,9 +60,6 @@ /// asm writers. 
class AsmPrinter : public MachineFunctionPass { public: - /// DW - If available, this is a pointer to the current dwarf writer. - DwarfWriter *DW; - /// Target machine description. /// TargetMachine &TM; @@ -107,8 +105,14 @@ /// If VerboseAsm is set, a pointer to the loop info for this /// function. - /// MachineLoopInfo *LI; + + /// DD - If the target supports dwarf debug info, this pointer is non-null. + DwarfDebug *DD; + + /// DE - If the target supports dwarf exception info, this pointer is + /// non-null. + DwarfException *DE; protected: explicit AsmPrinter(TargetMachine &TM, MCStreamer &Streamer); Modified: llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp?rev=100405&r1=100404&r2=100405&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp (original) +++ llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp Mon Apr 5 00:11:15 2010 @@ -13,8 +13,9 @@ #define DEBUG_TYPE "asm-printer" #include "llvm/CodeGen/AsmPrinter.h" +#include "DwarfDebug.h" +#include "DwarfException.h" #include "llvm/Module.h" -#include "llvm/CodeGen/DwarfWriter.h" #include "llvm/CodeGen/GCMetadataPrinter.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -62,12 +63,14 @@ OutContext(Streamer.getContext()), OutStreamer(Streamer), LastMI(0), LastFn(0), Counter(~0U), SetCounter(0) { - DW = 0; MMI = 0; LI = 0; + DD = 0; DE = 0; MMI = 0; LI = 0; GCMetadataPrinters = 0; VerboseAsm = Streamer.isVerboseAsm(); } AsmPrinter::~AsmPrinter() { + assert(DD == 0 && DE == 0 && "Debug/EH info didn't get finalized"); + if (GCMetadataPrinters != 0) { gcp_map_type &GCMap = getGCMap(GCMetadataPrinters); @@ -108,7 +111,6 @@ MachineFunctionPass::getAnalysisUsage(AU); AU.addRequired(); AU.addRequired(); - AU.addRequired(); if (isVerbose()) AU.addRequired(); } @@ -148,9 +150,11 @@ 
OutStreamer.AddBlankLine(); } - DW = getAnalysisIfAvailable(); - if (DW) - DW->BeginModule(&M, this); + if (MAI->doesSupportDebugInformation()) + DD = new DwarfDebug(this, &M); + + if (MAI->doesSupportExceptionHandling()) + DE = new DwarfException(this); return false; } @@ -344,8 +348,8 @@ } // Emit pre-function debug and/or EH information. - if (MAI->doesSupportDebugInformation() || MAI->doesSupportExceptionHandling()) - DW->BeginFunction(MF); + if (DE) DE->BeginFunction(MF); + if (DD) DD->beginFunction(MF); } /// EmitFunctionEntryLabel - Emit the label that is the entrypoint for the @@ -439,8 +443,7 @@ // Emit target-specific gunk before the function body. EmitFunctionBodyStart(); - bool ShouldPrintDebugScopes = - DW && MAI->doesSupportDebugInformation() &&DW->ShouldEmitDwarfDebug(); + bool ShouldPrintDebugScopes = DD && MMI->hasDebugInfo(); // Print out code for the function. bool HasAnyRealCode = false; @@ -457,7 +460,7 @@ ++EmittedInsts; if (ShouldPrintDebugScopes) - DW->BeginScope(II); + DD->beginScope(II); if (isVerbose()) EmitComments(*II, OutStreamer.GetCommentOS()); @@ -483,7 +486,7 @@ } if (ShouldPrintDebugScopes) - DW->EndScope(II); + DD->endScope(II); } } @@ -512,8 +515,9 @@ } // Emit post-function debug information. - if (MAI->doesSupportDebugInformation() || MAI->doesSupportExceptionHandling()) - DW->EndFunction(MF); + if (DD) DD->endFunction(MF); + if (DE) DE->EndFunction(); + MMI->EndFunction(); // Print out jump tables referenced by the function. EmitJumpTableInfo(); @@ -528,9 +532,15 @@ I != E; ++I) EmitGlobalVariable(I); - // Emit final debug information. - if (MAI->doesSupportDebugInformation() || MAI->doesSupportExceptionHandling()) - DW->EndModule(); + // Finalize debug and EH information. + if (DE) { + DE->EndModule(); + delete DE; DE = 0; + } + if (DD) { + DD->endModule(); + delete DD; DD = 0; + } // If the target wants to know about weak references, print them all. 
if (MAI->getWeakRefDirective()) { @@ -594,7 +604,7 @@ EmitEndOfAsmFile(M); delete Mang; Mang = 0; - DW = 0; MMI = 0; + MMI = 0; OutStreamer.Finish(); return false; Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp?rev=100405&r1=100404&r2=100405&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp (original) +++ llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp Mon Apr 5 00:11:15 2010 @@ -299,7 +299,7 @@ delete Variables[j]; } -DwarfDebug::DwarfDebug(AsmPrinter *A) +DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) : Asm(A), MMI(Asm->MMI), ModuleCU(0), AbbreviationsSet(InitAbbreviationsSetSize), CurrentFnDbgScope(0), DebugTimer(0) { @@ -310,6 +310,8 @@ if (TimePassesIsEnabled) DebugTimer = new Timer("Dwarf Debug Writer"); + + beginModule(M); } DwarfDebug::~DwarfDebug() { for (unsigned j = 0, M = DIEBlocks.size(); j < M; ++j) @@ -1791,9 +1793,6 @@ /// content. Create global DIEs and emit initial debug info sections. /// This is inovked by the target AsmPrinter. void DwarfDebug::beginModule(Module *M) { - if (!Asm->MAI->doesSupportDebugInformation()) - return; - MMI = Asm->MMI; TimeRegion Timer(DebugTimer); Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h?rev=100405&r1=100404&r2=100405&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h (original) +++ llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h Mon Apr 5 00:11:15 2010 @@ -513,7 +513,7 @@ //===--------------------------------------------------------------------===// // Main entry points. 
// - DwarfDebug(AsmPrinter *A); + DwarfDebug(AsmPrinter *A, Module *M); virtual ~DwarfDebug(); /// beginModule - Emit all Dwarf sections that should come prior to the Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfWriter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/DwarfWriter.cpp?rev=100405&r1=100404&r2=100405&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/DwarfWriter.cpp (original) +++ llvm/trunk/lib/CodeGen/AsmPrinter/DwarfWriter.cpp Mon Apr 5 00:11:15 2010 @@ -38,8 +38,7 @@ /// content. void DwarfWriter::BeginModule(Module *M, AsmPrinter *A) { DE = new DwarfException(A); - DD = new DwarfDebug(A); - DD->beginModule(M); + DD = new DwarfDebug(A, M); } /// EndModule - Emit all Dwarf sections that should come after the content. From sabre at nondot.org Mon Apr 5 00:12:59 2010 From: sabre at nondot.org (Chris Lattner) Date: Mon, 05 Apr 2010 05:12:59 -0000 Subject: [llvm-commits] [llvm] r100406 - in /llvm/trunk: include/llvm/CodeGen/DwarfWriter.h lib/CodeGen/AsmPrinter/CMakeLists.txt lib/CodeGen/AsmPrinter/DwarfWriter.cpp Message-ID: <20100405051259.783162A6C12C@llvm.org> Author: lattner Date: Mon Apr 5 00:12:59 2010 New Revision: 100406 URL: http://llvm.org/viewvc/llvm-project?rev=100406&view=rev Log: finally blast DwarfWriter away. 
Removed: llvm/trunk/include/llvm/CodeGen/DwarfWriter.h llvm/trunk/lib/CodeGen/AsmPrinter/DwarfWriter.cpp Modified: llvm/trunk/lib/CodeGen/AsmPrinter/CMakeLists.txt Removed: llvm/trunk/include/llvm/CodeGen/DwarfWriter.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/DwarfWriter.h?rev=100405&view=auto ============================================================================== --- llvm/trunk/include/llvm/CodeGen/DwarfWriter.h (original) +++ llvm/trunk/include/llvm/CodeGen/DwarfWriter.h (removed) @@ -1,95 +0,0 @@ -//===-- llvm/CodeGen/DwarfWriter.h - Dwarf Framework ------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains support for writing Dwarf debug and exception info into -// asm files. For Details on the Dwarf 3 specfication see DWARF Debugging -// Information Format V.3 reference manual http://dwarf.freestandards.org , -// -// The role of the Dwarf Writer class is to extract information from the -// MachineModuleInfo object, organize it in Dwarf form and then emit it into asm -// the current asm file using data and high level Dwarf directives. 
-// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CODEGEN_DWARFWRITER_H -#define LLVM_CODEGEN_DWARFWRITER_H - -#include "llvm/Pass.h" -#include "llvm/Target/TargetMachine.h" - -namespace llvm { - -class AsmPrinter; -class DwarfDebug; -class DwarfException; -class MachineModuleInfo; -class MachineFunction; -class MachineInstr; -class Value; -class Module; -class MDNode; -class MCAsmInfo; -class MCSymbol; -class raw_ostream; -class Instruction; -class DICompileUnit; -class DISubprogram; -class DIVariable; - -//===----------------------------------------------------------------------===// -// DwarfWriter - Emits Dwarf debug and exception handling directives. -// - -class DwarfWriter : public ImmutablePass { -private: - /// DD - Provides the DwarfWriter debug implementation. - /// - DwarfDebug *DD; - - /// DE - Provides the DwarfWriter exception implementation. - /// - DwarfException *DE; - -public: - static char ID; // Pass identification, replacement for typeid - - DwarfWriter(); - virtual ~DwarfWriter(); - - //===--------------------------------------------------------------------===// - // Main entry points. - // - - /// BeginModule - Emit all Dwarf sections that should come prior to the - /// content. - void BeginModule(Module *M, AsmPrinter *A); - - /// EndModule - Emit all Dwarf sections that should come after the content. - /// - void EndModule(); - - /// BeginFunction - Gather pre-function debug information. Assumes being - /// emitted immediately after the function entry point. - void BeginFunction(const MachineFunction *MF); - - /// EndFunction - Gather and emit post-function debug information. - /// - void EndFunction(const MachineFunction *MF); - - /// ShouldEmitDwarfDebug - Returns true if Dwarf debugging declarations should - /// be emitted. 
- bool ShouldEmitDwarfDebug() const; - - void BeginScope(const MachineInstr *MI); - void EndScope(const MachineInstr *MI); -}; - -} // end llvm namespace - -#endif Modified: llvm/trunk/lib/CodeGen/AsmPrinter/CMakeLists.txt URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/CMakeLists.txt?rev=100406&r1=100405&r2=100406&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/CMakeLists.txt (original) +++ llvm/trunk/lib/CodeGen/AsmPrinter/CMakeLists.txt Mon Apr 5 00:12:59 2010 @@ -5,6 +5,5 @@ DIE.cpp DwarfDebug.cpp DwarfException.cpp - DwarfWriter.cpp OcamlGCPrinter.cpp ) Removed: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfWriter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/DwarfWriter.cpp?rev=100405&view=auto ============================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/DwarfWriter.cpp (original) +++ llvm/trunk/lib/CodeGen/AsmPrinter/DwarfWriter.cpp (removed) @@ -1,82 +0,0 @@ -//===-- llvm/CodeGen/DwarfWriter.cpp - Dwarf Framework --------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains support for writing dwarf info into asm files. 
-// -//===----------------------------------------------------------------------===// - -#include "llvm/CodeGen/DwarfWriter.h" -#include "DwarfDebug.h" -#include "DwarfException.h" -#include "llvm/CodeGen/MachineModuleInfo.h" - -using namespace llvm; - -static RegisterPass -X("dwarfwriter", "DWARF Information Writer"); -char DwarfWriter::ID = 0; - -//===----------------------------------------------------------------------===// -/// DwarfWriter Implementation -/// - -DwarfWriter::DwarfWriter() - : ImmutablePass(&ID), DD(0), DE(0) {} - -DwarfWriter::~DwarfWriter() { - delete DE; - delete DD; -} - -/// BeginModule - Emit all Dwarf sections that should come prior to the -/// content. -void DwarfWriter::BeginModule(Module *M, AsmPrinter *A) { - DE = new DwarfException(A); - DD = new DwarfDebug(A, M); -} - -/// EndModule - Emit all Dwarf sections that should come after the content. -/// -void DwarfWriter::EndModule() { - DE->EndModule(); - DD->endModule(); - delete DD; DD = 0; - delete DE; DE = 0; -} - -/// BeginFunction - Gather pre-function debug information. Assumes being -/// emitted immediately after the function entry point. -void DwarfWriter::BeginFunction(const MachineFunction *MF) { - DE->BeginFunction(MF); - DD->beginFunction(MF); -} - -/// EndFunction - Gather and emit post-function debug information. -/// -void DwarfWriter::EndFunction(const MachineFunction *MF) { - DD->endFunction(MF); - DE->EndFunction(); - - if (MachineModuleInfo *MMI = DE->MMI) - // Clear function debug information. - MMI->EndFunction(); -} - -/// ShouldEmitDwarfDebug - Returns true if Dwarf debugging declarations should -/// be emitted. 
-bool DwarfWriter::ShouldEmitDwarfDebug() const { - return DD && DD->MMI->hasDebugInfo(); -} - -void DwarfWriter::BeginScope(const MachineInstr *MI) { - DD->beginScope(MI); -} -void DwarfWriter::EndScope(const MachineInstr *MI) { - DD->endScope(MI); -} From sabre at nondot.org Mon Apr 5 00:24:55 2010 From: sabre at nondot.org (Chris Lattner) Date: Mon, 05 Apr 2010 05:24:55 -0000 Subject: [llvm-commits] [llvm] r100407 - in /llvm/trunk/lib/CodeGen/AsmPrinter: DwarfDebug.cpp DwarfDebug.h Message-ID: <20100405052455.8F3642A6C12C@llvm.org> Author: lattner Date: Mon Apr 5 00:24:55 2010 New Revision: 100407 URL: http://llvm.org/viewvc/llvm-project?rev=100407&view=rev Log: prune #includes, realize the MMI can never be null. Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp?rev=100407&r1=100406&r2=100407&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp (original) +++ llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp Mon Apr 5 00:24:55 2010 @@ -13,6 +13,7 @@ #define DEBUG_TYPE "dwarfdebug" #include "DwarfDebug.h" +#include "DIE.h" #include "llvm/Module.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" @@ -26,6 +27,7 @@ #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Analysis/DebugInfo.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/Debug.h" @@ -401,6 +403,14 @@ Die->addValue(Attribute, Form, Value); } +/// addDIEEntry - Add a DIE attribute data and value. 
+/// +void DwarfDebug::addDIEEntry(DIE *Die, unsigned Attribute, unsigned Form, + DIE *Entry) { + Die->addValue(Attribute, Form, createDIEEntry(Entry)); +} + + /// addBlock - Add block data. /// void DwarfDebug::addBlock(DIE *Die, unsigned Attribute, unsigned Form, @@ -1793,8 +1803,6 @@ /// content. Create global DIEs and emit initial debug info sections. /// This is inovked by the target AsmPrinter. void DwarfDebug::beginModule(Module *M) { - MMI = Asm->MMI; - TimeRegion Timer(DebugTimer); DebugInfoFinder DbgFinder; @@ -1994,8 +2002,6 @@ /// collectVariableInfo - Populate DbgScope entries with variables' info. void DwarfDebug::collectVariableInfo() { - if (!MMI) return; - const LLVMContext &Ctx = Asm->MF->getFunction()->getContext(); MachineModuleInfo::VariableDbgInfoMapTy &VMap = MMI->getVariableDbgInfo(); @@ -2297,8 +2303,8 @@ /// endFunction - Gather and emit post-function debug information. /// void DwarfDebug::endFunction(const MachineFunction *MF) { - if (!MMI->hasDebugInfo()) return; - if (DbgScopeMap.empty()) return; + if (!MMI->hasDebugInfo() || + DbgScopeMap.empty()) return; TimeRegion Timer(DebugTimer); @@ -2346,9 +2352,6 @@ /// unique label that was emitted and which provides correspondence to /// the source line list. 
MCSymbol *DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, MDNode *S) { - if (!MMI) - return 0; - TimeRegion Timer(DebugTimer); StringRef Dir; Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h?rev=100407&r1=100406&r2=100407&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h (original) +++ llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h Mon Apr 5 00:24:55 2010 @@ -14,18 +14,13 @@ #ifndef CODEGEN_ASMPRINTER_DWARFDEBUG_H__ #define CODEGEN_ASMPRINTER_DWARFDEBUG_H__ -#include "DIE.h" #include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/CodeGen/MachineLocation.h" -#include "llvm/Analysis/DebugInfo.h" -#include "llvm/Support/Allocator.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/FoldingSet.h" -#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/UniqueVector.h" -#include +#include "llvm/Support/Allocator.h" namespace llvm { @@ -34,9 +29,27 @@ class DbgScope; class DbgVariable; class MachineFrameInfo; +class MachineLocation; class MachineModuleInfo; class MCAsmInfo; class Timer; +class DIEAbbrev; +class DIE; +class DIEBlock; +class DIEEntry; + +class DIEnumerator; +class DIDescriptor; +class DIVariable; +class DIGlobal; +class DIGlobalVariable; +class DISubprogram; +class DIBasicType; +class DIDerivedType; +class DIType; +class DINameSpace; +class DISubrange; +class DICompositeType; //===----------------------------------------------------------------------===// /// SrcLineInfo - This class is used to record source line correspondence. @@ -279,10 +292,8 @@ /// addDIEEntry - Add a DIE attribute data and value. 
/// - void addDIEEntry(DIE *Die, unsigned Attribute, unsigned Form, DIE *Entry) { - Die->addValue(Attribute, Form, createDIEEntry(Entry)); - } - + void addDIEEntry(DIE *Die, unsigned Attribute, unsigned Form, DIE *Entry); + /// addBlock - Add block data. /// void addBlock(DIE *Die, unsigned Attribute, unsigned Form, DIEBlock *Block); From sabre at nondot.org Mon Apr 5 00:28:24 2010 From: sabre at nondot.org (Chris Lattner) Date: Mon, 05 Apr 2010 05:28:24 -0000 Subject: [llvm-commits] [llvm] r100408 - in /llvm/trunk/lib/CodeGen/AsmPrinter: DwarfException.cpp DwarfException.h Message-ID: <20100405052824.1F4D82A6C12C@llvm.org> Author: lattner Date: Mon Apr 5 00:28:23 2010 New Revision: 100408 URL: http://llvm.org/viewvc/llvm-project?rev=100408&view=rev Log: prune #includes, MMI can never be null Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfException.cpp llvm/trunk/lib/CodeGen/AsmPrinter/DwarfException.h Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfException.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/DwarfException.cpp?rev=100408&r1=100407&r2=100408&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/DwarfException.cpp (original) +++ llvm/trunk/lib/CodeGen/AsmPrinter/DwarfException.cpp Mon Apr 5 00:28:23 2010 @@ -13,6 +13,7 @@ #include "DwarfException.h" #include "llvm/Module.h" +#include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -917,8 +918,6 @@ /// BeginFunction - Gather pre-function exception information. Assumes it's /// being emitted immediately after the function entry point. 
void DwarfException::BeginFunction(const MachineFunction *MF) { - if (!MMI || !Asm->MAI->doesSupportExceptionHandling()) return; - TimeRegion Timer(ExceptionTimer); shouldEmitTable = shouldEmitMoves = false; Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfException.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/DwarfException.h?rev=100408&r1=100407&r2=100408&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/DwarfException.h (original) +++ llvm/trunk/lib/CodeGen/AsmPrinter/DwarfException.h Mon Apr 5 00:28:23 2010 @@ -14,18 +14,23 @@ #ifndef LLVM_CODEGEN_ASMPRINTER_DWARFEXCEPTION_H #define LLVM_CODEGEN_ASMPRINTER_DWARFEXCEPTION_H -#include "llvm/CodeGen/AsmPrinter.h" #include "llvm/ADT/DenseMap.h" -#include +#include namespace llvm { +template class SmallVectorImpl; struct LandingPadInfo; class MachineModuleInfo; +class MachineMove; +class MachineInstr; +class MachineFunction; class MCAsmInfo; class MCExpr; +class MCSymbol; class Timer; -class raw_ostream; +class Function; +class AsmPrinter; //===----------------------------------------------------------------------===// /// DwarfException - Emits Dwarf exception handling directives. From sabre at nondot.org Mon Apr 5 00:31:04 2010 From: sabre at nondot.org (Chris Lattner) Date: Mon, 05 Apr 2010 05:31:04 -0000 Subject: [llvm-commits] [llvm] r100409 - in /llvm/trunk/lib/CodeGen/AsmPrinter: DwarfDebug.h DwarfException.h Message-ID: <20100405053104.F218B2A6C12C@llvm.org> Author: lattner Date: Mon Apr 5 00:31:04 2010 New Revision: 100409 URL: http://llvm.org/viewvc/llvm-project?rev=100409&view=rev Log: reprivatize now that DwarfWriter is gone. 
Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h llvm/trunk/lib/CodeGen/AsmPrinter/DwarfException.h Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h?rev=100409&r1=100408&r2=100409&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h (original) +++ llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h Mon Apr 5 00:31:04 2010 @@ -73,10 +73,9 @@ class DwarfDebug { /// Asm - Target of Dwarf emission. AsmPrinter *Asm; -public: + /// MMI - Collected machine module information. MachineModuleInfo *MMI; -private: //===--------------------------------------------------------------------===// // Attributes used to construct specific Dwarf sections. Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfException.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/DwarfException.h?rev=100409&r1=100408&r2=100409&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/DwarfException.h (original) +++ llvm/trunk/lib/CodeGen/AsmPrinter/DwarfException.h Mon Apr 5 00:31:04 2010 @@ -38,10 +38,9 @@ class DwarfException { /// Asm - Target of Dwarf emission. AsmPrinter *Asm; -public: + /// MMI - Collected machine module information. MachineModuleInfo *MMI; -private: struct FunctionEHFrameInfo { MCSymbol *FunctionEHSym; // L_foo.eh From sabre at nondot.org Mon Apr 5 00:32:45 2010 From: sabre at nondot.org (Chris Lattner) Date: Mon, 05 Apr 2010 05:32:45 -0000 Subject: [llvm-commits] [llvm] r100410 - in /llvm/trunk/lib/CodeGen/AsmPrinter: DwarfDebug.h DwarfException.h Message-ID: <20100405053245.EBBCE2A6C12C@llvm.org> Author: lattner Date: Mon Apr 5 00:32:45 2010 New Revision: 100410 URL: http://llvm.org/viewvc/llvm-project?rev=100410&view=rev Log: privatize more stuff, eliminate vtables. 
Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h llvm/trunk/lib/CodeGen/AsmPrinter/DwarfException.h Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h?rev=100410&r1=100409&r2=100410&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h (original) +++ llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h Mon Apr 5 00:32:45 2010 @@ -519,40 +519,17 @@ /// DIType getBlockByrefType(DIType Ty, std::string Name); -public: - //===--------------------------------------------------------------------===// - // Main entry points. - // - DwarfDebug(AsmPrinter *A, Module *M); - virtual ~DwarfDebug(); - - /// beginModule - Emit all Dwarf sections that should come prior to the - /// content. - void beginModule(Module *M); - - /// endModule - Emit all Dwarf sections that should come after the content. - /// - void endModule(); - - /// beginFunction - Gather pre-function debug information. Assumes being - /// emitted immediately after the function entry point. - void beginFunction(const MachineFunction *MF); - - /// endFunction - Gather and emit post-function debug information. - /// - void endFunction(const MachineFunction *MF); - /// recordSourceLine - Register a source line with debug info. Returns the /// unique label that was emitted and which provides correspondence to /// the source line list. MCSymbol *recordSourceLine(unsigned Line, unsigned Col, MDNode *Scope); - + /// getSourceLineCount - Return the number of source lines in the debug /// info. unsigned getSourceLineCount() const { return Lines.size(); } - + /// getOrCreateSourceID - Public version of GetOrCreateSourceID. This can be /// timed. Look up the source id with the given directory and source file /// names. If none currently exists, create a new id and insert it in the @@ -560,13 +537,36 @@ /// well. 
unsigned getOrCreateSourceID(const std::string &DirName, const std::string &FileName); - + /// extractScopeInformation - Scan machine instructions in this function /// and collect DbgScopes. Return true, if atleast one scope was found. bool extractScopeInformation(); - + /// collectVariableInfo - Populate DbgScope entries with variables' info. void collectVariableInfo(); + +public: + //===--------------------------------------------------------------------===// + // Main entry points. + // + DwarfDebug(AsmPrinter *A, Module *M); + ~DwarfDebug(); + + /// beginModule - Emit all Dwarf sections that should come prior to the + /// content. + void beginModule(Module *M); + + /// endModule - Emit all Dwarf sections that should come after the content. + /// + void endModule(); + + /// beginFunction - Gather pre-function debug information. Assumes being + /// emitted immediately after the function entry point. + void beginFunction(const MachineFunction *MF); + + /// endFunction - Gather and emit post-function debug information. + /// + void endFunction(const MachineFunction *MF); /// beginScope - Process beginning of a scope. void beginScope(const MachineInstr *MI); Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfException.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/DwarfException.h?rev=100410&r1=100409&r2=100410&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/DwarfException.h (original) +++ llvm/trunk/lib/CodeGen/AsmPrinter/DwarfException.h Mon Apr 5 00:32:45 2010 @@ -176,7 +176,7 @@ // Main entry points. // DwarfException(AsmPrinter *A); - virtual ~DwarfException(); + ~DwarfException(); /// EndModule - Emit all exception information that should come after the /// content. 
From sabre at nondot.org Mon Apr 5 00:43:16 2010 From: sabre at nondot.org (Chris Lattner) Date: Mon, 05 Apr 2010 05:43:16 -0000 Subject: [llvm-commits] [llvm] r100411 - /llvm/trunk/include/llvm/CodeGen/MachineFrameInfo.h Message-ID: <20100405054316.8995C2A6C12C@llvm.org> Author: lattner Date: Mon Apr 5 00:43:16 2010 New Revision: 100411 URL: http://llvm.org/viewvc/llvm-project?rev=100411&view=rev Log: prune #includes. Modified: llvm/trunk/include/llvm/CodeGen/MachineFrameInfo.h Modified: llvm/trunk/include/llvm/CodeGen/MachineFrameInfo.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/MachineFrameInfo.h?rev=100411&r1=100410&r2=100411&view=diff ============================================================================== --- llvm/trunk/include/llvm/CodeGen/MachineFrameInfo.h (original) +++ llvm/trunk/include/llvm/CodeGen/MachineFrameInfo.h Mon Apr 5 00:43:16 2010 @@ -14,12 +14,9 @@ #ifndef LLVM_CODEGEN_MACHINEFRAMEINFO_H #define LLVM_CODEGEN_MACHINEFRAMEINFO_H -#include "llvm/ADT/BitVector.h" -#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" #include "llvm/System/DataTypes.h" #include -#include #include namespace llvm { @@ -31,22 +28,18 @@ class MachineFunction; class MachineBasicBlock; class TargetFrameInfo; +class BitVector; /// The CalleeSavedInfo class tracks the information need to locate where a /// callee saved register in the current frame. class CalleeSavedInfo { - -private: unsigned Reg; const TargetRegisterClass *RegClass; int FrameIdx; public: CalleeSavedInfo(unsigned R, const TargetRegisterClass *RC, int FI = 0) - : Reg(R) - , RegClass(RC) - , FrameIdx(FI) - {} + : Reg(R), RegClass(RC), FrameIdx(FI) {} // Accessors. 
unsigned getReg() const { return Reg; } From sabre at nondot.org Mon Apr 5 00:48:36 2010 From: sabre at nondot.org (Chris Lattner) Date: Mon, 05 Apr 2010 05:48:36 -0000 Subject: [llvm-commits] [llvm] r100412 - /llvm/trunk/lib/Target/XCore/XCoreInstrInfo.cpp Message-ID: <20100405054836.7E3562A6C12C@llvm.org> Author: lattner Date: Mon Apr 5 00:48:36 2010 New Revision: 100412 URL: http://llvm.org/viewvc/llvm-project?rev=100412&view=rev Log: simplify code. Modified: llvm/trunk/lib/Target/XCore/XCoreInstrInfo.cpp Modified: llvm/trunk/lib/Target/XCore/XCoreInstrInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/XCore/XCoreInstrInfo.cpp?rev=100412&r1=100411&r2=100412&view=diff ============================================================================== --- llvm/trunk/lib/Target/XCore/XCoreInstrInfo.cpp (original) +++ llvm/trunk/lib/Target/XCore/XCoreInstrInfo.cpp Mon Apr 5 00:48:36 2010 @@ -14,12 +14,12 @@ #include "XCoreMachineFunctionInfo.h" #include "XCoreInstrInfo.h" #include "XCore.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/MC/MCContext.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineLocation.h" -#include "llvm/CodeGen/MachineModuleInfo.h" #include "XCoreGenInstrInfo.inc" +#include "llvm/ADT/STLExtras.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -419,14 +419,11 @@ bool XCoreInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - const std::vector &CSI) const -{ + const std::vector &CSI) const { if (CSI.empty()) { return true; } MachineFunction *MF = MBB.getParent(); - const MachineFrameInfo *MFI = MF->getFrameInfo(); - MachineModuleInfo *MMI = MFI->getMachineModuleInfo(); XCoreFunctionInfo *XFI = MF->getInfo(); bool emitFrameMoves = XCoreRegisterInfo::needsFrameMoves(*MF); @@ -442,7 +439,7 @@ storeRegToStackSlot(MBB, MI, it->getReg(), true, it->getFrameIdx(), it->getRegClass()); if 
(emitFrameMoves) { - MCSymbol *SaveLabel = MMI->getContext().CreateTempSymbol(); + MCSymbol *SaveLabel = MF->getContext().CreateTempSymbol(); BuildMI(MBB, MI, DL, get(XCore::DBG_LABEL)).addSym(SaveLabel); XFI->getSpillLabels().push_back(std::make_pair(SaveLabel, *it)); } From nicholas at mxc.ca Mon Apr 5 00:48:47 2010 From: nicholas at mxc.ca (Nick Lewycky) Date: Mon, 05 Apr 2010 05:48:47 -0000 Subject: [llvm-commits] [llvm] r100413 - /llvm/trunk/docs/Lexicon.html Message-ID: <20100405054847.C33952A6C12C@llvm.org> Author: nicholas Date: Mon Apr 5 00:48:47 2010 New Revision: 100413 URL: http://llvm.org/viewvc/llvm-project?rev=100413&view=rev Log: Add MC and LTO, two terms I just had to explain on IRC. Modified: llvm/trunk/docs/Lexicon.html Modified: llvm/trunk/docs/Lexicon.html URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/docs/Lexicon.html?rev=100413&r1=100412&r2=100413&view=diff ============================================================================== --- llvm/trunk/docs/Lexicon.html (original) +++ llvm/trunk/docs/Lexicon.html Mon Apr 5 00:48:47 2010 @@ -50,6 +50,11 @@ LCSSA LICM Load-VN + LTO + + - M - + + MC - O - @@ -167,15 +172,24 @@
-
LCSSA
-
Loop-Closed Static Single Assignment Form
+
LCSSA
+
Loop-Closed Static Single Assignment Form
LICM
Loop Invariant Code Motion
Load-VN
Load Value Numbering
+
LTO
+
Link-Time Optimization
+
+
+ + +
+
+
MC
+
Machine Code
-
From sabre at nondot.org Mon Apr 5 00:49:50 2010 From: sabre at nondot.org (Chris Lattner) Date: Mon, 05 Apr 2010 05:49:50 -0000 Subject: [llvm-commits] [llvm] r100414 - in /llvm/trunk: include/llvm/CodeGen/MachineFunction.h lib/CodeGen/MachineFunction.cpp lib/CodeGen/MachineFunctionAnalysis.cpp Message-ID: <20100405054950.C0CC42A6C12C@llvm.org> Author: lattner Date: Mon Apr 5 00:49:50 2010 New Revision: 100414 URL: http://llvm.org/viewvc/llvm-project?rev=100414&view=rev Log: enhance MachineFunction to have a MMI pointer. Modified: llvm/trunk/include/llvm/CodeGen/MachineFunction.h llvm/trunk/lib/CodeGen/MachineFunction.cpp llvm/trunk/lib/CodeGen/MachineFunctionAnalysis.cpp Modified: llvm/trunk/include/llvm/CodeGen/MachineFunction.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/MachineFunction.h?rev=100414&r1=100413&r2=100414&view=diff ============================================================================== --- llvm/trunk/include/llvm/CodeGen/MachineFunction.h (original) +++ llvm/trunk/include/llvm/CodeGen/MachineFunction.h Mon Apr 5 00:49:50 2010 @@ -32,6 +32,7 @@ class MachineFrameInfo; class MachineConstantPool; class MachineJumpTableInfo; +class MachineModuleInfo; class MCContext; class Pass; class TargetMachine; @@ -72,7 +73,8 @@ Function *Fn; const TargetMachine &Target; MCContext &Ctx; - + MachineModuleInfo &MMI; + // RegInfo - Information about each register in use in the function. MachineRegisterInfo *RegInfo; @@ -107,8 +109,8 @@ typedef ilist BasicBlockListType; BasicBlockListType BasicBlocks; - // Default debug location. Used to print out the debug label at the beginning - // of a function. + /// Default debug location. Used to print out the debug label at the beginning + /// of a function. DebugLoc DefaultDebugLoc; /// FunctionNumber - This provides a unique ID for each function emitted in @@ -116,17 +118,17 @@ /// unsigned FunctionNumber; - // The alignment of the function. + /// The alignment of the function. 
unsigned Alignment; MachineFunction(const MachineFunction &); // DO NOT IMPLEMENT void operator=(const MachineFunction&); // DO NOT IMPLEMENT - public: MachineFunction(Function *Fn, const TargetMachine &TM, unsigned FunctionNum, - MCContext &Ctx); + MachineModuleInfo &MMI); ~MachineFunction(); + MachineModuleInfo &getMMI() const { return MMI; } MCContext &getContext() const { return Ctx; } /// getFunction - Return the LLVM function that this machine code represents Modified: llvm/trunk/lib/CodeGen/MachineFunction.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MachineFunction.cpp?rev=100414&r1=100413&r2=100414&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/MachineFunction.cpp (original) +++ llvm/trunk/lib/CodeGen/MachineFunction.cpp Mon Apr 5 00:49:50 2010 @@ -23,6 +23,7 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/MC/MCAsmInfo.h" @@ -51,8 +52,8 @@ } MachineFunction::MachineFunction(Function *F, const TargetMachine &TM, - unsigned FunctionNum, MCContext &ctx) - : Fn(F), Target(TM), Ctx(ctx) { + unsigned FunctionNum, MachineModuleInfo &mmi) + : Fn(F), Target(TM), Ctx(mmi.getContext()), MMI(mmi) { if (TM.getRegisterInfo()) RegInfo = new (Allocator) MachineRegisterInfo(*TM.getRegisterInfo()); else Modified: llvm/trunk/lib/CodeGen/MachineFunctionAnalysis.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MachineFunctionAnalysis.cpp?rev=100414&r1=100413&r2=100414&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/MachineFunctionAnalysis.cpp (original) +++ llvm/trunk/lib/CodeGen/MachineFunctionAnalysis.cpp Mon Apr 5 00:49:50 2010 @@ -38,7 +38,7 @@ bool 
MachineFunctionAnalysis::runOnFunction(Function &F) { assert(!MF && "MachineFunctionAnalysis already initialized!"); MF = new MachineFunction(&F, TM, NextFnNum++, - getAnalysis().getContext()); + getAnalysis()); return false; } From sabre at nondot.org Mon Apr 5 00:57:53 2010 From: sabre at nondot.org (Chris Lattner) Date: Mon, 05 Apr 2010 05:57:53 -0000 Subject: [llvm-commits] [llvm] r100415 - in /llvm/trunk: include/llvm/CodeGen/MachineFrameInfo.h lib/CodeGen/PrologEpilogInserter.cpp lib/Target/CellSPU/SPURegisterInfo.cpp lib/Target/PowerPC/PPCRegisterInfo.cpp lib/Target/X86/X86RegisterInfo.cpp lib/Target/XCore/XCoreRegisterInfo.cpp Message-ID: <20100405055753.1B4912A6C12C@llvm.org> Author: lattner Date: Mon Apr 5 00:57:52 2010 New Revision: 100415 URL: http://llvm.org/viewvc/llvm-project?rev=100415&view=rev Log: remove the MMI pointer from MachineFrameInfo. Modified: llvm/trunk/include/llvm/CodeGen/MachineFrameInfo.h llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp llvm/trunk/lib/Target/CellSPU/SPURegisterInfo.cpp llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.cpp llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp llvm/trunk/lib/Target/XCore/XCoreRegisterInfo.cpp Modified: llvm/trunk/include/llvm/CodeGen/MachineFrameInfo.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/MachineFrameInfo.h?rev=100415&r1=100414&r2=100415&view=diff ============================================================================== --- llvm/trunk/include/llvm/CodeGen/MachineFrameInfo.h (original) +++ llvm/trunk/include/llvm/CodeGen/MachineFrameInfo.h Mon Apr 5 00:57:52 2010 @@ -24,7 +24,6 @@ class TargetData; class TargetRegisterClass; class Type; -class MachineModuleInfo; class MachineFunction; class MachineBasicBlock; class TargetFrameInfo; @@ -181,13 +180,6 @@ /// spill slots. SmallVector SpillObjects; - /// MMI - This field is set (via setMachineModuleInfo) by a module info - /// consumer to indicate that frame layout information - /// should be acquired. 
Typically, it's the responsibility of the target's - /// TargetRegisterInfo prologue/epilogue emitting code to inform - /// MachineModuleInfo of frame layouts. - MachineModuleInfo *MMI; - /// TargetFrameInfo - Target information about frame layout. /// const TargetFrameInfo &TFI; @@ -201,7 +193,6 @@ StackProtectorIdx = -1; MaxCallFrameSize = 0; CSIValid = false; - MMI = 0; } /// hasStackObjects - Return true if there are any stack objects in this @@ -444,14 +435,6 @@ /// method always returns an empty set. BitVector getPristineRegs(const MachineBasicBlock *MBB) const; - /// getMachineModuleInfo - Used by a prologue/epilogue - /// emitter (TargetRegisterInfo) to provide frame layout information. - MachineModuleInfo *getMachineModuleInfo() const { return MMI; } - - /// setMachineModuleInfo - Used by a meta info consumer to - /// indicate that frame layout information should be gathered. - void setMachineModuleInfo(MachineModuleInfo *mmi) { MMI = mmi; } - /// print - Used by the MachineFunction printer to print information about /// stack objects. 
Implemented in MachineFunction.cpp /// Modified: llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp?rev=100415&r1=100414&r2=100415&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp (original) +++ llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp Mon Apr 5 00:57:52 2010 @@ -24,7 +24,6 @@ #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/Target/TargetMachine.h" @@ -59,11 +58,6 @@ FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(Fn); FrameConstantRegMap.clear(); - // Get MachineModuleInfo so that we can track the construction of the - // frame. - if (MachineModuleInfo *MMI = getAnalysisIfAvailable()) - Fn.getFrameInfo()->setMachineModuleInfo(MMI); - // Calculate the MaxCallFrameSize and HasCalls variables for the function's // frame information. Also eliminates call frame pseudo instructions. calculateCallsInformation(Fn); Modified: llvm/trunk/lib/Target/CellSPU/SPURegisterInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/CellSPU/SPURegisterInfo.cpp?rev=100415&r1=100414&r2=100415&view=diff ============================================================================== --- llvm/trunk/lib/Target/CellSPU/SPURegisterInfo.cpp (original) +++ llvm/trunk/lib/Target/CellSPU/SPURegisterInfo.cpp Mon Apr 5 00:57:52 2010 @@ -451,11 +451,11 @@ MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB MachineBasicBlock::iterator MBBI = MBB.begin(); MachineFrameInfo *MFI = MF.getFrameInfo(); - MachineModuleInfo *MMI = MFI->getMachineModuleInfo(); + MachineModuleInfo &MMI = MF.getMMI(); DebugLoc dl = MBBI != MBB.end() ? 
MBBI->getDebugLoc() : DebugLoc(); // Prepare for debug frame info. - bool hasDebugInfo = MMI && MMI->hasDebugInfo(); + bool hasDebugInfo = MMI.hasDebugInfo(); MCSymbol *FrameLabel = 0; // Move MBBI back to the beginning of the function. @@ -473,7 +473,7 @@ FrameSize = -(FrameSize + SPUFrameInfo::minStackSize()); if (hasDebugInfo) { // Mark effective beginning of when frame pointer becomes valid. - FrameLabel = MMI->getContext().CreateTempSymbol(); + FrameLabel = MMI.getContext().CreateTempSymbol(); BuildMI(MBB, MBBI, dl, TII.get(SPU::DBG_LABEL)).addSym(FrameLabel); } @@ -516,7 +516,7 @@ } if (hasDebugInfo) { - std::vector &Moves = MMI->getFrameMoves(); + std::vector &Moves = MMI.getFrameMoves(); // Show update of SP. MachineLocation SPDst(MachineLocation::VirtualFP); @@ -535,7 +535,7 @@ } // Mark effective beginning of when frame pointer is ready. - MCSymbol *ReadyLabel = MMI->getContext().CreateTempSymbol(); + MCSymbol *ReadyLabel = MMI.getContext().CreateTempSymbol(); BuildMI(MBB, MBBI, dl, TII.get(SPU::DBG_LABEL)).addSym(ReadyLabel); MachineLocation FPDst(SPU::R1); @@ -552,7 +552,7 @@ // Insert terminator label BuildMI(MBB, MBBI, dl, TII.get(SPU::DBG_LABEL)) - .addSym(MMI->getContext().CreateTempSymbol()); + .addSym(MMI.getContext().CreateTempSymbol()); } } } Modified: llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.cpp?rev=100415&r1=100414&r2=100415&view=diff ============================================================================== --- llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.cpp (original) +++ llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.cpp Mon Apr 5 00:57:52 2010 @@ -1280,9 +1280,9 @@ MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB MachineBasicBlock::iterator MBBI = MBB.begin(); MachineFrameInfo *MFI = MF.getFrameInfo(); - MachineModuleInfo *MMI = MFI->getMachineModuleInfo(); + MachineModuleInfo &MMI = MF.getMMI(); DebugLoc dl; - bool 
needsFrameMoves = (MMI && MMI->hasDebugInfo()) || + bool needsFrameMoves = MMI.hasDebugInfo() || !MF.getFunction()->doesNotThrow() || UnwindTablesMandatory; @@ -1442,13 +1442,13 @@ } } - std::vector &Moves = MMI->getFrameMoves(); + std::vector &Moves = MMI.getFrameMoves(); // Add the "machine moves" for the instructions we generated above, but in // reverse order. if (needsFrameMoves) { // Mark effective beginning of when frame pointer becomes valid. - FrameLabel = MMI->getContext().CreateTempSymbol(); + FrameLabel = MMI.getContext().CreateTempSymbol(); BuildMI(MBB, MBBI, dl, TII.get(PPC::DBG_LABEL)).addSym(FrameLabel); // Show update of SP. @@ -1489,7 +1489,7 @@ } if (needsFrameMoves) { - ReadyLabel = MMI->getContext().CreateTempSymbol(); + ReadyLabel = MMI.getContext().CreateTempSymbol(); // Mark effective beginning of when frame pointer is ready. BuildMI(MBB, MBBI, dl, TII.get(PPC::DBG_LABEL)).addSym(ReadyLabel); Modified: llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp?rev=100415&r1=100414&r2=100415&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp (original) +++ llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp Mon Apr 5 00:57:52 2010 @@ -297,9 +297,7 @@ bool ghcCall = false; if (MF) { - const MachineFrameInfo *MFI = MF->getFrameInfo(); - const MachineModuleInfo *MMI = MFI->getMachineModuleInfo(); - callsEHReturn = (MMI ? MMI->callsEHReturn() : false); + callsEHReturn = MF->getMMI().callsEHReturn(); const Function *F = MF->getFunction(); ghcCall = (F ? 
F->getCallingConv() == CallingConv::GHC : false); } @@ -348,12 +346,8 @@ const TargetRegisterClass* const* X86RegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { bool callsEHReturn = false; - - if (MF) { - const MachineFrameInfo *MFI = MF->getFrameInfo(); - const MachineModuleInfo *MMI = MFI->getMachineModuleInfo(); - callsEHReturn = (MMI ? MMI->callsEHReturn() : false); - } + if (MF) + callsEHReturn = MF->getMMI().callsEHReturn(); static const TargetRegisterClass * const CalleeSavedRegClasses32Bit[] = { &X86::GR32RegClass, &X86::GR32RegClass, @@ -443,14 +437,14 @@ /// or if frame pointer elimination is disabled. bool X86RegisterInfo::hasFP(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); - const MachineModuleInfo *MMI = MFI->getMachineModuleInfo(); + const MachineModuleInfo &MMI = MF.getMMI(); return (NoFramePointerElim || needsStackRealignment(MF) || MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken() || MF.getInfo()->getForceFramePointer() || - (MMI && MMI->callsUnwindInit())); + MMI.callsUnwindInit()); } bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const { @@ -800,14 +794,13 @@ MCSymbol *Label, unsigned FramePtr) const { MachineFrameInfo *MFI = MF.getFrameInfo(); - MachineModuleInfo *MMI = MFI->getMachineModuleInfo(); - if (!MMI) return; + MachineModuleInfo &MMI = MF.getMMI(); // Add callee saved registers to move list. 
const std::vector &CSI = MFI->getCalleeSavedInfo(); if (CSI.empty()) return; - std::vector &Moves = MMI->getFrameMoves(); + std::vector &Moves = MMI.getFrameMoves(); const TargetData *TD = MF.getTarget().getTargetData(); bool HasFP = hasFP(MF); @@ -874,9 +867,9 @@ MachineFrameInfo *MFI = MF.getFrameInfo(); const Function *Fn = MF.getFunction(); const X86Subtarget *Subtarget = &MF.getTarget().getSubtarget(); - MachineModuleInfo *MMI = MFI->getMachineModuleInfo(); + MachineModuleInfo &MMI = MF.getMMI(); X86MachineFunctionInfo *X86FI = MF.getInfo(); - bool needsFrameMoves = (MMI && MMI->hasDebugInfo()) || + bool needsFrameMoves = MMI.hasDebugInfo() || !Fn->doesNotThrow() || UnwindTablesMandatory; uint64_t MaxAlign = MFI->getMaxAlignment(); // Desired stack alignment. uint64_t StackSize = MFI->getStackSize(); // Number of bytes to allocate. @@ -935,7 +928,7 @@ // REG < 64 => DW_CFA_offset + Reg // ELSE => DW_CFA_offset_extended - std::vector &Moves = MMI->getFrameMoves(); + std::vector &Moves = MMI.getFrameMoves(); const TargetData *TD = MF.getTarget().getTargetData(); uint64_t NumBytes = 0; int stackGrowth = -TD->getPointerSize(); @@ -959,7 +952,7 @@ if (needsFrameMoves) { // Mark the place where EBP/RBP was saved. - MCSymbol *FrameLabel = MMI->getContext().CreateTempSymbol(); + MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol(); BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addSym(FrameLabel); // Define the current CFA rule to use the provided offset. @@ -987,7 +980,7 @@ if (needsFrameMoves) { // Mark effective beginning of when frame pointer becomes valid. - MCSymbol *FrameLabel = MMI->getContext().CreateTempSymbol(); + MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol(); BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addSym(FrameLabel); // Define the current CFA to use the EBP/RBP register. @@ -1027,7 +1020,7 @@ if (!HasFP && needsFrameMoves) { // Mark callee-saved push instruction. 
- MCSymbol *Label = MMI->getContext().CreateTempSymbol(); + MCSymbol *Label = MMI.getContext().CreateTempSymbol(); BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addSym(Label); // Define the current CFA rule to use the provided offset. @@ -1099,7 +1092,7 @@ if ((NumBytes || PushedRegs) && needsFrameMoves) { // Mark end of stack pointer adjustment. - MCSymbol *Label = MMI->getContext().CreateTempSymbol(); + MCSymbol *Label = MMI.getContext().CreateTempSymbol(); BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addSym(Label); if (!HasFP && NumBytes) { Modified: llvm/trunk/lib/Target/XCore/XCoreRegisterInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/XCore/XCoreRegisterInfo.cpp?rev=100415&r1=100414&r2=100415&view=diff ============================================================================== --- llvm/trunk/lib/Target/XCore/XCoreRegisterInfo.cpp (original) +++ llvm/trunk/lib/Target/XCore/XCoreRegisterInfo.cpp Mon Apr 5 00:57:52 2010 @@ -67,12 +67,8 @@ return array_lengthof(XCore_ArgRegs); } -bool XCoreRegisterInfo::needsFrameMoves(const MachineFunction &MF) -{ - const MachineFrameInfo *MFI = MF.getFrameInfo(); - MachineModuleInfo *MMI = MFI->getMachineModuleInfo(); - return (MMI && MMI->hasDebugInfo()) || - !MF.getFunction()->doesNotThrow() || +bool XCoreRegisterInfo::needsFrameMoves(const MachineFunction &MF) { + return MF.getMMI().hasDebugInfo() || !MF.getFunction()->doesNotThrow() || UnwindTablesMandatory; } @@ -412,7 +408,7 @@ MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB MachineBasicBlock::iterator MBBI = MBB.begin(); MachineFrameInfo *MFI = MF.getFrameInfo(); - MachineModuleInfo *MMI = MFI->getMachineModuleInfo(); + MachineModuleInfo *MMI = &MF.getMMI(); XCoreFunctionInfo *XFI = MF.getInfo(); DebugLoc dl = MBBI != MBB.end() ? 
MBBI->getDebugLoc() : DebugLoc(); From sabre at nondot.org Mon Apr 5 01:05:26 2010 From: sabre at nondot.org (Chris Lattner) Date: Mon, 05 Apr 2010 06:05:26 -0000 Subject: [llvm-commits] [llvm] r100416 - in /llvm/trunk: include/llvm/CodeGen/FastISel.h include/llvm/Target/TargetLowering.h lib/CodeGen/SelectionDAG/FastISel.cpp lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp lib/Target/X86/X86FastISel.cpp lib/Target/X86/X86ISelLowering.cpp lib/Target/X86/X86ISelLowering.h Message-ID: <20100405060526.646A12A6C12C@llvm.org> Author: lattner Date: Mon Apr 5 01:05:26 2010 New Revision: 100416 URL: http://llvm.org/viewvc/llvm-project?rev=100416&view=rev Log: unthread MMI from FastISel Modified: llvm/trunk/include/llvm/CodeGen/FastISel.h llvm/trunk/include/llvm/Target/TargetLowering.h llvm/trunk/lib/CodeGen/SelectionDAG/FastISel.cpp llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp llvm/trunk/lib/Target/X86/X86FastISel.cpp llvm/trunk/lib/Target/X86/X86ISelLowering.cpp llvm/trunk/lib/Target/X86/X86ISelLowering.h Modified: llvm/trunk/include/llvm/CodeGen/FastISel.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/FastISel.h?rev=100416&r1=100415&r2=100416&view=diff ============================================================================== --- llvm/trunk/include/llvm/CodeGen/FastISel.h (original) +++ llvm/trunk/include/llvm/CodeGen/FastISel.h Mon Apr 5 01:05:26 2010 @@ -49,7 +49,6 @@ SmallSet &CatchInfoLost; #endif MachineFunction &MF; - MachineModuleInfo *MMI; MachineRegisterInfo &MRI; MachineFrameInfo &MFI; MachineConstantPool &MCP; @@ -114,7 +113,6 @@ protected: FastISel(MachineFunction &mf, - MachineModuleInfo *mmi, DenseMap &vm, DenseMap &bm, DenseMap &am Modified: llvm/trunk/include/llvm/Target/TargetLowering.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetLowering.h?rev=100416&r1=100415&r2=100416&view=diff ============================================================================== --- 
llvm/trunk/include/llvm/Target/TargetLowering.h (original) +++ llvm/trunk/include/llvm/Target/TargetLowering.h Mon Apr 5 01:05:26 2010 @@ -47,7 +47,6 @@ class MachineFrameInfo; class MachineInstr; class MachineJumpTableInfo; - class MachineModuleInfo; class MCContext; class MCExpr; class SDNode; @@ -1272,7 +1271,7 @@ /// createFastISel - This method returns a target specific FastISel object, /// or null if the target does not support "fast" ISel. virtual FastISel * - createFastISel(MachineFunction &, MachineModuleInfo *, + createFastISel(MachineFunction &, DenseMap &, DenseMap &, DenseMap & Modified: llvm/trunk/lib/CodeGen/SelectionDAG/FastISel.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/FastISel.cpp?rev=100416&r1=100415&r2=100416&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/FastISel.cpp (original) +++ llvm/trunk/lib/CodeGen/SelectionDAG/FastISel.cpp Mon Apr 5 01:05:26 2010 @@ -326,7 +326,7 @@ case Intrinsic::dbg_declare: { DbgDeclareInst *DI = cast(I); if (!DIDescriptor::ValidDebugInfo(DI->getVariable(), CodeGenOpt::None) || - !MMI->hasDebugInfo()) + !MF.getMMI().hasDebugInfo()) return true; Value *Address = DI->getAddress(); @@ -340,7 +340,7 @@ if (SI == StaticAllocaMap.end()) break; // VLAs. int FI = SI->second; if (!DI->getDebugLoc().isUnknown()) - MMI->setVariableDbgInfo(DI->getVariable(), FI, DI->getDebugLoc()); + MF.getMMI().setVariableDbgInfo(DI->getVariable(), FI, DI->getDebugLoc()); // Building the map above is target independent. Generating DBG_VALUE // inline is target dependent; do this now. 
@@ -399,44 +399,39 @@ switch (TLI.getOperationAction(ISD::EHSELECTION, VT)) { default: break; case TargetLowering::Expand: { - if (MMI) { - if (MBB->isLandingPad()) - AddCatchInfo(*cast(I), MMI, MBB); - else { + if (MBB->isLandingPad()) + AddCatchInfo(*cast(I), &MF.getMMI(), MBB); + else { #ifndef NDEBUG - CatchInfoLost.insert(cast(I)); + CatchInfoLost.insert(cast(I)); #endif - // FIXME: Mark exception selector register as live in. Hack for PR1508. - unsigned Reg = TLI.getExceptionSelectorRegister(); - if (Reg) MBB->addLiveIn(Reg); - } - + // FIXME: Mark exception selector register as live in. Hack for PR1508. unsigned Reg = TLI.getExceptionSelectorRegister(); - EVT SrcVT = TLI.getPointerTy(); - const TargetRegisterClass *RC = TLI.getRegClassFor(SrcVT); - unsigned ResultReg = createResultReg(RC); - bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, Reg, - RC, RC); - assert(InsertedCopy && "Can't copy address registers!"); - InsertedCopy = InsertedCopy; - - // Cast the register to the type of the selector. - if (SrcVT.bitsGT(MVT::i32)) - ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32, ISD::TRUNCATE, - ResultReg); - else if (SrcVT.bitsLT(MVT::i32)) - ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32, - ISD::SIGN_EXTEND, ResultReg); - if (ResultReg == 0) - // Unhandled operand. Halt "fast" selection and bail. - return false; - - UpdateValueMap(I, ResultReg); - } else { - unsigned ResultReg = - getRegForValue(Constant::getNullValue(I->getType())); - UpdateValueMap(I, ResultReg); + if (Reg) MBB->addLiveIn(Reg); } + + unsigned Reg = TLI.getExceptionSelectorRegister(); + EVT SrcVT = TLI.getPointerTy(); + const TargetRegisterClass *RC = TLI.getRegClassFor(SrcVT); + unsigned ResultReg = createResultReg(RC); + bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, Reg, + RC, RC); + assert(InsertedCopy && "Can't copy address registers!"); + InsertedCopy = InsertedCopy; + + // Cast the register to the type of the selector. 
+ if (SrcVT.bitsGT(MVT::i32)) + ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32, ISD::TRUNCATE, + ResultReg); + else if (SrcVT.bitsLT(MVT::i32)) + ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32, + ISD::SIGN_EXTEND, ResultReg); + if (ResultReg == 0) + // Unhandled operand. Halt "fast" selection and bail. + return false; + + UpdateValueMap(I, ResultReg); + return true; } } @@ -733,7 +728,6 @@ } FastISel::FastISel(MachineFunction &mf, - MachineModuleInfo *mmi, DenseMap &vm, DenseMap &bm, DenseMap &am @@ -749,7 +743,6 @@ CatchInfoLost(cil), #endif MF(mf), - MMI(mmi), MRI(MF.getRegInfo()), MFI(*MF.getFrameInfo()), MCP(*MF.getConstantPool()), Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp?rev=100416&r1=100415&r2=100416&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp (original) +++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp Mon Apr 5 01:05:26 2010 @@ -845,9 +845,7 @@ // Initialize the Fast-ISel state, if needed. 
FastISel *FastIS = 0; if (EnableFastISel) - FastIS = TLI.createFastISel(MF, MMI, - FuncInfo->ValueMap, - FuncInfo->MBBMap, + FastIS = TLI.createFastISel(MF, FuncInfo->ValueMap, FuncInfo->MBBMap, FuncInfo->StaticAllocaMap #ifndef NDEBUG , FuncInfo->CatchInfoLost Modified: llvm/trunk/lib/Target/X86/X86FastISel.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86FastISel.cpp?rev=100416&r1=100415&r2=100416&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86FastISel.cpp (original) +++ llvm/trunk/lib/Target/X86/X86FastISel.cpp Mon Apr 5 01:05:26 2010 @@ -54,7 +54,6 @@ public: explicit X86FastISel(MachineFunction &mf, - MachineModuleInfo *mmi, DenseMap &vm, DenseMap &bm, DenseMap &am @@ -62,7 +61,7 @@ , SmallSet &cil #endif ) - : FastISel(mf, mmi, vm, bm, am + : FastISel(mf, vm, bm, am #ifndef NDEBUG , cil #endif @@ -1752,7 +1751,6 @@ namespace llvm { llvm::FastISel *X86::createFastISel(MachineFunction &mf, - MachineModuleInfo *mmi, DenseMap &vm, DenseMap &bm, DenseMap &am @@ -1760,7 +1758,7 @@ , SmallSet &cil #endif ) { - return new X86FastISel(mf, mmi, vm, bm, am + return new X86FastISel(mf, vm, bm, am #ifndef NDEBUG , cil #endif Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=100416&r1=100415&r2=100416&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original) +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Mon Apr 5 01:05:26 2010 @@ -2398,7 +2398,7 @@ } FastISel * -X86TargetLowering::createFastISel(MachineFunction &mf, MachineModuleInfo *mmo, +X86TargetLowering::createFastISel(MachineFunction &mf, DenseMap &vm, DenseMap &bm, DenseMap &am @@ -2406,7 +2406,7 @@ , SmallSet &cil #endif ) { - return X86::createFastISel(mf, mmo, vm, bm, am + return X86::createFastISel(mf, vm, bm, am #ifndef 
NDEBUG , cil #endif Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.h?rev=100416&r1=100415&r2=100416&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original) +++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Mon Apr 5 01:05:26 2010 @@ -574,7 +574,7 @@ /// createFastISel - This method returns a target specific FastISel object, /// or null if the target does not support "fast" ISel. virtual FastISel * - createFastISel(MachineFunction &mf, MachineModuleInfo *mmi, + createFastISel(MachineFunction &mf, DenseMap &, DenseMap &, DenseMap & @@ -815,7 +815,6 @@ namespace X86 { FastISel *createFastISel(MachineFunction &mf, - MachineModuleInfo *mmi, DenseMap &, DenseMap &, DenseMap & From sabre at nondot.org Mon Apr 5 01:10:14 2010 From: sabre at nondot.org (Chris Lattner) Date: Mon, 05 Apr 2010 06:10:14 -0000 Subject: [llvm-commits] [llvm] r100417 - in /llvm/trunk: include/llvm/CodeGen/SelectionDAG.h include/llvm/CodeGen/SelectionDAGISel.h lib/CodeGen/SelectionDAG/SelectionDAG.cpp lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp Message-ID: <20100405061014.1DFE22A6C12C@llvm.org> Author: lattner Date: Mon Apr 5 01:10:13 2010 New Revision: 100417 URL: http://llvm.org/viewvc/llvm-project?rev=100417&view=rev Log: remove some redundant MMI arguments. 
Modified: llvm/trunk/include/llvm/CodeGen/SelectionDAG.h llvm/trunk/include/llvm/CodeGen/SelectionDAGISel.h llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp Modified: llvm/trunk/include/llvm/CodeGen/SelectionDAG.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/SelectionDAG.h?rev=100417&r1=100416&r2=100417&view=diff ============================================================================== --- llvm/trunk/include/llvm/CodeGen/SelectionDAG.h (original) +++ llvm/trunk/include/llvm/CodeGen/SelectionDAG.h Mon Apr 5 01:10:13 2010 @@ -180,7 +180,7 @@ /// init - Prepare this SelectionDAG to process code in the given /// MachineFunction. /// - void init(MachineFunction &mf, MachineModuleInfo *mmi); + void init(MachineFunction &mf); /// clear - Clear state and free memory necessary to make this /// SelectionDAG ready to process a new block. Modified: llvm/trunk/include/llvm/CodeGen/SelectionDAGISel.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/SelectionDAGISel.h?rev=100417&r1=100416&r2=100417&view=diff ============================================================================== --- llvm/trunk/include/llvm/CodeGen/SelectionDAGISel.h (original) +++ llvm/trunk/include/llvm/CodeGen/SelectionDAGISel.h Mon Apr 5 01:10:13 2010 @@ -29,7 +29,6 @@ class MachineBasicBlock; class MachineFunction; class MachineInstr; - class MachineModuleInfo; class TargetLowering; class TargetInstrInfo; class FunctionLoweringInfo; @@ -283,7 +282,6 @@ const SDValue *Ops, unsigned NumOps, unsigned EmitNodeInfo); void SelectAllBasicBlocks(Function &Fn, MachineFunction &MF, - MachineModuleInfo *MMI, const TargetInstrInfo &TII); void FinishBasicBlock(); Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp?rev=100417&r1=100416&r2=100417&view=diff 
============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (original) +++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp Mon Apr 5 01:10:13 2010 @@ -801,9 +801,9 @@ DbgInfo = new SDDbgInfo(); } -void SelectionDAG::init(MachineFunction &mf, MachineModuleInfo *mmi) { +void SelectionDAG::init(MachineFunction &mf) { MF = &mf; - MMI = mmi; + MMI = &mf.getMMI(); Context = &mf.getFunction()->getContext(); } @@ -2256,8 +2256,7 @@ if (GA->getOffset() != 0) return false; GlobalVariable *GV = dyn_cast(GA->getGlobal()); if (!GV) return false; - MachineModuleInfo *MMI = getMachineModuleInfo(); - return MMI && MMI->hasDebugInfo(); + return MMI->hasDebugInfo(); } Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp?rev=100417&r1=100416&r2=100417&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp (original) +++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp Mon Apr 5 01:10:13 2010 @@ -328,8 +328,7 @@ RegInfo = &MF->getRegInfo(); DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n"); - MachineModuleInfo *MMI = getAnalysisIfAvailable(); - CurDAG->init(*MF, MMI); + CurDAG->init(*MF); FuncInfo->set(Fn, *MF, EnableFastISel); SDB->init(GFI, *AA); @@ -338,7 +337,7 @@ // Mark landing pad. FuncInfo->MBBMap[Invoke->getSuccessor(1)]->setIsLandingPad(); - SelectAllBasicBlocks(Fn, *MF, MMI, TII); + SelectAllBasicBlocks(Fn, *MF, TII); // If the first basic block in the function has live ins that need to be // copied into vregs, emit the copies into the top of the block before @@ -840,7 +839,6 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn, MachineFunction &MF, - MachineModuleInfo *MMI, const TargetInstrInfo &TII) { // Initialize the Fast-ISel state, if needed. 
FastISel *FastIS = 0; @@ -881,10 +879,10 @@ } } - if (MMI && BB->isLandingPad()) { + if (BB->isLandingPad()) { // Add a label to mark the beginning of the landing pad. Deletion of the // landing pad can thus be detected via the MachineModuleInfo. - MCSymbol *Label = MMI->addLandingPad(BB); + MCSymbol *Label = MF.getMMI().addLandingPad(BB); const TargetInstrDesc &II = TII.get(TargetOpcode::EH_LABEL); BuildMI(BB, SDB->getCurDebugLoc(), II).addSym(Label); @@ -918,7 +916,7 @@ if (I == E) // No catch info found - try to extract some from the successor. - CopyCatchInfo(Br->getSuccessor(0), LLVMBB, MMI, *FuncInfo); + CopyCatchInfo(Br->getSuccessor(0), LLVMBB, &MF.getMMI(), *FuncInfo); } } From sabre at nondot.org Mon Apr 5 01:12:01 2010 From: sabre at nondot.org (Chris Lattner) Date: Mon, 05 Apr 2010 06:12:01 -0000 Subject: [llvm-commits] [llvm] r100418 - /llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h Message-ID: <20100405061201.EDA0C2A6C12C@llvm.org> Author: lattner Date: Mon Apr 5 01:12:01 2010 New Revision: 100418 URL: http://llvm.org/viewvc/llvm-project?rev=100418&view=rev Log: hopefully sate the clang self host build, which is apparently instantiating some folding set stuff that GCC isn't, requiring some types to not be incomplete. I don't know if clang is right or wrong, but unbreaking the bot is goodness. 
Here's the broken build: http://google1.osuosl.org:8011/builders/clang-x86_64-darwin10-selfhost/builds/1813/steps/compile.llvm.stage2/logs/stdio Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h?rev=100418&r1=100417&r2=100418&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h (original) +++ llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h Mon Apr 5 01:12:01 2010 @@ -15,6 +15,7 @@ #define CODEGEN_ASMPRINTER_DWARFDEBUG_H__ #include "llvm/CodeGen/AsmPrinter.h" +#include "DIE.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/SmallPtrSet.h" From sabre at nondot.org Mon Apr 5 01:19:28 2010 From: sabre at nondot.org (Chris Lattner) Date: Mon, 05 Apr 2010 06:19:28 -0000 Subject: [llvm-commits] [llvm] r100419 - in /llvm/trunk: include/llvm/CodeGen/SelectionDAG.h lib/CodeGen/SelectionDAG/SelectionDAG.cpp lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h Message-ID: <20100405061928.C62CF2A6C12C@llvm.org> Author: lattner Date: Mon Apr 5 01:19:28 2010 New Revision: 100419 URL: http://llvm.org/viewvc/llvm-project?rev=100419&view=rev Log: remove the now-redundant MMI pointer in SelectionDAG. 
Modified: llvm/trunk/include/llvm/CodeGen/SelectionDAG.h llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h Modified: llvm/trunk/include/llvm/CodeGen/SelectionDAG.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/SelectionDAG.h?rev=100419&r1=100418&r2=100419&view=diff ============================================================================== --- llvm/trunk/include/llvm/CodeGen/SelectionDAG.h (original) +++ llvm/trunk/include/llvm/CodeGen/SelectionDAG.h Mon Apr 5 01:19:28 2010 @@ -32,7 +32,6 @@ class FunctionLoweringInfo; class MachineConstantPoolValue; class MachineFunction; -class MachineModuleInfo; class MDNode; class SDNodeOrdering; class SDDbgValue; @@ -121,8 +120,7 @@ TargetLowering &TLI; MachineFunction *MF; FunctionLoweringInfo &FLI; - MachineModuleInfo *MMI; - LLVMContext* Context; + LLVMContext *Context; /// EntryNode - The starting token. SDNode EntryNode; @@ -191,7 +189,6 @@ const TargetMachine &getTarget() const; TargetLowering &getTargetLoweringInfo() const { return TLI; } FunctionLoweringInfo &getFunctionLoweringInfo() const { return FLI; } - MachineModuleInfo *getMachineModuleInfo() const { return MMI; } LLVMContext *getContext() const {return Context; } /// viewGraph - Pop up a GraphViz/gv window with the DAG rendered using 'dot'. 
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp?rev=100419&r1=100418&r2=100419&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (original) +++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp Mon Apr 5 01:19:28 2010 @@ -803,7 +803,6 @@ void SelectionDAG::init(MachineFunction &mf) { MF = &mf; - MMI = &mf.getMMI(); Context = &mf.getFunction()->getContext(); } @@ -2256,7 +2255,7 @@ if (GA->getOffset() != 0) return false; GlobalVariable *GV = dyn_cast(GA->getGlobal()); if (!GV) return false; - return MMI->hasDebugInfo(); + return MF->getMMI().hasDebugInfo(); } Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp?rev=100419&r1=100418&r2=100419&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (original) +++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp Mon Apr 5 01:19:28 2010 @@ -3817,9 +3817,9 @@ return 0; // VLAs. int FI = SI->second; - if (MachineModuleInfo *MMI = DAG.getMachineModuleInfo()) - if (!DI.getDebugLoc().isUnknown() && MMI->hasDebugInfo()) - MMI->setVariableDbgInfo(Variable, FI, DI.getDebugLoc()); + MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); + if (!DI.getDebugLoc().isUnknown() && MMI.hasDebugInfo()) + MMI.setVariableDbgInfo(Variable, FI, DI.getDebugLoc()); return 0; } case Intrinsic::dbg_value: { @@ -3867,9 +3867,9 @@ return 0; // VLAs. 
int FI = SI->second; - if (MachineModuleInfo *MMI = DAG.getMachineModuleInfo()) - if (!DI.getDebugLoc().isUnknown() && MMI->hasDebugInfo()) - MMI->setVariableDbgInfo(Variable, FI, DI.getDebugLoc()); + MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); + if (!DI.getDebugLoc().isUnknown() && MMI.hasDebugInfo()) + MMI.setVariableDbgInfo(Variable, FI, DI.getDebugLoc()); return 0; } case Intrinsic::eh_exception: { @@ -3885,10 +3885,9 @@ } case Intrinsic::eh_selector: { - MachineModuleInfo *MMI = DAG.getMachineModuleInfo(); - + MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); if (CurMBB->isLandingPad()) - AddCatchInfo(I, MMI, CurMBB); + AddCatchInfo(I, &MMI, CurMBB); else { #ifndef NDEBUG FuncInfo.CatchInfoLost.insert(&I); @@ -3910,40 +3909,25 @@ } case Intrinsic::eh_typeid_for: { - MachineModuleInfo *MMI = DAG.getMachineModuleInfo(); - - if (MMI) { - // Find the type id for the given typeinfo. - GlobalVariable *GV = ExtractTypeInfo(I.getOperand(1)); - unsigned TypeID = MMI->getTypeIDFor(GV); - Res = DAG.getConstant(TypeID, MVT::i32); - } else { - // Return something different to eh_selector. - Res = DAG.getConstant(1, MVT::i32); - } - + // Find the type id for the given typeinfo. 
+ GlobalVariable *GV = ExtractTypeInfo(I.getOperand(1)); + unsigned TypeID = DAG.getMachineFunction().getMMI().getTypeIDFor(GV); + Res = DAG.getConstant(TypeID, MVT::i32); setValue(&I, Res); return 0; } case Intrinsic::eh_return_i32: case Intrinsic::eh_return_i64: - if (MachineModuleInfo *MMI = DAG.getMachineModuleInfo()) { - MMI->setCallsEHReturn(true); - DAG.setRoot(DAG.getNode(ISD::EH_RETURN, dl, - MVT::Other, - getControlRoot(), - getValue(I.getOperand(1)), - getValue(I.getOperand(2)))); - } else { - setValue(&I, DAG.getConstant(0, TLI.getPointerTy())); - } - + DAG.getMachineFunction().getMMI().setCallsEHReturn(true); + DAG.setRoot(DAG.getNode(ISD::EH_RETURN, dl, + MVT::Other, + getControlRoot(), + getValue(I.getOperand(1)), + getValue(I.getOperand(2)))); return 0; case Intrinsic::eh_unwind_init: - if (MachineModuleInfo *MMI = DAG.getMachineModuleInfo()) { - MMI->setCallsUnwindInit(true); - } + DAG.getMachineFunction().getMMI().setCallsUnwindInit(true); return 0; case Intrinsic::eh_dwarf_cfa: { EVT VT = getValue(I.getOperand(1)).getValueType(); @@ -3962,12 +3946,12 @@ return 0; } case Intrinsic::eh_sjlj_callsite: { - MachineModuleInfo *MMI = DAG.getMachineModuleInfo(); + MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); ConstantInt *CI = dyn_cast(I.getOperand(1)); assert(CI && "Non-constant call site value in eh.sjlj.callsite!"); - assert(MMI->getCurrentCallSite() == 0 && "Overlapping call sites!"); + assert(MMI.getCurrentCallSite() == 0 && "Overlapping call sites!"); - MMI->setCurrentCallSite(CI->getZExtValue()); + MMI.setCurrentCallSite(CI->getZExtValue()); return 0; } @@ -4352,7 +4336,7 @@ const PointerType *PT = cast(CS.getCalledValue()->getType()); const FunctionType *FTy = cast(PT->getElementType()); const Type *RetTy = FTy->getReturnType(); - MachineModuleInfo *MMI = DAG.getMachineModuleInfo(); + MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); MCSymbol *BeginLabel = 0; TargetLowering::ArgListTy Args; @@ -4410,18 +4394,18 @@ 
Args.push_back(Entry); } - if (LandingPad && MMI) { + if (LandingPad) { // Insert a label before the invoke call to mark the try range. This can be // used to detect deletion of the invoke via the MachineModuleInfo. - BeginLabel = MMI->getContext().CreateTempSymbol(); + BeginLabel = MMI.getContext().CreateTempSymbol(); // For SjLj, keep track of which landing pads go with which invokes // so as to maintain the ordering of pads in the LSDA. - unsigned CallSiteIndex = MMI->getCurrentCallSite(); + unsigned CallSiteIndex = MMI.getCurrentCallSite(); if (CallSiteIndex) { - MMI->setCallSiteBeginLabel(BeginLabel, CallSiteIndex); + MMI.setCallSiteBeginLabel(BeginLabel, CallSiteIndex); // Now that the call site is handled, stop tracking it. - MMI->setCurrentCallSite(0); + MMI.setCurrentCallSite(0); } // Both PendingLoads and PendingExports must be flushed here; @@ -4512,14 +4496,14 @@ else HasTailCall = true; - if (LandingPad && MMI) { + if (LandingPad) { // Insert a label at the end of the invoke call to mark the try range. This // can be used to detect deletion of the invoke via the MachineModuleInfo. - MCSymbol *EndLabel = MMI->getContext().CreateTempSymbol(); + MCSymbol *EndLabel = MMI.getContext().CreateTempSymbol(); DAG.setRoot(DAG.getEHLabel(getCurDebugLoc(), getRoot(), EndLabel)); // Inform MachineModuleInfo of range. 
- MMI->addInvoke(LandingPad, BeginLabel, EndLabel); + MMI.addInvoke(LandingPad, BeginLabel, EndLabel); } } Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h?rev=100419&r1=100418&r2=100419&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h (original) +++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h Mon Apr 5 01:19:28 2010 @@ -56,7 +56,6 @@ class Instruction; class LoadInst; class MachineBasicBlock; -class MachineFunction; class MachineInstr; class MachineRegisterInfo; class PHINode; From sabre at nondot.org Mon Apr 5 01:21:01 2010 From: sabre at nondot.org (Chris Lattner) Date: Mon, 05 Apr 2010 06:21:01 -0000 Subject: [llvm-commits] [llvm] r100420 - in /llvm/trunk/include/llvm/CodeGen: FastISel.h ScheduleDAG.h Message-ID: <20100405062101.CF3052A6C12C@llvm.org> Author: lattner Date: Mon Apr 5 01:21:01 2010 New Revision: 100420 URL: http://llvm.org/viewvc/llvm-project?rev=100420&view=rev Log: trim some prototypes. 
Modified: llvm/trunk/include/llvm/CodeGen/FastISel.h llvm/trunk/include/llvm/CodeGen/ScheduleDAG.h Modified: llvm/trunk/include/llvm/CodeGen/FastISel.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/FastISel.h?rev=100420&r1=100419&r2=100420&view=diff ============================================================================== --- llvm/trunk/include/llvm/CodeGen/FastISel.h (original) +++ llvm/trunk/include/llvm/CodeGen/FastISel.h Mon Apr 5 01:21:01 2010 @@ -27,7 +27,6 @@ class MachineConstantPool; class MachineFunction; class MachineFrameInfo; -class MachineModuleInfo; class MachineRegisterInfo; class TargetData; class TargetInstrInfo; Modified: llvm/trunk/include/llvm/CodeGen/ScheduleDAG.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/ScheduleDAG.h?rev=100420&r1=100419&r2=100420&view=diff ============================================================================== --- llvm/trunk/include/llvm/CodeGen/ScheduleDAG.h (original) +++ llvm/trunk/include/llvm/CodeGen/ScheduleDAG.h Mon Apr 5 01:21:01 2010 @@ -27,7 +27,6 @@ class SUnit; class MachineConstantPool; class MachineFunction; - class MachineModuleInfo; class MachineRegisterInfo; class MachineInstr; class TargetRegisterInfo; From benny.kra at googlemail.com Mon Apr 5 05:17:15 2010 From: benny.kra at googlemail.com (Benjamin Kramer) Date: Mon, 05 Apr 2010 10:17:15 -0000 Subject: [llvm-commits] [llvm] r100423 - /llvm/trunk/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp Message-ID: <20100405101715.D29C12A6C12C@llvm.org> Author: d0k Date: Mon Apr 5 05:17:15 2010 New Revision: 100423 URL: http://llvm.org/viewvc/llvm-project?rev=100423&view=rev Log: Disambiguate else. 
Modified: llvm/trunk/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp Modified: llvm/trunk/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp?rev=100423&r1=100422&r2=100423&view=diff ============================================================================== --- llvm/trunk/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp (original) +++ llvm/trunk/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp Mon Apr 5 05:17:15 2010 @@ -347,11 +347,12 @@ OutStreamer.EmitRawText("\t.section .mdebug." + Twine(getCurrentABIString())); // TODO: handle O64 ABI - if (Subtarget->isABI_EABI()) + if (Subtarget->isABI_EABI()) { if (Subtarget->isGP32bit()) OutStreamer.EmitRawText(StringRef("\t.section .gcc_compiled_long32")); else OutStreamer.EmitRawText(StringRef("\t.section .gcc_compiled_long64")); + } // return to previous section OutStreamer.EmitRawText(StringRef("\t.previous")); From baldrick at free.fr Mon Apr 5 06:44:05 2010 From: baldrick at free.fr (Duncan Sands) Date: Mon, 05 Apr 2010 11:44:05 -0000 Subject: [llvm-commits] [dragonegg] r100424 - in /dragonegg/trunk: llvm-convert.cpp llvm-internal.h Message-ID: <20100405114405.82AAE2A6C12C@llvm.org> Author: baldrick Date: Mon Apr 5 06:44:05 2010 New Revision: 100424 URL: http://llvm.org/viewvc/llvm-project?rev=100424&view=rev Log: Implement GIMPLE_EH_DISPATCH, which means branching to the appropriate post landing pad depending on the selector value. At the same time, clear up some confusion between landing pad and exception region numbers. Finally, avoid pointless work when a type info occurs more than once. 
Modified: dragonegg/trunk/llvm-convert.cpp dragonegg/trunk/llvm-internal.h Modified: dragonegg/trunk/llvm-convert.cpp URL: http://llvm.org/viewvc/llvm-project/dragonegg/trunk/llvm-convert.cpp?rev=100424&r1=100423&r2=100424&view=diff ============================================================================== --- dragonegg/trunk/llvm-convert.cpp (original) +++ dragonegg/trunk/llvm-convert.cpp Mon Apr 5 06:44:05 2010 @@ -257,8 +257,6 @@ AllocaInsertionPoint = 0; - FuncEHGetTypeID = 0; - assert(TheTreeToLLVM == 0 && "Reentering function creation?"); TheTreeToLLVM = this; } @@ -1145,7 +1143,7 @@ break; case GIMPLE_EH_DISPATCH: - // TODO: Implement this. + RenderGIMPLE_EH_DISPATCH(stmt); break; case GIMPLE_GOTO: @@ -1834,15 +1832,6 @@ // ... Control Flow ... //===----------------------------------------------------------------------===// -/// CreateExceptionValues - Create values used internally by exception handling. -void TreeToLLVM::CreateExceptionValues() { - // Check to see if the exception values have been constructed. - if (FuncEHGetTypeID) return; - - FuncEHGetTypeID = Intrinsic::getDeclaration(TheModule, - Intrinsic::eh_typeid_for); -} - /// getExceptionPtr - Return the local holding the exception pointer for the /// given exception handling region, creating it if necessary. AllocaInst *TreeToLLVM::getExceptionPtr(unsigned RegionNo) { @@ -1868,7 +1857,7 @@ AllocaInst *&ExceptionFilter = ExceptionFilters[RegionNo]; if (!ExceptionFilter) { - ExceptionFilter = CreateTemporary(Type::getInt32PtrTy(Context)); + ExceptionFilter = CreateTemporary(Type::getInt32Ty(Context)); ExceptionFilter->setName("filt_tmp"); } @@ -1881,40 +1870,40 @@ if (Invokes.empty()) return; - // If a GCC landing pad is shared by several exception handling regions, or if - // there is a normal edge to it, then create an LLVM landing pad for each eh - // region. Calls to eh.exception and eh.selector will be placed in the LLVM - // landing pad, which branches to the GCC landing pad. 
- for (unsigned RegionNo = 1; RegionNo < Invokes.size(); ++RegionNo){ - // Get the list of invokes for this exception handling region. - SmallVector &InvokesForRegion = Invokes[RegionNo]; + // If a GCC post landing pad is shared by several exception handling regions, + // or if there is a normal edge to it, then create LLVM landing pads for each + // eh region. Calls to eh.exception and eh.selector will then go in the LLVM + // landing pad, which branches to the GCC post landing pad. + for (unsigned LPadNo = 1; LPadNo < Invokes.size(); ++LPadNo) { + // Get the list of invokes for this GCC landing pad. + SmallVector &InvokesForPad = Invokes[LPadNo]; - if (InvokesForRegion.empty()) + if (InvokesForPad.empty()) continue; - // All of the invokes unwind to the same basic block: the GCC landing pad. - BasicBlock *OldDest = InvokesForRegion[0]->getUnwindDest(); + // All of the invokes unwind to the GCC post landing pad. + BasicBlock *PostPad = InvokesForPad[0]->getUnwindDest(); // If the number of invokes is equal to the number of predecessors of the - // landing pad then it follows that no other exception handing region has - // invokes that unwind to this GCC landing pad, and also that there are no - // normal edges to the landing pad. In this common case there is no need - // to create an LLVM specific landing pad. - if ((unsigned)std::distance(pred_begin(OldDest), pred_end(OldDest)) == - InvokesForRegion.size()) + // post landing pad then it follows that no other GCC landing pad has any + // invokes that unwind to this post landing pad, and also that no normal + // edges land at this post pad. In this case there is no need to create + // an LLVM specific landing pad. + if ((unsigned)std::distance(pred_begin(PostPad), pred_end(PostPad)) == + InvokesForPad.size()) continue; - // Create the LLVM landing pad right before the GCC landing pad. 
- BasicBlock *NewDest = BasicBlock::Create(Context, "lpad", Fn, OldDest); + // Create the LLVM landing pad right before the GCC post landing pad. + BasicBlock *LPad = BasicBlock::Create(Context, "lpad", Fn, PostPad); - // Redirect invoke unwind edges from the GCC landing pad to NewDest. - for (unsigned i = 0, e = InvokesForRegion.size(); i < e; ++i) - InvokesForRegion[i]->setSuccessor(1, NewDest); - - // If there are any PHI nodes in OldDest, we need to update them to merge - // incoming values from NewDest instead. - pred_iterator PB = pred_begin(NewDest), PE = pred_end(NewDest); - for (BasicBlock::iterator II = OldDest->begin(); isa(II);) { + // Redirect invoke unwind edges from the GCC post landing pad to LPad. + for (unsigned i = 0, e = InvokesForPad.size(); i < e; ++i) + InvokesForPad[i]->setSuccessor(1, LPad); + + // If there are any PHI nodes in PostPad, we need to update them to merge + // incoming values from LPad instead. + pred_iterator PB = pred_begin(LPad), PE = pred_end(LPad); + for (BasicBlock::iterator II = PostPad->begin(); isa(II);) { PHINode *PN = cast(II++); // Check to see if all of the values coming in via invoke unwind edges are @@ -1929,26 +1918,26 @@ if (InVal == 0) { // Different unwind edges have different values. Create a new PHI node - // in NewDest. + // in LPad. PHINode *NewPN = PHINode::Create(PN->getType(), PN->getName()+".lpad", - NewDest); + LPad); // Add an entry for each unwind edge, using the value from the old PHI. for (pred_iterator PI = PB; PI != PE; ++PI) NewPN->addIncoming(PN->getIncomingValueForBlock(*PI), *PI); - // Now use this new PHI as the common incoming value for NewDest in PN. + // Now use this new PHI as the common incoming value for LPad in PN. InVal = NewPN; } - // Revector exactly one entry in the PHI node to come from NewDest and + // Revector exactly one entry in the PHI node to come from LPad and // delete the entries that came from the invoke unwind edges. 
for (pred_iterator PI = PB; PI != PE; ++PI) PN->removeIncomingValue(*PI); - PN->addIncoming(InVal, NewDest); + PN->addIncoming(InVal, LPad); } - // Add a fallthrough from NewDest to the original landing pad. - BranchInst::Create(OldDest, NewDest); + // Add a fallthrough from LPad to the original landing pad. + BranchInst::Create(PostPad, LPad); } // Initialize the exception pointer and selector value for each exception @@ -1956,33 +1945,36 @@ // point each exception handling region has its own landing pad, which is // only reachable via the unwind edges of the region's invokes. std::vector Args; - Function *EHException = Intrinsic::getDeclaration(TheModule, - Intrinsic::eh_exception); - Function *EHSelector = Intrinsic::getDeclaration(TheModule, - Intrinsic::eh_selector); - for (unsigned RegionNo = 1; RegionNo < Invokes.size(); ++RegionNo){ - // Get the list of invokes for this exception handling region. - SmallVector &InvokesForRegion = Invokes[RegionNo]; + Function *ExcIntr = Intrinsic::getDeclaration(TheModule, + Intrinsic::eh_exception); + Function *SelectorIntr = Intrinsic::getDeclaration(TheModule, + Intrinsic::eh_selector); + for (unsigned LPadNo = 1; LPadNo < Invokes.size(); ++LPadNo) { + // Get the list of invokes for this GCC landing pad. + SmallVector &InvokesForPad = Invokes[LPadNo]; - if (InvokesForRegion.empty()) + if (InvokesForPad.empty()) continue; - // All of the invokes unwind to the same basic block: the landing pad. - BasicBlock *LPad = InvokesForRegion[0]->getUnwindDest(); + // All of the invokes unwind to the landing pad. + BasicBlock *LPad = InvokesForPad[0]->getUnwindDest(); + + // The exception handling region this landing pad is for. + eh_region region = get_eh_region_from_lp_number(LPadNo); + assert(region->index > 0 && "Invalid landing pad region!"); + unsigned RegionNo = region->index; // Insert instructions at the start of the landing pad, but after any phis.
Builder.SetInsertPoint(LPad, LPad->getFirstNonPHI()); // Fetch the exception pointer. - Value *ExcPtr = Builder.CreateCall(EHException, "exc_ptr"); + Value *ExcPtr = Builder.CreateCall(ExcIntr, "exc_ptr"); // Store it if made use of elsewhere. if (RegionNo < ExceptionPtrs.size() && ExceptionPtrs[RegionNo]) Builder.CreateStore(ExcPtr, ExceptionPtrs[RegionNo]); - // Fetch and store the exception selector. - - // The first argument is the exception pointer. + // Get the exception selector. The first argument is the exception pointer. Args.push_back(ExcPtr); // It is followed by the personality function. @@ -1995,47 +1987,64 @@ Args.push_back(Builder.CreateBitCast(DECL_LLVM(personality), Type::getInt8PtrTy(Context))); - for (eh_region r = get_eh_region_from_number(RegionNo); r; r = r->outer) - switch (r->type) { + bool AllCaught = false; // Did we see a catch-all or no-throw? + SmallSet AlreadyCaught; // Typeinfos known caught already. + for (; region && !AllCaught; region = region->outer) + switch (region->type) { case ERT_ALLOWED_EXCEPTIONS: { - // Filter - note the length. - tree TypeList = r->u.allowed.type_list; - unsigned Length = list_length(TypeList); - Args.reserve(Args.size() + Length + 1); - Args.push_back(ConstantInt::get(Type::getInt32Ty(Context), Length + 1)); + // Filter. + + // Push a fake placeholder value for the length. The real length is + // computed below, once we know which typeinfos we are going to use. + unsigned LengthIndex = Args.size(); + Args.push_back(NULL); // Fake length value. // Add the type infos.
- for (; TypeList; TypeList = TREE_CHAIN(TypeList)) { - tree TType = lookup_type_for_runtime(TREE_VALUE(TypeList)); - Args.push_back(TreeConstantToLLVM::Convert(TType)); + AllCaught = true; + for (tree type = region->u.allowed.type_list; type; + type = TREE_CHAIN(type)) { + tree value = lookup_type_for_runtime(TREE_VALUE(type)); + Constant *TypeInfo = TreeConstantToLLVM::Convert(value); + // No point in permitting a typeinfo to be thrown if we know it can + // never reach the filter. + if (AlreadyCaught.count(TypeInfo)) + continue; + Args.push_back(TypeInfo); + AllCaught = false; } + + // The length is one more than the number of typeinfos. + Args[LengthIndex] = ConstantInt::get(Type::getInt32Ty(Context), + Args.size() - LengthIndex); + break; } case ERT_CLEANUP: break; case ERT_MUST_NOT_THROW: // Same as a zero-length filter. + AllCaught = true; Args.push_back(ConstantInt::get(Type::getInt32Ty(Context), 1)); break; - case ERT_TRY: { + case ERT_TRY: // Catches. - - for (eh_catch c = r->u.eh_try.first_catch; c ; c = c->next_catch) { - tree TypeList = c->type_list; - - if (!TypeList) + for (eh_catch c = region->u.eh_try.first_catch; c ; c = c->next_catch) + if (!c->type_list) { // Catch-all - push a null pointer. - Args.push_back( - Constant::getNullValue(Type::getInt8PtrTy(Context)) - ); - else + AllCaught = true; + Args.push_back(Constant::getNullValue(Type::getInt8PtrTy(Context))); + } else { // Add the type infos. - for (; TypeList; TypeList = TREE_CHAIN(TypeList)) { - tree TType = lookup_type_for_runtime(TREE_VALUE(TypeList)); - Args.push_back(TreeConstantToLLVM::Convert(TType)); + for (tree type = c->type_list; type; type = TREE_CHAIN(type)) { + tree value = lookup_type_for_runtime(TREE_VALUE(type)); + Constant *TypeInfo = TreeConstantToLLVM::Convert(value); + // No point in trying to catch a typeinfo that was already caught. 
+ if (!AlreadyCaught.insert(TypeInfo)) + continue; + Args.push_back(TypeInfo); } - } + } + break; } - } //FIXME if (can_throw_external_1(i, false, false)) { //FIXME // Some exceptions from this region may not be caught by any handler. @@ -2061,7 +2070,7 @@ //FIXME } // Emit the selector call. - Value *Filter = Builder.CreateCall(EHSelector, Args.begin(), Args.end(), + Value *Filter = Builder.CreateCall(SelectorIntr, Args.begin(), Args.end(), "filter"); // Store it if made use of elsewhere. @@ -2074,125 +2083,9 @@ Invokes.clear(); } -//FIXME/// EmitPostPads - Emit EH post landing pads. -//FIXMEvoid TreeToLLVM::EmitPostPads() { -//FIXME std::vector Handlers; -//FIXME -//FIXME for (unsigned i = 1; i < PostPads.size(); ++i) { -//FIXME BasicBlock *PostPad = PostPads[i]; -//FIXME -//FIXME if (!PostPad) -//FIXME continue; -//FIXME -//FIXME CreateExceptionValues(); -//FIXME -//FIXME BeginBlock(PostPad); -//FIXMEBuilder.CreateUnreachable(); -//FIXME -//FIXME eh_region region = get_eh_region(i); -//FIXME BasicBlock *Dest = getLabelDeclBlock(get_eh_region_tree_label(region)); -//FIXME -//FIXME int RegionKind = classify_eh_handler(region); -//FIXME if (!RegionKind || !get_eh_type_list(region)) { -//FIXME // Cleanup, catch-all or empty filter - no testing required. -//FIXME Builder.CreateBr(Dest); -//FIXME continue; -//FIXME } else if (RegionKind < 0) { -//FIXME // Filter - the result of a filter selection will be a negative index if -//FIXME // there is a match. -//FIXME Value *Select = Builder.CreateLoad(ExceptionSelectorValue); -//FIXME -//FIXME // Compare with the filter action value. -//FIXME Value *Zero = ConstantInt::get(Select->getType(), 0); -//FIXME Value *Compare = Builder.CreateICmpSLT(Select, Zero); -//FIXME -//FIXME // Branch on the compare. 
-//FIXME BasicBlock *NoFilterBB = BasicBlock::Create(Context, "nofilter"); -//FIXME Builder.CreateCondBr(Compare, Dest, NoFilterBB); -//FIXME BeginBlock(NoFilterBB); -//FIXME } else if (RegionKind > 0) { -//FIXME // Catch -//FIXME tree TypeList = get_eh_type_list(region); -//FIXME -//FIXME Value *Cond = NULL; -//FIXME for (; TypeList; TypeList = TREE_CHAIN (TypeList)) { -//FIXME Value *TType = EmitRegister(lookup_type_for_runtime(TREE_VALUE(TypeList))); -//FIXME TType = Builder.CreateBitCast(TType, -//FIXME Type::getInt8PtrTy(Context)); -//FIXME -//FIXME // Call get eh type id. -//FIXME Value *TypeID = Builder.CreateCall(FuncEHGetTypeID, TType, "eh_typeid"); -//FIXME Value *Select = Builder.CreateLoad(ExceptionSelectorValue); -//FIXME -//FIXME // Compare with the exception selector. -//FIXME Value *Compare = Builder.CreateICmpEQ(Select, TypeID); -//FIXME -//FIXME Cond = Cond ? Builder.CreateOr(Cond, Compare) : Compare; -//FIXME } -//FIXME -//FIXME BasicBlock *NoCatchBB = NULL; -//FIXME -//FIXME // If the comparion fails, branch to the next catch that has a -//FIXME // post landing pad. -//FIXME eh_region next_catch = get_eh_next_catch(region); -//FIXME for (; next_catch; next_catch = get_eh_next_catch(next_catch)) { -//FIXME unsigned CatchNo = get_eh_region_number(next_catch); -//FIXME -//FIXME if (CatchNo < PostPads.size()) -//FIXME NoCatchBB = PostPads[CatchNo]; -//FIXME -//FIXME if (NoCatchBB) -//FIXME break; -//FIXME } -//FIXME -//FIXME if (NoCatchBB) { -//FIXME // Branch on the compare. -//FIXME Builder.CreateCondBr(Cond, Dest, NoCatchBB); -//FIXME continue; -//FIXME } -//FIXME -//FIXME // If there is no such catch, execute a RESX if the comparison fails. -//FIXME NoCatchBB = BasicBlock::Create(Context, "nocatch"); -//FIXME // Branch on the compare. -//FIXME Builder.CreateCondBr(Cond, Dest, NoCatchBB); -//FIXME BeginBlock(NoCatchBB); -//FIXME } -//FIXME -//FIXME // Emit a RESX_EXPR which skips handlers with no post landing pad. 
-//FIXME foreach_reachable_handler(i, true, false, AddHandler, &Handlers); -//FIXME -//FIXME BasicBlock *TargetBB = NULL; -//FIXME -//FIXME for (std::vector::iterator I = Handlers.begin(), -//FIXME E = Handlers.end(); I != E; ++I) { -//FIXME unsigned UnwindNo = get_eh_region_number(*I); -//FIXME -//FIXME if (UnwindNo < PostPads.size()) -//FIXME TargetBB = PostPads[UnwindNo]; -//FIXME -//FIXME if (TargetBB) -//FIXME break; -//FIXME } -//FIXME -//FIXME if (TargetBB) { -//FIXME Builder.CreateBr(TargetBB); -//FIXME } else { -//FIXME assert(can_throw_external_1(i, true, false) && -//FIXME "Must-not-throw region handled by runtime?"); -//FIXME // Unwinding continues in the caller. -//FIXME if (!UnwindBB) -//FIXME UnwindBB = BasicBlock::Create(Context, "Unwind"); -//FIXME Builder.CreateBr(UnwindBB); -//FIXME } -//FIXME -//FIXME Handlers.clear(); -//FIXME } -//FIXME} - /// EmitUnwindBlock - Emit the lazily created EH unwind block. void TreeToLLVM::EmitUnwindBlock() { if (UnwindBB) { - CreateExceptionValues(); BeginBlock(UnwindBB); abort();//FIXME //FIXME // Fetch and store exception handler. @@ -2701,7 +2594,7 @@ Value *TreeToLLVM::EmitCallOf(Value *Callee, gimple stmt, const MemRef *DestLoc, const AttrListPtr &InPAL) { BasicBlock *LandingPad = 0; // Non-zero indicates an invoke. - int RegionNo = 0; // Non-zero if contained in an exception handling region. + int LPadNo = 0; AttrListPtr PAL = InPAL; if (PAL.isEmpty() && isa(Callee)) @@ -2715,15 +2608,15 @@ if (!PAL.paramHasAttr(~0, Attribute::NoUnwind)) { // This call may throw. Determine if we need to generate // an invoke rather than a simple call. - RegionNo = lookup_stmt_eh_lp(stmt); + LPadNo = lookup_stmt_eh_lp(stmt); // Is the call in an exception handling region with a landing pad? - if (RegionNo > 0) { + if (LPadNo > 0) { // Generate an invoke, with the GCC landing pad as the unwind destination. 
// The destination may change to an LLVM only landing pad, which precedes // the GCC one, after phi nodes have been populated (doing things this way // simplifies the generation of phi nodes). - eh_landing_pad lp = get_eh_landing_pad_from_number(RegionNo); + eh_landing_pad lp = get_eh_landing_pad_from_number(LPadNo); assert(lp && "Post landing pad not found!"); LandingPad = getLabelDeclBlock(lp->post_landing_pad); } @@ -2847,10 +2740,10 @@ // precedes the GCC one, after phi nodes have been populated (doing things // this way simplifies the generation of phi nodes). Record the invoke as // well as the GCC exception handling region. - assert(RegionNo > 0 && "Invoke but no GCC landing pad?"); - if ((unsigned)RegionNo >= Invokes.size()) - Invokes.resize(RegionNo + 1); - Invokes[RegionNo].push_back(cast(Call)); + assert(LPadNo > 0 && "Invoke but no GCC landing pad?"); + if ((unsigned)LPadNo >= Invokes.size()) + Invokes.resize(LPadNo + 1); + Invokes[LPadNo].push_back(cast(Call)); BeginBlock(NextBlock); } @@ -7093,6 +6986,86 @@ Builder.CreateCondBr(Cond, IfTrue, IfFalse); } +void TreeToLLVM::RenderGIMPLE_EH_DISPATCH(gimple stmt) { + int RegionNo = gimple_eh_dispatch_region(stmt); + eh_region region = get_eh_region_from_number(RegionNo); + + switch (region->type) { + default: + llvm_unreachable("Unexpected region type!"); + case ERT_ALLOWED_EXCEPTIONS: { + // Filter. + BasicBlock *Dest = getLabelDeclBlock(region->u.allowed.label); + + if (!region->u.allowed.type_list) { + // Not allowed to throw. Branch directly to the post landing pad. + Builder.CreateBr(Dest); + BeginBlock(BasicBlock::Create(Context)); + break; + } + + // The result of a filter selection will be a negative index if there is a + // match. + // FIXME: It looks like you have to compare against a specific value, + // checking for any old negative number is not enough! + Value *Filter = Builder.CreateLoad(getExceptionFilter(RegionNo)); + + // Compare with the filter action value. 
+ Value *Zero = ConstantInt::get(Filter->getType(), 0); + Value *Compare = Builder.CreateICmpSLT(Filter, Zero); + + // Branch on the compare. + BasicBlock *NoMatchBB = BasicBlock::Create(Context); + Builder.CreateCondBr(Compare, Dest, NoMatchBB); + BeginBlock(NoMatchBB); + break; + } + case ERT_TRY: + // Catches. + Value *Filter = NULL; + SmallSet AlreadyCaught; // Typeinfos known caught. + Function *TypeIDIntr = Intrinsic::getDeclaration(TheModule, + Intrinsic::eh_typeid_for); + for (eh_catch c = region->u.eh_try.first_catch; c ; c = c->next_catch) { + BasicBlock *Dest = getLabelDeclBlock(c->label); + if (!c->type_list) { + // Catch-all. Branch directly to the post landing pad. + Builder.CreateBr(Dest); + break; + } + + Value *Cond = NULL; + for (tree type = c->type_list; type; type = TREE_CHAIN (type)) { + tree value = lookup_type_for_runtime(TREE_VALUE(type)); + Value *TypeInfo = TreeConstantToLLVM::Convert(value); + // No point in trying to catch a typeinfo that was already caught. + if (!AlreadyCaught.insert(TypeInfo)) + continue; + + TypeInfo = Builder.CreateBitCast(TypeInfo, Type::getInt8PtrTy(Context)); + + // Call get eh type id. + Value *TypeID = Builder.CreateCall(TypeIDIntr, TypeInfo, "typeid"); + + if (!Filter) + Filter = Builder.CreateLoad(getExceptionFilter(RegionNo)); + + // Compare with the exception selector. + Value *Compare = Builder.CreateICmpEQ(Filter, TypeID); + + Cond = Cond ? 
Builder.CreateOr(Cond, Compare) : Compare; + } + + if (Cond) { + BasicBlock *NoMatchBB = BasicBlock::Create(Context); + Builder.CreateCondBr(Cond, Dest, NoMatchBB); + BeginBlock(NoMatchBB); + } + } + break; + } +} + void TreeToLLVM::RenderGIMPLE_GOTO(gimple stmt) { tree dest = gimple_goto_dest(stmt); Modified: dragonegg/trunk/llvm-internal.h URL: http://llvm.org/viewvc/llvm-project/dragonegg/trunk/llvm-internal.h?rev=100424&r1=100423&r2=100424&view=diff ============================================================================== --- dragonegg/trunk/llvm-internal.h (original) +++ dragonegg/trunk/llvm-internal.h Mon Apr 5 06:44:05 2010 @@ -424,7 +424,7 @@ //===---------------------- Exception Handling --------------------------===// - /// Invokes - The list of invoke instructions for a given EH region. + /// Invokes - The list of invoke instructions for a given landing pad. SmallVector, 16> Invokes; /// ExceptionPtrs - The local holding the exception pointer for a EH region. @@ -433,9 +433,6 @@ /// ExceptionFilters - The local holding the filter value for a EH region. SmallVector ExceptionFilters; - /// FuncEHGetTypeID - Function used to return type id for give typeinfo. - Function *FuncEHGetTypeID; - public: TreeToLLVM(tree_node *fndecl); ~TreeToLLVM(); @@ -557,10 +554,6 @@ private: // Helpers for exception handling. - /// CreateExceptionValues - Create values used internally by exception - /// handling. - void CreateExceptionValues(); - /// getLandingPad - Return the landing pad for the given exception handling /// region, creating it if necessary. 
BasicBlock *getLandingPad(unsigned RegionNo); @@ -590,6 +583,7 @@ void RenderGIMPLE_ASSIGN(gimple stmt); void RenderGIMPLE_CALL(gimple stmt); void RenderGIMPLE_COND(gimple stmt); + void RenderGIMPLE_EH_DISPATCH(gimple stmt); void RenderGIMPLE_GOTO(gimple stmt); void RenderGIMPLE_RESX(gimple stmt); void RenderGIMPLE_RETURN(gimple stmt); From baldrick at free.fr Mon Apr 5 07:07:45 2010 From: baldrick at free.fr (Duncan Sands) Date: Mon, 05 Apr 2010 12:07:45 -0000 Subject: [llvm-commits] [dragonegg] r100426 - /dragonegg/trunk/llvm-convert.cpp Message-ID: <20100405120745.E312A2A6C12C@llvm.org> Author: baldrick Date: Mon Apr 5 07:07:45 2010 New Revision: 100426 URL: http://llvm.org/viewvc/llvm-project?rev=100426&view=rev Log: Port commit 100303 (wangmp) from llvm-gcc, hopefully fixing the self-host build: Reapply patch for adding support for address spaces and added a isVolatile field to memcpy, memmove, and memset. Modified: dragonegg/trunk/llvm-convert.cpp Modified: dragonegg/trunk/llvm-convert.cpp URL: http://llvm.org/viewvc/llvm-project/dragonegg/trunk/llvm-convert.cpp?rev=100426&r1=100425&r2=100426&view=diff ============================================================================== --- dragonegg/trunk/llvm-convert.cpp (original) +++ dragonegg/trunk/llvm-convert.cpp Mon Apr 5 07:07:45 2010 @@ -1619,15 +1619,17 @@ unsigned Align) { const Type *SBP = Type::getInt8PtrTy(Context); const Type *IntPtr = TD.getIntPtrType(Context); - Value *Ops[4] = { + Value *Ops[5] = { Builder.CreateBitCast(DestPtr, SBP), Builder.CreateBitCast(SrcPtr, SBP), Builder.CreateIntCast(Size, IntPtr, /*isSigned*/true), - ConstantInt::get(Type::getInt32Ty(Context), Align) + ConstantInt::get(Type::getInt32Ty(Context), Align), + ConstantInt::get(Type::getInt1Ty(Context), false) }; + const Type *ArgTypes[3] = { SBP, SBP, IntPtr }; Builder.CreateCall(Intrinsic::getDeclaration(TheModule, Intrinsic::memcpy, - &IntPtr, 1), Ops, Ops+4); + ArgTypes, 3), Ops, Ops+5); return Ops[0]; } @@ -1635,15 
+1637,17 @@ unsigned Align) { const Type *SBP = Type::getInt8PtrTy(Context); const Type *IntPtr = TD.getIntPtrType(Context); - Value *Ops[4] = { + Value *Ops[5] = { Builder.CreateBitCast(DestPtr, SBP), Builder.CreateBitCast(SrcPtr, SBP), Builder.CreateIntCast(Size, IntPtr, /*isSigned*/true), - ConstantInt::get(Type::getInt32Ty(Context), Align) + ConstantInt::get(Type::getInt32Ty(Context), Align), + ConstantInt::get(Type::getInt1Ty(Context), false) }; + const Type *ArgTypes[3] = { SBP, SBP, IntPtr }; Builder.CreateCall(Intrinsic::getDeclaration(TheModule, Intrinsic::memmove, - &IntPtr, 1), Ops, Ops+4); + ArgTypes, 3), Ops, Ops+5); return Ops[0]; } @@ -1651,15 +1655,17 @@ unsigned Align) { const Type *SBP = Type::getInt8PtrTy(Context); const Type *IntPtr = TD.getIntPtrType(Context); - Value *Ops[4] = { + Value *Ops[5] = { Builder.CreateBitCast(DestPtr, SBP), Builder.CreateIntCast(SrcVal, Type::getInt8Ty(Context), /*isSigned*/true), Builder.CreateIntCast(Size, IntPtr, /*isSigned*/true), - ConstantInt::get(Type::getInt32Ty(Context), Align) + ConstantInt::get(Type::getInt32Ty(Context), Align), + ConstantInt::get(Type::getInt1Ty(Context), false) }; + const Type *ArgTypes[2] = { SBP, IntPtr }; Builder.CreateCall(Intrinsic::getDeclaration(TheModule, Intrinsic::memset, - &IntPtr, 1), Ops, Ops+4); + ArgTypes, 2), Ops, Ops+5); return Ops[0]; } From baldrick at free.fr Mon Apr 5 07:40:57 2010 From: baldrick at free.fr (Duncan Sands) Date: Mon, 05 Apr 2010 12:40:57 -0000 Subject: [llvm-commits] [dragonegg] r100427 - /dragonegg/trunk/gcc_revision_tested_with Message-ID: <20100405124057.64D912A6C12C@llvm.org> Author: baldrick Date: Mon Apr 5 07:40:57 2010 New Revision: 100427 URL: http://llvm.org/viewvc/llvm-project?rev=100427&view=rev Log: New tested gcc revision. 
Modified: dragonegg/trunk/gcc_revision_tested_with Modified: dragonegg/trunk/gcc_revision_tested_with URL: http://llvm.org/viewvc/llvm-project/dragonegg/trunk/gcc_revision_tested_with?rev=100427&r1=100426&r2=100427&view=diff ============================================================================== --- dragonegg/trunk/gcc_revision_tested_with (original) +++ dragonegg/trunk/gcc_revision_tested_with Mon Apr 5 07:40:57 2010 @@ -1 +1 @@ -157849 +157940 From stoklund at 2pi.dk Mon Apr 5 09:42:05 2010 From: stoklund at 2pi.dk (Jakob Stoklund Olesen) Date: Mon, 5 Apr 2010 07:42:05 -0700 Subject: [llvm-commits] [llvm] r100233 - in /llvm/trunk: ./ include/llvm/Support/ lib/Target/ARM/ lib/Target/ARM/Disassembler/ test/MC/Disassembler/ utils/TableGen/ In-Reply-To: References: <20100402222738.E3E3D2A6C12C@llvm.org> <2582BF8C-8287-40FA-B090-C46C078D5973@apple.com> <0F76E611-D66F-4456-949D-9487D4E46D94@apple.com> <1270439298.1443.5.camel@aspire> <1270440180.1443.10.camel@aspire> <808FF74B-0D21-4F22-A064-53B27ABCFF14@apple.com> <6B4F5068-C32F-47F3-8E25-BFEF3E201CC3@2pi.dk> Message-ID: On Apr 4, 2010, at 10:10 PM, Chris Lattner wrote: > On Apr 4, 2010, at 9:30 PM, Jakob Stoklund Olesen wrote: >>>> If you left-shift a positive number until it goes negative, that's an >>>> overflow. At least that's how I understand it. >>> >>> You can shift left an N bit number between 0 and N-1 bits safely without overflow. >> >> Only an unsigned number. From the C++0X working draft: > > LLVM makes this assumption all over the place, just like it assumes a two's complement representation and that 'unsigned' is 32-bits. It is safe to rely on. Hmm. Those are implementation defined. That's fine to rely on. This is undefined behaviour. We shouldn't rely on that. Hey, we should add support for "shl nsw" to LLVM IR! Evaluating "shl nsw i32 %x, N" effectively asserts that the high N+1 bits of %x are 0. That would enable a number of optimizations and wonderfully educational surprises for users. 
From sabre at nondot.org Mon Apr 5 11:32:15 2010 From: sabre at nondot.org (Chris Lattner) Date: Mon, 05 Apr 2010 16:32:15 -0000 Subject: [llvm-commits] [llvm] r100434 - /llvm/trunk/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp Message-ID: <20100405163215.2AADF2A6C12C@llvm.org> Author: lattner Date: Mon Apr 5 11:32:14 2010 New Revision: 100434 URL: http://llvm.org/viewvc/llvm-project?rev=100434&view=rev Log: fix a couple problems I introduced handling symbols with spaces in them. Sym->getName() != OS << *Sym Modified: llvm/trunk/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp Modified: llvm/trunk/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp?rev=100434&r1=100433&r2=100434&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp (original) +++ llvm/trunk/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp Mon Apr 5 11:32:14 2010 @@ -237,7 +237,7 @@ if (ACPV->isLSDA()) { O << MAI->getPrivateGlobalPrefix() << "_LSDA_" << getFunctionNumber(); } else if (ACPV->isBlockAddress()) { - O << GetBlockAddressSymbol(ACPV->getBlockAddress())->getName(); + O << *GetBlockAddressSymbol(ACPV->getBlockAddress()); } else if (ACPV->isGlobalValue()) { GlobalValue *GV = ACPV->getGV(); bool isIndirect = Subtarget->isTargetDarwin() && @@ -281,10 +281,16 @@ void ARMAsmPrinter::EmitFunctionEntryLabel() { if (AFI->isThumbFunction()) { OutStreamer.EmitRawText(StringRef("\t.code\t16")); - if (Subtarget->isTargetDarwin()) - OutStreamer.EmitRawText("\t.thumb_func\t"+Twine(CurrentFnSym->getName())); - else + if (!Subtarget->isTargetDarwin()) OutStreamer.EmitRawText(StringRef("\t.thumb_func")); + else { + // This needs to emit to a temporary string to get properly quoted + // MCSymbols when they have spaces in them. 
+ SmallString<128> Tmp; + raw_svector_ostream OS(Tmp); + OS << "\t.thumb_func\t" << *CurrentFnSym; + OutStreamer.EmitRawText(OS.str()); + } } OutStreamer.EmitLabel(CurrentFnSym); From resistor at mac.com Mon Apr 5 12:50:20 2010 From: resistor at mac.com (Owen Anderson) Date: Mon, 05 Apr 2010 17:50:20 -0000 Subject: [llvm-commits] [llvm] r100438 - in /llvm/trunk/lib/Support: regengine.inc regexec.c Message-ID: <20100405175020.448202A6C12C@llvm.org> Author: resistor Date: Mon Apr 5 12:50:20 2010 New Revision: 100438 URL: http://llvm.org/viewvc/llvm-project?rev=100438&view=rev Log: Push const through the regex engine. Fixes some of the warnings in PR6616. Modified: llvm/trunk/lib/Support/regengine.inc llvm/trunk/lib/Support/regexec.c Modified: llvm/trunk/lib/Support/regengine.inc URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/regengine.inc?rev=100438&r1=100437&r2=100438&view=diff ============================================================================== --- llvm/trunk/lib/Support/regengine.inc (original) +++ llvm/trunk/lib/Support/regengine.inc Mon Apr 5 12:50:20 2010 @@ -72,11 +72,11 @@ struct re_guts *g; int eflags; llvm_regmatch_t *pmatch; /* [nsub+1] (0 element unused) */ - char *offp; /* offsets work from here */ - char *beginp; /* start of string -- virtual NUL precedes */ - char *endp; /* end of string -- virtual NUL here */ - char *coldp; /* can be no match starting before here */ - char **lastpos; /* [nplus+1] */ + const char *offp; /* offsets work from here */ + const char *beginp; /* start of string -- virtual NUL precedes */ + const char *endp; /* end of string -- virtual NUL here */ + const char *coldp; /* can be no match starting before here */ + const char **lastpos; /* [nplus+1] */ STATEVARS; states st; /* current states */ states fresh; /* states for a fresh start */ @@ -84,11 +84,14 @@ states empty; /* empty set of states */ }; -static int matcher(struct re_guts *, char *, size_t, llvm_regmatch_t[], int); -static char 
*dissect(struct match *, char *, char *, sopno, sopno); -static char *backref(struct match *, char *, char *, sopno, sopno, sopno, int); -static char *fast(struct match *, char *, char *, sopno, sopno); -static char *slow(struct match *, char *, char *, sopno, sopno); +static int matcher(struct re_guts *, const char *, size_t, + llvm_regmatch_t[], int); +static const char *dissect(struct match *, const char *, const char *, sopno, + sopno); +static const char *backref(struct match *, const char *, const char *, sopno, + sopno, sopno, int); +static const char *fast(struct match *, const char *, const char *, sopno, sopno); +static const char *slow(struct match *, const char *, const char *, sopno, sopno); static states step(struct re_guts *, sopno, sopno, states, int, states); #define MAX_RECURSION 100 #define BOL (OUT+1) @@ -125,18 +128,19 @@ - matcher - the actual matching engine */ static int /* 0 success, REG_NOMATCH failure */ -matcher(struct re_guts *g, char *string, size_t nmatch, llvm_regmatch_t pmatch[], +matcher(struct re_guts *g, const char *string, size_t nmatch, + llvm_regmatch_t pmatch[], int eflags) { - char *endp; + const char *endp; size_t i; struct match mv; struct match *m = &mv; - char *dp; + const char *dp; const sopno gf = g->firststate+1; /* +1 for OEND */ const sopno gl = g->laststate; - char *start; - char *stop; + const char *start; + const char *stop; /* simplify the situation where possible */ if (g->cflags&REG_NOSUB) @@ -216,7 +220,7 @@ dp = dissect(m, m->coldp, endp, gf, gl); } else { if (g->nplus > 0 && m->lastpos == NULL) - m->lastpos = (char **)malloc((g->nplus+1) * + m->lastpos = (const char **)malloc((g->nplus+1) * sizeof(char *)); if (g->nplus > 0 && m->lastpos == NULL) { free(m->pmatch); @@ -287,21 +291,22 @@ /* - dissect - figure out what matched what, no back references */ -static char * /* == stop (success) always */ -dissect(struct match *m, char *start, char *stop, sopno startst, sopno stopst) +static const char * /* == stop 
(success) always */ +dissect(struct match *m, const char *start, const char *stop, sopno startst, + sopno stopst) { int i; sopno ss; /* start sop of current subRE */ sopno es; /* end sop of current subRE */ - char *sp; /* start of string matched by it */ - char *stp; /* string matched by it cannot pass here */ - char *rest; /* start of rest of string */ - char *tail; /* string unmatched by rest of RE */ + const char *sp; /* start of string matched by it */ + const char *stp; /* string matched by it cannot pass here */ + const char *rest; /* start of rest of string */ + const char *tail; /* string unmatched by rest of RE */ sopno ssub; /* start sop of subsubRE */ sopno esub; /* end sop of subsubRE */ - char *ssp; /* start of string matched by subsubRE */ - char *sep; /* end of string matched by subsubRE */ - char *oldssp; /* previous ssp */ + const char *ssp; /* start of string matched by subsubRE */ + const char *sep; /* end of string matched by subsubRE */ + const char *oldssp; /* previous ssp */ AT("diss", start, stop, startst, stopst); sp = start; @@ -360,7 +365,7 @@ esub = es - 1; /* did innards match? 
*/ if (slow(m, sp, rest, ssub, esub) != NULL) { - char *dp = dissect(m, sp, rest, ssub, esub); + const char *dp = dissect(m, sp, rest, ssub, esub); (void)dp; /* avoid warning if assertions off */ assert(dp == rest); } else /* no */ @@ -400,7 +405,7 @@ assert(sep == rest); /* must exhaust substring */ assert(slow(m, ssp, sep, ssub, esub) == rest); { - char *dp = dissect(m, ssp, sep, ssub, esub); + const char *dp = dissect(m, ssp, sep, ssub, esub); (void)dp; /* avoid warning if assertions off */ assert(dp == sep); } @@ -438,7 +443,7 @@ assert(OP(m->g->strip[esub]) == O_CH); } { - char *dp = dissect(m, sp, rest, ssub, esub); + const char *dp = dissect(m, sp, rest, ssub, esub); (void)dp; /* avoid warning if assertions off */ assert(dp == rest); } @@ -474,17 +479,17 @@ /* - backref - figure out what matched what, figuring in back references */ -static char * /* == stop (success) or NULL (failure) */ -backref(struct match *m, char *start, char *stop, sopno startst, sopno stopst, - sopno lev, int rec) /* PLUS nesting level */ +static const char * /* == stop (success) or NULL (failure) */ +backref(struct match *m, const char *start, const char *stop, sopno startst, + sopno stopst, sopno lev, int rec) /* PLUS nesting level */ { int i; sopno ss; /* start sop of current subRE */ - char *sp; /* start of string matched by it */ + const char *sp; /* start of string matched by it */ sopno ssub; /* start sop of subsubRE */ sopno esub; /* end sop of subsubRE */ - char *ssp; /* start of string matched by subsubRE */ - char *dp; + const char *ssp; /* start of string matched by subsubRE */ + const char *dp; size_t len; int hard; sop s; @@ -674,18 +679,19 @@ /* - fast - step through the string at top speed */ -static char * /* where tentative match ended, or NULL */ -fast(struct match *m, char *start, char *stop, sopno startst, sopno stopst) +static const char * /* where tentative match ended, or NULL */ +fast(struct match *m, const char *start, const char *stop, sopno startst, + sopno 
stopst) { states st = m->st; states fresh = m->fresh; states tmp = m->tmp; - char *p = start; + const char *p = start; int c = (start == m->beginp) ? OUT : *(start-1); int lastc; /* previous c */ int flagch; int i; - char *coldp; /* last p after which no match was underway */ + const char *coldp; /* last p after which no match was underway */ CLEAR(st); SET1(st, startst); @@ -758,18 +764,19 @@ /* - slow - step through the string more deliberately */ -static char * /* where it ended */ -slow(struct match *m, char *start, char *stop, sopno startst, sopno stopst) +static const char * /* where it ended */ +slow(struct match *m, const char *start, const char *stop, sopno startst, + sopno stopst) { states st = m->st; states empty = m->empty; states tmp = m->tmp; - char *p = start; + const char *p = start; int c = (start == m->beginp) ? OUT : *(start-1); int lastc; /* previous c */ int flagch; int i; - char *matchp; /* last p at which a match ended */ + const char *matchp; /* last p at which a match ended */ AT("slow", start, stop, startst, stopst); CLEAR(st); Modified: llvm/trunk/lib/Support/regexec.c URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/regexec.c?rev=100438&r1=100437&r2=100438&view=diff ============================================================================== --- llvm/trunk/lib/Support/regexec.c (original) +++ llvm/trunk/lib/Support/regexec.c Mon Apr 5 12:50:20 2010 @@ -155,7 +155,7 @@ eflags = GOODFLAGS(eflags); if (g->nstates <= (long)(CHAR_BIT*sizeof(states1)) && !(eflags&REG_LARGE)) - return(smatcher(g, (char *)string, nmatch, pmatch, eflags)); + return(smatcher(g, string, nmatch, pmatch, eflags)); else - return(lmatcher(g, (char *)string, nmatch, pmatch, eflags)); + return(lmatcher(g, string, nmatch, pmatch, eflags)); } From sabre at nondot.org Mon Apr 5 12:52:32 2010 From: sabre at nondot.org (Chris Lattner) Date: Mon, 05 Apr 2010 17:52:32 -0000 Subject: [llvm-commits] [llvm] r100439 - 
/llvm/trunk/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp Message-ID: <20100405175232.10ECF2A6C12C@llvm.org> Author: lattner Date: Mon Apr 5 12:52:31 2010 New Revision: 100439 URL: http://llvm.org/viewvc/llvm-project?rev=100439&view=rev Log: don't use emitlabel in the arm asm printer yet, the order isn't well specified. ARM really needs to have its instprinter finished at some point. Modified: llvm/trunk/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp Modified: llvm/trunk/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp?rev=100439&r1=100438&r2=100439&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp (original) +++ llvm/trunk/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp Mon Apr 5 12:52:31 2010 @@ -935,7 +935,9 @@ unsigned JTI = MO1.getIndex(); MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel2(JTI, MO2.getImm()); - OutStreamer.EmitLabel(JTISymbol); + // Can't use EmitLabel until instprinter happens, label comes out in the wrong + // order. + O << *JTISymbol << ":\n"; const char *JTEntryDirective = MAI->getData32bitsDirective(); @@ -974,7 +976,10 @@ unsigned JTI = MO1.getIndex(); MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel2(JTI, MO2.getImm()); - OutStreamer.EmitLabel(JTISymbol); + + // Can't use EmitLabel until instprinter happens, label comes out in the wrong + // order. 
+ O << *JTISymbol << ":\n"; const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); const std::vector &JT = MJTI->getJumpTables(); From dpatel at apple.com Mon Apr 5 12:57:28 2010 From: dpatel at apple.com (Devang Patel) Date: Mon, 5 Apr 2010 10:57:28 -0700 Subject: [llvm-commits] [llvm] r100315 - in /llvm/trunk/lib/Target/X86/AsmPrinter: X86AsmPrinter.h X86MCInstLower.cpp In-Reply-To: <20100404053820.29FD12A6C12C@llvm.org> References: <20100404053820.29FD12A6C12C@llvm.org> Message-ID: <6646DF19-CA78-4DD9-BEF1-18F64B5D48A8@apple.com> On Apr 3, 2010, at 10:38 PM, Chris Lattner wrote: > Author: lattner > Date: Sun Apr 4 00:38:19 2010 > New Revision: 100315 > > URL: http://llvm.org/viewvc/llvm-project?rev=100315&view=rev > Log: > split DEBUG_VALUE printing stuff out to its own method. > Ideally, there should be a target independent way to print DBG_VALUE machine instructions as comments. - Devang From dalej at apple.com Mon Apr 5 12:59:16 2010 From: dalej at apple.com (Dale Johannesen) Date: Mon, 5 Apr 2010 10:59:16 -0700 Subject: [llvm-commits] [llvm] r100315 - in /llvm/trunk/lib/Target/X86/AsmPrinter: X86AsmPrinter.h X86MCInstLower.cpp In-Reply-To: <6646DF19-CA78-4DD9-BEF1-18F64B5D48A8@apple.com> References: <20100404053820.29FD12A6C12C@llvm.org> <6646DF19-CA78-4DD9-BEF1-18F64B5D48A8@apple.com> Message-ID: <28331BDC-9808-488A-861B-3ADA671D18DA@apple.com> On Apr 5, 2010, at 10:57 AMPDT, Devang Patel wrote: > > On Apr 3, 2010, at 10:38 PM, Chris Lattner wrote: > >> Author: lattner >> Date: Sun Apr 4 00:38:19 2010 >> New Revision: 100315 >> >> URL: http://llvm.org/viewvc/llvm-project?rev=100315&view=rev >> Log: >> split DEBUG_VALUE printing stuff out to its own method. >> > > Ideally, there should be a target independent way to print DBG_VALUE machine instructions as comments. Do we actually want this long term? I've been regarding the comments as a hack that will go away when the Dwarf writer knows how to deal with dbg_value. 
From echristo at apple.com Mon Apr 5 13:01:41 2010 From: echristo at apple.com (Eric Christopher) Date: Mon, 5 Apr 2010 11:01:41 -0700 Subject: [llvm-commits] [llvm] r100315 - in /llvm/trunk/lib/Target/X86/AsmPrinter: X86AsmPrinter.h X86MCInstLower.cpp In-Reply-To: <28331BDC-9808-488A-861B-3ADA671D18DA@apple.com> References: <20100404053820.29FD12A6C12C@llvm.org> <6646DF19-CA78-4DD9-BEF1-18F64B5D48A8@apple.com> <28331BDC-9808-488A-861B-3ADA671D18DA@apple.com> Message-ID: <40258BCD-159E-401E-9567-47AA07F24F57@apple.com> On Apr 5, 2010, at 10:59 AM, Dale Johannesen wrote: > > On Apr 5, 2010, at 10:57 AMPDT, Devang Patel wrote: > >> >> On Apr 3, 2010, at 10:38 PM, Chris Lattner wrote: >> >>> Author: lattner >>> Date: Sun Apr 4 00:38:19 2010 >>> New Revision: 100315 >>> >>> URL: http://llvm.org/viewvc/llvm-project?rev=100315&view=rev >>> Log: >>> split DEBUG_VALUE printing stuff out to its own method. >>> >> >> Ideally, there should be a target independent way to print DBG_VALUE machine instructions as comments. > > Do we actually want this long term? I've been regarding the comments as a hack that will go away when the Dwarf writer knows how to deal with dbg_value. > Useful for debugging no? 
-eric From dpatel at apple.com Mon Apr 5 13:09:47 2010 From: dpatel at apple.com (Devang Patel) Date: Mon, 5 Apr 2010 11:09:47 -0700 Subject: [llvm-commits] [llvm] r100315 - in /llvm/trunk/lib/Target/X86/AsmPrinter: X86AsmPrinter.h X86MCInstLower.cpp In-Reply-To: <28331BDC-9808-488A-861B-3ADA671D18DA@apple.com> References: <20100404053820.29FD12A6C12C@llvm.org> <6646DF19-CA78-4DD9-BEF1-18F64B5D48A8@apple.com> <28331BDC-9808-488A-861B-3ADA671D18DA@apple.com> Message-ID: <06FFAA59-43BC-4C69-87BE-6327206D092B@apple.com> On Apr 5, 2010, at 10:59 AM, Dale Johannesen wrote: > > On Apr 5, 2010, at 10:57 AMPDT, Devang Patel wrote: > >> >> On Apr 3, 2010, at 10:38 PM, Chris Lattner wrote: >> >>> Author: lattner >>> Date: Sun Apr 4 00:38:19 2010 >>> New Revision: 100315 >>> >>> URL: http://llvm.org/viewvc/llvm-project?rev=100315&view=rev >>> Log: >>> split DEBUG_VALUE printing stuff out to its own method. >>> >> >> Ideally, there should be a target independent way to print DBG_VALUE machine instructions as comments. > > Do we actually want this long term? I've been regarding the comments as a hack that will go away when the Dwarf writer knows how to deal with dbg_value. At the moment, the DwarfDebug does not remove them from the instruction stream. The nice thing about printing comment is that it is only printed when -verbose-asm is used. - Devang From tonic at nondot.org Mon Apr 5 13:35:37 2010 From: tonic at nondot.org (Tanya Lattner) Date: Mon, 05 Apr 2010 18:35:37 -0000 Subject: [llvm-commits] [llvm] r100447 - /llvm/trunk/docs/HowToReleaseLLVM.html Message-ID: <20100405183537.830BE2A6C12C@llvm.org> Author: tbrethou Date: Mon Apr 5 13:35:37 2010 New Revision: 100447 URL: http://llvm.org/viewvc/llvm-project?rev=100447&view=rev Log: Update to note that clang has a baseline now. Dropping x86-32 for MacOS. 
Modified: llvm/trunk/docs/HowToReleaseLLVM.html Modified: llvm/trunk/docs/HowToReleaseLLVM.html URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/docs/HowToReleaseLLVM.html?rev=100447&r1=100446&r2=100447&view=diff ============================================================================== --- llvm/trunk/docs/HowToReleaseLLVM.html (original) +++ llvm/trunk/docs/HowToReleaseLLVM.html Mon Apr 5 13:35:37 2010 @@ -362,13 +362,12 @@

  Architecture | OS | llvm-gcc baseline | clang baseline | tests
- x86-32 | Mac OS 10.5 | last release | none | llvm dejagnu, clang tests, test-suite (including spec)
- x86-32 | Linux | last release | none | llvm dejagnu, clang tests, test-suite (including spec)
- x86-32 | FreeBSD | none | none | llvm dejagnu, clang tests, test-suite
+ x86-32 | Linux | last release | last release | llvm dejagnu, clang tests, test-suite (including spec)
+ x86-32 | FreeBSD | none | last release | llvm dejagnu, clang tests, test-suite
  x86-32 | mingw | last release | none | QT
- x86-64 | Mac OS 10.5 | last release | none | llvm dejagnu, clang tests, test-suite (including spec)
- x86-64 | Linux | last release | none | llvm dejagnu, clang tests, test-suite (including spec)
- x86-64 | FreeBSD | none | none | llvm dejagnu, clang tests, test-suite
+ x86-64 | Mac OS 10.X | last release | last release | llvm dejagnu, clang tests, test-suite (including spec)
+ x86-64 | Linux | last release | last release | llvm dejagnu, clang tests, test-suite (including spec)
+ x86-64 | FreeBSD | none | last release | llvm dejagnu, clang tests, test-suite

From gohman at apple.com Mon Apr 5 14:16:38 2010 From: gohman at apple.com (Dan Gohman) Date: Mon, 05 Apr 2010 19:16:38 -0000 Subject: [llvm-commits] [llvm] r100454 - /llvm/trunk/include/llvm/CodeGen/MachineDominators.h Message-ID: <20100405191638.A2ED32A6C12C@llvm.org> Author: djg Date: Mon Apr 5 14:16:38 2010 New Revision: 100454 URL: http://llvm.org/viewvc/llvm-project?rev=100454&view=rev Log: Add an isReachableFromEntry to MachineDominators, following the one in Dominators. Modified: llvm/trunk/include/llvm/CodeGen/MachineDominators.h Modified: llvm/trunk/include/llvm/CodeGen/MachineDominators.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/MachineDominators.h?rev=100454&r1=100453&r2=100454&view=diff ============================================================================== --- llvm/trunk/include/llvm/CodeGen/MachineDominators.h (original) +++ llvm/trunk/include/llvm/CodeGen/MachineDominators.h Mon Apr 5 14:16:38 2010 @@ -156,8 +156,13 @@ inline void splitBlock(MachineBasicBlock* NewBB) { DT->splitBlock(NewBB); } - - + + /// isReachableFromEntry - Return true if A is dominated by the entry + /// block of the function containing it. + bool isReachableFromEntry(MachineBasicBlock *A) { + return DT->isReachableFromEntry(A); + } + virtual void releaseMemory(); virtual void print(raw_ostream &OS, const Module*) const; From gohman at apple.com Mon Apr 5 14:17:22 2010 From: gohman at apple.com (Dan Gohman) Date: Mon, 05 Apr 2010 19:17:22 -0000 Subject: [llvm-commits] [llvm] r100455 - in /llvm/trunk: lib/CodeGen/MachineSink.cpp test/CodeGen/X86/unreachable-loop-sinking.ll Message-ID: <20100405191722.426812A6C12C@llvm.org> Author: djg Date: Mon Apr 5 14:17:22 2010 New Revision: 100455 URL: http://llvm.org/viewvc/llvm-project?rev=100455&view=rev Log: Don't do code sinking on unreachable blocks. It's unprofitable and hazardous. 
Added: llvm/trunk/test/CodeGen/X86/unreachable-loop-sinking.ll Modified: llvm/trunk/lib/CodeGen/MachineSink.cpp Modified: llvm/trunk/lib/CodeGen/MachineSink.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MachineSink.cpp?rev=100455&r1=100454&r2=100455&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/MachineSink.cpp (original) +++ llvm/trunk/lib/CodeGen/MachineSink.cpp Mon Apr 5 14:17:22 2010 @@ -126,6 +126,11 @@ // Can't sink anything out of a block that has less than two successors. if (MBB.succ_size() <= 1 || MBB.empty()) return false; + // Don't bother sinking code out of unreachable blocks. In addition to being + // unprofitable, it can also lead to infinite looping, because in an unreachable + // loop there may be nowhere to stop. + if (!DT->isReachableFromEntry(&MBB)) return false; + bool MadeChange = false; // Walk the basic block bottom-up. Remember if we saw a store. Added: llvm/trunk/test/CodeGen/X86/unreachable-loop-sinking.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/unreachable-loop-sinking.ll?rev=100455&view=auto ============================================================================== --- llvm/trunk/test/CodeGen/X86/unreachable-loop-sinking.ll (added) +++ llvm/trunk/test/CodeGen/X86/unreachable-loop-sinking.ll Mon Apr 5 14:17:22 2010 @@ -0,0 +1,30 @@ +; RUN: llc < %s +; PR6777 + +; MachineSink shouldn't try to sink code in unreachable blocks, as it's +; not worthwhile, and there are corner cases which it doesn't handle. 
+ +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-unknown-linux-gnu" + +define double @fn1(i8* %arg, i64 %arg1) { +Entry: + br i1 undef, label %Body, label %Exit + +Exit: ; preds = %Brancher7, %Entry + ret double undef + +Body: ; preds = %Entry + br i1 false, label %Brancher7, label %Body3 + +Body3: ; preds = %Body6, %Body3, %Body + br label %Body3 + +Body6: ; preds = %Brancher7 + %tmp = fcmp oeq double 0xC04FBB2E40000000, undef ; [#uses=1] + br i1 %tmp, label %Body3, label %Brancher7 + +Brancher7: ; preds = %Body6, %Body + %tmp2 = icmp ult i32 undef, 10 ; [#uses=1] + br i1 %tmp2, label %Body6, label %Exit +} From stoklund at 2pi.dk Mon Apr 5 14:55:33 2010 From: stoklund at 2pi.dk (Jakob Stoklund Olesen) Date: Mon, 5 Apr 2010 12:55:33 -0700 Subject: [llvm-commits] [llvm] r100455 - in /llvm/trunk: lib/CodeGen/MachineSink.cpp test/CodeGen/X86/unreachable-loop-sinking.ll In-Reply-To: <20100405191722.426812A6C12C@llvm.org> References: <20100405191722.426812A6C12C@llvm.org> Message-ID: <310C7CE4-425F-4854-BA1C-1B90E00EFDAB@2pi.dk> On Apr 5, 2010, at 12:17 PM, Dan Gohman wrote: > Author: djg > Date: Mon Apr 5 14:17:22 2010 > New Revision: 100455 > > URL: http://llvm.org/viewvc/llvm-project?rev=100455&view=rev > Log: > Don't do code sinking on unreachable blocks. It's unprofitable and hazardous. Should we be running UnreachableBlockElim earlier? 
/jakob From clattner at apple.com Mon Apr 5 15:09:13 2010 From: clattner at apple.com (Chris Lattner) Date: Mon, 5 Apr 2010 13:09:13 -0700 Subject: [llvm-commits] [llvm] r100315 - in /llvm/trunk/lib/Target/X86/AsmPrinter: X86AsmPrinter.h X86MCInstLower.cpp In-Reply-To: <06FFAA59-43BC-4C69-87BE-6327206D092B@apple.com> References: <20100404053820.29FD12A6C12C@llvm.org> <6646DF19-CA78-4DD9-BEF1-18F64B5D48A8@apple.com> <28331BDC-9808-488A-861B-3ADA671D18DA@apple.com> <06FFAA59-43BC-4C69-87BE-6327206D092B@apple.com> Message-ID: <2C802F4A-2C17-44D6-A9A1-1EAEDA700714@apple.com> On Apr 5, 2010, at 11:09 AM, Devang Patel wrote: >>> >>> Ideally, there should be a target independent way to print DBG_VALUE machine instructions as comments. >> >> Do we actually want this long term? I've been regarding the comments as a hack that will go away when the Dwarf writer knows how to deal with dbg_value. > > At the moment, the DwarfDebug does not remove them from the instruction stream. The nice thing about printing comment is that it is only printed when -verbose-asm is used. I completely agree devang, they should be printed as comments (by target independent code) when verbose asm is enabled. I pointed this out a long time ago. 
-Chris From clattner at apple.com Mon Apr 5 15:09:55 2010 From: clattner at apple.com (Chris Lattner) Date: Mon, 5 Apr 2010 13:09:55 -0700 Subject: [llvm-commits] [llvm] r100233 - in /llvm/trunk: ./ include/llvm/Support/ lib/Target/ARM/ lib/Target/ARM/Disassembler/ test/MC/Disassembler/ utils/TableGen/ In-Reply-To: References: <20100402222738.E3E3D2A6C12C@llvm.org> <2582BF8C-8287-40FA-B090-C46C078D5973@apple.com> <0F76E611-D66F-4456-949D-9487D4E46D94@apple.com> <1270439298.1443.5.camel@aspire> <1270440180.1443.10.camel@aspire> <808FF74B-0D21-4F22-A064-53B27ABCFF14@apple.com> <6B4F5068-C32F-47F3-8E25-BFEF3E201CC3@2pi.dk> Message-ID: On Apr 5, 2010, at 7:42 AM, Jakob Stoklund Olesen wrote: >> LLVM makes this assumption all over the place, just like it assumes a two's complement representation and that 'unsigned' is 32-bits. It is safe to rely on. > > Hmm. Those are implementation defined. That's fine to rely on. This is undefined behaviour. We shouldn't rely on that. > > Hey, we should add support for "shl nsw" to LLVM IR! Evaluating "shl nsw i32 %x, N" effectively asserts that the high N+1 bits of %x are 0. That would enable a number of optimizations and wonderfully educational surprises for users. Implementations are free to define undefined behavior. If you want to use an unsigned shift left to be really careful, go for it. -Chris From sabre at nondot.org Mon Apr 5 15:11:45 2010 From: sabre at nondot.org (Chris Lattner) Date: Mon, 05 Apr 2010 20:11:45 -0000 Subject: [llvm-commits] [llvm] r100457 - /llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp Message-ID: <20100405201145.B92332A6C12C@llvm.org> Author: lattner Date: Mon Apr 5 15:11:45 2010 New Revision: 100457 URL: http://llvm.org/viewvc/llvm-project?rev=100457&view=rev Log: lowering a volatile llvm.memcpy to a libc memcpy is ok. 
PR6779 Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp?rev=100457&r1=100456&r2=100457&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (original) +++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp Mon Apr 5 15:11:45 2010 @@ -3519,7 +3519,6 @@ } // Emit a library call. - assert(!isVol && "library memcpy does not support volatile"); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; Entry.Ty = TLI.getTargetData()->getIntPtrType(*getContext()); From gohman at apple.com Mon Apr 5 15:24:08 2010 From: gohman at apple.com (Dan Gohman) Date: Mon, 05 Apr 2010 20:24:08 -0000 Subject: [llvm-commits] [llvm] r100459 - /llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp Message-ID: <20100405202408.56E612A6C12C@llvm.org> Author: djg Date: Mon Apr 5 15:24:08 2010 New Revision: 100459 URL: http://llvm.org/viewvc/llvm-project?rev=100459&view=rev Log: Add a comment. Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp?rev=100459&r1=100458&r2=100459&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (original) +++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp Mon Apr 5 15:24:08 2010 @@ -3518,6 +3518,12 @@ true, DstSV, DstSVOff, SrcSV, SrcSVOff); } + // FIXME: If the memcpy is volatile (isVol), lowering it to a plain libc + // memcpy is not guaranteed to be safe. libc memcpys aren't required to + // respect volatile, so they may do things like read or write memory + // beyond the given memory regions. 
But fixing this isn't easy, and most + // people don't care. + // Emit a library call. TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; From dalej at apple.com Mon Apr 5 15:45:35 2010 From: dalej at apple.com (Dale Johannesen) Date: Mon, 5 Apr 2010 13:45:35 -0700 Subject: [llvm-commits] [llvm] r100315 - in /llvm/trunk/lib/Target/X86/AsmPrinter: X86AsmPrinter.h X86MCInstLower.cpp In-Reply-To: <2C802F4A-2C17-44D6-A9A1-1EAEDA700714@apple.com> References: <20100404053820.29FD12A6C12C@llvm.org> <6646DF19-CA78-4DD9-BEF1-18F64B5D48A8@apple.com> <28331BDC-9808-488A-861B-3ADA671D18DA@apple.com> <06FFAA59-43BC-4C69-87BE-6327206D092B@apple.com> <2C802F4A-2C17-44D6-A9A1-1EAEDA700714@apple.com> Message-ID: <9A0249ED-E07B-4E8C-BDFE-20B88087F37A@apple.com> On Apr 5, 2010, at 1:09 PMPDT, Chris Lattner wrote: > > On Apr 5, 2010, at 11:09 AM, Devang Patel wrote: > >>>> >>>> Ideally, there should be a target independent way to print DBG_VALUE machine instructions as comments. >>> >>> Do we actually want this long term? I've been regarding the comments as a hack that will go away when the Dwarf writer knows how to deal with dbg_value. >> >> At the moment, the DwarfDebug does not remove them from the instruction stream. The nice thing about printing comment is that it is only printed when -verbose-asm is used. > > I completely agree devang, they should be printed as comments (by target independent code) when verbose asm is enabled. I pointed this out a long time ago. Lowering dbg_declare to DBG_VALUE produces a target-dependent form, so that won't entirely work. Devang has decided we don't need to do this for now, though; what is currently used can be done target-independent. The consensus seems to be to keep the comments, so I'll move the code up. 
From gohman at apple.com Mon Apr 5 16:04:18 2010 From: gohman at apple.com (Dan Gohman) Date: Mon, 5 Apr 2010 14:04:18 -0700 Subject: [llvm-commits] [llvm] r100455 - in /llvm/trunk: lib/CodeGen/MachineSink.cpp test/CodeGen/X86/unreachable-loop-sinking.ll In-Reply-To: <310C7CE4-425F-4854-BA1C-1B90E00EFDAB@2pi.dk> References: <20100405191722.426812A6C12C@llvm.org> <310C7CE4-425F-4854-BA1C-1B90E00EFDAB@2pi.dk> Message-ID: <9A19FA30-B343-4669-9662-D6402AEFC0F3@apple.com> On Apr 5, 2010, at 12:55 PM, Jakob Stoklund Olesen wrote: > > On Apr 5, 2010, at 12:17 PM, Dan Gohman wrote: > >> Author: djg >> Date: Mon Apr 5 14:17:22 2010 >> New Revision: 100455 >> >> URL: http://llvm.org/viewvc/llvm-project?rev=100455&view=rev >> Log: >> Don't do code sinking on unreachable blocks. It's unprofitable and hazardous. > > Should we be running UnreachableBlockElim earlier? It's nice to avoid dependencies like that when convenient. If someone had an alternative liveness analysis which tolerated unreachable blocks, then they wouldn't require UnreachableBlockElim at all. But it's not critical. Dan From jyasskin at google.com Mon Apr 5 16:09:12 2010 From: jyasskin at google.com (Jeffrey Yasskin) Date: Mon, 05 Apr 2010 21:09:12 -0000 Subject: [llvm-commits] [llvm] r100461 - /llvm/trunk/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp Message-ID: <20100405210912.7AAC62A6C12C@llvm.org> Author: jyasskin Date: Mon Apr 5 16:09:12 2010 New Revision: 100461 URL: http://llvm.org/viewvc/llvm-project?rev=100461&view=rev Log: Fix OProfileJITEventListener build for new DebugLoc. 
Modified: llvm/trunk/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp Modified: llvm/trunk/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp?rev=100461&r1=100460&r2=100461&view=diff ============================================================================== --- llvm/trunk/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp (original) +++ llvm/trunk/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp Mon Apr 5 16:09:12 2010 @@ -19,6 +19,7 @@ #define DEBUG_TYPE "oprofile-jit-event-listener" #include "llvm/Function.h" #include "llvm/Metadata.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/Analysis/DebugInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/ExecutionEngine/JITEventListener.h" @@ -77,10 +78,10 @@ DenseMap, std::string> Filenames; public: - const char *getFilename(DIScope Scope) { - std::string &Filename = Filenames[Scope.getNode()]; + const char *getFilename(MDNode *Scope) { + std::string &Filename = Filenames[Scope]; if (Filename.empty()) { - Filename = Scope.getFilename(); + Filename = DIScope(Scope).getFilename(); } return Filename.c_str(); } @@ -91,9 +92,9 @@ uintptr_t Address, DebugLoc Loc) { debug_line_info Result; Result.vma = Address; - DILocation DILoc = MF.getDILocation(Loc); - Result.lineno = DILoc.getLineNumber(); - Result.filename = Filenames.getFilename(DILoc.getScope()); + Result.lineno = Loc.getLine(); + Result.filename = Filenames.getFilename( + Loc.getScope(MF.getFunction()->getContext())); DEBUG(dbgs() << "Mapping " << reinterpret_cast(Result.vma) << " to " << Result.filename << ":" << Result.lineno << "\n"); return Result; From dgregor at apple.com Mon Apr 5 16:09:22 2010 From: dgregor at apple.com (Douglas Gregor) Date: Mon, 05 Apr 2010 21:09:22 -0000 Subject: [llvm-commits] [llvm] r100463 - /llvm/trunk/include/llvm/ADT/OwningPtr.h Message-ID: <20100405210922.ADA872A6C12C@llvm.org> Author: dgregor Date: 
Mon Apr 5 16:09:22 2010 New Revision: 100463 URL: http://llvm.org/viewvc/llvm-project?rev=100463&view=rev Log: Introduce MaybeOwningPtr, a smart pointer that may (or may not) have ownership over the pointer it contains. Useful when we want to communicate ownership while still having several clients holding on to the same pointer *without* introducing reference counting. Modified: llvm/trunk/include/llvm/ADT/OwningPtr.h Modified: llvm/trunk/include/llvm/ADT/OwningPtr.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/ADT/OwningPtr.h?rev=100463&r1=100462&r2=100463&view=diff ============================================================================== --- llvm/trunk/include/llvm/ADT/OwningPtr.h (original) +++ llvm/trunk/include/llvm/ADT/OwningPtr.h Mon Apr 5 16:09:22 2010 @@ -128,7 +128,99 @@ a.swap(b); } +/// \brief A smart pointer that may own the object it points to. +/// +/// An instance of \c MaybeOwningPtr may own the object it points to. If so, +/// it will guarantee that the object will be deleted either on destruction of +/// the OwningPtr or via an explicit reset(). Once created, ownership of the +/// pointee object can be taken away from OwningPtr by using the \c take() +/// method. +template<class T> +class MaybeOwningPtr { + T *Ptr; + bool Owned; + + struct MaybeOwningPtrRef { + MaybeOwningPtrRef(T *Ptr, bool &Owned) : Ptr(Ptr), Owned(Owned) { } + + T *Ptr; + bool &Owned; + }; + +public: + MaybeOwningPtr() : Ptr(0), Owned(false) { } + + explicit MaybeOwningPtr(T *P, bool OwnP) : Ptr(P), Owned(OwnP) {} + + /// \brief Take ownership of the pointer stored in \c Other. + MaybeOwningPtr(MaybeOwningPtr& Other) : Ptr(Other.Ptr), Owned(Other.Owned) { + Other.Owned = false; + } + MaybeOwningPtr(MaybeOwningPtrRef Other) : Ptr(Other.Ptr), Owned(Other.Owned) { + Other.Owned = false; + } + + /// \brief Take ownership of the pointer stored in \c Other.
+ MaybeOwningPtr &operator=(MaybeOwningPtr &Other) { + reset(Other.Ptr, Other.Owned); + Other.Owned = false; + return *this; + } + + ~MaybeOwningPtr() { + if (Owned) + delete Ptr; + } + + operator MaybeOwningPtrRef() { return MaybeOwningPtrRef(Ptr, Owned); } + + /// reset - Change the current pointee to the specified pointer. Note that + /// calling this with any pointer (including a null pointer) deletes the + /// current pointer. + void reset(T *P, bool OwnP) { + assert(P != Ptr); + if (Owned) + delete Ptr; + + Ptr = P; + Owned = OwnP; + } + + /// take - Return the underlying pointer and take ownership of it. This + /// \c MaybeOwningPtr must have ownership before the call, and will + /// relinquish ownership as part of the call. + T *take() { + assert(Owned && "Cannot take ownership from a non-owning pointer"); + Owned = false; + return Ptr; + } + + T &operator*() const { + assert(Ptr && "Cannot dereference null pointer"); + return *Ptr; + } + + T *operator->() const { return Ptr; } + T *get() const { return Ptr; } + operator bool() const { return Ptr != 0; } + bool operator!() const { return Ptr == 0; } + + void swap(MaybeOwningPtr &RHS) { + T *Tmp = RHS.Ptr; + RHS.Ptr = Ptr; + Ptr = Tmp; + bool TmpOwned = RHS.Owned; + RHS.Owned = Owned; + Owned = TmpOwned; + } +}; + +template +inline void swap(MaybeOwningPtr &a, MaybeOwningPtr &b) { + a.swap(b); +} + } // end namespace llvm #endif From echristo at apple.com Mon Apr 5 16:14:32 2010 From: echristo at apple.com (Eric Christopher) Date: Mon, 05 Apr 2010 21:14:32 -0000 Subject: [llvm-commits] [llvm] r100466 - /llvm/trunk/lib/Target/X86/X86InstrFormats.td Message-ID: <20100405211432.DC6A72A6C12C@llvm.org> Author: echristo Date: Mon Apr 5 16:14:32 2010 New Revision: 100466 URL: http://llvm.org/viewvc/llvm-project?rev=100466&view=rev Log: Remove FIXME. 
Modified: llvm/trunk/lib/Target/X86/X86InstrFormats.td Modified: llvm/trunk/lib/Target/X86/X86InstrFormats.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrFormats.td?rev=100466&r1=100465&r2=100466&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86InstrFormats.td (original) +++ llvm/trunk/lib/Target/X86/X86InstrFormats.td Mon Apr 5 16:14:32 2010 @@ -327,8 +327,7 @@ // AES Instruction Templates: // // AES8I -// FIXME: Verify these, they appear to use the same encoding as the SSE4.2 T8 -// and TA encodings. +// These use the same encoding as the SSE4.2 T8 and TA encodings. class AES8I o, Format F, dag outs, dag ins, string asm, listpattern> : I, T8, From evan.cheng at apple.com Mon Apr 5 16:16:25 2010 From: evan.cheng at apple.com (Evan Cheng) Date: Mon, 05 Apr 2010 21:16:25 -0000 Subject: [llvm-commits] [llvm] r100467 - in /llvm/trunk/lib/Transforms: Scalar/LoopUnswitch.cpp Utils/BasicBlockUtils.cpp Message-ID: <20100405211625.F14FF2A6C12C@llvm.org> Author: evancheng Date: Mon Apr 5 16:16:25 2010 New Revision: 100467 URL: http://llvm.org/viewvc/llvm-project?rev=100467&view=rev Log: Code clean up. 
Modified: llvm/trunk/lib/Transforms/Scalar/LoopUnswitch.cpp llvm/trunk/lib/Transforms/Utils/BasicBlockUtils.cpp Modified: llvm/trunk/lib/Transforms/Scalar/LoopUnswitch.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopUnswitch.cpp?rev=100467&r1=100466&r2=100467&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/Scalar/LoopUnswitch.cpp (original) +++ llvm/trunk/lib/Transforms/Scalar/LoopUnswitch.cpp Mon Apr 5 16:16:25 2010 @@ -619,15 +619,15 @@ NewBlocks.reserve(LoopBlocks.size()); DenseMap ValueMap; for (unsigned i = 0, e = LoopBlocks.size(); i != e; ++i) { - BasicBlock *New = CloneBasicBlock(LoopBlocks[i], ValueMap, ".us", F); - NewBlocks.push_back(New); - ValueMap[LoopBlocks[i]] = New; // Keep the BB mapping. - LPM->cloneBasicBlockSimpleAnalysis(LoopBlocks[i], New, L); + BasicBlock *NewBB = CloneBasicBlock(LoopBlocks[i], ValueMap, ".us", F); + NewBlocks.push_back(NewBB); + ValueMap[LoopBlocks[i]] = NewBB; // Keep the BB mapping. + LPM->cloneBasicBlockSimpleAnalysis(LoopBlocks[i], NewBB, L); } // Splice the newly inserted blocks into the function right before the // original preheader. - F->getBasicBlockList().splice(LoopBlocks[0], F->getBasicBlockList(), + F->getBasicBlockList().splice(NewPreheader, F->getBasicBlockList(), NewBlocks[0], F->end()); // Now we create the new Loop object for the versioned loop. @@ -652,8 +652,8 @@ // If the successor of the exit block had PHI nodes, add an entry for // NewExit. PHINode *PN; - for (BasicBlock::iterator I = ExitSucc->begin(); - (PN = dyn_cast(I)); ++I) { + for (BasicBlock::iterator I = ExitSucc->begin(); isa(I); ++I) { + PN = cast(I); Value *V = PN->getIncomingValueForBlock(ExitBlocks[i]); DenseMap::iterator It = ValueMap.find(V); if (It != ValueMap.end()) V = It->second; @@ -682,7 +682,7 @@ // Now we rewrite the original code to know that the condition is true and the // new code to know that the condition is false. 
- RewriteLoopBodyWithConditionConstant(L , LIC, Val, false); + RewriteLoopBodyWithConditionConstant(L, LIC, Val, false); // It's possible that simplifying one loop could cause the other to be // deleted. If so, don't simplify it. Modified: llvm/trunk/lib/Transforms/Utils/BasicBlockUtils.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Utils/BasicBlockUtils.cpp?rev=100467&r1=100466&r2=100467&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/Utils/BasicBlockUtils.cpp (original) +++ llvm/trunk/lib/Transforms/Utils/BasicBlockUtils.cpp Mon Apr 5 16:16:25 2010 @@ -336,21 +336,19 @@ if (Loop *L = LI->getLoopFor(Old)) L->addBasicBlockToLoop(New, LI->getBase()); - if (DominatorTree *DT = P->getAnalysisIfAvailable()) - { - // Old dominates New. New node domiantes all other nodes dominated by Old. - DomTreeNode *OldNode = DT->getNode(Old); - std::vector Children; - for (DomTreeNode::iterator I = OldNode->begin(), E = OldNode->end(); - I != E; ++I) - Children.push_back(*I); - - DomTreeNode *NewNode = DT->addNewBlock(New,Old); + if (DominatorTree *DT = P->getAnalysisIfAvailable()) { + // Old dominates New. New node domiantes all other nodes dominated by Old. 
+ DomTreeNode *OldNode = DT->getNode(Old); + std::vector Children; + for (DomTreeNode::iterator I = OldNode->begin(), E = OldNode->end(); + I != E; ++I) + Children.push_back(*I); + DomTreeNode *NewNode = DT->addNewBlock(New,Old); for (std::vector::iterator I = Children.begin(), E = Children.end(); I != E; ++I) DT->changeImmediateDominator(*I, NewNode); - } + } if (DominanceFrontier *DF = P->getAnalysisIfAvailable()) DF->splitBlock(Old); From sabre at nondot.org Mon Apr 5 16:18:32 2010 From: sabre at nondot.org (Chris Lattner) Date: Mon, 05 Apr 2010 21:18:32 -0000 Subject: [llvm-commits] [llvm] r100468 - /llvm/trunk/lib/Transforms/Scalar/LoopUnswitch.cpp Message-ID: <20100405211832.D732A2A6C12C@llvm.org> Author: lattner Date: Mon Apr 5 16:18:32 2010 New Revision: 100468 URL: http://llvm.org/viewvc/llvm-project?rev=100468&view=rev Log: some code cleanups, use SwitchInst::findCaseValue, reduce indentation Modified: llvm/trunk/lib/Transforms/Scalar/LoopUnswitch.cpp Modified: llvm/trunk/lib/Transforms/Scalar/LoopUnswitch.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopUnswitch.cpp?rev=100468&r1=100467&r2=100468&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/Scalar/LoopUnswitch.cpp (original) +++ llvm/trunk/lib/Transforms/Scalar/LoopUnswitch.cpp Mon Apr 5 16:18:32 2010 @@ -231,8 +231,7 @@ // block that is branching on a loop-invariant condition, we can unswitch this // loop. 
for (Loop::block_iterator I = currentLoop->block_begin(), - E = currentLoop->block_end(); - I != E; ++I) { + E = currentLoop->block_end(); I != E; ++I) { TerminatorInst *TI = (*I)->getTerminator(); if (BranchInst *BI = dyn_cast(TI)) { // If this isn't branching on an invariant condition, we can't unswitch @@ -474,7 +473,6 @@ static Loop *CloneLoop(Loop *L, Loop *PL, DenseMap &VM, LoopInfo *LI, LPPassManager *LPM) { Loop *New = new Loop(); - LPM->insertLoop(New, PL); // Add all of the blocks in L to the new loop. @@ -565,8 +563,7 @@ /// SplitExitEdges - Split all of the edges from inside the loop to their exit /// blocks. Update the appropriate Phi nodes as we do so. void LoopUnswitch::SplitExitEdges(Loop *L, - const SmallVector &ExitBlocks) -{ + const SmallVector &ExitBlocks){ for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { BasicBlock *ExitBlock = ExitBlocks[i]; @@ -884,65 +881,66 @@ U->replaceUsesOfWith(LIC, Replacement); Worklist.push_back(U); } - } else { - // Otherwise, we don't know the precise value of LIC, but we do know that it - // is certainly NOT "Val". As such, simplify any uses in the loop that we - // can. This case occurs when we unswitch switch statements. - for (unsigned i = 0, e = Users.size(); i != e; ++i) - if (Instruction *U = cast(Users[i])) { - if (!L->contains(U)) - continue; + SimplifyCode(Worklist, L); + return; + } + + // Otherwise, we don't know the precise value of LIC, but we do know that it + // is certainly NOT "Val". As such, simplify any uses in the loop that we + // can. This case occurs when we unswitch switch statements. + for (unsigned i = 0, e = Users.size(); i != e; ++i) { + Instruction *U = cast(Users[i]); + if (!L->contains(U)) + continue; - Worklist.push_back(U); + Worklist.push_back(U); - // If we know that LIC is not Val, use this info to simplify code. 
- if (SwitchInst *SI = dyn_cast(U)) { - for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i) { - if (SI->getCaseValue(i) == Val) { - // Found a dead case value. Don't remove PHI nodes in the - // successor if they become single-entry, those PHI nodes may - // be in the Users list. - - // FIXME: This is a hack. We need to keep the successor around - // and hooked up so as to preserve the loop structure, because - // trying to update it is complicated. So instead we preserve the - // loop structure and put the block on a dead code path. - BasicBlock *Switch = SI->getParent(); - SplitEdge(Switch, SI->getSuccessor(i), this); - // Compute the successors instead of relying on the return value - // of SplitEdge, since it may have split the switch successor - // after PHI nodes. - BasicBlock *NewSISucc = SI->getSuccessor(i); - BasicBlock *OldSISucc = *succ_begin(NewSISucc); - // Create an "unreachable" destination. - BasicBlock *Abort = BasicBlock::Create(Context, "us-unreachable", - Switch->getParent(), - OldSISucc); - new UnreachableInst(Context, Abort); - // Force the new case destination to branch to the "unreachable" - // block while maintaining a (dead) CFG edge to the old block. - NewSISucc->getTerminator()->eraseFromParent(); - BranchInst::Create(Abort, OldSISucc, - ConstantInt::getTrue(Context), NewSISucc); - // Release the PHI operands for this edge. - for (BasicBlock::iterator II = NewSISucc->begin(); - PHINode *PN = dyn_cast(II); ++II) - PN->setIncomingValue(PN->getBasicBlockIndex(Switch), - UndefValue::get(PN->getType())); - // Tell the domtree about the new block. We don't fully update the - // domtree here -- instead we force it to do a full recomputation - // after the pass is complete -- but we do need to inform it of - // new blocks. - if (DT) - DT->addNewBlock(Abort, NewSISucc); - break; - } - } - } + // TODO: We could do other simplifications, for example, turning + // 'icmp eq LIC, Val' -> false. 
+ + // If we know that LIC is not Val, use this info to simplify code. + SwitchInst *SI = dyn_cast(U); + if (SI == 0 || !isa(Val)) continue; + + unsigned DeadCase = SI->findCaseValue(cast(Val)); + if (DeadCase == 0) continue; // Default case is live for multiple values. + + // Found a dead case value. Don't remove PHI nodes in the + // successor if they become single-entry, those PHI nodes may + // be in the Users list. - // TODO: We could do other simplifications, for example, turning - // LIC == Val -> false. - } + // FIXME: This is a hack. We need to keep the successor around + // and hooked up so as to preserve the loop structure, because + // trying to update it is complicated. So instead we preserve the + // loop structure and put the block on a dead code path. + BasicBlock *Switch = SI->getParent(); + SplitEdge(Switch, SI->getSuccessor(DeadCase), this); + // Compute the successors instead of relying on the return value + // of SplitEdge, since it may have split the switch successor + // after PHI nodes. + BasicBlock *NewSISucc = SI->getSuccessor(DeadCase); + BasicBlock *OldSISucc = *succ_begin(NewSISucc); + // Create an "unreachable" destination. + BasicBlock *Abort = BasicBlock::Create(Context, "us-unreachable", + Switch->getParent(), + OldSISucc); + new UnreachableInst(Context, Abort); + // Force the new case destination to branch to the "unreachable" + // block while maintaining a (dead) CFG edge to the old block. + NewSISucc->getTerminator()->eraseFromParent(); + BranchInst::Create(Abort, OldSISucc, + ConstantInt::getTrue(Context), NewSISucc); + // Release the PHI operands for this edge. + for (BasicBlock::iterator II = NewSISucc->begin(); + PHINode *PN = dyn_cast(II); ++II) + PN->setIncomingValue(PN->getBasicBlockIndex(Switch), + UndefValue::get(PN->getType())); + // Tell the domtree about the new block. 
We don't fully update the + // domtree here -- instead we force it to do a full recomputation + // after the pass is complete -- but we do need to inform it of + // new blocks. + if (DT) + DT->addNewBlock(Abort, NewSISucc); } SimplifyCode(Worklist, L); @@ -1054,7 +1052,10 @@ LPM->deleteSimpleAnalysisValue(Succ, L); Succ->eraseFromParent(); ++NumSimplify; - } else if (ConstantInt *CB = dyn_cast(BI->getCondition())){ + break; + } + + if (ConstantInt *CB = dyn_cast(BI->getCondition())){ // Conditional branch. Turn it into an unconditional branch, then // remove dead blocks. break; // FIXME: Enable. From sabre at nondot.org Mon Apr 5 17:14:48 2010 From: sabre at nondot.org (Chris Lattner) Date: Mon, 05 Apr 2010 22:14:48 -0000 Subject: [llvm-commits] [llvm] r100478 - /llvm/trunk/lib/Transforms/Scalar/SCCP.cpp Message-ID: <20100405221448.5ACBA2A6C12C@llvm.org> Author: lattner Date: Mon Apr 5 17:14:48 2010 New Revision: 100478 URL: http://llvm.org/viewvc/llvm-project?rev=100478&view=rev Log: fix a really nasty bug that Evan was tracking in SCCP. When resolving undefs in branches/switches, we have two cases: a branch on a literal undef or a branch on a symbolic value which is undef. If we have a literal undef, the code was correct: forcing it to a constant is the right thing to do. If we have a branch on a symbolic value that is undef, we should force the symbolic value to a constant, which then makes the successor block live. Forcing the condition of the branch to being a constant isn't safe if later paths become live and the value becomes overdefined. This is the case that 'forcedconstant' is designed to handle, so just use it. This fixes rdar://7765019 but there is no good testcase for this, the one I have is too insane to be useful in the future. 
Modified: llvm/trunk/lib/Transforms/Scalar/SCCP.cpp Modified: llvm/trunk/lib/Transforms/Scalar/SCCP.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/SCCP.cpp?rev=100478&r1=100477&r2=100478&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/Scalar/SCCP.cpp (original) +++ llvm/trunk/lib/Transforms/Scalar/SCCP.cpp Mon Apr 5 17:14:48 2010 @@ -1521,45 +1521,48 @@ } } + // Check to see if we have a branch or switch on an undefined value. If so + // we force the branch to go one way or the other to make the successor + // values live. It doesn't really matter which way we force it. TerminatorInst *TI = BB->getTerminator(); if (BranchInst *BI = dyn_cast(TI)) { if (!BI->isConditional()) continue; if (!getValueState(BI->getCondition()).isUndefined()) continue; - } else if (SwitchInst *SI = dyn_cast(TI)) { + + // If the input to SCCP is actually branch on undef, fix the undef to + // false. + if (isa(BI->getCondition())) { + BI->setCondition(ConstantInt::getFalse(BI->getContext())); + markEdgeExecutable(BB, TI->getSuccessor(1)); + return true; + } + + // Otherwise, it is a branch on a symbolic value which is currently + // considered to be undef. Handle this by forcing the input value to the + // branch to false. + markForcedConstant(BI->getCondition(), + ConstantInt::getFalse(TI->getContext())); + return true; + } + + if (SwitchInst *SI = dyn_cast(TI)) { if (SI->getNumSuccessors() < 2) // no cases continue; if (!getValueState(SI->getCondition()).isUndefined()) continue; - } else { - continue; - } - - // If the edge to the second successor isn't thought to be feasible yet, - // mark it so now. We pick the second one so that this goes to some - // enumerated value in a switch instead of going to the default destination. - if (KnownFeasibleEdges.count(Edge(BB, TI->getSuccessor(1)))) - continue; - - // Otherwise, it isn't already thought to be feasible. 
Mark it as such now - // and return. This will make other blocks reachable, which will allow new - // values to be discovered and existing ones to be moved in the lattice. - markEdgeExecutable(BB, TI->getSuccessor(1)); - - // This must be a conditional branch of switch on undef. At this point, - // force the old terminator to branch to the first successor. This is - // required because we are now influencing the dataflow of the function with - // the assumption that this edge is taken. If we leave the branch condition - // as undef, then further analysis could think the undef went another way - // leading to an inconsistent set of conclusions. - if (BranchInst *BI = dyn_cast(TI)) { - BI->setCondition(ConstantInt::getFalse(BI->getContext())); - } else { - SwitchInst *SI = cast(TI); - SI->setCondition(SI->getCaseValue(1)); + + // If the input to SCCP is actually switch on undef, fix the undef to + // the first constant. + if (isa(SI->getCondition())) { + SI->setCondition(SI->getCaseValue(1)); + markEdgeExecutable(BB, TI->getSuccessor(1)); + return true; + } + + markForcedConstant(SI->getCondition(), SI->getCaseValue(1)); + return true; } - - return true; } return false; From evan.cheng at apple.com Mon Apr 5 17:21:09 2010 From: evan.cheng at apple.com (Evan Cheng) Date: Mon, 05 Apr 2010 22:21:09 -0000 Subject: [llvm-commits] [llvm] r100480 - /llvm/trunk/lib/Target/X86/X86InstrInfo.td Message-ID: <20100405222109.472672A6C12C@llvm.org> Author: evancheng Date: Mon Apr 5 17:21:09 2010 New Revision: 100480 URL: http://llvm.org/viewvc/llvm-project?rev=100480&view=rev Log: Fix ADD32rr_alt instruction encoding bug. Patch by Marius Wachtler. 
Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.td Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.td?rev=100480&r1=100479&r2=100480&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86InstrInfo.td (original) +++ llvm/trunk/lib/Target/X86/X86InstrInfo.td Mon Apr 5 17:21:09 2010 @@ -2707,7 +2707,7 @@ "add{b}\t{$src2, $dst|$dst, $src2}", []>; def ADD16rr_alt: I<0x03, MRMSrcReg,(outs GR16:$dst),(ins GR16:$src1, GR16:$src2), "add{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize; - def ADD32rr_alt: I<0x03, MRMSrcReg,(outs GR16:$dst),(ins GR16:$src1, GR16:$src2), + def ADD32rr_alt: I<0x03, MRMSrcReg,(outs GR32:$dst),(ins GR32:$src1, GR32:$src2), "add{l}\t{$src2, $dst|$dst, $src2}", []>; } From evan.cheng at apple.com Mon Apr 5 17:30:05 2010 From: evan.cheng at apple.com (Evan Cheng) Date: Mon, 05 Apr 2010 22:30:05 -0000 Subject: [llvm-commits] [llvm] r100482 - /llvm/trunk/test/CodeGen/X86/memcpy.ll Message-ID: <20100405223005.E028F2A6C12C@llvm.org> Author: evancheng Date: Mon Apr 5 17:30:05 2010 New Revision: 100482 URL: http://llvm.org/viewvc/llvm-project?rev=100482&view=rev Log: Add nounwind. 
Modified: llvm/trunk/test/CodeGen/X86/memcpy.ll Modified: llvm/trunk/test/CodeGen/X86/memcpy.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/memcpy.ll?rev=100482&r1=100481&r2=100482&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/X86/memcpy.ll (original) +++ llvm/trunk/test/CodeGen/X86/memcpy.ll Mon Apr 5 17:30:05 2010 @@ -2,13 +2,13 @@ declare void @llvm.memcpy.i64(i8*, i8*, i64, i32) -define i8* @my_memcpy(i8* %a, i8* %b, i64 %n) { +define i8* @my_memcpy(i8* %a, i8* %b, i64 %n) nounwind { entry: tail call void @llvm.memcpy.i64( i8* %a, i8* %b, i64 %n, i32 1 ) ret i8* %a } -define i8* @my_memcpy2(i64* %a, i64* %b, i64 %n) { +define i8* @my_memcpy2(i64* %a, i64* %b, i64 %n) nounwind { entry: %tmp14 = bitcast i64* %a to i8* %tmp25 = bitcast i64* %b to i8* From sabre at nondot.org Mon Apr 5 17:42:30 2010 From: sabre at nondot.org (Chris Lattner) Date: Mon, 05 Apr 2010 22:42:30 -0000 Subject: [llvm-commits] [llvm] r100485 - in /llvm/trunk: include/llvm/Support/MemoryBuffer.h lib/AsmParser/Parser.cpp lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp lib/Support/MemoryBuffer.cpp tools/lto/LTOModule.cpp utils/FileCheck/FileCheck.cpp Message-ID: <20100405224230.3D8842A6C133@llvm.org> Author: lattner Date: Mon Apr 5 17:42:30 2010 New Revision: 100485 URL: http://llvm.org/viewvc/llvm-project?rev=100485&view=rev Log: stringref-ize the MemoryBuffer::get apis. This requires a co-committed clang patch. 
Modified: llvm/trunk/include/llvm/Support/MemoryBuffer.h llvm/trunk/lib/AsmParser/Parser.cpp llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp llvm/trunk/lib/Support/MemoryBuffer.cpp llvm/trunk/tools/lto/LTOModule.cpp llvm/trunk/utils/FileCheck/FileCheck.cpp Modified: llvm/trunk/include/llvm/Support/MemoryBuffer.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Support/MemoryBuffer.h?rev=100485&r1=100484&r2=100485&view=diff ============================================================================== --- llvm/trunk/include/llvm/Support/MemoryBuffer.h (original) +++ llvm/trunk/include/llvm/Support/MemoryBuffer.h Mon Apr 5 17:42:30 2010 @@ -65,13 +65,13 @@ /// getMemBuffer - Open the specified memory range as a MemoryBuffer. Note /// that EndPtr[0] must be a null byte and be accessible! - static MemoryBuffer *getMemBuffer(const char *StartPtr, const char *EndPtr, + static MemoryBuffer *getMemBuffer(StringRef InputData, const char *BufferName = ""); /// getMemBufferCopy - Open the specified memory range as a MemoryBuffer, /// copying the contents and taking ownership of it. This has no requirements /// on EndPtr[0]. 
- static MemoryBuffer *getMemBufferCopy(const char *StartPtr,const char *EndPtr, + static MemoryBuffer *getMemBufferCopy(StringRef InputData, const char *BufferName = ""); /// getNewMemBuffer - Allocate a new MemoryBuffer of the specified size that Modified: llvm/trunk/lib/AsmParser/Parser.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/AsmParser/Parser.cpp?rev=100485&r1=100484&r2=100485&view=diff ============================================================================== --- llvm/trunk/lib/AsmParser/Parser.cpp (original) +++ llvm/trunk/lib/AsmParser/Parser.cpp Mon Apr 5 17:42:30 2010 @@ -56,7 +56,7 @@ Module *llvm::ParseAssemblyString(const char *AsmString, Module *M, SMDiagnostic &Err, LLVMContext &Context) { MemoryBuffer *F = - MemoryBuffer::getMemBuffer(AsmString, AsmString+strlen(AsmString), + MemoryBuffer::getMemBuffer(StringRef(AsmString, strlen(AsmString)), ""); return ParseAssembly(F, M, Err, Context); Modified: llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp?rev=100485&r1=100484&r2=100485&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp (original) +++ llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp Mon Apr 5 17:42:30 2010 @@ -61,6 +61,7 @@ // If this asmstr is empty, just print the #APP/#NOAPP markers. // These are useful to see where empty asm's wound up. if (AsmStr[0] == 0) { + // Don't emit the comments if writing to a .o file. if (!OutStreamer.hasRawTextSupport()) return; OutStreamer.EmitRawText(Twine("\t")+MAI->getCommentString()+ @@ -104,7 +105,7 @@ } case '\n': ++LastEmitted; // Consume newline character. - OS << '\n'; // Indent code with newline. + OS << '\n'; // Indent code with newline. break; case '$': { ++LastEmitted; // Consume '$' character. 
@@ -183,26 +184,23 @@ // supports syntax like ${0:u}, which correspond to "%u0" in GCC asm. if (*LastEmitted == ':') { ++LastEmitted; // Consume ':' character. - if (*LastEmitted == 0) { - llvm_report_error("Bad ${:} expression in inline asm string: '" - + std::string(AsmStr) + "'"); - } + if (*LastEmitted == 0) + llvm_report_error("Bad ${:} expression in inline asm string: '" + + std::string(AsmStr) + "'"); Modifier[0] = *LastEmitted; ++LastEmitted; // Consume modifier character. } - if (*LastEmitted != '}') { + if (*LastEmitted != '}') llvm_report_error("Bad ${} expression in inline asm string: '" + std::string(AsmStr) + "'"); - } ++LastEmitted; // Consume '}' character. } - if (Val >= NumOperands-1) { + if (Val >= NumOperands-1) llvm_report_error("Invalid $ operand number in inline asm string: '" + std::string(AsmStr) + "'"); - } // Okay, we finally have a value number. Ask the target to print this // operand! Modified: llvm/trunk/lib/Support/MemoryBuffer.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/MemoryBuffer.cpp?rev=100485&r1=100484&r2=100485&view=diff ============================================================================== --- llvm/trunk/lib/Support/MemoryBuffer.cpp (original) +++ llvm/trunk/lib/Support/MemoryBuffer.cpp Mon Apr 5 17:42:30 2010 @@ -71,13 +71,12 @@ class MemoryBufferMem : public MemoryBuffer { std::string FileID; public: - MemoryBufferMem(const char *Start, const char *End, StringRef FID, - bool Copy = false) + MemoryBufferMem(StringRef InputData, StringRef FID, bool Copy = false) : FileID(FID) { if (!Copy) - init(Start, End); + init(InputData.data(), InputData.data()+InputData.size()); else - initCopyOf(Start, End); + initCopyOf(InputData.data(), InputData.data()+InputData.size()); } virtual const char *getBufferIdentifier() const { @@ -88,19 +87,17 @@ /// getMemBuffer - Open the specified memory range as a MemoryBuffer. Note /// that EndPtr[0] must be a null byte and be accessible! 
-MemoryBuffer *MemoryBuffer::getMemBuffer(const char *StartPtr, - const char *EndPtr, +MemoryBuffer *MemoryBuffer::getMemBuffer(StringRef InputData, const char *BufferName) { - return new MemoryBufferMem(StartPtr, EndPtr, BufferName); + return new MemoryBufferMem(InputData, BufferName); } /// getMemBufferCopy - Open the specified memory range as a MemoryBuffer, /// copying the contents and taking ownership of it. This has no requirements /// on EndPtr[0]. -MemoryBuffer *MemoryBuffer::getMemBufferCopy(const char *StartPtr, - const char *EndPtr, +MemoryBuffer *MemoryBuffer::getMemBufferCopy(StringRef InputData, const char *BufferName) { - return new MemoryBufferMem(StartPtr, EndPtr, BufferName, true); + return new MemoryBufferMem(InputData, BufferName, true); } /// getNewUninitMemBuffer - Allocate a new MemoryBuffer of the specified size @@ -112,7 +109,7 @@ char *Buf = (char *)malloc(Size+1); if (!Buf) return 0; Buf[Size] = 0; - MemoryBufferMem *SB = new MemoryBufferMem(Buf, Buf+Size, BufferName); + MemoryBufferMem *SB = new MemoryBufferMem(StringRef(Buf, Size), BufferName); // The memory for this buffer is owned by the MemoryBuffer. SB->MustDeleteBuffer = true; return SB; Modified: llvm/trunk/tools/lto/LTOModule.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/lto/LTOModule.cpp?rev=100485&r1=100484&r2=100485&view=diff ============================================================================== --- llvm/trunk/tools/lto/LTOModule.cpp (original) +++ llvm/trunk/tools/lto/LTOModule.cpp Mon Apr 5 17:42:30 2010 @@ -101,13 +101,13 @@ /// Also if next byte is on a different page, don't assume it is readable. 
MemoryBuffer* LTOModule::makeBuffer(const void* mem, size_t length) { - const char* startPtr = (char*)mem; - const char* endPtr = startPtr+length; - if ((((uintptr_t)endPtr & (sys::Process::GetPageSize()-1)) == 0) - || (*endPtr != 0)) - return MemoryBuffer::getMemBufferCopy(startPtr, endPtr); - else - return MemoryBuffer::getMemBuffer(startPtr, endPtr); + const char *startPtr = (char*)mem; + const char *endPtr = startPtr+length; + if (((uintptr_t)endPtr & (sys::Process::GetPageSize()-1)) == 0 || + *endPtr != 0) + return MemoryBuffer::getMemBufferCopy(StringRef(startPtr, length)); + + return MemoryBuffer::getMemBuffer(StringRef(startPtr, length)); } Modified: llvm/trunk/utils/FileCheck/FileCheck.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/FileCheck/FileCheck.cpp?rev=100485&r1=100484&r2=100485&view=diff ============================================================================== --- llvm/trunk/utils/FileCheck/FileCheck.cpp (original) +++ llvm/trunk/utils/FileCheck/FileCheck.cpp Mon Apr 5 17:42:30 2010 @@ -441,7 +441,7 @@ /// CanonicalizeInputFile - Remove duplicate horizontal space from the specified /// memory buffer, free it, and return a new one. static MemoryBuffer *CanonicalizeInputFile(MemoryBuffer *MB) { - SmallVector NewFile; + SmallString<128> NewFile; NewFile.reserve(MB->getBufferSize()); for (const char *Ptr = MB->getBufferStart(), *End = MB->getBufferEnd(); @@ -461,9 +461,7 @@ // Free the old buffer and return a new one. 
MemoryBuffer *MB2 = - MemoryBuffer::getMemBufferCopy(NewFile.data(), - NewFile.data() + NewFile.size(), - MB->getBufferIdentifier()); + MemoryBuffer::getMemBufferCopy(NewFile.str(), MB->getBufferIdentifier()); delete MB; return MB2; From sabre at nondot.org Mon Apr 5 17:49:48 2010 From: sabre at nondot.org (Chris Lattner) Date: Mon, 05 Apr 2010 22:49:48 -0000 Subject: [llvm-commits] [llvm] r100486 - /llvm/trunk/unittests/ExecutionEngine/JIT/JITTest.cpp Message-ID: <20100405224948.BFB272A6C12E@llvm.org> Author: lattner Date: Mon Apr 5 17:49:48 2010 New Revision: 100486 URL: http://llvm.org/viewvc/llvm-project?rev=100486&view=rev Log: update unit test for api change. Modified: llvm/trunk/unittests/ExecutionEngine/JIT/JITTest.cpp Modified: llvm/trunk/unittests/ExecutionEngine/JIT/JITTest.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/unittests/ExecutionEngine/JIT/JITTest.cpp?rev=100486&r1=100485&r2=100486&view=diff ============================================================================== --- llvm/trunk/unittests/ExecutionEngine/JIT/JITTest.cpp (original) +++ llvm/trunk/unittests/ExecutionEngine/JIT/JITTest.cpp Mon Apr 5 17:49:48 2010 @@ -701,9 +701,7 @@ LLVMContext &Context, const std::string &Bitcode, Module *&M) { // c_str() is null-terminated like MemoryBuffer::getMemBuffer requires. 
MemoryBuffer *BitcodeBuffer = - MemoryBuffer::getMemBuffer(Bitcode.c_str(), - Bitcode.c_str() + Bitcode.size(), - "Bitcode for test"); + MemoryBuffer::getMemBuffer(Bitcode, "Bitcode for test"); std::string errMsg; M = getLazyBitcodeModule(BitcodeBuffer, Context, &errMsg); if (M == NULL) { From isanbard at gmail.com Mon Apr 5 17:59:21 2010 From: isanbard at gmail.com (Bill Wendling) Date: Mon, 05 Apr 2010 22:59:21 -0000 Subject: [llvm-commits] [llvm] r100487 - /llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp Message-ID: <20100405225921.E3BD02A6C12D@llvm.org> Author: void Date: Mon Apr 5 17:59:21 2010 New Revision: 100487 URL: http://llvm.org/viewvc/llvm-project?rev=100487&view=rev Log: Output floating point representations in DWARF format. This is done by outputting the FP encoding directly as a hex representation. Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp?rev=100487&r1=100486&r2=100487&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp (original) +++ llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp Mon Apr 5 17:59:21 2010 @@ -14,6 +14,7 @@ #define DEBUG_TYPE "dwarfdebug" #include "DwarfDebug.h" #include "DIE.h" +#include "llvm/Constants.h" #include "llvm/Module.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" @@ -1532,6 +1533,31 @@ if (MCSymbol *VS = DV->getDbgValueLabel()) addLabel(VariableDie, dwarf::DW_AT_start_scope, dwarf::DW_FORM_addr, VS); + } else if (DbgValueInsn->getOperand(0).getType() == + MachineOperand::MO_FPImmediate) { + DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); + APFloat FPImm = DbgValueInsn->getOperand(0).getFPImm()->getValueAPF(); + + // Get the raw data form of the floating point. 
+ const APInt FltVal = FPImm.bitcastToAPInt(); + const char *FltPtr = (const char*)FltVal.getRawData(); + + unsigned NumBytes = FltVal.getBitWidth() / 8; // 8 bits per byte. + bool LittleEndian = Asm->getTargetData().isLittleEndian(); + int Incr = (LittleEndian ? 1 : -1); + int Start = (LittleEndian ? 0 : NumBytes - 1); + int Stop = (LittleEndian ? NumBytes : -1); + + // Output the constant to DWARF one byte at a time. + for (; Start != Stop; Start += Incr) + addUInt(Block, 0, dwarf::DW_FORM_data1, + (unsigned char)0xFF & FltPtr[Start]); + + addBlock(VariableDie, dwarf::DW_AT_const_value, 0, Block); + + if (MCSymbol *VS = DV->getDbgValueLabel()) + addLabel(VariableDie, dwarf::DW_AT_start_scope, dwarf::DW_FORM_addr, + VS); } else { //FIXME : Handle other operand types. delete VariableDie; From sabre at nondot.org Mon Apr 5 18:07:18 2010 From: sabre at nondot.org (Chris Lattner) Date: Mon, 05 Apr 2010 23:07:18 -0000 Subject: [llvm-commits] [llvm] r100489 - /llvm/trunk/tools/llvm-mc/llvm-mc.cpp Message-ID: <20100405230718.35F812A6C12D@llvm.org> Author: lattner Date: Mon Apr 5 18:07:18 2010 New Revision: 100489 URL: http://llvm.org/viewvc/llvm-project?rev=100489&view=rev Log: tidy #includes. Modified: llvm/trunk/tools/llvm-mc/llvm-mc.cpp Modified: llvm/trunk/tools/llvm-mc/llvm-mc.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-mc/llvm-mc.cpp?rev=100489&r1=100488&r2=100489&view=diff ============================================================================== --- llvm/trunk/tools/llvm-mc/llvm-mc.cpp (original) +++ llvm/trunk/tools/llvm-mc/llvm-mc.cpp Mon Apr 5 18:07:18 2010 @@ -18,6 +18,13 @@ #include "llvm/MC/MCInstPrinter.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCParser/AsmParser.h" +#include "llvm/Target/TargetAsmBackend.h" +#include "llvm/Target/TargetAsmParser.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetRegistry.h" +#include "llvm/Target/TargetMachine.h" // FIXME. 
+#include "llvm/Target/TargetSelect.h" #include "llvm/ADT/OwningPtr.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/FormattedStream.h" @@ -28,13 +35,6 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/System/Host.h" #include "llvm/System/Signals.h" -#include "llvm/Target/TargetAsmBackend.h" -#include "llvm/Target/TargetAsmParser.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetRegistry.h" -#include "llvm/Target/TargetMachine.h" // FIXME. -#include "llvm/Target/TargetSelect.h" -#include "llvm/MC/MCParser/AsmParser.h" #include "Disassembler.h" using namespace llvm; From sabre at nondot.org Mon Apr 5 18:07:42 2010 From: sabre at nondot.org (Chris Lattner) Date: Mon, 05 Apr 2010 23:07:42 -0000 Subject: [llvm-commits] [llvm] r100490 - /llvm/trunk/include/llvm/Support/SourceMgr.h Message-ID: <20100405230742.8E2522A6C12D@llvm.org> Author: lattner Date: Mon Apr 5 18:07:42 2010 New Revision: 100490 URL: http://llvm.org/viewvc/llvm-project?rev=100490&view=rev Log: add a comment. Modified: llvm/trunk/include/llvm/Support/SourceMgr.h Modified: llvm/trunk/include/llvm/Support/SourceMgr.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Support/SourceMgr.h?rev=100490&r1=100489&r2=100490&view=diff ============================================================================== --- llvm/trunk/include/llvm/Support/SourceMgr.h (original) +++ llvm/trunk/include/llvm/Support/SourceMgr.h Mon Apr 5 18:07:42 2010 @@ -76,6 +76,8 @@ return Buffers[i].IncludeLoc; } + /// AddNewSourceBuffer - Add a new source buffer to this source manager. This + /// takes ownership of the memory buffer. 
unsigned AddNewSourceBuffer(MemoryBuffer *F, SMLoc IncludeLoc) { SrcBuffer NB; NB.Buffer = F; From sabre at nondot.org Mon Apr 5 18:11:25 2010 From: sabre at nondot.org (Chris Lattner) Date: Mon, 05 Apr 2010 23:11:25 -0000 Subject: [llvm-commits] [llvm] r100491 - in /llvm/trunk: lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp tools/llc/llc.cpp Message-ID: <20100405231125.24E232A6C12D@llvm.org> Author: lattner Date: Mon Apr 5 18:11:24 2010 New Revision: 100491 URL: http://llvm.org/viewvc/llvm-project?rev=100491&view=rev Log: add .o file writing for inline asm in llc. Here's a silly demo: $ clang asm.c -S -o - -emit-llvm | llc -filetype=obj -o t.o :1:2: error: unrecognized instruction abc incl %eax ^ LLVM ERROR: Error parsing inline asm Only problem seems to be that the parser finalizes OutStreamer at the end of the first inline asm, which isn't what we want. For example: $ cat asm.c int foo(int X) { __asm__ ("incl %0" : "+r" (X)); return X; } $ clang asm.c -S -o - -emit-llvm | llc ... subq $8, %rsp movl %edi, (%rsp) movl %edi, %eax ## InlineAsm Start incl %eax ## InlineAsm End movl %eax, (%rsp) movl %eax, 4(%rsp) addq $8, %rsp ret $ clang asm.c -S -o - -emit-llvm | llc -filetype=obj -o t.o $ otool -tv t.o t.o: (__TEXT,__text) section _foo: 0000000000000000 subq $0x08,%rsp 0000000000000004 movl %edi,(%rsp) 0000000000000007 movl %edi,%eax 0000000000000009 incl %eax $ don't stop at inc! 
Modified: llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp llvm/trunk/tools/llc/llc.cpp Modified: llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp?rev=100491&r1=100490&r2=100491&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp (original) +++ llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp Mon Apr 5 18:11:24 2010 @@ -18,9 +18,16 @@ #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCParser/AsmParser.h" +#include "llvm/Target/TargetAsmParser.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegistry.h" +#include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/Twine.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/SourceMgr.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -28,6 +35,11 @@ void AsmPrinter::EmitInlineAsm(StringRef Str) const { assert(!Str.empty() && "Can't emit empty inline asm block"); + // Remember if the buffer is nul terminated or not so we can avoid a copy. + bool isNullTerminated = Str.back() == 0; + if (isNullTerminated) + Str = Str.substr(0, Str.size()-1); + // If the output streamer is actually a .s file, just emit the blob textually. // This is useful in case the asm parser doesn't handle something but the // system assembler does. @@ -36,7 +48,27 @@ return; } - errs() << "Inline asm not supported by this streamer!\n"; + SourceMgr SrcMgr; + MemoryBuffer *Buffer; + if (isNullTerminated) + Buffer = MemoryBuffer::getMemBuffer(Str, ""); + else + Buffer = MemoryBuffer::getMemBufferCopy(Str, ""); + + // Tell SrcMgr about this buffer, it takes ownership of the buffer. 
+ SrcMgr.AddNewSourceBuffer(Buffer, SMLoc()); + + AsmParser Parser(SrcMgr, OutContext, OutStreamer, *MAI); + OwningPtr TAP(TM.getTarget().createAsmParser(Parser)); + if (!TAP) + llvm_report_error("Inline asm not supported by this streamer because" + " we don't have an asm parser for this target\n"); + Parser.setTargetParser(*TAP.get()); + + // Don't implicitly switch to the text section before the asm. + int Res = Parser.Run(/*NoInitialTextSection*/ true); + if (Res) + llvm_report_error("Error parsing inline asm\n"); } @@ -249,8 +281,7 @@ } } } - OS << "\n"; - + OS << '\n' << (char)0; // null terminate string. EmitInlineAsm(OS.str()); // Emit the #NOAPP end marker. This has to happen even if verbose-asm isn't Modified: llvm/trunk/tools/llc/llc.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llc/llc.cpp?rev=100491&r1=100490&r2=100491&view=diff ============================================================================== --- llvm/trunk/tools/llc/llc.cpp (original) +++ llvm/trunk/tools/llc/llc.cpp Mon Apr 5 18:11:24 2010 @@ -214,6 +214,7 @@ // Initialize targets first, so that --version shows registered targets. InitializeAllTargets(); InitializeAllAsmPrinters(); + InitializeAllAsmParsers(); cl::ParseCommandLineOptions(argc, argv, "llvm system compiler\n"); From sabre at nondot.org Mon Apr 5 18:15:42 2010 From: sabre at nondot.org (Chris Lattner) Date: Mon, 05 Apr 2010 23:15:42 -0000 Subject: [llvm-commits] [llvm] r100492 - in /llvm/trunk: include/llvm/MC/MCParser/AsmParser.h lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp lib/MC/MCParser/AsmParser.cpp Message-ID: <20100405231542.C602B2A6C12D@llvm.org> Author: lattner Date: Mon Apr 5 18:15:42 2010 New Revision: 100492 URL: http://llvm.org/viewvc/llvm-project?rev=100492&view=rev Log: Give AsmParser an option to control whether it finalizes the stream. 
New demo: $ clang asm.c -S -o - -emit-llvm | llc -filetype=obj -o t.o $ otool -tv t.o t.o: (__TEXT,__text) section _foo: 0000000000000000 subq $0x08,%rsp 0000000000000004 movl %edi,(%rsp) 0000000000000007 movl %edi,%eax 0000000000000009 incl %eax 000000000000000b movl %eax,(%rsp) 000000000000000e movl %eax,0x04(%rsp) 0000000000000012 addq $0x08,%rsp 0000000000000016 ret Modified: llvm/trunk/include/llvm/MC/MCParser/AsmParser.h llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp llvm/trunk/lib/MC/MCParser/AsmParser.cpp Modified: llvm/trunk/include/llvm/MC/MCParser/AsmParser.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/MC/MCParser/AsmParser.h?rev=100492&r1=100491&r2=100492&view=diff ============================================================================== --- llvm/trunk/include/llvm/MC/MCParser/AsmParser.h (original) +++ llvm/trunk/include/llvm/MC/MCParser/AsmParser.h Mon Apr 5 18:15:42 2010 @@ -64,7 +64,7 @@ const MCAsmInfo &MAI); ~AsmParser(); - bool Run(bool NoInitialTextSection); + bool Run(bool NoInitialTextSection, bool NoFinalize = false); void AddDirectiveHandler(StringRef Directive, Modified: llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp?rev=100492&r1=100491&r2=100492&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp (original) +++ llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp Mon Apr 5 18:15:42 2010 @@ -66,7 +66,8 @@ Parser.setTargetParser(*TAP.get()); // Don't implicitly switch to the text section before the asm. 
- int Res = Parser.Run(/*NoInitialTextSection*/ true); + int Res = Parser.Run(/*NoInitialTextSection*/ true, + /*NoFinalize*/ true); if (Res) llvm_report_error("Error parsing inline asm\n"); } Modified: llvm/trunk/lib/MC/MCParser/AsmParser.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/MC/MCParser/AsmParser.cpp?rev=100492&r1=100491&r2=100492&view=diff ============================================================================== --- llvm/trunk/lib/MC/MCParser/AsmParser.cpp (original) +++ llvm/trunk/lib/MC/MCParser/AsmParser.cpp Mon Apr 5 18:15:42 2010 @@ -138,7 +138,7 @@ return *tok; } -bool AsmParser::Run(bool NoInitialTextSection) { +bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) { // Create the initial section, if requested. // // FIXME: Target hook & command line option for initial section. @@ -190,7 +190,9 @@ TheCondState.Ignore != StartingCondState.Ignore) return TokError("unmatched .ifs or .elses"); - if (!HadError) + // Finalize the output stream if there are no errors and if the client wants + // us to. + if (!HadError && !NoFinalize) Out.Finish(); return HadError; From clattner at apple.com Mon Apr 5 18:31:07 2010 From: clattner at apple.com (Chris Lattner) Date: Mon, 5 Apr 2010 16:31:07 -0700 Subject: [llvm-commits] [llvm] r100487 - /llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp In-Reply-To: <20100405225921.E3BD02A6C12D@llvm.org> References: <20100405225921.E3BD02A6C12D@llvm.org> Message-ID: <11C0A7DC-6AE6-4E55-8FEC-5BC6B1E438BA@apple.com> On Apr 5, 2010, at 3:59 PM, Bill Wendling wrote: > Author: void > Date: Mon Apr 5 17:59:21 2010 > New Revision: 100487 > > URL: http://llvm.org/viewvc/llvm-project?rev=100487&view=rev > Log: > Output floating point representations in DWARF format. This is done by outputing > the FP encoding directly as a hex representation. Did you verify that this works on both big and little endian systems? 
-Chris > > Modified: > llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp > > Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp?rev=100487&r1=100486&r2=100487&view=diff > ============================================================================== > --- llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp (original) > +++ llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp Mon Apr 5 17:59:21 2010 > @@ -14,6 +14,7 @@ > #define DEBUG_TYPE "dwarfdebug" > #include "DwarfDebug.h" > #include "DIE.h" > +#include "llvm/Constants.h" > #include "llvm/Module.h" > #include "llvm/CodeGen/MachineFunction.h" > #include "llvm/CodeGen/MachineModuleInfo.h" > @@ -1532,6 +1533,31 @@ > if (MCSymbol *VS = DV->getDbgValueLabel()) > addLabel(VariableDie, dwarf::DW_AT_start_scope, dwarf::DW_FORM_addr, > VS); > + } else if (DbgValueInsn->getOperand(0).getType() == > + MachineOperand::MO_FPImmediate) { > + DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); > + APFloat FPImm = DbgValueInsn->getOperand(0).getFPImm()->getValueAPF(); > + > + // Get the raw data form of the floating point. > + const APInt FltVal = FPImm.bitcastToAPInt(); > + const char *FltPtr = (const char*)FltVal.getRawData(); > + > + unsigned NumBytes = FltVal.getBitWidth() / 8; // 8 bits per byte. > + bool LittleEndian = Asm->getTargetData().isLittleEndian(); > + int Incr = (LittleEndian ? 1 : -1); > + int Start = (LittleEndian ? 0 : NumBytes - 1); > + int Stop = (LittleEndian ? NumBytes : -1); > + > + // Output the constant to DWARF one byte at a time. > + for (; Start != Stop; Start += Incr) > + addUInt(Block, 0, dwarf::DW_FORM_data1, > + (unsigned char)0xFF & FltPtr[Start]); > + > + addBlock(VariableDie, dwarf::DW_AT_const_value, 0, Block); > + > + if (MCSymbol *VS = DV->getDbgValueLabel()) > + addLabel(VariableDie, dwarf::DW_AT_start_scope, dwarf::DW_FORM_addr, > + VS); > } else { > //FIXME : Handle other operand types. 
> delete VariableDie; > > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits From evan.cheng at apple.com Mon Apr 5 18:33:29 2010 From: evan.cheng at apple.com (Evan Cheng) Date: Mon, 05 Apr 2010 23:33:29 -0000 Subject: [llvm-commits] [llvm] r100494 - /llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp Message-ID: <20100405233329.306042A6C12C@llvm.org> Author: evancheng Date: Mon Apr 5 18:33:29 2010 New Revision: 100494 URL: http://llvm.org/viewvc/llvm-project?rev=100494&view=rev Log: Fix an obvious copy-n-paste bug. It's not known to cause any miscompilation. Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp?rev=100494&r1=100493&r2=100494&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (original) +++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp Mon Apr 5 18:33:29 2010 @@ -3206,11 +3206,9 @@ NonScalarIntSafe, DAG); if (VT == MVT::Other) { - VT = TLI.getPointerTy(); - const Type *Ty = VT.getTypeForEVT(*DAG.getContext()); - if (DstAlign >= TLI.getTargetData()->getABITypeAlignment(Ty) || + if (DstAlign >= TLI.getTargetData()->getPointerPrefAlignment() || TLI.allowsUnalignedMemoryAccesses(VT)) { - VT = MVT::i64; + VT = TLI.getPointerTy(); } else { switch (DstAlign & 7) { case 0: VT = MVT::i64; break; From stoklund at 2pi.dk Mon Apr 5 18:48:02 2010 From: stoklund at 2pi.dk (Jakob Stoklund Olesen) Date: Mon, 05 Apr 2010 23:48:02 -0000 Subject: [llvm-commits] [llvm] r100497 - /llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Message-ID: <20100405234802.B70A72A6C12C@llvm.org> Author: stoklund Date: Mon Apr 5 18:48:02 2010 New Revision: 100497 URL: 
http://llvm.org/viewvc/llvm-project?rev=100497&view=rev Log: Properly enable load clustering. Operand 2 on a load instruction does not have to be a RegisterSDNode for this to work. Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=100497&r1=100496&r2=100497&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original) +++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Mon Apr 5 18:48:02 2010 @@ -2961,10 +2961,6 @@ Load1->getOperand(2) == Load2->getOperand(2)) { if (cast(Load1->getOperand(1))->getZExtValue() != 1) return false; - SDValue Op2 = Load1->getOperand(2); - if (!isa(Op2) || - cast(Op2)->getReg() != 0) - return 0; // Now let's examine the displacements. if (isa(Load1->getOperand(3)) && From dgregor at apple.com Mon Apr 5 18:53:46 2010 From: dgregor at apple.com (Douglas Gregor) Date: Mon, 05 Apr 2010 23:53:46 -0000 Subject: [llvm-commits] [llvm] r100499 - /llvm/trunk/include/llvm/ADT/OwningPtr.h Message-ID: <20100405235346.49C0A2A6C12C@llvm.org> Author: dgregor Date: Mon Apr 5 18:53:46 2010 New Revision: 100499 URL: http://llvm.org/viewvc/llvm-project?rev=100499&view=rev Log: Remove MaybeOwningPtr. We're going to use reference-counting instead. Modified: llvm/trunk/include/llvm/ADT/OwningPtr.h Modified: llvm/trunk/include/llvm/ADT/OwningPtr.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/ADT/OwningPtr.h?rev=100499&r1=100498&r2=100499&view=diff ============================================================================== --- llvm/trunk/include/llvm/ADT/OwningPtr.h (original) +++ llvm/trunk/include/llvm/ADT/OwningPtr.h Mon Apr 5 18:53:46 2010 @@ -128,99 +128,6 @@ a.swap(b); } -/// \brief A smart pointer that may own the object it points to. -/// -/// An instance of \c MaybeOwningPtr may own the object it points to. 
If so, -/// it will guarantee that the object will be deleted either on destructin of -/// the OwningPtr or via an explicit reset(). Once created, ownership of the -/// pointee object can be taken away from OwningPtr by using the \c take() -/// method. -template -class MaybeOwningPtr { - T *Ptr; - bool Owned; - - struct MaybeOwningPtrRef { - MaybeOwningPtrRef(T *Ptr, bool &Owned) : Ptr(Ptr), Owned(Owned) { } - - T *Ptr; - bool &Owned; - }; - -public: - MaybeOwningPtr() : Ptr(0), Owned(false) { } - - explicit MaybeOwningPtr(T *P, bool OwnP) : Ptr(P), Owned(OwnP) {} - - /// \brief Take ownership of the pointer stored in \c Other. - MaybeOwningPtr(MaybeOwningPtr& Other) : Ptr(Other.Ptr), Owned(Other.Owned) { - Other.Owned = false; - } - - MaybeOwningPtr(MaybeOwningPtrRef Other) : Ptr(Other.Ptr), Owned(Other.Owned) { - Other.Owned = false; - } - - /// \brief Take ownership of the ppinter stored in \c Other. - MaybeOwningPtr &operator=(MaybeOwningPtr &Other) { - reset(Other.Ptr, Other.Owned); - Other.Owned = false; - return *this; - } - - ~MaybeOwningPtr() { - if (Owned) - delete Ptr; - } - - operator MaybeOwningPtrRef() { return MaybeOwningPtrRef(Ptr, Owned); } - - /// reset - Change the current pointee to the specified pointer. Note that - /// calling this with any pointer (including a null pointer) deletes the - /// current pointer. - void reset(T *P, bool OwnP) { - assert(P != Ptr); - if (Owned) - delete Ptr; - - Ptr = P; - Owned = OwnP; - } - - /// take - Return the underlying pointer and take ownership of it. This - /// \c MaybeOwningPtr must have ownership before the call, and will - /// relinquish ownership as part of the call. 
- T *take() { - assert(Owned && "Cannot take ownership from a non-owning pointer"); - Owned = false; - return Ptr; - } - - T &operator*() const { - assert(Ptr && "Cannot dereference null pointer"); - return *Ptr; - } - - T *operator->() const { return Ptr; } - T *get() const { return Ptr; } - operator bool() const { return Ptr != 0; } - bool operator!() const { return Ptr == 0; } - - void swap(MaybeOwningPtr &RHS) { - T *Tmp = RHS.Ptr; - RHS.Ptr = Ptr; - Ptr = Tmp; - bool TmpOwned = RHS.Owned; - RHS.Owned = Owned; - Owned = TmpOwned; - } -}; - -template -inline void swap(MaybeOwningPtr &a, MaybeOwningPtr &b) { - a.swap(b); -} - } // end namespace llvm #endif From sabre at nondot.org Mon Apr 5 19:26:48 2010 From: sabre at nondot.org (Chris Lattner) Date: Tue, 06 Apr 2010 00:26:48 -0000 Subject: [llvm-commits] [llvm] r100503 - in /llvm/trunk: include/llvm/Support/IRReader.h include/llvm/Support/SourceMgr.h lib/AsmParser/Parser.cpp lib/Support/SourceMgr.cpp Message-ID: <20100406002649.005012A6C12C@llvm.org> Author: lattner Date: Mon Apr 5 19:26:48 2010 New Revision: 100503 URL: http://llvm.org/viewvc/llvm-project?rev=100503&view=rev Log: Give llvm::SourceMgr the ability to have a client-specified diagnostic handler. 
Modified: llvm/trunk/include/llvm/Support/IRReader.h llvm/trunk/include/llvm/Support/SourceMgr.h llvm/trunk/lib/AsmParser/Parser.cpp llvm/trunk/lib/Support/SourceMgr.cpp Modified: llvm/trunk/include/llvm/Support/IRReader.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Support/IRReader.h?rev=100503&r1=100502&r2=100503&view=diff ============================================================================== --- llvm/trunk/include/llvm/Support/IRReader.h (original) +++ llvm/trunk/include/llvm/Support/IRReader.h Mon Apr 5 19:26:48 2010 @@ -38,7 +38,8 @@ std::string ErrMsg; Module *M = getLazyBitcodeModule(Buffer, Context, &ErrMsg); if (M == 0) { - Err = SMDiagnostic(Buffer->getBufferIdentifier(), -1, -1, ErrMsg, ""); + Err = SMDiagnostic(SMLoc(), Buffer->getBufferIdentifier(), -1, -1, + ErrMsg, ""); // ParseBitcodeFile does not take ownership of the Buffer in the // case of an error. delete Buffer; @@ -59,7 +60,7 @@ std::string ErrMsg; MemoryBuffer *F = MemoryBuffer::getFileOrSTDIN(Filename.c_str(), &ErrMsg); if (F == 0) { - Err = SMDiagnostic(Filename, -1, -1, + Err = SMDiagnostic(SMLoc(), Filename, -1, -1, "Could not open input file '" + Filename + "'", ""); return 0; } @@ -81,7 +82,8 @@ // ParseBitcodeFile does not take ownership of the Buffer. 
delete Buffer; if (M == 0) - Err = SMDiagnostic(Buffer->getBufferIdentifier(), -1, -1, ErrMsg, ""); + Err = SMDiagnostic(SMLoc(), Buffer->getBufferIdentifier(), + -1, -1, ErrMsg, ""); return M; } @@ -97,7 +99,7 @@ std::string ErrMsg; MemoryBuffer *F = MemoryBuffer::getFileOrSTDIN(Filename.c_str(), &ErrMsg); if (F == 0) { - Err = SMDiagnostic(Filename, -1, -1, + Err = SMDiagnostic(SMLoc(), Filename, -1, -1, "Could not open input file '" + Filename + "'", ""); return 0; } Modified: llvm/trunk/include/llvm/Support/SourceMgr.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Support/SourceMgr.h?rev=100503&r1=100502&r2=100503&view=diff ============================================================================== --- llvm/trunk/include/llvm/Support/SourceMgr.h (original) +++ llvm/trunk/include/llvm/Support/SourceMgr.h Mon Apr 5 19:26:48 2010 @@ -31,6 +31,12 @@ /// SourceMgr - This owns the files read by a parser, handles include stacks, /// and handles diagnostic wrangling. class SourceMgr { +public: + /// DiagHandlerTy - Clients that want to handle their own diagnostics in a + /// custom way can register a function pointer+context as a diagnostic + /// handler. It gets called each time PrintMessage is invoked. + typedef void (*DiagHandlerTy)(const SMDiagnostic&, void *Context); +private: struct SrcBuffer { /// Buffer - The memory buffer for the file. MemoryBuffer *Buffer; @@ -51,16 +57,26 @@ /// is really private to SourceMgr.cpp. mutable void *LineNoCache; + DiagHandlerTy DiagHandler; + void *DiagContext; + SourceMgr(const SourceMgr&); // DO NOT IMPLEMENT void operator=(const SourceMgr&); // DO NOT IMPLEMENT public: - SourceMgr() : LineNoCache(0) {} + SourceMgr() : LineNoCache(0), DiagHandler(0), DiagContext(0) {} ~SourceMgr(); void setIncludeDirs(const std::vector &Dirs) { IncludeDirectories = Dirs; } + /// setDiagHandler - Specify a diagnostic handler to be invoked every time + /// PrintMessage is called. 
+ void setDiagHandler(DiagHandlerTy DH, void *Ctx = 0) { + DiagHandler = DH; + DiagContext = Ctx; + } + const SrcBuffer &getBufferInfo(unsigned i) const { assert(i < Buffers.size() && "Invalid Buffer ID!"); return Buffers[i]; @@ -128,6 +144,7 @@ /// SMDiagnostic - Instances of this class encapsulate one diagnostic report, /// allowing printing to a raw_ostream as a caret diagnostic. class SMDiagnostic { + SMLoc Loc; std::string Filename; int LineNo, ColumnNo; std::string Message, LineContents; @@ -135,12 +152,20 @@ public: SMDiagnostic() : LineNo(0), ColumnNo(0), ShowLine(0) {} - SMDiagnostic(const std::string &FN, int Line, int Col, + SMDiagnostic(SMLoc L, const std::string &FN, int Line, int Col, const std::string &Msg, const std::string &LineStr, bool showline = true) - : Filename(FN), LineNo(Line), ColumnNo(Col), Message(Msg), + : Loc(L), Filename(FN), LineNo(Line), ColumnNo(Col), Message(Msg), LineContents(LineStr), ShowLine(showline) {} + SMLoc getLoc() const { return Loc; } + const std::string getFilename() { return Filename; } + int getLineNo() const { return LineNo; } + int getColumnNo() const { return ColumnNo; } + const std::string &getMessage() const { return Message; } + const std::string &getLineContents() const { return LineContents; } + bool getShowLine() const { return ShowLine; } + void Print(const char *ProgName, raw_ostream &S) const; }; Modified: llvm/trunk/lib/AsmParser/Parser.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/AsmParser/Parser.cpp?rev=100503&r1=100502&r2=100503&view=diff ============================================================================== --- llvm/trunk/lib/AsmParser/Parser.cpp (original) +++ llvm/trunk/lib/AsmParser/Parser.cpp Mon Apr 5 19:26:48 2010 @@ -44,7 +44,7 @@ std::string ErrorStr; MemoryBuffer *F = MemoryBuffer::getFileOrSTDIN(Filename.c_str(), &ErrorStr); if (F == 0) { - Err = SMDiagnostic("", -1, -1, + Err = SMDiagnostic(SMLoc(), "", -1, -1, "Could not open input file '" + Filename + "': " + 
ErrorStr, ""); return 0; Modified: llvm/trunk/lib/Support/SourceMgr.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/SourceMgr.cpp?rev=100503&r1=100502&r2=100503&view=diff ============================================================================== --- llvm/trunk/lib/Support/SourceMgr.cpp (original) +++ llvm/trunk/lib/Support/SourceMgr.cpp Mon Apr 5 19:26:48 2010 @@ -168,13 +168,20 @@ } PrintedMsg += Msg; - return SMDiagnostic(CurMB->getBufferIdentifier(), FindLineNumber(Loc, CurBuf), + return SMDiagnostic(Loc, + CurMB->getBufferIdentifier(), FindLineNumber(Loc, CurBuf), Loc.getPointer()-LineStart, PrintedMsg, LineStr, ShowLine); } void SourceMgr::PrintMessage(SMLoc Loc, const std::string &Msg, const char *Type, bool ShowLine) const { + // Report the message with the diagnostic handler if present. + if (DiagHandler) { + DiagHandler(GetMessage(Loc, Msg, Type, ShowLine), DiagContext); + return; + } + raw_ostream &OS = errs(); int CurBuf = FindBufferContainingLoc(Loc); From sabre at nondot.org Mon Apr 5 19:33:43 2010 From: sabre at nondot.org (Chris Lattner) Date: Tue, 06 Apr 2010 00:33:43 -0000 Subject: [llvm-commits] [llvm] r100504 - in /llvm/trunk: include/llvm/Support/SourceMgr.h lib/Support/SourceMgr.cpp Message-ID: <20100406003343.C68A82A6C12C@llvm.org> Author: lattner Date: Mon Apr 5 19:33:43 2010 New Revision: 100504 URL: http://llvm.org/viewvc/llvm-project?rev=100504&view=rev Log: give the SourceMgr object a cookie. 
Modified: llvm/trunk/include/llvm/Support/SourceMgr.h llvm/trunk/lib/Support/SourceMgr.cpp Modified: llvm/trunk/include/llvm/Support/SourceMgr.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Support/SourceMgr.h?rev=100504&r1=100503&r2=100504&view=diff ============================================================================== --- llvm/trunk/include/llvm/Support/SourceMgr.h (original) +++ llvm/trunk/include/llvm/Support/SourceMgr.h Mon Apr 5 19:33:43 2010 @@ -35,7 +35,8 @@ /// DiagHandlerTy - Clients that want to handle their own diagnostics in a /// custom way can register a function pointer+context as a diagnostic /// handler. It gets called each time PrintMessage is invoked. - typedef void (*DiagHandlerTy)(const SMDiagnostic&, void *Context); + typedef void (*DiagHandlerTy)(const SMDiagnostic&, void *Context, + unsigned LocCookie); private: struct SrcBuffer { /// Buffer - The memory buffer for the file. @@ -59,6 +60,7 @@ DiagHandlerTy DiagHandler; void *DiagContext; + unsigned DiagLocCookie; SourceMgr(const SourceMgr&); // DO NOT IMPLEMENT void operator=(const SourceMgr&); // DO NOT IMPLEMENT @@ -71,10 +73,12 @@ } /// setDiagHandler - Specify a diagnostic handler to be invoked every time - /// PrintMessage is called. - void setDiagHandler(DiagHandlerTy DH, void *Ctx = 0) { + /// PrintMessage is called. Ctx and Cookie are passed into the handler when + /// it is invoked. 
+ void setDiagHandler(DiagHandlerTy DH, void *Ctx = 0, unsigned Cookie = 0) { DiagHandler = DH; DiagContext = Ctx; + DiagLocCookie = Cookie; } const SrcBuffer &getBufferInfo(unsigned i) const { Modified: llvm/trunk/lib/Support/SourceMgr.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/SourceMgr.cpp?rev=100504&r1=100503&r2=100504&view=diff ============================================================================== --- llvm/trunk/lib/Support/SourceMgr.cpp (original) +++ llvm/trunk/lib/Support/SourceMgr.cpp Mon Apr 5 19:33:43 2010 @@ -178,7 +178,8 @@ const char *Type, bool ShowLine) const { // Report the message with the diagnostic handler if present. if (DiagHandler) { - DiagHandler(GetMessage(Loc, Msg, Type, ShowLine), DiagContext); + DiagHandler(GetMessage(Loc, Msg, Type, ShowLine), + DiagContext, DiagLocCookie); return; } From dpatel at apple.com Mon Apr 5 19:38:32 2010 From: dpatel at apple.com (Devang Patel) Date: Tue, 06 Apr 2010 00:38:32 -0000 Subject: [llvm-commits] [llvm] r100505 - /llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp Message-ID: <20100406003832.45B2B2A6C12C@llvm.org> Author: dpatel Date: Mon Apr 5 19:38:32 2010 New Revision: 100505 URL: http://llvm.org/viewvc/llvm-project?rev=100505&view=rev Log: Remove unnecessary include. 
Modified: llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp Modified: llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp?rev=100505&r1=100504&r2=100505&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp (original) +++ llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp Mon Apr 5 19:38:32 2010 @@ -37,7 +37,6 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" -#include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/Statistic.h" From sabre at nondot.org Mon Apr 5 19:44:45 2010 From: sabre at nondot.org (Chris Lattner) Date: Tue, 06 Apr 2010 00:44:45 -0000 Subject: [llvm-commits] [llvm] r100506 - in /llvm/trunk: include/llvm/LLVMContext.h lib/VMCore/LLVMContext.cpp lib/VMCore/LLVMContextImpl.cpp lib/VMCore/LLVMContextImpl.h Message-ID: <20100406004445.BBDB42A6C12C@llvm.org> Author: lattner Date: Mon Apr 5 19:44:45 2010 New Revision: 100506 URL: http://llvm.org/viewvc/llvm-project?rev=100506&view=rev Log: give LLVMContext an inline asm diagnostic hook member. Modified: llvm/trunk/include/llvm/LLVMContext.h llvm/trunk/lib/VMCore/LLVMContext.cpp llvm/trunk/lib/VMCore/LLVMContextImpl.cpp llvm/trunk/lib/VMCore/LLVMContextImpl.h Modified: llvm/trunk/include/llvm/LLVMContext.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/LLVMContext.h?rev=100506&r1=100505&r2=100506&view=diff ============================================================================== --- llvm/trunk/include/llvm/LLVMContext.h (original) +++ llvm/trunk/include/llvm/LLVMContext.h Mon Apr 5 19:44:45 2010 @@ -50,6 +50,24 @@ /// custom metadata IDs registered in this LLVMContext. ID #0 is not used, /// so it is filled in as an empty string. 
void getMDKindNames(SmallVectorImpl &Result) const; + + /// setInlineAsmDiagnosticHandler - This method sets a handler that is invoked + /// when problems with inline asm are detected by the backend. The first + /// argument is a function pointer (of type SourceMgr::DiagHandlerTy) and the + /// second is a context pointer that gets passed into the DiagHandler. + /// + /// LLVMContext doesn't take ownership or interpreter either of these + /// pointers. + void setInlineAsmDiagnosticHandler(void *DiagHandler, void *DiagContext = 0); + + /// getInlineAsmDiagnosticHandler - Return the diagnostic handler set by + /// setInlineAsmDiagnosticHandler. + void *getInlineAsmDiagnosticHandler() const; + + /// getInlineAsmDiagnosticContext - Return the diagnostic context set by + /// setInlineAsmDiagnosticHandler. + void *getInlineAsmDiagnosticContext() const; + }; /// getGlobalContext - Returns a global context. This is for LLVM clients that Modified: llvm/trunk/lib/VMCore/LLVMContext.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/LLVMContext.cpp?rev=100506&r1=100505&r2=100506&view=diff ============================================================================== --- llvm/trunk/lib/VMCore/LLVMContext.cpp (original) +++ llvm/trunk/lib/VMCore/LLVMContext.cpp Mon Apr 5 19:44:45 2010 @@ -33,6 +33,23 @@ } LLVMContext::~LLVMContext() { delete pImpl; } +void LLVMContext::setInlineAsmDiagnosticHandler(void *DiagHandler, + void *DiagContext) { + pImpl->InlineAsmDiagHandler = DiagHandler; + pImpl->InlineAsmDiagContext = DiagContext; +} + +/// getInlineAsmDiagnosticHandler - Return the diagnostic handler set by +/// setInlineAsmDiagnosticHandler. +void *LLVMContext::getInlineAsmDiagnosticHandler() const { + return pImpl->InlineAsmDiagHandler; +} + +/// getInlineAsmDiagnosticContext - Return the diagnostic context set by +/// setInlineAsmDiagnosticHandler. 
+void *LLVMContext::getInlineAsmDiagnosticContext() const { + return pImpl->InlineAsmDiagContext; +} #ifndef NDEBUG /// isValidName - Return true if Name is a valid custom metadata handler name. @@ -73,5 +90,3 @@ // MD Handlers are numbered from 1. Names[I->second] = I->first(); } - - Modified: llvm/trunk/lib/VMCore/LLVMContextImpl.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/LLVMContextImpl.cpp?rev=100506&r1=100505&r2=100506&view=diff ============================================================================== --- llvm/trunk/lib/VMCore/LLVMContextImpl.cpp (original) +++ llvm/trunk/lib/VMCore/LLVMContextImpl.cpp Mon Apr 5 19:44:45 2010 @@ -30,6 +30,9 @@ Int32Ty(C, 32), Int64Ty(C, 64), AlwaysOpaqueTy(new OpaqueType(C)) { + InlineAsmDiagHandler = 0; + InlineAsmDiagContext = 0; + // Make sure the AlwaysOpaqueTy stays alive as long as the Context. AlwaysOpaqueTy->addRef(); OpaqueTypes.insert(AlwaysOpaqueTy); Modified: llvm/trunk/lib/VMCore/LLVMContextImpl.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/LLVMContextImpl.h?rev=100506&r1=100505&r2=100506&view=diff ============================================================================== --- llvm/trunk/lib/VMCore/LLVMContextImpl.h (original) +++ llvm/trunk/lib/VMCore/LLVMContextImpl.h Mon Apr 5 19:44:45 2010 @@ -115,6 +115,8 @@ class LLVMContextImpl { public: + void *InlineAsmDiagHandler, *InlineAsmDiagContext; + typedef DenseMap IntMapTy; IntMapTy IntConstants; From sabre at nondot.org Mon Apr 5 19:51:52 2010 From: sabre at nondot.org (Chris Lattner) Date: Tue, 06 Apr 2010 00:51:52 -0000 Subject: [llvm-commits] [llvm] r100508 - in /llvm/trunk: include/llvm/CodeGen/MachineFunctionAnalysis.h include/llvm/CodeGen/MachineModuleInfo.h lib/CodeGen/MachineFunctionAnalysis.cpp lib/CodeGen/MachineModuleInfo.cpp Message-ID: <20100406005152.6BF992A6C12C@llvm.org> Author: lattner Date: Mon Apr 5 19:51:52 2010 New Revision: 100508 URL: http://llvm.org/viewvc/llvm-project?rev=100508&view=rev 
Log: Give MachineModuleInfo an actual Module*. Modified: llvm/trunk/include/llvm/CodeGen/MachineFunctionAnalysis.h llvm/trunk/include/llvm/CodeGen/MachineModuleInfo.h llvm/trunk/lib/CodeGen/MachineFunctionAnalysis.cpp llvm/trunk/lib/CodeGen/MachineModuleInfo.cpp Modified: llvm/trunk/include/llvm/CodeGen/MachineFunctionAnalysis.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/MachineFunctionAnalysis.h?rev=100508&r1=100507&r2=100508&view=diff ============================================================================== --- llvm/trunk/include/llvm/CodeGen/MachineFunctionAnalysis.h (original) +++ llvm/trunk/include/llvm/CodeGen/MachineFunctionAnalysis.h Mon Apr 5 19:51:52 2010 @@ -39,7 +39,7 @@ CodeGenOpt::Level getOptLevel() const { return OptLevel; } private: - virtual bool doInitialization(Module &) { NextFnNum = 1; return false; } + virtual bool doInitialization(Module &M); virtual bool runOnFunction(Function &F); virtual void releaseMemory(); virtual void getAnalysisUsage(AnalysisUsage &AU) const; Modified: llvm/trunk/include/llvm/CodeGen/MachineModuleInfo.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/MachineModuleInfo.h?rev=100508&r1=100507&r2=100508&view=diff ============================================================================== --- llvm/trunk/include/llvm/CodeGen/MachineModuleInfo.h (original) +++ llvm/trunk/include/llvm/CodeGen/MachineModuleInfo.h Mon Apr 5 19:51:52 2010 @@ -100,6 +100,9 @@ /// Context - This is the MCContext used for the entire code generator. MCContext Context; + /// TheModule - This is the LLVM Module being worked on. + Module *TheModule; + /// ObjFileMMI - This is the object-file-format-specific implementation of /// MachineModuleInfoImpl, which lets targets accumulate whatever info they /// want. 
@@ -176,6 +179,9 @@ const MCContext &getContext() const { return Context; } MCContext &getContext() { return Context; } + void setModule(Module *M) { TheModule = M; } + Module *getModule() const { return TheModule; } + /// getInfo - Keep track of various per-function pieces of information for /// backends that would like to do so. /// Modified: llvm/trunk/lib/CodeGen/MachineFunctionAnalysis.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MachineFunctionAnalysis.cpp?rev=100508&r1=100507&r2=100508&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/MachineFunctionAnalysis.cpp (original) +++ llvm/trunk/lib/CodeGen/MachineFunctionAnalysis.cpp Mon Apr 5 19:51:52 2010 @@ -35,6 +35,19 @@ assert(!MF && "MachineFunctionAnalysis left initialized!"); } +void MachineFunctionAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired<MachineModuleInfo>(); +} + +bool MachineFunctionAnalysis::doInitialization(Module &M) { + MachineModuleInfo *MMI = getAnalysisIfAvailable<MachineModuleInfo>(); + assert(MMI && "MMI not around yet??"); + MMI->setModule(&M); + NextFnNum = 1; return false; +} + + bool MachineFunctionAnalysis::runOnFunction(Function &F) { assert(!MF && "MachineFunctionAnalysis already initialized!"); MF = new MachineFunction(&F, TM, NextFnNum++, @@ -46,8 +59,3 @@ delete MF; MF = 0; } - -void MachineFunctionAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesAll(); - AU.addRequired<MachineModuleInfo>(); -} Modified: llvm/trunk/lib/CodeGen/MachineModuleInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MachineModuleInfo.cpp?rev=100508&r1=100507&r2=100508&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/MachineModuleInfo.cpp (original) +++ llvm/trunk/lib/CodeGen/MachineModuleInfo.cpp Mon Apr 5 19:51:52 2010 @@ -262,6 +262,7 @@ // Always emit some info, by default "no personality" info. 
Personalities.push_back(NULL); AddrLabelSymbols = 0; + TheModule = 0; } MachineModuleInfo::MachineModuleInfo() From isanbard at gmail.com Mon Apr 5 19:54:08 2010 From: isanbard at gmail.com (Bill Wendling) Date: Mon, 5 Apr 2010 17:54:08 -0700 Subject: [llvm-commits] [llvm] r100487 - /llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp In-Reply-To: <11C0A7DC-6AE6-4E55-8FEC-5BC6B1E438BA@apple.com> References: <20100405225921.E3BD02A6C12D@llvm.org> <11C0A7DC-6AE6-4E55-8FEC-5BC6B1E438BA@apple.com> Message-ID: <697898B9-F60D-4CED-A285-C55258950627@gmail.com> On Apr 5, 2010, at 4:31 PM, Chris Lattner wrote: > On Apr 5, 2010, at 3:59 PM, Bill Wendling wrote: > >> Author: void >> Date: Mon Apr 5 17:59:21 2010 >> New Revision: 100487 >> >> URL: http://llvm.org/viewvc/llvm-project?rev=100487&view=rev >> Log: >> Output floating point representations in DWARF format. This is done by outputing >> the FP encoding directly as a hex representation. > > Did you verify that this works on both big and little endian systems? > Yes. -bw From sabre at nondot.org Mon Apr 5 19:55:39 2010 From: sabre at nondot.org (Chris Lattner) Date: Tue, 06 Apr 2010 00:55:39 -0000 Subject: [llvm-commits] [llvm] r100509 - /llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp Message-ID: <20100406005539.5B49E2A6C12C@llvm.org> Author: lattner Date: Mon Apr 5 19:55:39 2010 New Revision: 100509 URL: http://llvm.org/viewvc/llvm-project?rev=100509&view=rev Log: report errors through LLVMContext's inline asm handler if available. 
Modified: llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp Modified: llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp?rev=100509&r1=100508&r2=100509&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp (original) +++ llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp Mon Apr 5 19:55:39 2010 @@ -14,7 +14,10 @@ #define DEBUG_TYPE "asm-printer" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/InlineAsm.h" +#include "llvm/LLVMContext.h" +#include "llvm/Module.h" #include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" @@ -49,6 +52,17 @@ } SourceMgr SrcMgr; + + // If the current LLVMContext has an inline asm handler, set it in SourceMgr. + LLVMContext &LLVMCtx = MMI->getModule()->getContext(); + bool HasDiagHandler = false; + if (void *DiagHandler = LLVMCtx.getInlineAsmDiagnosticHandler()) { + unsigned Cookie = 0; // no cookie yet. + SrcMgr.setDiagHandler((SourceMgr::DiagHandlerTy)(intptr_t)DiagHandler, + LLVMCtx.getInlineAsmDiagnosticContext(), Cookie); + HasDiagHandler = true; + } + MemoryBuffer *Buffer; if (isNullTerminated) Buffer = MemoryBuffer::getMemBuffer(Str, ""); @@ -68,7 +82,7 @@ // Don't implicitly switch to the text section before the asm. 
int Res = Parser.Run(/*NoInitialTextSection*/ true, /*NoFinalize*/ true); - if (Res) + if (Res && !HasDiagHandler) llvm_report_error("Error parsing inline asm\n"); } From sabre at nondot.org Mon Apr 5 19:58:50 2010 From: sabre at nondot.org (Chris Lattner) Date: Tue, 06 Apr 2010 00:58:50 -0000 Subject: [llvm-commits] [llvm] r100510 - in /llvm/trunk: include/llvm/CodeGen/AsmPrinter.h lib/CodeGen/AsmPrinter/AsmPrinter.cpp lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp Message-ID: <20100406005850.C3FA32A6C12C@llvm.org> Author: lattner Date: Mon Apr 5 19:58:50 2010 New Revision: 100510 URL: http://llvm.org/viewvc/llvm-project?rev=100510&view=rev Log: propagate cookie management out one layer of function calls. Modified: llvm/trunk/include/llvm/CodeGen/AsmPrinter.h llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp Modified: llvm/trunk/include/llvm/CodeGen/AsmPrinter.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/AsmPrinter.h?rev=100510&r1=100509&r2=100510&view=diff ============================================================================== --- llvm/trunk/include/llvm/CodeGen/AsmPrinter.h (original) +++ llvm/trunk/include/llvm/CodeGen/AsmPrinter.h Mon Apr 5 19:58:50 2010 @@ -423,7 +423,7 @@ mutable unsigned SetCounter; /// EmitInlineAsm - Emit a blob of inline asm to the output streamer. - void EmitInlineAsm(StringRef Str) const; + void EmitInlineAsm(StringRef Str, unsigned LocCookie) const; /// EmitInlineAsm - This method formats and emits the specified machine /// instruction that is an inline asm. 
Modified: llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp?rev=100510&r1=100509&r2=100510&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp (original) +++ llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp Mon Apr 5 19:58:50 2010 @@ -144,7 +144,7 @@ if (!M.getModuleInlineAsm().empty()) { OutStreamer.AddComment("Start of file scope inline assembly"); OutStreamer.AddBlankLine(); - EmitInlineAsm(M.getModuleInlineAsm()); + EmitInlineAsm(M.getModuleInlineAsm(), 0/*no loc cookie*/); OutStreamer.AddComment("End of file scope inline assembly"); OutStreamer.AddBlankLine(); } Modified: llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp?rev=100510&r1=100509&r2=100510&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp (original) +++ llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp Mon Apr 5 19:58:50 2010 @@ -35,7 +35,7 @@ using namespace llvm; /// EmitInlineAsm - Emit a blob of inline asm to the output streamer. -void AsmPrinter::EmitInlineAsm(StringRef Str) const { +void AsmPrinter::EmitInlineAsm(StringRef Str, unsigned LocCookie) const { assert(!Str.empty() && "Can't emit empty inline asm block"); // Remember if the buffer is nul terminated or not so we can avoid a copy. @@ -57,9 +57,8 @@ LLVMContext &LLVMCtx = MMI->getModule()->getContext(); bool HasDiagHandler = false; if (void *DiagHandler = LLVMCtx.getInlineAsmDiagnosticHandler()) { - unsigned Cookie = 0; // no cookie yet. 
SrcMgr.setDiagHandler((SourceMgr::DiagHandlerTy)(intptr_t)DiagHandler, - LLVMCtx.getInlineAsmDiagnosticContext(), Cookie); + LLVMCtx.getInlineAsmDiagnosticContext(), LocCookie); HasDiagHandler = true; } @@ -297,7 +296,7 @@ } } OS << '\n' << (char)0; // null terminate string. - EmitInlineAsm(OS.str()); + EmitInlineAsm(OS.str(), 0/*no loc cookie*/); // Emit the #NOAPP end marker. This has to happen even if verbose-asm isn't // enabled, so we use EmitRawText. From gohman at apple.com Mon Apr 5 20:31:12 2010 From: gohman at apple.com (Dan Gohman) Date: Tue, 06 Apr 2010 01:31:12 -0000 Subject: [llvm-commits] [llvm] r100515 - /llvm/trunk/lib/Analysis/IVUsers.cpp Message-ID: <20100406013113.58A662A6C12C@llvm.org> Author: djg Date: Mon Apr 5 20:31:12 2010 New Revision: 100515 URL: http://llvm.org/viewvc/llvm-project?rev=100515&view=rev Log: Add a const qualifier. Modified: llvm/trunk/lib/Analysis/IVUsers.cpp Modified: llvm/trunk/lib/Analysis/IVUsers.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/IVUsers.cpp?rev=100515&r1=100514&r2=100515&view=diff ============================================================================== --- llvm/trunk/lib/Analysis/IVUsers.cpp (original) +++ llvm/trunk/lib/Analysis/IVUsers.cpp Mon Apr 5 20:31:12 2010 @@ -142,7 +142,7 @@ /// the loop, resulting in reg-reg copies (if we use the pre-inc value when we /// should use the post-inc value). static bool IVUseShouldUsePostIncValue(Instruction *User, Instruction *IV, - Loop *L, DominatorTree *DT) { + const Loop *L, DominatorTree *DT) { // If the user is in the loop, use the preinc value. 
if (L->contains(User)) return false; From stoklund at 2pi.dk Mon Apr 5 22:12:43 2010 From: stoklund at 2pi.dk (Jakob Stoklund Olesen) Date: Tue, 06 Apr 2010 03:12:43 -0000 Subject: [llvm-commits] [llvm] r100517 - /llvm/trunk/include/llvm/Support/MathExtras.h Message-ID: <20100406031243.84DD22A6C12C@llvm.org> Author: stoklund Date: Mon Apr 5 22:12:43 2010 New Revision: 100517 URL: http://llvm.org/viewvc/llvm-project?rev=100517&view=rev Log: Avoid overflowing a signed integer which triggers undefined behaviour. Overflowing an unsigned integer is fine and behaves as you would expect. Also fix a pasto, allowing SignExtend64 to take a 64-bit argument. Modified: llvm/trunk/include/llvm/Support/MathExtras.h Modified: llvm/trunk/include/llvm/Support/MathExtras.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Support/MathExtras.h?rev=100517&r1=100516&r2=100517&view=diff ============================================================================== --- llvm/trunk/include/llvm/Support/MathExtras.h (original) +++ llvm/trunk/include/llvm/Support/MathExtras.h Mon Apr 5 22:12:43 2010 @@ -459,14 +459,14 @@ /// SignExtend32 - Sign extend B-bit number x to 32-bit int. /// Usage int32_t r = SignExtend32<5>(x); -template <unsigned B> inline int32_t SignExtend32(int32_t x) { - return (x << (32 - B)) >> (32 - B); +template <unsigned B> inline int32_t SignExtend32(uint32_t x) { + return int32_t(x << (32 - B)) >> (32 - B); } /// SignExtend64 - Sign extend B-bit number x to 64-bit int. 
/// Usage int64_t r = SignExtend64<5>(x); -template <unsigned B> inline int64_t SignExtend64(int32_t x) { - return (x << (64 - B)) >> (64 - B); +template <unsigned B> inline int64_t SignExtend64(uint64_t x) { + return int64_t(x << (64 - B)) >> (64 - B); } } // End llvm namespace From resistor at mac.com Mon Apr 5 23:20:48 2010 From: resistor at mac.com (Owen Anderson) Date: Tue, 06 Apr 2010 04:20:48 -0000 Subject: [llvm-commits] [llvm] r100519 - /llvm/trunk/lib/VMCore/Pass.cpp Message-ID: <20100406042048.601B82A6C12C@llvm.org> Author: resistor Date: Mon Apr 5 23:20:48 2010 New Revision: 100519 URL: http://llvm.org/viewvc/llvm-project?rev=100519&view=rev Log: Use a ManagedCleanup to prevent leaking the PassRegistrar map. It breaks the use case where someone wants to resurrect LLVM after calling llvm_shutdown, but I'm not aware of any clients that are affected by this. Modified: llvm/trunk/lib/VMCore/Pass.cpp Modified: llvm/trunk/lib/VMCore/Pass.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/Pass.cpp?rev=100519&r1=100518&r2=100519&view=diff ============================================================================== --- llvm/trunk/lib/VMCore/Pass.cpp (original) +++ llvm/trunk/lib/VMCore/Pass.cpp Mon Apr 5 23:20:48 2010 @@ -294,13 +294,8 @@ static std::vector *Listeners = 0; static sys::SmartMutex ListenersLock; -// FIXME: This should use ManagedStatic to manage the pass registrar. -// Unfortunately, we can't do this, because passes are registered with static -// ctors, and having llvm_shutdown clear this map prevents successful -// ressurection after llvm_shutdown is run. +static PassRegistrar *PassRegistrarObj = 0; static PassRegistrar *getPassRegistrar() { - static PassRegistrar *PassRegistrarObj = 0; - // Use double-checked locking to safely initialize the registrar when // we're running in multithreaded mode. PassRegistrar* tmp = PassRegistrarObj; @@ -323,6 +318,19 @@ return PassRegistrarObj; } +// FIXME: We use ManagedCleanup to erase the pass registrar on shutdown. 
+// Unfortunately, passes are registered with static ctors, and having +// llvm_shutdown clear this map prevents successful ressurection after +// llvm_shutdown is run. Ideally we should find a solution so that we don't +// leak the map, AND can still resurrect after shutdown. +void cleanupPassRegistrar(void*) { + if (PassRegistrarObj) { + delete PassRegistrarObj; + PassRegistrarObj = 0; + } +} +ManagedCleanup<&cleanupPassRegistrar> registrarCleanup; + // getPassInfo - Return the PassInfo data structure that corresponds to this // pass... const PassInfo *Pass::getPassInfo() const { From clattner at apple.com Mon Apr 5 23:30:15 2010 From: clattner at apple.com (Chris Lattner) Date: Mon, 5 Apr 2010 21:30:15 -0700 Subject: [llvm-commits] [llvm] r100487 - /llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp In-Reply-To: <697898B9-F60D-4CED-A285-C55258950627@gmail.com> References: <20100405225921.E3BD02A6C12D@llvm.org> <11C0A7DC-6AE6-4E55-8FEC-5BC6B1E438BA@apple.com> <697898B9-F60D-4CED-A285-C55258950627@gmail.com> Message-ID: <8C3A4F7B-5E56-42FD-82E3-BB5E09D943CC@apple.com> great, thanks! On Apr 5, 2010, at 5:54 PM, Bill Wendling wrote: > On Apr 5, 2010, at 4:31 PM, Chris Lattner wrote: >> On Apr 5, 2010, at 3:59 PM, Bill Wendling wrote: >> >>> Author: void >>> Date: Mon Apr 5 17:59:21 2010 >>> New Revision: 100487 >>> >>> URL: http://llvm.org/viewvc/llvm-project?rev=100487&view=rev >>> Log: >>> Output floating point representations in DWARF format. This is done by outputing >>> the FP encoding directly as a hex representation. >> >> Did you verify that this works on both big and little endian systems? >> > Yes. 
> > -bw > > From baldrick at free.fr Tue Apr 6 02:13:43 2010 From: baldrick at free.fr (Duncan Sands) Date: Tue, 06 Apr 2010 09:13:43 +0200 Subject: [llvm-commits] [llvm] r100459 - /llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp In-Reply-To: <20100405202408.56E612A6C12C@llvm.org> References: <20100405202408.56E612A6C12C@llvm.org> Message-ID: <4BBADF27.3040608@free.fr> Hi Dan, > + // FIXME: If the memcpy is volatile (isVol), lowering it to a plain libc > + // memcpy is not guaranteed to be safe. libc memcpys aren't required to > + // respect volatile, so they may do things like read or write memory > + // beyond the given memory regions. But fixing this isn't easy, and most > + // people don't care. are volatile loads/stores really not allowed to read or write beyond the given memory regions? I don't know about C, but Ada has both "atomic" and "volatile", where "atomic" means that exactly the given piece of memory is read/written, and it must be done in a single (atomic) processor operation. As far as I know "volatile" is the same as in C, and doesn't imply either of these things. That said, from a quality of implementation point of view it would be good if volatile loads/stores in LLVM turn into atomic processor operations whenever possible (I did some codegen work to ensure this in a bunch of cases a year or two ago). Also, some memcpy implementations do stuff like: zero out the destination before doing the copy (this speeds things up on some machines due to cache effects). That means that the "volatile" copy results in multiple stores to each of your I/O registers, once for the zero and once for the actual copy. Ciao, Duncan. 
From baldrick at free.fr Tue Apr 6 02:21:34 2010 From: baldrick at free.fr (Duncan Sands) Date: Tue, 06 Apr 2010 09:21:34 +0200 Subject: [llvm-commits] [llvm] r100492 - in /llvm/trunk: include/llvm/MC/MCParser/AsmParser.h lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp lib/MC/MCParser/AsmParser.cpp In-Reply-To: <20100405231542.C602B2A6C12D@llvm.org> References: <20100405231542.C602B2A6C12D@llvm.org> Message-ID: <4BBAE0FE.9090304@free.fr> > Give AsmParser an option to control whether it finalizes > the stream. New demo: > > $ clang asm.c -S -o - -emit-llvm | llc -filetype=obj -o t.o > $ otool -tv t.o > t.o: > (__TEXT,__text) section > _foo: > 0000000000000000 subq $0x08,%rsp > 0000000000000004 movl %edi,(%rsp) > 0000000000000007 movl %edi,%eax > 0000000000000009 incl %eax > 000000000000000b movl %eax,(%rsp) > 000000000000000e movl %eax,0x04(%rsp) > 0000000000000012 addq $0x08,%rsp > 0000000000000016 ret Nice! Duncan. From wangmp at apple.com Tue Apr 6 03:27:51 2010 From: wangmp at apple.com (Mon P Wang) Date: Tue, 06 Apr 2010 08:27:51 -0000 Subject: [llvm-commits] [llvm] r100521 - /llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp Message-ID: <20100406082751.E615D2A6C12C@llvm.org> Author: wangmp Date: Tue Apr 6 03:27:51 2010 New Revision: 100521 URL: http://llvm.org/viewvc/llvm-project?rev=100521&view=rev Log: Remove assert to treat memmove and memset like memcpy Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp?rev=100521&r1=100520&r2=100521&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (original) +++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp Tue Apr 6 03:27:51 2010 @@ -3571,8 +3571,10 @@ if (Result.getNode()) return Result; + // FIXME: If the memmove is volatile, lowering it to plain 
libc memmove may + // not be safe. See memcpy above for more details. + // Emit a library call. - assert(!isVol && "library memmove does not support volatile"); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; Entry.Ty = TLI.getTargetData()->getIntPtrType(*getContext()); @@ -3620,8 +3622,7 @@ if (Result.getNode()) return Result; - // Emit a library call. - assert(!isVol && "library memset does not support volatile"); + // Emit a library call. const Type *IntPtrTy = TLI.getTargetData()->getIntPtrType(*getContext()); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; From criswell at uiuc.edu Tue Apr 6 09:52:15 2010 From: criswell at uiuc.edu (John Criswell) Date: Tue, 06 Apr 2010 14:52:15 -0000 Subject: [llvm-commits] [llvm] r100525 - /llvm/trunk/docs/ReleaseNotes.html Message-ID: <20100406145215.2BBD92A6C12C@llvm.org> Author: criswell Date: Tue Apr 6 09:52:14 2010 New Revision: 100525 URL: http://llvm.org/viewvc/llvm-project?rev=100525&view=rev Log: Adding SAFECode to the list of projects that work with LLVM 2.7. Modified: llvm/trunk/docs/ReleaseNotes.html Modified: llvm/trunk/docs/ReleaseNotes.html URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/docs/ReleaseNotes.html?rev=100525&r1=100524&r2=100525&view=diff ============================================================================== --- llvm/trunk/docs/ReleaseNotes.html (original) +++ llvm/trunk/docs/ReleaseNotes.html Tue Apr 6 09:52:14 2010 @@ -456,6 +456,22 @@ + + + +
+

+SAFECode is a memory safe C +compiler built using LLVM. It takes standard, unannotated C code, analyzes the +code to ensure that memory accesses and array indexing operations are safe, and +instruments the code with run-time checks when safety cannot be proven +statically. +

+
+ +
What's New in LLVM 2.7? From stuart at apple.com Tue Apr 6 12:19:32 2010 From: stuart at apple.com (Stuart Hastings) Date: Tue, 06 Apr 2010 17:19:32 -0000 Subject: [llvm-commits] [llvm] r100530 - in /llvm/trunk: lib/CodeGen/AsmPrinter/DwarfDebug.cpp lib/CodeGen/AsmPrinter/DwarfDebug.h test/FrontendC++/2010-02-17-DbgArtificialArg.cpp Message-ID: <20100406171932.C1F522A6C12C@llvm.org> Author: stuart Date: Tue Apr 6 12:19:32 2010 New Revision: 100530 URL: http://llvm.org/viewvc/llvm-project?rev=100530&view=rev Log: Revise debug info machinery to digest nested functions and classes. A certain GDB testsuite case (local.cc) has a function nested inside a class nested inside another function. GCC presents the innermost function to llvm-convert first. Heretofore, the debug info mistakenly placed the inner function at module scope. This patch walks the GCC context links and instantiates the outer class and function so the debug info is properly nested. Radar 7426545. Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h llvm/trunk/test/FrontendC++/2010-02-17-DbgArtificialArg.cpp Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp?rev=100530&r1=100529&r2=100530&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp (original) +++ llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp Tue Apr 6 12:19:32 2010 @@ -800,12 +800,26 @@ } else if (Context.isNameSpace()) { DIE *ContextDIE = getOrCreateNameSpace(DINameSpace(Context.getNode())); ContextDIE->addChild(Die); + } else if (Context.isSubprogram()) { + DIE *ContextDIE = createSubprogramDIE(DISubprogram(Context.getNode()), + /*MakeDecl=*/false); + ContextDIE->addChild(Die); } else if (DIE *ContextDIE = ModuleCU->getDIE(Context.getNode())) ContextDIE->addChild(Die); else ModuleCU->addDie(Die); } +/// 
isFunctionContext - True if given Context is nested within a function. +bool DwarfDebug::isFunctionContext(DIE *context) { + if (context == (DIE *)0) + return false; + if (context->getTag() == dwarf::DW_TAG_subprogram) + return true; + else + return isFunctionContext(context->getParent()); +} + /// getOrCreateTypeDIE - Find existing DIE or create new DIE for the /// given DIType. DIE *DwarfDebug::getOrCreateTypeDIE(DIType Ty) { @@ -987,6 +1001,10 @@ if (DIDescriptor(ContainingType.getNode()).isCompositeType()) addDIEEntry(&Buffer, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, getOrCreateTypeDIE(DIType(ContainingType.getNode()))); + else { + DIDescriptor Context = CTy.getContext(); + addToContextOwner(&Buffer, Context); + } break; } default: @@ -1802,19 +1820,15 @@ void DwarfDebug::constructSubprogramDIE(MDNode *N) { DISubprogram SP(N); - // Check for pre-existence. - if (ModuleCU->getDIE(N)) - return; - if (!SP.isDefinition()) // This is a method declaration which will be handled while constructing // class type. return; - DIE *SubprogramDie = createSubprogramDIE(SP); - - // Add to map. - ModuleCU->insertDIE(N, SubprogramDie); + // Check for pre-existence. + DIE *SubprogramDie = ModuleCU->getDIE(N); + if (!SubprogramDie) + SubprogramDie = createSubprogramDIE(SP); // Add to context owner. addToContextOwner(SubprogramDie, SP.getContext()); Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h?rev=100530&r1=100529&r2=100530&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h (original) +++ llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h Tue Apr 6 12:19:32 2010 @@ -332,6 +332,9 @@ /// addToContextOwner - Add Die into the list of its context owner's children. void addToContextOwner(DIE *Die, DIDescriptor Context); + /// isFunctionContext - True if given Context is nested within a function. 
+ bool isFunctionContext(DIE *context); + /// addType - Add a new type attribute to the specified entity. void addType(DIE *Entity, DIType Ty); Modified: llvm/trunk/test/FrontendC++/2010-02-17-DbgArtificialArg.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/FrontendC%2B%2B/2010-02-17-DbgArtificialArg.cpp?rev=100530&r1=100529&r2=100530&view=diff ============================================================================== --- llvm/trunk/test/FrontendC++/2010-02-17-DbgArtificialArg.cpp (original) +++ llvm/trunk/test/FrontendC++/2010-02-17-DbgArtificialArg.cpp Tue Apr 6 12:19:32 2010 @@ -1,4 +1,4 @@ -// RUN: %llvmgcc -g -S %s -o - | grep DW_TAG_pointer_type | grep "i32 524303, metadata .., metadata ..., metadata .., i32 ., i64 .., i64 .., i64 0, i32 64, metadata ..." +// RUN: %llvmgcc -g -S %s -o - | FileCheck %s // Here, second to last argument "i32 64" indicates that artificial type is set. // Test to artificial attribute attahed to "this" pointer type. // Radar 7655792 and 7655002 @@ -10,5 +10,7 @@ int foo() { A a; + // Matching "i32 64, metadata !} ; [ DW_TAG_pointer_type ]" + // CHECK: i32 64, metadata {{![0-9]+\} ; \[ DW_TAG_pointer_type \]}} return a.fn1(1); } From stuart at apple.com Tue Apr 6 12:19:48 2010 From: stuart at apple.com (Stuart Hastings) Date: Tue, 06 Apr 2010 17:19:48 -0000 Subject: [llvm-commits] [llvm-gcc-4.2] r100531 - in /llvm-gcc-4.2/trunk/gcc: llvm-backend.cpp llvm-convert.cpp llvm-debug.cpp llvm-debug.h llvm-internal.h Message-ID: <20100406171948.3F1562A6C12C@llvm.org> Author: stuart Date: Tue Apr 6 12:19:47 2010 New Revision: 100531 URL: http://llvm.org/viewvc/llvm-project?rev=100531&view=rev Log: Revise debug info machinery to digest nested functions and classes. A certain GDB testsuite case (local.cc) has a function nested inside a class nested inside another function. GCC presents the innermost function to llvm-convert first. Heretofore, the debug info mistakenly placed the inner function at module scope. 
This patch walks the GCC context links and instantiates the outer class and function so the debug info is properly nested. Radar 7426545. Modified: llvm-gcc-4.2/trunk/gcc/llvm-backend.cpp llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp llvm-gcc-4.2/trunk/gcc/llvm-debug.cpp llvm-gcc-4.2/trunk/gcc/llvm-debug.h llvm-gcc-4.2/trunk/gcc/llvm-internal.h Modified: llvm-gcc-4.2/trunk/gcc/llvm-backend.cpp URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/gcc/llvm-backend.cpp?rev=100531&r1=100530&r2=100531&view=diff ============================================================================== --- llvm-gcc-4.2/trunk/gcc/llvm-backend.cpp (original) +++ llvm-gcc-4.2/trunk/gcc/llvm-backend.cpp Tue Apr 6 12:19:47 2010 @@ -512,6 +512,8 @@ if (!flag_pch_file && debug_info_level > DINFO_LEVEL_NONE) TheDebugInfo = new DebugInfo(TheModule); + else + TheDebugInfo = 0; } /// performLateBackendInitialization - Set backend options that may only be @@ -533,8 +535,6 @@ } void llvm_lang_dependent_init(const char *Name) { - if (TheDebugInfo) - TheDebugInfo->Initialize(); if (Name) TheModule->setModuleIdentifier(Name); } @@ -1010,7 +1010,7 @@ // Convert the AST to raw/ugly LLVM code. Function *Fn; { - TreeToLLVM Emitter(fndecl); + TreeToLLVM *Emitter = getTreeToLLVM(fndecl); enum symbol_visibility vis = DECL_VISIBILITY (fndecl); if (vis != VISIBILITY_DEFAULT) @@ -1018,7 +1018,7 @@ // visibility that's not supported by the target. targetm.asm_out.visibility(fndecl, vis); - Fn = Emitter.EmitFunction(); + Fn = Emitter->EmitFunction(); } #if 0 @@ -1317,6 +1317,10 @@ timevar_push(TV_LLVM_GLOBALS); + // Insure debug info machinery initialized, even if current module + // lacks functions. + getTreeToLLVM(decl); + // Get or create the global variable now. 
GlobalVariable *GV = cast(DECL_LLVM(decl)); Modified: llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp?rev=100531&r1=100530&r2=100531&view=diff ============================================================================== --- llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp (original) +++ llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp Tue Apr 6 12:19:47 2010 @@ -148,7 +148,6 @@ //===----------------------------------------------------------------------===// /// TheTreeToLLVM - Keep track of the current function being compiled. -static TreeToLLVM *TheTreeToLLVM = 0; const TargetData &getTargetData() { return *TheTarget->getTargetData(); @@ -157,20 +156,22 @@ /// EmitDebugInfo - Return true if debug info is to be emitted for current /// function. bool TreeToLLVM::EmitDebugInfo() { - if (TheDebugInfo && !DECL_IGNORED_P(getFUNCTION_DECL())) + if (TheDebugInfo && + (!getFUNCTION_DECL() || !DECL_IGNORED_P(getFUNCTION_DECL()))) return true; return false; } -TreeToLLVM::TreeToLLVM(tree fndecl) : - TD(getTargetData()), Builder(Context, *TheFolder) { - FnDecl = fndecl; +TreeToLLVM::TreeToLLVM(tree decl) : + TD(getTargetData()), Builder(Context, *TheFolder) { + // If this isn't a FUNCITON_DECL, use only the source loc info from it. + FnDecl = (decl && TREE_CODE(decl) == FUNCTION_DECL) ? 
decl : NULL_TREE; Fn = 0; ReturnBB = UnwindBB = 0; ReturnOffset = 0; if (EmitDebugInfo()) { - expanded_location Location = expand_location(DECL_SOURCE_LOCATION (fndecl)); + expanded_location Location = expand_location(DECL_SOURCE_LOCATION (decl)); if (Location.file) { TheDebugInfo->setLocationFile(Location.file); @@ -179,6 +180,7 @@ TheDebugInfo->setLocationFile(""); TheDebugInfo->setLocationLine(0); } + TheDebugInfo->Initialize(); } AllocaInsertionPoint = 0; @@ -188,13 +190,25 @@ FuncEHException = 0; FuncEHSelector = 0; FuncEHGetTypeID = 0; - - assert(TheTreeToLLVM == 0 && "Reentering function creation?"); - TheTreeToLLVM = this; } -TreeToLLVM::~TreeToLLVM() { - TheTreeToLLVM = 0; + +TreeToLLVM::~TreeToLLVM() {} + +TreeToLLVM *getTreeToLLVM(tree decl) { + // FIXME: should this static move into the TreeToLLVM class decl? + static std::map FunctionMap; + TreeToLLVM *newTreeToLLVM = FunctionMap[decl]; + if (!newTreeToLLVM) { + tree fndecl = (decl && TREE_CODE(decl) == FUNCTION_DECL) ? decl : NULL_TREE; + newTreeToLLVM = FunctionMap[fndecl] = new TreeToLLVM(decl); + } + return newTreeToLLVM; +} + +TreeToLLVM *getCurrentTreeToLLVM(void) { + assert(current_function_decl && "no current_function_decl?"); + return getTreeToLLVM(current_function_decl); } /// getLabelDeclBlock - Lazily get and create a basic block for the specified @@ -308,7 +322,8 @@ assert(TREE_CODE(TREE_TYPE(ResultDecl)) == REFERENCE_TYPE && "Not type match and not passing by reference?"); // Create an alloca for the ResultDecl. - Value *Tmp = TheTreeToLLVM->CreateTemporary(AI->getType()); + TreeToLLVM *Emitter = getCurrentTreeToLLVM(); + Value *Tmp = Emitter->CreateTemporary(AI->getType()); Builder.CreateStore(AI, Tmp); SET_DECL_LLVM(ResultDecl, Tmp); @@ -451,7 +466,7 @@ } } -void TreeToLLVM::StartFunctionBody() { +Function *TreeToLLVM::StartFunctionBody() { const char *Name = ""; // Get the name of the function. 
if (tree ID = DECL_ASSEMBLER_NAME(FnDecl)) @@ -610,10 +625,10 @@ // Set the BLOCK_NUMBER()s to the depth of each lexical block. setLexicalBlockDepths(FnDecl, block_declared_vars, 1); - SeenBlocks.clear(); - - if (EmitDebugInfo()) - TheDebugInfo->EmitFunctionStart(FnDecl, Fn, Builder.GetInsertBlock()); + if (TheDebugInfo) { + TheDebugInfo->EmitFunctionStart(FnDecl); + Builder.GetInsertBlock(); + } // Loop over all of the arguments to the function, setting Argument names and // creating argument alloca's for the PARM_DECLs in case their address is @@ -628,7 +643,7 @@ ABIConverter.HandleReturnType(TREE_TYPE(TREE_TYPE(FnDecl)), FnDecl, DECL_BUILT_IN(FnDecl)); // Remember this for use by FinishFunctionBody. - TheTreeToLLVM->ReturnOffset = Client.Offset; + ReturnOffset = Client.Offset; // Prepend the static chain (if any) to the list of arguments. tree Args = static_chain ? static_chain : DECL_ARGUMENTS(FnDecl); @@ -709,12 +724,7 @@ block_declared_vars.count(TREE_VALUE(t)) == 0) EmitAutomaticVariableDecl(TREE_VALUE(t)); } - - // Push the outermost lexical block onto the RegionStack. - switchLexicalBlock(DECL_INITIAL(FnDecl)); - - // Create a new block for the return node, but don't insert it yet. - ReturnBB = BasicBlock::Create(Context, "return"); + return Fn; } Function *TreeToLLVM::FinishFunctionBody() { @@ -802,9 +812,19 @@ } Function *TreeToLLVM::EmitFunction() { - // Set up parameters and prepare for return, for the function. + // Set up parameters for the function. StartFunctionBody(); + // We'll remember the lexical BLOCKs we've seen here. + SeenBlocks.clear(); + + // FIXME: Should these two statements move to StartFunctionBody() ? + // Push the outermost lexical block onto the RegionStack. + switchLexicalBlock(DECL_INITIAL(FnDecl)); + + // Create a new block for the return node, but don't insert it yet. 
+ ReturnBB = BasicBlock::Create(Context, "return"); + // Emit the body of the function iterating over all BBs basic_block bb; edge e; @@ -2616,7 +2636,7 @@ if (!Loc) { // A value. Store to a temporary, and return the temporary's address. // Any future access to this argument will reuse the same address. - Loc = TheTreeToLLVM->CreateTemporary(TheValue->getType()); + Loc = getCurrentTreeToLLVM()->CreateTemporary(TheValue->getType()); Builder.CreateStore(TheValue, Loc); } return Loc; @@ -2656,7 +2676,7 @@ assert(ConvertType(type) == cast(RetBuf.Ptr->getType())->getElementType() && "Inconsistent result types!"); - TheTreeToLLVM->EmitAggregateCopy(*DestLoc, RetBuf, type); + getCurrentTreeToLLVM()->EmitAggregateCopy(*DestLoc, RetBuf, type); return 0; } else { // Read out the scalar return value now. @@ -2699,7 +2719,7 @@ if (DestLoc == 0) { // The result is unused, but still needs to be stored somewhere. - Value *Buf = TheTreeToLLVM->CreateTemporary(PtrArgTy->getElementType()); + Value *Buf = getCurrentTreeToLLVM()->CreateTemporary(PtrArgTy->getElementType()); CallOperands.push_back(Buf); } else if (useReturnSlot) { // Letting the call write directly to the final destination is safe and @@ -2709,7 +2729,7 @@ // Letting the call write directly to the final destination may not be // safe (eg: if DestLoc aliases a parameter) and is not required - pass // a buffer and copy it to DestLoc after the call. - RetBuf = TheTreeToLLVM->CreateTempLoc(PtrArgTy->getElementType()); + RetBuf = getCurrentTreeToLLVM()->CreateTempLoc(PtrArgTy->getElementType()); CallOperands.push_back(RetBuf.Ptr); } @@ -2730,7 +2750,7 @@ "Call returns a scalar but caller expects aggregate!"); // Create a buffer to hold the result. The result will be loaded out of // it after the call. - RetBuf = TheTreeToLLVM->CreateTempLoc(PtrArgTy->getElementType()); + RetBuf = getCurrentTreeToLLVM()->CreateTempLoc(PtrArgTy->getElementType()); CallOperands.push_back(RetBuf.Ptr); // Note the use of a shadow argument. 
@@ -2754,7 +2774,7 @@ if (Loc->getType() != CalledTy) { assert(type && "Inconsistent parameter types?"); bool isSigned = !TYPE_UNSIGNED(type); - Loc = TheTreeToLLVM->CastToAnyType(Loc, isSigned, CalledTy, false); + Loc = getCurrentTreeToLLVM()->CastToAnyType(Loc, isSigned, CalledTy, false); } } @@ -8457,18 +8477,18 @@ /// EmitLV_LABEL_DECL - Someone took the address of a label. Constant *TreeConstantToLLVM::EmitLV_LABEL_DECL(tree exp) { - assert(TheTreeToLLVM && + assert(getCurrentTreeToLLVM() && "taking the address of a label while not compiling the function!"); // Figure out which function this is for, verify it's the one we're compiling. if (DECL_CONTEXT(exp)) { assert(TREE_CODE(DECL_CONTEXT(exp)) == FUNCTION_DECL && "Address of label in nested function?"); - assert(TheTreeToLLVM->getFUNCTION_DECL() == DECL_CONTEXT(exp) && + assert(getCurrentTreeToLLVM()->getFUNCTION_DECL() == DECL_CONTEXT(exp) && "Taking the address of a label that isn't in the current fn!?"); } - return TheTreeToLLVM->EmitLV_LABEL_DECL(exp); + return getCurrentTreeToLLVM()->EmitLV_LABEL_DECL(exp); } Constant *TreeConstantToLLVM::EmitLV_COMPLEX_CST(tree exp) { Modified: llvm-gcc-4.2/trunk/gcc/llvm-debug.cpp URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/gcc/llvm-debug.cpp?rev=100531&r1=100530&r2=100531&view=diff ============================================================================== --- llvm-gcc-4.2/trunk/gcc/llvm-debug.cpp (original) +++ llvm-gcc-4.2/trunk/gcc/llvm-debug.cpp Tue Apr 6 12:19:47 2010 @@ -289,12 +289,10 @@ setCurrentLexicalBlock(desired); } -/// EmitFunctionStart - Constructs the debug code for entering a function - -/// "llvm.dbg.func.start." -void DebugInfo::EmitFunctionStart(tree FnDecl, Function *Fn, - BasicBlock *CurBB) { - setCurrentLexicalBlock(FnDecl); +/// CreateSubprogramFromFnDecl - Constructs the debug code for +/// entering a function - "llvm.dbg.func.start." 
+DISubprogram DebugInfo::CreateSubprogramFromFnDecl(tree FnDecl) { DIType FNType = getOrCreateType(TREE_TYPE(FnDecl)); std::map<tree_node *, WeakVH>::iterator I = SPCache.find(FnDecl); @@ -302,12 +300,9 @@ DISubprogram SPDecl(cast<MDNode>(I->second)); DISubprogram SP = DebugFactory.CreateSubprogramDefinition(SPDecl); - SPDecl.getNode()->replaceAllUsesWith(SP.getNode()); - - // Push function on region stack. - RegionStack.push_back(WeakVH(SP.getNode())); - RegionMap[FnDecl] = WeakVH(SP.getNode()); - return; + if (SP.getNode() != SPDecl.getNode()) + SPDecl.getNode()->replaceAllUsesWith(SP.getNode()); + return SP; } bool ArtificialFnWithAbstractOrigin = false; @@ -329,12 +324,13 @@ DISubprogram SPDecl(cast<MDNode>(I->second)); DISubprogram SP = DebugFactory.CreateSubprogramDefinition(SPDecl); - SPDecl.getNode()->replaceAllUsesWith(SP.getNode()); + if (SP.getNode() != SPDecl.getNode()) + SPDecl.getNode()->replaceAllUsesWith(SP.getNode()); // Push function on region stack. RegionStack.push_back(WeakVH(SP.getNode())); RegionMap[FnDecl] = WeakVH(SP.getNode()); - return; + return SP; } // Gather location information. @@ -356,23 +352,36 @@ } StringRef FnName = getFunctionName(FnDecl); - + // If the Function * hasn't been created yet, use a bogus value for + // the debug internal linkage bit. + bool hasInternalLinkage = true; + if (GET_DECL_LLVM_INDEX(FnDecl)) { + Function *Fn = cast<Function>DECL_LLVM(FnDecl); + hasInternalLinkage = Fn->hasInternalLinkage(); + } DISubprogram SP = DebugFactory.CreateSubprogram(SPContext, FnName, FnName, LinkageName, getOrCreateFile(Loc.file), lineno, FNType, - Fn->hasInternalLinkage(), + hasInternalLinkage, true /*definition*/, Virtuality, VIndex, ContainingType); SPCache[FnDecl] = WeakVH(SP.getNode()); + RegionMap[FnDecl] = WeakVH(SP.getNode()); + return SP; +} +/// EmitFunctionStart - Constructs the debug code for entering a function - +/// "llvm.dbg.func.start", and pushes it onto the RegionStack.
+void DebugInfo::EmitFunctionStart(tree FnDecl) { + setCurrentLexicalBlock(FnDecl); + DISubprogram SP = CreateSubprogramFromFnDecl(FnDecl); // Push function on region stack. RegionStack.push_back(WeakVH(SP.getNode())); - RegionMap[FnDecl] = WeakVH(SP.getNode()); } /// getOrCreateNameSpace - Get name space descriptor for the tree node. @@ -405,12 +414,20 @@ DIType Ty = getOrCreateType(Node); return DIDescriptor(Ty.getNode()); } else if (DECL_P (Node)) { - if (TREE_CODE (Node) == NAMESPACE_DECL) { + switch (TREE_CODE(Node)) { + default: + /// What kind of DECL is this? + return findRegion (DECL_CONTEXT (Node)); + case NAMESPACE_DECL: { DIDescriptor NSContext = findRegion(DECL_CONTEXT(Node)); DINameSpace NS = getOrCreateNameSpace(Node, NSContext); return DIDescriptor(NS.getNode()); } - return findRegion (DECL_CONTEXT (Node)); + case FUNCTION_DECL: { + DISubprogram SP = CreateSubprogramFromFnDecl(Node); + return SP; + } + } } else if (TREE_CODE(Node) == BLOCK) { // TREE_BLOCK is GCC's lexical block. // Recursively create all necessary contexts: @@ -623,7 +640,7 @@ sprintf(FwdTypeName, "fwd.type.%d", FwdTypeCount++); llvm::DIType FwdType = DebugFactory.CreateCompositeType(llvm::dwarf::DW_TAG_subroutine_type, - getOrCreateFile(main_input_filename), + findRegion(TYPE_CONTEXT(type)), FwdTypeName, getOrCreateFile(main_input_filename), 0, 0, 0, 0, 0, @@ -709,9 +726,10 @@ return Ty; } + tree type_with_context = TYPE_CONTEXT(type) ? type : TREE_TYPE(type); StringRef PName = FromTy.getName(); DIType PTy = - DebugFactory.CreateDerivedType(Tag, findRegion(TYPE_CONTEXT(type)), + DebugFactory.CreateDerivedType(Tag, findRegion(type_with_context), Tag == DW_TAG_pointer_type ? 
StringRef() : PName, getOrCreateFile(main_input_filename), Modified: llvm-gcc-4.2/trunk/gcc/llvm-debug.h URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/gcc/llvm-debug.h?rev=100531&r1=100530&r2=100531&view=diff ============================================================================== --- llvm-gcc-4.2/trunk/gcc/llvm-debug.h (original) +++ llvm-gcc-4.2/trunk/gcc/llvm-debug.h Tue Apr 6 12:19:47 2010 @@ -118,9 +118,13 @@ // by GCC's cfglayout.c:change_scope(). void change_regions(tree_node *desired, tree_node *grand); - /// EmitFunctionStart - Constructs the debug code for entering a function - + /// CreateSubprogramFromFnDecl - Constructs the debug code for entering a function - /// "llvm.dbg.func.start." - void EmitFunctionStart(tree_node *FnDecl, Function *Fn, BasicBlock *CurBB); + DISubprogram CreateSubprogramFromFnDecl(tree_node *FnDecl); + + /// EmitFunctionStart - Constructs the debug code for entering a function - + /// "llvm.dbg.func.start", and pushes it onto the RegionStack. + void EmitFunctionStart(tree_node *FnDecl); /// EmitFunctionEnd - Constructs the debug code for exiting a declarative /// region - "llvm.dbg.region.end." Modified: llvm-gcc-4.2/trunk/gcc/llvm-internal.h URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/gcc/llvm-internal.h?rev=100531&r1=100530&r2=100531&view=diff ============================================================================== --- llvm-gcc-4.2/trunk/gcc/llvm-internal.h (original) +++ llvm-gcc-4.2/trunk/gcc/llvm-internal.h Tue Apr 6 12:19:47 2010 @@ -75,7 +75,7 @@ extern llvm::Module *TheModule; /// TheDebugInfo - This object is responsible for gather all debug information. -/// If it's value is NULL then no debug information should be gathered. +/// If its value is NULL then no debug information should be gathered. extern llvm::DebugInfo *TheDebugInfo; /// TheTarget - The current target being compiled for. 
@@ -281,6 +281,7 @@ BasicBlock *ReturnBB; BasicBlock *UnwindBB; unsigned ReturnOffset; + // Lexical BLOCKS that we have previously seen and processed. treeset SeenBlocks; @@ -397,6 +398,10 @@ // allocation would change with -g, and users dislike that. void switchLexicalBlock(tree_node *exp); + /// StartFunctionBody - Start the emission of 'FnDecl', outputing all + /// declarations for parameters and setting things up. + Function *StartFunctionBody(); + private: // Helper functions. // Walk over the lexical BLOCK() tree of the given FUNCTION_DECL; @@ -405,10 +410,6 @@ // the given set. void setLexicalBlockDepths(tree_node *t, treeset &s, unsigned level); - /// StartFunctionBody - Start the emission of 'fndecl', outputing all - /// declarations for parameters and setting things up. - void StartFunctionBody(); - /// FinishFunctionBody - Once the body of the function has been emitted, this /// cleans up and returns the result function. Function *FinishFunctionBody(); @@ -608,6 +609,9 @@ Constant *EmitLV_LABEL_DECL(tree_node *exp); }; +/// Locate a previously exiting TreeToLLVM. Construct one if necessary. +TreeToLLVM *getTreeToLLVM(tree_node *fndecl); + /// TreeConstantToLLVM - An instance of this class is created and used to /// convert tree constant values to LLVM. This is primarily for things like /// global variable initializers. From evan.cheng at apple.com Tue Apr 6 12:19:55 2010 From: evan.cheng at apple.com (Evan Cheng) Date: Tue, 06 Apr 2010 17:19:55 -0000 Subject: [llvm-commits] [llvm] r100532 - in /llvm/trunk/lib/CodeGen: VirtRegRewriter.cpp VirtRegRewriter.h Message-ID: <20100406171955.E01E12A6C12C@llvm.org> Author: evancheng Date: Tue Apr 6 12:19:55 2010 New Revision: 100532 URL: http://llvm.org/viewvc/llvm-project?rev=100532&view=rev Log: Code clean up. Move includes from VirtRegRewriter.h to VirtRegRewriter.cpp. 
Modified: llvm/trunk/lib/CodeGen/VirtRegRewriter.cpp llvm/trunk/lib/CodeGen/VirtRegRewriter.h Modified: llvm/trunk/lib/CodeGen/VirtRegRewriter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/VirtRegRewriter.cpp?rev=100532&r1=100531&r2=100532&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/VirtRegRewriter.cpp (original) +++ llvm/trunk/lib/CodeGen/VirtRegRewriter.cpp Tue Apr 6 12:19:55 2010 @@ -9,7 +9,9 @@ #define DEBUG_TYPE "virtregrewriter" #include "VirtRegRewriter.h" +#include "VirtRegMap.h" #include "llvm/Function.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" Modified: llvm/trunk/lib/CodeGen/VirtRegRewriter.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/VirtRegRewriter.h?rev=100532&r1=100531&r2=100532&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/VirtRegRewriter.h (original) +++ llvm/trunk/lib/CodeGen/VirtRegRewriter.h Tue Apr 6 12:19:55 2010 @@ -10,11 +10,10 @@ #ifndef LLVM_CODEGEN_VIRTREGREWRITER_H #define LLVM_CODEGEN_VIRTREGREWRITER_H -#include "llvm/CodeGen/LiveIntervalAnalysis.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "VirtRegMap.h" - namespace llvm { + class LiveIntervals; + class MachineFunction; + class VirtRegMap; /// VirtRegRewriter interface: Implementations of this interface assign /// spilled virtual registers to stack slots, rewriting the code. 
From sabre at nondot.org Tue Apr 6 13:06:18 2010 From: sabre at nondot.org (Chris Lattner) Date: Tue, 06 Apr 2010 18:06:18 -0000 Subject: [llvm-commits] [llvm] r100538 - in /llvm/trunk: include/llvm/Support/IRReader.h include/llvm/Support/SourceMgr.h lib/AsmParser/Parser.cpp lib/Support/SourceMgr.cpp Message-ID: <20100406180618.59BEE2A6C12C@llvm.org> Author: lattner Date: Tue Apr 6 13:06:18 2010 New Revision: 100538 URL: http://llvm.org/viewvc/llvm-project?rev=100538&view=rev Log: enhance SMDiagnostic to also maintain a pointer to the SourceMgr. Add a simplified constructor for clients that don't have locations like "file not found" errors. Modified: llvm/trunk/include/llvm/Support/IRReader.h llvm/trunk/include/llvm/Support/SourceMgr.h llvm/trunk/lib/AsmParser/Parser.cpp llvm/trunk/lib/Support/SourceMgr.cpp Modified: llvm/trunk/include/llvm/Support/IRReader.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Support/IRReader.h?rev=100538&r1=100537&r2=100538&view=diff ============================================================================== --- llvm/trunk/include/llvm/Support/IRReader.h (original) +++ llvm/trunk/include/llvm/Support/IRReader.h Tue Apr 6 13:06:18 2010 @@ -38,8 +38,7 @@ std::string ErrMsg; Module *M = getLazyBitcodeModule(Buffer, Context, &ErrMsg); if (M == 0) { - Err = SMDiagnostic(SMLoc(), Buffer->getBufferIdentifier(), -1, -1, - ErrMsg, ""); + Err = SMDiagnostic(Buffer->getBufferIdentifier(), ErrMsg); // ParseBitcodeFile does not take ownership of the Buffer in the // case of an error. delete Buffer; @@ -60,8 +59,8 @@ std::string ErrMsg; MemoryBuffer *F = MemoryBuffer::getFileOrSTDIN(Filename.c_str(), &ErrMsg); if (F == 0) { - Err = SMDiagnostic(SMLoc(), Filename, -1, -1, - "Could not open input file '" + Filename + "'", ""); + Err = SMDiagnostic(Filename, + "Could not open input file '" + Filename + "'"); return 0; } @@ -82,8 +81,7 @@ // ParseBitcodeFile does not take ownership of the Buffer. 
delete Buffer; if (M == 0) - Err = SMDiagnostic(SMLoc(), Buffer->getBufferIdentifier(), - -1, -1, ErrMsg, ""); + Err = SMDiagnostic(Buffer->getBufferIdentifier(), ErrMsg); return M; } @@ -99,8 +97,8 @@ std::string ErrMsg; MemoryBuffer *F = MemoryBuffer::getFileOrSTDIN(Filename.c_str(), &ErrMsg); if (F == 0) { - Err = SMDiagnostic(SMLoc(), Filename, -1, -1, - "Could not open input file '" + Filename + "'", ""); + Err = SMDiagnostic(Filename, + "Could not open input file '" + Filename + "'"); return 0; } Modified: llvm/trunk/include/llvm/Support/SourceMgr.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Support/SourceMgr.h?rev=100538&r1=100537&r2=100538&view=diff ============================================================================== --- llvm/trunk/include/llvm/Support/SourceMgr.h (original) +++ llvm/trunk/include/llvm/Support/SourceMgr.h Tue Apr 6 13:06:18 2010 @@ -148,6 +148,7 @@ /// SMDiagnostic - Instances of this class encapsulate one diagnostic report, /// allowing printing to a raw_ostream as a caret diagnostic. class SMDiagnostic { + const SourceMgr *SM; SMLoc Loc; std::string Filename; int LineNo, ColumnNo; @@ -155,13 +156,23 @@ unsigned ShowLine : 1; public: - SMDiagnostic() : LineNo(0), ColumnNo(0), ShowLine(0) {} - SMDiagnostic(SMLoc L, const std::string &FN, int Line, int Col, + // Null diagnostic. + SMDiagnostic() : SM(0), LineNo(0), ColumnNo(0), ShowLine(0) {} + // Diagnostic with no location (e.g. file not found, command line arg error). + SMDiagnostic(const std::string &filename, const std::string &Msg, + bool showline = true) + : SM(0), Loc(), Filename(filename), LineNo(-1), ColumnNo(-1), + Message(Msg), LineContents(""), ShowLine(showline) {} + + // Diagnostic with a location. 
+ SMDiagnostic(const SourceMgr &sm, SMLoc L, const std::string &FN, + int Line, int Col, const std::string &Msg, const std::string &LineStr, bool showline = true) - : Loc(L), Filename(FN), LineNo(Line), ColumnNo(Col), Message(Msg), + : SM(&sm), Loc(L), Filename(FN), LineNo(Line), ColumnNo(Col), Message(Msg), LineContents(LineStr), ShowLine(showline) {} + const SourceMgr *getSourceMgr() const { return SM; } SMLoc getLoc() const { return Loc; } const std::string getFilename() { return Filename; } int getLineNo() const { return LineNo; } Modified: llvm/trunk/lib/AsmParser/Parser.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/AsmParser/Parser.cpp?rev=100538&r1=100537&r2=100538&view=diff ============================================================================== --- llvm/trunk/lib/AsmParser/Parser.cpp (original) +++ llvm/trunk/lib/AsmParser/Parser.cpp Tue Apr 6 13:06:18 2010 @@ -44,9 +44,9 @@ std::string ErrorStr; MemoryBuffer *F = MemoryBuffer::getFileOrSTDIN(Filename.c_str(), &ErrorStr); if (F == 0) { - Err = SMDiagnostic(SMLoc(), "", -1, -1, + Err = SMDiagnostic(Filename, "Could not open input file '" + Filename + "': " + - ErrorStr, ""); + ErrorStr); return 0; } Modified: llvm/trunk/lib/Support/SourceMgr.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/SourceMgr.cpp?rev=100538&r1=100537&r2=100538&view=diff ============================================================================== --- llvm/trunk/lib/Support/SourceMgr.cpp (original) +++ llvm/trunk/lib/Support/SourceMgr.cpp Tue Apr 6 13:06:18 2010 @@ -168,7 +168,7 @@ } PrintedMsg += Msg; - return SMDiagnostic(Loc, + return SMDiagnostic(*this, Loc, CurMB->getBufferIdentifier(), FindLineNumber(Loc, CurBuf), Loc.getPointer()-LineStart, PrintedMsg, LineStr, ShowLine); From dpatel at apple.com Tue Apr 6 13:25:02 2010 From: dpatel at apple.com (Devang Patel) Date: Tue, 6 Apr 2010 11:25:02 -0700 Subject: [llvm-commits] [llvm] r100530 - in /llvm/trunk: 
lib/CodeGen/AsmPrinter/DwarfDebug.cpp lib/CodeGen/AsmPrinter/DwarfDebug.h test/FrontendC++/2010-02-17-DbgArtificialArg.cpp In-Reply-To: <20100406171932.C1F522A6C12C@llvm.org> References: <20100406171932.C1F522A6C12C@llvm.org> Message-ID: <63F39DBB-46E2-4A79-8EEA-40E83BEF7DD2@apple.com> On Apr 6, 2010, at 10:19 AM, Stuart Hastings wrote: > > +/// isFunctionContext - True if given Context is nested within a function. > +bool DwarfDebug::isFunctionContext(DIE *context) { > + if (context == (DIE *)0) > + return false; > + if (context->getTag() == dwarf::DW_TAG_subprogram) > + return true; > + else > + return isFunctionContext(context->getParent()); > +} > + I do not see any use of this function. - Devang From stuart at apple.com Tue Apr 6 13:33:46 2010 From: stuart at apple.com (Stuart Hastings) Date: Tue, 6 Apr 2010 11:33:46 -0700 Subject: [llvm-commits] [llvm] r100530 - in /llvm/trunk: lib/CodeGen/AsmPrinter/DwarfDebug.cpp lib/CodeGen/AsmPrinter/DwarfDebug.h test/FrontendC++/2010-02-17-DbgArtificialArg.cpp In-Reply-To: <63F39DBB-46E2-4A79-8EEA-40E83BEF7DD2@apple.com> References: <20100406171932.C1F522A6C12C@llvm.org> <63F39DBB-46E2-4A79-8EEA-40E83BEF7DD2@apple.com> Message-ID: On Apr 6, 2010, at 11:25 AM, Devang Patel wrote: > > On Apr 6, 2010, at 10:19 AM, Stuart Hastings wrote: > >> >> +/// isFunctionContext - True if given Context is nested within a function. >> +bool DwarfDebug::isFunctionContext(DIE *context) { >> + if (context == (DIE *)0) >> + return false; >> + if (context->getTag() == dwarf::DW_TAG_subprogram) >> + return true; >> + else >> + return isFunctionContext(context->getParent()); >> +} >> + > > I do not see any use of this function. Dunno how I screwed that up. It was created to be invoked from DwarfDebug.cpp:DwarfDebug::updateSubprogramScopeDIE(); there's a check there that gets fooled by a nested function/class/function. I'll fix this after the BuildBots have ruled on my previous checkin. 
Thanks for pointing this out, stuart From sabre at nondot.org Tue Apr 6 13:37:22 2010 From: sabre at nondot.org (Chris Lattner) Date: Tue, 06 Apr 2010 18:37:22 -0000 Subject: [llvm-commits] [llvm] r100542 - /llvm/trunk/include/llvm/Support/SourceMgr.h Message-ID: <20100406183722.4783C2A6C12D@llvm.org> Author: lattner Date: Tue Apr 6 13:37:22 2010 New Revision: 100542 URL: http://llvm.org/viewvc/llvm-project?rev=100542&view=rev Log: this accessor doesn't need to copy the string. Modified: llvm/trunk/include/llvm/Support/SourceMgr.h Modified: llvm/trunk/include/llvm/Support/SourceMgr.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Support/SourceMgr.h?rev=100542&r1=100541&r2=100542&view=diff ============================================================================== --- llvm/trunk/include/llvm/Support/SourceMgr.h (original) +++ llvm/trunk/include/llvm/Support/SourceMgr.h Tue Apr 6 13:37:22 2010 @@ -174,7 +174,7 @@ const SourceMgr *getSourceMgr() const { return SM; } SMLoc getLoc() const { return Loc; } - const std::string getFilename() { return Filename; } + const std::string &getFilename() { return Filename; } int getLineNo() const { return LineNo; } int getColumnNo() const { return ColumnNo; } const std::string &getMessage() const { return Message; } From ggreif at gmail.com Tue Apr 6 13:45:08 2010 From: ggreif at gmail.com (Gabor Greif) Date: Tue, 06 Apr 2010 18:45:08 -0000 Subject: [llvm-commits] [llvm] r100544 - /llvm/trunk/lib/Transforms/IPO/GlobalOpt.cpp Message-ID: <20100406184508.77D5C2A6C12C@llvm.org> Author: ggreif Date: Tue Apr 6 13:45:08 2010 New Revision: 100544 URL: http://llvm.org/viewvc/llvm-project?rev=100544&view=rev Log: use CallSite to access calls vs. 
invokes uniformly and remove assumptions about operand order Modified: llvm/trunk/lib/Transforms/IPO/GlobalOpt.cpp Modified: llvm/trunk/lib/Transforms/IPO/GlobalOpt.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/IPO/GlobalOpt.cpp?rev=100544&r1=100543&r2=100544&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/IPO/GlobalOpt.cpp (original) +++ llvm/trunk/lib/Transforms/IPO/GlobalOpt.cpp Tue Apr 6 13:45:08 2010 @@ -682,16 +682,17 @@ Changed = true; } } else if (isa<CallInst>(I) || isa<InvokeInst>(I)) { - if (I->getOperand(0) == V) { + CallSite CS(I); + if (CS.getCalledValue() == V) { // Calling through the pointer! Turn into a direct call, but be careful // that the pointer is not also being passed as an argument. - I->setOperand(0, NewV); + CS.setCalledFunction(NewV); Changed = true; bool PassedAsArg = false; - for (unsigned i = 1, e = I->getNumOperands(); i != e; ++i) - if (I->getOperand(i) == V) { + for (unsigned i = 0, e = CS.arg_size(); i != e; ++i) + if (CS.getArgument(i) == V) { PassedAsArg = true; - I->setOperand(i, NewV); + CS.setArgument(i, NewV); } if (PassedAsArg) { From dpatel at apple.com Tue Apr 6 13:49:40 2010 From: dpatel at apple.com (Devang Patel) Date: Tue, 6 Apr 2010 11:49:40 -0700 Subject: [llvm-commits] [llvm-gcc-4.2] r100531 - in /llvm-gcc-4.2/trunk/gcc: llvm-backend.cpp llvm-convert.cpp llvm-debug.cpp llvm-debug.h llvm-internal.h In-Reply-To: <20100406171948.3F1562A6C12C@llvm.org> References: <20100406171948.3F1562A6C12C@llvm.org> Message-ID: <2D8A1959-2464-4BAC-A0F4-E0129AB90545@apple.com> Stuart, On Apr 6, 2010, at 10:19 AM, Stuart Hastings wrote: > Author: stuart > Date: Tue Apr 6 12:19:47 2010 > New Revision: 100531 > > URL: http://llvm.org/viewvc/llvm-project?rev=100531&view=rev > Log: > Revise debug info machinery to digest nested functions and classes. > > A certain GDB testsuite case (local.cc) has a function nested inside a > class nested inside another function.
GCC presents the innermost > function to llvm-convert first. Heretofore, the debug info mistakenly > placed the inner function at module scope. This patch walks the GCC > context links and instantiates the outer class and function so the > debug info is properly nested. Radar 7426545. > > Modified: > llvm-gcc-4.2/trunk/gcc/llvm-backend.cpp > llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp > llvm-gcc-4.2/trunk/gcc/llvm-debug.cpp > llvm-gcc-4.2/trunk/gcc/llvm-debug.h > llvm-gcc-4.2/trunk/gcc/llvm-internal.h > > Modified: llvm-gcc-4.2/trunk/gcc/llvm-backend.cpp > URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/gcc/llvm-backend.cpp?rev=100531&r1=100530&r2=100531&view=diff > ============================================================================== > --- llvm-gcc-4.2/trunk/gcc/llvm-backend.cpp (original) > +++ llvm-gcc-4.2/trunk/gcc/llvm-backend.cpp Tue Apr 6 12:19:47 2010 > @@ -512,6 +512,8 @@ > if (!flag_pch_file && > debug_info_level > DINFO_LEVEL_NONE) > TheDebugInfo = new DebugInfo(TheModule); > + else > + TheDebugInfo = 0; > } > > /// performLateBackendInitialization - Set backend options that may only be > @@ -533,8 +535,6 @@ > } > > void llvm_lang_dependent_init(const char *Name) { > - if (TheDebugInfo) > - TheDebugInfo->Initialize(); What is the motivation behind this ? > if (Name) > TheModule->setModuleIdentifier(Name); > } > @@ -1010,7 +1010,7 @@ > // Convert the AST to raw/ugly LLVM code. > Function *Fn; > { > - TreeToLLVM Emitter(fndecl); > + TreeToLLVM *Emitter = getTreeToLLVM(fndecl); > enum symbol_visibility vis = DECL_VISIBILITY (fndecl); > > if (vis != VISIBILITY_DEFAULT) > @@ -1018,7 +1018,7 @@ > // visibility that's not supported by the target. > targetm.asm_out.visibility(fndecl, vis); > > - Fn = Emitter.EmitFunction(); > + Fn = Emitter->EmitFunction(); > } > > #if 0 > @@ -1317,6 +1317,10 @@ > > timevar_push(TV_LLVM_GLOBALS); > > + // Insure debug info machinery initialized, even if current module > + // lacks functions. 
> + getTreeToLLVM(decl); > + > // Get or create the global variable now. > GlobalVariable *GV = cast(DECL_LLVM(decl)); > > > Modified: llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp > URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp?rev=100531&r1=100530&r2=100531&view=diff > ============================================================================== > --- llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp (original) > +++ llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp Tue Apr 6 12:19:47 2010 > @@ -148,7 +148,6 @@ > //===----------------------------------------------------------------------===// > > /// TheTreeToLLVM - Keep track of the current function being compiled. > -static TreeToLLVM *TheTreeToLLVM = 0; > > const TargetData &getTargetData() { > return *TheTarget->getTargetData(); > @@ -157,20 +156,22 @@ > /// EmitDebugInfo - Return true if debug info is to be emitted for current > /// function. > bool TreeToLLVM::EmitDebugInfo() { > - if (TheDebugInfo && !DECL_IGNORED_P(getFUNCTION_DECL())) > + if (TheDebugInfo && > + (!getFUNCTION_DECL() || !DECL_IGNORED_P(getFUNCTION_DECL()))) > return true; > return false; > } > > -TreeToLLVM::TreeToLLVM(tree fndecl) : > - TD(getTargetData()), Builder(Context, *TheFolder) { > - FnDecl = fndecl; > +TreeToLLVM::TreeToLLVM(tree decl) : > + TD(getTargetData()), Builder(Context, *TheFolder) { > + // If this isn't a FUNCITON_DECL, use only the source loc info from it. > + FnDecl = (decl && TREE_CODE(decl) == FUNCTION_DECL) ? decl : NULL_TREE; > Fn = 0; > ReturnBB = UnwindBB = 0; > ReturnOffset = 0; > > if (EmitDebugInfo()) { > - expanded_location Location = expand_location(DECL_SOURCE_LOCATION (fndecl)); > + expanded_location Location = expand_location(DECL_SOURCE_LOCATION (decl)); > > if (Location.file) { > TheDebugInfo->setLocationFile(Location.file); > @@ -179,6 +180,7 @@ > TheDebugInfo->setLocationFile(""); > TheDebugInfo->setLocationLine(0); > } > + TheDebugInfo->Initialize(); > } I do not think this is correct. 
DebugInfo::Initialize() should be called only once. Are you sure, you are not losing debug info with this change ? > > AllocaInsertionPoint = 0; > @@ -188,13 +190,25 @@ > FuncEHException = 0; > FuncEHSelector = 0; > FuncEHGetTypeID = 0; > - > - assert(TheTreeToLLVM == 0 && "Reentering function creation?"); > - TheTreeToLLVM = this; > } > > -TreeToLLVM::~TreeToLLVM() { > - TheTreeToLLVM = 0; > + > +TreeToLLVM::~TreeToLLVM() {} > + > +TreeToLLVM *getTreeToLLVM(tree decl) { > + // FIXME: should this static move into the TreeToLLVM class decl? > + static std::map FunctionMap; Why not? - Devang > + TreeToLLVM *newTreeToLLVM = FunctionMap[decl]; > + if (!newTreeToLLVM) { > + tree fndecl = (decl && TREE_CODE(decl) == FUNCTION_DECL) ? decl : NULL_TREE; > + newTreeToLLVM = FunctionMap[fndecl] = new TreeToLLVM(decl); > + } > + return newTreeToLLVM; > +} > + > +TreeToLLVM *getCurrentTreeToLLVM(void) { > + assert(current_function_decl && "no current_function_decl?"); > + return getTreeToLLVM(current_function_decl); > } > > /// getLabelDeclBlock - Lazily get and create a basic block for the specified > @@ -308,7 +322,8 @@ > assert(TREE_CODE(TREE_TYPE(ResultDecl)) == REFERENCE_TYPE && > "Not type match and not passing by reference?"); > // Create an alloca for the ResultDecl. > - Value *Tmp = TheTreeToLLVM->CreateTemporary(AI->getType()); > + TreeToLLVM *Emitter = getCurrentTreeToLLVM(); > + Value *Tmp = Emitter->CreateTemporary(AI->getType()); > Builder.CreateStore(AI, Tmp); > > SET_DECL_LLVM(ResultDecl, Tmp); > @@ -451,7 +466,7 @@ > } > } > > -void TreeToLLVM::StartFunctionBody() { > +Function *TreeToLLVM::StartFunctionBody() { > const char *Name = ""; > // Get the name of the function. > if (tree ID = DECL_ASSEMBLER_NAME(FnDecl)) > @@ -610,10 +625,10 @@ > // Set the BLOCK_NUMBER()s to the depth of each lexical block. 
> setLexicalBlockDepths(FnDecl, block_declared_vars, 1); > > - SeenBlocks.clear(); > - > - if (EmitDebugInfo()) > - TheDebugInfo->EmitFunctionStart(FnDecl, Fn, Builder.GetInsertBlock()); > + if (TheDebugInfo) { > + TheDebugInfo->EmitFunctionStart(FnDecl); > + Builder.GetInsertBlock(); > + } > > // Loop over all of the arguments to the function, setting Argument names and > // creating argument alloca's for the PARM_DECLs in case their address is > @@ -628,7 +643,7 @@ > ABIConverter.HandleReturnType(TREE_TYPE(TREE_TYPE(FnDecl)), FnDecl, > DECL_BUILT_IN(FnDecl)); > // Remember this for use by FinishFunctionBody. > - TheTreeToLLVM->ReturnOffset = Client.Offset; > + ReturnOffset = Client.Offset; > > // Prepend the static chain (if any) to the list of arguments. > tree Args = static_chain ? static_chain : DECL_ARGUMENTS(FnDecl); > @@ -709,12 +724,7 @@ > block_declared_vars.count(TREE_VALUE(t)) == 0) > EmitAutomaticVariableDecl(TREE_VALUE(t)); > } > - > - // Push the outermost lexical block onto the RegionStack. > - switchLexicalBlock(DECL_INITIAL(FnDecl)); > - > - // Create a new block for the return node, but don't insert it yet. > - ReturnBB = BasicBlock::Create(Context, "return"); > + return Fn; > } > > Function *TreeToLLVM::FinishFunctionBody() { > @@ -802,9 +812,19 @@ > } > > Function *TreeToLLVM::EmitFunction() { > - // Set up parameters and prepare for return, for the function. > + // Set up parameters for the function. > StartFunctionBody(); > > + // We'll remember the lexical BLOCKs we've seen here. > + SeenBlocks.clear(); > + > + // FIXME: Should these two statements move to StartFunctionBody() ? > + // Push the outermost lexical block onto the RegionStack. > + switchLexicalBlock(DECL_INITIAL(FnDecl)); > + > + // Create a new block for the return node, but don't insert it yet. 
> + ReturnBB = BasicBlock::Create(Context, "return"); > + > // Emit the body of the function iterating over all BBs > basic_block bb; > edge e; > @@ -2616,7 +2636,7 @@ > if (!Loc) { > // A value. Store to a temporary, and return the temporary's address. > // Any future access to this argument will reuse the same address. > - Loc = TheTreeToLLVM->CreateTemporary(TheValue->getType()); > + Loc = getCurrentTreeToLLVM()->CreateTemporary(TheValue->getType()); > Builder.CreateStore(TheValue, Loc); > } > return Loc; > @@ -2656,7 +2676,7 @@ > assert(ConvertType(type) == > cast(RetBuf.Ptr->getType())->getElementType() && > "Inconsistent result types!"); > - TheTreeToLLVM->EmitAggregateCopy(*DestLoc, RetBuf, type); > + getCurrentTreeToLLVM()->EmitAggregateCopy(*DestLoc, RetBuf, type); > return 0; > } else { > // Read out the scalar return value now. > @@ -2699,7 +2719,7 @@ > > if (DestLoc == 0) { > // The result is unused, but still needs to be stored somewhere. > - Value *Buf = TheTreeToLLVM->CreateTemporary(PtrArgTy->getElementType()); > + Value *Buf = getCurrentTreeToLLVM()->CreateTemporary(PtrArgTy->getElementType()); > CallOperands.push_back(Buf); > } else if (useReturnSlot) { > // Letting the call write directly to the final destination is safe and > @@ -2709,7 +2729,7 @@ > // Letting the call write directly to the final destination may not be > // safe (eg: if DestLoc aliases a parameter) and is not required - pass > // a buffer and copy it to DestLoc after the call. > - RetBuf = TheTreeToLLVM->CreateTempLoc(PtrArgTy->getElementType()); > + RetBuf = getCurrentTreeToLLVM()->CreateTempLoc(PtrArgTy->getElementType()); > CallOperands.push_back(RetBuf.Ptr); > } > > @@ -2730,7 +2750,7 @@ > "Call returns a scalar but caller expects aggregate!"); > // Create a buffer to hold the result. The result will be loaded out of > // it after the call. 
> - RetBuf = TheTreeToLLVM->CreateTempLoc(PtrArgTy->getElementType()); > + RetBuf = getCurrentTreeToLLVM()->CreateTempLoc(PtrArgTy->getElementType()); > CallOperands.push_back(RetBuf.Ptr); > > // Note the use of a shadow argument. > @@ -2754,7 +2774,7 @@ > if (Loc->getType() != CalledTy) { > assert(type && "Inconsistent parameter types?"); > bool isSigned = !TYPE_UNSIGNED(type); > - Loc = TheTreeToLLVM->CastToAnyType(Loc, isSigned, CalledTy, false); > + Loc = getCurrentTreeToLLVM()->CastToAnyType(Loc, isSigned, CalledTy, false); > } > } > > @@ -8457,18 +8477,18 @@ > > /// EmitLV_LABEL_DECL - Someone took the address of a label. > Constant *TreeConstantToLLVM::EmitLV_LABEL_DECL(tree exp) { > - assert(TheTreeToLLVM && > + assert(getCurrentTreeToLLVM() && > "taking the address of a label while not compiling the function!"); > > // Figure out which function this is for, verify it's the one we're compiling. > if (DECL_CONTEXT(exp)) { > assert(TREE_CODE(DECL_CONTEXT(exp)) == FUNCTION_DECL && > "Address of label in nested function?"); > - assert(TheTreeToLLVM->getFUNCTION_DECL() == DECL_CONTEXT(exp) && > + assert(getCurrentTreeToLLVM()->getFUNCTION_DECL() == DECL_CONTEXT(exp) && > "Taking the address of a label that isn't in the current fn!?"); > } > > - return TheTreeToLLVM->EmitLV_LABEL_DECL(exp); > + return getCurrentTreeToLLVM()->EmitLV_LABEL_DECL(exp); > } > > Constant *TreeConstantToLLVM::EmitLV_COMPLEX_CST(tree exp) { > > Modified: llvm-gcc-4.2/trunk/gcc/llvm-debug.cpp > URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/gcc/llvm-debug.cpp?rev=100531&r1=100530&r2=100531&view=diff > ============================================================================== > --- llvm-gcc-4.2/trunk/gcc/llvm-debug.cpp (original) > +++ llvm-gcc-4.2/trunk/gcc/llvm-debug.cpp Tue Apr 6 12:19:47 2010 > @@ -289,12 +289,10 @@ > setCurrentLexicalBlock(desired); > } > > -/// EmitFunctionStart - Constructs the debug code for entering a function - > -/// "llvm.dbg.func.start." 
> -void DebugInfo::EmitFunctionStart(tree FnDecl, Function *Fn, > - BasicBlock *CurBB) { > - setCurrentLexicalBlock(FnDecl); > +/// CreateSubprogramFromFnDecl - Constructs the debug code for > +/// entering a function - "llvm.dbg.func.start." > > +DISubprogram DebugInfo::CreateSubprogramFromFnDecl(tree FnDecl) { > DIType FNType = getOrCreateType(TREE_TYPE(FnDecl)); > > std::map::iterator I = SPCache.find(FnDecl); > @@ -302,12 +300,9 @@ > DISubprogram SPDecl(cast(I->second)); > DISubprogram SP = > DebugFactory.CreateSubprogramDefinition(SPDecl); > - SPDecl.getNode()->replaceAllUsesWith(SP.getNode()); > - > - // Push function on region stack. > - RegionStack.push_back(WeakVH(SP.getNode())); > - RegionMap[FnDecl] = WeakVH(SP.getNode()); > - return; > + if (SP.getNode() != SPDecl.getNode()) > + SPDecl.getNode()->replaceAllUsesWith(SP.getNode()); > + return SP; > } > > bool ArtificialFnWithAbstractOrigin = false; > @@ -329,12 +324,13 @@ > DISubprogram SPDecl(cast(I->second)); > DISubprogram SP = > DebugFactory.CreateSubprogramDefinition(SPDecl); > - SPDecl.getNode()->replaceAllUsesWith(SP.getNode()); > + if (SP.getNode() != SPDecl.getNode()) > + SPDecl.getNode()->replaceAllUsesWith(SP.getNode()); > > // Push function on region stack. > RegionStack.push_back(WeakVH(SP.getNode())); > RegionMap[FnDecl] = WeakVH(SP.getNode()); > - return; > + return SP; > } > > // Gather location information. > @@ -356,23 +352,36 @@ > } > > StringRef FnName = getFunctionName(FnDecl); > - > + // If the Function * hasn't been created yet, use a bogus value for > + // the debug internal linkage bit. 
> + bool hasInternalLinkage = true; > + if (GET_DECL_LLVM_INDEX(FnDecl)) { > + Function *Fn = castDECL_LLVM(FnDecl); > + hasInternalLinkage = Fn->hasInternalLinkage(); > + } > DISubprogram SP = > DebugFactory.CreateSubprogram(SPContext, > FnName, FnName, > LinkageName, > getOrCreateFile(Loc.file), lineno, > FNType, > - Fn->hasInternalLinkage(), > + hasInternalLinkage, > true /*definition*/, > Virtuality, VIndex, ContainingType); > > > SPCache[FnDecl] = WeakVH(SP.getNode()); > + RegionMap[FnDecl] = WeakVH(SP.getNode()); > + return SP; > +} > > +/// EmitFunctionStart - Constructs the debug code for entering a function - > +/// "llvm.dbg.func.start", and pushes it onto the RegionStack. > +void DebugInfo::EmitFunctionStart(tree FnDecl) { > + setCurrentLexicalBlock(FnDecl); > + DISubprogram SP = CreateSubprogramFromFnDecl(FnDecl); > // Push function on region stack. > RegionStack.push_back(WeakVH(SP.getNode())); > - RegionMap[FnDecl] = WeakVH(SP.getNode()); > } > > /// getOrCreateNameSpace - Get name space descriptor for the tree node. > @@ -405,12 +414,20 @@ > DIType Ty = getOrCreateType(Node); > return DIDescriptor(Ty.getNode()); > } else if (DECL_P (Node)) { > - if (TREE_CODE (Node) == NAMESPACE_DECL) { > + switch (TREE_CODE(Node)) { > + default: > + /// What kind of DECL is this? > + return findRegion (DECL_CONTEXT (Node)); > + case NAMESPACE_DECL: { > DIDescriptor NSContext = findRegion(DECL_CONTEXT(Node)); > DINameSpace NS = getOrCreateNameSpace(Node, NSContext); > return DIDescriptor(NS.getNode()); > } > - return findRegion (DECL_CONTEXT (Node)); > + case FUNCTION_DECL: { > + DISubprogram SP = CreateSubprogramFromFnDecl(Node); > + return SP; > + } > + } > } else if (TREE_CODE(Node) == BLOCK) { > // TREE_BLOCK is GCC's lexical block. 
> // Recursively create all necessary contexts: > @@ -623,7 +640,7 @@ > sprintf(FwdTypeName, "fwd.type.%d", FwdTypeCount++); > llvm::DIType FwdType = > DebugFactory.CreateCompositeType(llvm::dwarf::DW_TAG_subroutine_type, > - getOrCreateFile(main_input_filename), > + findRegion(TYPE_CONTEXT(type)), > FwdTypeName, > getOrCreateFile(main_input_filename), > 0, 0, 0, 0, 0, > @@ -709,9 +726,10 @@ > return Ty; > } > > + tree type_with_context = TYPE_CONTEXT(type) ? type : TREE_TYPE(type); > StringRef PName = FromTy.getName(); > DIType PTy = > - DebugFactory.CreateDerivedType(Tag, findRegion(TYPE_CONTEXT(type)), > + DebugFactory.CreateDerivedType(Tag, findRegion(type_with_context), > Tag == DW_TAG_pointer_type ? > StringRef() : PName, > getOrCreateFile(main_input_filename), > > Modified: llvm-gcc-4.2/trunk/gcc/llvm-debug.h > URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/gcc/llvm-debug.h?rev=100531&r1=100530&r2=100531&view=diff > ============================================================================== > --- llvm-gcc-4.2/trunk/gcc/llvm-debug.h (original) > +++ llvm-gcc-4.2/trunk/gcc/llvm-debug.h Tue Apr 6 12:19:47 2010 > @@ -118,9 +118,13 @@ > // by GCC's cfglayout.c:change_scope(). > void change_regions(tree_node *desired, tree_node *grand); > > - /// EmitFunctionStart - Constructs the debug code for entering a function - > + /// CreateSubprogramFromFnDecl - Constructs the debug code for entering a function - > /// "llvm.dbg.func.start." > - void EmitFunctionStart(tree_node *FnDecl, Function *Fn, BasicBlock *CurBB); > + DISubprogram CreateSubprogramFromFnDecl(tree_node *FnDecl); > + > + /// EmitFunctionStart - Constructs the debug code for entering a function - > + /// "llvm.dbg.func.start", and pushes it onto the RegionStack. > + void EmitFunctionStart(tree_node *FnDecl); > > /// EmitFunctionEnd - Constructs the debug code for exiting a declarative > /// region - "llvm.dbg.region.end." 
> > Modified: llvm-gcc-4.2/trunk/gcc/llvm-internal.h > URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/gcc/llvm-internal.h?rev=100531&r1=100530&r2=100531&view=diff > ============================================================================== > --- llvm-gcc-4.2/trunk/gcc/llvm-internal.h (original) > +++ llvm-gcc-4.2/trunk/gcc/llvm-internal.h Tue Apr 6 12:19:47 2010 > @@ -75,7 +75,7 @@ > extern llvm::Module *TheModule; > > /// TheDebugInfo - This object is responsible for gather all debug information. > -/// If it's value is NULL then no debug information should be gathered. > +/// If its value is NULL then no debug information should be gathered. > extern llvm::DebugInfo *TheDebugInfo; > > /// TheTarget - The current target being compiled for. > @@ -281,6 +281,7 @@ > BasicBlock *ReturnBB; > BasicBlock *UnwindBB; > unsigned ReturnOffset; > + > // Lexical BLOCKS that we have previously seen and processed. > treeset SeenBlocks; > > @@ -397,6 +398,10 @@ > // allocation would change with -g, and users dislike that. > void switchLexicalBlock(tree_node *exp); > > + /// StartFunctionBody - Start the emission of 'FnDecl', outputing all > + /// declarations for parameters and setting things up. > + Function *StartFunctionBody(); > + > private: // Helper functions. > > // Walk over the lexical BLOCK() tree of the given FUNCTION_DECL; > @@ -405,10 +410,6 @@ > // the given set. > void setLexicalBlockDepths(tree_node *t, treeset &s, unsigned level); > > - /// StartFunctionBody - Start the emission of 'fndecl', outputing all > - /// declarations for parameters and setting things up. > - void StartFunctionBody(); > - > /// FinishFunctionBody - Once the body of the function has been emitted, this > /// cleans up and returns the result function. > Function *FinishFunctionBody(); > @@ -608,6 +609,9 @@ > Constant *EmitLV_LABEL_DECL(tree_node *exp); > }; > > +/// Locate a previously exiting TreeToLLVM. Construct one if necessary. 
> +TreeToLLVM *getTreeToLLVM(tree_node *fndecl); > + > /// TreeConstantToLLVM - An instance of this class is created and used to > /// convert tree constant values to LLVM. This is primarily for things like > /// global variable initializers. > > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits From ggreif at gmail.com Tue Apr 6 13:58:22 2010 From: ggreif at gmail.com (Gabor Greif) Date: Tue, 06 Apr 2010 18:58:22 -0000 Subject: [llvm-commits] [llvm] r100546 - /llvm/trunk/lib/Transforms/IPO/GlobalOpt.cpp Message-ID: <20100406185822.DFED22A6C12C@llvm.org> Author: ggreif Date: Tue Apr 6 13:58:22 2010 New Revision: 100546 URL: http://llvm.org/viewvc/llvm-project?rev=100546&view=rev Log: const-ize predicate ValueIsOnlyUsedLocallyOrStoredToOneGlobal Modified: llvm/trunk/lib/Transforms/IPO/GlobalOpt.cpp Modified: llvm/trunk/lib/Transforms/IPO/GlobalOpt.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/IPO/GlobalOpt.cpp?rev=100546&r1=100545&r2=100546&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/IPO/GlobalOpt.cpp (original) +++ llvm/trunk/lib/Transforms/IPO/GlobalOpt.cpp Tue Apr 6 13:58:22 2010 @@ -939,17 +939,18 @@ /// to make sure that there are no complex uses of V. We permit simple things /// like dereferencing the pointer, but not storing through the address, unless /// it is to the specified global. 
-static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(Instruction *V, - GlobalVariable *GV, - SmallPtrSet &PHIs) { - for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){ - Instruction *Inst = cast(*UI); +static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(const Instruction *V, + const GlobalVariable *GV, + SmallPtrSet &PHIs) { + for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); + UI != E;++UI){ + const Instruction *Inst = cast(*UI); if (isa(Inst) || isa(Inst)) { continue; // Fine, ignore. } - if (StoreInst *SI = dyn_cast(Inst)) { + if (const StoreInst *SI = dyn_cast(Inst)) { if (SI->getOperand(0) == V && SI->getOperand(1) != GV) return false; // Storing the pointer itself... bad. continue; // Otherwise, storing through it, or storing into GV... fine. @@ -961,7 +962,7 @@ continue; } - if (PHINode *PN = dyn_cast(Inst)) { + if (const PHINode *PN = dyn_cast(Inst)) { // PHIs are ok if all uses are ok. Don't infinitely recurse through PHI // cycles. if (PHIs.insert(PN)) @@ -970,7 +971,7 @@ continue; } - if (BitCastInst *BCI = dyn_cast(Inst)) { + if (const BitCastInst *BCI = dyn_cast(Inst)) { if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(BCI, GV, PHIs)) return false; continue; @@ -1468,7 +1469,7 @@ // GEP'd. These are all things we could transform to using the global // for. { - SmallPtrSet PHIs; + SmallPtrSet PHIs; if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(CI, GV, PHIs)) return false; } From baldrick at free.fr Tue Apr 6 14:08:06 2010 From: baldrick at free.fr (Duncan Sands) Date: Tue, 06 Apr 2010 21:08:06 +0200 Subject: [llvm-commits] [llvm-gcc-4.2] r100531 - in /llvm-gcc-4.2/trunk/gcc: llvm-backend.cpp llvm-convert.cpp llvm-debug.cpp llvm-debug.h llvm-internal.h In-Reply-To: <20100406171948.3F1562A6C12C@llvm.org> References: <20100406171948.3F1562A6C12C@llvm.org> Message-ID: <4BBB8696.9000600@free.fr> Hi Stuart, > Revise debug info machinery to digest nested functions and classes. 
the comments I made about your previous version still seem to apply. Ciao, Duncan. From ggreif at gmail.com Tue Apr 6 14:14:05 2010 From: ggreif at gmail.com (Gabor Greif) Date: Tue, 06 Apr 2010 19:14:05 -0000 Subject: [llvm-commits] [llvm] r100547 - /llvm/trunk/lib/Transforms/IPO/GlobalOpt.cpp Message-ID: <20100406191405.58AC02A6C12C@llvm.org> Author: ggreif Date: Tue Apr 6 14:14:05 2010 New Revision: 100547 URL: http://llvm.org/viewvc/llvm-project?rev=100547&view=rev Log: performance: get rid of repeated dereferencing of use_iterator by caching its result Modified: llvm/trunk/lib/Transforms/IPO/GlobalOpt.cpp Modified: llvm/trunk/lib/Transforms/IPO/GlobalOpt.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/IPO/GlobalOpt.cpp?rev=100547&r1=100546&r2=100547&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/IPO/GlobalOpt.cpp (original) +++ llvm/trunk/lib/Transforms/IPO/GlobalOpt.cpp Tue Apr 6 14:14:05 2010 @@ -612,40 +612,44 @@ /// phi nodes we've seen to avoid reprocessing them. static bool AllUsesOfValueWillTrapIfNull(Value *V, SmallPtrSet &PHIs) { - for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI) - if (isa(*UI)) { + for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; + ++UI) { + User *U = *UI; + + if (isa(U)) { // Will trap. - } else if (StoreInst *SI = dyn_cast(*UI)) { + } else if (StoreInst *SI = dyn_cast(U)) { if (SI->getOperand(0) == V) { - //cerr << "NONTRAPPING USE: " << **UI; + //cerr << "NONTRAPPING USE: " << *U; return false; // Storing the value. 
} - } else if (CallInst *CI = dyn_cast<CallInst>(*UI)) { + } else if (CallInst *CI = dyn_cast<CallInst>(U)) { if (CI->getCalledValue() != V) { - //cerr << "NONTRAPPING USE: " << **UI; + //cerr << "NONTRAPPING USE: " << *U; return false; // Not calling the ptr } - } else if (InvokeInst *II = dyn_cast<InvokeInst>(*UI)) { + } else if (InvokeInst *II = dyn_cast<InvokeInst>(U)) { if (II->getCalledValue() != V) { - //cerr << "NONTRAPPING USE: " << **UI; + //cerr << "NONTRAPPING USE: " << *U; return false; // Not calling the ptr } - } else if (BitCastInst *CI = dyn_cast<BitCastInst>(*UI)) { + } else if (BitCastInst *CI = dyn_cast<BitCastInst>(U)) { if (!AllUsesOfValueWillTrapIfNull(CI, PHIs)) return false; - } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(*UI)) { + } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) { if (!AllUsesOfValueWillTrapIfNull(GEPI, PHIs)) return false; - } else if (PHINode *PN = dyn_cast<PHINode>(*UI)) { + } else if (PHINode *PN = dyn_cast<PHINode>(U)) { // If we've already seen this phi node, ignore it, it has already been // checked. if (PHIs.insert(PN) && !AllUsesOfValueWillTrapIfNull(PN, PHIs)) return false; - } else if (isa(*UI) && + } else if (isa(U) && isa(UI->getOperand(1))) { // Ignore icmp X, null } else { - //cerr << "NONTRAPPING USE: " << **UI; + //cerr << "NONTRAPPING USE: " << *U; return false; } + } return true; } @@ -653,19 +657,22 @@ /// from GV will trap if the loaded value is null. Note that this also permits /// comparisons of the loaded value against null, as a special case. static bool AllUsesOfLoadedValueWillTrapIfNull(GlobalVariable *GV) { - for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); UI!=E; ++UI) - if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) { + for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); + UI != E; ++UI) { + User *U = *UI; + + if (LoadInst *LI = dyn_cast<LoadInst>(U)) { SmallPtrSet PHIs; if (!AllUsesOfValueWillTrapIfNull(LI, PHIs)) return false; - } else if (isa<StoreInst>(*UI)) { + } else if (isa<StoreInst>(U)) { // Ignore stores to the global. } else { // We don't know or understand this user, bail out. 
- //cerr << "UNKNOWN USER OF GLOBAL!: " << **UI; + //cerr << "UNKNOWN USER OF GLOBAL!: " << *U; return false; } - + } return true; } @@ -941,11 +948,11 @@ /// it is to the specified global. static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(const Instruction *V, const GlobalVariable *GV, - SmallPtrSet &PHIs) { + SmallPtrSet &PHIs) { for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); - UI != E;++UI){ + UI != E; ++UI) { const Instruction *Inst = cast(*UI); - + if (isa(Inst) || isa(Inst)) { continue; // Fine, ignore. } From ggreif at gmail.com Tue Apr 6 14:24:19 2010 From: ggreif at gmail.com (Gabor Greif) Date: Tue, 06 Apr 2010 19:24:19 -0000 Subject: [llvm-commits] [llvm] r100549 - /llvm/trunk/lib/Transforms/IPO/GlobalOpt.cpp Message-ID: <20100406192419.2595C2A6C12C@llvm.org> Author: ggreif Date: Tue Apr 6 14:24:18 2010 New Revision: 100549 URL: http://llvm.org/viewvc/llvm-project?rev=100549&view=rev Log: make more two predicates constant Modified: llvm/trunk/lib/Transforms/IPO/GlobalOpt.cpp Modified: llvm/trunk/lib/Transforms/IPO/GlobalOpt.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/IPO/GlobalOpt.cpp?rev=100549&r1=100548&r2=100549&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/IPO/GlobalOpt.cpp (original) +++ llvm/trunk/lib/Transforms/IPO/GlobalOpt.cpp Tue Apr 6 14:24:18 2010 @@ -610,34 +610,34 @@ /// AllUsesOfValueWillTrapIfNull - Return true if all users of the specified /// value will trap if the value is dynamically null. PHIs keeps track of any /// phi nodes we've seen to avoid reprocessing them. 
-static bool AllUsesOfValueWillTrapIfNull(Value *V, - SmallPtrSet &PHIs) { - for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; +static bool AllUsesOfValueWillTrapIfNull(const Value *V, + SmallPtrSet &PHIs) { + for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI) { - User *U = *UI; + const User *U = *UI; if (isa(U)) { // Will trap. - } else if (StoreInst *SI = dyn_cast(U)) { + } else if (const StoreInst *SI = dyn_cast(U)) { if (SI->getOperand(0) == V) { //cerr << "NONTRAPPING USE: " << *U; return false; // Storing the value. } - } else if (CallInst *CI = dyn_cast(U)) { + } else if (const CallInst *CI = dyn_cast(U)) { if (CI->getCalledValue() != V) { //cerr << "NONTRAPPING USE: " << *U; return false; // Not calling the ptr } - } else if (InvokeInst *II = dyn_cast(U)) { + } else if (const InvokeInst *II = dyn_cast(U)) { if (II->getCalledValue() != V) { //cerr << "NONTRAPPING USE: " << *U; return false; // Not calling the ptr } - } else if (BitCastInst *CI = dyn_cast(U)) { + } else if (const BitCastInst *CI = dyn_cast(U)) { if (!AllUsesOfValueWillTrapIfNull(CI, PHIs)) return false; - } else if (GetElementPtrInst *GEPI = dyn_cast(U)) { + } else if (const GetElementPtrInst *GEPI = dyn_cast(U)) { if (!AllUsesOfValueWillTrapIfNull(GEPI, PHIs)) return false; - } else if (PHINode *PN = dyn_cast(U)) { + } else if (const PHINode *PN = dyn_cast(U)) { // If we've already seen this phi node, ignore it, it has already been // checked. if (PHIs.insert(PN) && !AllUsesOfValueWillTrapIfNull(PN, PHIs)) @@ -656,13 +656,13 @@ /// AllUsesOfLoadedValueWillTrapIfNull - Return true if all uses of any loads /// from GV will trap if the loaded value is null. Note that this also permits /// comparisons of the loaded value against null, as a special case. 
-static bool AllUsesOfLoadedValueWillTrapIfNull(GlobalVariable *GV) { - for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); +static bool AllUsesOfLoadedValueWillTrapIfNull(const GlobalVariable *GV) { + for (Value::const_use_iterator UI = GV->use_begin(), E = GV->use_end(); UI != E; ++UI) { - User *U = *UI; + const User *U = *UI; - if (LoadInst *LI = dyn_cast<LoadInst>(U)) { - SmallPtrSet PHIs; + if (const LoadInst *LI = dyn_cast<LoadInst>(U)) { + SmallPtrSet PHIs; if (!AllUsesOfValueWillTrapIfNull(LI, PHIs)) return false; } else if (isa<StoreInst>(U)) { From ggreif at gmail.com Tue Apr 6 14:32:30 2010 From: ggreif at gmail.com (Gabor Greif) Date: Tue, 06 Apr 2010 19:32:30 -0000 Subject: [llvm-commits] [llvm] r100550 - /llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp Message-ID: <20100406193230.58EDA2A6C12C@llvm.org> Author: ggreif Date: Tue Apr 6 14:32:30 2010 New Revision: 100550 URL: http://llvm.org/viewvc/llvm-project?rev=100550&view=rev Log: performance: get rid of repeated dereferencing of use_iterator by caching its result Modified: llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp Modified: llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp?rev=100550&r1=100549&r2=100550&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp (original) +++ llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp Tue Apr 6 14:32:30 2010 @@ -1702,18 +1702,20 @@ static bool isOnlyCopiedFromConstantGlobal(Value *V, Instruction *&TheCopy, bool isOffset) { for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) { - if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) + User *U = cast<User>(*UI); + + if (LoadInst *LI = dyn_cast<LoadInst>(U)) // Ignore non-volatile loads, they are always ok. 
if (!LI->isVolatile()) continue; - if (BitCastInst *BCI = dyn_cast<BitCastInst>(*UI)) { + if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) { // If uses of the bitcast are ok, we are ok. if (!isOnlyCopiedFromConstantGlobal(BCI, TheCopy, isOffset)) return false; continue; } - if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(*UI)) { + if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) { // If the GEP has all zero indices, it doesn't offset the pointer. If it // doesn't, it does. if (!isOnlyCopiedFromConstantGlobal(GEP, TheCopy, @@ -1724,7 +1726,7 @@ // If this is isn't our memcpy/memmove, reject it as something we can't // handle. - if (!isa(*UI)) + if (!isa(U)) return false; // If we already have seen a copy, reject the second one. @@ -1737,7 +1739,7 @@ // If the memintrinsic isn't using the alloca as the dest, reject it. if (UI.getOperandNo() != 1) return false; - MemIntrinsic *MI = cast<MemIntrinsic>(*UI); + MemIntrinsic *MI = cast<MemIntrinsic>(U); // If the source of the memcpy/move is not a constant global, reject it. if (!PointsToConstantGlobal(MI->getOperand(2))) From stoklund at 2pi.dk Tue Apr 6 14:48:56 2010 From: stoklund at 2pi.dk (Jakob Stoklund Olesen) Date: Tue, 06 Apr 2010 19:48:56 -0000 Subject: [llvm-commits] [llvm] r100553 - in /llvm/trunk: lib/Target/X86/SSEDomainFix.cpp test/CodeGen/X86/2010-04-06-SSEDomainFixCrash.ll Message-ID: <20100406194856.D28562A6C12C@llvm.org> Author: stoklund Date: Tue Apr 6 14:48:56 2010 New Revision: 100553 URL: http://llvm.org/viewvc/llvm-project?rev=100553&view=rev Log: Don't try to collapse DomainValues onto an incompatible SSE domain. This fixes the Bullet regression on i386/nocona. 
Added: llvm/trunk/test/CodeGen/X86/2010-04-06-SSEDomainFixCrash.ll Modified: llvm/trunk/lib/Target/X86/SSEDomainFix.cpp Modified: llvm/trunk/lib/Target/X86/SSEDomainFix.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/SSEDomainFix.cpp?rev=100553&r1=100552&r2=100553&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/SSEDomainFix.cpp (original) +++ llvm/trunk/lib/Target/X86/SSEDomainFix.cpp Tue Apr 6 14:48:56 2010 @@ -216,8 +216,15 @@ if (LiveRegs && (dv = LiveRegs[rx])) { if (dv->isCollapsed()) dv->addDomain(domain); - else + else if (dv->hasDomain(domain)) Collapse(dv, domain); + else { + // This is an incompatible open DomainValue. Collapse it to whatever and force + // the new value into domain. This costs a domain crossing. + Collapse(dv, dv->getFirstDomain()); + assert(LiveRegs[rx] && "Not live after collapse?"); + LiveRegs[rx]->addDomain(domain); + } } else { // Set up basic collapsed DomainValue. SetLiveReg(rx, Alloc(domain)); @@ -281,8 +288,9 @@ // We have a live DomainValue from more than one predecessor. if (LiveRegs[rx]->isCollapsed()) { // We are already collapsed, but predecessor is not. Force him. 
- if (!pdv->isCollapsed()) - Collapse(pdv, LiveRegs[rx]->getFirstDomain()); + unsigned domain = LiveRegs[rx]->getFirstDomain(); + if (!pdv->isCollapsed() && pdv->hasDomain(domain)) + Collapse(pdv, domain); continue; } @@ -290,7 +298,7 @@ if (!pdv->isCollapsed()) Merge(LiveRegs[rx], pdv); else - Collapse(LiveRegs[rx], pdv->getFirstDomain()); + Force(rx, pdv->getFirstDomain()); } } } Added: llvm/trunk/test/CodeGen/X86/2010-04-06-SSEDomainFixCrash.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/2010-04-06-SSEDomainFixCrash.ll?rev=100553&view=auto ============================================================================== --- llvm/trunk/test/CodeGen/X86/2010-04-06-SSEDomainFixCrash.ll (added) +++ llvm/trunk/test/CodeGen/X86/2010-04-06-SSEDomainFixCrash.ll Tue Apr 6 14:48:56 2010 @@ -0,0 +1,130 @@ +; RUN: llc < %s -O3 -relocation-model=pic -disable-fp-elim -mcpu=nocona +; +; This test case is reduced from Bullet. It crashes SSEDomainFix. +; +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32" +target triple = "i386-apple-darwin10.0" + +%struct.CONTACT_KEY_TOKEN_COMP = type <{ i8 }> +%struct.GIM_AABB = type { %struct.btSimdScalar, %struct.btSimdScalar } +%struct.HullDesc = type { i32, i32, %struct.btSimdScalar*, i32, float, i32, i32 } +%struct.HullLibrary = type { %"struct.btAlignedObjectArray", %"struct.btAlignedObjectArray" } +%struct.HullResult = type { i8, i32, %"struct.btAlignedObjectArray", i32, i32, %"struct.btAlignedObjectArray" } +%struct.btActionInterface = type { i32 (...)** } +%"struct.btAlignedObjectArray" = type { %struct.CONTACT_KEY_TOKEN_COMP, i32, i32, i8*, i8 } +%"struct.btAlignedObjectArray" = type { %struct.CONTACT_KEY_TOKEN_COMP, i32, i32, %struct.btCollisionObject**, i8 } +%"struct.btAlignedObjectArray" = type { %struct.CONTACT_KEY_TOKEN_COMP, i32, i32, %"struct.btDbvt::sStkCLN"*, i8 } +%"struct.btAlignedObjectArray" = type { 
%struct.CONTACT_KEY_TOKEN_COMP, i32, i32, %struct.btHullTriangle**, i8 } +%"struct.btAlignedObjectArray" = type { %struct.CONTACT_KEY_TOKEN_COMP, i32, i32, %"struct.btSoftBody::Anchor"*, i8 } +%"struct.btAlignedObjectArray" = type { %struct.CONTACT_KEY_TOKEN_COMP, i32, i32, %"struct.btSoftBody::Cluster"**, i8 } +%"struct.btAlignedObjectArray" = type { %struct.CONTACT_KEY_TOKEN_COMP, i32, i32, %"struct.btSoftBody::Face"*, i8 } +%"struct.btAlignedObjectArray" = type { %struct.CONTACT_KEY_TOKEN_COMP, i32, i32, %"struct.btSoftBody::Joint"**, i8 } +%"struct.btAlignedObjectArray" = type { %struct.CONTACT_KEY_TOKEN_COMP, i32, i32, %"struct.btSoftBody::Link"*, i8 } +%"struct.btAlignedObjectArray" = type { %struct.CONTACT_KEY_TOKEN_COMP, i32, i32, %"struct.btSoftBody::Material"**, i8 } +%"struct.btAlignedObjectArray" = type { %struct.CONTACT_KEY_TOKEN_COMP, i32, i32, %"struct.btSoftBody::Node"**, i8 } +%"struct.btAlignedObjectArray" = type { %struct.CONTACT_KEY_TOKEN_COMP, i32, i32, %"struct.btSoftBody::Node"*, i8 } +%"struct.btAlignedObjectArray" = type { %struct.CONTACT_KEY_TOKEN_COMP, i32, i32, %"struct.btSoftBody::Note"*, i8 } +%"struct.btAlignedObjectArray" = type { %struct.CONTACT_KEY_TOKEN_COMP, i32, i32, %"struct.btSoftBody::RContact"*, i8 } +%"struct.btAlignedObjectArray" = type { %struct.CONTACT_KEY_TOKEN_COMP, i32, i32, %"struct.btSoftBody::SContact"*, i8 } +%"struct.btAlignedObjectArray" = type { %struct.CONTACT_KEY_TOKEN_COMP, i32, i32, %"struct.btSoftBody::Tetra"*, i8 } +%"struct.btAlignedObjectArray" = type { %struct.CONTACT_KEY_TOKEN_COMP, i32, i32, i32*, i8 } +%"struct.btAlignedObjectArray::Cell*>" = type { %struct.CONTACT_KEY_TOKEN_COMP, i32, i32, %"struct.btSparseSdf<3>::Cell"**, i8 } +%"struct.btAlignedObjectArray" = type { %struct.CONTACT_KEY_TOKEN_COMP, i32, i32, %struct.btTypedConstraint**, i8 } +%"struct.btAlignedObjectArray" = type { %struct.CONTACT_KEY_TOKEN_COMP, i32, i32, %struct.btSimdScalar*, i8 } +%"struct.btAlignedObjectArray" = type { 
%struct.CONTACT_KEY_TOKEN_COMP, i32, i32, float*, i8 } +%struct.btBroadphaseProxy = type { i8*, i16, i16, i8*, i32, %struct.btSimdScalar, %struct.btSimdScalar } +%struct.btCollisionObject = type { i32 (...)**, %struct.btTransform, %struct.btTransform, %struct.btSimdScalar, %struct.btSimdScalar, %struct.btSimdScalar, i8, float, %struct.btBroadphaseProxy*, %struct.btCollisionShape*, %struct.btCollisionShape*, i32, i32, i32, i32, float, float, float, i8*, i32, float, float, float, i8, [7 x i8] } +%struct.btCollisionShape = type { i32 (...)**, i32, i8* } +%struct.btDbvt = type { %struct.btDbvtNode*, %struct.btDbvtNode*, i32, i32, i32, %"struct.btAlignedObjectArray" } +%"struct.btDbvt::sStkCLN" = type { %struct.btDbvtNode*, %struct.btDbvtNode* } +%struct.btDbvtNode = type { %struct.GIM_AABB, %struct.btDbvtNode*, %"union.btDbvtNode::$_12" } +%"struct.btHashKey" = type { i32 } +%struct.btHullTriangle = type { %struct.int3, %struct.int3, i32, i32, float } +%struct.btMatrix3x3 = type { [3 x %struct.btSimdScalar] } +%"struct.btRaycastVehicle::btVehicleTuning" = type { float, float, float, float, float } +%struct.btRigidBody = type { %struct.btCollisionObject, %struct.btMatrix3x3, %struct.btSimdScalar, %struct.btSimdScalar, float, %struct.btSimdScalar, %struct.btSimdScalar, %struct.btSimdScalar, %struct.btSimdScalar, %struct.btSimdScalar, %struct.btSimdScalar, %struct.btSimdScalar, float, float, i8, float, float, float, float, float, float, %struct.btActionInterface*, %"struct.btAlignedObjectArray", i32, i32, i32 } +%struct.btSimdScalar = type { %"union.btSimdScalar::$_13" } +%struct.btSoftBody = type { [268 x i8], %"struct.btAlignedObjectArray", %"struct.btSoftBody::Config", %"struct.btRaycastVehicle::btVehicleTuning", %"struct.btSoftBody::Pose", i8*, %struct.btSoftBodyWorldInfo*, %"struct.btAlignedObjectArray", %"struct.btAlignedObjectArray", %"struct.btAlignedObjectArray", %"struct.btAlignedObjectArray", %"struct.btAlignedObjectArray", %"struct.btAlignedObjectArray", 
%"struct.btAlignedObjectArray", %"struct.btAlignedObjectArray", %"struct.btAlignedObjectArray", %"struct.btAlignedObjectArray", float, [2 x %struct.btSimdScalar], i8, %struct.btDbvt, %struct.btDbvt, %struct.btDbvt, %"struct.btAlignedObjectArray", %"struct.btAlignedObjectArray", %struct.btTransform, %"struct.btAlignedObjectArray" } +%"struct.btSoftBody::Anchor" = type { %"struct.btSoftBody::Node"*, %struct.btSimdScalar, %struct.btRigidBody*, %struct.btMatrix3x3, %struct.btSimdScalar, float } +%"struct.btSoftBody::Body" = type { %"struct.btSoftBody::Cluster"*, %struct.btRigidBody*, %struct.btCollisionObject* } +%"struct.btSoftBody::Cluster" = type { %"struct.btAlignedObjectArray", %"struct.btAlignedObjectArray", %"struct.btAlignedObjectArray", %struct.btTransform, float, float, %struct.btMatrix3x3, %struct.btMatrix3x3, %struct.btSimdScalar, [2 x %struct.btSimdScalar], [2 x %struct.btSimdScalar], i32, i32, %struct.btSimdScalar, %struct.btSimdScalar, %struct.btDbvtNode*, float, float, float, float, float, float, i8, i8, i32 } +%"struct.btSoftBody::Config" = type { i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, i32, i32, i32, i32, i32, %"struct.btAlignedObjectArray", %"struct.btAlignedObjectArray", %"struct.btAlignedObjectArray" } +%"struct.btSoftBody::Element" = type { i8* } +%"struct.btSoftBody::Face" = type { %"struct.btSoftBody::Feature", [3 x %"struct.btSoftBody::Node"*], %struct.btSimdScalar, float, %struct.btDbvtNode* } +%"struct.btSoftBody::Feature" = type { %"struct.btSoftBody::Element", %"struct.btSoftBody::Material"* } +%"struct.btSoftBody::Joint" = type { i32 (...)**, [2 x %"struct.btSoftBody::Body"], [2 x %struct.btSimdScalar], float, float, float, %struct.btSimdScalar, %struct.btSimdScalar, %struct.btMatrix3x3, i8 } +%"struct.btSoftBody::Link" = type { %"struct.btSoftBody::Feature", [2 x %"struct.btSoftBody::Node"*], float, i8, float, float, float, 
%struct.btSimdScalar } +%"struct.btSoftBody::Material" = type { %"struct.btSoftBody::Element", float, float, float, i32 } +%"struct.btSoftBody::Node" = type { %"struct.btSoftBody::Feature", %struct.btSimdScalar, %struct.btSimdScalar, %struct.btSimdScalar, %struct.btSimdScalar, %struct.btSimdScalar, float, float, %struct.btDbvtNode*, i8 } +%"struct.btSoftBody::Note" = type { %"struct.btSoftBody::Element", i8*, %struct.btSimdScalar, i32, [4 x %"struct.btSoftBody::Node"*], [4 x float] } +%"struct.btSoftBody::Pose" = type { i8, i8, float, %"struct.btAlignedObjectArray", %"struct.btAlignedObjectArray", %struct.btSimdScalar, %struct.btMatrix3x3, %struct.btMatrix3x3, %struct.btMatrix3x3 } +%"struct.btSoftBody::RContact" = type { %"struct.btSoftBody::sCti", %"struct.btSoftBody::Node"*, %struct.btMatrix3x3, %struct.btSimdScalar, float, float, float } +%"struct.btSoftBody::SContact" = type { %"struct.btSoftBody::Node"*, %"struct.btSoftBody::Face"*, %struct.btSimdScalar, %struct.btSimdScalar, float, float, [2 x float] } +%"struct.btSoftBody::Tetra" = type { %"struct.btSoftBody::Feature", [4 x %"struct.btSoftBody::Node"*], float, %struct.btDbvtNode*, [4 x %struct.btSimdScalar], float, float } +%"struct.btSoftBody::sCti" = type { %struct.btCollisionObject*, %struct.btSimdScalar, float } +%struct.btSoftBodyWorldInfo = type { float, float, float, %struct.btSimdScalar, %struct.btActionInterface*, %struct.btActionInterface*, %struct.btSimdScalar, %"struct.btSparseSdf<3>" } +%"struct.btSparseSdf<3>" = type { %"struct.btAlignedObjectArray::Cell*>", float, i32, i32, i32, i32 } +%"struct.btSparseSdf<3>::Cell" = type { [4 x [4 x [4 x float]]], [3 x i32], i32, i32, %struct.btCollisionShape*, %"struct.btSparseSdf<3>::Cell"* } +%struct.btTransform = type { %struct.btMatrix3x3, %struct.btSimdScalar } +%struct.btTypedConstraint = type { i32 (...)**, %"struct.btHashKey", i32, i32, i8, %struct.btRigidBody*, %struct.btRigidBody*, float, float, %struct.btSimdScalar, %struct.btSimdScalar, 
%struct.btSimdScalar } +%struct.int3 = type { i32, i32, i32 } +%"union.btDbvtNode::$_12" = type { [2 x %struct.btDbvtNode*] } +%"union.btSimdScalar::$_13" = type { <4 x float> } + +declare i32 @_ZN11HullLibrary16CreateConvexHullERK8HullDescR10HullResult(%struct.HullLibrary*, %struct.HullDesc* nocapture, %struct.HullResult* nocapture) ssp align 2 + +define void @_ZN17btSoftBodyHelpers4DrawEP10btSoftBodyP12btIDebugDrawi(%struct.btSoftBody* %psb, %struct.btActionInterface* %idraw, i32 %drawflags) ssp align 2 { +entry: + br i1 undef, label %bb92, label %bb58 + +bb58: ; preds = %entry + %0 = invoke i32 @_ZN11HullLibrary16CreateConvexHullERK8HullDescR10HullResult(%struct.HullLibrary* undef, %struct.HullDesc* undef, %struct.HullResult* undef) + to label %invcont64 unwind label %lpad159 ; [#uses=0] + +invcont64: ; preds = %bb58 + br i1 undef, label %invcont65, label %bb.i.i + +bb.i.i: ; preds = %invcont64 + %1 = load <4 x float>* undef, align 16 ; <<4 x float>> [#uses=5] + br i1 undef, label %bb.nph.i.i, label %invcont65 + +bb.nph.i.i: ; preds = %bb.i.i + %tmp22.i.i = bitcast <4 x float> %1 to i128 ; [#uses=1] + %tmp23.i.i = trunc i128 %tmp22.i.i to i32 ; [#uses=1] + %2 = bitcast i32 %tmp23.i.i to float ; [#uses=1] + %tmp6.i = extractelement <4 x float> %1, i32 1 ; [#uses=1] + %tmp2.i = extractelement <4 x float> %1, i32 2 ; [#uses=1] + br label %bb1.i.i + +bb1.i.i: ; preds = %bb1.i.i, %bb.nph.i.i + %.tmp6.0.i.i = phi float [ %tmp2.i, %bb.nph.i.i ], [ %5, %bb1.i.i ] ; [#uses=1] + %.tmp5.0.i.i = phi float [ %tmp6.i, %bb.nph.i.i ], [ %4, %bb1.i.i ] ; [#uses=1] + %.tmp.0.i.i = phi float [ %2, %bb.nph.i.i ], [ %3, %bb1.i.i ] ; [#uses=1] + %3 = fadd float %.tmp.0.i.i, undef ; [#uses=2] + %4 = fadd float %.tmp5.0.i.i, undef ; [#uses=2] + %5 = fadd float %.tmp6.0.i.i, undef ; [#uses=2] + br i1 undef, label %bb2.return.loopexit_crit_edge.i.i, label %bb1.i.i + +bb2.return.loopexit_crit_edge.i.i: ; preds = %bb1.i.i + %tmp8.i = insertelement <4 x float> %1, float %3, i32 0 ; <<4 x 
float>> [#uses=1] + %tmp4.i = insertelement <4 x float> %tmp8.i, float %4, i32 1 ; <<4 x float>> [#uses=1] + %tmp.i = insertelement <4 x float> %tmp4.i, float %5, i32 2 ; <<4 x float>> [#uses=1] + br label %invcont65 + +invcont65: ; preds = %bb2.return.loopexit_crit_edge.i.i, %bb.i.i, %invcont64 + %.0.i = phi <4 x float> [ %tmp.i, %bb2.return.loopexit_crit_edge.i.i ], [ undef, %invcont64 ], [ %1, %bb.i.i ] ; <<4 x float>> [#uses=1] + %tmp15.i = extractelement <4 x float> %.0.i, i32 2 ; [#uses=1] + %6 = fmul float %tmp15.i, undef ; [#uses=1] + br label %bb.i265 + +bb.i265: ; preds = %bb.i265, %invcont65 + %7 = fsub float 0.000000e+00, %6 ; [#uses=1] + store float %7, float* undef, align 4 + br label %bb.i265 + +bb92: ; preds = %entry + unreachable + +lpad159: ; preds = %bb58 + unreachable +} From isanbard at gmail.com Tue Apr 6 15:07:21 2010 From: isanbard at gmail.com (Bill Wendling) Date: Tue, 06 Apr 2010 20:07:21 -0000 Subject: [llvm-commits] [llvm] r100556 - /llvm/trunk/include/llvm/CodeGen/SelectionDAGNodes.h Message-ID: <20100406200721.C226C2A6C12C@llvm.org> Author: void Date: Tue Apr 6 15:07:21 2010 New Revision: 100556 URL: http://llvm.org/viewvc/llvm-project?rev=100556&view=rev Log: Make this hack more specific to the Apple gcc 4.2 non-LLVM compiler. Modified: llvm/trunk/include/llvm/CodeGen/SelectionDAGNodes.h Modified: llvm/trunk/include/llvm/CodeGen/SelectionDAGNodes.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/SelectionDAGNodes.h?rev=100556&r1=100555&r2=100556&view=diff ============================================================================== --- llvm/trunk/include/llvm/CodeGen/SelectionDAGNodes.h (original) +++ llvm/trunk/include/llvm/CodeGen/SelectionDAGNodes.h Tue Apr 6 15:07:21 2010 @@ -1564,7 +1564,7 @@ public: // FIXME: Remove the "noinline" attribute once is // fixed. 
-#ifdef __GNUC__ +#if __GNUC__==4 && __GNUC_MINOR__==2 && defined(__APPLE__) && !defined(__llvm__) explicit __attribute__((__noinline__)) HandleSDNode(SDValue X) #else explicit HandleSDNode(SDValue X) From grosbach at apple.com Tue Apr 6 15:26:37 2010 From: grosbach at apple.com (Jim Grosbach) Date: Tue, 06 Apr 2010 20:26:37 -0000 Subject: [llvm-commits] [llvm] r100559 - in /llvm/trunk/lib/Target/X86: X86.h X86MachineFunctionInfo.h X86RegisterInfo.cpp X86RegisterInfo.td X86TargetMachine.cpp Message-ID: <20100406202637.3DCFE2A6C12C@llvm.org> Author: grosbach Date: Tue Apr 6 15:26:37 2010 New Revision: 100559 URL: http://llvm.org/viewvc/llvm-project?rev=100559&view=rev Log: Fix PR6696 and PR6663 When a frame pointer is not otherwise required, and dynamic stack alignment is necessary solely due to the spilling of a register with larger alignment requirements than the default stack alignment, the frame pointer can be both used as a general purpose register and a frame pointer. That goes poorly, for obvious reasons. This patch brings back a bit of old logic for identifying the use of such registers and conservatively reserves the frame pointer during register allocation in such cases. For now, implement for X86 only since it's 32-bit linux which is hitting this, and we want a targeted fix for 2.7. As a follow-on, this will be expanded to handle other targets, as theoretically the problem could arise elsewhere as well. 
Modified: llvm/trunk/lib/Target/X86/X86.h llvm/trunk/lib/Target/X86/X86MachineFunctionInfo.h llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp llvm/trunk/lib/Target/X86/X86RegisterInfo.td llvm/trunk/lib/Target/X86/X86TargetMachine.cpp Modified: llvm/trunk/lib/Target/X86/X86.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86.h?rev=100559&r1=100558&r2=100559&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86.h (original) +++ llvm/trunk/lib/Target/X86/X86.h Tue Apr 6 15:26:37 2010 @@ -69,6 +69,12 @@ /// FunctionPass *createEmitX86CodeToMemory(); +/// createX86MaxStackAlignmentHeuristicPass - This function returns a pass +/// which determines whether the frame pointer register should be +/// reserved in case dynamic stack alignment is later required. +/// +FunctionPass *createX86MaxStackAlignmentHeuristicPass(); + extern Target TheX86_32Target, TheX86_64Target; } // End llvm namespace Modified: llvm/trunk/lib/Target/X86/X86MachineFunctionInfo.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86MachineFunctionInfo.h?rev=100559&r1=100558&r2=100559&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86MachineFunctionInfo.h (original) +++ llvm/trunk/lib/Target/X86/X86MachineFunctionInfo.h Tue Apr 6 15:26:37 2010 @@ -52,6 +52,10 @@ /// relocation models. unsigned GlobalBaseReg; + /// ReserveFP - whether the function should reserve the frame pointer + /// when allocating, even if there may not actually be a frame pointer used. 
+ bool ReserveFP; + public: X86MachineFunctionInfo() : ForceFramePointer(false), CalleeSavedFrameSize(0), @@ -68,7 +72,8 @@ ReturnAddrIndex(0), TailCallReturnAddrDelta(0), SRetReturnReg(0), - GlobalBaseReg(0) {} + GlobalBaseReg(0), + ReserveFP(false) {} bool getForceFramePointer() const { return ForceFramePointer;} void setForceFramePointer(bool forceFP) { ForceFramePointer = forceFP; } @@ -90,6 +95,9 @@ unsigned getGlobalBaseReg() const { return GlobalBaseReg; } void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; } + + bool getReserveFP() const { return ReserveFP; } + void setReserveFP(bool reserveFP) { ReserveFP = reserveFP; } }; } // End llvm namespace Modified: llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp?rev=100559&r1=100558&r2=100559&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp (original) +++ llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp Tue Apr 6 15:26:37 2010 @@ -1487,3 +1487,46 @@ } #include "X86GenRegisterInfo.inc" + +namespace { + struct MSAH : public MachineFunctionPass { + static char ID; + MSAH() : MachineFunctionPass(&ID) {} + + virtual bool runOnMachineFunction(MachineFunction &MF) { + const X86TargetMachine *TM = + static_cast(&MF.getTarget()); + const X86RegisterInfo *X86RI = TM->getRegisterInfo(); + MachineRegisterInfo &RI = MF.getRegInfo(); + X86MachineFunctionInfo *FuncInfo = MF.getInfo(); + unsigned StackAlignment = X86RI->getStackAlignment(); + + // Be over-conservative: scan over all vreg defs and find whether vector + // registers are used. If yes, there is a possibility that vector register + // will be spilled and thus require dynamic stack realignment. 
+ for (unsigned RegNum = TargetRegisterInfo::FirstVirtualRegister; + RegNum < RI.getLastVirtReg(); ++RegNum) + if (RI.getRegClass(RegNum)->getAlignment() > StackAlignment) { + FuncInfo->setReserveFP(true); + return true; + } + + // Nothing to do + return false; + } + + virtual const char *getPassName() const { + return "X86 Maximal Stack Alignment Check"; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + }; + + char MSAH::ID = 0; +} + +FunctionPass* +llvm::createX86MaxStackAlignmentHeuristicPass() { return new MSAH(); } Modified: llvm/trunk/lib/Target/X86/X86RegisterInfo.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86RegisterInfo.td?rev=100559&r1=100558&r2=100559&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86RegisterInfo.td (original) +++ llvm/trunk/lib/Target/X86/X86RegisterInfo.td Tue Apr 6 15:26:37 2010 @@ -352,11 +352,12 @@ const TargetMachine &TM = MF.getTarget(); const TargetRegisterInfo *RI = TM.getRegisterInfo(); const X86Subtarget &Subtarget = TM.getSubtarget(); + const X86MachineFunctionInfo *MFI = MF.getInfo(); // Does the function dedicate RBP / EBP to being a frame ptr? if (!Subtarget.is64Bit()) // In 32-mode, none of the 8-bit registers aliases EBP or ESP. return begin() + 8; - else if (RI->hasFP(MF)) + else if (RI->hasFP(MF) || MFI->getReserveFP()) // If so, don't allocate SPL or BPL. return array_endof(X86_GR8_AO_64) - 1; else @@ -396,9 +397,10 @@ const TargetMachine &TM = MF.getTarget(); const TargetRegisterInfo *RI = TM.getRegisterInfo(); const X86Subtarget &Subtarget = TM.getSubtarget(); + const X86MachineFunctionInfo *MFI = MF.getInfo(); if (Subtarget.is64Bit()) { // Does the function dedicate RBP to being a frame ptr? - if (RI->hasFP(MF)) + if (RI->hasFP(MF) || MFI->getReserveFP()) // If so, don't allocate SP or BP. 
return array_endof(X86_GR16_AO_64) - 1; else @@ -406,7 +408,7 @@ return array_endof(X86_GR16_AO_64); } else { // Does the function dedicate EBP to being a frame ptr? - if (RI->hasFP(MF)) + if (RI->hasFP(MF) || MFI->getReserveFP()) // If so, don't allocate SP or BP. return begin() + 6; else @@ -447,9 +449,10 @@ const TargetMachine &TM = MF.getTarget(); const TargetRegisterInfo *RI = TM.getRegisterInfo(); const X86Subtarget &Subtarget = TM.getSubtarget(); + const X86MachineFunctionInfo *MFI = MF.getInfo(); if (Subtarget.is64Bit()) { // Does the function dedicate RBP to being a frame ptr? - if (RI->hasFP(MF)) + if (RI->hasFP(MF) || MFI->getReserveFP()) // If so, don't allocate ESP or EBP. return array_endof(X86_GR32_AO_64) - 1; else @@ -457,7 +460,7 @@ return array_endof(X86_GR32_AO_64); } else { // Does the function dedicate EBP to being a frame ptr? - if (RI->hasFP(MF)) + if (RI->hasFP(MF) || MFI->getReserveFP()) // If so, don't allocate ESP or EBP. return begin() + 6; else @@ -484,9 +487,11 @@ const TargetMachine &TM = MF.getTarget(); const TargetRegisterInfo *RI = TM.getRegisterInfo(); const X86Subtarget &Subtarget = TM.getSubtarget(); + const X86MachineFunctionInfo *MFI = MF.getInfo(); if (!Subtarget.is64Bit()) return begin(); // None of these are allocatable in 32-bit. - if (RI->hasFP(MF)) // Does the function dedicate RBP to being a frame ptr? + // Does the function dedicate RBP to being a frame ptr? + if (RI->hasFP(MF) || MFI->getReserveFP()) return end()-3; // If so, don't allocate RIP, RSP or RBP else return end()-2; // If not, just don't allocate RIP or RSP @@ -589,8 +594,9 @@ GR16_NOREXClass::allocation_order_end(const MachineFunction &MF) const { const TargetMachine &TM = MF.getTarget(); const TargetRegisterInfo *RI = TM.getRegisterInfo(); + const X86MachineFunctionInfo *MFI = MF.getInfo(); // Does the function dedicate RBP / EBP to being a frame ptr? - if (RI->hasFP(MF)) + if (RI->hasFP(MF) || MFI->getReserveFP()) // If so, don't allocate SP or BP. 
return end() - 2; else @@ -611,8 +617,9 @@ GR32_NOREXClass::allocation_order_end(const MachineFunction &MF) const { const TargetMachine &TM = MF.getTarget(); const TargetRegisterInfo *RI = TM.getRegisterInfo(); + const X86MachineFunctionInfo *MFI = MF.getInfo(); // Does the function dedicate RBP / EBP to being a frame ptr? - if (RI->hasFP(MF)) + if (RI->hasFP(MF) || MFI->getReserveFP()) // If so, don't allocate ESP or EBP. return end() - 2; else @@ -633,8 +640,9 @@ GR64_NOREXClass::allocation_order_end(const MachineFunction &MF) const { const TargetMachine &TM = MF.getTarget(); const TargetRegisterInfo *RI = TM.getRegisterInfo(); + const X86MachineFunctionInfo *MFI = MF.getInfo(); // Does the function dedicate RBP to being a frame ptr? - if (RI->hasFP(MF)) + if (RI->hasFP(MF) || MFI->getReserveFP()) // If so, don't allocate RIP, RSP or RBP. return end() - 3; else @@ -675,9 +683,10 @@ const TargetMachine &TM = MF.getTarget(); const TargetRegisterInfo *RI = TM.getRegisterInfo(); const X86Subtarget &Subtarget = TM.getSubtarget(); + const X86MachineFunctionInfo *MFI = MF.getInfo(); if (Subtarget.is64Bit()) { // Does the function dedicate RBP to being a frame ptr? - if (RI->hasFP(MF)) + if (RI->hasFP(MF) || MFI->getReserveFP()) // If so, don't allocate EBP. return array_endof(X86_GR32_NOSP_AO_64) - 1; else @@ -685,7 +694,7 @@ return array_endof(X86_GR32_NOSP_AO_64); } else { // Does the function dedicate EBP to being a frame ptr? - if (RI->hasFP(MF)) + if (RI->hasFP(MF) || MFI->getReserveFP()) // If so, don't allocate EBP. return begin() + 6; else @@ -710,9 +719,11 @@ const TargetMachine &TM = MF.getTarget(); const TargetRegisterInfo *RI = TM.getRegisterInfo(); const X86Subtarget &Subtarget = TM.getSubtarget(); + const X86MachineFunctionInfo *MFI = MF.getInfo(); if (!Subtarget.is64Bit()) return begin(); // None of these are allocatable in 32-bit. - if (RI->hasFP(MF)) // Does the function dedicate RBP to being a frame ptr? 
+ // Does the function dedicate RBP to being a frame ptr? + if (RI->hasFP(MF) || MFI->getReserveFP()) return end()-1; // If so, don't allocate RBP else return end(); // If not, any reg in this class is ok. @@ -733,8 +744,9 @@ { const TargetMachine &TM = MF.getTarget(); const TargetRegisterInfo *RI = TM.getRegisterInfo(); + const X86MachineFunctionInfo *MFI = MF.getInfo(); // Does the function dedicate RBP to being a frame ptr? - if (RI->hasFP(MF)) + if (RI->hasFP(MF) || MFI->getReserveFP()) // If so, don't allocate RBP. return end() - 1; else Modified: llvm/trunk/lib/Target/X86/X86TargetMachine.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86TargetMachine.cpp?rev=100559&r1=100558&r2=100559&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86TargetMachine.cpp (original) +++ llvm/trunk/lib/Target/X86/X86TargetMachine.cpp Tue Apr 6 15:26:37 2010 @@ -161,6 +161,7 @@ bool X86TargetMachine::addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel) { + PM.add(createX86MaxStackAlignmentHeuristicPass()); return false; // -print-machineinstr shouldn't print after this. 
} From stuart at apple.com Tue Apr 6 15:33:21 2010 From: stuart at apple.com (Stuart Hastings) Date: Tue, 6 Apr 2010 13:33:21 -0700 Subject: [llvm-commits] [llvm-gcc-4.2] r100531 - in /llvm-gcc-4.2/trunk/gcc: llvm-backend.cpp llvm-convert.cpp llvm-debug.cpp llvm-debug.h llvm-internal.h In-Reply-To: <2D8A1959-2464-4BAC-A0F4-E0129AB90545@apple.com> References: <20100406171948.3F1562A6C12C@llvm.org> <2D8A1959-2464-4BAC-A0F4-E0129AB90545@apple.com> Message-ID: <1F9CBE71-EAF5-4B38-BFDC-9D20AF8259A2@apple.com> On Apr 6, 2010, at 11:49 AM, Devang Patel wrote: > Stuart, > > On Apr 6, 2010, at 10:19 AM, Stuart Hastings wrote: > >> Author: stuart >> Date: Tue Apr 6 12:19:47 2010 >> New Revision: 100531 >> >> URL: http://llvm.org/viewvc/llvm-project?rev=100531&view=rev >> Log: >> Revise debug info machinery to digest nested functions and classes. >> >> A certain GDB testsuite case (local.cc) has a function nested inside a >> class nested inside another function. GCC presents the innermost >> function to llvm-convert first. Heretofore, the debug info mistakenly >> placed the inner function at module scope. This patch walks the GCC >> context links and instantiates the outer class and function so the >> debug info is properly nested. Radar 7426545. >> >> Modified: >> llvm-gcc-4.2/trunk/gcc/llvm-backend.cpp >> llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp >> llvm-gcc-4.2/trunk/gcc/llvm-debug.cpp >> llvm-gcc-4.2/trunk/gcc/llvm-debug.h >> llvm-gcc-4.2/trunk/gcc/llvm-internal.h [snip] >> /// performLateBackendInitialization - Set backend options that may only be >> @@ -533,8 +535,6 @@ >> } >> >> void llvm_lang_dependent_init(const char *Name) { >> - if (TheDebugInfo) >> - TheDebugInfo->Initialize(); > > What is the motivation behind this ? 
[snip] >> -TreeToLLVM::TreeToLLVM(tree fndecl) : >> - TD(getTargetData()), Builder(Context, *TheFolder) { >> - FnDecl = fndecl; >> +TreeToLLVM::TreeToLLVM(tree decl) : >> + TD(getTargetData()), Builder(Context, *TheFolder) { >> + // If this isn't a FUNCITON_DECL, use only the source loc info from it. >> + FnDecl = (decl && TREE_CODE(decl) == FUNCTION_DECL) ? decl : NULL_TREE; >> Fn = 0; >> ReturnBB = UnwindBB = 0; >> ReturnOffset = 0; >> >> if (EmitDebugInfo()) { >> - expanded_location Location = expand_location(DECL_SOURCE_LOCATION (fndecl)); >> + expanded_location Location = expand_location(DECL_SOURCE_LOCATION (decl)); >> >> if (Location.file) { >> TheDebugInfo->setLocationFile(Location.file); >> @@ -179,6 +180,7 @@ >> TheDebugInfo->setLocationFile(""); >> TheDebugInfo->setLocationLine(0); >> } >> + TheDebugInfo->Initialize(); >> } > > I do not think this is correct. DebugInfo::Initialize() should be called only once. Are you sure you are not losing debug info with this change? You may be right. I'll look into this. >> AllocaInsertionPoint = 0; >> @@ -188,13 +190,25 @@ >> FuncEHException = 0; >> FuncEHSelector = 0; >> FuncEHGetTypeID = 0; >> - >> - assert(TheTreeToLLVM == 0 && "Reentering function creation?"); >> - TheTreeToLLVM = this; >> } >> >> -TreeToLLVM::~TreeToLLVM() { >> - TheTreeToLLVM = 0; >> + >> +TreeToLLVM::~TreeToLLVM() {} >> + >> +TreeToLLVM *getTreeToLLVM(tree decl) { >> + // FIXME: should this static move into the TreeToLLVM class decl? >> + static std::map FunctionMap; > > Why not? I put this here for expediency, and later realized it might be "more elegant" if this moved to the class. It works fine as-is, so I guess this is a "preferred style" question. 
stuart From stuart at apple.com Tue Apr 6 16:38:29 2010 From: stuart at apple.com (Stuart Hastings) Date: Tue, 06 Apr 2010 21:38:29 -0000 Subject: [llvm-commits] [llvm] r100563 - in /llvm/trunk: lib/CodeGen/AsmPrinter/DwarfDebug.cpp lib/CodeGen/AsmPrinter/DwarfDebug.h test/FrontendC++/2010-02-17-DbgArtificialArg.cpp Message-ID: <20100406213829.3C2652A6C12C@llvm.org> Author: stuart Date: Tue Apr 6 16:38:29 2010 New Revision: 100563 URL: http://llvm.org/viewvc/llvm-project?rev=100563&view=rev Log: Reverting 100530 & 100531 due to regressions in the GDB test suite. Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h llvm/trunk/test/FrontendC++/2010-02-17-DbgArtificialArg.cpp Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp?rev=100563&r1=100562&r2=100563&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp (original) +++ llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp Tue Apr 6 16:38:29 2010 @@ -800,26 +800,12 @@ } else if (Context.isNameSpace()) { DIE *ContextDIE = getOrCreateNameSpace(DINameSpace(Context.getNode())); ContextDIE->addChild(Die); - } else if (Context.isSubprogram()) { - DIE *ContextDIE = createSubprogramDIE(DISubprogram(Context.getNode()), - /*MakeDecl=*/false); - ContextDIE->addChild(Die); } else if (DIE *ContextDIE = ModuleCU->getDIE(Context.getNode())) ContextDIE->addChild(Die); else ModuleCU->addDie(Die); } -/// isFunctionContext - True if given Context is nested within a function. -bool DwarfDebug::isFunctionContext(DIE *context) { - if (context == (DIE *)0) - return false; - if (context->getTag() == dwarf::DW_TAG_subprogram) - return true; - else - return isFunctionContext(context->getParent()); -} - /// getOrCreateTypeDIE - Find existing DIE or create new DIE for the /// given DIType. 
DIE *DwarfDebug::getOrCreateTypeDIE(DIType Ty) { @@ -1001,10 +987,6 @@ if (DIDescriptor(ContainingType.getNode()).isCompositeType()) addDIEEntry(&Buffer, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, getOrCreateTypeDIE(DIType(ContainingType.getNode()))); - else { - DIDescriptor Context = CTy.getContext(); - addToContextOwner(&Buffer, Context); - } break; } default: @@ -1820,15 +1802,19 @@ void DwarfDebug::constructSubprogramDIE(MDNode *N) { DISubprogram SP(N); + // Check for pre-existence. + if (ModuleCU->getDIE(N)) + return; + if (!SP.isDefinition()) // This is a method declaration which will be handled while constructing // class type. return; - // Check for pre-existence. - DIE *SubprogramDie = ModuleCU->getDIE(N); - if (!SubprogramDie) - SubprogramDie = createSubprogramDIE(SP); + DIE *SubprogramDie = createSubprogramDIE(SP); + + // Add to map. + ModuleCU->insertDIE(N, SubprogramDie); // Add to context owner. addToContextOwner(SubprogramDie, SP.getContext()); Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h?rev=100563&r1=100562&r2=100563&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h (original) +++ llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h Tue Apr 6 16:38:29 2010 @@ -332,9 +332,6 @@ /// addToContextOwner - Add Die into the list of its context owner's children. void addToContextOwner(DIE *Die, DIDescriptor Context); - /// isFunctionContext - True if given Context is nested within a function. - bool isFunctionContext(DIE *context); - /// addType - Add a new type attribute to the specified entity. 
void addType(DIE *Entity, DIType Ty); Modified: llvm/trunk/test/FrontendC++/2010-02-17-DbgArtificialArg.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/FrontendC%2B%2B/2010-02-17-DbgArtificialArg.cpp?rev=100563&r1=100562&r2=100563&view=diff ============================================================================== --- llvm/trunk/test/FrontendC++/2010-02-17-DbgArtificialArg.cpp (original) +++ llvm/trunk/test/FrontendC++/2010-02-17-DbgArtificialArg.cpp Tue Apr 6 16:38:29 2010 @@ -1,4 +1,4 @@ -// RUN: %llvmgcc -g -S %s -o - | FileCheck %s +// RUN: %llvmgcc -g -S %s -o - | grep DW_TAG_pointer_type | grep "i32 524303, metadata .., metadata ..., metadata .., i32 ., i64 .., i64 .., i64 0, i32 64, metadata ..." // Here, second to last argument "i32 64" indicates that artificial type is set. // Test to artificial attribute attahed to "this" pointer type. // Radar 7655792 and 7655002 @@ -10,7 +10,5 @@ int foo() { A a; - // Matching "i32 64, metadata !} ; [ DW_TAG_pointer_type ]" - // CHECK: i32 64, metadata {{![0-9]+\} ; \[ DW_TAG_pointer_type \]}} return a.fn1(1); } From stuart at apple.com Tue Apr 6 16:38:33 2010 From: stuart at apple.com (Stuart Hastings) Date: Tue, 06 Apr 2010 21:38:33 -0000 Subject: [llvm-commits] [llvm-gcc-4.2] r100564 - in /llvm-gcc-4.2/trunk/gcc: llvm-backend.cpp llvm-convert.cpp llvm-debug.cpp llvm-debug.h llvm-internal.h Message-ID: <20100406213833.700C12A6C12D@llvm.org> Author: stuart Date: Tue Apr 6 16:38:33 2010 New Revision: 100564 URL: http://llvm.org/viewvc/llvm-project?rev=100564&view=rev Log: Reverting 100530 & 100531 due to regressions in the GDB test suite. 
Modified: llvm-gcc-4.2/trunk/gcc/llvm-backend.cpp llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp llvm-gcc-4.2/trunk/gcc/llvm-debug.cpp llvm-gcc-4.2/trunk/gcc/llvm-debug.h llvm-gcc-4.2/trunk/gcc/llvm-internal.h Modified: llvm-gcc-4.2/trunk/gcc/llvm-backend.cpp URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/gcc/llvm-backend.cpp?rev=100564&r1=100563&r2=100564&view=diff ============================================================================== --- llvm-gcc-4.2/trunk/gcc/llvm-backend.cpp (original) +++ llvm-gcc-4.2/trunk/gcc/llvm-backend.cpp Tue Apr 6 16:38:33 2010 @@ -512,8 +512,6 @@ if (!flag_pch_file && debug_info_level > DINFO_LEVEL_NONE) TheDebugInfo = new DebugInfo(TheModule); - else - TheDebugInfo = 0; } /// performLateBackendInitialization - Set backend options that may only be @@ -535,6 +533,8 @@ } void llvm_lang_dependent_init(const char *Name) { + if (TheDebugInfo) + TheDebugInfo->Initialize(); if (Name) TheModule->setModuleIdentifier(Name); } @@ -1010,7 +1010,7 @@ // Convert the AST to raw/ugly LLVM code. Function *Fn; { - TreeToLLVM *Emitter = getTreeToLLVM(fndecl); + TreeToLLVM Emitter(fndecl); enum symbol_visibility vis = DECL_VISIBILITY (fndecl); if (vis != VISIBILITY_DEFAULT) @@ -1018,7 +1018,7 @@ // visibility that's not supported by the target. targetm.asm_out.visibility(fndecl, vis); - Fn = Emitter->EmitFunction(); + Fn = Emitter.EmitFunction(); } #if 0 @@ -1317,10 +1317,6 @@ timevar_push(TV_LLVM_GLOBALS); - // Insure debug info machinery initialized, even if current module - // lacks functions. - getTreeToLLVM(decl); - // Get or create the global variable now. 
GlobalVariable *GV = cast(DECL_LLVM(decl)); Modified: llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp?rev=100564&r1=100563&r2=100564&view=diff ============================================================================== --- llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp (original) +++ llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp Tue Apr 6 16:38:33 2010 @@ -148,6 +148,7 @@ //===----------------------------------------------------------------------===// /// TheTreeToLLVM - Keep track of the current function being compiled. +static TreeToLLVM *TheTreeToLLVM = 0; const TargetData &getTargetData() { return *TheTarget->getTargetData(); @@ -156,22 +157,20 @@ /// EmitDebugInfo - Return true if debug info is to be emitted for current /// function. bool TreeToLLVM::EmitDebugInfo() { - if (TheDebugInfo && - (!getFUNCTION_DECL() || !DECL_IGNORED_P(getFUNCTION_DECL()))) + if (TheDebugInfo && !DECL_IGNORED_P(getFUNCTION_DECL())) return true; return false; } -TreeToLLVM::TreeToLLVM(tree decl) : - TD(getTargetData()), Builder(Context, *TheFolder) { - // If this isn't a FUNCITON_DECL, use only the source loc info from it. - FnDecl = (decl && TREE_CODE(decl) == FUNCTION_DECL) ? 
decl : NULL_TREE; +TreeToLLVM::TreeToLLVM(tree fndecl) : + TD(getTargetData()), Builder(Context, *TheFolder) { + FnDecl = fndecl; Fn = 0; ReturnBB = UnwindBB = 0; ReturnOffset = 0; if (EmitDebugInfo()) { - expanded_location Location = expand_location(DECL_SOURCE_LOCATION (decl)); + expanded_location Location = expand_location(DECL_SOURCE_LOCATION (fndecl)); if (Location.file) { TheDebugInfo->setLocationFile(Location.file); @@ -180,7 +179,6 @@ TheDebugInfo->setLocationFile(""); TheDebugInfo->setLocationLine(0); } - TheDebugInfo->Initialize(); } AllocaInsertionPoint = 0; @@ -190,25 +188,13 @@ FuncEHException = 0; FuncEHSelector = 0; FuncEHGetTypeID = 0; -} - -TreeToLLVM::~TreeToLLVM() {} + assert(TheTreeToLLVM == 0 && "Reentering function creation?"); + TheTreeToLLVM = this; +} -TreeToLLVM *getTreeToLLVM(tree decl) { - // FIXME: should this static move into the TreeToLLVM class decl? - static std::map FunctionMap; - TreeToLLVM *newTreeToLLVM = FunctionMap[decl]; - if (!newTreeToLLVM) { - tree fndecl = (decl && TREE_CODE(decl) == FUNCTION_DECL) ? decl : NULL_TREE; - newTreeToLLVM = FunctionMap[fndecl] = new TreeToLLVM(decl); - } - return newTreeToLLVM; -} - -TreeToLLVM *getCurrentTreeToLLVM(void) { - assert(current_function_decl && "no current_function_decl?"); - return getTreeToLLVM(current_function_decl); +TreeToLLVM::~TreeToLLVM() { + TheTreeToLLVM = 0; } /// getLabelDeclBlock - Lazily get and create a basic block for the specified @@ -322,8 +308,7 @@ assert(TREE_CODE(TREE_TYPE(ResultDecl)) == REFERENCE_TYPE && "Not type match and not passing by reference?"); // Create an alloca for the ResultDecl. 
- TreeToLLVM *Emitter = getCurrentTreeToLLVM(); - Value *Tmp = Emitter->CreateTemporary(AI->getType()); + Value *Tmp = TheTreeToLLVM->CreateTemporary(AI->getType()); Builder.CreateStore(AI, Tmp); SET_DECL_LLVM(ResultDecl, Tmp); @@ -466,7 +451,7 @@ } } -Function *TreeToLLVM::StartFunctionBody() { +void TreeToLLVM::StartFunctionBody() { const char *Name = ""; // Get the name of the function. if (tree ID = DECL_ASSEMBLER_NAME(FnDecl)) @@ -625,10 +610,10 @@ // Set the BLOCK_NUMBER()s to the depth of each lexical block. setLexicalBlockDepths(FnDecl, block_declared_vars, 1); - if (TheDebugInfo) { - TheDebugInfo->EmitFunctionStart(FnDecl); - Builder.GetInsertBlock(); - } + SeenBlocks.clear(); + + if (EmitDebugInfo()) + TheDebugInfo->EmitFunctionStart(FnDecl, Fn, Builder.GetInsertBlock()); // Loop over all of the arguments to the function, setting Argument names and // creating argument alloca's for the PARM_DECLs in case their address is @@ -643,7 +628,7 @@ ABIConverter.HandleReturnType(TREE_TYPE(TREE_TYPE(FnDecl)), FnDecl, DECL_BUILT_IN(FnDecl)); // Remember this for use by FinishFunctionBody. - ReturnOffset = Client.Offset; + TheTreeToLLVM->ReturnOffset = Client.Offset; // Prepend the static chain (if any) to the list of arguments. tree Args = static_chain ? static_chain : DECL_ARGUMENTS(FnDecl); @@ -724,7 +709,12 @@ block_declared_vars.count(TREE_VALUE(t)) == 0) EmitAutomaticVariableDecl(TREE_VALUE(t)); } - return Fn; + + // Push the outermost lexical block onto the RegionStack. + switchLexicalBlock(DECL_INITIAL(FnDecl)); + + // Create a new block for the return node, but don't insert it yet. + ReturnBB = BasicBlock::Create(Context, "return"); } Function *TreeToLLVM::FinishFunctionBody() { @@ -812,19 +802,9 @@ } Function *TreeToLLVM::EmitFunction() { - // Set up parameters for the function. + // Set up parameters and prepare for return, for the function. StartFunctionBody(); - // We'll remember the lexical BLOCKs we've seen here. 
- SeenBlocks.clear(); - - // FIXME: Should these two statements move to StartFunctionBody() ? - // Push the outermost lexical block onto the RegionStack. - switchLexicalBlock(DECL_INITIAL(FnDecl)); - - // Create a new block for the return node, but don't insert it yet. - ReturnBB = BasicBlock::Create(Context, "return"); - // Emit the body of the function iterating over all BBs basic_block bb; edge e; @@ -2636,7 +2616,7 @@ if (!Loc) { // A value. Store to a temporary, and return the temporary's address. // Any future access to this argument will reuse the same address. - Loc = getCurrentTreeToLLVM()->CreateTemporary(TheValue->getType()); + Loc = TheTreeToLLVM->CreateTemporary(TheValue->getType()); Builder.CreateStore(TheValue, Loc); } return Loc; @@ -2676,7 +2656,7 @@ assert(ConvertType(type) == cast(RetBuf.Ptr->getType())->getElementType() && "Inconsistent result types!"); - getCurrentTreeToLLVM()->EmitAggregateCopy(*DestLoc, RetBuf, type); + TheTreeToLLVM->EmitAggregateCopy(*DestLoc, RetBuf, type); return 0; } else { // Read out the scalar return value now. @@ -2719,7 +2699,7 @@ if (DestLoc == 0) { // The result is unused, but still needs to be stored somewhere. - Value *Buf = getCurrentTreeToLLVM()->CreateTemporary(PtrArgTy->getElementType()); + Value *Buf = TheTreeToLLVM->CreateTemporary(PtrArgTy->getElementType()); CallOperands.push_back(Buf); } else if (useReturnSlot) { // Letting the call write directly to the final destination is safe and @@ -2729,7 +2709,7 @@ // Letting the call write directly to the final destination may not be // safe (eg: if DestLoc aliases a parameter) and is not required - pass // a buffer and copy it to DestLoc after the call. - RetBuf = getCurrentTreeToLLVM()->CreateTempLoc(PtrArgTy->getElementType()); + RetBuf = TheTreeToLLVM->CreateTempLoc(PtrArgTy->getElementType()); CallOperands.push_back(RetBuf.Ptr); } @@ -2750,7 +2730,7 @@ "Call returns a scalar but caller expects aggregate!"); // Create a buffer to hold the result. 
The result will be loaded out of // it after the call. - RetBuf = getCurrentTreeToLLVM()->CreateTempLoc(PtrArgTy->getElementType()); + RetBuf = TheTreeToLLVM->CreateTempLoc(PtrArgTy->getElementType()); CallOperands.push_back(RetBuf.Ptr); // Note the use of a shadow argument. @@ -2774,7 +2754,7 @@ if (Loc->getType() != CalledTy) { assert(type && "Inconsistent parameter types?"); bool isSigned = !TYPE_UNSIGNED(type); - Loc = getCurrentTreeToLLVM()->CastToAnyType(Loc, isSigned, CalledTy, false); + Loc = TheTreeToLLVM->CastToAnyType(Loc, isSigned, CalledTy, false); } } @@ -8477,18 +8457,18 @@ /// EmitLV_LABEL_DECL - Someone took the address of a label. Constant *TreeConstantToLLVM::EmitLV_LABEL_DECL(tree exp) { - assert(getCurrentTreeToLLVM() && + assert(TheTreeToLLVM && "taking the address of a label while not compiling the function!"); // Figure out which function this is for, verify it's the one we're compiling. if (DECL_CONTEXT(exp)) { assert(TREE_CODE(DECL_CONTEXT(exp)) == FUNCTION_DECL && "Address of label in nested function?"); - assert(getCurrentTreeToLLVM()->getFUNCTION_DECL() == DECL_CONTEXT(exp) && + assert(TheTreeToLLVM->getFUNCTION_DECL() == DECL_CONTEXT(exp) && "Taking the address of a label that isn't in the current fn!?"); } - return getCurrentTreeToLLVM()->EmitLV_LABEL_DECL(exp); + return TheTreeToLLVM->EmitLV_LABEL_DECL(exp); } Constant *TreeConstantToLLVM::EmitLV_COMPLEX_CST(tree exp) { Modified: llvm-gcc-4.2/trunk/gcc/llvm-debug.cpp URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/gcc/llvm-debug.cpp?rev=100564&r1=100563&r2=100564&view=diff ============================================================================== --- llvm-gcc-4.2/trunk/gcc/llvm-debug.cpp (original) +++ llvm-gcc-4.2/trunk/gcc/llvm-debug.cpp Tue Apr 6 16:38:33 2010 @@ -289,10 +289,12 @@ setCurrentLexicalBlock(desired); } -/// CreateSubprogramFromFnDecl - Constructs the debug code for -/// entering a function - "llvm.dbg.func.start." 
+/// EmitFunctionStart - Constructs the debug code for entering a function - +/// "llvm.dbg.func.start." +void DebugInfo::EmitFunctionStart(tree FnDecl, Function *Fn, + BasicBlock *CurBB) { + setCurrentLexicalBlock(FnDecl); -DISubprogram DebugInfo::CreateSubprogramFromFnDecl(tree FnDecl) { DIType FNType = getOrCreateType(TREE_TYPE(FnDecl)); std::map::iterator I = SPCache.find(FnDecl); @@ -300,9 +302,12 @@ DISubprogram SPDecl(cast(I->second)); DISubprogram SP = DebugFactory.CreateSubprogramDefinition(SPDecl); - if (SP.getNode() != SPDecl.getNode()) - SPDecl.getNode()->replaceAllUsesWith(SP.getNode()); - return SP; + SPDecl.getNode()->replaceAllUsesWith(SP.getNode()); + + // Push function on region stack. + RegionStack.push_back(WeakVH(SP.getNode())); + RegionMap[FnDecl] = WeakVH(SP.getNode()); + return; } bool ArtificialFnWithAbstractOrigin = false; @@ -324,13 +329,12 @@ DISubprogram SPDecl(cast(I->second)); DISubprogram SP = DebugFactory.CreateSubprogramDefinition(SPDecl); - if (SP.getNode() != SPDecl.getNode()) - SPDecl.getNode()->replaceAllUsesWith(SP.getNode()); + SPDecl.getNode()->replaceAllUsesWith(SP.getNode()); // Push function on region stack. RegionStack.push_back(WeakVH(SP.getNode())); RegionMap[FnDecl] = WeakVH(SP.getNode()); - return SP; + return; } // Gather location information. @@ -352,36 +356,23 @@ } StringRef FnName = getFunctionName(FnDecl); - // If the Function * hasn't been created yet, use a bogus value for - // the debug internal linkage bit. 
- bool hasInternalLinkage = true; - if (GET_DECL_LLVM_INDEX(FnDecl)) { - Function *Fn = castDECL_LLVM(FnDecl); - hasInternalLinkage = Fn->hasInternalLinkage(); - } + DISubprogram SP = DebugFactory.CreateSubprogram(SPContext, FnName, FnName, LinkageName, getOrCreateFile(Loc.file), lineno, FNType, - hasInternalLinkage, + Fn->hasInternalLinkage(), true /*definition*/, Virtuality, VIndex, ContainingType); SPCache[FnDecl] = WeakVH(SP.getNode()); - RegionMap[FnDecl] = WeakVH(SP.getNode()); - return SP; -} -/// EmitFunctionStart - Constructs the debug code for entering a function - -/// "llvm.dbg.func.start", and pushes it onto the RegionStack. -void DebugInfo::EmitFunctionStart(tree FnDecl) { - setCurrentLexicalBlock(FnDecl); - DISubprogram SP = CreateSubprogramFromFnDecl(FnDecl); // Push function on region stack. RegionStack.push_back(WeakVH(SP.getNode())); + RegionMap[FnDecl] = WeakVH(SP.getNode()); } /// getOrCreateNameSpace - Get name space descriptor for the tree node. @@ -414,20 +405,12 @@ DIType Ty = getOrCreateType(Node); return DIDescriptor(Ty.getNode()); } else if (DECL_P (Node)) { - switch (TREE_CODE(Node)) { - default: - /// What kind of DECL is this? - return findRegion (DECL_CONTEXT (Node)); - case NAMESPACE_DECL: { + if (TREE_CODE (Node) == NAMESPACE_DECL) { DIDescriptor NSContext = findRegion(DECL_CONTEXT(Node)); DINameSpace NS = getOrCreateNameSpace(Node, NSContext); return DIDescriptor(NS.getNode()); } - case FUNCTION_DECL: { - DISubprogram SP = CreateSubprogramFromFnDecl(Node); - return SP; - } - } + return findRegion (DECL_CONTEXT (Node)); } else if (TREE_CODE(Node) == BLOCK) { // TREE_BLOCK is GCC's lexical block. 
// Recursively create all necessary contexts: @@ -640,7 +623,7 @@ sprintf(FwdTypeName, "fwd.type.%d", FwdTypeCount++); llvm::DIType FwdType = DebugFactory.CreateCompositeType(llvm::dwarf::DW_TAG_subroutine_type, - findRegion(TYPE_CONTEXT(type)), + getOrCreateFile(main_input_filename), FwdTypeName, getOrCreateFile(main_input_filename), 0, 0, 0, 0, 0, @@ -726,10 +709,9 @@ return Ty; } - tree type_with_context = TYPE_CONTEXT(type) ? type : TREE_TYPE(type); StringRef PName = FromTy.getName(); DIType PTy = - DebugFactory.CreateDerivedType(Tag, findRegion(type_with_context), + DebugFactory.CreateDerivedType(Tag, findRegion(TYPE_CONTEXT(type)), Tag == DW_TAG_pointer_type ? StringRef() : PName, getOrCreateFile(main_input_filename), Modified: llvm-gcc-4.2/trunk/gcc/llvm-debug.h URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/gcc/llvm-debug.h?rev=100564&r1=100563&r2=100564&view=diff ============================================================================== --- llvm-gcc-4.2/trunk/gcc/llvm-debug.h (original) +++ llvm-gcc-4.2/trunk/gcc/llvm-debug.h Tue Apr 6 16:38:33 2010 @@ -118,13 +118,9 @@ // by GCC's cfglayout.c:change_scope(). void change_regions(tree_node *desired, tree_node *grand); - /// CreateSubprogramFromFnDecl - Constructs the debug code for entering a function - - /// "llvm.dbg.func.start." - DISubprogram CreateSubprogramFromFnDecl(tree_node *FnDecl); - /// EmitFunctionStart - Constructs the debug code for entering a function - - /// "llvm.dbg.func.start", and pushes it onto the RegionStack. - void EmitFunctionStart(tree_node *FnDecl); + /// "llvm.dbg.func.start." + void EmitFunctionStart(tree_node *FnDecl, Function *Fn, BasicBlock *CurBB); /// EmitFunctionEnd - Constructs the debug code for exiting a declarative /// region - "llvm.dbg.region.end." 
Modified: llvm-gcc-4.2/trunk/gcc/llvm-internal.h URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/gcc/llvm-internal.h?rev=100564&r1=100563&r2=100564&view=diff ============================================================================== --- llvm-gcc-4.2/trunk/gcc/llvm-internal.h (original) +++ llvm-gcc-4.2/trunk/gcc/llvm-internal.h Tue Apr 6 16:38:33 2010 @@ -75,7 +75,7 @@ extern llvm::Module *TheModule; /// TheDebugInfo - This object is responsible for gather all debug information. -/// If its value is NULL then no debug information should be gathered. +/// If it's value is NULL then no debug information should be gathered. extern llvm::DebugInfo *TheDebugInfo; /// TheTarget - The current target being compiled for. @@ -281,7 +281,6 @@ BasicBlock *ReturnBB; BasicBlock *UnwindBB; unsigned ReturnOffset; - // Lexical BLOCKS that we have previously seen and processed. treeset SeenBlocks; @@ -398,10 +397,6 @@ // allocation would change with -g, and users dislike that. void switchLexicalBlock(tree_node *exp); - /// StartFunctionBody - Start the emission of 'FnDecl', outputing all - /// declarations for parameters and setting things up. - Function *StartFunctionBody(); - private: // Helper functions. // Walk over the lexical BLOCK() tree of the given FUNCTION_DECL; @@ -410,6 +405,10 @@ // the given set. void setLexicalBlockDepths(tree_node *t, treeset &s, unsigned level); + /// StartFunctionBody - Start the emission of 'fndecl', outputing all + /// declarations for parameters and setting things up. + void StartFunctionBody(); + /// FinishFunctionBody - Once the body of the function has been emitted, this /// cleans up and returns the result function. Function *FinishFunctionBody(); @@ -609,9 +608,6 @@ Constant *EmitLV_LABEL_DECL(tree_node *exp); }; -/// Locate a previously exiting TreeToLLVM. Construct one if necessary. 
-TreeToLLVM *getTreeToLLVM(tree_node *fndecl); - /// TreeConstantToLLVM - An instance of this class is created and used to /// convert tree constant values to LLVM. This is primarily for things like /// global variable initializers. From bob.wilson at apple.com Tue Apr 6 16:56:06 2010 From: bob.wilson at apple.com (Bob Wilson) Date: Tue, 06 Apr 2010 21:56:06 -0000 Subject: [llvm-commits] [llvm-gcc-4.2] r100565 - /llvm-gcc-4.2/trunk/gcc/c-parser.c Message-ID: <20100406215606.314E52A6C12C@llvm.org> Author: bwilson Date: Tue Apr 6 16:56:05 2010 New Revision: 100565 URL: http://llvm.org/viewvc/llvm-project?rev=100565&view=rev Log: Disable the "ISO C forbids braced-groups within expressions" pedantic warning. This is needed for llvm's implementation of the ARM NEON intrinsics, which use macros with statements expressions instead of inline functions. This warning made is impossible to compile NEON intrinsics with "-pedantic -Werror". Radar 7833512. Modified: llvm-gcc-4.2/trunk/gcc/c-parser.c Modified: llvm-gcc-4.2/trunk/gcc/c-parser.c URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/gcc/c-parser.c?rev=100565&r1=100564&r2=100565&view=diff ============================================================================== --- llvm-gcc-4.2/trunk/gcc/c-parser.c (original) +++ llvm-gcc-4.2/trunk/gcc/c-parser.c Tue Apr 6 16:56:05 2010 @@ -5643,8 +5643,17 @@ c_parser_compound_statement_nostart (parser); c_parser_skip_until_found (parser, CPP_CLOSE_PAREN, "expected %<)%>"); +/* LLVM LOCAL begin */ + /* Disable this warning for the sake of ARM NEON intrinsics, which + are implemented as macros with statement expressions in llvm-gcc + since the inliner is not run later and the arguments need to be + visible to the front-end. Otherwise, it is not possible to + compile NEON intrinsics with "-pedantic -Werror". 
*/ +#ifndef ENABLE_LLVM if (pedantic) pedwarn ("ISO C forbids braced-groups within expressions"); +#endif +/* LLVM LOCAL end */ expr.value = c_finish_stmt_expr (stmt); expr.original_code = ERROR_MARK; } From dalej at apple.com Tue Apr 6 16:59:56 2010 From: dalej at apple.com (Dale Johannesen) Date: Tue, 06 Apr 2010 21:59:56 -0000 Subject: [llvm-commits] [llvm] r100566 - /llvm/trunk/lib/CodeGen/SelectionDAG/InstrEmitter.cpp Message-ID: <20100406215956.C3F582A6C12C@llvm.org> Author: johannes Date: Tue Apr 6 16:59:56 2010 New Revision: 100566 URL: http://llvm.org/viewvc/llvm-project?rev=100566&view=rev Log: Allow for the possibility that a debug-value points to a SDNode that didn't have code generated for it. Modified: llvm/trunk/lib/CodeGen/SelectionDAG/InstrEmitter.cpp Modified: llvm/trunk/lib/CodeGen/SelectionDAG/InstrEmitter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/InstrEmitter.cpp?rev=100566&r1=100565&r2=100566&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/InstrEmitter.cpp (original) +++ llvm/trunk/lib/CodeGen/SelectionDAG/InstrEmitter.cpp Tue Apr 6 16:59:56 2010 @@ -517,8 +517,19 @@ const TargetInstrDesc &II = TII->get(TargetOpcode::DBG_VALUE); MachineInstrBuilder MIB = BuildMI(*MF, DL, II); if (SD->getKind() == SDDbgValue::SDNODE) { - AddOperand(&*MIB, SDValue(SD->getSDNode(), SD->getResNo()), - (*MIB).getNumOperands(), &II, VRBaseMap, true /*IsDebug*/); + SDNode *Node = SD->getSDNode(); + SDValue Op = SDValue(Node, SD->getResNo()); + // It's possible we replaced this SDNode with other(s) and therefore + // didn't generate code for it. It's better to catch these cases where + // they happen and transfer the debug info, but trying to guarantee that + // in all cases would be very fragile; this is a safeguard for any + // that were missed. 
+ DenseMap::iterator I = VRBaseMap.find(Op); + if (I==VRBaseMap.end()) + MIB.addReg(0U); // undef + else + AddOperand(&*MIB, Op, (*MIB).getNumOperands(), &II, VRBaseMap, + true /*IsDebug*/); } else if (SD->getKind() == SDDbgValue::CONST) { Value *V = SD->getConst(); if (ConstantInt *CI = dyn_cast(V)) { From bob.wilson at apple.com Tue Apr 6 17:02:24 2010 From: bob.wilson at apple.com (Bob Wilson) Date: Tue, 06 Apr 2010 22:02:24 -0000 Subject: [llvm-commits] [llvm] r100568 - /llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Message-ID: <20100406220224.BE4A82A6C12C@llvm.org> Author: bwilson Date: Tue Apr 6 17:02:24 2010 New Revision: 100568 URL: http://llvm.org/viewvc/llvm-project?rev=100568&view=rev Log: Expand SELECT and SELECT_CC for NEON vector types. Radar 7770501. Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp?rev=100568&r1=100567&r2=100568&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp (original) +++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Tue Apr 6 17:02:24 2010 @@ -90,6 +90,8 @@ setOperationAction(ISD::VECTOR_SHUFFLE, VT.getSimpleVT(), Custom); setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Custom); setOperationAction(ISD::EXTRACT_SUBVECTOR, VT.getSimpleVT(), Expand); + setOperationAction(ISD::SELECT, VT.getSimpleVT(), Expand); + setOperationAction(ISD::SELECT_CC, VT.getSimpleVT(), Expand); if (VT.isInteger()) { setOperationAction(ISD::SHL, VT.getSimpleVT(), Custom); setOperationAction(ISD::SRA, VT.getSimpleVT(), Custom); From eli.friedman at gmail.com Tue Apr 6 17:03:31 2010 From: eli.friedman at gmail.com (Eli Friedman) Date: Tue, 6 Apr 2010 15:03:31 -0700 Subject: [llvm-commits] [llvm-gcc-4.2] r100565 - /llvm-gcc-4.2/trunk/gcc/c-parser.c In-Reply-To: <20100406215606.314E52A6C12C@llvm.org> References: 
<20100406215606.314E52A6C12C@llvm.org> Message-ID: On Tue, Apr 6, 2010 at 2:56 PM, Bob Wilson wrote: > Author: bwilson > Date: Tue Apr 6 16:56:05 2010 > New Revision: 100565 > > URL: http://llvm.org/viewvc/llvm-project?rev=100565&view=rev > Log: > Disable the "ISO C forbids braced-groups within expressions" pedantic > warning. This is needed for llvm's implementation of the ARM NEON intrinsics, > which use macros with statements expressions instead of inline functions. > This warning made is impossible to compile NEON intrinsics with "-pedantic > -Werror". Radar 7833512. Umm, this isn't really good... would it be possible to change the > header in question to use __extension__ to avoid the warnings? -Eli From dalej at apple.com Tue Apr 6 17:05:58 2010 From: dalej at apple.com (Dale Johannesen) Date: Tue, 6 Apr 2010 15:05:58 -0700 Subject: [llvm-commits] [llvm-gcc-4.2] r100565 - /llvm-gcc-4.2/trunk/gcc/c-parser.c In-Reply-To: References: <20100406215606.314E52A6C12C@llvm.org> Message-ID: <33DCC7F9-796C-43C3-ADB9-6241B9141AAC@apple.com> On Apr 6, 2010, at 3:03 PMPDT, Eli Friedman wrote: > On Tue, Apr 6, 2010 at 2:56 PM, Bob Wilson wrote: >> Author: bwilson >> Date: Tue Apr 6 16:56:05 2010 >> New Revision: 100565 >> >> URL: http://llvm.org/viewvc/llvm-project?rev=100565&view=rev >> Log: >> Disable the "ISO C forbids braced-groups within expressions" pedantic >> warning. This is needed for llvm's implementation of the ARM NEON intrinsics, >> which use macros with statements expressions instead of inline functions. >> This warning made is impossible to compile NEON intrinsics with "-pedantic >> -Werror". Radar 7833512. > > Umm, this isn't really good... would it be possible to change the > header in question to use __extension__ to avoid the warnings? And does clang have the same issue? 
From baldrick at free.fr Tue Apr 6 17:07:55 2010 From: baldrick at free.fr (Duncan Sands) Date: Wed, 07 Apr 2010 00:07:55 +0200 Subject: [llvm-commits] [llvm] r100559 - in /llvm/trunk/lib/Target/X86: X86.h X86MachineFunctionInfo.h X86RegisterInfo.cpp X86RegisterInfo.td X86TargetMachine.cpp In-Reply-To: <20100406202637.3DCFE2A6C12C@llvm.org> References: <20100406202637.3DCFE2A6C12C@llvm.org> Message-ID: <4BBBB0BB.2090804@free.fr> > Fix PR6696 and PR6663 Thanks for fixing this Jim - it was pretty nasty. Ciao, Duncan. From bob.wilson at apple.com Tue Apr 6 17:18:46 2010 From: bob.wilson at apple.com (Bob Wilson) Date: Tue, 6 Apr 2010 15:18:46 -0700 Subject: [llvm-commits] [llvm-gcc-4.2] r100565 - /llvm-gcc-4.2/trunk/gcc/c-parser.c In-Reply-To: <33DCC7F9-796C-43C3-ADB9-6241B9141AAC@apple.com> References: <20100406215606.314E52A6C12C@llvm.org> <33DCC7F9-796C-43C3-ADB9-6241B9141AAC@apple.com> Message-ID: On Apr 6, 2010, at 3:05 PM, Dale Johannesen wrote: > > On Apr 6, 2010, at 3:03 PMPDT, Eli Friedman wrote: > >> On Tue, Apr 6, 2010 at 2:56 PM, Bob Wilson wrote: >>> Author: bwilson >>> Date: Tue Apr 6 16:56:05 2010 >>> New Revision: 100565 >>> >>> URL: http://llvm.org/viewvc/llvm-project?rev=100565&view=rev >>> Log: >>> Disable the "ISO C forbids braced-groups within expressions" pedantic >>> warning. This is needed for llvm's implementation of the ARM NEON intrinsics, >>> which use macros with statements expressions instead of inline functions. >>> This warning made is impossible to compile NEON intrinsics with "-pedantic >>> -Werror". Radar 7833512. >> >> Umm, this isn't really good... would it be possible to change the >> header in question to use __extension__ to avoid the warnings? > > And does clang have the same issue? Clang has not yet implemented the NEON intrinsics, and when it does it should not be hard to avoid the problems that llvm-gcc has. 
llvm-gcc inherited a bunch of issues from the original gcc implementation of the NEON intrinsics, which we tried to reuse. In retrospect we might have been better off starting from scratch.... From dalej at apple.com Tue Apr 6 17:21:07 2010 From: dalej at apple.com (Dale Johannesen) Date: Tue, 06 Apr 2010 22:21:07 -0000 Subject: [llvm-commits] [llvm] r100573 - in /llvm/trunk/lib: CodeGen/AsmPrinter/AsmPrinter.cpp Target/X86/AsmPrinter/X86MCInstLower.cpp Message-ID: <20100406222107.8BBF32A6C12C@llvm.org> Author: johannes Date: Tue Apr 6 17:21:07 2010 New Revision: 100573 URL: http://llvm.org/viewvc/llvm-project?rev=100573&view=rev Log: Move printing of DEBUG_VALUE comments to target-independent place. There is probably a more elegant way to do this. Modified: llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp llvm/trunk/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp Modified: llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp?rev=100573&r1=100572&r2=100573&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp (original) +++ llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp Tue Apr 6 17:21:07 2010 @@ -434,7 +434,59 @@ AP.OutStreamer.AddBlankLine(); } - +static void EmitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { + char buf[100]; + std::string Str = "\t"; + Str += AP.MAI->getCommentString(); + Str += "DEBUG_VALUE: "; + // This code handles only the 3-operand target-independent form. + assert(MI->getNumOperands() == 3); + + // cast away const; DIetc do not take const operands for some reason. + DIVariable V((MDNode*)(MI->getOperand(2).getMetadata())); + Str += V.getName(); + Str += " <- "; + + // Register or immediate value. Register 0 means undef. 
+ if (MI->getOperand(0).isFPImm()) { + APFloat APF = APFloat(MI->getOperand(0).getFPImm()->getValueAPF()); + if (MI->getOperand(0).getFPImm()->getType()->isFloatTy()) { + sprintf(buf, "%e", APF.convertToFloat()); + Str += buf; + } else if (MI->getOperand(0).getFPImm()->getType()->isDoubleTy()) { + sprintf(buf, "%e", APF.convertToDouble()); + Str += buf; + } else { + // There is no good way to print long double. Convert a copy to + // double. Ah well, it's only a comment. + bool ignored; + APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, + &ignored); + Str += "(long double) "; + sprintf(buf, "%e", APF.convertToDouble()); + Str += buf; + } + } else if (MI->getOperand(0).isImm()) { + sprintf(buf, "%lld", MI->getOperand(0).getImm()); + Str += buf; + } else if (MI->getOperand(0).isReg()) { + if (MI->getOperand(0).getReg() == 0) { + // Suppress offset, it is not meaningful here. + Str += "undef"; + // NOTE: Want this comment at start of line, don't emit with AddComment. + AP.OutStreamer.EmitRawText(Twine(Str)); + return; + } + Str += AP.TM.getRegisterInfo()->getName(MI->getOperand(0).getReg()); + } else + llvm_unreachable("Unknown operand type"); + + Str += '+'; + sprintf(buf, "%lld", MI->getOperand(1).getImm()); + Str += buf; + // NOTE: Want this comment at start of line, don't emit with AddComment. + AP.OutStreamer.EmitRawText(Twine(Str)); +} /// EmitFunctionBody - This method emits the body and trailer for a /// function. 
@@ -473,6 +525,9 @@ case TargetOpcode::INLINEASM: EmitInlineAsm(II); break; + case TargetOpcode::DBG_VALUE: + if (isVerbose()) EmitDebugValueComment(II, *this); + break; case TargetOpcode::IMPLICIT_DEF: if (isVerbose()) EmitImplicitDef(II, *this); break; @@ -1236,7 +1291,7 @@ if (CFP->getType()->isX86_FP80Ty()) { // all long double variants are printed as hex - // api needed to prevent premature destruction + // API needed to prevent premature destruction APInt API = CFP->getValueAPF().bitcastToAPInt(); const uint64_t *p = API.getRawData(); if (AP.isVerbose()) { @@ -1266,8 +1321,8 @@ assert(CFP->getType()->isPPC_FP128Ty() && "Floating point constant type not handled"); - // All long double variants are printed as hex api needed to prevent - // premature destruction. + // All long double variants are printed as hex + // API needed to prevent premature destruction. APInt API = CFP->getValueAPF().bitcastToAPInt(); const uint64_t *p = API.getRawData(); if (AP.TM.getTargetData()->isBigEndian()) { Modified: llvm/trunk/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp?rev=100573&r1=100572&r2=100573&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp (original) +++ llvm/trunk/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp Tue Apr 6 17:21:07 2010 @@ -326,76 +326,10 @@ } } -void X86AsmPrinter::PrintDebugValueComment(const MachineInstr *MI, - raw_ostream &O) { - // FIXME: if this is implemented for another target before it goes - // away completely, the common part should be moved into AsmPrinter. - O << '\t' << MAI->getCommentString() << "DEBUG_VALUE: "; - unsigned NOps = MI->getNumOperands(); - // cast away const; DIetc do not take const operands for some reason. 
- DIVariable V((MDNode*)(MI->getOperand(NOps-1).getMetadata())); - O << V.getName(); - O << " <- "; - if (NOps==3) { - // Register or immediate value. Register 0 means undef. - assert(MI->getOperand(0).isReg() || - MI->getOperand(0).isImm() || - MI->getOperand(0).isFPImm()); - if (MI->getOperand(0).isReg() && MI->getOperand(0).getReg() == 0) { - // Suppress offset in this case, it is not meaningful. - O << "undef"; - OutStreamer.AddBlankLine(); - return; - } - - if (MI->getOperand(0).isFPImm()) { - // This is more naturally done in printOperand, but since the only use - // of such an operand is in this comment and that is temporary (and it's - // ugly), we prefer to keep this localized. - // The include of Type.h may be removable when this code is. - if (MI->getOperand(0).getFPImm()->getType()->isFloatTy() || - MI->getOperand(0).getFPImm()->getType()->isDoubleTy()) - MI->getOperand(0).print(O, &TM); - else { - // There is no good way to print long double. Convert a copy to - // double. Ah well, it's only a comment. - bool ignored; - APFloat APF = APFloat(MI->getOperand(0).getFPImm()->getValueAPF()); - APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, - &ignored); - O << "(long double) " << APF.convertToDouble(); - } - } else - printOperand(MI, 0, O); - } else { - if (MI->getOperand(0).isReg() && MI->getOperand(0).getReg() == 0) { - // Suppress offset in this case, it is not meaningful. - O << "undef"; - OutStreamer.AddBlankLine(); - return; - } - // Frame address. Currently handles register +- offset only. 
- assert(MI->getOperand(0).isReg() && MI->getOperand(3).isImm()); - O << '['; printOperand(MI, 0, O); O << '+'; printOperand(MI, 3, O); - O << ']'; - } - O << "+"; - printOperand(MI, NOps-2, O); -} - - void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { X86MCInstLower MCInstLowering(OutContext, Mang, *this); switch (MI->getOpcode()) { - case TargetOpcode::DBG_VALUE: - if (isVerbose() && OutStreamer.hasRawTextSupport()) { - std::string TmpStr; - raw_string_ostream OS(TmpStr); - PrintDebugValueComment(MI, OS); - OutStreamer.EmitRawText(StringRef(OS.str())); - } - return; - + case X86::MOVPC32r: { MCInst TmpInst; // This is a pseudo op for a two instruction sequence with a label, which From bob.wilson at apple.com Tue Apr 6 17:22:42 2010 From: bob.wilson at apple.com (Bob Wilson) Date: Tue, 6 Apr 2010 15:22:42 -0700 Subject: [llvm-commits] [llvm-gcc-4.2] r100565 - /llvm-gcc-4.2/trunk/gcc/c-parser.c In-Reply-To: References: <20100406215606.314E52A6C12C@llvm.org> Message-ID: On Apr 6, 2010, at 3:03 PM, Eli Friedman wrote: > On Tue, Apr 6, 2010 at 2:56 PM, Bob Wilson wrote: >> Author: bwilson >> Date: Tue Apr 6 16:56:05 2010 >> New Revision: 100565 >> >> URL: http://llvm.org/viewvc/llvm-project?rev=100565&view=rev >> Log: >> Disable the "ISO C forbids braced-groups within expressions" pedantic >> warning. This is needed for llvm's implementation of the ARM NEON intrinsics, >> which use macros with statements expressions instead of inline functions. >> This warning made is impossible to compile NEON intrinsics with "-pedantic >> -Werror". Radar 7833512. > > Umm, this isn't really good... would it be possible to change the > header in question to use __extension__ to avoid the warnings? It's not really good, but is it really that bad? Is the world really going to miss one pedantic warning? ;-) That said, I had forgotten about __extension__, and it seems like that ought to work. Thanks for the suggestion. 
From grosbach at apple.com Tue Apr 6 17:46:04 2010 From: grosbach at apple.com (Jim Grosbach) Date: Tue, 6 Apr 2010 15:46:04 -0700 Subject: [llvm-commits] [llvm] r100559 - in /llvm/trunk/lib/Target/X86: X86.h X86MachineFunctionInfo.h X86RegisterInfo.cpp X86RegisterInfo.td X86TargetMachine.cpp In-Reply-To: <4BBBB0BB.2090804@free.fr> References: <20100406202637.3DCFE2A6C12C@llvm.org> <4BBBB0BB.2090804@free.fr> Message-ID: On Apr 6, 2010, at 3:07 PM, Duncan Sands wrote: >> Fix PR6696 and PR6663 > > Thanks for fixing this Jim - it was pretty nasty. Glad to do so. It's definitely good to get this taken care of. Thanks for isolating the testcases. That helped a lot. -j From dalej at apple.com Tue Apr 6 17:45:26 2010 From: dalej at apple.com (Dale Johannesen) Date: Tue, 06 Apr 2010 22:45:26 -0000 Subject: [llvm-commits] [llvm] r100578 - in /llvm/trunk/lib: CodeGen/AsmPrinter/AsmPrinter.cpp Target/X86/AsmPrinter/X86MCInstLower.cpp Message-ID: <20100406224526.973F22A6C12C@llvm.org> Author: johannes Date: Tue Apr 6 17:45:26 2010 New Revision: 100578 URL: http://llvm.org/viewvc/llvm-project?rev=100578&view=rev Log: Revert 100573, it's causing some testsuite problems. 
Modified: llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp llvm/trunk/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp Modified: llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp?rev=100578&r1=100577&r2=100578&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp (original) +++ llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp Tue Apr 6 17:45:26 2010 @@ -434,59 +434,7 @@ AP.OutStreamer.AddBlankLine(); } -static void EmitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { - char buf[100]; - std::string Str = "\t"; - Str += AP.MAI->getCommentString(); - Str += "DEBUG_VALUE: "; - // This code handles only the 3-operand target-independent form. - assert(MI->getNumOperands() == 3); - - // cast away const; DIetc do not take const operands for some reason. - DIVariable V((MDNode*)(MI->getOperand(2).getMetadata())); - Str += V.getName(); - Str += " <- "; - - // Register or immediate value. Register 0 means undef. - if (MI->getOperand(0).isFPImm()) { - APFloat APF = APFloat(MI->getOperand(0).getFPImm()->getValueAPF()); - if (MI->getOperand(0).getFPImm()->getType()->isFloatTy()) { - sprintf(buf, "%e", APF.convertToFloat()); - Str += buf; - } else if (MI->getOperand(0).getFPImm()->getType()->isDoubleTy()) { - sprintf(buf, "%e", APF.convertToDouble()); - Str += buf; - } else { - // There is no good way to print long double. Convert a copy to - // double. Ah well, it's only a comment. - bool ignored; - APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, - &ignored); - Str += "(long double) "; - sprintf(buf, "%e", APF.convertToDouble()); - Str += buf; - } - } else if (MI->getOperand(0).isImm()) { - sprintf(buf, "%lld", MI->getOperand(0).getImm()); - Str += buf; - } else if (MI->getOperand(0).isReg()) { - if (MI->getOperand(0).getReg() == 0) { - // Suppress offset, it is not meaningful here. 
- Str += "undef"; - // NOTE: Want this comment at start of line, don't emit with AddComment. - AP.OutStreamer.EmitRawText(Twine(Str)); - return; - } - Str += AP.TM.getRegisterInfo()->getName(MI->getOperand(0).getReg()); - } else - llvm_unreachable("Unknown operand type"); - - Str += '+'; - sprintf(buf, "%lld", MI->getOperand(1).getImm()); - Str += buf; - // NOTE: Want this comment at start of line, don't emit with AddComment. - AP.OutStreamer.EmitRawText(Twine(Str)); -} + /// EmitFunctionBody - This method emits the body and trailer for a /// function. @@ -525,9 +473,6 @@ case TargetOpcode::INLINEASM: EmitInlineAsm(II); break; - case TargetOpcode::DBG_VALUE: - if (isVerbose()) EmitDebugValueComment(II, *this); - break; case TargetOpcode::IMPLICIT_DEF: if (isVerbose()) EmitImplicitDef(II, *this); break; @@ -1291,7 +1236,7 @@ if (CFP->getType()->isX86_FP80Ty()) { // all long double variants are printed as hex - // API needed to prevent premature destruction + // api needed to prevent premature destruction APInt API = CFP->getValueAPF().bitcastToAPInt(); const uint64_t *p = API.getRawData(); if (AP.isVerbose()) { @@ -1321,8 +1266,8 @@ assert(CFP->getType()->isPPC_FP128Ty() && "Floating point constant type not handled"); - // All long double variants are printed as hex - // API needed to prevent premature destruction. + // All long double variants are printed as hex api needed to prevent + // premature destruction. 
APInt API = CFP->getValueAPF().bitcastToAPInt(); const uint64_t *p = API.getRawData(); if (AP.TM.getTargetData()->isBigEndian()) { Modified: llvm/trunk/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp?rev=100578&r1=100577&r2=100578&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp (original) +++ llvm/trunk/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp Tue Apr 6 17:45:26 2010 @@ -326,10 +326,76 @@ } } +void X86AsmPrinter::PrintDebugValueComment(const MachineInstr *MI, + raw_ostream &O) { + // FIXME: if this is implemented for another target before it goes + // away completely, the common part should be moved into AsmPrinter. + O << '\t' << MAI->getCommentString() << "DEBUG_VALUE: "; + unsigned NOps = MI->getNumOperands(); + // cast away const; DIetc do not take const operands for some reason. + DIVariable V((MDNode*)(MI->getOperand(NOps-1).getMetadata())); + O << V.getName(); + O << " <- "; + if (NOps==3) { + // Register or immediate value. Register 0 means undef. + assert(MI->getOperand(0).isReg() || + MI->getOperand(0).isImm() || + MI->getOperand(0).isFPImm()); + if (MI->getOperand(0).isReg() && MI->getOperand(0).getReg() == 0) { + // Suppress offset in this case, it is not meaningful. + O << "undef"; + OutStreamer.AddBlankLine(); + return; + } + + if (MI->getOperand(0).isFPImm()) { + // This is more naturally done in printOperand, but since the only use + // of such an operand is in this comment and that is temporary (and it's + // ugly), we prefer to keep this localized. + // The include of Type.h may be removable when this code is. + if (MI->getOperand(0).getFPImm()->getType()->isFloatTy() || + MI->getOperand(0).getFPImm()->getType()->isDoubleTy()) + MI->getOperand(0).print(O, &TM); + else { + // There is no good way to print long double. Convert a copy to + // double. 
Ah well, it's only a comment. + bool ignored; + APFloat APF = APFloat(MI->getOperand(0).getFPImm()->getValueAPF()); + APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, + &ignored); + O << "(long double) " << APF.convertToDouble(); + } + } else + printOperand(MI, 0, O); + } else { + if (MI->getOperand(0).isReg() && MI->getOperand(0).getReg() == 0) { + // Suppress offset in this case, it is not meaningful. + O << "undef"; + OutStreamer.AddBlankLine(); + return; + } + // Frame address. Currently handles register +- offset only. + assert(MI->getOperand(0).isReg() && MI->getOperand(3).isImm()); + O << '['; printOperand(MI, 0, O); O << '+'; printOperand(MI, 3, O); + O << ']'; + } + O << "+"; + printOperand(MI, NOps-2, O); +} + + void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { X86MCInstLower MCInstLowering(OutContext, Mang, *this); switch (MI->getOpcode()) { - + case TargetOpcode::DBG_VALUE: + if (isVerbose() && OutStreamer.hasRawTextSupport()) { + std::string TmpStr; + raw_string_ostream OS(TmpStr); + PrintDebugValueComment(MI, OS); + OutStreamer.EmitRawText(StringRef(OS.str())); + } + return; + case X86::MOVPC32r: { MCInst TmpInst; // This is a pseudo op for a two instruction sequence with a label, which From rjmccall at apple.com Tue Apr 6 18:35:53 2010 From: rjmccall at apple.com (John McCall) Date: Tue, 06 Apr 2010 23:35:53 -0000 Subject: [llvm-commits] [llvm] r100581 - in /llvm/trunk/lib: CodeGen/AsmPrinter/DwarfDebug.cpp CodeGen/VirtRegRewriter.cpp Target/SystemZ/SystemZRegisterInfo.cpp Target/X86/SSEDomainFix.cpp Target/X86/X86FastISel.cpp Message-ID: <20100406233553.5D3BC2A6C12C@llvm.org> Author: rjmccall Date: Tue Apr 6 18:35:53 2010 New Revision: 100581 URL: http://llvm.org/viewvc/llvm-project?rev=100581&view=rev Log: Fix a number of clang -Wsign-compare warnings that didn't have an obvious solution. The only reason these don't fire with gcc-4.2 is that gcc turns off part of -Wsign-compare in C++ on accident. 
Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp llvm/trunk/lib/CodeGen/VirtRegRewriter.cpp llvm/trunk/lib/Target/SystemZ/SystemZRegisterInfo.cpp llvm/trunk/lib/Target/X86/SSEDomainFix.cpp llvm/trunk/lib/Target/X86/X86FastISel.cpp Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp?rev=100581&r1=100580&r2=100581&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp (original) +++ llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp Tue Apr 6 18:35:53 2010 @@ -1542,7 +1542,7 @@ const APInt FltVal = FPImm.bitcastToAPInt(); const char *FltPtr = (const char*)FltVal.getRawData(); - unsigned NumBytes = FltVal.getBitWidth() / 8; // 8 bits per byte. + int NumBytes = FltVal.getBitWidth() / 8; // 8 bits per byte. bool LittleEndian = Asm->getTargetData().isLittleEndian(); int Incr = (LittleEndian ? 1 : -1); int Start = (LittleEndian ? 0 : NumBytes - 1); Modified: llvm/trunk/lib/CodeGen/VirtRegRewriter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/VirtRegRewriter.cpp?rev=100581&r1=100580&r2=100581&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/VirtRegRewriter.cpp (original) +++ llvm/trunk/lib/CodeGen/VirtRegRewriter.cpp Tue Apr 6 18:35:53 2010 @@ -895,7 +895,7 @@ bool DoReMat = NewOp.StackSlotOrReMat > VirtRegMap::MAX_STACK_SLOT; int SSorRMId = DoReMat - ? VRM.getReMatId(NewOp.VirtReg) : NewOp.StackSlotOrReMat; + ? VRM.getReMatId(NewOp.VirtReg) : (int) NewOp.StackSlotOrReMat; // Back-schedule reloads and remats. 
MachineBasicBlock::iterator InsertLoc = Modified: llvm/trunk/lib/Target/SystemZ/SystemZRegisterInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SystemZ/SystemZRegisterInfo.cpp?rev=100581&r1=100580&r2=100581&view=diff ============================================================================== --- llvm/trunk/lib/Target/SystemZ/SystemZRegisterInfo.cpp (original) +++ llvm/trunk/lib/Target/SystemZ/SystemZRegisterInfo.cpp Tue Apr 6 18:35:53 2010 @@ -200,7 +200,7 @@ uint64_t ThisVal = (Offset > Chunk) ? Chunk : Offset; MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), SystemZ::R15D) - .addReg(SystemZ::R15D).addImm((isSub ? -(int64_t)ThisVal : ThisVal)); + .addReg(SystemZ::R15D).addImm(isSub ? -ThisVal : ThisVal); // The PSW implicit def is dead. MI->getOperand(3).setIsDead(); Offset -= ThisVal; Modified: llvm/trunk/lib/Target/X86/SSEDomainFix.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/SSEDomainFix.cpp?rev=100581&r1=100580&r2=100581&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/SSEDomainFix.cpp (original) +++ llvm/trunk/lib/Target/X86/SSEDomainFix.cpp Tue Apr 6 18:35:53 2010 @@ -159,7 +159,7 @@ // We just need them to be consecutive, ordering doesn't matter. assert(X86::XMM9 == X86::XMM0+NumRegs-1 && "Unexpected sort"); reg -= X86::XMM0; - return reg < NumRegs ? reg : -1; + return reg < NumRegs ? (int) reg : -1; } DomainValue *SSEDomainFixPass::Alloc(int domain) { Modified: llvm/trunk/lib/Target/X86/X86FastISel.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86FastISel.cpp?rev=100581&r1=100580&r2=100581&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86FastISel.cpp (original) +++ llvm/trunk/lib/Target/X86/X86FastISel.cpp Tue Apr 6 18:35:53 2010 @@ -305,7 +305,7 @@ if (Opc) { addFullAddress(BuildMI(MBB, DL, TII.get(Opc)), AM) - .addImm(Signed ? 
CI->getSExtValue() : + .addImm(Signed ? (uint64_t) CI->getSExtValue() : CI->getZExtValue()); return true; } From stoklund at 2pi.dk Tue Apr 6 18:44:44 2010 From: stoklund at 2pi.dk (Jakob Stoklund Olesen) Date: Tue, 06 Apr 2010 23:44:44 -0000 Subject: [llvm-commits] [llvm] r100584 - /llvm/trunk/test/FrontendC/2009-12-07-BitFieldAlignment.c Message-ID: <20100406234444.DE0772A6C12C@llvm.org> Author: stoklund Date: Tue Apr 6 18:44:44 2010 New Revision: 100584 URL: http://llvm.org/viewvc/llvm-project?rev=100584&view=rev Log: Let that which does not matter truly slide. This test only cares about alignment, so don't test for other cruft. An upcoming llvm-gcc patch needs this. Modified: llvm/trunk/test/FrontendC/2009-12-07-BitFieldAlignment.c Modified: llvm/trunk/test/FrontendC/2009-12-07-BitFieldAlignment.c URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/FrontendC/2009-12-07-BitFieldAlignment.c?rev=100584&r1=100583&r2=100584&view=diff ============================================================================== --- llvm/trunk/test/FrontendC/2009-12-07-BitFieldAlignment.c (original) +++ llvm/trunk/test/FrontendC/2009-12-07-BitFieldAlignment.c Tue Apr 6 18:44:44 2010 @@ -9,7 +9,7 @@ }; void f0(struct S *a) { -// CHECK: %3 = load i32* %2, align 4 -// CHECK: store i32 %4, i32* %2, align 4 +// CHECK: load {{.*}}, align 4 +// CHECK: store {{.*}}, align 4 a->e = 0; } From baldrick at free.fr Tue Apr 6 18:50:12 2010 From: baldrick at free.fr (Duncan Sands) Date: Wed, 07 Apr 2010 01:50:12 +0200 Subject: [llvm-commits] [llvm] r100581 - in /llvm/trunk/lib: CodeGen/AsmPrinter/DwarfDebug.cpp CodeGen/VirtRegRewriter.cpp Target/SystemZ/SystemZRegisterInfo.cpp Target/X86/SSEDomainFix.cpp Target/X86/X86FastISel.cpp In-Reply-To: <20100406233553.5D3BC2A6C12C@llvm.org> References: <20100406233553.5D3BC2A6C12C@llvm.org> Message-ID: <4BBBC8B4.90701@free.fr> Hi John, > - unsigned NumBytes = FltVal.getBitWidth() / 8; // 8 bits per byte. 
> + int NumBytes = FltVal.getBitWidth() / 8; // 8 bits per byte. it doesn't make any sense for the bit width to be negative, so maybe getBitWidth() should be changed to return an unsigned value instead. Ciao, Duncan. From dpatel at apple.com Tue Apr 6 18:53:48 2010 From: dpatel at apple.com (Devang Patel) Date: Tue, 06 Apr 2010 23:53:48 -0000 Subject: [llvm-commits] [llvm] r100586 - in /llvm/trunk: lib/CodeGen/AsmPrinter/DwarfDebug.cpp test/DebugInfo/2010-04-06-NestedFnDbgInfo.ll Message-ID: <20100406235348.5BCB92A6C12C@llvm.org> Author: dpatel Date: Tue Apr 6 18:53:48 2010 New Revision: 100586 URL: http://llvm.org/viewvc/llvm-project?rev=100586&view=rev Log: Do not emit specification DIE with DW_AT_specification attribute for member functions of a function local class. This trips gdb's partial scan of DIEs at load time. Fixes Radar 7833483. Added: llvm/trunk/test/DebugInfo/2010-04-06-NestedFnDbgInfo.ll Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp?rev=100586&r1=100585&r2=100586&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp (original) +++ llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp Tue Apr 6 18:53:48 2010 @@ -1332,6 +1332,19 @@ return AScope; } +/// isSubprogramContext - Return true if Context is either a subprogram +/// or another context nested inside a subprogram. +bool isSubprogramContext(MDNode *Context) { + if (!Context) + return false; + DIDescriptor D(Context); + if (D.isSubprogram()) + return true; + if (D.isType()) + return isSubprogramContext(DIType(Context).getContext().getNode()); + return false; +} + /// updateSubprogramScopeDIE - Find DIE for the given subprogram and /// attach appropriate DW_AT_low_pc and DW_AT_high_pc attributes. 
/// If there are global variables in this scope then create and insert @@ -1347,7 +1360,8 @@ // expect specification DIE in parent function. So avoid creating // specification DIE for a function defined inside a function. if (SP.isDefinition() && !SP.getContext().isCompileUnit() && - !SP.getContext().isFile() && !SP.getContext().isSubprogram()) { + !SP.getContext().isFile() && + !isSubprogramContext(SP.getContext().getNode())) { addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); // Add arguments. @@ -1766,7 +1780,8 @@ // Do not create specification DIE if context is either compile unit // or a subprogram. if (DI_GV.isDefinition() && !GVContext.isCompileUnit() && - !GVContext.isFile() && !GVContext.isSubprogram()) { + !GVContext.isFile() && + !isSubprogramContext(GVContext.getNode())) { // Create specification DIE. DIE *VariableSpecDIE = new DIE(dwarf::DW_TAG_variable); addDIEEntry(VariableSpecDIE, dwarf::DW_AT_specification, Added: llvm/trunk/test/DebugInfo/2010-04-06-NestedFnDbgInfo.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/DebugInfo/2010-04-06-NestedFnDbgInfo.ll?rev=100586&view=auto ============================================================================== --- llvm/trunk/test/DebugInfo/2010-04-06-NestedFnDbgInfo.ll (added) +++ llvm/trunk/test/DebugInfo/2010-04-06-NestedFnDbgInfo.ll Tue Apr 6 18:53:48 2010 @@ -0,0 +1,89 @@ +; RUN: llvm-as < %s | llc -asm-verbose -O0 | grep AT_specification | count 2 +; Radar 7833483 +; Do not emit AT_specification for nested function foo. 
+ +%class.A = type { i8 } +%class.B = type { i8 } + +define i32 @main() ssp { +entry: + %retval = alloca i32, align 4 ; [#uses=3] + %b = alloca %class.A, align 1 ; <%class.A*> [#uses=1] + store i32 0, i32* %retval + call void @llvm.dbg.declare(metadata !{%class.A* %b}, metadata !0), !dbg !14 + %call = call i32 @_ZN1B2fnEv(%class.A* %b), !dbg !15 ; [#uses=1] + store i32 %call, i32* %retval, !dbg !15 + %0 = load i32* %retval, !dbg !16 ; [#uses=1] + ret i32 %0, !dbg !16 +} + +declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone + +define linkonce_odr i32 @_ZN1B2fnEv(%class.A* %this) ssp align 2 { +entry: + %retval = alloca i32, align 4 ; [#uses=2] + %this.addr = alloca %class.A*, align 8 ; <%class.A**> [#uses=2] + %a = alloca %class.A, align 1 ; <%class.A*> [#uses=1] + %i = alloca i32, align 4 ; [#uses=2] + store %class.A* %this, %class.A** %this.addr + call void @llvm.dbg.declare(metadata !{%class.A** %this.addr}, metadata !17), !dbg !18 + %this1 = load %class.A** %this.addr ; <%class.A*> [#uses=0] + call void @llvm.dbg.declare(metadata !{%class.A* %a}, metadata !19), !dbg !27 + call void @llvm.dbg.declare(metadata !{i32* %i}, metadata !28), !dbg !29 + %call = call i32 @_ZZN1B2fnEvEN1A3fooEv(%class.A* %a), !dbg !30 ; [#uses=1] + store i32 %call, i32* %i, !dbg !30 + %tmp = load i32* %i, !dbg !31 ; [#uses=1] + store i32 %tmp, i32* %retval, !dbg !31 + %0 = load i32* %retval, !dbg !32 ; [#uses=1] + ret i32 %0, !dbg !32 +} + +define internal i32 @_ZZN1B2fnEvEN1A3fooEv(%class.A* %this) ssp align 2 { +entry: + %retval = alloca i32, align 4 ; [#uses=2] + %this.addr = alloca %class.A*, align 8 ; <%class.A**> [#uses=2] + store %class.A* %this, %class.A** %this.addr + call void @llvm.dbg.declare(metadata !{%class.A** %this.addr}, metadata !33), !dbg !34 + %this1 = load %class.A** %this.addr ; <%class.A*> [#uses=0] + store i32 42, i32* %retval, !dbg !35 + %0 = load i32* %retval, !dbg !35 ; [#uses=1] + ret i32 %0, !dbg !35 +} + +!0 = metadata !{i32 524544, 
metadata !1, metadata !"b", metadata !3, i32 16, metadata !8} ; [ DW_TAG_auto_variable ] +!1 = metadata !{i32 524299, metadata !2, i32 15, i32 12} ; [ DW_TAG_lexical_block ] +!2 = metadata !{i32 524334, i32 0, metadata !3, metadata !"main", metadata !"main", metadata !"main", metadata !3, i32 15, metadata !5, i1 false, i1 true, i32 0, i32 0, null, i1 false} ; [ DW_TAG_subprogram ] +!3 = metadata !{i32 524329, metadata !"one.cc", metadata !"/tmp", metadata !4} ; [ DW_TAG_file_type ] +!4 = metadata !{i32 524305, i32 0, i32 4, metadata !"one.cc", metadata !"/tmp", metadata !"clang 1.5", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] +!5 = metadata !{i32 524309, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0, null} ; [ DW_TAG_subroutine_type ] +!6 = metadata !{metadata !7} +!7 = metadata !{i32 524324, metadata !3, metadata !"int", metadata !3, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!8 = metadata !{i32 524290, metadata !3, metadata !"B", metadata !3, i32 2, i64 8, i64 8, i64 0, i32 0, null, metadata !9, i32 0, null} ; [ DW_TAG_class_type ] +!9 = metadata !{metadata !10} +!10 = metadata !{i32 524334, i32 0, metadata !8, metadata !"fn", metadata !"fn", metadata !"_ZN1B2fnEv", metadata !3, i32 4, metadata !11, i1 false, i1 true, i32 0, i32 0, null, i1 false} ; [ DW_TAG_subprogram ] +!11 = metadata !{i32 524309, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_subroutine_type ] +!12 = metadata !{metadata !7, metadata !13} +!13 = metadata !{i32 524303, metadata !3, metadata !"", metadata !3, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !8} ; [ DW_TAG_pointer_type ] +!14 = metadata !{i32 16, i32 5, metadata !1, null} +!15 = metadata !{i32 17, i32 3, metadata !1, null} +!16 = metadata !{i32 18, i32 1, metadata !2, null} +!17 = metadata !{i32 524545, metadata !10, metadata !"this", metadata !3, i32 4, 
metadata !13} ; [ DW_TAG_arg_variable ] +!18 = metadata !{i32 4, i32 7, metadata !10, null} +!19 = metadata !{i32 524544, metadata !20, metadata !"a", metadata !3, i32 9, metadata !21} ; [ DW_TAG_auto_variable ] +!20 = metadata !{i32 524299, metadata !10, i32 4, i32 12} ; [ DW_TAG_lexical_block ] +!21 = metadata !{i32 524290, metadata !10, metadata !"A", metadata !3, i32 5, i64 8, i64 8, i64 0, i32 0, null, metadata !22, i32 0, null} ; [ DW_TAG_class_type ] +!22 = metadata !{metadata !23} +!23 = metadata !{i32 524334, i32 0, metadata !21, metadata !"foo", metadata !"foo", metadata !"_ZZN1B2fnEvEN1A3fooEv", metadata !3, i32 7, metadata !24, i1 false, i1 true, i32 0, i32 0, null, i1 false} ; [ DW_TAG_subprogram ] +!24 = metadata !{i32 524309, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !25, i32 0, null} ; [ DW_TAG_subroutine_type ] +!25 = metadata !{metadata !7, metadata !26} +!26 = metadata !{i32 524303, metadata !3, metadata !"", metadata !3, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !21} ; [ DW_TAG_pointer_type ] +!27 = metadata !{i32 9, i32 7, metadata !20, null} +!28 = metadata !{i32 524544, metadata !20, metadata !"i", metadata !3, i32 10, metadata !7} ; [ DW_TAG_auto_variable ] +!29 = metadata !{i32 10, i32 9, metadata !20, null} +!30 = metadata !{i32 10, i32 5, metadata !20, null} +!31 = metadata !{i32 11, i32 5, metadata !20, null} +!32 = metadata !{i32 12, i32 3, metadata !10, null} +!33 = metadata !{i32 524545, metadata !23, metadata !"this", metadata !3, i32 7, metadata !26} ; [ DW_TAG_arg_variable ] +!34 = metadata !{i32 7, i32 11, metadata !23, null} +!35 = metadata !{i32 7, i32 19, metadata !36, null} +!36 = metadata !{i32 524299, metadata !23, i32 7, i32 17} ; [ DW_TAG_lexical_block ] From rjmccall at apple.com Tue Apr 6 18:56:30 2010 From: rjmccall at apple.com (John McCall) Date: Tue, 6 Apr 2010 16:56:30 -0700 Subject: [llvm-commits] [llvm] r100581 - in /llvm/trunk/lib: 
CodeGen/AsmPrinter/DwarfDebug.cpp CodeGen/VirtRegRewriter.cpp Target/SystemZ/SystemZRegisterInfo.cpp Target/X86/SSEDomainFix.cpp Target/X86/X86FastISel.cpp In-Reply-To: <4BBBC8B4.90701@free.fr> References: <20100406233553.5D3BC2A6C12C@llvm.org> <4BBBC8B4.90701@free.fr> Message-ID: <0B0EF282-69D7-466C-9B8D-7D9091A30999@apple.com> On Apr 6, 2010, at 4:50 PM, Duncan Sands wrote: >> - unsigned NumBytes = FltVal.getBitWidth() / 8; // 8 bits per byte. >> + int NumBytes = FltVal.getBitWidth() / 8; // 8 bits per byte. > > it doesn't make any sense for the bit width to be negative, so maybe > getBitWidth() should be changed to return an unsigned value instead. It does return an unsigned value. The -Wsign-compare warnings are actually downstream of this; changing the type of NumBytes just happened to be the nicest way of fixing them. Dividing by 8 should be sufficient to suppress any unsigned-to-signed warnings for the variable initialization. John. From evan.cheng at apple.com Tue Apr 6 19:41:17 2010 From: evan.cheng at apple.com (Evan Cheng) Date: Wed, 07 Apr 2010 00:41:17 -0000 Subject: [llvm-commits] [llvm] r100592 - in /llvm/trunk: include/llvm/CodeGen/Passes.h lib/CodeGen/LLVMTargetMachine.cpp lib/CodeGen/MachineLICM.cpp Message-ID: <20100407004117.741782A6C12C@llvm.org> Author: evancheng Date: Tue Apr 6 19:41:17 2010 New Revision: 100592 URL: http://llvm.org/viewvc/llvm-project?rev=100592&view=rev Log: Post regalloc LICM. Work in progress. 
Modified: llvm/trunk/include/llvm/CodeGen/Passes.h llvm/trunk/lib/CodeGen/LLVMTargetMachine.cpp llvm/trunk/lib/CodeGen/MachineLICM.cpp Modified: llvm/trunk/include/llvm/CodeGen/Passes.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/Passes.h?rev=100592&r1=100591&r2=100592&view=diff ============================================================================== --- llvm/trunk/include/llvm/CodeGen/Passes.h (original) +++ llvm/trunk/include/llvm/CodeGen/Passes.h Tue Apr 6 19:41:17 2010 @@ -170,7 +170,7 @@ /// createMachineLICMPass - This pass performs LICM on machine instructions. /// - FunctionPass *createMachineLICMPass(); + FunctionPass *createMachineLICMPass(bool PreRegAlloc = true); /// createMachineSinkingPass - This pass performs sinking on machine /// instructions. Modified: llvm/trunk/lib/CodeGen/LLVMTargetMachine.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/LLVMTargetMachine.cpp?rev=100592&r1=100591&r2=100592&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/LLVMTargetMachine.cpp (original) +++ llvm/trunk/lib/CodeGen/LLVMTargetMachine.cpp Tue Apr 6 19:41:17 2010 @@ -66,6 +66,9 @@ cl::desc("Verify generated machine code"), cl::init(getenv("LLVM_VERIFY_MACHINEINSTRS")!=NULL)); +static cl::opt PostRAMachineLICM("postra-machine-licm", cl::Hidden, + cl::desc("Enable post-regalloc Machine LICM")); + static cl::opt AsmVerbose("asm-verbose", cl::desc("Add comments to directives."), cl::init(cl::BOU_UNSET)); @@ -343,6 +346,10 @@ // kill markers. PM.add(createStackSlotColoringPass(false)); printAndVerify(PM, "After StackSlotColoring"); + + // Run post-ra machine LICM to hoist reloads / remats. + if (PostRAMachineLICM) + PM.add(createMachineLICMPass(false)); } // Run post-ra passes. 
Modified: llvm/trunk/lib/CodeGen/MachineLICM.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MachineLICM.cpp?rev=100592&r1=100591&r2=100592&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/MachineLICM.cpp (original) +++ llvm/trunk/lib/CodeGen/MachineLICM.cpp Tue Apr 6 19:41:17 2010 @@ -22,8 +22,8 @@ #define DEBUG_TYPE "machine-licm" #include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -33,6 +33,7 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -41,20 +42,23 @@ STATISTIC(NumHoisted, "Number of machine instructions hoisted out of loops"); STATISTIC(NumCSEed, "Number of hoisted machine instructions CSEed"); +STATISTIC(NumPostRAHoisted, + "Number of machine instructions hoisted out of loops post regalloc"); namespace { class MachineLICM : public MachineFunctionPass { - MachineConstantPool *MCP; + bool PreRegAlloc; + const TargetMachine *TM; const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; - BitVector AllocatableSet; + const MachineFrameInfo *MFI; + MachineRegisterInfo *RegInfo; // Various analyses that we use... AliasAnalysis *AA; // Alias analysis info. MachineLoopInfo *LI; // Current MachineLoopInfo MachineDominatorTree *DT; // Machine dominator tree for the cur loop - MachineRegisterInfo *RegInfo; // Machine register information // State that is updated as we process loops bool Changed; // True if a loop is changed. @@ -62,11 +66,18 @@ MachineLoop *CurLoop; // The current loop we are working on. 
MachineBasicBlock *CurPreheader; // The preheader for CurLoop. + BitVector AllocatableSet; + // For each opcode, keep a list of potentail CSE instructions. DenseMap > CSEMap; + public: static char ID; // Pass identification, replacement for typeid - MachineLICM() : MachineFunctionPass(&ID) {} + MachineLICM() : + MachineFunctionPass(&ID), PreRegAlloc(true) {} + + explicit MachineLICM(bool PreRA) : + MachineFunctionPass(&ID), PreRegAlloc(PreRA) {} virtual bool runOnMachineFunction(MachineFunction &MF); @@ -106,6 +117,7 @@ /// pass without iteration. /// void HoistRegion(MachineDomTreeNode *N); + void HoistRegionPostRA(MachineDomTreeNode *N); /// isLoadFromConstantMemory - Return true if the given instruction is a /// load from constant memory. @@ -133,6 +145,7 @@ /// that is safe to hoist, this instruction is called to do the dirty work. /// void Hoist(MachineInstr *MI); + void HoistPostRA(MachineInstr *MI); /// InitCSEMap - Initialize the CSE map with instructions that are in the /// current loop preheader that may become duplicates of instructions that @@ -145,7 +158,9 @@ static RegisterPass X("machinelicm", "Machine Loop Invariant Code Motion"); -FunctionPass *llvm::createMachineLICMPass() { return new MachineLICM(); } +FunctionPass *llvm::createMachineLICMPass(bool PreRegAlloc) { + return new MachineLICM(PreRegAlloc); +} /// LoopIsOuterMostWithPreheader - Test if the given loop is the outer-most /// loop that has a preheader. @@ -161,13 +176,16 @@ /// loop. 
/// bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { - DEBUG(dbgs() << "******** Machine LICM ********\n"); + if (PreRegAlloc) + DEBUG(dbgs() << "******** Pre-regalloc Machine LICM ********\n"); + else + DEBUG(dbgs() << "******** Post-regalloc Machine LICM ********\n"); Changed = FirstInLoop = false; - MCP = MF.getConstantPool(); TM = &MF.getTarget(); TII = TM->getInstrInfo(); TRI = TM->getRegisterInfo(); + MFI = MF.getFrameInfo(); RegInfo = &MF.getRegInfo(); AllocatableSet = TRI->getAllocatableSet(MF); @@ -196,13 +214,147 @@ // CSEMap is initialized for loop header when the first instruction is // being hoisted. FirstInLoop = true; - HoistRegion(DT->getNode(CurLoop->getHeader())); - CSEMap.clear(); + MachineDomTreeNode *N = DT->getNode(CurLoop->getHeader()); + if (!PreRegAlloc) + HoistRegionPostRA(N); + else { + HoistRegion(N); + CSEMap.clear(); + } } return Changed; } +void MachineLICM::HoistRegionPostRA(MachineDomTreeNode *N) { + assert(N != 0 && "Null dominator tree node?"); + + unsigned NumRegs = TRI->getNumRegs(); + unsigned *PhysRegDefs = new unsigned[NumRegs]; + std::fill(PhysRegDefs, PhysRegDefs + NumRegs, 0); + + SmallVector, 32> Candidates; + SmallSet StoredFIs; + + // Walk the entire region, count number of defs for each register, and + // return potential LICM candidates. + SmallVector WorkList; + WorkList.push_back(N); + do { + N = WorkList.pop_back_val(); + MachineBasicBlock *BB = N->getBlock(); + + if (!CurLoop->contains(BB)) + continue; + // Conservatively treat live-in's as an external def. 
+ for (MachineBasicBlock::const_livein_iterator I = BB->livein_begin(), + E = BB->livein_end(); I != E; ++I) { + unsigned Reg = *I; + ++PhysRegDefs[Reg]; + for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) + ++PhysRegDefs[*SR]; + } + + for (MachineBasicBlock::iterator + MII = BB->begin(), E = BB->end(); MII != E; ++MII) { + bool RuledOut = false; + bool SeenDef = false; + MachineInstr *MI = &*MII; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + assert(TargetRegisterInfo::isPhysicalRegister(Reg) && + "Not expecting virtual register!"); + + if (MO.isDef()) { + SeenDef = true; + if (++PhysRegDefs[Reg] > 1) + // MI defined register is seen defined by another instruction in + // the loop, it cannot be a LICM candidate. + RuledOut = true; + for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) + if (++PhysRegDefs[*SR] > 1) + RuledOut = true; + } + } + + // FIXME: Only consider reloads for now. + bool SkipCheck = false; + int FI; + if (SeenDef && !RuledOut) { + if (TII->isLoadFromStackSlot(MI, FI) && + MFI->isSpillSlotObjectIndex(FI)) { + Candidates.push_back(std::make_pair(MI, FI)); + SkipCheck = true; + } + } + + // If MI is a store to a stack slot, remember the slot. An instruction + // loads from this slot cannot be a LICM candidate. + if (SkipCheck && TII->isStoreToStackSlot(MI, FI)) + StoredFIs.insert(FI); + } + + const std::vector &Children = N->getChildren(); + for (unsigned I = 0, E = Children.size(); I != E; ++I) + WorkList.push_back(Children[I]); + } while (!WorkList.empty()); + + // Now evaluate whether the potential candidates qualify. + // 1. Check if the candidate defined register is defined by another + // instruction in the loop. + // 2. If the candidate is a load from stack slot (always true for now), + // check if the slot is stored anywhere in the loop. 
+ for (unsigned i = 0, e = Candidates.size(); i != e; ++i) { + bool Safe = true; + int FI = Candidates[i].second; + if (StoredFIs.count(FI)) + continue; + + MachineInstr *MI = Candidates[i].first; + for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) { + const MachineOperand &MO = MI->getOperand(j); + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + if (MO.isDef() && PhysRegDefs[Reg] > 1) { + Safe = false; + break; + } + } + + if (Safe) + HoistPostRA(MI); + } +} + +void MachineLICM::HoistPostRA(MachineInstr *MI) { + // Now move the instructions to the predecessor, inserting it before any + // terminator instructions. + DEBUG({ + dbgs() << "Hoisting " << *MI; + if (CurPreheader->getBasicBlock()) + dbgs() << " to MachineBasicBlock " + << CurPreheader->getName(); + if (MI->getParent()->getBasicBlock()) + dbgs() << " from MachineBasicBlock " + << MI->getParent()->getName(); + dbgs() << "\n"; + }); + + // Splice the instruction to the preheader. + CurPreheader->splice(CurPreheader->getFirstTerminator(),MI->getParent(),MI); + + ++NumPostRAHoisted; + Changed = true; +} + /// HoistRegion - Walk the specified region of the CFG (defined by all blocks /// dominated by the specified block, and that are in the current loop) in depth /// first order w.r.t the DominatorTree. 
This allows us to visit definitions @@ -223,7 +375,6 @@ } const std::vector &Children = N->getChildren(); - for (unsigned I = 0, E = Children.size(); I != E; ++I) HoistRegion(Children[I]); } From dalej at apple.com Tue Apr 6 20:15:14 2010 From: dalej at apple.com (Dale Johannesen) Date: Wed, 07 Apr 2010 01:15:14 -0000 Subject: [llvm-commits] [llvm] r100596 - in /llvm/trunk/lib: CodeGen/AsmPrinter/AsmPrinter.cpp CodeGen/SelectionDAG/FastISel.cpp Target/X86/AsmPrinter/X86MCInstLower.cpp Message-ID: <20100407011514.70B602A6C12C@llvm.org> Author: johannes Date: Tue Apr 6 20:15:14 2010 New Revision: 100596 URL: http://llvm.org/viewvc/llvm-project?rev=100596&view=rev Log: Move printing of target-independent DEBUG_VALUE comments into AsmPrinter. Target-dependent form is still generated by FastISel and still handled in X86 code. Modified: llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp llvm/trunk/lib/CodeGen/SelectionDAG/FastISel.cpp llvm/trunk/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp Modified: llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp?rev=100596&r1=100595&r2=100596&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp (original) +++ llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp Tue Apr 6 20:15:14 2010 @@ -434,7 +434,64 @@ AP.OutStreamer.AddBlankLine(); } +/// EmitDebugValueComment - This method handles the target-independent form +/// of DBG_VALUE, returning true if it was able to do so. A false return +/// means the target will need to handle MI in EmitInstruction. +static bool EmitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { + char buf[100]; + std::string Str = "\t"; + Str += AP.MAI->getCommentString(); + Str += "DEBUG_VALUE: "; + // This code handles only the 3-operand target-independent form. 
+ if (MI->getNumOperands() != 3) + return false; + // cast away const; DIetc do not take const operands for some reason. + DIVariable V((MDNode*)(MI->getOperand(2).getMetadata())); + Str += V.getName(); + Str += " <- "; + + // Register or immediate value. Register 0 means undef. + if (MI->getOperand(0).isFPImm()) { + APFloat APF = APFloat(MI->getOperand(0).getFPImm()->getValueAPF()); + if (MI->getOperand(0).getFPImm()->getType()->isFloatTy()) { + sprintf(buf, "%e", APF.convertToFloat()); + Str += buf; + } else if (MI->getOperand(0).getFPImm()->getType()->isDoubleTy()) { + sprintf(buf, "%e", APF.convertToDouble()); + Str += buf; + } else { + // There is no good way to print long double. Convert a copy to + // double. Ah well, it's only a comment. + bool ignored; + APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, + &ignored); + Str += "(long double) "; + sprintf(buf, "%e", APF.convertToDouble()); + Str += buf; + } + } else if (MI->getOperand(0).isImm()) { + sprintf(buf, "%lld", MI->getOperand(0).getImm()); + Str += buf; + } else if (MI->getOperand(0).isReg()) { + if (MI->getOperand(0).getReg() == 0) { + // Suppress offset, it is not meaningful here. + Str += "undef"; + // NOTE: Want this comment at start of line, don't emit with AddComment. + AP.OutStreamer.EmitRawText(Twine(Str)); + return true; + } + Str += AP.TM.getRegisterInfo()->getName(MI->getOperand(0).getReg()); + } else + llvm_unreachable("Unknown operand type"); + + Str += '+'; + sprintf(buf, "%lld", MI->getOperand(1).getImm()); + Str += buf; + // NOTE: Want this comment at start of line, don't emit with AddComment. + AP.OutStreamer.EmitRawText(Twine(Str)); + return true; +} /// EmitFunctionBody - This method emits the body and trailer for a /// function. 
@@ -473,6 +530,12 @@ case TargetOpcode::INLINEASM: EmitInlineAsm(II); break; + case TargetOpcode::DBG_VALUE: + if (isVerbose()) { + if (!EmitDebugValueComment(II, *this)) + EmitInstruction(II); + } + break; case TargetOpcode::IMPLICIT_DEF: if (isVerbose()) EmitImplicitDef(II, *this); break; @@ -1236,7 +1299,7 @@ if (CFP->getType()->isX86_FP80Ty()) { // all long double variants are printed as hex - // api needed to prevent premature destruction + // API needed to prevent premature destruction APInt API = CFP->getValueAPF().bitcastToAPInt(); const uint64_t *p = API.getRawData(); if (AP.isVerbose()) { @@ -1266,8 +1329,8 @@ assert(CFP->getType()->isPPC_FP128Ty() && "Floating point constant type not handled"); - // All long double variants are printed as hex api needed to prevent - // premature destruction. + // All long double variants are printed as hex + // API needed to prevent premature destruction. APInt API = CFP->getValueAPF().bitcastToAPInt(); const uint64_t *p = API.getRawData(); if (AP.TM.getTargetData()->isBigEndian()) { Modified: llvm/trunk/lib/CodeGen/SelectionDAG/FastISel.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/FastISel.cpp?rev=100596&r1=100595&r2=100596&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/FastISel.cpp (original) +++ llvm/trunk/lib/CodeGen/SelectionDAG/FastISel.cpp Tue Apr 6 20:15:14 2010 @@ -332,6 +332,8 @@ Value *Address = DI->getAddress(); if (!Address) return true; + if (isa(Address)) + return true; AllocaInst *AI = dyn_cast(Address); // Don't handle byval struct arguments or VLAs, for example. if (!AI) break; @@ -348,7 +350,7 @@ return true; } case Intrinsic::dbg_value: { - // This requires target support, but right now X86 is the only Fast target. + // This form of DBG_VALUE is target-independent. 
DbgValueInst *DI = cast(I); const TargetInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE); Value *V = DI->getValue(); Modified: llvm/trunk/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp?rev=100596&r1=100595&r2=100596&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp (original) +++ llvm/trunk/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp Tue Apr 6 20:15:14 2010 @@ -328,62 +328,24 @@ void X86AsmPrinter::PrintDebugValueComment(const MachineInstr *MI, raw_ostream &O) { - // FIXME: if this is implemented for another target before it goes - // away completely, the common part should be moved into AsmPrinter. - O << '\t' << MAI->getCommentString() << "DEBUG_VALUE: "; + // Only the target-dependent form of DBG_VALUE should get here. + // Referencing the offset and metadata as NOps-2 and NOps-1 is + // probably portable to other targets; frame pointer location is not. unsigned NOps = MI->getNumOperands(); + assert(NOps==7); + O << '\t' << MAI->getCommentString() << "DEBUG_VALUE: "; // cast away const; DIetc do not take const operands for some reason. DIVariable V((MDNode*)(MI->getOperand(NOps-1).getMetadata())); O << V.getName(); O << " <- "; - if (NOps==3) { - // Register or immediate value. Register 0 means undef. - assert(MI->getOperand(0).isReg() || - MI->getOperand(0).isImm() || - MI->getOperand(0).isFPImm()); - if (MI->getOperand(0).isReg() && MI->getOperand(0).getReg() == 0) { - // Suppress offset in this case, it is not meaningful. - O << "undef"; - OutStreamer.AddBlankLine(); - return; - } - - if (MI->getOperand(0).isFPImm()) { - // This is more naturally done in printOperand, but since the only use - // of such an operand is in this comment and that is temporary (and it's - // ugly), we prefer to keep this localized. 
- // The include of Type.h may be removable when this code is. - if (MI->getOperand(0).getFPImm()->getType()->isFloatTy() || - MI->getOperand(0).getFPImm()->getType()->isDoubleTy()) - MI->getOperand(0).print(O, &TM); - else { - // There is no good way to print long double. Convert a copy to - // double. Ah well, it's only a comment. - bool ignored; - APFloat APF = APFloat(MI->getOperand(0).getFPImm()->getValueAPF()); - APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, - &ignored); - O << "(long double) " << APF.convertToDouble(); - } - } else - printOperand(MI, 0, O); - } else { - if (MI->getOperand(0).isReg() && MI->getOperand(0).getReg() == 0) { - // Suppress offset in this case, it is not meaningful. - O << "undef"; - OutStreamer.AddBlankLine(); - return; - } - // Frame address. Currently handles register +- offset only. - assert(MI->getOperand(0).isReg() && MI->getOperand(3).isImm()); - O << '['; printOperand(MI, 0, O); O << '+'; printOperand(MI, 3, O); - O << ']'; - } + // Frame address. Currently handles register +- offset only. 
+ assert(MI->getOperand(0).isReg() && MI->getOperand(3).isImm()); + O << '['; printOperand(MI, 0, O); O << '+'; printOperand(MI, 3, O); + O << ']'; O << "+"; printOperand(MI, NOps-2, O); } - void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { X86MCInstLower MCInstLowering(OutContext, Mang, *this); switch (MI->getOpcode()) { @@ -395,7 +357,7 @@ OutStreamer.EmitRawText(StringRef(OS.str())); } return; - + case X86::MOVPC32r: { MCInst TmpInst; // This is a pseudo op for a two instruction sequence with a label, which From rjmccall at apple.com Tue Apr 6 20:49:15 2010 From: rjmccall at apple.com (John McCall) Date: Wed, 07 Apr 2010 01:49:15 -0000 Subject: [llvm-commits] [llvm] r100599 - /llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Message-ID: <20100407014915.9D34E2A6C12C@llvm.org> Author: rjmccall Date: Tue Apr 6 20:49:15 2010 New Revision: 100599 URL: http://llvm.org/viewvc/llvm-project?rev=100599&view=rev Log: Clean up some signedness oddities in this code noticed by clang. Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=100599&r1=100598&r2=100599&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original) +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue Apr 6 20:49:15 2010 @@ -3440,7 +3440,7 @@ /// FIXME: split into pslldqi, psrldqi, palignr variants. 
static bool isVectorShift(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG, bool &isLeft, SDValue &ShVal, unsigned &ShAmt) { - int NumElems = SVOp->getValueType(0).getVectorNumElements(); + unsigned NumElems = SVOp->getValueType(0).getVectorNumElements(); isLeft = true; unsigned NumZeros = getNumOfConsecutiveZeros(SVOp, NumElems, true, DAG); @@ -3452,11 +3452,12 @@ } bool SeenV1 = false; bool SeenV2 = false; - for (int i = NumZeros; i < NumElems; ++i) { - int Val = isLeft ? (i - NumZeros) : i; - int Idx = SVOp->getMaskElt(isLeft ? i : (i - NumZeros)); - if (Idx < 0) + for (unsigned i = NumZeros; i < NumElems; ++i) { + unsigned Val = isLeft ? (i - NumZeros) : i; + int Idx_ = SVOp->getMaskElt(isLeft ? i : (i - NumZeros)); + if (Idx_ < 0) continue; + unsigned Idx = (unsigned) Idx_; if (Idx < NumElems) SeenV1 = true; else { From sanjiv.gupta at microchip.com Tue Apr 6 22:36:01 2010 From: sanjiv.gupta at microchip.com (Sanjiv Gupta) Date: Wed, 07 Apr 2010 03:36:01 -0000 Subject: [llvm-commits] [llvm] r100601 - in /llvm/trunk/lib/Target/PIC16: PIC16.h PIC16ISelLowering.cpp PIC16InstrInfo.cpp Message-ID: <20100407033601.EB8BF2A6C12C@llvm.org> Author: sgupta Date: Tue Apr 6 22:36:01 2010 New Revision: 100601 URL: http://llvm.org/viewvc/llvm-project?rev=100601&view=rev Log: Fix memory leaks for external symbol name strings. 
Modified: llvm/trunk/lib/Target/PIC16/PIC16.h llvm/trunk/lib/Target/PIC16/PIC16ISelLowering.cpp llvm/trunk/lib/Target/PIC16/PIC16InstrInfo.cpp Modified: llvm/trunk/lib/Target/PIC16/PIC16.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PIC16/PIC16.h?rev=100601&r1=100600&r2=100601&view=diff ============================================================================== --- llvm/trunk/lib/Target/PIC16/PIC16.h (original) +++ llvm/trunk/lib/Target/PIC16/PIC16.h Tue Apr 6 22:36:01 2010 @@ -21,6 +21,7 @@ #include #include #include +#include namespace llvm { class PIC16TargetMachine; @@ -52,17 +53,34 @@ UDATA_SHR }; + class ESNames { + std::vector stk; + ESNames() {} + public: + ~ESNames() { + std::vector::iterator it = stk.end(); + it--; + while(stk.end() != stk.begin()) + { + char* p = *it; + delete [] p; + it--; + stk.pop_back(); + } + } - // External symbol names require memory to live till the program end. - // So we have to allocate it and keep. - // FIXME: Don't leak the allocated strings. - inline static const char *createESName (const std::string &name) { - char *tmpName = new char[name.size() + 1]; - memcpy(tmpName, name.c_str(), name.size() + 1); - return tmpName; - } - + // External symbol names require memory to live till the program end. + // So we have to allocate it and keep. Push all such allocations into a + // vector so that they get freed up on termination. 
+ inline static const char *createESName (const std::string &name) { + static ESNames esn; + char *tmpName = new char[name.size() + 1]; + memcpy(tmpName, name.c_str(), name.size() + 1); + esn.stk.push_back(tmpName); + return tmpName; + } + }; inline static const char *PIC16CondCodeToString(PIC16CC::CondCodes CC) { switch (CC) { Modified: llvm/trunk/lib/Target/PIC16/PIC16ISelLowering.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PIC16/PIC16ISelLowering.cpp?rev=100601&r1=100600&r2=100601&view=diff ============================================================================== --- llvm/trunk/lib/Target/PIC16/PIC16ISelLowering.cpp (original) +++ llvm/trunk/lib/Target/PIC16/PIC16ISelLowering.cpp Tue Apr 6 22:36:01 2010 @@ -116,7 +116,7 @@ std::string Fullname = prefix + tagname + Basename; // The name has to live through program life. - return createESName(Fullname); + return ESNames::createESName(Fullname); } // getStdLibCallName - Get the name for the standard library function. @@ -139,7 +139,7 @@ std::string LibCallName = prefix + BaseName; // The name has to live through program life. - return createESName(LibCallName); + return ESNames::createESName(LibCallName); } // PIC16TargetLowering Constructor. @@ -737,7 +737,7 @@ unsigned FIndex = FR->getIndex(); const char *tmpName; if (FIndex < ReservedFrameCount) { - tmpName = createESName(PAN::getFrameLabel(Name)); + tmpName = ESNames::createESName(PAN::getFrameLabel(Name)); ES = DAG.getTargetExternalSymbol(tmpName, MVT::i8); Offset = 0; for (unsigned i=0; igetObjectSize(FIndex)); } @@ -1077,7 +1077,7 @@ // Put the value on stack. // Get a stack slot index and convert to es. int FI = MF.getFrameInfo()->CreateStackObject(1, 1, false); - const char *tmpName = createESName(PAN::getTempdataLabel(FuncName)); + const char *tmpName = ESNames::createESName(PAN::getTempdataLabel(FuncName)); SDValue ES = DAG.getTargetExternalSymbol(tmpName, MVT::i8); // Store the value to ES. 
@@ -1275,7 +1275,7 @@ const Function *F = MF.getFunction(); std::string FuncName = F->getName(); - const char *tmpName = createESName(PAN::getFrameLabel(FuncName)); + const char *tmpName = ESNames::createESName(PAN::getFrameLabel(FuncName)); SDValue ES = DAG.getTargetExternalSymbol(tmpName, MVT::i8); SDValue BS = DAG.getConstant(1, MVT::i8); SDValue RetVal; @@ -1419,11 +1419,11 @@ } // Label for argument passing - const char *argFrame = createESName(PAN::getArgsLabel(Name)); + const char *argFrame = ESNames::createESName(PAN::getArgsLabel(Name)); ArgLabel = DAG.getTargetExternalSymbol(argFrame, MVT::i8); // Label for reading return value - const char *retName = createESName(PAN::getRetvalLabel(Name)); + const char *retName = ESNames::createESName(PAN::getRetvalLabel(Name)); RetLabel = DAG.getTargetExternalSymbol(retName, MVT::i8); } else { // if indirect call @@ -1683,7 +1683,7 @@ InitReservedFrameCount(F); // Create the .args external symbol. - const char *tmpName = createESName(PAN::getArgsLabel(FuncName)); + const char *tmpName = ESNames::createESName(PAN::getArgsLabel(FuncName)); SDValue ES = DAG.getTargetExternalSymbol(tmpName, MVT::i8); // Load arg values from the label + offset. Modified: llvm/trunk/lib/Target/PIC16/PIC16InstrInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PIC16/PIC16InstrInfo.cpp?rev=100601&r1=100600&r2=100601&view=diff ============================================================================== --- llvm/trunk/lib/Target/PIC16/PIC16InstrInfo.cpp (original) +++ llvm/trunk/lib/Target/PIC16/PIC16InstrInfo.cpp Tue Apr 6 22:36:01 2010 @@ -78,7 +78,7 @@ const Function *Func = MBB.getParent()->getFunction(); const std::string FuncName = Func->getName(); - const char *tmpName = createESName(PAN::getTempdataLabel(FuncName)); + const char *tmpName = ESNames::createESName(PAN::getTempdataLabel(FuncName)); // On the order of operands here: think "movwf SrcReg, tmp_slot, offset". 
if (RC == PIC16::GPRRegisterClass) { @@ -120,7 +120,7 @@ const Function *Func = MBB.getParent()->getFunction(); const std::string FuncName = Func->getName(); - const char *tmpName = createESName(PAN::getTempdataLabel(FuncName)); + const char *tmpName = ESNames::createESName(PAN::getTempdataLabel(FuncName)); // On the order of operands here: think "movf FrameIndex, W". if (RC == PIC16::GPRRegisterClass) { From sabre at nondot.org Tue Apr 6 23:08:57 2010 From: sabre at nondot.org (Chris Lattner) Date: Wed, 07 Apr 2010 04:08:57 -0000 Subject: [llvm-commits] [llvm] r100602 - in /llvm/trunk/lib/AsmParser: LLParser.cpp LLParser.h Message-ID: <20100407040858.0B5FF2A6C12C@llvm.org> Author: lattner Date: Tue Apr 6 23:08:57 2010 New Revision: 100602 URL: http://llvm.org/viewvc/llvm-project?rev=100602&view=rev Log: fix a crash on invalid metadata, e.g.: call i32 @foo(), XXXX We would return the error without inserting the new instruction into the program, so it wouldn't get deallocated, and an abort would trigger when the module was deleted. Modified: llvm/trunk/lib/AsmParser/LLParser.cpp llvm/trunk/lib/AsmParser/LLParser.h Modified: llvm/trunk/lib/AsmParser/LLParser.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/AsmParser/LLParser.cpp?rev=100602&r1=100601&r2=100602&view=diff ============================================================================== --- llvm/trunk/lib/AsmParser/LLParser.cpp (original) +++ llvm/trunk/lib/AsmParser/LLParser.cpp Tue Apr 6 23:08:57 2010 @@ -2933,6 +2933,8 @@ default: assert(0 && "Unknown ParseInstruction result!"); case InstError: return true; case InstNormal: + BB->getInstList().push_back(Inst); + // With a normal result, we check to see if the instruction is followed by // a comma and metadata. if (EatIfPresent(lltok::comma)) @@ -2940,6 +2942,8 @@ return true; break; case InstExtraComma: + BB->getInstList().push_back(Inst); + // If the instruction parser ate an extra comma at the end of it, it // *must* be followed by metadata. 
if (ParseInstructionMetadata(Inst)) @@ -2947,8 +2951,6 @@ break; } - BB->getInstList().push_back(Inst); - // Set the name on the instruction. if (PFS.SetInstName(NameID, NameStr, NameLoc, Inst)) return true; } while (!isa(Inst)); Modified: llvm/trunk/lib/AsmParser/LLParser.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/AsmParser/LLParser.h?rev=100602&r1=100601&r2=100602&view=diff ============================================================================== --- llvm/trunk/lib/AsmParser/LLParser.h (original) +++ llvm/trunk/lib/AsmParser/LLParser.h Tue Apr 6 23:08:57 2010 @@ -74,7 +74,7 @@ public: typedef LLLexer::LocTy LocTy; private: - LLVMContext& Context; + LLVMContext &Context; LLLexer Lex; Module *M; From sabre at nondot.org Wed Apr 7 00:20:54 2010 From: sabre at nondot.org (Chris Lattner) Date: Wed, 07 Apr 2010 05:20:54 -0000 Subject: [llvm-commits] [llvm] r100605 - in /llvm/trunk: include/llvm/CodeGen/SelectionDAG.h include/llvm/CodeGen/SelectionDAGNodes.h include/llvm/InlineAsm.h lib/CodeGen/SelectionDAG/InstrEmitter.cpp lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h lib/CodeGen/SelectionDAG/SelectionDAG.cpp lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp Message-ID: <20100407052054.E61C42A6C12C@llvm.org> Author: lattner Date: Wed Apr 7 00:20:54 2010 New Revision: 100605 URL: http://llvm.org/viewvc/llvm-project?rev=100605&view=rev Log: Three changes: 1. Introduce some enums and accessors in the InlineAsm class that eliminate a ton of magic numbers when handling inline asm SDNode. 2. Add a new MDNodeSDNode selection dag node type that holds a MDNode (shocking!) 3. Add a new argument to ISD::INLINEASM nodes that hold !srcloc metadata, propagating it to the instruction emitter, which drops it. No functionality change. 
Modified: llvm/trunk/include/llvm/CodeGen/SelectionDAG.h llvm/trunk/include/llvm/CodeGen/SelectionDAGNodes.h llvm/trunk/include/llvm/InlineAsm.h llvm/trunk/lib/CodeGen/SelectionDAG/InstrEmitter.cpp llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp Modified: llvm/trunk/include/llvm/CodeGen/SelectionDAG.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/SelectionDAG.h?rev=100605&r1=100604&r2=100605&view=diff ============================================================================== --- llvm/trunk/include/llvm/CodeGen/SelectionDAG.h (original) +++ llvm/trunk/include/llvm/CodeGen/SelectionDAG.h Wed Apr 7 00:20:54 2010 @@ -650,6 +650,9 @@ /// getSrcValue - Construct a node to track a Value* through the backend. SDValue getSrcValue(const Value *v); + /// getMDNode - Return an MDNodeSDNode which holds an MDNode. + SDValue getMDNode(const MDNode *MD); + /// getShiftAmountOperand - Return the specified value casted to /// the target's desired shift amount type. SDValue getShiftAmountOperand(SDValue Op); Modified: llvm/trunk/include/llvm/CodeGen/SelectionDAGNodes.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/SelectionDAGNodes.h?rev=100605&r1=100604&r2=100605&view=diff ============================================================================== --- llvm/trunk/include/llvm/CodeGen/SelectionDAGNodes.h (original) +++ llvm/trunk/include/llvm/CodeGen/SelectionDAGNodes.h Wed Apr 7 00:20:54 2010 @@ -499,9 +499,16 @@ // return values: a chain and a flag result. The inputs are as follows: // Operand #0 : Input chain. // Operand #1 : a ExternalSymbolSDNode with a pointer to the asm string. - // Operand #2n+2: A RegisterNode. 
- // Operand #2n+3: A TargetConstant, indicating if the reg is a use/def + // Operand #2 : an MDNodeSDNode with the !srcloc metadata. + // After this, it is followed by a list of operands with this format: + // ConstantSDNode: Flags that encode whether it is a mem or not, the + // number of operands that follow, etc. See InlineAsm.h. + // ... however many operands ... + // Operand #last: Optional, an incoming flag. + // + // The variable width operands are required to represent target addressing + // modes as a single "operand", even though they may have multiple + // SDOperands. INLINEASM, // EH_LABEL - Represents a label in mid basic block used to track @@ -542,6 +549,10 @@ // SRCVALUE - This is a node type that holds a Value* that is used to // make reference to a value in the LLVM IR. SRCVALUE, + + // MDNODE_SDNODE - This is a node that holds an MDNode*, which is used to + // reference metadata in the IR. + MDNODE_SDNODE, // PCMARKER - This corresponds to the pcmarker intrinsic. PCMARKER, @@ -2053,6 +2064,21 @@ return N->getOpcode() == ISD::SRCVALUE; } }; + +class MDNodeSDNode : public SDNode { + const MDNode *MD; + friend class SelectionDAG; + explicit MDNodeSDNode(const MDNode *md) + : SDNode(ISD::MDNODE_SDNODE, DebugLoc(), getSDVTList(MVT::Other)), MD(md) {} +public: + + const MDNode *getMD() const { return MD; } + + static bool classof(const MDNodeSDNode *) { return true; } + static bool classof(const SDNode *N) { + return N->getOpcode() == ISD::MDNODE_SDNODE; + } +}; class RegisterSDNode : public SDNode { Modified: llvm/trunk/include/llvm/InlineAsm.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/InlineAsm.h?rev=100605&r1=100604&r2=100605&view=diff ============================================================================== --- llvm/trunk/include/llvm/InlineAsm.h (original) +++ llvm/trunk/include/llvm/InlineAsm.h Wed Apr 7 00:20:54 2010 @@ -146,6 +146,49 @@ return V->getValueID() == Value::InlineAsmVal; } + + // These are helper methods for 
dealing with flags in the INLINEASM SDNode + // in the backend. + + enum { + Op_InputChain = 0, + Op_AsmString = 1, + Op_MDNode = 2, + Op_FirstOperand = 3, + + Kind_RegUse = 1, + Kind_RegDef = 2, + Kind_Imm = 3, + Kind_Mem = 4, + Kind_RegDefEarlyClobber = 6, + + Flag_MatchingOperand = 0x80000000 + }; + + static unsigned getFlagWord(unsigned Kind, unsigned NumOps) { + assert(((NumOps << 3) & ~0xffff) == 0 && "Too many inline asm operands!"); + return Kind | (NumOps << 3); + } + + /// getFlagWordForMatchingOp - Augment an existing flag word returned by + /// getFlagWord with information indicating that this input operand is tied + /// to a previous output operand. + static unsigned getFlagWordForMatchingOp(unsigned InputFlag, + unsigned MatchedOperandNo) { + return InputFlag | Flag_MatchingOperand | (MatchedOperandNo << 16); + } + + static unsigned getKind(unsigned Flags) { + return Flags & 7; + } + + static bool isRegDefKind(unsigned Flag){ return getKind(Flag) == Kind_RegDef;} + static bool isImmKind(unsigned Flag) { return getKind(Flag) == Kind_Imm; } + static bool isMemKind(unsigned Flag) { return getKind(Flag) == Kind_Mem; } + static bool isRegDefEarlyClobberKind(unsigned Flag) { + return getKind(Flag) == Kind_RegDefEarlyClobber; + } + /// getNumOperandRegisters - Extract the number of registers field from the /// inline asm operand flag. static unsigned getNumOperandRegisters(unsigned Flag) { @@ -155,9 +198,9 @@ /// isUseOperandTiedToDef - Return true if the flag of the inline asm /// operand indicates it is an use operand that's matched to a def operand. 
static bool isUseOperandTiedToDef(unsigned Flag, unsigned &Idx) { - if ((Flag & 0x80000000) == 0) + if ((Flag & Flag_MatchingOperand) == 0) return false; - Idx = (Flag & ~0x80000000) >> 16; + Idx = (Flag & ~Flag_MatchingOperand) >> 16; return true; } Modified: llvm/trunk/lib/CodeGen/SelectionDAG/InstrEmitter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/InstrEmitter.cpp?rev=100605&r1=100604&r2=100605&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/InstrEmitter.cpp (original) +++ llvm/trunk/lib/CodeGen/SelectionDAG/InstrEmitter.cpp Wed Apr 7 00:20:54 2010 @@ -731,12 +731,12 @@ TII->get(TargetOpcode::INLINEASM)); // Add the asm string as an external symbol operand. - const char *AsmStr = - cast(Node->getOperand(1))->getSymbol(); + SDValue AsmStrV = Node->getOperand(InlineAsm::Op_AsmString); + const char *AsmStr = cast(AsmStrV)->getSymbol(); MI->addOperand(MachineOperand::CreateES(AsmStr)); // Add all of the operand registers to the instruction. - for (unsigned i = 2; i != NumOps;) { + for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) { unsigned Flags = cast(Node->getOperand(i))->getZExtValue(); unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags); @@ -744,24 +744,24 @@ MI->addOperand(MachineOperand::CreateImm(Flags)); ++i; // Skip the ID value. - switch (Flags & 7) { + switch (InlineAsm::getKind(Flags)) { default: llvm_unreachable("Bad flags!"); - case 2: // Def of register. + case InlineAsm::Kind_RegDef: for (; NumVals; --NumVals, ++i) { unsigned Reg = cast(Node->getOperand(i))->getReg(); MI->addOperand(MachineOperand::CreateReg(Reg, true)); } break; - case 6: // Def of earlyclobber register. 
+ case InlineAsm::Kind_RegDefEarlyClobber: for (; NumVals; --NumVals, ++i) { unsigned Reg = cast(Node->getOperand(i))->getReg(); MI->addOperand(MachineOperand::CreateReg(Reg, true, false, false, false, false, true)); } break; - case 1: // Use of register. - case 3: // Immediate. - case 4: // Addressing mode. + case InlineAsm::Kind_RegUse: // Use of register. + case InlineAsm::Kind_Imm: // Immediate. + case InlineAsm::Kind_Mem: // Addressing mode. // The addressing mode has been selected, just add all of the // operands to the machine instruction. for (; NumVals; --NumVals, ++i) Modified: llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp?rev=100605&r1=100604&r2=100605&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp (original) +++ llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp Wed Apr 7 00:20:54 2010 @@ -17,18 +17,19 @@ #define DEBUG_TYPE "pre-RA-sched" #include "ScheduleDAGSDNodes.h" +#include "llvm/InlineAsm.h" #include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" #include "llvm/ADT/PriorityQueue.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include using namespace llvm; @@ -647,13 +648,14 @@ if (Node->getOperand(NumOps-1).getValueType() == MVT::Flag) --NumOps; // Ignore the flag operand. 
- for (unsigned i = 2; i != NumOps;) { + for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) { unsigned Flags = cast(Node->getOperand(i))->getZExtValue(); - unsigned NumVals = (Flags & 0xffff) >> 3; + unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags); ++i; // Skip the ID value. - if ((Flags & 7) == 2 || (Flags & 7) == 6) { + if (InlineAsm::isRegDefKind(Flags) || + InlineAsm::isRegDefEarlyClobberKind(Flags)) { // Check for def of register or earlyclobber register. for (; NumVals; --NumVals, ++i) { unsigned Reg = cast(Node->getOperand(i))->getReg(); Modified: llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h?rev=100605&r1=100604&r2=100605&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h (original) +++ llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h Wed Apr 7 00:20:54 2010 @@ -59,7 +59,8 @@ if (isa(Node)) return true; if (isa(Node)) return true; if (isa(Node)) return true; - if (Node->getOpcode() == ISD::EntryToken) return true; + if (Node->getOpcode() == ISD::EntryToken || + isa(Node)) return true; return false; } Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp?rev=100605&r1=100604&r2=100605&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (original) +++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp Wed Apr 7 00:20:54 2010 @@ -1356,6 +1356,23 @@ return SDValue(N, 0); } +/// getMDNode - Return an MDNodeSDNode which holds an MDNode. 
+SDValue SelectionDAG::getMDNode(const MDNode *MD) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::MDNODE_SDNODE, getVTList(MVT::Other), 0, 0); + ID.AddPointer(MD); + + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + SDNode *N = new (NodeAllocator) MDNodeSDNode(MD); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + + /// getShiftAmountOperand - Return the specified value casted to /// the target's desired shift amount type. SDValue SelectionDAG::getShiftAmountOperand(SDValue Op) { @@ -5559,6 +5576,7 @@ case ISD::PCMARKER: return "PCMarker"; case ISD::READCYCLECOUNTER: return "ReadCycleCounter"; case ISD::SRCVALUE: return "SrcValue"; + case ISD::MDNODE_SDNODE: return "MDNode"; case ISD::EntryToken: return "EntryToken"; case ISD::TokenFactor: return "TokenFactor"; case ISD::AssertSext: return "AssertSext"; @@ -5927,6 +5945,11 @@ OS << "<" << M->getValue() << ">"; else OS << ""; + } else if (const MDNodeSDNode *MD = dyn_cast(this)) { + if (MD->getMD()) + OS << "<" << MD->getMD() << ">"; + else + OS << ""; } else if (const VTSDNode *N = dyn_cast(this)) { OS << ":" << N->getVT().getEVTString(); } Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp?rev=100605&r1=100604&r2=100605&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (original) +++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp Wed Apr 7 00:20:54 2010 @@ -168,7 +168,7 @@ /// AddInlineAsmOperands - Add this value to the specified inlineasm node /// operand list. This adds the code marker, matching input operand index /// (if applicable), and includes the number of values added into it. 
- void AddInlineAsmOperands(unsigned Code, + void AddInlineAsmOperands(unsigned Kind, bool HasMatching, unsigned MatchingIdx, SelectionDAG &DAG, std::vector &Ops) const; @@ -4871,14 +4871,13 @@ /// AddInlineAsmOperands - Add this value to the specified inlineasm node /// operand list. This adds the code marker and includes the number of /// values added into it. -void RegsForValue::AddInlineAsmOperands(unsigned Code, - bool HasMatching,unsigned MatchingIdx, +void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching, + unsigned MatchingIdx, SelectionDAG &DAG, std::vector &Ops) const { - assert(Regs.size() < (1 << 13) && "Too many inline asm outputs!"); - unsigned Flag = Code | (Regs.size() << 3); + unsigned Flag = InlineAsm::getFlagWord(Code, Regs.size()); if (HasMatching) - Flag |= 0x80000000 | (MatchingIdx << 16); + Flag = InlineAsm::getFlagWordForMatchingOp(Flag, MatchingIdx); SDValue Res = DAG.getTargetConstant(Flag, MVT::i32); Ops.push_back(Res); @@ -5409,6 +5408,11 @@ DAG.getTargetExternalSymbol(IA->getAsmString().c_str(), TLI.getPointerTy())); + // If we have a !srcloc metadata node associated with it, we want to attach + // this to the ultimately generated inline asm machineinstr. To do this, we + // pass in the third operand as this (potentially null) inline asm MDNode. + const MDNode *SrcLoc = CS.getInstruction()->getMetadata("srcloc"); + AsmNodeOperands.push_back(DAG.getMDNode(SrcLoc)); // Loop over all of the inputs, copying the operand values into the // appropriate registers and processing the output regs. @@ -5428,8 +5432,8 @@ assert(OpInfo.isIndirect && "Memory output must be indirect operand"); // Add information to the INLINEASM node to know about this output. 
- unsigned ResOpType = 4/*MEM*/ | (1<<3); - AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType, + unsigned OpFlags = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1); + AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlags, TLI.getPointerTy())); AsmNodeOperands.push_back(OpInfo.CallOperand); break; @@ -5439,10 +5443,9 @@ // Copy the output from the appropriate register. Find a register that // we can use. - if (OpInfo.AssignedRegs.Regs.empty()) { + if (OpInfo.AssignedRegs.Regs.empty()) llvm_report_error("Couldn't allocate output reg for" " constraint '" + OpInfo.ConstraintCode + "'!"); - } // If this is an indirect operand, store through the pointer after the // asm. @@ -5459,8 +5462,8 @@ // Add information to the INLINEASM node to know that this register is // set. OpInfo.AssignedRegs.AddInlineAsmOperands(OpInfo.isEarlyClobber ? - 6 /* EARLYCLOBBER REGDEF */ : - 2 /* REGDEF */ , + InlineAsm::Kind_RegDefEarlyClobber : + InlineAsm::Kind_RegDef, false, 0, DAG, @@ -5477,27 +5480,25 @@ // Scan until we find the definition we already emitted of this operand. // When we find it, create a RegsForValue operand. - unsigned CurOp = 2; // The first operand. + unsigned CurOp = InlineAsm::Op_FirstOperand; for (; OperandNo; --OperandNo) { // Advance to the next operand. 
unsigned OpFlag = cast(AsmNodeOperands[CurOp])->getZExtValue(); - assert(((OpFlag & 7) == 2 /*REGDEF*/ || - (OpFlag & 7) == 6 /*EARLYCLOBBER REGDEF*/ || - (OpFlag & 7) == 4 /*MEM*/) && - "Skipped past definitions?"); + assert((InlineAsm::isRegDefKind(OpFlag) || + InlineAsm::isRegDefEarlyClobberKind(OpFlag) || + InlineAsm::isMemKind(OpFlag)) && "Skipped past definitions?"); CurOp += InlineAsm::getNumOperandRegisters(OpFlag)+1; } unsigned OpFlag = cast(AsmNodeOperands[CurOp])->getZExtValue(); - if ((OpFlag & 7) == 2 /*REGDEF*/ - || (OpFlag & 7) == 6 /* EARLYCLOBBER REGDEF */) { + if (InlineAsm::isRegDefKind(OpFlag) || + InlineAsm::isRegDefEarlyClobberKind(OpFlag)) { // Add (OpFlag&0xffff)>>3 registers to MatchedRegs. - if (OpInfo.isIndirect) { + if (OpInfo.isIndirect) llvm_report_error("Don't know how to handle tied indirect " "register inputs yet!"); - } RegsForValue MatchedRegs; MatchedRegs.TLI = &TLI; MatchedRegs.ValueVTs.push_back(InOperandVal.getValueType()); @@ -5512,22 +5513,23 @@ // Use the produced MatchedRegs object to MatchedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(), Chain, &Flag); - MatchedRegs.AddInlineAsmOperands(1 /*REGUSE*/, + MatchedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, true, OpInfo.getMatchedOperand(), DAG, AsmNodeOperands); break; - } else { - assert(((OpFlag & 7) == 4) && "Unknown matching constraint!"); - assert((InlineAsm::getNumOperandRegisters(OpFlag)) == 1 && - "Unexpected number of operands"); - // Add information to the INLINEASM node to know about this input. - // See InlineAsm.h isUseOperandTiedToDef. 
- OpFlag |= 0x80000000 | (OpInfo.getMatchedOperand() << 16); - AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlag, - TLI.getPointerTy())); - AsmNodeOperands.push_back(AsmNodeOperands[CurOp+1]); - break; } + + assert(InlineAsm::isMemKind(OpFlag) && "Unknown matching constraint!"); + assert(InlineAsm::getNumOperandRegisters(OpFlag) == 1 && + "Unexpected number of operands"); + // Add information to the INLINEASM node to know about this input. + // See InlineAsm.h isUseOperandTiedToDef. + OpFlag = InlineAsm::getFlagWordForMatchingOp(OpFlag, + OpInfo.getMatchedOperand()); + AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlag, + TLI.getPointerTy())); + AsmNodeOperands.push_back(AsmNodeOperands[CurOp+1]); + break; } if (OpInfo.ConstraintType == TargetLowering::C_Other) { @@ -5543,18 +5545,21 @@ } // Add information to the INLINEASM node to know about this input. - unsigned ResOpType = 3 /*IMM*/ | (Ops.size() << 3); + unsigned ResOpType = + InlineAsm::getFlagWord(InlineAsm::Kind_Imm, Ops.size()); AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType, TLI.getPointerTy())); AsmNodeOperands.insert(AsmNodeOperands.end(), Ops.begin(), Ops.end()); break; - } else if (OpInfo.ConstraintType == TargetLowering::C_Memory) { + } + + if (OpInfo.ConstraintType == TargetLowering::C_Memory) { assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!"); assert(InOperandVal.getValueType() == TLI.getPointerTy() && "Memory operands expect pointer values"); // Add information to the INLINEASM node to know about this input. 
- unsigned ResOpType = 4/*MEM*/ | (1<<3); + unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1); AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType, TLI.getPointerTy())); AsmNodeOperands.push_back(InOperandVal); @@ -5577,7 +5582,7 @@ OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(), Chain, &Flag); - OpInfo.AssignedRegs.AddInlineAsmOperands(1/*REGUSE*/, false, 0, + OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, false, 0, DAG, AsmNodeOperands); break; } @@ -5585,7 +5590,8 @@ // Add the clobbered value to the operand list, so that the register // allocator is aware that the physreg got clobbered. if (!OpInfo.AssignedRegs.Regs.empty()) - OpInfo.AssignedRegs.AddInlineAsmOperands(6 /* EARLYCLOBBER REGDEF */, + OpInfo.AssignedRegs.AddInlineAsmOperands( + InlineAsm::Kind_RegDefEarlyClobber, false, 0, DAG, AsmNodeOperands); break; @@ -5593,7 +5599,7 @@ } } - // Finish up input operands. + // Finish up input operands. Set the input chain and add the flag last. AsmNodeOperands[0] = Chain; if (Flag.getNode()) AsmNodeOperands.push_back(Flag); Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp?rev=100605&r1=100604&r2=100605&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp (original) +++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp Wed Apr 7 00:20:54 2010 @@ -1333,16 +1333,17 @@ std::vector InOps; std::swap(InOps, Ops); - Ops.push_back(InOps[0]); // input chain. - Ops.push_back(InOps[1]); // input asm string. 
+ Ops.push_back(InOps[InlineAsm::Op_InputChain]); // 0 + Ops.push_back(InOps[InlineAsm::Op_AsmString]); // 1 + Ops.push_back(InOps[InlineAsm::Op_MDNode]); // 2, !srcloc - unsigned i = 2, e = InOps.size(); + unsigned i = InlineAsm::Op_FirstOperand, e = InOps.size(); if (InOps[e-1].getValueType() == MVT::Flag) --e; // Don't process a flag operand if it is here. while (i != e) { unsigned Flags = cast(InOps[i])->getZExtValue(); - if ((Flags & 7) != 4 /*MEM*/) { + if (!InlineAsm::isMemKind(Flags)) { // Just skip over this operand, copying the operands verbatim. Ops.insert(Ops.end(), InOps.begin()+i, InOps.begin()+i+InlineAsm::getNumOperandRegisters(Flags) + 1); @@ -1352,14 +1353,14 @@ "Memory operand with multiple values?"); // Otherwise, this is a memory operand. Ask the target to select it. std::vector SelOps; - if (SelectInlineAsmMemoryOperand(InOps[i+1], 'm', SelOps)) { + if (SelectInlineAsmMemoryOperand(InOps[i+1], 'm', SelOps)) llvm_report_error("Could not match memory address. Inline asm" " failure!"); - } // Add this to the output node. - Ops.push_back(CurDAG->getTargetConstant(4/*MEM*/ | (SelOps.size()<< 3), - MVT::i32)); + unsigned NewFlags = + InlineAsm::getFlagWord(InlineAsm::Kind_Mem, SelOps.size()); + Ops.push_back(CurDAG->getTargetConstant(NewFlags, MVT::i32)); Ops.insert(Ops.end(), SelOps.begin(), SelOps.end()); i += 2; } @@ -2045,6 +2046,7 @@ //case ISD::VALUETYPE: //case ISD::CONDCODE: case ISD::HANDLENODE: + case ISD::MDNODE_SDNODE: case ISD::TargetConstant: case ISD::TargetConstantFP: case ISD::TargetConstantPool: From sabre at nondot.org Wed Apr 7 00:27:36 2010 From: sabre at nondot.org (Chris Lattner) Date: Wed, 07 Apr 2010 05:27:36 -0000 Subject: [llvm-commits] [llvm] r100606 - /llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp Message-ID: <20100407052736.DFC9B2A6C12C@llvm.org> Author: lattner Date: Wed Apr 7 00:27:36 2010 New Revision: 100606 URL: http://llvm.org/viewvc/llvm-project?rev=100606&view=rev Log: remove another magic number. 
Modified: llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp Modified: llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp?rev=100606&r1=100605&r2=100606&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp (original) +++ llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp Wed Apr 7 00:27:36 2010 @@ -167,10 +167,9 @@ break; case '(': // $( -> same as GCC's { character. ++LastEmitted; // Consume '(' character. - if (CurVariant != -1) { + if (CurVariant != -1) llvm_report_error("Nested variants found in inline asm string: '" + std::string(AsmStr) + "'"); - } CurVariant = 0; // We're in the first variant now. break; case '|': @@ -273,7 +272,7 @@ OS << *MI->getOperand(OpNo).getMBB()->getSymbol(); else { AsmPrinter *AP = const_cast(this); - if ((OpFlags & 7) == 4) { + if (InlineAsm::isMemKind(OpFlags)) { Error = AP->PrintAsmMemoryOperand(MI, OpNo, AsmPrinterVariant, Modifier[0] ? Modifier : 0, OS); From sabre at nondot.org Wed Apr 7 00:38:05 2010 From: sabre at nondot.org (Chris Lattner) Date: Wed, 07 Apr 2010 05:38:05 -0000 Subject: [llvm-commits] [llvm] r100607 - in /llvm/trunk: docs/LangRef.html lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp lib/CodeGen/SelectionDAG/InstrEmitter.cpp Message-ID: <20100407053805.8215F2A6C12C@llvm.org> Author: lattner Date: Wed Apr 7 00:38:05 2010 New Revision: 100607 URL: http://llvm.org/viewvc/llvm-project?rev=100607&view=rev Log: Have the inst emitter add the !srcloc mdnode to the machine instr. Have the asmprinter use the mdnode to scavenge a source location if present. Document this nonsense in langref. 
Modified: llvm/trunk/docs/LangRef.html llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp llvm/trunk/lib/CodeGen/SelectionDAG/InstrEmitter.cpp Modified: llvm/trunk/docs/LangRef.html URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/docs/LangRef.html?rev=100607&r1=100606&r2=100607&view=diff ============================================================================== --- llvm/trunk/docs/LangRef.html (original) +++ llvm/trunk/docs/LangRef.html Wed Apr 7 00:38:05 2010 @@ -2516,6 +2516,31 @@ documented here. Constraints on what can be done (e.g. duplication, moving, etc need to be documented). This is probably best done by reference to another document that covers inline asm from a holistic perspective.

+
+ + + +
+ +

The call instructions that wrap inline asm nodes may have a "!srcloc" MDNode + attached to them that contains a constant integer. If present, the code + generator will use the integer as the location cookie value when reporting + errors through the LLVMContext error reporting mechanisms. This allows a + front-end to correlate backend errors that occur with inline asm back to the + source code that produced it. For example:

+ +
+
+call void asm sideeffect "something bad", ""(), !srcloc !42
+...
+!42 = !{ i32 1234567 }
+
+
+ +

It is up to the front-end to make sense of the magic numbers it places in the + IR.

Modified: llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp?rev=100607&r1=100606&r2=100607&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp (original) +++ llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp Wed Apr 7 00:38:05 2010 @@ -13,6 +13,7 @@ #define DEBUG_TYPE "asm-printer" #include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/Constants.h" #include "llvm/InlineAsm.h" #include "llvm/LLVMContext.h" #include "llvm/Module.h" @@ -97,7 +98,7 @@ unsigned NumDefs = 0; for (; MI->getOperand(NumDefs).isReg() && MI->getOperand(NumDefs).isDef(); ++NumDefs) - assert(NumDefs != NumOperands-1 && "No asm string?"); + assert(NumDefs != NumOperands-2 && "No asm string?"); assert(MI->getOperand(NumDefs).isSymbol() && "No asm string?"); @@ -123,6 +124,15 @@ OutStreamer.EmitRawText(Twine("\t")+MAI->getCommentString()+ MAI->getInlineAsmStart()); + // Get the !srcloc metadata node if we have it, and decode the loc cookie from + // it. + unsigned LocCookie = 0; + if (const MDNode *SrcLoc = MI->getOperand(NumOperands-1).getMetadata()) { + if (SrcLoc->getNumOperands() != 0) + if (const ConstantInt *CI = dyn_cast(SrcLoc->getOperand(0))) + LocCookie = CI->getZExtValue(); + } + // Emit the inline asm to a temporary string so we can emit it through // EmitInlineAsm. SmallString<256> StringData; @@ -295,7 +305,7 @@ } } OS << '\n' << (char)0; // null terminate string. - EmitInlineAsm(OS.str(), 0/*no loc cookie*/); + EmitInlineAsm(OS.str(), LocCookie); // Emit the #NOAPP end marker. This has to happen even if verbose-asm isn't // enabled, so we use EmitRawText. 
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/InstrEmitter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/InstrEmitter.cpp?rev=100607&r1=100606&r2=100607&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/InstrEmitter.cpp (original) +++ llvm/trunk/lib/CodeGen/SelectionDAG/InstrEmitter.cpp Wed Apr 7 00:38:05 2010 @@ -769,6 +769,12 @@ break; } } + + // Get the mdnode from the asm if it exists and add it to the instruction. + SDValue MDV = Node->getOperand(InlineAsm::Op_MDNode); + const MDNode *MD = cast(MDV)->getMD(); + MI->addOperand(MachineOperand::CreateMetadata(MD)); + MBB->insert(InsertPos, MI); break; } From clattner at apple.com Wed Apr 7 00:50:51 2010 From: clattner at apple.com (Chris Lattner) Date: Tue, 6 Apr 2010 22:50:51 -0700 Subject: [llvm-commits] [llvm] r100607 - in /llvm/trunk: docs/LangRef.html lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp lib/CodeGen/SelectionDAG/InstrEmitter.cpp In-Reply-To: <20100407053805.8215F2A6C12C@llvm.org> References: <20100407053805.8215F2A6C12C@llvm.org> Message-ID: <24BA6D44-2132-4726-91A7-41776DC7FF5C@apple.com> On Apr 6, 2010, at 10:38 PM, Chris Lattner wrote: > Author: lattner > Date: Wed Apr 7 00:38:05 2010 > New Revision: 100607 > > URL: http://llvm.org/viewvc/llvm-project?rev=100607&view=rev > Log: > Have the inst emitter add the !srcloc mdnode to the machine instr. > Have the asmprinter use the mdnode to scavenge a source location if > present. Document this nonsense in langref. In case anyone is curious... with all this plumbing, clang can now produce diagnostics like this with the integrated assembler: -------------- next part -------------- A non-text attachment was scrubbed... 
Name: PastedGraphic-1.png Type: image/png Size: 6691 bytes Desc: not available Url : http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20100406/4160cc1e/attachment.png -------------- next part -------------- Contrast this with: -------------- next part -------------- A non-text attachment was scrubbed... Name: PastedGraphic-3.png Type: image/png Size: 8364 bytes Desc: not available Url : http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20100406/4160cc1e/attachment-0001.png -------------- next part -------------- Woot, -Chris From evan.cheng at apple.com Wed Apr 7 00:59:13 2010 From: evan.cheng at apple.com (Evan Cheng) Date: Wed, 07 Apr 2010 05:59:13 -0000 Subject: [llvm-commits] [llvm] r100609 - /llvm/trunk/lib/CodeGen/MachineLICM.cpp Message-ID: <20100407055913.1C3BD2A6C12C@llvm.org> Author: evancheng Date: Wed Apr 7 00:59:12 2010 New Revision: 100609 URL: http://llvm.org/viewvc/llvm-project?rev=100609&view=rev Log: Fix typo. Modified: llvm/trunk/lib/CodeGen/MachineLICM.cpp Modified: llvm/trunk/lib/CodeGen/MachineLICM.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MachineLICM.cpp?rev=100609&r1=100608&r2=100609&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/MachineLICM.cpp (original) +++ llvm/trunk/lib/CodeGen/MachineLICM.cpp Wed Apr 7 00:59:12 2010 @@ -295,7 +295,7 @@ // If MI is a store to a stack slot, remember the slot. An instruction // loads from this slot cannot be a LICM candidate. 
- if (SkipCheck && TII->isStoreToStackSlot(MI, FI)) + if (!SkipCheck && TII->isStoreToStackSlot(MI, FI)) StoredFIs.insert(FI); } From evan.cheng at apple.com Wed Apr 7 01:00:33 2010 From: evan.cheng at apple.com (Evan Cheng) Date: Wed, 07 Apr 2010 06:00:33 -0000 Subject: [llvm-commits] [llvm] r100610 - /llvm/trunk/lib/CodeGen/MachineLICM.cpp Message-ID: <20100407060034.06EC72A6C12C@llvm.org> Author: evancheng Date: Wed Apr 7 01:00:33 2010 New Revision: 100610 URL: http://llvm.org/viewvc/llvm-project?rev=100610&view=rev Log: Add comments for missed opportunities. Modified: llvm/trunk/lib/CodeGen/MachineLICM.cpp Modified: llvm/trunk/lib/CodeGen/MachineLICM.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MachineLICM.cpp?rev=100610&r1=100609&r2=100610&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/MachineLICM.cpp (original) +++ llvm/trunk/lib/CodeGen/MachineLICM.cpp Wed Apr 7 01:00:33 2010 @@ -247,6 +247,8 @@ if (!CurLoop->contains(BB)) continue; // Conservatively treat live-in's as an external def. + // FIXME: That means a reload that's reused into a fallthrough block + // will not be LICM'ed. for (MachineBasicBlock::const_livein_iterator I = BB->livein_begin(), E = BB->livein_end(); I != E; ++I) { unsigned Reg = *I; @@ -282,7 +284,8 @@ } } - // FIXME: Only consider reloads for now. + // FIXME: Only consider reloads for now. We should be able to handle + // remat which does not have register operands. 
bool SkipCheck = false; int FI; if (SeenDef && !RuledOut) { From wangmp at apple.com Wed Apr 7 01:35:53 2010 From: wangmp at apple.com (Mon P Wang) Date: Wed, 07 Apr 2010 06:35:53 -0000 Subject: [llvm-commits] [llvm] r100611 - /llvm/trunk/docs/LangRef.html Message-ID: <20100407063554.07BD52A6C12C@llvm.org> Author: wangmp Date: Wed Apr 7 01:35:53 2010 New Revision: 100611 URL: http://llvm.org/viewvc/llvm-project?rev=100611&view=rev Log: Update langref for memcpy, memmove, and memset Modified: llvm/trunk/docs/LangRef.html Modified: llvm/trunk/docs/LangRef.html URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/docs/LangRef.html?rev=100611&r1=100610&r2=100611&view=diff ============================================================================== --- llvm/trunk/docs/LangRef.html (original) +++ llvm/trunk/docs/LangRef.html Wed Apr 7 01:35:53 2010 @@ -5890,7 +5890,8 @@
Syntax:

This is an overloaded intrinsic. You can use llvm.memcpy on any - integer bit width. Not all targets support all bit widths however.

+ integer bit width and for different address spaces. Not all targets support + all bit widths however.

   declare void @llvm.memcpy.i8(i8 * <dest>, i8 * <src>,
@@ -5938,7 +5939,8 @@
 
 
Syntax:

This is an overloaded intrinsic. You can use llvm.memmove on any integer bit - width. Not all targets support all bit widths however.

+ width and for different address spaces. Not all targets support all bit + widths however.

   declare void @llvm.memmove.i8(i8 * <dest>, i8 * <src>,
@@ -5988,7 +5990,8 @@
 
 
Syntax:

This is an overloaded intrinsic. You can use llvm.memset on any integer bit - width. Not all targets support all bit widths however.

+ width and for different address spaces. Not all targets support all bit + widths however.

   declare void @llvm.memset.i8(i8 * <dest>, i8 <val>,




From sanjiv.gupta at microchip.com  Wed Apr  7 02:06:48 2010
From: sanjiv.gupta at microchip.com (Sanjiv Gupta)
Date: Wed, 07 Apr 2010 07:06:48 -0000
Subject: [llvm-commits] [llvm] r100612 -
	/llvm/trunk/test/CodeGen/PIC16/2009-11-20-NewNode.ll
Message-ID: <20100407070648.C48302A6C12C@llvm.org>

Author: sgupta
Date: Wed Apr  7 02:06:48 2010
New Revision: 100612

URL: http://llvm.org/viewvc/llvm-project?rev=100612&view=rev
Log:
Remove XFAIL for vg_leak as the leaks are fixed by 100601.

Modified:
    llvm/trunk/test/CodeGen/PIC16/2009-11-20-NewNode.ll

Modified: llvm/trunk/test/CodeGen/PIC16/2009-11-20-NewNode.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PIC16/2009-11-20-NewNode.ll?rev=100612&r1=100611&r2=100612&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PIC16/2009-11-20-NewNode.ll (original)
+++ llvm/trunk/test/CodeGen/PIC16/2009-11-20-NewNode.ll Wed Apr  7 02:06:48 2010
@@ -1,6 +1,5 @@
 ; RUN: llc -march=pic16 < %s
 ; PR5558
-; XFAIL: vg_leak
 
 define i64 @_strtoll_r(i16 %base) nounwind {
 entry:




From nicholas at mxc.ca  Wed Apr  7 02:12:46 2010
From: nicholas at mxc.ca (Nick Lewycky)
Date: Wed, 07 Apr 2010 00:12:46 -0700
Subject: [llvm-commits] [llvm] r100553 - in /llvm/trunk:
 lib/Target/X86/SSEDomainFix.cpp
 test/CodeGen/X86/2010-04-06-SSEDomainFixCrash.ll
In-Reply-To: <20100406194856.D28562A6C12C@llvm.org>
References: <20100406194856.D28562A6C12C@llvm.org>
Message-ID: <4BBC306E.8020801@mxc.ca>

Jakob Stoklund Olesen wrote:
> Author: stoklund
> Date: Tue Apr  6 14:48:56 2010
> New Revision: 100553
>
> URL: http://llvm.org/viewvc/llvm-project?rev=100553&view=rev
> Log:
> Don't try to collapse DomainValues onto an incompatible SSE domain.
> This fixes the Bullet regression on i386/nocona.
>
> Added:
>      llvm/trunk/test/CodeGen/X86/2010-04-06-SSEDomainFixCrash.ll
> Modified:
>      llvm/trunk/lib/Target/X86/SSEDomainFix.cpp
>
> Modified: llvm/trunk/lib/Target/X86/SSEDomainFix.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/SSEDomainFix.cpp?rev=100553&r1=100552&r2=100553&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/SSEDomainFix.cpp (original)
> +++ llvm/trunk/lib/Target/X86/SSEDomainFix.cpp Tue Apr  6 14:48:56 2010
> @@ -216,8 +216,15 @@
>     if (LiveRegs&&  (dv = LiveRegs[rx])) {
>       if (dv->isCollapsed())
>         dv->addDomain(domain);
> -    else
> +    else if (dv->hasDomain(domain))
>         Collapse(dv, domain);
> +    else {
> +      // This is an incompatible open DomainValue. Collapse it to whatever and force
> +      // the new value into domain. This costs a domain crossing.
> +      Collapse(dv, dv->getFirstDomain());
> +      assert(LiveRegs[rx]&&  "Not live after collapse?");
> +      LiveRegs[rx]->addDomain(domain);
> +    }
>     } else {
>       // Set up basic collapsed DomainValue.
>       SetLiveReg(rx, Alloc(domain));
> @@ -281,8 +288,9 @@
>         // We have a live DomainValue from more than one predecessor.
>         if (LiveRegs[rx]->isCollapsed()) {
>           // We are already collapsed, but predecessor is not. Force him.
> -        if (!pdv->isCollapsed())
> -          Collapse(pdv, LiveRegs[rx]->getFirstDomain());
> +        unsigned domain = LiveRegs[rx]->getFirstDomain();
> +        if (!pdv->isCollapsed()&&  pdv->hasDomain(domain))
> +          Collapse(pdv, domain);
>           continue;
>         }
>
> @@ -290,7 +298,7 @@
>         if (!pdv->isCollapsed())
>           Merge(LiveRegs[rx], pdv);
>         else
> -        Collapse(LiveRegs[rx], pdv->getFirstDomain());
> +        Force(rx, pdv->getFirstDomain());
>       }
>     }
>   }
>
> Added: llvm/trunk/test/CodeGen/X86/2010-04-06-SSEDomainFixCrash.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/2010-04-06-SSEDomainFixCrash.ll?rev=100553&view=auto
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/2010-04-06-SSEDomainFixCrash.ll (added)
> +++ llvm/trunk/test/CodeGen/X86/2010-04-06-SSEDomainFixCrash.ll Tue Apr  6 14:48:56 2010
> @@ -0,0 +1,130 @@
> +; RUN: llc<  %s -O3 -relocation-model=pic -disable-fp-elim -mcpu=nocona
> +;
> +; This test case is reduced from Bullet. It crashes SSEDomainFix.
> +;
> +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
> +target triple = "i386-apple-darwin10.0"
> +
> +%struct.CONTACT_KEY_TOKEN_COMP = type<{ i8 }>
> +%struct.GIM_AABB = type { %struct.btSimdScalar, %struct.btSimdScalar }
> +%struct.HullDesc = type { i32, i32, %struct.btSimdScalar*, i32, float, i32, i32 }
> +%struct.HullLibrary = type { %"struct.btAlignedObjectArray", %"struct.btAlignedObjectArray" }
> +%struct.HullResult = type { i8, i32, %"struct.btAlignedObjectArray", i32, i32, %"struct.btAlignedObjectArray" }
> +%struct.btActionInterface = type { i32 (...)** }
> +%"struct.btAlignedObjectArray" = type { %struct.CONTACT_KEY_TOKEN_COMP, i32, i32, i8*, i8 }
> +%"struct.btAlignedObjectArray" = type { %struct.CONTACT_KEY_TOKEN_COMP, i32, i32, %struct.btCollisionObject**, i8 }
> +%"struct.btAlignedObjectArray" = type { %struct.CONTACT_KEY_TOKEN_COMP, i32, i32, %"struct.btDbvt::sStkCLN"*, i8 }
> +%"struct.btAlignedObjectArray" = type { %struct.CONTACT_KEY_TOKEN_COMP, i32, i32, %struct.btHullTriangle**, i8 }
> +%"struct.btAlignedObjectArray" = type { %struct.CONTACT_KEY_TOKEN_COMP, i32, i32, %"struct.btSoftBody::Anchor"*, i8 }
> +%"struct.btAlignedObjectArray" = type { %struct.CONTACT_KEY_TOKEN_COMP, i32, i32, %"struct.btSoftBody::Cluster"**, i8 }
> +%"struct.btAlignedObjectArray" = type { %struct.CONTACT_KEY_TOKEN_COMP, i32, i32, %"struct.btSoftBody::Face"*, i8 }
> +%"struct.btAlignedObjectArray" = type { %struct.CONTACT_KEY_TOKEN_COMP, i32, i32, %"struct.btSoftBody::Joint"**, i8 }
> +%"struct.btAlignedObjectArray" = type { %struct.CONTACT_KEY_TOKEN_COMP, i32, i32, %"struct.btSoftBody::Link"*, i8 }
> +%"struct.btAlignedObjectArray" = type { %struct.CONTACT_KEY_TOKEN_COMP, i32, i32, %"struct.btSoftBody::Material"**, i8 }
> +%"struct.btAlignedObjectArray" = type { %struct.CONTACT_KEY_TOKEN_COMP, i32, i32, %"struct.btSoftBody::Node"**, i8 }
> +%"struct.btAlignedObjectArray" = type { %struct.CONTACT_KEY_TOKEN_COMP, i32, i32, %"struct.btSoftBody::Node"*, i8 }
> +%"struct.btAlignedObjectArray" = type { %struct.CONTACT_KEY_TOKEN_COMP, i32, i32, %"struct.btSoftBody::Note"*, i8 }
> +%"struct.btAlignedObjectArray" = type { %struct.CONTACT_KEY_TOKEN_COMP, i32, i32, %"struct.btSoftBody::RContact"*, i8 }
> +%"struct.btAlignedObjectArray" = type { %struct.CONTACT_KEY_TOKEN_COMP, i32, i32, %"struct.btSoftBody::SContact"*, i8 }
> +%"struct.btAlignedObjectArray" = type { %struct.CONTACT_KEY_TOKEN_COMP, i32, i32, %"struct.btSoftBody::Tetra"*, i8 }
> +%"struct.btAlignedObjectArray" = type { %struct.CONTACT_KEY_TOKEN_COMP, i32, i32, i32*, i8 }
> +%"struct.btAlignedObjectArray::Cell*>" = type { %struct.CONTACT_KEY_TOKEN_COMP, i32, i32, %"struct.btSparseSdf<3>::Cell"**, i8 }
> +%"struct.btAlignedObjectArray" = type { %struct.CONTACT_KEY_TOKEN_COMP, i32, i32, %struct.btTypedConstraint**, i8 }
> +%"struct.btAlignedObjectArray" = type { %struct.CONTACT_KEY_TOKEN_COMP, i32, i32, %struct.btSimdScalar*, i8 }
> +%"struct.btAlignedObjectArray" = type { %struct.CONTACT_KEY_TOKEN_COMP, i32, i32, float*, i8 }
> +%struct.btBroadphaseProxy = type { i8*, i16, i16, i8*, i32, %struct.btSimdScalar, %struct.btSimdScalar }
> +%struct.btCollisionObject = type { i32 (...)**, %struct.btTransform, %struct.btTransform, %struct.btSimdScalar, %struct.btSimdScalar, %struct.btSimdScalar, i8, float, %struct.btBroadphaseProxy*, %struct.btCollisionShape*, %struct.btCollisionShape*, i32, i32, i32, i32, float, float, float, i8*, i32, float, float, float, i8, [7 x i8] }
> +%struct.btCollisionShape = type { i32 (...)**, i32, i8* }
> +%struct.btDbvt = type { %struct.btDbvtNode*, %struct.btDbvtNode*, i32, i32, i32, %"struct.btAlignedObjectArray" }
> +%"struct.btDbvt::sStkCLN" = type { %struct.btDbvtNode*, %struct.btDbvtNode* }
> +%struct.btDbvtNode = type { %struct.GIM_AABB, %struct.btDbvtNode*, %"union.btDbvtNode::$_12" }
> +%"struct.btHashKey" = type { i32 }
> +%struct.btHullTriangle = type { %struct.int3, %struct.int3, i32, i32, float }
> +%struct.btMatrix3x3 = type { [3 x %struct.btSimdScalar] }
> +%"struct.btRaycastVehicle::btVehicleTuning" = type { float, float, float, float, float }
> +%struct.btRigidBody = type { %struct.btCollisionObject, %struct.btMatrix3x3, %struct.btSimdScalar, %struct.btSimdScalar, float, %struct.btSimdScalar, %struct.btSimdScalar, %struct.btSimdScalar, %struct.btSimdScalar, %struct.btSimdScalar, %struct.btSimdScalar, %struct.btSimdScalar, float, float, i8, float, float, float, float, float, float, %struct.btActionInterface*, %"struct.btAlignedObjectArray", i32, i32, i32 }
> +%struct.btSimdScalar = type { %"union.btSimdScalar::$_13" }
> +%struct.btSoftBody = type { [268 x i8], %"struct.btAlignedObjectArray", %"struct.btSoftBody::Config", %"struct.btRaycastVehicle::btVehicleTuning", %"struct.btSoftBody::Pose", i8*, %struct.btSoftBodyWorldInfo*, %"struct.btAlignedObjectArray", %"struct.btAlignedObjectArray", %"struct.btAlignedObjectArray", %"struct.btAlignedObjectArray", %"struct.btAlignedObjectArray", %"struct.btAlignedObjectArray", %"struct.btAlignedObjectArray", %"struct.btAlignedObjectArray", %"struct.btAlignedObjectArray", %"struct.btAlignedObjectArray", float, [2 x %struct.btSimdScalar], i8, %struct.btDbvt, %struct.btDbvt, %struct.btDbvt, %"struct.btAlignedObjectArray", %"struct.btAlignedObjectArray", %struct.btTransform, %"struct.btAlignedObjectArray   :ePSolver::_>" }
> +%"struct.btSoftBody::Anchor" = type { %"struct.btSoftBody::Node"*, %struct.btSimdScalar, %struct.btRigidBody*, %struct.btMatrix3x3, %struct.btSimdScalar, float }
> +%"struct.btSoftBody::Body" = type { %"struct.btSoftBody::Cluster"*, %struct.btRigidBody*, %struct.btCollisionObject* }
> +%"struct.btSoftBody::Cluster" = type { %"struct.btAlignedObjectArray", %"struct.btAlignedObjectArray", %"struct.btAlignedObjectArray", %struct.btTransform, float, float, %struct.btMatrix3x3, %struct.btMatrix3x3, %struct.btSimdScalar, [2 x %struct.btSimdScalar], [2 x %struct.btSimdScalar], i32, i32, %struct.btSimdScalar, %struct.btSimdScalar, %struct.btDbvtNode*, float, float, float, float, float, float, i8, i8, i32 }
> +%"struct.btSoftBody::Config" = type { i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, i32, i32, i32, i32, i32, %"struct.btAlignedObjectArray", %"struct.btAlignedObjectArray", %"struct.btAlignedObjectArray" }
> +%"struct.btSoftBody::Element" = type { i8* }
> +%"struct.btSoftBody::Face" = type { %"struct.btSoftBody::Feature", [3 x %"struct.btSoftBody::Node"*], %struct.btSimdScalar, float, %struct.btDbvtNode* }
> +%"struct.btSoftBody::Feature" = type { %"struct.btSoftBody::Element", %"struct.btSoftBody::Material"* }
> +%"struct.btSoftBody::Joint" = type { i32 (...)**, [2 x %"struct.btSoftBody::Body"], [2 x %struct.btSimdScalar], float, float, float, %struct.btSimdScalar, %struct.btSimdScalar, %struct.btMatrix3x3, i8 }
> +%"struct.btSoftBody::Link" = type { %"struct.btSoftBody::Feature", [2 x %"struct.btSoftBody::Node"*], float, i8, float, float, float, %struct.btSimdScalar }
> +%"struct.btSoftBody::Material" = type { %"struct.btSoftBody::Element", float, float, float, i32 }
> +%"struct.btSoftBody::Node" = type { %"struct.btSoftBody::Feature", %struct.btSimdScalar, %struct.btSimdScalar, %struct.btSimdScalar, %struct.btSimdScalar, %struct.btSimdScalar, float, float, %struct.btDbvtNode*, i8 }
> +%"struct.btSoftBody::Note" = type { %"struct.btSoftBody::Element", i8*, %struct.btSimdScalar, i32, [4 x %"struct.btSoftBody::Node"*], [4 x float] }
> +%"struct.btSoftBody::Pose" = type { i8, i8, float, %"struct.btAlignedObjectArray", %"struct.btAlignedObjectArray", %struct.btSimdScalar, %struct.btMatrix3x3, %struct.btMatrix3x3, %struct.btMatrix3x3 }
> +%"struct.btSoftBody::RContact" = type { %"struct.btSoftBody::sCti", %"struct.btSoftBody::Node"*, %struct.btMatrix3x3, %struct.btSimdScalar, float, float, float }
> +%"struct.btSoftBody::SContact" = type { %"struct.btSoftBody::Node"*, %"struct.btSoftBody::Face"*, %struct.btSimdScalar, %struct.btSimdScalar, float, float, [2 x float] }
> +%"struct.btSoftBody::Tetra" = type { %"struct.btSoftBody::Feature", [4 x %"struct.btSoftBody::Node"*], float, %struct.btDbvtNode*, [4 x %struct.btSimdScalar], float, float }
> +%"struct.btSoftBody::sCti" = type { %struct.btCollisionObject*, %struct.btSimdScalar, float }
> +%struct.btSoftBodyWorldInfo = type { float, float, float, %struct.btSimdScalar, %struct.btActionInterface*, %struct.btActionInterface*, %struct.btSimdScalar, %"struct.btSparseSdf<3>" }
> +%"struct.btSparseSdf<3>" = type { %"struct.btAlignedObjectArray::Cell*>", float, i32, i32, i32, i32 }
> +%"struct.btSparseSdf<3>::Cell" = type { [4 x [4 x [4 x float]]], [3 x i32], i32, i32, %struct.btCollisionShape*, %"struct.btSparseSdf<3>::Cell"* }
> +%struct.btTransform = type { %struct.btMatrix3x3, %struct.btSimdScalar }
> +%struct.btTypedConstraint = type { i32 (...)**, %"struct.btHashKey", i32, i32, i8, %struct.btRigidBody*, %struct.btRigidBody*, float, float, %struct.btSimdScalar, %struct.btSimdScalar, %struct.btSimdScalar }
> +%struct.int3 = type { i32, i32, i32 }
> +%"union.btDbvtNode::$_12" = type { [2 x %struct.btDbvtNode*] }
> +%"union.btSimdScalar::$_13" = type {<4 x float>  }
> +
> +declare i32 @_ZN11HullLibrary16CreateConvexHullERK8HullDescR10HullResult(%struct.HullLibrary*, %struct.HullDesc* nocapture, %struct.HullResult* nocapture) ssp align 2

Jakob, is %struct.HullLibrary* relevant to the bug? It'd be great if you 
could reduce it out and lose all these types!

Nick

> +
> +define void @_ZN17btSoftBodyHelpers4DrawEP10btSoftBodyP12btIDebugDrawi(%struct.btSoftBody* %psb, %struct.btActionInterface* %idraw, i32 %drawflags) ssp align 2 {
> +entry:
> +  br i1 undef, label %bb92, label %bb58
> +
> +bb58:                                             ; preds = %entry
> +  %0 = invoke i32 @_ZN11HullLibrary16CreateConvexHullERK8HullDescR10HullResult(%struct.HullLibrary* undef, %struct.HullDesc* undef, %struct.HullResult* undef)
> +          to label %invcont64 unwind label %lpad159 ;  [#uses=0]
> +
> +invcont64:                                        ; preds = %bb58
> +  br i1 undef, label %invcont65, label %bb.i.i
> +
> +bb.i.i:                                           ; preds = %invcont64
> +  %1 = load<4 x float>* undef, align 16          ;<<4 x float>>  [#uses=5]
> +  br i1 undef, label %bb.nph.i.i, label %invcont65
> +
> +bb.nph.i.i:                                       ; preds = %bb.i.i
> +  %tmp22.i.i = bitcast<4 x float>  %1 to i128     ;  [#uses=1]
> +  %tmp23.i.i = trunc i128 %tmp22.i.i to i32       ;  [#uses=1]
> +  %2 = bitcast i32 %tmp23.i.i to float            ;  [#uses=1]
> +  %tmp6.i = extractelement<4 x float>  %1, i32 1  ;  [#uses=1]
> +  %tmp2.i = extractelement<4 x float>  %1, i32 2  ;  [#uses=1]
> +  br label %bb1.i.i
> +
> +bb1.i.i:                                          ; preds = %bb1.i.i, %bb.nph.i.i
> +  %.tmp6.0.i.i = phi float [ %tmp2.i, %bb.nph.i.i ], [ %5, %bb1.i.i ] ;  [#uses=1]
> +  %.tmp5.0.i.i = phi float [ %tmp6.i, %bb.nph.i.i ], [ %4, %bb1.i.i ] ;  [#uses=1]
> +  %.tmp.0.i.i = phi float [ %2, %bb.nph.i.i ], [ %3, %bb1.i.i ] ;  [#uses=1]
> +  %3 = fadd float %.tmp.0.i.i, undef              ;  [#uses=2]
> +  %4 = fadd float %.tmp5.0.i.i, undef             ;  [#uses=2]
> +  %5 = fadd float %.tmp6.0.i.i, undef             ;  [#uses=2]
> +  br i1 undef, label %bb2.return.loopexit_crit_edge.i.i, label %bb1.i.i
> +
> +bb2.return.loopexit_crit_edge.i.i:                ; preds = %bb1.i.i
> +  %tmp8.i = insertelement<4 x float>  %1, float %3, i32 0 ;<<4 x float>>  [#uses=1]
> +  %tmp4.i = insertelement<4 x float>  %tmp8.i, float %4, i32 1 ;<<4 x float>>  [#uses=1]
> +  %tmp.i = insertelement<4 x float>  %tmp4.i, float %5, i32 2 ;<<4 x float>>  [#uses=1]
> +  br label %invcont65
> +
> +invcont65:                                        ; preds = %bb2.return.loopexit_crit_edge.i.i, %bb.i.i, %invcont64
> +  %.0.i = phi<4 x float>  [ %tmp.i, %bb2.return.loopexit_crit_edge.i.i ], [ undef, %invcont64 ], [ %1, %bb.i.i ] ;<<4 x float>>  [#uses=1]
> +  %tmp15.i = extractelement<4 x float>  %.0.i, i32 2 ;  [#uses=1]
> +  %6 = fmul float %tmp15.i, undef                 ;  [#uses=1]
> +  br label %bb.i265
> +
> +bb.i265:                                          ; preds = %bb.i265, %invcont65
> +  %7 = fsub float 0.000000e+00, %6                ;  [#uses=1]
> +  store float %7, float* undef, align 4
> +  br label %bb.i265
> +
> +bb92:                                             ; preds = %entry
> +  unreachable
> +
> +lpad159:                                          ; preds = %bb58
> +  unreachable
> +}
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>



From benny.kra at googlemail.com  Wed Apr  7 04:26:52 2010
From: benny.kra at googlemail.com (Benjamin Kramer)
Date: Wed, 07 Apr 2010 09:26:52 -0000
Subject: [llvm-commits] [llvm] r100615 -
	/llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
Message-ID: <20100407092652.1B4942A6C12C@llvm.org>

Author: d0k
Date: Wed Apr  7 04:26:51 2010
New Revision: 100615

URL: http://llvm.org/viewvc/llvm-project?rev=100615&view=rev
Log:
Use raw_ostream.

Modified:
    llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp

Modified: llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp?rev=100615&r1=100614&r2=100615&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp (original)
+++ llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp Wed Apr  7 04:26:51 2010
@@ -438,58 +438,50 @@
 /// of DBG_VALUE, returning true if it was able to do so.  A false return
 /// means the target will need to handle MI in EmitInstruction.
 static bool EmitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) {
-  char buf[100];
-  std::string Str =  "\t";
-  Str += AP.MAI->getCommentString();
-  Str += "DEBUG_VALUE: ";
   // This code handles only the 3-operand target-independent form.
   if (MI->getNumOperands() != 3)
     return false;
 
+  SmallString<128> Str;
+  raw_svector_ostream OS(Str);
+  OS << '\t' << AP.MAI->getCommentString() << "DEBUG_VALUE: ";
+
   // cast away const; DIetc do not take const operands for some reason.
   DIVariable V((MDNode*)(MI->getOperand(2).getMetadata()));
-  Str += V.getName();
-  Str += " <- ";
+  OS << V.getName() << " <- ";
 
   // Register or immediate value. Register 0 means undef.
   if (MI->getOperand(0).isFPImm()) {
     APFloat APF = APFloat(MI->getOperand(0).getFPImm()->getValueAPF());
     if (MI->getOperand(0).getFPImm()->getType()->isFloatTy()) {
-      sprintf(buf, "%e", APF.convertToFloat());
-      Str += buf;
+      OS << (double)APF.convertToFloat();
     } else if (MI->getOperand(0).getFPImm()->getType()->isDoubleTy()) {
-      sprintf(buf, "%e", APF.convertToDouble());
-      Str += buf;
+      OS << APF.convertToDouble();
     } else {
       // There is no good way to print long double.  Convert a copy to
       // double.  Ah well, it's only a comment.
       bool ignored;
       APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven,
                   &ignored);
-      Str += "(long double) ";
-      sprintf(buf, "%e", APF.convertToDouble());
-      Str += buf;
+      OS << "(long double) " << APF.convertToDouble();
     }
   } else if (MI->getOperand(0).isImm()) {
-    sprintf(buf, "%lld", MI->getOperand(0).getImm());
-    Str += buf;
+    OS << MI->getOperand(0).getImm();
   } else if (MI->getOperand(0).isReg()) {
     if (MI->getOperand(0).getReg() == 0) {
       // Suppress offset, it is not meaningful here.
-      Str += "undef";
+      OS << "undef";
       // NOTE: Want this comment at start of line, don't emit with AddComment.
-      AP.OutStreamer.EmitRawText(Twine(Str));
+      AP.OutStreamer.EmitRawText(OS.str());
       return true;
     }
-    Str += AP.TM.getRegisterInfo()->getName(MI->getOperand(0).getReg());
+    OS << AP.TM.getRegisterInfo()->getName(MI->getOperand(0).getReg());
   } else
     llvm_unreachable("Unknown operand type");
 
-  Str += '+';
-  sprintf(buf, "%lld", MI->getOperand(1).getImm());
-  Str += buf;
+  OS << '+' << MI->getOperand(1).getImm();
   // NOTE: Want this comment at start of line, don't emit with AddComment.
-  AP.OutStreamer.EmitRawText(Twine(Str));
+  AP.OutStreamer.EmitRawText(OS.str());
   return true;
 }
 




From isanbard at gmail.com  Wed Apr  7 04:28:04 2010
From: isanbard at gmail.com (Bill Wendling)
Date: Wed, 07 Apr 2010 09:28:04 -0000
Subject: [llvm-commits] [llvm] r100616 - in
 /llvm/trunk/lib/CodeGen/AsmPrinter: DwarfDebug.cpp DwarfDebug.h
 DwarfException.cpp DwarfException.h
Message-ID: <20100407092804.B65FE2A6C12C@llvm.org>

Author: void
Date: Wed Apr  7 04:28:04 2010
New Revision: 100616

URL: http://llvm.org/viewvc/llvm-project?rev=100616&view=rev
Log:
Use the "NamedGroupTimer" class to categorize DWARF emission better.

Modified:
    llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
    llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h
    llvm/trunk/lib/CodeGen/AsmPrinter/DwarfException.cpp
    llvm/trunk/lib/CodeGen/AsmPrinter/DwarfException.h

Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp?rev=100616&r1=100615&r2=100616&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp (original)
+++ llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp Wed Apr  7 04:28:04 2010
@@ -39,6 +39,11 @@
 #include "llvm/System/Path.h"
 using namespace llvm;
 
+namespace {
+  const char *DWARFGroupName = "DWARF Emission";
+  const char *DbgTimerName = "DWARF Debug Writer";
+} // end anonymous namespace
+
 //===----------------------------------------------------------------------===//
 
 /// Configuration values for initial hash set sizes (log2).
@@ -305,22 +310,17 @@
 DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
   : Asm(A), MMI(Asm->MMI), ModuleCU(0),
     AbbreviationsSet(InitAbbreviationsSetSize), 
-    CurrentFnDbgScope(0), DebugTimer(0) {
+    CurrentFnDbgScope(0) {
   NextStringPoolNumber = 0;
       
   DwarfFrameSectionSym = DwarfInfoSectionSym = DwarfAbbrevSectionSym = 0;
   DwarfStrSectionSym = TextSectionSym = 0;
       
-  if (TimePassesIsEnabled)
-    DebugTimer = new Timer("Dwarf Debug Writer");
-      
   beginModule(M);
 }
 DwarfDebug::~DwarfDebug() {
   for (unsigned j = 0, M = DIEBlocks.size(); j < M; ++j)
     DIEBlocks[j]->~DIEBlock();
-
-  delete DebugTimer;
 }
 
 MCSymbol *DwarfDebug::getStringPoolEntry(StringRef Str) {
@@ -1844,7 +1844,7 @@
 /// content. Create global DIEs and emit initial debug info sections.
 /// This is inovked by the target AsmPrinter.
 void DwarfDebug::beginModule(Module *M) {
-  TimeRegion Timer(DebugTimer);
+  NamedRegionTimer T(DbgTimerName, DWARFGroupName);
 
   DebugInfoFinder DbgFinder;
   DbgFinder.processModule(*M);
@@ -1908,10 +1908,8 @@
 /// endModule - Emit all Dwarf sections that should come after the content.
 ///
 void DwarfDebug::endModule() {
-  if (!ModuleCU)
-    return;
-
-  TimeRegion Timer(DebugTimer);
+  NamedRegionTimer T(DbgTimerName, DWARFGroupName);
+  if (!ModuleCU) return;
 
   // Attach DW_AT_inline attribute with inlined subprogram DIEs.
   for (SmallPtrSet::iterator AI = InlinedSubprogramDIEs.begin(),
@@ -2309,11 +2307,10 @@
 /// beginFunction - Gather pre-function debug information.  Assumes being
 /// emitted immediately after the function entry point.
 void DwarfDebug::beginFunction(const MachineFunction *MF) {
+  NamedRegionTimer T(DbgTimerName, DWARFGroupName);
+
   if (!MMI->hasDebugInfo()) return;
-  
-  TimeRegion Timer(DebugTimer);
-  if (!extractScopeInformation())
-    return;
+  if (!extractScopeInformation()) return;
   
   collectVariableInfo();
 
@@ -2344,10 +2341,9 @@
 /// endFunction - Gather and emit post-function debug information.
 ///
 void DwarfDebug::endFunction(const MachineFunction *MF) {
-  if (!MMI->hasDebugInfo() ||
-      DbgScopeMap.empty()) return;
-  
-  TimeRegion Timer(DebugTimer);
+  NamedRegionTimer T(DbgTimerName, DWARFGroupName);
+
+  if (!MMI->hasDebugInfo() || DbgScopeMap.empty()) return;
 
   if (CurrentFnDbgScope) {
     // Define end label for subprogram.
@@ -2393,7 +2389,7 @@
 /// unique label that was emitted and which provides correspondence to
 /// the source line list.
 MCSymbol *DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, MDNode *S) {
-  TimeRegion Timer(DebugTimer);
+  NamedRegionTimer T(DbgTimerName, DWARFGroupName);
 
   StringRef Dir;
   StringRef Fn;
@@ -2429,7 +2425,7 @@
 /// well.
 unsigned DwarfDebug::getOrCreateSourceID(const std::string &DirName,
                                          const std::string &FileName) {
-  TimeRegion Timer(DebugTimer);
+  NamedRegionTimer T(DbgTimerName, DWARFGroupName);
   return GetOrCreateSourceID(DirName.c_str(), FileName.c_str());
 }
 

Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h?rev=100616&r1=100615&r2=100616&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h (original)
+++ llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h Wed Apr  7 04:28:04 2010
@@ -33,7 +33,6 @@
 class MachineLocation;
 class MachineModuleInfo;
 class MCAsmInfo;
-class Timer;
 class DIEAbbrev;
 class DIE;
 class DIEBlock;
@@ -208,9 +207,6 @@
   /// label location to indicate scope boundries in dwarf debug info.
   DebugLoc PrevInstLoc;
 
-  /// DebugTimer - Timer for the Dwarf debug writer.
-  Timer *DebugTimer;
-  
   struct FunctionDebugFrameInfo {
     unsigned Number;
     std::vector Moves;

Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfException.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/DwarfException.cpp?rev=100616&r1=100615&r2=100616&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/AsmPrinter/DwarfException.cpp (original)
+++ llvm/trunk/lib/CodeGen/AsmPrinter/DwarfException.cpp Wed Apr  7 04:28:04 2010
@@ -39,17 +39,16 @@
 #include "llvm/ADT/Twine.h"
 using namespace llvm;
 
+namespace {
+  const char *DWARFGroupName = "DWARF Emission";
+  const char *EHTimerName = "DWARF Exception Writer";
+} // end anonymous namespace
+
 DwarfException::DwarfException(AsmPrinter *A)
   : Asm(A), MMI(Asm->MMI), shouldEmitTable(false), shouldEmitMoves(false),
-    shouldEmitTableModule(false), shouldEmitMovesModule(false),
-    ExceptionTimer(0) {
-  if (TimePassesIsEnabled)
-    ExceptionTimer = new Timer("DWARF Exception Writer");
-}
+    shouldEmitTableModule(false), shouldEmitMovesModule(false) {}
 
-DwarfException::~DwarfException() {
-  delete ExceptionTimer;
-}
+DwarfException::~DwarfException() {}
 
 /// EmitCIE - Emit a Common Information Entry (CIE). This holds information that
 /// is shared among many Frame Description Entries.  There is at least one CIE
@@ -897,14 +896,14 @@
 /// EndModule - Emit all exception information that should come after the
 /// content.
 void DwarfException::EndModule() {
+  NamedRegionTimer T(EHTimerName, DWARFGroupName);
+
   if (Asm->MAI->getExceptionHandlingType() != ExceptionHandling::Dwarf)
     return;
 
   if (!shouldEmitMovesModule && !shouldEmitTableModule)
     return;
 
-  TimeRegion Timer(ExceptionTimer);
-
   const std::vector Personalities = MMI->getPersonalities();
 
   for (unsigned I = 0, E = Personalities.size(); I < E; ++I)
@@ -918,7 +917,7 @@
 /// BeginFunction - Gather pre-function exception information. Assumes it's
 /// being emitted immediately after the function entry point.
 void DwarfException::BeginFunction(const MachineFunction *MF) {
-  TimeRegion Timer(ExceptionTimer);
+  NamedRegionTimer T(EHTimerName, DWARFGroupName);
   shouldEmitTable = shouldEmitMoves = false;
 
   // If any landing pads survive, we need an EH table.
@@ -940,9 +939,9 @@
 /// EndFunction - Gather and emit post-function exception information.
 ///
 void DwarfException::EndFunction() {
+  NamedRegionTimer T(EHTimerName, DWARFGroupName);
   if (!shouldEmitMoves && !shouldEmitTable) return;
 
-  TimeRegion Timer(ExceptionTimer);
   Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end",
                                                 Asm->getFunctionNumber()));
 

Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfException.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/DwarfException.h?rev=100616&r1=100615&r2=100616&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/AsmPrinter/DwarfException.h (original)
+++ llvm/trunk/lib/CodeGen/AsmPrinter/DwarfException.h Wed Apr  7 04:28:04 2010
@@ -28,7 +28,6 @@
 class MCAsmInfo;
 class MCExpr;
 class MCSymbol;
-class Timer;
 class Function;
 class AsmPrinter;
 
@@ -82,9 +81,6 @@
   /// should be emitted.
   bool shouldEmitMovesModule;
 
-  /// ExceptionTimer - Timer for the Dwarf exception writer.
-  Timer *ExceptionTimer;
-
   /// EmitCIE - Emit a Common Information Entry (CIE). This holds information
   /// that is shared among many Frame Description Entries.  There is at least
   /// one CIE in every non-empty .debug_frame section.




From edwintorok at gmail.com  Wed Apr  7 05:20:40 2010
From: edwintorok at gmail.com (=?ISO-8859-1?Q?T=F6r=F6k_Edwin?=)
Date: Wed, 07 Apr 2010 13:20:40 +0300
Subject: [llvm-commits] [llvm] r100616 - in
 /llvm/trunk/lib/CodeGen/AsmPrinter: DwarfDebug.cpp DwarfDebug.h
 DwarfException.cpp DwarfException.h
In-Reply-To: <20100407092804.B65FE2A6C12C@llvm.org>
References: <20100407092804.B65FE2A6C12C@llvm.org>
Message-ID: <4BBC5C78.6050108@gmail.com>

On 04/07/2010 12:28 PM, Bill Wendling wrote:
> Author: void
> Date: Wed Apr  7 04:28:04 2010
> New Revision: 100616
> 
> URL: http://llvm.org/viewvc/llvm-project?rev=100616&view=rev
> Log:
> Use the "NamedGroupTimer" class to categorize DWARF emission better.

Hi Bill,

Looks like this change broke most of the buildbots, because the dwarf
timers are now shown unconditionally on stderr when llc quits, see for
example:
http://google1.osuosl.org:8011/builders/clang-i686-linux/builds/6270/steps/test-llvm/logs/2007-03-07-combinercrash.ll



> 
> Modified:
>     llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
>     llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h
>     llvm/trunk/lib/CodeGen/AsmPrinter/DwarfException.cpp
>     llvm/trunk/lib/CodeGen/AsmPrinter/DwarfException.h
> 
> Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp?rev=100616&r1=100615&r2=100616&view=diff
> ==============================================================================
> --- llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp (original)
> +++ llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp Wed Apr  7 04:28:04 2010
> @@ -39,6 +39,11 @@
>  #include "llvm/System/Path.h"
>  using namespace llvm;
>  
> +namespace {
> +  const char *DWARFGroupName = "DWARF Emission";
> +  const char *DbgTimerName = "DWARF Debug Writer";
> +} // end anonymous namespace
> +
>  //===----------------------------------------------------------------------===//
>  
>  /// Configuration values for initial hash set sizes (log2).
> @@ -305,22 +310,17 @@
>  DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
>    : Asm(A), MMI(Asm->MMI), ModuleCU(0),
>      AbbreviationsSet(InitAbbreviationsSetSize), 
> -    CurrentFnDbgScope(0), DebugTimer(0) {
> +    CurrentFnDbgScope(0) {
>    NextStringPoolNumber = 0;
>        
>    DwarfFrameSectionSym = DwarfInfoSectionSym = DwarfAbbrevSectionSym = 0;
>    DwarfStrSectionSym = TextSectionSym = 0;
>        
> -  if (TimePassesIsEnabled)
> -    DebugTimer = new Timer("Dwarf Debug Writer");

Looks like you should guard the new timers with if (TimePassesIsEnabled).

Best regards,
--Edwin


From edwintorok at gmail.com  Wed Apr  7 05:44:46 2010
From: edwintorok at gmail.com (Torok Edwin)
Date: Wed, 07 Apr 2010 10:44:46 -0000
Subject: [llvm-commits] [llvm] r100618 - in
 /llvm/trunk/lib/CodeGen/AsmPrinter: AsmPrinter.cpp DwarfDebug.cpp
 DwarfException.cpp
Message-ID: <20100407104446.7F8752A6C12C@llvm.org>

Author: edwin
Date: Wed Apr  7 05:44:46 2010
New Revision: 100618

URL: http://llvm.org/viewvc/llvm-project?rev=100618&view=rev
Log:
Work around the breakage in r100616 by guarding all timers with
TimePassesIsEnabled. This should allow make check to pass.

Modified:
    llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
    llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
    llvm/trunk/lib/CodeGen/AsmPrinter/DwarfException.cpp

Modified: llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp?rev=100618&r1=100617&r2=100618&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp (original)
+++ llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp Wed Apr  7 05:44:46 2010
@@ -1,4 +1,3 @@
-//===-- AsmPrinter.cpp - Common AsmPrinter code ---------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -42,8 +41,15 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Format.h"
+#include "llvm/Support/Timer.h"
 using namespace llvm;
 
+namespace {
+  const char *DWARFGroupName = "DWARF Emission";
+  const char *DbgTimerName = "DWARF Debug Writer";
+  const char *EHTimerName = "DWARF Exception Writer";
+} // end anonymous namespace
+
 STATISTIC(EmittedInsts, "Number of machine instrs printed");
 
 char AsmPrinter::ID = 0;
@@ -347,8 +353,22 @@
   }
   
   // Emit pre-function debug and/or EH information.
-  if (DE) DE->BeginFunction(MF);
-  if (DD) DD->beginFunction(MF);
+  if (DE) {
+    if (TimePassesIsEnabled) {
+      NamedRegionTimer T(EHTimerName, DWARFGroupName);
+      DE->BeginFunction(MF);
+    } else {
+      DE->BeginFunction(MF);
+    }
+  }
+  if (DD) {
+    if (TimePassesIsEnabled) {
+      NamedRegionTimer T(DbgTimerName, DWARFGroupName);
+      DD->beginFunction(MF);
+    } else {
+      DD->beginFunction(MF);
+    }
+  }
 }
 
 /// EmitFunctionEntryLabel - Emit the label that is the entrypoint for the
@@ -507,8 +527,14 @@
       
       ++EmittedInsts;
       
-      if (ShouldPrintDebugScopes)
-        DD->beginScope(II);
+      if (ShouldPrintDebugScopes) {
+	if (TimePassesIsEnabled) {
+	  NamedRegionTimer T(DbgTimerName, DWARFGroupName);
+	  DD->beginScope(II);
+	} else {
+	  DD->beginScope(II);
+	}
+      }
       
       if (isVerbose())
         EmitComments(*II, OutStreamer.GetCommentOS());
@@ -539,8 +565,14 @@
         break;
       }
       
-      if (ShouldPrintDebugScopes)
-        DD->endScope(II);
+      if (ShouldPrintDebugScopes) {
+	if (TimePassesIsEnabled) {
+	  NamedRegionTimer T(DbgTimerName, DWARFGroupName);
+	  DD->endScope(II);
+	} else {
+	  DD->endScope(II);
+	}
+      }
     }
   }
   
@@ -569,8 +601,22 @@
   }
   
   // Emit post-function debug information.
-  if (DD) DD->endFunction(MF);
-  if (DE) DE->EndFunction();
+  if (DD) {
+    if (TimePassesIsEnabled) {
+      NamedRegionTimer T(DbgTimerName, DWARFGroupName);
+      DD->endFunction(MF);
+    } else {
+      DD->endFunction(MF);
+    }
+  }
+  if (DE) {
+    if (TimePassesIsEnabled) {
+      NamedRegionTimer T(EHTimerName, DWARFGroupName);
+      DE->EndFunction();
+    } else {
+      DE->EndFunction();
+    }
+  }
   MMI->EndFunction();
   
   // Print out jump tables referenced by the function.
@@ -588,11 +634,21 @@
   
   // Finalize debug and EH information.
   if (DE) {
-    DE->EndModule();
+    if (TimePassesIsEnabled) {
+      NamedRegionTimer T(EHTimerName, DWARFGroupName);
+      DE->EndModule();
+    } else {
+      DE->EndModule();
+    }
     delete DE; DE = 0;
   }
   if (DD) {
-    DD->endModule();
+    if (TimePassesIsEnabled) {
+      NamedRegionTimer T(DbgTimerName, DWARFGroupName);
+      DD->endModule();
+    } else {
+      DD->endModule();
+    }
     delete DD; DD = 0;
   }
   

Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp?rev=100618&r1=100617&r2=100618&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp (original)
+++ llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp Wed Apr  7 05:44:46 2010
@@ -315,8 +315,13 @@
       
   DwarfFrameSectionSym = DwarfInfoSectionSym = DwarfAbbrevSectionSym = 0;
   DwarfStrSectionSym = TextSectionSym = 0;
-      
-  beginModule(M);
+
+  if (TimePassesIsEnabled) {
+      NamedRegionTimer T(DbgTimerName, DWARFGroupName);
+      beginModule(M);
+  } else {
+      beginModule(M);
+  }
 }
 DwarfDebug::~DwarfDebug() {
   for (unsigned j = 0, M = DIEBlocks.size(); j < M; ++j)
@@ -1844,8 +1849,6 @@
 /// content. Create global DIEs and emit initial debug info sections.
 /// This is inovked by the target AsmPrinter.
 void DwarfDebug::beginModule(Module *M) {
-  NamedRegionTimer T(DbgTimerName, DWARFGroupName);
-
   DebugInfoFinder DbgFinder;
   DbgFinder.processModule(*M);
 
@@ -1908,7 +1911,6 @@
 /// endModule - Emit all Dwarf sections that should come after the content.
 ///
 void DwarfDebug::endModule() {
-  NamedRegionTimer T(DbgTimerName, DWARFGroupName);
   if (!ModuleCU) return;
 
   // Attach DW_AT_inline attribute with inlined subprogram DIEs.
@@ -2307,8 +2309,6 @@
 /// beginFunction - Gather pre-function debug information.  Assumes being
 /// emitted immediately after the function entry point.
 void DwarfDebug::beginFunction(const MachineFunction *MF) {
-  NamedRegionTimer T(DbgTimerName, DWARFGroupName);
-
   if (!MMI->hasDebugInfo()) return;
   if (!extractScopeInformation()) return;
   
@@ -2341,8 +2341,6 @@
 /// endFunction - Gather and emit post-function debug information.
 ///
 void DwarfDebug::endFunction(const MachineFunction *MF) {
-  NamedRegionTimer T(DbgTimerName, DWARFGroupName);
-
   if (!MMI->hasDebugInfo() || DbgScopeMap.empty()) return;
 
   if (CurrentFnDbgScope) {
@@ -2389,8 +2387,6 @@
 /// unique label that was emitted and which provides correspondence to
 /// the source line list.
 MCSymbol *DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, MDNode *S) {
-  NamedRegionTimer T(DbgTimerName, DWARFGroupName);
-
   StringRef Dir;
   StringRef Fn;
 

Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfException.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/DwarfException.cpp?rev=100618&r1=100617&r2=100618&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/AsmPrinter/DwarfException.cpp (original)
+++ llvm/trunk/lib/CodeGen/AsmPrinter/DwarfException.cpp Wed Apr  7 05:44:46 2010
@@ -33,17 +33,11 @@
 #include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Support/Dwarf.h"
 #include "llvm/Support/FormattedStream.h"
-#include "llvm/Support/Timer.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/Twine.h"
 using namespace llvm;
 
-namespace {
-  const char *DWARFGroupName = "DWARF Emission";
-  const char *EHTimerName = "DWARF Exception Writer";
-} // end anonymous namespace
-
 DwarfException::DwarfException(AsmPrinter *A)
   : Asm(A), MMI(Asm->MMI), shouldEmitTable(false), shouldEmitMoves(false),
     shouldEmitTableModule(false), shouldEmitMovesModule(false) {}
@@ -896,8 +890,6 @@
 /// EndModule - Emit all exception information that should come after the
 /// content.
 void DwarfException::EndModule() {
-  NamedRegionTimer T(EHTimerName, DWARFGroupName);
-
   if (Asm->MAI->getExceptionHandlingType() != ExceptionHandling::Dwarf)
     return;
 
@@ -917,7 +909,6 @@
 /// BeginFunction - Gather pre-function exception information. Assumes it's
 /// being emitted immediately after the function entry point.
 void DwarfException::BeginFunction(const MachineFunction *MF) {
-  NamedRegionTimer T(EHTimerName, DWARFGroupName);
   shouldEmitTable = shouldEmitMoves = false;
 
   // If any landing pads survive, we need an EH table.
@@ -939,7 +930,6 @@
 /// EndFunction - Gather and emit post-function exception information.
 ///
 void DwarfException::EndFunction() {
-  NamedRegionTimer T(EHTimerName, DWARFGroupName);
   if (!shouldEmitMoves && !shouldEmitTable) return;
 
   Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end",




From edwintorok at gmail.com  Wed Apr  7 05:46:47 2010
From: edwintorok at gmail.com (=?ISO-8859-1?Q?T=F6r=F6k_Edwin?=)
Date: Wed, 07 Apr 2010 13:46:47 +0300
Subject: [llvm-commits] [llvm] r100616 - in
 /llvm/trunk/lib/CodeGen/AsmPrinter: DwarfDebug.cpp DwarfDebug.h
 DwarfException.cpp DwarfException.h
In-Reply-To: <4BBC5C78.6050108@gmail.com>
References: <20100407092804.B65FE2A6C12C@llvm.org> <4BBC5C78.6050108@gmail.com>
Message-ID: <4BBC6297.1070400@gmail.com>

On 04/07/2010 01:20 PM, Török Edwin wrote:
> On 04/07/2010 12:28 PM, Bill Wendling wrote:
>> Author: void
>> Date: Wed Apr  7 04:28:04 2010
>> New Revision: 100616
>>
>> URL: http://llvm.org/viewvc/llvm-project?rev=100616&view=rev
>> Log:
>> Use the "NamedGroupTimer" class to categorize DWARF emission better.
> 
> Hi Bill,
> 
> Looks like this change broke most of the buildbots, because the dwarf
> timers are now shown unconditionally on stderr when llc quits, see for
> example:
> http://google1.osuosl.org:8011/builders/clang-i686-linux/builds/6270/steps/test-llvm/logs/2007-03-07-combinercrash.ll
> 

I guarded all your timer creations with TimePassesIsEnabled in r100618;
this should unbreak the buildbots.
Please check that the timers still work the way you want.


Best regards,
--Edwin


From benny.kra at googlemail.com  Wed Apr  7 06:23:46 2010
From: benny.kra at googlemail.com (Benjamin Kramer)
Date: Wed, 07 Apr 2010 11:23:46 -0000
Subject: [llvm-commits] [llvm] r100620 - in
 /llvm/trunk/lib/CodeGen/AsmPrinter: DwarfDebug.cpp DwarfDebug.h
Message-ID: <20100407112346.CBC762A6C12C@llvm.org>

Author: d0k
Date: Wed Apr  7 06:23:46 2010
New Revision: 100620

URL: http://llvm.org/viewvc/llvm-project?rev=100620&view=rev
Log:
Remove unused method.

Modified:
    llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
    llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h

Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp?rev=100620&r1=100619&r2=100620&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp (original)
+++ llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp Wed Apr  7 06:23:46 2010
@@ -2414,17 +2414,6 @@
   return Label;
 }
 
-/// getOrCreateSourceID - Public version of GetOrCreateSourceID. This can be
-/// timed. Look up the source id with the given directory and source file
-/// names. If none currently exists, create a new id and insert it in the
-/// SourceIds map. This can update DirectoryNames and SourceFileNames maps as
-/// well.
-unsigned DwarfDebug::getOrCreateSourceID(const std::string &DirName,
-                                         const std::string &FileName) {
-  NamedRegionTimer T(DbgTimerName, DWARFGroupName);
-  return GetOrCreateSourceID(DirName.c_str(), FileName.c_str());
-}
-
 //===----------------------------------------------------------------------===//
 // Emit Methods
 //===----------------------------------------------------------------------===//

Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h?rev=100620&r1=100619&r2=100620&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h (original)
+++ llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h Wed Apr  7 06:23:46 2010
@@ -527,14 +527,6 @@
     return Lines.size();
   }
   
-  /// getOrCreateSourceID - Public version of GetOrCreateSourceID. This can be
-  /// timed. Look up the source id with the given directory and source file
-  /// names. If none currently exists, create a new id and insert it in the
-  /// SourceIds map. This can update DirectoryNames and SourceFileNames maps as
-  /// well.
-  unsigned getOrCreateSourceID(const std::string &DirName,
-                               const std::string &FileName);
-  
   /// extractScopeInformation - Scan machine instructions in this function
   /// and collect DbgScopes. Return true, if atleast one scope was found.
   bool extractScopeInformation();




From arplynn at gmail.com  Wed Apr  7 08:14:58 2010
From: arplynn at gmail.com (Alastair Lynn)
Date: Wed, 7 Apr 2010 14:14:58 +0100
Subject: [llvm-commits] Trivial patch to fix an 80 column violation in
	DAGCombiner.cpp
Message-ID: <70DB0CB9-2A51-4D07-823A-8D1C67846D60@gmail.com>

Hi-

Just noticed this as I was looking through.

Alastair

-------------- next part --------------
A non-text attachment was scrubbed...
Name: DAGCombiner-80cols.patch
Type: application/octet-stream
Size: 786 bytes
Desc: not available
Url : http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20100407/64002285/attachment.obj 

From bob.wilson at apple.com  Wed Apr  7 10:36:05 2010
From: bob.wilson at apple.com (Bob Wilson)
Date: Wed, 07 Apr 2010 15:36:05 -0000
Subject: [llvm-commits] [llvm-gcc-4.2] r100624 -
	/llvm-gcc-4.2/trunk/gcc/c-parser.c
Message-ID: <20100407153605.3D4AB2A6C12C@llvm.org>

Author: bwilson
Date: Wed Apr  7 10:36:05 2010
New Revision: 100624

URL: http://llvm.org/viewvc/llvm-project?rev=100624&view=rev
Log:
Revert my change to disable the pedantic warning about statement expressions.
There is a better way.

Modified:
    llvm-gcc-4.2/trunk/gcc/c-parser.c

Modified: llvm-gcc-4.2/trunk/gcc/c-parser.c
URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/gcc/c-parser.c?rev=100624&r1=100623&r2=100624&view=diff
==============================================================================
--- llvm-gcc-4.2/trunk/gcc/c-parser.c (original)
+++ llvm-gcc-4.2/trunk/gcc/c-parser.c Wed Apr  7 10:36:05 2010
@@ -5643,17 +5643,8 @@
 	  c_parser_compound_statement_nostart (parser);
 	  c_parser_skip_until_found (parser, CPP_CLOSE_PAREN,
 				     "expected %<)%>");
-/* LLVM LOCAL begin */
-          /* Disable this warning for the sake of ARM NEON intrinsics, which
-             are implemented as macros with statement expressions in llvm-gcc
-             since the inliner is not run later and the arguments need to be
-             visible to the front-end.  Otherwise, it is not possible to
-             compile NEON intrinsics with "-pedantic -Werror".  */
-#ifndef ENABLE_LLVM
 	  if (pedantic)
 	    pedwarn ("ISO C forbids braced-groups within expressions");
-#endif
-/* LLVM LOCAL end */
 	  expr.value = c_finish_stmt_expr (stmt);
 	  expr.original_code = ERROR_MARK;
 	}




From bob.wilson at apple.com  Wed Apr  7 10:37:03 2010
From: bob.wilson at apple.com (Bob Wilson)
Date: Wed, 07 Apr 2010 15:37:03 -0000
Subject: [llvm-commits] [llvm-gcc-4.2] r100625 -
	/llvm-gcc-4.2/trunk/gcc/config/arm/neon-gen.ml
Message-ID: <20100407153703.5AD7B2A6C12C@llvm.org>

Author: bwilson
Date: Wed Apr  7 10:37:03 2010
New Revision: 100625

URL: http://llvm.org/viewvc/llvm-project?rev=100625&view=rev
Log:
Add an __extension__ keyword before statement expressions to avoid
warnings in -pedantic mode.

Modified:
    llvm-gcc-4.2/trunk/gcc/config/arm/neon-gen.ml

Modified: llvm-gcc-4.2/trunk/gcc/config/arm/neon-gen.ml
URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/gcc/config/arm/neon-gen.ml?rev=100625&r1=100624&r2=100625&view=diff
==============================================================================
--- llvm-gcc-4.2/trunk/gcc/config/arm/neon-gen.ml (original)
+++ llvm-gcc-4.2/trunk/gcc/config/arm/neon-gen.ml Wed Apr  7 10:37:03 2010
@@ -93,15 +93,16 @@
   | Arity4 (ret, arg0, arg1, arg2, arg3) ->
       Format.printf "%s(__a, __b, __c, __d)" fnname
   end;
-  Format.printf " \\@,";
   let rec print_lines = function
     [] -> ()
   | [line] -> Format.printf "%s; \\" line
   | line::lines -> Format.printf "%s; \\@," line; print_lines lines in
   let print_macro_body = function
-    [] -> ()
-  | [line] -> Format.printf "%s" line
-  | line::lines -> Format.printf "@[({ \\@,%s; \\@," line;
+    [] -> Format.printf " \\@,";
+  | [line] -> Format.printf " \\@,";
+              Format.printf "%s" line
+  | line::lines -> Format.printf " __extension__ \\@,";
+                   Format.printf "@[({ \\@,%s; \\@," line;
                    print_lines lines;
                    Format.printf "@]@, })" in
   print_macro_body body;




From bob.wilson at apple.com  Wed Apr  7 10:37:27 2010
From: bob.wilson at apple.com (Bob Wilson)
Date: Wed, 07 Apr 2010 15:37:27 -0000
Subject: [llvm-commits] [llvm-gcc-4.2] r100626 -
	/llvm-gcc-4.2/trunk/gcc/config/arm/arm_neon.h
Message-ID: <20100407153728.46AD32A6C12C@llvm.org>

Author: bwilson
Date: Wed Apr  7 10:37:27 2010
New Revision: 100626

URL: http://llvm.org/viewvc/llvm-project?rev=100626&view=rev
Log:
Regenerate arm_neon.h.

Modified:
    llvm-gcc-4.2/trunk/gcc/config/arm/arm_neon.h

Modified: llvm-gcc-4.2/trunk/gcc/config/arm/arm_neon.h
URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/gcc/config/arm/arm_neon.h?rev=100626&r1=100625&r2=100626&view=diff
==============================================================================
--- llvm-gcc-4.2/trunk/gcc/config/arm/arm_neon.h (original)
+++ llvm-gcc-4.2/trunk/gcc/config/arm/arm_neon.h Wed Apr  7 10:37:27 2010
@@ -514,7 +514,7 @@
 } poly16x8x4_t;
 
 
-#define vadd_s8(__a, __b) \
+#define vadd_s8(__a, __b) __extension__ \
   ({ \
      int8x8_t __ax = __a; \
      int8x8_t __bx = __b; \
@@ -523,7 +523,7 @@
      __rv.__i; \
    })
 
-#define vadd_s16(__a, __b) \
+#define vadd_s16(__a, __b) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      int16x4_t __bx = __b; \
@@ -532,7 +532,7 @@
      __rv.__i; \
    })
 
-#define vadd_s32(__a, __b) \
+#define vadd_s32(__a, __b) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      int32x2_t __bx = __b; \
@@ -541,7 +541,7 @@
      __rv.__i; \
    })
 
-#define vadd_s64(__a, __b) \
+#define vadd_s64(__a, __b) __extension__ \
   ({ \
      int64x1_t __ax = __a; \
      int64x1_t __bx = __b; \
@@ -550,7 +550,7 @@
      __rv.__i; \
    })
 
-#define vadd_f32(__a, __b) \
+#define vadd_f32(__a, __b) __extension__ \
   ({ \
      float32x2_t __ax = __a; \
      float32x2_t __bx = __b; \
@@ -559,7 +559,7 @@
      __rv.__i; \
    })
 
-#define vadd_u8(__a, __b) \
+#define vadd_u8(__a, __b) __extension__ \
   ({ \
      uint8x8_t __ax = __a; \
      uint8x8_t __bx = __b; \
@@ -568,7 +568,7 @@
      __rv.__i; \
    })
 
-#define vadd_u16(__a, __b) \
+#define vadd_u16(__a, __b) __extension__ \
   ({ \
      uint16x4_t __ax = __a; \
      uint16x4_t __bx = __b; \
@@ -577,7 +577,7 @@
      __rv.__i; \
    })
 
-#define vadd_u32(__a, __b) \
+#define vadd_u32(__a, __b) __extension__ \
   ({ \
      uint32x2_t __ax = __a; \
      uint32x2_t __bx = __b; \
@@ -586,7 +586,7 @@
      __rv.__i; \
    })
 
-#define vadd_u64(__a, __b) \
+#define vadd_u64(__a, __b) __extension__ \
   ({ \
      uint64x1_t __ax = __a; \
      uint64x1_t __bx = __b; \
@@ -595,7 +595,7 @@
      __rv.__i; \
    })
 
-#define vaddq_s8(__a, __b) \
+#define vaddq_s8(__a, __b) __extension__ \
   ({ \
      int8x16_t __ax = __a; \
      int8x16_t __bx = __b; \
@@ -604,7 +604,7 @@
      __rv.__i; \
    })
 
-#define vaddq_s16(__a, __b) \
+#define vaddq_s16(__a, __b) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      int16x8_t __bx = __b; \
@@ -613,7 +613,7 @@
      __rv.__i; \
    })
 
-#define vaddq_s32(__a, __b) \
+#define vaddq_s32(__a, __b) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      int32x4_t __bx = __b; \
@@ -622,7 +622,7 @@
      __rv.__i; \
    })
 
-#define vaddq_s64(__a, __b) \
+#define vaddq_s64(__a, __b) __extension__ \
   ({ \
      int64x2_t __ax = __a; \
      int64x2_t __bx = __b; \
@@ -631,7 +631,7 @@
      __rv.__i; \
    })
 
-#define vaddq_f32(__a, __b) \
+#define vaddq_f32(__a, __b) __extension__ \
   ({ \
      float32x4_t __ax = __a; \
      float32x4_t __bx = __b; \
@@ -640,7 +640,7 @@
      __rv.__i; \
    })
 
-#define vaddq_u8(__a, __b) \
+#define vaddq_u8(__a, __b) __extension__ \
   ({ \
      uint8x16_t __ax = __a; \
      uint8x16_t __bx = __b; \
@@ -649,7 +649,7 @@
      __rv.__i; \
    })
 
-#define vaddq_u16(__a, __b) \
+#define vaddq_u16(__a, __b) __extension__ \
   ({ \
      uint16x8_t __ax = __a; \
      uint16x8_t __bx = __b; \
@@ -658,7 +658,7 @@
      __rv.__i; \
    })
 
-#define vaddq_u32(__a, __b) \
+#define vaddq_u32(__a, __b) __extension__ \
   ({ \
      uint32x4_t __ax = __a; \
      uint32x4_t __bx = __b; \
@@ -667,7 +667,7 @@
      __rv.__i; \
    })
 
-#define vaddq_u64(__a, __b) \
+#define vaddq_u64(__a, __b) __extension__ \
   ({ \
      uint64x2_t __ax = __a; \
      uint64x2_t __bx = __b; \
@@ -676,7 +676,7 @@
      __rv.__i; \
    })
 
-#define vaddl_s8(__a, __b) \
+#define vaddl_s8(__a, __b) __extension__ \
   ({ \
      int8x8_t __ax = __a; \
      int8x8_t __bx = __b; \
@@ -685,7 +685,7 @@
      __rv.__i; \
    })
 
-#define vaddl_s16(__a, __b) \
+#define vaddl_s16(__a, __b) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      int16x4_t __bx = __b; \
@@ -694,7 +694,7 @@
      __rv.__i; \
    })
 
-#define vaddl_s32(__a, __b) \
+#define vaddl_s32(__a, __b) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      int32x2_t __bx = __b; \
@@ -703,7 +703,7 @@
      __rv.__i; \
    })
 
-#define vaddl_u8(__a, __b) \
+#define vaddl_u8(__a, __b) __extension__ \
   ({ \
      uint8x8_t __ax = __a; \
      uint8x8_t __bx = __b; \
@@ -712,7 +712,7 @@
      __rv.__i; \
    })
 
-#define vaddl_u16(__a, __b) \
+#define vaddl_u16(__a, __b) __extension__ \
   ({ \
      uint16x4_t __ax = __a; \
      uint16x4_t __bx = __b; \
@@ -721,7 +721,7 @@
      __rv.__i; \
    })
 
-#define vaddl_u32(__a, __b) \
+#define vaddl_u32(__a, __b) __extension__ \
   ({ \
      uint32x2_t __ax = __a; \
      uint32x2_t __bx = __b; \
@@ -730,7 +730,7 @@
      __rv.__i; \
    })
 
-#define vaddw_s8(__a, __b) \
+#define vaddw_s8(__a, __b) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      int8x8_t __bx = __b; \
@@ -739,7 +739,7 @@
      __rv.__i; \
    })
 
-#define vaddw_s16(__a, __b) \
+#define vaddw_s16(__a, __b) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      int16x4_t __bx = __b; \
@@ -748,7 +748,7 @@
      __rv.__i; \
    })
 
-#define vaddw_s32(__a, __b) \
+#define vaddw_s32(__a, __b) __extension__ \
   ({ \
      int64x2_t __ax = __a; \
      int32x2_t __bx = __b; \
@@ -757,7 +757,7 @@
      __rv.__i; \
    })
 
-#define vaddw_u8(__a, __b) \
+#define vaddw_u8(__a, __b) __extension__ \
   ({ \
      uint16x8_t __ax = __a; \
      uint8x8_t __bx = __b; \
@@ -766,7 +766,7 @@
      __rv.__i; \
    })
 
-#define vaddw_u16(__a, __b) \
+#define vaddw_u16(__a, __b) __extension__ \
   ({ \
      uint32x4_t __ax = __a; \
      uint16x4_t __bx = __b; \
@@ -775,7 +775,7 @@
      __rv.__i; \
    })
 
-#define vaddw_u32(__a, __b) \
+#define vaddw_u32(__a, __b) __extension__ \
   ({ \
      uint64x2_t __ax = __a; \
      uint32x2_t __bx = __b; \
@@ -784,7 +784,7 @@
      __rv.__i; \
    })
 
-#define vhadd_s8(__a, __b) \
+#define vhadd_s8(__a, __b) __extension__ \
   ({ \
      int8x8_t __ax = __a; \
      int8x8_t __bx = __b; \
@@ -793,7 +793,7 @@
      __rv.__i; \
    })
 
-#define vhadd_s16(__a, __b) \
+#define vhadd_s16(__a, __b) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      int16x4_t __bx = __b; \
@@ -802,7 +802,7 @@
      __rv.__i; \
    })
 
-#define vhadd_s32(__a, __b) \
+#define vhadd_s32(__a, __b) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      int32x2_t __bx = __b; \
@@ -811,7 +811,7 @@
      __rv.__i; \
    })
 
-#define vhadd_u8(__a, __b) \
+#define vhadd_u8(__a, __b) __extension__ \
   ({ \
      uint8x8_t __ax = __a; \
      uint8x8_t __bx = __b; \
@@ -820,7 +820,7 @@
      __rv.__i; \
    })
 
-#define vhadd_u16(__a, __b) \
+#define vhadd_u16(__a, __b) __extension__ \
   ({ \
      uint16x4_t __ax = __a; \
      uint16x4_t __bx = __b; \
@@ -829,7 +829,7 @@
      __rv.__i; \
    })
 
-#define vhadd_u32(__a, __b) \
+#define vhadd_u32(__a, __b) __extension__ \
   ({ \
      uint32x2_t __ax = __a; \
      uint32x2_t __bx = __b; \
@@ -838,7 +838,7 @@
      __rv.__i; \
    })
 
-#define vhaddq_s8(__a, __b) \
+#define vhaddq_s8(__a, __b) __extension__ \
   ({ \
      int8x16_t __ax = __a; \
      int8x16_t __bx = __b; \
@@ -847,7 +847,7 @@
      __rv.__i; \
    })
 
-#define vhaddq_s16(__a, __b) \
+#define vhaddq_s16(__a, __b) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      int16x8_t __bx = __b; \
@@ -856,7 +856,7 @@
      __rv.__i; \
    })
 
-#define vhaddq_s32(__a, __b) \
+#define vhaddq_s32(__a, __b) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      int32x4_t __bx = __b; \
@@ -865,7 +865,7 @@
      __rv.__i; \
    })
 
-#define vhaddq_u8(__a, __b) \
+#define vhaddq_u8(__a, __b) __extension__ \
   ({ \
      uint8x16_t __ax = __a; \
      uint8x16_t __bx = __b; \
@@ -874,7 +874,7 @@
      __rv.__i; \
    })
 
-#define vhaddq_u16(__a, __b) \
+#define vhaddq_u16(__a, __b) __extension__ \
   ({ \
      uint16x8_t __ax = __a; \
      uint16x8_t __bx = __b; \
@@ -883,7 +883,7 @@
      __rv.__i; \
    })
 
-#define vhaddq_u32(__a, __b) \
+#define vhaddq_u32(__a, __b) __extension__ \
   ({ \
      uint32x4_t __ax = __a; \
      uint32x4_t __bx = __b; \
@@ -892,7 +892,7 @@
      __rv.__i; \
    })
 
-#define vrhadd_s8(__a, __b) \
+#define vrhadd_s8(__a, __b) __extension__ \
   ({ \
      int8x8_t __ax = __a; \
      int8x8_t __bx = __b; \
@@ -901,7 +901,7 @@
      __rv.__i; \
    })
 
-#define vrhadd_s16(__a, __b) \
+#define vrhadd_s16(__a, __b) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      int16x4_t __bx = __b; \
@@ -910,7 +910,7 @@
      __rv.__i; \
    })
 
-#define vrhadd_s32(__a, __b) \
+#define vrhadd_s32(__a, __b) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      int32x2_t __bx = __b; \
@@ -919,7 +919,7 @@
      __rv.__i; \
    })
 
-#define vrhadd_u8(__a, __b) \
+#define vrhadd_u8(__a, __b) __extension__ \
   ({ \
      uint8x8_t __ax = __a; \
      uint8x8_t __bx = __b; \
@@ -928,7 +928,7 @@
      __rv.__i; \
    })
 
-#define vrhadd_u16(__a, __b) \
+#define vrhadd_u16(__a, __b) __extension__ \
   ({ \
      uint16x4_t __ax = __a; \
      uint16x4_t __bx = __b; \
@@ -937,7 +937,7 @@
      __rv.__i; \
    })
 
-#define vrhadd_u32(__a, __b) \
+#define vrhadd_u32(__a, __b) __extension__ \
   ({ \
      uint32x2_t __ax = __a; \
      uint32x2_t __bx = __b; \
@@ -946,7 +946,7 @@
      __rv.__i; \
    })
 
-#define vrhaddq_s8(__a, __b) \
+#define vrhaddq_s8(__a, __b) __extension__ \
   ({ \
      int8x16_t __ax = __a; \
      int8x16_t __bx = __b; \
@@ -955,7 +955,7 @@
      __rv.__i; \
    })
 
-#define vrhaddq_s16(__a, __b) \
+#define vrhaddq_s16(__a, __b) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      int16x8_t __bx = __b; \
@@ -964,7 +964,7 @@
      __rv.__i; \
    })
 
-#define vrhaddq_s32(__a, __b) \
+#define vrhaddq_s32(__a, __b) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      int32x4_t __bx = __b; \
@@ -973,7 +973,7 @@
      __rv.__i; \
    })
 
-#define vrhaddq_u8(__a, __b) \
+#define vrhaddq_u8(__a, __b) __extension__ \
   ({ \
      uint8x16_t __ax = __a; \
      uint8x16_t __bx = __b; \
@@ -982,7 +982,7 @@
      __rv.__i; \
    })
 
-#define vrhaddq_u16(__a, __b) \
+#define vrhaddq_u16(__a, __b) __extension__ \
   ({ \
      uint16x8_t __ax = __a; \
      uint16x8_t __bx = __b; \
@@ -991,7 +991,7 @@
      __rv.__i; \
    })
 
-#define vrhaddq_u32(__a, __b) \
+#define vrhaddq_u32(__a, __b) __extension__ \
   ({ \
      uint32x4_t __ax = __a; \
      uint32x4_t __bx = __b; \
@@ -1000,7 +1000,7 @@
      __rv.__i; \
    })
 
-#define vqadd_s8(__a, __b) \
+#define vqadd_s8(__a, __b) __extension__ \
   ({ \
      int8x8_t __ax = __a; \
      int8x8_t __bx = __b; \
@@ -1009,7 +1009,7 @@
      __rv.__i; \
    })
 
-#define vqadd_s16(__a, __b) \
+#define vqadd_s16(__a, __b) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      int16x4_t __bx = __b; \
@@ -1018,7 +1018,7 @@
      __rv.__i; \
    })
 
-#define vqadd_s32(__a, __b) \
+#define vqadd_s32(__a, __b) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      int32x2_t __bx = __b; \
@@ -1027,7 +1027,7 @@
      __rv.__i; \
    })
 
-#define vqadd_s64(__a, __b) \
+#define vqadd_s64(__a, __b) __extension__ \
   ({ \
      int64x1_t __ax = __a; \
      int64x1_t __bx = __b; \
@@ -1036,7 +1036,7 @@
      __rv.__i; \
    })
 
-#define vqadd_u8(__a, __b) \
+#define vqadd_u8(__a, __b) __extension__ \
   ({ \
      uint8x8_t __ax = __a; \
      uint8x8_t __bx = __b; \
@@ -1045,7 +1045,7 @@
      __rv.__i; \
    })
 
-#define vqadd_u16(__a, __b) \
+#define vqadd_u16(__a, __b) __extension__ \
   ({ \
      uint16x4_t __ax = __a; \
      uint16x4_t __bx = __b; \
@@ -1054,7 +1054,7 @@
      __rv.__i; \
    })
 
-#define vqadd_u32(__a, __b) \
+#define vqadd_u32(__a, __b) __extension__ \
   ({ \
      uint32x2_t __ax = __a; \
      uint32x2_t __bx = __b; \
@@ -1063,7 +1063,7 @@
      __rv.__i; \
    })
 
-#define vqadd_u64(__a, __b) \
+#define vqadd_u64(__a, __b) __extension__ \
   ({ \
      uint64x1_t __ax = __a; \
      uint64x1_t __bx = __b; \
@@ -1072,7 +1072,7 @@
      __rv.__i; \
    })
 
-#define vqaddq_s8(__a, __b) \
+#define vqaddq_s8(__a, __b) __extension__ \
   ({ \
      int8x16_t __ax = __a; \
      int8x16_t __bx = __b; \
@@ -1081,7 +1081,7 @@
      __rv.__i; \
    })
 
-#define vqaddq_s16(__a, __b) \
+#define vqaddq_s16(__a, __b) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      int16x8_t __bx = __b; \
@@ -1090,7 +1090,7 @@
      __rv.__i; \
    })
 
-#define vqaddq_s32(__a, __b) \
+#define vqaddq_s32(__a, __b) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      int32x4_t __bx = __b; \
@@ -1099,7 +1099,7 @@
      __rv.__i; \
    })
 
-#define vqaddq_s64(__a, __b) \
+#define vqaddq_s64(__a, __b) __extension__ \
   ({ \
      int64x2_t __ax = __a; \
      int64x2_t __bx = __b; \
@@ -1108,7 +1108,7 @@
      __rv.__i; \
    })
 
-#define vqaddq_u8(__a, __b) \
+#define vqaddq_u8(__a, __b) __extension__ \
   ({ \
      uint8x16_t __ax = __a; \
      uint8x16_t __bx = __b; \
@@ -1117,7 +1117,7 @@
      __rv.__i; \
    })
 
-#define vqaddq_u16(__a, __b) \
+#define vqaddq_u16(__a, __b) __extension__ \
   ({ \
      uint16x8_t __ax = __a; \
      uint16x8_t __bx = __b; \
@@ -1126,7 +1126,7 @@
      __rv.__i; \
    })
 
-#define vqaddq_u32(__a, __b) \
+#define vqaddq_u32(__a, __b) __extension__ \
   ({ \
      uint32x4_t __ax = __a; \
      uint32x4_t __bx = __b; \
@@ -1135,7 +1135,7 @@
      __rv.__i; \
    })
 
-#define vqaddq_u64(__a, __b) \
+#define vqaddq_u64(__a, __b) __extension__ \
   ({ \
      uint64x2_t __ax = __a; \
      uint64x2_t __bx = __b; \
@@ -1144,7 +1144,7 @@
      __rv.__i; \
    })
 
-#define vaddhn_s16(__a, __b) \
+#define vaddhn_s16(__a, __b) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      int16x8_t __bx = __b; \
@@ -1153,7 +1153,7 @@
      __rv.__i; \
    })
 
-#define vaddhn_s32(__a, __b) \
+#define vaddhn_s32(__a, __b) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      int32x4_t __bx = __b; \
@@ -1162,7 +1162,7 @@
      __rv.__i; \
    })
 
-#define vaddhn_s64(__a, __b) \
+#define vaddhn_s64(__a, __b) __extension__ \
   ({ \
      int64x2_t __ax = __a; \
      int64x2_t __bx = __b; \
@@ -1171,7 +1171,7 @@
      __rv.__i; \
    })
 
-#define vaddhn_u16(__a, __b) \
+#define vaddhn_u16(__a, __b) __extension__ \
   ({ \
      uint16x8_t __ax = __a; \
      uint16x8_t __bx = __b; \
@@ -1180,7 +1180,7 @@
      __rv.__i; \
    })
 
-#define vaddhn_u32(__a, __b) \
+#define vaddhn_u32(__a, __b) __extension__ \
   ({ \
      uint32x4_t __ax = __a; \
      uint32x4_t __bx = __b; \
@@ -1189,7 +1189,7 @@
      __rv.__i; \
    })
 
-#define vaddhn_u64(__a, __b) \
+#define vaddhn_u64(__a, __b) __extension__ \
   ({ \
      uint64x2_t __ax = __a; \
      uint64x2_t __bx = __b; \
@@ -1198,7 +1198,7 @@
      __rv.__i; \
    })
 
-#define vraddhn_s16(__a, __b) \
+#define vraddhn_s16(__a, __b) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      int16x8_t __bx = __b; \
@@ -1207,7 +1207,7 @@
      __rv.__i; \
    })
 
-#define vraddhn_s32(__a, __b) \
+#define vraddhn_s32(__a, __b) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      int32x4_t __bx = __b; \
@@ -1216,7 +1216,7 @@
      __rv.__i; \
    })
 
-#define vraddhn_s64(__a, __b) \
+#define vraddhn_s64(__a, __b) __extension__ \
   ({ \
      int64x2_t __ax = __a; \
      int64x2_t __bx = __b; \
@@ -1225,7 +1225,7 @@
      __rv.__i; \
    })
 
-#define vraddhn_u16(__a, __b) \
+#define vraddhn_u16(__a, __b) __extension__ \
   ({ \
      uint16x8_t __ax = __a; \
      uint16x8_t __bx = __b; \
@@ -1234,7 +1234,7 @@
      __rv.__i; \
    })
 
-#define vraddhn_u32(__a, __b) \
+#define vraddhn_u32(__a, __b) __extension__ \
   ({ \
      uint32x4_t __ax = __a; \
      uint32x4_t __bx = __b; \
@@ -1243,7 +1243,7 @@
      __rv.__i; \
    })
 
-#define vraddhn_u64(__a, __b) \
+#define vraddhn_u64(__a, __b) __extension__ \
   ({ \
      uint64x2_t __ax = __a; \
      uint64x2_t __bx = __b; \
@@ -1252,7 +1252,7 @@
      __rv.__i; \
    })
 
-#define vmul_s8(__a, __b) \
+#define vmul_s8(__a, __b) __extension__ \
   ({ \
      int8x8_t __ax = __a; \
      int8x8_t __bx = __b; \
@@ -1261,7 +1261,7 @@
      __rv.__i; \
    })
 
-#define vmul_s16(__a, __b) \
+#define vmul_s16(__a, __b) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      int16x4_t __bx = __b; \
@@ -1270,7 +1270,7 @@
      __rv.__i; \
    })
 
-#define vmul_s32(__a, __b) \
+#define vmul_s32(__a, __b) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      int32x2_t __bx = __b; \
@@ -1279,7 +1279,7 @@
      __rv.__i; \
    })
 
-#define vmul_f32(__a, __b) \
+#define vmul_f32(__a, __b) __extension__ \
   ({ \
      float32x2_t __ax = __a; \
      float32x2_t __bx = __b; \
@@ -1288,7 +1288,7 @@
      __rv.__i; \
    })
 
-#define vmul_u8(__a, __b) \
+#define vmul_u8(__a, __b) __extension__ \
   ({ \
      uint8x8_t __ax = __a; \
      uint8x8_t __bx = __b; \
@@ -1297,7 +1297,7 @@
      __rv.__i; \
    })
 
-#define vmul_u16(__a, __b) \
+#define vmul_u16(__a, __b) __extension__ \
   ({ \
      uint16x4_t __ax = __a; \
      uint16x4_t __bx = __b; \
@@ -1306,7 +1306,7 @@
      __rv.__i; \
    })
 
-#define vmul_u32(__a, __b) \
+#define vmul_u32(__a, __b) __extension__ \
   ({ \
      uint32x2_t __ax = __a; \
      uint32x2_t __bx = __b; \
@@ -1315,7 +1315,7 @@
      __rv.__i; \
    })
 
-#define vmul_p8(__a, __b) \
+#define vmul_p8(__a, __b) __extension__ \
   ({ \
      poly8x8_t __ax = __a; \
      poly8x8_t __bx = __b; \
@@ -1324,7 +1324,7 @@
      __rv.__i; \
    })
 
-#define vmulq_s8(__a, __b) \
+#define vmulq_s8(__a, __b) __extension__ \
   ({ \
      int8x16_t __ax = __a; \
      int8x16_t __bx = __b; \
@@ -1333,7 +1333,7 @@
      __rv.__i; \
    })
 
-#define vmulq_s16(__a, __b) \
+#define vmulq_s16(__a, __b) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      int16x8_t __bx = __b; \
@@ -1342,7 +1342,7 @@
      __rv.__i; \
    })
 
-#define vmulq_s32(__a, __b) \
+#define vmulq_s32(__a, __b) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      int32x4_t __bx = __b; \
@@ -1351,7 +1351,7 @@
      __rv.__i; \
    })
 
-#define vmulq_f32(__a, __b) \
+#define vmulq_f32(__a, __b) __extension__ \
   ({ \
      float32x4_t __ax = __a; \
      float32x4_t __bx = __b; \
@@ -1360,7 +1360,7 @@
      __rv.__i; \
    })
 
-#define vmulq_u8(__a, __b) \
+#define vmulq_u8(__a, __b) __extension__ \
   ({ \
      uint8x16_t __ax = __a; \
      uint8x16_t __bx = __b; \
@@ -1369,7 +1369,7 @@
      __rv.__i; \
    })
 
-#define vmulq_u16(__a, __b) \
+#define vmulq_u16(__a, __b) __extension__ \
   ({ \
      uint16x8_t __ax = __a; \
      uint16x8_t __bx = __b; \
@@ -1378,7 +1378,7 @@
      __rv.__i; \
    })
 
-#define vmulq_u32(__a, __b) \
+#define vmulq_u32(__a, __b) __extension__ \
   ({ \
      uint32x4_t __ax = __a; \
      uint32x4_t __bx = __b; \
@@ -1387,7 +1387,7 @@
      __rv.__i; \
    })
 
-#define vmulq_p8(__a, __b) \
+#define vmulq_p8(__a, __b) __extension__ \
   ({ \
      poly8x16_t __ax = __a; \
      poly8x16_t __bx = __b; \
@@ -1396,7 +1396,7 @@
      __rv.__i; \
    })
 
-#define vqdmulh_s16(__a, __b) \
+#define vqdmulh_s16(__a, __b) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      int16x4_t __bx = __b; \
@@ -1405,7 +1405,7 @@
      __rv.__i; \
    })
 
-#define vqdmulh_s32(__a, __b) \
+#define vqdmulh_s32(__a, __b) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      int32x2_t __bx = __b; \
@@ -1414,7 +1414,7 @@
      __rv.__i; \
    })
 
-#define vqdmulhq_s16(__a, __b) \
+#define vqdmulhq_s16(__a, __b) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      int16x8_t __bx = __b; \
@@ -1423,7 +1423,7 @@
      __rv.__i; \
    })
 
-#define vqdmulhq_s32(__a, __b) \
+#define vqdmulhq_s32(__a, __b) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      int32x4_t __bx = __b; \
@@ -1432,7 +1432,7 @@
      __rv.__i; \
    })
 
-#define vqrdmulh_s16(__a, __b) \
+#define vqrdmulh_s16(__a, __b) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      int16x4_t __bx = __b; \
@@ -1441,7 +1441,7 @@
      __rv.__i; \
    })
 
-#define vqrdmulh_s32(__a, __b) \
+#define vqrdmulh_s32(__a, __b) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      int32x2_t __bx = __b; \
@@ -1450,7 +1450,7 @@
      __rv.__i; \
    })
 
-#define vqrdmulhq_s16(__a, __b) \
+#define vqrdmulhq_s16(__a, __b) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      int16x8_t __bx = __b; \
@@ -1459,7 +1459,7 @@
      __rv.__i; \
    })
 
-#define vqrdmulhq_s32(__a, __b) \
+#define vqrdmulhq_s32(__a, __b) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      int32x4_t __bx = __b; \
@@ -1468,7 +1468,7 @@
      __rv.__i; \
    })
 
-#define vmull_s8(__a, __b) \
+#define vmull_s8(__a, __b) __extension__ \
   ({ \
      int8x8_t __ax = __a; \
      int8x8_t __bx = __b; \
@@ -1477,7 +1477,7 @@
      __rv.__i; \
    })
 
-#define vmull_s16(__a, __b) \
+#define vmull_s16(__a, __b) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      int16x4_t __bx = __b; \
@@ -1486,7 +1486,7 @@
      __rv.__i; \
    })
 
-#define vmull_s32(__a, __b) \
+#define vmull_s32(__a, __b) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      int32x2_t __bx = __b; \
@@ -1495,7 +1495,7 @@
      __rv.__i; \
    })
 
-#define vmull_u8(__a, __b) \
+#define vmull_u8(__a, __b) __extension__ \
   ({ \
      uint8x8_t __ax = __a; \
      uint8x8_t __bx = __b; \
@@ -1504,7 +1504,7 @@
      __rv.__i; \
    })
 
-#define vmull_u16(__a, __b) \
+#define vmull_u16(__a, __b) __extension__ \
   ({ \
      uint16x4_t __ax = __a; \
      uint16x4_t __bx = __b; \
@@ -1513,7 +1513,7 @@
      __rv.__i; \
    })
 
-#define vmull_u32(__a, __b) \
+#define vmull_u32(__a, __b) __extension__ \
   ({ \
      uint32x2_t __ax = __a; \
      uint32x2_t __bx = __b; \
@@ -1522,7 +1522,7 @@
      __rv.__i; \
    })
 
-#define vmull_p8(__a, __b) \
+#define vmull_p8(__a, __b) __extension__ \
   ({ \
      poly8x8_t __ax = __a; \
      poly8x8_t __bx = __b; \
@@ -1531,7 +1531,7 @@
      __rv.__i; \
    })
 
-#define vqdmull_s16(__a, __b) \
+#define vqdmull_s16(__a, __b) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      int16x4_t __bx = __b; \
@@ -1540,7 +1540,7 @@
      __rv.__i; \
    })
 
-#define vqdmull_s32(__a, __b) \
+#define vqdmull_s32(__a, __b) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      int32x2_t __bx = __b; \
@@ -1549,7 +1549,7 @@
      __rv.__i; \
    })
 
-#define vmla_s8(__a, __b, __c) \
+#define vmla_s8(__a, __b, __c) __extension__ \
   ({ \
      int8x8_t __ax = __a; \
      int8x8_t __bx = __b; \
@@ -1559,7 +1559,7 @@
      __rv.__i; \
    })
 
-#define vmla_s16(__a, __b, __c) \
+#define vmla_s16(__a, __b, __c) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      int16x4_t __bx = __b; \
@@ -1569,7 +1569,7 @@
      __rv.__i; \
    })
 
-#define vmla_s32(__a, __b, __c) \
+#define vmla_s32(__a, __b, __c) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      int32x2_t __bx = __b; \
@@ -1579,7 +1579,7 @@
      __rv.__i; \
    })
 
-#define vmla_f32(__a, __b, __c) \
+#define vmla_f32(__a, __b, __c) __extension__ \
   ({ \
      float32x2_t __ax = __a; \
      float32x2_t __bx = __b; \
@@ -1589,7 +1589,7 @@
      __rv.__i; \
    })
 
-#define vmla_u8(__a, __b, __c) \
+#define vmla_u8(__a, __b, __c) __extension__ \
   ({ \
      uint8x8_t __ax = __a; \
      uint8x8_t __bx = __b; \
@@ -1599,7 +1599,7 @@
      __rv.__i; \
    })
 
-#define vmla_u16(__a, __b, __c) \
+#define vmla_u16(__a, __b, __c) __extension__ \
   ({ \
      uint16x4_t __ax = __a; \
      uint16x4_t __bx = __b; \
@@ -1609,7 +1609,7 @@
      __rv.__i; \
    })
 
-#define vmla_u32(__a, __b, __c) \
+#define vmla_u32(__a, __b, __c) __extension__ \
   ({ \
      uint32x2_t __ax = __a; \
      uint32x2_t __bx = __b; \
@@ -1619,7 +1619,7 @@
      __rv.__i; \
    })
 
-#define vmlaq_s8(__a, __b, __c) \
+#define vmlaq_s8(__a, __b, __c) __extension__ \
   ({ \
      int8x16_t __ax = __a; \
      int8x16_t __bx = __b; \
@@ -1629,7 +1629,7 @@
      __rv.__i; \
    })
 
-#define vmlaq_s16(__a, __b, __c) \
+#define vmlaq_s16(__a, __b, __c) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      int16x8_t __bx = __b; \
@@ -1639,7 +1639,7 @@
      __rv.__i; \
    })
 
-#define vmlaq_s32(__a, __b, __c) \
+#define vmlaq_s32(__a, __b, __c) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      int32x4_t __bx = __b; \
@@ -1649,7 +1649,7 @@
      __rv.__i; \
    })
 
-#define vmlaq_f32(__a, __b, __c) \
+#define vmlaq_f32(__a, __b, __c) __extension__ \
   ({ \
      float32x4_t __ax = __a; \
      float32x4_t __bx = __b; \
@@ -1659,7 +1659,7 @@
      __rv.__i; \
    })
 
-#define vmlaq_u8(__a, __b, __c) \
+#define vmlaq_u8(__a, __b, __c) __extension__ \
   ({ \
      uint8x16_t __ax = __a; \
      uint8x16_t __bx = __b; \
@@ -1669,7 +1669,7 @@
      __rv.__i; \
    })
 
-#define vmlaq_u16(__a, __b, __c) \
+#define vmlaq_u16(__a, __b, __c) __extension__ \
   ({ \
      uint16x8_t __ax = __a; \
      uint16x8_t __bx = __b; \
@@ -1679,7 +1679,7 @@
      __rv.__i; \
    })
 
-#define vmlaq_u32(__a, __b, __c) \
+#define vmlaq_u32(__a, __b, __c) __extension__ \
   ({ \
      uint32x4_t __ax = __a; \
      uint32x4_t __bx = __b; \
@@ -1689,7 +1689,7 @@
      __rv.__i; \
    })
 
-#define vmlal_s8(__a, __b, __c) \
+#define vmlal_s8(__a, __b, __c) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      int8x8_t __bx = __b; \
@@ -1699,7 +1699,7 @@
      __rv.__i; \
    })
 
-#define vmlal_s16(__a, __b, __c) \
+#define vmlal_s16(__a, __b, __c) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      int16x4_t __bx = __b; \
@@ -1709,7 +1709,7 @@
      __rv.__i; \
    })
 
-#define vmlal_s32(__a, __b, __c) \
+#define vmlal_s32(__a, __b, __c) __extension__ \
   ({ \
      int64x2_t __ax = __a; \
      int32x2_t __bx = __b; \
@@ -1719,7 +1719,7 @@
      __rv.__i; \
    })
 
-#define vmlal_u8(__a, __b, __c) \
+#define vmlal_u8(__a, __b, __c) __extension__ \
   ({ \
      uint16x8_t __ax = __a; \
      uint8x8_t __bx = __b; \
@@ -1729,7 +1729,7 @@
      __rv.__i; \
    })
 
-#define vmlal_u16(__a, __b, __c) \
+#define vmlal_u16(__a, __b, __c) __extension__ \
   ({ \
      uint32x4_t __ax = __a; \
      uint16x4_t __bx = __b; \
@@ -1739,7 +1739,7 @@
      __rv.__i; \
    })
 
-#define vmlal_u32(__a, __b, __c) \
+#define vmlal_u32(__a, __b, __c) __extension__ \
   ({ \
      uint64x2_t __ax = __a; \
      uint32x2_t __bx = __b; \
@@ -1749,7 +1749,7 @@
      __rv.__i; \
    })
 
-#define vqdmlal_s16(__a, __b, __c) \
+#define vqdmlal_s16(__a, __b, __c) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      int16x4_t __bx = __b; \
@@ -1759,7 +1759,7 @@
      __rv.__i; \
    })
 
-#define vqdmlal_s32(__a, __b, __c) \
+#define vqdmlal_s32(__a, __b, __c) __extension__ \
   ({ \
      int64x2_t __ax = __a; \
      int32x2_t __bx = __b; \
@@ -1769,7 +1769,7 @@
      __rv.__i; \
    })
 
-#define vmls_s8(__a, __b, __c) \
+#define vmls_s8(__a, __b, __c) __extension__ \
   ({ \
      int8x8_t __ax = __a; \
      int8x8_t __bx = __b; \
@@ -1779,7 +1779,7 @@
      __rv.__i; \
    })
 
-#define vmls_s16(__a, __b, __c) \
+#define vmls_s16(__a, __b, __c) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      int16x4_t __bx = __b; \
@@ -1789,7 +1789,7 @@
      __rv.__i; \
    })
 
-#define vmls_s32(__a, __b, __c) \
+#define vmls_s32(__a, __b, __c) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      int32x2_t __bx = __b; \
@@ -1799,7 +1799,7 @@
      __rv.__i; \
    })
 
-#define vmls_f32(__a, __b, __c) \
+#define vmls_f32(__a, __b, __c) __extension__ \
   ({ \
      float32x2_t __ax = __a; \
      float32x2_t __bx = __b; \
@@ -1809,7 +1809,7 @@
      __rv.__i; \
    })
 
-#define vmls_u8(__a, __b, __c) \
+#define vmls_u8(__a, __b, __c) __extension__ \
   ({ \
      uint8x8_t __ax = __a; \
      uint8x8_t __bx = __b; \
@@ -1819,7 +1819,7 @@
      __rv.__i; \
    })
 
-#define vmls_u16(__a, __b, __c) \
+#define vmls_u16(__a, __b, __c) __extension__ \
   ({ \
      uint16x4_t __ax = __a; \
      uint16x4_t __bx = __b; \
@@ -1829,7 +1829,7 @@
      __rv.__i; \
    })
 
-#define vmls_u32(__a, __b, __c) \
+#define vmls_u32(__a, __b, __c) __extension__ \
   ({ \
      uint32x2_t __ax = __a; \
      uint32x2_t __bx = __b; \
@@ -1839,7 +1839,7 @@
      __rv.__i; \
    })
 
-#define vmlsq_s8(__a, __b, __c) \
+#define vmlsq_s8(__a, __b, __c) __extension__ \
   ({ \
      int8x16_t __ax = __a; \
      int8x16_t __bx = __b; \
@@ -1849,7 +1849,7 @@
      __rv.__i; \
    })
 
-#define vmlsq_s16(__a, __b, __c) \
+#define vmlsq_s16(__a, __b, __c) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      int16x8_t __bx = __b; \
@@ -1859,7 +1859,7 @@
      __rv.__i; \
    })
 
-#define vmlsq_s32(__a, __b, __c) \
+#define vmlsq_s32(__a, __b, __c) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      int32x4_t __bx = __b; \
@@ -1869,7 +1869,7 @@
      __rv.__i; \
    })
 
-#define vmlsq_f32(__a, __b, __c) \
+#define vmlsq_f32(__a, __b, __c) __extension__ \
   ({ \
      float32x4_t __ax = __a; \
      float32x4_t __bx = __b; \
@@ -1879,7 +1879,7 @@
      __rv.__i; \
    })
 
-#define vmlsq_u8(__a, __b, __c) \
+#define vmlsq_u8(__a, __b, __c) __extension__ \
   ({ \
      uint8x16_t __ax = __a; \
      uint8x16_t __bx = __b; \
@@ -1889,7 +1889,7 @@
      __rv.__i; \
    })
 
-#define vmlsq_u16(__a, __b, __c) \
+#define vmlsq_u16(__a, __b, __c) __extension__ \
   ({ \
      uint16x8_t __ax = __a; \
      uint16x8_t __bx = __b; \
@@ -1899,7 +1899,7 @@
      __rv.__i; \
    })
 
-#define vmlsq_u32(__a, __b, __c) \
+#define vmlsq_u32(__a, __b, __c) __extension__ \
   ({ \
      uint32x4_t __ax = __a; \
      uint32x4_t __bx = __b; \
@@ -1909,7 +1909,7 @@
      __rv.__i; \
    })
 
-#define vmlsl_s8(__a, __b, __c) \
+#define vmlsl_s8(__a, __b, __c) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      int8x8_t __bx = __b; \
@@ -1919,7 +1919,7 @@
      __rv.__i; \
    })
 
-#define vmlsl_s16(__a, __b, __c) \
+#define vmlsl_s16(__a, __b, __c) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      int16x4_t __bx = __b; \
@@ -1929,7 +1929,7 @@
      __rv.__i; \
    })
 
-#define vmlsl_s32(__a, __b, __c) \
+#define vmlsl_s32(__a, __b, __c) __extension__ \
   ({ \
      int64x2_t __ax = __a; \
      int32x2_t __bx = __b; \
@@ -1939,7 +1939,7 @@
      __rv.__i; \
    })
 
-#define vmlsl_u8(__a, __b, __c) \
+#define vmlsl_u8(__a, __b, __c) __extension__ \
   ({ \
      uint16x8_t __ax = __a; \
      uint8x8_t __bx = __b; \
@@ -1949,7 +1949,7 @@
      __rv.__i; \
    })
 
-#define vmlsl_u16(__a, __b, __c) \
+#define vmlsl_u16(__a, __b, __c) __extension__ \
   ({ \
      uint32x4_t __ax = __a; \
      uint16x4_t __bx = __b; \
@@ -1959,7 +1959,7 @@
      __rv.__i; \
    })
 
-#define vmlsl_u32(__a, __b, __c) \
+#define vmlsl_u32(__a, __b, __c) __extension__ \
   ({ \
      uint64x2_t __ax = __a; \
      uint32x2_t __bx = __b; \
@@ -1969,7 +1969,7 @@
      __rv.__i; \
    })
 
-#define vqdmlsl_s16(__a, __b, __c) \
+#define vqdmlsl_s16(__a, __b, __c) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      int16x4_t __bx = __b; \
@@ -1979,7 +1979,7 @@
      __rv.__i; \
    })
 
-#define vqdmlsl_s32(__a, __b, __c) \
+#define vqdmlsl_s32(__a, __b, __c) __extension__ \
   ({ \
      int64x2_t __ax = __a; \
      int32x2_t __bx = __b; \
@@ -1989,7 +1989,7 @@
      __rv.__i; \
    })
 
-#define vsub_s8(__a, __b) \
+#define vsub_s8(__a, __b) __extension__ \
   ({ \
      int8x8_t __ax = __a; \
      int8x8_t __bx = __b; \
@@ -1998,7 +1998,7 @@
      __rv.__i; \
    })
 
-#define vsub_s16(__a, __b) \
+#define vsub_s16(__a, __b) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      int16x4_t __bx = __b; \
@@ -2007,7 +2007,7 @@
      __rv.__i; \
    })
 
-#define vsub_s32(__a, __b) \
+#define vsub_s32(__a, __b) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      int32x2_t __bx = __b; \
@@ -2016,7 +2016,7 @@
      __rv.__i; \
    })
 
-#define vsub_s64(__a, __b) \
+#define vsub_s64(__a, __b) __extension__ \
   ({ \
      int64x1_t __ax = __a; \
      int64x1_t __bx = __b; \
@@ -2025,7 +2025,7 @@
      __rv.__i; \
    })
 
-#define vsub_f32(__a, __b) \
+#define vsub_f32(__a, __b) __extension__ \
   ({ \
      float32x2_t __ax = __a; \
      float32x2_t __bx = __b; \
@@ -2034,7 +2034,7 @@
      __rv.__i; \
    })
 
-#define vsub_u8(__a, __b) \
+#define vsub_u8(__a, __b) __extension__ \
   ({ \
      uint8x8_t __ax = __a; \
      uint8x8_t __bx = __b; \
@@ -2043,7 +2043,7 @@
      __rv.__i; \
    })
 
-#define vsub_u16(__a, __b) \
+#define vsub_u16(__a, __b) __extension__ \
   ({ \
      uint16x4_t __ax = __a; \
      uint16x4_t __bx = __b; \
@@ -2052,7 +2052,7 @@
      __rv.__i; \
    })
 
-#define vsub_u32(__a, __b) \
+#define vsub_u32(__a, __b) __extension__ \
   ({ \
      uint32x2_t __ax = __a; \
      uint32x2_t __bx = __b; \
@@ -2061,7 +2061,7 @@
      __rv.__i; \
    })
 
-#define vsub_u64(__a, __b) \
+#define vsub_u64(__a, __b) __extension__ \
   ({ \
      uint64x1_t __ax = __a; \
      uint64x1_t __bx = __b; \
@@ -2070,7 +2070,7 @@
      __rv.__i; \
    })
 
-#define vsubq_s8(__a, __b) \
+#define vsubq_s8(__a, __b) __extension__ \
   ({ \
      int8x16_t __ax = __a; \
      int8x16_t __bx = __b; \
@@ -2079,7 +2079,7 @@
      __rv.__i; \
    })
 
-#define vsubq_s16(__a, __b) \
+#define vsubq_s16(__a, __b) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      int16x8_t __bx = __b; \
@@ -2088,7 +2088,7 @@
      __rv.__i; \
    })
 
-#define vsubq_s32(__a, __b) \
+#define vsubq_s32(__a, __b) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      int32x4_t __bx = __b; \
@@ -2097,7 +2097,7 @@
      __rv.__i; \
    })
 
-#define vsubq_s64(__a, __b) \
+#define vsubq_s64(__a, __b) __extension__ \
   ({ \
      int64x2_t __ax = __a; \
      int64x2_t __bx = __b; \
@@ -2106,7 +2106,7 @@
      __rv.__i; \
    })
 
-#define vsubq_f32(__a, __b) \
+#define vsubq_f32(__a, __b) __extension__ \
   ({ \
      float32x4_t __ax = __a; \
      float32x4_t __bx = __b; \
@@ -2115,7 +2115,7 @@
      __rv.__i; \
    })
 
-#define vsubq_u8(__a, __b) \
+#define vsubq_u8(__a, __b) __extension__ \
   ({ \
      uint8x16_t __ax = __a; \
      uint8x16_t __bx = __b; \
@@ -2124,7 +2124,7 @@
      __rv.__i; \
    })
 
-#define vsubq_u16(__a, __b) \
+#define vsubq_u16(__a, __b) __extension__ \
   ({ \
      uint16x8_t __ax = __a; \
      uint16x8_t __bx = __b; \
@@ -2133,7 +2133,7 @@
      __rv.__i; \
    })
 
-#define vsubq_u32(__a, __b) \
+#define vsubq_u32(__a, __b) __extension__ \
   ({ \
      uint32x4_t __ax = __a; \
      uint32x4_t __bx = __b; \
@@ -2142,7 +2142,7 @@
      __rv.__i; \
    })
 
-#define vsubq_u64(__a, __b) \
+#define vsubq_u64(__a, __b) __extension__ \
   ({ \
      uint64x2_t __ax = __a; \
      uint64x2_t __bx = __b; \
@@ -2151,7 +2151,7 @@
      __rv.__i; \
    })
 
-#define vsubl_s8(__a, __b) \
+#define vsubl_s8(__a, __b) __extension__ \
   ({ \
      int8x8_t __ax = __a; \
      int8x8_t __bx = __b; \
@@ -2160,7 +2160,7 @@
      __rv.__i; \
    })
 
-#define vsubl_s16(__a, __b) \
+#define vsubl_s16(__a, __b) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      int16x4_t __bx = __b; \
@@ -2169,7 +2169,7 @@
      __rv.__i; \
    })
 
-#define vsubl_s32(__a, __b) \
+#define vsubl_s32(__a, __b) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      int32x2_t __bx = __b; \
@@ -2178,7 +2178,7 @@
      __rv.__i; \
    })
 
-#define vsubl_u8(__a, __b) \
+#define vsubl_u8(__a, __b) __extension__ \
   ({ \
      uint8x8_t __ax = __a; \
      uint8x8_t __bx = __b; \
@@ -2187,7 +2187,7 @@
      __rv.__i; \
    })
 
-#define vsubl_u16(__a, __b) \
+#define vsubl_u16(__a, __b) __extension__ \
   ({ \
      uint16x4_t __ax = __a; \
      uint16x4_t __bx = __b; \
@@ -2196,7 +2196,7 @@
      __rv.__i; \
    })
 
-#define vsubl_u32(__a, __b) \
+#define vsubl_u32(__a, __b) __extension__ \
   ({ \
      uint32x2_t __ax = __a; \
      uint32x2_t __bx = __b; \
@@ -2205,7 +2205,7 @@
      __rv.__i; \
    })
 
-#define vsubw_s8(__a, __b) \
+#define vsubw_s8(__a, __b) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      int8x8_t __bx = __b; \
@@ -2214,7 +2214,7 @@
      __rv.__i; \
    })
 
-#define vsubw_s16(__a, __b) \
+#define vsubw_s16(__a, __b) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      int16x4_t __bx = __b; \
@@ -2223,7 +2223,7 @@
      __rv.__i; \
    })
 
-#define vsubw_s32(__a, __b) \
+#define vsubw_s32(__a, __b) __extension__ \
   ({ \
      int64x2_t __ax = __a; \
      int32x2_t __bx = __b; \
@@ -2232,7 +2232,7 @@
      __rv.__i; \
    })
 
-#define vsubw_u8(__a, __b) \
+#define vsubw_u8(__a, __b) __extension__ \
   ({ \
      uint16x8_t __ax = __a; \
      uint8x8_t __bx = __b; \
@@ -2241,7 +2241,7 @@
      __rv.__i; \
    })
 
-#define vsubw_u16(__a, __b) \
+#define vsubw_u16(__a, __b) __extension__ \
   ({ \
      uint32x4_t __ax = __a; \
      uint16x4_t __bx = __b; \
@@ -2250,7 +2250,7 @@
      __rv.__i; \
    })
 
-#define vsubw_u32(__a, __b) \
+#define vsubw_u32(__a, __b) __extension__ \
   ({ \
      uint64x2_t __ax = __a; \
      uint32x2_t __bx = __b; \
@@ -2259,7 +2259,7 @@
      __rv.__i; \
    })
 
-#define vhsub_s8(__a, __b) \
+#define vhsub_s8(__a, __b) __extension__ \
   ({ \
      int8x8_t __ax = __a; \
      int8x8_t __bx = __b; \
@@ -2268,7 +2268,7 @@
      __rv.__i; \
    })
 
-#define vhsub_s16(__a, __b) \
+#define vhsub_s16(__a, __b) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      int16x4_t __bx = __b; \
@@ -2277,7 +2277,7 @@
      __rv.__i; \
    })
 
-#define vhsub_s32(__a, __b) \
+#define vhsub_s32(__a, __b) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      int32x2_t __bx = __b; \
@@ -2286,7 +2286,7 @@
      __rv.__i; \
    })
 
-#define vhsub_u8(__a, __b) \
+#define vhsub_u8(__a, __b) __extension__ \
   ({ \
      uint8x8_t __ax = __a; \
      uint8x8_t __bx = __b; \
@@ -2295,7 +2295,7 @@
      __rv.__i; \
    })
 
-#define vhsub_u16(__a, __b) \
+#define vhsub_u16(__a, __b) __extension__ \
   ({ \
      uint16x4_t __ax = __a; \
      uint16x4_t __bx = __b; \
@@ -2304,7 +2304,7 @@
      __rv.__i; \
    })
 
-#define vhsub_u32(__a, __b) \
+#define vhsub_u32(__a, __b) __extension__ \
   ({ \
      uint32x2_t __ax = __a; \
      uint32x2_t __bx = __b; \
@@ -2313,7 +2313,7 @@
      __rv.__i; \
    })
 
-#define vhsubq_s8(__a, __b) \
+#define vhsubq_s8(__a, __b) __extension__ \
   ({ \
      int8x16_t __ax = __a; \
      int8x16_t __bx = __b; \
@@ -2322,7 +2322,7 @@
      __rv.__i; \
    })
 
-#define vhsubq_s16(__a, __b) \
+#define vhsubq_s16(__a, __b) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      int16x8_t __bx = __b; \
@@ -2331,7 +2331,7 @@
      __rv.__i; \
    })
 
-#define vhsubq_s32(__a, __b) \
+#define vhsubq_s32(__a, __b) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      int32x4_t __bx = __b; \
@@ -2340,7 +2340,7 @@
      __rv.__i; \
    })
 
-#define vhsubq_u8(__a, __b) \
+#define vhsubq_u8(__a, __b) __extension__ \
   ({ \
      uint8x16_t __ax = __a; \
      uint8x16_t __bx = __b; \
@@ -2349,7 +2349,7 @@
      __rv.__i; \
    })
 
-#define vhsubq_u16(__a, __b) \
+#define vhsubq_u16(__a, __b) __extension__ \
   ({ \
      uint16x8_t __ax = __a; \
      uint16x8_t __bx = __b; \
@@ -2358,7 +2358,7 @@
      __rv.__i; \
    })
 
-#define vhsubq_u32(__a, __b) \
+#define vhsubq_u32(__a, __b) __extension__ \
   ({ \
      uint32x4_t __ax = __a; \
      uint32x4_t __bx = __b; \
@@ -2367,7 +2367,7 @@
      __rv.__i; \
    })
 
-#define vqsub_s8(__a, __b) \
+#define vqsub_s8(__a, __b) __extension__ \
   ({ \
      int8x8_t __ax = __a; \
      int8x8_t __bx = __b; \
@@ -2376,7 +2376,7 @@
      __rv.__i; \
    })
 
-#define vqsub_s16(__a, __b) \
+#define vqsub_s16(__a, __b) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      int16x4_t __bx = __b; \
@@ -2385,7 +2385,7 @@
      __rv.__i; \
    })
 
-#define vqsub_s32(__a, __b) \
+#define vqsub_s32(__a, __b) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      int32x2_t __bx = __b; \
@@ -2394,7 +2394,7 @@
      __rv.__i; \
    })
 
-#define vqsub_s64(__a, __b) \
+#define vqsub_s64(__a, __b) __extension__ \
   ({ \
      int64x1_t __ax = __a; \
      int64x1_t __bx = __b; \
@@ -2403,7 +2403,7 @@
      __rv.__i; \
    })
 
-#define vqsub_u8(__a, __b) \
+#define vqsub_u8(__a, __b) __extension__ \
   ({ \
      uint8x8_t __ax = __a; \
      uint8x8_t __bx = __b; \
@@ -2412,7 +2412,7 @@
      __rv.__i; \
    })
 
-#define vqsub_u16(__a, __b) \
+#define vqsub_u16(__a, __b) __extension__ \
   ({ \
      uint16x4_t __ax = __a; \
      uint16x4_t __bx = __b; \
@@ -2421,7 +2421,7 @@
      __rv.__i; \
    })
 
-#define vqsub_u32(__a, __b) \
+#define vqsub_u32(__a, __b) __extension__ \
   ({ \
      uint32x2_t __ax = __a; \
      uint32x2_t __bx = __b; \
@@ -2430,7 +2430,7 @@
      __rv.__i; \
    })
 
-#define vqsub_u64(__a, __b) \
+#define vqsub_u64(__a, __b) __extension__ \
   ({ \
      uint64x1_t __ax = __a; \
      uint64x1_t __bx = __b; \
@@ -2439,7 +2439,7 @@
      __rv.__i; \
    })
 
-#define vqsubq_s8(__a, __b) \
+#define vqsubq_s8(__a, __b) __extension__ \
   ({ \
      int8x16_t __ax = __a; \
      int8x16_t __bx = __b; \
@@ -2448,7 +2448,7 @@
      __rv.__i; \
    })
 
-#define vqsubq_s16(__a, __b) \
+#define vqsubq_s16(__a, __b) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      int16x8_t __bx = __b; \
@@ -2457,7 +2457,7 @@
      __rv.__i; \
    })
 
-#define vqsubq_s32(__a, __b) \
+#define vqsubq_s32(__a, __b) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      int32x4_t __bx = __b; \
@@ -2466,7 +2466,7 @@
      __rv.__i; \
    })
 
-#define vqsubq_s64(__a, __b) \
+#define vqsubq_s64(__a, __b) __extension__ \
   ({ \
      int64x2_t __ax = __a; \
      int64x2_t __bx = __b; \
@@ -2475,7 +2475,7 @@
      __rv.__i; \
    })
 
-#define vqsubq_u8(__a, __b) \
+#define vqsubq_u8(__a, __b) __extension__ \
   ({ \
      uint8x16_t __ax = __a; \
      uint8x16_t __bx = __b; \
@@ -2484,7 +2484,7 @@
      __rv.__i; \
    })
 
-#define vqsubq_u16(__a, __b) \
+#define vqsubq_u16(__a, __b) __extension__ \
   ({ \
      uint16x8_t __ax = __a; \
      uint16x8_t __bx = __b; \
@@ -2493,7 +2493,7 @@
      __rv.__i; \
    })
 
-#define vqsubq_u32(__a, __b) \
+#define vqsubq_u32(__a, __b) __extension__ \
   ({ \
      uint32x4_t __ax = __a; \
      uint32x4_t __bx = __b; \
@@ -2502,7 +2502,7 @@
      __rv.__i; \
    })
 
-#define vqsubq_u64(__a, __b) \
+#define vqsubq_u64(__a, __b) __extension__ \
   ({ \
      uint64x2_t __ax = __a; \
      uint64x2_t __bx = __b; \
@@ -2511,7 +2511,7 @@
      __rv.__i; \
    })
 
-#define vsubhn_s16(__a, __b) \
+#define vsubhn_s16(__a, __b) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      int16x8_t __bx = __b; \
@@ -2520,7 +2520,7 @@
      __rv.__i; \
    })
 
-#define vsubhn_s32(__a, __b) \
+#define vsubhn_s32(__a, __b) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      int32x4_t __bx = __b; \
@@ -2529,7 +2529,7 @@
      __rv.__i; \
    })
 
-#define vsubhn_s64(__a, __b) \
+#define vsubhn_s64(__a, __b) __extension__ \
   ({ \
      int64x2_t __ax = __a; \
      int64x2_t __bx = __b; \
@@ -2538,7 +2538,7 @@
      __rv.__i; \
    })
 
-#define vsubhn_u16(__a, __b) \
+#define vsubhn_u16(__a, __b) __extension__ \
   ({ \
      uint16x8_t __ax = __a; \
      uint16x8_t __bx = __b; \
@@ -2547,7 +2547,7 @@
      __rv.__i; \
    })
 
-#define vsubhn_u32(__a, __b) \
+#define vsubhn_u32(__a, __b) __extension__ \
   ({ \
      uint32x4_t __ax = __a; \
      uint32x4_t __bx = __b; \
@@ -2556,7 +2556,7 @@
      __rv.__i; \
    })
 
-#define vsubhn_u64(__a, __b) \
+#define vsubhn_u64(__a, __b) __extension__ \
   ({ \
      uint64x2_t __ax = __a; \
      uint64x2_t __bx = __b; \
@@ -2565,7 +2565,7 @@
      __rv.__i; \
    })
 
-#define vrsubhn_s16(__a, __b) \
+#define vrsubhn_s16(__a, __b) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      int16x8_t __bx = __b; \
@@ -2574,7 +2574,7 @@
      __rv.__i; \
    })
 
-#define vrsubhn_s32(__a, __b) \
+#define vrsubhn_s32(__a, __b) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      int32x4_t __bx = __b; \
@@ -2583,7 +2583,7 @@
      __rv.__i; \
    })
 
-#define vrsubhn_s64(__a, __b) \
+#define vrsubhn_s64(__a, __b) __extension__ \
   ({ \
      int64x2_t __ax = __a; \
      int64x2_t __bx = __b; \
@@ -2592,7 +2592,7 @@
      __rv.__i; \
    })
 
-#define vrsubhn_u16(__a, __b) \
+#define vrsubhn_u16(__a, __b) __extension__ \
   ({ \
      uint16x8_t __ax = __a; \
      uint16x8_t __bx = __b; \
@@ -2601,7 +2601,7 @@
      __rv.__i; \
    })
 
-#define vrsubhn_u32(__a, __b) \
+#define vrsubhn_u32(__a, __b) __extension__ \
   ({ \
      uint32x4_t __ax = __a; \
      uint32x4_t __bx = __b; \
@@ -2610,7 +2610,7 @@
      __rv.__i; \
    })
 
-#define vrsubhn_u64(__a, __b) \
+#define vrsubhn_u64(__a, __b) __extension__ \
   ({ \
      uint64x2_t __ax = __a; \
      uint64x2_t __bx = __b; \
@@ -2619,7 +2619,7 @@
      __rv.__i; \
    })
 
-#define vceq_s8(__a, __b) \
+#define vceq_s8(__a, __b) __extension__ \
   ({ \
      int8x8_t __ax = __a; \
      int8x8_t __bx = __b; \
@@ -2628,7 +2628,7 @@
      __rv.__i; \
    })
 
-#define vceq_s16(__a, __b) \
+#define vceq_s16(__a, __b) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      int16x4_t __bx = __b; \
@@ -2637,7 +2637,7 @@
      __rv.__i; \
    })
 
-#define vceq_s32(__a, __b) \
+#define vceq_s32(__a, __b) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      int32x2_t __bx = __b; \
@@ -2646,7 +2646,7 @@
      __rv.__i; \
    })
 
-#define vceq_f32(__a, __b) \
+#define vceq_f32(__a, __b) __extension__ \
   ({ \
      float32x2_t __ax = __a; \
      float32x2_t __bx = __b; \
@@ -2655,7 +2655,7 @@
      __rv.__i; \
    })
 
-#define vceq_u8(__a, __b) \
+#define vceq_u8(__a, __b) __extension__ \
   ({ \
      uint8x8_t __ax = __a; \
      uint8x8_t __bx = __b; \
@@ -2664,7 +2664,7 @@
      __rv.__i; \
    })
 
-#define vceq_u16(__a, __b) \
+#define vceq_u16(__a, __b) __extension__ \
   ({ \
      uint16x4_t __ax = __a; \
      uint16x4_t __bx = __b; \
@@ -2673,7 +2673,7 @@
      __rv.__i; \
    })
 
-#define vceq_u32(__a, __b) \
+#define vceq_u32(__a, __b) __extension__ \
   ({ \
      uint32x2_t __ax = __a; \
      uint32x2_t __bx = __b; \
@@ -2682,7 +2682,7 @@
      __rv.__i; \
    })
 
-#define vceq_p8(__a, __b) \
+#define vceq_p8(__a, __b) __extension__ \
   ({ \
      poly8x8_t __ax = __a; \
      poly8x8_t __bx = __b; \
@@ -2691,7 +2691,7 @@
      __rv.__i; \
    })
 
-#define vceqq_s8(__a, __b) \
+#define vceqq_s8(__a, __b) __extension__ \
   ({ \
      int8x16_t __ax = __a; \
      int8x16_t __bx = __b; \
@@ -2700,7 +2700,7 @@
      __rv.__i; \
    })
 
-#define vceqq_s16(__a, __b) \
+#define vceqq_s16(__a, __b) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      int16x8_t __bx = __b; \
@@ -2709,7 +2709,7 @@
      __rv.__i; \
    })
 
-#define vceqq_s32(__a, __b) \
+#define vceqq_s32(__a, __b) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      int32x4_t __bx = __b; \
@@ -2718,7 +2718,7 @@
      __rv.__i; \
    })
 
-#define vceqq_f32(__a, __b) \
+#define vceqq_f32(__a, __b) __extension__ \
   ({ \
      float32x4_t __ax = __a; \
      float32x4_t __bx = __b; \
@@ -2727,7 +2727,7 @@
      __rv.__i; \
    })
 
-#define vceqq_u8(__a, __b) \
+#define vceqq_u8(__a, __b) __extension__ \
   ({ \
      uint8x16_t __ax = __a; \
      uint8x16_t __bx = __b; \
@@ -2736,7 +2736,7 @@
      __rv.__i; \
    })
 
-#define vceqq_u16(__a, __b) \
+#define vceqq_u16(__a, __b) __extension__ \
   ({ \
      uint16x8_t __ax = __a; \
      uint16x8_t __bx = __b; \
@@ -2745,7 +2745,7 @@
      __rv.__i; \
    })
 
-#define vceqq_u32(__a, __b) \
+#define vceqq_u32(__a, __b) __extension__ \
   ({ \
      uint32x4_t __ax = __a; \
      uint32x4_t __bx = __b; \
@@ -2754,7 +2754,7 @@
      __rv.__i; \
    })
 
-#define vceqq_p8(__a, __b) \
+#define vceqq_p8(__a, __b) __extension__ \
   ({ \
      poly8x16_t __ax = __a; \
      poly8x16_t __bx = __b; \
@@ -2763,7 +2763,7 @@
      __rv.__i; \
    })
 
-#define vcge_s8(__a, __b) \
+#define vcge_s8(__a, __b) __extension__ \
   ({ \
      int8x8_t __ax = __a; \
      int8x8_t __bx = __b; \
@@ -2772,7 +2772,7 @@
      __rv.__i; \
    })
 
-#define vcge_s16(__a, __b) \
+#define vcge_s16(__a, __b) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      int16x4_t __bx = __b; \
@@ -2781,7 +2781,7 @@
      __rv.__i; \
    })
 
-#define vcge_s32(__a, __b) \
+#define vcge_s32(__a, __b) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      int32x2_t __bx = __b; \
@@ -2790,7 +2790,7 @@
      __rv.__i; \
    })
 
-#define vcge_f32(__a, __b) \
+#define vcge_f32(__a, __b) __extension__ \
   ({ \
      float32x2_t __ax = __a; \
      float32x2_t __bx = __b; \
@@ -2799,7 +2799,7 @@
      __rv.__i; \
    })
 
-#define vcge_u8(__a, __b) \
+#define vcge_u8(__a, __b) __extension__ \
   ({ \
      uint8x8_t __ax = __a; \
      uint8x8_t __bx = __b; \
@@ -2808,7 +2808,7 @@
      __rv.__i; \
    })
 
-#define vcge_u16(__a, __b) \
+#define vcge_u16(__a, __b) __extension__ \
   ({ \
      uint16x4_t __ax = __a; \
      uint16x4_t __bx = __b; \
@@ -2817,7 +2817,7 @@
      __rv.__i; \
    })
 
-#define vcge_u32(__a, __b) \
+#define vcge_u32(__a, __b) __extension__ \
   ({ \
      uint32x2_t __ax = __a; \
      uint32x2_t __bx = __b; \
@@ -2826,7 +2826,7 @@
      __rv.__i; \
    })
 
-#define vcgeq_s8(__a, __b) \
+#define vcgeq_s8(__a, __b) __extension__ \
   ({ \
      int8x16_t __ax = __a; \
      int8x16_t __bx = __b; \
@@ -2835,7 +2835,7 @@
      __rv.__i; \
    })
 
-#define vcgeq_s16(__a, __b) \
+#define vcgeq_s16(__a, __b) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      int16x8_t __bx = __b; \
@@ -2844,7 +2844,7 @@
      __rv.__i; \
    })
 
-#define vcgeq_s32(__a, __b) \
+#define vcgeq_s32(__a, __b) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      int32x4_t __bx = __b; \
@@ -2853,7 +2853,7 @@
      __rv.__i; \
    })
 
-#define vcgeq_f32(__a, __b) \
+#define vcgeq_f32(__a, __b) __extension__ \
   ({ \
      float32x4_t __ax = __a; \
      float32x4_t __bx = __b; \
@@ -2862,7 +2862,7 @@
      __rv.__i; \
    })
 
-#define vcgeq_u8(__a, __b) \
+#define vcgeq_u8(__a, __b) __extension__ \
   ({ \
      uint8x16_t __ax = __a; \
      uint8x16_t __bx = __b; \
@@ -2871,7 +2871,7 @@
      __rv.__i; \
    })
 
-#define vcgeq_u16(__a, __b) \
+#define vcgeq_u16(__a, __b) __extension__ \
   ({ \
      uint16x8_t __ax = __a; \
      uint16x8_t __bx = __b; \
@@ -2880,7 +2880,7 @@
      __rv.__i; \
    })
 
-#define vcgeq_u32(__a, __b) \
+#define vcgeq_u32(__a, __b) __extension__ \
   ({ \
      uint32x4_t __ax = __a; \
      uint32x4_t __bx = __b; \
@@ -2889,7 +2889,7 @@
      __rv.__i; \
    })
 
-#define vcle_s8(__a, __b) \
+#define vcle_s8(__a, __b) __extension__ \
   ({ \
      int8x8_t __ax = __a; \
      int8x8_t __bx = __b; \
@@ -2898,7 +2898,7 @@
      __rv.__i; \
    })
 
-#define vcle_s16(__a, __b) \
+#define vcle_s16(__a, __b) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      int16x4_t __bx = __b; \
@@ -2907,7 +2907,7 @@
      __rv.__i; \
    })
 
-#define vcle_s32(__a, __b) \
+#define vcle_s32(__a, __b) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      int32x2_t __bx = __b; \
@@ -2916,7 +2916,7 @@
      __rv.__i; \
    })
 
-#define vcle_f32(__a, __b) \
+#define vcle_f32(__a, __b) __extension__ \
   ({ \
      float32x2_t __ax = __a; \
      float32x2_t __bx = __b; \
@@ -2925,7 +2925,7 @@
      __rv.__i; \
    })
 
-#define vcle_u8(__a, __b) \
+#define vcle_u8(__a, __b) __extension__ \
   ({ \
      uint8x8_t __ax = __a; \
      uint8x8_t __bx = __b; \
@@ -2934,7 +2934,7 @@
      __rv.__i; \
    })
 
-#define vcle_u16(__a, __b) \
+#define vcle_u16(__a, __b) __extension__ \
   ({ \
      uint16x4_t __ax = __a; \
      uint16x4_t __bx = __b; \
@@ -2943,7 +2943,7 @@
      __rv.__i; \
    })
 
-#define vcle_u32(__a, __b) \
+#define vcle_u32(__a, __b) __extension__ \
   ({ \
      uint32x2_t __ax = __a; \
      uint32x2_t __bx = __b; \
@@ -2952,7 +2952,7 @@
      __rv.__i; \
    })
 
-#define vcleq_s8(__a, __b) \
+#define vcleq_s8(__a, __b) __extension__ \
   ({ \
      int8x16_t __ax = __a; \
      int8x16_t __bx = __b; \
@@ -2961,7 +2961,7 @@
      __rv.__i; \
    })
 
-#define vcleq_s16(__a, __b) \
+#define vcleq_s16(__a, __b) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      int16x8_t __bx = __b; \
@@ -2970,7 +2970,7 @@
      __rv.__i; \
    })
 
-#define vcleq_s32(__a, __b) \
+#define vcleq_s32(__a, __b) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      int32x4_t __bx = __b; \
@@ -2979,7 +2979,7 @@
      __rv.__i; \
    })
 
-#define vcleq_f32(__a, __b) \
+#define vcleq_f32(__a, __b) __extension__ \
   ({ \
      float32x4_t __ax = __a; \
      float32x4_t __bx = __b; \
@@ -2988,7 +2988,7 @@
      __rv.__i; \
    })
 
-#define vcleq_u8(__a, __b) \
+#define vcleq_u8(__a, __b) __extension__ \
   ({ \
      uint8x16_t __ax = __a; \
      uint8x16_t __bx = __b; \
@@ -2997,7 +2997,7 @@
      __rv.__i; \
    })
 
-#define vcleq_u16(__a, __b) \
+#define vcleq_u16(__a, __b) __extension__ \
   ({ \
      uint16x8_t __ax = __a; \
      uint16x8_t __bx = __b; \
@@ -3006,7 +3006,7 @@
      __rv.__i; \
    })
 
-#define vcleq_u32(__a, __b) \
+#define vcleq_u32(__a, __b) __extension__ \
   ({ \
      uint32x4_t __ax = __a; \
      uint32x4_t __bx = __b; \
@@ -3015,7 +3015,7 @@
      __rv.__i; \
    })
 
-#define vcgt_s8(__a, __b) \
+#define vcgt_s8(__a, __b) __extension__ \
   ({ \
      int8x8_t __ax = __a; \
      int8x8_t __bx = __b; \
@@ -3024,7 +3024,7 @@
      __rv.__i; \
    })
 
-#define vcgt_s16(__a, __b) \
+#define vcgt_s16(__a, __b) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      int16x4_t __bx = __b; \
@@ -3033,7 +3033,7 @@
      __rv.__i; \
    })
 
-#define vcgt_s32(__a, __b) \
+#define vcgt_s32(__a, __b) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      int32x2_t __bx = __b; \
@@ -3042,7 +3042,7 @@
      __rv.__i; \
    })
 
-#define vcgt_f32(__a, __b) \
+#define vcgt_f32(__a, __b) __extension__ \
   ({ \
      float32x2_t __ax = __a; \
      float32x2_t __bx = __b; \
@@ -3051,7 +3051,7 @@
      __rv.__i; \
    })
 
-#define vcgt_u8(__a, __b) \
+#define vcgt_u8(__a, __b) __extension__ \
   ({ \
      uint8x8_t __ax = __a; \
      uint8x8_t __bx = __b; \
@@ -3060,7 +3060,7 @@
      __rv.__i; \
    })
 
-#define vcgt_u16(__a, __b) \
+#define vcgt_u16(__a, __b) __extension__ \
   ({ \
      uint16x4_t __ax = __a; \
      uint16x4_t __bx = __b; \
@@ -3069,7 +3069,7 @@
      __rv.__i; \
    })
 
-#define vcgt_u32(__a, __b) \
+#define vcgt_u32(__a, __b) __extension__ \
   ({ \
      uint32x2_t __ax = __a; \
      uint32x2_t __bx = __b; \
@@ -3078,7 +3078,7 @@
      __rv.__i; \
    })
 
-#define vcgtq_s8(__a, __b) \
+#define vcgtq_s8(__a, __b) __extension__ \
   ({ \
      int8x16_t __ax = __a; \
      int8x16_t __bx = __b; \
@@ -3087,7 +3087,7 @@
      __rv.__i; \
    })
 
-#define vcgtq_s16(__a, __b) \
+#define vcgtq_s16(__a, __b) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      int16x8_t __bx = __b; \
@@ -3096,7 +3096,7 @@
      __rv.__i; \
    })
 
-#define vcgtq_s32(__a, __b) \
+#define vcgtq_s32(__a, __b) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      int32x4_t __bx = __b; \
@@ -3105,7 +3105,7 @@
      __rv.__i; \
    })
 
-#define vcgtq_f32(__a, __b) \
+#define vcgtq_f32(__a, __b) __extension__ \
   ({ \
      float32x4_t __ax = __a; \
      float32x4_t __bx = __b; \
@@ -3114,7 +3114,7 @@
      __rv.__i; \
    })
 
-#define vcgtq_u8(__a, __b) \
+#define vcgtq_u8(__a, __b) __extension__ \
   ({ \
      uint8x16_t __ax = __a; \
      uint8x16_t __bx = __b; \
@@ -3123,7 +3123,7 @@
      __rv.__i; \
    })
 
-#define vcgtq_u16(__a, __b) \
+#define vcgtq_u16(__a, __b) __extension__ \
   ({ \
      uint16x8_t __ax = __a; \
      uint16x8_t __bx = __b; \
@@ -3132,7 +3132,7 @@
      __rv.__i; \
    })
 
-#define vcgtq_u32(__a, __b) \
+#define vcgtq_u32(__a, __b) __extension__ \
   ({ \
      uint32x4_t __ax = __a; \
      uint32x4_t __bx = __b; \
@@ -3141,7 +3141,7 @@
      __rv.__i; \
    })
 
-#define vclt_s8(__a, __b) \
+#define vclt_s8(__a, __b) __extension__ \
   ({ \
      int8x8_t __ax = __a; \
      int8x8_t __bx = __b; \
@@ -3150,7 +3150,7 @@
      __rv.__i; \
    })
 
-#define vclt_s16(__a, __b) \
+#define vclt_s16(__a, __b) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      int16x4_t __bx = __b; \
@@ -3159,7 +3159,7 @@
      __rv.__i; \
    })
 
-#define vclt_s32(__a, __b) \
+#define vclt_s32(__a, __b) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      int32x2_t __bx = __b; \
@@ -3168,7 +3168,7 @@
      __rv.__i; \
    })
 
-#define vclt_f32(__a, __b) \
+#define vclt_f32(__a, __b) __extension__ \
   ({ \
      float32x2_t __ax = __a; \
      float32x2_t __bx = __b; \
@@ -3177,7 +3177,7 @@
      __rv.__i; \
    })
 
-#define vclt_u8(__a, __b) \
+#define vclt_u8(__a, __b) __extension__ \
   ({ \
      uint8x8_t __ax = __a; \
      uint8x8_t __bx = __b; \
@@ -3186,7 +3186,7 @@
      __rv.__i; \
    })
 
-#define vclt_u16(__a, __b) \
+#define vclt_u16(__a, __b) __extension__ \
   ({ \
      uint16x4_t __ax = __a; \
      uint16x4_t __bx = __b; \
@@ -3195,7 +3195,7 @@
      __rv.__i; \
    })
 
-#define vclt_u32(__a, __b) \
+#define vclt_u32(__a, __b) __extension__ \
   ({ \
      uint32x2_t __ax = __a; \
      uint32x2_t __bx = __b; \
@@ -3204,7 +3204,7 @@
      __rv.__i; \
    })
 
-#define vcltq_s8(__a, __b) \
+#define vcltq_s8(__a, __b) __extension__ \
   ({ \
      int8x16_t __ax = __a; \
      int8x16_t __bx = __b; \
@@ -3213,7 +3213,7 @@
      __rv.__i; \
    })
 
-#define vcltq_s16(__a, __b) \
+#define vcltq_s16(__a, __b) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      int16x8_t __bx = __b; \
@@ -3222,7 +3222,7 @@
      __rv.__i; \
    })
 
-#define vcltq_s32(__a, __b) \
+#define vcltq_s32(__a, __b) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      int32x4_t __bx = __b; \
@@ -3231,7 +3231,7 @@
      __rv.__i; \
    })
 
-#define vcltq_f32(__a, __b) \
+#define vcltq_f32(__a, __b) __extension__ \
   ({ \
      float32x4_t __ax = __a; \
      float32x4_t __bx = __b; \
@@ -3240,7 +3240,7 @@
      __rv.__i; \
    })
 
-#define vcltq_u8(__a, __b) \
+#define vcltq_u8(__a, __b) __extension__ \
   ({ \
      uint8x16_t __ax = __a; \
      uint8x16_t __bx = __b; \
@@ -3249,7 +3249,7 @@
      __rv.__i; \
    })
 
-#define vcltq_u16(__a, __b) \
+#define vcltq_u16(__a, __b) __extension__ \
   ({ \
      uint16x8_t __ax = __a; \
      uint16x8_t __bx = __b; \
@@ -3258,7 +3258,7 @@
      __rv.__i; \
    })
 
-#define vcltq_u32(__a, __b) \
+#define vcltq_u32(__a, __b) __extension__ \
   ({ \
      uint32x4_t __ax = __a; \
      uint32x4_t __bx = __b; \
@@ -3267,7 +3267,7 @@
      __rv.__i; \
    })
 
-#define vcage_f32(__a, __b) \
+#define vcage_f32(__a, __b) __extension__ \
   ({ \
      float32x2_t __ax = __a; \
      float32x2_t __bx = __b; \
@@ -3276,7 +3276,7 @@
      __rv.__i; \
    })
 
-#define vcageq_f32(__a, __b) \
+#define vcageq_f32(__a, __b) __extension__ \
   ({ \
      float32x4_t __ax = __a; \
      float32x4_t __bx = __b; \
@@ -3285,7 +3285,7 @@
      __rv.__i; \
    })
 
-#define vcale_f32(__a, __b) \
+#define vcale_f32(__a, __b) __extension__ \
   ({ \
      float32x2_t __ax = __a; \
      float32x2_t __bx = __b; \
@@ -3294,7 +3294,7 @@
      __rv.__i; \
    })
 
-#define vcaleq_f32(__a, __b) \
+#define vcaleq_f32(__a, __b) __extension__ \
   ({ \
      float32x4_t __ax = __a; \
      float32x4_t __bx = __b; \
@@ -3303,7 +3303,7 @@
      __rv.__i; \
    })
 
-#define vcagt_f32(__a, __b) \
+#define vcagt_f32(__a, __b) __extension__ \
   ({ \
      float32x2_t __ax = __a; \
      float32x2_t __bx = __b; \
@@ -3312,7 +3312,7 @@
      __rv.__i; \
    })
 
-#define vcagtq_f32(__a, __b) \
+#define vcagtq_f32(__a, __b) __extension__ \
   ({ \
      float32x4_t __ax = __a; \
      float32x4_t __bx = __b; \
@@ -3321,7 +3321,7 @@
      __rv.__i; \
    })
 
-#define vcalt_f32(__a, __b) \
+#define vcalt_f32(__a, __b) __extension__ \
   ({ \
      float32x2_t __ax = __a; \
      float32x2_t __bx = __b; \
@@ -3330,7 +3330,7 @@
      __rv.__i; \
    })
 
-#define vcaltq_f32(__a, __b) \
+#define vcaltq_f32(__a, __b) __extension__ \
   ({ \
      float32x4_t __ax = __a; \
      float32x4_t __bx = __b; \
@@ -3339,7 +3339,7 @@
      __rv.__i; \
    })
 
-#define vtst_s8(__a, __b) \
+#define vtst_s8(__a, __b) __extension__ \
   ({ \
      int8x8_t __ax = __a; \
      int8x8_t __bx = __b; \
@@ -3348,7 +3348,7 @@
      __rv.__i; \
    })
 
-#define vtst_s16(__a, __b) \
+#define vtst_s16(__a, __b) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      int16x4_t __bx = __b; \
@@ -3357,7 +3357,7 @@
      __rv.__i; \
    })
 
-#define vtst_s32(__a, __b) \
+#define vtst_s32(__a, __b) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      int32x2_t __bx = __b; \
@@ -3366,7 +3366,7 @@
      __rv.__i; \
    })
 
-#define vtst_u8(__a, __b) \
+#define vtst_u8(__a, __b) __extension__ \
   ({ \
      uint8x8_t __ax = __a; \
      uint8x8_t __bx = __b; \
@@ -3375,7 +3375,7 @@
      __rv.__i; \
    })
 
-#define vtst_u16(__a, __b) \
+#define vtst_u16(__a, __b) __extension__ \
   ({ \
      uint16x4_t __ax = __a; \
      uint16x4_t __bx = __b; \
@@ -3384,7 +3384,7 @@
      __rv.__i; \
    })
 
-#define vtst_u32(__a, __b) \
+#define vtst_u32(__a, __b) __extension__ \
   ({ \
      uint32x2_t __ax = __a; \
      uint32x2_t __bx = __b; \
@@ -3393,7 +3393,7 @@
      __rv.__i; \
    })
 
-#define vtst_p8(__a, __b) \
+#define vtst_p8(__a, __b) __extension__ \
   ({ \
      poly8x8_t __ax = __a; \
      poly8x8_t __bx = __b; \
@@ -3402,7 +3402,7 @@
      __rv.__i; \
    })
 
-#define vtstq_s8(__a, __b) \
+#define vtstq_s8(__a, __b) __extension__ \
   ({ \
      int8x16_t __ax = __a; \
      int8x16_t __bx = __b; \
@@ -3411,7 +3411,7 @@
      __rv.__i; \
    })
 
-#define vtstq_s16(__a, __b) \
+#define vtstq_s16(__a, __b) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      int16x8_t __bx = __b; \
@@ -3420,7 +3420,7 @@
      __rv.__i; \
    })
 
-#define vtstq_s32(__a, __b) \
+#define vtstq_s32(__a, __b) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      int32x4_t __bx = __b; \
@@ -3429,7 +3429,7 @@
      __rv.__i; \
    })
 
-#define vtstq_u8(__a, __b) \
+#define vtstq_u8(__a, __b) __extension__ \
   ({ \
      uint8x16_t __ax = __a; \
      uint8x16_t __bx = __b; \
@@ -3438,7 +3438,7 @@
      __rv.__i; \
    })
 
-#define vtstq_u16(__a, __b) \
+#define vtstq_u16(__a, __b) __extension__ \
   ({ \
      uint16x8_t __ax = __a; \
      uint16x8_t __bx = __b; \
@@ -3447,7 +3447,7 @@
      __rv.__i; \
    })
 
-#define vtstq_u32(__a, __b) \
+#define vtstq_u32(__a, __b) __extension__ \
   ({ \
      uint32x4_t __ax = __a; \
      uint32x4_t __bx = __b; \
@@ -3456,7 +3456,7 @@
      __rv.__i; \
    })
 
-#define vtstq_p8(__a, __b) \
+#define vtstq_p8(__a, __b) __extension__ \
   ({ \
      poly8x16_t __ax = __a; \
      poly8x16_t __bx = __b; \
@@ -3465,7 +3465,7 @@
      __rv.__i; \
    })
 
-#define vabd_s8(__a, __b) \
+#define vabd_s8(__a, __b) __extension__ \
   ({ \
      int8x8_t __ax = __a; \
      int8x8_t __bx = __b; \
@@ -3474,7 +3474,7 @@
      __rv.__i; \
    })
 
-#define vabd_s16(__a, __b) \
+#define vabd_s16(__a, __b) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      int16x4_t __bx = __b; \
@@ -3483,7 +3483,7 @@
      __rv.__i; \
    })
 
-#define vabd_s32(__a, __b) \
+#define vabd_s32(__a, __b) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      int32x2_t __bx = __b; \
@@ -3492,7 +3492,7 @@
      __rv.__i; \
    })
 
-#define vabd_f32(__a, __b) \
+#define vabd_f32(__a, __b) __extension__ \
   ({ \
      float32x2_t __ax = __a; \
      float32x2_t __bx = __b; \
@@ -3501,7 +3501,7 @@
      __rv.__i; \
    })
 
-#define vabd_u8(__a, __b) \
+#define vabd_u8(__a, __b) __extension__ \
   ({ \
      uint8x8_t __ax = __a; \
      uint8x8_t __bx = __b; \
@@ -3510,7 +3510,7 @@
      __rv.__i; \
    })
 
-#define vabd_u16(__a, __b) \
+#define vabd_u16(__a, __b) __extension__ \
   ({ \
      uint16x4_t __ax = __a; \
      uint16x4_t __bx = __b; \
@@ -3519,7 +3519,7 @@
      __rv.__i; \
    })
 
-#define vabd_u32(__a, __b) \
+#define vabd_u32(__a, __b) __extension__ \
   ({ \
      uint32x2_t __ax = __a; \
      uint32x2_t __bx = __b; \
@@ -3528,7 +3528,7 @@
      __rv.__i; \
    })
 
-#define vabdq_s8(__a, __b) \
+#define vabdq_s8(__a, __b) __extension__ \
   ({ \
      int8x16_t __ax = __a; \
      int8x16_t __bx = __b; \
@@ -3537,7 +3537,7 @@
      __rv.__i; \
    })
 
-#define vabdq_s16(__a, __b) \
+#define vabdq_s16(__a, __b) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      int16x8_t __bx = __b; \
@@ -3546,7 +3546,7 @@
      __rv.__i; \
    })
 
-#define vabdq_s32(__a, __b) \
+#define vabdq_s32(__a, __b) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      int32x4_t __bx = __b; \
@@ -3555,7 +3555,7 @@
      __rv.__i; \
    })
 
-#define vabdq_f32(__a, __b) \
+#define vabdq_f32(__a, __b) __extension__ \
   ({ \
      float32x4_t __ax = __a; \
      float32x4_t __bx = __b; \
@@ -3564,7 +3564,7 @@
      __rv.__i; \
    })
 
-#define vabdq_u8(__a, __b) \
+#define vabdq_u8(__a, __b) __extension__ \
   ({ \
      uint8x16_t __ax = __a; \
      uint8x16_t __bx = __b; \
@@ -3573,7 +3573,7 @@
      __rv.__i; \
    })
 
-#define vabdq_u16(__a, __b) \
+#define vabdq_u16(__a, __b) __extension__ \
   ({ \
      uint16x8_t __ax = __a; \
      uint16x8_t __bx = __b; \
@@ -3582,7 +3582,7 @@
      __rv.__i; \
    })
 
-#define vabdq_u32(__a, __b) \
+#define vabdq_u32(__a, __b) __extension__ \
   ({ \
      uint32x4_t __ax = __a; \
      uint32x4_t __bx = __b; \
@@ -3591,7 +3591,7 @@
      __rv.__i; \
    })
 
-#define vabdl_s8(__a, __b) \
+#define vabdl_s8(__a, __b) __extension__ \
   ({ \
      int8x8_t __ax = __a; \
      int8x8_t __bx = __b; \
@@ -3600,7 +3600,7 @@
      __rv.__i; \
    })
 
-#define vabdl_s16(__a, __b) \
+#define vabdl_s16(__a, __b) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      int16x4_t __bx = __b; \
@@ -3609,7 +3609,7 @@
      __rv.__i; \
    })
 
-#define vabdl_s32(__a, __b) \
+#define vabdl_s32(__a, __b) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      int32x2_t __bx = __b; \
@@ -3618,7 +3618,7 @@
      __rv.__i; \
    })
 
-#define vabdl_u8(__a, __b) \
+#define vabdl_u8(__a, __b) __extension__ \
   ({ \
      uint8x8_t __ax = __a; \
      uint8x8_t __bx = __b; \
@@ -3627,7 +3627,7 @@
      __rv.__i; \
    })
 
-#define vabdl_u16(__a, __b) \
+#define vabdl_u16(__a, __b) __extension__ \
   ({ \
      uint16x4_t __ax = __a; \
      uint16x4_t __bx = __b; \
@@ -3636,7 +3636,7 @@
      __rv.__i; \
    })
 
-#define vabdl_u32(__a, __b) \
+#define vabdl_u32(__a, __b) __extension__ \
   ({ \
      uint32x2_t __ax = __a; \
      uint32x2_t __bx = __b; \
@@ -3645,7 +3645,7 @@
      __rv.__i; \
    })
 
-#define vaba_s8(__a, __b, __c) \
+#define vaba_s8(__a, __b, __c) __extension__ \
   ({ \
      int8x8_t __ax = __a; \
      int8x8_t __bx = __b; \
@@ -3655,7 +3655,7 @@
      __rv.__i; \
    })
 
-#define vaba_s16(__a, __b, __c) \
+#define vaba_s16(__a, __b, __c) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      int16x4_t __bx = __b; \
@@ -3665,7 +3665,7 @@
      __rv.__i; \
    })
 
-#define vaba_s32(__a, __b, __c) \
+#define vaba_s32(__a, __b, __c) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      int32x2_t __bx = __b; \
@@ -3675,7 +3675,7 @@
      __rv.__i; \
    })
 
-#define vaba_u8(__a, __b, __c) \
+#define vaba_u8(__a, __b, __c) __extension__ \
   ({ \
      uint8x8_t __ax = __a; \
      uint8x8_t __bx = __b; \
@@ -3685,7 +3685,7 @@
      __rv.__i; \
    })
 
-#define vaba_u16(__a, __b, __c) \
+#define vaba_u16(__a, __b, __c) __extension__ \
   ({ \
      uint16x4_t __ax = __a; \
      uint16x4_t __bx = __b; \
@@ -3695,7 +3695,7 @@
      __rv.__i; \
    })
 
-#define vaba_u32(__a, __b, __c) \
+#define vaba_u32(__a, __b, __c) __extension__ \
   ({ \
      uint32x2_t __ax = __a; \
      uint32x2_t __bx = __b; \
@@ -3705,7 +3705,7 @@
      __rv.__i; \
    })
 
-#define vabaq_s8(__a, __b, __c) \
+#define vabaq_s8(__a, __b, __c) __extension__ \
   ({ \
      int8x16_t __ax = __a; \
      int8x16_t __bx = __b; \
@@ -3715,7 +3715,7 @@
      __rv.__i; \
    })
 
-#define vabaq_s16(__a, __b, __c) \
+#define vabaq_s16(__a, __b, __c) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      int16x8_t __bx = __b; \
@@ -3725,7 +3725,7 @@
      __rv.__i; \
    })
 
-#define vabaq_s32(__a, __b, __c) \
+#define vabaq_s32(__a, __b, __c) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      int32x4_t __bx = __b; \
@@ -3735,7 +3735,7 @@
      __rv.__i; \
    })
 
-#define vabaq_u8(__a, __b, __c) \
+#define vabaq_u8(__a, __b, __c) __extension__ \
   ({ \
      uint8x16_t __ax = __a; \
      uint8x16_t __bx = __b; \
@@ -3745,7 +3745,7 @@
      __rv.__i; \
    })
 
-#define vabaq_u16(__a, __b, __c) \
+#define vabaq_u16(__a, __b, __c) __extension__ \
   ({ \
      uint16x8_t __ax = __a; \
      uint16x8_t __bx = __b; \
@@ -3755,7 +3755,7 @@
      __rv.__i; \
    })
 
-#define vabaq_u32(__a, __b, __c) \
+#define vabaq_u32(__a, __b, __c) __extension__ \
   ({ \
      uint32x4_t __ax = __a; \
      uint32x4_t __bx = __b; \
@@ -3765,7 +3765,7 @@
      __rv.__i; \
    })
 
-#define vabal_s8(__a, __b, __c) \
+#define vabal_s8(__a, __b, __c) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      int8x8_t __bx = __b; \
@@ -3775,7 +3775,7 @@
      __rv.__i; \
    })
 
-#define vabal_s16(__a, __b, __c) \
+#define vabal_s16(__a, __b, __c) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      int16x4_t __bx = __b; \
@@ -3785,7 +3785,7 @@
      __rv.__i; \
    })
 
-#define vabal_s32(__a, __b, __c) \
+#define vabal_s32(__a, __b, __c) __extension__ \
   ({ \
      int64x2_t __ax = __a; \
      int32x2_t __bx = __b; \
@@ -3795,7 +3795,7 @@
      __rv.__i; \
    })
 
-#define vabal_u8(__a, __b, __c) \
+#define vabal_u8(__a, __b, __c) __extension__ \
   ({ \
      uint16x8_t __ax = __a; \
      uint8x8_t __bx = __b; \
@@ -3805,7 +3805,7 @@
      __rv.__i; \
    })
 
-#define vabal_u16(__a, __b, __c) \
+#define vabal_u16(__a, __b, __c) __extension__ \
   ({ \
      uint32x4_t __ax = __a; \
      uint16x4_t __bx = __b; \
@@ -3815,7 +3815,7 @@
      __rv.__i; \
    })
 
-#define vabal_u32(__a, __b, __c) \
+#define vabal_u32(__a, __b, __c) __extension__ \
   ({ \
      uint64x2_t __ax = __a; \
      uint32x2_t __bx = __b; \
@@ -3825,7 +3825,7 @@
      __rv.__i; \
    })
 
-#define vmax_s8(__a, __b) \
+#define vmax_s8(__a, __b) __extension__ \
   ({ \
      int8x8_t __ax = __a; \
      int8x8_t __bx = __b; \
@@ -3834,7 +3834,7 @@
      __rv.__i; \
    })
 
-#define vmax_s16(__a, __b) \
+#define vmax_s16(__a, __b) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      int16x4_t __bx = __b; \
@@ -3843,7 +3843,7 @@
      __rv.__i; \
    })
 
-#define vmax_s32(__a, __b) \
+#define vmax_s32(__a, __b) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      int32x2_t __bx = __b; \
@@ -3852,7 +3852,7 @@
      __rv.__i; \
    })
 
-#define vmax_f32(__a, __b) \
+#define vmax_f32(__a, __b) __extension__ \
   ({ \
      float32x2_t __ax = __a; \
      float32x2_t __bx = __b; \
@@ -3861,7 +3861,7 @@
      __rv.__i; \
    })
 
-#define vmax_u8(__a, __b) \
+#define vmax_u8(__a, __b) __extension__ \
   ({ \
      uint8x8_t __ax = __a; \
      uint8x8_t __bx = __b; \
@@ -3870,7 +3870,7 @@
      __rv.__i; \
    })
 
-#define vmax_u16(__a, __b) \
+#define vmax_u16(__a, __b) __extension__ \
   ({ \
      uint16x4_t __ax = __a; \
      uint16x4_t __bx = __b; \
@@ -3879,7 +3879,7 @@
      __rv.__i; \
    })
 
-#define vmax_u32(__a, __b) \
+#define vmax_u32(__a, __b) __extension__ \
   ({ \
      uint32x2_t __ax = __a; \
      uint32x2_t __bx = __b; \
@@ -3888,7 +3888,7 @@
      __rv.__i; \
    })
 
-#define vmaxq_s8(__a, __b) \
+#define vmaxq_s8(__a, __b) __extension__ \
   ({ \
      int8x16_t __ax = __a; \
      int8x16_t __bx = __b; \
@@ -3897,7 +3897,7 @@
      __rv.__i; \
    })
 
-#define vmaxq_s16(__a, __b) \
+#define vmaxq_s16(__a, __b) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      int16x8_t __bx = __b; \
@@ -3906,7 +3906,7 @@
      __rv.__i; \
    })
 
-#define vmaxq_s32(__a, __b) \
+#define vmaxq_s32(__a, __b) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      int32x4_t __bx = __b; \
@@ -3915,7 +3915,7 @@
      __rv.__i; \
    })
 
-#define vmaxq_f32(__a, __b) \
+#define vmaxq_f32(__a, __b) __extension__ \
   ({ \
      float32x4_t __ax = __a; \
      float32x4_t __bx = __b; \
@@ -3924,7 +3924,7 @@
      __rv.__i; \
    })
 
-#define vmaxq_u8(__a, __b) \
+#define vmaxq_u8(__a, __b) __extension__ \
   ({ \
      uint8x16_t __ax = __a; \
      uint8x16_t __bx = __b; \
@@ -3933,7 +3933,7 @@
      __rv.__i; \
    })
 
-#define vmaxq_u16(__a, __b) \
+#define vmaxq_u16(__a, __b) __extension__ \
   ({ \
      uint16x8_t __ax = __a; \
      uint16x8_t __bx = __b; \
@@ -3942,7 +3942,7 @@
      __rv.__i; \
    })
 
-#define vmaxq_u32(__a, __b) \
+#define vmaxq_u32(__a, __b) __extension__ \
   ({ \
      uint32x4_t __ax = __a; \
      uint32x4_t __bx = __b; \
@@ -3951,7 +3951,7 @@
      __rv.__i; \
    })
 
-#define vmin_s8(__a, __b) \
+#define vmin_s8(__a, __b) __extension__ \
   ({ \
      int8x8_t __ax = __a; \
      int8x8_t __bx = __b; \
@@ -3960,7 +3960,7 @@
      __rv.__i; \
    })
 
-#define vmin_s16(__a, __b) \
+#define vmin_s16(__a, __b) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      int16x4_t __bx = __b; \
@@ -3969,7 +3969,7 @@
      __rv.__i; \
    })
 
-#define vmin_s32(__a, __b) \
+#define vmin_s32(__a, __b) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      int32x2_t __bx = __b; \
@@ -3978,7 +3978,7 @@
      __rv.__i; \
    })
 
-#define vmin_f32(__a, __b) \
+#define vmin_f32(__a, __b) __extension__ \
   ({ \
      float32x2_t __ax = __a; \
      float32x2_t __bx = __b; \
@@ -3987,7 +3987,7 @@
      __rv.__i; \
    })
 
-#define vmin_u8(__a, __b) \
+#define vmin_u8(__a, __b) __extension__ \
   ({ \
      uint8x8_t __ax = __a; \
      uint8x8_t __bx = __b; \
@@ -3996,7 +3996,7 @@
      __rv.__i; \
    })
 
-#define vmin_u16(__a, __b) \
+#define vmin_u16(__a, __b) __extension__ \
   ({ \
      uint16x4_t __ax = __a; \
      uint16x4_t __bx = __b; \
@@ -4005,7 +4005,7 @@
      __rv.__i; \
    })
 
-#define vmin_u32(__a, __b) \
+#define vmin_u32(__a, __b) __extension__ \
   ({ \
      uint32x2_t __ax = __a; \
      uint32x2_t __bx = __b; \
@@ -4014,7 +4014,7 @@
      __rv.__i; \
    })
 
-#define vminq_s8(__a, __b) \
+#define vminq_s8(__a, __b) __extension__ \
   ({ \
      int8x16_t __ax = __a; \
      int8x16_t __bx = __b; \
@@ -4023,7 +4023,7 @@
      __rv.__i; \
    })
 
-#define vminq_s16(__a, __b) \
+#define vminq_s16(__a, __b) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      int16x8_t __bx = __b; \
@@ -4032,7 +4032,7 @@
      __rv.__i; \
    })
 
-#define vminq_s32(__a, __b) \
+#define vminq_s32(__a, __b) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      int32x4_t __bx = __b; \
@@ -4041,7 +4041,7 @@
      __rv.__i; \
    })
 
-#define vminq_f32(__a, __b) \
+#define vminq_f32(__a, __b) __extension__ \
   ({ \
      float32x4_t __ax = __a; \
      float32x4_t __bx = __b; \
@@ -4050,7 +4050,7 @@
      __rv.__i; \
    })
 
-#define vminq_u8(__a, __b) \
+#define vminq_u8(__a, __b) __extension__ \
   ({ \
      uint8x16_t __ax = __a; \
      uint8x16_t __bx = __b; \
@@ -4059,7 +4059,7 @@
      __rv.__i; \
    })
 
-#define vminq_u16(__a, __b) \
+#define vminq_u16(__a, __b) __extension__ \
   ({ \
      uint16x8_t __ax = __a; \
      uint16x8_t __bx = __b; \
@@ -4068,7 +4068,7 @@
      __rv.__i; \
    })
 
-#define vminq_u32(__a, __b) \
+#define vminq_u32(__a, __b) __extension__ \
   ({ \
      uint32x4_t __ax = __a; \
      uint32x4_t __bx = __b; \
@@ -4077,7 +4077,7 @@
      __rv.__i; \
    })
 
-#define vpadd_s8(__a, __b) \
+#define vpadd_s8(__a, __b) __extension__ \
   ({ \
      int8x8_t __ax = __a; \
      int8x8_t __bx = __b; \
@@ -4086,7 +4086,7 @@
      __rv.__i; \
    })
 
-#define vpadd_s16(__a, __b) \
+#define vpadd_s16(__a, __b) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      int16x4_t __bx = __b; \
@@ -4095,7 +4095,7 @@
      __rv.__i; \
    })
 
-#define vpadd_s32(__a, __b) \
+#define vpadd_s32(__a, __b) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      int32x2_t __bx = __b; \
@@ -4104,7 +4104,7 @@
      __rv.__i; \
    })
 
-#define vpadd_f32(__a, __b) \
+#define vpadd_f32(__a, __b) __extension__ \
   ({ \
      float32x2_t __ax = __a; \
      float32x2_t __bx = __b; \
@@ -4113,7 +4113,7 @@
      __rv.__i; \
    })
 
-#define vpadd_u8(__a, __b) \
+#define vpadd_u8(__a, __b) __extension__ \
   ({ \
      uint8x8_t __ax = __a; \
      uint8x8_t __bx = __b; \
@@ -4122,7 +4122,7 @@
      __rv.__i; \
    })
 
-#define vpadd_u16(__a, __b) \
+#define vpadd_u16(__a, __b) __extension__ \
   ({ \
      uint16x4_t __ax = __a; \
      uint16x4_t __bx = __b; \
@@ -4131,7 +4131,7 @@
      __rv.__i; \
    })
 
-#define vpadd_u32(__a, __b) \
+#define vpadd_u32(__a, __b) __extension__ \
   ({ \
      uint32x2_t __ax = __a; \
      uint32x2_t __bx = __b; \
@@ -4140,7 +4140,7 @@
      __rv.__i; \
    })
 
-#define vpaddl_s8(__a) \
+#define vpaddl_s8(__a) __extension__ \
   ({ \
      int8x8_t __ax = __a; \
      union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -4148,7 +4148,7 @@
      __rv.__i; \
    })
 
-#define vpaddl_s16(__a) \
+#define vpaddl_s16(__a) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \
@@ -4156,7 +4156,7 @@
      __rv.__i; \
    })
 
-#define vpaddl_s32(__a) \
+#define vpaddl_s32(__a) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      union { int64x1_t __i; __neon_int64x1_t __o; } __rv; \
@@ -4164,7 +4164,7 @@
      __rv.__i; \
    })
 
-#define vpaddl_u8(__a) \
+#define vpaddl_u8(__a) __extension__ \
   ({ \
      uint8x8_t __ax = __a; \
      union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -4172,7 +4172,7 @@
      __rv.__i; \
    })
 
-#define vpaddl_u16(__a) \
+#define vpaddl_u16(__a) __extension__ \
   ({ \
      uint16x4_t __ax = __a; \
      union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \
@@ -4180,7 +4180,7 @@
      __rv.__i; \
    })
 
-#define vpaddl_u32(__a) \
+#define vpaddl_u32(__a) __extension__ \
   ({ \
      uint32x2_t __ax = __a; \
      union { uint64x1_t __i; __neon_int64x1_t __o; } __rv; \
@@ -4188,7 +4188,7 @@
      __rv.__i; \
    })
 
-#define vpaddlq_s8(__a) \
+#define vpaddlq_s8(__a) __extension__ \
   ({ \
      int8x16_t __ax = __a; \
      union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \
@@ -4196,7 +4196,7 @@
      __rv.__i; \
    })
 
-#define vpaddlq_s16(__a) \
+#define vpaddlq_s16(__a) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \
@@ -4204,7 +4204,7 @@
      __rv.__i; \
    })
 
-#define vpaddlq_s32(__a) \
+#define vpaddlq_s32(__a) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      union { int64x2_t __i; __neon_int64x2_t __o; } __rv; \
@@ -4212,7 +4212,7 @@
      __rv.__i; \
    })
 
-#define vpaddlq_u8(__a) \
+#define vpaddlq_u8(__a) __extension__ \
   ({ \
      uint8x16_t __ax = __a; \
      union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \
@@ -4220,7 +4220,7 @@
      __rv.__i; \
    })
 
-#define vpaddlq_u16(__a) \
+#define vpaddlq_u16(__a) __extension__ \
   ({ \
      uint16x8_t __ax = __a; \
      union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \
@@ -4228,7 +4228,7 @@
      __rv.__i; \
    })
 
-#define vpaddlq_u32(__a) \
+#define vpaddlq_u32(__a) __extension__ \
   ({ \
      uint32x4_t __ax = __a; \
      union { uint64x2_t __i; __neon_int64x2_t __o; } __rv; \
@@ -4236,7 +4236,7 @@
      __rv.__i; \
    })
 
-#define vpadal_s8(__a, __b) \
+#define vpadal_s8(__a, __b) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      int8x8_t __bx = __b; \
@@ -4245,7 +4245,7 @@
      __rv.__i; \
    })
 
-#define vpadal_s16(__a, __b) \
+#define vpadal_s16(__a, __b) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      int16x4_t __bx = __b; \
@@ -4254,7 +4254,7 @@
      __rv.__i; \
    })
 
-#define vpadal_s32(__a, __b) \
+#define vpadal_s32(__a, __b) __extension__ \
   ({ \
      int64x1_t __ax = __a; \
      int32x2_t __bx = __b; \
@@ -4263,7 +4263,7 @@
      __rv.__i; \
    })
 
-#define vpadal_u8(__a, __b) \
+#define vpadal_u8(__a, __b) __extension__ \
   ({ \
      uint16x4_t __ax = __a; \
      uint8x8_t __bx = __b; \
@@ -4272,7 +4272,7 @@
      __rv.__i; \
    })
 
-#define vpadal_u16(__a, __b) \
+#define vpadal_u16(__a, __b) __extension__ \
   ({ \
      uint32x2_t __ax = __a; \
      uint16x4_t __bx = __b; \
@@ -4281,7 +4281,7 @@
      __rv.__i; \
    })
 
-#define vpadal_u32(__a, __b) \
+#define vpadal_u32(__a, __b) __extension__ \
   ({ \
      uint64x1_t __ax = __a; \
      uint32x2_t __bx = __b; \
@@ -4290,7 +4290,7 @@
      __rv.__i; \
    })
 
-#define vpadalq_s8(__a, __b) \
+#define vpadalq_s8(__a, __b) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      int8x16_t __bx = __b; \
@@ -4299,7 +4299,7 @@
      __rv.__i; \
    })
 
-#define vpadalq_s16(__a, __b) \
+#define vpadalq_s16(__a, __b) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      int16x8_t __bx = __b; \
@@ -4308,7 +4308,7 @@
      __rv.__i; \
    })
 
-#define vpadalq_s32(__a, __b) \
+#define vpadalq_s32(__a, __b) __extension__ \
   ({ \
      int64x2_t __ax = __a; \
      int32x4_t __bx = __b; \
@@ -4317,7 +4317,7 @@
      __rv.__i; \
    })
 
-#define vpadalq_u8(__a, __b) \
+#define vpadalq_u8(__a, __b) __extension__ \
   ({ \
      uint16x8_t __ax = __a; \
      uint8x16_t __bx = __b; \
@@ -4326,7 +4326,7 @@
      __rv.__i; \
    })
 
-#define vpadalq_u16(__a, __b) \
+#define vpadalq_u16(__a, __b) __extension__ \
   ({ \
      uint32x4_t __ax = __a; \
      uint16x8_t __bx = __b; \
@@ -4335,7 +4335,7 @@
      __rv.__i; \
    })
 
-#define vpadalq_u32(__a, __b) \
+#define vpadalq_u32(__a, __b) __extension__ \
   ({ \
      uint64x2_t __ax = __a; \
      uint32x4_t __bx = __b; \
@@ -4344,7 +4344,7 @@
      __rv.__i; \
    })
 
-#define vpmax_s8(__a, __b) \
+#define vpmax_s8(__a, __b) __extension__ \
   ({ \
      int8x8_t __ax = __a; \
      int8x8_t __bx = __b; \
@@ -4353,7 +4353,7 @@
      __rv.__i; \
    })
 
-#define vpmax_s16(__a, __b) \
+#define vpmax_s16(__a, __b) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      int16x4_t __bx = __b; \
@@ -4362,7 +4362,7 @@
      __rv.__i; \
    })
 
-#define vpmax_s32(__a, __b) \
+#define vpmax_s32(__a, __b) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      int32x2_t __bx = __b; \
@@ -4371,7 +4371,7 @@
      __rv.__i; \
    })
 
-#define vpmax_f32(__a, __b) \
+#define vpmax_f32(__a, __b) __extension__ \
   ({ \
      float32x2_t __ax = __a; \
      float32x2_t __bx = __b; \
@@ -4380,7 +4380,7 @@
      __rv.__i; \
    })
 
-#define vpmax_u8(__a, __b) \
+#define vpmax_u8(__a, __b) __extension__ \
   ({ \
      uint8x8_t __ax = __a; \
      uint8x8_t __bx = __b; \
@@ -4389,7 +4389,7 @@
      __rv.__i; \
    })
 
-#define vpmax_u16(__a, __b) \
+#define vpmax_u16(__a, __b) __extension__ \
   ({ \
      uint16x4_t __ax = __a; \
      uint16x4_t __bx = __b; \
@@ -4398,7 +4398,7 @@
      __rv.__i; \
    })
 
-#define vpmax_u32(__a, __b) \
+#define vpmax_u32(__a, __b) __extension__ \
   ({ \
      uint32x2_t __ax = __a; \
      uint32x2_t __bx = __b; \
@@ -4407,7 +4407,7 @@
      __rv.__i; \
    })
 
-#define vpmin_s8(__a, __b) \
+#define vpmin_s8(__a, __b) __extension__ \
   ({ \
      int8x8_t __ax = __a; \
      int8x8_t __bx = __b; \
@@ -4416,7 +4416,7 @@
      __rv.__i; \
    })
 
-#define vpmin_s16(__a, __b) \
+#define vpmin_s16(__a, __b) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      int16x4_t __bx = __b; \
@@ -4425,7 +4425,7 @@
      __rv.__i; \
    })
 
-#define vpmin_s32(__a, __b) \
+#define vpmin_s32(__a, __b) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      int32x2_t __bx = __b; \
@@ -4434,7 +4434,7 @@
      __rv.__i; \
    })
 
-#define vpmin_f32(__a, __b) \
+#define vpmin_f32(__a, __b) __extension__ \
   ({ \
      float32x2_t __ax = __a; \
      float32x2_t __bx = __b; \
@@ -4443,7 +4443,7 @@
      __rv.__i; \
    })
 
-#define vpmin_u8(__a, __b) \
+#define vpmin_u8(__a, __b) __extension__ \
   ({ \
      uint8x8_t __ax = __a; \
      uint8x8_t __bx = __b; \
@@ -4452,7 +4452,7 @@
      __rv.__i; \
    })
 
-#define vpmin_u16(__a, __b) \
+#define vpmin_u16(__a, __b) __extension__ \
   ({ \
      uint16x4_t __ax = __a; \
      uint16x4_t __bx = __b; \
@@ -4461,7 +4461,7 @@
      __rv.__i; \
    })
 
-#define vpmin_u32(__a, __b) \
+#define vpmin_u32(__a, __b) __extension__ \
   ({ \
      uint32x2_t __ax = __a; \
      uint32x2_t __bx = __b; \
@@ -4470,7 +4470,7 @@
      __rv.__i; \
    })
 
-#define vrecps_f32(__a, __b) \
+#define vrecps_f32(__a, __b) __extension__ \
   ({ \
      float32x2_t __ax = __a; \
      float32x2_t __bx = __b; \
@@ -4479,7 +4479,7 @@
      __rv.__i; \
    })
 
-#define vrecpsq_f32(__a, __b) \
+#define vrecpsq_f32(__a, __b) __extension__ \
   ({ \
      float32x4_t __ax = __a; \
      float32x4_t __bx = __b; \
@@ -4488,7 +4488,7 @@
      __rv.__i; \
    })
 
-#define vrsqrts_f32(__a, __b) \
+#define vrsqrts_f32(__a, __b) __extension__ \
   ({ \
      float32x2_t __ax = __a; \
      float32x2_t __bx = __b; \
@@ -4497,7 +4497,7 @@
      __rv.__i; \
    })
 
-#define vrsqrtsq_f32(__a, __b) \
+#define vrsqrtsq_f32(__a, __b) __extension__ \
   ({ \
      float32x4_t __ax = __a; \
      float32x4_t __bx = __b; \
@@ -4506,7 +4506,7 @@
      __rv.__i; \
    })
 
-#define vshl_s8(__a, __b) \
+#define vshl_s8(__a, __b) __extension__ \
   ({ \
      int8x8_t __ax = __a; \
      int8x8_t __bx = __b; \
@@ -4515,7 +4515,7 @@
      __rv.__i; \
    })
 
-#define vshl_s16(__a, __b) \
+#define vshl_s16(__a, __b) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      int16x4_t __bx = __b; \
@@ -4524,7 +4524,7 @@
      __rv.__i; \
    })
 
-#define vshl_s32(__a, __b) \
+#define vshl_s32(__a, __b) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      int32x2_t __bx = __b; \
@@ -4533,7 +4533,7 @@
      __rv.__i; \
    })
 
-#define vshl_s64(__a, __b) \
+#define vshl_s64(__a, __b) __extension__ \
   ({ \
      int64x1_t __ax = __a; \
      int64x1_t __bx = __b; \
@@ -4542,7 +4542,7 @@
      __rv.__i; \
    })
 
-#define vshl_u8(__a, __b) \
+#define vshl_u8(__a, __b) __extension__ \
   ({ \
      uint8x8_t __ax = __a; \
      int8x8_t __bx = __b; \
@@ -4551,7 +4551,7 @@
      __rv.__i; \
    })
 
-#define vshl_u16(__a, __b) \
+#define vshl_u16(__a, __b) __extension__ \
   ({ \
      uint16x4_t __ax = __a; \
      int16x4_t __bx = __b; \
@@ -4560,7 +4560,7 @@
      __rv.__i; \
    })
 
-#define vshl_u32(__a, __b) \
+#define vshl_u32(__a, __b) __extension__ \
   ({ \
      uint32x2_t __ax = __a; \
      int32x2_t __bx = __b; \
@@ -4569,7 +4569,7 @@
      __rv.__i; \
    })
 
-#define vshl_u64(__a, __b) \
+#define vshl_u64(__a, __b) __extension__ \
   ({ \
      uint64x1_t __ax = __a; \
      int64x1_t __bx = __b; \
@@ -4578,7 +4578,7 @@
      __rv.__i; \
    })
 
-#define vshlq_s8(__a, __b) \
+#define vshlq_s8(__a, __b) __extension__ \
   ({ \
      int8x16_t __ax = __a; \
      int8x16_t __bx = __b; \
@@ -4587,7 +4587,7 @@
      __rv.__i; \
    })
 
-#define vshlq_s16(__a, __b) \
+#define vshlq_s16(__a, __b) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      int16x8_t __bx = __b; \
@@ -4596,7 +4596,7 @@
      __rv.__i; \
    })
 
-#define vshlq_s32(__a, __b) \
+#define vshlq_s32(__a, __b) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      int32x4_t __bx = __b; \
@@ -4605,7 +4605,7 @@
      __rv.__i; \
    })
 
-#define vshlq_s64(__a, __b) \
+#define vshlq_s64(__a, __b) __extension__ \
   ({ \
      int64x2_t __ax = __a; \
      int64x2_t __bx = __b; \
@@ -4614,7 +4614,7 @@
      __rv.__i; \
    })
 
-#define vshlq_u8(__a, __b) \
+#define vshlq_u8(__a, __b) __extension__ \
   ({ \
      uint8x16_t __ax = __a; \
      int8x16_t __bx = __b; \
@@ -4623,7 +4623,7 @@
      __rv.__i; \
    })
 
-#define vshlq_u16(__a, __b) \
+#define vshlq_u16(__a, __b) __extension__ \
   ({ \
      uint16x8_t __ax = __a; \
      int16x8_t __bx = __b; \
@@ -4632,7 +4632,7 @@
      __rv.__i; \
    })
 
-#define vshlq_u32(__a, __b) \
+#define vshlq_u32(__a, __b) __extension__ \
   ({ \
      uint32x4_t __ax = __a; \
      int32x4_t __bx = __b; \
@@ -4641,7 +4641,7 @@
      __rv.__i; \
    })
 
-#define vshlq_u64(__a, __b) \
+#define vshlq_u64(__a, __b) __extension__ \
   ({ \
      uint64x2_t __ax = __a; \
      int64x2_t __bx = __b; \
@@ -4650,7 +4650,7 @@
      __rv.__i; \
    })
 
-#define vrshl_s8(__a, __b) \
+#define vrshl_s8(__a, __b) __extension__ \
   ({ \
      int8x8_t __ax = __a; \
      int8x8_t __bx = __b; \
@@ -4659,7 +4659,7 @@
      __rv.__i; \
    })
 
-#define vrshl_s16(__a, __b) \
+#define vrshl_s16(__a, __b) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      int16x4_t __bx = __b; \
@@ -4668,7 +4668,7 @@
      __rv.__i; \
    })
 
-#define vrshl_s32(__a, __b) \
+#define vrshl_s32(__a, __b) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      int32x2_t __bx = __b; \
@@ -4677,7 +4677,7 @@
      __rv.__i; \
    })
 
-#define vrshl_s64(__a, __b) \
+#define vrshl_s64(__a, __b) __extension__ \
   ({ \
      int64x1_t __ax = __a; \
      int64x1_t __bx = __b; \
@@ -4686,7 +4686,7 @@
      __rv.__i; \
    })
 
-#define vrshl_u8(__a, __b) \
+#define vrshl_u8(__a, __b) __extension__ \
   ({ \
      uint8x8_t __ax = __a; \
      int8x8_t __bx = __b; \
@@ -4695,7 +4695,7 @@
      __rv.__i; \
    })
 
-#define vrshl_u16(__a, __b) \
+#define vrshl_u16(__a, __b) __extension__ \
   ({ \
      uint16x4_t __ax = __a; \
      int16x4_t __bx = __b; \
@@ -4704,7 +4704,7 @@
      __rv.__i; \
    })
 
-#define vrshl_u32(__a, __b) \
+#define vrshl_u32(__a, __b) __extension__ \
   ({ \
      uint32x2_t __ax = __a; \
      int32x2_t __bx = __b; \
@@ -4713,7 +4713,7 @@
      __rv.__i; \
    })
 
-#define vrshl_u64(__a, __b) \
+#define vrshl_u64(__a, __b) __extension__ \
   ({ \
      uint64x1_t __ax = __a; \
      int64x1_t __bx = __b; \
@@ -4722,7 +4722,7 @@
      __rv.__i; \
    })
 
-#define vrshlq_s8(__a, __b) \
+#define vrshlq_s8(__a, __b) __extension__ \
   ({ \
      int8x16_t __ax = __a; \
      int8x16_t __bx = __b; \
@@ -4731,7 +4731,7 @@
      __rv.__i; \
    })
 
-#define vrshlq_s16(__a, __b) \
+#define vrshlq_s16(__a, __b) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      int16x8_t __bx = __b; \
@@ -4740,7 +4740,7 @@
      __rv.__i; \
    })
 
-#define vrshlq_s32(__a, __b) \
+#define vrshlq_s32(__a, __b) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      int32x4_t __bx = __b; \
@@ -4749,7 +4749,7 @@
      __rv.__i; \
    })
 
-#define vrshlq_s64(__a, __b) \
+#define vrshlq_s64(__a, __b) __extension__ \
   ({ \
      int64x2_t __ax = __a; \
      int64x2_t __bx = __b; \
@@ -4758,7 +4758,7 @@
      __rv.__i; \
    })
 
-#define vrshlq_u8(__a, __b) \
+#define vrshlq_u8(__a, __b) __extension__ \
   ({ \
      uint8x16_t __ax = __a; \
      int8x16_t __bx = __b; \
@@ -4767,7 +4767,7 @@
      __rv.__i; \
    })
 
-#define vrshlq_u16(__a, __b) \
+#define vrshlq_u16(__a, __b) __extension__ \
   ({ \
      uint16x8_t __ax = __a; \
      int16x8_t __bx = __b; \
@@ -4776,7 +4776,7 @@
      __rv.__i; \
    })
 
-#define vrshlq_u32(__a, __b) \
+#define vrshlq_u32(__a, __b) __extension__ \
   ({ \
      uint32x4_t __ax = __a; \
      int32x4_t __bx = __b; \
@@ -4785,7 +4785,7 @@
      __rv.__i; \
    })
 
-#define vrshlq_u64(__a, __b) \
+#define vrshlq_u64(__a, __b) __extension__ \
   ({ \
      uint64x2_t __ax = __a; \
      int64x2_t __bx = __b; \
@@ -4794,7 +4794,7 @@
      __rv.__i; \
    })
 
-#define vqshl_s8(__a, __b) \
+#define vqshl_s8(__a, __b) __extension__ \
   ({ \
      int8x8_t __ax = __a; \
      int8x8_t __bx = __b; \
@@ -4803,7 +4803,7 @@
      __rv.__i; \
    })
 
-#define vqshl_s16(__a, __b) \
+#define vqshl_s16(__a, __b) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      int16x4_t __bx = __b; \
@@ -4812,7 +4812,7 @@
      __rv.__i; \
    })
 
-#define vqshl_s32(__a, __b) \
+#define vqshl_s32(__a, __b) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      int32x2_t __bx = __b; \
@@ -4821,7 +4821,7 @@
      __rv.__i; \
    })
 
-#define vqshl_s64(__a, __b) \
+#define vqshl_s64(__a, __b) __extension__ \
   ({ \
      int64x1_t __ax = __a; \
      int64x1_t __bx = __b; \
@@ -4830,7 +4830,7 @@
      __rv.__i; \
    })
 
-#define vqshl_u8(__a, __b) \
+#define vqshl_u8(__a, __b) __extension__ \
   ({ \
      uint8x8_t __ax = __a; \
      int8x8_t __bx = __b; \
@@ -4839,7 +4839,7 @@
      __rv.__i; \
    })
 
-#define vqshl_u16(__a, __b) \
+#define vqshl_u16(__a, __b) __extension__ \
   ({ \
      uint16x4_t __ax = __a; \
      int16x4_t __bx = __b; \
@@ -4848,7 +4848,7 @@
      __rv.__i; \
    })
 
-#define vqshl_u32(__a, __b) \
+#define vqshl_u32(__a, __b) __extension__ \
   ({ \
      uint32x2_t __ax = __a; \
      int32x2_t __bx = __b; \
@@ -4857,7 +4857,7 @@
      __rv.__i; \
    })
 
-#define vqshl_u64(__a, __b) \
+#define vqshl_u64(__a, __b) __extension__ \
   ({ \
      uint64x1_t __ax = __a; \
      int64x1_t __bx = __b; \
@@ -4866,7 +4866,7 @@
      __rv.__i; \
    })
 
-#define vqshlq_s8(__a, __b) \
+#define vqshlq_s8(__a, __b) __extension__ \
   ({ \
      int8x16_t __ax = __a; \
      int8x16_t __bx = __b; \
@@ -4875,7 +4875,7 @@
      __rv.__i; \
    })
 
-#define vqshlq_s16(__a, __b) \
+#define vqshlq_s16(__a, __b) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      int16x8_t __bx = __b; \
@@ -4884,7 +4884,7 @@
      __rv.__i; \
    })
 
-#define vqshlq_s32(__a, __b) \
+#define vqshlq_s32(__a, __b) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      int32x4_t __bx = __b; \
@@ -4893,7 +4893,7 @@
      __rv.__i; \
    })
 
-#define vqshlq_s64(__a, __b) \
+#define vqshlq_s64(__a, __b) __extension__ \
   ({ \
      int64x2_t __ax = __a; \
      int64x2_t __bx = __b; \
@@ -4902,7 +4902,7 @@
      __rv.__i; \
    })
 
-#define vqshlq_u8(__a, __b) \
+#define vqshlq_u8(__a, __b) __extension__ \
   ({ \
      uint8x16_t __ax = __a; \
      int8x16_t __bx = __b; \
@@ -4911,7 +4911,7 @@
      __rv.__i; \
    })
 
-#define vqshlq_u16(__a, __b) \
+#define vqshlq_u16(__a, __b) __extension__ \
   ({ \
      uint16x8_t __ax = __a; \
      int16x8_t __bx = __b; \
@@ -4920,7 +4920,7 @@
      __rv.__i; \
    })
 
-#define vqshlq_u32(__a, __b) \
+#define vqshlq_u32(__a, __b) __extension__ \
   ({ \
      uint32x4_t __ax = __a; \
      int32x4_t __bx = __b; \
@@ -4929,7 +4929,7 @@
      __rv.__i; \
    })
 
-#define vqshlq_u64(__a, __b) \
+#define vqshlq_u64(__a, __b) __extension__ \
   ({ \
      uint64x2_t __ax = __a; \
      int64x2_t __bx = __b; \
@@ -4938,7 +4938,7 @@
      __rv.__i; \
    })
 
-#define vqrshl_s8(__a, __b) \
+#define vqrshl_s8(__a, __b) __extension__ \
   ({ \
      int8x8_t __ax = __a; \
      int8x8_t __bx = __b; \
@@ -4947,7 +4947,7 @@
      __rv.__i; \
    })
 
-#define vqrshl_s16(__a, __b) \
+#define vqrshl_s16(__a, __b) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      int16x4_t __bx = __b; \
@@ -4956,7 +4956,7 @@
      __rv.__i; \
    })
 
-#define vqrshl_s32(__a, __b) \
+#define vqrshl_s32(__a, __b) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      int32x2_t __bx = __b; \
@@ -4965,7 +4965,7 @@
      __rv.__i; \
    })
 
-#define vqrshl_s64(__a, __b) \
+#define vqrshl_s64(__a, __b) __extension__ \
   ({ \
      int64x1_t __ax = __a; \
      int64x1_t __bx = __b; \
@@ -4974,7 +4974,7 @@
      __rv.__i; \
    })
 
-#define vqrshl_u8(__a, __b) \
+#define vqrshl_u8(__a, __b) __extension__ \
   ({ \
      uint8x8_t __ax = __a; \
      int8x8_t __bx = __b; \
@@ -4983,7 +4983,7 @@
      __rv.__i; \
    })
 
-#define vqrshl_u16(__a, __b) \
+#define vqrshl_u16(__a, __b) __extension__ \
   ({ \
      uint16x4_t __ax = __a; \
      int16x4_t __bx = __b; \
@@ -4992,7 +4992,7 @@
      __rv.__i; \
    })
 
-#define vqrshl_u32(__a, __b) \
+#define vqrshl_u32(__a, __b) __extension__ \
   ({ \
      uint32x2_t __ax = __a; \
      int32x2_t __bx = __b; \
@@ -5001,7 +5001,7 @@
      __rv.__i; \
    })
 
-#define vqrshl_u64(__a, __b) \
+#define vqrshl_u64(__a, __b) __extension__ \
   ({ \
      uint64x1_t __ax = __a; \
      int64x1_t __bx = __b; \
@@ -5010,7 +5010,7 @@
      __rv.__i; \
    })
 
-#define vqrshlq_s8(__a, __b) \
+#define vqrshlq_s8(__a, __b) __extension__ \
   ({ \
      int8x16_t __ax = __a; \
      int8x16_t __bx = __b; \
@@ -5019,7 +5019,7 @@
      __rv.__i; \
    })
 
-#define vqrshlq_s16(__a, __b) \
+#define vqrshlq_s16(__a, __b) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      int16x8_t __bx = __b; \
@@ -5028,7 +5028,7 @@
      __rv.__i; \
    })
 
-#define vqrshlq_s32(__a, __b) \
+#define vqrshlq_s32(__a, __b) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      int32x4_t __bx = __b; \
@@ -5037,7 +5037,7 @@
      __rv.__i; \
    })
 
-#define vqrshlq_s64(__a, __b) \
+#define vqrshlq_s64(__a, __b) __extension__ \
   ({ \
      int64x2_t __ax = __a; \
      int64x2_t __bx = __b; \
@@ -5046,7 +5046,7 @@
      __rv.__i; \
    })
 
-#define vqrshlq_u8(__a, __b) \
+#define vqrshlq_u8(__a, __b) __extension__ \
   ({ \
      uint8x16_t __ax = __a; \
      int8x16_t __bx = __b; \
@@ -5055,7 +5055,7 @@
      __rv.__i; \
    })
 
-#define vqrshlq_u16(__a, __b) \
+#define vqrshlq_u16(__a, __b) __extension__ \
   ({ \
      uint16x8_t __ax = __a; \
      int16x8_t __bx = __b; \
@@ -5064,7 +5064,7 @@
      __rv.__i; \
    })
 
-#define vqrshlq_u32(__a, __b) \
+#define vqrshlq_u32(__a, __b) __extension__ \
   ({ \
      uint32x4_t __ax = __a; \
      int32x4_t __bx = __b; \
@@ -5073,7 +5073,7 @@
      __rv.__i; \
    })
 
-#define vqrshlq_u64(__a, __b) \
+#define vqrshlq_u64(__a, __b) __extension__ \
   ({ \
      uint64x2_t __ax = __a; \
      int64x2_t __bx = __b; \
@@ -5082,7 +5082,7 @@
      __rv.__i; \
    })
 
-#define vshr_n_s8(__a, __b) \
+#define vshr_n_s8(__a, __b) __extension__ \
   ({ \
      int8x8_t __ax = __a; \
      union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -5090,7 +5090,7 @@
      __rv.__i; \
    })
 
-#define vshr_n_s16(__a, __b) \
+#define vshr_n_s16(__a, __b) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -5098,7 +5098,7 @@
      __rv.__i; \
    })
 
-#define vshr_n_s32(__a, __b) \
+#define vshr_n_s32(__a, __b) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \
@@ -5106,7 +5106,7 @@
      __rv.__i; \
    })
 
-#define vshr_n_s64(__a, __b) \
+#define vshr_n_s64(__a, __b) __extension__ \
   ({ \
      int64x1_t __ax = __a; \
      union { int64x1_t __i; __neon_int64x1_t __o; } __rv; \
@@ -5114,7 +5114,7 @@
      __rv.__i; \
    })
 
-#define vshr_n_u8(__a, __b) \
+#define vshr_n_u8(__a, __b) __extension__ \
   ({ \
      uint8x8_t __ax = __a; \
      union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -5122,7 +5122,7 @@
      __rv.__i; \
    })
 
-#define vshr_n_u16(__a, __b) \
+#define vshr_n_u16(__a, __b) __extension__ \
   ({ \
      uint16x4_t __ax = __a; \
      union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -5130,7 +5130,7 @@
      __rv.__i; \
    })
 
-#define vshr_n_u32(__a, __b) \
+#define vshr_n_u32(__a, __b) __extension__ \
   ({ \
      uint32x2_t __ax = __a; \
      union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \
@@ -5138,7 +5138,7 @@
      __rv.__i; \
    })
 
-#define vshr_n_u64(__a, __b) \
+#define vshr_n_u64(__a, __b) __extension__ \
   ({ \
      uint64x1_t __ax = __a; \
      union { uint64x1_t __i; __neon_int64x1_t __o; } __rv; \
@@ -5146,7 +5146,7 @@
      __rv.__i; \
    })
 
-#define vshrq_n_s8(__a, __b) \
+#define vshrq_n_s8(__a, __b) __extension__ \
   ({ \
      int8x16_t __ax = __a; \
      union { int8x16_t __i; __neon_int8x16_t __o; } __rv; \
@@ -5154,7 +5154,7 @@
      __rv.__i; \
    })
 
-#define vshrq_n_s16(__a, __b) \
+#define vshrq_n_s16(__a, __b) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \
@@ -5162,7 +5162,7 @@
      __rv.__i; \
    })
 
-#define vshrq_n_s32(__a, __b) \
+#define vshrq_n_s32(__a, __b) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \
@@ -5170,7 +5170,7 @@
      __rv.__i; \
    })
 
-#define vshrq_n_s64(__a, __b) \
+#define vshrq_n_s64(__a, __b) __extension__ \
   ({ \
      int64x2_t __ax = __a; \
      union { int64x2_t __i; __neon_int64x2_t __o; } __rv; \
@@ -5178,7 +5178,7 @@
      __rv.__i; \
    })
 
-#define vshrq_n_u8(__a, __b) \
+#define vshrq_n_u8(__a, __b) __extension__ \
   ({ \
      uint8x16_t __ax = __a; \
      union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \
@@ -5186,7 +5186,7 @@
      __rv.__i; \
    })
 
-#define vshrq_n_u16(__a, __b) \
+#define vshrq_n_u16(__a, __b) __extension__ \
   ({ \
      uint16x8_t __ax = __a; \
      union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \
@@ -5194,7 +5194,7 @@
      __rv.__i; \
    })
 
-#define vshrq_n_u32(__a, __b) \
+#define vshrq_n_u32(__a, __b) __extension__ \
   ({ \
      uint32x4_t __ax = __a; \
      union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \
@@ -5202,7 +5202,7 @@
      __rv.__i; \
    })
 
-#define vshrq_n_u64(__a, __b) \
+#define vshrq_n_u64(__a, __b) __extension__ \
   ({ \
      uint64x2_t __ax = __a; \
      union { uint64x2_t __i; __neon_int64x2_t __o; } __rv; \
@@ -5210,7 +5210,7 @@
      __rv.__i; \
    })
 
-#define vrshr_n_s8(__a, __b) \
+#define vrshr_n_s8(__a, __b) __extension__ \
   ({ \
      int8x8_t __ax = __a; \
      union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -5218,7 +5218,7 @@
      __rv.__i; \
    })
 
-#define vrshr_n_s16(__a, __b) \
+#define vrshr_n_s16(__a, __b) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -5226,7 +5226,7 @@
      __rv.__i; \
    })
 
-#define vrshr_n_s32(__a, __b) \
+#define vrshr_n_s32(__a, __b) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \
@@ -5234,7 +5234,7 @@
      __rv.__i; \
    })
 
-#define vrshr_n_s64(__a, __b) \
+#define vrshr_n_s64(__a, __b) __extension__ \
   ({ \
      int64x1_t __ax = __a; \
      union { int64x1_t __i; __neon_int64x1_t __o; } __rv; \
@@ -5242,7 +5242,7 @@
      __rv.__i; \
    })
 
-#define vrshr_n_u8(__a, __b) \
+#define vrshr_n_u8(__a, __b) __extension__ \
   ({ \
      uint8x8_t __ax = __a; \
      union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -5250,7 +5250,7 @@
      __rv.__i; \
    })
 
-#define vrshr_n_u16(__a, __b) \
+#define vrshr_n_u16(__a, __b) __extension__ \
   ({ \
      uint16x4_t __ax = __a; \
      union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -5258,7 +5258,7 @@
      __rv.__i; \
    })
 
-#define vrshr_n_u32(__a, __b) \
+#define vrshr_n_u32(__a, __b) __extension__ \
   ({ \
      uint32x2_t __ax = __a; \
      union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \
@@ -5266,7 +5266,7 @@
      __rv.__i; \
    })
 
-#define vrshr_n_u64(__a, __b) \
+#define vrshr_n_u64(__a, __b) __extension__ \
   ({ \
      uint64x1_t __ax = __a; \
      union { uint64x1_t __i; __neon_int64x1_t __o; } __rv; \
@@ -5274,7 +5274,7 @@
      __rv.__i; \
    })
 
-#define vrshrq_n_s8(__a, __b) \
+#define vrshrq_n_s8(__a, __b) __extension__ \
   ({ \
      int8x16_t __ax = __a; \
      union { int8x16_t __i; __neon_int8x16_t __o; } __rv; \
@@ -5282,7 +5282,7 @@
      __rv.__i; \
    })
 
-#define vrshrq_n_s16(__a, __b) \
+#define vrshrq_n_s16(__a, __b) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \
@@ -5290,7 +5290,7 @@
      __rv.__i; \
    })
 
-#define vrshrq_n_s32(__a, __b) \
+#define vrshrq_n_s32(__a, __b) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \
@@ -5298,7 +5298,7 @@
      __rv.__i; \
    })
 
-#define vrshrq_n_s64(__a, __b) \
+#define vrshrq_n_s64(__a, __b) __extension__ \
   ({ \
      int64x2_t __ax = __a; \
      union { int64x2_t __i; __neon_int64x2_t __o; } __rv; \
@@ -5306,7 +5306,7 @@
      __rv.__i; \
    })
 
-#define vrshrq_n_u8(__a, __b) \
+#define vrshrq_n_u8(__a, __b) __extension__ \
   ({ \
      uint8x16_t __ax = __a; \
      union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \
@@ -5314,7 +5314,7 @@
      __rv.__i; \
    })
 
-#define vrshrq_n_u16(__a, __b) \
+#define vrshrq_n_u16(__a, __b) __extension__ \
   ({ \
      uint16x8_t __ax = __a; \
      union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \
@@ -5322,7 +5322,7 @@
      __rv.__i; \
    })
 
-#define vrshrq_n_u32(__a, __b) \
+#define vrshrq_n_u32(__a, __b) __extension__ \
   ({ \
      uint32x4_t __ax = __a; \
      union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \
@@ -5330,7 +5330,7 @@
      __rv.__i; \
    })
 
-#define vrshrq_n_u64(__a, __b) \
+#define vrshrq_n_u64(__a, __b) __extension__ \
   ({ \
      uint64x2_t __ax = __a; \
      union { uint64x2_t __i; __neon_int64x2_t __o; } __rv; \
@@ -5338,7 +5338,7 @@
      __rv.__i; \
    })
 
-#define vshrn_n_s16(__a, __b) \
+#define vshrn_n_s16(__a, __b) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -5346,7 +5346,7 @@
      __rv.__i; \
    })
 
-#define vshrn_n_s32(__a, __b) \
+#define vshrn_n_s32(__a, __b) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -5354,7 +5354,7 @@
      __rv.__i; \
    })
 
-#define vshrn_n_s64(__a, __b) \
+#define vshrn_n_s64(__a, __b) __extension__ \
   ({ \
      int64x2_t __ax = __a; \
      union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \
@@ -5362,7 +5362,7 @@
      __rv.__i; \
    })
 
-#define vshrn_n_u16(__a, __b) \
+#define vshrn_n_u16(__a, __b) __extension__ \
   ({ \
      uint16x8_t __ax = __a; \
      union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -5370,7 +5370,7 @@
      __rv.__i; \
    })
 
-#define vshrn_n_u32(__a, __b) \
+#define vshrn_n_u32(__a, __b) __extension__ \
   ({ \
      uint32x4_t __ax = __a; \
      union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -5378,7 +5378,7 @@
      __rv.__i; \
    })
 
-#define vshrn_n_u64(__a, __b) \
+#define vshrn_n_u64(__a, __b) __extension__ \
   ({ \
      uint64x2_t __ax = __a; \
      union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \
@@ -5386,7 +5386,7 @@
      __rv.__i; \
    })
 
-#define vrshrn_n_s16(__a, __b) \
+#define vrshrn_n_s16(__a, __b) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -5394,7 +5394,7 @@
      __rv.__i; \
    })
 
-#define vrshrn_n_s32(__a, __b) \
+#define vrshrn_n_s32(__a, __b) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -5402,7 +5402,7 @@
      __rv.__i; \
    })
 
-#define vrshrn_n_s64(__a, __b) \
+#define vrshrn_n_s64(__a, __b) __extension__ \
   ({ \
      int64x2_t __ax = __a; \
      union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \
@@ -5410,7 +5410,7 @@
      __rv.__i; \
    })
 
-#define vrshrn_n_u16(__a, __b) \
+#define vrshrn_n_u16(__a, __b) __extension__ \
   ({ \
      uint16x8_t __ax = __a; \
      union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -5418,7 +5418,7 @@
      __rv.__i; \
    })
 
-#define vrshrn_n_u32(__a, __b) \
+#define vrshrn_n_u32(__a, __b) __extension__ \
   ({ \
      uint32x4_t __ax = __a; \
      union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -5426,7 +5426,7 @@
      __rv.__i; \
    })
 
-#define vrshrn_n_u64(__a, __b) \
+#define vrshrn_n_u64(__a, __b) __extension__ \
   ({ \
      uint64x2_t __ax = __a; \
      union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \
@@ -5434,7 +5434,7 @@
      __rv.__i; \
    })
 
-#define vqshrn_n_s16(__a, __b) \
+#define vqshrn_n_s16(__a, __b) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -5442,7 +5442,7 @@
      __rv.__i; \
    })
 
-#define vqshrn_n_s32(__a, __b) \
+#define vqshrn_n_s32(__a, __b) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -5450,7 +5450,7 @@
      __rv.__i; \
    })
 
-#define vqshrn_n_s64(__a, __b) \
+#define vqshrn_n_s64(__a, __b) __extension__ \
   ({ \
      int64x2_t __ax = __a; \
      union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \
@@ -5458,7 +5458,7 @@
      __rv.__i; \
    })
 
-#define vqshrn_n_u16(__a, __b) \
+#define vqshrn_n_u16(__a, __b) __extension__ \
   ({ \
      uint16x8_t __ax = __a; \
      union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -5466,7 +5466,7 @@
      __rv.__i; \
    })
 
-#define vqshrn_n_u32(__a, __b) \
+#define vqshrn_n_u32(__a, __b) __extension__ \
   ({ \
      uint32x4_t __ax = __a; \
      union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -5474,7 +5474,7 @@
      __rv.__i; \
    })
 
-#define vqshrn_n_u64(__a, __b) \
+#define vqshrn_n_u64(__a, __b) __extension__ \
   ({ \
      uint64x2_t __ax = __a; \
      union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \
@@ -5482,7 +5482,7 @@
      __rv.__i; \
    })
 
-#define vqrshrn_n_s16(__a, __b) \
+#define vqrshrn_n_s16(__a, __b) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -5490,7 +5490,7 @@
      __rv.__i; \
    })
 
-#define vqrshrn_n_s32(__a, __b) \
+#define vqrshrn_n_s32(__a, __b) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -5498,7 +5498,7 @@
      __rv.__i; \
    })
 
-#define vqrshrn_n_s64(__a, __b) \
+#define vqrshrn_n_s64(__a, __b) __extension__ \
   ({ \
      int64x2_t __ax = __a; \
      union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \
@@ -5506,7 +5506,7 @@
      __rv.__i; \
    })
 
-#define vqrshrn_n_u16(__a, __b) \
+#define vqrshrn_n_u16(__a, __b) __extension__ \
   ({ \
      uint16x8_t __ax = __a; \
      union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -5514,7 +5514,7 @@
      __rv.__i; \
    })
 
-#define vqrshrn_n_u32(__a, __b) \
+#define vqrshrn_n_u32(__a, __b) __extension__ \
   ({ \
      uint32x4_t __ax = __a; \
      union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -5522,7 +5522,7 @@
      __rv.__i; \
    })
 
-#define vqrshrn_n_u64(__a, __b) \
+#define vqrshrn_n_u64(__a, __b) __extension__ \
   ({ \
      uint64x2_t __ax = __a; \
      union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \
@@ -5530,7 +5530,7 @@
      __rv.__i; \
    })
 
-#define vqshrun_n_s16(__a, __b) \
+#define vqshrun_n_s16(__a, __b) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -5538,7 +5538,7 @@
      __rv.__i; \
    })
 
-#define vqshrun_n_s32(__a, __b) \
+#define vqshrun_n_s32(__a, __b) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -5546,7 +5546,7 @@
      __rv.__i; \
    })
 
-#define vqshrun_n_s64(__a, __b) \
+#define vqshrun_n_s64(__a, __b) __extension__ \
   ({ \
      int64x2_t __ax = __a; \
      union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \
@@ -5554,7 +5554,7 @@
      __rv.__i; \
    })
 
-#define vqrshrun_n_s16(__a, __b) \
+#define vqrshrun_n_s16(__a, __b) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -5562,7 +5562,7 @@
      __rv.__i; \
    })
 
-#define vqrshrun_n_s32(__a, __b) \
+#define vqrshrun_n_s32(__a, __b) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -5570,7 +5570,7 @@
      __rv.__i; \
    })
 
-#define vqrshrun_n_s64(__a, __b) \
+#define vqrshrun_n_s64(__a, __b) __extension__ \
   ({ \
      int64x2_t __ax = __a; \
      union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \
@@ -5578,7 +5578,7 @@
      __rv.__i; \
    })
 
-#define vshl_n_s8(__a, __b) \
+#define vshl_n_s8(__a, __b) __extension__ \
   ({ \
      int8x8_t __ax = __a; \
      union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -5586,7 +5586,7 @@
      __rv.__i; \
    })
 
-#define vshl_n_s16(__a, __b) \
+#define vshl_n_s16(__a, __b) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -5594,7 +5594,7 @@
      __rv.__i; \
    })
 
-#define vshl_n_s32(__a, __b) \
+#define vshl_n_s32(__a, __b) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \
@@ -5602,7 +5602,7 @@
      __rv.__i; \
    })
 
-#define vshl_n_s64(__a, __b) \
+#define vshl_n_s64(__a, __b) __extension__ \
   ({ \
      int64x1_t __ax = __a; \
      union { int64x1_t __i; __neon_int64x1_t __o; } __rv; \
@@ -5610,7 +5610,7 @@
      __rv.__i; \
    })
 
-#define vshl_n_u8(__a, __b) \
+#define vshl_n_u8(__a, __b) __extension__ \
   ({ \
      uint8x8_t __ax = __a; \
      union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -5618,7 +5618,7 @@
      __rv.__i; \
    })
 
-#define vshl_n_u16(__a, __b) \
+#define vshl_n_u16(__a, __b) __extension__ \
   ({ \
      uint16x4_t __ax = __a; \
      union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -5626,7 +5626,7 @@
      __rv.__i; \
    })
 
-#define vshl_n_u32(__a, __b) \
+#define vshl_n_u32(__a, __b) __extension__ \
   ({ \
      uint32x2_t __ax = __a; \
      union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \
@@ -5634,7 +5634,7 @@
      __rv.__i; \
    })
 
-#define vshl_n_u64(__a, __b) \
+#define vshl_n_u64(__a, __b) __extension__ \
   ({ \
      uint64x1_t __ax = __a; \
      union { uint64x1_t __i; __neon_int64x1_t __o; } __rv; \
@@ -5642,7 +5642,7 @@
      __rv.__i; \
    })
 
-#define vshlq_n_s8(__a, __b) \
+#define vshlq_n_s8(__a, __b) __extension__ \
   ({ \
      int8x16_t __ax = __a; \
      union { int8x16_t __i; __neon_int8x16_t __o; } __rv; \
@@ -5650,7 +5650,7 @@
      __rv.__i; \
    })
 
-#define vshlq_n_s16(__a, __b) \
+#define vshlq_n_s16(__a, __b) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \
@@ -5658,7 +5658,7 @@
      __rv.__i; \
    })
 
-#define vshlq_n_s32(__a, __b) \
+#define vshlq_n_s32(__a, __b) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \
@@ -5666,7 +5666,7 @@
      __rv.__i; \
    })
 
-#define vshlq_n_s64(__a, __b) \
+#define vshlq_n_s64(__a, __b) __extension__ \
   ({ \
      int64x2_t __ax = __a; \
      union { int64x2_t __i; __neon_int64x2_t __o; } __rv; \
@@ -5674,7 +5674,7 @@
      __rv.__i; \
    })
 
-#define vshlq_n_u8(__a, __b) \
+#define vshlq_n_u8(__a, __b) __extension__ \
   ({ \
      uint8x16_t __ax = __a; \
      union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \
@@ -5682,7 +5682,7 @@
      __rv.__i; \
    })
 
-#define vshlq_n_u16(__a, __b) \
+#define vshlq_n_u16(__a, __b) __extension__ \
   ({ \
      uint16x8_t __ax = __a; \
      union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \
@@ -5690,7 +5690,7 @@
      __rv.__i; \
    })
 
-#define vshlq_n_u32(__a, __b) \
+#define vshlq_n_u32(__a, __b) __extension__ \
   ({ \
      uint32x4_t __ax = __a; \
      union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \
@@ -5698,7 +5698,7 @@
      __rv.__i; \
    })
 
-#define vshlq_n_u64(__a, __b) \
+#define vshlq_n_u64(__a, __b) __extension__ \
   ({ \
      uint64x2_t __ax = __a; \
      union { uint64x2_t __i; __neon_int64x2_t __o; } __rv; \
@@ -5706,7 +5706,7 @@
      __rv.__i; \
    })
 
-#define vqshl_n_s8(__a, __b) \
+#define vqshl_n_s8(__a, __b) __extension__ \
   ({ \
      int8x8_t __ax = __a; \
      union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -5714,7 +5714,7 @@
      __rv.__i; \
    })
 
-#define vqshl_n_s16(__a, __b) \
+#define vqshl_n_s16(__a, __b) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -5722,7 +5722,7 @@
      __rv.__i; \
    })
 
-#define vqshl_n_s32(__a, __b) \
+#define vqshl_n_s32(__a, __b) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \
@@ -5730,7 +5730,7 @@
      __rv.__i; \
    })
 
-#define vqshl_n_s64(__a, __b) \
+#define vqshl_n_s64(__a, __b) __extension__ \
   ({ \
      int64x1_t __ax = __a; \
      union { int64x1_t __i; __neon_int64x1_t __o; } __rv; \
@@ -5738,7 +5738,7 @@
      __rv.__i; \
    })
 
-#define vqshl_n_u8(__a, __b) \
+#define vqshl_n_u8(__a, __b) __extension__ \
   ({ \
      uint8x8_t __ax = __a; \
      union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -5746,7 +5746,7 @@
      __rv.__i; \
    })
 
-#define vqshl_n_u16(__a, __b) \
+#define vqshl_n_u16(__a, __b) __extension__ \
   ({ \
      uint16x4_t __ax = __a; \
      union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -5754,7 +5754,7 @@
      __rv.__i; \
    })
 
-#define vqshl_n_u32(__a, __b) \
+#define vqshl_n_u32(__a, __b) __extension__ \
   ({ \
      uint32x2_t __ax = __a; \
      union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \
@@ -5762,7 +5762,7 @@
      __rv.__i; \
    })
 
-#define vqshl_n_u64(__a, __b) \
+#define vqshl_n_u64(__a, __b) __extension__ \
   ({ \
      uint64x1_t __ax = __a; \
      union { uint64x1_t __i; __neon_int64x1_t __o; } __rv; \
@@ -5770,7 +5770,7 @@
      __rv.__i; \
    })
 
-#define vqshlq_n_s8(__a, __b) \
+#define vqshlq_n_s8(__a, __b) __extension__ \
   ({ \
      int8x16_t __ax = __a; \
      union { int8x16_t __i; __neon_int8x16_t __o; } __rv; \
@@ -5778,7 +5778,7 @@
      __rv.__i; \
    })
 
-#define vqshlq_n_s16(__a, __b) \
+#define vqshlq_n_s16(__a, __b) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \
@@ -5786,7 +5786,7 @@
      __rv.__i; \
    })
 
-#define vqshlq_n_s32(__a, __b) \
+#define vqshlq_n_s32(__a, __b) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \
@@ -5794,7 +5794,7 @@
      __rv.__i; \
    })
 
-#define vqshlq_n_s64(__a, __b) \
+#define vqshlq_n_s64(__a, __b) __extension__ \
   ({ \
      int64x2_t __ax = __a; \
      union { int64x2_t __i; __neon_int64x2_t __o; } __rv; \
@@ -5802,7 +5802,7 @@
      __rv.__i; \
    })
 
-#define vqshlq_n_u8(__a, __b) \
+#define vqshlq_n_u8(__a, __b) __extension__ \
   ({ \
      uint8x16_t __ax = __a; \
      union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \
@@ -5810,7 +5810,7 @@
      __rv.__i; \
    })
 
-#define vqshlq_n_u16(__a, __b) \
+#define vqshlq_n_u16(__a, __b) __extension__ \
   ({ \
      uint16x8_t __ax = __a; \
      union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \
@@ -5818,7 +5818,7 @@
      __rv.__i; \
    })
 
-#define vqshlq_n_u32(__a, __b) \
+#define vqshlq_n_u32(__a, __b) __extension__ \
   ({ \
      uint32x4_t __ax = __a; \
      union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \
@@ -5826,7 +5826,7 @@
      __rv.__i; \
    })
 
-#define vqshlq_n_u64(__a, __b) \
+#define vqshlq_n_u64(__a, __b) __extension__ \
   ({ \
      uint64x2_t __ax = __a; \
      union { uint64x2_t __i; __neon_int64x2_t __o; } __rv; \
@@ -5834,7 +5834,7 @@
      __rv.__i; \
    })
 
-#define vqshlu_n_s8(__a, __b) \
+#define vqshlu_n_s8(__a, __b) __extension__ \
   ({ \
      int8x8_t __ax = __a; \
      union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -5842,7 +5842,7 @@
      __rv.__i; \
    })
 
-#define vqshlu_n_s16(__a, __b) \
+#define vqshlu_n_s16(__a, __b) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -5850,7 +5850,7 @@
      __rv.__i; \
    })
 
-#define vqshlu_n_s32(__a, __b) \
+#define vqshlu_n_s32(__a, __b) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \
@@ -5858,7 +5858,7 @@
      __rv.__i; \
    })
 
-#define vqshlu_n_s64(__a, __b) \
+#define vqshlu_n_s64(__a, __b) __extension__ \
   ({ \
      int64x1_t __ax = __a; \
      union { uint64x1_t __i; __neon_int64x1_t __o; } __rv; \
@@ -5866,7 +5866,7 @@
      __rv.__i; \
    })
 
-#define vqshluq_n_s8(__a, __b) \
+#define vqshluq_n_s8(__a, __b) __extension__ \
   ({ \
      int8x16_t __ax = __a; \
      union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \
@@ -5874,7 +5874,7 @@
      __rv.__i; \
    })
 
-#define vqshluq_n_s16(__a, __b) \
+#define vqshluq_n_s16(__a, __b) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \
@@ -5882,7 +5882,7 @@
      __rv.__i; \
    })
 
-#define vqshluq_n_s32(__a, __b) \
+#define vqshluq_n_s32(__a, __b) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \
@@ -5890,7 +5890,7 @@
      __rv.__i; \
    })
 
-#define vqshluq_n_s64(__a, __b) \
+#define vqshluq_n_s64(__a, __b) __extension__ \
   ({ \
      int64x2_t __ax = __a; \
      union { uint64x2_t __i; __neon_int64x2_t __o; } __rv; \
@@ -5898,7 +5898,7 @@
      __rv.__i; \
    })
 
-#define vshll_n_s8(__a, __b) \
+#define vshll_n_s8(__a, __b) __extension__ \
   ({ \
      int8x8_t __ax = __a; \
      union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \
@@ -5906,7 +5906,7 @@
      __rv.__i; \
    })
 
-#define vshll_n_s16(__a, __b) \
+#define vshll_n_s16(__a, __b) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \
@@ -5914,7 +5914,7 @@
      __rv.__i; \
    })
 
-#define vshll_n_s32(__a, __b) \
+#define vshll_n_s32(__a, __b) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      union { int64x2_t __i; __neon_int64x2_t __o; } __rv; \
@@ -5922,7 +5922,7 @@
      __rv.__i; \
    })
 
-#define vshll_n_u8(__a, __b) \
+#define vshll_n_u8(__a, __b) __extension__ \
   ({ \
      uint8x8_t __ax = __a; \
      union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \
@@ -5930,7 +5930,7 @@
      __rv.__i; \
    })
 
-#define vshll_n_u16(__a, __b) \
+#define vshll_n_u16(__a, __b) __extension__ \
   ({ \
      uint16x4_t __ax = __a; \
      union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \
@@ -5938,7 +5938,7 @@
      __rv.__i; \
    })
 
-#define vshll_n_u32(__a, __b) \
+#define vshll_n_u32(__a, __b) __extension__ \
   ({ \
      uint32x2_t __ax = __a; \
      union { uint64x2_t __i; __neon_int64x2_t __o; } __rv; \
@@ -5946,7 +5946,7 @@
      __rv.__i; \
    })
 
-#define vsra_n_s8(__a, __b, __c) \
+#define vsra_n_s8(__a, __b, __c) __extension__ \
   ({ \
      int8x8_t __ax = __a; \
      int8x8_t __bx = __b; \
@@ -5955,7 +5955,7 @@
      __rv.__i; \
    })
 
-#define vsra_n_s16(__a, __b, __c) \
+#define vsra_n_s16(__a, __b, __c) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      int16x4_t __bx = __b; \
@@ -5964,7 +5964,7 @@
      __rv.__i; \
    })
 
-#define vsra_n_s32(__a, __b, __c) \
+#define vsra_n_s32(__a, __b, __c) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      int32x2_t __bx = __b; \
@@ -5973,7 +5973,7 @@
      __rv.__i; \
    })
 
-#define vsra_n_s64(__a, __b, __c) \
+#define vsra_n_s64(__a, __b, __c) __extension__ \
   ({ \
      int64x1_t __ax = __a; \
      int64x1_t __bx = __b; \
@@ -5982,7 +5982,7 @@
      __rv.__i; \
    })
 
-#define vsra_n_u8(__a, __b, __c) \
+#define vsra_n_u8(__a, __b, __c) __extension__ \
   ({ \
      uint8x8_t __ax = __a; \
      uint8x8_t __bx = __b; \
@@ -5991,7 +5991,7 @@
      __rv.__i; \
    })
 
-#define vsra_n_u16(__a, __b, __c) \
+#define vsra_n_u16(__a, __b, __c) __extension__ \
   ({ \
      uint16x4_t __ax = __a; \
      uint16x4_t __bx = __b; \
@@ -6000,7 +6000,7 @@
      __rv.__i; \
    })
 
-#define vsra_n_u32(__a, __b, __c) \
+#define vsra_n_u32(__a, __b, __c) __extension__ \
   ({ \
      uint32x2_t __ax = __a; \
      uint32x2_t __bx = __b; \
@@ -6009,7 +6009,7 @@
      __rv.__i; \
    })
 
-#define vsra_n_u64(__a, __b, __c) \
+#define vsra_n_u64(__a, __b, __c) __extension__ \
   ({ \
      uint64x1_t __ax = __a; \
      uint64x1_t __bx = __b; \
@@ -6018,7 +6018,7 @@
      __rv.__i; \
    })
 
-#define vsraq_n_s8(__a, __b, __c) \
+#define vsraq_n_s8(__a, __b, __c) __extension__ \
   ({ \
      int8x16_t __ax = __a; \
      int8x16_t __bx = __b; \
@@ -6027,7 +6027,7 @@
      __rv.__i; \
    })
 
-#define vsraq_n_s16(__a, __b, __c) \
+#define vsraq_n_s16(__a, __b, __c) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      int16x8_t __bx = __b; \
@@ -6036,7 +6036,7 @@
      __rv.__i; \
    })
 
-#define vsraq_n_s32(__a, __b, __c) \
+#define vsraq_n_s32(__a, __b, __c) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      int32x4_t __bx = __b; \
@@ -6045,7 +6045,7 @@
      __rv.__i; \
    })
 
-#define vsraq_n_s64(__a, __b, __c) \
+#define vsraq_n_s64(__a, __b, __c) __extension__ \
   ({ \
      int64x2_t __ax = __a; \
      int64x2_t __bx = __b; \
@@ -6054,7 +6054,7 @@
      __rv.__i; \
    })
 
-#define vsraq_n_u8(__a, __b, __c) \
+#define vsraq_n_u8(__a, __b, __c) __extension__ \
   ({ \
      uint8x16_t __ax = __a; \
      uint8x16_t __bx = __b; \
@@ -6063,7 +6063,7 @@
      __rv.__i; \
    })
 
-#define vsraq_n_u16(__a, __b, __c) \
+#define vsraq_n_u16(__a, __b, __c) __extension__ \
   ({ \
      uint16x8_t __ax = __a; \
      uint16x8_t __bx = __b; \
@@ -6072,7 +6072,7 @@
      __rv.__i; \
    })
 
-#define vsraq_n_u32(__a, __b, __c) \
+#define vsraq_n_u32(__a, __b, __c) __extension__ \
   ({ \
      uint32x4_t __ax = __a; \
      uint32x4_t __bx = __b; \
@@ -6081,7 +6081,7 @@
      __rv.__i; \
    })
 
-#define vsraq_n_u64(__a, __b, __c) \
+#define vsraq_n_u64(__a, __b, __c) __extension__ \
   ({ \
      uint64x2_t __ax = __a; \
      uint64x2_t __bx = __b; \
@@ -6090,7 +6090,7 @@
      __rv.__i; \
    })
 
-#define vrsra_n_s8(__a, __b, __c) \
+#define vrsra_n_s8(__a, __b, __c) __extension__ \
   ({ \
      int8x8_t __ax = __a; \
      int8x8_t __bx = __b; \
@@ -6099,7 +6099,7 @@
      __rv.__i; \
    })
 
-#define vrsra_n_s16(__a, __b, __c) \
+#define vrsra_n_s16(__a, __b, __c) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      int16x4_t __bx = __b; \
@@ -6108,7 +6108,7 @@
      __rv.__i; \
    })
 
-#define vrsra_n_s32(__a, __b, __c) \
+#define vrsra_n_s32(__a, __b, __c) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      int32x2_t __bx = __b; \
@@ -6117,7 +6117,7 @@
      __rv.__i; \
    })
 
-#define vrsra_n_s64(__a, __b, __c) \
+#define vrsra_n_s64(__a, __b, __c) __extension__ \
   ({ \
      int64x1_t __ax = __a; \
      int64x1_t __bx = __b; \
@@ -6126,7 +6126,7 @@
      __rv.__i; \
    })
 
-#define vrsra_n_u8(__a, __b, __c) \
+#define vrsra_n_u8(__a, __b, __c) __extension__ \
   ({ \
      uint8x8_t __ax = __a; \
      uint8x8_t __bx = __b; \
@@ -6135,7 +6135,7 @@
      __rv.__i; \
    })
 
-#define vrsra_n_u16(__a, __b, __c) \
+#define vrsra_n_u16(__a, __b, __c) __extension__ \
   ({ \
      uint16x4_t __ax = __a; \
      uint16x4_t __bx = __b; \
@@ -6144,7 +6144,7 @@
      __rv.__i; \
    })
 
-#define vrsra_n_u32(__a, __b, __c) \
+#define vrsra_n_u32(__a, __b, __c) __extension__ \
   ({ \
      uint32x2_t __ax = __a; \
      uint32x2_t __bx = __b; \
@@ -6153,7 +6153,7 @@
      __rv.__i; \
    })
 
-#define vrsra_n_u64(__a, __b, __c) \
+#define vrsra_n_u64(__a, __b, __c) __extension__ \
   ({ \
      uint64x1_t __ax = __a; \
      uint64x1_t __bx = __b; \
@@ -6162,7 +6162,7 @@
      __rv.__i; \
    })
 
-#define vrsraq_n_s8(__a, __b, __c) \
+#define vrsraq_n_s8(__a, __b, __c) __extension__ \
   ({ \
      int8x16_t __ax = __a; \
      int8x16_t __bx = __b; \
@@ -6171,7 +6171,7 @@
      __rv.__i; \
    })
 
-#define vrsraq_n_s16(__a, __b, __c) \
+#define vrsraq_n_s16(__a, __b, __c) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      int16x8_t __bx = __b; \
@@ -6180,7 +6180,7 @@
      __rv.__i; \
    })
 
-#define vrsraq_n_s32(__a, __b, __c) \
+#define vrsraq_n_s32(__a, __b, __c) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      int32x4_t __bx = __b; \
@@ -6189,7 +6189,7 @@
      __rv.__i; \
    })
 
-#define vrsraq_n_s64(__a, __b, __c) \
+#define vrsraq_n_s64(__a, __b, __c) __extension__ \
   ({ \
      int64x2_t __ax = __a; \
      int64x2_t __bx = __b; \
@@ -6198,7 +6198,7 @@
      __rv.__i; \
    })
 
-#define vrsraq_n_u8(__a, __b, __c) \
+#define vrsraq_n_u8(__a, __b, __c) __extension__ \
   ({ \
      uint8x16_t __ax = __a; \
      uint8x16_t __bx = __b; \
@@ -6207,7 +6207,7 @@
      __rv.__i; \
    })
 
-#define vrsraq_n_u16(__a, __b, __c) \
+#define vrsraq_n_u16(__a, __b, __c) __extension__ \
   ({ \
      uint16x8_t __ax = __a; \
      uint16x8_t __bx = __b; \
@@ -6216,7 +6216,7 @@
      __rv.__i; \
    })
 
-#define vrsraq_n_u32(__a, __b, __c) \
+#define vrsraq_n_u32(__a, __b, __c) __extension__ \
   ({ \
      uint32x4_t __ax = __a; \
      uint32x4_t __bx = __b; \
@@ -6225,7 +6225,7 @@
      __rv.__i; \
    })
 
-#define vrsraq_n_u64(__a, __b, __c) \
+#define vrsraq_n_u64(__a, __b, __c) __extension__ \
   ({ \
      uint64x2_t __ax = __a; \
      uint64x2_t __bx = __b; \
@@ -6234,7 +6234,7 @@
      __rv.__i; \
    })
 
-#define vsri_n_s8(__a, __b, __c) \
+#define vsri_n_s8(__a, __b, __c) __extension__ \
   ({ \
      int8x8_t __ax = __a; \
      int8x8_t __bx = __b; \
@@ -6243,7 +6243,7 @@
      __rv.__i; \
    })
 
-#define vsri_n_s16(__a, __b, __c) \
+#define vsri_n_s16(__a, __b, __c) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      int16x4_t __bx = __b; \
@@ -6252,7 +6252,7 @@
      __rv.__i; \
    })
 
-#define vsri_n_s32(__a, __b, __c) \
+#define vsri_n_s32(__a, __b, __c) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      int32x2_t __bx = __b; \
@@ -6261,7 +6261,7 @@
      __rv.__i; \
    })
 
-#define vsri_n_s64(__a, __b, __c) \
+#define vsri_n_s64(__a, __b, __c) __extension__ \
   ({ \
      int64x1_t __ax = __a; \
      int64x1_t __bx = __b; \
@@ -6270,7 +6270,7 @@
      __rv.__i; \
    })
 
-#define vsri_n_u8(__a, __b, __c) \
+#define vsri_n_u8(__a, __b, __c) __extension__ \
   ({ \
      uint8x8_t __ax = __a; \
      uint8x8_t __bx = __b; \
@@ -6279,7 +6279,7 @@
      __rv.__i; \
    })
 
-#define vsri_n_u16(__a, __b, __c) \
+#define vsri_n_u16(__a, __b, __c) __extension__ \
   ({ \
      uint16x4_t __ax = __a; \
      uint16x4_t __bx = __b; \
@@ -6288,7 +6288,7 @@
      __rv.__i; \
    })
 
-#define vsri_n_u32(__a, __b, __c) \
+#define vsri_n_u32(__a, __b, __c) __extension__ \
   ({ \
      uint32x2_t __ax = __a; \
      uint32x2_t __bx = __b; \
@@ -6297,7 +6297,7 @@
      __rv.__i; \
    })
 
-#define vsri_n_u64(__a, __b, __c) \
+#define vsri_n_u64(__a, __b, __c) __extension__ \
   ({ \
      uint64x1_t __ax = __a; \
      uint64x1_t __bx = __b; \
@@ -6306,7 +6306,7 @@
      __rv.__i; \
    })
 
-#define vsri_n_p8(__a, __b, __c) \
+#define vsri_n_p8(__a, __b, __c) __extension__ \
   ({ \
      poly8x8_t __ax = __a; \
      poly8x8_t __bx = __b; \
@@ -6315,7 +6315,7 @@
      __rv.__i; \
    })
 
-#define vsri_n_p16(__a, __b, __c) \
+#define vsri_n_p16(__a, __b, __c) __extension__ \
   ({ \
      poly16x4_t __ax = __a; \
      poly16x4_t __bx = __b; \
@@ -6324,7 +6324,7 @@
      __rv.__i; \
    })
 
-#define vsriq_n_s8(__a, __b, __c) \
+#define vsriq_n_s8(__a, __b, __c) __extension__ \
   ({ \
      int8x16_t __ax = __a; \
      int8x16_t __bx = __b; \
@@ -6333,7 +6333,7 @@
      __rv.__i; \
    })
 
-#define vsriq_n_s16(__a, __b, __c) \
+#define vsriq_n_s16(__a, __b, __c) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      int16x8_t __bx = __b; \
@@ -6342,7 +6342,7 @@
      __rv.__i; \
    })
 
-#define vsriq_n_s32(__a, __b, __c) \
+#define vsriq_n_s32(__a, __b, __c) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      int32x4_t __bx = __b; \
@@ -6351,7 +6351,7 @@
      __rv.__i; \
    })
 
-#define vsriq_n_s64(__a, __b, __c) \
+#define vsriq_n_s64(__a, __b, __c) __extension__ \
   ({ \
      int64x2_t __ax = __a; \
      int64x2_t __bx = __b; \
@@ -6360,7 +6360,7 @@
      __rv.__i; \
    })
 
-#define vsriq_n_u8(__a, __b, __c) \
+#define vsriq_n_u8(__a, __b, __c) __extension__ \
   ({ \
      uint8x16_t __ax = __a; \
      uint8x16_t __bx = __b; \
@@ -6369,7 +6369,7 @@
      __rv.__i; \
    })
 
-#define vsriq_n_u16(__a, __b, __c) \
+#define vsriq_n_u16(__a, __b, __c) __extension__ \
   ({ \
      uint16x8_t __ax = __a; \
      uint16x8_t __bx = __b; \
@@ -6378,7 +6378,7 @@
      __rv.__i; \
    })
 
-#define vsriq_n_u32(__a, __b, __c) \
+#define vsriq_n_u32(__a, __b, __c) __extension__ \
   ({ \
      uint32x4_t __ax = __a; \
      uint32x4_t __bx = __b; \
@@ -6387,7 +6387,7 @@
      __rv.__i; \
    })
 
-#define vsriq_n_u64(__a, __b, __c) \
+#define vsriq_n_u64(__a, __b, __c) __extension__ \
   ({ \
      uint64x2_t __ax = __a; \
      uint64x2_t __bx = __b; \
@@ -6396,7 +6396,7 @@
      __rv.__i; \
    })
 
-#define vsriq_n_p8(__a, __b, __c) \
+#define vsriq_n_p8(__a, __b, __c) __extension__ \
   ({ \
      poly8x16_t __ax = __a; \
      poly8x16_t __bx = __b; \
@@ -6405,7 +6405,7 @@
      __rv.__i; \
    })
 
-#define vsriq_n_p16(__a, __b, __c) \
+#define vsriq_n_p16(__a, __b, __c) __extension__ \
   ({ \
      poly16x8_t __ax = __a; \
      poly16x8_t __bx = __b; \
@@ -6414,7 +6414,7 @@
      __rv.__i; \
    })
 
-#define vsli_n_s8(__a, __b, __c) \
+#define vsli_n_s8(__a, __b, __c) __extension__ \
   ({ \
      int8x8_t __ax = __a; \
      int8x8_t __bx = __b; \
@@ -6423,7 +6423,7 @@
      __rv.__i; \
    })
 
-#define vsli_n_s16(__a, __b, __c) \
+#define vsli_n_s16(__a, __b, __c) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      int16x4_t __bx = __b; \
@@ -6432,7 +6432,7 @@
      __rv.__i; \
    })
 
-#define vsli_n_s32(__a, __b, __c) \
+#define vsli_n_s32(__a, __b, __c) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      int32x2_t __bx = __b; \
@@ -6441,7 +6441,7 @@
      __rv.__i; \
    })
 
-#define vsli_n_s64(__a, __b, __c) \
+#define vsli_n_s64(__a, __b, __c) __extension__ \
   ({ \
      int64x1_t __ax = __a; \
      int64x1_t __bx = __b; \
@@ -6450,7 +6450,7 @@
      __rv.__i; \
    })
 
-#define vsli_n_u8(__a, __b, __c) \
+#define vsli_n_u8(__a, __b, __c) __extension__ \
   ({ \
      uint8x8_t __ax = __a; \
      uint8x8_t __bx = __b; \
@@ -6459,7 +6459,7 @@
      __rv.__i; \
    })
 
-#define vsli_n_u16(__a, __b, __c) \
+#define vsli_n_u16(__a, __b, __c) __extension__ \
   ({ \
      uint16x4_t __ax = __a; \
      uint16x4_t __bx = __b; \
@@ -6468,7 +6468,7 @@
      __rv.__i; \
    })
 
-#define vsli_n_u32(__a, __b, __c) \
+#define vsli_n_u32(__a, __b, __c) __extension__ \
   ({ \
      uint32x2_t __ax = __a; \
      uint32x2_t __bx = __b; \
@@ -6477,7 +6477,7 @@
      __rv.__i; \
    })
 
-#define vsli_n_u64(__a, __b, __c) \
+#define vsli_n_u64(__a, __b, __c) __extension__ \
   ({ \
      uint64x1_t __ax = __a; \
      uint64x1_t __bx = __b; \
@@ -6486,7 +6486,7 @@
      __rv.__i; \
    })
 
-#define vsli_n_p8(__a, __b, __c) \
+#define vsli_n_p8(__a, __b, __c) __extension__ \
   ({ \
      poly8x8_t __ax = __a; \
      poly8x8_t __bx = __b; \
@@ -6495,7 +6495,7 @@
      __rv.__i; \
    })
 
-#define vsli_n_p16(__a, __b, __c) \
+#define vsli_n_p16(__a, __b, __c) __extension__ \
   ({ \
      poly16x4_t __ax = __a; \
      poly16x4_t __bx = __b; \
@@ -6504,7 +6504,7 @@
      __rv.__i; \
    })
 
-#define vsliq_n_s8(__a, __b, __c) \
+#define vsliq_n_s8(__a, __b, __c) __extension__ \
   ({ \
      int8x16_t __ax = __a; \
      int8x16_t __bx = __b; \
@@ -6513,7 +6513,7 @@
      __rv.__i; \
    })
 
-#define vsliq_n_s16(__a, __b, __c) \
+#define vsliq_n_s16(__a, __b, __c) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      int16x8_t __bx = __b; \
@@ -6522,7 +6522,7 @@
      __rv.__i; \
    })
 
-#define vsliq_n_s32(__a, __b, __c) \
+#define vsliq_n_s32(__a, __b, __c) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      int32x4_t __bx = __b; \
@@ -6531,7 +6531,7 @@
      __rv.__i; \
    })
 
-#define vsliq_n_s64(__a, __b, __c) \
+#define vsliq_n_s64(__a, __b, __c) __extension__ \
   ({ \
      int64x2_t __ax = __a; \
      int64x2_t __bx = __b; \
@@ -6540,7 +6540,7 @@
      __rv.__i; \
    })
 
-#define vsliq_n_u8(__a, __b, __c) \
+#define vsliq_n_u8(__a, __b, __c) __extension__ \
   ({ \
      uint8x16_t __ax = __a; \
      uint8x16_t __bx = __b; \
@@ -6549,7 +6549,7 @@
      __rv.__i; \
    })
 
-#define vsliq_n_u16(__a, __b, __c) \
+#define vsliq_n_u16(__a, __b, __c) __extension__ \
   ({ \
      uint16x8_t __ax = __a; \
      uint16x8_t __bx = __b; \
@@ -6558,7 +6558,7 @@
      __rv.__i; \
    })
 
-#define vsliq_n_u32(__a, __b, __c) \
+#define vsliq_n_u32(__a, __b, __c) __extension__ \
   ({ \
      uint32x4_t __ax = __a; \
      uint32x4_t __bx = __b; \
@@ -6567,7 +6567,7 @@
      __rv.__i; \
    })
 
-#define vsliq_n_u64(__a, __b, __c) \
+#define vsliq_n_u64(__a, __b, __c) __extension__ \
   ({ \
      uint64x2_t __ax = __a; \
      uint64x2_t __bx = __b; \
@@ -6576,7 +6576,7 @@
      __rv.__i; \
    })
 
-#define vsliq_n_p8(__a, __b, __c) \
+#define vsliq_n_p8(__a, __b, __c) __extension__ \
   ({ \
      poly8x16_t __ax = __a; \
      poly8x16_t __bx = __b; \
@@ -6585,7 +6585,7 @@
      __rv.__i; \
    })
 
-#define vsliq_n_p16(__a, __b, __c) \
+#define vsliq_n_p16(__a, __b, __c) __extension__ \
   ({ \
      poly16x8_t __ax = __a; \
      poly16x8_t __bx = __b; \
@@ -6594,7 +6594,7 @@
      __rv.__i; \
    })
 
-#define vabs_s8(__a) \
+#define vabs_s8(__a) __extension__ \
   ({ \
      int8x8_t __ax = __a; \
      union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -6602,7 +6602,7 @@
      __rv.__i; \
    })
 
-#define vabs_s16(__a) \
+#define vabs_s16(__a) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -6610,7 +6610,7 @@
      __rv.__i; \
    })
 
-#define vabs_s32(__a) \
+#define vabs_s32(__a) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \
@@ -6618,7 +6618,7 @@
      __rv.__i; \
    })
 
-#define vabs_f32(__a) \
+#define vabs_f32(__a) __extension__ \
   ({ \
      float32x2_t __ax = __a; \
      union { float32x2_t __i; __neon_float32x2_t __o; } __rv; \
@@ -6626,7 +6626,7 @@
      __rv.__i; \
    })
 
-#define vabsq_s8(__a) \
+#define vabsq_s8(__a) __extension__ \
   ({ \
      int8x16_t __ax = __a; \
      union { int8x16_t __i; __neon_int8x16_t __o; } __rv; \
@@ -6634,7 +6634,7 @@
      __rv.__i; \
    })
 
-#define vabsq_s16(__a) \
+#define vabsq_s16(__a) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \
@@ -6642,7 +6642,7 @@
      __rv.__i; \
    })
 
-#define vabsq_s32(__a) \
+#define vabsq_s32(__a) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \
@@ -6650,7 +6650,7 @@
      __rv.__i; \
    })
 
-#define vabsq_f32(__a) \
+#define vabsq_f32(__a) __extension__ \
   ({ \
      float32x4_t __ax = __a; \
      union { float32x4_t __i; __neon_float32x4_t __o; } __rv; \
@@ -6658,7 +6658,7 @@
      __rv.__i; \
    })
 
-#define vqabs_s8(__a) \
+#define vqabs_s8(__a) __extension__ \
   ({ \
      int8x8_t __ax = __a; \
      union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -6666,7 +6666,7 @@
      __rv.__i; \
    })
 
-#define vqabs_s16(__a) \
+#define vqabs_s16(__a) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -6674,7 +6674,7 @@
      __rv.__i; \
    })
 
-#define vqabs_s32(__a) \
+#define vqabs_s32(__a) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \
@@ -6682,7 +6682,7 @@
      __rv.__i; \
    })
 
-#define vqabsq_s8(__a) \
+#define vqabsq_s8(__a) __extension__ \
   ({ \
      int8x16_t __ax = __a; \
      union { int8x16_t __i; __neon_int8x16_t __o; } __rv; \
@@ -6690,7 +6690,7 @@
      __rv.__i; \
    })
 
-#define vqabsq_s16(__a) \
+#define vqabsq_s16(__a) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \
@@ -6698,7 +6698,7 @@
      __rv.__i; \
    })
 
-#define vqabsq_s32(__a) \
+#define vqabsq_s32(__a) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \
@@ -6706,7 +6706,7 @@
      __rv.__i; \
    })
 
-#define vneg_s8(__a) \
+#define vneg_s8(__a) __extension__ \
   ({ \
      int8x8_t __ax = __a; \
      union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -6714,7 +6714,7 @@
      __rv.__i; \
    })
 
-#define vneg_s16(__a) \
+#define vneg_s16(__a) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -6722,7 +6722,7 @@
      __rv.__i; \
    })
 
-#define vneg_s32(__a) \
+#define vneg_s32(__a) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \
@@ -6730,7 +6730,7 @@
      __rv.__i; \
    })
 
-#define vneg_f32(__a) \
+#define vneg_f32(__a) __extension__ \
   ({ \
      float32x2_t __ax = __a; \
      union { float32x2_t __i; __neon_float32x2_t __o; } __rv; \
@@ -6738,7 +6738,7 @@
      __rv.__i; \
    })
 
-#define vnegq_s8(__a) \
+#define vnegq_s8(__a) __extension__ \
   ({ \
      int8x16_t __ax = __a; \
      union { int8x16_t __i; __neon_int8x16_t __o; } __rv; \
@@ -6746,7 +6746,7 @@
      __rv.__i; \
    })
 
-#define vnegq_s16(__a) \
+#define vnegq_s16(__a) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \
@@ -6754,7 +6754,7 @@
      __rv.__i; \
    })
 
-#define vnegq_s32(__a) \
+#define vnegq_s32(__a) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \
@@ -6762,7 +6762,7 @@
      __rv.__i; \
    })
 
-#define vnegq_f32(__a) \
+#define vnegq_f32(__a) __extension__ \
   ({ \
      float32x4_t __ax = __a; \
      union { float32x4_t __i; __neon_float32x4_t __o; } __rv; \
@@ -6770,7 +6770,7 @@
      __rv.__i; \
    })
 
-#define vqneg_s8(__a) \
+#define vqneg_s8(__a) __extension__ \
   ({ \
      int8x8_t __ax = __a; \
      union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -6778,7 +6778,7 @@
      __rv.__i; \
    })
 
-#define vqneg_s16(__a) \
+#define vqneg_s16(__a) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -6786,7 +6786,7 @@
      __rv.__i; \
    })
 
-#define vqneg_s32(__a) \
+#define vqneg_s32(__a) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \
@@ -6794,7 +6794,7 @@
      __rv.__i; \
    })
 
-#define vqnegq_s8(__a) \
+#define vqnegq_s8(__a) __extension__ \
   ({ \
      int8x16_t __ax = __a; \
      union { int8x16_t __i; __neon_int8x16_t __o; } __rv; \
@@ -6802,7 +6802,7 @@
      __rv.__i; \
    })
 
-#define vqnegq_s16(__a) \
+#define vqnegq_s16(__a) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \
@@ -6810,7 +6810,7 @@
      __rv.__i; \
    })
 
-#define vqnegq_s32(__a) \
+#define vqnegq_s32(__a) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \
@@ -6818,7 +6818,7 @@
      __rv.__i; \
    })
 
-#define vmvn_s8(__a) \
+#define vmvn_s8(__a) __extension__ \
   ({ \
      int8x8_t __ax = __a; \
      union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -6826,7 +6826,7 @@
      __rv.__i; \
    })
 
-#define vmvn_s16(__a) \
+#define vmvn_s16(__a) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -6834,7 +6834,7 @@
      __rv.__i; \
    })
 
-#define vmvn_s32(__a) \
+#define vmvn_s32(__a) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \
@@ -6842,7 +6842,7 @@
      __rv.__i; \
    })
 
-#define vmvn_u8(__a) \
+#define vmvn_u8(__a) __extension__ \
   ({ \
      uint8x8_t __ax = __a; \
      union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -6850,7 +6850,7 @@
      __rv.__i; \
    })
 
-#define vmvn_u16(__a) \
+#define vmvn_u16(__a) __extension__ \
   ({ \
      uint16x4_t __ax = __a; \
      union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -6858,7 +6858,7 @@
      __rv.__i; \
    })
 
-#define vmvn_u32(__a) \
+#define vmvn_u32(__a) __extension__ \
   ({ \
      uint32x2_t __ax = __a; \
      union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \
@@ -6866,7 +6866,7 @@
      __rv.__i; \
    })
 
-#define vmvn_p8(__a) \
+#define vmvn_p8(__a) __extension__ \
   ({ \
      poly8x8_t __ax = __a; \
      union { poly8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -6874,7 +6874,7 @@
      __rv.__i; \
    })
 
-#define vmvnq_s8(__a) \
+#define vmvnq_s8(__a) __extension__ \
   ({ \
      int8x16_t __ax = __a; \
      union { int8x16_t __i; __neon_int8x16_t __o; } __rv; \
@@ -6882,7 +6882,7 @@
      __rv.__i; \
    })
 
-#define vmvnq_s16(__a) \
+#define vmvnq_s16(__a) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \
@@ -6890,7 +6890,7 @@
      __rv.__i; \
    })
 
-#define vmvnq_s32(__a) \
+#define vmvnq_s32(__a) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \
@@ -6898,7 +6898,7 @@
      __rv.__i; \
    })
 
-#define vmvnq_u8(__a) \
+#define vmvnq_u8(__a) __extension__ \
   ({ \
      uint8x16_t __ax = __a; \
      union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \
@@ -6906,7 +6906,7 @@
      __rv.__i; \
    })
 
-#define vmvnq_u16(__a) \
+#define vmvnq_u16(__a) __extension__ \
   ({ \
      uint16x8_t __ax = __a; \
      union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \
@@ -6914,7 +6914,7 @@
      __rv.__i; \
    })
 
-#define vmvnq_u32(__a) \
+#define vmvnq_u32(__a) __extension__ \
   ({ \
      uint32x4_t __ax = __a; \
      union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \
@@ -6922,7 +6922,7 @@
      __rv.__i; \
    })
 
-#define vmvnq_p8(__a) \
+#define vmvnq_p8(__a) __extension__ \
   ({ \
      poly8x16_t __ax = __a; \
      union { poly8x16_t __i; __neon_int8x16_t __o; } __rv; \
@@ -6930,7 +6930,7 @@
      __rv.__i; \
    })
 
-#define vcls_s8(__a) \
+#define vcls_s8(__a) __extension__ \
   ({ \
      int8x8_t __ax = __a; \
      union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -6938,7 +6938,7 @@
      __rv.__i; \
    })
 
-#define vcls_s16(__a) \
+#define vcls_s16(__a) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -6946,7 +6946,7 @@
      __rv.__i; \
    })
 
-#define vcls_s32(__a) \
+#define vcls_s32(__a) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \
@@ -6954,7 +6954,7 @@
      __rv.__i; \
    })
 
-#define vclsq_s8(__a) \
+#define vclsq_s8(__a) __extension__ \
   ({ \
      int8x16_t __ax = __a; \
      union { int8x16_t __i; __neon_int8x16_t __o; } __rv; \
@@ -6962,7 +6962,7 @@
      __rv.__i; \
    })
 
-#define vclsq_s16(__a) \
+#define vclsq_s16(__a) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \
@@ -6970,7 +6970,7 @@
      __rv.__i; \
    })
 
-#define vclsq_s32(__a) \
+#define vclsq_s32(__a) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \
@@ -6978,7 +6978,7 @@
      __rv.__i; \
    })
 
-#define vclz_s8(__a) \
+#define vclz_s8(__a) __extension__ \
   ({ \
      int8x8_t __ax = __a; \
      union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -6986,7 +6986,7 @@
      __rv.__i; \
    })
 
-#define vclz_s16(__a) \
+#define vclz_s16(__a) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -6994,7 +6994,7 @@
      __rv.__i; \
    })
 
-#define vclz_s32(__a) \
+#define vclz_s32(__a) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \
@@ -7002,7 +7002,7 @@
      __rv.__i; \
    })
 
-#define vclz_u8(__a) \
+#define vclz_u8(__a) __extension__ \
   ({ \
      uint8x8_t __ax = __a; \
      union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -7010,7 +7010,7 @@
      __rv.__i; \
    })
 
-#define vclz_u16(__a) \
+#define vclz_u16(__a) __extension__ \
   ({ \
      uint16x4_t __ax = __a; \
      union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -7018,7 +7018,7 @@
      __rv.__i; \
    })
 
-#define vclz_u32(__a) \
+#define vclz_u32(__a) __extension__ \
   ({ \
      uint32x2_t __ax = __a; \
      union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \
@@ -7026,7 +7026,7 @@
      __rv.__i; \
    })
 
-#define vclzq_s8(__a) \
+#define vclzq_s8(__a) __extension__ \
   ({ \
      int8x16_t __ax = __a; \
      union { int8x16_t __i; __neon_int8x16_t __o; } __rv; \
@@ -7034,7 +7034,7 @@
      __rv.__i; \
    })
 
-#define vclzq_s16(__a) \
+#define vclzq_s16(__a) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \
@@ -7042,7 +7042,7 @@
      __rv.__i; \
    })
 
-#define vclzq_s32(__a) \
+#define vclzq_s32(__a) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \
@@ -7050,7 +7050,7 @@
      __rv.__i; \
    })
 
-#define vclzq_u8(__a) \
+#define vclzq_u8(__a) __extension__ \
   ({ \
      uint8x16_t __ax = __a; \
      union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \
@@ -7058,7 +7058,7 @@
      __rv.__i; \
    })
 
-#define vclzq_u16(__a) \
+#define vclzq_u16(__a) __extension__ \
   ({ \
      uint16x8_t __ax = __a; \
      union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \
@@ -7066,7 +7066,7 @@
      __rv.__i; \
    })
 
-#define vclzq_u32(__a) \
+#define vclzq_u32(__a) __extension__ \
   ({ \
      uint32x4_t __ax = __a; \
      union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \
@@ -7074,7 +7074,7 @@
      __rv.__i; \
    })
 
-#define vcnt_s8(__a) \
+#define vcnt_s8(__a) __extension__ \
   ({ \
      int8x8_t __ax = __a; \
      union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -7082,7 +7082,7 @@
      __rv.__i; \
    })
 
-#define vcnt_u8(__a) \
+#define vcnt_u8(__a) __extension__ \
   ({ \
      uint8x8_t __ax = __a; \
      union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -7090,7 +7090,7 @@
      __rv.__i; \
    })
 
-#define vcnt_p8(__a) \
+#define vcnt_p8(__a) __extension__ \
   ({ \
      poly8x8_t __ax = __a; \
      union { poly8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -7098,7 +7098,7 @@
      __rv.__i; \
    })
 
-#define vcntq_s8(__a) \
+#define vcntq_s8(__a) __extension__ \
   ({ \
      int8x16_t __ax = __a; \
      union { int8x16_t __i; __neon_int8x16_t __o; } __rv; \
@@ -7106,7 +7106,7 @@
      __rv.__i; \
    })
 
-#define vcntq_u8(__a) \
+#define vcntq_u8(__a) __extension__ \
   ({ \
      uint8x16_t __ax = __a; \
      union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \
@@ -7114,7 +7114,7 @@
      __rv.__i; \
    })
 
-#define vcntq_p8(__a) \
+#define vcntq_p8(__a) __extension__ \
   ({ \
      poly8x16_t __ax = __a; \
      union { poly8x16_t __i; __neon_int8x16_t __o; } __rv; \
@@ -7122,7 +7122,7 @@
      __rv.__i; \
    })
 
-#define vrecpe_f32(__a) \
+#define vrecpe_f32(__a) __extension__ \
   ({ \
      float32x2_t __ax = __a; \
      union { float32x2_t __i; __neon_float32x2_t __o; } __rv; \
@@ -7130,7 +7130,7 @@
      __rv.__i; \
    })
 
-#define vrecpe_u32(__a) \
+#define vrecpe_u32(__a) __extension__ \
   ({ \
      uint32x2_t __ax = __a; \
      union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \
@@ -7138,7 +7138,7 @@
      __rv.__i; \
    })
 
-#define vrecpeq_f32(__a) \
+#define vrecpeq_f32(__a) __extension__ \
   ({ \
      float32x4_t __ax = __a; \
      union { float32x4_t __i; __neon_float32x4_t __o; } __rv; \
@@ -7146,7 +7146,7 @@
      __rv.__i; \
    })
 
-#define vrecpeq_u32(__a) \
+#define vrecpeq_u32(__a) __extension__ \
   ({ \
      uint32x4_t __ax = __a; \
      union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \
@@ -7154,7 +7154,7 @@
      __rv.__i; \
    })
 
-#define vrsqrte_f32(__a) \
+#define vrsqrte_f32(__a) __extension__ \
   ({ \
      float32x2_t __ax = __a; \
      union { float32x2_t __i; __neon_float32x2_t __o; } __rv; \
@@ -7162,7 +7162,7 @@
      __rv.__i; \
    })
 
-#define vrsqrte_u32(__a) \
+#define vrsqrte_u32(__a) __extension__ \
   ({ \
      uint32x2_t __ax = __a; \
      union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \
@@ -7170,7 +7170,7 @@
      __rv.__i; \
    })
 
-#define vrsqrteq_f32(__a) \
+#define vrsqrteq_f32(__a) __extension__ \
   ({ \
      float32x4_t __ax = __a; \
      union { float32x4_t __i; __neon_float32x4_t __o; } __rv; \
@@ -7178,7 +7178,7 @@
      __rv.__i; \
    })
 
-#define vrsqrteq_u32(__a) \
+#define vrsqrteq_u32(__a) __extension__ \
   ({ \
      uint32x4_t __ax = __a; \
      union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \
@@ -7186,139 +7186,139 @@
      __rv.__i; \
    })
 
-#define vget_lane_s8(__a, __b) \
+#define vget_lane_s8(__a, __b) __extension__ \
   ({ \
      int8x8_t __ax = __a; \
      (int8_t)__builtin_neon_vget_lanev8qi (__ax.val, __b, 1); \
    })
 
-#define vget_lane_s16(__a, __b) \
+#define vget_lane_s16(__a, __b) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      (int16_t)__builtin_neon_vget_lanev4hi (__ax.val, __b, 1); \
    })
 
-#define vget_lane_s32(__a, __b) \
+#define vget_lane_s32(__a, __b) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      (int32_t)__builtin_neon_vget_lanev2si (__ax.val, __b, 1); \
    })
 
-#define vget_lane_f32(__a, __b) \
+#define vget_lane_f32(__a, __b) __extension__ \
   ({ \
      float32x2_t __ax = __a; \
      (float32_t)__builtin_neon_vget_lanev2sf (__ax.val, __b, 5); \
    })
 
-#define vget_lane_u8(__a, __b) \
+#define vget_lane_u8(__a, __b) __extension__ \
   ({ \
      uint8x8_t __ax = __a; \
      (uint8_t)__builtin_neon_vget_lanev8qi ((__neon_int8x8_t) __ax.val, __b, 0); \
    })
 
-#define vget_lane_u16(__a, __b) \
+#define vget_lane_u16(__a, __b) __extension__ \
   ({ \
      uint16x4_t __ax = __a; \
      (uint16_t)__builtin_neon_vget_lanev4hi ((__neon_int16x4_t) __ax.val, __b, 0); \
    })
 
-#define vget_lane_u32(__a, __b) \
+#define vget_lane_u32(__a, __b) __extension__ \
   ({ \
      uint32x2_t __ax = __a; \
      (uint32_t)__builtin_neon_vget_lanev2si ((__neon_int32x2_t) __ax.val, __b, 0); \
    })
 
-#define vget_lane_p8(__a, __b) \
+#define vget_lane_p8(__a, __b) __extension__ \
   ({ \
      poly8x8_t __ax = __a; \
      (poly8_t)__builtin_neon_vget_lanev8qi ((__neon_int8x8_t) __ax.val, __b, 4); \
    })
 
-#define vget_lane_p16(__a, __b) \
+#define vget_lane_p16(__a, __b) __extension__ \
   ({ \
      poly16x4_t __ax = __a; \
      (poly16_t)__builtin_neon_vget_lanev4hi ((__neon_int16x4_t) __ax.val, __b, 4); \
    })
 
-#define vget_lane_s64(__a, __b) \
+#define vget_lane_s64(__a, __b) __extension__ \
   ({ \
      int64x1_t __ax = __a; \
      (int64_t)__builtin_neon_vget_lanev1di (__ax.val, __b, 1); \
    })
 
-#define vget_lane_u64(__a, __b) \
+#define vget_lane_u64(__a, __b) __extension__ \
   ({ \
      uint64x1_t __ax = __a; \
      (uint64_t)__builtin_neon_vget_lanev1di ((__neon_int64x1_t) __ax.val, __b, 0); \
    })
 
-#define vgetq_lane_s8(__a, __b) \
+#define vgetq_lane_s8(__a, __b) __extension__ \
   ({ \
      int8x16_t __ax = __a; \
      (int8_t)__builtin_neon_vget_lanev16qi (__ax.val, __b, 1); \
    })
 
-#define vgetq_lane_s16(__a, __b) \
+#define vgetq_lane_s16(__a, __b) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      (int16_t)__builtin_neon_vget_lanev8hi (__ax.val, __b, 1); \
    })
 
-#define vgetq_lane_s32(__a, __b) \
+#define vgetq_lane_s32(__a, __b) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      (int32_t)__builtin_neon_vget_lanev4si (__ax.val, __b, 1); \
    })
 
-#define vgetq_lane_f32(__a, __b) \
+#define vgetq_lane_f32(__a, __b) __extension__ \
   ({ \
      float32x4_t __ax = __a; \
      (float32_t)__builtin_neon_vget_lanev4sf (__ax.val, __b, 5); \
    })
 
-#define vgetq_lane_u8(__a, __b) \
+#define vgetq_lane_u8(__a, __b) __extension__ \
   ({ \
      uint8x16_t __ax = __a; \
      (uint8_t)__builtin_neon_vget_lanev16qi ((__neon_int8x16_t) __ax.val, __b, 0); \
    })
 
-#define vgetq_lane_u16(__a, __b) \
+#define vgetq_lane_u16(__a, __b) __extension__ \
   ({ \
      uint16x8_t __ax = __a; \
      (uint16_t)__builtin_neon_vget_lanev8hi ((__neon_int16x8_t) __ax.val, __b, 0); \
    })
 
-#define vgetq_lane_u32(__a, __b) \
+#define vgetq_lane_u32(__a, __b) __extension__ \
   ({ \
      uint32x4_t __ax = __a; \
      (uint32_t)__builtin_neon_vget_lanev4si ((__neon_int32x4_t) __ax.val, __b, 0); \
    })
 
-#define vgetq_lane_p8(__a, __b) \
+#define vgetq_lane_p8(__a, __b) __extension__ \
   ({ \
      poly8x16_t __ax = __a; \
      (poly8_t)__builtin_neon_vget_lanev16qi ((__neon_int8x16_t) __ax.val, __b, 4); \
    })
 
-#define vgetq_lane_p16(__a, __b) \
+#define vgetq_lane_p16(__a, __b) __extension__ \
   ({ \
      poly16x8_t __ax = __a; \
      (poly16_t)__builtin_neon_vget_lanev8hi ((__neon_int16x8_t) __ax.val, __b, 4); \
    })
 
-#define vgetq_lane_s64(__a, __b) \
+#define vgetq_lane_s64(__a, __b) __extension__ \
   ({ \
      int64x2_t __ax = __a; \
      (int64_t)__builtin_neon_vget_lanev2di (__ax.val, __b, 1); \
    })
 
-#define vgetq_lane_u64(__a, __b) \
+#define vgetq_lane_u64(__a, __b) __extension__ \
   ({ \
      uint64x2_t __ax = __a; \
      (uint64_t)__builtin_neon_vget_lanev2di ((__neon_int64x2_t) __ax.val, __b, 0); \
    })
 
-#define vset_lane_s8(__a, __b, __c) \
+#define vset_lane_s8(__a, __b, __c) __extension__ \
   ({ \
      int8_t __ax = __a; \
      int8x8_t __bx = __b; \
@@ -7327,7 +7327,7 @@
      __rv.__i; \
    })
 
-#define vset_lane_s16(__a, __b, __c) \
+#define vset_lane_s16(__a, __b, __c) __extension__ \
   ({ \
      int16_t __ax = __a; \
      int16x4_t __bx = __b; \
@@ -7336,7 +7336,7 @@
      __rv.__i; \
    })
 
-#define vset_lane_s32(__a, __b, __c) \
+#define vset_lane_s32(__a, __b, __c) __extension__ \
   ({ \
      int32_t __ax = __a; \
      int32x2_t __bx = __b; \
@@ -7345,7 +7345,7 @@
      __rv.__i; \
    })
 
-#define vset_lane_f32(__a, __b, __c) \
+#define vset_lane_f32(__a, __b, __c) __extension__ \
   ({ \
      float32_t __ax = __a; \
      float32x2_t __bx = __b; \
@@ -7354,7 +7354,7 @@
      __rv.__i; \
    })
 
-#define vset_lane_u8(__a, __b, __c) \
+#define vset_lane_u8(__a, __b, __c) __extension__ \
   ({ \
      uint8_t __ax = __a; \
      uint8x8_t __bx = __b; \
@@ -7363,7 +7363,7 @@
      __rv.__i; \
    })
 
-#define vset_lane_u16(__a, __b, __c) \
+#define vset_lane_u16(__a, __b, __c) __extension__ \
   ({ \
      uint16_t __ax = __a; \
      uint16x4_t __bx = __b; \
@@ -7372,7 +7372,7 @@
      __rv.__i; \
    })
 
-#define vset_lane_u32(__a, __b, __c) \
+#define vset_lane_u32(__a, __b, __c) __extension__ \
   ({ \
      uint32_t __ax = __a; \
      uint32x2_t __bx = __b; \
@@ -7381,7 +7381,7 @@
      __rv.__i; \
    })
 
-#define vset_lane_p8(__a, __b, __c) \
+#define vset_lane_p8(__a, __b, __c) __extension__ \
   ({ \
      poly8_t __ax = __a; \
      poly8x8_t __bx = __b; \
@@ -7390,7 +7390,7 @@
      __rv.__i; \
    })
 
-#define vset_lane_p16(__a, __b, __c) \
+#define vset_lane_p16(__a, __b, __c) __extension__ \
   ({ \
      poly16_t __ax = __a; \
      poly16x4_t __bx = __b; \
@@ -7399,7 +7399,7 @@
      __rv.__i; \
    })
 
-#define vset_lane_s64(__a, __b, __c) \
+#define vset_lane_s64(__a, __b, __c) __extension__ \
   ({ \
      int64_t __ax = __a; \
      int64x1_t __bx = __b; \
@@ -7408,7 +7408,7 @@
      __rv.__i; \
    })
 
-#define vset_lane_u64(__a, __b, __c) \
+#define vset_lane_u64(__a, __b, __c) __extension__ \
   ({ \
      uint64_t __ax = __a; \
      uint64x1_t __bx = __b; \
@@ -7417,7 +7417,7 @@
      __rv.__i; \
    })
 
-#define vsetq_lane_s8(__a, __b, __c) \
+#define vsetq_lane_s8(__a, __b, __c) __extension__ \
   ({ \
      int8_t __ax = __a; \
      int8x16_t __bx = __b; \
@@ -7426,7 +7426,7 @@
      __rv.__i; \
    })
 
-#define vsetq_lane_s16(__a, __b, __c) \
+#define vsetq_lane_s16(__a, __b, __c) __extension__ \
   ({ \
      int16_t __ax = __a; \
      int16x8_t __bx = __b; \
@@ -7435,7 +7435,7 @@
      __rv.__i; \
    })
 
-#define vsetq_lane_s32(__a, __b, __c) \
+#define vsetq_lane_s32(__a, __b, __c) __extension__ \
   ({ \
      int32_t __ax = __a; \
      int32x4_t __bx = __b; \
@@ -7444,7 +7444,7 @@
      __rv.__i; \
    })
 
-#define vsetq_lane_f32(__a, __b, __c) \
+#define vsetq_lane_f32(__a, __b, __c) __extension__ \
   ({ \
      float32_t __ax = __a; \
      float32x4_t __bx = __b; \
@@ -7453,7 +7453,7 @@
      __rv.__i; \
    })
 
-#define vsetq_lane_u8(__a, __b, __c) \
+#define vsetq_lane_u8(__a, __b, __c) __extension__ \
   ({ \
      uint8_t __ax = __a; \
      uint8x16_t __bx = __b; \
@@ -7462,7 +7462,7 @@
      __rv.__i; \
    })
 
-#define vsetq_lane_u16(__a, __b, __c) \
+#define vsetq_lane_u16(__a, __b, __c) __extension__ \
   ({ \
      uint16_t __ax = __a; \
      uint16x8_t __bx = __b; \
@@ -7471,7 +7471,7 @@
      __rv.__i; \
    })
 
-#define vsetq_lane_u32(__a, __b, __c) \
+#define vsetq_lane_u32(__a, __b, __c) __extension__ \
   ({ \
      uint32_t __ax = __a; \
      uint32x4_t __bx = __b; \
@@ -7480,7 +7480,7 @@
      __rv.__i; \
    })
 
-#define vsetq_lane_p8(__a, __b, __c) \
+#define vsetq_lane_p8(__a, __b, __c) __extension__ \
   ({ \
      poly8_t __ax = __a; \
      poly8x16_t __bx = __b; \
@@ -7489,7 +7489,7 @@
      __rv.__i; \
    })
 
-#define vsetq_lane_p16(__a, __b, __c) \
+#define vsetq_lane_p16(__a, __b, __c) __extension__ \
   ({ \
      poly16_t __ax = __a; \
      poly16x8_t __bx = __b; \
@@ -7498,7 +7498,7 @@
      __rv.__i; \
    })
 
-#define vsetq_lane_s64(__a, __b, __c) \
+#define vsetq_lane_s64(__a, __b, __c) __extension__ \
   ({ \
      int64_t __ax = __a; \
      int64x2_t __bx = __b; \
@@ -7507,7 +7507,7 @@
      __rv.__i; \
    })
 
-#define vsetq_lane_u64(__a, __b, __c) \
+#define vsetq_lane_u64(__a, __b, __c) __extension__ \
   ({ \
      uint64_t __ax = __a; \
      uint64x2_t __bx = __b; \
@@ -7516,7 +7516,7 @@
      __rv.__i; \
    })
 
-#define vcreate_s8(__a) \
+#define vcreate_s8(__a) __extension__ \
   ({ \
      uint64_t __ax = __a; \
      union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -7524,7 +7524,7 @@
      __rv.__i; \
    })
 
-#define vcreate_s16(__a) \
+#define vcreate_s16(__a) __extension__ \
   ({ \
      uint64_t __ax = __a; \
      union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -7532,7 +7532,7 @@
      __rv.__i; \
    })
 
-#define vcreate_s32(__a) \
+#define vcreate_s32(__a) __extension__ \
   ({ \
      uint64_t __ax = __a; \
      union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \
@@ -7540,7 +7540,7 @@
      __rv.__i; \
    })
 
-#define vcreate_s64(__a) \
+#define vcreate_s64(__a) __extension__ \
   ({ \
      uint64_t __ax = __a; \
      union { int64x1_t __i; __neon_int64x1_t __o; } __rv; \
@@ -7548,7 +7548,7 @@
      __rv.__i; \
    })
 
-#define vcreate_f32(__a) \
+#define vcreate_f32(__a) __extension__ \
   ({ \
      uint64_t __ax = __a; \
      union { float32x2_t __i; __neon_float32x2_t __o; } __rv; \
@@ -7556,7 +7556,7 @@
      __rv.__i; \
    })
 
-#define vcreate_u8(__a) \
+#define vcreate_u8(__a) __extension__ \
   ({ \
      uint64_t __ax = __a; \
      union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -7564,7 +7564,7 @@
      __rv.__i; \
    })
 
-#define vcreate_u16(__a) \
+#define vcreate_u16(__a) __extension__ \
   ({ \
      uint64_t __ax = __a; \
      union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -7572,7 +7572,7 @@
      __rv.__i; \
    })
 
-#define vcreate_u32(__a) \
+#define vcreate_u32(__a) __extension__ \
   ({ \
      uint64_t __ax = __a; \
      union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \
@@ -7580,7 +7580,7 @@
      __rv.__i; \
    })
 
-#define vcreate_u64(__a) \
+#define vcreate_u64(__a) __extension__ \
   ({ \
      uint64_t __ax = __a; \
      union { uint64x1_t __i; __neon_int64x1_t __o; } __rv; \
@@ -7588,7 +7588,7 @@
      __rv.__i; \
    })
 
-#define vcreate_p8(__a) \
+#define vcreate_p8(__a) __extension__ \
   ({ \
      uint64_t __ax = __a; \
      union { poly8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -7596,7 +7596,7 @@
      __rv.__i; \
    })
 
-#define vcreate_p16(__a) \
+#define vcreate_p16(__a) __extension__ \
   ({ \
      uint64_t __ax = __a; \
      union { poly16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -7604,7 +7604,7 @@
      __rv.__i; \
    })
 
-#define vdup_n_s8(__a) \
+#define vdup_n_s8(__a) __extension__ \
   ({ \
      int8_t __ax = __a; \
      union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -7612,7 +7612,7 @@
      __rv.__i; \
    })
 
-#define vdup_n_s16(__a) \
+#define vdup_n_s16(__a) __extension__ \
   ({ \
      int16_t __ax = __a; \
      union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -7620,7 +7620,7 @@
      __rv.__i; \
    })
 
-#define vdup_n_s32(__a) \
+#define vdup_n_s32(__a) __extension__ \
   ({ \
      int32_t __ax = __a; \
      union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \
@@ -7628,7 +7628,7 @@
      __rv.__i; \
    })
 
-#define vdup_n_f32(__a) \
+#define vdup_n_f32(__a) __extension__ \
   ({ \
      float32_t __ax = __a; \
      union { float32x2_t __i; __neon_float32x2_t __o; } __rv; \
@@ -7636,7 +7636,7 @@
      __rv.__i; \
    })
 
-#define vdup_n_u8(__a) \
+#define vdup_n_u8(__a) __extension__ \
   ({ \
      uint8_t __ax = __a; \
      union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -7644,7 +7644,7 @@
      __rv.__i; \
    })
 
-#define vdup_n_u16(__a) \
+#define vdup_n_u16(__a) __extension__ \
   ({ \
      uint16_t __ax = __a; \
      union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -7652,7 +7652,7 @@
      __rv.__i; \
    })
 
-#define vdup_n_u32(__a) \
+#define vdup_n_u32(__a) __extension__ \
   ({ \
      uint32_t __ax = __a; \
      union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \
@@ -7660,7 +7660,7 @@
      __rv.__i; \
    })
 
-#define vdup_n_p8(__a) \
+#define vdup_n_p8(__a) __extension__ \
   ({ \
      poly8_t __ax = __a; \
      union { poly8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -7668,7 +7668,7 @@
      __rv.__i; \
    })
 
-#define vdup_n_p16(__a) \
+#define vdup_n_p16(__a) __extension__ \
   ({ \
      poly16_t __ax = __a; \
      union { poly16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -7676,7 +7676,7 @@
      __rv.__i; \
    })
 
-#define vdup_n_s64(__a) \
+#define vdup_n_s64(__a) __extension__ \
   ({ \
      int64_t __ax = __a; \
      union { int64x1_t __i; __neon_int64x1_t __o; } __rv; \
@@ -7684,7 +7684,7 @@
      __rv.__i; \
    })
 
-#define vdup_n_u64(__a) \
+#define vdup_n_u64(__a) __extension__ \
   ({ \
      uint64_t __ax = __a; \
      union { uint64x1_t __i; __neon_int64x1_t __o; } __rv; \
@@ -7692,7 +7692,7 @@
      __rv.__i; \
    })
 
-#define vdupq_n_s8(__a) \
+#define vdupq_n_s8(__a) __extension__ \
   ({ \
      int8_t __ax = __a; \
      union { int8x16_t __i; __neon_int8x16_t __o; } __rv; \
@@ -7700,7 +7700,7 @@
      __rv.__i; \
    })
 
-#define vdupq_n_s16(__a) \
+#define vdupq_n_s16(__a) __extension__ \
   ({ \
      int16_t __ax = __a; \
      union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \
@@ -7708,7 +7708,7 @@
      __rv.__i; \
    })
 
-#define vdupq_n_s32(__a) \
+#define vdupq_n_s32(__a) __extension__ \
   ({ \
      int32_t __ax = __a; \
      union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \
@@ -7716,7 +7716,7 @@
      __rv.__i; \
    })
 
-#define vdupq_n_f32(__a) \
+#define vdupq_n_f32(__a) __extension__ \
   ({ \
      float32_t __ax = __a; \
      union { float32x4_t __i; __neon_float32x4_t __o; } __rv; \
@@ -7724,7 +7724,7 @@
      __rv.__i; \
    })
 
-#define vdupq_n_u8(__a) \
+#define vdupq_n_u8(__a) __extension__ \
   ({ \
      uint8_t __ax = __a; \
      union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \
@@ -7732,7 +7732,7 @@
      __rv.__i; \
    })
 
-#define vdupq_n_u16(__a) \
+#define vdupq_n_u16(__a) __extension__ \
   ({ \
      uint16_t __ax = __a; \
      union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \
@@ -7740,7 +7740,7 @@
      __rv.__i; \
    })
 
-#define vdupq_n_u32(__a) \
+#define vdupq_n_u32(__a) __extension__ \
   ({ \
      uint32_t __ax = __a; \
      union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \
@@ -7748,7 +7748,7 @@
      __rv.__i; \
    })
 
-#define vdupq_n_p8(__a) \
+#define vdupq_n_p8(__a) __extension__ \
   ({ \
      poly8_t __ax = __a; \
      union { poly8x16_t __i; __neon_int8x16_t __o; } __rv; \
@@ -7756,7 +7756,7 @@
      __rv.__i; \
    })
 
-#define vdupq_n_p16(__a) \
+#define vdupq_n_p16(__a) __extension__ \
   ({ \
      poly16_t __ax = __a; \
      union { poly16x8_t __i; __neon_int16x8_t __o; } __rv; \
@@ -7764,7 +7764,7 @@
      __rv.__i; \
    })
 
-#define vdupq_n_s64(__a) \
+#define vdupq_n_s64(__a) __extension__ \
   ({ \
      int64_t __ax = __a; \
      union { int64x2_t __i; __neon_int64x2_t __o; } __rv; \
@@ -7772,7 +7772,7 @@
      __rv.__i; \
    })
 
-#define vdupq_n_u64(__a) \
+#define vdupq_n_u64(__a) __extension__ \
   ({ \
      uint64_t __ax = __a; \
      union { uint64x2_t __i; __neon_int64x2_t __o; } __rv; \
@@ -7780,7 +7780,7 @@
      __rv.__i; \
    })
 
-#define vmov_n_s8(__a) \
+#define vmov_n_s8(__a) __extension__ \
   ({ \
      int8_t __ax = __a; \
      union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -7788,7 +7788,7 @@
      __rv.__i; \
    })
 
-#define vmov_n_s16(__a) \
+#define vmov_n_s16(__a) __extension__ \
   ({ \
      int16_t __ax = __a; \
      union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -7796,7 +7796,7 @@
      __rv.__i; \
    })
 
-#define vmov_n_s32(__a) \
+#define vmov_n_s32(__a) __extension__ \
   ({ \
      int32_t __ax = __a; \
      union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \
@@ -7804,7 +7804,7 @@
      __rv.__i; \
    })
 
-#define vmov_n_f32(__a) \
+#define vmov_n_f32(__a) __extension__ \
   ({ \
      float32_t __ax = __a; \
      union { float32x2_t __i; __neon_float32x2_t __o; } __rv; \
@@ -7812,7 +7812,7 @@
      __rv.__i; \
    })
 
-#define vmov_n_u8(__a) \
+#define vmov_n_u8(__a) __extension__ \
   ({ \
      uint8_t __ax = __a; \
      union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -7820,7 +7820,7 @@
      __rv.__i; \
    })
 
-#define vmov_n_u16(__a) \
+#define vmov_n_u16(__a) __extension__ \
   ({ \
      uint16_t __ax = __a; \
      union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -7828,7 +7828,7 @@
      __rv.__i; \
    })
 
-#define vmov_n_u32(__a) \
+#define vmov_n_u32(__a) __extension__ \
   ({ \
      uint32_t __ax = __a; \
      union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \
@@ -7836,7 +7836,7 @@
      __rv.__i; \
    })
 
-#define vmov_n_p8(__a) \
+#define vmov_n_p8(__a) __extension__ \
   ({ \
      poly8_t __ax = __a; \
      union { poly8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -7844,7 +7844,7 @@
      __rv.__i; \
    })
 
-#define vmov_n_p16(__a) \
+#define vmov_n_p16(__a) __extension__ \
   ({ \
      poly16_t __ax = __a; \
      union { poly16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -7852,7 +7852,7 @@
      __rv.__i; \
    })
 
-#define vmov_n_s64(__a) \
+#define vmov_n_s64(__a) __extension__ \
   ({ \
      int64_t __ax = __a; \
      union { int64x1_t __i; __neon_int64x1_t __o; } __rv; \
@@ -7860,7 +7860,7 @@
      __rv.__i; \
    })
 
-#define vmov_n_u64(__a) \
+#define vmov_n_u64(__a) __extension__ \
   ({ \
      uint64_t __ax = __a; \
      union { uint64x1_t __i; __neon_int64x1_t __o; } __rv; \
@@ -7868,7 +7868,7 @@
      __rv.__i; \
    })
 
-#define vmovq_n_s8(__a) \
+#define vmovq_n_s8(__a) __extension__ \
   ({ \
      int8_t __ax = __a; \
      union { int8x16_t __i; __neon_int8x16_t __o; } __rv; \
@@ -7876,7 +7876,7 @@
      __rv.__i; \
    })
 
-#define vmovq_n_s16(__a) \
+#define vmovq_n_s16(__a) __extension__ \
   ({ \
      int16_t __ax = __a; \
      union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \
@@ -7884,7 +7884,7 @@
      __rv.__i; \
    })
 
-#define vmovq_n_s32(__a) \
+#define vmovq_n_s32(__a) __extension__ \
   ({ \
      int32_t __ax = __a; \
      union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \
@@ -7892,7 +7892,7 @@
      __rv.__i; \
    })
 
-#define vmovq_n_f32(__a) \
+#define vmovq_n_f32(__a) __extension__ \
   ({ \
      float32_t __ax = __a; \
      union { float32x4_t __i; __neon_float32x4_t __o; } __rv; \
@@ -7900,7 +7900,7 @@
      __rv.__i; \
    })
 
-#define vmovq_n_u8(__a) \
+#define vmovq_n_u8(__a) __extension__ \
   ({ \
      uint8_t __ax = __a; \
      union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \
@@ -7908,7 +7908,7 @@
      __rv.__i; \
    })
 
-#define vmovq_n_u16(__a) \
+#define vmovq_n_u16(__a) __extension__ \
   ({ \
      uint16_t __ax = __a; \
      union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \
@@ -7916,7 +7916,7 @@
      __rv.__i; \
    })
 
-#define vmovq_n_u32(__a) \
+#define vmovq_n_u32(__a) __extension__ \
   ({ \
      uint32_t __ax = __a; \
      union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \
@@ -7924,7 +7924,7 @@
      __rv.__i; \
    })
 
-#define vmovq_n_p8(__a) \
+#define vmovq_n_p8(__a) __extension__ \
   ({ \
      poly8_t __ax = __a; \
      union { poly8x16_t __i; __neon_int8x16_t __o; } __rv; \
@@ -7932,7 +7932,7 @@
      __rv.__i; \
    })
 
-#define vmovq_n_p16(__a) \
+#define vmovq_n_p16(__a) __extension__ \
   ({ \
      poly16_t __ax = __a; \
      union { poly16x8_t __i; __neon_int16x8_t __o; } __rv; \
@@ -7940,7 +7940,7 @@
      __rv.__i; \
    })
 
-#define vmovq_n_s64(__a) \
+#define vmovq_n_s64(__a) __extension__ \
   ({ \
      int64_t __ax = __a; \
      union { int64x2_t __i; __neon_int64x2_t __o; } __rv; \
@@ -7948,7 +7948,7 @@
      __rv.__i; \
    })
 
-#define vmovq_n_u64(__a) \
+#define vmovq_n_u64(__a) __extension__ \
   ({ \
      uint64_t __ax = __a; \
      union { uint64x2_t __i; __neon_int64x2_t __o; } __rv; \
@@ -7956,7 +7956,7 @@
      __rv.__i; \
    })
 
-#define vdup_lane_s8(__a, __b) \
+#define vdup_lane_s8(__a, __b) __extension__ \
   ({ \
      int8x8_t __ax = __a; \
      union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -7964,7 +7964,7 @@
      __rv.__i; \
    })
 
-#define vdup_lane_s16(__a, __b) \
+#define vdup_lane_s16(__a, __b) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -7972,7 +7972,7 @@
      __rv.__i; \
    })
 
-#define vdup_lane_s32(__a, __b) \
+#define vdup_lane_s32(__a, __b) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \
@@ -7980,7 +7980,7 @@
      __rv.__i; \
    })
 
-#define vdup_lane_f32(__a, __b) \
+#define vdup_lane_f32(__a, __b) __extension__ \
   ({ \
      float32x2_t __ax = __a; \
      union { float32x2_t __i; __neon_float32x2_t __o; } __rv; \
@@ -7988,7 +7988,7 @@
      __rv.__i; \
    })
 
-#define vdup_lane_u8(__a, __b) \
+#define vdup_lane_u8(__a, __b) __extension__ \
   ({ \
      uint8x8_t __ax = __a; \
      union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -7996,7 +7996,7 @@
      __rv.__i; \
    })
 
-#define vdup_lane_u16(__a, __b) \
+#define vdup_lane_u16(__a, __b) __extension__ \
   ({ \
      uint16x4_t __ax = __a; \
      union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -8004,7 +8004,7 @@
      __rv.__i; \
    })
 
-#define vdup_lane_u32(__a, __b) \
+#define vdup_lane_u32(__a, __b) __extension__ \
   ({ \
      uint32x2_t __ax = __a; \
      union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \
@@ -8012,7 +8012,7 @@
      __rv.__i; \
    })
 
-#define vdup_lane_p8(__a, __b) \
+#define vdup_lane_p8(__a, __b) __extension__ \
   ({ \
      poly8x8_t __ax = __a; \
      union { poly8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -8020,7 +8020,7 @@
      __rv.__i; \
    })
 
-#define vdup_lane_p16(__a, __b) \
+#define vdup_lane_p16(__a, __b) __extension__ \
   ({ \
      poly16x4_t __ax = __a; \
      union { poly16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -8028,7 +8028,7 @@
      __rv.__i; \
    })
 
-#define vdup_lane_s64(__a, __b) \
+#define vdup_lane_s64(__a, __b) __extension__ \
   ({ \
      int64x1_t __ax = __a; \
      union { int64x1_t __i; __neon_int64x1_t __o; } __rv; \
@@ -8036,7 +8036,7 @@
      __rv.__i; \
    })
 
-#define vdup_lane_u64(__a, __b) \
+#define vdup_lane_u64(__a, __b) __extension__ \
   ({ \
      uint64x1_t __ax = __a; \
      union { uint64x1_t __i; __neon_int64x1_t __o; } __rv; \
@@ -8044,7 +8044,7 @@
      __rv.__i; \
    })
 
-#define vdupq_lane_s8(__a, __b) \
+#define vdupq_lane_s8(__a, __b) __extension__ \
   ({ \
      int8x8_t __ax = __a; \
      union { int8x16_t __i; __neon_int8x16_t __o; } __rv; \
@@ -8052,7 +8052,7 @@
      __rv.__i; \
    })
 
-#define vdupq_lane_s16(__a, __b) \
+#define vdupq_lane_s16(__a, __b) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \
@@ -8060,7 +8060,7 @@
      __rv.__i; \
    })
 
-#define vdupq_lane_s32(__a, __b) \
+#define vdupq_lane_s32(__a, __b) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \
@@ -8068,7 +8068,7 @@
      __rv.__i; \
    })
 
-#define vdupq_lane_f32(__a, __b) \
+#define vdupq_lane_f32(__a, __b) __extension__ \
   ({ \
      float32x2_t __ax = __a; \
      union { float32x4_t __i; __neon_float32x4_t __o; } __rv; \
@@ -8076,7 +8076,7 @@
      __rv.__i; \
    })
 
-#define vdupq_lane_u8(__a, __b) \
+#define vdupq_lane_u8(__a, __b) __extension__ \
   ({ \
      uint8x8_t __ax = __a; \
      union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \
@@ -8084,7 +8084,7 @@
      __rv.__i; \
    })
 
-#define vdupq_lane_u16(__a, __b) \
+#define vdupq_lane_u16(__a, __b) __extension__ \
   ({ \
      uint16x4_t __ax = __a; \
      union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \
@@ -8092,7 +8092,7 @@
      __rv.__i; \
    })
 
-#define vdupq_lane_u32(__a, __b) \
+#define vdupq_lane_u32(__a, __b) __extension__ \
   ({ \
      uint32x2_t __ax = __a; \
      union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \
@@ -8100,7 +8100,7 @@
      __rv.__i; \
    })
 
-#define vdupq_lane_p8(__a, __b) \
+#define vdupq_lane_p8(__a, __b) __extension__ \
   ({ \
      poly8x8_t __ax = __a; \
      union { poly8x16_t __i; __neon_int8x16_t __o; } __rv; \
@@ -8108,7 +8108,7 @@
      __rv.__i; \
    })
 
-#define vdupq_lane_p16(__a, __b) \
+#define vdupq_lane_p16(__a, __b) __extension__ \
   ({ \
      poly16x4_t __ax = __a; \
      union { poly16x8_t __i; __neon_int16x8_t __o; } __rv; \
@@ -8116,7 +8116,7 @@
      __rv.__i; \
    })
 
-#define vdupq_lane_s64(__a, __b) \
+#define vdupq_lane_s64(__a, __b) __extension__ \
   ({ \
      int64x1_t __ax = __a; \
      union { int64x2_t __i; __neon_int64x2_t __o; } __rv; \
@@ -8124,7 +8124,7 @@
      __rv.__i; \
    })
 
-#define vdupq_lane_u64(__a, __b) \
+#define vdupq_lane_u64(__a, __b) __extension__ \
   ({ \
      uint64x1_t __ax = __a; \
      union { uint64x2_t __i; __neon_int64x2_t __o; } __rv; \
@@ -8132,7 +8132,7 @@
      __rv.__i; \
    })
 
-#define vcombine_s8(__a, __b) \
+#define vcombine_s8(__a, __b) __extension__ \
   ({ \
      int8x8_t __ax = __a; \
      int8x8_t __bx = __b; \
@@ -8141,7 +8141,7 @@
      __rv.__i; \
    })
 
-#define vcombine_s16(__a, __b) \
+#define vcombine_s16(__a, __b) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      int16x4_t __bx = __b; \
@@ -8150,7 +8150,7 @@
      __rv.__i; \
    })
 
-#define vcombine_s32(__a, __b) \
+#define vcombine_s32(__a, __b) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      int32x2_t __bx = __b; \
@@ -8159,7 +8159,7 @@
      __rv.__i; \
    })
 
-#define vcombine_s64(__a, __b) \
+#define vcombine_s64(__a, __b) __extension__ \
   ({ \
      int64x1_t __ax = __a; \
      int64x1_t __bx = __b; \
@@ -8168,7 +8168,7 @@
      __rv.__i; \
    })
 
-#define vcombine_f32(__a, __b) \
+#define vcombine_f32(__a, __b) __extension__ \
   ({ \
      float32x2_t __ax = __a; \
      float32x2_t __bx = __b; \
@@ -8177,7 +8177,7 @@
      __rv.__i; \
    })
 
-#define vcombine_u8(__a, __b) \
+#define vcombine_u8(__a, __b) __extension__ \
   ({ \
      uint8x8_t __ax = __a; \
      uint8x8_t __bx = __b; \
@@ -8186,7 +8186,7 @@
      __rv.__i; \
    })
 
-#define vcombine_u16(__a, __b) \
+#define vcombine_u16(__a, __b) __extension__ \
   ({ \
      uint16x4_t __ax = __a; \
      uint16x4_t __bx = __b; \
@@ -8195,7 +8195,7 @@
      __rv.__i; \
    })
 
-#define vcombine_u32(__a, __b) \
+#define vcombine_u32(__a, __b) __extension__ \
   ({ \
      uint32x2_t __ax = __a; \
      uint32x2_t __bx = __b; \
@@ -8204,7 +8204,7 @@
      __rv.__i; \
    })
 
-#define vcombine_u64(__a, __b) \
+#define vcombine_u64(__a, __b) __extension__ \
   ({ \
      uint64x1_t __ax = __a; \
      uint64x1_t __bx = __b; \
@@ -8213,7 +8213,7 @@
      __rv.__i; \
    })
 
-#define vcombine_p8(__a, __b) \
+#define vcombine_p8(__a, __b) __extension__ \
   ({ \
      poly8x8_t __ax = __a; \
      poly8x8_t __bx = __b; \
@@ -8222,7 +8222,7 @@
      __rv.__i; \
    })
 
-#define vcombine_p16(__a, __b) \
+#define vcombine_p16(__a, __b) __extension__ \
   ({ \
      poly16x4_t __ax = __a; \
      poly16x4_t __bx = __b; \
@@ -8231,7 +8231,7 @@
      __rv.__i; \
    })
 
-#define vget_high_s8(__a) \
+#define vget_high_s8(__a) __extension__ \
   ({ \
      int8x16_t __ax = __a; \
      union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -8239,7 +8239,7 @@
      __rv.__i; \
    })
 
-#define vget_high_s16(__a) \
+#define vget_high_s16(__a) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -8247,7 +8247,7 @@
      __rv.__i; \
    })
 
-#define vget_high_s32(__a) \
+#define vget_high_s32(__a) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \
@@ -8255,7 +8255,7 @@
      __rv.__i; \
    })
 
-#define vget_high_s64(__a) \
+#define vget_high_s64(__a) __extension__ \
   ({ \
      int64x2_t __ax = __a; \
      union { int64x1_t __i; __neon_int64x1_t __o; } __rv; \
@@ -8263,7 +8263,7 @@
      __rv.__i; \
    })
 
-#define vget_high_f32(__a) \
+#define vget_high_f32(__a) __extension__ \
   ({ \
      float32x4_t __ax = __a; \
      union { float32x2_t __i; __neon_float32x2_t __o; } __rv; \
@@ -8271,7 +8271,7 @@
      __rv.__i; \
    })
 
-#define vget_high_u8(__a) \
+#define vget_high_u8(__a) __extension__ \
   ({ \
      uint8x16_t __ax = __a; \
      union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -8279,7 +8279,7 @@
      __rv.__i; \
    })
 
-#define vget_high_u16(__a) \
+#define vget_high_u16(__a) __extension__ \
   ({ \
      uint16x8_t __ax = __a; \
      union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -8287,7 +8287,7 @@
      __rv.__i; \
    })
 
-#define vget_high_u32(__a) \
+#define vget_high_u32(__a) __extension__ \
   ({ \
      uint32x4_t __ax = __a; \
      union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \
@@ -8295,7 +8295,7 @@
      __rv.__i; \
    })
 
-#define vget_high_u64(__a) \
+#define vget_high_u64(__a) __extension__ \
   ({ \
      uint64x2_t __ax = __a; \
      union { uint64x1_t __i; __neon_int64x1_t __o; } __rv; \
@@ -8303,7 +8303,7 @@
      __rv.__i; \
    })
 
-#define vget_high_p8(__a) \
+#define vget_high_p8(__a) __extension__ \
   ({ \
      poly8x16_t __ax = __a; \
      union { poly8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -8311,7 +8311,7 @@
      __rv.__i; \
    })
 
-#define vget_high_p16(__a) \
+#define vget_high_p16(__a) __extension__ \
   ({ \
      poly16x8_t __ax = __a; \
      union { poly16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -8319,7 +8319,7 @@
      __rv.__i; \
    })
 
-#define vget_low_s8(__a) \
+#define vget_low_s8(__a) __extension__ \
   ({ \
      int8x16_t __ax = __a; \
      union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -8327,7 +8327,7 @@
      __rv.__i; \
    })
 
-#define vget_low_s16(__a) \
+#define vget_low_s16(__a) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -8335,7 +8335,7 @@
      __rv.__i; \
    })
 
-#define vget_low_s32(__a) \
+#define vget_low_s32(__a) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \
@@ -8343,7 +8343,7 @@
      __rv.__i; \
    })
 
-#define vget_low_s64(__a) \
+#define vget_low_s64(__a) __extension__ \
   ({ \
      int64x2_t __ax = __a; \
      union { int64x1_t __i; __neon_int64x1_t __o; } __rv; \
@@ -8351,7 +8351,7 @@
      __rv.__i; \
    })
 
-#define vget_low_f32(__a) \
+#define vget_low_f32(__a) __extension__ \
   ({ \
      float32x4_t __ax = __a; \
      union { float32x2_t __i; __neon_float32x2_t __o; } __rv; \
@@ -8359,7 +8359,7 @@
      __rv.__i; \
    })
 
-#define vget_low_u8(__a) \
+#define vget_low_u8(__a) __extension__ \
   ({ \
      uint8x16_t __ax = __a; \
      union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -8367,7 +8367,7 @@
      __rv.__i; \
    })
 
-#define vget_low_u16(__a) \
+#define vget_low_u16(__a) __extension__ \
   ({ \
      uint16x8_t __ax = __a; \
      union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -8375,7 +8375,7 @@
      __rv.__i; \
    })
 
-#define vget_low_u32(__a) \
+#define vget_low_u32(__a) __extension__ \
   ({ \
      uint32x4_t __ax = __a; \
      union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \
@@ -8383,7 +8383,7 @@
      __rv.__i; \
    })
 
-#define vget_low_u64(__a) \
+#define vget_low_u64(__a) __extension__ \
   ({ \
      uint64x2_t __ax = __a; \
      union { uint64x1_t __i; __neon_int64x1_t __o; } __rv; \
@@ -8391,7 +8391,7 @@
      __rv.__i; \
    })
 
-#define vget_low_p8(__a) \
+#define vget_low_p8(__a) __extension__ \
   ({ \
      poly8x16_t __ax = __a; \
      union { poly8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -8399,7 +8399,7 @@
      __rv.__i; \
    })
 
-#define vget_low_p16(__a) \
+#define vget_low_p16(__a) __extension__ \
   ({ \
      poly16x8_t __ax = __a; \
      union { poly16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -8407,7 +8407,7 @@
      __rv.__i; \
    })
 
-#define vcvt_s32_f32(__a) \
+#define vcvt_s32_f32(__a) __extension__ \
   ({ \
      float32x2_t __ax = __a; \
      union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \
@@ -8415,7 +8415,7 @@
      __rv.__i; \
    })
 
-#define vcvt_f32_s32(__a) \
+#define vcvt_f32_s32(__a) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      union { float32x2_t __i; __neon_float32x2_t __o; } __rv; \
@@ -8423,7 +8423,7 @@
      __rv.__i; \
    })
 
-#define vcvt_f32_u32(__a) \
+#define vcvt_f32_u32(__a) __extension__ \
   ({ \
      uint32x2_t __ax = __a; \
      union { float32x2_t __i; __neon_float32x2_t __o; } __rv; \
@@ -8431,7 +8431,7 @@
      __rv.__i; \
    })
 
-#define vcvt_u32_f32(__a) \
+#define vcvt_u32_f32(__a) __extension__ \
   ({ \
      float32x2_t __ax = __a; \
      union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \
@@ -8439,7 +8439,7 @@
      __rv.__i; \
    })
 
-#define vcvtq_s32_f32(__a) \
+#define vcvtq_s32_f32(__a) __extension__ \
   ({ \
      float32x4_t __ax = __a; \
      union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \
@@ -8447,7 +8447,7 @@
      __rv.__i; \
    })
 
-#define vcvtq_f32_s32(__a) \
+#define vcvtq_f32_s32(__a) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      union { float32x4_t __i; __neon_float32x4_t __o; } __rv; \
@@ -8455,7 +8455,7 @@
      __rv.__i; \
    })
 
-#define vcvtq_f32_u32(__a) \
+#define vcvtq_f32_u32(__a) __extension__ \
   ({ \
      uint32x4_t __ax = __a; \
      union { float32x4_t __i; __neon_float32x4_t __o; } __rv; \
@@ -8463,7 +8463,7 @@
      __rv.__i; \
    })
 
-#define vcvtq_u32_f32(__a) \
+#define vcvtq_u32_f32(__a) __extension__ \
   ({ \
      float32x4_t __ax = __a; \
      union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \
@@ -8471,7 +8471,7 @@
      __rv.__i; \
    })
 
-#define vcvt_n_s32_f32(__a, __b) \
+#define vcvt_n_s32_f32(__a, __b) __extension__ \
   ({ \
      float32x2_t __ax = __a; \
      union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \
@@ -8479,7 +8479,7 @@
      __rv.__i; \
    })
 
-#define vcvt_n_f32_s32(__a, __b) \
+#define vcvt_n_f32_s32(__a, __b) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      union { float32x2_t __i; __neon_float32x2_t __o; } __rv; \
@@ -8487,7 +8487,7 @@
      __rv.__i; \
    })
 
-#define vcvt_n_f32_u32(__a, __b) \
+#define vcvt_n_f32_u32(__a, __b) __extension__ \
   ({ \
      uint32x2_t __ax = __a; \
      union { float32x2_t __i; __neon_float32x2_t __o; } __rv; \
@@ -8495,7 +8495,7 @@
      __rv.__i; \
    })
 
-#define vcvt_n_u32_f32(__a, __b) \
+#define vcvt_n_u32_f32(__a, __b) __extension__ \
   ({ \
      float32x2_t __ax = __a; \
      union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \
@@ -8503,7 +8503,7 @@
      __rv.__i; \
    })
 
-#define vcvtq_n_s32_f32(__a, __b) \
+#define vcvtq_n_s32_f32(__a, __b) __extension__ \
   ({ \
      float32x4_t __ax = __a; \
      union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \
@@ -8511,7 +8511,7 @@
      __rv.__i; \
    })
 
-#define vcvtq_n_f32_s32(__a, __b) \
+#define vcvtq_n_f32_s32(__a, __b) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      union { float32x4_t __i; __neon_float32x4_t __o; } __rv; \
@@ -8519,7 +8519,7 @@
      __rv.__i; \
    })
 
-#define vcvtq_n_f32_u32(__a, __b) \
+#define vcvtq_n_f32_u32(__a, __b) __extension__ \
   ({ \
      uint32x4_t __ax = __a; \
      union { float32x4_t __i; __neon_float32x4_t __o; } __rv; \
@@ -8527,7 +8527,7 @@
      __rv.__i; \
    })
 
-#define vcvtq_n_u32_f32(__a, __b) \
+#define vcvtq_n_u32_f32(__a, __b) __extension__ \
   ({ \
      float32x4_t __ax = __a; \
      union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \
@@ -8535,7 +8535,7 @@
      __rv.__i; \
    })
 
-#define vmovn_s16(__a) \
+#define vmovn_s16(__a) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -8543,7 +8543,7 @@
      __rv.__i; \
    })
 
-#define vmovn_s32(__a) \
+#define vmovn_s32(__a) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -8551,7 +8551,7 @@
      __rv.__i; \
    })
 
-#define vmovn_s64(__a) \
+#define vmovn_s64(__a) __extension__ \
   ({ \
      int64x2_t __ax = __a; \
      union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \
@@ -8559,7 +8559,7 @@
      __rv.__i; \
    })
 
-#define vmovn_u16(__a) \
+#define vmovn_u16(__a) __extension__ \
   ({ \
      uint16x8_t __ax = __a; \
      union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -8567,7 +8567,7 @@
      __rv.__i; \
    })
 
-#define vmovn_u32(__a) \
+#define vmovn_u32(__a) __extension__ \
   ({ \
      uint32x4_t __ax = __a; \
      union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -8575,7 +8575,7 @@
      __rv.__i; \
    })
 
-#define vmovn_u64(__a) \
+#define vmovn_u64(__a) __extension__ \
   ({ \
      uint64x2_t __ax = __a; \
      union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \
@@ -8583,7 +8583,7 @@
      __rv.__i; \
    })
 
-#define vqmovn_s16(__a) \
+#define vqmovn_s16(__a) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -8591,7 +8591,7 @@
      __rv.__i; \
    })
 
-#define vqmovn_s32(__a) \
+#define vqmovn_s32(__a) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -8599,7 +8599,7 @@
      __rv.__i; \
    })
 
-#define vqmovn_s64(__a) \
+#define vqmovn_s64(__a) __extension__ \
   ({ \
      int64x2_t __ax = __a; \
      union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \
@@ -8607,7 +8607,7 @@
      __rv.__i; \
    })
 
-#define vqmovn_u16(__a) \
+#define vqmovn_u16(__a) __extension__ \
   ({ \
      uint16x8_t __ax = __a; \
      union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -8615,7 +8615,7 @@
      __rv.__i; \
    })
 
-#define vqmovn_u32(__a) \
+#define vqmovn_u32(__a) __extension__ \
   ({ \
      uint32x4_t __ax = __a; \
      union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -8623,7 +8623,7 @@
      __rv.__i; \
    })
 
-#define vqmovn_u64(__a) \
+#define vqmovn_u64(__a) __extension__ \
   ({ \
      uint64x2_t __ax = __a; \
      union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \
@@ -8631,7 +8631,7 @@
      __rv.__i; \
    })
 
-#define vqmovun_s16(__a) \
+#define vqmovun_s16(__a) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -8639,7 +8639,7 @@
      __rv.__i; \
    })
 
-#define vqmovun_s32(__a) \
+#define vqmovun_s32(__a) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -8647,7 +8647,7 @@
      __rv.__i; \
    })
 
-#define vqmovun_s64(__a) \
+#define vqmovun_s64(__a) __extension__ \
   ({ \
      int64x2_t __ax = __a; \
      union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \
@@ -8655,7 +8655,7 @@
      __rv.__i; \
    })
 
-#define vmovl_s8(__a) \
+#define vmovl_s8(__a) __extension__ \
   ({ \
      int8x8_t __ax = __a; \
      union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \
@@ -8663,7 +8663,7 @@
      __rv.__i; \
    })
 
-#define vmovl_s16(__a) \
+#define vmovl_s16(__a) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \
@@ -8671,7 +8671,7 @@
      __rv.__i; \
    })
 
-#define vmovl_s32(__a) \
+#define vmovl_s32(__a) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      union { int64x2_t __i; __neon_int64x2_t __o; } __rv; \
@@ -8679,7 +8679,7 @@
      __rv.__i; \
    })
 
-#define vmovl_u8(__a) \
+#define vmovl_u8(__a) __extension__ \
   ({ \
      uint8x8_t __ax = __a; \
      union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \
@@ -8687,7 +8687,7 @@
      __rv.__i; \
    })
 
-#define vmovl_u16(__a) \
+#define vmovl_u16(__a) __extension__ \
   ({ \
      uint16x4_t __ax = __a; \
      union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \
@@ -8695,7 +8695,7 @@
      __rv.__i; \
    })
 
-#define vmovl_u32(__a) \
+#define vmovl_u32(__a) __extension__ \
   ({ \
      uint32x2_t __ax = __a; \
      union { uint64x2_t __i; __neon_int64x2_t __o; } __rv; \
@@ -8703,7 +8703,7 @@
      __rv.__i; \
    })
 
-#define vtbl1_s8(__a, __b) \
+#define vtbl1_s8(__a, __b) __extension__ \
   ({ \
      int8x8_t __ax = __a; \
      int8x8_t __bx = __b; \
@@ -8712,7 +8712,7 @@
      __rv.__i; \
    })
 
-#define vtbl1_u8(__a, __b) \
+#define vtbl1_u8(__a, __b) __extension__ \
   ({ \
      uint8x8_t __ax = __a; \
      uint8x8_t __bx = __b; \
@@ -8721,7 +8721,7 @@
      __rv.__i; \
    })
 
-#define vtbl1_p8(__a, __b) \
+#define vtbl1_p8(__a, __b) __extension__ \
   ({ \
      poly8x8_t __ax = __a; \
      uint8x8_t __bx = __b; \
@@ -8730,7 +8730,7 @@
      __rv.__i; \
    })
 
-#define vtbl2_s8(__a, __b) \
+#define vtbl2_s8(__a, __b) __extension__ \
   ({ \
      union { int8x8x2_t __i; __neon_int8x8x2_t __o; } __au = { __a }; \
      int8x8_t __bx = __b; \
@@ -8739,7 +8739,7 @@
      __rv.__i; \
    })
 
-#define vtbl2_u8(__a, __b) \
+#define vtbl2_u8(__a, __b) __extension__ \
   ({ \
      union { uint8x8x2_t __i; __neon_int8x8x2_t __o; } __au = { __a }; \
      uint8x8_t __bx = __b; \
@@ -8748,7 +8748,7 @@
      __rv.__i; \
    })
 
-#define vtbl2_p8(__a, __b) \
+#define vtbl2_p8(__a, __b) __extension__ \
   ({ \
      union { poly8x8x2_t __i; __neon_int8x8x2_t __o; } __au = { __a }; \
      uint8x8_t __bx = __b; \
@@ -8757,7 +8757,7 @@
      __rv.__i; \
    })
 
-#define vtbl3_s8(__a, __b) \
+#define vtbl3_s8(__a, __b) __extension__ \
   ({ \
      union { int8x8x3_t __i; __neon_int8x8x3_t __o; } __au = { __a }; \
      int8x8_t __bx = __b; \
@@ -8766,7 +8766,7 @@
      __rv.__i; \
    })
 
-#define vtbl3_u8(__a, __b) \
+#define vtbl3_u8(__a, __b) __extension__ \
   ({ \
      union { uint8x8x3_t __i; __neon_int8x8x3_t __o; } __au = { __a }; \
      uint8x8_t __bx = __b; \
@@ -8775,7 +8775,7 @@
      __rv.__i; \
    })
 
-#define vtbl3_p8(__a, __b) \
+#define vtbl3_p8(__a, __b) __extension__ \
   ({ \
      union { poly8x8x3_t __i; __neon_int8x8x3_t __o; } __au = { __a }; \
      uint8x8_t __bx = __b; \
@@ -8784,7 +8784,7 @@
      __rv.__i; \
    })
 
-#define vtbl4_s8(__a, __b) \
+#define vtbl4_s8(__a, __b) __extension__ \
   ({ \
      union { int8x8x4_t __i; __neon_int8x8x4_t __o; } __au = { __a }; \
      int8x8_t __bx = __b; \
@@ -8793,7 +8793,7 @@
      __rv.__i; \
    })
 
-#define vtbl4_u8(__a, __b) \
+#define vtbl4_u8(__a, __b) __extension__ \
   ({ \
      union { uint8x8x4_t __i; __neon_int8x8x4_t __o; } __au = { __a }; \
      uint8x8_t __bx = __b; \
@@ -8802,7 +8802,7 @@
      __rv.__i; \
    })
 
-#define vtbl4_p8(__a, __b) \
+#define vtbl4_p8(__a, __b) __extension__ \
   ({ \
      union { poly8x8x4_t __i; __neon_int8x8x4_t __o; } __au = { __a }; \
      uint8x8_t __bx = __b; \
@@ -8811,7 +8811,7 @@
      __rv.__i; \
    })
 
-#define vtbx1_s8(__a, __b, __c) \
+#define vtbx1_s8(__a, __b, __c) __extension__ \
   ({ \
      int8x8_t __ax = __a; \
      int8x8_t __bx = __b; \
@@ -8821,7 +8821,7 @@
      __rv.__i; \
    })
 
-#define vtbx1_u8(__a, __b, __c) \
+#define vtbx1_u8(__a, __b, __c) __extension__ \
   ({ \
      uint8x8_t __ax = __a; \
      uint8x8_t __bx = __b; \
@@ -8831,7 +8831,7 @@
      __rv.__i; \
    })
 
-#define vtbx1_p8(__a, __b, __c) \
+#define vtbx1_p8(__a, __b, __c) __extension__ \
   ({ \
      poly8x8_t __ax = __a; \
      poly8x8_t __bx = __b; \
@@ -8841,7 +8841,7 @@
      __rv.__i; \
    })
 
-#define vtbx2_s8(__a, __b, __c) \
+#define vtbx2_s8(__a, __b, __c) __extension__ \
   ({ \
      int8x8_t __ax = __a; \
      union { int8x8x2_t __i; __neon_int8x8x2_t __o; } __bu = { __b }; \
@@ -8851,7 +8851,7 @@
      __rv.__i; \
    })
 
-#define vtbx2_u8(__a, __b, __c) \
+#define vtbx2_u8(__a, __b, __c) __extension__ \
   ({ \
      uint8x8_t __ax = __a; \
      union { uint8x8x2_t __i; __neon_int8x8x2_t __o; } __bu = { __b }; \
@@ -8861,7 +8861,7 @@
      __rv.__i; \
    })
 
-#define vtbx2_p8(__a, __b, __c) \
+#define vtbx2_p8(__a, __b, __c) __extension__ \
   ({ \
      poly8x8_t __ax = __a; \
      union { poly8x8x2_t __i; __neon_int8x8x2_t __o; } __bu = { __b }; \
@@ -8871,7 +8871,7 @@
      __rv.__i; \
    })
 
-#define vtbx3_s8(__a, __b, __c) \
+#define vtbx3_s8(__a, __b, __c) __extension__ \
   ({ \
      int8x8_t __ax = __a; \
      union { int8x8x3_t __i; __neon_int8x8x3_t __o; } __bu = { __b }; \
@@ -8881,7 +8881,7 @@
      __rv.__i; \
    })
 
-#define vtbx3_u8(__a, __b, __c) \
+#define vtbx3_u8(__a, __b, __c) __extension__ \
   ({ \
      uint8x8_t __ax = __a; \
      union { uint8x8x3_t __i; __neon_int8x8x3_t __o; } __bu = { __b }; \
@@ -8891,7 +8891,7 @@
      __rv.__i; \
    })
 
-#define vtbx3_p8(__a, __b, __c) \
+#define vtbx3_p8(__a, __b, __c) __extension__ \
   ({ \
      poly8x8_t __ax = __a; \
      union { poly8x8x3_t __i; __neon_int8x8x3_t __o; } __bu = { __b }; \
@@ -8901,7 +8901,7 @@
      __rv.__i; \
    })
 
-#define vtbx4_s8(__a, __b, __c) \
+#define vtbx4_s8(__a, __b, __c) __extension__ \
   ({ \
      int8x8_t __ax = __a; \
      union { int8x8x4_t __i; __neon_int8x8x4_t __o; } __bu = { __b }; \
@@ -8911,7 +8911,7 @@
      __rv.__i; \
    })
 
-#define vtbx4_u8(__a, __b, __c) \
+#define vtbx4_u8(__a, __b, __c) __extension__ \
   ({ \
      uint8x8_t __ax = __a; \
      union { uint8x8x4_t __i; __neon_int8x8x4_t __o; } __bu = { __b }; \
@@ -8921,7 +8921,7 @@
      __rv.__i; \
    })
 
-#define vtbx4_p8(__a, __b, __c) \
+#define vtbx4_p8(__a, __b, __c) __extension__ \
   ({ \
      poly8x8_t __ax = __a; \
      union { poly8x8x4_t __i; __neon_int8x8x4_t __o; } __bu = { __b }; \
@@ -8931,7 +8931,7 @@
      __rv.__i; \
    })
 
-#define vmul_lane_s16(__a, __b, __c) \
+#define vmul_lane_s16(__a, __b, __c) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      int16x4_t __bx = __b; \
@@ -8940,7 +8940,7 @@
      __rv.__i; \
    })
 
-#define vmul_lane_s32(__a, __b, __c) \
+#define vmul_lane_s32(__a, __b, __c) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      int32x2_t __bx = __b; \
@@ -8949,7 +8949,7 @@
      __rv.__i; \
    })
 
-#define vmul_lane_f32(__a, __b, __c) \
+#define vmul_lane_f32(__a, __b, __c) __extension__ \
   ({ \
      float32x2_t __ax = __a; \
      float32x2_t __bx = __b; \
@@ -8958,7 +8958,7 @@
      __rv.__i; \
    })
 
-#define vmul_lane_u16(__a, __b, __c) \
+#define vmul_lane_u16(__a, __b, __c) __extension__ \
   ({ \
      uint16x4_t __ax = __a; \
      uint16x4_t __bx = __b; \
@@ -8967,7 +8967,7 @@
      __rv.__i; \
    })
 
-#define vmul_lane_u32(__a, __b, __c) \
+#define vmul_lane_u32(__a, __b, __c) __extension__ \
   ({ \
      uint32x2_t __ax = __a; \
      uint32x2_t __bx = __b; \
@@ -8976,7 +8976,7 @@
      __rv.__i; \
    })
 
-#define vmulq_lane_s16(__a, __b, __c) \
+#define vmulq_lane_s16(__a, __b, __c) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      int16x4_t __bx = __b; \
@@ -8985,7 +8985,7 @@
      __rv.__i; \
    })
 
-#define vmulq_lane_s32(__a, __b, __c) \
+#define vmulq_lane_s32(__a, __b, __c) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      int32x2_t __bx = __b; \
@@ -8994,7 +8994,7 @@
      __rv.__i; \
    })
 
-#define vmulq_lane_f32(__a, __b, __c) \
+#define vmulq_lane_f32(__a, __b, __c) __extension__ \
   ({ \
      float32x4_t __ax = __a; \
      float32x2_t __bx = __b; \
@@ -9003,7 +9003,7 @@
      __rv.__i; \
    })
 
-#define vmulq_lane_u16(__a, __b, __c) \
+#define vmulq_lane_u16(__a, __b, __c) __extension__ \
   ({ \
      uint16x8_t __ax = __a; \
      uint16x4_t __bx = __b; \
@@ -9012,7 +9012,7 @@
      __rv.__i; \
    })
 
-#define vmulq_lane_u32(__a, __b, __c) \
+#define vmulq_lane_u32(__a, __b, __c) __extension__ \
   ({ \
      uint32x4_t __ax = __a; \
      uint32x2_t __bx = __b; \
@@ -9021,7 +9021,7 @@
      __rv.__i; \
    })
 
-#define vmla_lane_s16(__a, __b, __c, __d) \
+#define vmla_lane_s16(__a, __b, __c, __d) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      int16x4_t __bx = __b; \
@@ -9031,7 +9031,7 @@
      __rv.__i; \
    })
 
-#define vmla_lane_s32(__a, __b, __c, __d) \
+#define vmla_lane_s32(__a, __b, __c, __d) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      int32x2_t __bx = __b; \
@@ -9041,7 +9041,7 @@
      __rv.__i; \
    })
 
-#define vmla_lane_f32(__a, __b, __c, __d) \
+#define vmla_lane_f32(__a, __b, __c, __d) __extension__ \
   ({ \
      float32x2_t __ax = __a; \
      float32x2_t __bx = __b; \
@@ -9051,7 +9051,7 @@
      __rv.__i; \
    })
 
-#define vmla_lane_u16(__a, __b, __c, __d) \
+#define vmla_lane_u16(__a, __b, __c, __d) __extension__ \
   ({ \
      uint16x4_t __ax = __a; \
      uint16x4_t __bx = __b; \
@@ -9061,7 +9061,7 @@
      __rv.__i; \
    })
 
-#define vmla_lane_u32(__a, __b, __c, __d) \
+#define vmla_lane_u32(__a, __b, __c, __d) __extension__ \
   ({ \
      uint32x2_t __ax = __a; \
      uint32x2_t __bx = __b; \
@@ -9071,7 +9071,7 @@
      __rv.__i; \
    })
 
-#define vmlaq_lane_s16(__a, __b, __c, __d) \
+#define vmlaq_lane_s16(__a, __b, __c, __d) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      int16x8_t __bx = __b; \
@@ -9081,7 +9081,7 @@
      __rv.__i; \
    })
 
-#define vmlaq_lane_s32(__a, __b, __c, __d) \
+#define vmlaq_lane_s32(__a, __b, __c, __d) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      int32x4_t __bx = __b; \
@@ -9091,7 +9091,7 @@
      __rv.__i; \
    })
 
-#define vmlaq_lane_f32(__a, __b, __c, __d) \
+#define vmlaq_lane_f32(__a, __b, __c, __d) __extension__ \
   ({ \
      float32x4_t __ax = __a; \
      float32x4_t __bx = __b; \
@@ -9101,7 +9101,7 @@
      __rv.__i; \
    })
 
-#define vmlaq_lane_u16(__a, __b, __c, __d) \
+#define vmlaq_lane_u16(__a, __b, __c, __d) __extension__ \
   ({ \
      uint16x8_t __ax = __a; \
      uint16x8_t __bx = __b; \
@@ -9111,7 +9111,7 @@
      __rv.__i; \
    })
 
-#define vmlaq_lane_u32(__a, __b, __c, __d) \
+#define vmlaq_lane_u32(__a, __b, __c, __d) __extension__ \
   ({ \
      uint32x4_t __ax = __a; \
      uint32x4_t __bx = __b; \
@@ -9121,7 +9121,7 @@
      __rv.__i; \
    })
 
-#define vmlal_lane_s16(__a, __b, __c, __d) \
+#define vmlal_lane_s16(__a, __b, __c, __d) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      int16x4_t __bx = __b; \
@@ -9131,7 +9131,7 @@
      __rv.__i; \
    })
 
-#define vmlal_lane_s32(__a, __b, __c, __d) \
+#define vmlal_lane_s32(__a, __b, __c, __d) __extension__ \
   ({ \
      int64x2_t __ax = __a; \
      int32x2_t __bx = __b; \
@@ -9141,7 +9141,7 @@
      __rv.__i; \
    })
 
-#define vmlal_lane_u16(__a, __b, __c, __d) \
+#define vmlal_lane_u16(__a, __b, __c, __d) __extension__ \
   ({ \
      uint32x4_t __ax = __a; \
      uint16x4_t __bx = __b; \
@@ -9151,7 +9151,7 @@
      __rv.__i; \
    })
 
-#define vmlal_lane_u32(__a, __b, __c, __d) \
+#define vmlal_lane_u32(__a, __b, __c, __d) __extension__ \
   ({ \
      uint64x2_t __ax = __a; \
      uint32x2_t __bx = __b; \
@@ -9161,7 +9161,7 @@
      __rv.__i; \
    })
 
-#define vqdmlal_lane_s16(__a, __b, __c, __d) \
+#define vqdmlal_lane_s16(__a, __b, __c, __d) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      int16x4_t __bx = __b; \
@@ -9171,7 +9171,7 @@
      __rv.__i; \
    })
 
-#define vqdmlal_lane_s32(__a, __b, __c, __d) \
+#define vqdmlal_lane_s32(__a, __b, __c, __d) __extension__ \
   ({ \
      int64x2_t __ax = __a; \
      int32x2_t __bx = __b; \
@@ -9181,7 +9181,7 @@
      __rv.__i; \
    })
 
-#define vmls_lane_s16(__a, __b, __c, __d) \
+#define vmls_lane_s16(__a, __b, __c, __d) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      int16x4_t __bx = __b; \
@@ -9191,7 +9191,7 @@
      __rv.__i; \
    })
 
-#define vmls_lane_s32(__a, __b, __c, __d) \
+#define vmls_lane_s32(__a, __b, __c, __d) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      int32x2_t __bx = __b; \
@@ -9201,7 +9201,7 @@
      __rv.__i; \
    })
 
-#define vmls_lane_f32(__a, __b, __c, __d) \
+#define vmls_lane_f32(__a, __b, __c, __d) __extension__ \
   ({ \
      float32x2_t __ax = __a; \
      float32x2_t __bx = __b; \
@@ -9211,7 +9211,7 @@
      __rv.__i; \
    })
 
-#define vmls_lane_u16(__a, __b, __c, __d) \
+#define vmls_lane_u16(__a, __b, __c, __d) __extension__ \
   ({ \
      uint16x4_t __ax = __a; \
      uint16x4_t __bx = __b; \
@@ -9221,7 +9221,7 @@
      __rv.__i; \
    })
 
-#define vmls_lane_u32(__a, __b, __c, __d) \
+#define vmls_lane_u32(__a, __b, __c, __d) __extension__ \
   ({ \
      uint32x2_t __ax = __a; \
      uint32x2_t __bx = __b; \
@@ -9231,7 +9231,7 @@
      __rv.__i; \
    })
 
-#define vmlsq_lane_s16(__a, __b, __c, __d) \
+#define vmlsq_lane_s16(__a, __b, __c, __d) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      int16x8_t __bx = __b; \
@@ -9241,7 +9241,7 @@
      __rv.__i; \
    })
 
-#define vmlsq_lane_s32(__a, __b, __c, __d) \
+#define vmlsq_lane_s32(__a, __b, __c, __d) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      int32x4_t __bx = __b; \
@@ -9251,7 +9251,7 @@
      __rv.__i; \
    })
 
-#define vmlsq_lane_f32(__a, __b, __c, __d) \
+#define vmlsq_lane_f32(__a, __b, __c, __d) __extension__ \
   ({ \
      float32x4_t __ax = __a; \
      float32x4_t __bx = __b; \
@@ -9261,7 +9261,7 @@
      __rv.__i; \
    })
 
-#define vmlsq_lane_u16(__a, __b, __c, __d) \
+#define vmlsq_lane_u16(__a, __b, __c, __d) __extension__ \
   ({ \
      uint16x8_t __ax = __a; \
      uint16x8_t __bx = __b; \
@@ -9271,7 +9271,7 @@
      __rv.__i; \
    })
 
-#define vmlsq_lane_u32(__a, __b, __c, __d) \
+#define vmlsq_lane_u32(__a, __b, __c, __d) __extension__ \
   ({ \
      uint32x4_t __ax = __a; \
      uint32x4_t __bx = __b; \
@@ -9281,7 +9281,7 @@
      __rv.__i; \
    })
 
-#define vmlsl_lane_s16(__a, __b, __c, __d) \
+#define vmlsl_lane_s16(__a, __b, __c, __d) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      int16x4_t __bx = __b; \
@@ -9291,7 +9291,7 @@
      __rv.__i; \
    })
 
-#define vmlsl_lane_s32(__a, __b, __c, __d) \
+#define vmlsl_lane_s32(__a, __b, __c, __d) __extension__ \
   ({ \
      int64x2_t __ax = __a; \
      int32x2_t __bx = __b; \
@@ -9301,7 +9301,7 @@
      __rv.__i; \
    })
 
-#define vmlsl_lane_u16(__a, __b, __c, __d) \
+#define vmlsl_lane_u16(__a, __b, __c, __d) __extension__ \
   ({ \
      uint32x4_t __ax = __a; \
      uint16x4_t __bx = __b; \
@@ -9311,7 +9311,7 @@
      __rv.__i; \
    })
 
-#define vmlsl_lane_u32(__a, __b, __c, __d) \
+#define vmlsl_lane_u32(__a, __b, __c, __d) __extension__ \
   ({ \
      uint64x2_t __ax = __a; \
      uint32x2_t __bx = __b; \
@@ -9321,7 +9321,7 @@
      __rv.__i; \
    })
 
-#define vqdmlsl_lane_s16(__a, __b, __c, __d) \
+#define vqdmlsl_lane_s16(__a, __b, __c, __d) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      int16x4_t __bx = __b; \
@@ -9331,7 +9331,7 @@
      __rv.__i; \
    })
 
-#define vqdmlsl_lane_s32(__a, __b, __c, __d) \
+#define vqdmlsl_lane_s32(__a, __b, __c, __d) __extension__ \
   ({ \
      int64x2_t __ax = __a; \
      int32x2_t __bx = __b; \
@@ -9341,7 +9341,7 @@
      __rv.__i; \
    })
 
-#define vmull_lane_s16(__a, __b, __c) \
+#define vmull_lane_s16(__a, __b, __c) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      int16x4_t __bx = __b; \
@@ -9350,7 +9350,7 @@
      __rv.__i; \
    })
 
-#define vmull_lane_s32(__a, __b, __c) \
+#define vmull_lane_s32(__a, __b, __c) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      int32x2_t __bx = __b; \
@@ -9359,7 +9359,7 @@
      __rv.__i; \
    })
 
-#define vmull_lane_u16(__a, __b, __c) \
+#define vmull_lane_u16(__a, __b, __c) __extension__ \
   ({ \
      uint16x4_t __ax = __a; \
      uint16x4_t __bx = __b; \
@@ -9368,7 +9368,7 @@
      __rv.__i; \
    })
 
-#define vmull_lane_u32(__a, __b, __c) \
+#define vmull_lane_u32(__a, __b, __c) __extension__ \
   ({ \
      uint32x2_t __ax = __a; \
      uint32x2_t __bx = __b; \
@@ -9377,7 +9377,7 @@
      __rv.__i; \
    })
 
-#define vqdmull_lane_s16(__a, __b, __c) \
+#define vqdmull_lane_s16(__a, __b, __c) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      int16x4_t __bx = __b; \
@@ -9386,7 +9386,7 @@
      __rv.__i; \
    })
 
-#define vqdmull_lane_s32(__a, __b, __c) \
+#define vqdmull_lane_s32(__a, __b, __c) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      int32x2_t __bx = __b; \
@@ -9395,7 +9395,7 @@
      __rv.__i; \
    })
 
-#define vqdmulhq_lane_s16(__a, __b, __c) \
+#define vqdmulhq_lane_s16(__a, __b, __c) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      int16x4_t __bx = __b; \
@@ -9404,7 +9404,7 @@
      __rv.__i; \
    })
 
-#define vqdmulhq_lane_s32(__a, __b, __c) \
+#define vqdmulhq_lane_s32(__a, __b, __c) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      int32x2_t __bx = __b; \
@@ -9413,7 +9413,7 @@
      __rv.__i; \
    })
 
-#define vqdmulh_lane_s16(__a, __b, __c) \
+#define vqdmulh_lane_s16(__a, __b, __c) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      int16x4_t __bx = __b; \
@@ -9422,7 +9422,7 @@
      __rv.__i; \
    })
 
-#define vqdmulh_lane_s32(__a, __b, __c) \
+#define vqdmulh_lane_s32(__a, __b, __c) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      int32x2_t __bx = __b; \
@@ -9431,7 +9431,7 @@
      __rv.__i; \
    })
 
-#define vqrdmulhq_lane_s16(__a, __b, __c) \
+#define vqrdmulhq_lane_s16(__a, __b, __c) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      int16x4_t __bx = __b; \
@@ -9440,7 +9440,7 @@
      __rv.__i; \
    })
 
-#define vqrdmulhq_lane_s32(__a, __b, __c) \
+#define vqrdmulhq_lane_s32(__a, __b, __c) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      int32x2_t __bx = __b; \
@@ -9449,7 +9449,7 @@
      __rv.__i; \
    })
 
-#define vqrdmulh_lane_s16(__a, __b, __c) \
+#define vqrdmulh_lane_s16(__a, __b, __c) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      int16x4_t __bx = __b; \
@@ -9458,7 +9458,7 @@
      __rv.__i; \
    })
 
-#define vqrdmulh_lane_s32(__a, __b, __c) \
+#define vqrdmulh_lane_s32(__a, __b, __c) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      int32x2_t __bx = __b; \
@@ -9467,7 +9467,7 @@
      __rv.__i; \
    })
 
-#define vmul_n_s16(__a, __b) \
+#define vmul_n_s16(__a, __b) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      int16_t __bx = __b; \
@@ -9476,7 +9476,7 @@
      __rv.__i; \
    })
 
-#define vmul_n_s32(__a, __b) \
+#define vmul_n_s32(__a, __b) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      int32_t __bx = __b; \
@@ -9485,7 +9485,7 @@
      __rv.__i; \
    })
 
-#define vmul_n_f32(__a, __b) \
+#define vmul_n_f32(__a, __b) __extension__ \
   ({ \
      float32x2_t __ax = __a; \
      float32_t __bx = __b; \
@@ -9494,7 +9494,7 @@
      __rv.__i; \
    })
 
-#define vmul_n_u16(__a, __b) \
+#define vmul_n_u16(__a, __b) __extension__ \
   ({ \
      uint16x4_t __ax = __a; \
      uint16_t __bx = __b; \
@@ -9503,7 +9503,7 @@
      __rv.__i; \
    })
 
-#define vmul_n_u32(__a, __b) \
+#define vmul_n_u32(__a, __b) __extension__ \
   ({ \
      uint32x2_t __ax = __a; \
      uint32_t __bx = __b; \
@@ -9512,7 +9512,7 @@
      __rv.__i; \
    })
 
-#define vmulq_n_s16(__a, __b) \
+#define vmulq_n_s16(__a, __b) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      int16_t __bx = __b; \
@@ -9521,7 +9521,7 @@
      __rv.__i; \
    })
 
-#define vmulq_n_s32(__a, __b) \
+#define vmulq_n_s32(__a, __b) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      int32_t __bx = __b; \
@@ -9530,7 +9530,7 @@
      __rv.__i; \
    })
 
-#define vmulq_n_f32(__a, __b) \
+#define vmulq_n_f32(__a, __b) __extension__ \
   ({ \
      float32x4_t __ax = __a; \
      float32_t __bx = __b; \
@@ -9539,7 +9539,7 @@
      __rv.__i; \
    })
 
-#define vmulq_n_u16(__a, __b) \
+#define vmulq_n_u16(__a, __b) __extension__ \
   ({ \
      uint16x8_t __ax = __a; \
      uint16_t __bx = __b; \
@@ -9548,7 +9548,7 @@
      __rv.__i; \
    })
 
-#define vmulq_n_u32(__a, __b) \
+#define vmulq_n_u32(__a, __b) __extension__ \
   ({ \
      uint32x4_t __ax = __a; \
      uint32_t __bx = __b; \
@@ -9557,7 +9557,7 @@
      __rv.__i; \
    })
 
-#define vmull_n_s16(__a, __b) \
+#define vmull_n_s16(__a, __b) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      int16_t __bx = __b; \
@@ -9566,7 +9566,7 @@
      __rv.__i; \
    })
 
-#define vmull_n_s32(__a, __b) \
+#define vmull_n_s32(__a, __b) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      int32_t __bx = __b; \
@@ -9575,7 +9575,7 @@
      __rv.__i; \
    })
 
-#define vmull_n_u16(__a, __b) \
+#define vmull_n_u16(__a, __b) __extension__ \
   ({ \
      uint16x4_t __ax = __a; \
      uint16_t __bx = __b; \
@@ -9584,7 +9584,7 @@
      __rv.__i; \
    })
 
-#define vmull_n_u32(__a, __b) \
+#define vmull_n_u32(__a, __b) __extension__ \
   ({ \
      uint32x2_t __ax = __a; \
      uint32_t __bx = __b; \
@@ -9593,7 +9593,7 @@
      __rv.__i; \
    })
 
-#define vqdmull_n_s16(__a, __b) \
+#define vqdmull_n_s16(__a, __b) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      int16_t __bx = __b; \
@@ -9602,7 +9602,7 @@
      __rv.__i; \
    })
 
-#define vqdmull_n_s32(__a, __b) \
+#define vqdmull_n_s32(__a, __b) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      int32_t __bx = __b; \
@@ -9611,7 +9611,7 @@
      __rv.__i; \
    })
 
-#define vqdmulhq_n_s16(__a, __b) \
+#define vqdmulhq_n_s16(__a, __b) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      int16_t __bx = __b; \
@@ -9620,7 +9620,7 @@
      __rv.__i; \
    })
 
-#define vqdmulhq_n_s32(__a, __b) \
+#define vqdmulhq_n_s32(__a, __b) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      int32_t __bx = __b; \
@@ -9629,7 +9629,7 @@
      __rv.__i; \
    })
 
-#define vqdmulh_n_s16(__a, __b) \
+#define vqdmulh_n_s16(__a, __b) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      int16_t __bx = __b; \
@@ -9638,7 +9638,7 @@
      __rv.__i; \
    })
 
-#define vqdmulh_n_s32(__a, __b) \
+#define vqdmulh_n_s32(__a, __b) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      int32_t __bx = __b; \
@@ -9647,7 +9647,7 @@
      __rv.__i; \
    })
 
-#define vqrdmulhq_n_s16(__a, __b) \
+#define vqrdmulhq_n_s16(__a, __b) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      int16_t __bx = __b; \
@@ -9656,7 +9656,7 @@
      __rv.__i; \
    })
 
-#define vqrdmulhq_n_s32(__a, __b) \
+#define vqrdmulhq_n_s32(__a, __b) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      int32_t __bx = __b; \
@@ -9665,7 +9665,7 @@
      __rv.__i; \
    })
 
-#define vqrdmulh_n_s16(__a, __b) \
+#define vqrdmulh_n_s16(__a, __b) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      int16_t __bx = __b; \
@@ -9674,7 +9674,7 @@
      __rv.__i; \
    })
 
-#define vqrdmulh_n_s32(__a, __b) \
+#define vqrdmulh_n_s32(__a, __b) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      int32_t __bx = __b; \
@@ -9683,7 +9683,7 @@
      __rv.__i; \
    })
 
-#define vmla_n_s16(__a, __b, __c) \
+#define vmla_n_s16(__a, __b, __c) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      int16x4_t __bx = __b; \
@@ -9693,7 +9693,7 @@
      __rv.__i; \
    })
 
-#define vmla_n_s32(__a, __b, __c) \
+#define vmla_n_s32(__a, __b, __c) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      int32x2_t __bx = __b; \
@@ -9703,7 +9703,7 @@
      __rv.__i; \
    })
 
-#define vmla_n_f32(__a, __b, __c) \
+#define vmla_n_f32(__a, __b, __c) __extension__ \
   ({ \
      float32x2_t __ax = __a; \
      float32x2_t __bx = __b; \
@@ -9713,7 +9713,7 @@
      __rv.__i; \
    })
 
-#define vmla_n_u16(__a, __b, __c) \
+#define vmla_n_u16(__a, __b, __c) __extension__ \
   ({ \
      uint16x4_t __ax = __a; \
      uint16x4_t __bx = __b; \
@@ -9723,7 +9723,7 @@
      __rv.__i; \
    })
 
-#define vmla_n_u32(__a, __b, __c) \
+#define vmla_n_u32(__a, __b, __c) __extension__ \
   ({ \
      uint32x2_t __ax = __a; \
      uint32x2_t __bx = __b; \
@@ -9733,7 +9733,7 @@
      __rv.__i; \
    })
 
-#define vmlaq_n_s16(__a, __b, __c) \
+#define vmlaq_n_s16(__a, __b, __c) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      int16x8_t __bx = __b; \
@@ -9743,7 +9743,7 @@
      __rv.__i; \
    })
 
-#define vmlaq_n_s32(__a, __b, __c) \
+#define vmlaq_n_s32(__a, __b, __c) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      int32x4_t __bx = __b; \
@@ -9753,7 +9753,7 @@
      __rv.__i; \
    })
 
-#define vmlaq_n_f32(__a, __b, __c) \
+#define vmlaq_n_f32(__a, __b, __c) __extension__ \
   ({ \
      float32x4_t __ax = __a; \
      float32x4_t __bx = __b; \
@@ -9763,7 +9763,7 @@
      __rv.__i; \
    })
 
-#define vmlaq_n_u16(__a, __b, __c) \
+#define vmlaq_n_u16(__a, __b, __c) __extension__ \
   ({ \
      uint16x8_t __ax = __a; \
      uint16x8_t __bx = __b; \
@@ -9773,7 +9773,7 @@
      __rv.__i; \
    })
 
-#define vmlaq_n_u32(__a, __b, __c) \
+#define vmlaq_n_u32(__a, __b, __c) __extension__ \
   ({ \
      uint32x4_t __ax = __a; \
      uint32x4_t __bx = __b; \
@@ -9783,7 +9783,7 @@
      __rv.__i; \
    })
 
-#define vmlal_n_s16(__a, __b, __c) \
+#define vmlal_n_s16(__a, __b, __c) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      int16x4_t __bx = __b; \
@@ -9793,7 +9793,7 @@
      __rv.__i; \
    })
 
-#define vmlal_n_s32(__a, __b, __c) \
+#define vmlal_n_s32(__a, __b, __c) __extension__ \
   ({ \
      int64x2_t __ax = __a; \
      int32x2_t __bx = __b; \
@@ -9803,7 +9803,7 @@
      __rv.__i; \
    })
 
-#define vmlal_n_u16(__a, __b, __c) \
+#define vmlal_n_u16(__a, __b, __c) __extension__ \
   ({ \
      uint32x4_t __ax = __a; \
      uint16x4_t __bx = __b; \
@@ -9813,7 +9813,7 @@
      __rv.__i; \
    })
 
-#define vmlal_n_u32(__a, __b, __c) \
+#define vmlal_n_u32(__a, __b, __c) __extension__ \
   ({ \
      uint64x2_t __ax = __a; \
      uint32x2_t __bx = __b; \
@@ -9823,7 +9823,7 @@
      __rv.__i; \
    })
 
-#define vqdmlal_n_s16(__a, __b, __c) \
+#define vqdmlal_n_s16(__a, __b, __c) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      int16x4_t __bx = __b; \
@@ -9833,7 +9833,7 @@
      __rv.__i; \
    })
 
-#define vqdmlal_n_s32(__a, __b, __c) \
+#define vqdmlal_n_s32(__a, __b, __c) __extension__ \
   ({ \
      int64x2_t __ax = __a; \
      int32x2_t __bx = __b; \
@@ -9843,7 +9843,7 @@
      __rv.__i; \
    })
 
-#define vmls_n_s16(__a, __b, __c) \
+#define vmls_n_s16(__a, __b, __c) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      int16x4_t __bx = __b; \
@@ -9853,7 +9853,7 @@
      __rv.__i; \
    })
 
-#define vmls_n_s32(__a, __b, __c) \
+#define vmls_n_s32(__a, __b, __c) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      int32x2_t __bx = __b; \
@@ -9863,7 +9863,7 @@
      __rv.__i; \
    })
 
-#define vmls_n_f32(__a, __b, __c) \
+#define vmls_n_f32(__a, __b, __c) __extension__ \
   ({ \
      float32x2_t __ax = __a; \
      float32x2_t __bx = __b; \
@@ -9873,7 +9873,7 @@
      __rv.__i; \
    })
 
-#define vmls_n_u16(__a, __b, __c) \
+#define vmls_n_u16(__a, __b, __c) __extension__ \
   ({ \
      uint16x4_t __ax = __a; \
      uint16x4_t __bx = __b; \
@@ -9883,7 +9883,7 @@
      __rv.__i; \
    })
 
-#define vmls_n_u32(__a, __b, __c) \
+#define vmls_n_u32(__a, __b, __c) __extension__ \
   ({ \
      uint32x2_t __ax = __a; \
      uint32x2_t __bx = __b; \
@@ -9893,7 +9893,7 @@
      __rv.__i; \
    })
 
-#define vmlsq_n_s16(__a, __b, __c) \
+#define vmlsq_n_s16(__a, __b, __c) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      int16x8_t __bx = __b; \
@@ -9903,7 +9903,7 @@
      __rv.__i; \
    })
 
-#define vmlsq_n_s32(__a, __b, __c) \
+#define vmlsq_n_s32(__a, __b, __c) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      int32x4_t __bx = __b; \
@@ -9913,7 +9913,7 @@
      __rv.__i; \
    })
 
-#define vmlsq_n_f32(__a, __b, __c) \
+#define vmlsq_n_f32(__a, __b, __c) __extension__ \
   ({ \
      float32x4_t __ax = __a; \
      float32x4_t __bx = __b; \
@@ -9923,7 +9923,7 @@
      __rv.__i; \
    })
 
-#define vmlsq_n_u16(__a, __b, __c) \
+#define vmlsq_n_u16(__a, __b, __c) __extension__ \
   ({ \
      uint16x8_t __ax = __a; \
      uint16x8_t __bx = __b; \
@@ -9933,7 +9933,7 @@
      __rv.__i; \
    })
 
-#define vmlsq_n_u32(__a, __b, __c) \
+#define vmlsq_n_u32(__a, __b, __c) __extension__ \
   ({ \
      uint32x4_t __ax = __a; \
      uint32x4_t __bx = __b; \
@@ -9943,7 +9943,7 @@
      __rv.__i; \
    })
 
-#define vmlsl_n_s16(__a, __b, __c) \
+#define vmlsl_n_s16(__a, __b, __c) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      int16x4_t __bx = __b; \
@@ -9953,7 +9953,7 @@
      __rv.__i; \
    })
 
-#define vmlsl_n_s32(__a, __b, __c) \
+#define vmlsl_n_s32(__a, __b, __c) __extension__ \
   ({ \
      int64x2_t __ax = __a; \
      int32x2_t __bx = __b; \
@@ -9963,7 +9963,7 @@
      __rv.__i; \
    })
 
-#define vmlsl_n_u16(__a, __b, __c) \
+#define vmlsl_n_u16(__a, __b, __c) __extension__ \
   ({ \
      uint32x4_t __ax = __a; \
      uint16x4_t __bx = __b; \
@@ -9973,7 +9973,7 @@
      __rv.__i; \
    })
 
-#define vmlsl_n_u32(__a, __b, __c) \
+#define vmlsl_n_u32(__a, __b, __c) __extension__ \
   ({ \
      uint64x2_t __ax = __a; \
      uint32x2_t __bx = __b; \
@@ -9983,7 +9983,7 @@
      __rv.__i; \
    })
 
-#define vqdmlsl_n_s16(__a, __b, __c) \
+#define vqdmlsl_n_s16(__a, __b, __c) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      int16x4_t __bx = __b; \
@@ -9993,7 +9993,7 @@
      __rv.__i; \
    })
 
-#define vqdmlsl_n_s32(__a, __b, __c) \
+#define vqdmlsl_n_s32(__a, __b, __c) __extension__ \
   ({ \
      int64x2_t __ax = __a; \
      int32x2_t __bx = __b; \
@@ -10003,7 +10003,7 @@
      __rv.__i; \
    })
 
-#define vext_s8(__a, __b, __c) \
+#define vext_s8(__a, __b, __c) __extension__ \
   ({ \
      int8x8_t __ax = __a; \
      int8x8_t __bx = __b; \
@@ -10012,7 +10012,7 @@
      __rv.__i; \
    })
 
-#define vext_s16(__a, __b, __c) \
+#define vext_s16(__a, __b, __c) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      int16x4_t __bx = __b; \
@@ -10021,7 +10021,7 @@
      __rv.__i; \
    })
 
-#define vext_s32(__a, __b, __c) \
+#define vext_s32(__a, __b, __c) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      int32x2_t __bx = __b; \
@@ -10030,7 +10030,7 @@
      __rv.__i; \
    })
 
-#define vext_s64(__a, __b, __c) \
+#define vext_s64(__a, __b, __c) __extension__ \
   ({ \
      int64x1_t __ax = __a; \
      int64x1_t __bx = __b; \
@@ -10039,7 +10039,7 @@
      __rv.__i; \
    })
 
-#define vext_f32(__a, __b, __c) \
+#define vext_f32(__a, __b, __c) __extension__ \
   ({ \
      float32x2_t __ax = __a; \
      float32x2_t __bx = __b; \
@@ -10048,7 +10048,7 @@
      __rv.__i; \
    })
 
-#define vext_u8(__a, __b, __c) \
+#define vext_u8(__a, __b, __c) __extension__ \
   ({ \
      uint8x8_t __ax = __a; \
      uint8x8_t __bx = __b; \
@@ -10057,7 +10057,7 @@
      __rv.__i; \
    })
 
-#define vext_u16(__a, __b, __c) \
+#define vext_u16(__a, __b, __c) __extension__ \
   ({ \
      uint16x4_t __ax = __a; \
      uint16x4_t __bx = __b; \
@@ -10066,7 +10066,7 @@
      __rv.__i; \
    })
 
-#define vext_u32(__a, __b, __c) \
+#define vext_u32(__a, __b, __c) __extension__ \
   ({ \
      uint32x2_t __ax = __a; \
      uint32x2_t __bx = __b; \
@@ -10075,7 +10075,7 @@
      __rv.__i; \
    })
 
-#define vext_u64(__a, __b, __c) \
+#define vext_u64(__a, __b, __c) __extension__ \
   ({ \
      uint64x1_t __ax = __a; \
      uint64x1_t __bx = __b; \
@@ -10084,7 +10084,7 @@
      __rv.__i; \
    })
 
-#define vext_p8(__a, __b, __c) \
+#define vext_p8(__a, __b, __c) __extension__ \
   ({ \
      poly8x8_t __ax = __a; \
      poly8x8_t __bx = __b; \
@@ -10093,7 +10093,7 @@
      __rv.__i; \
    })
 
-#define vext_p16(__a, __b, __c) \
+#define vext_p16(__a, __b, __c) __extension__ \
   ({ \
      poly16x4_t __ax = __a; \
      poly16x4_t __bx = __b; \
@@ -10102,7 +10102,7 @@
      __rv.__i; \
    })
 
-#define vextq_s8(__a, __b, __c) \
+#define vextq_s8(__a, __b, __c) __extension__ \
   ({ \
      int8x16_t __ax = __a; \
      int8x16_t __bx = __b; \
@@ -10111,7 +10111,7 @@
      __rv.__i; \
    })
 
-#define vextq_s16(__a, __b, __c) \
+#define vextq_s16(__a, __b, __c) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      int16x8_t __bx = __b; \
@@ -10120,7 +10120,7 @@
      __rv.__i; \
    })
 
-#define vextq_s32(__a, __b, __c) \
+#define vextq_s32(__a, __b, __c) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      int32x4_t __bx = __b; \
@@ -10129,7 +10129,7 @@
      __rv.__i; \
    })
 
-#define vextq_s64(__a, __b, __c) \
+#define vextq_s64(__a, __b, __c) __extension__ \
   ({ \
      int64x2_t __ax = __a; \
      int64x2_t __bx = __b; \
@@ -10138,7 +10138,7 @@
      __rv.__i; \
    })
 
-#define vextq_f32(__a, __b, __c) \
+#define vextq_f32(__a, __b, __c) __extension__ \
   ({ \
      float32x4_t __ax = __a; \
      float32x4_t __bx = __b; \
@@ -10147,7 +10147,7 @@
      __rv.__i; \
    })
 
-#define vextq_u8(__a, __b, __c) \
+#define vextq_u8(__a, __b, __c) __extension__ \
   ({ \
      uint8x16_t __ax = __a; \
      uint8x16_t __bx = __b; \
@@ -10156,7 +10156,7 @@
      __rv.__i; \
    })
 
-#define vextq_u16(__a, __b, __c) \
+#define vextq_u16(__a, __b, __c) __extension__ \
   ({ \
      uint16x8_t __ax = __a; \
      uint16x8_t __bx = __b; \
@@ -10165,7 +10165,7 @@
      __rv.__i; \
    })
 
-#define vextq_u32(__a, __b, __c) \
+#define vextq_u32(__a, __b, __c) __extension__ \
   ({ \
      uint32x4_t __ax = __a; \
      uint32x4_t __bx = __b; \
@@ -10174,7 +10174,7 @@
      __rv.__i; \
    })
 
-#define vextq_u64(__a, __b, __c) \
+#define vextq_u64(__a, __b, __c) __extension__ \
   ({ \
      uint64x2_t __ax = __a; \
      uint64x2_t __bx = __b; \
@@ -10183,7 +10183,7 @@
      __rv.__i; \
    })
 
-#define vextq_p8(__a, __b, __c) \
+#define vextq_p8(__a, __b, __c) __extension__ \
   ({ \
      poly8x16_t __ax = __a; \
      poly8x16_t __bx = __b; \
@@ -10192,7 +10192,7 @@
      __rv.__i; \
    })
 
-#define vextq_p16(__a, __b, __c) \
+#define vextq_p16(__a, __b, __c) __extension__ \
   ({ \
      poly16x8_t __ax = __a; \
      poly16x8_t __bx = __b; \
@@ -10201,7 +10201,7 @@
      __rv.__i; \
    })
 
-#define vrev64_s8(__a) \
+#define vrev64_s8(__a) __extension__ \
   ({ \
      int8x8_t __ax = __a; \
      union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -10209,7 +10209,7 @@
      __rv.__i; \
    })
 
-#define vrev64_s16(__a) \
+#define vrev64_s16(__a) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -10217,7 +10217,7 @@
      __rv.__i; \
    })
 
-#define vrev64_s32(__a) \
+#define vrev64_s32(__a) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \
@@ -10225,7 +10225,7 @@
      __rv.__i; \
    })
 
-#define vrev64_f32(__a) \
+#define vrev64_f32(__a) __extension__ \
   ({ \
      float32x2_t __ax = __a; \
      union { float32x2_t __i; __neon_float32x2_t __o; } __rv; \
@@ -10233,7 +10233,7 @@
      __rv.__i; \
    })
 
-#define vrev64_u8(__a) \
+#define vrev64_u8(__a) __extension__ \
   ({ \
      uint8x8_t __ax = __a; \
      union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -10241,7 +10241,7 @@
      __rv.__i; \
    })
 
-#define vrev64_u16(__a) \
+#define vrev64_u16(__a) __extension__ \
   ({ \
      uint16x4_t __ax = __a; \
      union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -10249,7 +10249,7 @@
      __rv.__i; \
    })
 
-#define vrev64_u32(__a) \
+#define vrev64_u32(__a) __extension__ \
   ({ \
      uint32x2_t __ax = __a; \
      union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \
@@ -10257,7 +10257,7 @@
      __rv.__i; \
    })
 
-#define vrev64_p8(__a) \
+#define vrev64_p8(__a) __extension__ \
   ({ \
      poly8x8_t __ax = __a; \
      union { poly8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -10265,7 +10265,7 @@
      __rv.__i; \
    })
 
-#define vrev64_p16(__a) \
+#define vrev64_p16(__a) __extension__ \
   ({ \
      poly16x4_t __ax = __a; \
      union { poly16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -10273,7 +10273,7 @@
      __rv.__i; \
    })
 
-#define vrev64q_s8(__a) \
+#define vrev64q_s8(__a) __extension__ \
   ({ \
      int8x16_t __ax = __a; \
      union { int8x16_t __i; __neon_int8x16_t __o; } __rv; \
@@ -10281,7 +10281,7 @@
      __rv.__i; \
    })
 
-#define vrev64q_s16(__a) \
+#define vrev64q_s16(__a) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \
@@ -10289,7 +10289,7 @@
      __rv.__i; \
    })
 
-#define vrev64q_s32(__a) \
+#define vrev64q_s32(__a) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \
@@ -10297,7 +10297,7 @@
      __rv.__i; \
    })
 
-#define vrev64q_f32(__a) \
+#define vrev64q_f32(__a) __extension__ \
   ({ \
      float32x4_t __ax = __a; \
      union { float32x4_t __i; __neon_float32x4_t __o; } __rv; \
@@ -10305,7 +10305,7 @@
      __rv.__i; \
    })
 
-#define vrev64q_u8(__a) \
+#define vrev64q_u8(__a) __extension__ \
   ({ \
      uint8x16_t __ax = __a; \
      union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \
@@ -10313,7 +10313,7 @@
      __rv.__i; \
    })
 
-#define vrev64q_u16(__a) \
+#define vrev64q_u16(__a) __extension__ \
   ({ \
      uint16x8_t __ax = __a; \
      union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \
@@ -10321,7 +10321,7 @@
      __rv.__i; \
    })
 
-#define vrev64q_u32(__a) \
+#define vrev64q_u32(__a) __extension__ \
   ({ \
      uint32x4_t __ax = __a; \
      union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \
@@ -10329,7 +10329,7 @@
      __rv.__i; \
    })
 
-#define vrev64q_p8(__a) \
+#define vrev64q_p8(__a) __extension__ \
   ({ \
      poly8x16_t __ax = __a; \
      union { poly8x16_t __i; __neon_int8x16_t __o; } __rv; \
@@ -10337,7 +10337,7 @@
      __rv.__i; \
    })
 
-#define vrev64q_p16(__a) \
+#define vrev64q_p16(__a) __extension__ \
   ({ \
      poly16x8_t __ax = __a; \
      union { poly16x8_t __i; __neon_int16x8_t __o; } __rv; \
@@ -10345,7 +10345,7 @@
      __rv.__i; \
    })
 
-#define vrev32_s8(__a) \
+#define vrev32_s8(__a) __extension__ \
   ({ \
      int8x8_t __ax = __a; \
      union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -10353,7 +10353,7 @@
      __rv.__i; \
    })
 
-#define vrev32_s16(__a) \
+#define vrev32_s16(__a) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -10361,7 +10361,7 @@
      __rv.__i; \
    })
 
-#define vrev32_u8(__a) \
+#define vrev32_u8(__a) __extension__ \
   ({ \
      uint8x8_t __ax = __a; \
      union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -10369,7 +10369,7 @@
      __rv.__i; \
    })
 
-#define vrev32_u16(__a) \
+#define vrev32_u16(__a) __extension__ \
   ({ \
      uint16x4_t __ax = __a; \
      union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -10377,7 +10377,7 @@
      __rv.__i; \
    })
 
-#define vrev32_p8(__a) \
+#define vrev32_p8(__a) __extension__ \
   ({ \
      poly8x8_t __ax = __a; \
      union { poly8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -10385,7 +10385,7 @@
      __rv.__i; \
    })
 
-#define vrev32_p16(__a) \
+#define vrev32_p16(__a) __extension__ \
   ({ \
      poly16x4_t __ax = __a; \
      union { poly16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -10393,7 +10393,7 @@
      __rv.__i; \
    })
 
-#define vrev32q_s8(__a) \
+#define vrev32q_s8(__a) __extension__ \
   ({ \
      int8x16_t __ax = __a; \
      union { int8x16_t __i; __neon_int8x16_t __o; } __rv; \
@@ -10401,7 +10401,7 @@
      __rv.__i; \
    })
 
-#define vrev32q_s16(__a) \
+#define vrev32q_s16(__a) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \
@@ -10409,7 +10409,7 @@
      __rv.__i; \
    })
 
-#define vrev32q_u8(__a) \
+#define vrev32q_u8(__a) __extension__ \
   ({ \
      uint8x16_t __ax = __a; \
      union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \
@@ -10417,7 +10417,7 @@
      __rv.__i; \
    })
 
-#define vrev32q_u16(__a) \
+#define vrev32q_u16(__a) __extension__ \
   ({ \
      uint16x8_t __ax = __a; \
      union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \
@@ -10425,7 +10425,7 @@
      __rv.__i; \
    })
 
-#define vrev32q_p8(__a) \
+#define vrev32q_p8(__a) __extension__ \
   ({ \
      poly8x16_t __ax = __a; \
      union { poly8x16_t __i; __neon_int8x16_t __o; } __rv; \
@@ -10433,7 +10433,7 @@
      __rv.__i; \
    })
 
-#define vrev32q_p16(__a) \
+#define vrev32q_p16(__a) __extension__ \
   ({ \
      poly16x8_t __ax = __a; \
      union { poly16x8_t __i; __neon_int16x8_t __o; } __rv; \
@@ -10441,7 +10441,7 @@
      __rv.__i; \
    })
 
-#define vrev16_s8(__a) \
+#define vrev16_s8(__a) __extension__ \
   ({ \
      int8x8_t __ax = __a; \
      union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -10449,7 +10449,7 @@
      __rv.__i; \
    })
 
-#define vrev16_u8(__a) \
+#define vrev16_u8(__a) __extension__ \
   ({ \
      uint8x8_t __ax = __a; \
      union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -10457,7 +10457,7 @@
      __rv.__i; \
    })
 
-#define vrev16_p8(__a) \
+#define vrev16_p8(__a) __extension__ \
   ({ \
      poly8x8_t __ax = __a; \
      union { poly8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -10465,7 +10465,7 @@
      __rv.__i; \
    })
 
-#define vrev16q_s8(__a) \
+#define vrev16q_s8(__a) __extension__ \
   ({ \
      int8x16_t __ax = __a; \
      union { int8x16_t __i; __neon_int8x16_t __o; } __rv; \
@@ -10473,7 +10473,7 @@
      __rv.__i; \
    })
 
-#define vrev16q_u8(__a) \
+#define vrev16q_u8(__a) __extension__ \
   ({ \
      uint8x16_t __ax = __a; \
      union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \
@@ -10481,7 +10481,7 @@
      __rv.__i; \
    })
 
-#define vrev16q_p8(__a) \
+#define vrev16q_p8(__a) __extension__ \
   ({ \
      poly8x16_t __ax = __a; \
      union { poly8x16_t __i; __neon_int8x16_t __o; } __rv; \
@@ -10489,7 +10489,7 @@
      __rv.__i; \
    })
 
-#define vbsl_s8(__a, __b, __c) \
+#define vbsl_s8(__a, __b, __c) __extension__ \
   ({ \
      uint8x8_t __ax = __a; \
      int8x8_t __bx = __b; \
@@ -10499,7 +10499,7 @@
      __rv.__i; \
    })
 
-#define vbsl_s16(__a, __b, __c) \
+#define vbsl_s16(__a, __b, __c) __extension__ \
   ({ \
      uint16x4_t __ax = __a; \
      int16x4_t __bx = __b; \
@@ -10509,7 +10509,7 @@
      __rv.__i; \
    })
 
-#define vbsl_s32(__a, __b, __c) \
+#define vbsl_s32(__a, __b, __c) __extension__ \
   ({ \
      uint32x2_t __ax = __a; \
      int32x2_t __bx = __b; \
@@ -10519,7 +10519,7 @@
      __rv.__i; \
    })
 
-#define vbsl_s64(__a, __b, __c) \
+#define vbsl_s64(__a, __b, __c) __extension__ \
   ({ \
      uint64x1_t __ax = __a; \
      int64x1_t __bx = __b; \
@@ -10529,7 +10529,7 @@
      __rv.__i; \
    })
 
-#define vbsl_f32(__a, __b, __c) \
+#define vbsl_f32(__a, __b, __c) __extension__ \
   ({ \
      uint32x2_t __ax = __a; \
      float32x2_t __bx = __b; \
@@ -10539,7 +10539,7 @@
      __rv.__i; \
    })
 
-#define vbsl_u8(__a, __b, __c) \
+#define vbsl_u8(__a, __b, __c) __extension__ \
   ({ \
      uint8x8_t __ax = __a; \
      uint8x8_t __bx = __b; \
@@ -10549,7 +10549,7 @@
      __rv.__i; \
    })
 
-#define vbsl_u16(__a, __b, __c) \
+#define vbsl_u16(__a, __b, __c) __extension__ \
   ({ \
      uint16x4_t __ax = __a; \
      uint16x4_t __bx = __b; \
@@ -10559,7 +10559,7 @@
      __rv.__i; \
    })
 
-#define vbsl_u32(__a, __b, __c) \
+#define vbsl_u32(__a, __b, __c) __extension__ \
   ({ \
      uint32x2_t __ax = __a; \
      uint32x2_t __bx = __b; \
@@ -10569,7 +10569,7 @@
      __rv.__i; \
    })
 
-#define vbsl_u64(__a, __b, __c) \
+#define vbsl_u64(__a, __b, __c) __extension__ \
   ({ \
      uint64x1_t __ax = __a; \
      uint64x1_t __bx = __b; \
@@ -10579,7 +10579,7 @@
      __rv.__i; \
    })
 
-#define vbsl_p8(__a, __b, __c) \
+#define vbsl_p8(__a, __b, __c) __extension__ \
   ({ \
      uint8x8_t __ax = __a; \
      poly8x8_t __bx = __b; \
@@ -10589,7 +10589,7 @@
      __rv.__i; \
    })
 
-#define vbsl_p16(__a, __b, __c) \
+#define vbsl_p16(__a, __b, __c) __extension__ \
   ({ \
      uint16x4_t __ax = __a; \
      poly16x4_t __bx = __b; \
@@ -10599,7 +10599,7 @@
      __rv.__i; \
    })
 
-#define vbslq_s8(__a, __b, __c) \
+#define vbslq_s8(__a, __b, __c) __extension__ \
   ({ \
      uint8x16_t __ax = __a; \
      int8x16_t __bx = __b; \
@@ -10609,7 +10609,7 @@
      __rv.__i; \
    })
 
-#define vbslq_s16(__a, __b, __c) \
+#define vbslq_s16(__a, __b, __c) __extension__ \
   ({ \
      uint16x8_t __ax = __a; \
      int16x8_t __bx = __b; \
@@ -10619,7 +10619,7 @@
      __rv.__i; \
    })
 
-#define vbslq_s32(__a, __b, __c) \
+#define vbslq_s32(__a, __b, __c) __extension__ \
   ({ \
      uint32x4_t __ax = __a; \
      int32x4_t __bx = __b; \
@@ -10629,7 +10629,7 @@
      __rv.__i; \
    })
 
-#define vbslq_s64(__a, __b, __c) \
+#define vbslq_s64(__a, __b, __c) __extension__ \
   ({ \
      uint64x2_t __ax = __a; \
      int64x2_t __bx = __b; \
@@ -10639,7 +10639,7 @@
      __rv.__i; \
    })
 
-#define vbslq_f32(__a, __b, __c) \
+#define vbslq_f32(__a, __b, __c) __extension__ \
   ({ \
      uint32x4_t __ax = __a; \
      float32x4_t __bx = __b; \
@@ -10649,7 +10649,7 @@
      __rv.__i; \
    })
 
-#define vbslq_u8(__a, __b, __c) \
+#define vbslq_u8(__a, __b, __c) __extension__ \
   ({ \
      uint8x16_t __ax = __a; \
      uint8x16_t __bx = __b; \
@@ -10659,7 +10659,7 @@
      __rv.__i; \
    })
 
-#define vbslq_u16(__a, __b, __c) \
+#define vbslq_u16(__a, __b, __c) __extension__ \
   ({ \
      uint16x8_t __ax = __a; \
      uint16x8_t __bx = __b; \
@@ -10669,7 +10669,7 @@
      __rv.__i; \
    })
 
-#define vbslq_u32(__a, __b, __c) \
+#define vbslq_u32(__a, __b, __c) __extension__ \
   ({ \
      uint32x4_t __ax = __a; \
      uint32x4_t __bx = __b; \
@@ -10679,7 +10679,7 @@
      __rv.__i; \
    })
 
-#define vbslq_u64(__a, __b, __c) \
+#define vbslq_u64(__a, __b, __c) __extension__ \
   ({ \
      uint64x2_t __ax = __a; \
      uint64x2_t __bx = __b; \
@@ -10689,7 +10689,7 @@
      __rv.__i; \
    })
 
-#define vbslq_p8(__a, __b, __c) \
+#define vbslq_p8(__a, __b, __c) __extension__ \
   ({ \
      uint8x16_t __ax = __a; \
      poly8x16_t __bx = __b; \
@@ -10699,7 +10699,7 @@
      __rv.__i; \
    })
 
-#define vbslq_p16(__a, __b, __c) \
+#define vbslq_p16(__a, __b, __c) __extension__ \
   ({ \
      uint16x8_t __ax = __a; \
      poly16x8_t __bx = __b; \
@@ -10709,7 +10709,7 @@
      __rv.__i; \
    })
 
-#define vtrn_s8(__a, __b) \
+#define vtrn_s8(__a, __b) __extension__ \
   ({ \
      int8x8_t __ax = __a; \
      int8x8_t __bx = __b; \
@@ -10718,7 +10718,7 @@
      __rv.__i; \
    })
 
-#define vtrn_s16(__a, __b) \
+#define vtrn_s16(__a, __b) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      int16x4_t __bx = __b; \
@@ -10727,7 +10727,7 @@
      __rv.__i; \
    })
 
-#define vtrn_s32(__a, __b) \
+#define vtrn_s32(__a, __b) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      int32x2_t __bx = __b; \
@@ -10736,7 +10736,7 @@
      __rv.__i; \
    })
 
-#define vtrn_f32(__a, __b) \
+#define vtrn_f32(__a, __b) __extension__ \
   ({ \
      float32x2_t __ax = __a; \
      float32x2_t __bx = __b; \
@@ -10745,7 +10745,7 @@
      __rv.__i; \
    })
 
-#define vtrn_u8(__a, __b) \
+#define vtrn_u8(__a, __b) __extension__ \
   ({ \
      uint8x8_t __ax = __a; \
      uint8x8_t __bx = __b; \
@@ -10754,7 +10754,7 @@
      __rv.__i; \
    })
 
-#define vtrn_u16(__a, __b) \
+#define vtrn_u16(__a, __b) __extension__ \
   ({ \
      uint16x4_t __ax = __a; \
      uint16x4_t __bx = __b; \
@@ -10763,7 +10763,7 @@
      __rv.__i; \
    })
 
-#define vtrn_u32(__a, __b) \
+#define vtrn_u32(__a, __b) __extension__ \
   ({ \
      uint32x2_t __ax = __a; \
      uint32x2_t __bx = __b; \
@@ -10772,7 +10772,7 @@
      __rv.__i; \
    })
 
-#define vtrn_p8(__a, __b) \
+#define vtrn_p8(__a, __b) __extension__ \
   ({ \
      poly8x8_t __ax = __a; \
      poly8x8_t __bx = __b; \
@@ -10781,7 +10781,7 @@
      __rv.__i; \
    })
 
-#define vtrn_p16(__a, __b) \
+#define vtrn_p16(__a, __b) __extension__ \
   ({ \
      poly16x4_t __ax = __a; \
      poly16x4_t __bx = __b; \
@@ -10790,7 +10790,7 @@
      __rv.__i; \
    })
 
-#define vtrnq_s8(__a, __b) \
+#define vtrnq_s8(__a, __b) __extension__ \
   ({ \
      int8x16_t __ax = __a; \
      int8x16_t __bx = __b; \
@@ -10799,7 +10799,7 @@
      __rv.__i; \
    })
 
-#define vtrnq_s16(__a, __b) \
+#define vtrnq_s16(__a, __b) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      int16x8_t __bx = __b; \
@@ -10808,7 +10808,7 @@
      __rv.__i; \
    })
 
-#define vtrnq_s32(__a, __b) \
+#define vtrnq_s32(__a, __b) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      int32x4_t __bx = __b; \
@@ -10817,7 +10817,7 @@
      __rv.__i; \
    })
 
-#define vtrnq_f32(__a, __b) \
+#define vtrnq_f32(__a, __b) __extension__ \
   ({ \
      float32x4_t __ax = __a; \
      float32x4_t __bx = __b; \
@@ -10826,7 +10826,7 @@
      __rv.__i; \
    })
 
-#define vtrnq_u8(__a, __b) \
+#define vtrnq_u8(__a, __b) __extension__ \
   ({ \
      uint8x16_t __ax = __a; \
      uint8x16_t __bx = __b; \
@@ -10835,7 +10835,7 @@
      __rv.__i; \
    })
 
-#define vtrnq_u16(__a, __b) \
+#define vtrnq_u16(__a, __b) __extension__ \
   ({ \
      uint16x8_t __ax = __a; \
      uint16x8_t __bx = __b; \
@@ -10844,7 +10844,7 @@
      __rv.__i; \
    })
 
-#define vtrnq_u32(__a, __b) \
+#define vtrnq_u32(__a, __b) __extension__ \
   ({ \
      uint32x4_t __ax = __a; \
      uint32x4_t __bx = __b; \
@@ -10853,7 +10853,7 @@
      __rv.__i; \
    })
 
-#define vtrnq_p8(__a, __b) \
+#define vtrnq_p8(__a, __b) __extension__ \
   ({ \
      poly8x16_t __ax = __a; \
      poly8x16_t __bx = __b; \
@@ -10862,7 +10862,7 @@
      __rv.__i; \
    })
 
-#define vtrnq_p16(__a, __b) \
+#define vtrnq_p16(__a, __b) __extension__ \
   ({ \
      poly16x8_t __ax = __a; \
      poly16x8_t __bx = __b; \
@@ -10871,7 +10871,7 @@
      __rv.__i; \
    })
 
-#define vzip_s8(__a, __b) \
+#define vzip_s8(__a, __b) __extension__ \
   ({ \
      int8x8_t __ax = __a; \
      int8x8_t __bx = __b; \
@@ -10880,7 +10880,7 @@
      __rv.__i; \
    })
 
-#define vzip_s16(__a, __b) \
+#define vzip_s16(__a, __b) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      int16x4_t __bx = __b; \
@@ -10889,7 +10889,7 @@
      __rv.__i; \
    })
 
-#define vzip_s32(__a, __b) \
+#define vzip_s32(__a, __b) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      int32x2_t __bx = __b; \
@@ -10898,7 +10898,7 @@
      __rv.__i; \
    })
 
-#define vzip_f32(__a, __b) \
+#define vzip_f32(__a, __b) __extension__ \
   ({ \
      float32x2_t __ax = __a; \
      float32x2_t __bx = __b; \
@@ -10907,7 +10907,7 @@
      __rv.__i; \
    })
 
-#define vzip_u8(__a, __b) \
+#define vzip_u8(__a, __b) __extension__ \
   ({ \
      uint8x8_t __ax = __a; \
      uint8x8_t __bx = __b; \
@@ -10916,7 +10916,7 @@
      __rv.__i; \
    })
 
-#define vzip_u16(__a, __b) \
+#define vzip_u16(__a, __b) __extension__ \
   ({ \
      uint16x4_t __ax = __a; \
      uint16x4_t __bx = __b; \
@@ -10925,7 +10925,7 @@
      __rv.__i; \
    })
 
-#define vzip_u32(__a, __b) \
+#define vzip_u32(__a, __b) __extension__ \
   ({ \
      uint32x2_t __ax = __a; \
      uint32x2_t __bx = __b; \
@@ -10934,7 +10934,7 @@
      __rv.__i; \
    })
 
-#define vzip_p8(__a, __b) \
+#define vzip_p8(__a, __b) __extension__ \
   ({ \
      poly8x8_t __ax = __a; \
      poly8x8_t __bx = __b; \
@@ -10943,7 +10943,7 @@
      __rv.__i; \
    })
 
-#define vzip_p16(__a, __b) \
+#define vzip_p16(__a, __b) __extension__ \
   ({ \
      poly16x4_t __ax = __a; \
      poly16x4_t __bx = __b; \
@@ -10952,7 +10952,7 @@
      __rv.__i; \
    })
 
-#define vzipq_s8(__a, __b) \
+#define vzipq_s8(__a, __b) __extension__ \
   ({ \
      int8x16_t __ax = __a; \
      int8x16_t __bx = __b; \
@@ -10961,7 +10961,7 @@
      __rv.__i; \
    })
 
-#define vzipq_s16(__a, __b) \
+#define vzipq_s16(__a, __b) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      int16x8_t __bx = __b; \
@@ -10970,7 +10970,7 @@
      __rv.__i; \
    })
 
-#define vzipq_s32(__a, __b) \
+#define vzipq_s32(__a, __b) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      int32x4_t __bx = __b; \
@@ -10979,7 +10979,7 @@
      __rv.__i; \
    })
 
-#define vzipq_f32(__a, __b) \
+#define vzipq_f32(__a, __b) __extension__ \
   ({ \
      float32x4_t __ax = __a; \
      float32x4_t __bx = __b; \
@@ -10988,7 +10988,7 @@
      __rv.__i; \
    })
 
-#define vzipq_u8(__a, __b) \
+#define vzipq_u8(__a, __b) __extension__ \
   ({ \
      uint8x16_t __ax = __a; \
      uint8x16_t __bx = __b; \
@@ -10997,7 +10997,7 @@
      __rv.__i; \
    })
 
-#define vzipq_u16(__a, __b) \
+#define vzipq_u16(__a, __b) __extension__ \
   ({ \
      uint16x8_t __ax = __a; \
      uint16x8_t __bx = __b; \
@@ -11006,7 +11006,7 @@
      __rv.__i; \
    })
 
-#define vzipq_u32(__a, __b) \
+#define vzipq_u32(__a, __b) __extension__ \
   ({ \
      uint32x4_t __ax = __a; \
      uint32x4_t __bx = __b; \
@@ -11015,7 +11015,7 @@
      __rv.__i; \
    })
 
-#define vzipq_p8(__a, __b) \
+#define vzipq_p8(__a, __b) __extension__ \
   ({ \
      poly8x16_t __ax = __a; \
      poly8x16_t __bx = __b; \
@@ -11024,7 +11024,7 @@
      __rv.__i; \
    })
 
-#define vzipq_p16(__a, __b) \
+#define vzipq_p16(__a, __b) __extension__ \
   ({ \
      poly16x8_t __ax = __a; \
      poly16x8_t __bx = __b; \
@@ -11033,7 +11033,7 @@
      __rv.__i; \
    })
 
-#define vuzp_s8(__a, __b) \
+#define vuzp_s8(__a, __b) __extension__ \
   ({ \
      int8x8_t __ax = __a; \
      int8x8_t __bx = __b; \
@@ -11042,7 +11042,7 @@
      __rv.__i; \
    })
 
-#define vuzp_s16(__a, __b) \
+#define vuzp_s16(__a, __b) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      int16x4_t __bx = __b; \
@@ -11051,7 +11051,7 @@
      __rv.__i; \
    })
 
-#define vuzp_s32(__a, __b) \
+#define vuzp_s32(__a, __b) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      int32x2_t __bx = __b; \
@@ -11060,7 +11060,7 @@
      __rv.__i; \
    })
 
-#define vuzp_f32(__a, __b) \
+#define vuzp_f32(__a, __b) __extension__ \
   ({ \
      float32x2_t __ax = __a; \
      float32x2_t __bx = __b; \
@@ -11069,7 +11069,7 @@
      __rv.__i; \
    })
 
-#define vuzp_u8(__a, __b) \
+#define vuzp_u8(__a, __b) __extension__ \
   ({ \
      uint8x8_t __ax = __a; \
      uint8x8_t __bx = __b; \
@@ -11078,7 +11078,7 @@
      __rv.__i; \
    })
 
-#define vuzp_u16(__a, __b) \
+#define vuzp_u16(__a, __b) __extension__ \
   ({ \
      uint16x4_t __ax = __a; \
      uint16x4_t __bx = __b; \
@@ -11087,7 +11087,7 @@
      __rv.__i; \
    })
 
-#define vuzp_u32(__a, __b) \
+#define vuzp_u32(__a, __b) __extension__ \
   ({ \
      uint32x2_t __ax = __a; \
      uint32x2_t __bx = __b; \
@@ -11096,7 +11096,7 @@
      __rv.__i; \
    })
 
-#define vuzp_p8(__a, __b) \
+#define vuzp_p8(__a, __b) __extension__ \
   ({ \
      poly8x8_t __ax = __a; \
      poly8x8_t __bx = __b; \
@@ -11105,7 +11105,7 @@
      __rv.__i; \
    })
 
-#define vuzp_p16(__a, __b) \
+#define vuzp_p16(__a, __b) __extension__ \
   ({ \
      poly16x4_t __ax = __a; \
      poly16x4_t __bx = __b; \
@@ -11114,7 +11114,7 @@
      __rv.__i; \
    })
 
-#define vuzpq_s8(__a, __b) \
+#define vuzpq_s8(__a, __b) __extension__ \
   ({ \
      int8x16_t __ax = __a; \
      int8x16_t __bx = __b; \
@@ -11123,7 +11123,7 @@
      __rv.__i; \
    })
 
-#define vuzpq_s16(__a, __b) \
+#define vuzpq_s16(__a, __b) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      int16x8_t __bx = __b; \
@@ -11132,7 +11132,7 @@
      __rv.__i; \
    })
 
-#define vuzpq_s32(__a, __b) \
+#define vuzpq_s32(__a, __b) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      int32x4_t __bx = __b; \
@@ -11141,7 +11141,7 @@
      __rv.__i; \
    })
 
-#define vuzpq_f32(__a, __b) \
+#define vuzpq_f32(__a, __b) __extension__ \
   ({ \
      float32x4_t __ax = __a; \
      float32x4_t __bx = __b; \
@@ -11150,7 +11150,7 @@
      __rv.__i; \
    })
 
-#define vuzpq_u8(__a, __b) \
+#define vuzpq_u8(__a, __b) __extension__ \
   ({ \
      uint8x16_t __ax = __a; \
      uint8x16_t __bx = __b; \
@@ -11159,7 +11159,7 @@
      __rv.__i; \
    })
 
-#define vuzpq_u16(__a, __b) \
+#define vuzpq_u16(__a, __b) __extension__ \
   ({ \
      uint16x8_t __ax = __a; \
      uint16x8_t __bx = __b; \
@@ -11168,7 +11168,7 @@
      __rv.__i; \
    })
 
-#define vuzpq_u32(__a, __b) \
+#define vuzpq_u32(__a, __b) __extension__ \
   ({ \
      uint32x4_t __ax = __a; \
      uint32x4_t __bx = __b; \
@@ -11177,7 +11177,7 @@
      __rv.__i; \
    })
 
-#define vuzpq_p8(__a, __b) \
+#define vuzpq_p8(__a, __b) __extension__ \
   ({ \
      poly8x16_t __ax = __a; \
      poly8x16_t __bx = __b; \
@@ -11186,7 +11186,7 @@
      __rv.__i; \
    })
 
-#define vuzpq_p16(__a, __b) \
+#define vuzpq_p16(__a, __b) __extension__ \
   ({ \
      poly16x8_t __ax = __a; \
      poly16x8_t __bx = __b; \
@@ -11195,7 +11195,7 @@
      __rv.__i; \
    })
 
-#define vld1_s8(__a) \
+#define vld1_s8(__a) __extension__ \
   ({ \
      const int8_t * __ax = __a; \
      union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -11203,7 +11203,7 @@
      __rv.__i; \
    })
 
-#define vld1_s16(__a) \
+#define vld1_s16(__a) __extension__ \
   ({ \
      const int16_t * __ax = __a; \
      union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -11211,7 +11211,7 @@
      __rv.__i; \
    })
 
-#define vld1_s32(__a) \
+#define vld1_s32(__a) __extension__ \
   ({ \
      const int32_t * __ax = __a; \
      union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \
@@ -11219,7 +11219,7 @@
      __rv.__i; \
    })
 
-#define vld1_s64(__a) \
+#define vld1_s64(__a) __extension__ \
   ({ \
      const int64_t * __ax = __a; \
      union { int64x1_t __i; __neon_int64x1_t __o; } __rv; \
@@ -11227,7 +11227,7 @@
      __rv.__i; \
    })
 
-#define vld1_f32(__a) \
+#define vld1_f32(__a) __extension__ \
   ({ \
      const float32_t * __ax = __a; \
      union { float32x2_t __i; __neon_float32x2_t __o; } __rv; \
@@ -11235,7 +11235,7 @@
      __rv.__i; \
    })
 
-#define vld1_u8(__a) \
+#define vld1_u8(__a) __extension__ \
   ({ \
      const uint8_t * __ax = __a; \
      union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -11243,7 +11243,7 @@
      __rv.__i; \
    })
 
-#define vld1_u16(__a) \
+#define vld1_u16(__a) __extension__ \
   ({ \
      const uint16_t * __ax = __a; \
      union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -11251,7 +11251,7 @@
      __rv.__i; \
    })
 
-#define vld1_u32(__a) \
+#define vld1_u32(__a) __extension__ \
   ({ \
      const uint32_t * __ax = __a; \
      union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \
@@ -11259,7 +11259,7 @@
      __rv.__i; \
    })
 
-#define vld1_u64(__a) \
+#define vld1_u64(__a) __extension__ \
   ({ \
      const uint64_t * __ax = __a; \
      union { uint64x1_t __i; __neon_int64x1_t __o; } __rv; \
@@ -11267,7 +11267,7 @@
      __rv.__i; \
    })
 
-#define vld1_p8(__a) \
+#define vld1_p8(__a) __extension__ \
   ({ \
      const poly8_t * __ax = __a; \
      union { poly8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -11275,7 +11275,7 @@
      __rv.__i; \
    })
 
-#define vld1_p16(__a) \
+#define vld1_p16(__a) __extension__ \
   ({ \
      const poly16_t * __ax = __a; \
      union { poly16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -11283,7 +11283,7 @@
      __rv.__i; \
    })
 
-#define vld1q_s8(__a) \
+#define vld1q_s8(__a) __extension__ \
   ({ \
      const int8_t * __ax = __a; \
      union { int8x16_t __i; __neon_int8x16_t __o; } __rv; \
@@ -11291,7 +11291,7 @@
      __rv.__i; \
    })
 
-#define vld1q_s16(__a) \
+#define vld1q_s16(__a) __extension__ \
   ({ \
      const int16_t * __ax = __a; \
      union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \
@@ -11299,7 +11299,7 @@
      __rv.__i; \
    })
 
-#define vld1q_s32(__a) \
+#define vld1q_s32(__a) __extension__ \
   ({ \
      const int32_t * __ax = __a; \
      union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \
@@ -11307,7 +11307,7 @@
      __rv.__i; \
    })
 
-#define vld1q_s64(__a) \
+#define vld1q_s64(__a) __extension__ \
   ({ \
      const int64_t * __ax = __a; \
      union { int64x2_t __i; __neon_int64x2_t __o; } __rv; \
@@ -11315,7 +11315,7 @@
      __rv.__i; \
    })
 
-#define vld1q_f32(__a) \
+#define vld1q_f32(__a) __extension__ \
   ({ \
      const float32_t * __ax = __a; \
      union { float32x4_t __i; __neon_float32x4_t __o; } __rv; \
@@ -11323,7 +11323,7 @@
      __rv.__i; \
    })
 
-#define vld1q_u8(__a) \
+#define vld1q_u8(__a) __extension__ \
   ({ \
      const uint8_t * __ax = __a; \
      union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \
@@ -11331,7 +11331,7 @@
      __rv.__i; \
    })
 
-#define vld1q_u16(__a) \
+#define vld1q_u16(__a) __extension__ \
   ({ \
      const uint16_t * __ax = __a; \
      union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \
@@ -11339,7 +11339,7 @@
      __rv.__i; \
    })
 
-#define vld1q_u32(__a) \
+#define vld1q_u32(__a) __extension__ \
   ({ \
      const uint32_t * __ax = __a; \
      union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \
@@ -11347,7 +11347,7 @@
      __rv.__i; \
    })
 
-#define vld1q_u64(__a) \
+#define vld1q_u64(__a) __extension__ \
   ({ \
      const uint64_t * __ax = __a; \
      union { uint64x2_t __i; __neon_int64x2_t __o; } __rv; \
@@ -11355,7 +11355,7 @@
      __rv.__i; \
    })
 
-#define vld1q_p8(__a) \
+#define vld1q_p8(__a) __extension__ \
   ({ \
      const poly8_t * __ax = __a; \
      union { poly8x16_t __i; __neon_int8x16_t __o; } __rv; \
@@ -11363,7 +11363,7 @@
      __rv.__i; \
    })
 
-#define vld1q_p16(__a) \
+#define vld1q_p16(__a) __extension__ \
   ({ \
      const poly16_t * __ax = __a; \
      union { poly16x8_t __i; __neon_int16x8_t __o; } __rv; \
@@ -11371,7 +11371,7 @@
      __rv.__i; \
    })
 
-#define vld1_lane_s8(__a, __b, __c) \
+#define vld1_lane_s8(__a, __b, __c) __extension__ \
   ({ \
      const int8_t * __ax = __a; \
      int8x8_t __bx = __b; \
@@ -11380,7 +11380,7 @@
      __rv.__i; \
    })
 
-#define vld1_lane_s16(__a, __b, __c) \
+#define vld1_lane_s16(__a, __b, __c) __extension__ \
   ({ \
      const int16_t * __ax = __a; \
      int16x4_t __bx = __b; \
@@ -11389,7 +11389,7 @@
      __rv.__i; \
    })
 
-#define vld1_lane_s32(__a, __b, __c) \
+#define vld1_lane_s32(__a, __b, __c) __extension__ \
   ({ \
      const int32_t * __ax = __a; \
      int32x2_t __bx = __b; \
@@ -11398,7 +11398,7 @@
      __rv.__i; \
    })
 
-#define vld1_lane_f32(__a, __b, __c) \
+#define vld1_lane_f32(__a, __b, __c) __extension__ \
   ({ \
      const float32_t * __ax = __a; \
      float32x2_t __bx = __b; \
@@ -11407,7 +11407,7 @@
      __rv.__i; \
    })
 
-#define vld1_lane_u8(__a, __b, __c) \
+#define vld1_lane_u8(__a, __b, __c) __extension__ \
   ({ \
      const uint8_t * __ax = __a; \
      uint8x8_t __bx = __b; \
@@ -11416,7 +11416,7 @@
      __rv.__i; \
    })
 
-#define vld1_lane_u16(__a, __b, __c) \
+#define vld1_lane_u16(__a, __b, __c) __extension__ \
   ({ \
      const uint16_t * __ax = __a; \
      uint16x4_t __bx = __b; \
@@ -11425,7 +11425,7 @@
      __rv.__i; \
    })
 
-#define vld1_lane_u32(__a, __b, __c) \
+#define vld1_lane_u32(__a, __b, __c) __extension__ \
   ({ \
      const uint32_t * __ax = __a; \
      uint32x2_t __bx = __b; \
@@ -11434,7 +11434,7 @@
      __rv.__i; \
    })
 
-#define vld1_lane_p8(__a, __b, __c) \
+#define vld1_lane_p8(__a, __b, __c) __extension__ \
   ({ \
      const poly8_t * __ax = __a; \
      poly8x8_t __bx = __b; \
@@ -11443,7 +11443,7 @@
      __rv.__i; \
    })
 
-#define vld1_lane_p16(__a, __b, __c) \
+#define vld1_lane_p16(__a, __b, __c) __extension__ \
   ({ \
      const poly16_t * __ax = __a; \
      poly16x4_t __bx = __b; \
@@ -11452,7 +11452,7 @@
      __rv.__i; \
    })
 
-#define vld1_lane_s64(__a, __b, __c) \
+#define vld1_lane_s64(__a, __b, __c) __extension__ \
   ({ \
      const int64_t * __ax = __a; \
      int64x1_t __bx = __b; \
@@ -11461,7 +11461,7 @@
      __rv.__i; \
    })
 
-#define vld1_lane_u64(__a, __b, __c) \
+#define vld1_lane_u64(__a, __b, __c) __extension__ \
   ({ \
      const uint64_t * __ax = __a; \
      uint64x1_t __bx = __b; \
@@ -11470,7 +11470,7 @@
      __rv.__i; \
    })
 
-#define vld1q_lane_s8(__a, __b, __c) \
+#define vld1q_lane_s8(__a, __b, __c) __extension__ \
   ({ \
      const int8_t * __ax = __a; \
      int8x16_t __bx = __b; \
@@ -11479,7 +11479,7 @@
      __rv.__i; \
    })
 
-#define vld1q_lane_s16(__a, __b, __c) \
+#define vld1q_lane_s16(__a, __b, __c) __extension__ \
   ({ \
      const int16_t * __ax = __a; \
      int16x8_t __bx = __b; \
@@ -11488,7 +11488,7 @@
      __rv.__i; \
    })
 
-#define vld1q_lane_s32(__a, __b, __c) \
+#define vld1q_lane_s32(__a, __b, __c) __extension__ \
   ({ \
      const int32_t * __ax = __a; \
      int32x4_t __bx = __b; \
@@ -11497,7 +11497,7 @@
      __rv.__i; \
    })
 
-#define vld1q_lane_f32(__a, __b, __c) \
+#define vld1q_lane_f32(__a, __b, __c) __extension__ \
   ({ \
      const float32_t * __ax = __a; \
      float32x4_t __bx = __b; \
@@ -11506,7 +11506,7 @@
      __rv.__i; \
    })
 
-#define vld1q_lane_u8(__a, __b, __c) \
+#define vld1q_lane_u8(__a, __b, __c) __extension__ \
   ({ \
      const uint8_t * __ax = __a; \
      uint8x16_t __bx = __b; \
@@ -11515,7 +11515,7 @@
      __rv.__i; \
    })
 
-#define vld1q_lane_u16(__a, __b, __c) \
+#define vld1q_lane_u16(__a, __b, __c) __extension__ \
   ({ \
      const uint16_t * __ax = __a; \
      uint16x8_t __bx = __b; \
@@ -11524,7 +11524,7 @@
      __rv.__i; \
    })
 
-#define vld1q_lane_u32(__a, __b, __c) \
+#define vld1q_lane_u32(__a, __b, __c) __extension__ \
   ({ \
      const uint32_t * __ax = __a; \
      uint32x4_t __bx = __b; \
@@ -11533,7 +11533,7 @@
      __rv.__i; \
    })
 
-#define vld1q_lane_p8(__a, __b, __c) \
+#define vld1q_lane_p8(__a, __b, __c) __extension__ \
   ({ \
      const poly8_t * __ax = __a; \
      poly8x16_t __bx = __b; \
@@ -11542,7 +11542,7 @@
      __rv.__i; \
    })
 
-#define vld1q_lane_p16(__a, __b, __c) \
+#define vld1q_lane_p16(__a, __b, __c) __extension__ \
   ({ \
      const poly16_t * __ax = __a; \
      poly16x8_t __bx = __b; \
@@ -11551,7 +11551,7 @@
      __rv.__i; \
    })
 
-#define vld1q_lane_s64(__a, __b, __c) \
+#define vld1q_lane_s64(__a, __b, __c) __extension__ \
   ({ \
      const int64_t * __ax = __a; \
      int64x2_t __bx = __b; \
@@ -11560,7 +11560,7 @@
      __rv.__i; \
    })
 
-#define vld1q_lane_u64(__a, __b, __c) \
+#define vld1q_lane_u64(__a, __b, __c) __extension__ \
   ({ \
      const uint64_t * __ax = __a; \
      uint64x2_t __bx = __b; \
@@ -11569,7 +11569,7 @@
      __rv.__i; \
    })
 
-#define vld1_dup_s8(__a) \
+#define vld1_dup_s8(__a) __extension__ \
   ({ \
      const int8_t * __ax = __a; \
      union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -11577,7 +11577,7 @@
      __rv.__i; \
    })
 
-#define vld1_dup_s16(__a) \
+#define vld1_dup_s16(__a) __extension__ \
   ({ \
      const int16_t * __ax = __a; \
      union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -11585,7 +11585,7 @@
      __rv.__i; \
    })
 
-#define vld1_dup_s32(__a) \
+#define vld1_dup_s32(__a) __extension__ \
   ({ \
      const int32_t * __ax = __a; \
      union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \
@@ -11593,7 +11593,7 @@
      __rv.__i; \
    })
 
-#define vld1_dup_f32(__a) \
+#define vld1_dup_f32(__a) __extension__ \
   ({ \
      const float32_t * __ax = __a; \
      union { float32x2_t __i; __neon_float32x2_t __o; } __rv; \
@@ -11601,7 +11601,7 @@
      __rv.__i; \
    })
 
-#define vld1_dup_u8(__a) \
+#define vld1_dup_u8(__a) __extension__ \
   ({ \
      const uint8_t * __ax = __a; \
      union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -11609,7 +11609,7 @@
      __rv.__i; \
    })
 
-#define vld1_dup_u16(__a) \
+#define vld1_dup_u16(__a) __extension__ \
   ({ \
      const uint16_t * __ax = __a; \
      union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -11617,7 +11617,7 @@
      __rv.__i; \
    })
 
-#define vld1_dup_u32(__a) \
+#define vld1_dup_u32(__a) __extension__ \
   ({ \
      const uint32_t * __ax = __a; \
      union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \
@@ -11625,7 +11625,7 @@
      __rv.__i; \
    })
 
-#define vld1_dup_p8(__a) \
+#define vld1_dup_p8(__a) __extension__ \
   ({ \
      const poly8_t * __ax = __a; \
      union { poly8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -11633,7 +11633,7 @@
      __rv.__i; \
    })
 
-#define vld1_dup_p16(__a) \
+#define vld1_dup_p16(__a) __extension__ \
   ({ \
      const poly16_t * __ax = __a; \
      union { poly16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -11641,7 +11641,7 @@
      __rv.__i; \
    })
 
-#define vld1_dup_s64(__a) \
+#define vld1_dup_s64(__a) __extension__ \
   ({ \
      const int64_t * __ax = __a; \
      union { int64x1_t __i; __neon_int64x1_t __o; } __rv; \
@@ -11649,7 +11649,7 @@
      __rv.__i; \
    })
 
-#define vld1_dup_u64(__a) \
+#define vld1_dup_u64(__a) __extension__ \
   ({ \
      const uint64_t * __ax = __a; \
      union { uint64x1_t __i; __neon_int64x1_t __o; } __rv; \
@@ -11657,7 +11657,7 @@
      __rv.__i; \
    })
 
-#define vld1q_dup_s8(__a) \
+#define vld1q_dup_s8(__a) __extension__ \
   ({ \
      const int8_t * __ax = __a; \
      union { int8x16_t __i; __neon_int8x16_t __o; } __rv; \
@@ -11665,7 +11665,7 @@
      __rv.__i; \
    })
 
-#define vld1q_dup_s16(__a) \
+#define vld1q_dup_s16(__a) __extension__ \
   ({ \
      const int16_t * __ax = __a; \
      union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \
@@ -11673,7 +11673,7 @@
      __rv.__i; \
    })
 
-#define vld1q_dup_s32(__a) \
+#define vld1q_dup_s32(__a) __extension__ \
   ({ \
      const int32_t * __ax = __a; \
      union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \
@@ -11681,7 +11681,7 @@
      __rv.__i; \
    })
 
-#define vld1q_dup_f32(__a) \
+#define vld1q_dup_f32(__a) __extension__ \
   ({ \
      const float32_t * __ax = __a; \
      union { float32x4_t __i; __neon_float32x4_t __o; } __rv; \
@@ -11689,7 +11689,7 @@
      __rv.__i; \
    })
 
-#define vld1q_dup_u8(__a) \
+#define vld1q_dup_u8(__a) __extension__ \
   ({ \
      const uint8_t * __ax = __a; \
      union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \
@@ -11697,7 +11697,7 @@
      __rv.__i; \
    })
 
-#define vld1q_dup_u16(__a) \
+#define vld1q_dup_u16(__a) __extension__ \
   ({ \
      const uint16_t * __ax = __a; \
      union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \
@@ -11705,7 +11705,7 @@
      __rv.__i; \
    })
 
-#define vld1q_dup_u32(__a) \
+#define vld1q_dup_u32(__a) __extension__ \
   ({ \
      const uint32_t * __ax = __a; \
      union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \
@@ -11713,7 +11713,7 @@
      __rv.__i; \
    })
 
-#define vld1q_dup_p8(__a) \
+#define vld1q_dup_p8(__a) __extension__ \
   ({ \
      const poly8_t * __ax = __a; \
      union { poly8x16_t __i; __neon_int8x16_t __o; } __rv; \
@@ -11721,7 +11721,7 @@
      __rv.__i; \
    })
 
-#define vld1q_dup_p16(__a) \
+#define vld1q_dup_p16(__a) __extension__ \
   ({ \
      const poly16_t * __ax = __a; \
      union { poly16x8_t __i; __neon_int16x8_t __o; } __rv; \
@@ -11729,7 +11729,7 @@
      __rv.__i; \
    })
 
-#define vld1q_dup_s64(__a) \
+#define vld1q_dup_s64(__a) __extension__ \
   ({ \
      const int64_t * __ax = __a; \
      union { int64x2_t __i; __neon_int64x2_t __o; } __rv; \
@@ -11737,7 +11737,7 @@
      __rv.__i; \
    })
 
-#define vld1q_dup_u64(__a) \
+#define vld1q_dup_u64(__a) __extension__ \
   ({ \
      const uint64_t * __ax = __a; \
      union { uint64x2_t __i; __neon_int64x2_t __o; } __rv; \
@@ -11745,315 +11745,315 @@
      __rv.__i; \
    })
 
-#define vst1_s8(__a, __b) \
+#define vst1_s8(__a, __b) __extension__ \
   ({ \
      int8_t * __ax = __a; \
      int8x8_t __bx = __b; \
      __builtin_neon_vst1v8qi (__neon_ptr_cast(__builtin_neon_qi *, __ax), __bx.val); \
    })
 
-#define vst1_s16(__a, __b) \
+#define vst1_s16(__a, __b) __extension__ \
   ({ \
      int16_t * __ax = __a; \
      int16x4_t __bx = __b; \
      __builtin_neon_vst1v4hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), __bx.val); \
    })
 
-#define vst1_s32(__a, __b) \
+#define vst1_s32(__a, __b) __extension__ \
   ({ \
      int32_t * __ax = __a; \
      int32x2_t __bx = __b; \
      __builtin_neon_vst1v2si (__neon_ptr_cast(__builtin_neon_si *, __ax), __bx.val); \
    })
 
-#define vst1_s64(__a, __b) \
+#define vst1_s64(__a, __b) __extension__ \
   ({ \
      int64_t * __ax = __a; \
      int64x1_t __bx = __b; \
      __builtin_neon_vst1v1di (__neon_ptr_cast(__builtin_neon_di *, __ax), __bx.val); \
    })
 
-#define vst1_f32(__a, __b) \
+#define vst1_f32(__a, __b) __extension__ \
   ({ \
      float32_t * __ax = __a; \
      float32x2_t __bx = __b; \
      __builtin_neon_vst1v2sf (__ax, __bx.val); \
    })
 
-#define vst1_u8(__a, __b) \
+#define vst1_u8(__a, __b) __extension__ \
   ({ \
      uint8_t * __ax = __a; \
      uint8x8_t __bx = __b; \
      __builtin_neon_vst1v8qi (__neon_ptr_cast(__builtin_neon_qi *, __ax), (__neon_int8x8_t) __bx.val); \
    })
 
-#define vst1_u16(__a, __b) \
+#define vst1_u16(__a, __b) __extension__ \
   ({ \
      uint16_t * __ax = __a; \
      uint16x4_t __bx = __b; \
      __builtin_neon_vst1v4hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), (__neon_int16x4_t) __bx.val); \
    })
 
-#define vst1_u32(__a, __b) \
+#define vst1_u32(__a, __b) __extension__ \
   ({ \
      uint32_t * __ax = __a; \
      uint32x2_t __bx = __b; \
      __builtin_neon_vst1v2si (__neon_ptr_cast(__builtin_neon_si *, __ax), (__neon_int32x2_t) __bx.val); \
    })
 
-#define vst1_u64(__a, __b) \
+#define vst1_u64(__a, __b) __extension__ \
   ({ \
      uint64_t * __ax = __a; \
      uint64x1_t __bx = __b; \
      __builtin_neon_vst1v1di (__neon_ptr_cast(__builtin_neon_di *, __ax), (__neon_int64x1_t) __bx.val); \
    })
 
-#define vst1_p8(__a, __b) \
+#define vst1_p8(__a, __b) __extension__ \
   ({ \
      poly8_t * __ax = __a; \
      poly8x8_t __bx = __b; \
      __builtin_neon_vst1v8qi (__neon_ptr_cast(__builtin_neon_qi *, __ax), (__neon_int8x8_t) __bx.val); \
    })
 
-#define vst1_p16(__a, __b) \
+#define vst1_p16(__a, __b) __extension__ \
   ({ \
      poly16_t * __ax = __a; \
      poly16x4_t __bx = __b; \
      __builtin_neon_vst1v4hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), (__neon_int16x4_t) __bx.val); \
    })
 
-#define vst1q_s8(__a, __b) \
+#define vst1q_s8(__a, __b) __extension__ \
   ({ \
      int8_t * __ax = __a; \
      int8x16_t __bx = __b; \
      __builtin_neon_vst1v16qi (__neon_ptr_cast(__builtin_neon_qi *, __ax), __bx.val); \
    })
 
-#define vst1q_s16(__a, __b) \
+#define vst1q_s16(__a, __b) __extension__ \
   ({ \
      int16_t * __ax = __a; \
      int16x8_t __bx = __b; \
      __builtin_neon_vst1v8hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), __bx.val); \
    })
 
-#define vst1q_s32(__a, __b) \
+#define vst1q_s32(__a, __b) __extension__ \
   ({ \
      int32_t * __ax = __a; \
      int32x4_t __bx = __b; \
      __builtin_neon_vst1v4si (__neon_ptr_cast(__builtin_neon_si *, __ax), __bx.val); \
    })
 
-#define vst1q_s64(__a, __b) \
+#define vst1q_s64(__a, __b) __extension__ \
   ({ \
      int64_t * __ax = __a; \
      int64x2_t __bx = __b; \
      __builtin_neon_vst1v2di (__neon_ptr_cast(__builtin_neon_di *, __ax), __bx.val); \
    })
 
-#define vst1q_f32(__a, __b) \
+#define vst1q_f32(__a, __b) __extension__ \
   ({ \
      float32_t * __ax = __a; \
      float32x4_t __bx = __b; \
      __builtin_neon_vst1v4sf (__ax, __bx.val); \
    })
 
-#define vst1q_u8(__a, __b) \
+#define vst1q_u8(__a, __b) __extension__ \
   ({ \
      uint8_t * __ax = __a; \
      uint8x16_t __bx = __b; \
      __builtin_neon_vst1v16qi (__neon_ptr_cast(__builtin_neon_qi *, __ax), (__neon_int8x16_t) __bx.val); \
    })
 
-#define vst1q_u16(__a, __b) \
+#define vst1q_u16(__a, __b) __extension__ \
   ({ \
      uint16_t * __ax = __a; \
      uint16x8_t __bx = __b; \
      __builtin_neon_vst1v8hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), (__neon_int16x8_t) __bx.val); \
    })
 
-#define vst1q_u32(__a, __b) \
+#define vst1q_u32(__a, __b) __extension__ \
   ({ \
      uint32_t * __ax = __a; \
      uint32x4_t __bx = __b; \
      __builtin_neon_vst1v4si (__neon_ptr_cast(__builtin_neon_si *, __ax), (__neon_int32x4_t) __bx.val); \
    })
 
-#define vst1q_u64(__a, __b) \
+#define vst1q_u64(__a, __b) __extension__ \
   ({ \
      uint64_t * __ax = __a; \
      uint64x2_t __bx = __b; \
      __builtin_neon_vst1v2di (__neon_ptr_cast(__builtin_neon_di *, __ax), (__neon_int64x2_t) __bx.val); \
    })
 
-#define vst1q_p8(__a, __b) \
+#define vst1q_p8(__a, __b) __extension__ \
   ({ \
      poly8_t * __ax = __a; \
      poly8x16_t __bx = __b; \
      __builtin_neon_vst1v16qi (__neon_ptr_cast(__builtin_neon_qi *, __ax), (__neon_int8x16_t) __bx.val); \
    })
 
-#define vst1q_p16(__a, __b) \
+#define vst1q_p16(__a, __b) __extension__ \
   ({ \
      poly16_t * __ax = __a; \
      poly16x8_t __bx = __b; \
      __builtin_neon_vst1v8hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), (__neon_int16x8_t) __bx.val); \
    })
 
-#define vst1_lane_s8(__a, __b, __c) \
+#define vst1_lane_s8(__a, __b, __c) __extension__ \
   ({ \
      int8_t * __ax = __a; \
      int8x8_t __bx = __b; \
      __builtin_neon_vst1_lanev8qi (__neon_ptr_cast(__builtin_neon_qi *, __ax), __bx.val, __c); \
    })
 
-#define vst1_lane_s16(__a, __b, __c) \
+#define vst1_lane_s16(__a, __b, __c) __extension__ \
   ({ \
      int16_t * __ax = __a; \
      int16x4_t __bx = __b; \
      __builtin_neon_vst1_lanev4hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), __bx.val, __c); \
    })
 
-#define vst1_lane_s32(__a, __b, __c) \
+#define vst1_lane_s32(__a, __b, __c) __extension__ \
   ({ \
      int32_t * __ax = __a; \
      int32x2_t __bx = __b; \
      __builtin_neon_vst1_lanev2si (__neon_ptr_cast(__builtin_neon_si *, __ax), __bx.val, __c); \
    })
 
-#define vst1_lane_f32(__a, __b, __c) \
+#define vst1_lane_f32(__a, __b, __c) __extension__ \
   ({ \
      float32_t * __ax = __a; \
      float32x2_t __bx = __b; \
      __builtin_neon_vst1_lanev2sf (__ax, __bx.val, __c); \
    })
 
-#define vst1_lane_u8(__a, __b, __c) \
+#define vst1_lane_u8(__a, __b, __c) __extension__ \
   ({ \
      uint8_t * __ax = __a; \
      uint8x8_t __bx = __b; \
      __builtin_neon_vst1_lanev8qi (__neon_ptr_cast(__builtin_neon_qi *, __ax), (__neon_int8x8_t) __bx.val, __c); \
    })
 
-#define vst1_lane_u16(__a, __b, __c) \
+#define vst1_lane_u16(__a, __b, __c) __extension__ \
   ({ \
      uint16_t * __ax = __a; \
      uint16x4_t __bx = __b; \
      __builtin_neon_vst1_lanev4hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), (__neon_int16x4_t) __bx.val, __c); \
    })
 
-#define vst1_lane_u32(__a, __b, __c) \
+#define vst1_lane_u32(__a, __b, __c) __extension__ \
   ({ \
      uint32_t * __ax = __a; \
      uint32x2_t __bx = __b; \
      __builtin_neon_vst1_lanev2si (__neon_ptr_cast(__builtin_neon_si *, __ax), (__neon_int32x2_t) __bx.val, __c); \
    })
 
-#define vst1_lane_p8(__a, __b, __c) \
+#define vst1_lane_p8(__a, __b, __c) __extension__ \
   ({ \
      poly8_t * __ax = __a; \
      poly8x8_t __bx = __b; \
      __builtin_neon_vst1_lanev8qi (__neon_ptr_cast(__builtin_neon_qi *, __ax), (__neon_int8x8_t) __bx.val, __c); \
    })
 
-#define vst1_lane_p16(__a, __b, __c) \
+#define vst1_lane_p16(__a, __b, __c) __extension__ \
   ({ \
      poly16_t * __ax = __a; \
      poly16x4_t __bx = __b; \
      __builtin_neon_vst1_lanev4hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), (__neon_int16x4_t) __bx.val, __c); \
    })
 
-#define vst1_lane_s64(__a, __b, __c) \
+#define vst1_lane_s64(__a, __b, __c) __extension__ \
   ({ \
      int64_t * __ax = __a; \
      int64x1_t __bx = __b; \
      __builtin_neon_vst1_lanev1di (__neon_ptr_cast(__builtin_neon_di *, __ax), __bx.val, __c); \
    })
 
-#define vst1_lane_u64(__a, __b, __c) \
+#define vst1_lane_u64(__a, __b, __c) __extension__ \
   ({ \
      uint64_t * __ax = __a; \
      uint64x1_t __bx = __b; \
      __builtin_neon_vst1_lanev1di (__neon_ptr_cast(__builtin_neon_di *, __ax), (__neon_int64x1_t) __bx.val, __c); \
    })
 
-#define vst1q_lane_s8(__a, __b, __c) \
+#define vst1q_lane_s8(__a, __b, __c) __extension__ \
   ({ \
      int8_t * __ax = __a; \
      int8x16_t __bx = __b; \
      __builtin_neon_vst1_lanev16qi (__neon_ptr_cast(__builtin_neon_qi *, __ax), __bx.val, __c); \
    })
 
-#define vst1q_lane_s16(__a, __b, __c) \
+#define vst1q_lane_s16(__a, __b, __c) __extension__ \
   ({ \
      int16_t * __ax = __a; \
      int16x8_t __bx = __b; \
      __builtin_neon_vst1_lanev8hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), __bx.val, __c); \
    })
 
-#define vst1q_lane_s32(__a, __b, __c) \
+#define vst1q_lane_s32(__a, __b, __c) __extension__ \
   ({ \
      int32_t * __ax = __a; \
      int32x4_t __bx = __b; \
      __builtin_neon_vst1_lanev4si (__neon_ptr_cast(__builtin_neon_si *, __ax), __bx.val, __c); \
    })
 
-#define vst1q_lane_f32(__a, __b, __c) \
+#define vst1q_lane_f32(__a, __b, __c) __extension__ \
   ({ \
      float32_t * __ax = __a; \
      float32x4_t __bx = __b; \
      __builtin_neon_vst1_lanev4sf (__ax, __bx.val, __c); \
    })
 
-#define vst1q_lane_u8(__a, __b, __c) \
+#define vst1q_lane_u8(__a, __b, __c) __extension__ \
   ({ \
      uint8_t * __ax = __a; \
      uint8x16_t __bx = __b; \
      __builtin_neon_vst1_lanev16qi (__neon_ptr_cast(__builtin_neon_qi *, __ax), (__neon_int8x16_t) __bx.val, __c); \
    })
 
-#define vst1q_lane_u16(__a, __b, __c) \
+#define vst1q_lane_u16(__a, __b, __c) __extension__ \
   ({ \
      uint16_t * __ax = __a; \
      uint16x8_t __bx = __b; \
      __builtin_neon_vst1_lanev8hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), (__neon_int16x8_t) __bx.val, __c); \
    })
 
-#define vst1q_lane_u32(__a, __b, __c) \
+#define vst1q_lane_u32(__a, __b, __c) __extension__ \
   ({ \
      uint32_t * __ax = __a; \
      uint32x4_t __bx = __b; \
      __builtin_neon_vst1_lanev4si (__neon_ptr_cast(__builtin_neon_si *, __ax), (__neon_int32x4_t) __bx.val, __c); \
    })
 
-#define vst1q_lane_p8(__a, __b, __c) \
+#define vst1q_lane_p8(__a, __b, __c) __extension__ \
   ({ \
      poly8_t * __ax = __a; \
      poly8x16_t __bx = __b; \
      __builtin_neon_vst1_lanev16qi (__neon_ptr_cast(__builtin_neon_qi *, __ax), (__neon_int8x16_t) __bx.val, __c); \
    })
 
-#define vst1q_lane_p16(__a, __b, __c) \
+#define vst1q_lane_p16(__a, __b, __c) __extension__ \
   ({ \
      poly16_t * __ax = __a; \
      poly16x8_t __bx = __b; \
      __builtin_neon_vst1_lanev8hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), (__neon_int16x8_t) __bx.val, __c); \
    })
 
-#define vst1q_lane_s64(__a, __b, __c) \
+#define vst1q_lane_s64(__a, __b, __c) __extension__ \
   ({ \
      int64_t * __ax = __a; \
      int64x2_t __bx = __b; \
      __builtin_neon_vst1_lanev2di (__neon_ptr_cast(__builtin_neon_di *, __ax), __bx.val, __c); \
    })
 
-#define vst1q_lane_u64(__a, __b, __c) \
+#define vst1q_lane_u64(__a, __b, __c) __extension__ \
   ({ \
      uint64_t * __ax = __a; \
      uint64x2_t __bx = __b; \
      __builtin_neon_vst1_lanev2di (__neon_ptr_cast(__builtin_neon_di *, __ax), (__neon_int64x2_t) __bx.val, __c); \
    })
 
-#define vld2_s8(__a) \
+#define vld2_s8(__a) __extension__ \
   ({ \
      const int8_t * __ax = __a; \
      union { int8x8x2_t __i; __neon_int8x8x2_t __o; } __rv; \
@@ -12061,7 +12061,7 @@
      __rv.__i; \
    })
 
-#define vld2_s16(__a) \
+#define vld2_s16(__a) __extension__ \
   ({ \
      const int16_t * __ax = __a; \
      union { int16x4x2_t __i; __neon_int16x4x2_t __o; } __rv; \
@@ -12069,7 +12069,7 @@
      __rv.__i; \
    })
 
-#define vld2_s32(__a) \
+#define vld2_s32(__a) __extension__ \
   ({ \
      const int32_t * __ax = __a; \
      union { int32x2x2_t __i; __neon_int32x2x2_t __o; } __rv; \
@@ -12077,7 +12077,7 @@
      __rv.__i; \
    })
 
-#define vld2_f32(__a) \
+#define vld2_f32(__a) __extension__ \
   ({ \
      const float32_t * __ax = __a; \
      union { float32x2x2_t __i; __neon_float32x2x2_t __o; } __rv; \
@@ -12085,7 +12085,7 @@
      __rv.__i; \
    })
 
-#define vld2_u8(__a) \
+#define vld2_u8(__a) __extension__ \
   ({ \
      const uint8_t * __ax = __a; \
      union { uint8x8x2_t __i; __neon_int8x8x2_t __o; } __rv; \
@@ -12093,7 +12093,7 @@
      __rv.__i; \
    })
 
-#define vld2_u16(__a) \
+#define vld2_u16(__a) __extension__ \
   ({ \
      const uint16_t * __ax = __a; \
      union { uint16x4x2_t __i; __neon_int16x4x2_t __o; } __rv; \
@@ -12101,7 +12101,7 @@
      __rv.__i; \
    })
 
-#define vld2_u32(__a) \
+#define vld2_u32(__a) __extension__ \
   ({ \
      const uint32_t * __ax = __a; \
      union { uint32x2x2_t __i; __neon_int32x2x2_t __o; } __rv; \
@@ -12109,7 +12109,7 @@
      __rv.__i; \
    })
 
-#define vld2_p8(__a) \
+#define vld2_p8(__a) __extension__ \
   ({ \
      const poly8_t * __ax = __a; \
      union { poly8x8x2_t __i; __neon_int8x8x2_t __o; } __rv; \
@@ -12117,7 +12117,7 @@
      __rv.__i; \
    })
 
-#define vld2_p16(__a) \
+#define vld2_p16(__a) __extension__ \
   ({ \
      const poly16_t * __ax = __a; \
      union { poly16x4x2_t __i; __neon_int16x4x2_t __o; } __rv; \
@@ -12125,7 +12125,7 @@
      __rv.__i; \
    })
 
-#define vld2_s64(__a) \
+#define vld2_s64(__a) __extension__ \
   ({ \
      const int64_t * __ax = __a; \
      union { int64x1x2_t __i; __neon_int64x1x2_t __o; } __rv; \
@@ -12133,7 +12133,7 @@
      __rv.__i; \
    })
 
-#define vld2_u64(__a) \
+#define vld2_u64(__a) __extension__ \
   ({ \
      const uint64_t * __ax = __a; \
      union { uint64x1x2_t __i; __neon_int64x1x2_t __o; } __rv; \
@@ -12141,7 +12141,7 @@
      __rv.__i; \
    })
 
-#define vld2q_s8(__a) \
+#define vld2q_s8(__a) __extension__ \
   ({ \
      const int8_t * __ax = __a; \
      union { int8x16x2_t __i; __neon_int8x16x2_t __o; } __rv; \
@@ -12149,7 +12149,7 @@
      __rv.__i; \
    })
 
-#define vld2q_s16(__a) \
+#define vld2q_s16(__a) __extension__ \
   ({ \
      const int16_t * __ax = __a; \
      union { int16x8x2_t __i; __neon_int16x8x2_t __o; } __rv; \
@@ -12157,7 +12157,7 @@
      __rv.__i; \
    })
 
-#define vld2q_s32(__a) \
+#define vld2q_s32(__a) __extension__ \
   ({ \
      const int32_t * __ax = __a; \
      union { int32x4x2_t __i; __neon_int32x4x2_t __o; } __rv; \
@@ -12165,7 +12165,7 @@
      __rv.__i; \
    })
 
-#define vld2q_f32(__a) \
+#define vld2q_f32(__a) __extension__ \
   ({ \
      const float32_t * __ax = __a; \
      union { float32x4x2_t __i; __neon_float32x4x2_t __o; } __rv; \
@@ -12173,7 +12173,7 @@
      __rv.__i; \
    })
 
-#define vld2q_u8(__a) \
+#define vld2q_u8(__a) __extension__ \
   ({ \
      const uint8_t * __ax = __a; \
      union { uint8x16x2_t __i; __neon_int8x16x2_t __o; } __rv; \
@@ -12181,7 +12181,7 @@
      __rv.__i; \
    })
 
-#define vld2q_u16(__a) \
+#define vld2q_u16(__a) __extension__ \
   ({ \
      const uint16_t * __ax = __a; \
      union { uint16x8x2_t __i; __neon_int16x8x2_t __o; } __rv; \
@@ -12189,7 +12189,7 @@
      __rv.__i; \
    })
 
-#define vld2q_u32(__a) \
+#define vld2q_u32(__a) __extension__ \
   ({ \
      const uint32_t * __ax = __a; \
      union { uint32x4x2_t __i; __neon_int32x4x2_t __o; } __rv; \
@@ -12197,7 +12197,7 @@
      __rv.__i; \
    })
 
-#define vld2q_p8(__a) \
+#define vld2q_p8(__a) __extension__ \
   ({ \
      const poly8_t * __ax = __a; \
      union { poly8x16x2_t __i; __neon_int8x16x2_t __o; } __rv; \
@@ -12205,7 +12205,7 @@
      __rv.__i; \
    })
 
-#define vld2q_p16(__a) \
+#define vld2q_p16(__a) __extension__ \
   ({ \
      const poly16_t * __ax = __a; \
      union { poly16x8x2_t __i; __neon_int16x8x2_t __o; } __rv; \
@@ -12213,7 +12213,7 @@
      __rv.__i; \
    })
 
-#define vld2_lane_s8(__a, __b, __c) \
+#define vld2_lane_s8(__a, __b, __c) __extension__ \
   ({ \
      const int8_t * __ax = __a; \
      union { int8x8x2_t __i; __neon_int8x8x2_t __o; } __bu = { __b }; \
@@ -12222,7 +12222,7 @@
      __rv.__i; \
    })
 
-#define vld2_lane_s16(__a, __b, __c) \
+#define vld2_lane_s16(__a, __b, __c) __extension__ \
   ({ \
      const int16_t * __ax = __a; \
      union { int16x4x2_t __i; __neon_int16x4x2_t __o; } __bu = { __b }; \
@@ -12231,7 +12231,7 @@
      __rv.__i; \
    })
 
-#define vld2_lane_s32(__a, __b, __c) \
+#define vld2_lane_s32(__a, __b, __c) __extension__ \
   ({ \
      const int32_t * __ax = __a; \
      union { int32x2x2_t __i; __neon_int32x2x2_t __o; } __bu = { __b }; \
@@ -12240,7 +12240,7 @@
      __rv.__i; \
    })
 
-#define vld2_lane_f32(__a, __b, __c) \
+#define vld2_lane_f32(__a, __b, __c) __extension__ \
   ({ \
      const float32_t * __ax = __a; \
      union { float32x2x2_t __i; __neon_float32x2x2_t __o; } __bu = { __b }; \
@@ -12249,7 +12249,7 @@
      __rv.__i; \
    })
 
-#define vld2_lane_u8(__a, __b, __c) \
+#define vld2_lane_u8(__a, __b, __c) __extension__ \
   ({ \
      const uint8_t * __ax = __a; \
      union { uint8x8x2_t __i; __neon_int8x8x2_t __o; } __bu = { __b }; \
@@ -12258,7 +12258,7 @@
      __rv.__i; \
    })
 
-#define vld2_lane_u16(__a, __b, __c) \
+#define vld2_lane_u16(__a, __b, __c) __extension__ \
   ({ \
      const uint16_t * __ax = __a; \
      union { uint16x4x2_t __i; __neon_int16x4x2_t __o; } __bu = { __b }; \
@@ -12267,7 +12267,7 @@
      __rv.__i; \
    })
 
-#define vld2_lane_u32(__a, __b, __c) \
+#define vld2_lane_u32(__a, __b, __c) __extension__ \
   ({ \
      const uint32_t * __ax = __a; \
      union { uint32x2x2_t __i; __neon_int32x2x2_t __o; } __bu = { __b }; \
@@ -12276,7 +12276,7 @@
      __rv.__i; \
    })
 
-#define vld2_lane_p8(__a, __b, __c) \
+#define vld2_lane_p8(__a, __b, __c) __extension__ \
   ({ \
      const poly8_t * __ax = __a; \
      union { poly8x8x2_t __i; __neon_int8x8x2_t __o; } __bu = { __b }; \
@@ -12285,7 +12285,7 @@
      __rv.__i; \
    })
 
-#define vld2_lane_p16(__a, __b, __c) \
+#define vld2_lane_p16(__a, __b, __c) __extension__ \
   ({ \
      const poly16_t * __ax = __a; \
      union { poly16x4x2_t __i; __neon_int16x4x2_t __o; } __bu = { __b }; \
@@ -12294,7 +12294,7 @@
      __rv.__i; \
    })
 
-#define vld2q_lane_s16(__a, __b, __c) \
+#define vld2q_lane_s16(__a, __b, __c) __extension__ \
   ({ \
      const int16_t * __ax = __a; \
      union { int16x8x2_t __i; __neon_int16x8x2_t __o; } __bu = { __b }; \
@@ -12303,7 +12303,7 @@
      __rv.__i; \
    })
 
-#define vld2q_lane_s32(__a, __b, __c) \
+#define vld2q_lane_s32(__a, __b, __c) __extension__ \
   ({ \
      const int32_t * __ax = __a; \
      union { int32x4x2_t __i; __neon_int32x4x2_t __o; } __bu = { __b }; \
@@ -12312,7 +12312,7 @@
      __rv.__i; \
    })
 
-#define vld2q_lane_f32(__a, __b, __c) \
+#define vld2q_lane_f32(__a, __b, __c) __extension__ \
   ({ \
      const float32_t * __ax = __a; \
      union { float32x4x2_t __i; __neon_float32x4x2_t __o; } __bu = { __b }; \
@@ -12321,7 +12321,7 @@
      __rv.__i; \
    })
 
-#define vld2q_lane_u16(__a, __b, __c) \
+#define vld2q_lane_u16(__a, __b, __c) __extension__ \
   ({ \
      const uint16_t * __ax = __a; \
      union { uint16x8x2_t __i; __neon_int16x8x2_t __o; } __bu = { __b }; \
@@ -12330,7 +12330,7 @@
      __rv.__i; \
    })
 
-#define vld2q_lane_u32(__a, __b, __c) \
+#define vld2q_lane_u32(__a, __b, __c) __extension__ \
   ({ \
      const uint32_t * __ax = __a; \
      union { uint32x4x2_t __i; __neon_int32x4x2_t __o; } __bu = { __b }; \
@@ -12339,7 +12339,7 @@
      __rv.__i; \
    })
 
-#define vld2q_lane_p16(__a, __b, __c) \
+#define vld2q_lane_p16(__a, __b, __c) __extension__ \
   ({ \
      const poly16_t * __ax = __a; \
      union { poly16x8x2_t __i; __neon_int16x8x2_t __o; } __bu = { __b }; \
@@ -12348,7 +12348,7 @@
      __rv.__i; \
    })
 
-#define vld2_dup_s8(__a) \
+#define vld2_dup_s8(__a) __extension__ \
   ({ \
      const int8_t * __ax = __a; \
      union { int8x8x2_t __i; __neon_int8x8x2_t __o; } __rv; \
@@ -12356,7 +12356,7 @@
      __rv.__i; \
    })
 
-#define vld2_dup_s16(__a) \
+#define vld2_dup_s16(__a) __extension__ \
   ({ \
      const int16_t * __ax = __a; \
      union { int16x4x2_t __i; __neon_int16x4x2_t __o; } __rv; \
@@ -12364,7 +12364,7 @@
      __rv.__i; \
    })
 
-#define vld2_dup_s32(__a) \
+#define vld2_dup_s32(__a) __extension__ \
   ({ \
      const int32_t * __ax = __a; \
      union { int32x2x2_t __i; __neon_int32x2x2_t __o; } __rv; \
@@ -12372,7 +12372,7 @@
      __rv.__i; \
    })
 
-#define vld2_dup_f32(__a) \
+#define vld2_dup_f32(__a) __extension__ \
   ({ \
      const float32_t * __ax = __a; \
      union { float32x2x2_t __i; __neon_float32x2x2_t __o; } __rv; \
@@ -12380,7 +12380,7 @@
      __rv.__i; \
    })
 
-#define vld2_dup_u8(__a) \
+#define vld2_dup_u8(__a) __extension__ \
   ({ \
      const uint8_t * __ax = __a; \
      union { uint8x8x2_t __i; __neon_int8x8x2_t __o; } __rv; \
@@ -12388,7 +12388,7 @@
      __rv.__i; \
    })
 
-#define vld2_dup_u16(__a) \
+#define vld2_dup_u16(__a) __extension__ \
   ({ \
      const uint16_t * __ax = __a; \
      union { uint16x4x2_t __i; __neon_int16x4x2_t __o; } __rv; \
@@ -12396,7 +12396,7 @@
      __rv.__i; \
    })
 
-#define vld2_dup_u32(__a) \
+#define vld2_dup_u32(__a) __extension__ \
   ({ \
      const uint32_t * __ax = __a; \
      union { uint32x2x2_t __i; __neon_int32x2x2_t __o; } __rv; \
@@ -12404,7 +12404,7 @@
      __rv.__i; \
    })
 
-#define vld2_dup_p8(__a) \
+#define vld2_dup_p8(__a) __extension__ \
   ({ \
      const poly8_t * __ax = __a; \
      union { poly8x8x2_t __i; __neon_int8x8x2_t __o; } __rv; \
@@ -12412,7 +12412,7 @@
      __rv.__i; \
    })
 
-#define vld2_dup_p16(__a) \
+#define vld2_dup_p16(__a) __extension__ \
   ({ \
      const poly16_t * __ax = __a; \
      union { poly16x4x2_t __i; __neon_int16x4x2_t __o; } __rv; \
@@ -12420,7 +12420,7 @@
      __rv.__i; \
    })
 
-#define vld2_dup_s64(__a) \
+#define vld2_dup_s64(__a) __extension__ \
   ({ \
      const int64_t * __ax = __a; \
      union { int64x1x2_t __i; __neon_int64x1x2_t __o; } __rv; \
@@ -12428,7 +12428,7 @@
      __rv.__i; \
    })
 
-#define vld2_dup_u64(__a) \
+#define vld2_dup_u64(__a) __extension__ \
   ({ \
      const uint64_t * __ax = __a; \
      union { uint64x1x2_t __i; __neon_int64x1x2_t __o; } __rv; \
@@ -12436,252 +12436,252 @@
      __rv.__i; \
    })
 
-#define vst2_s8(__a, __b) \
+#define vst2_s8(__a, __b) __extension__ \
   ({ \
      int8_t * __ax = __a; \
      union { int8x8x2_t __i; __neon_int8x8x2_t __o; } __bu = { __b }; \
      __builtin_neon_vst2v8qi (__neon_ptr_cast(__builtin_neon_qi *, __ax), __bu.__o); \
    })
 
-#define vst2_s16(__a, __b) \
+#define vst2_s16(__a, __b) __extension__ \
   ({ \
      int16_t * __ax = __a; \
      union { int16x4x2_t __i; __neon_int16x4x2_t __o; } __bu = { __b }; \
      __builtin_neon_vst2v4hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), __bu.__o); \
    })
 
-#define vst2_s32(__a, __b) \
+#define vst2_s32(__a, __b) __extension__ \
   ({ \
      int32_t * __ax = __a; \
      union { int32x2x2_t __i; __neon_int32x2x2_t __o; } __bu = { __b }; \
      __builtin_neon_vst2v2si (__neon_ptr_cast(__builtin_neon_si *, __ax), __bu.__o); \
    })
 
-#define vst2_f32(__a, __b) \
+#define vst2_f32(__a, __b) __extension__ \
   ({ \
      float32_t * __ax = __a; \
      union { float32x2x2_t __i; __neon_float32x2x2_t __o; } __bu = { __b }; \
      __builtin_neon_vst2v2sf (__ax, __bu.__o); \
    })
 
-#define vst2_u8(__a, __b) \
+#define vst2_u8(__a, __b) __extension__ \
   ({ \
      uint8_t * __ax = __a; \
      union { uint8x8x2_t __i; __neon_int8x8x2_t __o; } __bu = { __b }; \
      __builtin_neon_vst2v8qi (__neon_ptr_cast(__builtin_neon_qi *, __ax), __bu.__o); \
    })
 
-#define vst2_u16(__a, __b) \
+#define vst2_u16(__a, __b) __extension__ \
   ({ \
      uint16_t * __ax = __a; \
      union { uint16x4x2_t __i; __neon_int16x4x2_t __o; } __bu = { __b }; \
      __builtin_neon_vst2v4hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), __bu.__o); \
    })
 
-#define vst2_u32(__a, __b) \
+#define vst2_u32(__a, __b) __extension__ \
   ({ \
      uint32_t * __ax = __a; \
      union { uint32x2x2_t __i; __neon_int32x2x2_t __o; } __bu = { __b }; \
      __builtin_neon_vst2v2si (__neon_ptr_cast(__builtin_neon_si *, __ax), __bu.__o); \
    })
 
-#define vst2_p8(__a, __b) \
+#define vst2_p8(__a, __b) __extension__ \
   ({ \
      poly8_t * __ax = __a; \
      union { poly8x8x2_t __i; __neon_int8x8x2_t __o; } __bu = { __b }; \
      __builtin_neon_vst2v8qi (__neon_ptr_cast(__builtin_neon_qi *, __ax), __bu.__o); \
    })
 
-#define vst2_p16(__a, __b) \
+#define vst2_p16(__a, __b) __extension__ \
   ({ \
      poly16_t * __ax = __a; \
      union { poly16x4x2_t __i; __neon_int16x4x2_t __o; } __bu = { __b }; \
      __builtin_neon_vst2v4hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), __bu.__o); \
    })
 
-#define vst2_s64(__a, __b) \
+#define vst2_s64(__a, __b) __extension__ \
   ({ \
      int64_t * __ax = __a; \
      union { int64x1x2_t __i; __neon_int64x1x2_t __o; } __bu = { __b }; \
      __builtin_neon_vst2v1di (__neon_ptr_cast(__builtin_neon_di *, __ax), __bu.__o); \
    })
 
-#define vst2_u64(__a, __b) \
+#define vst2_u64(__a, __b) __extension__ \
   ({ \
      uint64_t * __ax = __a; \
      union { uint64x1x2_t __i; __neon_int64x1x2_t __o; } __bu = { __b }; \
      __builtin_neon_vst2v1di (__neon_ptr_cast(__builtin_neon_di *, __ax), __bu.__o); \
    })
 
-#define vst2q_s8(__a, __b) \
+#define vst2q_s8(__a, __b) __extension__ \
   ({ \
      int8_t * __ax = __a; \
      union { int8x16x2_t __i; __neon_int8x16x2_t __o; } __bu = { __b }; \
      __builtin_neon_vst2v16qi (__neon_ptr_cast(__builtin_neon_qi *, __ax), __bu.__o); \
    })
 
-#define vst2q_s16(__a, __b) \
+#define vst2q_s16(__a, __b) __extension__ \
   ({ \
      int16_t * __ax = __a; \
      union { int16x8x2_t __i; __neon_int16x8x2_t __o; } __bu = { __b }; \
      __builtin_neon_vst2v8hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), __bu.__o); \
    })
 
-#define vst2q_s32(__a, __b) \
+#define vst2q_s32(__a, __b) __extension__ \
   ({ \
      int32_t * __ax = __a; \
      union { int32x4x2_t __i; __neon_int32x4x2_t __o; } __bu = { __b }; \
      __builtin_neon_vst2v4si (__neon_ptr_cast(__builtin_neon_si *, __ax), __bu.__o); \
    })
 
-#define vst2q_f32(__a, __b) \
+#define vst2q_f32(__a, __b) __extension__ \
   ({ \
      float32_t * __ax = __a; \
      union { float32x4x2_t __i; __neon_float32x4x2_t __o; } __bu = { __b }; \
      __builtin_neon_vst2v4sf (__ax, __bu.__o); \
    })
 
-#define vst2q_u8(__a, __b) \
+#define vst2q_u8(__a, __b) __extension__ \
   ({ \
      uint8_t * __ax = __a; \
      union { uint8x16x2_t __i; __neon_int8x16x2_t __o; } __bu = { __b }; \
      __builtin_neon_vst2v16qi (__neon_ptr_cast(__builtin_neon_qi *, __ax), __bu.__o); \
    })
 
-#define vst2q_u16(__a, __b) \
+#define vst2q_u16(__a, __b) __extension__ \
   ({ \
      uint16_t * __ax = __a; \
      union { uint16x8x2_t __i; __neon_int16x8x2_t __o; } __bu = { __b }; \
      __builtin_neon_vst2v8hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), __bu.__o); \
    })
 
-#define vst2q_u32(__a, __b) \
+#define vst2q_u32(__a, __b) __extension__ \
   ({ \
      uint32_t * __ax = __a; \
      union { uint32x4x2_t __i; __neon_int32x4x2_t __o; } __bu = { __b }; \
      __builtin_neon_vst2v4si (__neon_ptr_cast(__builtin_neon_si *, __ax), __bu.__o); \
    })
 
-#define vst2q_p8(__a, __b) \
+#define vst2q_p8(__a, __b) __extension__ \
   ({ \
      poly8_t * __ax = __a; \
      union { poly8x16x2_t __i; __neon_int8x16x2_t __o; } __bu = { __b }; \
      __builtin_neon_vst2v16qi (__neon_ptr_cast(__builtin_neon_qi *, __ax), __bu.__o); \
    })
 
-#define vst2q_p16(__a, __b) \
+#define vst2q_p16(__a, __b) __extension__ \
   ({ \
      poly16_t * __ax = __a; \
      union { poly16x8x2_t __i; __neon_int16x8x2_t __o; } __bu = { __b }; \
      __builtin_neon_vst2v8hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), __bu.__o); \
    })
 
-#define vst2_lane_s8(__a, __b, __c) \
+#define vst2_lane_s8(__a, __b, __c) __extension__ \
   ({ \
      int8_t * __ax = __a; \
      union { int8x8x2_t __i; __neon_int8x8x2_t __o; } __bu = { __b }; \
      __builtin_neon_vst2_lanev8qi (__neon_ptr_cast(__builtin_neon_qi *, __ax), __bu.__o, __c); \
    })
 
-#define vst2_lane_s16(__a, __b, __c) \
+#define vst2_lane_s16(__a, __b, __c) __extension__ \
   ({ \
      int16_t * __ax = __a; \
      union { int16x4x2_t __i; __neon_int16x4x2_t __o; } __bu = { __b }; \
      __builtin_neon_vst2_lanev4hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), __bu.__o, __c); \
    })
 
-#define vst2_lane_s32(__a, __b, __c) \
+#define vst2_lane_s32(__a, __b, __c) __extension__ \
   ({ \
      int32_t * __ax = __a; \
      union { int32x2x2_t __i; __neon_int32x2x2_t __o; } __bu = { __b }; \
      __builtin_neon_vst2_lanev2si (__neon_ptr_cast(__builtin_neon_si *, __ax), __bu.__o, __c); \
    })
 
-#define vst2_lane_f32(__a, __b, __c) \
+#define vst2_lane_f32(__a, __b, __c) __extension__ \
   ({ \
      float32_t * __ax = __a; \
      union { float32x2x2_t __i; __neon_float32x2x2_t __o; } __bu = { __b }; \
      __builtin_neon_vst2_lanev2sf (__ax, __bu.__o, __c); \
    })
 
-#define vst2_lane_u8(__a, __b, __c) \
+#define vst2_lane_u8(__a, __b, __c) __extension__ \
   ({ \
      uint8_t * __ax = __a; \
      union { uint8x8x2_t __i; __neon_int8x8x2_t __o; } __bu = { __b }; \
      __builtin_neon_vst2_lanev8qi (__neon_ptr_cast(__builtin_neon_qi *, __ax), __bu.__o, __c); \
    })
 
-#define vst2_lane_u16(__a, __b, __c) \
+#define vst2_lane_u16(__a, __b, __c) __extension__ \
   ({ \
      uint16_t * __ax = __a; \
      union { uint16x4x2_t __i; __neon_int16x4x2_t __o; } __bu = { __b }; \
      __builtin_neon_vst2_lanev4hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), __bu.__o, __c); \
    })
 
-#define vst2_lane_u32(__a, __b, __c) \
+#define vst2_lane_u32(__a, __b, __c) __extension__ \
   ({ \
      uint32_t * __ax = __a; \
      union { uint32x2x2_t __i; __neon_int32x2x2_t __o; } __bu = { __b }; \
      __builtin_neon_vst2_lanev2si (__neon_ptr_cast(__builtin_neon_si *, __ax), __bu.__o, __c); \
    })
 
-#define vst2_lane_p8(__a, __b, __c) \
+#define vst2_lane_p8(__a, __b, __c) __extension__ \
   ({ \
      poly8_t * __ax = __a; \
      union { poly8x8x2_t __i; __neon_int8x8x2_t __o; } __bu = { __b }; \
      __builtin_neon_vst2_lanev8qi (__neon_ptr_cast(__builtin_neon_qi *, __ax), __bu.__o, __c); \
    })
 
-#define vst2_lane_p16(__a, __b, __c) \
+#define vst2_lane_p16(__a, __b, __c) __extension__ \
   ({ \
      poly16_t * __ax = __a; \
      union { poly16x4x2_t __i; __neon_int16x4x2_t __o; } __bu = { __b }; \
      __builtin_neon_vst2_lanev4hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), __bu.__o, __c); \
    })
 
-#define vst2q_lane_s16(__a, __b, __c) \
+#define vst2q_lane_s16(__a, __b, __c) __extension__ \
   ({ \
      int16_t * __ax = __a; \
      union { int16x8x2_t __i; __neon_int16x8x2_t __o; } __bu = { __b }; \
      __builtin_neon_vst2_lanev8hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), __bu.__o, __c); \
    })
 
-#define vst2q_lane_s32(__a, __b, __c) \
+#define vst2q_lane_s32(__a, __b, __c) __extension__ \
   ({ \
      int32_t * __ax = __a; \
      union { int32x4x2_t __i; __neon_int32x4x2_t __o; } __bu = { __b }; \
      __builtin_neon_vst2_lanev4si (__neon_ptr_cast(__builtin_neon_si *, __ax), __bu.__o, __c); \
    })
 
-#define vst2q_lane_f32(__a, __b, __c) \
+#define vst2q_lane_f32(__a, __b, __c) __extension__ \
   ({ \
      float32_t * __ax = __a; \
      union { float32x4x2_t __i; __neon_float32x4x2_t __o; } __bu = { __b }; \
      __builtin_neon_vst2_lanev4sf (__ax, __bu.__o, __c); \
    })
 
-#define vst2q_lane_u16(__a, __b, __c) \
+#define vst2q_lane_u16(__a, __b, __c) __extension__ \
   ({ \
      uint16_t * __ax = __a; \
      union { uint16x8x2_t __i; __neon_int16x8x2_t __o; } __bu = { __b }; \
      __builtin_neon_vst2_lanev8hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), __bu.__o, __c); \
    })
 
-#define vst2q_lane_u32(__a, __b, __c) \
+#define vst2q_lane_u32(__a, __b, __c) __extension__ \
   ({ \
      uint32_t * __ax = __a; \
      union { uint32x4x2_t __i; __neon_int32x4x2_t __o; } __bu = { __b }; \
      __builtin_neon_vst2_lanev4si (__neon_ptr_cast(__builtin_neon_si *, __ax), __bu.__o, __c); \
    })
 
-#define vst2q_lane_p16(__a, __b, __c) \
+#define vst2q_lane_p16(__a, __b, __c) __extension__ \
   ({ \
      poly16_t * __ax = __a; \
      union { poly16x8x2_t __i; __neon_int16x8x2_t __o; } __bu = { __b }; \
      __builtin_neon_vst2_lanev8hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), __bu.__o, __c); \
    })
 
-#define vld3_s8(__a) \
+#define vld3_s8(__a) __extension__ \
   ({ \
      const int8_t * __ax = __a; \
      union { int8x8x3_t __i; __neon_int8x8x3_t __o; } __rv; \
@@ -12689,7 +12689,7 @@
      __rv.__i; \
    })
 
-#define vld3_s16(__a) \
+#define vld3_s16(__a) __extension__ \
   ({ \
      const int16_t * __ax = __a; \
      union { int16x4x3_t __i; __neon_int16x4x3_t __o; } __rv; \
@@ -12697,7 +12697,7 @@
      __rv.__i; \
    })
 
-#define vld3_s32(__a) \
+#define vld3_s32(__a) __extension__ \
   ({ \
      const int32_t * __ax = __a; \
      union { int32x2x3_t __i; __neon_int32x2x3_t __o; } __rv; \
@@ -12705,7 +12705,7 @@
      __rv.__i; \
    })
 
-#define vld3_f32(__a) \
+#define vld3_f32(__a) __extension__ \
   ({ \
      const float32_t * __ax = __a; \
      union { float32x2x3_t __i; __neon_float32x2x3_t __o; } __rv; \
@@ -12713,7 +12713,7 @@
      __rv.__i; \
    })
 
-#define vld3_u8(__a) \
+#define vld3_u8(__a) __extension__ \
   ({ \
      const uint8_t * __ax = __a; \
      union { uint8x8x3_t __i; __neon_int8x8x3_t __o; } __rv; \
@@ -12721,7 +12721,7 @@
      __rv.__i; \
    })
 
-#define vld3_u16(__a) \
+#define vld3_u16(__a) __extension__ \
   ({ \
      const uint16_t * __ax = __a; \
      union { uint16x4x3_t __i; __neon_int16x4x3_t __o; } __rv; \
@@ -12729,7 +12729,7 @@
      __rv.__i; \
    })
 
-#define vld3_u32(__a) \
+#define vld3_u32(__a) __extension__ \
   ({ \
      const uint32_t * __ax = __a; \
      union { uint32x2x3_t __i; __neon_int32x2x3_t __o; } __rv; \
@@ -12737,7 +12737,7 @@
      __rv.__i; \
    })
 
-#define vld3_p8(__a) \
+#define vld3_p8(__a) __extension__ \
   ({ \
      const poly8_t * __ax = __a; \
      union { poly8x8x3_t __i; __neon_int8x8x3_t __o; } __rv; \
@@ -12745,7 +12745,7 @@
      __rv.__i; \
    })
 
-#define vld3_p16(__a) \
+#define vld3_p16(__a) __extension__ \
   ({ \
      const poly16_t * __ax = __a; \
      union { poly16x4x3_t __i; __neon_int16x4x3_t __o; } __rv; \
@@ -12753,7 +12753,7 @@
      __rv.__i; \
    })
 
-#define vld3_s64(__a) \
+#define vld3_s64(__a) __extension__ \
   ({ \
      const int64_t * __ax = __a; \
      union { int64x1x3_t __i; __neon_int64x1x3_t __o; } __rv; \
@@ -12761,7 +12761,7 @@
      __rv.__i; \
    })
 
-#define vld3_u64(__a) \
+#define vld3_u64(__a) __extension__ \
   ({ \
      const uint64_t * __ax = __a; \
      union { uint64x1x3_t __i; __neon_int64x1x3_t __o; } __rv; \
@@ -12769,7 +12769,7 @@
      __rv.__i; \
    })
 
-#define vld3q_s8(__a) \
+#define vld3q_s8(__a) __extension__ \
   ({ \
      const int8_t * __ax = __a; \
      union { int8x16x3_t __i; __neon_int8x16x3_t __o; } __rv; \
@@ -12777,7 +12777,7 @@
      __rv.__i; \
    })
 
-#define vld3q_s16(__a) \
+#define vld3q_s16(__a) __extension__ \
   ({ \
      const int16_t * __ax = __a; \
      union { int16x8x3_t __i; __neon_int16x8x3_t __o; } __rv; \
@@ -12785,7 +12785,7 @@
      __rv.__i; \
    })
 
-#define vld3q_s32(__a) \
+#define vld3q_s32(__a) __extension__ \
   ({ \
      const int32_t * __ax = __a; \
      union { int32x4x3_t __i; __neon_int32x4x3_t __o; } __rv; \
@@ -12793,7 +12793,7 @@
      __rv.__i; \
    })
 
-#define vld3q_f32(__a) \
+#define vld3q_f32(__a) __extension__ \
   ({ \
      const float32_t * __ax = __a; \
      union { float32x4x3_t __i; __neon_float32x4x3_t __o; } __rv; \
@@ -12801,7 +12801,7 @@
      __rv.__i; \
    })
 
-#define vld3q_u8(__a) \
+#define vld3q_u8(__a) __extension__ \
   ({ \
      const uint8_t * __ax = __a; \
      union { uint8x16x3_t __i; __neon_int8x16x3_t __o; } __rv; \
@@ -12809,7 +12809,7 @@
      __rv.__i; \
    })
 
-#define vld3q_u16(__a) \
+#define vld3q_u16(__a) __extension__ \
   ({ \
      const uint16_t * __ax = __a; \
      union { uint16x8x3_t __i; __neon_int16x8x3_t __o; } __rv; \
@@ -12817,7 +12817,7 @@
      __rv.__i; \
    })
 
-#define vld3q_u32(__a) \
+#define vld3q_u32(__a) __extension__ \
   ({ \
      const uint32_t * __ax = __a; \
      union { uint32x4x3_t __i; __neon_int32x4x3_t __o; } __rv; \
@@ -12825,7 +12825,7 @@
      __rv.__i; \
    })
 
-#define vld3q_p8(__a) \
+#define vld3q_p8(__a) __extension__ \
   ({ \
      const poly8_t * __ax = __a; \
      union { poly8x16x3_t __i; __neon_int8x16x3_t __o; } __rv; \
@@ -12833,7 +12833,7 @@
      __rv.__i; \
    })
 
-#define vld3q_p16(__a) \
+#define vld3q_p16(__a) __extension__ \
   ({ \
      const poly16_t * __ax = __a; \
      union { poly16x8x3_t __i; __neon_int16x8x3_t __o; } __rv; \
@@ -12841,7 +12841,7 @@
      __rv.__i; \
    })
 
-#define vld3_lane_s8(__a, __b, __c) \
+#define vld3_lane_s8(__a, __b, __c) __extension__ \
   ({ \
      const int8_t * __ax = __a; \
      union { int8x8x3_t __i; __neon_int8x8x3_t __o; } __bu = { __b }; \
@@ -12850,7 +12850,7 @@
      __rv.__i; \
    })
 
-#define vld3_lane_s16(__a, __b, __c) \
+#define vld3_lane_s16(__a, __b, __c) __extension__ \
   ({ \
      const int16_t * __ax = __a; \
      union { int16x4x3_t __i; __neon_int16x4x3_t __o; } __bu = { __b }; \
@@ -12859,7 +12859,7 @@
      __rv.__i; \
    })
 
-#define vld3_lane_s32(__a, __b, __c) \
+#define vld3_lane_s32(__a, __b, __c) __extension__ \
   ({ \
      const int32_t * __ax = __a; \
      union { int32x2x3_t __i; __neon_int32x2x3_t __o; } __bu = { __b }; \
@@ -12868,7 +12868,7 @@
      __rv.__i; \
    })
 
-#define vld3_lane_f32(__a, __b, __c) \
+#define vld3_lane_f32(__a, __b, __c) __extension__ \
   ({ \
      const float32_t * __ax = __a; \
      union { float32x2x3_t __i; __neon_float32x2x3_t __o; } __bu = { __b }; \
@@ -12877,7 +12877,7 @@
      __rv.__i; \
    })
 
-#define vld3_lane_u8(__a, __b, __c) \
+#define vld3_lane_u8(__a, __b, __c) __extension__ \
   ({ \
      const uint8_t * __ax = __a; \
      union { uint8x8x3_t __i; __neon_int8x8x3_t __o; } __bu = { __b }; \
@@ -12886,7 +12886,7 @@
      __rv.__i; \
    })
 
-#define vld3_lane_u16(__a, __b, __c) \
+#define vld3_lane_u16(__a, __b, __c) __extension__ \
   ({ \
      const uint16_t * __ax = __a; \
      union { uint16x4x3_t __i; __neon_int16x4x3_t __o; } __bu = { __b }; \
@@ -12895,7 +12895,7 @@
      __rv.__i; \
    })
 
-#define vld3_lane_u32(__a, __b, __c) \
+#define vld3_lane_u32(__a, __b, __c) __extension__ \
   ({ \
      const uint32_t * __ax = __a; \
      union { uint32x2x3_t __i; __neon_int32x2x3_t __o; } __bu = { __b }; \
@@ -12904,7 +12904,7 @@
      __rv.__i; \
    })
 
-#define vld3_lane_p8(__a, __b, __c) \
+#define vld3_lane_p8(__a, __b, __c) __extension__ \
   ({ \
      const poly8_t * __ax = __a; \
      union { poly8x8x3_t __i; __neon_int8x8x3_t __o; } __bu = { __b }; \
@@ -12913,7 +12913,7 @@
      __rv.__i; \
    })
 
-#define vld3_lane_p16(__a, __b, __c) \
+#define vld3_lane_p16(__a, __b, __c) __extension__ \
   ({ \
      const poly16_t * __ax = __a; \
      union { poly16x4x3_t __i; __neon_int16x4x3_t __o; } __bu = { __b }; \
@@ -12922,7 +12922,7 @@
      __rv.__i; \
    })
 
-#define vld3q_lane_s16(__a, __b, __c) \
+#define vld3q_lane_s16(__a, __b, __c) __extension__ \
   ({ \
      const int16_t * __ax = __a; \
      union { int16x8x3_t __i; __neon_int16x8x3_t __o; } __bu = { __b }; \
@@ -12931,7 +12931,7 @@
      __rv.__i; \
    })
 
-#define vld3q_lane_s32(__a, __b, __c) \
+#define vld3q_lane_s32(__a, __b, __c) __extension__ \
   ({ \
      const int32_t * __ax = __a; \
      union { int32x4x3_t __i; __neon_int32x4x3_t __o; } __bu = { __b }; \
@@ -12940,7 +12940,7 @@
      __rv.__i; \
    })
 
-#define vld3q_lane_f32(__a, __b, __c) \
+#define vld3q_lane_f32(__a, __b, __c) __extension__ \
   ({ \
      const float32_t * __ax = __a; \
      union { float32x4x3_t __i; __neon_float32x4x3_t __o; } __bu = { __b }; \
@@ -12949,7 +12949,7 @@
      __rv.__i; \
    })
 
-#define vld3q_lane_u16(__a, __b, __c) \
+#define vld3q_lane_u16(__a, __b, __c) __extension__ \
   ({ \
      const uint16_t * __ax = __a; \
      union { uint16x8x3_t __i; __neon_int16x8x3_t __o; } __bu = { __b }; \
@@ -12958,7 +12958,7 @@
      __rv.__i; \
    })
 
-#define vld3q_lane_u32(__a, __b, __c) \
+#define vld3q_lane_u32(__a, __b, __c) __extension__ \
   ({ \
      const uint32_t * __ax = __a; \
      union { uint32x4x3_t __i; __neon_int32x4x3_t __o; } __bu = { __b }; \
@@ -12967,7 +12967,7 @@
      __rv.__i; \
    })
 
-#define vld3q_lane_p16(__a, __b, __c) \
+#define vld3q_lane_p16(__a, __b, __c) __extension__ \
   ({ \
      const poly16_t * __ax = __a; \
      union { poly16x8x3_t __i; __neon_int16x8x3_t __o; } __bu = { __b }; \
@@ -12976,7 +12976,7 @@
      __rv.__i; \
    })
 
-#define vld3_dup_s8(__a) \
+#define vld3_dup_s8(__a) __extension__ \
   ({ \
      const int8_t * __ax = __a; \
      union { int8x8x3_t __i; __neon_int8x8x3_t __o; } __rv; \
@@ -12984,7 +12984,7 @@
      __rv.__i; \
    })
 
-#define vld3_dup_s16(__a) \
+#define vld3_dup_s16(__a) __extension__ \
   ({ \
      const int16_t * __ax = __a; \
      union { int16x4x3_t __i; __neon_int16x4x3_t __o; } __rv; \
@@ -12992,7 +12992,7 @@
      __rv.__i; \
    })
 
-#define vld3_dup_s32(__a) \
+#define vld3_dup_s32(__a) __extension__ \
   ({ \
      const int32_t * __ax = __a; \
      union { int32x2x3_t __i; __neon_int32x2x3_t __o; } __rv; \
@@ -13000,7 +13000,7 @@
      __rv.__i; \
    })
 
-#define vld3_dup_f32(__a) \
+#define vld3_dup_f32(__a) __extension__ \
   ({ \
      const float32_t * __ax = __a; \
      union { float32x2x3_t __i; __neon_float32x2x3_t __o; } __rv; \
@@ -13008,7 +13008,7 @@
      __rv.__i; \
    })
 
-#define vld3_dup_u8(__a) \
+#define vld3_dup_u8(__a) __extension__ \
   ({ \
      const uint8_t * __ax = __a; \
      union { uint8x8x3_t __i; __neon_int8x8x3_t __o; } __rv; \
@@ -13016,7 +13016,7 @@
      __rv.__i; \
    })
 
-#define vld3_dup_u16(__a) \
+#define vld3_dup_u16(__a) __extension__ \
   ({ \
      const uint16_t * __ax = __a; \
      union { uint16x4x3_t __i; __neon_int16x4x3_t __o; } __rv; \
@@ -13024,7 +13024,7 @@
      __rv.__i; \
    })
 
-#define vld3_dup_u32(__a) \
+#define vld3_dup_u32(__a) __extension__ \
   ({ \
      const uint32_t * __ax = __a; \
      union { uint32x2x3_t __i; __neon_int32x2x3_t __o; } __rv; \
@@ -13032,7 +13032,7 @@
      __rv.__i; \
    })
 
-#define vld3_dup_p8(__a) \
+#define vld3_dup_p8(__a) __extension__ \
   ({ \
      const poly8_t * __ax = __a; \
      union { poly8x8x3_t __i; __neon_int8x8x3_t __o; } __rv; \
@@ -13040,7 +13040,7 @@
      __rv.__i; \
    })
 
-#define vld3_dup_p16(__a) \
+#define vld3_dup_p16(__a) __extension__ \
   ({ \
      const poly16_t * __ax = __a; \
      union { poly16x4x3_t __i; __neon_int16x4x3_t __o; } __rv; \
@@ -13048,7 +13048,7 @@
      __rv.__i; \
    })
 
-#define vld3_dup_s64(__a) \
+#define vld3_dup_s64(__a) __extension__ \
   ({ \
      const int64_t * __ax = __a; \
      union { int64x1x3_t __i; __neon_int64x1x3_t __o; } __rv; \
@@ -13056,7 +13056,7 @@
      __rv.__i; \
    })
 
-#define vld3_dup_u64(__a) \
+#define vld3_dup_u64(__a) __extension__ \
   ({ \
      const uint64_t * __ax = __a; \
      union { uint64x1x3_t __i; __neon_int64x1x3_t __o; } __rv; \
@@ -13064,252 +13064,252 @@
      __rv.__i; \
    })
 
-#define vst3_s8(__a, __b) \
+#define vst3_s8(__a, __b) __extension__ \
   ({ \
      int8_t * __ax = __a; \
      union { int8x8x3_t __i; __neon_int8x8x3_t __o; } __bu = { __b }; \
      __builtin_neon_vst3v8qi (__neon_ptr_cast(__builtin_neon_qi *, __ax), __bu.__o); \
    })
 
-#define vst3_s16(__a, __b) \
+#define vst3_s16(__a, __b) __extension__ \
   ({ \
      int16_t * __ax = __a; \
      union { int16x4x3_t __i; __neon_int16x4x3_t __o; } __bu = { __b }; \
      __builtin_neon_vst3v4hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), __bu.__o); \
    })
 
-#define vst3_s32(__a, __b) \
+#define vst3_s32(__a, __b) __extension__ \
   ({ \
      int32_t * __ax = __a; \
      union { int32x2x3_t __i; __neon_int32x2x3_t __o; } __bu = { __b }; \
      __builtin_neon_vst3v2si (__neon_ptr_cast(__builtin_neon_si *, __ax), __bu.__o); \
    })
 
-#define vst3_f32(__a, __b) \
+#define vst3_f32(__a, __b) __extension__ \
   ({ \
      float32_t * __ax = __a; \
      union { float32x2x3_t __i; __neon_float32x2x3_t __o; } __bu = { __b }; \
      __builtin_neon_vst3v2sf (__ax, __bu.__o); \
    })
 
-#define vst3_u8(__a, __b) \
+#define vst3_u8(__a, __b) __extension__ \
   ({ \
      uint8_t * __ax = __a; \
      union { uint8x8x3_t __i; __neon_int8x8x3_t __o; } __bu = { __b }; \
      __builtin_neon_vst3v8qi (__neon_ptr_cast(__builtin_neon_qi *, __ax), __bu.__o); \
    })
 
-#define vst3_u16(__a, __b) \
+#define vst3_u16(__a, __b) __extension__ \
   ({ \
      uint16_t * __ax = __a; \
      union { uint16x4x3_t __i; __neon_int16x4x3_t __o; } __bu = { __b }; \
      __builtin_neon_vst3v4hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), __bu.__o); \
    })
 
-#define vst3_u32(__a, __b) \
+#define vst3_u32(__a, __b) __extension__ \
   ({ \
      uint32_t * __ax = __a; \
      union { uint32x2x3_t __i; __neon_int32x2x3_t __o; } __bu = { __b }; \
      __builtin_neon_vst3v2si (__neon_ptr_cast(__builtin_neon_si *, __ax), __bu.__o); \
    })
 
-#define vst3_p8(__a, __b) \
+#define vst3_p8(__a, __b) __extension__ \
   ({ \
      poly8_t * __ax = __a; \
      union { poly8x8x3_t __i; __neon_int8x8x3_t __o; } __bu = { __b }; \
      __builtin_neon_vst3v8qi (__neon_ptr_cast(__builtin_neon_qi *, __ax), __bu.__o); \
    })
 
-#define vst3_p16(__a, __b) \
+#define vst3_p16(__a, __b) __extension__ \
   ({ \
      poly16_t * __ax = __a; \
      union { poly16x4x3_t __i; __neon_int16x4x3_t __o; } __bu = { __b }; \
      __builtin_neon_vst3v4hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), __bu.__o); \
    })
 
-#define vst3_s64(__a, __b) \
+#define vst3_s64(__a, __b) __extension__ \
   ({ \
      int64_t * __ax = __a; \
      union { int64x1x3_t __i; __neon_int64x1x3_t __o; } __bu = { __b }; \
      __builtin_neon_vst3v1di (__neon_ptr_cast(__builtin_neon_di *, __ax), __bu.__o); \
    })
 
-#define vst3_u64(__a, __b) \
+#define vst3_u64(__a, __b) __extension__ \
   ({ \
      uint64_t * __ax = __a; \
      union { uint64x1x3_t __i; __neon_int64x1x3_t __o; } __bu = { __b }; \
      __builtin_neon_vst3v1di (__neon_ptr_cast(__builtin_neon_di *, __ax), __bu.__o); \
    })
 
-#define vst3q_s8(__a, __b) \
+#define vst3q_s8(__a, __b) __extension__ \
   ({ \
      int8_t * __ax = __a; \
      union { int8x16x3_t __i; __neon_int8x16x3_t __o; } __bu = { __b }; \
      __builtin_neon_vst3v16qi (__neon_ptr_cast(__builtin_neon_qi *, __ax), __bu.__o); \
    })
 
-#define vst3q_s16(__a, __b) \
+#define vst3q_s16(__a, __b) __extension__ \
   ({ \
      int16_t * __ax = __a; \
      union { int16x8x3_t __i; __neon_int16x8x3_t __o; } __bu = { __b }; \
      __builtin_neon_vst3v8hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), __bu.__o); \
    })
 
-#define vst3q_s32(__a, __b) \
+#define vst3q_s32(__a, __b) __extension__ \
   ({ \
      int32_t * __ax = __a; \
      union { int32x4x3_t __i; __neon_int32x4x3_t __o; } __bu = { __b }; \
      __builtin_neon_vst3v4si (__neon_ptr_cast(__builtin_neon_si *, __ax), __bu.__o); \
    })
 
-#define vst3q_f32(__a, __b) \
+#define vst3q_f32(__a, __b) __extension__ \
   ({ \
      float32_t * __ax = __a; \
      union { float32x4x3_t __i; __neon_float32x4x3_t __o; } __bu = { __b }; \
      __builtin_neon_vst3v4sf (__ax, __bu.__o); \
    })
 
-#define vst3q_u8(__a, __b) \
+#define vst3q_u8(__a, __b) __extension__ \
   ({ \
      uint8_t * __ax = __a; \
      union { uint8x16x3_t __i; __neon_int8x16x3_t __o; } __bu = { __b }; \
      __builtin_neon_vst3v16qi (__neon_ptr_cast(__builtin_neon_qi *, __ax), __bu.__o); \
    })
 
-#define vst3q_u16(__a, __b) \
+#define vst3q_u16(__a, __b) __extension__ \
   ({ \
      uint16_t * __ax = __a; \
      union { uint16x8x3_t __i; __neon_int16x8x3_t __o; } __bu = { __b }; \
      __builtin_neon_vst3v8hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), __bu.__o); \
    })
 
-#define vst3q_u32(__a, __b) \
+#define vst3q_u32(__a, __b) __extension__ \
   ({ \
      uint32_t * __ax = __a; \
      union { uint32x4x3_t __i; __neon_int32x4x3_t __o; } __bu = { __b }; \
      __builtin_neon_vst3v4si (__neon_ptr_cast(__builtin_neon_si *, __ax), __bu.__o); \
    })
 
-#define vst3q_p8(__a, __b) \
+#define vst3q_p8(__a, __b) __extension__ \
   ({ \
      poly8_t * __ax = __a; \
      union { poly8x16x3_t __i; __neon_int8x16x3_t __o; } __bu = { __b }; \
      __builtin_neon_vst3v16qi (__neon_ptr_cast(__builtin_neon_qi *, __ax), __bu.__o); \
    })
 
-#define vst3q_p16(__a, __b) \
+#define vst3q_p16(__a, __b) __extension__ \
   ({ \
      poly16_t * __ax = __a; \
      union { poly16x8x3_t __i; __neon_int16x8x3_t __o; } __bu = { __b }; \
      __builtin_neon_vst3v8hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), __bu.__o); \
    })
 
-#define vst3_lane_s8(__a, __b, __c) \
+#define vst3_lane_s8(__a, __b, __c) __extension__ \
   ({ \
      int8_t * __ax = __a; \
      union { int8x8x3_t __i; __neon_int8x8x3_t __o; } __bu = { __b }; \
      __builtin_neon_vst3_lanev8qi (__neon_ptr_cast(__builtin_neon_qi *, __ax), __bu.__o, __c); \
    })
 
-#define vst3_lane_s16(__a, __b, __c) \
+#define vst3_lane_s16(__a, __b, __c) __extension__ \
   ({ \
      int16_t * __ax = __a; \
      union { int16x4x3_t __i; __neon_int16x4x3_t __o; } __bu = { __b }; \
      __builtin_neon_vst3_lanev4hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), __bu.__o, __c); \
    })
 
-#define vst3_lane_s32(__a, __b, __c) \
+#define vst3_lane_s32(__a, __b, __c) __extension__ \
   ({ \
      int32_t * __ax = __a; \
      union { int32x2x3_t __i; __neon_int32x2x3_t __o; } __bu = { __b }; \
      __builtin_neon_vst3_lanev2si (__neon_ptr_cast(__builtin_neon_si *, __ax), __bu.__o, __c); \
    })
 
-#define vst3_lane_f32(__a, __b, __c) \
+#define vst3_lane_f32(__a, __b, __c) __extension__ \
   ({ \
      float32_t * __ax = __a; \
      union { float32x2x3_t __i; __neon_float32x2x3_t __o; } __bu = { __b }; \
      __builtin_neon_vst3_lanev2sf (__ax, __bu.__o, __c); \
    })
 
-#define vst3_lane_u8(__a, __b, __c) \
+#define vst3_lane_u8(__a, __b, __c) __extension__ \
   ({ \
      uint8_t * __ax = __a; \
      union { uint8x8x3_t __i; __neon_int8x8x3_t __o; } __bu = { __b }; \
      __builtin_neon_vst3_lanev8qi (__neon_ptr_cast(__builtin_neon_qi *, __ax), __bu.__o, __c); \
    })
 
-#define vst3_lane_u16(__a, __b, __c) \
+#define vst3_lane_u16(__a, __b, __c) __extension__ \
   ({ \
      uint16_t * __ax = __a; \
      union { uint16x4x3_t __i; __neon_int16x4x3_t __o; } __bu = { __b }; \
      __builtin_neon_vst3_lanev4hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), __bu.__o, __c); \
    })
 
-#define vst3_lane_u32(__a, __b, __c) \
+#define vst3_lane_u32(__a, __b, __c) __extension__ \
   ({ \
      uint32_t * __ax = __a; \
      union { uint32x2x3_t __i; __neon_int32x2x3_t __o; } __bu = { __b }; \
      __builtin_neon_vst3_lanev2si (__neon_ptr_cast(__builtin_neon_si *, __ax), __bu.__o, __c); \
    })
 
-#define vst3_lane_p8(__a, __b, __c) \
+#define vst3_lane_p8(__a, __b, __c) __extension__ \
   ({ \
      poly8_t * __ax = __a; \
      union { poly8x8x3_t __i; __neon_int8x8x3_t __o; } __bu = { __b }; \
      __builtin_neon_vst3_lanev8qi (__neon_ptr_cast(__builtin_neon_qi *, __ax), __bu.__o, __c); \
    })
 
-#define vst3_lane_p16(__a, __b, __c) \
+#define vst3_lane_p16(__a, __b, __c) __extension__ \
   ({ \
      poly16_t * __ax = __a; \
      union { poly16x4x3_t __i; __neon_int16x4x3_t __o; } __bu = { __b }; \
      __builtin_neon_vst3_lanev4hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), __bu.__o, __c); \
    })
 
-#define vst3q_lane_s16(__a, __b, __c) \
+#define vst3q_lane_s16(__a, __b, __c) __extension__ \
   ({ \
      int16_t * __ax = __a; \
      union { int16x8x3_t __i; __neon_int16x8x3_t __o; } __bu = { __b }; \
      __builtin_neon_vst3_lanev8hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), __bu.__o, __c); \
    })
 
-#define vst3q_lane_s32(__a, __b, __c) \
+#define vst3q_lane_s32(__a, __b, __c) __extension__ \
   ({ \
      int32_t * __ax = __a; \
      union { int32x4x3_t __i; __neon_int32x4x3_t __o; } __bu = { __b }; \
      __builtin_neon_vst3_lanev4si (__neon_ptr_cast(__builtin_neon_si *, __ax), __bu.__o, __c); \
    })
 
-#define vst3q_lane_f32(__a, __b, __c) \
+#define vst3q_lane_f32(__a, __b, __c) __extension__ \
   ({ \
      float32_t * __ax = __a; \
      union { float32x4x3_t __i; __neon_float32x4x3_t __o; } __bu = { __b }; \
      __builtin_neon_vst3_lanev4sf (__ax, __bu.__o, __c); \
    })
 
-#define vst3q_lane_u16(__a, __b, __c) \
+#define vst3q_lane_u16(__a, __b, __c) __extension__ \
   ({ \
      uint16_t * __ax = __a; \
      union { uint16x8x3_t __i; __neon_int16x8x3_t __o; } __bu = { __b }; \
      __builtin_neon_vst3_lanev8hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), __bu.__o, __c); \
    })
 
-#define vst3q_lane_u32(__a, __b, __c) \
+#define vst3q_lane_u32(__a, __b, __c) __extension__ \
   ({ \
      uint32_t * __ax = __a; \
      union { uint32x4x3_t __i; __neon_int32x4x3_t __o; } __bu = { __b }; \
      __builtin_neon_vst3_lanev4si (__neon_ptr_cast(__builtin_neon_si *, __ax), __bu.__o, __c); \
    })
 
-#define vst3q_lane_p16(__a, __b, __c) \
+#define vst3q_lane_p16(__a, __b, __c) __extension__ \
   ({ \
      poly16_t * __ax = __a; \
      union { poly16x8x3_t __i; __neon_int16x8x3_t __o; } __bu = { __b }; \
      __builtin_neon_vst3_lanev8hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), __bu.__o, __c); \
    })
 
-#define vld4_s8(__a) \
+#define vld4_s8(__a) __extension__ \
   ({ \
      const int8_t * __ax = __a; \
      union { int8x8x4_t __i; __neon_int8x8x4_t __o; } __rv; \
@@ -13317,7 +13317,7 @@
      __rv.__i; \
    })
 
-#define vld4_s16(__a) \
+#define vld4_s16(__a) __extension__ \
   ({ \
      const int16_t * __ax = __a; \
      union { int16x4x4_t __i; __neon_int16x4x4_t __o; } __rv; \
@@ -13325,7 +13325,7 @@
      __rv.__i; \
    })
 
-#define vld4_s32(__a) \
+#define vld4_s32(__a) __extension__ \
   ({ \
      const int32_t * __ax = __a; \
      union { int32x2x4_t __i; __neon_int32x2x4_t __o; } __rv; \
@@ -13333,7 +13333,7 @@
      __rv.__i; \
    })
 
-#define vld4_f32(__a) \
+#define vld4_f32(__a) __extension__ \
   ({ \
      const float32_t * __ax = __a; \
      union { float32x2x4_t __i; __neon_float32x2x4_t __o; } __rv; \
@@ -13341,7 +13341,7 @@
      __rv.__i; \
    })
 
-#define vld4_u8(__a) \
+#define vld4_u8(__a) __extension__ \
   ({ \
      const uint8_t * __ax = __a; \
      union { uint8x8x4_t __i; __neon_int8x8x4_t __o; } __rv; \
@@ -13349,7 +13349,7 @@
      __rv.__i; \
    })
 
-#define vld4_u16(__a) \
+#define vld4_u16(__a) __extension__ \
   ({ \
      const uint16_t * __ax = __a; \
      union { uint16x4x4_t __i; __neon_int16x4x4_t __o; } __rv; \
@@ -13357,7 +13357,7 @@
      __rv.__i; \
    })
 
-#define vld4_u32(__a) \
+#define vld4_u32(__a) __extension__ \
   ({ \
      const uint32_t * __ax = __a; \
      union { uint32x2x4_t __i; __neon_int32x2x4_t __o; } __rv; \
@@ -13365,7 +13365,7 @@
      __rv.__i; \
    })
 
-#define vld4_p8(__a) \
+#define vld4_p8(__a) __extension__ \
   ({ \
      const poly8_t * __ax = __a; \
      union { poly8x8x4_t __i; __neon_int8x8x4_t __o; } __rv; \
@@ -13373,7 +13373,7 @@
      __rv.__i; \
    })
 
-#define vld4_p16(__a) \
+#define vld4_p16(__a) __extension__ \
   ({ \
      const poly16_t * __ax = __a; \
      union { poly16x4x4_t __i; __neon_int16x4x4_t __o; } __rv; \
@@ -13381,7 +13381,7 @@
      __rv.__i; \
    })
 
-#define vld4_s64(__a) \
+#define vld4_s64(__a) __extension__ \
   ({ \
      const int64_t * __ax = __a; \
      union { int64x1x4_t __i; __neon_int64x1x4_t __o; } __rv; \
@@ -13389,7 +13389,7 @@
      __rv.__i; \
    })
 
-#define vld4_u64(__a) \
+#define vld4_u64(__a) __extension__ \
   ({ \
      const uint64_t * __ax = __a; \
      union { uint64x1x4_t __i; __neon_int64x1x4_t __o; } __rv; \
@@ -13397,7 +13397,7 @@
      __rv.__i; \
    })
 
-#define vld4q_s8(__a) \
+#define vld4q_s8(__a) __extension__ \
   ({ \
      const int8_t * __ax = __a; \
      union { int8x16x4_t __i; __neon_int8x16x4_t __o; } __rv; \
@@ -13405,7 +13405,7 @@
      __rv.__i; \
    })
 
-#define vld4q_s16(__a) \
+#define vld4q_s16(__a) __extension__ \
   ({ \
      const int16_t * __ax = __a; \
      union { int16x8x4_t __i; __neon_int16x8x4_t __o; } __rv; \
@@ -13413,7 +13413,7 @@
      __rv.__i; \
    })
 
-#define vld4q_s32(__a) \
+#define vld4q_s32(__a) __extension__ \
   ({ \
      const int32_t * __ax = __a; \
      union { int32x4x4_t __i; __neon_int32x4x4_t __o; } __rv; \
@@ -13421,7 +13421,7 @@
      __rv.__i; \
    })
 
-#define vld4q_f32(__a) \
+#define vld4q_f32(__a) __extension__ \
   ({ \
      const float32_t * __ax = __a; \
      union { float32x4x4_t __i; __neon_float32x4x4_t __o; } __rv; \
@@ -13429,7 +13429,7 @@
      __rv.__i; \
    })
 
-#define vld4q_u8(__a) \
+#define vld4q_u8(__a) __extension__ \
   ({ \
      const uint8_t * __ax = __a; \
      union { uint8x16x4_t __i; __neon_int8x16x4_t __o; } __rv; \
@@ -13437,7 +13437,7 @@
      __rv.__i; \
    })
 
-#define vld4q_u16(__a) \
+#define vld4q_u16(__a) __extension__ \
   ({ \
      const uint16_t * __ax = __a; \
      union { uint16x8x4_t __i; __neon_int16x8x4_t __o; } __rv; \
@@ -13445,7 +13445,7 @@
      __rv.__i; \
    })
 
-#define vld4q_u32(__a) \
+#define vld4q_u32(__a) __extension__ \
   ({ \
      const uint32_t * __ax = __a; \
      union { uint32x4x4_t __i; __neon_int32x4x4_t __o; } __rv; \
@@ -13453,7 +13453,7 @@
      __rv.__i; \
    })
 
-#define vld4q_p8(__a) \
+#define vld4q_p8(__a) __extension__ \
   ({ \
      const poly8_t * __ax = __a; \
      union { poly8x16x4_t __i; __neon_int8x16x4_t __o; } __rv; \
@@ -13461,7 +13461,7 @@
      __rv.__i; \
    })
 
-#define vld4q_p16(__a) \
+#define vld4q_p16(__a) __extension__ \
   ({ \
      const poly16_t * __ax = __a; \
      union { poly16x8x4_t __i; __neon_int16x8x4_t __o; } __rv; \
@@ -13469,7 +13469,7 @@
      __rv.__i; \
    })
 
-#define vld4_lane_s8(__a, __b, __c) \
+#define vld4_lane_s8(__a, __b, __c) __extension__ \
   ({ \
      const int8_t * __ax = __a; \
      union { int8x8x4_t __i; __neon_int8x8x4_t __o; } __bu = { __b }; \
@@ -13478,7 +13478,7 @@
      __rv.__i; \
    })
 
-#define vld4_lane_s16(__a, __b, __c) \
+#define vld4_lane_s16(__a, __b, __c) __extension__ \
   ({ \
      const int16_t * __ax = __a; \
      union { int16x4x4_t __i; __neon_int16x4x4_t __o; } __bu = { __b }; \
@@ -13487,7 +13487,7 @@
      __rv.__i; \
    })
 
-#define vld4_lane_s32(__a, __b, __c) \
+#define vld4_lane_s32(__a, __b, __c) __extension__ \
   ({ \
      const int32_t * __ax = __a; \
      union { int32x2x4_t __i; __neon_int32x2x4_t __o; } __bu = { __b }; \
@@ -13496,7 +13496,7 @@
      __rv.__i; \
    })
 
-#define vld4_lane_f32(__a, __b, __c) \
+#define vld4_lane_f32(__a, __b, __c) __extension__ \
   ({ \
      const float32_t * __ax = __a; \
      union { float32x2x4_t __i; __neon_float32x2x4_t __o; } __bu = { __b }; \
@@ -13505,7 +13505,7 @@
      __rv.__i; \
    })
 
-#define vld4_lane_u8(__a, __b, __c) \
+#define vld4_lane_u8(__a, __b, __c) __extension__ \
   ({ \
      const uint8_t * __ax = __a; \
      union { uint8x8x4_t __i; __neon_int8x8x4_t __o; } __bu = { __b }; \
@@ -13514,7 +13514,7 @@
      __rv.__i; \
    })
 
-#define vld4_lane_u16(__a, __b, __c) \
+#define vld4_lane_u16(__a, __b, __c) __extension__ \
   ({ \
      const uint16_t * __ax = __a; \
      union { uint16x4x4_t __i; __neon_int16x4x4_t __o; } __bu = { __b }; \
@@ -13523,7 +13523,7 @@
      __rv.__i; \
    })
 
-#define vld4_lane_u32(__a, __b, __c) \
+#define vld4_lane_u32(__a, __b, __c) __extension__ \
   ({ \
      const uint32_t * __ax = __a; \
      union { uint32x2x4_t __i; __neon_int32x2x4_t __o; } __bu = { __b }; \
@@ -13532,7 +13532,7 @@
      __rv.__i; \
    })
 
-#define vld4_lane_p8(__a, __b, __c) \
+#define vld4_lane_p8(__a, __b, __c) __extension__ \
   ({ \
      const poly8_t * __ax = __a; \
      union { poly8x8x4_t __i; __neon_int8x8x4_t __o; } __bu = { __b }; \
@@ -13541,7 +13541,7 @@
      __rv.__i; \
    })
 
-#define vld4_lane_p16(__a, __b, __c) \
+#define vld4_lane_p16(__a, __b, __c) __extension__ \
   ({ \
      const poly16_t * __ax = __a; \
      union { poly16x4x4_t __i; __neon_int16x4x4_t __o; } __bu = { __b }; \
@@ -13550,7 +13550,7 @@
      __rv.__i; \
    })
 
-#define vld4q_lane_s16(__a, __b, __c) \
+#define vld4q_lane_s16(__a, __b, __c) __extension__ \
   ({ \
      const int16_t * __ax = __a; \
      union { int16x8x4_t __i; __neon_int16x8x4_t __o; } __bu = { __b }; \
@@ -13559,7 +13559,7 @@
      __rv.__i; \
    })
 
-#define vld4q_lane_s32(__a, __b, __c) \
+#define vld4q_lane_s32(__a, __b, __c) __extension__ \
   ({ \
      const int32_t * __ax = __a; \
      union { int32x4x4_t __i; __neon_int32x4x4_t __o; } __bu = { __b }; \
@@ -13568,7 +13568,7 @@
      __rv.__i; \
    })
 
-#define vld4q_lane_f32(__a, __b, __c) \
+#define vld4q_lane_f32(__a, __b, __c) __extension__ \
   ({ \
      const float32_t * __ax = __a; \
      union { float32x4x4_t __i; __neon_float32x4x4_t __o; } __bu = { __b }; \
@@ -13577,7 +13577,7 @@
      __rv.__i; \
    })
 
-#define vld4q_lane_u16(__a, __b, __c) \
+#define vld4q_lane_u16(__a, __b, __c) __extension__ \
   ({ \
      const uint16_t * __ax = __a; \
      union { uint16x8x4_t __i; __neon_int16x8x4_t __o; } __bu = { __b }; \
@@ -13586,7 +13586,7 @@
      __rv.__i; \
    })
 
-#define vld4q_lane_u32(__a, __b, __c) \
+#define vld4q_lane_u32(__a, __b, __c) __extension__ \
   ({ \
      const uint32_t * __ax = __a; \
      union { uint32x4x4_t __i; __neon_int32x4x4_t __o; } __bu = { __b }; \
@@ -13595,7 +13595,7 @@
      __rv.__i; \
    })
 
-#define vld4q_lane_p16(__a, __b, __c) \
+#define vld4q_lane_p16(__a, __b, __c) __extension__ \
   ({ \
      const poly16_t * __ax = __a; \
      union { poly16x8x4_t __i; __neon_int16x8x4_t __o; } __bu = { __b }; \
@@ -13604,7 +13604,7 @@
      __rv.__i; \
    })
 
-#define vld4_dup_s8(__a) \
+#define vld4_dup_s8(__a) __extension__ \
   ({ \
      const int8_t * __ax = __a; \
      union { int8x8x4_t __i; __neon_int8x8x4_t __o; } __rv; \
@@ -13612,7 +13612,7 @@
      __rv.__i; \
    })
 
-#define vld4_dup_s16(__a) \
+#define vld4_dup_s16(__a) __extension__ \
   ({ \
      const int16_t * __ax = __a; \
      union { int16x4x4_t __i; __neon_int16x4x4_t __o; } __rv; \
@@ -13620,7 +13620,7 @@
      __rv.__i; \
    })
 
-#define vld4_dup_s32(__a) \
+#define vld4_dup_s32(__a) __extension__ \
   ({ \
      const int32_t * __ax = __a; \
      union { int32x2x4_t __i; __neon_int32x2x4_t __o; } __rv; \
@@ -13628,7 +13628,7 @@
      __rv.__i; \
    })
 
-#define vld4_dup_f32(__a) \
+#define vld4_dup_f32(__a) __extension__ \
   ({ \
      const float32_t * __ax = __a; \
      union { float32x2x4_t __i; __neon_float32x2x4_t __o; } __rv; \
@@ -13636,7 +13636,7 @@
      __rv.__i; \
    })
 
-#define vld4_dup_u8(__a) \
+#define vld4_dup_u8(__a) __extension__ \
   ({ \
      const uint8_t * __ax = __a; \
      union { uint8x8x4_t __i; __neon_int8x8x4_t __o; } __rv; \
@@ -13644,7 +13644,7 @@
      __rv.__i; \
    })
 
-#define vld4_dup_u16(__a) \
+#define vld4_dup_u16(__a) __extension__ \
   ({ \
      const uint16_t * __ax = __a; \
      union { uint16x4x4_t __i; __neon_int16x4x4_t __o; } __rv; \
@@ -13652,7 +13652,7 @@
      __rv.__i; \
    })
 
-#define vld4_dup_u32(__a) \
+#define vld4_dup_u32(__a) __extension__ \
   ({ \
      const uint32_t * __ax = __a; \
      union { uint32x2x4_t __i; __neon_int32x2x4_t __o; } __rv; \
@@ -13660,7 +13660,7 @@
      __rv.__i; \
    })
 
-#define vld4_dup_p8(__a) \
+#define vld4_dup_p8(__a) __extension__ \
   ({ \
      const poly8_t * __ax = __a; \
      union { poly8x8x4_t __i; __neon_int8x8x4_t __o; } __rv; \
@@ -13668,7 +13668,7 @@
      __rv.__i; \
    })
 
-#define vld4_dup_p16(__a) \
+#define vld4_dup_p16(__a) __extension__ \
   ({ \
      const poly16_t * __ax = __a; \
      union { poly16x4x4_t __i; __neon_int16x4x4_t __o; } __rv; \
@@ -13676,7 +13676,7 @@
      __rv.__i; \
    })
 
-#define vld4_dup_s64(__a) \
+#define vld4_dup_s64(__a) __extension__ \
   ({ \
      const int64_t * __ax = __a; \
      union { int64x1x4_t __i; __neon_int64x1x4_t __o; } __rv; \
@@ -13684,7 +13684,7 @@
      __rv.__i; \
    })
 
-#define vld4_dup_u64(__a) \
+#define vld4_dup_u64(__a) __extension__ \
   ({ \
      const uint64_t * __ax = __a; \
      union { uint64x1x4_t __i; __neon_int64x1x4_t __o; } __rv; \
@@ -13692,252 +13692,252 @@
      __rv.__i; \
    })
 
-#define vst4_s8(__a, __b) \
+#define vst4_s8(__a, __b) __extension__ \
   ({ \
      int8_t * __ax = __a; \
      union { int8x8x4_t __i; __neon_int8x8x4_t __o; } __bu = { __b }; \
      __builtin_neon_vst4v8qi (__neon_ptr_cast(__builtin_neon_qi *, __ax), __bu.__o); \
    })
 
-#define vst4_s16(__a, __b) \
+#define vst4_s16(__a, __b) __extension__ \
   ({ \
      int16_t * __ax = __a; \
      union { int16x4x4_t __i; __neon_int16x4x4_t __o; } __bu = { __b }; \
      __builtin_neon_vst4v4hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), __bu.__o); \
    })
 
-#define vst4_s32(__a, __b) \
+#define vst4_s32(__a, __b) __extension__ \
   ({ \
      int32_t * __ax = __a; \
      union { int32x2x4_t __i; __neon_int32x2x4_t __o; } __bu = { __b }; \
      __builtin_neon_vst4v2si (__neon_ptr_cast(__builtin_neon_si *, __ax), __bu.__o); \
    })
 
-#define vst4_f32(__a, __b) \
+#define vst4_f32(__a, __b) __extension__ \
   ({ \
      float32_t * __ax = __a; \
      union { float32x2x4_t __i; __neon_float32x2x4_t __o; } __bu = { __b }; \
      __builtin_neon_vst4v2sf (__ax, __bu.__o); \
    })
 
-#define vst4_u8(__a, __b) \
+#define vst4_u8(__a, __b) __extension__ \
   ({ \
      uint8_t * __ax = __a; \
      union { uint8x8x4_t __i; __neon_int8x8x4_t __o; } __bu = { __b }; \
      __builtin_neon_vst4v8qi (__neon_ptr_cast(__builtin_neon_qi *, __ax), __bu.__o); \
    })
 
-#define vst4_u16(__a, __b) \
+#define vst4_u16(__a, __b) __extension__ \
   ({ \
      uint16_t * __ax = __a; \
      union { uint16x4x4_t __i; __neon_int16x4x4_t __o; } __bu = { __b }; \
      __builtin_neon_vst4v4hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), __bu.__o); \
    })
 
-#define vst4_u32(__a, __b) \
+#define vst4_u32(__a, __b) __extension__ \
   ({ \
      uint32_t * __ax = __a; \
      union { uint32x2x4_t __i; __neon_int32x2x4_t __o; } __bu = { __b }; \
      __builtin_neon_vst4v2si (__neon_ptr_cast(__builtin_neon_si *, __ax), __bu.__o); \
    })
 
-#define vst4_p8(__a, __b) \
+#define vst4_p8(__a, __b) __extension__ \
   ({ \
      poly8_t * __ax = __a; \
      union { poly8x8x4_t __i; __neon_int8x8x4_t __o; } __bu = { __b }; \
      __builtin_neon_vst4v8qi (__neon_ptr_cast(__builtin_neon_qi *, __ax), __bu.__o); \
    })
 
-#define vst4_p16(__a, __b) \
+#define vst4_p16(__a, __b) __extension__ \
   ({ \
      poly16_t * __ax = __a; \
      union { poly16x4x4_t __i; __neon_int16x4x4_t __o; } __bu = { __b }; \
      __builtin_neon_vst4v4hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), __bu.__o); \
    })
 
-#define vst4_s64(__a, __b) \
+#define vst4_s64(__a, __b) __extension__ \
   ({ \
      int64_t * __ax = __a; \
      union { int64x1x4_t __i; __neon_int64x1x4_t __o; } __bu = { __b }; \
      __builtin_neon_vst4v1di (__neon_ptr_cast(__builtin_neon_di *, __ax), __bu.__o); \
    })
 
-#define vst4_u64(__a, __b) \
+#define vst4_u64(__a, __b) __extension__ \
   ({ \
      uint64_t * __ax = __a; \
      union { uint64x1x4_t __i; __neon_int64x1x4_t __o; } __bu = { __b }; \
      __builtin_neon_vst4v1di (__neon_ptr_cast(__builtin_neon_di *, __ax), __bu.__o); \
    })
 
-#define vst4q_s8(__a, __b) \
+#define vst4q_s8(__a, __b) __extension__ \
   ({ \
      int8_t * __ax = __a; \
      union { int8x16x4_t __i; __neon_int8x16x4_t __o; } __bu = { __b }; \
      __builtin_neon_vst4v16qi (__neon_ptr_cast(__builtin_neon_qi *, __ax), __bu.__o); \
    })
 
-#define vst4q_s16(__a, __b) \
+#define vst4q_s16(__a, __b) __extension__ \
   ({ \
      int16_t * __ax = __a; \
      union { int16x8x4_t __i; __neon_int16x8x4_t __o; } __bu = { __b }; \
      __builtin_neon_vst4v8hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), __bu.__o); \
    })
 
-#define vst4q_s32(__a, __b) \
+#define vst4q_s32(__a, __b) __extension__ \
   ({ \
      int32_t * __ax = __a; \
      union { int32x4x4_t __i; __neon_int32x4x4_t __o; } __bu = { __b }; \
      __builtin_neon_vst4v4si (__neon_ptr_cast(__builtin_neon_si *, __ax), __bu.__o); \
    })
 
-#define vst4q_f32(__a, __b) \
+#define vst4q_f32(__a, __b) __extension__ \
   ({ \
      float32_t * __ax = __a; \
      union { float32x4x4_t __i; __neon_float32x4x4_t __o; } __bu = { __b }; \
      __builtin_neon_vst4v4sf (__ax, __bu.__o); \
    })
 
-#define vst4q_u8(__a, __b) \
+#define vst4q_u8(__a, __b) __extension__ \
   ({ \
      uint8_t * __ax = __a; \
      union { uint8x16x4_t __i; __neon_int8x16x4_t __o; } __bu = { __b }; \
      __builtin_neon_vst4v16qi (__neon_ptr_cast(__builtin_neon_qi *, __ax), __bu.__o); \
    })
 
-#define vst4q_u16(__a, __b) \
+#define vst4q_u16(__a, __b) __extension__ \
   ({ \
      uint16_t * __ax = __a; \
      union { uint16x8x4_t __i; __neon_int16x8x4_t __o; } __bu = { __b }; \
      __builtin_neon_vst4v8hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), __bu.__o); \
    })
 
-#define vst4q_u32(__a, __b) \
+#define vst4q_u32(__a, __b) __extension__ \
   ({ \
      uint32_t * __ax = __a; \
      union { uint32x4x4_t __i; __neon_int32x4x4_t __o; } __bu = { __b }; \
      __builtin_neon_vst4v4si (__neon_ptr_cast(__builtin_neon_si *, __ax), __bu.__o); \
    })
 
-#define vst4q_p8(__a, __b) \
+#define vst4q_p8(__a, __b) __extension__ \
   ({ \
      poly8_t * __ax = __a; \
      union { poly8x16x4_t __i; __neon_int8x16x4_t __o; } __bu = { __b }; \
      __builtin_neon_vst4v16qi (__neon_ptr_cast(__builtin_neon_qi *, __ax), __bu.__o); \
    })
 
-#define vst4q_p16(__a, __b) \
+#define vst4q_p16(__a, __b) __extension__ \
   ({ \
      poly16_t * __ax = __a; \
      union { poly16x8x4_t __i; __neon_int16x8x4_t __o; } __bu = { __b }; \
      __builtin_neon_vst4v8hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), __bu.__o); \
    })
 
-#define vst4_lane_s8(__a, __b, __c) \
+#define vst4_lane_s8(__a, __b, __c) __extension__ \
   ({ \
      int8_t * __ax = __a; \
      union { int8x8x4_t __i; __neon_int8x8x4_t __o; } __bu = { __b }; \
      __builtin_neon_vst4_lanev8qi (__neon_ptr_cast(__builtin_neon_qi *, __ax), __bu.__o, __c); \
    })
 
-#define vst4_lane_s16(__a, __b, __c) \
+#define vst4_lane_s16(__a, __b, __c) __extension__ \
   ({ \
      int16_t * __ax = __a; \
      union { int16x4x4_t __i; __neon_int16x4x4_t __o; } __bu = { __b }; \
      __builtin_neon_vst4_lanev4hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), __bu.__o, __c); \
    })
 
-#define vst4_lane_s32(__a, __b, __c) \
+#define vst4_lane_s32(__a, __b, __c) __extension__ \
   ({ \
      int32_t * __ax = __a; \
      union { int32x2x4_t __i; __neon_int32x2x4_t __o; } __bu = { __b }; \
      __builtin_neon_vst4_lanev2si (__neon_ptr_cast(__builtin_neon_si *, __ax), __bu.__o, __c); \
    })
 
-#define vst4_lane_f32(__a, __b, __c) \
+#define vst4_lane_f32(__a, __b, __c) __extension__ \
   ({ \
      float32_t * __ax = __a; \
      union { float32x2x4_t __i; __neon_float32x2x4_t __o; } __bu = { __b }; \
      __builtin_neon_vst4_lanev2sf (__ax, __bu.__o, __c); \
    })
 
-#define vst4_lane_u8(__a, __b, __c) \
+#define vst4_lane_u8(__a, __b, __c) __extension__ \
   ({ \
      uint8_t * __ax = __a; \
      union { uint8x8x4_t __i; __neon_int8x8x4_t __o; } __bu = { __b }; \
      __builtin_neon_vst4_lanev8qi (__neon_ptr_cast(__builtin_neon_qi *, __ax), __bu.__o, __c); \
    })
 
-#define vst4_lane_u16(__a, __b, __c) \
+#define vst4_lane_u16(__a, __b, __c) __extension__ \
   ({ \
      uint16_t * __ax = __a; \
      union { uint16x4x4_t __i; __neon_int16x4x4_t __o; } __bu = { __b }; \
      __builtin_neon_vst4_lanev4hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), __bu.__o, __c); \
    })
 
-#define vst4_lane_u32(__a, __b, __c) \
+#define vst4_lane_u32(__a, __b, __c) __extension__ \
   ({ \
      uint32_t * __ax = __a; \
      union { uint32x2x4_t __i; __neon_int32x2x4_t __o; } __bu = { __b }; \
      __builtin_neon_vst4_lanev2si (__neon_ptr_cast(__builtin_neon_si *, __ax), __bu.__o, __c); \
    })
 
-#define vst4_lane_p8(__a, __b, __c) \
+#define vst4_lane_p8(__a, __b, __c) __extension__ \
   ({ \
      poly8_t * __ax = __a; \
      union { poly8x8x4_t __i; __neon_int8x8x4_t __o; } __bu = { __b }; \
      __builtin_neon_vst4_lanev8qi (__neon_ptr_cast(__builtin_neon_qi *, __ax), __bu.__o, __c); \
    })
 
-#define vst4_lane_p16(__a, __b, __c) \
+#define vst4_lane_p16(__a, __b, __c) __extension__ \
   ({ \
      poly16_t * __ax = __a; \
      union { poly16x4x4_t __i; __neon_int16x4x4_t __o; } __bu = { __b }; \
      __builtin_neon_vst4_lanev4hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), __bu.__o, __c); \
    })
 
-#define vst4q_lane_s16(__a, __b, __c) \
+#define vst4q_lane_s16(__a, __b, __c) __extension__ \
   ({ \
      int16_t * __ax = __a; \
      union { int16x8x4_t __i; __neon_int16x8x4_t __o; } __bu = { __b }; \
      __builtin_neon_vst4_lanev8hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), __bu.__o, __c); \
    })
 
-#define vst4q_lane_s32(__a, __b, __c) \
+#define vst4q_lane_s32(__a, __b, __c) __extension__ \
   ({ \
      int32_t * __ax = __a; \
      union { int32x4x4_t __i; __neon_int32x4x4_t __o; } __bu = { __b }; \
      __builtin_neon_vst4_lanev4si (__neon_ptr_cast(__builtin_neon_si *, __ax), __bu.__o, __c); \
    })
 
-#define vst4q_lane_f32(__a, __b, __c) \
+#define vst4q_lane_f32(__a, __b, __c) __extension__ \
   ({ \
      float32_t * __ax = __a; \
      union { float32x4x4_t __i; __neon_float32x4x4_t __o; } __bu = { __b }; \
      __builtin_neon_vst4_lanev4sf (__ax, __bu.__o, __c); \
    })
 
-#define vst4q_lane_u16(__a, __b, __c) \
+#define vst4q_lane_u16(__a, __b, __c) __extension__ \
   ({ \
      uint16_t * __ax = __a; \
      union { uint16x8x4_t __i; __neon_int16x8x4_t __o; } __bu = { __b }; \
      __builtin_neon_vst4_lanev8hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), __bu.__o, __c); \
    })
 
-#define vst4q_lane_u32(__a, __b, __c) \
+#define vst4q_lane_u32(__a, __b, __c) __extension__ \
   ({ \
      uint32_t * __ax = __a; \
      union { uint32x4x4_t __i; __neon_int32x4x4_t __o; } __bu = { __b }; \
      __builtin_neon_vst4_lanev4si (__neon_ptr_cast(__builtin_neon_si *, __ax), __bu.__o, __c); \
    })
 
-#define vst4q_lane_p16(__a, __b, __c) \
+#define vst4q_lane_p16(__a, __b, __c) __extension__ \
   ({ \
      poly16_t * __ax = __a; \
      union { poly16x8x4_t __i; __neon_int16x8x4_t __o; } __bu = { __b }; \
      __builtin_neon_vst4_lanev8hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), __bu.__o, __c); \
    })
 
-#define vand_s8(__a, __b) \
+#define vand_s8(__a, __b) __extension__ \
   ({ \
      int8x8_t __ax = __a; \
      int8x8_t __bx = __b; \
@@ -13946,7 +13946,7 @@
      __rv.__i; \
    })
 
-#define vand_s16(__a, __b) \
+#define vand_s16(__a, __b) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      int16x4_t __bx = __b; \
@@ -13955,7 +13955,7 @@
      __rv.__i; \
    })
 
-#define vand_s32(__a, __b) \
+#define vand_s32(__a, __b) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      int32x2_t __bx = __b; \
@@ -13964,7 +13964,7 @@
      __rv.__i; \
    })
 
-#define vand_s64(__a, __b) \
+#define vand_s64(__a, __b) __extension__ \
   ({ \
      int64x1_t __ax = __a; \
      int64x1_t __bx = __b; \
@@ -13973,7 +13973,7 @@
      __rv.__i; \
    })
 
-#define vand_u8(__a, __b) \
+#define vand_u8(__a, __b) __extension__ \
   ({ \
      uint8x8_t __ax = __a; \
      uint8x8_t __bx = __b; \
@@ -13982,7 +13982,7 @@
      __rv.__i; \
    })
 
-#define vand_u16(__a, __b) \
+#define vand_u16(__a, __b) __extension__ \
   ({ \
      uint16x4_t __ax = __a; \
      uint16x4_t __bx = __b; \
@@ -13991,7 +13991,7 @@
      __rv.__i; \
    })
 
-#define vand_u32(__a, __b) \
+#define vand_u32(__a, __b) __extension__ \
   ({ \
      uint32x2_t __ax = __a; \
      uint32x2_t __bx = __b; \
@@ -14000,7 +14000,7 @@
      __rv.__i; \
    })
 
-#define vand_u64(__a, __b) \
+#define vand_u64(__a, __b) __extension__ \
   ({ \
      uint64x1_t __ax = __a; \
      uint64x1_t __bx = __b; \
@@ -14009,7 +14009,7 @@
      __rv.__i; \
    })
 
-#define vandq_s8(__a, __b) \
+#define vandq_s8(__a, __b) __extension__ \
   ({ \
      int8x16_t __ax = __a; \
      int8x16_t __bx = __b; \
@@ -14018,7 +14018,7 @@
      __rv.__i; \
    })
 
-#define vandq_s16(__a, __b) \
+#define vandq_s16(__a, __b) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      int16x8_t __bx = __b; \
@@ -14027,7 +14027,7 @@
      __rv.__i; \
    })
 
-#define vandq_s32(__a, __b) \
+#define vandq_s32(__a, __b) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      int32x4_t __bx = __b; \
@@ -14036,7 +14036,7 @@
      __rv.__i; \
    })
 
-#define vandq_s64(__a, __b) \
+#define vandq_s64(__a, __b) __extension__ \
   ({ \
      int64x2_t __ax = __a; \
      int64x2_t __bx = __b; \
@@ -14045,7 +14045,7 @@
      __rv.__i; \
    })
 
-#define vandq_u8(__a, __b) \
+#define vandq_u8(__a, __b) __extension__ \
   ({ \
      uint8x16_t __ax = __a; \
      uint8x16_t __bx = __b; \
@@ -14054,7 +14054,7 @@
      __rv.__i; \
    })
 
-#define vandq_u16(__a, __b) \
+#define vandq_u16(__a, __b) __extension__ \
   ({ \
      uint16x8_t __ax = __a; \
      uint16x8_t __bx = __b; \
@@ -14063,7 +14063,7 @@
      __rv.__i; \
    })
 
-#define vandq_u32(__a, __b) \
+#define vandq_u32(__a, __b) __extension__ \
   ({ \
      uint32x4_t __ax = __a; \
      uint32x4_t __bx = __b; \
@@ -14072,7 +14072,7 @@
      __rv.__i; \
    })
 
-#define vandq_u64(__a, __b) \
+#define vandq_u64(__a, __b) __extension__ \
   ({ \
      uint64x2_t __ax = __a; \
      uint64x2_t __bx = __b; \
@@ -14081,7 +14081,7 @@
      __rv.__i; \
    })
 
-#define vorr_s8(__a, __b) \
+#define vorr_s8(__a, __b) __extension__ \
   ({ \
      int8x8_t __ax = __a; \
      int8x8_t __bx = __b; \
@@ -14090,7 +14090,7 @@
      __rv.__i; \
    })
 
-#define vorr_s16(__a, __b) \
+#define vorr_s16(__a, __b) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      int16x4_t __bx = __b; \
@@ -14099,7 +14099,7 @@
      __rv.__i; \
    })
 
-#define vorr_s32(__a, __b) \
+#define vorr_s32(__a, __b) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      int32x2_t __bx = __b; \
@@ -14108,7 +14108,7 @@
      __rv.__i; \
    })
 
-#define vorr_s64(__a, __b) \
+#define vorr_s64(__a, __b) __extension__ \
   ({ \
      int64x1_t __ax = __a; \
      int64x1_t __bx = __b; \
@@ -14117,7 +14117,7 @@
      __rv.__i; \
    })
 
-#define vorr_u8(__a, __b) \
+#define vorr_u8(__a, __b) __extension__ \
   ({ \
      uint8x8_t __ax = __a; \
      uint8x8_t __bx = __b; \
@@ -14126,7 +14126,7 @@
      __rv.__i; \
    })
 
-#define vorr_u16(__a, __b) \
+#define vorr_u16(__a, __b) __extension__ \
   ({ \
      uint16x4_t __ax = __a; \
      uint16x4_t __bx = __b; \
@@ -14135,7 +14135,7 @@
      __rv.__i; \
    })
 
-#define vorr_u32(__a, __b) \
+#define vorr_u32(__a, __b) __extension__ \
   ({ \
      uint32x2_t __ax = __a; \
      uint32x2_t __bx = __b; \
@@ -14144,7 +14144,7 @@
      __rv.__i; \
    })
 
-#define vorr_u64(__a, __b) \
+#define vorr_u64(__a, __b) __extension__ \
   ({ \
      uint64x1_t __ax = __a; \
      uint64x1_t __bx = __b; \
@@ -14153,7 +14153,7 @@
      __rv.__i; \
    })
 
-#define vorrq_s8(__a, __b) \
+#define vorrq_s8(__a, __b) __extension__ \
   ({ \
      int8x16_t __ax = __a; \
      int8x16_t __bx = __b; \
@@ -14162,7 +14162,7 @@
      __rv.__i; \
    })
 
-#define vorrq_s16(__a, __b) \
+#define vorrq_s16(__a, __b) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      int16x8_t __bx = __b; \
@@ -14171,7 +14171,7 @@
      __rv.__i; \
    })
 
-#define vorrq_s32(__a, __b) \
+#define vorrq_s32(__a, __b) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      int32x4_t __bx = __b; \
@@ -14180,7 +14180,7 @@
      __rv.__i; \
    })
 
-#define vorrq_s64(__a, __b) \
+#define vorrq_s64(__a, __b) __extension__ \
   ({ \
      int64x2_t __ax = __a; \
      int64x2_t __bx = __b; \
@@ -14189,7 +14189,7 @@
      __rv.__i; \
    })
 
-#define vorrq_u8(__a, __b) \
+#define vorrq_u8(__a, __b) __extension__ \
   ({ \
      uint8x16_t __ax = __a; \
      uint8x16_t __bx = __b; \
@@ -14198,7 +14198,7 @@
      __rv.__i; \
    })
 
-#define vorrq_u16(__a, __b) \
+#define vorrq_u16(__a, __b) __extension__ \
   ({ \
      uint16x8_t __ax = __a; \
      uint16x8_t __bx = __b; \
@@ -14207,7 +14207,7 @@
      __rv.__i; \
    })
 
-#define vorrq_u32(__a, __b) \
+#define vorrq_u32(__a, __b) __extension__ \
   ({ \
      uint32x4_t __ax = __a; \
      uint32x4_t __bx = __b; \
@@ -14216,7 +14216,7 @@
      __rv.__i; \
    })
 
-#define vorrq_u64(__a, __b) \
+#define vorrq_u64(__a, __b) __extension__ \
   ({ \
      uint64x2_t __ax = __a; \
      uint64x2_t __bx = __b; \
@@ -14225,7 +14225,7 @@
      __rv.__i; \
    })
 
-#define veor_s8(__a, __b) \
+#define veor_s8(__a, __b) __extension__ \
   ({ \
      int8x8_t __ax = __a; \
      int8x8_t __bx = __b; \
@@ -14234,7 +14234,7 @@
      __rv.__i; \
    })
 
-#define veor_s16(__a, __b) \
+#define veor_s16(__a, __b) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      int16x4_t __bx = __b; \
@@ -14243,7 +14243,7 @@
      __rv.__i; \
    })
 
-#define veor_s32(__a, __b) \
+#define veor_s32(__a, __b) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      int32x2_t __bx = __b; \
@@ -14252,7 +14252,7 @@
      __rv.__i; \
    })
 
-#define veor_s64(__a, __b) \
+#define veor_s64(__a, __b) __extension__ \
   ({ \
      int64x1_t __ax = __a; \
      int64x1_t __bx = __b; \
@@ -14261,7 +14261,7 @@
      __rv.__i; \
    })
 
-#define veor_u8(__a, __b) \
+#define veor_u8(__a, __b) __extension__ \
   ({ \
      uint8x8_t __ax = __a; \
      uint8x8_t __bx = __b; \
@@ -14270,7 +14270,7 @@
      __rv.__i; \
    })
 
-#define veor_u16(__a, __b) \
+#define veor_u16(__a, __b) __extension__ \
   ({ \
      uint16x4_t __ax = __a; \
      uint16x4_t __bx = __b; \
@@ -14279,7 +14279,7 @@
      __rv.__i; \
    })
 
-#define veor_u32(__a, __b) \
+#define veor_u32(__a, __b) __extension__ \
   ({ \
      uint32x2_t __ax = __a; \
      uint32x2_t __bx = __b; \
@@ -14288,7 +14288,7 @@
      __rv.__i; \
    })
 
-#define veor_u64(__a, __b) \
+#define veor_u64(__a, __b) __extension__ \
   ({ \
      uint64x1_t __ax = __a; \
      uint64x1_t __bx = __b; \
@@ -14297,7 +14297,7 @@
      __rv.__i; \
    })
 
-#define veorq_s8(__a, __b) \
+#define veorq_s8(__a, __b) __extension__ \
   ({ \
      int8x16_t __ax = __a; \
      int8x16_t __bx = __b; \
@@ -14306,7 +14306,7 @@
      __rv.__i; \
    })
 
-#define veorq_s16(__a, __b) \
+#define veorq_s16(__a, __b) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      int16x8_t __bx = __b; \
@@ -14315,7 +14315,7 @@
      __rv.__i; \
    })
 
-#define veorq_s32(__a, __b) \
+#define veorq_s32(__a, __b) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      int32x4_t __bx = __b; \
@@ -14324,7 +14324,7 @@
      __rv.__i; \
    })
 
-#define veorq_s64(__a, __b) \
+#define veorq_s64(__a, __b) __extension__ \
   ({ \
      int64x2_t __ax = __a; \
      int64x2_t __bx = __b; \
@@ -14333,7 +14333,7 @@
      __rv.__i; \
    })
 
-#define veorq_u8(__a, __b) \
+#define veorq_u8(__a, __b) __extension__ \
   ({ \
      uint8x16_t __ax = __a; \
      uint8x16_t __bx = __b; \
@@ -14342,7 +14342,7 @@
      __rv.__i; \
    })
 
-#define veorq_u16(__a, __b) \
+#define veorq_u16(__a, __b) __extension__ \
   ({ \
      uint16x8_t __ax = __a; \
      uint16x8_t __bx = __b; \
@@ -14351,7 +14351,7 @@
      __rv.__i; \
    })
 
-#define veorq_u32(__a, __b) \
+#define veorq_u32(__a, __b) __extension__ \
   ({ \
      uint32x4_t __ax = __a; \
      uint32x4_t __bx = __b; \
@@ -14360,7 +14360,7 @@
      __rv.__i; \
    })
 
-#define veorq_u64(__a, __b) \
+#define veorq_u64(__a, __b) __extension__ \
   ({ \
      uint64x2_t __ax = __a; \
      uint64x2_t __bx = __b; \
@@ -14369,7 +14369,7 @@
      __rv.__i; \
    })
 
-#define vbic_s8(__a, __b) \
+#define vbic_s8(__a, __b) __extension__ \
   ({ \
      int8x8_t __ax = __a; \
      int8x8_t __bx = __b; \
@@ -14378,7 +14378,7 @@
      __rv.__i; \
    })
 
-#define vbic_s16(__a, __b) \
+#define vbic_s16(__a, __b) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      int16x4_t __bx = __b; \
@@ -14387,7 +14387,7 @@
      __rv.__i; \
    })
 
-#define vbic_s32(__a, __b) \
+#define vbic_s32(__a, __b) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      int32x2_t __bx = __b; \
@@ -14396,7 +14396,7 @@
      __rv.__i; \
    })
 
-#define vbic_s64(__a, __b) \
+#define vbic_s64(__a, __b) __extension__ \
   ({ \
      int64x1_t __ax = __a; \
      int64x1_t __bx = __b; \
@@ -14405,7 +14405,7 @@
      __rv.__i; \
    })
 
-#define vbic_u8(__a, __b) \
+#define vbic_u8(__a, __b) __extension__ \
   ({ \
      uint8x8_t __ax = __a; \
      uint8x8_t __bx = __b; \
@@ -14414,7 +14414,7 @@
      __rv.__i; \
    })
 
-#define vbic_u16(__a, __b) \
+#define vbic_u16(__a, __b) __extension__ \
   ({ \
      uint16x4_t __ax = __a; \
      uint16x4_t __bx = __b; \
@@ -14423,7 +14423,7 @@
      __rv.__i; \
    })
 
-#define vbic_u32(__a, __b) \
+#define vbic_u32(__a, __b) __extension__ \
   ({ \
      uint32x2_t __ax = __a; \
      uint32x2_t __bx = __b; \
@@ -14432,7 +14432,7 @@
      __rv.__i; \
    })
 
-#define vbic_u64(__a, __b) \
+#define vbic_u64(__a, __b) __extension__ \
   ({ \
      uint64x1_t __ax = __a; \
      uint64x1_t __bx = __b; \
@@ -14441,7 +14441,7 @@
      __rv.__i; \
    })
 
-#define vbicq_s8(__a, __b) \
+#define vbicq_s8(__a, __b) __extension__ \
   ({ \
      int8x16_t __ax = __a; \
      int8x16_t __bx = __b; \
@@ -14450,7 +14450,7 @@
      __rv.__i; \
    })
 
-#define vbicq_s16(__a, __b) \
+#define vbicq_s16(__a, __b) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      int16x8_t __bx = __b; \
@@ -14459,7 +14459,7 @@
      __rv.__i; \
    })
 
-#define vbicq_s32(__a, __b) \
+#define vbicq_s32(__a, __b) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      int32x4_t __bx = __b; \
@@ -14468,7 +14468,7 @@
      __rv.__i; \
    })
 
-#define vbicq_s64(__a, __b) \
+#define vbicq_s64(__a, __b) __extension__ \
   ({ \
      int64x2_t __ax = __a; \
      int64x2_t __bx = __b; \
@@ -14477,7 +14477,7 @@
      __rv.__i; \
    })
 
-#define vbicq_u8(__a, __b) \
+#define vbicq_u8(__a, __b) __extension__ \
   ({ \
      uint8x16_t __ax = __a; \
      uint8x16_t __bx = __b; \
@@ -14486,7 +14486,7 @@
      __rv.__i; \
    })
 
-#define vbicq_u16(__a, __b) \
+#define vbicq_u16(__a, __b) __extension__ \
   ({ \
      uint16x8_t __ax = __a; \
      uint16x8_t __bx = __b; \
@@ -14495,7 +14495,7 @@
      __rv.__i; \
    })
 
-#define vbicq_u32(__a, __b) \
+#define vbicq_u32(__a, __b) __extension__ \
   ({ \
      uint32x4_t __ax = __a; \
      uint32x4_t __bx = __b; \
@@ -14504,7 +14504,7 @@
      __rv.__i; \
    })
 
-#define vbicq_u64(__a, __b) \
+#define vbicq_u64(__a, __b) __extension__ \
   ({ \
      uint64x2_t __ax = __a; \
      uint64x2_t __bx = __b; \
@@ -14513,7 +14513,7 @@
      __rv.__i; \
    })
 
-#define vorn_s8(__a, __b) \
+#define vorn_s8(__a, __b) __extension__ \
   ({ \
      int8x8_t __ax = __a; \
      int8x8_t __bx = __b; \
@@ -14522,7 +14522,7 @@
      __rv.__i; \
    })
 
-#define vorn_s16(__a, __b) \
+#define vorn_s16(__a, __b) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      int16x4_t __bx = __b; \
@@ -14531,7 +14531,7 @@
      __rv.__i; \
    })
 
-#define vorn_s32(__a, __b) \
+#define vorn_s32(__a, __b) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      int32x2_t __bx = __b; \
@@ -14540,7 +14540,7 @@
      __rv.__i; \
    })
 
-#define vorn_s64(__a, __b) \
+#define vorn_s64(__a, __b) __extension__ \
   ({ \
      int64x1_t __ax = __a; \
      int64x1_t __bx = __b; \
@@ -14549,7 +14549,7 @@
      __rv.__i; \
    })
 
-#define vorn_u8(__a, __b) \
+#define vorn_u8(__a, __b) __extension__ \
   ({ \
      uint8x8_t __ax = __a; \
      uint8x8_t __bx = __b; \
@@ -14558,7 +14558,7 @@
      __rv.__i; \
    })
 
-#define vorn_u16(__a, __b) \
+#define vorn_u16(__a, __b) __extension__ \
   ({ \
      uint16x4_t __ax = __a; \
      uint16x4_t __bx = __b; \
@@ -14567,7 +14567,7 @@
      __rv.__i; \
    })
 
-#define vorn_u32(__a, __b) \
+#define vorn_u32(__a, __b) __extension__ \
   ({ \
      uint32x2_t __ax = __a; \
      uint32x2_t __bx = __b; \
@@ -14576,7 +14576,7 @@
      __rv.__i; \
    })
 
-#define vorn_u64(__a, __b) \
+#define vorn_u64(__a, __b) __extension__ \
   ({ \
      uint64x1_t __ax = __a; \
      uint64x1_t __bx = __b; \
@@ -14585,7 +14585,7 @@
      __rv.__i; \
    })
 
-#define vornq_s8(__a, __b) \
+#define vornq_s8(__a, __b) __extension__ \
   ({ \
      int8x16_t __ax = __a; \
      int8x16_t __bx = __b; \
@@ -14594,7 +14594,7 @@
      __rv.__i; \
    })
 
-#define vornq_s16(__a, __b) \
+#define vornq_s16(__a, __b) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      int16x8_t __bx = __b; \
@@ -14603,7 +14603,7 @@
      __rv.__i; \
    })
 
-#define vornq_s32(__a, __b) \
+#define vornq_s32(__a, __b) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      int32x4_t __bx = __b; \
@@ -14612,7 +14612,7 @@
      __rv.__i; \
    })
 
-#define vornq_s64(__a, __b) \
+#define vornq_s64(__a, __b) __extension__ \
   ({ \
      int64x2_t __ax = __a; \
      int64x2_t __bx = __b; \
@@ -14621,7 +14621,7 @@
      __rv.__i; \
    })
 
-#define vornq_u8(__a, __b) \
+#define vornq_u8(__a, __b) __extension__ \
   ({ \
      uint8x16_t __ax = __a; \
      uint8x16_t __bx = __b; \
@@ -14630,7 +14630,7 @@
      __rv.__i; \
    })
 
-#define vornq_u16(__a, __b) \
+#define vornq_u16(__a, __b) __extension__ \
   ({ \
      uint16x8_t __ax = __a; \
      uint16x8_t __bx = __b; \
@@ -14639,7 +14639,7 @@
      __rv.__i; \
    })
 
-#define vornq_u32(__a, __b) \
+#define vornq_u32(__a, __b) __extension__ \
   ({ \
      uint32x4_t __ax = __a; \
      uint32x4_t __bx = __b; \
@@ -14648,7 +14648,7 @@
      __rv.__i; \
    })
 
-#define vornq_u64(__a, __b) \
+#define vornq_u64(__a, __b) __extension__ \
   ({ \
      uint64x2_t __ax = __a; \
      uint64x2_t __bx = __b; \
@@ -14658,7 +14658,7 @@
    })
 
 
-#define vreinterpret_p8_s8(__a) \
+#define vreinterpret_p8_s8(__a) __extension__ \
   ({ \
      int8x8_t __ax = __a; \
      union { poly8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -14666,7 +14666,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_p8_s16(__a) \
+#define vreinterpret_p8_s16(__a) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      union { poly8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -14674,7 +14674,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_p8_s32(__a) \
+#define vreinterpret_p8_s32(__a) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      union { poly8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -14682,7 +14682,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_p8_s64(__a) \
+#define vreinterpret_p8_s64(__a) __extension__ \
   ({ \
      int64x1_t __ax = __a; \
      union { poly8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -14690,7 +14690,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_p8_f32(__a) \
+#define vreinterpret_p8_f32(__a) __extension__ \
   ({ \
      float32x2_t __ax = __a; \
      union { poly8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -14698,7 +14698,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_p8_u8(__a) \
+#define vreinterpret_p8_u8(__a) __extension__ \
   ({ \
      uint8x8_t __ax = __a; \
      union { poly8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -14706,7 +14706,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_p8_u16(__a) \
+#define vreinterpret_p8_u16(__a) __extension__ \
   ({ \
      uint16x4_t __ax = __a; \
      union { poly8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -14714,7 +14714,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_p8_u32(__a) \
+#define vreinterpret_p8_u32(__a) __extension__ \
   ({ \
      uint32x2_t __ax = __a; \
      union { poly8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -14722,7 +14722,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_p8_u64(__a) \
+#define vreinterpret_p8_u64(__a) __extension__ \
   ({ \
      uint64x1_t __ax = __a; \
      union { poly8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -14730,7 +14730,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_p8_p16(__a) \
+#define vreinterpret_p8_p16(__a) __extension__ \
   ({ \
      poly16x4_t __ax = __a; \
      union { poly8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -14738,7 +14738,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_p8_s8(__a) \
+#define vreinterpretq_p8_s8(__a) __extension__ \
   ({ \
      int8x16_t __ax = __a; \
      union { poly8x16_t __i; __neon_int8x16_t __o; } __rv; \
@@ -14746,7 +14746,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_p8_s16(__a) \
+#define vreinterpretq_p8_s16(__a) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      union { poly8x16_t __i; __neon_int8x16_t __o; } __rv; \
@@ -14754,7 +14754,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_p8_s32(__a) \
+#define vreinterpretq_p8_s32(__a) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      union { poly8x16_t __i; __neon_int8x16_t __o; } __rv; \
@@ -14762,7 +14762,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_p8_s64(__a) \
+#define vreinterpretq_p8_s64(__a) __extension__ \
   ({ \
      int64x2_t __ax = __a; \
      union { poly8x16_t __i; __neon_int8x16_t __o; } __rv; \
@@ -14770,7 +14770,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_p8_f32(__a) \
+#define vreinterpretq_p8_f32(__a) __extension__ \
   ({ \
      float32x4_t __ax = __a; \
      union { poly8x16_t __i; __neon_int8x16_t __o; } __rv; \
@@ -14778,7 +14778,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_p8_u8(__a) \
+#define vreinterpretq_p8_u8(__a) __extension__ \
   ({ \
      uint8x16_t __ax = __a; \
      union { poly8x16_t __i; __neon_int8x16_t __o; } __rv; \
@@ -14786,7 +14786,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_p8_u16(__a) \
+#define vreinterpretq_p8_u16(__a) __extension__ \
   ({ \
      uint16x8_t __ax = __a; \
      union { poly8x16_t __i; __neon_int8x16_t __o; } __rv; \
@@ -14794,7 +14794,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_p8_u32(__a) \
+#define vreinterpretq_p8_u32(__a) __extension__ \
   ({ \
      uint32x4_t __ax = __a; \
      union { poly8x16_t __i; __neon_int8x16_t __o; } __rv; \
@@ -14802,7 +14802,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_p8_u64(__a) \
+#define vreinterpretq_p8_u64(__a) __extension__ \
   ({ \
      uint64x2_t __ax = __a; \
      union { poly8x16_t __i; __neon_int8x16_t __o; } __rv; \
@@ -14810,7 +14810,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_p8_p16(__a) \
+#define vreinterpretq_p8_p16(__a) __extension__ \
   ({ \
      poly16x8_t __ax = __a; \
      union { poly8x16_t __i; __neon_int8x16_t __o; } __rv; \
@@ -14818,7 +14818,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_p16_s8(__a) \
+#define vreinterpret_p16_s8(__a) __extension__ \
   ({ \
      int8x8_t __ax = __a; \
      union { poly16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -14826,7 +14826,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_p16_s16(__a) \
+#define vreinterpret_p16_s16(__a) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      union { poly16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -14834,7 +14834,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_p16_s32(__a) \
+#define vreinterpret_p16_s32(__a) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      union { poly16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -14842,7 +14842,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_p16_s64(__a) \
+#define vreinterpret_p16_s64(__a) __extension__ \
   ({ \
      int64x1_t __ax = __a; \
      union { poly16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -14850,7 +14850,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_p16_f32(__a) \
+#define vreinterpret_p16_f32(__a) __extension__ \
   ({ \
      float32x2_t __ax = __a; \
      union { poly16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -14858,7 +14858,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_p16_u8(__a) \
+#define vreinterpret_p16_u8(__a) __extension__ \
   ({ \
      uint8x8_t __ax = __a; \
      union { poly16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -14866,7 +14866,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_p16_u16(__a) \
+#define vreinterpret_p16_u16(__a) __extension__ \
   ({ \
      uint16x4_t __ax = __a; \
      union { poly16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -14874,7 +14874,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_p16_u32(__a) \
+#define vreinterpret_p16_u32(__a) __extension__ \
   ({ \
      uint32x2_t __ax = __a; \
      union { poly16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -14882,7 +14882,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_p16_u64(__a) \
+#define vreinterpret_p16_u64(__a) __extension__ \
   ({ \
      uint64x1_t __ax = __a; \
      union { poly16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -14890,7 +14890,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_p16_p8(__a) \
+#define vreinterpret_p16_p8(__a) __extension__ \
   ({ \
      poly8x8_t __ax = __a; \
      union { poly16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -14898,7 +14898,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_p16_s8(__a) \
+#define vreinterpretq_p16_s8(__a) __extension__ \
   ({ \
      int8x16_t __ax = __a; \
      union { poly16x8_t __i; __neon_int16x8_t __o; } __rv; \
@@ -14906,7 +14906,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_p16_s16(__a) \
+#define vreinterpretq_p16_s16(__a) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      union { poly16x8_t __i; __neon_int16x8_t __o; } __rv; \
@@ -14914,7 +14914,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_p16_s32(__a) \
+#define vreinterpretq_p16_s32(__a) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      union { poly16x8_t __i; __neon_int16x8_t __o; } __rv; \
@@ -14922,7 +14922,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_p16_s64(__a) \
+#define vreinterpretq_p16_s64(__a) __extension__ \
   ({ \
      int64x2_t __ax = __a; \
      union { poly16x8_t __i; __neon_int16x8_t __o; } __rv; \
@@ -14930,7 +14930,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_p16_f32(__a) \
+#define vreinterpretq_p16_f32(__a) __extension__ \
   ({ \
      float32x4_t __ax = __a; \
      union { poly16x8_t __i; __neon_int16x8_t __o; } __rv; \
@@ -14938,7 +14938,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_p16_u8(__a) \
+#define vreinterpretq_p16_u8(__a) __extension__ \
   ({ \
      uint8x16_t __ax = __a; \
      union { poly16x8_t __i; __neon_int16x8_t __o; } __rv; \
@@ -14946,7 +14946,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_p16_u16(__a) \
+#define vreinterpretq_p16_u16(__a) __extension__ \
   ({ \
      uint16x8_t __ax = __a; \
      union { poly16x8_t __i; __neon_int16x8_t __o; } __rv; \
@@ -14954,7 +14954,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_p16_u32(__a) \
+#define vreinterpretq_p16_u32(__a) __extension__ \
   ({ \
      uint32x4_t __ax = __a; \
      union { poly16x8_t __i; __neon_int16x8_t __o; } __rv; \
@@ -14962,7 +14962,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_p16_u64(__a) \
+#define vreinterpretq_p16_u64(__a) __extension__ \
   ({ \
      uint64x2_t __ax = __a; \
      union { poly16x8_t __i; __neon_int16x8_t __o; } __rv; \
@@ -14970,7 +14970,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_p16_p8(__a) \
+#define vreinterpretq_p16_p8(__a) __extension__ \
   ({ \
      poly8x16_t __ax = __a; \
      union { poly16x8_t __i; __neon_int16x8_t __o; } __rv; \
@@ -14978,7 +14978,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_f32_s8(__a) \
+#define vreinterpret_f32_s8(__a) __extension__ \
   ({ \
      int8x8_t __ax = __a; \
      union { float32x2_t __i; __neon_float32x2_t __o; } __rv; \
@@ -14986,7 +14986,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_f32_s16(__a) \
+#define vreinterpret_f32_s16(__a) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      union { float32x2_t __i; __neon_float32x2_t __o; } __rv; \
@@ -14994,7 +14994,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_f32_s32(__a) \
+#define vreinterpret_f32_s32(__a) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      union { float32x2_t __i; __neon_float32x2_t __o; } __rv; \
@@ -15002,7 +15002,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_f32_s64(__a) \
+#define vreinterpret_f32_s64(__a) __extension__ \
   ({ \
      int64x1_t __ax = __a; \
      union { float32x2_t __i; __neon_float32x2_t __o; } __rv; \
@@ -15010,7 +15010,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_f32_u8(__a) \
+#define vreinterpret_f32_u8(__a) __extension__ \
   ({ \
      uint8x8_t __ax = __a; \
      union { float32x2_t __i; __neon_float32x2_t __o; } __rv; \
@@ -15018,7 +15018,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_f32_u16(__a) \
+#define vreinterpret_f32_u16(__a) __extension__ \
   ({ \
      uint16x4_t __ax = __a; \
      union { float32x2_t __i; __neon_float32x2_t __o; } __rv; \
@@ -15026,7 +15026,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_f32_u32(__a) \
+#define vreinterpret_f32_u32(__a) __extension__ \
   ({ \
      uint32x2_t __ax = __a; \
      union { float32x2_t __i; __neon_float32x2_t __o; } __rv; \
@@ -15034,7 +15034,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_f32_u64(__a) \
+#define vreinterpret_f32_u64(__a) __extension__ \
   ({ \
      uint64x1_t __ax = __a; \
      union { float32x2_t __i; __neon_float32x2_t __o; } __rv; \
@@ -15042,7 +15042,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_f32_p8(__a) \
+#define vreinterpret_f32_p8(__a) __extension__ \
   ({ \
      poly8x8_t __ax = __a; \
      union { float32x2_t __i; __neon_float32x2_t __o; } __rv; \
@@ -15050,7 +15050,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_f32_p16(__a) \
+#define vreinterpret_f32_p16(__a) __extension__ \
   ({ \
      poly16x4_t __ax = __a; \
      union { float32x2_t __i; __neon_float32x2_t __o; } __rv; \
@@ -15058,7 +15058,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_f32_s8(__a) \
+#define vreinterpretq_f32_s8(__a) __extension__ \
   ({ \
      int8x16_t __ax = __a; \
      union { float32x4_t __i; __neon_float32x4_t __o; } __rv; \
@@ -15066,7 +15066,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_f32_s16(__a) \
+#define vreinterpretq_f32_s16(__a) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      union { float32x4_t __i; __neon_float32x4_t __o; } __rv; \
@@ -15074,7 +15074,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_f32_s32(__a) \
+#define vreinterpretq_f32_s32(__a) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      union { float32x4_t __i; __neon_float32x4_t __o; } __rv; \
@@ -15082,7 +15082,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_f32_s64(__a) \
+#define vreinterpretq_f32_s64(__a) __extension__ \
   ({ \
      int64x2_t __ax = __a; \
      union { float32x4_t __i; __neon_float32x4_t __o; } __rv; \
@@ -15090,7 +15090,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_f32_u8(__a) \
+#define vreinterpretq_f32_u8(__a) __extension__ \
   ({ \
      uint8x16_t __ax = __a; \
      union { float32x4_t __i; __neon_float32x4_t __o; } __rv; \
@@ -15098,7 +15098,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_f32_u16(__a) \
+#define vreinterpretq_f32_u16(__a) __extension__ \
   ({ \
      uint16x8_t __ax = __a; \
      union { float32x4_t __i; __neon_float32x4_t __o; } __rv; \
@@ -15106,7 +15106,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_f32_u32(__a) \
+#define vreinterpretq_f32_u32(__a) __extension__ \
   ({ \
      uint32x4_t __ax = __a; \
      union { float32x4_t __i; __neon_float32x4_t __o; } __rv; \
@@ -15114,7 +15114,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_f32_u64(__a) \
+#define vreinterpretq_f32_u64(__a) __extension__ \
   ({ \
      uint64x2_t __ax = __a; \
      union { float32x4_t __i; __neon_float32x4_t __o; } __rv; \
@@ -15122,7 +15122,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_f32_p8(__a) \
+#define vreinterpretq_f32_p8(__a) __extension__ \
   ({ \
      poly8x16_t __ax = __a; \
      union { float32x4_t __i; __neon_float32x4_t __o; } __rv; \
@@ -15130,7 +15130,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_f32_p16(__a) \
+#define vreinterpretq_f32_p16(__a) __extension__ \
   ({ \
      poly16x8_t __ax = __a; \
      union { float32x4_t __i; __neon_float32x4_t __o; } __rv; \
@@ -15138,7 +15138,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_s64_s8(__a) \
+#define vreinterpret_s64_s8(__a) __extension__ \
   ({ \
      int8x8_t __ax = __a; \
      union { int64x1_t __i; __neon_int64x1_t __o; } __rv; \
@@ -15146,7 +15146,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_s64_s16(__a) \
+#define vreinterpret_s64_s16(__a) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      union { int64x1_t __i; __neon_int64x1_t __o; } __rv; \
@@ -15154,7 +15154,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_s64_s32(__a) \
+#define vreinterpret_s64_s32(__a) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      union { int64x1_t __i; __neon_int64x1_t __o; } __rv; \
@@ -15162,7 +15162,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_s64_f32(__a) \
+#define vreinterpret_s64_f32(__a) __extension__ \
   ({ \
      float32x2_t __ax = __a; \
      union { int64x1_t __i; __neon_int64x1_t __o; } __rv; \
@@ -15170,7 +15170,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_s64_u8(__a) \
+#define vreinterpret_s64_u8(__a) __extension__ \
   ({ \
      uint8x8_t __ax = __a; \
      union { int64x1_t __i; __neon_int64x1_t __o; } __rv; \
@@ -15178,7 +15178,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_s64_u16(__a) \
+#define vreinterpret_s64_u16(__a) __extension__ \
   ({ \
      uint16x4_t __ax = __a; \
      union { int64x1_t __i; __neon_int64x1_t __o; } __rv; \
@@ -15186,7 +15186,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_s64_u32(__a) \
+#define vreinterpret_s64_u32(__a) __extension__ \
   ({ \
      uint32x2_t __ax = __a; \
      union { int64x1_t __i; __neon_int64x1_t __o; } __rv; \
@@ -15194,7 +15194,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_s64_u64(__a) \
+#define vreinterpret_s64_u64(__a) __extension__ \
   ({ \
      uint64x1_t __ax = __a; \
      union { int64x1_t __i; __neon_int64x1_t __o; } __rv; \
@@ -15202,7 +15202,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_s64_p8(__a) \
+#define vreinterpret_s64_p8(__a) __extension__ \
   ({ \
      poly8x8_t __ax = __a; \
      union { int64x1_t __i; __neon_int64x1_t __o; } __rv; \
@@ -15210,7 +15210,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_s64_p16(__a) \
+#define vreinterpret_s64_p16(__a) __extension__ \
   ({ \
      poly16x4_t __ax = __a; \
      union { int64x1_t __i; __neon_int64x1_t __o; } __rv; \
@@ -15218,7 +15218,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_s64_s8(__a) \
+#define vreinterpretq_s64_s8(__a) __extension__ \
   ({ \
      int8x16_t __ax = __a; \
      union { int64x2_t __i; __neon_int64x2_t __o; } __rv; \
@@ -15226,7 +15226,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_s64_s16(__a) \
+#define vreinterpretq_s64_s16(__a) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      union { int64x2_t __i; __neon_int64x2_t __o; } __rv; \
@@ -15234,7 +15234,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_s64_s32(__a) \
+#define vreinterpretq_s64_s32(__a) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      union { int64x2_t __i; __neon_int64x2_t __o; } __rv; \
@@ -15242,7 +15242,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_s64_f32(__a) \
+#define vreinterpretq_s64_f32(__a) __extension__ \
   ({ \
      float32x4_t __ax = __a; \
      union { int64x2_t __i; __neon_int64x2_t __o; } __rv; \
@@ -15250,7 +15250,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_s64_u8(__a) \
+#define vreinterpretq_s64_u8(__a) __extension__ \
   ({ \
      uint8x16_t __ax = __a; \
      union { int64x2_t __i; __neon_int64x2_t __o; } __rv; \
@@ -15258,7 +15258,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_s64_u16(__a) \
+#define vreinterpretq_s64_u16(__a) __extension__ \
   ({ \
      uint16x8_t __ax = __a; \
      union { int64x2_t __i; __neon_int64x2_t __o; } __rv; \
@@ -15266,7 +15266,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_s64_u32(__a) \
+#define vreinterpretq_s64_u32(__a) __extension__ \
   ({ \
      uint32x4_t __ax = __a; \
      union { int64x2_t __i; __neon_int64x2_t __o; } __rv; \
@@ -15274,7 +15274,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_s64_u64(__a) \
+#define vreinterpretq_s64_u64(__a) __extension__ \
   ({ \
      uint64x2_t __ax = __a; \
      union { int64x2_t __i; __neon_int64x2_t __o; } __rv; \
@@ -15282,7 +15282,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_s64_p8(__a) \
+#define vreinterpretq_s64_p8(__a) __extension__ \
   ({ \
      poly8x16_t __ax = __a; \
      union { int64x2_t __i; __neon_int64x2_t __o; } __rv; \
@@ -15290,7 +15290,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_s64_p16(__a) \
+#define vreinterpretq_s64_p16(__a) __extension__ \
   ({ \
      poly16x8_t __ax = __a; \
      union { int64x2_t __i; __neon_int64x2_t __o; } __rv; \
@@ -15298,7 +15298,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_u64_s8(__a) \
+#define vreinterpret_u64_s8(__a) __extension__ \
   ({ \
      int8x8_t __ax = __a; \
      union { uint64x1_t __i; __neon_int64x1_t __o; } __rv; \
@@ -15306,7 +15306,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_u64_s16(__a) \
+#define vreinterpret_u64_s16(__a) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      union { uint64x1_t __i; __neon_int64x1_t __o; } __rv; \
@@ -15314,7 +15314,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_u64_s32(__a) \
+#define vreinterpret_u64_s32(__a) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      union { uint64x1_t __i; __neon_int64x1_t __o; } __rv; \
@@ -15322,7 +15322,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_u64_s64(__a) \
+#define vreinterpret_u64_s64(__a) __extension__ \
   ({ \
      int64x1_t __ax = __a; \
      union { uint64x1_t __i; __neon_int64x1_t __o; } __rv; \
@@ -15330,7 +15330,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_u64_f32(__a) \
+#define vreinterpret_u64_f32(__a) __extension__ \
   ({ \
      float32x2_t __ax = __a; \
      union { uint64x1_t __i; __neon_int64x1_t __o; } __rv; \
@@ -15338,7 +15338,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_u64_u8(__a) \
+#define vreinterpret_u64_u8(__a) __extension__ \
   ({ \
      uint8x8_t __ax = __a; \
      union { uint64x1_t __i; __neon_int64x1_t __o; } __rv; \
@@ -15346,7 +15346,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_u64_u16(__a) \
+#define vreinterpret_u64_u16(__a) __extension__ \
   ({ \
      uint16x4_t __ax = __a; \
      union { uint64x1_t __i; __neon_int64x1_t __o; } __rv; \
@@ -15354,7 +15354,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_u64_u32(__a) \
+#define vreinterpret_u64_u32(__a) __extension__ \
   ({ \
      uint32x2_t __ax = __a; \
      union { uint64x1_t __i; __neon_int64x1_t __o; } __rv; \
@@ -15362,7 +15362,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_u64_p8(__a) \
+#define vreinterpret_u64_p8(__a) __extension__ \
   ({ \
      poly8x8_t __ax = __a; \
      union { uint64x1_t __i; __neon_int64x1_t __o; } __rv; \
@@ -15370,7 +15370,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_u64_p16(__a) \
+#define vreinterpret_u64_p16(__a) __extension__ \
   ({ \
      poly16x4_t __ax = __a; \
      union { uint64x1_t __i; __neon_int64x1_t __o; } __rv; \
@@ -15378,7 +15378,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_u64_s8(__a) \
+#define vreinterpretq_u64_s8(__a) __extension__ \
   ({ \
      int8x16_t __ax = __a; \
      union { uint64x2_t __i; __neon_int64x2_t __o; } __rv; \
@@ -15386,7 +15386,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_u64_s16(__a) \
+#define vreinterpretq_u64_s16(__a) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      union { uint64x2_t __i; __neon_int64x2_t __o; } __rv; \
@@ -15394,7 +15394,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_u64_s32(__a) \
+#define vreinterpretq_u64_s32(__a) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      union { uint64x2_t __i; __neon_int64x2_t __o; } __rv; \
@@ -15402,7 +15402,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_u64_s64(__a) \
+#define vreinterpretq_u64_s64(__a) __extension__ \
   ({ \
      int64x2_t __ax = __a; \
      union { uint64x2_t __i; __neon_int64x2_t __o; } __rv; \
@@ -15410,7 +15410,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_u64_f32(__a) \
+#define vreinterpretq_u64_f32(__a) __extension__ \
   ({ \
      float32x4_t __ax = __a; \
      union { uint64x2_t __i; __neon_int64x2_t __o; } __rv; \
@@ -15418,7 +15418,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_u64_u8(__a) \
+#define vreinterpretq_u64_u8(__a) __extension__ \
   ({ \
      uint8x16_t __ax = __a; \
      union { uint64x2_t __i; __neon_int64x2_t __o; } __rv; \
@@ -15426,7 +15426,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_u64_u16(__a) \
+#define vreinterpretq_u64_u16(__a) __extension__ \
   ({ \
      uint16x8_t __ax = __a; \
      union { uint64x2_t __i; __neon_int64x2_t __o; } __rv; \
@@ -15434,7 +15434,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_u64_u32(__a) \
+#define vreinterpretq_u64_u32(__a) __extension__ \
   ({ \
      uint32x4_t __ax = __a; \
      union { uint64x2_t __i; __neon_int64x2_t __o; } __rv; \
@@ -15442,7 +15442,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_u64_p8(__a) \
+#define vreinterpretq_u64_p8(__a) __extension__ \
   ({ \
      poly8x16_t __ax = __a; \
      union { uint64x2_t __i; __neon_int64x2_t __o; } __rv; \
@@ -15450,7 +15450,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_u64_p16(__a) \
+#define vreinterpretq_u64_p16(__a) __extension__ \
   ({ \
      poly16x8_t __ax = __a; \
      union { uint64x2_t __i; __neon_int64x2_t __o; } __rv; \
@@ -15458,7 +15458,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_s8_s16(__a) \
+#define vreinterpret_s8_s16(__a) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -15466,7 +15466,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_s8_s32(__a) \
+#define vreinterpret_s8_s32(__a) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -15474,7 +15474,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_s8_s64(__a) \
+#define vreinterpret_s8_s64(__a) __extension__ \
   ({ \
      int64x1_t __ax = __a; \
      union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -15482,7 +15482,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_s8_f32(__a) \
+#define vreinterpret_s8_f32(__a) __extension__ \
   ({ \
      float32x2_t __ax = __a; \
      union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -15490,7 +15490,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_s8_u8(__a) \
+#define vreinterpret_s8_u8(__a) __extension__ \
   ({ \
      uint8x8_t __ax = __a; \
      union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -15498,7 +15498,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_s8_u16(__a) \
+#define vreinterpret_s8_u16(__a) __extension__ \
   ({ \
      uint16x4_t __ax = __a; \
      union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -15506,7 +15506,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_s8_u32(__a) \
+#define vreinterpret_s8_u32(__a) __extension__ \
   ({ \
      uint32x2_t __ax = __a; \
      union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -15514,7 +15514,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_s8_u64(__a) \
+#define vreinterpret_s8_u64(__a) __extension__ \
   ({ \
      uint64x1_t __ax = __a; \
      union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -15522,7 +15522,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_s8_p8(__a) \
+#define vreinterpret_s8_p8(__a) __extension__ \
   ({ \
      poly8x8_t __ax = __a; \
      union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -15530,7 +15530,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_s8_p16(__a) \
+#define vreinterpret_s8_p16(__a) __extension__ \
   ({ \
      poly16x4_t __ax = __a; \
      union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -15538,7 +15538,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_s8_s16(__a) \
+#define vreinterpretq_s8_s16(__a) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      union { int8x16_t __i; __neon_int8x16_t __o; } __rv; \
@@ -15546,7 +15546,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_s8_s32(__a) \
+#define vreinterpretq_s8_s32(__a) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      union { int8x16_t __i; __neon_int8x16_t __o; } __rv; \
@@ -15554,7 +15554,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_s8_s64(__a) \
+#define vreinterpretq_s8_s64(__a) __extension__ \
   ({ \
      int64x2_t __ax = __a; \
      union { int8x16_t __i; __neon_int8x16_t __o; } __rv; \
@@ -15562,7 +15562,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_s8_f32(__a) \
+#define vreinterpretq_s8_f32(__a) __extension__ \
   ({ \
      float32x4_t __ax = __a; \
      union { int8x16_t __i; __neon_int8x16_t __o; } __rv; \
@@ -15570,7 +15570,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_s8_u8(__a) \
+#define vreinterpretq_s8_u8(__a) __extension__ \
   ({ \
      uint8x16_t __ax = __a; \
      union { int8x16_t __i; __neon_int8x16_t __o; } __rv; \
@@ -15578,7 +15578,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_s8_u16(__a) \
+#define vreinterpretq_s8_u16(__a) __extension__ \
   ({ \
      uint16x8_t __ax = __a; \
      union { int8x16_t __i; __neon_int8x16_t __o; } __rv; \
@@ -15586,7 +15586,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_s8_u32(__a) \
+#define vreinterpretq_s8_u32(__a) __extension__ \
   ({ \
      uint32x4_t __ax = __a; \
      union { int8x16_t __i; __neon_int8x16_t __o; } __rv; \
@@ -15594,7 +15594,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_s8_u64(__a) \
+#define vreinterpretq_s8_u64(__a) __extension__ \
   ({ \
      uint64x2_t __ax = __a; \
      union { int8x16_t __i; __neon_int8x16_t __o; } __rv; \
@@ -15602,7 +15602,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_s8_p8(__a) \
+#define vreinterpretq_s8_p8(__a) __extension__ \
   ({ \
      poly8x16_t __ax = __a; \
      union { int8x16_t __i; __neon_int8x16_t __o; } __rv; \
@@ -15610,7 +15610,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_s8_p16(__a) \
+#define vreinterpretq_s8_p16(__a) __extension__ \
   ({ \
      poly16x8_t __ax = __a; \
      union { int8x16_t __i; __neon_int8x16_t __o; } __rv; \
@@ -15618,7 +15618,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_s16_s8(__a) \
+#define vreinterpret_s16_s8(__a) __extension__ \
   ({ \
      int8x8_t __ax = __a; \
      union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -15626,7 +15626,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_s16_s32(__a) \
+#define vreinterpret_s16_s32(__a) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -15634,7 +15634,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_s16_s64(__a) \
+#define vreinterpret_s16_s64(__a) __extension__ \
   ({ \
      int64x1_t __ax = __a; \
      union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -15642,7 +15642,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_s16_f32(__a) \
+#define vreinterpret_s16_f32(__a) __extension__ \
   ({ \
      float32x2_t __ax = __a; \
      union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -15650,7 +15650,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_s16_u8(__a) \
+#define vreinterpret_s16_u8(__a) __extension__ \
   ({ \
      uint8x8_t __ax = __a; \
      union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -15658,7 +15658,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_s16_u16(__a) \
+#define vreinterpret_s16_u16(__a) __extension__ \
   ({ \
      uint16x4_t __ax = __a; \
      union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -15666,7 +15666,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_s16_u32(__a) \
+#define vreinterpret_s16_u32(__a) __extension__ \
   ({ \
      uint32x2_t __ax = __a; \
      union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -15674,7 +15674,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_s16_u64(__a) \
+#define vreinterpret_s16_u64(__a) __extension__ \
   ({ \
      uint64x1_t __ax = __a; \
      union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -15682,7 +15682,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_s16_p8(__a) \
+#define vreinterpret_s16_p8(__a) __extension__ \
   ({ \
      poly8x8_t __ax = __a; \
      union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -15690,7 +15690,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_s16_p16(__a) \
+#define vreinterpret_s16_p16(__a) __extension__ \
   ({ \
      poly16x4_t __ax = __a; \
      union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -15698,7 +15698,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_s16_s8(__a) \
+#define vreinterpretq_s16_s8(__a) __extension__ \
   ({ \
      int8x16_t __ax = __a; \
      union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \
@@ -15706,7 +15706,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_s16_s32(__a) \
+#define vreinterpretq_s16_s32(__a) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \
@@ -15714,7 +15714,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_s16_s64(__a) \
+#define vreinterpretq_s16_s64(__a) __extension__ \
   ({ \
      int64x2_t __ax = __a; \
      union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \
@@ -15722,7 +15722,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_s16_f32(__a) \
+#define vreinterpretq_s16_f32(__a) __extension__ \
   ({ \
      float32x4_t __ax = __a; \
      union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \
@@ -15730,7 +15730,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_s16_u8(__a) \
+#define vreinterpretq_s16_u8(__a) __extension__ \
   ({ \
      uint8x16_t __ax = __a; \
      union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \
@@ -15738,7 +15738,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_s16_u16(__a) \
+#define vreinterpretq_s16_u16(__a) __extension__ \
   ({ \
      uint16x8_t __ax = __a; \
      union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \
@@ -15746,7 +15746,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_s16_u32(__a) \
+#define vreinterpretq_s16_u32(__a) __extension__ \
   ({ \
      uint32x4_t __ax = __a; \
      union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \
@@ -15754,7 +15754,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_s16_u64(__a) \
+#define vreinterpretq_s16_u64(__a) __extension__ \
   ({ \
      uint64x2_t __ax = __a; \
      union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \
@@ -15762,7 +15762,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_s16_p8(__a) \
+#define vreinterpretq_s16_p8(__a) __extension__ \
   ({ \
      poly8x16_t __ax = __a; \
      union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \
@@ -15770,7 +15770,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_s16_p16(__a) \
+#define vreinterpretq_s16_p16(__a) __extension__ \
   ({ \
      poly16x8_t __ax = __a; \
      union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \
@@ -15778,7 +15778,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_s32_s8(__a) \
+#define vreinterpret_s32_s8(__a) __extension__ \
   ({ \
      int8x8_t __ax = __a; \
      union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \
@@ -15786,7 +15786,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_s32_s16(__a) \
+#define vreinterpret_s32_s16(__a) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \
@@ -15794,7 +15794,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_s32_s64(__a) \
+#define vreinterpret_s32_s64(__a) __extension__ \
   ({ \
      int64x1_t __ax = __a; \
      union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \
@@ -15802,7 +15802,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_s32_f32(__a) \
+#define vreinterpret_s32_f32(__a) __extension__ \
   ({ \
      float32x2_t __ax = __a; \
      union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \
@@ -15810,7 +15810,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_s32_u8(__a) \
+#define vreinterpret_s32_u8(__a) __extension__ \
   ({ \
      uint8x8_t __ax = __a; \
      union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \
@@ -15818,7 +15818,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_s32_u16(__a) \
+#define vreinterpret_s32_u16(__a) __extension__ \
   ({ \
      uint16x4_t __ax = __a; \
      union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \
@@ -15826,7 +15826,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_s32_u32(__a) \
+#define vreinterpret_s32_u32(__a) __extension__ \
   ({ \
      uint32x2_t __ax = __a; \
      union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \
@@ -15834,7 +15834,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_s32_u64(__a) \
+#define vreinterpret_s32_u64(__a) __extension__ \
   ({ \
      uint64x1_t __ax = __a; \
      union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \
@@ -15842,7 +15842,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_s32_p8(__a) \
+#define vreinterpret_s32_p8(__a) __extension__ \
   ({ \
      poly8x8_t __ax = __a; \
      union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \
@@ -15850,7 +15850,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_s32_p16(__a) \
+#define vreinterpret_s32_p16(__a) __extension__ \
   ({ \
      poly16x4_t __ax = __a; \
      union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \
@@ -15858,7 +15858,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_s32_s8(__a) \
+#define vreinterpretq_s32_s8(__a) __extension__ \
   ({ \
      int8x16_t __ax = __a; \
      union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \
@@ -15866,7 +15866,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_s32_s16(__a) \
+#define vreinterpretq_s32_s16(__a) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \
@@ -15874,7 +15874,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_s32_s64(__a) \
+#define vreinterpretq_s32_s64(__a) __extension__ \
   ({ \
      int64x2_t __ax = __a; \
      union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \
@@ -15882,7 +15882,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_s32_f32(__a) \
+#define vreinterpretq_s32_f32(__a) __extension__ \
   ({ \
      float32x4_t __ax = __a; \
      union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \
@@ -15890,7 +15890,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_s32_u8(__a) \
+#define vreinterpretq_s32_u8(__a) __extension__ \
   ({ \
      uint8x16_t __ax = __a; \
      union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \
@@ -15898,7 +15898,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_s32_u16(__a) \
+#define vreinterpretq_s32_u16(__a) __extension__ \
   ({ \
      uint16x8_t __ax = __a; \
      union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \
@@ -15906,7 +15906,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_s32_u32(__a) \
+#define vreinterpretq_s32_u32(__a) __extension__ \
   ({ \
      uint32x4_t __ax = __a; \
      union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \
@@ -15914,7 +15914,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_s32_u64(__a) \
+#define vreinterpretq_s32_u64(__a) __extension__ \
   ({ \
      uint64x2_t __ax = __a; \
      union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \
@@ -15922,7 +15922,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_s32_p8(__a) \
+#define vreinterpretq_s32_p8(__a) __extension__ \
   ({ \
      poly8x16_t __ax = __a; \
      union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \
@@ -15930,7 +15930,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_s32_p16(__a) \
+#define vreinterpretq_s32_p16(__a) __extension__ \
   ({ \
      poly16x8_t __ax = __a; \
      union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \
@@ -15938,7 +15938,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_u8_s8(__a) \
+#define vreinterpret_u8_s8(__a) __extension__ \
   ({ \
      int8x8_t __ax = __a; \
      union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -15946,7 +15946,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_u8_s16(__a) \
+#define vreinterpret_u8_s16(__a) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -15954,7 +15954,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_u8_s32(__a) \
+#define vreinterpret_u8_s32(__a) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -15962,7 +15962,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_u8_s64(__a) \
+#define vreinterpret_u8_s64(__a) __extension__ \
   ({ \
      int64x1_t __ax = __a; \
      union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -15970,7 +15970,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_u8_f32(__a) \
+#define vreinterpret_u8_f32(__a) __extension__ \
   ({ \
      float32x2_t __ax = __a; \
      union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -15978,7 +15978,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_u8_u16(__a) \
+#define vreinterpret_u8_u16(__a) __extension__ \
   ({ \
      uint16x4_t __ax = __a; \
      union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -15986,7 +15986,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_u8_u32(__a) \
+#define vreinterpret_u8_u32(__a) __extension__ \
   ({ \
      uint32x2_t __ax = __a; \
      union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -15994,7 +15994,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_u8_u64(__a) \
+#define vreinterpret_u8_u64(__a) __extension__ \
   ({ \
      uint64x1_t __ax = __a; \
      union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -16002,7 +16002,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_u8_p8(__a) \
+#define vreinterpret_u8_p8(__a) __extension__ \
   ({ \
      poly8x8_t __ax = __a; \
      union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -16010,7 +16010,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_u8_p16(__a) \
+#define vreinterpret_u8_p16(__a) __extension__ \
   ({ \
      poly16x4_t __ax = __a; \
      union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \
@@ -16018,7 +16018,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_u8_s8(__a) \
+#define vreinterpretq_u8_s8(__a) __extension__ \
   ({ \
      int8x16_t __ax = __a; \
      union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \
@@ -16026,7 +16026,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_u8_s16(__a) \
+#define vreinterpretq_u8_s16(__a) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \
@@ -16034,7 +16034,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_u8_s32(__a) \
+#define vreinterpretq_u8_s32(__a) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \
@@ -16042,7 +16042,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_u8_s64(__a) \
+#define vreinterpretq_u8_s64(__a) __extension__ \
   ({ \
      int64x2_t __ax = __a; \
      union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \
@@ -16050,7 +16050,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_u8_f32(__a) \
+#define vreinterpretq_u8_f32(__a) __extension__ \
   ({ \
      float32x4_t __ax = __a; \
      union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \
@@ -16058,7 +16058,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_u8_u16(__a) \
+#define vreinterpretq_u8_u16(__a) __extension__ \
   ({ \
      uint16x8_t __ax = __a; \
      union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \
@@ -16066,7 +16066,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_u8_u32(__a) \
+#define vreinterpretq_u8_u32(__a) __extension__ \
   ({ \
      uint32x4_t __ax = __a; \
      union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \
@@ -16074,7 +16074,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_u8_u64(__a) \
+#define vreinterpretq_u8_u64(__a) __extension__ \
   ({ \
      uint64x2_t __ax = __a; \
      union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \
@@ -16082,7 +16082,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_u8_p8(__a) \
+#define vreinterpretq_u8_p8(__a) __extension__ \
   ({ \
      poly8x16_t __ax = __a; \
      union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \
@@ -16090,7 +16090,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_u8_p16(__a) \
+#define vreinterpretq_u8_p16(__a) __extension__ \
   ({ \
      poly16x8_t __ax = __a; \
      union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \
@@ -16098,7 +16098,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_u16_s8(__a) \
+#define vreinterpret_u16_s8(__a) __extension__ \
   ({ \
      int8x8_t __ax = __a; \
      union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -16106,7 +16106,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_u16_s16(__a) \
+#define vreinterpret_u16_s16(__a) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -16114,7 +16114,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_u16_s32(__a) \
+#define vreinterpret_u16_s32(__a) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -16122,7 +16122,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_u16_s64(__a) \
+#define vreinterpret_u16_s64(__a) __extension__ \
   ({ \
      int64x1_t __ax = __a; \
      union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -16130,7 +16130,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_u16_f32(__a) \
+#define vreinterpret_u16_f32(__a) __extension__ \
   ({ \
      float32x2_t __ax = __a; \
      union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -16138,7 +16138,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_u16_u8(__a) \
+#define vreinterpret_u16_u8(__a) __extension__ \
   ({ \
      uint8x8_t __ax = __a; \
      union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -16146,7 +16146,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_u16_u32(__a) \
+#define vreinterpret_u16_u32(__a) __extension__ \
   ({ \
      uint32x2_t __ax = __a; \
      union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -16154,7 +16154,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_u16_u64(__a) \
+#define vreinterpret_u16_u64(__a) __extension__ \
   ({ \
      uint64x1_t __ax = __a; \
      union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -16162,7 +16162,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_u16_p8(__a) \
+#define vreinterpret_u16_p8(__a) __extension__ \
   ({ \
      poly8x8_t __ax = __a; \
      union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -16170,7 +16170,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_u16_p16(__a) \
+#define vreinterpret_u16_p16(__a) __extension__ \
   ({ \
      poly16x4_t __ax = __a; \
      union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \
@@ -16178,7 +16178,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_u16_s8(__a) \
+#define vreinterpretq_u16_s8(__a) __extension__ \
   ({ \
      int8x16_t __ax = __a; \
      union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \
@@ -16186,7 +16186,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_u16_s16(__a) \
+#define vreinterpretq_u16_s16(__a) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \
@@ -16194,7 +16194,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_u16_s32(__a) \
+#define vreinterpretq_u16_s32(__a) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \
@@ -16202,7 +16202,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_u16_s64(__a) \
+#define vreinterpretq_u16_s64(__a) __extension__ \
   ({ \
      int64x2_t __ax = __a; \
      union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \
@@ -16210,7 +16210,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_u16_f32(__a) \
+#define vreinterpretq_u16_f32(__a) __extension__ \
   ({ \
      float32x4_t __ax = __a; \
      union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \
@@ -16218,7 +16218,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_u16_u8(__a) \
+#define vreinterpretq_u16_u8(__a) __extension__ \
   ({ \
      uint8x16_t __ax = __a; \
      union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \
@@ -16226,7 +16226,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_u16_u32(__a) \
+#define vreinterpretq_u16_u32(__a) __extension__ \
   ({ \
      uint32x4_t __ax = __a; \
      union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \
@@ -16234,7 +16234,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_u16_u64(__a) \
+#define vreinterpretq_u16_u64(__a) __extension__ \
   ({ \
      uint64x2_t __ax = __a; \
      union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \
@@ -16242,7 +16242,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_u16_p8(__a) \
+#define vreinterpretq_u16_p8(__a) __extension__ \
   ({ \
      poly8x16_t __ax = __a; \
      union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \
@@ -16250,7 +16250,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_u16_p16(__a) \
+#define vreinterpretq_u16_p16(__a) __extension__ \
   ({ \
      poly16x8_t __ax = __a; \
      union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \
@@ -16258,7 +16258,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_u32_s8(__a) \
+#define vreinterpret_u32_s8(__a) __extension__ \
   ({ \
      int8x8_t __ax = __a; \
      union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \
@@ -16266,7 +16266,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_u32_s16(__a) \
+#define vreinterpret_u32_s16(__a) __extension__ \
   ({ \
      int16x4_t __ax = __a; \
      union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \
@@ -16274,7 +16274,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_u32_s32(__a) \
+#define vreinterpret_u32_s32(__a) __extension__ \
   ({ \
      int32x2_t __ax = __a; \
      union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \
@@ -16282,7 +16282,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_u32_s64(__a) \
+#define vreinterpret_u32_s64(__a) __extension__ \
   ({ \
      int64x1_t __ax = __a; \
      union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \
@@ -16290,7 +16290,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_u32_f32(__a) \
+#define vreinterpret_u32_f32(__a) __extension__ \
   ({ \
      float32x2_t __ax = __a; \
      union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \
@@ -16298,7 +16298,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_u32_u8(__a) \
+#define vreinterpret_u32_u8(__a) __extension__ \
   ({ \
      uint8x8_t __ax = __a; \
      union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \
@@ -16306,7 +16306,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_u32_u16(__a) \
+#define vreinterpret_u32_u16(__a) __extension__ \
   ({ \
      uint16x4_t __ax = __a; \
      union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \
@@ -16314,7 +16314,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_u32_u64(__a) \
+#define vreinterpret_u32_u64(__a) __extension__ \
   ({ \
      uint64x1_t __ax = __a; \
      union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \
@@ -16322,7 +16322,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_u32_p8(__a) \
+#define vreinterpret_u32_p8(__a) __extension__ \
   ({ \
      poly8x8_t __ax = __a; \
      union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \
@@ -16330,7 +16330,7 @@
      __rv.__i; \
    })
 
-#define vreinterpret_u32_p16(__a) \
+#define vreinterpret_u32_p16(__a) __extension__ \
   ({ \
      poly16x4_t __ax = __a; \
      union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \
@@ -16338,7 +16338,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_u32_s8(__a) \
+#define vreinterpretq_u32_s8(__a) __extension__ \
   ({ \
      int8x16_t __ax = __a; \
      union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \
@@ -16346,7 +16346,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_u32_s16(__a) \
+#define vreinterpretq_u32_s16(__a) __extension__ \
   ({ \
      int16x8_t __ax = __a; \
      union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \
@@ -16354,7 +16354,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_u32_s32(__a) \
+#define vreinterpretq_u32_s32(__a) __extension__ \
   ({ \
      int32x4_t __ax = __a; \
      union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \
@@ -16362,7 +16362,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_u32_s64(__a) \
+#define vreinterpretq_u32_s64(__a) __extension__ \
   ({ \
      int64x2_t __ax = __a; \
      union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \
@@ -16370,7 +16370,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_u32_f32(__a) \
+#define vreinterpretq_u32_f32(__a) __extension__ \
   ({ \
      float32x4_t __ax = __a; \
      union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \
@@ -16378,7 +16378,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_u32_u8(__a) \
+#define vreinterpretq_u32_u8(__a) __extension__ \
   ({ \
      uint8x16_t __ax = __a; \
      union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \
@@ -16386,7 +16386,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_u32_u16(__a) \
+#define vreinterpretq_u32_u16(__a) __extension__ \
   ({ \
      uint16x8_t __ax = __a; \
      union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \
@@ -16394,7 +16394,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_u32_u64(__a) \
+#define vreinterpretq_u32_u64(__a) __extension__ \
   ({ \
      uint64x2_t __ax = __a; \
      union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \
@@ -16402,7 +16402,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_u32_p8(__a) \
+#define vreinterpretq_u32_p8(__a) __extension__ \
   ({ \
      poly8x16_t __ax = __a; \
      union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \
@@ -16410,7 +16410,7 @@
      __rv.__i; \
    })
 
-#define vreinterpretq_u32_p16(__a) \
+#define vreinterpretq_u32_p16(__a) __extension__ \
   ({ \
      poly16x8_t __ax = __a; \
      union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \




From stoklund at 2pi.dk  Wed Apr  7 11:45:18 2010
From: stoklund at 2pi.dk (Jakob Stoklund Olesen)
Date: Wed, 07 Apr 2010 16:45:18 -0000
Subject: [llvm-commits] [llvm-gcc-4.2] r100632 -
	/llvm-gcc-4.2/trunk/gcc/llvm-types.cpp
Message-ID: <20100407164518.0FCB12A6C12C@llvm.org>

Author: stoklund
Date: Wed Apr  7 11:45:17 2010
New Revision: 100632

URL: http://llvm.org/viewvc/llvm-project?rev=100632&view=rev
Log:
Create larger struct fields for consecutive bitfields.

When creating an LLVM struct type for a struct with bitfields, try to allocate
integer fields that are as large as possible, taking the following bitfields
into account.

For example, this struct:

  struct S {
    int a, b;
    void *c;
    unsigned d : 8;
    unsigned e : 8;
  };

used to be:

  %struct.S = type { i32, i32, i8*, i8, i8 },

but now it becomes:

  %struct.S = type { i32, i32, i8*, i16 }

This makes it easier for the code generator to shuffle register sized bitfield
groups around in fewer registers. It also means that it is harder for SROA to
isolate bit fields as scalars.

The larger struct fields are only produced when they would be properly aligned
and smaller than the natural machine word size.

This strategy is similar to what clang will be doing. We will see how it works
out.

Modified:
    llvm-gcc-4.2/trunk/gcc/llvm-types.cpp

Modified: llvm-gcc-4.2/trunk/gcc/llvm-types.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/gcc/llvm-types.cpp?rev=100632&r1=100631&r2=100632&view=diff
==============================================================================
--- llvm-gcc-4.2/trunk/gcc/llvm-types.cpp (original)
+++ llvm-gcc-4.2/trunk/gcc/llvm-types.cpp Wed Apr  7 11:45:17 2010
@@ -1620,7 +1620,8 @@
     return ~0U;
   }
 
-  void addNewBitField(uint64_t Size, uint64_t FirstUnallocatedByte);
+  void addNewBitField(uint64_t Size, uint64_t Extra,
+                      uint64_t FirstUnallocatedByte);
 
   void dump() const;
 };
@@ -1628,22 +1629,43 @@
 // Add new element which is a bit field. Size is not the size of bit field,
 // but size of bits required to determine type of new Field which will be
 // used to access this bit field.
-void StructTypeConversionInfo::addNewBitField(uint64_t Size,
+// If possible, allocate a field with room for Size+Extra bits.
+void StructTypeConversionInfo::addNewBitField(uint64_t Size, uint64_t Extra,
                                               uint64_t FirstUnallocatedByte) {
 
   // Figure out the LLVM type that we will use for the new field.
   // Note, Size is not necessarily size of the new field. It indicates
   // additional bits required after FirstunallocatedByte to cover new field.
-  const Type *NewFieldTy;
-  if (Size <= 8)
-    NewFieldTy = Type::getInt8Ty(Context);
-  else if (Size <= 16)
-    NewFieldTy = Type::getInt16Ty(Context);
-  else if (Size <= 32)
-    NewFieldTy = Type::getInt32Ty(Context);
-  else {
-    assert(Size <= 64 && "Bitfield too large!");
-    NewFieldTy = Type::getInt64Ty(Context);
+  const Type *NewFieldTy = 0;
+
+  // First try an ABI-aligned field including (some of) the Extra bits.
+  // This field must satisfy Size <= w && w <= XSize.
+  uint64_t XSize = RoundUpToAlignment(Size + Extra, 8);
+  for (unsigned w = NextPowerOf2(std::min(UINT64_C(64), XSize)/2);
+       w >= Size && w >= 8; w /= 2) {
+    if (TD.isIllegalInteger(w))
+      continue;
+    // Would a w-sized integer field be aligned here?
+    const unsigned a = TD.getABIIntegerTypeAlignment(w);
+    if (FirstUnallocatedByte & (a-1) || a > getGCCStructAlignmentInBytes())
+      continue;
+    // OK, use w-sized integer.
+    NewFieldTy = IntegerType::get(Context, w);
+    break;
+  }
+
+  // Try an integer field that holds Size bits.
+  if (!NewFieldTy) {
+    if (Size <= 8)
+      NewFieldTy = Type::getInt8Ty(Context);
+    else if (Size <= 16)
+      NewFieldTy = Type::getInt16Ty(Context);
+    else if (Size <= 32)
+      NewFieldTy = Type::getInt32Ty(Context);
+    else {
+      assert(Size <= 64 && "Bitfield too large!");
+      NewFieldTy = Type::getInt64Ty(Context);
+    }
   }
 
   // Check that the alignment of NewFieldTy won't cause a gap in the structure!
@@ -1987,7 +2009,19 @@
   // LLVM struct such that there are no holes in the struct where the bitfield
   // is: these holes would make it impossible to statically initialize a global
   // of this type that has an initializer for the bitfield.
-  
+
+  // We want the integer-typed fields as large as possible up to the machine
+  // word size. If there are more bitfields following this one, try to include
+  // them in the same field.
+
+  // Calculate the total number of bits in the continuous group of bitfields
+  // following this one. This is the number of bits that addNewBitField should
+  // try to include.
+  unsigned ExtraSizeInBits = 0;
+  for (tree f = TREE_CHAIN(Field); f && ExtraSizeInBits < 64 && isBitfield(f);
+       f = TREE_CHAIN(f))
+    ExtraSizeInBits += TREE_INT_CST_LOW(DECL_SIZE(f));
+
   // Compute the number of bits that we need to add to this struct to cover
   // this field.
   uint64_t FirstUnallocatedByte = Info.getEndUnallocatedByte();
@@ -2001,7 +2035,7 @@
       // This field starts at byte boundry. Need to allocate space
       // for additional bytes not yet allocated.
       unsigned NumBitsToAdd = FieldSizeInBits - AvailableBits;
-      Info.addNewBitField(NumBitsToAdd, FirstUnallocatedByte);
+      Info.addNewBitField(NumBitsToAdd, ExtraSizeInBits, FirstUnallocatedByte);
       return;
     }
 
@@ -2025,7 +2059,7 @@
       for (unsigned idx = 0; idx < (prevFieldTypeSizeInBits/8); ++idx)
 	FirstUnallocatedByte--;
     }
-    Info.addNewBitField(NumBitsRequired, FirstUnallocatedByte);
+    Info.addNewBitField(NumBitsRequired, ExtraSizeInBits, FirstUnallocatedByte);
     // Do this after adding Field.
     Info.lastFieldStartsAtNonByteBoundry(true);
     return;
@@ -2059,7 +2093,7 @@
   }
 
   // Now, Field starts at FirstUnallocatedByte and everything is aligned.
-  Info.addNewBitField(FieldSizeInBits, FirstUnallocatedByte);
+  Info.addNewBitField(FieldSizeInBits, ExtraSizeInBits, FirstUnallocatedByte);
 }
 
 /// UnionHasOnlyZeroOffsets - Check if a union type has only members with
@@ -2131,7 +2165,7 @@
 
     if (isBitfield(UnionField)) {
       unsigned FieldSizeInBits = TREE_INT_CST_LOW(DECL_SIZE(UnionField));
-      Info.addNewBitField(FieldSizeInBits, 0);
+      Info.addNewBitField(FieldSizeInBits, 0, 0);
     } else {
       Info.allFieldsAreNotBitFields();
       Info.addElement(UnionTy, 0, Info.getTypeSize(UnionTy));




From baldrick at free.fr  Wed Apr  7 12:02:33 2010
From: baldrick at free.fr (Duncan Sands)
Date: Wed, 07 Apr 2010 19:02:33 +0200
Subject: [llvm-commits] [llvm-gcc-4.2] r100632
	-	/llvm-gcc-4.2/trunk/gcc/llvm-types.cpp
In-Reply-To: <20100407164518.0FCB12A6C12C@llvm.org>
References: <20100407164518.0FCB12A6C12C@llvm.org>
Message-ID: <4BBCBAA9.2020802@free.fr>

Hi Jakob,

> For example, this struct:
>
>    struct S {
>      int a, b;
>      void *c;
>      unsigned d : 8;
>      unsigned e : 8;
>    };
>
> used to be:
>
>    %struct.S = type { i32, i32, i8*, i8, i8 },
>
> but now it becomes:
>
>    %struct.S = type { i32, i32, i8*, i16 }

this means that you can't write S.d and S.e separately, so if one thread is
writing to S.d while another writes to S.e then you are in trouble.  Also, if
this type is describing a memory mapped I/O region then writing or reading both
d and e when the original code only touches one of them is bad.  I appreciate
that the C standard doesn't require this kind of thing to work, but I'm pretty
sure the Ada standard does [*].  (That said, what llvm-gcc did before was
already broken for Ada, due to this kind of thing).

I would much rather see reads and writes touch as little as possible, and
have the optimizers combine them if they can prove that it is safe.

Another possibility is to handle volatile reads and writes specially, making
only these be minimal.

Ciao,

Duncan.

[*] The Ada standard requires reads and writes marked atomic to be rejected
as an error by the compiler if it isn't able to turn them into appropriate
atomic processor operations.  The Ada front-end "knows" what kinds of things
gcc will turn into atomic operations, and so knows what it has to reject.
Unfortunately llvm-gcc doesn't make exactly the same decisions...


From stoklund at 2pi.dk  Wed Apr  7 12:19:07 2010
From: stoklund at 2pi.dk (Jakob Stoklund Olesen)
Date: Wed, 7 Apr 2010 10:19:07 -0700
Subject: [llvm-commits] [llvm-gcc-4.2] r100632
	-	/llvm-gcc-4.2/trunk/gcc/llvm-types.cpp
In-Reply-To: <4BBCBAA9.2020802@free.fr>
References: <20100407164518.0FCB12A6C12C@llvm.org> <4BBCBAA9.2020802@free.fr>
Message-ID: <05AB1A9F-1C8D-4F77-AE0B-7555D620260B@2pi.dk>


On Apr 7, 2010, at 10:02 AM, Duncan Sands wrote:

> Hi Jakob,
> 
>> For example, this struct:
>> 
>>   struct S {
>>     int a, b;
>>     void *c;
>>     unsigned d : 8;
>>     unsigned e : 8;
>>   };
>> 
>> used to be:
>> 
>>   %struct.S = type { i32, i32, i8*, i8, i8 },
>> 
>> but now it becomes:
>> 
>>   %struct.S = type { i32, i32, i8*, i16 }
> 
> this means that you can't write S.d and S.e separately, so if one thread is
> writing to S.d while another writes to S.e then you are in trouble.  Also, if
> this type is describing a memory mapped I/O region then writing or reading both
> d and e when the original code only touches one of them is bad.  I appreciate
> that the C standard doesn't require this kind of thing to work, but I'm pretty
> sure the Ada standard does [*].  (That said, what llvm-gcc did before was
> already broken for Ada, due to this kind of thing).

You want atomic bitfield access??

It sounds to me like the Ada front end should convert bit fields to integer fields if they require atomic access.

Did it work before? The code generated for the example above used 32-bit loads and stores before my change.

> [*] The Ada standard requires reads and writes marked atomic to be rejected
> as an error by the compiler if it isn't able to turn them into appropriate
> atomic processor operations.  The Ada front-end "knows" what kinds of things
> gcc will turn into atomic operations, and so knows what it has to reject.
> Unfortunately llvm-gcc doesn't make exactly the same decisions...

So the model goes from broken to broken with this change. That's not a regression ;-)




From dalej at apple.com  Wed Apr  7 12:30:54 2010
From: dalej at apple.com (Dale Johannesen)
Date: Wed, 07 Apr 2010 17:30:54 -0000
Subject: [llvm-commits] [test-suite] r100634 -
	/test-suite/trunk/TEST.optllcdbg.Makefile
Message-ID: <20100407173054.9B6412A6C12C@llvm.org>

Author: johannes
Date: Wed Apr  7 12:30:54 2010
New Revision: 100634

URL: http://llvm.org/viewvc/llvm-project?rev=100634&view=rev
Log:
Keep some more intermediate files around.


Modified:
    test-suite/trunk/TEST.optllcdbg.Makefile

Modified: test-suite/trunk/TEST.optllcdbg.Makefile
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/TEST.optllcdbg.Makefile?rev=100634&r1=100633&r2=100634&view=diff
==============================================================================
--- test-suite/trunk/TEST.optllcdbg.Makefile (original)
+++ test-suite/trunk/TEST.optllcdbg.Makefile Wed Apr  7 12:30:54 2010
@@ -18,7 +18,7 @@
 TARGET_FLAGS = -g -O0
 LLC_DEBUG_FLAGS = -O3
 OPT_FLAGS = -std-compile-opts
-.PRECIOUS: Output/%.first.s Output/%.second.s Output/%.t2c.s Output/%.t1c.s Output/%.t2b.bc Output/%.t1b.bc
+.PRECIOUS: Output/%.first.s Output/%.second.s Output/%.t2c.s Output/%.t1c.s Output/%.t2b.bc Output/%.t1b.bc Output/%.t1a.bc Output/%.t2a.bc
 
 $(PROGRAMS_TO_TEST:%=test.$(TEST).%): \
 test.$(TEST).%: Output/%.diff




From sabre at nondot.org  Wed Apr  7 13:03:19 2010
From: sabre at nondot.org (Chris Lattner)
Date: Wed, 07 Apr 2010 18:03:19 -0000
Subject: [llvm-commits] [llvm] r100636 - in /llvm/trunk:
 lib/CodeGen/MachineInstr.cpp test/CodeGen/X86/crash.ll
Message-ID: <20100407180319.848B72A6C12C@llvm.org>

Author: lattner
Date: Wed Apr  7 13:03:19 2010
New Revision: 100636

URL: http://llvm.org/viewvc/llvm-project?rev=100636&view=rev
Log:
fix a latent bug my inline asm stuff exposed: 
MachineOperand::isIdenticalTo wasn't handling metadata operands.

Modified:
    llvm/trunk/lib/CodeGen/MachineInstr.cpp
    llvm/trunk/test/CodeGen/X86/crash.ll

Modified: llvm/trunk/lib/CodeGen/MachineInstr.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MachineInstr.cpp?rev=100636&r1=100635&r2=100636&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/MachineInstr.cpp (original)
+++ llvm/trunk/lib/CodeGen/MachineInstr.cpp Wed Apr  7 13:03:19 2010
@@ -192,6 +192,8 @@
     return getBlockAddress() == Other.getBlockAddress();
   case MachineOperand::MO_MCSymbol:
     return getMCSymbol() == Other.getMCSymbol();
+  case MachineOperand::MO_Metadata:
+    return getMetadata() == Other.getMetadata();
   }
 }
 

Modified: llvm/trunk/test/CodeGen/X86/crash.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/crash.ll?rev=100636&r1=100635&r2=100636&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/crash.ll (original)
+++ llvm/trunk/test/CodeGen/X86/crash.ll Wed Apr  7 13:03:19 2010
@@ -92,3 +92,19 @@
 }
 
 
+; Crash commoning identical asms.
+define void @test6(i1 %C) nounwind optsize ssp {
+entry:
+  br i1 %C, label %do.body55, label %do.body92
+
+do.body55:                                        ; preds = %if.else36
+  call void asm sideeffect "foo", "~{dirflag},~{fpsr},~{flags}"() nounwind, !srcloc !0
+  ret void
+
+do.body92:                                        ; preds = %if.then66
+  call void asm sideeffect "foo", "~{dirflag},~{fpsr},~{flags}"() nounwind, !srcloc !1
+  ret void
+}
+
+!0 = metadata !{i32 633550}                       
+!1 = metadata !{i32 634261}                       




From sabre at nondot.org  Wed Apr  7 13:04:56 2010
From: sabre at nondot.org (Chris Lattner)
Date: Wed, 07 Apr 2010 18:04:56 -0000
Subject: [llvm-commits] [llvm] r100637 -
	/llvm/trunk/test/CodeGen/X86/crash.ll
Message-ID: <20100407180456.755882A6C12C@llvm.org>

Author: lattner
Date: Wed Apr  7 13:04:56 2010
New Revision: 100637

URL: http://llvm.org/viewvc/llvm-project?rev=100637&view=rev
Log:
this has a pr!

Modified:
    llvm/trunk/test/CodeGen/X86/crash.ll

Modified: llvm/trunk/test/CodeGen/X86/crash.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/crash.ll?rev=100637&r1=100636&r2=100637&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/crash.ll (original)
+++ llvm/trunk/test/CodeGen/X86/crash.ll Wed Apr  7 13:04:56 2010
@@ -93,6 +93,7 @@
 
 
 ; Crash commoning identical asms.
+; PR6803
 define void @test6(i1 %C) nounwind optsize ssp {
 entry:
   br i1 %C, label %do.body55, label %do.body92




From sabre at nondot.org  Wed Apr  7 13:10:38 2010
From: sabre at nondot.org (Chris Lattner)
Date: Wed, 07 Apr 2010 18:10:38 -0000
Subject: [llvm-commits] [llvm] r100638 -
	/llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
Message-ID: <20100407181038.5EF7D2A6C12C@llvm.org>

Author: lattner
Date: Wed Apr  7 13:10:38 2010
New Revision: 100638

URL: http://llvm.org/viewvc/llvm-project?rev=100638&view=rev
Log:
add a comment line that got dropped

Modified:
    llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp

Modified: llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp?rev=100638&r1=100637&r2=100638&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp (original)
+++ llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp Wed Apr  7 13:10:38 2010
@@ -1,3 +1,4 @@
+//===-- AsmPrinter.cpp - Common AsmPrinter code ---------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -44,11 +45,9 @@
 #include "llvm/Support/Timer.h"
 using namespace llvm;
 
-namespace {
-  const char *DWARFGroupName = "DWARF Emission";
-  const char *DbgTimerName = "DWARF Debug Writer";
-  const char *EHTimerName = "DWARF Exception Writer";
-} // end anonymous namespace
+static const char *DWARFGroupName = "DWARF Emission";
+static const char *DbgTimerName = "DWARF Debug Writer";
+static const char *EHTimerName = "DWARF Exception Writer";
 
 STATISTIC(EmittedInsts, "Number of machine instrs printed");
 




From clattner at apple.com  Wed Apr  7 13:13:07 2010
From: clattner at apple.com (Chris Lattner)
Date: Wed, 7 Apr 2010 11:13:07 -0700
Subject: [llvm-commits] [llvm-gcc-4.2] r100632	-
	/llvm-gcc-4.2/trunk/gcc/llvm-types.cpp
In-Reply-To: <4BBCBAA9.2020802@free.fr>
References: <20100407164518.0FCB12A6C12C@llvm.org> <4BBCBAA9.2020802@free.fr>
Message-ID: <4B5B0F7F-1D15-4AE8-A76C-479E235FE7B5@apple.com>

On Apr 7, 2010, at 10:02 AM, Duncan Sands wrote:
> this means that you can't write S.d and S.e separately, so if one thread is
> writing to S.d while another writes to S.e then you are in trouble.  Also, if
> this type is describing a memory mapped I/O region then writing or reading both
> d and e when the original code only touches one of them is bad.  I appreciate
> that the C standard doesn't require this kind of thing to work, but I'm pretty
> sure the Ada standard does [*].  (That said, what llvm-gcc did before was
> already broken for Ada, due to this kind of thing).
> 
> I would much rather see reads and writes touch as little as possible, and
> have the optimizers combine them if they can prove that it is safe.

Bitfields in C and C++ (even in C++'0x with the new memory model) don't require this.  It is much much better for performance to do what Jakob is doing, and GCC already does it.  I don't know anything about the Ada case though.

-Chris


From sabre at nondot.org  Wed Apr  7 13:13:33 2010
From: sabre at nondot.org (Chris Lattner)
Date: Wed, 07 Apr 2010 18:13:33 -0000
Subject: [llvm-commits] [llvm] r100639 -
	/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Message-ID: <20100407181333.7F77E2A6C12C@llvm.org>

Author: lattner
Date: Wed Apr  7 13:13:33 2010
New Revision: 100639

URL: http://llvm.org/viewvc/llvm-project?rev=100639&view=rev
Log:
fix 80 col violation, patch by Alastair Lynn

Modified:
    llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=100639&r1=100638&r2=100639&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Wed Apr  7 13:13:33 2010
@@ -3779,7 +3779,8 @@
   if (N0.getOpcode() == ISD::TRUNCATE)
     return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0.getOperand(0));
   // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
-  if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::SIGN_EXTEND||
+  if (N0.getOpcode() == ISD::ZERO_EXTEND ||
+      N0.getOpcode() == ISD::SIGN_EXTEND ||
       N0.getOpcode() == ISD::ANY_EXTEND) {
     if (N0.getOperand(0).getValueType().bitsLT(VT))
       // if the source is smaller than the dest, we still need an extend




From clattner at apple.com  Wed Apr  7 13:14:24 2010
From: clattner at apple.com (Chris Lattner)
Date: Wed, 7 Apr 2010 11:14:24 -0700
Subject: [llvm-commits] Trivial patch to fix an 80 column violation in
	DAGCombiner.cpp
In-Reply-To: <70DB0CB9-2A51-4D07-823A-8D1C67846D60@gmail.com>
References: <70DB0CB9-2A51-4D07-823A-8D1C67846D60@gmail.com>
Message-ID: <7B967EDC-2B6D-4378-AA86-8CA764D46CF9@apple.com>

Applied in r100639, thanks!

On Apr 7, 2010, at 6:14 AM, Alastair Lynn wrote:

> Hi-
> 
> Just noticed this as I was looking through.
> 
> Alastair
> 
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits



From isanbard at gmail.com  Wed Apr  7 13:18:02 2010
From: isanbard at gmail.com (Bill Wendling)
Date: Wed, 7 Apr 2010 11:18:02 -0700
Subject: [llvm-commits] [llvm] r100616 - in
	/llvm/trunk/lib/CodeGen/AsmPrinter: DwarfDebug.cpp
	DwarfDebug.h DwarfException.cpp DwarfException.h
In-Reply-To: <4BBC6297.1070400@gmail.com>
References: <20100407092804.B65FE2A6C12C@llvm.org>
	<4BBC5C78.6050108@gmail.com> <4BBC6297.1070400@gmail.com>
Message-ID: 

On Apr 7, 2010, at 3:46 AM, Török Edwin wrote:

> On 04/07/2010 01:20 PM, Török Edwin wrote:
>> On 04/07/2010 12:28 PM, Bill Wendling wrote:
>>> Author: void
>>> Date: Wed Apr  7 04:28:04 2010
>>> New Revision: 100616
>>> 
>>> URL: http://llvm.org/viewvc/llvm-project?rev=100616&view=rev
>>> Log:
>>> Use the "NamedGroupTimer" class to categorize DWARF emission better.
>> 
>> Hi Bill,
>> 
>> Looks like this change broke most of the buildbots, because the dwarf
>> timers are now shown unconditionally on stderr when llc quits, see for
>> example:
>> http://google1.osuosl.org:8011/builders/clang-i686-linux/builds/6270/steps/test-llvm/logs/2007-03-07-combinercrash.ll
>> 
> 
Gah! I shouldn't try to program at 2AM. :-(

> I guarded all your timer creations with TimePassesIsEnabled in r100618,
> this should unbreak the buildbots.

Thanks. I'll check them out. Sorry for the breakage.

-bw




From asl at math.spbu.ru  Wed Apr  7 13:18:42 2010
From: asl at math.spbu.ru (Anton Korobeynikov)
Date: Wed, 07 Apr 2010 18:18:42 -0000
Subject: [llvm-commits] [llvm] r100640 - in /llvm/trunk:
 include/llvm/Target/TargetMachine.h lib/CodeGen/LLVMTargetMachine.cpp
Message-ID: <20100407181842.9D7192A6C12C@llvm.org>

Author: asl
Date: Wed Apr  7 13:18:42 2010
New Revision: 100640

URL: http://llvm.org/viewvc/llvm-project?rev=100640&view=rev
Log:
Add hook to insert late LLVM=>LLVM passes just before isel

Modified:
    llvm/trunk/include/llvm/Target/TargetMachine.h
    llvm/trunk/lib/CodeGen/LLVMTargetMachine.cpp

Modified: llvm/trunk/include/llvm/Target/TargetMachine.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetMachine.h?rev=100640&r1=100639&r2=100640&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Target/TargetMachine.h (original)
+++ llvm/trunk/include/llvm/Target/TargetMachine.h Wed Apr  7 13:18:42 2010
@@ -264,10 +264,15 @@
                                           bool DisableVerify = true);
   
   /// Target-Independent Code Generator Pass Configuration Options.
-  
-  /// addInstSelector - This method should add any "last minute" LLVM->LLVM
-  /// passes, then install an instruction selector pass, which converts from
-  /// LLVM code to machine instructions.
+
+  /// addPreISelPasses - This method should add any "last minute" LLVM->LLVM
+  /// passes (which are run just before instruction selector).
+  virtual bool addPreISel(PassManagerBase &, CodeGenOpt::Level) {
+    return true;
+  }
+
+  /// addInstSelector - This method should install an instruction selector pass,
+  /// which converts from LLVM code to machine instructions.
   virtual bool addInstSelector(PassManagerBase &, CodeGenOpt::Level) {
     return true;
   }

Modified: llvm/trunk/lib/CodeGen/LLVMTargetMachine.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/LLVMTargetMachine.cpp?rev=100640&r1=100639&r2=100640&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/LLVMTargetMachine.cpp (original)
+++ llvm/trunk/lib/CodeGen/LLVMTargetMachine.cpp Wed Apr  7 13:18:42 2010
@@ -268,6 +268,8 @@
 
   PM.add(createStackProtectorPass(getTargetLowering()));
 
+  addPreISel(PM, OptLevel);
+
   if (PrintISelInput)
     PM.add(createPrintFunctionPass("\n\n"
                                    "*** Final LLVM Code input to ISel ***\n",




From asl at math.spbu.ru  Wed Apr  7 13:19:07 2010
From: asl at math.spbu.ru (Anton Korobeynikov)
Date: Wed, 07 Apr 2010 18:19:07 -0000
Subject: [llvm-commits] [llvm] r100641 - in /llvm/trunk/lib/Target/ARM:
 ARM.h ARMGlobalMerge.cpp ARMTargetMachine.cpp ARMTargetMachine.h
Message-ID: <20100407181907.D64702A6C12C@llvm.org>

Author: asl
Date: Wed Apr  7 13:19:07 2010
New Revision: 100641

URL: http://llvm.org/viewvc/llvm-project?rev=100641&view=rev
Log:
Some initial version of global merger

Added:
    llvm/trunk/lib/Target/ARM/ARMGlobalMerge.cpp
Modified:
    llvm/trunk/lib/Target/ARM/ARM.h
    llvm/trunk/lib/Target/ARM/ARMTargetMachine.cpp
    llvm/trunk/lib/Target/ARM/ARMTargetMachine.h

Modified: llvm/trunk/lib/Target/ARM/ARM.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARM.h?rev=100641&r1=100640&r2=100641&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARM.h (original)
+++ llvm/trunk/lib/Target/ARM/ARM.h Wed Apr  7 13:19:07 2010
@@ -98,6 +98,7 @@
 
 FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false);
 FunctionPass *createARMExpandPseudoPass();
+FunctionPass *createARMGlobalMergePass(const TargetLowering* tli);
 FunctionPass *createARMConstantIslandPass();
 FunctionPass *createNEONPreAllocPass();
 FunctionPass *createNEONMoveFixPass();

Added: llvm/trunk/lib/Target/ARM/ARMGlobalMerge.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMGlobalMerge.cpp?rev=100641&view=auto
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMGlobalMerge.cpp (added)
+++ llvm/trunk/lib/Target/ARM/ARMGlobalMerge.cpp Wed Apr  7 13:19:07 2010
@@ -0,0 +1,135 @@
+//===-- ARMGlobalMerge.cpp - Internal globals merging  --------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "arm-global-merge"
+#include "ARM.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Attributes.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLowering.h"
+using namespace llvm;
+
+namespace {
+  class VISIBILITY_HIDDEN ARMGlobalMerge : public FunctionPass {
+    /// TLI - Keep a pointer of a TargetLowering to consult for determining
+    /// target type sizes.
+    const TargetLowering *TLI;
+
+    std::vector InternalGlobals;
+
+  public:
+    static char ID;             // Pass identification, replacement for typeid.
+    explicit ARMGlobalMerge(const TargetLowering *tli)
+      : FunctionPass(&ID), TLI(tli) {}
+
+    virtual bool doInitialization(Module &M);
+    virtual bool runOnFunction(Function& F);
+
+    const char *getPassName() const {
+      return "Merge internal globals";
+    }
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesCFG();
+      FunctionPass::getAnalysisUsage(AU);
+    }
+
+    struct GlobalCmp {
+      const TargetData *TD;
+
+      GlobalCmp(const TargetData *td):
+        TD(td) { };
+
+      bool operator() (const GlobalVariable* GV1,
+                       const GlobalVariable* GV2) {
+        const Type* Ty1 = cast(GV1->getType())->getElementType();
+        const Type* Ty2 = cast(GV2->getType())->getElementType();
+
+        return (TD->getTypeAllocSize(Ty1) <
+                TD->getTypeAllocSize(Ty2));
+      }
+    };
+  };
+} // end anonymous namespace
+
+char ARMGlobalMerge::ID = 0;
+
+#define MAX_OFFSET 4095
+
+bool ARMGlobalMerge::doInitialization(Module& M) {
+  const TargetData *TD = TLI->getTargetData();
+
+  for (Module::global_iterator I = M.global_begin(),
+         E = M.global_end(); I != E; ++I) {
+    // FIXME: Can we just grab all 'local' vars here?
+    // Won't we break some semantics?
+    if (I->hasInternalLinkage() &&
+        TD->getTypeAllocSize(I->getType()) < MAX_OFFSET)
+      InternalGlobals.push_back(I);
+  }
+
+  // FIXME: Find better heuristics
+  std::stable_sort(InternalGlobals.begin(), InternalGlobals.end(),
+                   GlobalCmp(TD));
+
+  const Type *Int32Ty = Type::getInt32Ty(M.getContext());
+
+  for (size_t i = 0, e = InternalGlobals.size(); i != e; ) {
+    size_t j = 0;
+    uint64_t MergedSize = 0;
+    std::vector Tys;
+    std::vector Inits;
+    for (j = i; MergedSize < MAX_OFFSET && j != e; ++j) {
+      const Type* Ty =
+        cast(InternalGlobals[j]->getType())->getElementType();
+      Tys.push_back(Ty);
+      Inits.push_back(InternalGlobals[j]->getInitializer());
+      MergedSize += TD->getTypeAllocSize(Ty);
+    }
+
+    StructType* MergedTy = StructType::get(M.getContext(), Tys);
+    Constant* MergedInit = ConstantStruct::get(MergedTy, Inits);
+    // FIXME: Should we handle constants and 'normal' globals separately?
+    GlobalVariable* MergedGV = new GlobalVariable(M, MergedTy, false,
+                                                  GlobalValue::InternalLinkage,
+                                                  MergedInit, "merged");
+    for (size_t k = i; k < j; ++k) {
+      SmallVector Idx;
+      Idx.push_back(ConstantInt::get(Int32Ty, 0));
+      Idx.push_back(ConstantInt::get(Int32Ty, k-i));
+
+      Constant* GEP =
+        ConstantExpr::getInBoundsGetElementPtr(MergedGV,
+                                               &Idx[0], Idx.size());
+
+      InternalGlobals[k]->replaceAllUsesWith(GEP);
+    }
+    i = j;
+  }
+
+  return true;
+}
+
+bool ARMGlobalMerge::runOnFunction(Function& F) {
+  return false;
+}
+
+FunctionPass *llvm::createARMGlobalMergePass(const TargetLowering *tli) {
+  return new ARMGlobalMerge(tli);
+}

Modified: llvm/trunk/lib/Target/ARM/ARMTargetMachine.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMTargetMachine.cpp?rev=100641&r1=100640&r2=100641&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMTargetMachine.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMTargetMachine.cpp Wed Apr  7 13:19:07 2010
@@ -79,9 +79,15 @@
     TLInfo(*this) {
 }
 
+// Pass Pipeline Configuration
+bool ARMBaseTargetMachine::addPreISel(PassManagerBase &PM,
+                                      CodeGenOpt::Level OptLevel) {
+  if (OptLevel != CodeGenOpt::None)
+    PM.add(createARMGlobalMergePass(getTargetLowering()));
 
+  return false;
+}
 
-// Pass Pipeline Configuration
 bool ARMBaseTargetMachine::addInstSelector(PassManagerBase &PM,
                                            CodeGenOpt::Level OptLevel) {
   PM.add(createARMISelDag(*this, OptLevel));

Modified: llvm/trunk/lib/Target/ARM/ARMTargetMachine.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMTargetMachine.h?rev=100641&r1=100640&r2=100641&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMTargetMachine.h (original)
+++ llvm/trunk/lib/Target/ARM/ARMTargetMachine.h Wed Apr  7 13:19:07 2010
@@ -49,6 +49,7 @@
   }
 
   // Pass Pipeline Configuration
+  virtual bool addPreISel(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
   virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
   virtual bool addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
   virtual bool addPreSched2(PassManagerBase &PM, CodeGenOpt::Level OptLevel);




From asl at math.spbu.ru  Wed Apr  7 13:19:13 2010
From: asl at math.spbu.ru (Anton Korobeynikov)
Date: Wed, 07 Apr 2010 18:19:13 -0000
Subject: [llvm-commits] [llvm] r100642 -
	/llvm/trunk/lib/Target/ARM/ARMGlobalMerge.cpp
Message-ID: <20100407181913.925CA2A6C12D@llvm.org>

Author: asl
Date: Wed Apr  7 13:19:13 2010
New Revision: 100642

URL: http://llvm.org/viewvc/llvm-project?rev=100642&view=rev
Log:
Separate const from non-const stuff during merging

Modified:
    llvm/trunk/lib/Target/ARM/ARMGlobalMerge.cpp

Modified: llvm/trunk/lib/Target/ARM/ARMGlobalMerge.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMGlobalMerge.cpp?rev=100642&r1=100641&r2=100642&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMGlobalMerge.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMGlobalMerge.cpp Wed Apr  7 13:19:13 2010
@@ -30,8 +30,7 @@
     /// TLI - Keep a pointer of a TargetLowering to consult for determining
     /// target type sizes.
     const TargetLowering *TLI;
-
-    std::vector<GlobalVariable*> InternalGlobals;
+    bool doMerge(std::vector<GlobalVariable*> &Globals, Module &M, bool) const;
 
   public:
     static char ID;             // Pass identification, replacement for typeid.
@@ -72,41 +71,30 @@
 
 #define MAX_OFFSET 4095
 
-bool ARMGlobalMerge::doInitialization(Module& M) {
+bool ARMGlobalMerge::doMerge(std::vector<GlobalVariable*> &Globals,
+                             Module &M, bool isConst) const {
   const TargetData *TD = TLI->getTargetData();
 
-  for (Module::global_iterator I = M.global_begin(),
-         E = M.global_end(); I != E; ++I) {
-    // FIXME: Can we just grab all 'local' vars here?
-    // Won't we break some semantics?
-    if (I->hasInternalLinkage() &&
-        TD->getTypeAllocSize(I->getType()) < MAX_OFFSET)
-      InternalGlobals.push_back(I);
-  }
-
   // FIXME: Find better heuristics
-  std::stable_sort(InternalGlobals.begin(), InternalGlobals.end(),
-                   GlobalCmp(TD));
+  std::stable_sort(Globals.begin(), Globals.end(), GlobalCmp(TD));
 
   const Type *Int32Ty = Type::getInt32Ty(M.getContext());
 
-  for (size_t i = 0, e = InternalGlobals.size(); i != e; ) {
+  for (size_t i = 0, e = Globals.size(); i != e; ) {
     size_t j = 0;
     uint64_t MergedSize = 0;
     std::vector<const Type*> Tys;
     std::vector<Constant*> Inits;
     for (j = i; MergedSize < MAX_OFFSET && j != e; ++j) {
-      const Type* Ty =
-        cast<PointerType>(InternalGlobals[j]->getType())->getElementType();
+      const Type* Ty = Globals[j]->getType()->getElementType();
       Tys.push_back(Ty);
-      Inits.push_back(InternalGlobals[j]->getInitializer());
+      Inits.push_back(Globals[j]->getInitializer());
       MergedSize += TD->getTypeAllocSize(Ty);
     }
 
     StructType* MergedTy = StructType::get(M.getContext(), Tys);
     Constant* MergedInit = ConstantStruct::get(MergedTy, Inits);
-    // FIXME: Should we handle constants and 'normal' globals separately?
-    GlobalVariable* MergedGV = new GlobalVariable(M, MergedTy, false,
+    GlobalVariable* MergedGV = new GlobalVariable(M, MergedTy, isConst,
                                                   GlobalValue::InternalLinkage,
                                                   MergedInit, "merged");
     for (size_t k = i; k < j; ++k) {
@@ -118,7 +106,8 @@
         ConstantExpr::getInBoundsGetElementPtr(MergedGV,
                                                &Idx[0], Idx.size());
 
-      InternalGlobals[k]->replaceAllUsesWith(GEP);
+      Globals[k]->replaceAllUsesWith(GEP);
+      Globals[k]->eraseFromParent();
     }
     i = j;
   }
@@ -126,6 +115,31 @@
   return true;
 }
 
+
+bool ARMGlobalMerge::doInitialization(Module& M) {
+  std::vector<GlobalVariable*> Globals, ConstGlobals;
+  bool Changed = false;
+  const TargetData *TD = TLI->getTargetData();
+
+  // Grab all non-const globals.
+  for (Module::global_iterator I = M.global_begin(),
+         E = M.global_end(); I != E; ++I) {
+    // Ignore fancy-aligned globals for now.
+    if (I->hasLocalLinkage() && I->getAlignment() == 0 &&
+        TD->getTypeAllocSize(I->getType()) < MAX_OFFSET) {
+      if (I->isConstant())
+        ConstGlobals.push_back(I);
+      else
+        Globals.push_back(I);
+    }
+  }
+
+  Changed |= doMerge(Globals, M, false);
+  Changed |= doMerge(ConstGlobals, M, true);
+
+  return Changed;
+}
+
 bool ARMGlobalMerge::runOnFunction(Function& F) {
   return false;
 }




From asl at math.spbu.ru  Wed Apr  7 13:19:19 2010
From: asl at math.spbu.ru (Anton Korobeynikov)
Date: Wed, 07 Apr 2010 18:19:19 -0000
Subject: [llvm-commits] [llvm] r100643 - in /llvm/trunk/lib/Target/ARM:
	ARM.td ARMScheduleV7.td
Message-ID: <20100407181919.0E4C82A6C12C@llvm.org>

Author: asl
Date: Wed Apr  7 13:19:18 2010
New Revision: 100643

URL: http://llvm.org/viewvc/llvm-project?rev=100643&view=rev
Log:
Some bits of A9 scheduling: VFP

Modified:
    llvm/trunk/lib/Target/ARM/ARM.td
    llvm/trunk/lib/Target/ARM/ARMScheduleV7.td

Modified: llvm/trunk/lib/Target/ARM/ARM.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARM.td?rev=100643&r1=100642&r2=100643&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARM.td (original)
+++ llvm/trunk/lib/Target/ARM/ARM.td Wed Apr  7 13:19:18 2010
@@ -124,7 +124,8 @@
 def : Processor<"cortex-a8",        CortexA8Itineraries,
                 [ArchV7A, FeatureThumb2, FeatureNEON, FeatureHasSlowVMLx,
                  FeatureNEONForFP]>;
-def : ProcNoItin<"cortex-a9",       [ArchV7A, FeatureThumb2, FeatureNEON]>;
+def : Processor<"cortex-a9",        CortexA9Itineraries,
+                [ArchV7A, FeatureThumb2, FeatureNEON]>;
 
 //===----------------------------------------------------------------------===//
 // Register File Description

Modified: llvm/trunk/lib/Target/ARM/ARMScheduleV7.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMScheduleV7.td?rev=100643&r1=100642&r2=100643&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMScheduleV7.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMScheduleV7.td Wed Apr  7 13:19:18 2010
@@ -585,3 +585,103 @@
                                InstrStage<1, [FU_NPipe], 0>,
                                InstrStage<2, [FU_NLSPipe]>], [4, 1, 2, 2, 3, 3, 1]>
 ]>;
+
+//
+// Ad-hoc scheduling information derived from pretty vague "Cortex-A9 Technical
+// Reference Manual".
+//
+// Dual issue pipeline represented by FU_Pipe0 | FU_Pipe1
+//
+def CortexA9Itineraries : ProcessorItineraries<[
+
+  // VFP
+  // Issue through integer pipeline, and execute in NEON unit.
+  //
+
+  // FP Special Register to Integer Register File Move
+  InstrItinData,
+                              InstrStage<1, [FU_NPipe]>]>,
+  //
+  // Single-precision FP Unary
+  InstrItinData,
+                               InstrStage<1, [FU_NPipe]>], [1, 1]>,
+  //
+  // Double-precision FP Unary
+  InstrItinData,
+                               InstrStage<1, [FU_NPipe]>], [1, 1]>,
+
+  //
+  // Single-precision FP Compare
+  InstrItinData,
+                               InstrStage<1, [FU_NPipe]>], [1, 1]>,
+  //
+  // Double-precision FP Compare
+  InstrItinData,
+                               InstrStage<1, [FU_NPipe]>], [1, 1]>,
+  //
+  // Single to Double FP Convert
+  InstrItinData,
+                               InstrStage<1, [FU_NPipe]>], [4, 1]>,
+  //
+  // Double to Single FP Convert
+  InstrItinData,
+                               InstrStage<1, [FU_NPipe]>], [4, 1]>,
+  //
+  // Single-Precision FP to Integer Convert
+  InstrItinData,
+                               InstrStage<1, [FU_NPipe]>], [4, 1]>,
+  //
+  // Double-Precision FP to Integer Convert
+  InstrItinData,
+                               InstrStage<1, [FU_NPipe]>], [4, 1]>,
+  //
+  // Integer to Single-Precision FP Convert
+  InstrItinData,
+                               InstrStage<1, [FU_NPipe]>], [4, 1]>,
+  //
+  // Integer to Double-Precision FP Convert
+  InstrItinData,
+                               InstrStage<1, [FU_NPipe]>], [4, 1]>,
+  //
+  // Single-precision FP ALU
+  InstrItinData,
+                               InstrStage<1, [FU_NPipe]>], [4, 1, 1]>,
+  //
+  // Double-precision FP ALU
+  InstrItinData,
+                               InstrStage<1, [FU_NPipe]>], [4, 1, 1]>,
+  //
+  // Single-precision FP Multiply
+  InstrItinData,
+                               InstrStage<1, [FU_NPipe]>], [5, 1, 1]>,
+  //
+  // Double-precision FP Multiply
+  InstrItinData,
+                               InstrStage<2, [FU_NPipe]>], [6, 1, 1]>,
+  //
+  // Single-precision FP MAC
+  InstrItinData,
+                               InstrStage<1, [FU_NPipe]>], [8, 0, 1, 1]>,
+  //
+  // Double-precision FP MAC
+  InstrItinData,
+                               InstrStage<2, [FU_NPipe]>], [9, 0, 1, 1]>,
+  //
+  // Single-precision FP DIV
+  InstrItinData,
+                               InstrStage<10, [FU_NPipe]>], [15, 1, 1]>,
+  //
+  // Double-precision FP DIV
+  InstrItinData,
+                               InstrStage<20, [FU_NPipe]>], [25, 1, 1]>,
+  //
+  // Single-precision FP SQRT
+  InstrItinData,
+                               InstrStage<13, [FU_NPipe]>], [17, 1]>,
+  //
+  // Double-precision FP SQRT
+  InstrItinData,
+                               InstrStage<28, [FU_NPipe]>], [32, 1]>
+]>;
+
+




From asl at math.spbu.ru  Wed Apr  7 13:19:24 2010
From: asl at math.spbu.ru (Anton Korobeynikov)
Date: Wed, 07 Apr 2010 18:19:24 -0000
Subject: [llvm-commits] [llvm] r100644 - in /llvm/trunk/lib/CodeGen:
 ExactHazardRecognizer.cpp ExactHazardRecognizer.h
Message-ID: <20100407181924.64B752A6C12D@llvm.org>

Author: asl
Date: Wed Apr  7 13:19:24 2010
New Revision: 100644

URL: http://llvm.org/viewvc/llvm-project?rev=100644&view=rev
Log:
Factor out scoreboard into separate class. This way we might have several different score boards.

Modified:
    llvm/trunk/lib/CodeGen/ExactHazardRecognizer.cpp
    llvm/trunk/lib/CodeGen/ExactHazardRecognizer.h

Modified: llvm/trunk/lib/CodeGen/ExactHazardRecognizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/ExactHazardRecognizer.cpp?rev=100644&r1=100643&r2=100644&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/ExactHazardRecognizer.cpp (original)
+++ llvm/trunk/lib/CodeGen/ExactHazardRecognizer.cpp Wed Apr  7 13:19:24 2010
@@ -29,7 +29,7 @@
   // Determine the maximum depth of any itinerary. This determines the
   // depth of the scoreboard. We always make the scoreboard at least 1
   // cycle deep to avoid dealing with the boundary condition.
-  ScoreboardDepth = 1;
+  unsigned ScoreboardDepth = 1;
   if (!ItinData.isEmpty()) {
     for (unsigned idx = 0; ; ++idx) {
       if (ItinData.isEndMarker(idx))
@@ -45,35 +45,25 @@
     }
   }
 
-  Scoreboard = new unsigned[ScoreboardDepth];
-  ScoreboardHead = 0;
+  Scoreboard.reset(ScoreboardDepth);
 
   DEBUG(dbgs() << "Using exact hazard recognizer: ScoreboardDepth = " 
                << ScoreboardDepth << '\n');
 }
 
-ExactHazardRecognizer::~ExactHazardRecognizer() {
-  delete [] Scoreboard;
-}
-
 void ExactHazardRecognizer::Reset() {
-  memset(Scoreboard, 0, ScoreboardDepth * sizeof(unsigned));
-  ScoreboardHead = 0;
-}
-
-unsigned ExactHazardRecognizer::getFutureIndex(unsigned offset) {
-  return (ScoreboardHead + offset) % ScoreboardDepth;
+  Scoreboard.reset();
 }
 
-void ExactHazardRecognizer::dumpScoreboard() {
+void ExactHazardRecognizer::ScoreBoard::dump() const {
   dbgs() << "Scoreboard:\n";
-  
-  unsigned last = ScoreboardDepth - 1;
-  while ((last > 0) && (Scoreboard[getFutureIndex(last)] == 0))
+
+  unsigned last = Depth - 1;
+  while ((last > 0) && ((*this)[last] == 0))
     last--;
 
   for (unsigned i = 0; i <= last; i++) {
-    unsigned FUs = Scoreboard[getFutureIndex(i)];
+    unsigned FUs = (*this)[i];
     dbgs() << "\t";
     for (int j = 31; j >= 0; j--)
       dbgs() << ((FUs & (1 << j)) ? '1' : '0');
@@ -96,11 +86,10 @@
     // stage is occupied. FIXME it would be more accurate to find the
     // same unit free in all the cycles.
     for (unsigned int i = 0; i < IS->getCycles(); ++i) {
-      assert(((cycle + i) < ScoreboardDepth) && 
+      assert(((cycle + i) < Scoreboard.getDepth()) &&
              "Scoreboard depth exceeded!");
-      
-      unsigned index = getFutureIndex(cycle + i);
-      unsigned freeUnits = IS->getUnits() & ~Scoreboard[index];
+
+      unsigned freeUnits = IS->getUnits() & ~Scoreboard[cycle + i];
       if (!freeUnits) {
         DEBUG(dbgs() << "*** Hazard in cycle " << (cycle + i) << ", ");
         DEBUG(dbgs() << "SU(" << SU->NodeNum << "): ");
@@ -108,14 +97,14 @@
         return Hazard;
       }
     }
-    
+
     // Advance the cycle to the next stage.
     cycle += IS->getNextCycles();
   }
 
   return NoHazard;
 }
-    
+
 void ExactHazardRecognizer::EmitInstruction(SUnit *SU) {
   if (ItinData.isEmpty())
     return;
@@ -131,31 +120,30 @@
     // stage is occupied. FIXME it would be more accurate to reserve
     // the same unit free in all the cycles.
     for (unsigned int i = 0; i < IS->getCycles(); ++i) {
-      assert(((cycle + i) < ScoreboardDepth) &&
+      assert(((cycle + i) < Scoreboard.getDepth()) &&
              "Scoreboard depth exceeded!");
-      
-      unsigned index = getFutureIndex(cycle + i);
-      unsigned freeUnits = IS->getUnits() & ~Scoreboard[index];
-      
+
+      unsigned freeUnits = IS->getUnits() & ~Scoreboard[cycle + i];
+
       // reduce to a single unit
       unsigned freeUnit = 0;
       do {
         freeUnit = freeUnits;
         freeUnits = freeUnit & (freeUnit - 1);
       } while (freeUnits);
-      
+
       assert(freeUnit && "No function unit available!");
-      Scoreboard[index] |= freeUnit;
+      Scoreboard[cycle + i] |= freeUnit;
     }
-    
+
     // Advance the cycle to the next stage.
     cycle += IS->getNextCycles();
   }
-  
-  DEBUG(dumpScoreboard());
+
+  DEBUG(Scoreboard.dump());
 }
-    
+
 void ExactHazardRecognizer::AdvanceCycle() {
-  Scoreboard[ScoreboardHead] = 0;
-  ScoreboardHead = getFutureIndex(1);
+  Scoreboard[0] = 0;
+  Scoreboard.advance();
 }

Modified: llvm/trunk/lib/CodeGen/ExactHazardRecognizer.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/ExactHazardRecognizer.h?rev=100644&r1=100643&r2=100644&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/ExactHazardRecognizer.h (original)
+++ llvm/trunk/lib/CodeGen/ExactHazardRecognizer.h Wed Apr  7 13:19:24 2010
@@ -22,35 +22,59 @@
 
 namespace llvm {
   class ExactHazardRecognizer : public ScheduleHazardRecognizer {
+    // ScoreBoard to track function unit usage. ScoreBoard[0] is a
+    // mask of the FUs in use in the cycle currently being
+    // scheduled. ScoreBoard[1] is a mask for the next cycle. The
+    // ScoreBoard is used as a circular buffer with the current cycle
+    // indicated by Head.
+    class ScoreBoard {
+      unsigned *Data;
+
+      // The maximum number of cycles monitored by the Scoreboard. This
+      // value is determined based on the target itineraries to ensure
+      // that all hazards can be tracked.
+      size_t Depth;
+      // Indices into the Scoreboard that represent the current cycle.
+      size_t Head;
+    public:
+      ScoreBoard():Data(NULL), Depth(0), Head(0) { }
+      ~ScoreBoard() {
+        delete[] Data;
+      }
+
+      size_t getDepth() const { return Depth; }
+      unsigned& operator[](size_t idx) const {
+        assert(Depth && "ScoreBoard was not initialized properly!");
+
+        return Data[(Head + idx) % Depth];
+      }
+
+      void reset(size_t d = 1) {
+        if (Data == NULL) {
+          Depth = d;
+          Data = new unsigned[Depth];
+        }
+
+        memset(Data, 0, Depth * sizeof(Data[0]));
+        Head = 0;
+      }
+
+      void advance() {
+        Head = (Head + 1) % Depth;
+      }
+
+      // Print the scoreboard.
+      void dump() const;
+    };
+
     // Itinerary data for the target.
     const InstrItineraryData &ItinData;
 
-    // Scoreboard to track function unit usage. Scoreboard[0] is a
-    // mask of the FUs in use in the cycle currently being
-    // schedule. Scoreboard[1] is a mask for the next cycle. The
-    // Scoreboard is used as a circular buffer with the current cycle
-    // indicated by ScoreboardHead.
-    unsigned *Scoreboard;
-
-    // The maximum number of cycles monitored by the Scoreboard. This
-    // value is determined based on the target itineraries to ensure
-    // that all hazards can be tracked.
-    unsigned ScoreboardDepth;
-
-    // Indices into the Scoreboard that represent the current cycle.
-    unsigned ScoreboardHead;
-
-    // Return the scoreboard index to use for 'offset' cycles in the
-    // future. 'offset' of 0 returns ScoreboardHead.
-    unsigned getFutureIndex(unsigned offset);
-
-    // Print the scoreboard.
-    void dumpScoreboard();
+    ScoreBoard Scoreboard;
 
   public:
     ExactHazardRecognizer(const InstrItineraryData &ItinData);
-    ~ExactHazardRecognizer();
-    
+
     virtual HazardType getHazardType(SUnit *SU);
     virtual void Reset();
     virtual void EmitInstruction(SUnit *SU);




From asl at math.spbu.ru  Wed Apr  7 13:19:32 2010
From: asl at math.spbu.ru (Anton Korobeynikov)
Date: Wed, 07 Apr 2010 18:19:32 -0000
Subject: [llvm-commits] [llvm] r100645 - in /llvm/trunk:
 include/llvm/Target/TargetInstrItineraries.h
 include/llvm/Target/TargetSchedule.td lib/CodeGen/ExactHazardRecognizer.cpp
 lib/CodeGen/ExactHazardRecognizer.h utils/TableGen/SubtargetEmitter.cpp
Message-ID: <20100407181932.CA17A2A6C12C@llvm.org>

Author: asl
Date: Wed Apr  7 13:19:32 2010
New Revision: 100645

URL: http://llvm.org/viewvc/llvm-project?rev=100645&view=rev
Log:
Initial support for different kinds of FU reservation.

Modified:
    llvm/trunk/include/llvm/Target/TargetInstrItineraries.h
    llvm/trunk/include/llvm/Target/TargetSchedule.td
    llvm/trunk/lib/CodeGen/ExactHazardRecognizer.cpp
    llvm/trunk/lib/CodeGen/ExactHazardRecognizer.h
    llvm/trunk/utils/TableGen/SubtargetEmitter.cpp

Modified: llvm/trunk/include/llvm/Target/TargetInstrItineraries.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetInstrItineraries.h?rev=100645&r1=100644&r2=100645&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Target/TargetInstrItineraries.h (original)
+++ llvm/trunk/include/llvm/Target/TargetInstrItineraries.h Wed Apr  7 13:19:32 2010
@@ -47,10 +47,24 @@
 ///      indicate that the instruction requires multiple stages at the
 ///      same time.
 ///
+/// FU reservation can be of two different kinds:
+///  - FUs which the instruction actually requires
+///  - FUs which the instruction just reserves. A reserved unit is not available
+///    for execution of other instructions. However, several instructions can
+///    reserve the same unit several times.
+/// These two kinds of unit reservation are used to model instruction domain
+/// change stalls, FUs using the same resource (e.g. same register file), etc.
+
 struct InstrStage {
+  enum ReservationKinds {
+    Required = 0,
+    Reserved = 1
+  };
+
   unsigned Cycles_;  ///< Length of stage in machine cycles
   unsigned Units_;   ///< Choice of functional units
-  int NextCycles_;   ///< Number of machine cycles to next stage 
+  int NextCycles_;   ///< Number of machine cycles to next stage
+  ReservationKinds Kind_; ///< Kind of the FU reservation
 
   /// getCycles - returns the number of cycles the stage is occupied
   unsigned getCycles() const {
@@ -62,6 +76,10 @@
     return Units_;
   }
 
+  ReservationKinds getReservationKind() const {
+    return Kind_;
+  }
+
   /// getNextCycles - returns the number of cycles from the start of
   /// this stage to the start of the next stage in the itinerary
   unsigned getNextCycles() const {

Modified: llvm/trunk/include/llvm/Target/TargetSchedule.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetSchedule.td?rev=100645&r1=100644&r2=100645&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Target/TargetSchedule.td (original)
+++ llvm/trunk/include/llvm/Target/TargetSchedule.td Wed Apr  7 13:19:32 2010
@@ -22,6 +22,13 @@
 //  
 class FuncUnit;
 
+class ReservationKind<bits<1> val> {
+  bits<1> Value = val;
+}
+
+def Required : ReservationKind<0>;
+def Reserved : ReservationKind<1>;
+
 //===----------------------------------------------------------------------===//
 // Instruction stage - These values represent a non-pipelined step in
 // the execution of an instruction.  Cycles represents the number of
@@ -36,10 +43,12 @@
 //   InstrStage<1, [FU_x, FU_y]>     - TimeInc defaults to Cycles
 //   InstrStage<1, [FU_x, FU_y], 0>  - TimeInc explicit
 //
-class InstrStage<int cycles, list<FuncUnit> units, int timeinc = -1> {
+class InstrStage<int cycles, list<FuncUnit> units,
+                 int timeinc = -1, ReservationKind kind = Required> {
   int Cycles          = cycles;       // length of stage in machine cycles
   list<FuncUnit> Units = units;       // choice of functional units
   int TimeInc         = timeinc;      // cycles till start of next stage
+  int Kind            = kind.Value;   // kind of FU reservation
 }
 
 //===----------------------------------------------------------------------===//

Modified: llvm/trunk/lib/CodeGen/ExactHazardRecognizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/ExactHazardRecognizer.cpp?rev=100645&r1=100644&r2=100645&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/ExactHazardRecognizer.cpp (original)
+++ llvm/trunk/lib/CodeGen/ExactHazardRecognizer.cpp Wed Apr  7 13:19:32 2010
@@ -45,14 +45,16 @@
     }
   }
 
-  Scoreboard.reset(ScoreboardDepth);
+  ReservedScoreboard.reset(ScoreboardDepth);
+  RequiredScoreboard.reset(ScoreboardDepth);
 
   DEBUG(dbgs() << "Using exact hazard recognizer: ScoreboardDepth = " 
                << ScoreboardDepth << '\n');
 }
 
 void ExactHazardRecognizer::Reset() {
-  Scoreboard.reset();
+  RequiredScoreboard.reset();
+  ReservedScoreboard.reset();
 }
 
 void ExactHazardRecognizer::ScoreBoard::dump() const {
@@ -86,10 +88,23 @@
     // stage is occupied. FIXME it would be more accurate to find the
     // same unit free in all the cycles.
     for (unsigned int i = 0; i < IS->getCycles(); ++i) {
-      assert(((cycle + i) < Scoreboard.getDepth()) &&
+      assert(((cycle + i) < RequiredScoreboard.getDepth()) &&
              "Scoreboard depth exceeded!");
 
-      unsigned freeUnits = IS->getUnits() & ~Scoreboard[cycle + i];
+      unsigned freeUnits = IS->getUnits();
+      switch (IS->getReservationKind()) {
+      default:
+       assert(0 && "Invalid FU reservation");
+      case InstrStage::Required:
+        // Required FUs conflict with both reserved and required ones
+        freeUnits &= ~ReservedScoreboard[cycle + i];
+        // FALLTHROUGH
+      case InstrStage::Reserved:
+        // Reserved FUs can conflict only with required ones.
+        freeUnits &= ~RequiredScoreboard[cycle + i];
+        break;
+      }
+
       if (!freeUnits) {
         DEBUG(dbgs() << "*** Hazard in cycle " << (cycle + i) << ", ");
         DEBUG(dbgs() << "SU(" << SU->NodeNum << "): ");
@@ -114,16 +129,28 @@
   // Use the itinerary for the underlying instruction to reserve FU's
   // in the scoreboard at the appropriate future cycles.
   unsigned idx = SU->getInstr()->getDesc().getSchedClass();
-  for (const InstrStage *IS = ItinData.beginStage(idx), 
+  for (const InstrStage *IS = ItinData.beginStage(idx),
          *E = ItinData.endStage(idx); IS != E; ++IS) {
     // We must reserve one of the stage's units for every cycle the
     // stage is occupied. FIXME it would be more accurate to reserve
     // the same unit free in all the cycles.
     for (unsigned int i = 0; i < IS->getCycles(); ++i) {
-      assert(((cycle + i) < Scoreboard.getDepth()) &&
+      assert(((cycle + i) < RequiredScoreboard.getDepth()) &&
              "Scoreboard depth exceeded!");
 
-      unsigned freeUnits = IS->getUnits() & ~Scoreboard[cycle + i];
+      unsigned freeUnits = IS->getUnits();
+      switch (IS->getReservationKind()) {
+      default:
+       assert(0 && "Invalid FU reservation");
+      case InstrStage::Required:
+        // Required FUs conflict with both reserved and required ones
+        freeUnits &= ~ReservedScoreboard[cycle + i];
+        // FALLTHROUGH
+      case InstrStage::Reserved:
+        // Reserved FUs can conflict only with required ones.
+        freeUnits &= ~RequiredScoreboard[cycle + i];
+        break;
+      }
 
       // reduce to a single unit
       unsigned freeUnit = 0;
@@ -133,17 +160,21 @@
       } while (freeUnits);
 
       assert(freeUnit && "No function unit available!");
-      Scoreboard[cycle + i] |= freeUnit;
+      if (IS->getReservationKind() == InstrStage::Required)
+        RequiredScoreboard[cycle + i] |= freeUnit;
+      else
+        ReservedScoreboard[cycle + i] |= freeUnit;
     }
 
     // Advance the cycle to the next stage.
     cycle += IS->getNextCycles();
   }
 
-  DEBUG(Scoreboard.dump());
+  DEBUG(ReservedScoreboard.dump());
+  DEBUG(RequiredScoreboard.dump());
 }
 
 void ExactHazardRecognizer::AdvanceCycle() {
-  Scoreboard[0] = 0;
-  Scoreboard.advance();
+  ReservedScoreboard[0] = 0; ReservedScoreboard.advance();
+  RequiredScoreboard[0] = 0; RequiredScoreboard.advance();
 }

Modified: llvm/trunk/lib/CodeGen/ExactHazardRecognizer.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/ExactHazardRecognizer.h?rev=100645&r1=100644&r2=100645&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/ExactHazardRecognizer.h (original)
+++ llvm/trunk/lib/CodeGen/ExactHazardRecognizer.h Wed Apr  7 13:19:32 2010
@@ -70,7 +70,8 @@
     // Itinerary data for the target.
     const InstrItineraryData &ItinData;
 
-    ScoreBoard Scoreboard;
+    ScoreBoard ReservedScoreboard;
+    ScoreBoard RequiredScoreboard;
 
   public:
     ExactHazardRecognizer(const InstrItineraryData &ItinData);

Modified: llvm/trunk/utils/TableGen/SubtargetEmitter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/TableGen/SubtargetEmitter.cpp?rev=100645&r1=100644&r2=100645&view=diff
==============================================================================
--- llvm/trunk/utils/TableGen/SubtargetEmitter.cpp (original)
+++ llvm/trunk/utils/TableGen/SubtargetEmitter.cpp Wed Apr  7 13:19:32 2010
@@ -216,7 +216,7 @@
     // Next stage
     const Record *Stage = StageList[i];
   
-    // Form string as ,{ cycles, u1 | u2 | ... | un, timeinc }
+    // Form string as ,{ cycles, u1 | u2 | ... | un, timeinc, kind }
     int Cycles = Stage->getValueAsInt("Cycles");
     ItinString += "  { " + itostr(Cycles) + ", ";
     
@@ -233,6 +233,9 @@
     int TimeInc = Stage->getValueAsInt("TimeInc");
     ItinString += ", " + itostr(TimeInc);
 
+    int Kind = Stage->getValueAsInt("Kind");
+    ItinString += ", (llvm::InstrStage::ReservationKinds)" + itostr(Kind);
+
     // Close off stage
     ItinString += " }";
     if (++i < N) ItinString += ", ";
@@ -278,7 +281,7 @@
 
   // Begin stages table
   std::string StageTable = "static const llvm::InstrStage Stages[] = {\n";
-  StageTable += "  { 0, 0, 0 }, // No itinerary\n";
+  StageTable += "  { 0, 0, 0, llvm::InstrStage::Required }, // No itinerary\n";
         
   // Begin operand cycle table
   std::string OperandCycleTable = "static const unsigned OperandCycles[] = {\n";
@@ -367,7 +370,7 @@
   }
   
   // Closing stage
-  StageTable += "  { 0, 0, 0 } // End itinerary\n";
+  StageTable += "  { 0, 0, 0, llvm::InstrStage::Required } // End itinerary\n";
   StageTable += "};\n";
 
   // Closing operand cycles




From asl at math.spbu.ru  Wed Apr  7 13:19:40 2010
From: asl at math.spbu.ru (Anton Korobeynikov)
Date: Wed, 07 Apr 2010 18:19:40 -0000
Subject: [llvm-commits] [llvm] r100646 - in /llvm/trunk:
 include/llvm/Target/TargetSchedule.td lib/Target/ARM/ARMSchedule.td
 lib/Target/ARM/ARMScheduleV7.td
Message-ID: <20100407181940.471422A6C12C@llvm.org>

Author: asl
Date: Wed Apr  7 13:19:40 2010
New Revision: 100646

URL: http://llvm.org/viewvc/llvm-project?rev=100646&view=rev
Log:
Make use of new reserved/required scheduling stuff: introduce VFP and NEON locks to model cross-domain stalls precisely.

Modified:
    llvm/trunk/include/llvm/Target/TargetSchedule.td
    llvm/trunk/lib/Target/ARM/ARMSchedule.td
    llvm/trunk/lib/Target/ARM/ARMScheduleV7.td

Modified: llvm/trunk/include/llvm/Target/TargetSchedule.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetSchedule.td?rev=100646&r1=100645&r2=100646&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Target/TargetSchedule.td (original)
+++ llvm/trunk/include/llvm/Target/TargetSchedule.td Wed Apr  7 13:19:40 2010
@@ -23,7 +23,7 @@
 class FuncUnit;
 
 class ReservationKind<bits<1> val> {
-  bits<1> Value = val;
+  int Value = val;
 }
 
 def Required : ReservationKind<0>;
@@ -43,14 +43,19 @@
 //   InstrStage<1, [FU_x, FU_y]>     - TimeInc defaults to Cycles
 //   InstrStage<1, [FU_x, FU_y], 0>  - TimeInc explicit
 //
-class InstrStage<int cycles, list<FuncUnit> units,
-                 int timeinc = -1, ReservationKind kind = Required> {
+
+class InstrStage2<int cycles, list<FuncUnit> units,
+                  int timeinc, ReservationKind kind> {
   int Cycles          = cycles;       // length of stage in machine cycles
   list<FuncUnit> Units = units;       // choice of functional units
   int TimeInc         = timeinc;      // cycles till start of next stage
   int Kind            = kind.Value;   // kind of FU reservation
 }
 
+class InstrStage<int cycles, list<FuncUnit> units,
+                 int timeinc = -1>
+  : InstrStage2<cycles, units, timeinc, Required>;
+
 //===----------------------------------------------------------------------===//
 // Instruction itinerary - An itinerary represents a sequential series of steps
 // required to complete an instruction.  Itineraries are represented as lists of
@@ -71,10 +76,10 @@
 // Instruction itinerary data - These values provide a runtime map of an 
 // instruction itinerary class (name) to its itinerary data.
 //
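-class InstrItinData<InstrItinClass Class, list<InstrStage> stages,
+class InstrItinData<InstrItinClass Class, list<InstrStage2> stages,
                     list<int> operandcycles = []> {
   InstrItinClass TheClass = Class;
-  list<InstrStage> Stages = stages;
+  list<InstrStage2> Stages = stages;
   list<int> OperandCycles = operandcycles;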
 }
 

Modified: llvm/trunk/lib/Target/ARM/ARMSchedule.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMSchedule.td?rev=100646&r1=100645&r2=100646&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMSchedule.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMSchedule.td Wed Apr  7 13:19:40 2010
@@ -17,6 +17,8 @@
 def FU_LdSt1   : FuncUnit; // pipeline 1 load/store
 def FU_NPipe   : FuncUnit; // NEON ALU/MUL pipe
 def FU_NLSPipe : FuncUnit; // NEON LS pipe
+def FU_DRegsVFP: FuncUnit; // FP register set, VFP side
+def FU_DRegsN  : FuncUnit; // FP register set, NEON side
 
 //===----------------------------------------------------------------------===//
 // Instruction Itinerary classes used for ARM

Modified: llvm/trunk/lib/Target/ARM/ARMScheduleV7.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMScheduleV7.td?rev=100646&r1=100645&r2=100646&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMScheduleV7.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMScheduleV7.td Wed Apr  7 13:19:40 2010
@@ -593,94 +593,147 @@
 // Dual issue pipeline represented by FU_Pipe0 | FU_Pipe1
 //
 def CortexA9Itineraries : ProcessorItineraries<[
+  // VFP and NEON share the same register file. This means that every VFP
+  // instruction should wait for full completion of the consecutive NEON
+  // instruction and vice-versa. We model this behavior with two artificial FUs:
+  // DRegsVFP and DRegsN.
+  //
+  // Every VFP instruction:
+  //  - Acquires DRegsVFP resource for 1 cycle
+  //  - Reserves DRegsN resource for the whole duration.
+  // Every NEON instruction does the same but with FUs swapped.
+  //
+  // Since the reserved FU cannot be acquired this models precisely "cross-domain"
+  // stalls.
 
   // VFP
   // Issue through integer pipeline, and execute in NEON unit.
-  //
 
   // FP Special Register to Integer Register File Move
-  InstrItinData,
+  InstrItinData,
+                              InstrStage2<2, [FU_DRegsN],   0, Reserved>,
+                              InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                               InstrStage<1, [FU_NPipe]>]>,
   //
   // Single-precision FP Unary
-  InstrItinData,
+  InstrItinData,
+                               InstrStage2<2, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<1, [FU_NPipe]>], [1, 1]>,
   //
   // Double-precision FP Unary
-  InstrItinData,
+  InstrItinData,
+                               InstrStage2<2, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<1, [FU_NPipe]>], [1, 1]>,
 
   //
   // Single-precision FP Compare
-  InstrItinData,
+  InstrItinData,
+                               InstrStage2<2, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<1, [FU_NPipe]>], [1, 1]>,
   //
   // Double-precision FP Compare
-  InstrItinData,
+  InstrItinData,
+                               InstrStage2<2, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<1, [FU_NPipe]>], [1, 1]>,
   //
   // Single to Double FP Convert
-  InstrItinData,
+  InstrItinData,
+                               InstrStage2<5, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<1, [FU_NPipe]>], [4, 1]>,
   //
   // Double to Single FP Convert
-  InstrItinData,
+  InstrItinData,
+                               InstrStage2<5, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<1, [FU_NPipe]>], [4, 1]>,
   //
   // Single-Precision FP to Integer Convert
-  InstrItinData,
+  InstrItinData,
+                               InstrStage2<5, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<1, [FU_NPipe]>], [4, 1]>,
   //
   // Double-Precision FP to Integer Convert
-  InstrItinData,
+  InstrItinData,
+                               InstrStage2<5, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<1, [FU_NPipe]>], [4, 1]>,
   //
   // Integer to Single-Precision FP Convert
-  InstrItinData,
+  InstrItinData,
+                               InstrStage2<5, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<1, [FU_NPipe]>], [4, 1]>,
   //
   // Integer to Double-Precision FP Convert
-  InstrItinData,
+  InstrItinData,
+                               InstrStage2<5, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<1, [FU_NPipe]>], [4, 1]>,
   //
   // Single-precision FP ALU
-  InstrItinData,
+  InstrItinData,
+                               InstrStage2<5, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<1, [FU_NPipe]>], [4, 1, 1]>,
   //
   // Double-precision FP ALU
-  InstrItinData,
+  InstrItinData,
+                               InstrStage2<5, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<1, [FU_NPipe]>], [4, 1, 1]>,
   //
   // Single-precision FP Multiply
-  InstrItinData,
+  InstrItinData,
+                               InstrStage2<6, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<1, [FU_NPipe]>], [5, 1, 1]>,
   //
   // Double-precision FP Multiply
-  InstrItinData,
+  InstrItinData,
+                               InstrStage2<7, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<2, [FU_NPipe]>], [6, 1, 1]>,
   //
   // Single-precision FP MAC
-  InstrItinData,
+  InstrItinData,
+                               InstrStage2<9, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<1, [FU_NPipe]>], [8, 0, 1, 1]>,
   //
   // Double-precision FP MAC
-  InstrItinData,
+  InstrItinData,
+                               InstrStage2<10, [FU_DRegsN],  0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<2, [FU_NPipe]>], [9, 0, 1, 1]>,
   //
   // Single-precision FP DIV
-  InstrItinData,
+  InstrItinData,
+                               InstrStage2<16, [FU_DRegsN],  0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<10, [FU_NPipe]>], [15, 1, 1]>,
   //
   // Double-precision FP DIV
-  InstrItinData,
+  InstrItinData,
+                               InstrStage2<26, [FU_DRegsN],  0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<20, [FU_NPipe]>], [25, 1, 1]>,
   //
   // Single-precision FP SQRT
-  InstrItinData,
+  InstrItinData,
+                               InstrStage2<18, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<13, [FU_NPipe]>], [17, 1]>,
   //
   // Double-precision FP SQRT
-  InstrItinData,
+  InstrItinData,
+                               InstrStage2<33, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<28, [FU_NPipe]>], [32, 1]>
 ]>;
 




From asl at math.spbu.ru  Wed Apr  7 13:19:46 2010
From: asl at math.spbu.ru (Anton Korobeynikov)
Date: Wed, 07 Apr 2010 18:19:46 -0000
Subject: [llvm-commits] [llvm] r100647 - in /llvm/trunk/lib/Target/ARM:
 ARMInstrVFP.td ARMSchedule.td ARMScheduleV7.td
Message-ID: <20100407181946.530322A6C12D@llvm.org>

Author: asl
Date: Wed Apr  7 13:19:46 2010
New Revision: 100647

URL: http://llvm.org/viewvc/llvm-project?rev=100647&view=rev
Log:
Add new itin classes for FP16 <-> FP32 conversions and make use of them for A9.

Modified:
    llvm/trunk/lib/Target/ARM/ARMInstrVFP.td
    llvm/trunk/lib/Target/ARM/ARMSchedule.td
    llvm/trunk/lib/Target/ARM/ARMScheduleV7.td

Modified: llvm/trunk/lib/Target/ARM/ARMInstrVFP.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrVFP.td?rev=100647&r1=100646&r2=100647&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMInstrVFP.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMInstrVFP.td Wed Apr  7 13:19:46 2010
@@ -256,25 +256,25 @@
 // Between half-precision and single-precision.  For disassembly only.
 
 def VCVTBSH : ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$dst), (ins SPR:$a),
-                 /* FIXME */ IIC_fpCVTDS, "vcvtb", ".f32.f16\t$dst, $a",
+                 /* FIXME */ IIC_fpCVTSH, "vcvtb", ".f32.f16\t$dst, $a",
                  [/* For disassembly only; pattern left blank */]>;
 
 def : ARMPat<(f32_to_f16 SPR:$a),
              (i32 (COPY_TO_REGCLASS (VCVTBSH SPR:$a), GPR))>;
 
 def VCVTBHS : ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$dst), (ins SPR:$a),
-                 /* FIXME */ IIC_fpCVTDS, "vcvtb", ".f16.f32\t$dst, $a",
+                 /* FIXME */ IIC_fpCVTHS, "vcvtb", ".f16.f32\t$dst, $a",
                  [/* For disassembly only; pattern left blank */]>;
 
 def : ARMPat<(f16_to_f32 GPR:$a),
              (VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>;
 
 def VCVTTSH : ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$dst), (ins SPR:$a),
-                 /* FIXME */ IIC_fpCVTDS, "vcvtt", ".f32.f16\t$dst, $a",
+                 /* FIXME */ IIC_fpCVTSH, "vcvtt", ".f32.f16\t$dst, $a",
                  [/* For disassembly only; pattern left blank */]>;
 
 def VCVTTHS : ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$dst), (ins SPR:$a),
-                 /* FIXME */ IIC_fpCVTDS, "vcvtt", ".f16.f32\t$dst, $a",
+                 /* FIXME */ IIC_fpCVTHS, "vcvtt", ".f16.f32\t$dst, $a",
                  [/* For disassembly only; pattern left blank */]>;
 
 let neverHasSideEffects = 1 in {

Modified: llvm/trunk/lib/Target/ARM/ARMSchedule.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMSchedule.td?rev=100647&r1=100646&r2=100647&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMSchedule.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMSchedule.td Wed Apr  7 13:19:46 2010
@@ -71,6 +71,8 @@
 def IIC_fpCMP64    : InstrItinClass;
 def IIC_fpCVTSD    : InstrItinClass;
 def IIC_fpCVTDS    : InstrItinClass;
+def IIC_fpCVTSH    : InstrItinClass;
+def IIC_fpCVTHS    : InstrItinClass;
 def IIC_fpCVTIS    : InstrItinClass;
 def IIC_fpCVTID    : InstrItinClass;
 def IIC_fpCVTSI    : InstrItinClass;

Modified: llvm/trunk/lib/Target/ARM/ARMScheduleV7.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMScheduleV7.td?rev=100647&r1=100646&r2=100647&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMScheduleV7.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMScheduleV7.td Wed Apr  7 13:19:46 2010
@@ -651,6 +651,20 @@
                                InstrStage2<5, [FU_DRegsN],   0, Reserved>,
                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<1, [FU_NPipe]>], [4, 1]>,
+
+  //
+  // Single to Half FP Convert
+  InstrItinData,
+                               InstrStage2<5, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [4, 1]>,
+  //
+  // Half to Single FP Convert
+  InstrItinData,
+                               InstrStage2<3, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [2, 1]>,
+
   //
   // Single-Precision FP to Integer Convert
   InstrItinData,




From asl at math.spbu.ru  Wed Apr  7 13:19:51 2010
From: asl at math.spbu.ru (Anton Korobeynikov)
Date: Wed, 07 Apr 2010 18:19:51 -0000
Subject: [llvm-commits] [llvm] r100648 -
	/llvm/trunk/lib/Target/ARM/ARMScheduleV7.td
Message-ID: <20100407181951.A88E22A6C12C@llvm.org>

Author: asl
Date: Wed Apr  7 13:19:51 2010
New Revision: 100648

URL: http://llvm.org/viewvc/llvm-project?rev=100648&view=rev
Log:
Proper cycle times for locks, since wbck latency can be larger than fwd latency.

Modified:
    llvm/trunk/lib/Target/ARM/ARMScheduleV7.td

Modified: llvm/trunk/lib/Target/ARM/ARMScheduleV7.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMScheduleV7.td?rev=100648&r1=100647&r2=100648&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMScheduleV7.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMScheduleV7.td Wed Apr  7 13:19:51 2010
@@ -600,7 +600,8 @@
   //
   // Every VFP instruction:
   //  - Acquires DRegsVFP resource for 1 cycle
-  //  - Reserves DRegsN resource for the whole duration.
+  //  - Reserves DRegsN resource for the whole duration (including time to
+  //    register file writeback!).
   // Every NEON instruction does the same but with FUs swapped.
   //
   // Since the reserved FU cannot be acquired this models precisely "cross-domain"
@@ -612,39 +613,43 @@
   // FP Special Register to Integer Register File Move
   InstrItinData,
                               InstrStage2<2, [FU_DRegsN],   0, Reserved>,
-                              InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                              InstrStage<1, [FU_NPipe]>]>,
+                              InstrStage<1,  [FU_Pipe0, FU_Pipe1]>,
+                              InstrStage<1,  [FU_NPipe]>]>,
   //
   // Single-precision FP Unary
   InstrItinData,
-                               InstrStage2<2, [FU_DRegsN],   0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_NPipe]>], [1, 1]>,
+                               // Extra 1 latency cycle since wbck is 2 cycles
+                               InstrStage2<3, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1,  [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1,  [FU_NPipe]>], [1, 1]>,
   //
   // Double-precision FP Unary
   InstrItinData,
-                               InstrStage2<2, [FU_DRegsN],   0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_NPipe]>], [1, 1]>,
+                               // Extra 1 latency cycle since wbck is 2 cycles
+                               InstrStage2<3, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1,  [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1,  [FU_NPipe]>], [1, 1]>,
 
   //
   // Single-precision FP Compare
   InstrItinData,
-                               InstrStage2<2, [FU_DRegsN],   0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_NPipe]>], [1, 1]>,
+                               // Extra 3 latency cycle since wbck is 4 cycles
+                               InstrStage2<5, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1,  [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1,  [FU_NPipe]>], [1, 1]>,
   //
   // Double-precision FP Compare
   InstrItinData,
-                               InstrStage2<2, [FU_DRegsN],   0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_NPipe]>], [1, 1]>,
+                               // Extra 3 latency cycle since wbck is 4 cycles
+                               InstrStage2<5, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1,  [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1,  [FU_NPipe]>], [1, 1]>,
   //
   // Single to Double FP Convert
   InstrItinData,
                                InstrStage2<5, [FU_DRegsN],   0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_NPipe]>], [4, 1]>,
+                               InstrStage<1,  [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1,  [FU_NPipe]>], [4, 1]>,
   //
   // Double to Single FP Convert
   InstrItinData,
@@ -662,93 +667,93 @@
   // Half to Single FP Convert
   InstrItinData,
                                InstrStage2<3, [FU_DRegsN],   0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_NPipe]>], [2, 1]>,
+                               InstrStage<1,  [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1,  [FU_NPipe]>], [2, 1]>,
 
   //
   // Single-Precision FP to Integer Convert
   InstrItinData,
                                InstrStage2<5, [FU_DRegsN],   0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_NPipe]>], [4, 1]>,
+                               InstrStage<1,  [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1,  [FU_NPipe]>], [4, 1]>,
   //
   // Double-Precision FP to Integer Convert
   InstrItinData,
                                InstrStage2<5, [FU_DRegsN],   0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_NPipe]>], [4, 1]>,
+                               InstrStage<1,  [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1,  [FU_NPipe]>], [4, 1]>,
   //
   // Integer to Single-Precision FP Convert
   InstrItinData,
                                InstrStage2<5, [FU_DRegsN],   0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_NPipe]>], [4, 1]>,
+                               InstrStage<1,  [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1,  [FU_NPipe]>], [4, 1]>,
   //
   // Integer to Double-Precision FP Convert
   InstrItinData,
                                InstrStage2<5, [FU_DRegsN],   0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_NPipe]>], [4, 1]>,
+                               InstrStage<1,  [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1,  [FU_NPipe]>], [4, 1]>,
   //
   // Single-precision FP ALU
   InstrItinData,
                                InstrStage2<5, [FU_DRegsN],   0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_NPipe]>], [4, 1, 1]>,
+                               InstrStage<1,  [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1,  [FU_NPipe]>], [4, 1, 1]>,
   //
   // Double-precision FP ALU
   InstrItinData,
                                InstrStage2<5, [FU_DRegsN],   0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_NPipe]>], [4, 1, 1]>,
+                               InstrStage<1,  [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1,  [FU_NPipe]>], [4, 1, 1]>,
   //
   // Single-precision FP Multiply
   InstrItinData,
                                InstrStage2<6, [FU_DRegsN],   0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_NPipe]>], [5, 1, 1]>,
+                               InstrStage<1,  [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1,  [FU_NPipe]>], [5, 1, 1]>,
   //
   // Double-precision FP Multiply
   InstrItinData,
                                InstrStage2<7, [FU_DRegsN],   0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<2, [FU_NPipe]>], [6, 1, 1]>,
+                               InstrStage<1,  [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<2,  [FU_NPipe]>], [6, 1, 1]>,
   //
   // Single-precision FP MAC
   InstrItinData,
                                InstrStage2<9, [FU_DRegsN],   0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_NPipe]>], [8, 0, 1, 1]>,
+                               InstrStage<1,  [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1,  [FU_NPipe]>], [8, 0, 1, 1]>,
   //
   // Double-precision FP MAC
-  InstrItinData,
+  InstrItinData,
                                InstrStage2<10, [FU_DRegsN],  0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<2, [FU_NPipe]>], [9, 0, 1, 1]>,
+                               InstrStage<1,   [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<2,   [FU_NPipe]>], [9, 0, 1, 1]>,
   //
   // Single-precision FP DIV
-  InstrItinData,
+  InstrItinData,
                                InstrStage2<16, [FU_DRegsN],  0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<10, [FU_NPipe]>], [15, 1, 1]>,
+                               InstrStage<1,   [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<10,  [FU_NPipe]>], [15, 1, 1]>,
   //
   // Double-precision FP DIV
-  InstrItinData,
+  InstrItinData,
                                InstrStage2<26, [FU_DRegsN],  0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<20, [FU_NPipe]>], [25, 1, 1]>,
+                               InstrStage<1,   [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<20,  [FU_NPipe]>], [25, 1, 1]>,
   //
   // Single-precision FP SQRT
-  InstrItinData,
+  InstrItinData,
                                InstrStage2<18, [FU_DRegsN],   0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<13, [FU_NPipe]>], [17, 1]>,
+                               InstrStage<1,   [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<13,  [FU_NPipe]>], [17, 1]>,
   //
   // Double-precision FP SQRT
-  InstrItinData,
+  InstrItinData,
                                InstrStage2<33, [FU_DRegsN],   0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<28, [FU_NPipe]>], [32, 1]>
+                               InstrStage<1,   [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<28,  [FU_NPipe]>], [32, 1]>
 ]>;
 
 




From asl at math.spbu.ru  Wed Apr  7 13:19:56 2010
From: asl at math.spbu.ru (Anton Korobeynikov)
Date: Wed, 07 Apr 2010 18:19:56 -0000
Subject: [llvm-commits] [llvm] r100649 -
	/llvm/trunk/lib/Target/ARM/ARMInstrVFP.td
Message-ID: <20100407181956.ACBBD2A6C12D@llvm.org>

Author: asl
Date: Wed Apr  7 13:19:56 2010
New Revision: 100649

URL: http://llvm.org/viewvc/llvm-project?rev=100649&view=rev
Log:
FCONST{S,D} behaves the same way as FP unary instructions. This is true for both A8 and A9.

Modified:
    llvm/trunk/lib/Target/ARM/ARMInstrVFP.td

Modified: llvm/trunk/lib/Target/ARM/ARMInstrVFP.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrVFP.td?rev=100649&r1=100648&r2=100649&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMInstrVFP.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMInstrVFP.td Wed Apr  7 13:19:56 2010
@@ -678,7 +678,7 @@
 // Materialize FP immediates. VFP3 only.
 let isReMaterializable = 1 in {
 def FCONSTD : VFPAI<(outs DPR:$dst), (ins vfp_f64imm:$imm),
-                    VFPMiscFrm, IIC_VMOVImm,
+                    VFPMiscFrm, IIC_fpUNA64,
                     "vmov", ".f64\t$dst, $imm",
                     [(set DPR:$dst, vfp_f64imm:$imm)]>, Requires<[HasVFP3]> {
   let Inst{27-23} = 0b11101;
@@ -689,7 +689,7 @@
 }
 
 def FCONSTS : VFPAI<(outs SPR:$dst), (ins vfp_f32imm:$imm),
-                    VFPMiscFrm, IIC_VMOVImm,
+                    VFPMiscFrm, IIC_fpUNA32,
                     "vmov", ".f32\t$dst, $imm",
                     [(set SPR:$dst, vfp_f32imm:$imm)]>, Requires<[HasVFP3]> {
   let Inst{27-23} = 0b11101;




From asl at math.spbu.ru  Wed Apr  7 13:20:02 2010
From: asl at math.spbu.ru (Anton Korobeynikov)
Date: Wed, 07 Apr 2010 18:20:02 -0000
Subject: [llvm-commits] [llvm] r100650 - in /llvm/trunk/lib/Target/ARM:
 ARMInstrVFP.td ARMSchedule.td ARMScheduleV7.td
Message-ID: <20100407182002.706222A6C12C@llvm.org>

Author: asl
Date: Wed Apr  7 13:20:02 2010
New Revision: 100650

URL: http://llvm.org/viewvc/llvm-project?rev=100650&view=rev
Log:
Define new itin classes for ARM <-> VFP reg moves to distinguish from NEON ops. Define proper scheduling itinerary for them on A9. A8 TRM does not specify latency for them at all :(

Modified:
    llvm/trunk/lib/Target/ARM/ARMInstrVFP.td
    llvm/trunk/lib/Target/ARM/ARMSchedule.td
    llvm/trunk/lib/Target/ARM/ARMScheduleV7.td

Modified: llvm/trunk/lib/Target/ARM/ARMInstrVFP.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrVFP.td?rev=100650&r1=100649&r2=100650&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMInstrVFP.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMInstrVFP.td Wed Apr  7 13:20:02 2010
@@ -306,23 +306,23 @@
 //
 
 def VMOVRS : AVConv2I<0b11100001, 0b1010, (outs GPR:$dst), (ins SPR:$src),
-                 IIC_VMOVSI, "vmov", "\t$dst, $src",
+                 IIC_fpMOVSI, "vmov", "\t$dst, $src",
                  [(set GPR:$dst, (bitconvert SPR:$src))]>;
 
 def VMOVSR : AVConv4I<0b11100000, 0b1010, (outs SPR:$dst), (ins GPR:$src),
-                 IIC_VMOVIS, "vmov", "\t$dst, $src",
+                 IIC_fpMOVIS, "vmov", "\t$dst, $src",
                  [(set SPR:$dst, (bitconvert GPR:$src))]>;
 
 def VMOVRRD  : AVConv3I<0b11000101, 0b1011,
                       (outs GPR:$wb, GPR:$dst2), (ins DPR:$src),
-                 IIC_VMOVDI, "vmov", "\t$wb, $dst2, $src",
+                 IIC_fpMOVDI, "vmov", "\t$wb, $dst2, $src",
                  [/* FIXME: Can't write pattern for multiple result instr*/]> {
   let Inst{7-6} = 0b00;
 }
 
 def VMOVRRS  : AVConv3I<0b11000101, 0b1010,
                       (outs GPR:$wb, GPR:$dst2), (ins SPR:$src1, SPR:$src2),
-                 IIC_VMOVDI, "vmov", "\t$wb, $dst2, $src1, $src2",
+                 IIC_fpMOVDI, "vmov", "\t$wb, $dst2, $src1, $src2",
                  [/* For disassembly only; pattern left blank */]> {
   let Inst{7-6} = 0b00;
 }
@@ -332,14 +332,14 @@
 
 def VMOVDRR : AVConv5I<0b11000100, 0b1011,
                      (outs DPR:$dst), (ins GPR:$src1, GPR:$src2),
-                IIC_VMOVID, "vmov", "\t$dst, $src1, $src2",
+                IIC_fpMOVID, "vmov", "\t$dst, $src1, $src2",
                 [(set DPR:$dst, (arm_fmdrr GPR:$src1, GPR:$src2))]> {
   let Inst{7-6} = 0b00;
 }
 
 def VMOVSRR : AVConv5I<0b11000100, 0b1010,
                      (outs SPR:$dst1, SPR:$dst2), (ins GPR:$src1, GPR:$src2),
-                IIC_VMOVID, "vmov", "\t$dst1, $dst2, $src1, $src2",
+                IIC_fpMOVID, "vmov", "\t$dst1, $dst2, $src1, $src2",
                 [/* For disassembly only; pattern left blank */]> {
   let Inst{7-6} = 0b00;
 }

Modified: llvm/trunk/lib/Target/ARM/ARMSchedule.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMSchedule.td?rev=100650&r1=100649&r2=100650&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMSchedule.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMSchedule.td Wed Apr  7 13:20:02 2010
@@ -77,6 +77,10 @@
 def IIC_fpCVTID    : InstrItinClass;
 def IIC_fpCVTSI    : InstrItinClass;
 def IIC_fpCVTDI    : InstrItinClass;
+def IIC_fpMOVIS    : InstrItinClass;
+def IIC_fpMOVID    : InstrItinClass;
+def IIC_fpMOVSI    : InstrItinClass;
+def IIC_fpMOVDI    : InstrItinClass;
 def IIC_fpALU32    : InstrItinClass;
 def IIC_fpALU64    : InstrItinClass;
 def IIC_fpMUL32    : InstrItinClass;

Modified: llvm/trunk/lib/Target/ARM/ARMScheduleV7.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMScheduleV7.td?rev=100650&r1=100649&r2=100650&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMScheduleV7.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMScheduleV7.td Wed Apr  7 13:20:02 2010
@@ -753,7 +753,34 @@
   InstrItinData,
                                InstrStage2<33, [FU_DRegsN],   0, Reserved>,
                                InstrStage<1,   [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<28,  [FU_NPipe]>], [32, 1]>
+                               InstrStage<28,  [FU_NPipe]>], [32, 1]>,
+
+  //
+  // Integer to Single-precision Move
+  InstrItinData,
+                               // Extra 1 latency cycle since wbck is 2 cycles
+                               InstrStage2<3, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1,  [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1,  [FU_NPipe]>], [1, 1]>,
+  //
+  // Integer to Double-precision Move
+  InstrItinData,
+                               // Extra 1 latency cycle since wbck is 2 cycles
+                               InstrStage2<3, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1,  [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1,  [FU_NPipe]>], [1, 1, 1]>,
+  //
+  // Single-precision to Integer Move
+  InstrItinData,
+                               InstrStage2<2, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1,  [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1,  [FU_NPipe]>], [1, 1]>,
+  //
+  // Double-precision to Integer Move
+  InstrItinData,
+                               InstrStage2<2, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1,  [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1,  [FU_NPipe]>], [1, 1, 1]>
 ]>;
 
 




From asl at math.spbu.ru  Wed Apr  7 13:20:07 2010
From: asl at math.spbu.ru (Anton Korobeynikov)
Date: Wed, 07 Apr 2010 18:20:07 -0000
Subject: [llvm-commits] [llvm] r100651 -
	/llvm/trunk/lib/Target/ARM/ARMScheduleV7.td
Message-ID: <20100407182007.5526E2A6C12D@llvm.org>

Author: asl
Date: Wed Apr  7 13:20:07 2010
New Revision: 100651

URL: http://llvm.org/viewvc/llvm-project?rev=100651&view=rev
Log:
Some easy NEON scheduling goodness for A9

Modified:
    llvm/trunk/lib/Target/ARM/ARMScheduleV7.td

Modified: llvm/trunk/lib/Target/ARM/ARMScheduleV7.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMScheduleV7.td?rev=100651&r1=100650&r2=100651&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMScheduleV7.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMScheduleV7.td Wed Apr  7 13:20:07 2010
@@ -780,7 +780,59 @@
   InstrItinData,
                                InstrStage2<2, [FU_DRegsN],   0, Reserved>,
                                InstrStage<1,  [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1,  [FU_NPipe]>], [1, 1, 1]>
+                               InstrStage<1,  [FU_NPipe]>], [1, 1, 1]>,
+  // NEON
+  // Issue through integer pipeline, and execute in NEON unit.
+
+  //
+  // Double-register Integer Binary
+  InstrItinData,
+                               // Extra 3 latency cycle since wbck is 6 cycles
+                               InstrStage2<7, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [3, 2, 2]>,
+  //
+  // Quad-register Integer Binary
+  InstrItinData,
+                               // Extra 3 latency cycle since wbck is 6 cycles
+                               InstrStage2<7, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [3, 2, 2]>,
+  //
+  // Double-register Integer Subtract
+  InstrItinData,
+                               // Extra 3 latency cycle since wbck is 6 cycles
+                               InstrStage2<7, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [3, 2, 1]>,
+  //
+  // Quad-register Integer Subtract
+  InstrItinData,
+                               // Extra 3 latency cycle since wbck is 6 cycles
+                               InstrStage2<7, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [3, 2, 1]>,
+  //
+  // Double-register Integer Shift
+  InstrItinData,
+                               // Extra 3 latency cycle since wbck is 6 cycles
+                               InstrStage2<7, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [3, 1, 1]>,
+  //
+  // Double-register Integer Binary (4 cycle)
+  InstrItinData,
+                               // Extra 3 latency cycle since wbck is 6 cycles
+                               InstrStage2<7, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [4, 2, 2]>,
+  //
+  // Quad-register Integer Binary (4 cycle)
+  InstrItinData,
+                               // Extra 3 latency cycle since wbck is 6 cycles
+                               InstrStage2<7, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [4, 2, 2]>
 ]>;
 
 




From asl at math.spbu.ru  Wed Apr  7 13:20:13 2010
From: asl at math.spbu.ru (Anton Korobeynikov)
Date: Wed, 07 Apr 2010 18:20:13 -0000
Subject: [llvm-commits] [llvm] r100652 - in /llvm/trunk/lib/Target/ARM:
 ARMInstrNEON.td ARMSchedule.td ARMScheduleV7.td
Message-ID: <20100407182013.6632B2A6C12C@llvm.org>

Author: asl
Date: Wed Apr  7 13:20:13 2010
New Revision: 100652

URL: http://llvm.org/viewvc/llvm-project?rev=100652&view=rev
Log:
VHADD differs from VHSUB, at least on A9: the former reads both operands in the second cycle, while the latter reads its second operand in the first cycle. Introduce new itin classes to capture this behavior. Whether this is true for A8 as well is still a work in progress.

Modified:
    llvm/trunk/lib/Target/ARM/ARMInstrNEON.td
    llvm/trunk/lib/Target/ARM/ARMSchedule.td
    llvm/trunk/lib/Target/ARM/ARMScheduleV7.td

Modified: llvm/trunk/lib/Target/ARM/ARMInstrNEON.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrNEON.td?rev=100652&r1=100651&r2=100652&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMInstrNEON.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMInstrNEON.td Wed Apr  7 13:20:13 2010
@@ -2257,17 +2257,17 @@
 defm VSUBWu   : N3VWInt_QHS<1,1,0b0011,0, "vsubw", "u", int_arm_neon_vsubwu, 0>;
 //   VHSUB    : Vector Halving Subtract
 defm VHSUBs   : N3VInt_QHS<0, 0, 0b0010, 0, N3RegFrm,
-                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
+                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                            "vhsub", "s", int_arm_neon_vhsubs, 0>;
 defm VHSUBu   : N3VInt_QHS<1, 0, 0b0010, 0, N3RegFrm,
-                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
+                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                            "vhsub", "u", int_arm_neon_vhsubu, 0>;
 //   VQSUB    : Vector Saturing Subtract
 defm VQSUBs   : N3VInt_QHSD<0, 0, 0b0010, 1, N3RegFrm,
-                            IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
+                            IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                             "vqsub", "s", int_arm_neon_vqsubs, 0>;
 defm VQSUBu   : N3VInt_QHSD<1, 0, 0b0010, 1, N3RegFrm,
-                            IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
+                            IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                             "vqsub", "u", int_arm_neon_vqsubu, 0>;
 //   VSUBHN   : Vector Subtract and Narrow Returning High Half (D = Q - Q)
 defm VSUBHN   : N3VNInt_HSD<0,1,0b0110,0, "vsubhn", "i",
@@ -2279,8 +2279,8 @@
 // Vector Comparisons.
 
 //   VCEQ     : Vector Compare Equal
-defm VCEQ     : N3V_QHS<1, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
-                        IIC_VBINi4Q, "vceq", "i", NEONvceq, 1>;
+defm VCEQ     : N3V_QHS<1, 0, 0b1000, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
+                        IIC_VSUBi4Q, "vceq", "i", NEONvceq, 1>;
 def  VCEQfd   : N3VD<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32,
                      NEONvceq, 1>;
 def  VCEQfq   : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32,
@@ -2290,10 +2290,10 @@
                            "$dst, $src, #0">;
 
 //   VCGE     : Vector Compare Greater Than or Equal
-defm VCGEs    : N3V_QHS<0, 0, 0b0011, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
-                        IIC_VBINi4Q, "vcge", "s", NEONvcge, 0>;
-defm VCGEu    : N3V_QHS<1, 0, 0b0011, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, 
-                        IIC_VBINi4Q, "vcge", "u", NEONvcgeu, 0>;
+defm VCGEs    : N3V_QHS<0, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
+                        IIC_VSUBi4Q, "vcge", "s", NEONvcge, 0>;
+defm VCGEu    : N3V_QHS<1, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, 
+                        IIC_VSUBi4Q, "vcge", "u", NEONvcgeu, 0>;
 def  VCGEfd   : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32,
                      NEONvcge, 0>;
 def  VCGEfq   : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32,
@@ -2306,10 +2306,10 @@
                             "$dst, $src, #0">;
 
 //   VCGT     : Vector Compare Greater Than
-defm VCGTs    : N3V_QHS<0, 0, 0b0011, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, 
-                        IIC_VBINi4Q, "vcgt", "s", NEONvcgt, 0>;
-defm VCGTu    : N3V_QHS<1, 0, 0b0011, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, 
-                        IIC_VBINi4Q, "vcgt", "u", NEONvcgtu, 0>;
+defm VCGTs    : N3V_QHS<0, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
+                        IIC_VSUBi4Q, "vcgt", "s", NEONvcgt, 0>;
+defm VCGTu    : N3V_QHS<1, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
+                        IIC_VSUBi4Q, "vcgt", "u", NEONvcgtu, 0>;
 def  VCGTfd   : N3VD<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32,
                      NEONvcgt, 0>;
 def  VCGTfq   : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32,
@@ -2446,11 +2446,19 @@
 // Vector Absolute Differences.
 
 //   VABD     : Vector Absolute Difference
+<<<<<<< HEAD
 defm VABDs    : N3VInt_QHS<0, 0, 0b0111, 0, N3RegFrm,
                             IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                            "vabd", "s", int_arm_neon_vabds, 0>;
 defm VABDu    : N3VInt_QHS<1, 0, 0b0111, 0, N3RegFrm,
                             IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
+=======
+defm VABDs    : N3VInt_QHS<0, 0, 0b0111, 0, IIC_VSUBi4D, IIC_VSUBi4D,
+                           IIC_VSUBi4Q, IIC_VSUBi4Q,
+                           "vabd", "s", int_arm_neon_vabds, 0>;
+defm VABDu    : N3VInt_QHS<1, 0, 0b0111, 0, IIC_VSUBi4D, IIC_VSUBi4D,
+                           IIC_VSUBi4Q, IIC_VSUBi4Q,
+>>>>>>> VHADD differs from VHSUB at least on A9 - the former reads both operands in the
                            "vabd", "u", int_arm_neon_vabdu, 0>;
 def  VABDfd   : N3VDInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBIND,
                         "vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 0>;
@@ -2458,9 +2466,9 @@
                         "vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 0>;
 
 //   VABDL    : Vector Absolute Difference Long (Q = | D - D |)
-defm VABDLs   : N3VLInt_QHS<0,1,0b0111,0, IIC_VBINi4Q,
+defm VABDLs   : N3VLInt_QHS<0,1,0b0111,0, IIC_VSUBi4Q,
                             "vabdl", "s", int_arm_neon_vabdls, 0>;
-defm VABDLu   : N3VLInt_QHS<1,1,0b0111,0, IIC_VBINi4Q,
+defm VABDLu   : N3VLInt_QHS<1,1,0b0111,0, IIC_VSUBi4Q,
                              "vabdl", "u", int_arm_neon_vabdlu, 0>;
 
 //   VABA     : Vector Absolute Difference and Accumulate
@@ -2474,6 +2482,7 @@
 // Vector Maximum and Minimum.
 
 //   VMAX     : Vector Maximum
+<<<<<<< HEAD
 defm VMAXs    : N3VInt_QHS<0, 0, 0b0110, 0, N3RegFrm,
                            IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                            "vmax", "s", int_arm_neon_vmaxs, 1>;
@@ -2496,6 +2505,26 @@
                         "f32", v2f32, v2f32, int_arm_neon_vmins, 1>;
 def  VMINfq   : N3VQInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINQ, "vmin",
                         "f32", v4f32, v4f32, int_arm_neon_vmins, 1>;
+=======
+defm VMAXs    : N3VInt_QHS<0,0,0b0110,0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
+                           IIC_VSUBi4Q, "vmax", "s", int_arm_neon_vmaxs, 1>;
+defm VMAXu    : N3VInt_QHS<1,0,0b0110,0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
+                           IIC_VSUBi4Q, "vmax", "u", int_arm_neon_vmaxu, 1>;
+def  VMAXfd   : N3VDInt<0, 0, 0b00, 0b1111, 0, IIC_VBIND, "vmax", "f32",
+                        v2f32, v2f32, int_arm_neon_vmaxs, 1>;
+def  VMAXfq   : N3VQInt<0, 0, 0b00, 0b1111, 0, IIC_VBINQ, "vmax", "f32",
+                        v4f32, v4f32, int_arm_neon_vmaxs, 1>;
+
+//   VMIN     : Vector Minimum
+defm VMINs    : N3VInt_QHS<0,0,0b0110,1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
+                           IIC_VSUBi4Q, "vmin", "s", int_arm_neon_vmins, 1>;
+defm VMINu    : N3VInt_QHS<1,0,0b0110,1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
+                           IIC_VSUBi4Q, "vmin", "u", int_arm_neon_vminu, 1>;
+def  VMINfd   : N3VDInt<0, 0, 0b10, 0b1111, 0, IIC_VBIND, "vmin", "f32",
+                        v2f32, v2f32, int_arm_neon_vmins, 1>;
+def  VMINfq   : N3VQInt<0, 0, 0b10, 0b1111, 0, IIC_VBINQ, "vmin", "f32",
+                        v4f32, v4f32, int_arm_neon_vmins, 1>;
+>>>>>>> VHADD differs from VHSUB at least on A9 - the former reads both operands in the
 
 // Vector Pairwise Operations.
 

Modified: llvm/trunk/lib/Target/ARM/ARMSchedule.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMSchedule.td?rev=100652&r1=100651&r2=100652&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMSchedule.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMSchedule.td Wed Apr  7 13:20:13 2010
@@ -133,6 +133,8 @@
 def IIC_VSUBiQ     : InstrItinClass;
 def IIC_VBINi4D    : InstrItinClass;
 def IIC_VBINi4Q    : InstrItinClass;
+def IIC_VSUBi4D    : InstrItinClass;
+def IIC_VSUBi4Q    : InstrItinClass;
 def IIC_VSHLiD     : InstrItinClass;
 def IIC_VSHLiQ     : InstrItinClass;
 def IIC_VSHLi4D    : InstrItinClass;

Modified: llvm/trunk/lib/Target/ARM/ARMScheduleV7.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMScheduleV7.td?rev=100652&r1=100651&r2=100652&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMScheduleV7.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMScheduleV7.td Wed Apr  7 13:20:13 2010
@@ -480,6 +480,7 @@
   // Quad-register Integer Binary (4 cycle)
   InstrItinData,
                                InstrStage<1, [FU_NPipe]>], [4, 2, 1]>,
+
   //
   // Double-register Integer Subtract
   InstrItinData,
@@ -489,6 +490,14 @@
   InstrItinData,
                                InstrStage<1, [FU_NPipe]>], [3, 2, 1]>,
   //
+  // Double-register Integer Subtract
+  InstrItinData,
+                               InstrStage<1, [FU_NPipe]>], [4, 2, 1]>,
+  //
+  // Quad-register Integer Subtract
+  InstrItinData,
+                               InstrStage<1, [FU_NPipe]>], [4, 2, 1]>,
+  //
   // Double-register Integer Shift
   InstrItinData,
                                InstrStage<1, [FU_NPipe]>], [3, 1, 1]>,
@@ -832,7 +841,21 @@
                                // Extra 3 latency cycle since wbck is 6 cycles
                                InstrStage2<7, [FU_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_NPipe]>], [4, 2, 2]>
+                               InstrStage<1, [FU_NPipe]>], [4, 2, 2]>,
+  //
+  // Double-register Integer Subtract (4 cycle)
+  InstrItinData,
+                               // Extra 3 latency cycle since wbck is 6 cycles
+                               InstrStage2<7, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [4, 2, 1]>,
+  //
+  // Quad-register Integer Subtract (4 cycle)
+  InstrItinData,
+                               // Extra 3 latency cycle since wbck is 6 cycles
+                               InstrStage2<7, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [4, 2, 1]>
 ]>;
 
 




From asl at math.spbu.ru  Wed Apr  7 13:20:18 2010
From: asl at math.spbu.ru (Anton Korobeynikov)
Date: Wed, 07 Apr 2010 18:20:18 -0000
Subject: [llvm-commits] [llvm] r100653 -
	/llvm/trunk/lib/Target/ARM/ARMInstrNEON.td
Message-ID: <20100407182018.DDDC32A6C12D@llvm.org>

Author: asl
Date: Wed Apr  7 13:20:18 2010
New Revision: 100653

URL: http://llvm.org/viewvc/llvm-project?rev=100653&view=rev
Log:
VP{MAX, MIN} are of IIC_VSUBi4D itin class as well.

Modified:
    llvm/trunk/lib/Target/ARM/ARMInstrNEON.td

Modified: llvm/trunk/lib/Target/ARM/ARMInstrNEON.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrNEON.td?rev=100653&r1=100652&r2=100653&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMInstrNEON.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMInstrNEON.td Wed Apr  7 13:20:18 2010
@@ -2446,19 +2446,11 @@
 // Vector Absolute Differences.
 
 //   VABD     : Vector Absolute Difference
-<<<<<<< HEAD
 defm VABDs    : N3VInt_QHS<0, 0, 0b0111, 0, N3RegFrm,
-                            IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
+                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                            "vabd", "s", int_arm_neon_vabds, 0>;
 defm VABDu    : N3VInt_QHS<1, 0, 0b0111, 0, N3RegFrm,
-                            IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
-=======
-defm VABDs    : N3VInt_QHS<0, 0, 0b0111, 0, IIC_VSUBi4D, IIC_VSUBi4D,
-                           IIC_VSUBi4Q, IIC_VSUBi4Q,
-                           "vabd", "s", int_arm_neon_vabds, 0>;
-defm VABDu    : N3VInt_QHS<1, 0, 0b0111, 0, IIC_VSUBi4D, IIC_VSUBi4D,
-                           IIC_VSUBi4Q, IIC_VSUBi4Q,
->>>>>>> VHADD differs from VHSUB at least on A9 - the former reads both operands in the
+                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                            "vabd", "u", int_arm_neon_vabdu, 0>;
 def  VABDfd   : N3VDInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBIND,
                         "vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 0>;
@@ -2551,6 +2543,7 @@
                               int_arm_neon_vpadalu>;
 
 //   VPMAX    : Vector Pairwise Maximum
+<<<<<<< HEAD
 def  VPMAXs8  : N3VDInt<0, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VBINi4D, "vpmax",
                         "s8", v8i8, v8i8, int_arm_neon_vpmaxs, 0>;
 def  VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VBINi4D, "vpmax",
@@ -2581,6 +2574,38 @@
                         "u32", v2i32, v2i32, int_arm_neon_vpminu, 0>;
 def  VPMINf   : N3VDInt<1, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINi4D, "vpmin",
                         "f32", v2f32, v2f32, int_arm_neon_vpmins, 0>;
+=======
+def  VPMAXs8  : N3VDInt<0, 0, 0b00, 0b1010, 0, IIC_VSUBi4D, "vpmax", "s8",
+                        v8i8, v8i8, int_arm_neon_vpmaxs, 0>;
+def  VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, IIC_VSUBi4D, "vpmax", "s16",
+                        v4i16, v4i16, int_arm_neon_vpmaxs, 0>;
+def  VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, IIC_VSUBi4D, "vpmax", "s32",
+                        v2i32, v2i32, int_arm_neon_vpmaxs, 0>;
+def  VPMAXu8  : N3VDInt<1, 0, 0b00, 0b1010, 0, IIC_VSUBi4D, "vpmax", "u8",
+                        v8i8, v8i8, int_arm_neon_vpmaxu, 0>;
+def  VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, IIC_VSUBi4D, "vpmax", "u16",
+                        v4i16, v4i16, int_arm_neon_vpmaxu, 0>;
+def  VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, IIC_VSUBi4D, "vpmax", "u32",
+                        v2i32, v2i32, int_arm_neon_vpmaxu, 0>;
+def  VPMAXf   : N3VDInt<1, 0, 0b00, 0b1111, 0, IIC_VSUBi4D, "vpmax", "f32",
+                        v2f32, v2f32, int_arm_neon_vpmaxs, 0>;
+
+//   VPMIN    : Vector Pairwise Minimum
+def  VPMINs8  : N3VDInt<0, 0, 0b00, 0b1010, 1, IIC_VSUBi4D, "vpmin", "s8",
+                        v8i8, v8i8, int_arm_neon_vpmins, 0>;
+def  VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, IIC_VSUBi4D, "vpmin", "s16",
+                        v4i16, v4i16, int_arm_neon_vpmins, 0>;
+def  VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, IIC_VSUBi4D, "vpmin", "s32",
+                        v2i32, v2i32, int_arm_neon_vpmins, 0>;
+def  VPMINu8  : N3VDInt<1, 0, 0b00, 0b1010, 1, IIC_VSUBi4D, "vpmin", "u8",
+                        v8i8, v8i8, int_arm_neon_vpminu, 0>;
+def  VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, IIC_VSUBi4D, "vpmin", "u16",
+                        v4i16, v4i16, int_arm_neon_vpminu, 0>;
+def  VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, IIC_VSUBi4D, "vpmin", "u32",
+                        v2i32, v2i32, int_arm_neon_vpminu, 0>;
+def  VPMINf   : N3VDInt<1, 0, 0b10, 0b1111, 0, IIC_VSUBi4D, "vpmin", "f32",
+                        v2f32, v2f32, int_arm_neon_vpmins, 0>;
+>>>>>>> VP{MAX, MIN} are of IIC_VSUBi4D itin class as well.
 
 // Vector Reciprocal and Reciprocal Square Root Estimate and Step.
 




From clattner at apple.com  Wed Apr  7 13:21:06 2010
From: clattner at apple.com (Chris Lattner)
Date: Wed, 7 Apr 2010 11:21:06 -0700
Subject: [llvm-commits] [llvm] r100640 - in /llvm/trunk:
	include/llvm/Target/TargetMachine.h
	lib/CodeGen/LLVMTargetMachine.cpp
In-Reply-To: <20100407181842.9D7192A6C12C@llvm.org>
References: <20100407181842.9D7192A6C12C@llvm.org>
Message-ID: <2FFCF782-4C2F-4E14-BC04-5831B42F6DD7@apple.com>


On Apr 7, 2010, at 11:18 AM, Anton Korobeynikov wrote:

> Author: asl
> Date: Wed Apr  7 13:18:42 2010
> New Revision: 100640
> 
> URL: http://llvm.org/viewvc/llvm-project?rev=100640&view=rev
> Log:
> Add hook to insert late LLVM=>LLVM passes just before isel

This hook should return void if you're going to ignore the result :).

> 
> Modified:
>    llvm/trunk/include/llvm/Target/TargetMachine.h
>    llvm/trunk/lib/CodeGen/LLVMTargetMachine.cpp
> 
> Modified: llvm/trunk/include/llvm/Target/TargetMachine.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetMachine.h?rev=100640&r1=100639&r2=100640&view=diff
> ==============================================================================
> --- llvm/trunk/include/llvm/Target/TargetMachine.h (original)
> +++ llvm/trunk/include/llvm/Target/TargetMachine.h Wed Apr  7 13:18:42 2010
> @@ -264,10 +264,15 @@
>                                           bool DisableVerify = true);
> 
>   /// Target-Independent Code Generator Pass Configuration Options.
> -  
> -  /// addInstSelector - This method should add any "last minute" LLVM->LLVM
> -  /// passes, then install an instruction selector pass, which converts from
> -  /// LLVM code to machine instructions.
> +
> +  /// addPreISelPasses - This method should add any "last minute" LLVM->LLVM
> +  /// passes (which are run just before instruction selector).
> +  virtual bool addPreISel(PassManagerBase &, CodeGenOpt::Level) {
> +    return true;
> +  }
> +
> +  /// addInstSelector - This method should install an instruction selector pass,
> +  /// which converts from LLVM code to machine instructions.
>   virtual bool addInstSelector(PassManagerBase &, CodeGenOpt::Level) {
>     return true;
>   }
> 
> Modified: llvm/trunk/lib/CodeGen/LLVMTargetMachine.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/LLVMTargetMachine.cpp?rev=100640&r1=100639&r2=100640&view=diff
> ==============================================================================
> --- llvm/trunk/lib/CodeGen/LLVMTargetMachine.cpp (original)
> +++ llvm/trunk/lib/CodeGen/LLVMTargetMachine.cpp Wed Apr  7 13:18:42 2010
> @@ -268,6 +268,8 @@
> 
>   PM.add(createStackProtectorPass(getTargetLowering()));
> 
> +  addPreISel(PM, OptLevel);
> +
>   if (PrintISelInput)
>     PM.add(createPrintFunctionPass("\n\n"
>                                    "*** Final LLVM Code input to ISel ***\n",
> 
> 
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits




From asl at math.spbu.ru  Wed Apr  7 13:20:24 2010
From: asl at math.spbu.ru (Anton Korobeynikov)
Date: Wed, 07 Apr 2010 18:20:24 -0000
Subject: [llvm-commits] [llvm] r100654 -
	/llvm/trunk/lib/Target/ARM/ARMInstrNEON.td
Message-ID: <20100407182024.920882A6C12C@llvm.org>

Author: asl
Date: Wed Apr  7 13:20:24 2010
New Revision: 100654

URL: http://llvm.org/viewvc/llvm-project?rev=100654&view=rev
Log:
Correct itinerary class for VPADD

Modified:
    llvm/trunk/lib/Target/ARM/ARMInstrNEON.td

Modified: llvm/trunk/lib/Target/ARM/ARMInstrNEON.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrNEON.td?rev=100654&r1=100653&r2=100654&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMInstrNEON.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMInstrNEON.td Wed Apr  7 13:20:24 2010
@@ -2474,61 +2474,48 @@
 // Vector Maximum and Minimum.
 
 //   VMAX     : Vector Maximum
-<<<<<<< HEAD
 defm VMAXs    : N3VInt_QHS<0, 0, 0b0110, 0, N3RegFrm,
-                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
+                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                            "vmax", "s", int_arm_neon_vmaxs, 1>;
 defm VMAXu    : N3VInt_QHS<1, 0, 0b0110, 0, N3RegFrm,
-                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
+                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                            "vmax", "u", int_arm_neon_vmaxu, 1>;
-def  VMAXfd   : N3VDInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBIND, "vmax",
-                        "f32", v2f32, v2f32, int_arm_neon_vmaxs, 1>;
-def  VMAXfq   : N3VQInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINQ, "vmax",
-                        "f32", v4f32, v4f32, int_arm_neon_vmaxs, 1>;
+def  VMAXfd   : N3VDInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBIND,
+                        "vmax", "f32",
+                        v2f32, v2f32, int_arm_neon_vmaxs, 1>;
+def  VMAXfq   : N3VQInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINQ,
+                        "vmax", "f32",
+                        v4f32, v4f32, int_arm_neon_vmaxs, 1>;
 
 //   VMIN     : Vector Minimum
 defm VMINs    : N3VInt_QHS<0, 0, 0b0110, 1, N3RegFrm,
-                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
+                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                            "vmin", "s", int_arm_neon_vmins, 1>;
 defm VMINu    : N3VInt_QHS<1, 0, 0b0110, 1, N3RegFrm,
-                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
+                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                            "vmin", "u", int_arm_neon_vminu, 1>;
-def  VMINfd   : N3VDInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBIND, "vmin",
-                        "f32", v2f32, v2f32, int_arm_neon_vmins, 1>;
-def  VMINfq   : N3VQInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINQ, "vmin",
-                        "f32", v4f32, v4f32, int_arm_neon_vmins, 1>;
-=======
-defm VMAXs    : N3VInt_QHS<0,0,0b0110,0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
-                           IIC_VSUBi4Q, "vmax", "s", int_arm_neon_vmaxs, 1>;
-defm VMAXu    : N3VInt_QHS<1,0,0b0110,0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
-                           IIC_VSUBi4Q, "vmax", "u", int_arm_neon_vmaxu, 1>;
-def  VMAXfd   : N3VDInt<0, 0, 0b00, 0b1111, 0, IIC_VBIND, "vmax", "f32",
-                        v2f32, v2f32, int_arm_neon_vmaxs, 1>;
-def  VMAXfq   : N3VQInt<0, 0, 0b00, 0b1111, 0, IIC_VBINQ, "vmax", "f32",
-                        v4f32, v4f32, int_arm_neon_vmaxs, 1>;
-
-//   VMIN     : Vector Minimum
-defm VMINs    : N3VInt_QHS<0,0,0b0110,1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
-                           IIC_VSUBi4Q, "vmin", "s", int_arm_neon_vmins, 1>;
-defm VMINu    : N3VInt_QHS<1,0,0b0110,1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
-                           IIC_VSUBi4Q, "vmin", "u", int_arm_neon_vminu, 1>;
-def  VMINfd   : N3VDInt<0, 0, 0b10, 0b1111, 0, IIC_VBIND, "vmin", "f32",
+def  VMINfd   : N3VDInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBIND,
+                        "vmin", "f32",
                         v2f32, v2f32, int_arm_neon_vmins, 1>;
-def  VMINfq   : N3VQInt<0, 0, 0b10, 0b1111, 0, IIC_VBINQ, "vmin", "f32",
+def  VMINfq   : N3VQInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINQ,
+                        "vmin", "f32",
                         v4f32, v4f32, int_arm_neon_vmins, 1>;
->>>>>>> VHADD differs from VHSUB at least on A9 - the former reads both operands in the
 
 // Vector Pairwise Operations.
 
 //   VPADD    : Vector Pairwise Add
-def  VPADDi8  : N3VDInt<0, 0, 0b00, 0b1011, 1, N3RegFrm, IIC_VBINiD, "vpadd",
-                        "i8", v8i8, v8i8, int_arm_neon_vpadd, 0>;
-def  VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, N3RegFrm, IIC_VBINiD, "vpadd",
-                        "i16", v4i16, v4i16, int_arm_neon_vpadd, 0>;
-def  VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, N3RegFrm, IIC_VBINiD, "vpadd",
-                        "i32", v2i32, v2i32, int_arm_neon_vpadd, 0>;
-def  VPADDf   : N3VDInt<1, 0, 0b00, 0b1101, 0, N3RegFrm, IIC_VBIND, "vpadd",
-                        "f32", v2f32, v2f32, int_arm_neon_vpadd, 0>;
+def  VPADDi8  : N3VDInt<0, 0, 0b00, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
+                        "vpadd", "i8",
+                        v8i8, v8i8, int_arm_neon_vpadd, 0>;
+def  VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
+                        "vpadd", "i16",
+                        v4i16, v4i16, int_arm_neon_vpadd, 0>;
+def  VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
+                        "vpadd", "i32",
+                        v2i32, v2i32, int_arm_neon_vpadd, 0>;
+def  VPADDf   : N3VDInt<1, 0, 0b00, 0b1101, 0, N3RegFrm, IIC_VSHLD,
+                        "vpadd", "f32",
+                        v2f32, v2f32, int_arm_neon_vpadd, 0>;
 
 //   VPADDL   : Vector Pairwise Add Long
 defm VPADDLs  : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00100, 0, "vpaddl", "s",
@@ -2543,69 +2530,36 @@
                               int_arm_neon_vpadalu>;
 
 //   VPMAX    : Vector Pairwise Maximum
-<<<<<<< HEAD
-def  VPMAXs8  : N3VDInt<0, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VBINi4D, "vpmax",
+def  VPMAXs8  : N3VDInt<0, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                         "s8", v8i8, v8i8, int_arm_neon_vpmaxs, 0>;
-def  VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VBINi4D, "vpmax",
+def  VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                         "s16", v4i16, v4i16, int_arm_neon_vpmaxs, 0>;
-def  VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VBINi4D, "vpmax",
+def  VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                         "s32", v2i32, v2i32, int_arm_neon_vpmaxs, 0>;
-def  VPMAXu8  : N3VDInt<1, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VBINi4D, "vpmax",
+def  VPMAXu8  : N3VDInt<1, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                         "u8", v8i8, v8i8, int_arm_neon_vpmaxu, 0>;
-def  VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VBINi4D, "vpmax",
+def  VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                         "u16", v4i16, v4i16, int_arm_neon_vpmaxu, 0>;
-def  VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VBINi4D, "vpmax",
+def  VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                         "u32", v2i32, v2i32, int_arm_neon_vpmaxu, 0>;
-def  VPMAXf   : N3VDInt<1, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINi4D, "vpmax",
+def  VPMAXf   : N3VDInt<1, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                         "f32", v2f32, v2f32, int_arm_neon_vpmaxs, 0>;
 
 //   VPMIN    : Vector Pairwise Minimum
-def  VPMINs8  : N3VDInt<0, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VBINi4D, "vpmin",
+def  VPMINs8  : N3VDInt<0, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                         "s8", v8i8, v8i8, int_arm_neon_vpmins, 0>;
-def  VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VBINi4D, "vpmin",
+def  VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                         "s16", v4i16, v4i16, int_arm_neon_vpmins, 0>;
-def  VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VBINi4D, "vpmin",
+def  VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                         "s32", v2i32, v2i32, int_arm_neon_vpmins, 0>;
-def  VPMINu8  : N3VDInt<1, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VBINi4D, "vpmin",
+def  VPMINu8  : N3VDInt<1, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                         "u8", v8i8, v8i8, int_arm_neon_vpminu, 0>;
-def  VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VBINi4D, "vpmin",
+def  VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                         "u16", v4i16, v4i16, int_arm_neon_vpminu, 0>;
-def  VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VBINi4D, "vpmin",
+def  VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                         "u32", v2i32, v2i32, int_arm_neon_vpminu, 0>;
-def  VPMINf   : N3VDInt<1, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINi4D, "vpmin",
+def  VPMINf   : N3VDInt<1, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VSUBi4D, "vpmin",
                         "f32", v2f32, v2f32, int_arm_neon_vpmins, 0>;
-=======
-def  VPMAXs8  : N3VDInt<0, 0, 0b00, 0b1010, 0, IIC_VSUBi4D, "vpmax", "s8",
-                        v8i8, v8i8, int_arm_neon_vpmaxs, 0>;
-def  VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, IIC_VSUBi4D, "vpmax", "s16",
-                        v4i16, v4i16, int_arm_neon_vpmaxs, 0>;
-def  VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, IIC_VSUBi4D, "vpmax", "s32",
-                        v2i32, v2i32, int_arm_neon_vpmaxs, 0>;
-def  VPMAXu8  : N3VDInt<1, 0, 0b00, 0b1010, 0, IIC_VSUBi4D, "vpmax", "u8",
-                        v8i8, v8i8, int_arm_neon_vpmaxu, 0>;
-def  VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, IIC_VSUBi4D, "vpmax", "u16",
-                        v4i16, v4i16, int_arm_neon_vpmaxu, 0>;
-def  VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, IIC_VSUBi4D, "vpmax", "u32",
-                        v2i32, v2i32, int_arm_neon_vpmaxu, 0>;
-def  VPMAXf   : N3VDInt<1, 0, 0b00, 0b1111, 0, IIC_VSUBi4D, "vpmax", "f32",
-                        v2f32, v2f32, int_arm_neon_vpmaxs, 0>;
-
-//   VPMIN    : Vector Pairwise Minimum
-def  VPMINs8  : N3VDInt<0, 0, 0b00, 0b1010, 1, IIC_VSUBi4D, "vpmin", "s8",
-                        v8i8, v8i8, int_arm_neon_vpmins, 0>;
-def  VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, IIC_VSUBi4D, "vpmin", "s16",
-                        v4i16, v4i16, int_arm_neon_vpmins, 0>;
-def  VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, IIC_VSUBi4D, "vpmin", "s32",
-                        v2i32, v2i32, int_arm_neon_vpmins, 0>;
-def  VPMINu8  : N3VDInt<1, 0, 0b00, 0b1010, 1, IIC_VSUBi4D, "vpmin", "u8",
-                        v8i8, v8i8, int_arm_neon_vpminu, 0>;
-def  VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, IIC_VSUBi4D, "vpmin", "u16",
-                        v4i16, v4i16, int_arm_neon_vpminu, 0>;
-def  VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, IIC_VSUBi4D, "vpmin", "u32",
-                        v2i32, v2i32, int_arm_neon_vpminu, 0>;
-def  VPMINf   : N3VDInt<1, 0, 0b10, 0b1111, 0, IIC_VSUBi4D, "vpmin", "f32",
-                        v2f32, v2f32, int_arm_neon_vpmins, 0>;
->>>>>>> VP{MAX, MIN} are of IIC_VSUBi4D itin class as well.
 
 // Vector Reciprocal and Reciprocal Square Root Estimate and Step.
 




From asl at math.spbu.ru  Wed Apr  7 13:20:29 2010
From: asl at math.spbu.ru (Anton Korobeynikov)
Date: Wed, 07 Apr 2010 18:20:29 -0000
Subject: [llvm-commits] [llvm] r100655 - in /llvm/trunk/lib/Target/ARM:
 ARMInstrNEON.td ARMScheduleV7.td
Message-ID: <20100407182030.0E7952A6C12D@llvm.org>

Author: asl
Date: Wed Apr  7 13:20:29 2010
New Revision: 100655

URL: http://llvm.org/viewvc/llvm-project?rev=100655&view=rev
Log:
More A9 itineraries

Modified:
    llvm/trunk/lib/Target/ARM/ARMInstrNEON.td
    llvm/trunk/lib/Target/ARM/ARMScheduleV7.td

Modified: llvm/trunk/lib/Target/ARM/ARMInstrNEON.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrNEON.td?rev=100655&r1=100654&r2=100655&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMInstrNEON.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMInstrNEON.td Wed Apr  7 13:20:29 2010
@@ -2513,8 +2513,8 @@
 def  VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
                         "vpadd", "i32",
                         v2i32, v2i32, int_arm_neon_vpadd, 0>;
-def  VPADDf   : N3VDInt<1, 0, 0b00, 0b1101, 0, N3RegFrm, IIC_VSHLD,
-                        "vpadd", "f32",
+def  VPADDf   : N3VDInt<1, 0, 0b00, 0b1101, 0, N3RegFrm, 
+                        IIC_VBIND, "vpadd", "f32",
                         v2f32, v2f32, int_arm_neon_vpadd, 0>;
 
 //   VPADDL   : Vector Pairwise Add Long

Modified: llvm/trunk/lib/Target/ARM/ARMScheduleV7.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMScheduleV7.td?rev=100655&r1=100654&r2=100655&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMScheduleV7.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMScheduleV7.td Wed Apr  7 13:20:29 2010
@@ -794,6 +794,34 @@
   // Issue through integer pipeline, and execute in NEON unit.
 
   //
+  // Double-register Integer Unary
+  InstrItinData,
+                               // Extra 3 latency cycle since wbck is 6 cycles
+                               InstrStage2<7, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [4, 2]>,
+  //
+  // Quad-register Integer Unary
+  InstrItinData,
+                               // Extra 3 latency cycle since wbck is 6 cycles
+                               InstrStage2<7, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [4, 2]>,
+  //
+  // Double-register Integer Q-Unary
+  InstrItinData,
+                               // Extra 3 latency cycle since wbck is 6 cycles
+                               InstrStage2<7, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [4, 1]>,
+  //
+  // Quad-register Integer CountQ-Unary
+  InstrItinData,
+                               // Extra 3 latency cycle since wbck is 6 cycles
+                               InstrStage2<7, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [4, 1]>,
+  //
   // Double-register Integer Binary
   InstrItinData,
                                // Extra 3 latency cycle since wbck is 6 cycles




From asl at math.spbu.ru  Wed Apr  7 13:20:36 2010
From: asl at math.spbu.ru (Anton Korobeynikov)
Date: Wed, 07 Apr 2010 18:20:36 -0000
Subject: [llvm-commits] [llvm] r100656 -
	/llvm/trunk/lib/Target/ARM/ARMInstrNEON.td
Message-ID: <20100407182036.63BD22A6C12C@llvm.org>

Author: asl
Date: Wed Apr  7 13:20:36 2010
New Revision: 100656

URL: http://llvm.org/viewvc/llvm-project?rev=100656&view=rev
Log:
Correct VMVN itinerary: operand is read in the second cycle, not in the first.

Modified:
    llvm/trunk/lib/Target/ARM/ARMInstrNEON.td

Modified: llvm/trunk/lib/Target/ARM/ARMInstrNEON.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrNEON.td?rev=100656&r1=100655&r2=100656&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMInstrNEON.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMInstrNEON.td Wed Apr  7 13:20:36 2010
@@ -2387,11 +2387,11 @@
 
 //   VMVN     : Vector Bitwise NOT
 def  VMVNd    : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0,
-                     (outs DPR:$dst), (ins DPR:$src), IIC_VSHLiD,
+                     (outs DPR:$dst), (ins DPR:$src), IIC_VSUBiD,
                      "vmvn", "$dst, $src", "",
                      [(set DPR:$dst, (v2i32 (vnot8 DPR:$src)))]>;
 def  VMVNq    : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0,
-                     (outs QPR:$dst), (ins QPR:$src), IIC_VSHLiD,
+                     (outs QPR:$dst), (ins QPR:$src), IIC_VSUBiD,
                      "vmvn", "$dst, $src", "",
                      [(set QPR:$dst, (v4i32 (vnot16 QPR:$src)))]>;
 def : Pat<(v2i32 (vnot8 DPR:$src)), (VMVNd DPR:$src)>;




From asl at math.spbu.ru  Wed Apr  7 13:20:42 2010
From: asl at math.spbu.ru (Anton Korobeynikov)
Date: Wed, 07 Apr 2010 18:20:42 -0000
Subject: [llvm-commits] [llvm] r100657 - in /llvm/trunk/lib/Target/ARM:
 ARMInstrNEON.td ARMSchedule.td ARMScheduleV7.td
Message-ID: <20100407182042.E738E2A6C12E@llvm.org>

Author: asl
Date: Wed Apr  7 13:20:42 2010
New Revision: 100657

URL: http://llvm.org/viewvc/llvm-project?rev=100657&view=rev
Log:
Fix itins for VABA

Modified:
    llvm/trunk/lib/Target/ARM/ARMInstrNEON.td
    llvm/trunk/lib/Target/ARM/ARMSchedule.td
    llvm/trunk/lib/Target/ARM/ARMScheduleV7.td

Modified: llvm/trunk/lib/Target/ARM/ARMInstrNEON.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrNEON.td?rev=100657&r1=100656&r2=100657&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMInstrNEON.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMInstrNEON.td Wed Apr  7 13:20:42 2010
@@ -1711,21 +1711,22 @@
 // Neon 3-argument intrinsics,
 //   element sizes of 8, 16 and 32 bits:
 multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+                       InstrItinClass itinD, InstrItinClass itinQ,
                        string OpcodeStr, string Dt, Intrinsic IntOp> {
   // 64-bit vector types.
-  def v8i8  : N3VDInt3;
-  def v4i16 : N3VDInt3;
-  def v2i32 : N3VDInt3;
 
   // 128-bit vector types.
-  def v16i8 : N3VQInt3;
-  def v8i16 : N3VQInt3;
-  def v4i32 : N3VQInt3;
 }
 
@@ -1734,10 +1735,11 @@
 
 // First with only element sizes of 16 and 32 bits:
 multiclass N3VLInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
+                       InstrItinClass itin,
                        string OpcodeStr, string Dt, Intrinsic IntOp> {
-  def v4i32 : N3VLInt3;
-  def v2i64 : N3VLInt3;
 }
 
@@ -1751,9 +1753,10 @@
 
 // ....then also with element size of 8 bits:
 multiclass N3VLInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+                        InstrItinClass itin,
                         string OpcodeStr, string Dt, Intrinsic IntOp>
-  : N3VLInt3_HS {
-  def v8i16 : N3VLInt3 {
+  def v8i16 : N3VLInt3;
 }
 
@@ -2177,15 +2180,17 @@
                            (SubReg_i32_lane imm:$lane)))>;
 
 //   VMLAL    : Vector Multiply Accumulate Long (Q += D * D)
-defm VMLALs   : N3VLInt3_QHS<0,1,0b1000,0, "vmlal", "s", int_arm_neon_vmlals>;
-defm VMLALu   : N3VLInt3_QHS<1,1,0b1000,0, "vmlal", "u", int_arm_neon_vmlalu>;
+defm VMLALs   : N3VLInt3_QHS<0,1,0b1000,0, IIC_VMACi16D,
+                             "vmlal", "s", int_arm_neon_vmlals>;
+defm VMLALu   : N3VLInt3_QHS<1,1,0b1000,0, IIC_VMACi16D,
+                             "vmlal", "u", int_arm_neon_vmlalu>;
 
 defm VMLALsls : N3VLInt3SL_HS<0, 0b0010, "vmlal", "s", int_arm_neon_vmlals>;
 defm VMLALslu : N3VLInt3SL_HS<1, 0b0010, "vmlal", "u", int_arm_neon_vmlalu>;
 
 //   VQDMLAL  : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D)
-defm VQDMLAL  : N3VLInt3_HS<0, 1, 0b1001, 0, "vqdmlal", "s",
-                            int_arm_neon_vqdmlal>;
+defm VQDMLAL  : N3VLInt3_HS<0, 1, 0b1001, 0, IIC_VMACi16D,
+                            "vqdmlal", "s", int_arm_neon_vqdmlal>;
 defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", int_arm_neon_vqdmlal>;
 
 //   VMLS     : Vector Multiply Subtract (integer and floating-point)
@@ -2227,15 +2232,17 @@
                            (SubReg_i32_lane imm:$lane)))>;
 
 //   VMLSL    : Vector Multiply Subtract Long (Q -= D * D)
-defm VMLSLs   : N3VLInt3_QHS<0,1,0b1010,0, "vmlsl", "s", int_arm_neon_vmlsls>;
-defm VMLSLu   : N3VLInt3_QHS<1,1,0b1010,0, "vmlsl", "u", int_arm_neon_vmlslu>;
+defm VMLSLs   : N3VLInt3_QHS<0,1,0b1010,0, IIC_VMACi16D,
+                             "vmlsl", "s", int_arm_neon_vmlsls>;
+defm VMLSLu   : N3VLInt3_QHS<1,1,0b1010,0, IIC_VMACi16D,
+                             "vmlsl", "u", int_arm_neon_vmlslu>;
 
 defm VMLSLsls : N3VLInt3SL_HS<0, 0b0110, "vmlsl", "s", int_arm_neon_vmlsls>;
 defm VMLSLslu : N3VLInt3SL_HS<1, 0b0110, "vmlsl", "u", int_arm_neon_vmlslu>;
 
 //   VQDMLSL  : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D)
-defm VQDMLSL  : N3VLInt3_HS<0, 1, 0b1011, 0, "vqdmlsl", "s",
-                            int_arm_neon_vqdmlsl>;
+defm VQDMLSL  : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D,
+                            "vqdmlsl", "s", int_arm_neon_vqdmlsl>;
 defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b111, "vqdmlsl", "s", int_arm_neon_vqdmlsl>;
 
 // Vector Subtract Operations.
@@ -2464,12 +2471,16 @@
                              "vabdl", "u", int_arm_neon_vabdlu, 0>;
 
 //   VABA     : Vector Absolute Difference and Accumulate
-defm VABAs    : N3VInt3_QHS<0,0,0b0111,1, "vaba", "s", int_arm_neon_vabas>;
-defm VABAu    : N3VInt3_QHS<1,0,0b0111,1, "vaba", "u", int_arm_neon_vabau>;
+defm VABAs    : N3VInt3_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
+                            "vaba", "s", int_arm_neon_vabas>;
+defm VABAu    : N3VInt3_QHS<1,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
+                            "vaba", "u", int_arm_neon_vabau>;
 
 //   VABAL    : Vector Absolute Difference and Accumulate Long (Q += | D - D |)
-defm VABALs   : N3VLInt3_QHS<0,1,0b0101,0, "vabal", "s", int_arm_neon_vabals>;
-defm VABALu   : N3VLInt3_QHS<1,1,0b0101,0, "vabal", "u", int_arm_neon_vabalu>;
+defm VABALs   : N3VLInt3_QHS<0,1,0b0101,0, IIC_VABAD,
+                             "vabal", "s", int_arm_neon_vabals>;
+defm VABALu   : N3VLInt3_QHS<1,1,0b0101,0, IIC_VABAD,
+                             "vabal", "u", int_arm_neon_vabalu>;
 
 // Vector Maximum and Minimum.
 

Modified: llvm/trunk/lib/Target/ARM/ARMSchedule.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMSchedule.td?rev=100657&r1=100656&r2=100657&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMSchedule.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMSchedule.td Wed Apr  7 13:20:42 2010
@@ -135,6 +135,8 @@
 def IIC_VBINi4Q    : InstrItinClass;
 def IIC_VSUBi4D    : InstrItinClass;
 def IIC_VSUBi4Q    : InstrItinClass;
+def IIC_VABAD      : InstrItinClass;
+def IIC_VABAQ      : InstrItinClass;
 def IIC_VSHLiD     : InstrItinClass;
 def IIC_VSHLiQ     : InstrItinClass;
 def IIC_VSHLi4D    : InstrItinClass;

Modified: llvm/trunk/lib/Target/ARM/ARMScheduleV7.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMScheduleV7.td?rev=100657&r1=100656&r2=100657&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMScheduleV7.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMScheduleV7.td Wed Apr  7 13:20:42 2010
@@ -522,6 +522,15 @@
   InstrItinData,
                                InstrStage<2, [FU_NPipe]>], [7, 3, 2, 1]>,
   //
+  // Double-register Absolute Difference and Accumulate
+  InstrItinData,
+                               InstrStage<1, [FU_NPipe]>], [6, 3, 2, 1]>,
+  //
+  // Quad-register Absolute Difference and Accumulate
+  InstrItinData,
+                               InstrStage<2, [FU_NPipe]>], [6, 3, 2, 1]>,
+
+  //
   // Double-register Integer Multiply (.8, .16)
   InstrItinData,
                                InstrStage<1, [FU_NPipe]>], [6, 2, 2]>,
@@ -883,7 +892,38 @@
                                // Extra 3 latency cycle since wbck is 6 cycles
                                InstrStage2<7, [FU_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_NPipe]>], [4, 2, 1]>
+                               InstrStage<1, [FU_NPipe]>], [4, 2, 1]>,
+
+  //
+  // Double-register Integer Count
+  InstrItinData,
+                               // Extra 3 latency cycle since wbck is 6 cycles
+                               InstrStage2<7, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [3, 2, 2]>,
+  //
+  // Quad-register Integer Count
+  // Result written in N3, but that is relative to the last cycle of multicycle,
+  // so we use 4 for those cases
+  InstrItinData,
+                               // Extra 3 latency cycle since wbck is 7 cycles
+                               InstrStage2<8, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<2, [FU_NPipe]>], [4, 2, 2]>,
+  //
+  // Double-register Absolute Difference and Accumulate
+  InstrItinData,
+                               // Extra 3 latency cycle since wbck is 6 cycles
+                               InstrStage2<7, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [6, 3, 2, 1]>,
+  //
+  // Quad-register Absolute Difference and Accumulate
+  InstrItinData,
+                               // Extra 3 latency cycle since wbck is 6 cycles
+                               InstrStage2<7, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<2, [FU_NPipe]>], [6, 3, 2, 1]>
 ]>;
 
 




From asl at math.spbu.ru  Wed Apr  7 13:20:48 2010
From: asl at math.spbu.ru (Anton Korobeynikov)
Date: Wed, 07 Apr 2010 18:20:48 -0000
Subject: [llvm-commits] [llvm] r100658 -
	/llvm/trunk/lib/Target/ARM/ARMScheduleV7.td
Message-ID: <20100407182048.279062A6C12D@llvm.org>

Author: asl
Date: Wed Apr  7 13:20:47 2010
New Revision: 100658

URL: http://llvm.org/viewvc/llvm-project?rev=100658&view=rev
Log:
Fix itins for VPAL

Modified:
    llvm/trunk/lib/Target/ARM/ARMScheduleV7.td

Modified: llvm/trunk/lib/Target/ARM/ARMScheduleV7.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMScheduleV7.td?rev=100658&r1=100657&r2=100658&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMScheduleV7.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMScheduleV7.td Wed Apr  7 13:20:47 2010
@@ -516,11 +516,11 @@
   //
   // Double-register Integer Pair Add Long
   InstrItinData,
-                               InstrStage<1, [FU_NPipe]>], [6, 3, 2, 1]>,
+                               InstrStage<1, [FU_NPipe]>], [6, 3, 1]>,
   //
   // Quad-register Integer Pair Add Long
   InstrItinData,
-                               InstrStage<2, [FU_NPipe]>], [7, 3, 2, 1]>,
+                               InstrStage<2, [FU_NPipe]>], [7, 3, 1]>,
   //
   // Double-register Absolute Difference and Accumulate
   InstrItinData,
@@ -923,7 +923,21 @@
                                // Extra 3 latency cycle since wbck is 6 cycles
                                InstrStage2<7, [FU_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<2, [FU_NPipe]>], [6, 3, 2, 1]>
+                               InstrStage<2, [FU_NPipe]>], [6, 3, 2, 1]>,
+  //
+  // Double-register Integer Pair Add Long
+  InstrItinData,
+                               // Extra 3 latency cycle since wbck is 6 cycles
+                               InstrStage2<7, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [6, 3, 1]>,
+  //
+  // Quad-register Integer Pair Add Long
+  InstrItinData,
+                               // Extra 3 latency cycle since wbck is 6 cycles
+                               InstrStage2<7, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<2, [FU_NPipe]>], [6, 3, 1]>
 ]>;
 
 




From asl at math.spbu.ru  Wed Apr  7 13:20:53 2010
From: asl at math.spbu.ru (Anton Korobeynikov)
Date: Wed, 07 Apr 2010 18:20:53 -0000
Subject: [llvm-commits] [llvm] r100659 -
	/llvm/trunk/lib/Target/ARM/ARMScheduleV7.td
Message-ID: <20100407182053.7FFAC2A6C12C@llvm.org>

Author: asl
Date: Wed Apr  7 13:20:53 2010
New Revision: 100659

URL: http://llvm.org/viewvc/llvm-project?rev=100659&view=rev
Log:
Fix invalid NEON MAC itins on A8

Modified:
    llvm/trunk/lib/Target/ARM/ARMScheduleV7.td

Modified: llvm/trunk/lib/Target/ARM/ARMScheduleV7.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMScheduleV7.td?rev=100659&r1=100658&r2=100659&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMScheduleV7.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMScheduleV7.td Wed Apr  7 13:20:53 2010
@@ -551,21 +551,21 @@
   //
   // Double-register Integer Multiply-Accumulate (.8, .16)
   InstrItinData,
-                               InstrStage<1, [FU_NPipe]>], [6, 2, 2, 3]>,
+                               InstrStage<1, [FU_NPipe]>], [6, 3, 2, 2]>,
   //
   // Double-register Integer Multiply-Accumulate (.32)
   InstrItinData,
-                               InstrStage<2, [FU_NPipe]>], [7, 2, 1, 3]>,
+                               InstrStage<2, [FU_NPipe]>], [7, 3, 2, 1]>,
   //
   // Quad-register Integer Multiply-Accumulate (.8, .16)
   InstrItinData,
-                               InstrStage<2, [FU_NPipe]>], [7, 2, 2, 3]>,
+                               InstrStage<2, [FU_NPipe]>], [7, 3, 2, 2]>,
   //
   // Quad-register Integer Multiply-Accumulate (.32)
   InstrItinData,
                                InstrStage<1, [FU_NPipe]>,
                                InstrStage<2, [FU_NLSPipe], 0>,
-                               InstrStage<3, [FU_NPipe]>], [9, 2, 1, 3]>,
+                               InstrStage<3, [FU_NPipe]>], [9, 3, 2, 1]>,
   //
   // Double-register VEXT
   InstrItinData,




From asl at math.spbu.ru  Wed Apr  7 13:20:58 2010
From: asl at math.spbu.ru (Anton Korobeynikov)
Date: Wed, 07 Apr 2010 18:20:58 -0000
Subject: [llvm-commits] [llvm] r100660 -
	/llvm/trunk/lib/Target/ARM/ARMScheduleV7.td
Message-ID: <20100407182058.CC0C12A6C12D@llvm.org>

Author: asl
Date: Wed Apr  7 13:20:58 2010
New Revision: 100660

URL: http://llvm.org/viewvc/llvm-project?rev=100660&view=rev
Log:
Add MAC stuff for A9

Modified:
    llvm/trunk/lib/Target/ARM/ARMScheduleV7.td

Modified: llvm/trunk/lib/Target/ARM/ARMScheduleV7.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMScheduleV7.td?rev=100660&r1=100659&r2=100660&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMScheduleV7.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMScheduleV7.td Wed Apr  7 13:20:58 2010
@@ -937,7 +937,65 @@
                                // Extra 3 latency cycle since wbck is 6 cycles
                                InstrStage2<7, [FU_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<2, [FU_NPipe]>], [6, 3, 1]>
+                               InstrStage<2, [FU_NPipe]>], [6, 3, 1]>,
+
+  //
+  // Double-register Integer Multiply (.8, .16)
+  InstrItinData,
+                               // Extra 3 latency cycle since wbck is 6 cycles
+                               InstrStage2<7, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [6, 2, 2]>,
+  //
+  // Quad-register Integer Multiply (.8, .16)
+  InstrItinData,
+                               // Extra 3 latency cycle since wbck is 7 cycles
+                               InstrStage2<8, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<2, [FU_NPipe]>], [7, 2, 2]>,
+
+  //
+  // Double-register Integer Multiply (.32)
+  InstrItinData,
+                               // Extra 3 latency cycle since wbck is 7 cycles
+                               InstrStage2<8, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<2, [FU_NPipe]>], [7, 2, 1]>,
+  //
+  // Quad-register Integer Multiply (.32)
+  InstrItinData,
+                               // Extra 3 latency cycle since wbck is 9 cycles
+                               InstrStage2<10, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<4, [FU_NPipe]>], [9, 2, 1]>,
+  //
+  // Double-register Integer Multiply-Accumulate (.8, .16)
+  InstrItinData,
+                               // Extra 3 latency cycle since wbck is 6 cycles
+                               InstrStage2<7, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [6, 3, 2, 2]>,
+  //
+  // Double-register Integer Multiply-Accumulate (.32)
+  InstrItinData,
+                               // Extra 3 latency cycle since wbck is 7 cycles
+                               InstrStage2<8, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<2, [FU_NPipe]>], [7, 3, 2, 1]>,
+  //
+  // Quad-register Integer Multiply-Accumulate (.8, .16)
+  InstrItinData,
+                               // Extra 3 latency cycle since wbck is 7 cycles
+                               InstrStage2<8, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<2, [FU_NPipe]>], [7, 3, 2, 2]>,
+  //
+  // Quad-register Integer Multiply-Accumulate (.32)
+  InstrItinData,
+                               // Extra 3 latency cycle since wbck is 9 cycles
+                               InstrStage2<8, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<4, [FU_NPipe]>], [9, 3, 2, 1]>
 ]>;
 
 




From asl at math.spbu.ru  Wed Apr  7 13:21:04 2010
From: asl at math.spbu.ru (Anton Korobeynikov)
Date: Wed, 07 Apr 2010 18:21:04 -0000
Subject: [llvm-commits] [llvm] r100661 -
	/llvm/trunk/lib/Target/ARM/ARMInstrNEON.td
Message-ID: <20100407182104.BB4DA2A6C12C@llvm.org>

Author: asl
Date: Wed Apr  7 13:21:04 2010
New Revision: 100661

URL: http://llvm.org/viewvc/llvm-project?rev=100661&view=rev
Log:
Fix invalid itins for 32-bit variants of VMLAL and friends

Modified:
    llvm/trunk/lib/Target/ARM/ARMInstrNEON.td

Modified: llvm/trunk/lib/Target/ARM/ARMInstrNEON.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrNEON.td?rev=100661&r1=100660&r2=100661&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMInstrNEON.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMInstrNEON.td Wed Apr  7 13:21:04 2010
@@ -1735,11 +1735,11 @@
 
 // First with only element sizes of 16 and 32 bits:
 multiclass N3VLInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
-                       InstrItinClass itin,
+                       InstrItinClass itin16, InstrItinClass itin32,
                        string OpcodeStr, string Dt, Intrinsic IntOp> {
-  def v4i32 : N3VLInt3;
-  def v2i64 : N3VLInt3;
 }
 
@@ -1753,10 +1753,10 @@
 
 // ....then also with element size of 8 bits:
 multiclass N3VLInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
-                        InstrItinClass itin,
+                        InstrItinClass itin16, InstrItinClass itin32,
                         string OpcodeStr, string Dt, Intrinsic IntOp>
-  : N3VLInt3_HS {
-  def v8i16 : N3VLInt3 {
+  def v8i16 : N3VLInt3;
 }
 
@@ -2180,16 +2180,16 @@
                            (SubReg_i32_lane imm:$lane)))>;
 
 //   VMLAL    : Vector Multiply Accumulate Long (Q += D * D)
-defm VMLALs   : N3VLInt3_QHS<0,1,0b1000,0, IIC_VMACi16D,
+defm VMLALs   : N3VLInt3_QHS<0,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
                              "vmlal", "s", int_arm_neon_vmlals>;
-defm VMLALu   : N3VLInt3_QHS<1,1,0b1000,0, IIC_VMACi16D,
+defm VMLALu   : N3VLInt3_QHS<1,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
                              "vmlal", "u", int_arm_neon_vmlalu>;
 
 defm VMLALsls : N3VLInt3SL_HS<0, 0b0010, "vmlal", "s", int_arm_neon_vmlals>;
 defm VMLALslu : N3VLInt3SL_HS<1, 0b0010, "vmlal", "u", int_arm_neon_vmlalu>;
 
 //   VQDMLAL  : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D)
-defm VQDMLAL  : N3VLInt3_HS<0, 1, 0b1001, 0, IIC_VMACi16D,
+defm VQDMLAL  : N3VLInt3_HS<0, 1, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
                             "vqdmlal", "s", int_arm_neon_vqdmlal>;
 defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", int_arm_neon_vqdmlal>;
 
@@ -2232,16 +2232,16 @@
                            (SubReg_i32_lane imm:$lane)))>;
 
 //   VMLSL    : Vector Multiply Subtract Long (Q -= D * D)
-defm VMLSLs   : N3VLInt3_QHS<0,1,0b1010,0, IIC_VMACi16D,
+defm VMLSLs   : N3VLInt3_QHS<0,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
                              "vmlsl", "s", int_arm_neon_vmlsls>;
-defm VMLSLu   : N3VLInt3_QHS<1,1,0b1010,0, IIC_VMACi16D,
+defm VMLSLu   : N3VLInt3_QHS<1,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
                              "vmlsl", "u", int_arm_neon_vmlslu>;
 
 defm VMLSLsls : N3VLInt3SL_HS<0, 0b0110, "vmlsl", "s", int_arm_neon_vmlsls>;
 defm VMLSLslu : N3VLInt3SL_HS<1, 0b0110, "vmlsl", "u", int_arm_neon_vmlslu>;
 
 //   VQDMLSL  : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D)
-defm VQDMLSL  : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D,
+defm VQDMLSL  : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D,
                             "vqdmlsl", "s", int_arm_neon_vqdmlsl>;
 defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b111, "vqdmlsl", "s", int_arm_neon_vqdmlsl>;
 
@@ -2477,9 +2477,9 @@
                             "vaba", "u", int_arm_neon_vabau>;
 
 //   VABAL    : Vector Absolute Difference and Accumulate Long (Q += | D - D |)
-defm VABALs   : N3VLInt3_QHS<0,1,0b0101,0, IIC_VABAD,
+defm VABALs   : N3VLInt3_QHS<0,1,0b0101,0, IIC_VABAD, IIC_VABAD,
                              "vabal", "s", int_arm_neon_vabals>;
-defm VABALu   : N3VLInt3_QHS<1,1,0b0101,0, IIC_VABAD,
+defm VABALu   : N3VLInt3_QHS<1,1,0b0101,0, IIC_VABAD, IIC_VABAD,
                              "vabal", "u", int_arm_neon_vabalu>;
 
 // Vector Maximum and Minimum.




From asl at math.spbu.ru  Wed Apr  7 13:21:10 2010
From: asl at math.spbu.ru (Anton Korobeynikov)
Date: Wed, 07 Apr 2010 18:21:10 -0000
Subject: [llvm-commits] [llvm] r100662 -
	/llvm/trunk/lib/Target/ARM/ARMInstrNEON.td
Message-ID: <20100407182110.A7D5B2A6C12D@llvm.org>

Author: asl
Date: Wed Apr  7 13:21:10 2010
New Revision: 100662

URL: http://llvm.org/viewvc/llvm-project?rev=100662&view=rev
Log:
More fixes for itins

Modified:
    llvm/trunk/lib/Target/ARM/ARMInstrNEON.td

Modified: llvm/trunk/lib/Target/ARM/ARMInstrNEON.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrNEON.td?rev=100662&r1=100661&r2=100662&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMInstrNEON.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMInstrNEON.td Wed Apr  7 13:21:10 2010
@@ -1621,12 +1621,13 @@
 
 // First with only element sizes of 16 and 32 bits:
 multiclass N3VLInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
-                      InstrItinClass itin, string OpcodeStr, string Dt,
+                      InstrItinClass itin16, InstrItinClass itin32,
+                      string OpcodeStr, string Dt,
                       Intrinsic IntOp, bit Commutable = 0> {
-  def v4i32 : N3VLInt;
-  def v2i64 : N3VLInt;
 }
@@ -1642,11 +1643,12 @@
 
 // ....then also with element size of 8 bits:
 multiclass N3VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
-                       InstrItinClass itin, string OpcodeStr, string Dt,
+                       InstrItinClass itin16, InstrItinClass itin32,
+                       string OpcodeStr, string Dt,
                        Intrinsic IntOp, bit Commutable = 0>
-  : N3VLInt_HS {
-  def v8i16 : N3VLInt;
 }
@@ -2004,10 +2006,10 @@
 def  VADDfq   : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ, "vadd", "f32",
                      v4f32, v4f32, fadd, 1>;
 //   VADDL    : Vector Add Long (Q = D + D)
-defm VADDLs   : N3VLInt_QHS<0,1,0b0000,0, IIC_VSHLiD, "vaddl", "s",
-                            int_arm_neon_vaddls, 1>;
-defm VADDLu   : N3VLInt_QHS<1,1,0b0000,0, IIC_VSHLiD, "vaddl", "u",
-                            int_arm_neon_vaddlu, 1>;
+defm VADDLs   : N3VLInt_QHS<0,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
+                            "vaddl", "s", int_arm_neon_vaddls, 1>;
+defm VADDLu   : N3VLInt_QHS<1,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
+                            "vaddl", "u", int_arm_neon_vaddlu, 1>;
 //   VADDW    : Vector Add Wide (Q = Q + D)
 defm VADDWs   : N3VWInt_QHS<0,1,0b0001,0, "vaddw", "s", int_arm_neon_vaddws, 0>;
 defm VADDWu   : N3VWInt_QHS<1,1,0b0001,0, "vaddw", "u", int_arm_neon_vaddwu, 0>;
@@ -2121,10 +2123,10 @@
                                   (SubReg_i32_lane imm:$lane)))>;
 
 //   VMULL    : Vector Multiply Long (integer and polynomial) (Q = D * D)
-defm VMULLs   : N3VLInt_QHS<0,1,0b1100,0, IIC_VMULi16D, "vmull", "s",
-                            int_arm_neon_vmulls, 1>;
-defm VMULLu   : N3VLInt_QHS<1,1,0b1100,0, IIC_VMULi16D, "vmull", "u",
-                            int_arm_neon_vmullu, 1>;
+defm VMULLs   : N3VLInt_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
+                            "vmull", "s", int_arm_neon_vmulls, 1>;
+defm VMULLu   : N3VLInt_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
+                            "vmull", "u", int_arm_neon_vmullu, 1>;
 def  VMULLp   : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8",
                         v8i16, v8i8, int_arm_neon_vmullp, 1>;
 defm VMULLsls : N3VLIntSL_HS<0, 0b1010, IIC_VMULi16D, "vmull", "s",
@@ -2133,10 +2135,10 @@
                              int_arm_neon_vmullu>;
 
 //   VQDMULL  : Vector Saturating Doubling Multiply Long (Q = D * D)
-defm VQDMULL  : N3VLInt_HS<0,1,0b1101,0, IIC_VMULi16D, "vqdmull", "s",
-                           int_arm_neon_vqdmull, 1>;
-defm VQDMULLsl: N3VLIntSL_HS<0, 0b1011, IIC_VMULi16D, "vqdmull", "s",
-                             int_arm_neon_vqdmull>;
+defm VQDMULL  : N3VLInt_HS<0,1,0b1101,0, IIC_VMULi16D, IIC_VMULi32D,
+                           "vqdmull", "s", int_arm_neon_vqdmull, 1>;
+defm VQDMULLsl: N3VLIntSL_HS<0, 0b1011, IIC_VMULi16D,
+                             "vqdmull", "s", int_arm_neon_vqdmull>;
 
 // Vector Multiply-Accumulate and Multiply-Subtract Operations.
 
@@ -2255,10 +2257,10 @@
 def  VSUBfq   : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vsub", "f32",
                      v4f32, v4f32, fsub, 0>;
 //   VSUBL    : Vector Subtract Long (Q = D - D)
-defm VSUBLs   : N3VLInt_QHS<0,1,0b0010,0, IIC_VSHLiD, "vsubl", "s",
-                            int_arm_neon_vsubls, 1>;
-defm VSUBLu   : N3VLInt_QHS<1,1,0b0010,0, IIC_VSHLiD, "vsubl", "u",
-                            int_arm_neon_vsublu, 1>;
+defm VSUBLs   : N3VLInt_QHS<0,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
+                            "vsubl", "s", int_arm_neon_vsubls, 1>;
+defm VSUBLu   : N3VLInt_QHS<1,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
+                            "vsubl", "u", int_arm_neon_vsublu, 1>;
 //   VSUBW    : Vector Subtract Wide (Q = Q - D)
 defm VSUBWs   : N3VWInt_QHS<0,1,0b0011,0, "vsubw", "s", int_arm_neon_vsubws, 0>;
 defm VSUBWu   : N3VWInt_QHS<1,1,0b0011,0, "vsubw", "u", int_arm_neon_vsubwu, 0>;
@@ -2465,9 +2467,9 @@
                         "vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 0>;
 
 //   VABDL    : Vector Absolute Difference Long (Q = | D - D |)
-defm VABDLs   : N3VLInt_QHS<0,1,0b0111,0, IIC_VSUBi4Q,
+defm VABDLs   : N3VLInt_QHS<0,1,0b0111,0, IIC_VSUBi4Q, IIC_VSUBi4Q,
                             "vabdl", "s", int_arm_neon_vabdls, 0>;
-defm VABDLu   : N3VLInt_QHS<1,1,0b0111,0, IIC_VSUBi4Q,
+defm VABDLu   : N3VLInt_QHS<1,1,0b0111,0, IIC_VSUBi4Q, IIC_VSUBi4Q,
                              "vabdl", "u", int_arm_neon_vabdlu, 0>;
 
 //   VABA     : Vector Absolute Difference and Accumulate




From asl at math.spbu.ru  Wed Apr  7 13:21:17 2010
From: asl at math.spbu.ru (Anton Korobeynikov)
Date: Wed, 07 Apr 2010 18:21:17 -0000
Subject: [llvm-commits] [llvm] r100663 -
	/llvm/trunk/lib/Target/ARM/ARMScheduleV7.td
Message-ID: <20100407182117.229F32A6C12C@llvm.org>

Author: asl
Date: Wed Apr  7 13:21:16 2010
New Revision: 100663

URL: http://llvm.org/viewvc/llvm-project?rev=100663&view=rev
Log:
More shift itins for A9

Modified:
    llvm/trunk/lib/Target/ARM/ARMScheduleV7.td

Modified: llvm/trunk/lib/Target/ARM/ARMScheduleV7.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMScheduleV7.td?rev=100663&r1=100662&r2=100663&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMScheduleV7.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMScheduleV7.td Wed Apr  7 13:21:16 2010
@@ -866,6 +866,27 @@
                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<1, [FU_NPipe]>], [3, 1, 1]>,
   //
+  // Quad-register Integer Shift
+  InstrItinData,
+                               // Extra 3 latency cycle since wbck is 6 cycles
+                               InstrStage2<7, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [3, 1, 1]>,
+  //
+  // Double-register Integer Shift (4 cycle)
+  InstrItinData,
+                               // Extra 3 latency cycle since wbck is 6 cycles
+                               InstrStage2<7, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [4, 1, 1]>,
+  //
+  // Quad-register Integer Shift (4 cycle)
+  InstrItinData,
+                               // Extra 3 latency cycle since wbck is 6 cycles
+                               InstrStage2<7, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [4, 1, 1]>,
+  //
   // Double-register Integer Binary (4 cycle)
   InstrItinData,
                                // Extra 3 latency cycle since wbck is 6 cycles




From asl at math.spbu.ru  Wed Apr  7 13:21:27 2010
From: asl at math.spbu.ru (Anton Korobeynikov)
Date: Wed, 07 Apr 2010 18:21:27 -0000
Subject: [llvm-commits] [llvm] r100665 -
	/llvm/trunk/lib/Target/ARM/ARMScheduleV7.td
Message-ID: <20100407182127.E48012A6C12E@llvm.org>

Author: asl
Date: Wed Apr  7 13:21:27 2010
New Revision: 100665

URL: http://llvm.org/viewvc/llvm-project?rev=100665&view=rev
Log:
A9 NEON FP itins

Modified:
    llvm/trunk/lib/Target/ARM/ARMScheduleV7.td

Modified: llvm/trunk/lib/Target/ARM/ARMScheduleV7.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMScheduleV7.td?rev=100665&r1=100664&r2=100665&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMScheduleV7.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMScheduleV7.td Wed Apr  7 13:21:27 2010
@@ -1018,6 +1018,72 @@
                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<4, [FU_NPipe]>], [9, 3, 2, 1]>,
   //
+  // Double-register FP Unary
+  InstrItinData,
+                               // Extra 3 latency cycle since wbck is 6 cycles
+                               InstrStage2<7, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [5, 2]>,
+  //
+  // Quad-register FP Unary
+  // Result written in N5, but that is relative to the last cycle of multicycle,
+  // so we use 6 for those cases
+  InstrItinData,
+                               // Extra 3 latency cycle since wbck is 7 cycles
+                               InstrStage2<8, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<2, [FU_NPipe]>], [6, 2]>,
+  //
+  // Double-register FP Binary
+  // FIXME: We're using this itin for many instructions and [2, 2] here is too
+  // optimistic.
+  InstrItinData,
+                               // Extra 3 latency cycle since wbck is 7 cycles
+                               InstrStage2<7, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [5, 2, 2]>,
+  //
+  // Quad-register FP Binary
+  // Result written in N5, but that is relative to the last cycle of multicycle,
+  // so we use 6 for those cases
+  // FIXME: We're using this itin for many instructions and [2, 2] here is too
+  // optimistic.
+  InstrItinData,
+                               // Extra 3 latency cycle since wbck is 8 cycles
+                               InstrStage2<8, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<2, [FU_NPipe]>], [6, 2, 2]>,
+  //
+  // Double-register FP Multiple-Accumulate
+  InstrItinData,
+                               // Extra 3 latency cycle since wbck is 7 cycles
+                               InstrStage2<8, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<2, [FU_NPipe]>], [6, 3, 2, 1]>,
+  //
+  // Quad-register FP Multiple-Accumulate
+  // Result written in N9, but that is relative to the last cycle of multicycle,
+  // so we use 10 for those cases
+  InstrItinData,
+                               // Extra 3 latency cycle since wbck is 9 cycles
+                               InstrStage2<10, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<4, [FU_NPipe]>], [8, 4, 2, 1]>,
+  //
+  // Double-register Reciprocal Step
+  InstrItinData,
+                               // Extra 3 latency cycle since wbck is 7 cycles
+                               InstrStage2<8, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<2, [FU_NPipe]>], [6, 2, 2]>,
+  //
+  // Quad-register Reciprocal Step
+  InstrItinData,
+                               // Extra 3 latency cycle since wbck is 9 cycles
+                               InstrStage2<10, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<4, [FU_NPipe]>], [8, 2, 2]>,
+  //
   // Double-register Permute
   InstrItinData,
                                // Extra 3 latency cycle since wbck is 6 cycles




From asl at math.spbu.ru  Wed Apr  7 13:21:33 2010
From: asl at math.spbu.ru (Anton Korobeynikov)
Date: Wed, 07 Apr 2010 18:21:33 -0000
Subject: [llvm-commits] [llvm] r100666 -
	/llvm/trunk/lib/Target/ARM/ARMScheduleV7.td
Message-ID: <20100407182133.79A1F2A6C12C@llvm.org>

Author: asl
Date: Wed Apr  7 13:21:33 2010
New Revision: 100666

URL: http://llvm.org/viewvc/llvm-project?rev=100666&view=rev
Log:
Fix A8 FP NEON MAC itins

Modified:
    llvm/trunk/lib/Target/ARM/ARMScheduleV7.td

Modified: llvm/trunk/lib/Target/ARM/ARMScheduleV7.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMScheduleV7.td?rev=100666&r1=100665&r2=100666&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMScheduleV7.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMScheduleV7.td Wed Apr  7 13:21:33 2010
@@ -423,13 +423,13 @@
   //
   // Double-register FP Multiple-Accumulate
   InstrItinData,
-                               InstrStage<1, [FU_NPipe]>], [9, 2, 2, 3]>,
+                               InstrStage<1, [FU_NPipe]>], [9, 3, 2, 2]>,
   //
   // Quad-register FP Multiple-Accumulate
   // Result written in N9, but that is relative to the last cycle of multicycle,
   // so we use 10 for those cases
   InstrItinData,
-                               InstrStage<2, [FU_NPipe]>], [10, 2, 2, 3]>,
+                               InstrStage<2, [FU_NPipe]>], [10, 3, 2, 2]>,
   //
   // Double-register Reciprocal Step
   InstrItinData,




From asl at math.spbu.ru  Wed Apr  7 13:21:22 2010
From: asl at math.spbu.ru (Anton Korobeynikov)
Date: Wed, 07 Apr 2010 18:21:22 -0000
Subject: [llvm-commits] [llvm] r100664 -
	/llvm/trunk/lib/Target/ARM/ARMScheduleV7.td
Message-ID: <20100407182122.63BA92A6C12D@llvm.org>

Author: asl
Date: Wed Apr  7 13:21:22 2010
New Revision: 100664

URL: http://llvm.org/viewvc/llvm-project?rev=100664&view=rev
Log:
Some permute goodness for A9

Modified:
    llvm/trunk/lib/Target/ARM/ARMScheduleV7.td

Modified: llvm/trunk/lib/Target/ARM/ARMScheduleV7.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMScheduleV7.td?rev=100664&r1=100663&r2=100664&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMScheduleV7.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMScheduleV7.td Wed Apr  7 13:21:22 2010
@@ -1014,9 +1014,93 @@
   // Quad-register Integer Multiply-Accumulate (.32)
   InstrItinData,
                                // Extra 3 latency cycle since wbck is 9 cycles
+                               InstrStage2<10, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<4, [FU_NPipe]>], [9, 3, 2, 1]>,
+  //
+  // Double-register Permute
+  InstrItinData,
+                               // Extra 3 latency cycle since wbck is 6 cycles
+                               InstrStage2<7, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [2, 2, 1, 1]>,
+  //
+  // Quad-register Permute
+  // Result written in N2, but that is relative to the last cycle of multicycle,
+  // so we use 3 for those cases
+  InstrItinData,
+                               // Extra 3 latency cycle since wbck is 7 cycles
+                               InstrStage2<8, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<2, [FU_NPipe]>], [3, 3, 1, 1]>,
+  //
+  // Quad-register Permute (3 cycle issue)
+  // Result written in N2, but that is relative to the last cycle of multicycle,
+  // so we use 4 for those cases
+  InstrItinData,
+                               // Extra 3 latency cycle since wbck is 8 cycles
+                               InstrStage2<9, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<3, [FU_NLSPipe]>], [4, 4, 1, 1]>,
+
+  //
+  // Double-register VEXT
+  InstrItinData,
+                               // Extra 3 latency cycle since wbck is 7 cycles
+                               InstrStage2<7, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [2, 1, 1]>,
+  //
+  // Quad-register VEXT
+  InstrItinData,
+                               // Extra 3 latency cycle since wbck is 9 cycles
+                               InstrStage2<8, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<2, [FU_NPipe]>], [3, 1, 1]>,
+  //
+  // VTB
+  InstrItinData,
+                               // Extra 3 latency cycle since wbck is 7 cycles
                                InstrStage2<8, [FU_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<4, [FU_NPipe]>], [9, 3, 2, 1]>
+                               InstrStage<2, [FU_NPipe]>], [3, 2, 1]>,
+  InstrItinData,
+                               // Extra 3 latency cycle since wbck is 7 cycles
+                               InstrStage2<8, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<2, [FU_NPipe]>], [3, 2, 2, 1]>,
+  InstrItinData,
+                               // Extra 3 latency cycle since wbck is 8 cycles
+                               InstrStage2<9, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<3, [FU_NPipe]>], [4, 2, 2, 3, 1]>,
+  InstrItinData,
+                               // Extra 3 latency cycle since wbck is 8 cycles
+                               InstrStage2<9, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<3, [FU_NPipe]>], [4, 2, 2, 3, 3, 1]>,
+  //
+  // VTBX
+  InstrItinData,
+                               // Extra 3 latency cycle since wbck is 7 cycles
+                               InstrStage2<8, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<2, [FU_NPipe]>], [3, 1, 2, 1]>,
+  InstrItinData,
+                               // Extra 3 latency cycle since wbck is 7 cycles
+                               InstrStage2<8, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<2, [FU_NPipe]>], [3, 1, 2, 2, 1]>,
+  InstrItinData,
+                               // Extra 3 latency cycle since wbck is 8 cycles
+                               InstrStage2<9, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<3, [FU_NPipe]>], [4, 1, 2, 2, 3, 1]>,
+  InstrItinData,
+                               // Extra 3 latency cycle since wbck is 8 cycles
+                               InstrStage2<9, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<2, [FU_NPipe]>], [4, 1, 2, 2, 3, 3, 1]>
 ]>;
 
 




From asl at math.spbu.ru  Wed Apr  7 13:21:41 2010
From: asl at math.spbu.ru (Anton Korobeynikov)
Date: Wed, 07 Apr 2010 18:21:41 -0000
Subject: [llvm-commits] [llvm] r100667 - in /llvm/trunk:
 include/llvm/Target/TargetSchedule.td lib/Target/ARM/ARMScheduleV7.td
Message-ID: <20100407182141.9A05C2A6C12C@llvm.org>

Author: asl
Date: Wed Apr  7 13:21:41 2010
New Revision: 100667

URL: http://llvm.org/viewvc/llvm-project?rev=100667&view=rev
Log:
Since tblgen bug was fixed (thanks Jakob!) we don't need InstrStage2 hack anymore.

Modified:
    llvm/trunk/include/llvm/Target/TargetSchedule.td
    llvm/trunk/lib/Target/ARM/ARMScheduleV7.td

Modified: llvm/trunk/include/llvm/Target/TargetSchedule.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetSchedule.td?rev=100667&r1=100666&r2=100667&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Target/TargetSchedule.td (original)
+++ llvm/trunk/include/llvm/Target/TargetSchedule.td Wed Apr  7 13:21:41 2010
@@ -44,18 +44,15 @@
 //   InstrStage<1, [FU_x, FU_y], 0>  - TimeInc explicit
 //
 
-class InstrStage2<int cycles, list<FuncUnit> units,
-                  int timeinc, ReservationKind kind> {
+class InstrStage<int cycles, list<FuncUnit> units,
+                 int timeinc = -1,
+                 ReservationKind kind = Required> {
   int Cycles          = cycles;       // length of stage in machine cycles
   list<FuncUnit> Units = units;       // choice of functional units
   int TimeInc         = timeinc;      // cycles till start of next stage
   int Kind            = kind.Value;   // kind of FU reservation
 }
 
-class InstrStage<int cycles, list<FuncUnit> units,
-                 int timeinc = -1>
-  : InstrStage2<cycles, units, timeinc, Required>;
-
 //===----------------------------------------------------------------------===//
 // Instruction itinerary - An itinerary represents a sequential series of steps
 // required to complete an instruction.  Itineraries are represented as lists of
@@ -76,10 +73,10 @@
 // Instruction itinerary data - These values provide a runtime map of an 
 // instruction itinerary class (name) to its itinerary data.
 //
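-class InstrItinData<InstrItinClass Class, list<InstrStage2> stages,
+class InstrItinData<InstrItinClass Class, list<InstrStage> stages,
                     list<int> operandcycles = []> {
   InstrItinClass TheClass = Class;
-  list<InstrStage2> Stages = stages;
+  list<InstrStage> Stages = stages;
   list<int> OperandCycles = operandcycles;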
 }
 

Modified: llvm/trunk/lib/Target/ARM/ARMScheduleV7.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMScheduleV7.td?rev=100667&r1=100666&r2=100667&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMScheduleV7.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMScheduleV7.td Wed Apr  7 13:21:41 2010
@@ -629,417 +629,424 @@
   // Issue through integer pipeline, and execute in NEON unit.
 
   // FP Special Register to Integer Register File Move
-  InstrItinData,
-                              InstrStage2<2, [FU_DRegsN],   0, Reserved>,
-                              InstrStage<1,  [FU_Pipe0, FU_Pipe1]>,
-                              InstrStage<1,  [FU_NPipe]>]>,
+  InstrItinData,
+                              InstrStage<2, [FU_DRegsN],   0, Reserved>,
+                              InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                              InstrStage<1, [FU_NPipe]>]>,
   //
   // Single-precision FP Unary
-  InstrItinData,
-                               // Extra 1 latency cycle since wbck is 2 cycles
-                               InstrStage2<3, [FU_DRegsN],   0, Reserved>,
-                               InstrStage<1,  [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1,  [FU_NPipe]>], [1, 1]>,
+  InstrItinData,
+                               // Extra latency cycles since wbck is 2 cycles
+                               InstrStage<3, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [1, 1]>,
   //
   // Double-precision FP Unary
-  InstrItinData,
-                               // Extra 1 latency cycle since wbck is 2 cycles
-                               InstrStage2<3, [FU_DRegsN],   0, Reserved>,
-                               InstrStage<1,  [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1,  [FU_NPipe]>], [1, 1]>,
+  InstrItinData,
+                               // Extra latency cycles since wbck is 2 cycles
+                               InstrStage<3, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [1, 1]>,
 
   //
   // Single-precision FP Compare
-  InstrItinData,
-                               // Extra 3 latency cycle since wbck is 4 cycles
-                               InstrStage2<5, [FU_DRegsN],   0, Reserved>,
-                               InstrStage<1,  [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1,  [FU_NPipe]>], [1, 1]>,
+  InstrItinData,
+                               // Extra latency cycles since wbck is 4 cycles
+                               InstrStage<5, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [1, 1]>,
   //
   // Double-precision FP Compare
-  InstrItinData,
-                               // Extra 3 latency cycle since wbck is 4 cycles
-                               InstrStage2<5, [FU_DRegsN],   0, Reserved>,
-                               InstrStage<1,  [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1,  [FU_NPipe]>], [1, 1]>,
+  InstrItinData,
+                               // Extra latency cycles since wbck is 4 cycles
+                               InstrStage<5, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [1, 1]>,
   //
   // Single to Double FP Convert
-  InstrItinData,
-                               InstrStage2<5, [FU_DRegsN],   0, Reserved>,
-                               InstrStage<1,  [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1,  [FU_NPipe]>], [4, 1]>,
+  InstrItinData,
+                               InstrStage<5, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [4, 1]>,
   //
   // Double to Single FP Convert
-  InstrItinData,
-                               InstrStage2<5, [FU_DRegsN],   0, Reserved>,
+  InstrItinData,
+                               InstrStage<5, [FU_DRegsN],   0, Reserved>,
                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<1, [FU_NPipe]>], [4, 1]>,
 
   //
   // Single to Half FP Convert
-  InstrItinData,
-                               InstrStage2<5, [FU_DRegsN],   0, Reserved>,
+  InstrItinData,
+                               InstrStage<5, [FU_DRegsN],   0, Reserved>,
                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<1, [FU_NPipe]>], [4, 1]>,
   //
   // Half to Single FP Convert
-  InstrItinData,
-                               InstrStage2<3, [FU_DRegsN],   0, Reserved>,
-                               InstrStage<1,  [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1,  [FU_NPipe]>], [2, 1]>,
+  InstrItinData,
+                               InstrStage<3, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [2, 1]>,
 
   //
   // Single-Precision FP to Integer Convert
-  InstrItinData,
-                               InstrStage2<5, [FU_DRegsN],   0, Reserved>,
-                               InstrStage<1,  [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1,  [FU_NPipe]>], [4, 1]>,
+  InstrItinData,
+                               InstrStage<5, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [4, 1]>,
   //
   // Double-Precision FP to Integer Convert
-  InstrItinData,
-                               InstrStage2<5, [FU_DRegsN],   0, Reserved>,
-                               InstrStage<1,  [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1,  [FU_NPipe]>], [4, 1]>,
+  InstrItinData,
+                               InstrStage<5, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [4, 1]>,
   //
   // Integer to Single-Precision FP Convert
-  InstrItinData,
-                               InstrStage2<5, [FU_DRegsN],   0, Reserved>,
-                               InstrStage<1,  [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1,  [FU_NPipe]>], [4, 1]>,
+  InstrItinData,
+                               InstrStage<5, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [4, 1]>,
   //
   // Integer to Double-Precision FP Convert
-  InstrItinData,
-                               InstrStage2<5, [FU_DRegsN],   0, Reserved>,
-                               InstrStage<1,  [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1,  [FU_NPipe]>], [4, 1]>,
+  InstrItinData,
+                               InstrStage<5, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [4, 1]>,
   //
   // Single-precision FP ALU
-  InstrItinData,
-                               InstrStage2<5, [FU_DRegsN],   0, Reserved>,
-                               InstrStage<1,  [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1,  [FU_NPipe]>], [4, 1, 1]>,
+  InstrItinData,
+                               InstrStage<5, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [4, 1, 1]>,
   //
   // Double-precision FP ALU
-  InstrItinData,
-                               InstrStage2<5, [FU_DRegsN],   0, Reserved>,
-                               InstrStage<1,  [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1,  [FU_NPipe]>], [4, 1, 1]>,
+  InstrItinData,
+                               InstrStage<5, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [4, 1, 1]>,
   //
   // Single-precision FP Multiply
-  InstrItinData,
-                               InstrStage2<6, [FU_DRegsN],   0, Reserved>,
-                               InstrStage<1,  [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1,  [FU_NPipe]>], [5, 1, 1]>,
+  InstrItinData,
+                               InstrStage<6, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [5, 1, 1]>,
   //
   // Double-precision FP Multiply
-  InstrItinData,
-                               InstrStage2<7, [FU_DRegsN],   0, Reserved>,
-                               InstrStage<1,  [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<2,  [FU_NPipe]>], [6, 1, 1]>,
+  InstrItinData,
+                               InstrStage<7, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<2, [FU_NPipe]>], [6, 1, 1]>,
   //
   // Single-precision FP MAC
-  InstrItinData,
-                               InstrStage2<9, [FU_DRegsN],   0, Reserved>,
-                               InstrStage<1,  [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1,  [FU_NPipe]>], [8, 0, 1, 1]>,
+  InstrItinData,
+                               InstrStage<9, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [8, 0, 1, 1]>,
   //
   // Double-precision FP MAC
-  InstrItinData,
-                               InstrStage2<10, [FU_DRegsN],  0, Reserved>,
-                               InstrStage<1,   [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<2,   [FU_NPipe]>], [9, 0, 1, 1]>,
+  InstrItinData,
+                               InstrStage<10, [FU_DRegsN],  0, Reserved>,
+                               InstrStage<1,  [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<2,  [FU_NPipe]>], [9, 0, 1, 1]>,
   //
   // Single-precision FP DIV
-  InstrItinData,
-                               InstrStage2<16, [FU_DRegsN],  0, Reserved>,
-                               InstrStage<1,   [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<10,  [FU_NPipe]>], [15, 1, 1]>,
+  InstrItinData,
+                               InstrStage<16, [FU_DRegsN],  0, Reserved>,
+                               InstrStage<1,  [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<10, [FU_NPipe]>], [15, 1, 1]>,
   //
   // Double-precision FP DIV
-  InstrItinData,
-                               InstrStage2<26, [FU_DRegsN],  0, Reserved>,
-                               InstrStage<1,   [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<20,  [FU_NPipe]>], [25, 1, 1]>,
+  InstrItinData,
+                               InstrStage<26, [FU_DRegsN],  0, Reserved>,
+                               InstrStage<1,  [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<20, [FU_NPipe]>], [25, 1, 1]>,
   //
   // Single-precision FP SQRT
-  InstrItinData,
-                               InstrStage2<18, [FU_DRegsN],   0, Reserved>,
+  InstrItinData,
+                               InstrStage<18, [FU_DRegsN],   0, Reserved>,
                                InstrStage<1,   [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<13,  [FU_NPipe]>], [17, 1]>,
   //
   // Double-precision FP SQRT
-  InstrItinData,
-                               InstrStage2<33, [FU_DRegsN],   0, Reserved>,
-                               InstrStage<1,   [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<28,  [FU_NPipe]>], [32, 1]>,
+  InstrItinData,
+                               InstrStage<33, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1,  [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<28, [FU_NPipe]>], [32, 1]>,
 
   //
   // Integer to Single-precision Move
-  InstrItinData,
+  InstrItinData,
                                // Extra 1 latency cycle since wbck is 2 cycles
-                               InstrStage2<3, [FU_DRegsN],   0, Reserved>,
-                               InstrStage<1,  [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1,  [FU_NPipe]>], [1, 1]>,
+                               InstrStage<3, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [1, 1]>,
   //
   // Integer to Double-precision Move
-  InstrItinData,
+  InstrItinData,
                                // Extra 1 latency cycle since wbck is 2 cycles
-                               InstrStage2<3, [FU_DRegsN],   0, Reserved>,
-                               InstrStage<1,  [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1,  [FU_NPipe]>], [1, 1, 1]>,
+                               InstrStage<3, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [1, 1, 1]>,
   //
   // Single-precision to Integer Move
-  InstrItinData,
-                               InstrStage2<2, [FU_DRegsN],   0, Reserved>,
-                               InstrStage<1,  [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1,  [FU_NPipe]>], [1, 1]>,
+  InstrItinData,
+                               InstrStage<2, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [1, 1]>,
   //
   // Double-precision to Integer Move
-  InstrItinData,
-                               InstrStage2<2, [FU_DRegsN],   0, Reserved>,
-                               InstrStage<1,  [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1,  [FU_NPipe]>], [1, 1, 1]>,
+  InstrItinData,
+                               InstrStage<2, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [1, 1, 1]>,
   // NEON
   // Issue through integer pipeline, and execute in NEON unit.
 
   //
   // Double-register Integer Unary
-  InstrItinData,
-                               // Extra 3 latency cycle since wbck is 6 cycles
-                               InstrStage2<7, [FU_DRegsVFP], 0, Reserved>,
+  InstrItinData,
+                               // Extra latency cycles since wbck is 6 cycles
+                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<1, [FU_NPipe]>], [4, 2]>,
   //
   // Quad-register Integer Unary
-  InstrItinData,
-                               // Extra 3 latency cycle since wbck is 6 cycles
-                               InstrStage2<7, [FU_DRegsVFP], 0, Reserved>,
+  InstrItinData,
+                               // Extra latency cycles since wbck is 6 cycles
+                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<1, [FU_NPipe]>], [4, 2]>,
   //
   // Double-register Integer Q-Unary
-  InstrItinData,
-                               // Extra 3 latency cycle since wbck is 6 cycles
-                               InstrStage2<7, [FU_DRegsVFP], 0, Reserved>,
+  InstrItinData,
+                               // Extra latency cycles since wbck is 6 cycles
+                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<1, [FU_NPipe]>], [4, 1]>,
   //
   // Quad-register Integer CountQ-Unary
-  InstrItinData,
-                               // Extra 3 latency cycle since wbck is 6 cycles
-                               InstrStage2<7, [FU_DRegsVFP], 0, Reserved>,
+  InstrItinData,
+                               // Extra latency cycles since wbck is 6 cycles
+                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<1, [FU_NPipe]>], [4, 1]>,
   //
   // Double-register Integer Binary
-  InstrItinData,
-                               // Extra 3 latency cycle since wbck is 6 cycles
-                               InstrStage2<7, [FU_DRegsVFP], 0, Reserved>,
+  InstrItinData,
+                               // Extra latency cycles since wbck is 6 cycles
+                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<1, [FU_NPipe]>], [3, 2, 2]>,
   //
   // Quad-register Integer Binary
-  InstrItinData,
-                               // Extra 3 latency cycle since wbck is 6 cycles
-                               InstrStage2<7, [FU_DRegsVFP], 0, Reserved>,
+  InstrItinData,
+                               // Extra latency cycles since wbck is 6 cycles
+                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<1, [FU_NPipe]>], [3, 2, 2]>,
   //
   // Double-register Integer Subtract
-  InstrItinData,
-                               // Extra 3 latency cycle since wbck is 6 cycles
-                               InstrStage2<7, [FU_DRegsVFP], 0, Reserved>,
+  InstrItinData,
+                               // Extra latency cycles since wbck is 6 cycles
+                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<1, [FU_NPipe]>], [3, 2, 1]>,
   //
   // Quad-register Integer Subtract
-  InstrItinData,
-                               // Extra 3 latency cycle since wbck is 6 cycles
-                               InstrStage2<7, [FU_DRegsVFP], 0, Reserved>,
+  InstrItinData,
+                               // Extra latency cycles since wbck is 6 cycles
+                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<1, [FU_NPipe]>], [3, 2, 1]>,
   //
   // Double-register Integer Shift
-  InstrItinData,
-                               // Extra 3 latency cycle since wbck is 6 cycles
-                               InstrStage2<7, [FU_DRegsVFP], 0, Reserved>,
+  InstrItinData,
+                               // Extra latency cycles since wbck is 6 cycles
+                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<1, [FU_NPipe]>], [3, 1, 1]>,
   //
   // Quad-register Integer Shift
-  InstrItinData,
-                               // Extra 3 latency cycle since wbck is 6 cycles
-                               InstrStage2<7, [FU_DRegsVFP], 0, Reserved>,
+  InstrItinData,
+                               // Extra latency cycles since wbck is 6 cycles
+                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<1, [FU_NPipe]>], [3, 1, 1]>,
   //
   // Double-register Integer Shift (4 cycle)
-  InstrItinData,
-                               // Extra 3 latency cycle since wbck is 6 cycles
-                               InstrStage2<7, [FU_DRegsVFP], 0, Reserved>,
+  InstrItinData,
+                               // Extra latency cycles since wbck is 6 cycles
+                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<1, [FU_NPipe]>], [4, 1, 1]>,
   //
   // Quad-register Integer Shift (4 cycle)
-  InstrItinData,
-                               // Extra 3 latency cycle since wbck is 6 cycles
-                               InstrStage2<7, [FU_DRegsVFP], 0, Reserved>,
+  InstrItinData,
+                               // Extra latency cycles since wbck is 6 cycles
+                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<1, [FU_NPipe]>], [4, 1, 1]>,
   //
   // Double-register Integer Binary (4 cycle)
-  InstrItinData,
-                               // Extra 3 latency cycle since wbck is 6 cycles
-                               InstrStage2<7, [FU_DRegsVFP], 0, Reserved>,
+  InstrItinData,
+                               // Extra latency cycles since wbck is 6 cycles
+                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<1, [FU_NPipe]>], [4, 2, 2]>,
   //
   // Quad-register Integer Binary (4 cycle)
-  InstrItinData,
-                               // Extra 3 latency cycle since wbck is 6 cycles
-                               InstrStage2<7, [FU_DRegsVFP], 0, Reserved>,
+  InstrItinData,
+                               // Extra latency cycles since wbck is 6 cycles
+                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<1, [FU_NPipe]>], [4, 2, 2]>,
   //
   // Double-register Integer Subtract (4 cycle)
-  InstrItinData,
-                               // Extra 3 latency cycle since wbck is 6 cycles
-                               InstrStage2<7, [FU_DRegsVFP], 0, Reserved>,
+  InstrItinData,
+                               // Extra latency cycles since wbck is 6 cycles
+                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<1, [FU_NPipe]>], [4, 2, 1]>,
   //
   // Quad-register Integer Subtract (4 cycle)
-  InstrItinData,
-                               // Extra 3 latency cycle since wbck is 6 cycles
-                               InstrStage2<7, [FU_DRegsVFP], 0, Reserved>,
+  InstrItinData,
+                               // Extra latency cycles since wbck is 6 cycles
+                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<1, [FU_NPipe]>], [4, 2, 1]>,
 
   //
   // Double-register Integer Count
-  InstrItinData,
-                               // Extra 3 latency cycle since wbck is 6 cycles
-                               InstrStage2<7, [FU_DRegsVFP], 0, Reserved>,
+  InstrItinData,
+                               // Extra latency cycles since wbck is 6 cycles
+                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<1, [FU_NPipe]>], [3, 2, 2]>,
   //
   // Quad-register Integer Count
   // Result written in N3, but that is relative to the last cycle of multicycle,
   // so we use 4 for those cases
-  InstrItinData,
-                               // Extra 3 latency cycle since wbck is 7 cycles
-                               InstrStage2<8, [FU_DRegsVFP], 0, Reserved>,
+  InstrItinData,
+                               // Extra latency cycles since wbck is 7 cycles
+                               InstrStage<8, [FU_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<2, [FU_NPipe]>], [4, 2, 2]>,
   //
   // Double-register Absolute Difference and Accumulate
-  InstrItinData,
-                               // Extra 3 latency cycle since wbck is 6 cycles
-                               InstrStage2<7, [FU_DRegsVFP], 0, Reserved>,
+  InstrItinData,
+                               // Extra latency cycles since wbck is 6 cycles
+                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<1, [FU_NPipe]>], [6, 3, 2, 1]>,
   //
   // Quad-register Absolute Difference and Accumulate
-  InstrItinData,
-                               // Extra 3 latency cycle since wbck is 6 cycles
-                               InstrStage2<7, [FU_DRegsVFP], 0, Reserved>,
+  InstrItinData,
+                               // Extra latency cycles since wbck is 6 cycles
+                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<2, [FU_NPipe]>], [6, 3, 2, 1]>,
   //
   // Double-register Integer Pair Add Long
-  InstrItinData,
-                               // Extra 3 latency cycle since wbck is 6 cycles
-                               InstrStage2<7, [FU_DRegsVFP], 0, Reserved>,
+  InstrItinData,
+                               // Extra latency cycles since wbck is 6 cycles
+                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<1, [FU_NPipe]>], [6, 3, 1]>,
   //
   // Quad-register Integer Pair Add Long
-  InstrItinData,
-                               // Extra 3 latency cycle since wbck is 6 cycles
-                               InstrStage2<7, [FU_DRegsVFP], 0, Reserved>,
+  InstrItinData,
+                               // Extra latency cycles since wbck is 6 cycles
+                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<2, [FU_NPipe]>], [6, 3, 1]>,
 
   //
   // Double-register Integer Multiply (.8, .16)
-  InstrItinData,
-                               // Extra 3 latency cycle since wbck is 6 cycles
-                               InstrStage2<7, [FU_DRegsVFP], 0, Reserved>,
+  InstrItinData,
+                               // Extra latency cycles since wbck is 6 cycles
+                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<1, [FU_NPipe]>], [6, 2, 2]>,
   //
   // Quad-register Integer Multiply (.8, .16)
-  InstrItinData,
-                               // Extra 3 latency cycle since wbck is 7 cycles
-                               InstrStage2<8, [FU_DRegsVFP], 0, Reserved>,
+  InstrItinData,
+                               // Extra latency cycles since wbck is 7 cycles
+                               InstrStage<8, [FU_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<2, [FU_NPipe]>], [7, 2, 2]>,
 
   //
   // Double-register Integer Multiply (.32)
-  InstrItinData,
-                               // Extra 3 latency cycle since wbck is 7 cycles
-                               InstrStage2<8, [FU_DRegsVFP], 0, Reserved>,
+  InstrItinData,
+                               // Extra latency cycles since wbck is 7 cycles
+                               InstrStage<8, [FU_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<2, [FU_NPipe]>], [7, 2, 1]>,
   //
   // Quad-register Integer Multiply (.32)
-  InstrItinData,
-                               // Extra 3 latency cycle since wbck is 9 cycles
-                               InstrStage2<10, [FU_DRegsVFP], 0, Reserved>,
+  InstrItinData,
+                               // Extra latency cycles since wbck is 9 cycles
+                               InstrStage<10, [FU_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<4, [FU_NPipe]>], [9, 2, 1]>,
   //
   // Double-register Integer Multiply-Accumulate (.8, .16)
-  InstrItinData,
-                               // Extra 3 latency cycle since wbck is 6 cycles
-                               InstrStage2<7, [FU_DRegsVFP], 0, Reserved>,
+  InstrItinData,
+                               // Extra latency cycles since wbck is 6 cycles
+                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<1, [FU_NPipe]>], [6, 3, 2, 2]>,
   //
   // Double-register Integer Multiply-Accumulate (.32)
-  InstrItinData,
-                               // Extra 3 latency cycle since wbck is 7 cycles
-                               InstrStage2<8, [FU_DRegsVFP], 0, Reserved>,
+  InstrItinData,
+                               // Extra latency cycles since wbck is 7 cycles
+                               InstrStage<8, [FU_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<2, [FU_NPipe]>], [7, 3, 2, 1]>,
   //
   // Quad-register Integer Multiply-Accumulate (.8, .16)
-  InstrItinData,
-                               // Extra 3 latency cycle since wbck is 7 cycles
-                               InstrStage2<8, [FU_DRegsVFP], 0, Reserved>,
+  InstrItinData,
+                               // Extra latency cycles since wbck is 7 cycles
+                               InstrStage<8, [FU_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<2, [FU_NPipe]>], [7, 3, 2, 2]>,
   //
   // Quad-register Integer Multiply-Accumulate (.32)
-  InstrItinData,
-                               // Extra 3 latency cycle since wbck is 9 cycles
-                               InstrStage2<10, [FU_DRegsVFP], 0, Reserved>,
+  InstrItinData,
+                               // Extra latency cycles since wbck is 9 cycles
+                               InstrStage<10, [FU_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<4, [FU_NPipe]>], [9, 3, 2, 1]>,
   //
+  // Move Immediate
+  InstrItinData,
+                               // Extra latency cycles since wbck is 6 cycles
+                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [3]>,
+  //
   // Double-register FP Unary
-  InstrItinData,
-                               // Extra 3 latency cycle since wbck is 6 cycles
-                               InstrStage2<7, [FU_DRegsVFP], 0, Reserved>,
+  InstrItinData,
+                               // Extra latency cycles since wbck is 6 cycles
+                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<1, [FU_NPipe]>], [5, 2]>,
   //
   // Quad-register FP Unary
   // Result written in N5, but that is relative to the last cycle of multicycle,
   // so we use 6 for those cases
-  InstrItinData,
-                               // Extra 3 latency cycle since wbck is 7 cycles
-                               InstrStage2<8, [FU_DRegsVFP], 0, Reserved>,
+  InstrItinData,
+                               // Extra latency cycles since wbck is 7 cycles
+                               InstrStage<8, [FU_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<2, [FU_NPipe]>], [6, 2]>,
   //
   // Double-register FP Binary
   // FIXME: We're using this itin for many instructions and [2, 2] here is too
   // optimistic.
-  InstrItinData,
-                               // Extra 3 latency cycle since wbck is 7 cycles
-                               InstrStage2<7, [FU_DRegsVFP], 0, Reserved>,
+  InstrItinData,
+                               // Extra latency cycles since wbck is 7 cycles
+                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<1, [FU_NPipe]>], [5, 2, 2]>,
   //
@@ -1048,123 +1055,123 @@
   // so we use 6 for those cases
   // FIXME: We're using this itin for many instructions and [2, 2] here is too
   // optimistic.
-  InstrItinData,
-                               // Extra 3 latency cycle since wbck is 8 cycles
-                               InstrStage2<8, [FU_DRegsVFP], 0, Reserved>,
+  InstrItinData,
+                               // Extra latency cycles since wbck is 8 cycles
+                               InstrStage<8, [FU_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<2, [FU_NPipe]>], [6, 2, 2]>,
   //
   // Double-register FP Multiple-Accumulate
-  InstrItinData,
-                               // Extra 3 latency cycle since wbck is 7 cycles
-                               InstrStage2<8, [FU_DRegsVFP], 0, Reserved>,
+  InstrItinData,
+                               // Extra latency cycles since wbck is 7 cycles
+                               InstrStage<8, [FU_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<2, [FU_NPipe]>], [6, 3, 2, 1]>,
   //
   // Quad-register FP Multiple-Accumulate
   // Result written in N9, but that is relative to the last cycle of multicycle,
   // so we use 10 for those cases
-  InstrItinData,
-                               // Extra 3 latency cycle since wbck is 9 cycles
-                               InstrStage2<10, [FU_DRegsVFP], 0, Reserved>,
+  InstrItinData,
+                               // Extra latency cycles since wbck is 9 cycles
+                               InstrStage<10, [FU_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<4, [FU_NPipe]>], [8, 4, 2, 1]>,
   //
   // Double-register Reciprical Step
-  InstrItinData,
-                               // Extra 3 latency cycle since wbck is 7 cycles
-                               InstrStage2<8, [FU_DRegsVFP], 0, Reserved>,
+  InstrItinData,
+                               // Extra latency cycles since wbck is 7 cycles
+                               InstrStage<8, [FU_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<2, [FU_NPipe]>], [6, 2, 2]>,
   //
   // Quad-register Reciprical Step
-  InstrItinData,
-                               // Extra 3 latency cycle since wbck is 9 cycles
-                               InstrStage2<10, [FU_DRegsVFP], 0, Reserved>,
+  InstrItinData,
+                               // Extra latency cycles since wbck is 9 cycles
+                               InstrStage<10, [FU_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<4, [FU_NPipe]>], [8, 2, 2]>,
   //
   // Double-register Permute
-  InstrItinData,
-                               // Extra 3 latency cycle since wbck is 6 cycles
-                               InstrStage2<7, [FU_DRegsVFP], 0, Reserved>,
+  InstrItinData,
+                               // Extra latency cycles since wbck is 6 cycles
+                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<1, [FU_NPipe]>], [2, 2, 1, 1]>,
   //
   // Quad-register Permute
   // Result written in N2, but that is relative to the last cycle of multicycle,
   // so we use 3 for those cases
-  InstrItinData,
-                               // Extra 3 latency cycle since wbck is 7 cycles
-                               InstrStage2<8, [FU_DRegsVFP], 0, Reserved>,
+  InstrItinData,
+                               // Extra latency cycles since wbck is 7 cycles
+                               InstrStage<8, [FU_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<2, [FU_NPipe]>], [3, 3, 1, 1]>,
   //
   // Quad-register Permute (3 cycle issue)
   // Result written in N2, but that is relative to the last cycle of multicycle,
   // so we use 4 for those cases
-  InstrItinData,
-                               // Extra 3 latency cycle since wbck is 8 cycles
-                               InstrStage2<9, [FU_DRegsVFP], 0, Reserved>,
+  InstrItinData,
+                               // Extra latency cycles since wbck is 8 cycles
+                               InstrStage<9, [FU_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<3, [FU_NLSPipe]>], [4, 4, 1, 1]>,
 
   //
   // Double-register VEXT
-  InstrItinData,
-                               // Extra 3 latency cycle since wbck is 7 cycles
-                               InstrStage2<7, [FU_DRegsVFP], 0, Reserved>,
+  InstrItinData,
+                               // Extra latency cycles since wbck is 7 cycles
+                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<1, [FU_NPipe]>], [2, 1, 1]>,
   //
   // Quad-register VEXT
-  InstrItinData,
-                               // Extra 3 latency cycle since wbck is 9 cycles
-                               InstrStage2<8, [FU_DRegsVFP], 0, Reserved>,
+  InstrItinData,
+                               // Extra latency cycles since wbck is 9 cycles
+                               InstrStage<8, [FU_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<2, [FU_NPipe]>], [3, 1, 1]>,
   //
   // VTB
-  InstrItinData,
-                               // Extra 3 latency cycle since wbck is 7 cycles
-                               InstrStage2<8, [FU_DRegsVFP], 0, Reserved>,
+  InstrItinData,
+                               // Extra latency cycles since wbck is 7 cycles
+                               InstrStage<8, [FU_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<2, [FU_NPipe]>], [3, 2, 1]>,
-  InstrItinData,
-                               // Extra 3 latency cycle since wbck is 7 cycles
-                               InstrStage2<8, [FU_DRegsVFP], 0, Reserved>,
+  InstrItinData,
+                               // Extra latency cycles since wbck is 7 cycles
+                               InstrStage<8, [FU_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<2, [FU_NPipe]>], [3, 2, 2, 1]>,
-  InstrItinData,
-                               // Extra 3 latency cycle since wbck is 8 cycles
-                               InstrStage2<9, [FU_DRegsVFP], 0, Reserved>,
+  InstrItinData,
+                               // Extra latency cycles since wbck is 8 cycles
+                               InstrStage<9, [FU_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<3, [FU_NPipe]>], [4, 2, 2, 3, 1]>,
-  InstrItinData,
-                               // Extra 3 latency cycle since wbck is 8 cycles
-                               InstrStage2<9, [FU_DRegsVFP], 0, Reserved>,
+  InstrItinData,
+                               // Extra latency cycles since wbck is 8 cycles
+                               InstrStage<9, [FU_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<3, [FU_NPipe]>], [4, 2, 2, 3, 3, 1]>,
   //
   // VTBX
-  InstrItinData,
-                               // Extra 3 latency cycle since wbck is 7 cycles
-                               InstrStage2<8, [FU_DRegsVFP], 0, Reserved>,
+  InstrItinData,
+                               // Extra latency cycles since wbck is 7 cycles
+                               InstrStage<8, [FU_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<2, [FU_NPipe]>], [3, 1, 2, 1]>,
-  InstrItinData,
-                               // Extra 3 latency cycle since wbck is 7 cycles
-                               InstrStage2<8, [FU_DRegsVFP], 0, Reserved>,
+  InstrItinData,
+                               // Extra latency cycles since wbck is 7 cycles
+                               InstrStage<8, [FU_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<2, [FU_NPipe]>], [3, 1, 2, 2, 1]>,
-  InstrItinData,
-                               // Extra 3 latency cycle since wbck is 8 cycles
-                               InstrStage2<9, [FU_DRegsVFP], 0, Reserved>,
+  InstrItinData,
+                               // Extra latency cycles since wbck is 8 cycles
+                               InstrStage<9, [FU_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<3, [FU_NPipe]>], [4, 1, 2, 2, 3, 1]>,
-  InstrItinData,
-                               // Extra 3 latency cycle since wbck is 8 cycles
-                               InstrStage2<9, [FU_DRegsVFP], 0, Reserved>,
+  InstrItinData,
+                               // Extra latency cycles since wbck is 8 cycles
+                               InstrStage<9, [FU_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<2, [FU_NPipe]>], [4, 1, 2, 2, 3, 3, 1]>
 ]>;




From asl at math.spbu.ru  Wed Apr  7 13:21:47 2010
From: asl at math.spbu.ru (Anton Korobeynikov)
Date: Wed, 07 Apr 2010 18:21:47 -0000
Subject: [llvm-commits] [llvm] r100668 -
	/llvm/trunk/lib/Target/ARM/ARMTargetMachine.cpp
Message-ID: <20100407182147.19F892A6C12E@llvm.org>

Author: asl
Date: Wed Apr  7 13:21:46 2010
New Revision: 100668

URL: http://llvm.org/viewvc/llvm-project?rev=100668&view=rev
Log:
Move the NEON-VFP domain fixer earlier, so the post-RA scheduler can benefit from it.

Modified:
    llvm/trunk/lib/Target/ARM/ARMTargetMachine.cpp

Modified: llvm/trunk/lib/Target/ARM/ARMTargetMachine.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMTargetMachine.cpp?rev=100668&r1=100667&r2=100668&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMTargetMachine.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMTargetMachine.cpp Wed Apr  7 13:21:46 2010
@@ -108,8 +108,12 @@
 bool ARMBaseTargetMachine::addPreSched2(PassManagerBase &PM,
                                         CodeGenOpt::Level OptLevel) {
   // FIXME: temporarily disabling load / store optimization pass for Thumb1.
-  if (OptLevel != CodeGenOpt::None && !Subtarget.isThumb1Only())
-    PM.add(createARMLoadStoreOptimizationPass());
+  if (OptLevel != CodeGenOpt::None) {
+    if (!Subtarget.isThumb1Only())
+      PM.add(createARMLoadStoreOptimizationPass());
+    if (Subtarget.hasNEON())
+      PM.add(createNEONMoveFixPass());
+  }
 
   // Expand some pseudo instructions into multiple instructions to allow
   // proper scheduling.
@@ -124,8 +128,6 @@
   if (OptLevel != CodeGenOpt::None) {
     if (!Subtarget.isThumb1Only())
       PM.add(createIfConverterPass());
-    if (Subtarget.hasNEON())
-      PM.add(createNEONMoveFixPass());
   }
 
   if (Subtarget.isThumb2()) {




From asl at math.spbu.ru  Wed Apr  7 13:21:52 2010
From: asl at math.spbu.ru (Anton Korobeynikov)
Date: Wed, 07 Apr 2010 18:21:52 -0000
Subject: [llvm-commits] [llvm] r100669 -
	/llvm/trunk/lib/Target/ARM/ARMScheduleV7.td
Message-ID: <20100407182152.6F10D2A6C12C@llvm.org>

Author: asl
Date: Wed Apr  7 13:21:52 2010
New Revision: 100669

URL: http://llvm.org/viewvc/llvm-project?rev=100669&view=rev
Log:
Add some A8-based approximation for instructions with unknown cycle times

Modified:
    llvm/trunk/lib/Target/ARM/ARMScheduleV7.td

Modified: llvm/trunk/lib/Target/ARM/ARMScheduleV7.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMScheduleV7.td?rev=100669&r1=100668&r2=100669&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMScheduleV7.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMScheduleV7.td Wed Apr  7 13:21:52 2010
@@ -1025,6 +1025,58 @@
                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<1, [FU_NPipe]>], [3]>,
   //
+  // Double-register Permute Move
+  InstrItinData,
+  // FIXME: all latencies are arbitrary, no information is available
+                               InstrStage<3, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NLSPipe]>], [2, 1]>,
+  //
+  // Quad-register Permute Move
+  // Result written in N2, but that is relative to the last cycle of multicycle,
+  // so we use 3 for those cases
+  InstrItinData,
+  // FIXME: all latencies are arbitrary, no information is available
+                               InstrStage<4, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<2, [FU_NPipe]>], [3, 1]>,
+  //
+  // Integer to Single-precision Move
+  InstrItinData,
+  // FIXME: all latencies are arbitrary, no information is available
+                               InstrStage<3, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [2, 1]>,
+  //
+  // Integer to Double-precision Move
+  InstrItinData,
+  // FIXME: all latencies are arbitrary, no information is available
+                               InstrStage<3, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [2, 1, 1]>,
+  //
+  // Single-precision to Integer Move
+  InstrItinData,
+  // FIXME: all latencies are arbitrary, no information is available
+                               InstrStage<3, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [2, 1]>,
+  //
+  // Double-precision to Integer Move
+  InstrItinData,
+  // FIXME: all latencies are arbitrary, no information is available
+                               InstrStage<3, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [2, 2, 1]>,
+  //
+  // Integer to Lane Move
+  InstrItinData,
+  // FIXME: all latencies are arbitrary, no information is available
+                               InstrStage<4, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<2, [FU_NPipe]>], [3, 1, 1]>,
+
+  //
   // Double-register FP Unary
   InstrItinData,
                                // Extra latency cycles since wbck is 6 cycles




From asl at math.spbu.ru  Wed Apr  7 13:21:58 2010
From: asl at math.spbu.ru (Anton Korobeynikov)
Date: Wed, 07 Apr 2010 18:21:58 -0000
Subject: [llvm-commits] [llvm] r100670 -
	/llvm/trunk/lib/Target/ARM/ARMScheduleV7.td
Message-ID: <20100407182158.D8ED52A6C12D@llvm.org>

Author: asl
Date: Wed Apr  7 13:21:58 2010
New Revision: 100670

URL: http://llvm.org/viewvc/llvm-project?rev=100670&view=rev
Log:
Add some crude approximation for neon load/store instructions

Modified:
    llvm/trunk/lib/Target/ARM/ARMScheduleV7.td

Modified: llvm/trunk/lib/Target/ARM/ARMScheduleV7.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMScheduleV7.td?rev=100670&r1=100669&r2=100670&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMScheduleV7.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMScheduleV7.td Wed Apr  7 13:21:58 2010
@@ -320,30 +320,35 @@
   // Issue through integer pipeline, and execute in NEON unit.
   //
   // VLD1
+  // FIXME: We don't model this instruction properly
   InstrItinData, 
                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<1, [FU_LdSt0], 0>,
                                InstrStage<1, [FU_NLSPipe]>]>,
   //
   // VLD2
+  // FIXME: We don't model this instruction properly
   InstrItinData, 
                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<1, [FU_LdSt0], 0>,
                                InstrStage<1, [FU_NLSPipe]>], [2, 2, 1]>,
   //
   // VLD3
+  // FIXME: We don't model this instruction properly
   InstrItinData, 
                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<1, [FU_LdSt0], 0>,
                                InstrStage<1, [FU_NLSPipe]>], [2, 2, 2, 1]>,
   //
   // VLD4
+  // FIXME: We don't model this instruction properly
   InstrItinData, 
                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<1, [FU_LdSt0], 0>,
                                InstrStage<1, [FU_NLSPipe]>], [2, 2, 2, 2, 1]>,
   //
   // VST
+  // FIXME: We don't model this instruction properly
   InstrItinData, 
                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<1, [FU_LdSt0], 0>,
@@ -801,7 +806,56 @@
                                InstrStage<1, [FU_NPipe]>], [1, 1, 1]>,
   // NEON
   // Issue through integer pipeline, and execute in NEON unit.
-
+  // FIXME: Neon pipeline and LdSt unit are multiplexed. 
+  //        Add some syntactic sugar to model this!
+  // VLD1
+  // FIXME: We don't model this instruction properly
+  InstrItinData,
+                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Issue], 0>, 
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_LdSt0], 0>,
+                               InstrStage<1, [FU_NPipe]>]>,
+  //
+  // VLD2
+  // FIXME: We don't model this instruction properly
+  InstrItinData,
+                               // Extra latency cycles since wbck is 6 cycles
+                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Issue], 0>, 
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_LdSt0], 0>,
+                               InstrStage<1, [FU_NPipe]>], [2, 2, 1]>,
+  //
+  // VLD3
+  // FIXME: We don't model this instruction properly
+  InstrItinData,
+                               // Extra latency cycles since wbck is 6 cycles
+                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Issue], 0>, 
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_LdSt0], 0>,
+                               InstrStage<1, [FU_NPipe]>], [2, 2, 2, 1]>,
+  //
+  // VLD4
+  // FIXME: We don't model this instruction properly
+  InstrItinData,
+                               // Extra latency cycles since wbck is 6 cycles
+                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Issue], 0>, 
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_LdSt0], 0>,
+                               InstrStage<1, [FU_NPipe]>], [2, 2, 2, 2, 1]>,
+  //
+  // VST
+  // FIXME: We don't model this instruction properly
+  InstrItinData,
+                               // Extra latency cycles since wbck is 6 cycles
+                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Issue], 0>, 
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_LdSt0], 0>,
+                               InstrStage<1, [FU_NPipe]>]>,
   //
   // Double-register Integer Unary
   InstrItinData,




From asl at math.spbu.ru  Wed Apr  7 13:22:03 2010
From: asl at math.spbu.ru (Anton Korobeynikov)
Date: Wed, 07 Apr 2010 18:22:03 -0000
Subject: [llvm-commits] [llvm] r100671 -
	/llvm/trunk/lib/Target/ARM/ARMScheduleV7.td
Message-ID: <20100407182203.C62872A6C12E@llvm.org>

Author: asl
Date: Wed Apr  7 13:22:03 2010
New Revision: 100671

URL: http://llvm.org/viewvc/llvm-project?rev=100671&view=rev
Log:
Add some crude itin approximation for VFP load / stores on A9

Modified:
    llvm/trunk/lib/Target/ARM/ARMScheduleV7.td

Modified: llvm/trunk/lib/Target/ARM/ARMScheduleV7.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMScheduleV7.td?rev=100671&r1=100670&r2=100671&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMScheduleV7.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMScheduleV7.td Wed Apr  7 13:22:03 2010
@@ -804,6 +804,60 @@
                                InstrStage<2, [FU_DRegsN],   0, Reserved>,
                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<1, [FU_NPipe]>], [1, 1, 1]>,
+  //
+  // Single-precision FP Load
+  // use FU_Issue to enforce the 1 load/store per cycle limit
+  InstrItinData,
+                               InstrStage<2, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Issue], 0>, 
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_LdSt0], 0>,
+                               InstrStage<1, [FU_NPipe]>]>,
+  //
+  // Double-precision FP Load
+  // use FU_Issue to enforce the 1 load/store per cycle limit
+  InstrItinData,
+                               InstrStage<2, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Issue], 0>, 
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_LdSt0], 0>,
+                               InstrStage<1, [FU_NPipe]>]>,
+  //
+  // FP Load Multiple
+  // use FU_Issue to enforce the 1 load/store per cycle limit
+  InstrItinData,
+                               InstrStage<2, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Issue], 0>, 
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_LdSt0], 0>,
+                               InstrStage<1, [FU_NPipe]>]>,
+  //
+  // Single-precision FP Store
+  // use FU_Issue to enforce the 1 load/store per cycle limit
+  InstrItinData,
+                               InstrStage<2, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Issue], 0>, 
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_LdSt0], 0>,
+                               InstrStage<1, [FU_NPipe]>]>,
+  //
+  // Double-precision FP Store
+  // use FU_Issue to enforce the 1 load/store per cycle limit
+  InstrItinData,
+                               InstrStage<2, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Issue], 0>, 
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_LdSt0], 0>,
+                               InstrStage<1, [FU_NPipe]>]>,
+  //
+  // FP Store Multiple
+  // use FU_Issue to enforce the 1 load/store per cycle limit
+  InstrItinData,
+                               InstrStage<2, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Issue], 0>, 
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_LdSt0], 0>,
+                               InstrStage<1, [FU_NPipe]>]>,
   // NEON
   // Issue through integer pipeline, and execute in NEON unit.
   // FIXME: Neon pipeline and LdSt unit are multiplexed. 




From asl at math.spbu.ru  Wed Apr  7 13:22:11 2010
From: asl at math.spbu.ru (Anton Korobeynikov)
Date: Wed, 07 Apr 2010 18:22:11 -0000
Subject: [llvm-commits] [llvm] r100672 - in /llvm/trunk/lib/Target/ARM:
 ARMSchedule.td ARMScheduleA8.td ARMScheduleA9.td ARMScheduleV7.td
Message-ID: <20100407182211.8EADF2A6C12C@llvm.org>

Author: asl
Date: Wed Apr  7 13:22:11 2010
New Revision: 100672

URL: http://llvm.org/viewvc/llvm-project?rev=100672&view=rev
Log:
Split A8/A9 itins - they already were too big.

Added:
    llvm/trunk/lib/Target/ARM/ARMScheduleA8.td
    llvm/trunk/lib/Target/ARM/ARMScheduleA9.td
Removed:
    llvm/trunk/lib/Target/ARM/ARMScheduleV7.td
Modified:
    llvm/trunk/lib/Target/ARM/ARMSchedule.td

Modified: llvm/trunk/lib/Target/ARM/ARMSchedule.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMSchedule.td?rev=100672&r1=100671&r2=100672&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMSchedule.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMSchedule.td Wed Apr  7 13:22:11 2010
@@ -169,4 +169,5 @@
 
 
 include "ARMScheduleV6.td"
-include "ARMScheduleV7.td"
+include "ARMScheduleA8.td"
+include "ARMScheduleA9.td"

Added: llvm/trunk/lib/Target/ARM/ARMScheduleA8.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMScheduleA8.td?rev=100672&view=auto
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMScheduleA8.td (added)
+++ llvm/trunk/lib/Target/ARM/ARMScheduleA8.td Wed Apr  7 13:22:11 2010
@@ -0,0 +1,610 @@
+//=- ARMScheduleA8.td - ARM Cortex-A8 Scheduling Definitions -*- tablegen -*-=//
+// 
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// 
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the ARM Cortex A8 processors.
+//
+//===----------------------------------------------------------------------===//
+
+//
+// Scheduling information derived from "Cortex-A8 Technical Reference Manual".
+//
+// Dual issue pipeline represented by FU_Pipe0 | FU_Pipe1
+//
+def CortexA8Itineraries : ProcessorItineraries<[
+
+  // Two fully-pipelined integer ALU pipelines
+  //
+  // No operand cycles
+  InstrItinData]>,
+  //
+  // Binary Instructions that produce a result
+  InstrItinData], [2, 2]>,
+  InstrItinData], [2, 2, 2]>,
+  InstrItinData], [2, 2, 1]>,
+  InstrItinData], [2, 2, 1, 1]>,
+  //
+  // Unary Instructions that produce a result
+  InstrItinData], [2, 2]>,
+  InstrItinData], [2, 1]>,
+  InstrItinData], [2, 1, 1]>,
+  //
+  // Compare instructions
+  InstrItinData], [2]>,
+  InstrItinData], [2, 2]>,
+  InstrItinData], [2, 1]>,
+  InstrItinData], [2, 1, 1]>,
+  //
+  // Move instructions, unconditional
+  InstrItinData], [1]>,
+  InstrItinData], [1, 1]>,
+  InstrItinData], [1, 1]>,
+  InstrItinData], [1, 1, 1]>,
+  //
+  // Move instructions, conditional
+  InstrItinData], [2]>,
+  InstrItinData], [2, 1]>,
+  InstrItinData], [2, 1]>,
+  InstrItinData], [2, 1, 1]>,
+
+  // Integer multiply pipeline
+  // Result written in E5, but that is relative to the last cycle of multicycle,
+  // so we use 6 for those cases
+  //
+  InstrItinData], [5, 1, 1]>,
+  InstrItinData, 
+                                InstrStage<2, [FU_Pipe0]>], [6, 1, 1, 4]>,
+  InstrItinData, 
+                                InstrStage<2, [FU_Pipe0]>], [6, 1, 1]>,
+  InstrItinData, 
+                                InstrStage<2, [FU_Pipe0]>], [6, 1, 1, 4]>,
+  InstrItinData, 
+                                InstrStage<3, [FU_Pipe0]>], [6, 6, 1, 1]>,
+  InstrItinData, 
+                                InstrStage<3, [FU_Pipe0]>], [6, 6, 1, 1]>,
+  
+  // Integer load pipeline
+  //
+  // loads have an extra cycle of latency, but are fully pipelined
+  // use FU_Issue to enforce the 1 load/store per cycle limit
+  //
+  // Immediate offset
+  InstrItinData,
+                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                                InstrStage<1, [FU_LdSt0]>], [3, 1]>,
+  //
+  // Register offset
+  InstrItinData,
+                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                                InstrStage<1, [FU_LdSt0]>], [3, 1, 1]>,
+  //
+  // Scaled register offset, issues over 2 cycles
+  InstrItinData,
+                                InstrStage<1, [FU_Pipe0], 0>,
+                                InstrStage<1, [FU_Pipe1]>,
+                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                                InstrStage<1, [FU_LdSt0]>], [4, 1, 1]>,
+  //
+  // Immediate offset with update
+  InstrItinData,
+                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                                InstrStage<1, [FU_LdSt0]>], [3, 2, 1]>,
+  //
+  // Register offset with update
+  InstrItinData,
+                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                                InstrStage<1, [FU_LdSt0]>], [3, 2, 1, 1]>,
+  //
+  // Scaled register offset with update, issues over 2 cycles
+  InstrItinData,
+                                InstrStage<1, [FU_Pipe0], 0>,
+                                InstrStage<1, [FU_Pipe1]>,
+                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                                InstrStage<1, [FU_LdSt0]>], [4, 3, 1, 1]>,
+  //
+  // Load multiple
+  InstrItinData,
+                                InstrStage<2, [FU_Pipe0], 0>,
+                                InstrStage<2, [FU_Pipe1]>,
+                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                                InstrStage<1, [FU_LdSt0]>]>,
+
+  // Integer store pipeline
+  //
+  // use FU_Issue to enforce the 1 load/store per cycle limit
+  //
+  // Immediate offset
+  InstrItinData,
+                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                                InstrStage<1, [FU_LdSt0]>], [3, 1]>,
+  //
+  // Register offset
+  InstrItinData,
+                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                                InstrStage<1, [FU_LdSt0]>], [3, 1, 1]>,
+  //
+  // Scaled register offset, issues over 2 cycles
+  InstrItinData,
+                                InstrStage<1, [FU_Pipe0], 0>,
+                                InstrStage<1, [FU_Pipe1]>,
+                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                                InstrStage<1, [FU_LdSt0]>], [3, 1, 1]>,
+  //
+  // Immediate offset with update
+  InstrItinData,
+                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                                InstrStage<1, [FU_LdSt0]>], [2, 3, 1]>,
+  //
+  // Register offset with update
+  InstrItinData,
+                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                                InstrStage<1, [FU_LdSt0]>], [2, 3, 1, 1]>,
+  //
+  // Scaled register offset with update, issues over 2 cycles
+  InstrItinData,
+                                InstrStage<1, [FU_Pipe0], 0>,
+                                InstrStage<1, [FU_Pipe1]>,
+                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                                InstrStage<1, [FU_LdSt0]>], [3, 3, 1, 1]>,
+  //
+  // Store multiple
+  InstrItinData,
+                                InstrStage<2, [FU_Pipe0], 0>,
+                                InstrStage<2, [FU_Pipe1]>,
+                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                                InstrStage<1, [FU_LdSt0]>]>,
+  
+  // Branch
+  //
+  // no delay slots, so the latency of a branch is unimportant
+  InstrItinData]>,
+
+  // VFP
+  // Issue through integer pipeline, and execute in NEON unit. We assume
+  // RunFast mode so that NFP pipeline is used for single-precision when
+  // possible.
+  //
+  // FP Special Register to Integer Register File Move
+  InstrItinData,
+                              InstrStage<1, [FU_NLSPipe]>]>,
+  //
+  // Single-precision FP Unary
+  InstrItinData,
+                               InstrStage<1, [FU_NPipe]>], [7, 1]>,
+  //
+  // Double-precision FP Unary
+  InstrItinData,
+                               InstrStage<4, [FU_NPipe], 0>,
+                               InstrStage<4, [FU_NLSPipe]>], [4, 1]>,
+  //
+  // Single-precision FP Compare
+  InstrItinData,
+                               InstrStage<1, [FU_NPipe]>], [1, 1]>,
+  //
+  // Double-precision FP Compare
+  InstrItinData,
+                               InstrStage<4, [FU_NPipe], 0>,
+                               InstrStage<4, [FU_NLSPipe]>], [4, 1]>,
+  //
+  // Single to Double FP Convert
+  InstrItinData,
+                               InstrStage<7, [FU_NPipe], 0>,
+                               InstrStage<7, [FU_NLSPipe]>], [7, 1]>,
+  //
+  // Double to Single FP Convert
+  InstrItinData,
+                               InstrStage<5, [FU_NPipe], 0>,
+                               InstrStage<5, [FU_NLSPipe]>], [5, 1]>,
+  //
+  // Single-Precision FP to Integer Convert
+  InstrItinData,
+                               InstrStage<1, [FU_NPipe]>], [7, 1]>,
+  //
+  // Double-Precision FP to Integer Convert
+  InstrItinData,
+                               InstrStage<8, [FU_NPipe], 0>,
+                               InstrStage<8, [FU_NLSPipe]>], [8, 1]>,
+  //
+  // Integer to Single-Precision FP Convert
+  InstrItinData,
+                               InstrStage<1, [FU_NPipe]>], [7, 1]>,
+  //
+  // Integer to Double-Precision FP Convert
+  InstrItinData,
+                               InstrStage<8, [FU_NPipe], 0>,
+                               InstrStage<8, [FU_NLSPipe]>], [8, 1]>,
+  //
+  // Single-precision FP ALU
+  InstrItinData,
+                               InstrStage<1, [FU_NPipe]>], [7, 1, 1]>,
+  //
+  // Double-precision FP ALU
+  InstrItinData,
+                               InstrStage<9, [FU_NPipe], 0>,
+                               InstrStage<9, [FU_NLSPipe]>], [9, 1, 1]>,
+  //
+  // Single-precision FP Multiply
+  InstrItinData,
+                               InstrStage<1, [FU_NPipe]>], [7, 1, 1]>,
+  //
+  // Double-precision FP Multiply
+  InstrItinData,
+                               InstrStage<11, [FU_NPipe], 0>,
+                               InstrStage<11, [FU_NLSPipe]>], [11, 1, 1]>,
+  //
+  // Single-precision FP MAC
+  InstrItinData,
+                               InstrStage<1, [FU_NPipe]>], [7, 2, 1, 1]>,
+  //
+  // Double-precision FP MAC
+  InstrItinData,
+                               InstrStage<19, [FU_NPipe], 0>,
+                               InstrStage<19, [FU_NLSPipe]>], [19, 2, 1, 1]>,
+  //
+  // Single-precision FP DIV
+  InstrItinData,
+                               InstrStage<20, [FU_NPipe], 0>,
+                               InstrStage<20, [FU_NLSPipe]>], [20, 1, 1]>,
+  //
+  // Double-precision FP DIV
+  InstrItinData,
+                               InstrStage<29, [FU_NPipe], 0>,
+                               InstrStage<29, [FU_NLSPipe]>], [29, 1, 1]>,
+  //
+  // Single-precision FP SQRT
+  InstrItinData,
+                               InstrStage<19, [FU_NPipe], 0>,
+                               InstrStage<19, [FU_NLSPipe]>], [19, 1]>,
+  //
+  // Double-precision FP SQRT
+  InstrItinData,
+                               InstrStage<29, [FU_NPipe], 0>,
+                               InstrStage<29, [FU_NLSPipe]>], [29, 1]>,
+  //
+  // Single-precision FP Load
+  // use FU_Issue to enforce the 1 load/store per cycle limit
+  InstrItinData, 
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_LdSt0], 0>,
+                               InstrStage<1, [FU_NLSPipe]>]>,
+  //
+  // Double-precision FP Load
+  // use FU_Issue to enforce the 1 load/store per cycle limit
+  InstrItinData, 
+                               InstrStage<1, [FU_Pipe0], 0>,
+                               InstrStage<1, [FU_Pipe1]>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_LdSt0], 0>,
+                               InstrStage<1, [FU_NLSPipe]>]>,
+  //
+  // FP Load Multiple
+  // use FU_Issue to enforce the 1 load/store per cycle limit
+  InstrItinData, 
+                               InstrStage<2, [FU_Pipe0], 0>,
+                               InstrStage<2, [FU_Pipe1]>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_LdSt0], 0>,
+                               InstrStage<1, [FU_NLSPipe]>]>,
+  //
+  // Single-precision FP Store
+  // use FU_Issue to enforce the 1 load/store per cycle limit
+  InstrItinData, 
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_LdSt0], 0>,
+                               InstrStage<1, [FU_NLSPipe]>]>,
+  //
+  // Double-precision FP Store
+  // use FU_Issue to enforce the 1 load/store per cycle limit
+  InstrItinData, 
+                               InstrStage<1, [FU_Pipe0], 0>,
+                               InstrStage<1, [FU_Pipe1]>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_LdSt0], 0>,
+                               InstrStage<1, [FU_NLSPipe]>]>,
+  //
+  // FP Store Multiple
+  // use FU_Issue to enforce the 1 load/store per cycle limit
+  InstrItinData, 
+                               InstrStage<2, [FU_Pipe0], 0>,
+                               InstrStage<2, [FU_Pipe1]>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_LdSt0], 0>,
+                               InstrStage<1, [FU_NLSPipe]>]>,
+
+  // NEON
+  // Issue through integer pipeline, and execute in NEON unit.
+  //
+  // VLD1
+  // FIXME: We don't model this instruction properly
+  InstrItinData, 
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_LdSt0], 0>,
+                               InstrStage<1, [FU_NLSPipe]>]>,
+  //
+  // VLD2
+  // FIXME: We don't model this instruction properly
+  InstrItinData, 
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_LdSt0], 0>,
+                               InstrStage<1, [FU_NLSPipe]>], [2, 2, 1]>,
+  //
+  // VLD3
+  // FIXME: We don't model this instruction properly
+  InstrItinData, 
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_LdSt0], 0>,
+                               InstrStage<1, [FU_NLSPipe]>], [2, 2, 2, 1]>,
+  //
+  // VLD4
+  // FIXME: We don't model this instruction properly
+  InstrItinData, 
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_LdSt0], 0>,
+                               InstrStage<1, [FU_NLSPipe]>], [2, 2, 2, 2, 1]>,
+  //
+  // VST
+  // FIXME: We don't model this instruction properly
+  InstrItinData, 
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_LdSt0], 0>,
+                               InstrStage<1, [FU_NLSPipe]>]>,
+  //
+  // Double-register FP Unary
+  InstrItinData,
+                               InstrStage<1, [FU_NPipe]>], [5, 2]>,
+  //
+  // Quad-register FP Unary
+  // Result written in N5, but that is relative to the last cycle of multicycle,
+  // so we use 6 for those cases
+  InstrItinData,
+                               InstrStage<2, [FU_NPipe]>], [6, 2]>,
+  //
+  // Double-register FP Binary
+  InstrItinData,
+                               InstrStage<1, [FU_NPipe]>], [5, 2, 2]>,
+  //
+  // Quad-register FP Binary
+  // Result written in N5, but that is relative to the last cycle of multicycle,
+  // so we use 6 for those cases
+  InstrItinData,
+                               InstrStage<2, [FU_NPipe]>], [6, 2, 2]>,
+  //
+  // Move Immediate
+  InstrItinData,
+                               InstrStage<1, [FU_NPipe]>], [3]>,
+  //
+  // Double-register Permute Move
+  InstrItinData,
+                               InstrStage<1, [FU_NLSPipe]>], [2, 1]>,
+  //
+  // Quad-register Permute Move
+  // Result written in N2, but that is relative to the last cycle of multicycle,
+  // so we use 3 for those cases
+  InstrItinData,
+                               InstrStage<2, [FU_NLSPipe]>], [3, 1]>,
+  //
+  // Integer to Single-precision Move
+  InstrItinData,
+                               InstrStage<1, [FU_NLSPipe]>], [2, 1]>,
+  //
+  // Integer to Double-precision Move
+  InstrItinData,
+                               InstrStage<1, [FU_NLSPipe]>], [2, 1, 1]>,
+  //
+  // Single-precision to Integer Move
+  InstrItinData,
+                               InstrStage<1, [FU_NLSPipe]>], [20, 1]>,
+  //
+  // Double-precision to Integer Move
+  InstrItinData,
+                               InstrStage<1, [FU_NLSPipe]>], [20, 20, 1]>,
+  //
+  // Integer to Lane Move
+  InstrItinData,
+                               InstrStage<2, [FU_NLSPipe]>], [3, 1, 1]>,
+  //
+  // Double-register Permute
+  InstrItinData,
+                               InstrStage<1, [FU_NLSPipe]>], [2, 2, 1, 1]>,
+  //
+  // Quad-register Permute
+  // Result written in N2, but that is relative to the last cycle of multicycle,
+  // so we use 3 for those cases
+  InstrItinData,
+                               InstrStage<2, [FU_NLSPipe]>], [3, 3, 1, 1]>,
+  //
+  // Quad-register Permute (3 cycle issue)
+  // Result written in N2, but that is relative to the last cycle of multicycle,
+  // so we use 4 for those cases
+  InstrItinData,
+                               InstrStage<1, [FU_NLSPipe]>,
+                               InstrStage<1, [FU_NPipe], 0>,
+                               InstrStage<2, [FU_NLSPipe]>], [4, 4, 1, 1]>,
+  //
+  // Double-register FP Multiply-Accumulate
+  InstrItinData,
+                               InstrStage<1, [FU_NPipe]>], [9, 3, 2, 2]>,
+  //
+  // Quad-register FP Multiply-Accumulate
+  // Result written in N9, but that is relative to the last cycle of multicycle,
+  // so we use 10 for those cases
+  InstrItinData,
+                               InstrStage<2, [FU_NPipe]>], [10, 3, 2, 2]>,
+  //
+  // Double-register Reciprocal Step
+  InstrItinData,
+                               InstrStage<1, [FU_NPipe]>], [9, 2, 2]>,
+  //
+  // Quad-register Reciprocal Step
+  InstrItinData,
+                               InstrStage<2, [FU_NPipe]>], [10, 2, 2]>,
+  //
+  // Double-register Integer Count
+  InstrItinData,
+                               InstrStage<1, [FU_NPipe]>], [3, 2, 2]>,
+  //
+  // Quad-register Integer Count
+  // Result written in N3, but that is relative to the last cycle of multicycle,
+  // so we use 4 for those cases
+  InstrItinData,
+                               InstrStage<2, [FU_NPipe]>], [4, 2, 2]>,
+  //
+  // Double-register Integer Unary
+  InstrItinData,
+                               InstrStage<1, [FU_NPipe]>], [4, 2]>,
+  //
+  // Quad-register Integer Unary
+  InstrItinData,
+                               InstrStage<1, [FU_NPipe]>], [4, 2]>,
+  //
+  // Double-register Integer Q-Unary
+  InstrItinData,
+                               InstrStage<1, [FU_NPipe]>], [4, 1]>,
+  //
+  // Quad-register Integer Q-Unary
+  InstrItinData,
+                               InstrStage<1, [FU_NPipe]>], [4, 1]>,
+  //
+  // Double-register Integer Binary
+  InstrItinData,
+                               InstrStage<1, [FU_NPipe]>], [3, 2, 2]>,
+  //
+  // Quad-register Integer Binary
+  InstrItinData,
+                               InstrStage<1, [FU_NPipe]>], [3, 2, 2]>,
+  //
+  // Double-register Integer Binary (4 cycle)
+  InstrItinData,
+                               InstrStage<1, [FU_NPipe]>], [4, 2, 1]>,
+  //
+  // Quad-register Integer Binary (4 cycle)
+  InstrItinData,
+                               InstrStage<1, [FU_NPipe]>], [4, 2, 1]>,
+
+  //
+  // Double-register Integer Subtract
+  InstrItinData,
+                               InstrStage<1, [FU_NPipe]>], [3, 2, 1]>,
+  //
+  // Quad-register Integer Subtract
+  InstrItinData,
+                               InstrStage<1, [FU_NPipe]>], [3, 2, 1]>,
+  //
+  // Double-register Integer Subtract (4 cycle)
+  InstrItinData,
+                               InstrStage<1, [FU_NPipe]>], [4, 2, 1]>,
+  //
+  // Quad-register Integer Subtract (4 cycle)
+  InstrItinData,
+                               InstrStage<1, [FU_NPipe]>], [4, 2, 1]>,
+  //
+  // Double-register Integer Shift
+  InstrItinData,
+                               InstrStage<1, [FU_NPipe]>], [3, 1, 1]>,
+  //
+  // Quad-register Integer Shift
+  InstrItinData,
+                               InstrStage<2, [FU_NPipe]>], [4, 1, 1]>,
+  //
+  // Double-register Integer Shift (4 cycle)
+  InstrItinData,
+                               InstrStage<1, [FU_NPipe]>], [4, 1, 1]>,
+  //
+  // Quad-register Integer Shift (4 cycle)
+  InstrItinData,
+                               InstrStage<2, [FU_NPipe]>], [5, 1, 1]>,
+  //
+  // Double-register Integer Pair Add Long
+  InstrItinData,
+                               InstrStage<1, [FU_NPipe]>], [6, 3, 1]>,
+  //
+  // Quad-register Integer Pair Add Long
+  InstrItinData,
+                               InstrStage<2, [FU_NPipe]>], [7, 3, 1]>,
+  //
+  // Double-register Absolute Difference and Accumulate
+  InstrItinData,
+                               InstrStage<1, [FU_NPipe]>], [6, 3, 2, 1]>,
+  //
+  // Quad-register Absolute Difference and Accumulate
+  InstrItinData,
+                               InstrStage<2, [FU_NPipe]>], [6, 3, 2, 1]>,
+
+  //
+  // Double-register Integer Multiply (.8, .16)
+  InstrItinData,
+                               InstrStage<1, [FU_NPipe]>], [6, 2, 2]>,
+  //
+  // Double-register Integer Multiply (.32)
+  InstrItinData,
+                               InstrStage<2, [FU_NPipe]>], [7, 2, 1]>,
+  //
+  // Quad-register Integer Multiply (.8, .16)
+  InstrItinData,
+                               InstrStage<2, [FU_NPipe]>], [7, 2, 2]>,
+  //
+  // Quad-register Integer Multiply (.32)
+  InstrItinData,
+                               InstrStage<1, [FU_NPipe]>,
+                               InstrStage<2, [FU_NLSPipe], 0>,
+                               InstrStage<3, [FU_NPipe]>], [9, 2, 1]>,
+  //
+  // Double-register Integer Multiply-Accumulate (.8, .16)
+  InstrItinData,
+                               InstrStage<1, [FU_NPipe]>], [6, 3, 2, 2]>,
+  //
+  // Double-register Integer Multiply-Accumulate (.32)
+  InstrItinData,
+                               InstrStage<2, [FU_NPipe]>], [7, 3, 2, 1]>,
+  //
+  // Quad-register Integer Multiply-Accumulate (.8, .16)
+  InstrItinData,
+                               InstrStage<2, [FU_NPipe]>], [7, 3, 2, 2]>,
+  //
+  // Quad-register Integer Multiply-Accumulate (.32)
+  InstrItinData,
+                               InstrStage<1, [FU_NPipe]>,
+                               InstrStage<2, [FU_NLSPipe], 0>,
+                               InstrStage<3, [FU_NPipe]>], [9, 3, 2, 1]>,
+  //
+  // Double-register VEXT
+  InstrItinData,
+                               InstrStage<1, [FU_NLSPipe]>], [2, 1, 1]>,
+  //
+  // Quad-register VEXT
+  InstrItinData,
+                               InstrStage<2, [FU_NLSPipe]>], [3, 1, 1]>,
+  //
+  // VTB
+  InstrItinData,
+                               InstrStage<2, [FU_NLSPipe]>], [3, 2, 1]>,
+  InstrItinData,
+                               InstrStage<2, [FU_NLSPipe]>], [3, 2, 2, 1]>,
+  InstrItinData,
+                               InstrStage<1, [FU_NLSPipe]>,
+                               InstrStage<1, [FU_NPipe], 0>,
+                               InstrStage<2, [FU_NLSPipe]>], [4, 2, 2, 3, 1]>,
+  InstrItinData,
+                               InstrStage<1, [FU_NLSPipe]>,
+                               InstrStage<1, [FU_NPipe], 0>,
+                               InstrStage<2, [FU_NLSPipe]>], [4, 2, 2, 3, 3, 1]>,
+  //
+  // VTBX
+  InstrItinData,
+                               InstrStage<2, [FU_NLSPipe]>], [3, 1, 2, 1]>,
+  InstrItinData,
+                               InstrStage<2, [FU_NLSPipe]>], [3, 1, 2, 2, 1]>,
+  InstrItinData,
+                               InstrStage<1, [FU_NLSPipe]>,
+                               InstrStage<1, [FU_NPipe], 0>,
+                               InstrStage<2, [FU_NLSPipe]>], [4, 1, 2, 2, 3, 1]>,
+  InstrItinData,
+                               InstrStage<1, [FU_NLSPipe]>,
+                               InstrStage<1, [FU_NPipe], 0>,
+                               InstrStage<2, [FU_NLSPipe]>], [4, 1, 2, 2, 3, 3, 1]>
+]>;

Added: llvm/trunk/lib/Target/ARM/ARMScheduleA9.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMScheduleA9.td?rev=100672&view=auto
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMScheduleA9.td (added)
+++ llvm/trunk/lib/Target/ARM/ARMScheduleA9.td Wed Apr  7 13:22:11 2010
@@ -0,0 +1,739 @@
+//=- ARMScheduleA9.td - ARM Cortex-A9 Scheduling Definitions -*- tablegen -*-=//
+// 
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// 
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the ARM Cortex A9 processors.
+//
+//===----------------------------------------------------------------------===//
+
+//
+// Ad-hoc scheduling information derived from pretty vague "Cortex-A9 Technical
+// Reference Manual".
+//
+// Dual issue pipeline represented by FU_Pipe0 | FU_Pipe1
+//
+def CortexA9Itineraries : ProcessorItineraries<[
+  // VFP and NEON share the same register file. This means that every VFP
+  // instruction should wait for full completion of the consecutive NEON
+  // instruction and vice-versa. We model this behavior with two artificial FUs:
+  // DRegsVFP and DRegsN.
+  //
+  // Every VFP instruction:
+  //  - Acquires DRegsVFP resource for 1 cycle
+  //  - Reserves DRegsN resource for the whole duration (including time to
+  //    register file writeback!).
+  // Every NEON instruction does the same but with FUs swapped.
+  //
+  // Since the reserved FU cannot be acquired, this precisely models
+  // "cross-domain" stalls.
+
+  // VFP
+  // Issue through integer pipeline, and execute in NEON unit.
+
+  // FP Special Register to Integer Register File Move
+  InstrItinData,
+                              InstrStage<2, [FU_DRegsN],   0, Reserved>,
+                              InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                              InstrStage<1, [FU_NPipe]>]>,
+  //
+  // Single-precision FP Unary
+  InstrItinData,
+                               // Extra latency cycles since wbck is 2 cycles
+                               InstrStage<3, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [1, 1]>,
+  //
+  // Double-precision FP Unary
+  InstrItinData,
+                               // Extra latency cycles since wbck is 2 cycles
+                               InstrStage<3, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [1, 1]>,
+
+  //
+  // Single-precision FP Compare
+  InstrItinData,
+                               // Extra latency cycles since wbck is 4 cycles
+                               InstrStage<5, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [1, 1]>,
+  //
+  // Double-precision FP Compare
+  InstrItinData,
+                               // Extra latency cycles since wbck is 4 cycles
+                               InstrStage<5, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [1, 1]>,
+  //
+  // Single to Double FP Convert
+  InstrItinData,
+                               InstrStage<5, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [4, 1]>,
+  //
+  // Double to Single FP Convert
+  InstrItinData,
+                               InstrStage<5, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [4, 1]>,
+
+  //
+  // Single to Half FP Convert
+  InstrItinData,
+                               InstrStage<5, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [4, 1]>,
+  //
+  // Half to Single FP Convert
+  InstrItinData,
+                               InstrStage<3, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [2, 1]>,
+
+  //
+  // Single-Precision FP to Integer Convert
+  InstrItinData,
+                               InstrStage<5, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [4, 1]>,
+  //
+  // Double-Precision FP to Integer Convert
+  InstrItinData,
+                               InstrStage<5, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [4, 1]>,
+  //
+  // Integer to Single-Precision FP Convert
+  InstrItinData,
+                               InstrStage<5, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [4, 1]>,
+  //
+  // Integer to Double-Precision FP Convert
+  InstrItinData,
+                               InstrStage<5, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [4, 1]>,
+  //
+  // Single-precision FP ALU
+  InstrItinData,
+                               InstrStage<5, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [4, 1, 1]>,
+  //
+  // Double-precision FP ALU
+  InstrItinData,
+                               InstrStage<5, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [4, 1, 1]>,
+  //
+  // Single-precision FP Multiply
+  InstrItinData,
+                               InstrStage<6, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [5, 1, 1]>,
+  //
+  // Double-precision FP Multiply
+  InstrItinData,
+                               InstrStage<7, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<2, [FU_NPipe]>], [6, 1, 1]>,
+  //
+  // Single-precision FP MAC
+  InstrItinData,
+                               InstrStage<9, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [8, 0, 1, 1]>,
+  //
+  // Double-precision FP MAC
+  InstrItinData,
+                               InstrStage<10, [FU_DRegsN],  0, Reserved>,
+                               InstrStage<1,  [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<2,  [FU_NPipe]>], [9, 0, 1, 1]>,
+  //
+  // Single-precision FP DIV
+  InstrItinData,
+                               InstrStage<16, [FU_DRegsN],  0, Reserved>,
+                               InstrStage<1,  [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<10, [FU_NPipe]>], [15, 1, 1]>,
+  //
+  // Double-precision FP DIV
+  InstrItinData,
+                               InstrStage<26, [FU_DRegsN],  0, Reserved>,
+                               InstrStage<1,  [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<20, [FU_NPipe]>], [25, 1, 1]>,
+  //
+  // Single-precision FP SQRT
+  InstrItinData,
+                               InstrStage<18, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1,   [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<13,  [FU_NPipe]>], [17, 1]>,
+  //
+  // Double-precision FP SQRT
+  InstrItinData,
+                               InstrStage<33, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1,  [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<28, [FU_NPipe]>], [32, 1]>,
+
+  //
+  // Integer to Single-precision Move
+  InstrItinData,
+                               // Extra 1 latency cycle since wbck is 2 cycles
+                               InstrStage<3, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [1, 1]>,
+  //
+  // Integer to Double-precision Move
+  InstrItinData,
+                               // Extra 1 latency cycle since wbck is 2 cycles
+                               InstrStage<3, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [1, 1, 1]>,
+  //
+  // Single-precision to Integer Move
+  InstrItinData,
+                               InstrStage<2, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [1, 1]>,
+  //
+  // Double-precision to Integer Move
+  InstrItinData,
+                               InstrStage<2, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [1, 1, 1]>,
+  //
+  // Single-precision FP Load
+  // use FU_Issue to enforce the 1 load/store per cycle limit
+  InstrItinData,
+                               InstrStage<2, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Issue], 0>, 
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_LdSt0], 0>,
+                               InstrStage<1, [FU_NPipe]>]>,
+  //
+  // Double-precision FP Load
+  // use FU_Issue to enforce the 1 load/store per cycle limit
+  InstrItinData,
+                               InstrStage<2, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Issue], 0>, 
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_LdSt0], 0>,
+                               InstrStage<1, [FU_NPipe]>]>,
+  //
+  // FP Load Multiple
+  // use FU_Issue to enforce the 1 load/store per cycle limit
+  InstrItinData,
+                               InstrStage<2, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Issue], 0>, 
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_LdSt0], 0>,
+                               InstrStage<1, [FU_NPipe]>]>,
+  //
+  // Single-precision FP Store
+  // use FU_Issue to enforce the 1 load/store per cycle limit
+  InstrItinData,
+                               InstrStage<2, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Issue], 0>, 
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_LdSt0], 0>,
+                               InstrStage<1, [FU_NPipe]>]>,
+  //
+  // Double-precision FP Store
+  // use FU_Issue to enforce the 1 load/store per cycle limit
+  InstrItinData,
+                               InstrStage<2, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Issue], 0>, 
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_LdSt0], 0>,
+                               InstrStage<1, [FU_NPipe]>]>,
+  //
+  // FP Store Multiple
+  // use FU_Issue to enforce the 1 load/store per cycle limit
+  InstrItinData,
+                               InstrStage<2, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Issue], 0>, 
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_LdSt0], 0>,
+                               InstrStage<1, [FU_NPipe]>]>,
+  // NEON
+  // Issue through integer pipeline, and execute in NEON unit.
+  // FIXME: Neon pipeline and LdSt unit are multiplexed. 
+  //        Add some syntactic sugar to model this!
+  // VLD1
+  // FIXME: We don't model this instruction properly
+  InstrItinData,
+                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Issue], 0>, 
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_LdSt0], 0>,
+                               InstrStage<1, [FU_NPipe]>]>,
+  //
+  // VLD2
+  // FIXME: We don't model this instruction properly
+  InstrItinData,
+                               // Extra latency cycles since wbck is 6 cycles
+                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Issue], 0>, 
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_LdSt0], 0>,
+                               InstrStage<1, [FU_NPipe]>], [2, 2, 1]>,
+  //
+  // VLD3
+  // FIXME: We don't model this instruction properly
+  InstrItinData,
+                               // Extra latency cycles since wbck is 6 cycles
+                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Issue], 0>, 
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_LdSt0], 0>,
+                               InstrStage<1, [FU_NPipe]>], [2, 2, 2, 1]>,
+  //
+  // VLD4
+  // FIXME: We don't model this instruction properly
+  InstrItinData,
+                               // Extra latency cycles since wbck is 6 cycles
+                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Issue], 0>, 
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_LdSt0], 0>,
+                               InstrStage<1, [FU_NPipe]>], [2, 2, 2, 2, 1]>,
+  //
+  // VST
+  // FIXME: We don't model this instruction properly
+  InstrItinData,
+                               // Extra latency cycles since wbck is 6 cycles
+                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Issue], 0>, 
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_LdSt0], 0>,
+                               InstrStage<1, [FU_NPipe]>]>,
+  //
+  // Double-register Integer Unary
+  InstrItinData,
+                               // Extra latency cycles since wbck is 6 cycles
+                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [4, 2]>,
+  //
+  // Quad-register Integer Unary
+  InstrItinData,
+                               // Extra latency cycles since wbck is 6 cycles
+                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [4, 2]>,
+  //
+  // Double-register Integer Q-Unary
+  InstrItinData,
+                               // Extra latency cycles since wbck is 6 cycles
+                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [4, 1]>,
+  //
+  // Quad-register Integer Q-Unary
+  InstrItinData,
+                               // Extra latency cycles since wbck is 6 cycles
+                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [4, 1]>,
+  //
+  // Double-register Integer Binary
+  InstrItinData,
+                               // Extra latency cycles since wbck is 6 cycles
+                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [3, 2, 2]>,
+  //
+  // Quad-register Integer Binary
+  InstrItinData,
+                               // Extra latency cycles since wbck is 6 cycles
+                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [3, 2, 2]>,
+  //
+  // Double-register Integer Subtract
+  InstrItinData,
+                               // Extra latency cycles since wbck is 6 cycles
+                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [3, 2, 1]>,
+  //
+  // Quad-register Integer Subtract
+  InstrItinData,
+                               // Extra latency cycles since wbck is 6 cycles
+                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [3, 2, 1]>,
+  //
+  // Double-register Integer Shift
+  InstrItinData,
+                               // Extra latency cycles since wbck is 6 cycles
+                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [3, 1, 1]>,
+  //
+  // Quad-register Integer Shift
+  InstrItinData,
+                               // Extra latency cycles since wbck is 6 cycles
+                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [3, 1, 1]>,
+  //
+  // Double-register Integer Shift (4 cycle)
+  InstrItinData,
+                               // Extra latency cycles since wbck is 6 cycles
+                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [4, 1, 1]>,
+  //
+  // Quad-register Integer Shift (4 cycle)
+  InstrItinData,
+                               // Extra latency cycles since wbck is 6 cycles
+                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [4, 1, 1]>,
+  //
+  // Double-register Integer Binary (4 cycle)
+  InstrItinData,
+                               // Extra latency cycles since wbck is 6 cycles
+                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [4, 2, 2]>,
+  //
+  // Quad-register Integer Binary (4 cycle)
+  InstrItinData,
+                               // Extra latency cycles since wbck is 6 cycles
+                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [4, 2, 2]>,
+  //
+  // Double-register Integer Subtract (4 cycle)
+  InstrItinData,
+                               // Extra latency cycles since wbck is 6 cycles
+                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [4, 2, 1]>,
+  //
+  // Quad-register Integer Subtract (4 cycle)
+  InstrItinData,
+                               // Extra latency cycles since wbck is 6 cycles
+                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [4, 2, 1]>,
+
+  //
+  // Double-register Integer Count
+  InstrItinData,
+                               // Extra latency cycles since wbck is 6 cycles
+                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [3, 2, 2]>,
+  //
+  // Quad-register Integer Count
+  // Result written in N3, but that is relative to the last cycle of multicycle,
+  // so we use 4 for those cases
+  InstrItinData,
+                               // Extra latency cycles since wbck is 7 cycles
+                               InstrStage<8, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<2, [FU_NPipe]>], [4, 2, 2]>,
+  //
+  // Double-register Absolute Difference and Accumulate
+  InstrItinData,
+                               // Extra latency cycles since wbck is 6 cycles
+                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [6, 3, 2, 1]>,
+  //
+  // Quad-register Absolute Difference and Accumulate
+  InstrItinData,
+                               // Extra latency cycles since wbck is 6 cycles
+                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<2, [FU_NPipe]>], [6, 3, 2, 1]>,
+  //
+  // Double-register Integer Pair Add Long
+  InstrItinData,
+                               // Extra latency cycles since wbck is 6 cycles
+                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [6, 3, 1]>,
+  //
+  // Quad-register Integer Pair Add Long
+  InstrItinData,
+                               // Extra latency cycles since wbck is 6 cycles
+                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<2, [FU_NPipe]>], [6, 3, 1]>,
+
+  //
+  // Double-register Integer Multiply (.8, .16)
+  InstrItinData,
+                               // Extra latency cycles since wbck is 6 cycles
+                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [6, 2, 2]>,
+  //
+  // Quad-register Integer Multiply (.8, .16)
+  InstrItinData,
+                               // Extra latency cycles since wbck is 7 cycles
+                               InstrStage<8, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<2, [FU_NPipe]>], [7, 2, 2]>,
+
+  //
+  // Double-register Integer Multiply (.32)
+  InstrItinData,
+                               // Extra latency cycles since wbck is 7 cycles
+                               InstrStage<8, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<2, [FU_NPipe]>], [7, 2, 1]>,
+  //
+  // Quad-register Integer Multiply (.32)
+  InstrItinData,
+                               // Extra latency cycles since wbck is 9 cycles
+                               InstrStage<10, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<4, [FU_NPipe]>], [9, 2, 1]>,
+  //
+  // Double-register Integer Multiply-Accumulate (.8, .16)
+  InstrItinData,
+                               // Extra latency cycles since wbck is 6 cycles
+                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [6, 3, 2, 2]>,
+  //
+  // Double-register Integer Multiply-Accumulate (.32)
+  InstrItinData,
+                               // Extra latency cycles since wbck is 7 cycles
+                               InstrStage<8, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<2, [FU_NPipe]>], [7, 3, 2, 1]>,
+  //
+  // Quad-register Integer Multiply-Accumulate (.8, .16)
+  InstrItinData,
+                               // Extra latency cycles since wbck is 7 cycles
+                               InstrStage<8, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<2, [FU_NPipe]>], [7, 3, 2, 2]>,
+  //
+  // Quad-register Integer Multiply-Accumulate (.32)
+  InstrItinData,
+                               // Extra latency cycles since wbck is 9 cycles
+                               InstrStage<10, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<4, [FU_NPipe]>], [9, 3, 2, 1]>,
+  //
+  // Move Immediate
+  InstrItinData,
+                               // Extra latency cycles since wbck is 6 cycles
+                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [3]>,
+  //
+  // Double-register Permute Move
+  InstrItinData,
+  // FIXME: all latencies are arbitrary, no information is available
+                               InstrStage<3, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NLSPipe]>], [2, 1]>,
+  //
+  // Quad-register Permute Move
+  // Result written in N2, but that is relative to the last cycle of multicycle,
+  // so we use 3 for those cases
+  InstrItinData,
+  // FIXME: all latencies are arbitrary, no information is available
+                               InstrStage<4, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<2, [FU_NPipe]>], [3, 1]>,
+  //
+  // Integer to Single-precision Move
+  InstrItinData,
+  // FIXME: all latencies are arbitrary, no information is available
+                               InstrStage<3, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [2, 1]>,
+  //
+  // Integer to Double-precision Move
+  InstrItinData,
+  // FIXME: all latencies are arbitrary, no information is available
+                               InstrStage<3, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [2, 1, 1]>,
+  //
+  // Single-precision to Integer Move
+  InstrItinData,
+  // FIXME: all latencies are arbitrary, no information is available
+                               InstrStage<3, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [2, 1]>,
+  //
+  // Double-precision to Integer Move
+  InstrItinData,
+  // FIXME: all latencies are arbitrary, no information is available
+                               InstrStage<3, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [2, 2, 1]>,
+  //
+  // Integer to Lane Move
+  InstrItinData,
+  // FIXME: all latencies are arbitrary, no information is available
+                               InstrStage<4, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<2, [FU_NPipe]>], [3, 1, 1]>,
+
+  //
+  // Double-register FP Unary
+  InstrItinData,
+                               // Extra latency cycles since wbck is 6 cycles
+                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [5, 2]>,
+  //
+  // Quad-register FP Unary
+  // Result written in N5, but that is relative to the last cycle of multicycle,
+  // so we use 6 for those cases
+  InstrItinData,
+                               // Extra latency cycles since wbck is 7 cycles
+                               InstrStage<8, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<2, [FU_NPipe]>], [6, 2]>,
+  //
+  // Double-register FP Binary
+  // FIXME: We're using this itin for many instructions and [2, 2] here is too
+  // optimistic.
+  InstrItinData,
+                               // Extra latency cycles since wbck is 7 cycles
+                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [5, 2, 2]>,
+  //
+  // Quad-register FP Binary
+  // Result written in N5, but that is relative to the last cycle of multicycle,
+  // so we use 6 for those cases
+  // FIXME: We're using this itin for many instructions and [2, 2] here is too
+  // optimistic.
+  InstrItinData,
+                               // Extra latency cycles since wbck is 8 cycles
+                               InstrStage<8, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<2, [FU_NPipe]>], [6, 2, 2]>,
+  //
+  // Double-register FP Multiply-Accumulate
+  InstrItinData,
+                               // Extra latency cycles since wbck is 7 cycles
+                               InstrStage<8, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<2, [FU_NPipe]>], [6, 3, 2, 1]>,
+  //
+  // Quad-register FP Multiply-Accumulate
+  // Result written in N9, but that is relative to the last cycle of multicycle,
+  // so we use 10 for those cases
+  InstrItinData,
+                               // Extra latency cycles since wbck is 9 cycles
+                               InstrStage<10, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<4, [FU_NPipe]>], [8, 4, 2, 1]>,
+  //
+  // Double-register Reciprocal Step
+  InstrItinData,
+                               // Extra latency cycles since wbck is 7 cycles
+                               InstrStage<8, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<2, [FU_NPipe]>], [6, 2, 2]>,
+  //
+  // Quad-register Reciprocal Step
+  InstrItinData,
+                               // Extra latency cycles since wbck is 9 cycles
+                               InstrStage<10, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<4, [FU_NPipe]>], [8, 2, 2]>,
+  //
+  // Double-register Permute
+  InstrItinData,
+                               // Extra latency cycles since wbck is 6 cycles
+                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [2, 2, 1, 1]>,
+  //
+  // Quad-register Permute
+  // Result written in N2, but that is relative to the last cycle of multicycle,
+  // so we use 3 for those cases
+  InstrItinData,
+                               // Extra latency cycles since wbck is 7 cycles
+                               InstrStage<8, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<2, [FU_NPipe]>], [3, 3, 1, 1]>,
+  //
+  // Quad-register Permute (3 cycle issue)
+  // Result written in N2, but that is relative to the last cycle of multicycle,
+  // so we use 4 for those cases
+  InstrItinData,
+                               // Extra latency cycles since wbck is 8 cycles
+                               InstrStage<9, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<3, [FU_NLSPipe]>], [4, 4, 1, 1]>,
+
+  //
+  // Double-register VEXT
+  InstrItinData,
+                               // Extra latency cycles since wbck is 7 cycles
+                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<1, [FU_NPipe]>], [2, 1, 1]>,
+  //
+  // Quad-register VEXT
+  InstrItinData,
+                               // Extra latency cycles since wbck is 9 cycles
+                               InstrStage<8, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<2, [FU_NPipe]>], [3, 1, 1]>,
+  //
+  // VTB
+  InstrItinData,
+                               // Extra latency cycles since wbck is 7 cycles
+                               InstrStage<8, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<2, [FU_NPipe]>], [3, 2, 1]>,
+  InstrItinData,
+                               // Extra latency cycles since wbck is 7 cycles
+                               InstrStage<8, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<2, [FU_NPipe]>], [3, 2, 2, 1]>,
+  InstrItinData,
+                               // Extra latency cycles since wbck is 8 cycles
+                               InstrStage<9, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<3, [FU_NPipe]>], [4, 2, 2, 3, 1]>,
+  InstrItinData,
+                               // Extra latency cycles since wbck is 8 cycles
+                               InstrStage<9, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<3, [FU_NPipe]>], [4, 2, 2, 3, 3, 1]>,
+  //
+  // VTBX
+  InstrItinData,
+                               // Extra latency cycles since wbck is 7 cycles
+                               InstrStage<8, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<2, [FU_NPipe]>], [3, 1, 2, 1]>,
+  InstrItinData,
+                               // Extra latency cycles since wbck is 7 cycles
+                               InstrStage<8, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<2, [FU_NPipe]>], [3, 1, 2, 2, 1]>,
+  InstrItinData,
+                               // Extra latency cycles since wbck is 8 cycles
+                               InstrStage<9, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<3, [FU_NPipe]>], [4, 1, 2, 2, 3, 1]>,
+  InstrItinData,
+                               // Extra latency cycles since wbck is 8 cycles
+                               InstrStage<9, [FU_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                               InstrStage<2, [FU_NPipe]>], [4, 1, 2, 2, 3, 3, 1]>
+]>;

Removed: llvm/trunk/lib/Target/ARM/ARMScheduleV7.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMScheduleV7.td?rev=100671&view=auto
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMScheduleV7.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMScheduleV7.td (removed)
@@ -1,1339 +0,0 @@
-//===- ARMScheduleV7.td - ARM v7 Scheduling Definitions ----*- tablegen -*-===//
-// 
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-// 
-//===----------------------------------------------------------------------===//
-//
-// This file defines the itinerary class data for the ARM v7 processors.
-//
-//===----------------------------------------------------------------------===//
-
-//
-// Scheduling information derived from "Cortex-A8 Technical Reference Manual".
-//
-// Dual issue pipeline represented by FU_Pipe0 | FU_Pipe1
-//
-def CortexA8Itineraries : ProcessorItineraries<[
-
-  // Two fully-pipelined integer ALU pipelines
-  //
-  // No operand cycles
-  InstrItinData]>,
-  //
-  // Binary Instructions that produce a result
-  InstrItinData], [2, 2]>,
-  InstrItinData], [2, 2, 2]>,
-  InstrItinData], [2, 2, 1]>,
-  InstrItinData], [2, 2, 1, 1]>,
-  //
-  // Unary Instructions that produce a result
-  InstrItinData], [2, 2]>,
-  InstrItinData], [2, 1]>,
-  InstrItinData], [2, 1, 1]>,
-  //
-  // Compare instructions
-  InstrItinData], [2]>,
-  InstrItinData], [2, 2]>,
-  InstrItinData], [2, 1]>,
-  InstrItinData], [2, 1, 1]>,
-  //
-  // Move instructions, unconditional
-  InstrItinData], [1]>,
-  InstrItinData], [1, 1]>,
-  InstrItinData], [1, 1]>,
-  InstrItinData], [1, 1, 1]>,
-  //
-  // Move instructions, conditional
-  InstrItinData], [2]>,
-  InstrItinData], [2, 1]>,
-  InstrItinData], [2, 1]>,
-  InstrItinData], [2, 1, 1]>,
-
-  // Integer multiply pipeline
-  // Result written in E5, but that is relative to the last cycle of multicycle,
-  // so we use 6 for those cases
-  //
-  InstrItinData], [5, 1, 1]>,
-  InstrItinData, 
-                                InstrStage<2, [FU_Pipe0]>], [6, 1, 1, 4]>,
-  InstrItinData, 
-                                InstrStage<2, [FU_Pipe0]>], [6, 1, 1]>,
-  InstrItinData, 
-                                InstrStage<2, [FU_Pipe0]>], [6, 1, 1, 4]>,
-  InstrItinData, 
-                                InstrStage<3, [FU_Pipe0]>], [6, 6, 1, 1]>,
-  InstrItinData, 
-                                InstrStage<3, [FU_Pipe0]>], [6, 6, 1, 1]>,
-  
-  // Integer load pipeline
-  //
-  // loads have an extra cycle of latency, but are fully pipelined
-  // use FU_Issue to enforce the 1 load/store per cycle limit
-  //
-  // Immediate offset
-  InstrItinData,
-                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                                InstrStage<1, [FU_LdSt0]>], [3, 1]>,
-  //
-  // Register offset
-  InstrItinData,
-                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                                InstrStage<1, [FU_LdSt0]>], [3, 1, 1]>,
-  //
-  // Scaled register offset, issues over 2 cycles
-  InstrItinData,
-                                InstrStage<1, [FU_Pipe0], 0>,
-                                InstrStage<1, [FU_Pipe1]>,
-                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                                InstrStage<1, [FU_LdSt0]>], [4, 1, 1]>,
-  //
-  // Immediate offset with update
-  InstrItinData,
-                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                                InstrStage<1, [FU_LdSt0]>], [3, 2, 1]>,
-  //
-  // Register offset with update
-  InstrItinData,
-                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                                InstrStage<1, [FU_LdSt0]>], [3, 2, 1, 1]>,
-  //
-  // Scaled register offset with update, issues over 2 cycles
-  InstrItinData,
-                                InstrStage<1, [FU_Pipe0], 0>,
-                                InstrStage<1, [FU_Pipe1]>,
-                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                                InstrStage<1, [FU_LdSt0]>], [4, 3, 1, 1]>,
-  //
-  // Load multiple
-  InstrItinData,
-                                InstrStage<2, [FU_Pipe0], 0>,
-                                InstrStage<2, [FU_Pipe1]>,
-                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                                InstrStage<1, [FU_LdSt0]>]>,
-
-  // Integer store pipeline
-  //
-  // use FU_Issue to enforce the 1 load/store per cycle limit
-  //
-  // Immediate offset
-  InstrItinData,
-                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                                InstrStage<1, [FU_LdSt0]>], [3, 1]>,
-  //
-  // Register offset
-  InstrItinData,
-                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                                InstrStage<1, [FU_LdSt0]>], [3, 1, 1]>,
-  //
-  // Scaled register offset, issues over 2 cycles
-  InstrItinData,
-                                InstrStage<1, [FU_Pipe0], 0>,
-                                InstrStage<1, [FU_Pipe1]>,
-                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                                InstrStage<1, [FU_LdSt0]>], [3, 1, 1]>,
-  //
-  // Immediate offset with update
-  InstrItinData,
-                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                                InstrStage<1, [FU_LdSt0]>], [2, 3, 1]>,
-  //
-  // Register offset with update
-  InstrItinData,
-                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                                InstrStage<1, [FU_LdSt0]>], [2, 3, 1, 1]>,
-  //
-  // Scaled register offset with update, issues over 2 cycles
-  InstrItinData,
-                                InstrStage<1, [FU_Pipe0], 0>,
-                                InstrStage<1, [FU_Pipe1]>,
-                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                                InstrStage<1, [FU_LdSt0]>], [3, 3, 1, 1]>,
-  //
-  // Store multiple
-  InstrItinData,
-                                InstrStage<2, [FU_Pipe0], 0>,
-                                InstrStage<2, [FU_Pipe1]>,
-                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                                InstrStage<1, [FU_LdSt0]>]>,
-  
-  // Branch
-  //
-  // no delay slots, so the latency of a branch is unimportant
-  InstrItinData]>,
-
-  // VFP
-  // Issue through integer pipeline, and execute in NEON unit. We assume
-  // RunFast mode so that NFP pipeline is used for single-precision when
-  // possible.
-  //
-  // FP Special Register to Integer Register File Move
-  InstrItinData,
-                              InstrStage<1, [FU_NLSPipe]>]>,
-  //
-  // Single-precision FP Unary
-  InstrItinData,
-                               InstrStage<1, [FU_NPipe]>], [7, 1]>,
-  //
-  // Double-precision FP Unary
-  InstrItinData,
-                               InstrStage<4, [FU_NPipe], 0>,
-                               InstrStage<4, [FU_NLSPipe]>], [4, 1]>,
-  //
-  // Single-precision FP Compare
-  InstrItinData,
-                               InstrStage<1, [FU_NPipe]>], [1, 1]>,
-  //
-  // Double-precision FP Compare
-  InstrItinData,
-                               InstrStage<4, [FU_NPipe], 0>,
-                               InstrStage<4, [FU_NLSPipe]>], [4, 1]>,
-  //
-  // Single to Double FP Convert
-  InstrItinData,
-                               InstrStage<7, [FU_NPipe], 0>,
-                               InstrStage<7, [FU_NLSPipe]>], [7, 1]>,
-  //
-  // Double to Single FP Convert
-  InstrItinData,
-                               InstrStage<5, [FU_NPipe], 0>,
-                               InstrStage<5, [FU_NLSPipe]>], [5, 1]>,
-  //
-  // Single-Precision FP to Integer Convert
-  InstrItinData,
-                               InstrStage<1, [FU_NPipe]>], [7, 1]>,
-  //
-  // Double-Precision FP to Integer Convert
-  InstrItinData,
-                               InstrStage<8, [FU_NPipe], 0>,
-                               InstrStage<8, [FU_NLSPipe]>], [8, 1]>,
-  //
-  // Integer to Single-Precision FP Convert
-  InstrItinData,
-                               InstrStage<1, [FU_NPipe]>], [7, 1]>,
-  //
-  // Integer to Double-Precision FP Convert
-  InstrItinData,
-                               InstrStage<8, [FU_NPipe], 0>,
-                               InstrStage<8, [FU_NLSPipe]>], [8, 1]>,
-  //
-  // Single-precision FP ALU
-  InstrItinData,
-                               InstrStage<1, [FU_NPipe]>], [7, 1, 1]>,
-  //
-  // Double-precision FP ALU
-  InstrItinData,
-                               InstrStage<9, [FU_NPipe], 0>,
-                               InstrStage<9, [FU_NLSPipe]>], [9, 1, 1]>,
-  //
-  // Single-precision FP Multiply
-  InstrItinData,
-                               InstrStage<1, [FU_NPipe]>], [7, 1, 1]>,
-  //
-  // Double-precision FP Multiply
-  InstrItinData,
-                               InstrStage<11, [FU_NPipe], 0>,
-                               InstrStage<11, [FU_NLSPipe]>], [11, 1, 1]>,
-  //
-  // Single-precision FP MAC
-  InstrItinData,
-                               InstrStage<1, [FU_NPipe]>], [7, 2, 1, 1]>,
-  //
-  // Double-precision FP MAC
-  InstrItinData,
-                               InstrStage<19, [FU_NPipe], 0>,
-                               InstrStage<19, [FU_NLSPipe]>], [19, 2, 1, 1]>,
-  //
-  // Single-precision FP DIV
-  InstrItinData,
-                               InstrStage<20, [FU_NPipe], 0>,
-                               InstrStage<20, [FU_NLSPipe]>], [20, 1, 1]>,
-  //
-  // Double-precision FP DIV
-  InstrItinData,
-                               InstrStage<29, [FU_NPipe], 0>,
-                               InstrStage<29, [FU_NLSPipe]>], [29, 1, 1]>,
-  //
-  // Single-precision FP SQRT
-  InstrItinData,
-                               InstrStage<19, [FU_NPipe], 0>,
-                               InstrStage<19, [FU_NLSPipe]>], [19, 1]>,
-  //
-  // Double-precision FP SQRT
-  InstrItinData,
-                               InstrStage<29, [FU_NPipe], 0>,
-                               InstrStage<29, [FU_NLSPipe]>], [29, 1]>,
-  //
-  // Single-precision FP Load
-  // use FU_Issue to enforce the 1 load/store per cycle limit
-  InstrItinData, 
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_LdSt0], 0>,
-                               InstrStage<1, [FU_NLSPipe]>]>,
-  //
-  // Double-precision FP Load
-  // use FU_Issue to enforce the 1 load/store per cycle limit
-  InstrItinData, 
-                               InstrStage<1, [FU_Pipe0], 0>,
-                               InstrStage<1, [FU_Pipe1]>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_LdSt0], 0>,
-                               InstrStage<1, [FU_NLSPipe]>]>,
-  //
-  // FP Load Multiple
-  // use FU_Issue to enforce the 1 load/store per cycle limit
-  InstrItinData, 
-                               InstrStage<2, [FU_Pipe0], 0>,
-                               InstrStage<2, [FU_Pipe1]>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_LdSt0], 0>,
-                               InstrStage<1, [FU_NLSPipe]>]>,
-  //
-  // Single-precision FP Store
-  // use FU_Issue to enforce the 1 load/store per cycle limit
-  InstrItinData, 
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_LdSt0], 0>,
-                               InstrStage<1, [FU_NLSPipe]>]>,
-  //
-  // Double-precision FP Store
-  // use FU_Issue to enforce the 1 load/store per cycle limit
-  InstrItinData, 
-                               InstrStage<1, [FU_Pipe0], 0>,
-                               InstrStage<1, [FU_Pipe1]>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_LdSt0], 0>,
-                               InstrStage<1, [FU_NLSPipe]>]>,
-  //
-  // FP Store Multiple
-  // use FU_Issue to enforce the 1 load/store per cycle limit
-  InstrItinData, 
-                               InstrStage<2, [FU_Pipe0], 0>,
-                               InstrStage<2, [FU_Pipe1]>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_LdSt0], 0>,
-                               InstrStage<1, [FU_NLSPipe]>]>,
-
-  // NEON
-  // Issue through integer pipeline, and execute in NEON unit.
-  //
-  // VLD1
-  // FIXME: We don't model this instruction properly
-  InstrItinData, 
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_LdSt0], 0>,
-                               InstrStage<1, [FU_NLSPipe]>]>,
-  //
-  // VLD2
-  // FIXME: We don't model this instruction properly
-  InstrItinData, 
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_LdSt0], 0>,
-                               InstrStage<1, [FU_NLSPipe]>], [2, 2, 1]>,
-  //
-  // VLD3
-  // FIXME: We don't model this instruction properly
-  InstrItinData, 
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_LdSt0], 0>,
-                               InstrStage<1, [FU_NLSPipe]>], [2, 2, 2, 1]>,
-  //
-  // VLD4
-  // FIXME: We don't model this instruction properly
-  InstrItinData, 
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_LdSt0], 0>,
-                               InstrStage<1, [FU_NLSPipe]>], [2, 2, 2, 2, 1]>,
-  //
-  // VST
-  // FIXME: We don't model this instruction properly
-  InstrItinData, 
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_LdSt0], 0>,
-                               InstrStage<1, [FU_NLSPipe]>]>,
-  //
-  // Double-register FP Unary
-  InstrItinData,
-                               InstrStage<1, [FU_NPipe]>], [5, 2]>,
-  //
-  // Quad-register FP Unary
-  // Result written in N5, but that is relative to the last cycle of multicycle,
-  // so we use 6 for those cases
-  InstrItinData,
-                               InstrStage<2, [FU_NPipe]>], [6, 2]>,
-  //
-  // Double-register FP Binary
-  InstrItinData,
-                               InstrStage<1, [FU_NPipe]>], [5, 2, 2]>,
-  //
-  // Quad-register FP Binary
-  // Result written in N5, but that is relative to the last cycle of multicycle,
-  // so we use 6 for those cases
-  InstrItinData,
-                               InstrStage<2, [FU_NPipe]>], [6, 2, 2]>,
-  //
-  // Move Immediate
-  InstrItinData,
-                               InstrStage<1, [FU_NPipe]>], [3]>,
-  //
-  // Double-register Permute Move
-  InstrItinData,
-                               InstrStage<1, [FU_NLSPipe]>], [2, 1]>,
-  //
-  // Quad-register Permute Move
-  // Result written in N2, but that is relative to the last cycle of multicycle,
-  // so we use 3 for those cases
-  InstrItinData,
-                               InstrStage<2, [FU_NLSPipe]>], [3, 1]>,
-  //
-  // Integer to Single-precision Move
-  InstrItinData,
-                               InstrStage<1, [FU_NLSPipe]>], [2, 1]>,
-  //
-  // Integer to Double-precision Move
-  InstrItinData,
-                               InstrStage<1, [FU_NLSPipe]>], [2, 1, 1]>,
-  //
-  // Single-precision to Integer Move
-  InstrItinData,
-                               InstrStage<1, [FU_NLSPipe]>], [20, 1]>,
-  //
-  // Double-precision to Integer Move
-  InstrItinData,
-                               InstrStage<1, [FU_NLSPipe]>], [20, 20, 1]>,
-  //
-  // Integer to Lane Move
-  InstrItinData,
-                               InstrStage<2, [FU_NLSPipe]>], [3, 1, 1]>,
-  //
-  // Double-register Permute
-  InstrItinData,
-                               InstrStage<1, [FU_NLSPipe]>], [2, 2, 1, 1]>,
-  //
-  // Quad-register Permute
-  // Result written in N2, but that is relative to the last cycle of multicycle,
-  // so we use 3 for those cases
-  InstrItinData,
-                               InstrStage<2, [FU_NLSPipe]>], [3, 3, 1, 1]>,
-  //
-  // Quad-register Permute (3 cycle issue)
-  // Result written in N2, but that is relative to the last cycle of multicycle,
-  // so we use 4 for those cases
-  InstrItinData,
-                               InstrStage<1, [FU_NLSPipe]>,
-                               InstrStage<1, [FU_NPipe], 0>,
-                               InstrStage<2, [FU_NLSPipe]>], [4, 4, 1, 1]>,
-  //
-  // Double-register FP Multiple-Accumulate
-  InstrItinData,
-                               InstrStage<1, [FU_NPipe]>], [9, 3, 2, 2]>,
-  //
-  // Quad-register FP Multiple-Accumulate
-  // Result written in N9, but that is relative to the last cycle of multicycle,
-  // so we use 10 for those cases
-  InstrItinData,
-                               InstrStage<2, [FU_NPipe]>], [10, 3, 2, 2]>,
-  //
-  // Double-register Reciprical Step
-  InstrItinData,
-                               InstrStage<1, [FU_NPipe]>], [9, 2, 2]>,
-  //
-  // Quad-register Reciprical Step
-  InstrItinData,
-                               InstrStage<2, [FU_NPipe]>], [10, 2, 2]>,
-  //
-  // Double-register Integer Count
-  InstrItinData,
-                               InstrStage<1, [FU_NPipe]>], [3, 2, 2]>,
-  //
-  // Quad-register Integer Count
-  // Result written in N3, but that is relative to the last cycle of multicycle,
-  // so we use 4 for those cases
-  InstrItinData,
-                               InstrStage<2, [FU_NPipe]>], [4, 2, 2]>,
-  //
-  // Double-register Integer Unary
-  InstrItinData,
-                               InstrStage<1, [FU_NPipe]>], [4, 2]>,
-  //
-  // Quad-register Integer Unary
-  InstrItinData,
-                               InstrStage<1, [FU_NPipe]>], [4, 2]>,
-  //
-  // Double-register Integer Q-Unary
-  InstrItinData,
-                               InstrStage<1, [FU_NPipe]>], [4, 1]>,
-  //
-  // Quad-register Integer CountQ-Unary
-  InstrItinData,
-                               InstrStage<1, [FU_NPipe]>], [4, 1]>,
-  //
-  // Double-register Integer Binary
-  InstrItinData,
-                               InstrStage<1, [FU_NPipe]>], [3, 2, 2]>,
-  //
-  // Quad-register Integer Binary
-  InstrItinData,
-                               InstrStage<1, [FU_NPipe]>], [3, 2, 2]>,
-  //
-  // Double-register Integer Binary (4 cycle)
-  InstrItinData,
-                               InstrStage<1, [FU_NPipe]>], [4, 2, 1]>,
-  //
-  // Quad-register Integer Binary (4 cycle)
-  InstrItinData,
-                               InstrStage<1, [FU_NPipe]>], [4, 2, 1]>,
-
-  //
-  // Double-register Integer Subtract
-  InstrItinData,
-                               InstrStage<1, [FU_NPipe]>], [3, 2, 1]>,
-  //
-  // Quad-register Integer Subtract
-  InstrItinData,
-                               InstrStage<1, [FU_NPipe]>], [3, 2, 1]>,
-  //
-  // Double-register Integer Subtract
-  InstrItinData,
-                               InstrStage<1, [FU_NPipe]>], [4, 2, 1]>,
-  //
-  // Quad-register Integer Subtract
-  InstrItinData,
-                               InstrStage<1, [FU_NPipe]>], [4, 2, 1]>,
-  //
-  // Double-register Integer Shift
-  InstrItinData,
-                               InstrStage<1, [FU_NPipe]>], [3, 1, 1]>,
-  //
-  // Quad-register Integer Shift
-  InstrItinData,
-                               InstrStage<2, [FU_NPipe]>], [4, 1, 1]>,
-  //
-  // Double-register Integer Shift (4 cycle)
-  InstrItinData,
-                               InstrStage<1, [FU_NPipe]>], [4, 1, 1]>,
-  //
-  // Quad-register Integer Shift (4 cycle)
-  InstrItinData,
-                               InstrStage<2, [FU_NPipe]>], [5, 1, 1]>,
-  //
-  // Double-register Integer Pair Add Long
-  InstrItinData,
-                               InstrStage<1, [FU_NPipe]>], [6, 3, 1]>,
-  //
-  // Quad-register Integer Pair Add Long
-  InstrItinData,
-                               InstrStage<2, [FU_NPipe]>], [7, 3, 1]>,
-  //
-  // Double-register Absolute Difference and Accumulate
-  InstrItinData,
-                               InstrStage<1, [FU_NPipe]>], [6, 3, 2, 1]>,
-  //
-  // Quad-register Absolute Difference and Accumulate
-  InstrItinData,
-                               InstrStage<2, [FU_NPipe]>], [6, 3, 2, 1]>,
-
-  //
-  // Double-register Integer Multiply (.8, .16)
-  InstrItinData,
-                               InstrStage<1, [FU_NPipe]>], [6, 2, 2]>,
-  //
-  // Double-register Integer Multiply (.32)
-  InstrItinData,
-                               InstrStage<2, [FU_NPipe]>], [7, 2, 1]>,
-  //
-  // Quad-register Integer Multiply (.8, .16)
-  InstrItinData,
-                               InstrStage<2, [FU_NPipe]>], [7, 2, 2]>,
-  //
-  // Quad-register Integer Multiply (.32)
-  InstrItinData,
-                               InstrStage<1, [FU_NPipe]>,
-                               InstrStage<2, [FU_NLSPipe], 0>,
-                               InstrStage<3, [FU_NPipe]>], [9, 2, 1]>,
-  //
-  // Double-register Integer Multiply-Accumulate (.8, .16)
-  InstrItinData,
-                               InstrStage<1, [FU_NPipe]>], [6, 3, 2, 2]>,
-  //
-  // Double-register Integer Multiply-Accumulate (.32)
-  InstrItinData,
-                               InstrStage<2, [FU_NPipe]>], [7, 3, 2, 1]>,
-  //
-  // Quad-register Integer Multiply-Accumulate (.8, .16)
-  InstrItinData,
-                               InstrStage<2, [FU_NPipe]>], [7, 3, 2, 2]>,
-  //
-  // Quad-register Integer Multiply-Accumulate (.32)
-  InstrItinData,
-                               InstrStage<1, [FU_NPipe]>,
-                               InstrStage<2, [FU_NLSPipe], 0>,
-                               InstrStage<3, [FU_NPipe]>], [9, 3, 2, 1]>,
-  //
-  // Double-register VEXT
-  InstrItinData,
-                               InstrStage<1, [FU_NLSPipe]>], [2, 1, 1]>,
-  //
-  // Quad-register VEXT
-  InstrItinData,
-                               InstrStage<2, [FU_NLSPipe]>], [3, 1, 1]>,
-  //
-  // VTB
-  InstrItinData,
-                               InstrStage<2, [FU_NLSPipe]>], [3, 2, 1]>,
-  InstrItinData,
-                               InstrStage<2, [FU_NLSPipe]>], [3, 2, 2, 1]>,
-  InstrItinData,
-                               InstrStage<1, [FU_NLSPipe]>,
-                               InstrStage<1, [FU_NPipe], 0>,
-                               InstrStage<2, [FU_NLSPipe]>], [4, 2, 2, 3, 1]>,
-  InstrItinData,
-                               InstrStage<1, [FU_NLSPipe]>,
-                               InstrStage<1, [FU_NPipe], 0>,
-                               InstrStage<2, [FU_NLSPipe]>], [4, 2, 2, 3, 3, 1]>,
-  //
-  // VTBX
-  InstrItinData,
-                               InstrStage<2, [FU_NLSPipe]>], [3, 1, 2, 1]>,
-  InstrItinData,
-                               InstrStage<2, [FU_NLSPipe]>], [3, 1, 2, 2, 1]>,
-  InstrItinData,
-                               InstrStage<1, [FU_NLSPipe]>,
-                               InstrStage<1, [FU_NPipe], 0>,
-                               InstrStage<2, [FU_NLSPipe]>], [4, 1, 2, 2, 3, 1]>,
-  InstrItinData,
-                               InstrStage<1, [FU_NLSPipe]>,
-                               InstrStage<1, [FU_NPipe], 0>,
-                               InstrStage<2, [FU_NLSPipe]>], [4, 1, 2, 2, 3, 3, 1]>
-]>;
-
-//
-// Ad-hoc scheduling information derived from the rather vague "Cortex-A9
-// Technical Reference Manual".
-//
-// Dual issue pipeline represented by FU_Pipe0 | FU_Pipe1
-//
-def CortexA9Itineraries : ProcessorItineraries<[
-  // VFP and NEON share the same register file. This means that every VFP
-  // instruction should wait for full completion of the consecutive NEON
-  // instruction and vice-versa. We model this behavior with two artificial FUs:
-  // DRegsVFP and DRegsN.
-  //
-  // Every VFP instruction:
-  //  - Acquires DRegsVFP resource for 1 cycle
-  //  - Reserves DRegsN resource for the whole duration (including time to
-  //    register file writeback!).
-  // Every NEON instruction does the same but with FUs swapped.
-  //
-  // Since the reserved FU cannot be acquired, this precisely models
-  // "cross-domain" stalls.
-
-  // VFP
-  // Issue through integer pipeline, and execute in NEON unit.
-
-  // FP Special Register to Integer Register File Move
-  InstrItinData,
-                              InstrStage<2, [FU_DRegsN],   0, Reserved>,
-                              InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                              InstrStage<1, [FU_NPipe]>]>,
-  //
-  // Single-precision FP Unary
-  InstrItinData,
-                               // Extra latency cycles since wbck is 2 cycles
-                               InstrStage<3, [FU_DRegsN],   0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_NPipe]>], [1, 1]>,
-  //
-  // Double-precision FP Unary
-  InstrItinData,
-                               // Extra latency cycles since wbck is 2 cycles
-                               InstrStage<3, [FU_DRegsN],   0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_NPipe]>], [1, 1]>,
-
-  //
-  // Single-precision FP Compare
-  InstrItinData,
-                               // Extra latency cycles since wbck is 4 cycles
-                               InstrStage<5, [FU_DRegsN],   0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_NPipe]>], [1, 1]>,
-  //
-  // Double-precision FP Compare
-  InstrItinData,
-                               // Extra latency cycles since wbck is 4 cycles
-                               InstrStage<5, [FU_DRegsN],   0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_NPipe]>], [1, 1]>,
-  //
-  // Single to Double FP Convert
-  InstrItinData,
-                               InstrStage<5, [FU_DRegsN],   0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_NPipe]>], [4, 1]>,
-  //
-  // Double to Single FP Convert
-  InstrItinData,
-                               InstrStage<5, [FU_DRegsN],   0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_NPipe]>], [4, 1]>,
-
-  //
-  // Single to Half FP Convert
-  InstrItinData,
-                               InstrStage<5, [FU_DRegsN],   0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_NPipe]>], [4, 1]>,
-  //
-  // Half to Single FP Convert
-  InstrItinData,
-                               InstrStage<3, [FU_DRegsN],   0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_NPipe]>], [2, 1]>,
-
-  //
-  // Single-Precision FP to Integer Convert
-  InstrItinData,
-                               InstrStage<5, [FU_DRegsN],   0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_NPipe]>], [4, 1]>,
-  //
-  // Double-Precision FP to Integer Convert
-  InstrItinData,
-                               InstrStage<5, [FU_DRegsN],   0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_NPipe]>], [4, 1]>,
-  //
-  // Integer to Single-Precision FP Convert
-  InstrItinData,
-                               InstrStage<5, [FU_DRegsN],   0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_NPipe]>], [4, 1]>,
-  //
-  // Integer to Double-Precision FP Convert
-  InstrItinData,
-                               InstrStage<5, [FU_DRegsN],   0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_NPipe]>], [4, 1]>,
-  //
-  // Single-precision FP ALU
-  InstrItinData,
-                               InstrStage<5, [FU_DRegsN],   0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_NPipe]>], [4, 1, 1]>,
-  //
-  // Double-precision FP ALU
-  InstrItinData,
-                               InstrStage<5, [FU_DRegsN],   0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_NPipe]>], [4, 1, 1]>,
-  //
-  // Single-precision FP Multiply
-  InstrItinData,
-                               InstrStage<6, [FU_DRegsN],   0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_NPipe]>], [5, 1, 1]>,
-  //
-  // Double-precision FP Multiply
-  InstrItinData,
-                               InstrStage<7, [FU_DRegsN],   0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<2, [FU_NPipe]>], [6, 1, 1]>,
-  //
-  // Single-precision FP MAC
-  InstrItinData,
-                               InstrStage<9, [FU_DRegsN],   0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_NPipe]>], [8, 0, 1, 1]>,
-  //
-  // Double-precision FP MAC
-  InstrItinData,
-                               InstrStage<10, [FU_DRegsN],  0, Reserved>,
-                               InstrStage<1,  [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<2,  [FU_NPipe]>], [9, 0, 1, 1]>,
-  //
-  // Single-precision FP DIV
-  InstrItinData,
-                               InstrStage<16, [FU_DRegsN],  0, Reserved>,
-                               InstrStage<1,  [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<10, [FU_NPipe]>], [15, 1, 1]>,
-  //
-  // Double-precision FP DIV
-  InstrItinData,
-                               InstrStage<26, [FU_DRegsN],  0, Reserved>,
-                               InstrStage<1,  [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<20, [FU_NPipe]>], [25, 1, 1]>,
-  //
-  // Single-precision FP SQRT
-  InstrItinData,
-                               InstrStage<18, [FU_DRegsN],   0, Reserved>,
-                               InstrStage<1,   [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<13,  [FU_NPipe]>], [17, 1]>,
-  //
-  // Double-precision FP SQRT
-  InstrItinData,
-                               InstrStage<33, [FU_DRegsN],   0, Reserved>,
-                               InstrStage<1,  [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<28, [FU_NPipe]>], [32, 1]>,
-
-  //
-  // Integer to Single-precision Move
-  InstrItinData,
-                               // Extra 1 latency cycle since wbck is 2 cycles
-                               InstrStage<3, [FU_DRegsN],   0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_NPipe]>], [1, 1]>,
-  //
-  // Integer to Double-precision Move
-  InstrItinData,
-                               // Extra 1 latency cycle since wbck is 2 cycles
-                               InstrStage<3, [FU_DRegsN],   0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_NPipe]>], [1, 1, 1]>,
-  //
-  // Single-precision to Integer Move
-  InstrItinData,
-                               InstrStage<2, [FU_DRegsN],   0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_NPipe]>], [1, 1]>,
-  //
-  // Double-precision to Integer Move
-  InstrItinData,
-                               InstrStage<2, [FU_DRegsN],   0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_NPipe]>], [1, 1, 1]>,
-  //
-  // Single-precision FP Load
-  // use FU_Issue to enforce the 1 load/store per cycle limit
-  InstrItinData,
-                               InstrStage<2, [FU_DRegsN],   0, Reserved>,
-                               InstrStage<1, [FU_Issue], 0>, 
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_LdSt0], 0>,
-                               InstrStage<1, [FU_NPipe]>]>,
-  //
-  // Double-precision FP Load
-  // use FU_Issue to enforce the 1 load/store per cycle limit
-  InstrItinData,
-                               InstrStage<2, [FU_DRegsN],   0, Reserved>,
-                               InstrStage<1, [FU_Issue], 0>, 
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_LdSt0], 0>,
-                               InstrStage<1, [FU_NPipe]>]>,
-  //
-  // FP Load Multiple
-  // use FU_Issue to enforce the 1 load/store per cycle limit
-  InstrItinData,
-                               InstrStage<2, [FU_DRegsN],   0, Reserved>,
-                               InstrStage<1, [FU_Issue], 0>, 
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_LdSt0], 0>,
-                               InstrStage<1, [FU_NPipe]>]>,
-  //
-  // Single-precision FP Store
-  // use FU_Issue to enforce the 1 load/store per cycle limit
-  InstrItinData,
-                               InstrStage<2, [FU_DRegsN],   0, Reserved>,
-                               InstrStage<1, [FU_Issue], 0>, 
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_LdSt0], 0>,
-                               InstrStage<1, [FU_NPipe]>]>,
-  //
-  // Double-precision FP Store
-  // use FU_Issue to enforce the 1 load/store per cycle limit
-  InstrItinData,
-                               InstrStage<2, [FU_DRegsN],   0, Reserved>,
-                               InstrStage<1, [FU_Issue], 0>, 
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_LdSt0], 0>,
-                               InstrStage<1, [FU_NPipe]>]>,
-  //
-  // FP Store Multiple
-  // use FU_Issue to enforce the 1 load/store per cycle limit
-  InstrItinData,
-                               InstrStage<2, [FU_DRegsN],   0, Reserved>,
-                               InstrStage<1, [FU_Issue], 0>, 
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_LdSt0], 0>,
-                               InstrStage<1, [FU_NPipe]>]>,
-  // NEON
-  // Issue through integer pipeline, and execute in NEON unit.
-  // FIXME: Neon pipeline and LdSt unit are multiplexed. 
-  //        Add some syntactic sugar to model this!
-  // VLD1
-  // FIXME: We don't model this instruction properly
-  InstrItinData,
-                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [FU_Issue], 0>, 
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_LdSt0], 0>,
-                               InstrStage<1, [FU_NPipe]>]>,
-  //
-  // VLD2
-  // FIXME: We don't model this instruction properly
-  InstrItinData,
-                               // Extra latency cycles since wbck is 6 cycles
-                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [FU_Issue], 0>, 
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_LdSt0], 0>,
-                               InstrStage<1, [FU_NPipe]>], [2, 2, 1]>,
-  //
-  // VLD3
-  // FIXME: We don't model this instruction properly
-  InstrItinData,
-                               // Extra latency cycles since wbck is 6 cycles
-                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [FU_Issue], 0>, 
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_LdSt0], 0>,
-                               InstrStage<1, [FU_NPipe]>], [2, 2, 2, 1]>,
-  //
-  // VLD4
-  // FIXME: We don't model this instruction properly
-  InstrItinData,
-                               // Extra latency cycles since wbck is 6 cycles
-                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [FU_Issue], 0>, 
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_LdSt0], 0>,
-                               InstrStage<1, [FU_NPipe]>], [2, 2, 2, 2, 1]>,
-  //
-  // VST
-  // FIXME: We don't model this instruction properly
-  InstrItinData,
-                               // Extra latency cycles since wbck is 6 cycles
-                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [FU_Issue], 0>, 
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_LdSt0], 0>,
-                               InstrStage<1, [FU_NPipe]>]>,
-  //
-  // Double-register Integer Unary
-  InstrItinData,
-                               // Extra latency cycles since wbck is 6 cycles
-                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_NPipe]>], [4, 2]>,
-  //
-  // Quad-register Integer Unary
-  InstrItinData,
-                               // Extra latency cycles since wbck is 6 cycles
-                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_NPipe]>], [4, 2]>,
-  //
-  // Double-register Integer Q-Unary
-  InstrItinData,
-                               // Extra latency cycles since wbck is 6 cycles
-                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_NPipe]>], [4, 1]>,
-  //
-  // Quad-register Integer Q-Unary
-  InstrItinData,
-                               // Extra latency cycles since wbck is 6 cycles
-                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_NPipe]>], [4, 1]>,
-  //
-  // Double-register Integer Binary
-  InstrItinData,
-                               // Extra latency cycles since wbck is 6 cycles
-                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_NPipe]>], [3, 2, 2]>,
-  //
-  // Quad-register Integer Binary
-  InstrItinData,
-                               // Extra latency cycles since wbck is 6 cycles
-                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_NPipe]>], [3, 2, 2]>,
-  //
-  // Double-register Integer Subtract
-  InstrItinData,
-                               // Extra latency cycles since wbck is 6 cycles
-                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_NPipe]>], [3, 2, 1]>,
-  //
-  // Quad-register Integer Subtract
-  InstrItinData,
-                               // Extra latency cycles since wbck is 6 cycles
-                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_NPipe]>], [3, 2, 1]>,
-  //
-  // Double-register Integer Shift
-  InstrItinData,
-                               // Extra latency cycles since wbck is 6 cycles
-                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_NPipe]>], [3, 1, 1]>,
-  //
-  // Quad-register Integer Shift
-  InstrItinData,
-                               // Extra latency cycles since wbck is 6 cycles
-                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_NPipe]>], [3, 1, 1]>,
-  //
-  // Double-register Integer Shift (4 cycle)
-  InstrItinData,
-                               // Extra latency cycles since wbck is 6 cycles
-                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_NPipe]>], [4, 1, 1]>,
-  //
-  // Quad-register Integer Shift (4 cycle)
-  InstrItinData,
-                               // Extra latency cycles since wbck is 6 cycles
-                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_NPipe]>], [4, 1, 1]>,
-  //
-  // Double-register Integer Binary (4 cycle)
-  InstrItinData,
-                               // Extra latency cycles since wbck is 6 cycles
-                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_NPipe]>], [4, 2, 2]>,
-  //
-  // Quad-register Integer Binary (4 cycle)
-  InstrItinData,
-                               // Extra latency cycles since wbck is 6 cycles
-                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_NPipe]>], [4, 2, 2]>,
-  //
-  // Double-register Integer Subtract (4 cycle)
-  InstrItinData,
-                               // Extra latency cycles since wbck is 6 cycles
-                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_NPipe]>], [4, 2, 1]>,
-  //
-  // Quad-register Integer Subtract (4 cycle)
-  InstrItinData,
-                               // Extra latency cycles since wbck is 6 cycles
-                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_NPipe]>], [4, 2, 1]>,
-
-  //
-  // Double-register Integer Count
-  InstrItinData,
-                               // Extra latency cycles since wbck is 6 cycles
-                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_NPipe]>], [3, 2, 2]>,
-  //
-  // Quad-register Integer Count
-  // Result written in N3, but that is relative to the last cycle of multicycle,
-  // so we use 4 for those cases
-  InstrItinData,
-                               // Extra latency cycles since wbck is 7 cycles
-                               InstrStage<8, [FU_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<2, [FU_NPipe]>], [4, 2, 2]>,
-  //
-  // Double-register Absolute Difference and Accumulate
-  InstrItinData,
-                               // Extra latency cycles since wbck is 6 cycles
-                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_NPipe]>], [6, 3, 2, 1]>,
-  //
-  // Quad-register Absolute Difference and Accumulate
-  InstrItinData,
-                               // Extra latency cycles since wbck is 6 cycles
-                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<2, [FU_NPipe]>], [6, 3, 2, 1]>,
-  //
-  // Double-register Integer Pair Add Long
-  InstrItinData,
-                               // Extra latency cycles since wbck is 6 cycles
-                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_NPipe]>], [6, 3, 1]>,
-  //
-  // Quad-register Integer Pair Add Long
-  InstrItinData,
-                               // Extra latency cycles since wbck is 6 cycles
-                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<2, [FU_NPipe]>], [6, 3, 1]>,
-
-  //
-  // Double-register Integer Multiply (.8, .16)
-  InstrItinData,
-                               // Extra latency cycles since wbck is 6 cycles
-                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_NPipe]>], [6, 2, 2]>,
-  //
-  // Quad-register Integer Multiply (.8, .16)
-  InstrItinData,
-                               // Extra latency cycles since wbck is 7 cycles
-                               InstrStage<8, [FU_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<2, [FU_NPipe]>], [7, 2, 2]>,
-
-  //
-  // Double-register Integer Multiply (.32)
-  InstrItinData,
-                               // Extra latency cycles since wbck is 7 cycles
-                               InstrStage<8, [FU_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<2, [FU_NPipe]>], [7, 2, 1]>,
-  //
-  // Quad-register Integer Multiply (.32)
-  InstrItinData,
-                               // Extra latency cycles since wbck is 9 cycles
-                               InstrStage<10, [FU_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<4, [FU_NPipe]>], [9, 2, 1]>,
-  //
-  // Double-register Integer Multiply-Accumulate (.8, .16)
-  InstrItinData,
-                               // Extra latency cycles since wbck is 6 cycles
-                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_NPipe]>], [6, 3, 2, 2]>,
-  //
-  // Double-register Integer Multiply-Accumulate (.32)
-  InstrItinData,
-                               // Extra latency cycles since wbck is 7 cycles
-                               InstrStage<8, [FU_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<2, [FU_NPipe]>], [7, 3, 2, 1]>,
-  //
-  // Quad-register Integer Multiply-Accumulate (.8, .16)
-  InstrItinData,
-                               // Extra latency cycles since wbck is 7 cycles
-                               InstrStage<8, [FU_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<2, [FU_NPipe]>], [7, 3, 2, 2]>,
-  //
-  // Quad-register Integer Multiply-Accumulate (.32)
-  InstrItinData,
-                               // Extra latency cycles since wbck is 9 cycles
-                               InstrStage<10, [FU_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<4, [FU_NPipe]>], [9, 3, 2, 1]>,
-  //
-  // Move Immediate
-  InstrItinData,
-                               // Extra latency cycles since wbck is 6 cycles
-                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_NPipe]>], [3]>,
-  //
-  // Double-register Permute Move
-  InstrItinData,
-  // FIXME: all latencies are arbitrary, no information is available
-                               InstrStage<3, [FU_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_NLSPipe]>], [2, 1]>,
-  //
-  // Quad-register Permute Move
-  // Result written in N2, but that is relative to the last cycle of multicycle,
-  // so we use 3 for those cases
-  InstrItinData,
-  // FIXME: all latencies are arbitrary, no information is available
-                               InstrStage<4, [FU_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<2, [FU_NPipe]>], [3, 1]>,
-  //
-  // Integer to Single-precision Move
-  InstrItinData,
-  // FIXME: all latencies are arbitrary, no information is available
-                               InstrStage<3, [FU_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_NPipe]>], [2, 1]>,
-  //
-  // Integer to Double-precision Move
-  InstrItinData,
-  // FIXME: all latencies are arbitrary, no information is available
-                               InstrStage<3, [FU_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_NPipe]>], [2, 1, 1]>,
-  //
-  // Single-precision to Integer Move
-  InstrItinData,
-  // FIXME: all latencies are arbitrary, no information is available
-                               InstrStage<3, [FU_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_NPipe]>], [2, 1]>,
-  //
-  // Double-precision to Integer Move
-  InstrItinData,
-  // FIXME: all latencies are arbitrary, no information is available
-                               InstrStage<3, [FU_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_NPipe]>], [2, 2, 1]>,
-  //
-  // Integer to Lane Move
-  InstrItinData,
-  // FIXME: all latencies are arbitrary, no information is available
-                               InstrStage<4, [FU_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<2, [FU_NPipe]>], [3, 1, 1]>,
-
-  //
-  // Double-register FP Unary
-  InstrItinData,
-                               // Extra latency cycles since wbck is 6 cycles
-                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_NPipe]>], [5, 2]>,
-  //
-  // Quad-register FP Unary
-  // Result written in N5, but that is relative to the last cycle of multicycle,
-  // so we use 6 for those cases
-  InstrItinData,
-                               // Extra latency cycles since wbck is 7 cycles
-                               InstrStage<8, [FU_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<2, [FU_NPipe]>], [6, 2]>,
-  //
-  // Double-register FP Binary
-  // FIXME: We're using this itin for many instructions and [2, 2] here is too
-  // optimistic.
-  InstrItinData,
-                               // Extra latency cycles since wbck is 7 cycles
-                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_NPipe]>], [5, 2, 2]>,
-  //
-  // Quad-register FP Binary
-  // Result written in N5, but that is relative to the last cycle of multicycle,
-  // so we use 6 for those cases
-  // FIXME: We're using this itin for many instructions and [2, 2] here is too
-  // optimistic.
-  InstrItinData,
-                               // Extra latency cycles since wbck is 8 cycles
-                               InstrStage<8, [FU_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<2, [FU_NPipe]>], [6, 2, 2]>,
-  //
-  // Double-register FP Multiple-Accumulate
-  InstrItinData,
-                               // Extra latency cycles since wbck is 7 cycles
-                               InstrStage<8, [FU_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<2, [FU_NPipe]>], [6, 3, 2, 1]>,
-  //
-  // Quad-register FP Multiple-Accumulate
-  // Result written in N9, but that is relative to the last cycle of multicycle,
-  // so we use 10 for those cases
-  InstrItinData,
-                               // Extra latency cycles since wbck is 9 cycles
-                               InstrStage<10, [FU_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<4, [FU_NPipe]>], [8, 4, 2, 1]>,
-  //
-  // Double-register Reciprocal Step
-  InstrItinData,
-                               // Extra latency cycles since wbck is 7 cycles
-                               InstrStage<8, [FU_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<2, [FU_NPipe]>], [6, 2, 2]>,
-  //
-  // Quad-register Reciprocal Step
-  InstrItinData,
-                               // Extra latency cycles since wbck is 9 cycles
-                               InstrStage<10, [FU_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<4, [FU_NPipe]>], [8, 2, 2]>,
-  //
-  // Double-register Permute
-  InstrItinData,
-                               // Extra latency cycles since wbck is 6 cycles
-                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_NPipe]>], [2, 2, 1, 1]>,
-  //
-  // Quad-register Permute
-  // Result written in N2, but that is relative to the last cycle of multicycle,
-  // so we use 3 for those cases
-  InstrItinData,
-                               // Extra latency cycles since wbck is 7 cycles
-                               InstrStage<8, [FU_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<2, [FU_NPipe]>], [3, 3, 1, 1]>,
-  //
-  // Quad-register Permute (3 cycle issue)
-  // Result written in N2, but that is relative to the last cycle of multicycle,
-  // so we use 4 for those cases
-  InstrItinData,
-                               // Extra latency cycles since wbck is 8 cycles
-                               InstrStage<9, [FU_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<3, [FU_NLSPipe]>], [4, 4, 1, 1]>,
-
-  //
-  // Double-register VEXT
-  InstrItinData,
-                               // Extra latency cycles since wbck is 7 cycles
-                               InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_NPipe]>], [2, 1, 1]>,
-  //
-  // Quad-register VEXT
-  InstrItinData,
-                               // Extra latency cycles since wbck is 9 cycles
-                               InstrStage<8, [FU_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<2, [FU_NPipe]>], [3, 1, 1]>,
-  //
-  // VTB
-  InstrItinData,
-                               // Extra latency cycles since wbck is 7 cycles
-                               InstrStage<8, [FU_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<2, [FU_NPipe]>], [3, 2, 1]>,
-  InstrItinData,
-                               // Extra latency cycles since wbck is 7 cycles
-                               InstrStage<8, [FU_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<2, [FU_NPipe]>], [3, 2, 2, 1]>,
-  InstrItinData,
-                               // Extra latency cycles since wbck is 8 cycles
-                               InstrStage<9, [FU_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<3, [FU_NPipe]>], [4, 2, 2, 3, 1]>,
-  InstrItinData,
-                               // Extra latency cycles since wbck is 8 cycles
-                               InstrStage<9, [FU_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<3, [FU_NPipe]>], [4, 2, 2, 3, 3, 1]>,
-  //
-  // VTBX
-  InstrItinData,
-                               // Extra latency cycles since wbck is 7 cycles
-                               InstrStage<8, [FU_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<2, [FU_NPipe]>], [3, 1, 2, 1]>,
-  InstrItinData,
-                               // Extra latency cycles since wbck is 7 cycles
-                               InstrStage<8, [FU_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<2, [FU_NPipe]>], [3, 1, 2, 2, 1]>,
-  InstrItinData,
-                               // Extra latency cycles since wbck is 8 cycles
-                               InstrStage<9, [FU_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<3, [FU_NPipe]>], [4, 1, 2, 2, 3, 1]>,
-  InstrItinData,
-                               // Extra latency cycles since wbck is 8 cycles
-                               InstrStage<9, [FU_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<2, [FU_NPipe]>], [4, 1, 2, 2, 3, 3, 1]>
-]>;
-
-




From asl at math.spbu.ru  Wed Apr  7 13:23:27 2010
From: asl at math.spbu.ru (Anton Korobeynikov)
Date: Wed, 07 Apr 2010 18:23:27 -0000
Subject: [llvm-commits] [llvm] r100673 - in /llvm/trunk:
 include/llvm/Target/TargetMachine.h lib/CodeGen/LLVMTargetMachine.cpp
 lib/Target/ARM/ARM.h lib/Target/ARM/ARMGlobalMerge.cpp
 lib/Target/ARM/ARMTargetMachine.cpp lib/Target/ARM/ARMTargetMachine.h
Message-ID: <20100407182327.48D992A6C12C@llvm.org>

Author: asl
Date: Wed Apr  7 13:23:27 2010
New Revision: 100673

URL: http://llvm.org/viewvc/llvm-project?rev=100673&view=rev
Log:
Remove late ARM codegen optimization pass committed by accident.
It is not ready for public use yet.

Removed:
    llvm/trunk/lib/Target/ARM/ARMGlobalMerge.cpp
Modified:
    llvm/trunk/include/llvm/Target/TargetMachine.h
    llvm/trunk/lib/CodeGen/LLVMTargetMachine.cpp
    llvm/trunk/lib/Target/ARM/ARM.h
    llvm/trunk/lib/Target/ARM/ARMTargetMachine.cpp
    llvm/trunk/lib/Target/ARM/ARMTargetMachine.h

Modified: llvm/trunk/include/llvm/Target/TargetMachine.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetMachine.h?rev=100673&r1=100672&r2=100673&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Target/TargetMachine.h (original)
+++ llvm/trunk/include/llvm/Target/TargetMachine.h Wed Apr  7 13:23:27 2010
@@ -264,15 +264,10 @@
                                           bool DisableVerify = true);
   
   /// Target-Independent Code Generator Pass Configuration Options.
-
-  /// addPreISelPasses - This method should add any "last minute" LLVM->LLVM
-  /// passes (which are run just before instruction selector).
-  virtual bool addPreISel(PassManagerBase &, CodeGenOpt::Level) {
-    return true;
-  }
-
-  /// addInstSelector - This method should install an instruction selector pass,
-  /// which converts from LLVM code to machine instructions.
+  
+  /// addInstSelector - This method should add any "last minute" LLVM->LLVM
+  /// passes, then install an instruction selector pass, which converts from
+  /// LLVM code to machine instructions.
   virtual bool addInstSelector(PassManagerBase &, CodeGenOpt::Level) {
     return true;
   }

Modified: llvm/trunk/lib/CodeGen/LLVMTargetMachine.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/LLVMTargetMachine.cpp?rev=100673&r1=100672&r2=100673&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/LLVMTargetMachine.cpp (original)
+++ llvm/trunk/lib/CodeGen/LLVMTargetMachine.cpp Wed Apr  7 13:23:27 2010
@@ -268,8 +268,6 @@
 
   PM.add(createStackProtectorPass(getTargetLowering()));
 
-  addPreISel(PM, OptLevel);
-
   if (PrintISelInput)
     PM.add(createPrintFunctionPass("\n\n"
                                    "*** Final LLVM Code input to ISel ***\n",

Modified: llvm/trunk/lib/Target/ARM/ARM.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARM.h?rev=100673&r1=100672&r2=100673&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARM.h (original)
+++ llvm/trunk/lib/Target/ARM/ARM.h Wed Apr  7 13:23:27 2010
@@ -98,7 +98,6 @@
 
 FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false);
 FunctionPass *createARMExpandPseudoPass();
-FunctionPass *createARMGlobalMergePass(const TargetLowering* tli);
 FunctionPass *createARMConstantIslandPass();
 FunctionPass *createNEONPreAllocPass();
 FunctionPass *createNEONMoveFixPass();

Removed: llvm/trunk/lib/Target/ARM/ARMGlobalMerge.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMGlobalMerge.cpp?rev=100672&view=auto
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMGlobalMerge.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMGlobalMerge.cpp (removed)
@@ -1,149 +0,0 @@
-//===-- ARMGlobalMerge.cpp - Internal globals merging  --------------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "arm-global-merge"
-#include "ARM.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/Attributes.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/Instructions.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/Module.h"
-#include "llvm/Pass.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetLowering.h"
-using namespace llvm;
-
-namespace {
-  class VISIBILITY_HIDDEN ARMGlobalMerge : public FunctionPass {
-    /// TLI - Keep a pointer of a TargetLowering to consult for determining
-    /// target type sizes.
-    const TargetLowering *TLI;
-    bool doMerge(std::vector &Globals, Module &M, bool) const;
-
-  public:
-    static char ID;             // Pass identification, replacement for typeid.
-    explicit ARMGlobalMerge(const TargetLowering *tli)
-      : FunctionPass(&ID), TLI(tli) {}
-
-    virtual bool doInitialization(Module &M);
-    virtual bool runOnFunction(Function& F);
-
-    const char *getPassName() const {
-      return "Merge internal globals";
-    }
-
-    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
-      AU.setPreservesCFG();
-      FunctionPass::getAnalysisUsage(AU);
-    }
-
-    struct GlobalCmp {
-      const TargetData *TD;
-
-      GlobalCmp(const TargetData *td):
-        TD(td) { };
-
-      bool operator() (const GlobalVariable* GV1,
-                       const GlobalVariable* GV2) {
-        const Type* Ty1 = cast(GV1->getType())->getElementType();
-        const Type* Ty2 = cast(GV2->getType())->getElementType();
-
-        return (TD->getTypeAllocSize(Ty1) <
-                TD->getTypeAllocSize(Ty2));
-      }
-    };
-  };
-} // end anonymous namespace
-
-char ARMGlobalMerge::ID = 0;
-
-#define MAX_OFFSET 4095
-
-bool ARMGlobalMerge::doMerge(std::vector &Globals,
-                             Module &M, bool isConst) const {
-  const TargetData *TD = TLI->getTargetData();
-
-  // FIXME: Find better heuristics
-  std::stable_sort(Globals.begin(), Globals.end(), GlobalCmp(TD));
-
-  const Type *Int32Ty = Type::getInt32Ty(M.getContext());
-
-  for (size_t i = 0, e = Globals.size(); i != e; ) {
-    size_t j = 0;
-    uint64_t MergedSize = 0;
-    std::vector Tys;
-    std::vector Inits;
-    for (j = i; MergedSize < MAX_OFFSET && j != e; ++j) {
-      const Type* Ty = Globals[j]->getType()->getElementType();
-      Tys.push_back(Ty);
-      Inits.push_back(Globals[j]->getInitializer());
-      MergedSize += TD->getTypeAllocSize(Ty);
-    }
-
-    StructType* MergedTy = StructType::get(M.getContext(), Tys);
-    Constant* MergedInit = ConstantStruct::get(MergedTy, Inits);
-    GlobalVariable* MergedGV = new GlobalVariable(M, MergedTy, isConst,
-                                                  GlobalValue::InternalLinkage,
-                                                  MergedInit, "merged");
-    for (size_t k = i; k < j; ++k) {
-      SmallVector Idx;
-      Idx.push_back(ConstantInt::get(Int32Ty, 0));
-      Idx.push_back(ConstantInt::get(Int32Ty, k-i));
-
-      Constant* GEP =
-        ConstantExpr::getInBoundsGetElementPtr(MergedGV,
-                                               &Idx[0], Idx.size());
-
-      Globals[k]->replaceAllUsesWith(GEP);
-      Globals[k]->eraseFromParent();
-    }
-    i = j;
-  }
-
-  return true;
-}
-
-
-bool ARMGlobalMerge::doInitialization(Module& M) {
-  std::vector Globals, ConstGlobals;
-  bool Changed = false;
-  const TargetData *TD = TLI->getTargetData();
-
-  // Grab all non-const globals.
-  for (Module::global_iterator I = M.global_begin(),
-         E = M.global_end(); I != E; ++I) {
-    // Ignore fancy-aligned globals for now.
-    if (I->hasLocalLinkage() && I->getAlignment() == 0 &&
-        TD->getTypeAllocSize(I->getType()) < MAX_OFFSET) {
-      if (I->isConstant())
-        ConstGlobals.push_back(I);
-      else
-        Globals.push_back(I);
-    }
-  }
-
-  Changed |= doMerge(Globals, M, false);
-  Changed |= doMerge(ConstGlobals, M, true);
-
-  return Changed;
-}
-
-bool ARMGlobalMerge::runOnFunction(Function& F) {
-  return false;
-}
-
-FunctionPass *llvm::createARMGlobalMergePass(const TargetLowering *tli) {
-  return new ARMGlobalMerge(tli);
-}

Modified: llvm/trunk/lib/Target/ARM/ARMTargetMachine.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMTargetMachine.cpp?rev=100673&r1=100672&r2=100673&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMTargetMachine.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMTargetMachine.cpp Wed Apr  7 13:23:27 2010
@@ -79,15 +79,9 @@
     TLInfo(*this) {
 }
 
-// Pass Pipeline Configuration
-bool ARMBaseTargetMachine::addPreISel(PassManagerBase &PM,
-                                      CodeGenOpt::Level OptLevel) {
-  if (OptLevel != CodeGenOpt::None)
-    PM.add(createARMGlobalMergePass(getTargetLowering()));
 
-  return false;
-}
 
+// Pass Pipeline Configuration
 bool ARMBaseTargetMachine::addInstSelector(PassManagerBase &PM,
                                            CodeGenOpt::Level OptLevel) {
   PM.add(createARMISelDag(*this, OptLevel));

Modified: llvm/trunk/lib/Target/ARM/ARMTargetMachine.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMTargetMachine.h?rev=100673&r1=100672&r2=100673&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMTargetMachine.h (original)
+++ llvm/trunk/lib/Target/ARM/ARMTargetMachine.h Wed Apr  7 13:23:27 2010
@@ -49,7 +49,6 @@
   }
 
   // Pass Pipeline Configuration
-  virtual bool addPreISel(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
   virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
   virtual bool addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
   virtual bool addPreSched2(PassManagerBase &PM, CodeGenOpt::Level OptLevel);




From ggreif at gmail.com  Wed Apr  7 13:59:26 2010
From: ggreif at gmail.com (Gabor Greif)
Date: Wed, 07 Apr 2010 18:59:26 -0000
Subject: [llvm-commits] [llvm] r100677 -
	/llvm/trunk/lib/Transforms/IPO/GlobalOpt.cpp
Message-ID: <20100407185926.6986B2A6C12C@llvm.org>

Author: ggreif
Date: Wed Apr  7 13:59:26 2010
New Revision: 100677

URL: http://llvm.org/viewvc/llvm-project?rev=100677&view=rev
Log:
fix 80-col violations

Modified:
    llvm/trunk/lib/Transforms/IPO/GlobalOpt.cpp

Modified: llvm/trunk/lib/Transforms/IPO/GlobalOpt.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/IPO/GlobalOpt.cpp?rev=100677&r1=100676&r2=100677&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/IPO/GlobalOpt.cpp (original)
+++ llvm/trunk/lib/Transforms/IPO/GlobalOpt.cpp Wed Apr  7 13:59:26 2010
@@ -143,7 +143,8 @@
 static bool SafeToDestroyConstant(const Constant *C) {
   if (isa(C)) return false;
 
-  for (Value::const_use_iterator UI = C->use_begin(), E = C->use_end(); UI != E; ++UI)
+  for (Value::const_use_iterator UI = C->use_begin(), E = C->use_end(); UI != E;
+       ++UI)
     if (const Constant *CU = dyn_cast(*UI)) {
       if (!SafeToDestroyConstant(CU)) return false;
     } else
@@ -158,7 +159,8 @@
 ///
 static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS,
                           SmallPtrSet &PHIUsers) {
-  for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI)
+  for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;
+       ++UI)
     if (const ConstantExpr *CE = dyn_cast(*UI)) {
       GS.HasNonInstructionUser = true;
 
@@ -185,7 +187,8 @@
         // value, not an aggregate), keep more specific information about
         // stores.
         if (GS.StoredType != GlobalStatus::isStored) {
-          if (const GlobalVariable *GV = dyn_cast(SI->getOperand(1))){
+          if (const GlobalVariable *GV = dyn_cast(
+                                                           SI->getOperand(1))) {
             Value *StoredVal = SI->getOperand(0);
             if (StoredVal == GV->getInitializer()) {
               if (GS.StoredType < GlobalStatus::isInitializerStored)
@@ -1038,11 +1041,12 @@
 /// of a load) are simple enough to perform heap SRA on.  This permits GEP's
 /// that index through the array and struct field, icmps of null, and PHIs.
 static bool LoadUsesSimpleEnoughForHeapSRA(const Value *V,
-                              SmallPtrSet &LoadUsingPHIs,
-                              SmallPtrSet &LoadUsingPHIsPerLoad) {
+                        SmallPtrSet &LoadUsingPHIs,
+                        SmallPtrSet &LoadUsingPHIsPerLoad) {
   // We permit two users of the load: setcc comparing against the null
   // pointer, and a getelementptr of a specific form.
-  for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){
+  for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;
+       ++UI) {
     const Instruction *User = cast(*UI);
     
     // Comparison against null is ok.
@@ -1093,8 +1097,8 @@
                                                     Instruction *StoredVal) {
   SmallPtrSet LoadUsingPHIs;
   SmallPtrSet LoadUsingPHIsPerLoad;
-  for (Value::const_use_iterator UI = GV->use_begin(), E = GV->use_end(); UI != E; 
-       ++UI)
+  for (Value::const_use_iterator UI = GV->use_begin(), E = GV->use_end();
+       UI != E; ++UI)
     if (const LoadInst *LI = dyn_cast(*UI)) {
       if (!LoadUsesSimpleEnoughForHeapSRA(LI, LoadUsingPHIs,
                                           LoadUsingPHIsPerLoad))
@@ -1107,8 +1111,8 @@
   // that all inputs the to the PHI nodes are in the same equivalence sets. 
   // Check to verify that all operands of the PHIs are either PHIS that can be
   // transformed, loads from GV, or MI itself.
-  for (SmallPtrSet::const_iterator I = LoadUsingPHIs.begin(),
-       E = LoadUsingPHIs.end(); I != E; ++I) {
+  for (SmallPtrSet::const_iterator I = LoadUsingPHIs.begin()
+       , E = LoadUsingPHIs.end(); I != E; ++I) {
     const PHINode *PN = *I;
     for (unsigned op = 0, e = PN->getNumIncomingValues(); op != e; ++op) {
       Value *InVal = PN->getIncomingValue(op);
@@ -1698,8 +1702,8 @@
     if (GS.StoredType == GlobalStatus::isStoredOnce && GS.StoredOnceValue)
       DEBUG(dbgs() << "  StoredOnceValue = " << *GS.StoredOnceValue << "\n");
     if (GS.AccessingFunction && !GS.HasMultipleAccessingFunctions)
-      DEBUG(dbgs() << "  AccessingFunction = " << GS.AccessingFunction->getName()
-                  << "\n");
+      DEBUG(dbgs() << "  AccessingFunction = "
+                   << GS.AccessingFunction->getName() << "\n");
     DEBUG(dbgs() << "  HasMultipleAccessingFunctions =  "
                  << GS.HasMultipleAccessingFunctions << "\n");
     DEBUG(dbgs() << "  HasNonInstructionUser = " 
@@ -2509,7 +2513,7 @@
         continue;
 
       // Do not perform the transform if multiple aliases potentially target the
-      // aliasee.  This check also ensures that it is safe to replace the section
+      // aliasee. This check also ensures that it is safe to replace the section
       // and other attributes of the aliasee with those of the alias.
       if (!hasOneUse)
         continue;




From isanbard at gmail.com  Wed Apr  7 14:39:57 2010
From: isanbard at gmail.com (Bill Wendling)
Date: Wed, 07 Apr 2010 19:39:57 -0000
Subject: [llvm-commits] [llvm-gcc-4.2] r100679 - in /llvm-gcc-4.2/trunk/gcc:
 config/darwin.c config/darwin.h config/i386/darwin.h config/i386/darwin64.h
 opts.c
Message-ID: <20100407193957.D18A92A6C12C@llvm.org>

Author: void
Date: Wed Apr  7 14:39:57 2010
New Revision: 100679

URL: http://llvm.org/viewvc/llvm-project?rev=100679&view=rev
Log:
Build i386 kexts with "-mdynamic-no-pic -Wa,-dynamic". 

Modified:
    llvm-gcc-4.2/trunk/gcc/config/darwin.c
    llvm-gcc-4.2/trunk/gcc/config/darwin.h
    llvm-gcc-4.2/trunk/gcc/config/i386/darwin.h
    llvm-gcc-4.2/trunk/gcc/config/i386/darwin64.h
    llvm-gcc-4.2/trunk/gcc/opts.c

Modified: llvm-gcc-4.2/trunk/gcc/config/darwin.c
URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/gcc/config/darwin.c?rev=100679&r1=100678&r2=100679&view=diff
==============================================================================
--- llvm-gcc-4.2/trunk/gcc/config/darwin.c (original)
+++ llvm-gcc-4.2/trunk/gcc/config/darwin.c Wed Apr  7 14:39:57 2010
@@ -2712,16 +2712,9 @@
 void
 darwin_override_options (void)
 {
-  /* APPLE LOCAL begin for iframework for 4.3 4094959 */
-  /* Remove this: */
-#if 0
-  if (flag_apple_kext && strcmp (lang_hooks.name, "GNU C++") != 0)
-    {
-      warning (0, "command line option %<-fapple-kext%> is only valid for C++");
-      flag_apple_kext = 0;
-    }
-#endif
-  /* APPLE LOCAL end for iframework for 4.3 4094959 */
+  /* LLVM LOCAL begin 7563705 */
+  /* Removed. */
+  /* LLVM LOCAL begin 7563705 */
   if (flag_mkernel || flag_apple_kext)
     {
       /* -mkernel implies -fapple-kext for C++ */
@@ -2741,6 +2734,14 @@
 	  ! TARGET_SUPPORTS_KEXTABI1)
 	flag_apple_kext = 2;
       /* APPLE LOCAL end kext v2 */
+      /* LLVM LOCAL begin 7563705 */
+#ifdef ENABLE_LLVM
+      if (flag_apple_kext
+          && strverscmp (darwin_macosx_version_min, "10.6") > 0
+          && ! TARGET_64BIT)
+        target_flags |= MASK_MACHO_DYNAMIC_NO_PIC;
+#endif
+      /* LLVM LOCAL end 7563705 */
     }
   /* APPLE LOCAL begin axe stubs 5571540 */
   /* APPLE LOCAL begin ARM 5683689 */

Modified: llvm-gcc-4.2/trunk/gcc/config/darwin.h
URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/gcc/config/darwin.h?rev=100679&r1=100678&r2=100679&view=diff
==============================================================================
--- llvm-gcc-4.2/trunk/gcc/config/darwin.h (original)
+++ llvm-gcc-4.2/trunk/gcc/config/darwin.h Wed Apr  7 14:39:57 2010
@@ -240,6 +240,17 @@
   darwin_init_cfstring_builtins ();	\
 } while(0)
 
+/* LLVM LOCAL begin 7563705 */
+#ifdef ENABLE_LLVM
+#define ALLOW_FAPPLE_KEXT_FLAG                                  \
+  (strcmp (lang_hooks.name, "GNU C++") == 0 ||                  \
+   (strcmp (lang_hooks.name, "GNU C") == 0 &&                   \
+    strverscmp (darwin_macosx_version_min, "10.6") > 0))
+#else
+#define ALLOW_FAPPLE_KEXT_FLAG false
+#endif
+/* LLVM LOCAL end 7563705 */
+
 #undef TARGET_EXPAND_TREE_BUILTIN
 #define TARGET_EXPAND_TREE_BUILTIN darwin_expand_tree_builtin
 #undef TARGET_CONSTRUCT_OBJC_STRING
@@ -593,10 +604,16 @@
 /* Default Darwin ASM_SPEC, very simple.  */
 /* APPLE LOCAL begin kext weak_import 5935650 */
 /* APPLE LOCAL begin radar 4161346 */
+/* LLVM LOCAL begin 7563705 */
 #define ASM_SPEC "-arch %(darwin_arch) \
-  %{Zforce_cpusubtype_ALL:-force_cpusubtype_ALL} \
-  %{!Zforce_cpusubtype_ALL:%{faltivec:-force_cpusubtype_ALL}} \
-  %{mkernel|static|fapple-kext:%{!Zdynamic:-static}}"
+  %{Zforce_cpusubtype_ALL:-force_cpusubtype_ALL}                        \
+  %{!Zforce_cpusubtype_ALL:%{faltivec:-force_cpusubtype_ALL}}           \
+  %{!m64:                                                               \
+    %{!fapple-kext:%{mkernel|static:-static}}                           \
+    %{fapple-kext:                                                      \
+      %:version-compare(>= 10.7 mmacosx-version-min= -dynamic)          \
+      %:version-compare(<  10.7 mmacosx-version-min= -static)}}"
+/* LLVM LOCAL end 7563705 */
 /* APPLE LOCAL end radar 4161346 */
 /* APPLE LOCAL end kext weak_import 5935650 */
 /* APPLE LOCAL begin mainline 4.3 2006-10-31 4370143 */

Modified: llvm-gcc-4.2/trunk/gcc/config/i386/darwin.h
URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/gcc/config/i386/darwin.h?rev=100679&r1=100678&r2=100679&view=diff
==============================================================================
--- llvm-gcc-4.2/trunk/gcc/config/i386/darwin.h (original)
+++ llvm-gcc-4.2/trunk/gcc/config/i386/darwin.h Wed Apr  7 14:39:57 2010
@@ -113,8 +113,14 @@
 /* APPLE LOCAL begin mainline */
 #undef ASM_SPEC
 /* APPLE LOCAL begin kext weak_import 5935650 */
+/* LLVM LOCAL begin 7563705 */
 #define ASM_SPEC "-arch %(darwin_arch) -force_cpusubtype_ALL \
-  %{mkernel|static|fapple-kext:%{!m64:-static}}"
+  %{!m64:                                                               \
+    %{!fapple-kext:%{mkernel|static:-static}}                           \
+    %{fapple-kext:                                                      \
+      %:version-compare(>= 10.7 mmacosx-version-min= -dynamic)          \
+      %:version-compare(<  10.7 mmacosx-version-min= -static)}}"
+/* LLVM LOCAL end 7563705 */
 /* APPLE LOCAL end kext weak_import 5935650 */
 
 #define DARWIN_ARCH_SPEC "%{m64:x86_64;:i386}"

Modified: llvm-gcc-4.2/trunk/gcc/config/i386/darwin64.h
URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/gcc/config/i386/darwin64.h?rev=100679&r1=100678&r2=100679&view=diff
==============================================================================
--- llvm-gcc-4.2/trunk/gcc/config/i386/darwin64.h (original)
+++ llvm-gcc-4.2/trunk/gcc/config/i386/darwin64.h Wed Apr  7 14:39:57 2010
@@ -30,8 +30,14 @@
 
 /* APPLE LOCAL begin kext 6400713 */
 #undef ASM_SPEC
-#define ASM_SPEC "-arch %(darwin_arch) -force_cpusubtype_ALL \
-  %{mkernel|static|fapple-kext:%{m32:-static}}"
+/* LLVM LOCAL begin 7563705 */
+#define ASM_SPEC "-arch %(darwin_arch) -force_cpusubtype_ALL    \
+  %{!m64:                                                       \
+    %{!fapple-kext:%{mkernel|static:-static}}                   \
+    %{fapple-kext:                                              \
+      %:version-compare(>= 10.7 mmacosx-version-min= -dynamic)  \
+      %:version-compare(<  10.7 mmacosx-version-min= -static)}}"
+/* LLVM LOCAL end 7563705 */
 /* APPLE LOCAL end kext 6400713 */
 
 #undef SUBTARGET_EXTRA_SPECS

Modified: llvm-gcc-4.2/trunk/gcc/opts.c
URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/gcc/opts.c?rev=100679&r1=100678&r2=100679&view=diff
==============================================================================
--- llvm-gcc-4.2/trunk/gcc/opts.c (original)
+++ llvm-gcc-4.2/trunk/gcc/opts.c Wed Apr  7 14:39:57 2010
@@ -282,7 +282,14 @@
   /* APPLE LOCAL begin iframework for 4.3 4094959 */
   else if ((option->flags & CL_TARGET)
 	   && (option->flags & CL_LANG_ALL)
-	   && !(option->flags & lang_mask))
+           /* LLVM LOCAL begin 7563705 */
+	   && !(option->flags & lang_mask)
+#ifdef ALLOW_FAPPLE_KEXT_FLAG
+           && (strcmp (*argv, "-fapple-kext") != 0 ||
+               ! ALLOW_FAPPLE_KEXT_FLAG)
+#endif
+           )
+           /* LLVM LOCAL end 7563705 */
     {
       /* Complain for target flag language mismatches if any languages
 	 are specified.  */




From isanbard at gmail.com  Wed Apr  7 14:43:16 2010
From: isanbard at gmail.com (Bill Wendling)
Date: Wed, 07 Apr 2010 19:43:16 -0000
Subject: [llvm-commits] [llvm-gcc-4.2] r100680 - in /llvm-gcc-4.2/trunk/gcc:
 config/darwin.c config/darwin.h config/i386/darwin.h config/i386/darwin64.h
 opts.c
Message-ID: <20100407194316.7D18D2A6C12C@llvm.org>

Author: void
Date: Wed Apr  7 14:43:16 2010
New Revision: 100680

URL: http://llvm.org/viewvc/llvm-project?rev=100680&view=rev
Log:
Revert r100679. Too soon.

Modified:
    llvm-gcc-4.2/trunk/gcc/config/darwin.c
    llvm-gcc-4.2/trunk/gcc/config/darwin.h
    llvm-gcc-4.2/trunk/gcc/config/i386/darwin.h
    llvm-gcc-4.2/trunk/gcc/config/i386/darwin64.h
    llvm-gcc-4.2/trunk/gcc/opts.c

Modified: llvm-gcc-4.2/trunk/gcc/config/darwin.c
URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/gcc/config/darwin.c?rev=100680&r1=100679&r2=100680&view=diff
==============================================================================
--- llvm-gcc-4.2/trunk/gcc/config/darwin.c (original)
+++ llvm-gcc-4.2/trunk/gcc/config/darwin.c Wed Apr  7 14:43:16 2010
@@ -2712,9 +2712,16 @@
 void
 darwin_override_options (void)
 {
-  /* LLVM LOCAL begin 7563705 */
-  /* Removed. */
-  /* LLVM LOCAL begin 7563705 */
+  /* APPLE LOCAL begin for iframework for 4.3 4094959 */
+  /* Remove this: */
+#if 0
+  if (flag_apple_kext && strcmp (lang_hooks.name, "GNU C++") != 0)
+    {
+      warning (0, "command line option %<-fapple-kext%> is only valid for C++");
+      flag_apple_kext = 0;
+    }
+#endif
+  /* APPLE LOCAL end for iframework for 4.3 4094959 */
   if (flag_mkernel || flag_apple_kext)
     {
       /* -mkernel implies -fapple-kext for C++ */
@@ -2734,14 +2741,6 @@
 	  ! TARGET_SUPPORTS_KEXTABI1)
 	flag_apple_kext = 2;
       /* APPLE LOCAL end kext v2 */
-      /* LLVM LOCAL begin 7563705 */
-#ifdef ENABLE_LLVM
-      if (flag_apple_kext
-          && strverscmp (darwin_macosx_version_min, "10.6") > 0
-          && ! TARGET_64BIT)
-        target_flags |= MASK_MACHO_DYNAMIC_NO_PIC;
-#endif
-      /* LLVM LOCAL end 7563705 */
     }
   /* APPLE LOCAL begin axe stubs 5571540 */
   /* APPLE LOCAL begin ARM 5683689 */

Modified: llvm-gcc-4.2/trunk/gcc/config/darwin.h
URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/gcc/config/darwin.h?rev=100680&r1=100679&r2=100680&view=diff
==============================================================================
--- llvm-gcc-4.2/trunk/gcc/config/darwin.h (original)
+++ llvm-gcc-4.2/trunk/gcc/config/darwin.h Wed Apr  7 14:43:16 2010
@@ -240,17 +240,6 @@
   darwin_init_cfstring_builtins ();	\
 } while(0)
 
-/* LLVM LOCAL begin 7563705 */
-#ifdef ENABLE_LLVM
-#define ALLOW_FAPPLE_KEXT_FLAG                                  \
-  (strcmp (lang_hooks.name, "GNU C++") == 0 ||                  \
-   (strcmp (lang_hooks.name, "GNU C") == 0 &&                   \
-    strverscmp (darwin_macosx_version_min, "10.6") > 0))
-#else
-#define ALLOW_FAPPLE_KEXT_FLAG false
-#endif
-/* LLVM LOCAL end 7563705 */
-
 #undef TARGET_EXPAND_TREE_BUILTIN
 #define TARGET_EXPAND_TREE_BUILTIN darwin_expand_tree_builtin
 #undef TARGET_CONSTRUCT_OBJC_STRING
@@ -604,16 +593,10 @@
 /* Default Darwin ASM_SPEC, very simple.  */
 /* APPLE LOCAL begin kext weak_import 5935650 */
 /* APPLE LOCAL begin radar 4161346 */
-/* LLVM LOCAL begin 7563705 */
 #define ASM_SPEC "-arch %(darwin_arch) \
-  %{Zforce_cpusubtype_ALL:-force_cpusubtype_ALL}                        \
-  %{!Zforce_cpusubtype_ALL:%{faltivec:-force_cpusubtype_ALL}}           \
-  %{!m64:                                                               \
-    %{!fapple-kext:%{mkernel|static:-static}}                           \
-    %{fapple-kext:                                                      \
-      %:version-compare(>= 10.7 mmacosx-version-min= -dynamic)          \
-      %:version-compare(<  10.7 mmacosx-version-min= -static)}}"
-/* LLVM LOCAL end 7563705 */
+  %{Zforce_cpusubtype_ALL:-force_cpusubtype_ALL} \
+  %{!Zforce_cpusubtype_ALL:%{faltivec:-force_cpusubtype_ALL}} \
+  %{mkernel|static|fapple-kext:%{!Zdynamic:-static}}"
 /* APPLE LOCAL end radar 4161346 */
 /* APPLE LOCAL end kext weak_import 5935650 */
 /* APPLE LOCAL begin mainline 4.3 2006-10-31 4370143 */

Modified: llvm-gcc-4.2/trunk/gcc/config/i386/darwin.h
URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/gcc/config/i386/darwin.h?rev=100680&r1=100679&r2=100680&view=diff
==============================================================================
--- llvm-gcc-4.2/trunk/gcc/config/i386/darwin.h (original)
+++ llvm-gcc-4.2/trunk/gcc/config/i386/darwin.h Wed Apr  7 14:43:16 2010
@@ -113,14 +113,8 @@
 /* APPLE LOCAL begin mainline */
 #undef ASM_SPEC
 /* APPLE LOCAL begin kext weak_import 5935650 */
-/* LLVM LOCAL begin 7563705 */
 #define ASM_SPEC "-arch %(darwin_arch) -force_cpusubtype_ALL \
-  %{!m64:                                                               \
-    %{!fapple-kext:%{mkernel|static:-static}}                           \
-    %{fapple-kext:                                                      \
-      %:version-compare(>= 10.7 mmacosx-version-min= -dynamic)          \
-      %:version-compare(<  10.7 mmacosx-version-min= -static)}}"
-/* LLVM LOCAL end 7563705 */
+  %{mkernel|static|fapple-kext:%{!m64:-static}}"
 /* APPLE LOCAL end kext weak_import 5935650 */
 
 #define DARWIN_ARCH_SPEC "%{m64:x86_64;:i386}"

Modified: llvm-gcc-4.2/trunk/gcc/config/i386/darwin64.h
URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/gcc/config/i386/darwin64.h?rev=100680&r1=100679&r2=100680&view=diff
==============================================================================
--- llvm-gcc-4.2/trunk/gcc/config/i386/darwin64.h (original)
+++ llvm-gcc-4.2/trunk/gcc/config/i386/darwin64.h Wed Apr  7 14:43:16 2010
@@ -30,14 +30,8 @@
 
 /* APPLE LOCAL begin kext 6400713 */
 #undef ASM_SPEC
-/* LLVM LOCAL begin 7563705 */
-#define ASM_SPEC "-arch %(darwin_arch) -force_cpusubtype_ALL    \
-  %{!m64:                                                       \
-    %{!fapple-kext:%{mkernel|static:-static}}                   \
-    %{fapple-kext:                                              \
-      %:version-compare(>= 10.7 mmacosx-version-min= -dynamic)  \
-      %:version-compare(<  10.7 mmacosx-version-min= -static)}}"
-/* LLVM LOCAL end 7563705 */
+#define ASM_SPEC "-arch %(darwin_arch) -force_cpusubtype_ALL \
+  %{mkernel|static|fapple-kext:%{m32:-static}}"
 /* APPLE LOCAL end kext 6400713 */
 
 #undef SUBTARGET_EXTRA_SPECS

Modified: llvm-gcc-4.2/trunk/gcc/opts.c
URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/gcc/opts.c?rev=100680&r1=100679&r2=100680&view=diff
==============================================================================
--- llvm-gcc-4.2/trunk/gcc/opts.c (original)
+++ llvm-gcc-4.2/trunk/gcc/opts.c Wed Apr  7 14:43:16 2010
@@ -282,14 +282,7 @@
   /* APPLE LOCAL begin iframework for 4.3 4094959 */
   else if ((option->flags & CL_TARGET)
 	   && (option->flags & CL_LANG_ALL)
-           /* LLVM LOCAL begin 7563705 */
-	   && !(option->flags & lang_mask)
-#ifdef ALLOW_FAPPLE_KEXT_FLAG
-           && (strcmp (*argv, "-fapple-kext") != 0 ||
-               ! ALLOW_FAPPLE_KEXT_FLAG)
-#endif
-           )
-           /* LLVM LOCAL end 7563705 */
+	   && !(option->flags & lang_mask))
     {
       /* Complain for target flag language mismatches if any languages
 	 are specified.  */




From dalej at apple.com  Wed Apr  7 14:51:45 2010
From: dalej at apple.com (Dale Johannesen)
Date: Wed, 07 Apr 2010 19:51:45 -0000
Subject: [llvm-commits] [llvm] r100681 - in /llvm/trunk/lib/Target:
 ARM/ARMBaseInstrInfo.cpp MSP430/MSP430InstrInfo.cpp
 PowerPC/PPCInstrInfo.cpp X86/X86InstrInfo.cpp
Message-ID: <20100407195145.274DF2A6C12C@llvm.org>

Author: johannes
Date: Wed Apr  7 14:51:44 2010
New Revision: 100681

URL: http://llvm.org/viewvc/llvm-project?rev=100681&view=rev
Log:
Educate GetInstrSizeInBytes implementations that
DBG_VALUE does not generate code.


Modified:
    llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.cpp
    llvm/trunk/lib/Target/MSP430/MSP430InstrInfo.cpp
    llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.cpp
    llvm/trunk/lib/Target/X86/X86InstrInfo.cpp

Modified: llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.cpp?rev=100681&r1=100680&r2=100681&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.cpp Wed Apr  7 14:51:44 2010
@@ -467,6 +467,7 @@
     case TargetOpcode::KILL:
     case TargetOpcode::DBG_LABEL:
     case TargetOpcode::EH_LABEL:
+    case TargetOpcode::DBG_VALUE:
       return 0;
     }
     break;

Modified: llvm/trunk/lib/Target/MSP430/MSP430InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/MSP430/MSP430InstrInfo.cpp?rev=100681&r1=100680&r2=100681&view=diff
==============================================================================
--- llvm/trunk/lib/Target/MSP430/MSP430InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/MSP430/MSP430InstrInfo.cpp Wed Apr  7 14:51:44 2010
@@ -365,6 +365,7 @@
     case TargetOpcode::EH_LABEL:
     case TargetOpcode::IMPLICIT_DEF:
     case TargetOpcode::KILL:
+    case TargetOpcode::DBG_VALUE:
       return 0;
     case TargetOpcode::INLINEASM: {
       const MachineFunction *MF = MI->getParent()->getParent();

Modified: llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.cpp?rev=100681&r1=100680&r2=100681&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.cpp Wed Apr  7 14:51:44 2010
@@ -778,6 +778,7 @@
   case PPC::DBG_LABEL:
   case PPC::EH_LABEL:
   case PPC::GC_LABEL:
+  case PPC::DBG_VALUE:
     return 0;
   default:
     return 4; // PowerPC instructions are all 4 bytes

Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=100681&r1=100680&r2=100681&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Wed Apr  7 14:51:44 2010
@@ -3406,6 +3406,7 @@
     }
     case TargetOpcode::DBG_LABEL:
     case TargetOpcode::EH_LABEL:
+    case TargetOpcode::DBG_VALUE:
       break;
     case TargetOpcode::IMPLICIT_DEF:
     case TargetOpcode::KILL:




From dalej at apple.com  Wed Apr  7 15:01:25 2010
From: dalej at apple.com (Dale Johannesen)
Date: Wed, 07 Apr 2010 20:01:25 -0000
Subject: [llvm-commits] [llvm] r100682 -
 /llvm/trunk/test/DebugInfo/2010-04-07-DbgValueOtherTargets.ll
Message-ID: <20100407200125.21F392A6C12C@llvm.org>

Author: johannes
Date: Wed Apr  7 15:01:24 2010
New Revision: 100682

URL: http://llvm.org/viewvc/llvm-project?rev=100682&view=rev
Log:
Test that DEBUG_VALUE comments come out on a variety of targets.


Added:
    llvm/trunk/test/DebugInfo/2010-04-07-DbgValueOtherTargets.ll

Added: llvm/trunk/test/DebugInfo/2010-04-07-DbgValueOtherTargets.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/DebugInfo/2010-04-07-DbgValueOtherTargets.ll?rev=100682&view=auto
==============================================================================
--- llvm/trunk/test/DebugInfo/2010-04-07-DbgValueOtherTargets.ll (added)
+++ llvm/trunk/test/DebugInfo/2010-04-07-DbgValueOtherTargets.ll Wed Apr  7 15:01:24 2010
@@ -0,0 +1,45 @@
+; RUN: llc -O0 -march=ppc32 -asm-verbose < %s | FileCheck %s
+; RUN: llc -O0 -march=sparc -asm-verbose < %s | FileCheck %s
+; RUN: llc -O0 -march=x86 -asm-verbose < %s | FileCheck %s
+; RUN: llc -O0 -march=x86-64 -asm-verbose < %s | FileCheck %s
+; RUN: llc -O0 -march=arm -asm-verbose < %s | FileCheck %s
+; RUN: llc -O0 -march=thumb -asm-verbose < %s | FileCheck %s
+; RUN: llc -O0 -march=alpha -asm-verbose < %s | FileCheck %s
+; RUN: llc -O0 -march=cellspu -asm-verbose < %s | FileCheck %s
+; RUN: llc -O0 -march=mblaze -asm-verbose < %s | FileCheck %s
+; RUN: llc -O0 -march=mips -asm-verbose < %s | FileCheck %s
+; RUN: llc -O0 -march=msp430 -asm-verbose < %s | FileCheck %s
+; RUN: llc -O0 -march=systemz -asm-verbose < %s | FileCheck %s
+; RUN: llc -O0 -march=xcore -asm-verbose < %s | FileCheck %s
+; Check that DEBUG_VALUE comments come through on a variety of targets.
+
+%tart.reflect.ComplexType = type { double, double }
+
+@.type.SwitchStmtTest = constant %tart.reflect.ComplexType { double 3.0, double 2.0 }
+
+define i32 @"main(tart.core.String[])->int32"(i32 %args) {
+entry:
+; CHECK: DEBUG_VALUE
+  tail call void @llvm.dbg.value(metadata !14, i64 0, metadata !8)
+  tail call void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType* @.type.SwitchStmtTest) ; <%tart.core.Object*> [#uses=2]
+  ret i32 3
+}
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType*) nounwind readnone
+
+!0 = metadata !{i32 458769, i32 0, i32 1, metadata !"sm.c", metadata !"/Volumes/MacOS9/tests/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 458790, metadata !0, metadata !"", metadata !0, i32 0, i64 192, i64 64, i64 0, i32 0, metadata !2} ; [ DW_TAG_const_type ]
+!2 = metadata !{i32 458771, metadata !0, metadata !"C", metadata !0, i32 1, i64 192, i64 64, i64 0, i32 0, null, metadata !3, i32 0, null} ; [ DW_TAG_structure_type ]
+!3 = metadata !{metadata !4, metadata !6, metadata !7}
+!4 = metadata !{i32 458765, metadata !2, metadata !"x", metadata !0, i32 1, i64 64, i64 64, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ]
+!5 = metadata !{i32 458788, metadata !0, metadata !"double", metadata !0, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 458765, metadata !2, metadata !"y", metadata !0, i32 1, i64 64, i64 64, i64 64, i32 0, metadata !5} ; [ DW_TAG_member ]
+!7 = metadata !{i32 458765, metadata !2, metadata !"z", metadata !0, i32 1, i64 64, i64 64, i64 128, i32 0, metadata !5} ; [ DW_TAG_member ]
+!8 = metadata !{i32 459008, metadata !9, metadata !"t", metadata !0, i32 5, metadata !2} ; [ DW_TAG_auto_variable ]
+!9 = metadata !{i32 458763, metadata !10}        ; [ DW_TAG_lexical_block ]
+!10 = metadata !{i32 458798, i32 0, metadata !0, metadata !"foo", metadata !"foo", metadata !"foo", metadata !0, i32 4, metadata !11, i1 false, i1 true, i32 0, i32 0, null} ; [ DW_TAG_subprogram ]
+!11 = metadata !{i32 458773, metadata !0, metadata !"", metadata !0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!12 = metadata !{metadata !13}
+!13 = metadata !{i32 458788, metadata !0, metadata !"int", metadata !0, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!14 = metadata !{%tart.reflect.ComplexType* @.type.SwitchStmtTest}




From stoklund at 2pi.dk  Wed Apr  7 15:12:43 2010
From: stoklund at 2pi.dk (Jakob Stoklund Olesen)
Date: Wed, 07 Apr 2010 20:12:43 -0000
Subject: [llvm-commits] [llvm-gcc-4.2] r100683 -
	/llvm-gcc-4.2/trunk/gcc/llvm-types.cpp
Message-ID: <20100407201243.982A12A6C12C@llvm.org>

Author: stoklund
Date: Wed Apr  7 15:12:43 2010
New Revision: 100683

URL: http://llvm.org/viewvc/llvm-project?rev=100683&view=rev
Log:
Revert "Create larger struct fields for consecutive bitfields."

This reverts commit 100632, it broke a buildbot.

Modified:
    llvm-gcc-4.2/trunk/gcc/llvm-types.cpp

Modified: llvm-gcc-4.2/trunk/gcc/llvm-types.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/gcc/llvm-types.cpp?rev=100683&r1=100682&r2=100683&view=diff
==============================================================================
--- llvm-gcc-4.2/trunk/gcc/llvm-types.cpp (original)
+++ llvm-gcc-4.2/trunk/gcc/llvm-types.cpp Wed Apr  7 15:12:43 2010
@@ -1620,8 +1620,7 @@
     return ~0U;
   }
 
-  void addNewBitField(uint64_t Size, uint64_t Extra,
-                      uint64_t FirstUnallocatedByte);
+  void addNewBitField(uint64_t Size, uint64_t FirstUnallocatedByte);
 
   void dump() const;
 };
@@ -1629,43 +1628,22 @@
 // Add new element which is a bit field. Size is not the size of bit field,
 // but size of bits required to determine type of new Field which will be
 // used to access this bit field.
-// If possible, allocate a field with room for Size+Extra bits.
-void StructTypeConversionInfo::addNewBitField(uint64_t Size, uint64_t Extra,
+void StructTypeConversionInfo::addNewBitField(uint64_t Size,
                                               uint64_t FirstUnallocatedByte) {
 
   // Figure out the LLVM type that we will use for the new field.
   // Note, Size is not necessarily size of the new field. It indicates
   // additional bits required after FirstunallocatedByte to cover new field.
-  const Type *NewFieldTy = 0;
-
-  // First try an ABI-aligned field including (some of) the Extra bits.
-  // This field must satisfy Size <= w && w <= XSize.
-  uint64_t XSize = RoundUpToAlignment(Size + Extra, 8);
-  for (unsigned w = NextPowerOf2(std::min(UINT64_C(64), XSize)/2);
-       w >= Size && w >= 8; w /= 2) {
-    if (TD.isIllegalInteger(w))
-      continue;
-    // Would a w-sized integer field be aligned here?
-    const unsigned a = TD.getABIIntegerTypeAlignment(w);
-    if (FirstUnallocatedByte & (a-1) || a > getGCCStructAlignmentInBytes())
-      continue;
-    // OK, use w-sized integer.
-    NewFieldTy = IntegerType::get(Context, w);
-    break;
-  }
-
-  // Try an integer field that holds Size bits.
-  if (!NewFieldTy) {
-    if (Size <= 8)
-      NewFieldTy = Type::getInt8Ty(Context);
-    else if (Size <= 16)
-      NewFieldTy = Type::getInt16Ty(Context);
-    else if (Size <= 32)
-      NewFieldTy = Type::getInt32Ty(Context);
-    else {
-      assert(Size <= 64 && "Bitfield too large!");
-      NewFieldTy = Type::getInt64Ty(Context);
-    }
+  const Type *NewFieldTy;
+  if (Size <= 8)
+    NewFieldTy = Type::getInt8Ty(Context);
+  else if (Size <= 16)
+    NewFieldTy = Type::getInt16Ty(Context);
+  else if (Size <= 32)
+    NewFieldTy = Type::getInt32Ty(Context);
+  else {
+    assert(Size <= 64 && "Bitfield too large!");
+    NewFieldTy = Type::getInt64Ty(Context);
   }
 
   // Check that the alignment of NewFieldTy won't cause a gap in the structure!
@@ -2009,19 +1987,7 @@
   // LLVM struct such that there are no holes in the struct where the bitfield
   // is: these holes would make it impossible to statically initialize a global
   // of this type that has an initializer for the bitfield.
-
-  // We want the integer-typed fields as large as possible up to the machine
-  // word size. If there are more bitfields following this one, try to include
-  // them in the same field.
-
-  // Calculate the total number of bits in the continuous group of bitfields
-  // following this one. This is the number of bits that addNewBitField should
-  // try to include.
-  unsigned ExtraSizeInBits = 0;
-  for (tree f = TREE_CHAIN(Field); f && ExtraSizeInBits < 64 && isBitfield(f);
-       f = TREE_CHAIN(f))
-    ExtraSizeInBits += TREE_INT_CST_LOW(DECL_SIZE(f));
-
+  
   // Compute the number of bits that we need to add to this struct to cover
   // this field.
   uint64_t FirstUnallocatedByte = Info.getEndUnallocatedByte();
@@ -2035,7 +2001,7 @@
       // This field starts at byte boundry. Need to allocate space
       // for additional bytes not yet allocated.
       unsigned NumBitsToAdd = FieldSizeInBits - AvailableBits;
-      Info.addNewBitField(NumBitsToAdd, ExtraSizeInBits, FirstUnallocatedByte);
+      Info.addNewBitField(NumBitsToAdd, FirstUnallocatedByte);
       return;
     }
 
@@ -2059,7 +2025,7 @@
       for (unsigned idx = 0; idx < (prevFieldTypeSizeInBits/8); ++idx)
 	FirstUnallocatedByte--;
     }
-    Info.addNewBitField(NumBitsRequired, ExtraSizeInBits, FirstUnallocatedByte);
+    Info.addNewBitField(NumBitsRequired, FirstUnallocatedByte);
     // Do this after adding Field.
     Info.lastFieldStartsAtNonByteBoundry(true);
     return;
@@ -2093,7 +2059,7 @@
   }
 
   // Now, Field starts at FirstUnallocatedByte and everything is aligned.
-  Info.addNewBitField(FieldSizeInBits, ExtraSizeInBits, FirstUnallocatedByte);
+  Info.addNewBitField(FieldSizeInBits, FirstUnallocatedByte);
 }
 
 /// UnionHasOnlyZeroOffsets - Check if a union type has only members with
@@ -2165,7 +2131,7 @@
 
     if (isBitfield(UnionField)) {
       unsigned FieldSizeInBits = TREE_INT_CST_LOW(DECL_SIZE(UnionField));
-      Info.addNewBitField(FieldSizeInBits, 0, 0);
+      Info.addNewBitField(FieldSizeInBits, 0);
     } else {
       Info.allFieldsAreNotBitFields();
       Info.addElement(UnionTy, 0, Info.getTypeSize(UnionTy));




From clattner at apple.com  Wed Apr  7 15:21:49 2010
From: clattner at apple.com (Chris Lattner)
Date: Wed, 7 Apr 2010 13:21:49 -0700
Subject: [llvm-commits] [llvm] r100682 -
	/llvm/trunk/test/DebugInfo/2010-04-07-DbgValueOtherTargets.ll
In-Reply-To: <20100407200125.21F392A6C12C@llvm.org>
References: <20100407200125.21F392A6C12C@llvm.org>
Message-ID: 


On Apr 7, 2010, at 1:01 PM, Dale Johannesen wrote:

> Author: johannes
> Date: Wed Apr  7 15:01:24 2010
> New Revision: 100682
> 
> URL: http://llvm.org/viewvc/llvm-project?rev=100682&view=rev
> Log:
> Test that DEBUG_VALUE comments come out on a variety of targets.

This won't work if you've configured llvm for a specific subset of possible targets.  The tests have to go in the test/CodeGen/foo directory.

-Chris

> 
> 
> Added:
>    llvm/trunk/test/DebugInfo/2010-04-07-DbgValueOtherTargets.ll
> 
> Added: llvm/trunk/test/DebugInfo/2010-04-07-DbgValueOtherTargets.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/DebugInfo/2010-04-07-DbgValueOtherTargets.ll?rev=100682&view=auto
> ==============================================================================
> --- llvm/trunk/test/DebugInfo/2010-04-07-DbgValueOtherTargets.ll (added)
> +++ llvm/trunk/test/DebugInfo/2010-04-07-DbgValueOtherTargets.ll Wed Apr  7 15:01:24 2010
> @@ -0,0 +1,45 @@
> +; RUN: llc -O0 -march=ppc32 -asm-verbose < %s | FileCheck %s
> +; RUN: llc -O0 -march=sparc -asm-verbose < %s | FileCheck %s
> +; RUN: llc -O0 -march=x86 -asm-verbose < %s | FileCheck %s
> +; RUN: llc -O0 -march=x86-64 -asm-verbose < %s | FileCheck %s
> +; RUN: llc -O0 -march=arm -asm-verbose < %s | FileCheck %s
> +; RUN: llc -O0 -march=thumb -asm-verbose < %s | FileCheck %s
> +; RUN: llc -O0 -march=alpha -asm-verbose < %s | FileCheck %s
> +; RUN: llc -O0 -march=cellspu -asm-verbose < %s | FileCheck %s
> +; RUN: llc -O0 -march=mblaze -asm-verbose < %s | FileCheck %s
> +; RUN: llc -O0 -march=mips -asm-verbose < %s | FileCheck %s
> +; RUN: llc -O0 -march=msp430 -asm-verbose < %s | FileCheck %s
> +; RUN: llc -O0 -march=systemz -asm-verbose < %s | FileCheck %s
> +; RUN: llc -O0 -march=xcore -asm-verbose < %s | FileCheck %s
> +; Check that DEBUG_VALUE comments come through on a variety of targets.
> +
> +%tart.reflect.ComplexType = type { double, double }
> +
> +@.type.SwitchStmtTest = constant %tart.reflect.ComplexType { double 3.0, double 2.0 }
> +
> +define i32 @"main(tart.core.String[])->int32"(i32 %args) {
> +entry:
> +; CHECK: DEBUG_VALUE
> +  tail call void @llvm.dbg.value(metadata !14, i64 0, metadata !8)
> +  tail call void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType* @.type.SwitchStmtTest) ; <%tart.core.Object*> [#uses=2]
> +  ret i32 3
> +}
> +
> +declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
> +declare void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType*) nounwind readnone
> +
> +!0 = metadata !{i32 458769, i32 0, i32 1, metadata !"sm.c", metadata !"/Volumes/MacOS9/tests/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
> +!1 = metadata !{i32 458790, metadata !0, metadata !"", metadata !0, i32 0, i64 192, i64 64, i64 0, i32 0, metadata !2} ; [ DW_TAG_const_type ]
> +!2 = metadata !{i32 458771, metadata !0, metadata !"C", metadata !0, i32 1, i64 192, i64 64, i64 0, i32 0, null, metadata !3, i32 0, null} ; [ DW_TAG_structure_type ]
> +!3 = metadata !{metadata !4, metadata !6, metadata !7}
> +!4 = metadata !{i32 458765, metadata !2, metadata !"x", metadata !0, i32 1, i64 64, i64 64, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ]
> +!5 = metadata !{i32 458788, metadata !0, metadata !"double", metadata !0, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
> +!6 = metadata !{i32 458765, metadata !2, metadata !"y", metadata !0, i32 1, i64 64, i64 64, i64 64, i32 0, metadata !5} ; [ DW_TAG_member ]
> +!7 = metadata !{i32 458765, metadata !2, metadata !"z", metadata !0, i32 1, i64 64, i64 64, i64 128, i32 0, metadata !5} ; [ DW_TAG_member ]
> +!8 = metadata !{i32 459008, metadata !9, metadata !"t", metadata !0, i32 5, metadata !2} ; [ DW_TAG_auto_variable ]
> +!9 = metadata !{i32 458763, metadata !10}        ; [ DW_TAG_lexical_block ]
> +!10 = metadata !{i32 458798, i32 0, metadata !0, metadata !"foo", metadata !"foo", metadata !"foo", metadata !0, i32 4, metadata !11, i1 false, i1 true, i32 0, i32 0, null} ; [ DW_TAG_subprogram ]
> +!11 = metadata !{i32 458773, metadata !0, metadata !"", metadata !0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_subroutine_type ]
> +!12 = metadata !{metadata !13}
> +!13 = metadata !{i32 458788, metadata !0, metadata !"int", metadata !0, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
> +!14 = metadata !{%tart.reflect.ComplexType* @.type.SwitchStmtTest}
> 
> 
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits




From scallanan at apple.com  Wed Apr  7 15:29:34 2010
From: scallanan at apple.com (Sean Callanan)
Date: Wed, 07 Apr 2010 20:29:34 -0000
Subject: [llvm-commits] [llvm] r100685 - in
 /llvm/trunk/lib/Target/ARM/AsmParser: ARMAsmLexer.cpp ARMAsmParser.cpp
Message-ID: <20100407202934.B83662A6C12C@llvm.org>

Author: spyffe
Date: Wed Apr  7 15:29:34 2010
New Revision: 100685

URL: http://llvm.org/viewvc/llvm-project?rev=100685&view=rev
Log:
Added an AsmLexer for the ARM target, which uses
a simple mapping of register names to IDs to
identify register tokens.

Added:
    llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp
Modified:
    llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp

Added: llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp?rev=100685&view=auto
==============================================================================
--- llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp (added)
+++ llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp Wed Apr  7 15:29:34 2010
@@ -0,0 +1,140 @@
+//===-- ARMAsmLexer.cpp - Tokenize ARM assembly to AsmTokens --------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMTargetMachine.h"
+
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+
+#include "llvm/Target/TargetAsmLexer.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegistry.h"
+
+#include <string>
+#include <map>
+
+using namespace llvm;
+
+namespace {
+  
+  class ARMBaseAsmLexer : public TargetAsmLexer {
+    const MCAsmInfo &AsmInfo;
+    
+    const AsmToken &lexDefinite() {
+      return getLexer()->Lex();
+    }
+    
+    AsmToken LexTokenUAL();
+  protected:
+    typedef std::map <std::string, unsigned> rmap_ty;
+    
+    rmap_ty RegisterMap;
+    
+    void InitRegisterMap(const TargetRegisterInfo *info) {
+      unsigned numRegs = info->getNumRegs();
+
+      for (unsigned i = 0; i < numRegs; ++i) {
+        const char *regName = info->getName(i);
+        if (regName)
+          RegisterMap[regName] = i;
+      }
+    }
+    
+    unsigned MatchRegisterName(StringRef Name) {
+      rmap_ty::iterator iter = RegisterMap.find(Name.str());
+      if (iter != RegisterMap.end())
+        return iter->second;
+      else
+        return 0;
+    }
+    
+    AsmToken LexToken() {
+      if (!Lexer) {
+        SetError(SMLoc(), "No MCAsmLexer installed");
+        return AsmToken(AsmToken::Error, "", 0);
+      }
+      
+      switch (AsmInfo.getAssemblerDialect()) {
+      default:
+        SetError(SMLoc(), "Unhandled dialect");
+        return AsmToken(AsmToken::Error, "", 0);
+      case 0:
+        return LexTokenUAL();
+      }
+    }
+  public:
+    ARMBaseAsmLexer(const Target &T, const MCAsmInfo &MAI)
+      : TargetAsmLexer(T), AsmInfo(MAI) {
+    }
+  };
+  
+  class ARMAsmLexer : public ARMBaseAsmLexer {
+  public:
+    ARMAsmLexer(const Target &T, const MCAsmInfo &MAI)
+      : ARMBaseAsmLexer(T, MAI) {
+      std::string tripleString("arm-unknown-unknown");
+      std::string featureString;
+      OwningPtr<const TargetMachine> 
+        targetMachine(T.createTargetMachine(tripleString, featureString));
+      InitRegisterMap(targetMachine->getRegisterInfo());
+    }
+  };
+  
+  class ThumbAsmLexer : public ARMBaseAsmLexer {
+  public:
+    ThumbAsmLexer(const Target &T, const MCAsmInfo &MAI)
+      : ARMBaseAsmLexer(T, MAI) {
+      std::string tripleString("thumb-unknown-unknown");
+      std::string featureString;
+      OwningPtr<const TargetMachine> 
+        targetMachine(T.createTargetMachine(tripleString, featureString));
+      InitRegisterMap(targetMachine->getRegisterInfo());
+    }
+  };
+}
+
+AsmToken ARMBaseAsmLexer::LexTokenUAL() {
+  const AsmToken &lexedToken = lexDefinite();
+  
+  switch (lexedToken.getKind()) {
+  default:
+    return AsmToken(lexedToken);
+  case AsmToken::Error:
+    SetError(Lexer->getErrLoc(), Lexer->getErr());
+    return AsmToken(lexedToken);
+  case AsmToken::Identifier:
+  {
+    std::string upperCase = lexedToken.getString().str();
+    std::string lowerCase = LowercaseString(upperCase);
+    StringRef lowerRef(lowerCase);
+    
+    unsigned regID = MatchRegisterName(lowerRef);
+    
+    if (regID) {
+      return AsmToken(AsmToken::Register,
+                      lexedToken.getString(),
+                      static_cast<int64_t>(regID));
+    } else {
+      return AsmToken(lexedToken);
+    }
+  }
+  }
+}
+
+extern "C" void LLVMInitializeARMAsmLexer() {
+  RegisterAsmLexer<ARMAsmLexer> X(TheARMTarget);
+  RegisterAsmLexer<ThumbAsmLexer> Y(TheThumbTarget);
+}
+

Modified: llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp?rev=100685&r1=100684&r2=100685&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp (original)
+++ llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp Wed Apr  7 15:29:34 2010
@@ -812,8 +812,11 @@
   return false;
 }
 
+extern "C" void LLVMInitializeARMAsmLexer();
+
 /// Force static initialization.
 extern "C" void LLVMInitializeARMAsmParser() {
   RegisterAsmParser<ARMAsmParser> X(TheARMTarget);
   RegisterAsmParser<ARMAsmParser> Y(TheThumbTarget);
+  LLVMInitializeARMAsmLexer();
 }




From dalej at apple.com  Wed Apr  7 15:43:35 2010
From: dalej at apple.com (Dale Johannesen)
Date: Wed, 07 Apr 2010 20:43:35 -0000
Subject: [llvm-commits] [llvm] r100688 - in /llvm/trunk/test: CodeGen/ARM/
 CodeGen/Alpha/ CodeGen/CellSPU/ CodeGen/MBlaze/ CodeGen/MSP430/
 CodeGen/Mips/ CodeGen/PowerPC/ CodeGen/SPARC/ CodeGen/SystemZ/
 CodeGen/Thumb/ CodeGen/X86/ CodeGen/XCore/ DebugInfo/
Message-ID: <20100407204335.476A92A6C12C@llvm.org>

Author: johannes
Date: Wed Apr  7 15:43:35 2010
New Revision: 100688

URL: http://llvm.org/viewvc/llvm-project?rev=100688&view=rev
Log:
Split big test into multiple directories to cater to
those who don't build all targets.


Added:
    llvm/trunk/test/CodeGen/ARM/2010-04-07-DbgValueOtherTargets.ll
    llvm/trunk/test/CodeGen/Alpha/2010-04-07-DbgValueOtherTargets.ll
    llvm/trunk/test/CodeGen/CellSPU/2010-04-07-DbgValueOtherTargets.ll
    llvm/trunk/test/CodeGen/MBlaze/2010-04-07-DbgValueOtherTargets.ll
    llvm/trunk/test/CodeGen/MSP430/2010-04-07-DbgValueOtherTargets.ll
    llvm/trunk/test/CodeGen/Mips/2010-04-07-DbgValueOtherTargets.ll
    llvm/trunk/test/CodeGen/PowerPC/2010-04-07-DbgValueOtherTargets.ll
    llvm/trunk/test/CodeGen/SPARC/2010-04-07-DbgValueOtherTargets.ll
    llvm/trunk/test/CodeGen/SystemZ/2010-04-07-DbgValueOtherTargets.ll
    llvm/trunk/test/CodeGen/Thumb/2010-04-07-DbgValueOtherTargets.ll
    llvm/trunk/test/CodeGen/X86/2010-04-07-DbgValueOtherTargets.ll
    llvm/trunk/test/CodeGen/XCore/2010-04-07-DbgValueOtherTargets.ll
Removed:
    llvm/trunk/test/DebugInfo/2010-04-07-DbgValueOtherTargets.ll

Added: llvm/trunk/test/CodeGen/ARM/2010-04-07-DbgValueOtherTargets.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/2010-04-07-DbgValueOtherTargets.ll?rev=100688&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/2010-04-07-DbgValueOtherTargets.ll (added)
+++ llvm/trunk/test/CodeGen/ARM/2010-04-07-DbgValueOtherTargets.ll Wed Apr  7 15:43:35 2010
@@ -0,0 +1,33 @@
+; RUN: llc -O0 -march=arm -asm-verbose < %s | FileCheck %s
+; Check that DEBUG_VALUE comments come through on a variety of targets.
+
+%tart.reflect.ComplexType = type { double, double }
+
+@.type.SwitchStmtTest = constant %tart.reflect.ComplexType { double 3.0, double 2.0 }
+
+define i32 @"main(tart.core.String[])->int32"(i32 %args) {
+entry:
+; CHECK: DEBUG_VALUE
+  tail call void @llvm.dbg.value(metadata !14, i64 0, metadata !8)
+  tail call void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType* @.type.SwitchStmtTest) ; <%tart.core.Object*> [#uses=2]
+  ret i32 3
+}
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType*) nounwind readnone
+
+!0 = metadata !{i32 458769, i32 0, i32 1, metadata !"sm.c", metadata !"/Volumes/MacOS9/tests/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 458790, metadata !0, metadata !"", metadata !0, i32 0, i64 192, i64 64, i64 0, i32 0, metadata !2} ; [ DW_TAG_const_type ]
+!2 = metadata !{i32 458771, metadata !0, metadata !"C", metadata !0, i32 1, i64 192, i64 64, i64 0, i32 0, null, metadata !3, i32 0, null} ; [ DW_TAG_structure_type ]
+!3 = metadata !{metadata !4, metadata !6, metadata !7}
+!4 = metadata !{i32 458765, metadata !2, metadata !"x", metadata !0, i32 1, i64 64, i64 64, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ]
+!5 = metadata !{i32 458788, metadata !0, metadata !"double", metadata !0, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 458765, metadata !2, metadata !"y", metadata !0, i32 1, i64 64, i64 64, i64 64, i32 0, metadata !5} ; [ DW_TAG_member ]
+!7 = metadata !{i32 458765, metadata !2, metadata !"z", metadata !0, i32 1, i64 64, i64 64, i64 128, i32 0, metadata !5} ; [ DW_TAG_member ]
+!8 = metadata !{i32 459008, metadata !9, metadata !"t", metadata !0, i32 5, metadata !2} ; [ DW_TAG_auto_variable ]
+!9 = metadata !{i32 458763, metadata !10}        ; [ DW_TAG_lexical_block ]
+!10 = metadata !{i32 458798, i32 0, metadata !0, metadata !"foo", metadata !"foo", metadata !"foo", metadata !0, i32 4, metadata !11, i1 false, i1 true, i32 0, i32 0, null} ; [ DW_TAG_subprogram ]
+!11 = metadata !{i32 458773, metadata !0, metadata !"", metadata !0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!12 = metadata !{metadata !13}
+!13 = metadata !{i32 458788, metadata !0, metadata !"int", metadata !0, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!14 = metadata !{%tart.reflect.ComplexType* @.type.SwitchStmtTest}

Added: llvm/trunk/test/CodeGen/Alpha/2010-04-07-DbgValueOtherTargets.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Alpha/2010-04-07-DbgValueOtherTargets.ll?rev=100688&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/Alpha/2010-04-07-DbgValueOtherTargets.ll (added)
+++ llvm/trunk/test/CodeGen/Alpha/2010-04-07-DbgValueOtherTargets.ll Wed Apr  7 15:43:35 2010
@@ -0,0 +1,33 @@
+; RUN: llc -O0 -march=alpha -asm-verbose < %s | FileCheck %s
+; Check that DEBUG_VALUE comments come through on a variety of targets.
+
+%tart.reflect.ComplexType = type { double, double }
+
+@.type.SwitchStmtTest = constant %tart.reflect.ComplexType { double 3.0, double 2.0 }
+
+define i32 @"main(tart.core.String[])->int32"(i32 %args) {
+entry:
+; CHECK: DEBUG_VALUE
+  tail call void @llvm.dbg.value(metadata !14, i64 0, metadata !8)
+  tail call void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType* @.type.SwitchStmtTest) ; <%tart.core.Object*> [#uses=2]
+  ret i32 3
+}
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType*) nounwind readnone
+
+!0 = metadata !{i32 458769, i32 0, i32 1, metadata !"sm.c", metadata !"/Volumes/MacOS9/tests/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 458790, metadata !0, metadata !"", metadata !0, i32 0, i64 192, i64 64, i64 0, i32 0, metadata !2} ; [ DW_TAG_const_type ]
+!2 = metadata !{i32 458771, metadata !0, metadata !"C", metadata !0, i32 1, i64 192, i64 64, i64 0, i32 0, null, metadata !3, i32 0, null} ; [ DW_TAG_structure_type ]
+!3 = metadata !{metadata !4, metadata !6, metadata !7}
+!4 = metadata !{i32 458765, metadata !2, metadata !"x", metadata !0, i32 1, i64 64, i64 64, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ]
+!5 = metadata !{i32 458788, metadata !0, metadata !"double", metadata !0, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 458765, metadata !2, metadata !"y", metadata !0, i32 1, i64 64, i64 64, i64 64, i32 0, metadata !5} ; [ DW_TAG_member ]
+!7 = metadata !{i32 458765, metadata !2, metadata !"z", metadata !0, i32 1, i64 64, i64 64, i64 128, i32 0, metadata !5} ; [ DW_TAG_member ]
+!8 = metadata !{i32 459008, metadata !9, metadata !"t", metadata !0, i32 5, metadata !2} ; [ DW_TAG_auto_variable ]
+!9 = metadata !{i32 458763, metadata !10}        ; [ DW_TAG_lexical_block ]
+!10 = metadata !{i32 458798, i32 0, metadata !0, metadata !"foo", metadata !"foo", metadata !"foo", metadata !0, i32 4, metadata !11, i1 false, i1 true, i32 0, i32 0, null} ; [ DW_TAG_subprogram ]
+!11 = metadata !{i32 458773, metadata !0, metadata !"", metadata !0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!12 = metadata !{metadata !13}
+!13 = metadata !{i32 458788, metadata !0, metadata !"int", metadata !0, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!14 = metadata !{%tart.reflect.ComplexType* @.type.SwitchStmtTest}

Added: llvm/trunk/test/CodeGen/CellSPU/2010-04-07-DbgValueOtherTargets.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/CellSPU/2010-04-07-DbgValueOtherTargets.ll?rev=100688&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/CellSPU/2010-04-07-DbgValueOtherTargets.ll (added)
+++ llvm/trunk/test/CodeGen/CellSPU/2010-04-07-DbgValueOtherTargets.ll Wed Apr  7 15:43:35 2010
@@ -0,0 +1,33 @@
+; RUN: llc -O0 -march=cellspu -asm-verbose < %s | FileCheck %s
+; Check that DEBUG_VALUE comments come through on a variety of targets.
+
+%tart.reflect.ComplexType = type { double, double }
+
+@.type.SwitchStmtTest = constant %tart.reflect.ComplexType { double 3.0, double 2.0 }
+
+define i32 @"main(tart.core.String[])->int32"(i32 %args) {
+entry:
+; CHECK: DEBUG_VALUE
+  tail call void @llvm.dbg.value(metadata !14, i64 0, metadata !8)
+  tail call void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType* @.type.SwitchStmtTest) ; <%tart.core.Object*> [#uses=2]
+  ret i32 3
+}
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType*) nounwind readnone
+
+!0 = metadata !{i32 458769, i32 0, i32 1, metadata !"sm.c", metadata !"/Volumes/MacOS9/tests/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 458790, metadata !0, metadata !"", metadata !0, i32 0, i64 192, i64 64, i64 0, i32 0, metadata !2} ; [ DW_TAG_const_type ]
+!2 = metadata !{i32 458771, metadata !0, metadata !"C", metadata !0, i32 1, i64 192, i64 64, i64 0, i32 0, null, metadata !3, i32 0, null} ; [ DW_TAG_structure_type ]
+!3 = metadata !{metadata !4, metadata !6, metadata !7}
+!4 = metadata !{i32 458765, metadata !2, metadata !"x", metadata !0, i32 1, i64 64, i64 64, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ]
+!5 = metadata !{i32 458788, metadata !0, metadata !"double", metadata !0, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 458765, metadata !2, metadata !"y", metadata !0, i32 1, i64 64, i64 64, i64 64, i32 0, metadata !5} ; [ DW_TAG_member ]
+!7 = metadata !{i32 458765, metadata !2, metadata !"z", metadata !0, i32 1, i64 64, i64 64, i64 128, i32 0, metadata !5} ; [ DW_TAG_member ]
+!8 = metadata !{i32 459008, metadata !9, metadata !"t", metadata !0, i32 5, metadata !2} ; [ DW_TAG_auto_variable ]
+!9 = metadata !{i32 458763, metadata !10}        ; [ DW_TAG_lexical_block ]
+!10 = metadata !{i32 458798, i32 0, metadata !0, metadata !"foo", metadata !"foo", metadata !"foo", metadata !0, i32 4, metadata !11, i1 false, i1 true, i32 0, i32 0, null} ; [ DW_TAG_subprogram ]
+!11 = metadata !{i32 458773, metadata !0, metadata !"", metadata !0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!12 = metadata !{metadata !13}
+!13 = metadata !{i32 458788, metadata !0, metadata !"int", metadata !0, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!14 = metadata !{%tart.reflect.ComplexType* @.type.SwitchStmtTest}

Added: llvm/trunk/test/CodeGen/MBlaze/2010-04-07-DbgValueOtherTargets.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/MBlaze/2010-04-07-DbgValueOtherTargets.ll?rev=100688&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/MBlaze/2010-04-07-DbgValueOtherTargets.ll (added)
+++ llvm/trunk/test/CodeGen/MBlaze/2010-04-07-DbgValueOtherTargets.ll Wed Apr  7 15:43:35 2010
@@ -0,0 +1,33 @@
+; RUN: llc -O0 -march=mblaze -asm-verbose < %s | FileCheck %s
+; Check that DEBUG_VALUE comments come through on a variety of targets.
+
+%tart.reflect.ComplexType = type { double, double }
+
+@.type.SwitchStmtTest = constant %tart.reflect.ComplexType { double 3.0, double 2.0 }
+
+define i32 @"main(tart.core.String[])->int32"(i32 %args) {
+entry:
+; CHECK: DEBUG_VALUE
+  tail call void @llvm.dbg.value(metadata !14, i64 0, metadata !8)
+  tail call void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType* @.type.SwitchStmtTest) ; <%tart.core.Object*> [#uses=2]
+  ret i32 3
+}
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType*) nounwind readnone
+
+!0 = metadata !{i32 458769, i32 0, i32 1, metadata !"sm.c", metadata !"/Volumes/MacOS9/tests/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 458790, metadata !0, metadata !"", metadata !0, i32 0, i64 192, i64 64, i64 0, i32 0, metadata !2} ; [ DW_TAG_const_type ]
+!2 = metadata !{i32 458771, metadata !0, metadata !"C", metadata !0, i32 1, i64 192, i64 64, i64 0, i32 0, null, metadata !3, i32 0, null} ; [ DW_TAG_structure_type ]
+!3 = metadata !{metadata !4, metadata !6, metadata !7}
+!4 = metadata !{i32 458765, metadata !2, metadata !"x", metadata !0, i32 1, i64 64, i64 64, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ]
+!5 = metadata !{i32 458788, metadata !0, metadata !"double", metadata !0, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 458765, metadata !2, metadata !"y", metadata !0, i32 1, i64 64, i64 64, i64 64, i32 0, metadata !5} ; [ DW_TAG_member ]
+!7 = metadata !{i32 458765, metadata !2, metadata !"z", metadata !0, i32 1, i64 64, i64 64, i64 128, i32 0, metadata !5} ; [ DW_TAG_member ]
+!8 = metadata !{i32 459008, metadata !9, metadata !"t", metadata !0, i32 5, metadata !2} ; [ DW_TAG_auto_variable ]
+!9 = metadata !{i32 458763, metadata !10}        ; [ DW_TAG_lexical_block ]
+!10 = metadata !{i32 458798, i32 0, metadata !0, metadata !"foo", metadata !"foo", metadata !"foo", metadata !0, i32 4, metadata !11, i1 false, i1 true, i32 0, i32 0, null} ; [ DW_TAG_subprogram ]
+!11 = metadata !{i32 458773, metadata !0, metadata !"", metadata !0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!12 = metadata !{metadata !13}
+!13 = metadata !{i32 458788, metadata !0, metadata !"int", metadata !0, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!14 = metadata !{%tart.reflect.ComplexType* @.type.SwitchStmtTest}

Added: llvm/trunk/test/CodeGen/MSP430/2010-04-07-DbgValueOtherTargets.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/MSP430/2010-04-07-DbgValueOtherTargets.ll?rev=100688&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/MSP430/2010-04-07-DbgValueOtherTargets.ll (added)
+++ llvm/trunk/test/CodeGen/MSP430/2010-04-07-DbgValueOtherTargets.ll Wed Apr  7 15:43:35 2010
@@ -0,0 +1,33 @@
+; RUN: llc -O0 -march=msp430 -asm-verbose < %s | FileCheck %s
+; Check that DEBUG_VALUE comments come through on a variety of targets.
+
+%tart.reflect.ComplexType = type { double, double }
+
+@.type.SwitchStmtTest = constant %tart.reflect.ComplexType { double 3.0, double 2.0 }
+
+define i32 @"main(tart.core.String[])->int32"(i32 %args) {
+entry:
+; CHECK: DEBUG_VALUE
+  tail call void @llvm.dbg.value(metadata !14, i64 0, metadata !8)
+  tail call void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType* @.type.SwitchStmtTest) ; <%tart.core.Object*> [#uses=2]
+  ret i32 3
+}
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType*) nounwind readnone
+
+!0 = metadata !{i32 458769, i32 0, i32 1, metadata !"sm.c", metadata !"/Volumes/MacOS9/tests/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 458790, metadata !0, metadata !"", metadata !0, i32 0, i64 192, i64 64, i64 0, i32 0, metadata !2} ; [ DW_TAG_const_type ]
+!2 = metadata !{i32 458771, metadata !0, metadata !"C", metadata !0, i32 1, i64 192, i64 64, i64 0, i32 0, null, metadata !3, i32 0, null} ; [ DW_TAG_structure_type ]
+!3 = metadata !{metadata !4, metadata !6, metadata !7}
+!4 = metadata !{i32 458765, metadata !2, metadata !"x", metadata !0, i32 1, i64 64, i64 64, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ]
+!5 = metadata !{i32 458788, metadata !0, metadata !"double", metadata !0, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 458765, metadata !2, metadata !"y", metadata !0, i32 1, i64 64, i64 64, i64 64, i32 0, metadata !5} ; [ DW_TAG_member ]
+!7 = metadata !{i32 458765, metadata !2, metadata !"z", metadata !0, i32 1, i64 64, i64 64, i64 128, i32 0, metadata !5} ; [ DW_TAG_member ]
+!8 = metadata !{i32 459008, metadata !9, metadata !"t", metadata !0, i32 5, metadata !2} ; [ DW_TAG_auto_variable ]
+!9 = metadata !{i32 458763, metadata !10}        ; [ DW_TAG_lexical_block ]
+!10 = metadata !{i32 458798, i32 0, metadata !0, metadata !"foo", metadata !"foo", metadata !"foo", metadata !0, i32 4, metadata !11, i1 false, i1 true, i32 0, i32 0, null} ; [ DW_TAG_subprogram ]
+!11 = metadata !{i32 458773, metadata !0, metadata !"", metadata !0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!12 = metadata !{metadata !13}
+!13 = metadata !{i32 458788, metadata !0, metadata !"int", metadata !0, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!14 = metadata !{%tart.reflect.ComplexType* @.type.SwitchStmtTest}

Added: llvm/trunk/test/CodeGen/Mips/2010-04-07-DbgValueOtherTargets.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Mips/2010-04-07-DbgValueOtherTargets.ll?rev=100688&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/Mips/2010-04-07-DbgValueOtherTargets.ll (added)
+++ llvm/trunk/test/CodeGen/Mips/2010-04-07-DbgValueOtherTargets.ll Wed Apr  7 15:43:35 2010
@@ -0,0 +1,33 @@
+; RUN: llc -O0 -march=mips -asm-verbose < %s | FileCheck %s
+; Check that DEBUG_VALUE comments come through on a variety of targets.
+
+%tart.reflect.ComplexType = type { double, double }
+
+@.type.SwitchStmtTest = constant %tart.reflect.ComplexType { double 3.0, double 2.0 }
+
+define i32 @"main(tart.core.String[])->int32"(i32 %args) {
+entry:
+; CHECK: DEBUG_VALUE
+  tail call void @llvm.dbg.value(metadata !14, i64 0, metadata !8)
+  tail call void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType* @.type.SwitchStmtTest) ; <%tart.core.Object*> [#uses=2]
+  ret i32 3
+}
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType*) nounwind readnone
+
+!0 = metadata !{i32 458769, i32 0, i32 1, metadata !"sm.c", metadata !"/Volumes/MacOS9/tests/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 458790, metadata !0, metadata !"", metadata !0, i32 0, i64 192, i64 64, i64 0, i32 0, metadata !2} ; [ DW_TAG_const_type ]
+!2 = metadata !{i32 458771, metadata !0, metadata !"C", metadata !0, i32 1, i64 192, i64 64, i64 0, i32 0, null, metadata !3, i32 0, null} ; [ DW_TAG_structure_type ]
+!3 = metadata !{metadata !4, metadata !6, metadata !7}
+!4 = metadata !{i32 458765, metadata !2, metadata !"x", metadata !0, i32 1, i64 64, i64 64, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ]
+!5 = metadata !{i32 458788, metadata !0, metadata !"double", metadata !0, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 458765, metadata !2, metadata !"y", metadata !0, i32 1, i64 64, i64 64, i64 64, i32 0, metadata !5} ; [ DW_TAG_member ]
+!7 = metadata !{i32 458765, metadata !2, metadata !"z", metadata !0, i32 1, i64 64, i64 64, i64 128, i32 0, metadata !5} ; [ DW_TAG_member ]
+!8 = metadata !{i32 459008, metadata !9, metadata !"t", metadata !0, i32 5, metadata !2} ; [ DW_TAG_auto_variable ]
+!9 = metadata !{i32 458763, metadata !10}        ; [ DW_TAG_lexical_block ]
+!10 = metadata !{i32 458798, i32 0, metadata !0, metadata !"foo", metadata !"foo", metadata !"foo", metadata !0, i32 4, metadata !11, i1 false, i1 true, i32 0, i32 0, null} ; [ DW_TAG_subprogram ]
+!11 = metadata !{i32 458773, metadata !0, metadata !"", metadata !0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!12 = metadata !{metadata !13}
+!13 = metadata !{i32 458788, metadata !0, metadata !"int", metadata !0, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!14 = metadata !{%tart.reflect.ComplexType* @.type.SwitchStmtTest}

Added: llvm/trunk/test/CodeGen/PowerPC/2010-04-07-DbgValueOtherTargets.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/2010-04-07-DbgValueOtherTargets.ll?rev=100688&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/2010-04-07-DbgValueOtherTargets.ll (added)
+++ llvm/trunk/test/CodeGen/PowerPC/2010-04-07-DbgValueOtherTargets.ll Wed Apr  7 15:43:35 2010
@@ -0,0 +1,33 @@
+; RUN: llc -O0 -march=ppc32 -asm-verbose < %s | FileCheck %s
+; Check that DEBUG_VALUE comments come through on a variety of targets.
+
+%tart.reflect.ComplexType = type { double, double }
+
+@.type.SwitchStmtTest = constant %tart.reflect.ComplexType { double 3.0, double 2.0 }
+
+define i32 @"main(tart.core.String[])->int32"(i32 %args) {
+entry:
+; CHECK: DEBUG_VALUE
+  tail call void @llvm.dbg.value(metadata !14, i64 0, metadata !8)
+  tail call void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType* @.type.SwitchStmtTest) ; <%tart.core.Object*> [#uses=2]
+  ret i32 3
+}
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType*) nounwind readnone
+
+!0 = metadata !{i32 458769, i32 0, i32 1, metadata !"sm.c", metadata !"/Volumes/MacOS9/tests/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 458790, metadata !0, metadata !"", metadata !0, i32 0, i64 192, i64 64, i64 0, i32 0, metadata !2} ; [ DW_TAG_const_type ]
+!2 = metadata !{i32 458771, metadata !0, metadata !"C", metadata !0, i32 1, i64 192, i64 64, i64 0, i32 0, null, metadata !3, i32 0, null} ; [ DW_TAG_structure_type ]
+!3 = metadata !{metadata !4, metadata !6, metadata !7}
+!4 = metadata !{i32 458765, metadata !2, metadata !"x", metadata !0, i32 1, i64 64, i64 64, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ]
+!5 = metadata !{i32 458788, metadata !0, metadata !"double", metadata !0, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 458765, metadata !2, metadata !"y", metadata !0, i32 1, i64 64, i64 64, i64 64, i32 0, metadata !5} ; [ DW_TAG_member ]
+!7 = metadata !{i32 458765, metadata !2, metadata !"z", metadata !0, i32 1, i64 64, i64 64, i64 128, i32 0, metadata !5} ; [ DW_TAG_member ]
+!8 = metadata !{i32 459008, metadata !9, metadata !"t", metadata !0, i32 5, metadata !2} ; [ DW_TAG_auto_variable ]
+!9 = metadata !{i32 458763, metadata !10}        ; [ DW_TAG_lexical_block ]
+!10 = metadata !{i32 458798, i32 0, metadata !0, metadata !"foo", metadata !"foo", metadata !"foo", metadata !0, i32 4, metadata !11, i1 false, i1 true, i32 0, i32 0, null} ; [ DW_TAG_subprogram ]
+!11 = metadata !{i32 458773, metadata !0, metadata !"", metadata !0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!12 = metadata !{metadata !13}
+!13 = metadata !{i32 458788, metadata !0, metadata !"int", metadata !0, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!14 = metadata !{%tart.reflect.ComplexType* @.type.SwitchStmtTest}

Added: llvm/trunk/test/CodeGen/SPARC/2010-04-07-DbgValueOtherTargets.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/SPARC/2010-04-07-DbgValueOtherTargets.ll?rev=100688&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/SPARC/2010-04-07-DbgValueOtherTargets.ll (added)
+++ llvm/trunk/test/CodeGen/SPARC/2010-04-07-DbgValueOtherTargets.ll Wed Apr  7 15:43:35 2010
@@ -0,0 +1,33 @@
+; RUN: llc -O0 -march=sparc -asm-verbose < %s | FileCheck %s
+; Check that DEBUG_VALUE comments come through on a variety of targets.
+
+%tart.reflect.ComplexType = type { double, double }
+
+@.type.SwitchStmtTest = constant %tart.reflect.ComplexType { double 3.0, double 2.0 }
+
+define i32 @"main(tart.core.String[])->int32"(i32 %args) {
+entry:
+; CHECK: DEBUG_VALUE
+  tail call void @llvm.dbg.value(metadata !14, i64 0, metadata !8)
+  tail call void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType* @.type.SwitchStmtTest) ; <%tart.core.Object*> [#uses=2]
+  ret i32 3
+}
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType*) nounwind readnone
+
+!0 = metadata !{i32 458769, i32 0, i32 1, metadata !"sm.c", metadata !"/Volumes/MacOS9/tests/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 458790, metadata !0, metadata !"", metadata !0, i32 0, i64 192, i64 64, i64 0, i32 0, metadata !2} ; [ DW_TAG_const_type ]
+!2 = metadata !{i32 458771, metadata !0, metadata !"C", metadata !0, i32 1, i64 192, i64 64, i64 0, i32 0, null, metadata !3, i32 0, null} ; [ DW_TAG_structure_type ]
+!3 = metadata !{metadata !4, metadata !6, metadata !7}
+!4 = metadata !{i32 458765, metadata !2, metadata !"x", metadata !0, i32 1, i64 64, i64 64, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ]
+!5 = metadata !{i32 458788, metadata !0, metadata !"double", metadata !0, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 458765, metadata !2, metadata !"y", metadata !0, i32 1, i64 64, i64 64, i64 64, i32 0, metadata !5} ; [ DW_TAG_member ]
+!7 = metadata !{i32 458765, metadata !2, metadata !"z", metadata !0, i32 1, i64 64, i64 64, i64 128, i32 0, metadata !5} ; [ DW_TAG_member ]
+!8 = metadata !{i32 459008, metadata !9, metadata !"t", metadata !0, i32 5, metadata !2} ; [ DW_TAG_auto_variable ]
+!9 = metadata !{i32 458763, metadata !10}        ; [ DW_TAG_lexical_block ]
+!10 = metadata !{i32 458798, i32 0, metadata !0, metadata !"foo", metadata !"foo", metadata !"foo", metadata !0, i32 4, metadata !11, i1 false, i1 true, i32 0, i32 0, null} ; [ DW_TAG_subprogram ]
+!11 = metadata !{i32 458773, metadata !0, metadata !"", metadata !0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!12 = metadata !{metadata !13}
+!13 = metadata !{i32 458788, metadata !0, metadata !"int", metadata !0, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!14 = metadata !{%tart.reflect.ComplexType* @.type.SwitchStmtTest}

Added: llvm/trunk/test/CodeGen/SystemZ/2010-04-07-DbgValueOtherTargets.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/SystemZ/2010-04-07-DbgValueOtherTargets.ll?rev=100688&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/SystemZ/2010-04-07-DbgValueOtherTargets.ll (added)
+++ llvm/trunk/test/CodeGen/SystemZ/2010-04-07-DbgValueOtherTargets.ll Wed Apr  7 15:43:35 2010
@@ -0,0 +1,33 @@
+; RUN: llc -O0 -march=systemz -asm-verbose < %s | FileCheck %s
+; Check that DEBUG_VALUE comments come through on a variety of targets.
+
+%tart.reflect.ComplexType = type { double, double }
+
+@.type.SwitchStmtTest = constant %tart.reflect.ComplexType { double 3.0, double 2.0 }
+
+define i32 @"main(tart.core.String[])->int32"(i32 %args) {
+entry:
+; CHECK: DEBUG_VALUE
+  tail call void @llvm.dbg.value(metadata !14, i64 0, metadata !8)
+  tail call void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType* @.type.SwitchStmtTest) ; <%tart.core.Object*> [#uses=2]
+  ret i32 3
+}
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType*) nounwind readnone
+
+!0 = metadata !{i32 458769, i32 0, i32 1, metadata !"sm.c", metadata !"/Volumes/MacOS9/tests/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 458790, metadata !0, metadata !"", metadata !0, i32 0, i64 192, i64 64, i64 0, i32 0, metadata !2} ; [ DW_TAG_const_type ]
+!2 = metadata !{i32 458771, metadata !0, metadata !"C", metadata !0, i32 1, i64 192, i64 64, i64 0, i32 0, null, metadata !3, i32 0, null} ; [ DW_TAG_structure_type ]
+!3 = metadata !{metadata !4, metadata !6, metadata !7}
+!4 = metadata !{i32 458765, metadata !2, metadata !"x", metadata !0, i32 1, i64 64, i64 64, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ]
+!5 = metadata !{i32 458788, metadata !0, metadata !"double", metadata !0, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 458765, metadata !2, metadata !"y", metadata !0, i32 1, i64 64, i64 64, i64 64, i32 0, metadata !5} ; [ DW_TAG_member ]
+!7 = metadata !{i32 458765, metadata !2, metadata !"z", metadata !0, i32 1, i64 64, i64 64, i64 128, i32 0, metadata !5} ; [ DW_TAG_member ]
+!8 = metadata !{i32 459008, metadata !9, metadata !"t", metadata !0, i32 5, metadata !2} ; [ DW_TAG_auto_variable ]
+!9 = metadata !{i32 458763, metadata !10}        ; [ DW_TAG_lexical_block ]
+!10 = metadata !{i32 458798, i32 0, metadata !0, metadata !"foo", metadata !"foo", metadata !"foo", metadata !0, i32 4, metadata !11, i1 false, i1 true, i32 0, i32 0, null} ; [ DW_TAG_subprogram ]
+!11 = metadata !{i32 458773, metadata !0, metadata !"", metadata !0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!12 = metadata !{metadata !13}
+!13 = metadata !{i32 458788, metadata !0, metadata !"int", metadata !0, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!14 = metadata !{%tart.reflect.ComplexType* @.type.SwitchStmtTest}

Added: llvm/trunk/test/CodeGen/Thumb/2010-04-07-DbgValueOtherTargets.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb/2010-04-07-DbgValueOtherTargets.ll?rev=100688&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/Thumb/2010-04-07-DbgValueOtherTargets.ll (added)
+++ llvm/trunk/test/CodeGen/Thumb/2010-04-07-DbgValueOtherTargets.ll Wed Apr  7 15:43:35 2010
@@ -0,0 +1,33 @@
+; RUN: llc -O0 -march=thumb -asm-verbose < %s | FileCheck %s
+; Check that DEBUG_VALUE comments come through on a variety of targets.
+
+%tart.reflect.ComplexType = type { double, double }
+
+@.type.SwitchStmtTest = constant %tart.reflect.ComplexType { double 3.0, double 2.0 }
+
+define i32 @"main(tart.core.String[])->int32"(i32 %args) {
+entry:
+; CHECK: DEBUG_VALUE
+  tail call void @llvm.dbg.value(metadata !14, i64 0, metadata !8)
+  tail call void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType* @.type.SwitchStmtTest) ; <%tart.core.Object*> [#uses=2]
+  ret i32 3
+}
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType*) nounwind readnone
+
+!0 = metadata !{i32 458769, i32 0, i32 1, metadata !"sm.c", metadata !"/Volumes/MacOS9/tests/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 458790, metadata !0, metadata !"", metadata !0, i32 0, i64 192, i64 64, i64 0, i32 0, metadata !2} ; [ DW_TAG_const_type ]
+!2 = metadata !{i32 458771, metadata !0, metadata !"C", metadata !0, i32 1, i64 192, i64 64, i64 0, i32 0, null, metadata !3, i32 0, null} ; [ DW_TAG_structure_type ]
+!3 = metadata !{metadata !4, metadata !6, metadata !7}
+!4 = metadata !{i32 458765, metadata !2, metadata !"x", metadata !0, i32 1, i64 64, i64 64, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ]
+!5 = metadata !{i32 458788, metadata !0, metadata !"double", metadata !0, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 458765, metadata !2, metadata !"y", metadata !0, i32 1, i64 64, i64 64, i64 64, i32 0, metadata !5} ; [ DW_TAG_member ]
+!7 = metadata !{i32 458765, metadata !2, metadata !"z", metadata !0, i32 1, i64 64, i64 64, i64 128, i32 0, metadata !5} ; [ DW_TAG_member ]
+!8 = metadata !{i32 459008, metadata !9, metadata !"t", metadata !0, i32 5, metadata !2} ; [ DW_TAG_auto_variable ]
+!9 = metadata !{i32 458763, metadata !10}        ; [ DW_TAG_lexical_block ]
+!10 = metadata !{i32 458798, i32 0, metadata !0, metadata !"foo", metadata !"foo", metadata !"foo", metadata !0, i32 4, metadata !11, i1 false, i1 true, i32 0, i32 0, null} ; [ DW_TAG_subprogram ]
+!11 = metadata !{i32 458773, metadata !0, metadata !"", metadata !0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!12 = metadata !{metadata !13}
+!13 = metadata !{i32 458788, metadata !0, metadata !"int", metadata !0, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!14 = metadata !{%tart.reflect.ComplexType* @.type.SwitchStmtTest}

Added: llvm/trunk/test/CodeGen/X86/2010-04-07-DbgValueOtherTargets.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/2010-04-07-DbgValueOtherTargets.ll?rev=100688&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/2010-04-07-DbgValueOtherTargets.ll (added)
+++ llvm/trunk/test/CodeGen/X86/2010-04-07-DbgValueOtherTargets.ll Wed Apr  7 15:43:35 2010
@@ -0,0 +1,34 @@
+; RUN: llc -O0 -march=x86 -asm-verbose < %s | FileCheck %s
+; RUN: llc -O0 -march=x86-64 -asm-verbose < %s | FileCheck %s
+; Check that DEBUG_VALUE comments come through on a variety of targets.
+
+%tart.reflect.ComplexType = type { double, double }
+
+@.type.SwitchStmtTest = constant %tart.reflect.ComplexType { double 3.0, double 2.0 }
+
+define i32 @"main(tart.core.String[])->int32"(i32 %args) {
+entry:
+; CHECK: DEBUG_VALUE
+  tail call void @llvm.dbg.value(metadata !14, i64 0, metadata !8)
+  tail call void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType* @.type.SwitchStmtTest) ; <%tart.core.Object*> [#uses=2]
+  ret i32 3
+}
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType*) nounwind readnone
+
+!0 = metadata !{i32 458769, i32 0, i32 1, metadata !"sm.c", metadata !"/Volumes/MacOS9/tests/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 458790, metadata !0, metadata !"", metadata !0, i32 0, i64 192, i64 64, i64 0, i32 0, metadata !2} ; [ DW_TAG_const_type ]
+!2 = metadata !{i32 458771, metadata !0, metadata !"C", metadata !0, i32 1, i64 192, i64 64, i64 0, i32 0, null, metadata !3, i32 0, null} ; [ DW_TAG_structure_type ]
+!3 = metadata !{metadata !4, metadata !6, metadata !7}
+!4 = metadata !{i32 458765, metadata !2, metadata !"x", metadata !0, i32 1, i64 64, i64 64, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ]
+!5 = metadata !{i32 458788, metadata !0, metadata !"double", metadata !0, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 458765, metadata !2, metadata !"y", metadata !0, i32 1, i64 64, i64 64, i64 64, i32 0, metadata !5} ; [ DW_TAG_member ]
+!7 = metadata !{i32 458765, metadata !2, metadata !"z", metadata !0, i32 1, i64 64, i64 64, i64 128, i32 0, metadata !5} ; [ DW_TAG_member ]
+!8 = metadata !{i32 459008, metadata !9, metadata !"t", metadata !0, i32 5, metadata !2} ; [ DW_TAG_auto_variable ]
+!9 = metadata !{i32 458763, metadata !10}        ; [ DW_TAG_lexical_block ]
+!10 = metadata !{i32 458798, i32 0, metadata !0, metadata !"foo", metadata !"foo", metadata !"foo", metadata !0, i32 4, metadata !11, i1 false, i1 true, i32 0, i32 0, null} ; [ DW_TAG_subprogram ]
+!11 = metadata !{i32 458773, metadata !0, metadata !"", metadata !0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!12 = metadata !{metadata !13}
+!13 = metadata !{i32 458788, metadata !0, metadata !"int", metadata !0, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!14 = metadata !{%tart.reflect.ComplexType* @.type.SwitchStmtTest}

Added: llvm/trunk/test/CodeGen/XCore/2010-04-07-DbgValueOtherTargets.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/XCore/2010-04-07-DbgValueOtherTargets.ll?rev=100688&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/XCore/2010-04-07-DbgValueOtherTargets.ll (added)
+++ llvm/trunk/test/CodeGen/XCore/2010-04-07-DbgValueOtherTargets.ll Wed Apr  7 15:43:35 2010
@@ -0,0 +1,33 @@
+; RUN: llc -O0 -march=xcore -asm-verbose < %s | FileCheck %s
+; Check that DEBUG_VALUE comments come through on a variety of targets.
+
+%tart.reflect.ComplexType = type { double, double }
+
+@.type.SwitchStmtTest = constant %tart.reflect.ComplexType { double 3.0, double 2.0 }
+
+define i32 @"main(tart.core.String[])->int32"(i32 %args) {
+entry:
+; CHECK: DEBUG_VALUE
+  tail call void @llvm.dbg.value(metadata !14, i64 0, metadata !8)
+  tail call void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType* @.type.SwitchStmtTest) ; <%tart.core.Object*> [#uses=2]
+  ret i32 3
+}
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType*) nounwind readnone
+
+!0 = metadata !{i32 458769, i32 0, i32 1, metadata !"sm.c", metadata !"/Volumes/MacOS9/tests/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 458790, metadata !0, metadata !"", metadata !0, i32 0, i64 192, i64 64, i64 0, i32 0, metadata !2} ; [ DW_TAG_const_type ]
+!2 = metadata !{i32 458771, metadata !0, metadata !"C", metadata !0, i32 1, i64 192, i64 64, i64 0, i32 0, null, metadata !3, i32 0, null} ; [ DW_TAG_structure_type ]
+!3 = metadata !{metadata !4, metadata !6, metadata !7}
+!4 = metadata !{i32 458765, metadata !2, metadata !"x", metadata !0, i32 1, i64 64, i64 64, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ]
+!5 = metadata !{i32 458788, metadata !0, metadata !"double", metadata !0, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 458765, metadata !2, metadata !"y", metadata !0, i32 1, i64 64, i64 64, i64 64, i32 0, metadata !5} ; [ DW_TAG_member ]
+!7 = metadata !{i32 458765, metadata !2, metadata !"z", metadata !0, i32 1, i64 64, i64 64, i64 128, i32 0, metadata !5} ; [ DW_TAG_member ]
+!8 = metadata !{i32 459008, metadata !9, metadata !"t", metadata !0, i32 5, metadata !2} ; [ DW_TAG_auto_variable ]
+!9 = metadata !{i32 458763, metadata !10}        ; [ DW_TAG_lexical_block ]
+!10 = metadata !{i32 458798, i32 0, metadata !0, metadata !"foo", metadata !"foo", metadata !"foo", metadata !0, i32 4, metadata !11, i1 false, i1 true, i32 0, i32 0, null} ; [ DW_TAG_subprogram ]
+!11 = metadata !{i32 458773, metadata !0, metadata !"", metadata !0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!12 = metadata !{metadata !13}
+!13 = metadata !{i32 458788, metadata !0, metadata !"int", metadata !0, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!14 = metadata !{%tart.reflect.ComplexType* @.type.SwitchStmtTest}

Removed: llvm/trunk/test/DebugInfo/2010-04-07-DbgValueOtherTargets.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/DebugInfo/2010-04-07-DbgValueOtherTargets.ll?rev=100687&view=auto
==============================================================================
--- llvm/trunk/test/DebugInfo/2010-04-07-DbgValueOtherTargets.ll (original)
+++ llvm/trunk/test/DebugInfo/2010-04-07-DbgValueOtherTargets.ll (removed)
@@ -1,45 +0,0 @@
-; RUN: llc -O0 -march=ppc32 -asm-verbose < %s | FileCheck %s
-; RUN: llc -O0 -march=sparc -asm-verbose < %s | FileCheck %s
-; RUN: llc -O0 -march=x86 -asm-verbose < %s | FileCheck %s
-; RUN: llc -O0 -march=x86-64 -asm-verbose < %s | FileCheck %s
-; RUN: llc -O0 -march=arm -asm-verbose < %s | FileCheck %s
-; RUN: llc -O0 -march=thumb -asm-verbose < %s | FileCheck %s
-; RUN: llc -O0 -march=alpha -asm-verbose < %s | FileCheck %s
-; RUN: llc -O0 -march=cellspu -asm-verbose < %s | FileCheck %s
-; RUN: llc -O0 -march=mblaze -asm-verbose < %s | FileCheck %s
-; RUN: llc -O0 -march=mips -asm-verbose < %s | FileCheck %s
-; RUN: llc -O0 -march=msp430 -asm-verbose < %s | FileCheck %s
-; RUN: llc -O0 -march=systemz -asm-verbose < %s | FileCheck %s
-; RUN: llc -O0 -march=xcore -asm-verbose < %s | FileCheck %s
-; Check that DEBUG_VALUE comments come through on a variety of targets.
-
-%tart.reflect.ComplexType = type { double, double }
-
-@.type.SwitchStmtTest = constant %tart.reflect.ComplexType { double 3.0, double 2.0 }
-
-define i32 @"main(tart.core.String[])->int32"(i32 %args) {
-entry:
-; CHECK: DEBUG_VALUE
-  tail call void @llvm.dbg.value(metadata !14, i64 0, metadata !8)
-  tail call void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType* @.type.SwitchStmtTest) ; <%tart.core.Object*> [#uses=2]
-  ret i32 3
-}
-
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
-declare void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType*) nounwind readnone
-
-!0 = metadata !{i32 458769, i32 0, i32 1, metadata !"sm.c", metadata !"/Volumes/MacOS9/tests/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{i32 458790, metadata !0, metadata !"", metadata !0, i32 0, i64 192, i64 64, i64 0, i32 0, metadata !2} ; [ DW_TAG_const_type ]
-!2 = metadata !{i32 458771, metadata !0, metadata !"C", metadata !0, i32 1, i64 192, i64 64, i64 0, i32 0, null, metadata !3, i32 0, null} ; [ DW_TAG_structure_type ]
-!3 = metadata !{metadata !4, metadata !6, metadata !7}
-!4 = metadata !{i32 458765, metadata !2, metadata !"x", metadata !0, i32 1, i64 64, i64 64, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ]
-!5 = metadata !{i32 458788, metadata !0, metadata !"double", metadata !0, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 458765, metadata !2, metadata !"y", metadata !0, i32 1, i64 64, i64 64, i64 64, i32 0, metadata !5} ; [ DW_TAG_member ]
-!7 = metadata !{i32 458765, metadata !2, metadata !"z", metadata !0, i32 1, i64 64, i64 64, i64 128, i32 0, metadata !5} ; [ DW_TAG_member ]
-!8 = metadata !{i32 459008, metadata !9, metadata !"t", metadata !0, i32 5, metadata !2} ; [ DW_TAG_auto_variable ]
-!9 = metadata !{i32 458763, metadata !10}        ; [ DW_TAG_lexical_block ]
-!10 = metadata !{i32 458798, i32 0, metadata !0, metadata !"foo", metadata !"foo", metadata !"foo", metadata !0, i32 4, metadata !11, i1 false, i1 true, i32 0, i32 0, null} ; [ DW_TAG_subprogram ]
-!11 = metadata !{i32 458773, metadata !0, metadata !"", metadata !0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_subroutine_type ]
-!12 = metadata !{metadata !13}
-!13 = metadata !{i32 458788, metadata !0, metadata !"int", metadata !0, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!14 = metadata !{%tart.reflect.ComplexType* @.type.SwitchStmtTest}




From johnny.chen at apple.com  Wed Apr  7 15:53:13 2010
From: johnny.chen at apple.com (Johnny Chen)
Date: Wed, 07 Apr 2010 20:53:13 -0000
Subject: [llvm-commits] [llvm] r100690 - in /llvm/trunk:
 lib/Target/ARM/Disassembler/Makefile lib/Target/ARM/Makefile
 utils/TableGen/ARMDecoderEmitter.cpp
Message-ID: <20100407205313.2115D2A6C12C@llvm.org>

Author: johnny
Date: Wed Apr  7 15:53:12 2010
New Revision: 100690

URL: http://llvm.org/viewvc/llvm-project?rev=100690&view=rev
Log:
Re-enable ARM/Thumb disassembler and add a workaround for a memcpy() call in
ARMDecoderEmitter.cpp, with FIXME comment.

Added:
    llvm/trunk/lib/Target/ARM/Disassembler/Makefile
Modified:
    llvm/trunk/lib/Target/ARM/Makefile
    llvm/trunk/utils/TableGen/ARMDecoderEmitter.cpp

Added: llvm/trunk/lib/Target/ARM/Disassembler/Makefile
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/Disassembler/Makefile?rev=100690&view=auto
==============================================================================
--- llvm/trunk/lib/Target/ARM/Disassembler/Makefile (added)
+++ llvm/trunk/lib/Target/ARM/Disassembler/Makefile Wed Apr  7 15:53:12 2010
@@ -0,0 +1,16 @@
+##===- lib/Target/ARM/Disassembler/Makefile ----------------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMARMDisassembler
+
+# Hack: we need to include 'main' arm target directory to grab private headers
+CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common

Modified: llvm/trunk/lib/Target/ARM/Makefile
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/Makefile?rev=100690&r1=100689&r2=100690&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/Makefile (original)
+++ llvm/trunk/lib/Target/ARM/Makefile Wed Apr  7 15:53:12 2010
@@ -16,8 +16,9 @@
                 ARMGenRegisterInfo.inc ARMGenInstrNames.inc \
                 ARMGenInstrInfo.inc ARMGenAsmWriter.inc \
                 ARMGenDAGISel.inc ARMGenSubtarget.inc \
-                ARMGenCodeEmitter.inc ARMGenCallingConv.inc
+                ARMGenCodeEmitter.inc ARMGenCallingConv.inc \
+                ARMGenDecoderTables.inc
 
-DIRS = AsmPrinter AsmParser TargetInfo
+DIRS = AsmPrinter AsmParser Disassembler TargetInfo
 
 include $(LEVEL)/Makefile.common

Modified: llvm/trunk/utils/TableGen/ARMDecoderEmitter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/TableGen/ARMDecoderEmitter.cpp?rev=100690&r1=100689&r2=100690&view=diff
==============================================================================
--- llvm/trunk/utils/TableGen/ARMDecoderEmitter.cpp (original)
+++ llvm/trunk/utils/TableGen/ARMDecoderEmitter.cpp Wed Apr  7 15:53:12 2010
@@ -596,7 +596,11 @@
 
   bit_value_t BitValueArray[BIT_WIDTH];
   // Starts by inheriting our parent filter chooser's filter bit values.
-  memcpy(BitValueArray, Owner->FilterBitValues, sizeof(BitValueArray));
+  bit_value_t *BitVals = Owner->FilterBitValues;
+  for (unsigned i = 0; i < BIT_WIDTH; ++i)
+    BitValueArray[i] = BitVals[i];
+  // FIXME: memcpy() is optmized out with self-hosting llvm-gcc (-O1 and -O2).
+  //memcpy(BitValueArray, Owner->FilterBitValues, sizeof(BitValueArray));
 
   unsigned bitIndex;
 
@@ -623,7 +627,7 @@
     assert(FilterChooserMap.size() == 1);
     return;
   }
-        
+
   // Otherwise, create sub choosers.
   for (mapIterator = FilteredInstructions.begin();
        mapIterator != FilteredInstructions.end();




From echristo at apple.com  Wed Apr  7 15:58:16 2010
From: echristo at apple.com (Eric Christopher)
Date: Wed, 07 Apr 2010 20:58:16 -0000
Subject: [llvm-commits] [llvm] r100691 -
	/llvm/trunk/utils/TableGen/ARMDecoderEmitter.cpp
Message-ID: <20100407205816.689242A6C12C@llvm.org>

Author: echristo
Date: Wed Apr  7 15:58:16 2010
New Revision: 100691

URL: http://llvm.org/viewvc/llvm-project?rev=100691&view=rev
Log:
Fix typo and correct comment somewhat.

Modified:
    llvm/trunk/utils/TableGen/ARMDecoderEmitter.cpp

Modified: llvm/trunk/utils/TableGen/ARMDecoderEmitter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/TableGen/ARMDecoderEmitter.cpp?rev=100691&r1=100690&r2=100691&view=diff
==============================================================================
--- llvm/trunk/utils/TableGen/ARMDecoderEmitter.cpp (original)
+++ llvm/trunk/utils/TableGen/ARMDecoderEmitter.cpp Wed Apr  7 15:58:16 2010
@@ -599,7 +599,7 @@
   bit_value_t *BitVals = Owner->FilterBitValues;
   for (unsigned i = 0; i < BIT_WIDTH; ++i)
     BitValueArray[i] = BitVals[i];
-  // FIXME: memcpy() is optmized out with self-hosting llvm-gcc (-O1 and -O2).
+  // FIXME: memcpy() is misoptimized with self-hosting llvm-gcc (-O1 and -O2).
   //memcpy(BitValueArray, Owner->FilterBitValues, sizeof(BitValueArray));
 
   unsigned bitIndex;




From benny.kra at googlemail.com  Wed Apr  7 16:19:41 2010
From: benny.kra at googlemail.com (Benjamin Kramer)
Date: Wed, 07 Apr 2010 21:19:41 -0000
Subject: [llvm-commits] [llvm] r100692 - in
 /llvm/trunk/test/MC/Disassembler: arm-tests.txt neon-tests.txt
 thumb-tests.txt
Message-ID: <20100407211941.425B22A6C12C@llvm.org>

Author: d0k
Date: Wed Apr  7 16:19:41 2010
New Revision: 100692

URL: http://llvm.org/viewvc/llvm-project?rev=100692&view=rev
Log:
unXFAIL, arm disassembler was reenabled.

Modified:
    llvm/trunk/test/MC/Disassembler/arm-tests.txt
    llvm/trunk/test/MC/Disassembler/neon-tests.txt
    llvm/trunk/test/MC/Disassembler/thumb-tests.txt

Modified: llvm/trunk/test/MC/Disassembler/arm-tests.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/Disassembler/arm-tests.txt?rev=100692&r1=100691&r2=100692&view=diff
==============================================================================
--- llvm/trunk/test/MC/Disassembler/arm-tests.txt (original)
+++ llvm/trunk/test/MC/Disassembler/arm-tests.txt Wed Apr  7 16:19:41 2010
@@ -1,5 +1,4 @@
 # RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 | FileCheck %s
-# XFAIL: *
 
 # CHECK:	b	#0
 0xfe 0xff 0xff 0xea

Modified: llvm/trunk/test/MC/Disassembler/neon-tests.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/Disassembler/neon-tests.txt?rev=100692&r1=100691&r2=100692&view=diff
==============================================================================
--- llvm/trunk/test/MC/Disassembler/neon-tests.txt (original)
+++ llvm/trunk/test/MC/Disassembler/neon-tests.txt Wed Apr  7 16:19:41 2010
@@ -1,5 +1,4 @@
 # RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 | FileCheck %s
-# XFAIL: *
 
 # CHECK:	vbif	q15, q7, q0
 0x50 0xe1 0x7e 0xf3

Modified: llvm/trunk/test/MC/Disassembler/thumb-tests.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/Disassembler/thumb-tests.txt?rev=100692&r1=100691&r2=100692&view=diff
==============================================================================
--- llvm/trunk/test/MC/Disassembler/thumb-tests.txt (original)
+++ llvm/trunk/test/MC/Disassembler/thumb-tests.txt Wed Apr  7 16:19:41 2010
@@ -1,5 +1,4 @@
 # RUN: llvm-mc --disassemble %s -triple=thumb-apple-darwin9 | FileCheck %s
-# XFAIL: *
 
 # CHECK:	add	r5, sp, #68
 0x11 0xad




From johnny.chen at apple.com  Wed Apr  7 16:23:49 2010
From: johnny.chen at apple.com (Johnny Chen)
Date: Wed, 07 Apr 2010 21:23:49 -0000
Subject: [llvm-commits] [llvm] r100693 -
 /llvm/trunk/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp
Message-ID: <20100407212349.22D212A6C12C@llvm.org>

Author: johnny
Date: Wed Apr  7 16:23:48 2010
New Revision: 100693

URL: http://llvm.org/viewvc/llvm-project?rev=100693&view=rev
Log:
Fixed 3 warnings pointed out by clang.

Modified:
    llvm/trunk/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp

Modified: llvm/trunk/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp?rev=100693&r1=100692&r2=100693&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp (original)
+++ llvm/trunk/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp Wed Apr  7 16:23:48 2010
@@ -1986,7 +1986,7 @@
 // D = Inst{22}, Vd = Inst{15-12}
 static unsigned decodeNEONRd(uint32_t insn) {
   return ((insn >> ARMII::NEON_D_BitShift) & 1) << 4
-    | (insn >> ARMII::NEON_RegRdShift) & ARMII::NEONRegMask;
+    | ((insn >> ARMII::NEON_RegRdShift) & ARMII::NEONRegMask);
 }
 
 // Extract/Decode NEON N/Vn:
@@ -1997,7 +1997,7 @@
 // N = Inst{7}, Vn = Inst{19-16}
 static unsigned decodeNEONRn(uint32_t insn) {
   return ((insn >> ARMII::NEON_N_BitShift) & 1) << 4
-    | (insn >> ARMII::NEON_RegRnShift) & ARMII::NEONRegMask;
+    | ((insn >> ARMII::NEON_RegRnShift) & ARMII::NEONRegMask);
 }
 
 // Extract/Decode NEON M/Vm:
@@ -2008,7 +2008,7 @@
 // M = Inst{5}, Vm = Inst{3-0}
 static unsigned decodeNEONRm(uint32_t insn) {
   return ((insn >> ARMII::NEON_M_BitShift) & 1) << 4
-    | (insn >> ARMII::NEON_RegRmShift) & ARMII::NEONRegMask;
+    | ((insn >> ARMII::NEON_RegRmShift) & ARMII::NEONRegMask);
 }
 
 namespace {




From scallanan at apple.com  Wed Apr  7 16:42:19 2010
From: scallanan at apple.com (Sean Callanan)
Date: Wed, 07 Apr 2010 21:42:19 -0000
Subject: [llvm-commits] [llvm] r100694 - in /llvm/trunk:
 lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
 lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
 utils/TableGen/X86RecognizableInstr.cpp
Message-ID: <20100407214219.53F852A6C12C@llvm.org>

Author: spyffe
Date: Wed Apr  7 16:42:19 2010
New Revision: 100694

URL: http://llvm.org/viewvc/llvm-project?rev=100694&view=rev
Log:
Fixed a bug where the disassembler would allow an immediate
argument that had to be between 0 and 7 to have any value,
firing an assert later in the AsmPrinter.  Now, the
disassembler rejects instructions with out-of-range values
for that immediate.

Modified:
    llvm/trunk/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
    llvm/trunk/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
    llvm/trunk/utils/TableGen/X86RecognizableInstr.cpp

Modified: llvm/trunk/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c?rev=100694&r1=100693&r2=100694&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c (original)
+++ llvm/trunk/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c Wed Apr  7 16:42:19 2010
@@ -1277,6 +1277,9 @@
     case ENCODING_IB:
       if (readImmediate(insn, 1))
         return -1;
+      if (insn->spec->operands[index].type == TYPE_IMM3 &&
+          insn->immediates[insn->numImmediatesConsumed - 1] > 7)
+        return -1;
       break;
     case ENCODING_IW:
       if (readImmediate(insn, 2))

Modified: llvm/trunk/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h?rev=100694&r1=100693&r2=100694&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h (original)
+++ llvm/trunk/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h Wed Apr  7 16:42:19 2010
@@ -236,6 +236,7 @@
   ENUM_ENTRY(TYPE_IMM16,      "2-byte")                                        \
   ENUM_ENTRY(TYPE_IMM32,      "4-byte")                                        \
   ENUM_ENTRY(TYPE_IMM64,      "8-byte")                                        \
+  ENUM_ENTRY(TYPE_IMM3,       "1-byte immediate operand between 0 and 7")      \
   ENUM_ENTRY(TYPE_RM8,        "1-byte register or memory operand")             \
   ENUM_ENTRY(TYPE_RM16,       "2-byte")                                        \
   ENUM_ENTRY(TYPE_RM32,       "4-byte")                                        \

Modified: llvm/trunk/utils/TableGen/X86RecognizableInstr.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/TableGen/X86RecognizableInstr.cpp?rev=100694&r1=100693&r2=100694&view=diff
==============================================================================
--- llvm/trunk/utils/TableGen/X86RecognizableInstr.cpp (original)
+++ llvm/trunk/utils/TableGen/X86RecognizableInstr.cpp Wed Apr  7 16:42:19 2010
@@ -820,7 +820,7 @@
   TYPE("i128mem",             TYPE_M128)
   TYPE("i64i32imm_pcrel",     TYPE_REL64)
   TYPE("i32imm_pcrel",        TYPE_REL32)
-  TYPE("SSECC",               TYPE_IMM8)
+  TYPE("SSECC",               TYPE_IMM3)
   TYPE("brtarget",            TYPE_RELv)
   TYPE("brtarget8",           TYPE_REL8)
   TYPE("f80mem",              TYPE_M80FP)




From johnny.chen at apple.com  Wed Apr  7 16:51:31 2010
From: johnny.chen at apple.com (Johnny Chen)
Date: Wed, 7 Apr 2010 14:51:31 -0700
Subject: [llvm-commits] [llvm] r100692 - in
	/llvm/trunk/test/MC/Disassembler: arm-tests.txt
	neon-tests.txt thumb-tests.txt
Message-ID: <96E21E59-1959-4747-B395-4DB94EF99D80@apple.com>

Thanks for the fixes!



From johnny.chen at apple.com  Wed Apr  7 16:52:48 2010
From: johnny.chen at apple.com (Johnny Chen)
Date: Wed, 07 Apr 2010 21:52:48 -0000
Subject: [llvm-commits] [llvm] r100695 -
 /llvm/trunk/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h
Message-ID: <20100407215248.D8F772A6C12C@llvm.org>

Author: johnny
Date: Wed Apr  7 16:52:48 2010
New Revision: 100695

URL: http://llvm.org/viewvc/llvm-project?rev=100695&view=rev
Log:
Fixed warnings pointed out by clang.
Next to work on is ARMDisassemblerCore.cpp.

Modified:
    llvm/trunk/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h

Modified: llvm/trunk/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h?rev=100695&r1=100694&r2=100695&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h (original)
+++ llvm/trunk/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h Wed Apr  7 16:52:48 2010
@@ -524,6 +524,7 @@
     unsigned short NumOps, unsigned &NumOpsAdded) {
 
   const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+  if (!OpInfo) return false;
 
   assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::tGPRRegClassID &&
          (OpInfo[1].RegClass == 0 &&
@@ -621,6 +622,7 @@
          && "Invalid opcode");
 
   const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+  if (!OpInfo) return false;
 
   assert(NumOps >= 3 &&
          OpInfo[0].RegClass == ARM::tGPRRegClassID &&
@@ -648,6 +650,7 @@
   assert(Opcode == ARM::tADDrPCi && "Invalid opcode");
 
   const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+  if (!OpInfo) return false;
 
   assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::tGPRRegClassID &&
          (OpInfo[1].RegClass == 0 &&
@@ -672,6 +675,7 @@
   assert(Opcode == ARM::tADDrSPi && "Invalid opcode");
 
   const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+  if (!OpInfo) return false;
 
   assert(NumOps >= 3 &&
          OpInfo[0].RegClass == ARM::tGPRRegClassID &&
@@ -891,6 +895,8 @@
     return true;
 
   const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+  if (!OpInfo) return false;
+
   assert(NumOps == 3 && OpInfo[0].RegClass == 0 &&
          OpInfo[1].isPredicate() && OpInfo[2].RegClass == ARM::CCRRegClassID
          && "Exactly 3 operands expected");
@@ -915,6 +921,8 @@
     unsigned short NumOps, unsigned &NumOpsAdded) {
 
   const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+  if (!OpInfo) return false;
+
   assert(NumOps == 1 && OpInfo[0].RegClass == 0 && "1 imm operand expected");
 
   unsigned Imm11 = getT1Imm11(insn);
@@ -1147,6 +1155,8 @@
     unsigned short NumOps, unsigned &NumOpsAdded) {
 
   const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+  if (!OpInfo) return false;
+
   unsigned &OpIdx = NumOpsAdded;
 
   OpIdx = 0;
@@ -1201,6 +1211,7 @@
     uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded) {
 
   const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+  if (!OpInfo) return false;
 
   assert(NumOps >= 4
          && OpInfo[0].RegClass == ARM::GPRRegClassID
@@ -1768,6 +1779,7 @@
     uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded) {
 
   const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+  if (!OpInfo) return false;
 
   assert(NumOps >= 2 &&
          OpInfo[0].RegClass == ARM::GPRRegClassID &&
@@ -2174,8 +2186,10 @@
   unsigned bits15_11 = slice(HalfWord, 15, 11);
 
   // A6.1 Thumb instruction set encoding
-  assert((bits15_11 == 0x1D || bits15_11 == 0x1E || bits15_11 == 0x1F) &&
-         "Bits [15:11] of first halfword of a Thumb2 instruction out of range");
+  if (!(bits15_11 == 0x1D || bits15_11 == 0x1E || bits15_11 == 0x1F)) {
+    assert("Bits[15:11] first halfword of Thumb2 instruction is out of range");
+    return false;
+  }
 
   // A6.3 32-bit Thumb instruction encoding
   




From johnny.chen at apple.com  Wed Apr  7 17:03:28 2010
From: johnny.chen at apple.com (Johnny Chen)
Date: Wed, 07 Apr 2010 22:03:28 -0000
Subject: [llvm-commits] [llvm] r100696 -
 /llvm/trunk/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp
Message-ID: <20100407220328.1FBAA2A6C12C@llvm.org>

Author: johnny
Date: Wed Apr  7 17:03:27 2010
New Revision: 100696

URL: http://llvm.org/viewvc/llvm-project?rev=100696&view=rev
Log:
Fixed warnings pointed out by clang.

Modified:
    llvm/trunk/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp

Modified: llvm/trunk/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp?rev=100696&r1=100695&r2=100696&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp (original)
+++ llvm/trunk/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp Wed Apr  7 17:03:27 2010
@@ -694,6 +694,7 @@
     return DisassembleCoprocessor(MI, Opcode, insn, NumOps, NumOpsAdded);
 
   const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+  if (!OpInfo) return false;
 
   // MRS and MRSsys take one GPR reg Rd.
   if (Opcode == ARM::MRS || Opcode == ARM::MRSsys) {
@@ -794,6 +795,8 @@
     unsigned short NumOps, unsigned &NumOpsAdded, BO) {
 
   const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+  if (!OpInfo) return false;
+
   unsigned &OpIdx = NumOpsAdded;
 
   OpIdx = 0;
@@ -1127,11 +1130,14 @@
   unsigned short NumDefs = TID.getNumDefs();
   bool isPrePost = isPrePostLdSt(TID.TSFlags);
   const TargetOperandInfo *OpInfo = TID.OpInfo;
+  if (!OpInfo) return false;
+
   unsigned &OpIdx = NumOpsAdded;
 
   OpIdx = 0;
 
-  assert(((!isStore && NumDefs > 0) || (isStore && (NumDefs == 0 || isPrePost)))
+  assert(((!isStore && TID.getNumDefs() > 0) ||
+          (isStore && (TID.getNumDefs() == 0 || isPrePost)))
          && "Invalid arguments");
 
   // Operand 0 of a pre- and post-indexed store is the address base writeback.
@@ -1235,14 +1241,16 @@
     unsigned short NumOps, unsigned &NumOpsAdded, bool isStore) {
 
   const TargetInstrDesc &TID = ARMInsts[Opcode];
-  unsigned short NumDefs = TID.getNumDefs();
   bool isPrePost = isPrePostLdSt(TID.TSFlags);
   const TargetOperandInfo *OpInfo = TID.OpInfo;
+  if (!OpInfo) return false;
+
   unsigned &OpIdx = NumOpsAdded;
 
   OpIdx = 0;
 
-  assert(((!isStore && NumDefs > 0) || (isStore && (NumDefs == 0 || isPrePost)))
+  assert(((!isStore && TID.getNumDefs() > 0) ||
+          (isStore && (TID.getNumDefs() == 0 || isPrePost)))
          && "Invalid arguments");
 
   // Operand 0 of a pre- and post-indexed store is the address base writeback.
@@ -1391,6 +1399,8 @@
     unsigned short NumOps, unsigned &NumOpsAdded, BO) {
 
   const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+  if (!OpInfo) return false;
+
   unsigned &OpIdx = NumOpsAdded;
 
   OpIdx = 0;
@@ -1681,6 +1691,7 @@
 
   const TargetInstrDesc &TID = ARMInsts[Opcode];
   const TargetOperandInfo *OpInfo = TID.OpInfo;
+  if (!OpInfo) return false;
 
   bool SP = slice(insn, 8, 8) == 0; // A8.6.295 & A8.6.297
   bool fixed_point = slice(insn, 17, 17) == 1; // A8.6.297
@@ -2766,6 +2777,7 @@
 
   const TargetInstrDesc &TID = ARMInsts[Opcode];
   const TargetOperandInfo *OpInfo = TID.OpInfo;
+  if (!OpInfo) return false;
 
   assert(NumOps >= 3 &&
          OpInfo[0].RegClass == ARM::DPRRegClassID &&
@@ -2828,10 +2840,10 @@
     unsigned short NumOps, unsigned &NumOpsAdded, BO) {
 
   const TargetInstrDesc &TID = ARMInsts[Opcode];
-  unsigned short NumDefs = TID.getNumDefs();
   const TargetOperandInfo *OpInfo = TID.OpInfo;
+  if (!OpInfo) return false;
 
-  assert(NumDefs == 1 && NumOps >= 3 &&
+  assert(TID.getNumDefs() == 1 && NumOps >= 3 &&
          OpInfo[0].RegClass == ARM::GPRRegClassID &&
          OpInfo[1].RegClass == ARM::DPRRegClassID &&
          OpInfo[2].RegClass == 0 &&
@@ -2862,10 +2874,10 @@
     unsigned short NumOps, unsigned &NumOpsAdded, BO) {
 
   const TargetInstrDesc &TID = ARMInsts[Opcode];
-  unsigned short NumDefs = TID.getNumDefs();
   const TargetOperandInfo *OpInfo = TID.OpInfo;
+  if (!OpInfo) return false;
 
-  assert(NumDefs == 1 && NumOps >= 3 &&
+  assert(TID.getNumDefs() == 1 && NumOps >= 3 &&
          OpInfo[0].RegClass == ARM::DPRRegClassID &&
          OpInfo[1].RegClass == ARM::DPRRegClassID &&
          TID.getOperandConstraint(1, TOI::TIED_TO) != -1 &&




From johnny.chen at apple.com  Wed Apr  7 17:21:03 2010
From: johnny.chen at apple.com (Johnny Chen)
Date: Wed, 07 Apr 2010 22:21:03 -0000
Subject: [llvm-commits] [llvm] r100697 -
 /llvm/trunk/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp
Message-ID: <20100407222103.F22662A6C12C@llvm.org>

Author: johnny
Date: Wed Apr  7 17:21:03 2010
New Revision: 100697

URL: http://llvm.org/viewvc/llvm-project?rev=100697&view=rev
Log:
Missed this one line for the previous checkin to fix build warnings.

Modified:
    llvm/trunk/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp

Modified: llvm/trunk/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp?rev=100697&r1=100696&r2=100697&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp (original)
+++ llvm/trunk/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp Wed Apr  7 17:21:03 2010
@@ -1127,7 +1127,6 @@
     unsigned short NumOps, unsigned &NumOpsAdded, bool isStore) {
 
   const TargetInstrDesc &TID = ARMInsts[Opcode];
-  unsigned short NumDefs = TID.getNumDefs();
   bool isPrePost = isPrePostLdSt(TID.TSFlags);
   const TargetOperandInfo *OpInfo = TID.OpInfo;
   if (!OpInfo) return false;




From gohman at apple.com  Wed Apr  7 17:27:08 2010
From: gohman at apple.com (Dan Gohman)
Date: Wed, 07 Apr 2010 22:27:08 -0000
Subject: [llvm-commits] [llvm] r100699 - in /llvm/trunk:
 include/llvm/Analysis/IVUsers.h
 include/llvm/Analysis/ScalarEvolutionExpander.h
 include/llvm/Analysis/ScalarEvolutionNormalization.h
 lib/Analysis/IVUsers.cpp lib/Analysis/ScalarEvolutionExpander.cpp
 lib/Analysis/ScalarEvolutionNormalization.cpp
 lib/Transforms/Scalar/IndVarSimplify.cpp
 lib/Transforms/Scalar/LoopStrengthReduce.cpp
 test/CodeGen/X86/multiple-loop-post-inc.ll
 test/Transforms/LoopStrengthReduce/quadradic-exit-value.ll
Message-ID: <20100407222708.7F0AE2A6C12C@llvm.org>

Author: djg
Date: Wed Apr  7 17:27:08 2010
New Revision: 100699

URL: http://llvm.org/viewvc/llvm-project?rev=100699&view=rev
Log:
Generalize IVUsers to track arbitrary expressions rather than expressions
explicitly split into stride-and-offset pairs. Also, add the
ability to track multiple post-increment loops on the same expression.

This refines the concept of "normalizing" SCEV expressions used for
post-increment uses, and introduces a dedicated utility routine for
normalizing and denormalizing expressions.

This fixes the expansion of expressions which are post-increment users
of more than one loop at a time. More broadly, this takes LSR another
step closer to being able to reason about more than one loop at a time.

Added:
    llvm/trunk/include/llvm/Analysis/ScalarEvolutionNormalization.h
    llvm/trunk/lib/Analysis/ScalarEvolutionNormalization.cpp
    llvm/trunk/test/CodeGen/X86/multiple-loop-post-inc.ll
Modified:
    llvm/trunk/include/llvm/Analysis/IVUsers.h
    llvm/trunk/include/llvm/Analysis/ScalarEvolutionExpander.h
    llvm/trunk/lib/Analysis/IVUsers.cpp
    llvm/trunk/lib/Analysis/ScalarEvolutionExpander.cpp
    llvm/trunk/lib/Transforms/Scalar/IndVarSimplify.cpp
    llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp
    llvm/trunk/test/Transforms/LoopStrengthReduce/quadradic-exit-value.ll

Modified: llvm/trunk/include/llvm/Analysis/IVUsers.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/IVUsers.h?rev=100699&r1=100698&r2=100699&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Analysis/IVUsers.h (original)
+++ llvm/trunk/include/llvm/Analysis/IVUsers.h Wed Apr  7 17:27:08 2010
@@ -16,6 +16,7 @@
 #define LLVM_ANALYSIS_IVUSERS_H
 
 #include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolutionNormalization.h"
 #include "llvm/Support/ValueHandle.h"
 
 namespace llvm {
@@ -26,17 +27,18 @@
 class IVUsers;
 class ScalarEvolution;
 class SCEV;
+class IVUsers;
 
 /// IVStrideUse - Keep track of one use of a strided induction variable.
 /// The Expr member keeps track of the expression, User is the actual user
 /// instruction of the operand, and 'OperandValToReplace' is the operand of
 /// the User that is the use.
 class IVStrideUse : public CallbackVH, public ilist_node {
+  friend class IVUsers;
 public:
-  IVStrideUse(IVUsers *P, const SCEV *S, const SCEV *Off,
+  IVStrideUse(IVUsers *P, const SCEV *E,
               Instruction* U, Value *O)
-    : CallbackVH(U), Parent(P), Stride(S), Offset(Off),
-      OperandValToReplace(O), IsUseOfPostIncrementedValue(false) {
+    : CallbackVH(U), Parent(P), Expr(E), OperandValToReplace(O) {
   }
 
   /// getUser - Return the user instruction for this use.
@@ -53,24 +55,16 @@
   /// this IVStrideUse.
   IVUsers *getParent() const { return Parent; }
 
-  /// getStride - Return the expression for the stride for the use.
-  const SCEV *getStride() const { return Stride; }
+  /// getExpr - Return the expression for the use.
+  const SCEV *getExpr() const { return Expr; }
 
-  /// setStride - Assign a new stride to this use.
-  void setStride(const SCEV *Val) {
-    Stride = Val;
-  }
-
-  /// getOffset - Return the offset to add to a theoretical induction
-  /// variable that starts at zero and counts up by the stride to compute
-  /// the value for the use. This always has the same type as the stride.
-  const SCEV *getOffset() const { return Offset; }
-
-  /// setOffset - Assign a new offset to this use.
-  void setOffset(const SCEV *Val) {
-    Offset = Val;
+  /// setExpr - Assign a new expression to this use.
+  void setExpr(const SCEV *Val) {
+    Expr = Val;
   }
 
+  const SCEV *getStride(const Loop *L) const;
+
   /// getOperandValToReplace - Return the Value of the operand in the user
   /// instruction that this IVStrideUse is representing.
   Value *getOperandValToReplace() const {
@@ -83,37 +77,30 @@
     OperandValToReplace = Op;
   }
 
-  /// isUseOfPostIncrementedValue - True if this should use the
-  /// post-incremented version of this IV, not the preincremented version.
-  /// This can only be set in special cases, such as the terminating setcc
-  /// instruction for a loop or uses dominated by the loop.
-  bool isUseOfPostIncrementedValue() const {
-    return IsUseOfPostIncrementedValue;
+  /// getPostIncLoops - Return the set of loops for which the expression has
+  /// been adjusted to use post-inc mode.
+  const PostIncLoopSet &getPostIncLoops() const {
+    return PostIncLoops;
   }
 
-  /// setIsUseOfPostIncrmentedValue - set the flag that indicates whether
-  /// this is a post-increment use.
-  void setIsUseOfPostIncrementedValue(bool Val) {
-    IsUseOfPostIncrementedValue = Val;
-  }
+  /// transformToPostInc - Transform the expression to post-inc form for the
+  /// given loop.
+  void transformToPostInc(const Loop *L);
 
 private:
   /// Parent - a pointer to the IVUsers that owns this IVStrideUse.
   IVUsers *Parent;
 
-  /// Stride - The stride for this use.
-  const SCEV *Stride;
-
-  /// Offset - The offset to add to the base induction expression.
-  const SCEV *Offset;
+  /// Expr - The expression for this use.
+  const SCEV *Expr;
 
   /// OperandValToReplace - The Value of the operand in the user instruction
   /// that this IVStrideUse is representing.
   WeakVH OperandValToReplace;
 
-  /// IsUseOfPostIncrementedValue - True if this should use the
-  /// post-incremented version of this IV, not the preincremented version.
-  bool IsUseOfPostIncrementedValue;
+  /// PostIncLoops - The set of loops for which Expr has been adjusted to
+  /// use post-inc mode. This corresponds with SCEVExpander's post-inc concept.
+  PostIncLoopSet PostIncLoops;
 
   /// Deleted - Implementation of CallbackVH virtual function to
   /// receive notification when the User is deleted.
@@ -174,18 +161,13 @@
   /// return true.  Otherwise, return false.
   bool AddUsersIfInteresting(Instruction *I);
 
-  IVStrideUse &AddUser(const SCEV *Stride, const SCEV *Offset,
+  IVStrideUse &AddUser(const SCEV *Expr,
                        Instruction *User, Value *Operand);
 
   /// getReplacementExpr - Return a SCEV expression which computes the
   /// value of the OperandValToReplace of the given IVStrideUse.
   const SCEV *getReplacementExpr(const IVStrideUse &U) const;
 
-  /// getCanonicalExpr - Return a SCEV expression which computes the
-  /// value of the SCEV of the given IVStrideUse, ignoring the 
-  /// isUseOfPostIncrementedValue flag.
-  const SCEV *getCanonicalExpr(const IVStrideUse &U) const;
-
   typedef ilist::iterator iterator;
   typedef ilist::const_iterator const_iterator;
   iterator begin() { return IVUses.begin(); }

Modified: llvm/trunk/include/llvm/Analysis/ScalarEvolutionExpander.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/ScalarEvolutionExpander.h?rev=100699&r1=100698&r2=100699&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Analysis/ScalarEvolutionExpander.h (original)
+++ llvm/trunk/include/llvm/Analysis/ScalarEvolutionExpander.h Wed Apr  7 17:27:08 2010
@@ -15,6 +15,7 @@
 #define LLVM_ANALYSIS_SCALAREVOLUTION_EXPANDER_H
 
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/ScalarEvolutionNormalization.h"
 #include "llvm/Support/IRBuilder.h"
 #include "llvm/Support/TargetFolder.h"
 #include 
@@ -32,12 +33,12 @@
       InsertedExpressions;
     std::set InsertedValues;
 
-    /// PostIncLoop - When non-null, expanded addrecs referring to the given
-    /// loop expanded in post-inc mode. For example, expanding {1,+,1} in
-    /// post-inc mode returns the add instruction that adds one to the phi
-    /// for {0,+,1}, as opposed to a new phi starting at 1. This is only
-    /// supported in non-canonical mode.
-    const Loop *PostIncLoop;
+    /// PostIncLoops - Addrecs referring to any of the given loops are expanded
+    /// in post-inc mode. For example, expanding {1,+,1} in post-inc mode
+    /// returns the add instruction that adds one to the phi for {0,+,1},
+    /// as opposed to a new phi starting at 1. This is only supported in
+    /// non-canonical mode.
+    PostIncLoopSet PostIncLoops;
 
     /// IVIncInsertPos - When this is non-null, addrecs expanded in the
     /// loop it indicates should be inserted with increments at
@@ -62,7 +63,7 @@
   public:
     /// SCEVExpander - Construct a SCEVExpander in "canonical" mode.
     explicit SCEVExpander(ScalarEvolution &se)
-      : SE(se), PostIncLoop(0), IVIncInsertLoop(0), CanonicalMode(true),
+      : SE(se), IVIncInsertLoop(0), CanonicalMode(true),
         Builder(se.getContext(), TargetFolder(se.TD)) {}
 
     /// clear - Erase the contents of the InsertedExpressions map so that users
@@ -89,14 +90,18 @@
       IVIncInsertPos = Pos;
     }
 
-    /// setPostInc - If L is non-null, enable post-inc expansion for addrecs
-    /// referring to the given loop. If L is null, disable post-inc expansion
-    /// completely. Post-inc expansion is only supported in non-canonical
+    /// setPostInc - Enable post-inc expansion for addrecs referring to the
+    /// given loops. Post-inc expansion is only supported in non-canonical
     /// mode.
-    void setPostInc(const Loop *L) {
+    void setPostInc(const PostIncLoopSet &L) {
       assert(!CanonicalMode &&
              "Post-inc expansion is not supported in CanonicalMode");
-      PostIncLoop = L;
+      PostIncLoops = L;
+    }
+
+    /// clearPostInc - Disable all post-inc expansion.
+    void clearPostInc() {
+      PostIncLoops.clear();
     }
 
     /// disableCanonicalMode - Disable the behavior of expanding expressions in

Added: llvm/trunk/include/llvm/Analysis/ScalarEvolutionNormalization.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/ScalarEvolutionNormalization.h?rev=100699&view=auto
==============================================================================
--- llvm/trunk/include/llvm/Analysis/ScalarEvolutionNormalization.h (added)
+++ llvm/trunk/include/llvm/Analysis/ScalarEvolutionNormalization.h Wed Apr  7 17:27:08 2010
@@ -0,0 +1,78 @@
+//===- llvm/Analysis/ScalarEvolutionNormalization.h - See below -*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines utilities for working with "normalized" ScalarEvolution
+// expressions.
+//
+// The following example illustrates post-increment uses and how normalized
+// expressions help.
+//
+//   for (i=0; i!=n; ++i) {
+//     ...
+//   }
+//   use(i);
+//
+// While the expression for most uses of i inside the loop is {0,+,1}<%L>, the
+// expression for the use of i outside the loop is {1,+,1}<%L>, since i is
+// incremented at the end of the loop body. This is inconvenient, since it
+// suggests that we need two different induction variables, one that starts
+// at 0 and one that starts at 1. We'd prefer to be able to think of these as
+// the same induction variable, with uses inside the loop using the
+// "pre-incremented" value, and uses after the loop using the
+// "post-incremented" value.
+//
+// Expressions for post-incremented uses are represented as an expression
+// paired with a set of loops for which the expression is in "post-increment"
+// mode (there may be multiple loops).
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_SCALAREVOLUTION_NORMALIZATION_H
+#define LLVM_ANALYSIS_SCALAREVOLUTION_NORMALIZATION_H
+
+#include "llvm/ADT/SmallPtrSet.h"
+
+namespace llvm {
+
+class Instruction;
+class DominatorTree;
+class Loop;
+class ScalarEvolution;
+class SCEV;
+class Value;
+
+/// TransformKind - Different types of transformations that
+/// TransformForPostIncUse can do.
+enum TransformKind {
+  /// Normalize - Normalize according to the given loops.
+  Normalize,
+  /// NormalizeAutodetect - Detect post-inc opportunities on new expressions,
+  /// update the given loop set, and normalize.
+  NormalizeAutodetect,
+  /// Denormalize - Perform the inverse transform on the expression with the
+  /// given loop set.
+  Denormalize
+};
+
+/// PostIncLoopSet - A set of loops.
+typedef SmallPtrSet PostIncLoopSet;
+
+/// TransformForPostIncUse - Transform the given expression according to the
+/// given transformation kind.
+const SCEV *TransformForPostIncUse(TransformKind Kind,
+                                   const SCEV *S,
+                                   Instruction *User,
+                                   Value *OperandValToReplace,
+                                   PostIncLoopSet &Loops,
+                                   ScalarEvolution &SE,
+                                   DominatorTree &DT);
+
+}
+
+#endif

Modified: llvm/trunk/lib/Analysis/IVUsers.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/IVUsers.cpp?rev=100699&r1=100698&r2=100699&view=diff
==============================================================================
--- llvm/trunk/lib/Analysis/IVUsers.cpp (original)
+++ llvm/trunk/lib/Analysis/IVUsers.cpp Wed Apr  7 17:27:08 2010
@@ -62,120 +62,34 @@
   Ops.push_back(S);
 }
 
-/// getSCEVStartAndStride - Compute the start and stride of this expression,
-/// returning false if the expression is not a start/stride pair, or true if it
-/// is.  The stride must be a loop invariant expression, but the start may be
-/// a mix of loop invariant and loop variant expressions.  The start cannot,
-/// however, contain an AddRec from a different loop, unless that loop is an
-/// outer loop of the current loop.
-static bool getSCEVStartAndStride(const SCEV *&SH, Loop *L, Loop *UseLoop,
-                                  const SCEV *&Start, const SCEV *&Stride,
-                                  ScalarEvolution *SE, DominatorTree *DT) {
-  const SCEV *TheAddRec = Start;   // Initialize to zero.
-
-  // If the outer level is an AddExpr, the operands are all start values except
-  // for a nested AddRecExpr.
-  if (const SCEVAddExpr *AE = dyn_cast(SH)) {
-    for (unsigned i = 0, e = AE->getNumOperands(); i != e; ++i)
-      if (const SCEVAddRecExpr *AddRec =
-             dyn_cast(AE->getOperand(i)))
-        TheAddRec = SE->getAddExpr(AddRec, TheAddRec);
-      else
-        Start = SE->getAddExpr(Start, AE->getOperand(i));
-  } else if (isa(SH)) {
-    TheAddRec = SH;
-  } else {
-    return false;  // not analyzable.
-  }
-
-  // Break down TheAddRec into its component parts.
-  SmallVector Subexprs;
-  CollectSubexprs(TheAddRec, Subexprs, *SE);
-
-  // Look for an addrec on the current loop among the parts.
-  const SCEV *AddRecStride = 0;
-  for (SmallVectorImpl::iterator I = Subexprs.begin(),
-       E = Subexprs.end(); I != E; ++I) {
-    const SCEV *S = *I;
-    if (const SCEVAddRecExpr *AR = dyn_cast(S))
-      if (AR->getLoop() == L) {
-        *I = AR->getStart();
-        AddRecStride = AR->getStepRecurrence(*SE);
-        break;
-      }
-  }
-  if (!AddRecStride)
-    return false;
+/// isInteresting - Test whether the given expression is "interesting" when
+/// used by the given instruction, within the context of analyzing the
+/// given loop.
+static bool isInteresting(const SCEV *S, const Instruction *I, const Loop *L) {
+  // Anything loop-invariant is interesting.
+  if (!isa(S) && S->isLoopInvariant(L))
+    return true;
 
-  // Add up everything else into a start value (which may not be
-  // loop-invariant).
-  const SCEV *AddRecStart = SE->getAddExpr(Subexprs);
-
-  // Use getSCEVAtScope to attempt to simplify other loops out of
-  // the picture.
-  AddRecStart = SE->getSCEVAtScope(AddRecStart, UseLoop);
-
-  Start = SE->getAddExpr(Start, AddRecStart);
-
-  // If stride is an instruction, make sure it properly dominates the header.
-  // Otherwise we could end up with a use before def situation.
-  if (!isa(AddRecStride)) {
-    BasicBlock *Header = L->getHeader();
-    if (!AddRecStride->properlyDominates(Header, DT))
-      return false;
-
-    DEBUG(dbgs() << "[";
-          WriteAsOperand(dbgs(), L->getHeader(), /*PrintType=*/false);
-          dbgs() << "] Variable stride: " << *AddRecStride << "\n");
+  // An addrec is interesting if it's affine or if it has an interesting start.
+  if (const SCEVAddRecExpr *AR = dyn_cast(S)) {
+    // Keep things simple. Don't touch loop-variant strides.
+    if (AR->getLoop() == L && (AR->isAffine() || !L->contains(I)))
+        return true;
+    // Otherwise recurse to see if the start value is interesting.
+    return isInteresting(AR->getStart(), I, L);
   }
 
-  Stride = AddRecStride;
-  return true;
-}
-
-/// IVUseShouldUsePostIncValue - We have discovered a "User" of an IV expression
-/// and now we need to decide whether the user should use the preinc or post-inc
-/// value.  If this user should use the post-inc version of the IV, return true.
-///
-/// Choosing wrong here can break dominance properties (if we choose to use the
-/// post-inc value when we cannot) or it can end up adding extra live-ranges to
-/// the loop, resulting in reg-reg copies (if we use the pre-inc value when we
-/// should use the post-inc value).
-static bool IVUseShouldUsePostIncValue(Instruction *User, Instruction *IV,
-                                       const Loop *L, DominatorTree *DT) {
-  // If the user is in the loop, use the preinc value.
-  if (L->contains(User)) return false;
-
-  BasicBlock *LatchBlock = L->getLoopLatch();
-  if (!LatchBlock)
+  // An add is interesting if any of its operands is.
+  if (const SCEVAddExpr *Add = dyn_cast(S)) {
+    for (SCEVAddExpr::op_iterator OI = Add->op_begin(), OE = Add->op_end();
+         OI != OE; ++OI)
+      if (isInteresting(*OI, I, L))
+        return true;
     return false;
+  }
 
-  // Ok, the user is outside of the loop.  If it is dominated by the latch
-  // block, use the post-inc value.
-  if (DT->dominates(LatchBlock, User->getParent()))
-    return true;
-
-  // There is one case we have to be careful of: PHI nodes.  These little guys
-  // can live in blocks that are not dominated by the latch block, but (since
-  // their uses occur in the predecessor block, not the block the PHI lives in)
-  // should still use the post-inc value.  Check for this case now.
-  PHINode *PN = dyn_cast(User);
-  if (!PN) return false;  // not a phi, not dominated by latch block.
-
-  // Look at all of the uses of IV by the PHI node.  If any use corresponds to
-  // a block that is not dominated by the latch block, give up and use the
-  // preincremented value.
-  unsigned NumUses = 0;
-  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
-    if (PN->getIncomingValue(i) == IV) {
-      ++NumUses;
-      if (!DT->dominates(LatchBlock, PN->getIncomingBlock(i)))
-        return false;
-    }
-
-  // Okay, all uses of IV by PN are in predecessor blocks that really are
-  // dominated by the latch block.  Use the post-incremented value.
-  return true;
+  // Nothing else is interesting here.
+  return false;
 }
 
 /// AddUsersIfInteresting - Inspect the specified instruction.  If it is a
@@ -196,16 +110,9 @@
   const SCEV *ISE = SE->getSCEV(I);
   if (isa(ISE)) return false;
 
-  // Get the start and stride for this expression.
-  Loop *UseLoop = LI->getLoopFor(I->getParent());
-  const SCEV *Start = SE->getIntegerSCEV(0, ISE->getType());
-  const SCEV *Stride = Start;
-
-  if (!getSCEVStartAndStride(ISE, L, UseLoop, Start, Stride, SE, DT))
-    return false;  // Non-reducible symbolic expression, bail out.
-
-  // Keep things simple. Don't touch loop-variant strides.
-  if (!Stride->isLoopInvariant(L) && L->contains(I))
+  // If we've come to an uninteresting expression, stop the traversal and
+  // call this a user.
+  if (!isInteresting(ISE, I, L))
     return false;
 
   SmallPtrSet UniqueUsers;
@@ -241,27 +148,24 @@
     }
 
     if (AddUserToIVUsers) {
-      // Okay, we found a user that we cannot reduce.  Analyze the instruction
-      // and decide what to do with it.  If we are a use inside of the loop, use
-      // the value before incrementation, otherwise use it after incrementation.
-      if (IVUseShouldUsePostIncValue(User, I, L, DT)) {
-        // The value used will be incremented by the stride more than we are
-        // expecting, so subtract this off.
-        const SCEV *NewStart = SE->getMinusSCEV(Start, Stride);
-        IVUses.push_back(new IVStrideUse(this, Stride, NewStart, User, I));
-        IVUses.back().setIsUseOfPostIncrementedValue(true);
-        DEBUG(dbgs() << "   USING POSTINC SCEV, START=" << *NewStart<< "\n");
-      } else {
-        IVUses.push_back(new IVStrideUse(this, Stride, Start, User, I));
-      }
+      // Okay, we found a user that we cannot reduce.
+      IVUses.push_back(new IVStrideUse(this, ISE, User, I));
+      IVStrideUse &NewUse = IVUses.back();
+      // Transform the expression into a normalized form.
+      NewUse.Expr =
+        TransformForPostIncUse(NormalizeAutodetect, NewUse.Expr,
+                               User, I,
+                               NewUse.PostIncLoops,
+                               *SE, *DT);
+      DEBUG(dbgs() << "   NORMALIZED TO: " << *NewUse.Expr << '\n');
     }
   }
   return true;
 }
 
-IVStrideUse &IVUsers::AddUser(const SCEV *Stride, const SCEV *Offset,
+IVStrideUse &IVUsers::AddUser(const SCEV *Expr,
                               Instruction *User, Value *Operand) {
-  IVUses.push_back(new IVStrideUse(this, Stride, Offset, User, Operand));
+  IVUses.push_back(new IVStrideUse(this, Expr, User, Operand));
   return IVUses.back();
 }
 
@@ -295,30 +199,10 @@
 /// getReplacementExpr - Return a SCEV expression which computes the
 /// value of the OperandValToReplace of the given IVStrideUse.
 const SCEV *IVUsers::getReplacementExpr(const IVStrideUse &U) const {
-  // Start with zero.
-  const SCEV *RetVal = SE->getIntegerSCEV(0, U.getStride()->getType());
-  // Create the basic add recurrence.
-  RetVal = SE->getAddRecExpr(RetVal, U.getStride(), L);
-  // Add the offset in a separate step, because it may be loop-variant.
-  RetVal = SE->getAddExpr(RetVal, U.getOffset());
-  // For uses of post-incremented values, add an extra stride to compute
-  // the actual replacement value.
-  if (U.isUseOfPostIncrementedValue())
-    RetVal = SE->getAddExpr(RetVal, U.getStride());
-  return RetVal;
-}
-
-/// getCanonicalExpr - Return a SCEV expression which computes the
-/// value of the SCEV of the given IVStrideUse, ignoring the 
-/// isUseOfPostIncrementedValue flag.
-const SCEV *IVUsers::getCanonicalExpr(const IVStrideUse &U) const {
-  // Start with zero.
-  const SCEV *RetVal = SE->getIntegerSCEV(0, U.getStride()->getType());
-  // Create the basic add recurrence.
-  RetVal = SE->getAddRecExpr(RetVal, U.getStride(), L);
-  // Add the offset in a separate step, because it may be loop-variant.
-  RetVal = SE->getAddExpr(RetVal, U.getOffset());
-  return RetVal;
+  PostIncLoopSet &Loops = const_cast(U.PostIncLoops);
+  return TransformForPostIncUse(Denormalize, U.getExpr(),
+                                U.getUser(), U.getOperandValToReplace(),
+                                Loops, *SE, *DT);
 }
 
 void IVUsers::print(raw_ostream &OS, const Module *M) const {
@@ -339,8 +223,13 @@
     WriteAsOperand(OS, UI->getOperandValToReplace(), false);
     OS << " = "
        << *getReplacementExpr(*UI);
-    if (UI->isUseOfPostIncrementedValue())
-      OS << " (post-inc)";
+    for (PostIncLoopSet::const_iterator
+         I = UI->PostIncLoops.begin(),
+         E = UI->PostIncLoops.end(); I != E; ++I) {
+      OS << " (post-inc with loop ";
+      WriteAsOperand(OS, (*I)->getHeader(), false);
+      OS << ")";
+    }
     OS << " in  ";
     UI->getUser()->print(OS, &Annotator);
     OS << '\n';
@@ -356,6 +245,39 @@
   IVUses.clear();
 }
 
+static const SCEVAddRecExpr *findAddRecForLoop(const SCEV *S, const Loop *L) {
+  if (const SCEVAddRecExpr *AR = dyn_cast(S)) {
+    if (AR->getLoop() == L)
+      return AR;
+    return findAddRecForLoop(AR->getStart(), L);
+  }
+
+  if (const SCEVAddExpr *Add = dyn_cast(S)) {
+    for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
+         I != E; ++I)
+      if (const SCEVAddRecExpr *AR = findAddRecForLoop(*I, L))
+        return AR;
+    return 0;
+  }
+
+  return 0;
+}
+
+const SCEV *IVStrideUse::getStride(const Loop *L) const {
+  if (const SCEVAddRecExpr *AR = findAddRecForLoop(getExpr(), L))
+    return AR->getStepRecurrence(*Parent->SE);
+  return 0;
+}
+
+void IVStrideUse::transformToPostInc(const Loop *L) {
+  PostIncLoopSet Loops;
+  Loops.insert(L);
+  Expr = TransformForPostIncUse(Normalize, Expr,
+                                getUser(), getOperandValToReplace(),
+                                Loops, *Parent->SE, *Parent->DT);
+  PostIncLoops.insert(L);
+}
+
 void IVStrideUse::deleted() {
   // Remove this user from the list.
   Parent->IVUses.erase(this);

Modified: llvm/trunk/lib/Analysis/ScalarEvolutionExpander.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/ScalarEvolutionExpander.cpp?rev=100699&r1=100698&r2=100699&view=diff
==============================================================================
--- llvm/trunk/lib/Analysis/ScalarEvolutionExpander.cpp (original)
+++ llvm/trunk/lib/Analysis/ScalarEvolutionExpander.cpp Wed Apr  7 17:27:08 2010
@@ -966,9 +966,12 @@
   // Determine a normalized form of this expression, which is the expression
   // before any post-inc adjustment is made.
   const SCEVAddRecExpr *Normalized = S;
-  if (L == PostIncLoop) {
-    const SCEV *Step = S->getStepRecurrence(SE);
-    Normalized = cast(SE.getMinusSCEV(S, Step));
+  if (PostIncLoops.count(L)) {
+    PostIncLoopSet Loops;
+    Loops.insert(L);
+    Normalized =
+      cast(TransformForPostIncUse(Normalize, S, 0, 0,
+                                                  Loops, SE, *SE.DT));
   }
 
   // Strip off any non-loop-dominating component from the addrec start.
@@ -1002,7 +1005,7 @@
 
   // Accommodate post-inc mode, if necessary.
   Value *Result;
-  if (L != PostIncLoop)
+  if (!PostIncLoops.count(L))
     Result = PN;
   else {
     // In PostInc mode, use the post-incremented value.
@@ -1274,7 +1277,7 @@
       // If the SCEV is computable at this level, insert it into the header
       // after the PHIs (and after any other instructions that we've inserted
       // there) so that it is guaranteed to dominate any user inside the loop.
-      if (L && S->hasComputableLoopEvolution(L) && L != PostIncLoop)
+      if (L && S->hasComputableLoopEvolution(L) && !PostIncLoops.count(L))
         InsertPt = L->getHeader()->getFirstNonPHI();
       while (isInsertedInstruction(InsertPt) || isa(InsertPt))
         InsertPt = llvm::next(BasicBlock::iterator(InsertPt));
@@ -1296,7 +1299,7 @@
   Value *V = visit(S);
 
   // Remember the expanded value for this SCEV at this location.
-  if (!PostIncLoop)
+  if (PostIncLoops.empty())
     InsertedExpressions[std::make_pair(S, InsertPt)] = V;
 
   restoreInsertPoint(SaveInsertBB, SaveInsertPt);
@@ -1304,7 +1307,7 @@
 }
 
 void SCEVExpander::rememberInstruction(Value *I) {
-  if (!PostIncLoop)
+  if (PostIncLoops.empty())
     InsertedValues.insert(I);
 
   // If we just claimed an existing instruction and that instruction had

Added: llvm/trunk/lib/Analysis/ScalarEvolutionNormalization.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/ScalarEvolutionNormalization.cpp?rev=100699&view=auto
==============================================================================
--- llvm/trunk/lib/Analysis/ScalarEvolutionNormalization.cpp (added)
+++ llvm/trunk/lib/Analysis/ScalarEvolutionNormalization.cpp Wed Apr  7 17:27:08 2010
@@ -0,0 +1,150 @@
+//===- ScalarEvolutionNormalization.cpp - See below -------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements utilities for working with "normalized" expressions.
+// See the comments at the top of ScalarEvolutionNormalization.h for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/ScalarEvolutionNormalization.h"
+using namespace llvm;
+
+/// IVUseShouldUsePostIncValue - We have discovered a "User" of an IV expression
+/// and now we need to decide whether the user should use the preinc or post-inc
+/// value.  If this user should use the post-inc version of the IV, return true.
+///
+/// Choosing wrong here can break dominance properties (if we choose to use the
+/// post-inc value when we cannot) or it can end up adding extra live-ranges to
+/// the loop, resulting in reg-reg copies (if we use the pre-inc value when we
+/// should use the post-inc value).
+static bool IVUseShouldUsePostIncValue(Instruction *User, Instruction *IV,
+                                       const Loop *L, DominatorTree *DT) {
+  // If the user is in the loop, use the preinc value.
+  if (L->contains(User)) return false;
+
+  // If the loop reports no latch block there is nothing to test dominance
+  // against below, so conservatively keep the preinc value.
+  BasicBlock *LatchBlock = L->getLoopLatch();
+  if (!LatchBlock)
+    return false;
+
+  // Ok, the user is outside of the loop.  If it is dominated by the latch
+  // block, use the post-inc value.
+  if (DT->dominates(LatchBlock, User->getParent()))
+    return true;
+
+  // There is one case we have to be careful of: PHI nodes.  These little guys
+  // can live in blocks that are not dominated by the latch block, but (since
+  // their uses occur in the predecessor block, not the block the PHI lives in)
+  // should still use the post-inc value.  Check for this case now.
+  PHINode *PN = dyn_cast<PHINode>(User);
+  if (!PN) return false;  // not a phi, not dominated by latch block.
+
+  // Look at all of the uses of IV by the PHI node.  If any use corresponds to
+  // a block that is not dominated by the latch block, give up and use the
+  // preincremented value.  Incoming values other than IV are not uses of IV
+  // and are skipped.
+  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+    if (PN->getIncomingValue(i) == IV &&
+        !DT->dominates(LatchBlock, PN->getIncomingBlock(i)))
+      return false;
+
+  // Okay, all uses of IV by PN are in predecessor blocks that really are
+  // dominated by the latch block.  Use the post-incremented value.
+  return true;
+}
+
+const SCEV *llvm::TransformForPostIncUse(TransformKind Kind,
+                                         const SCEV *S,
+                                         Instruction *User,
+                                         Value *OperandValToReplace,
+                                         PostIncLoopSet &Loops,
+                                         ScalarEvolution &SE,
+                                         DominatorTree &DT) {
+  if (isa<SCEVConstant>(S) || isa<SCEVUnknown>(S))
+    return S;
+  // Casts: transform the operand and rebuild the same cast if it changed.
+  if (const SCEVCastExpr *X = dyn_cast<SCEVCastExpr>(S)) {
+    const SCEV *N = TransformForPostIncUse(Kind, X->getOperand(), User,
+                                           OperandValToReplace, Loops, SE, DT);
+    if (N != X->getOperand())
+      switch (S->getSCEVType()) {
+      case scZeroExtend: return SE.getZeroExtendExpr(N, S->getType());
+      case scSignExtend: return SE.getSignExtendExpr(N, S->getType());
+      case scTruncate: return SE.getTruncateExpr(N, S->getType());
+      default: llvm_unreachable("Unexpected SCEVCastExpr kind!");
+      }
+    return S;
+  }
+  // N-ary expressions: transform every operand, noting whether any changed.
+  if (const SCEVNAryExpr *X = dyn_cast<SCEVNAryExpr>(S)) {
+    SmallVector<const SCEV *, 8> Operands;
+    bool Changed = false;
+    for (SCEVNAryExpr::op_iterator I = X->op_begin(), E = X->op_end();
+         I != E; ++I) {
+      const SCEV *O = *I;
+      const SCEV *N = TransformForPostIncUse(Kind, O, User, OperandValToReplace,
+                                             Loops, SE, DT);
+      Changed |= N != O;
+      Operands.push_back(N);
+    }
+    if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
+      // An addrec. This is the interesting part.
+      const Loop *L = AR->getLoop();
+      const SCEV *Result = SE.getAddRecExpr(Operands, L);
+      switch (Kind) {
+      default: llvm_unreachable("Unexpected transform name!");
+      case NormalizeAutodetect: // Decide per use; record the loop if post-inc.
+        if (Instruction *OI = dyn_cast<Instruction>(OperandValToReplace))
+          if (IVUseShouldUsePostIncValue(User, OI, L, &DT)) {
+            Result = SE.getMinusSCEV(Result, AR->getStepRecurrence(SE));
+            Loops.insert(L);
+          }
+        break;
+      case Normalize: // Subtract the step for loops already in the set.
+        if (Loops.count(L))
+          Result = SE.getMinusSCEV(Result, AR->getStepRecurrence(SE));
+        break;
+      case Denormalize: // Add the transformed step back for loops in the set.
+        if (Loops.count(L)) {
+          const SCEV *TransformedStep =
+            TransformForPostIncUse(Kind, AR->getStepRecurrence(SE),
+                                   User, OperandValToReplace, Loops, SE, DT);
+          Result = SE.getAddExpr(Result, TransformedStep);
+        }
+        break;
+      }
+      return Result;
+    }
+    // Any other n-ary kind: rebuild it only if some operand changed.
+    if (Changed)
+      switch (S->getSCEVType()) {
+      case scAddExpr: return SE.getAddExpr(Operands);
+      case scMulExpr: return SE.getMulExpr(Operands);
+      case scSMaxExpr: return SE.getSMaxExpr(Operands);
+      case scUMaxExpr: return SE.getUMaxExpr(Operands);
+      default: llvm_unreachable("Unexpected SCEVNAryExpr kind!");
+      }
+    return S;
+  }
+  if (const SCEVUDivExpr *X = dyn_cast<SCEVUDivExpr>(S)) {
+    const SCEV *LN = TransformForPostIncUse(Kind, X->getLHS(), User,
+                                            OperandValToReplace, Loops, SE, DT);
+    const SCEV *RN = TransformForPostIncUse(Kind, X->getRHS(), User,
+                                            OperandValToReplace, Loops, SE, DT);
+    if (LN != X->getLHS() || RN != X->getRHS())
+      return SE.getUDivExpr(LN, RN);
+    return S;
+  }
+  llvm_unreachable("Unexpected SCEV kind!");
+  return 0;
+}

Modified: llvm/trunk/lib/Transforms/Scalar/IndVarSimplify.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/IndVarSimplify.cpp?rev=100699&r1=100698&r2=100699&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/IndVarSimplify.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/IndVarSimplify.cpp Wed Apr  7 17:27:08 2010
@@ -454,6 +454,46 @@
   return Changed;
 }
 
+// FIXME: It is an extremely bad idea to indvar substitute anything more
+// complex than affine induction variables.  Doing so will put expensive
+// polynomial evaluations inside of the loop, and the str reduction pass
+// currently can only reduce affine polynomials.  For now just disable
+// indvar subst on anything more complex than an affine addrec, unless
+// it can be expanded to a trivial value.
+static bool isSafe(const SCEV *S, const Loop *L) {
+  // Loop-invariant values are safe.
+  if (S->isLoopInvariant(L)) return true;
+
+  // Affine addrecs are safe. Non-affine are not, because LSR doesn't know how
+  // to transform them into efficient code.
+  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
+    return AR->isAffine();
+
+  // A commutative expression is safe if all its operands are safe.
+  if (const SCEVCommutativeExpr *Commutative = dyn_cast<SCEVCommutativeExpr>(S)) {
+    for (SCEVCommutativeExpr::op_iterator I = Commutative->op_begin(),
+         E = Commutative->op_end(); I != E; ++I)
+      if (!isSafe(*I, L)) return false;
+    return true;
+  }
+
+  // A cast is safe if its operand is.
+  if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S))
+    return isSafe(C->getOperand(), L);
+
+  // A udiv is safe if its operands are.
+  if (const SCEVUDivExpr *UD = dyn_cast<SCEVUDivExpr>(S))
+    return isSafe(UD->getLHS(), L) &&
+           isSafe(UD->getRHS(), L);
+
+  // SCEVUnknown is always safe.
+  if (isa<SCEVUnknown>(S))
+    return true;
+
+  // Nothing else is safe.
+  return false;
+}
+
 void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) {
   SmallVector DeadInsts;
 
@@ -465,7 +505,6 @@
   // the need for the code evaluation methods to insert induction variables
   // of different sizes.
   for (IVUsers::iterator UI = IU->begin(), E = IU->end(); UI != E; ++UI) {
-    const SCEV *Stride = UI->getStride();
     Value *Op = UI->getOperandValToReplace();
     const Type *UseTy = Op->getType();
     Instruction *User = UI->getUser();
@@ -486,7 +525,7 @@
     // currently can only reduce affine polynomials.  For now just disable
     // indvar subst on anything more complex than an affine addrec, unless
     // it can be expanded to a trivial value.
-    if (!AR->isLoopInvariant(L) && !Stride->isLoopInvariant(L))
+    if (!isSafe(AR, L))
       continue;
 
     // Determine the insertion point for this user. By default, insert

Modified: llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp?rev=100699&r1=100698&r2=100699&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp Wed Apr  7 17:27:08 2010
@@ -781,10 +781,10 @@
   /// will be replaced.
   Value *OperandValToReplace;
 
-  /// PostIncLoop - If this user is to use the post-incremented value of an
+  /// PostIncLoops - If this user is to use the post-incremented value of an
   /// induction variable, this variable is non-null and holds the loop
   /// associated with the induction variable.
-  const Loop *PostIncLoop;
+  PostIncLoopSet PostIncLoops;
 
   /// LUIdx - The index of the LSRUse describing the expression which
   /// this fixup needs, minus an offset (below).
@@ -795,6 +795,8 @@
   /// offsets, for example in an unrolled loop.
   int64_t Offset;
 
+  bool isUseFullyOutsideLoop(const Loop *L) const;
+
   LSRFixup();
 
   void print(raw_ostream &OS) const;
@@ -804,9 +806,24 @@
 }
 
 LSRFixup::LSRFixup()
-  : UserInst(0), OperandValToReplace(0), PostIncLoop(0),
+  : UserInst(0), OperandValToReplace(0),
     LUIdx(~size_t(0)), Offset(0) {}
 
+/// isUseFullyOutsideLoop - Test whether this fixup always uses its
+/// value outside of the given loop.
+bool LSRFixup::isUseFullyOutsideLoop(const Loop *L) const {
+  // PHI nodes use their value in their incoming blocks.
+  if (const PHINode *PN = dyn_cast<PHINode>(UserInst)) {
+    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+      if (PN->getIncomingValue(i) == OperandValToReplace &&
+          L->contains(PN->getIncomingBlock(i)))
+        return false;
+    return true;
+  }
+
+  return !L->contains(UserInst);
+}
+
 void LSRFixup::print(raw_ostream &OS) const {
   OS << "UserInst=";
   // Store is common and interesting enough to be worth special-casing.
@@ -821,9 +838,10 @@
   OS << ", OperandValToReplace=";
   WriteAsOperand(OS, OperandValToReplace, /*PrintType=*/false);
 
-  if (PostIncLoop) {
+  for (PostIncLoopSet::const_iterator I = PostIncLoops.begin(),
+       E = PostIncLoops.end(); I != E; ++I) {
     OS << ", PostIncLoop=";
-    WriteAsOperand(OS, PostIncLoop->getHeader(), /*PrintType=*/false);
+    WriteAsOperand(OS, (*I)->getHeader(), /*PrintType=*/false);
   }
 
   if (LUIdx != ~size_t(0))
@@ -1545,8 +1563,9 @@
             !DT.properlyDominates(UI->getUser()->getParent(), ExitingBlock)) {
           // Conservatively assume there may be reuse if the quotient of their
           // strides could be a legal scale.
-          const SCEV *A = CondUse->getStride();
-          const SCEV *B = UI->getStride();
+          const SCEV *A = CondUse->getStride(L);
+          const SCEV *B = UI->getStride(L);
+          if (!A || !B) continue;
           if (SE.getTypeSizeInBits(A->getType()) !=
               SE.getTypeSizeInBits(B->getType())) {
             if (SE.getTypeSizeInBits(A->getType()) >
@@ -1598,7 +1617,7 @@
         ExitingBlock->getInstList().insert(TermBr, Cond);
 
         // Clone the IVUse, as the old use still exists!
-        CondUse = &IU.AddUser(CondUse->getStride(), CondUse->getOffset(),
+        CondUse = &IU.AddUser(CondUse->getExpr(),
                               Cond, CondUse->getOperandValToReplace());
         TermBr->replaceUsesOfWith(OldCond, Cond);
       }
@@ -1607,9 +1626,7 @@
     // If we get to here, we know that we can transform the setcc instruction to
     // use the post-incremented version of the IV, allowing us to coalesce the
     // live ranges for the IV correctly.
-    CondUse->setOffset(SE.getMinusSCEV(CondUse->getOffset(),
-                                       CondUse->getStride()));
-    CondUse->setIsUseOfPostIncrementedValue(true);
+    CondUse->transformToPostInc(L);
     Changed = true;
 
     PostIncs.insert(Cond);
@@ -1717,19 +1734,24 @@
   SmallSetVector Strides;
 
   // Collect interesting types and strides.
+  SmallVector Worklist;
   for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI) {
-    const SCEV *Stride = UI->getStride();
+    const SCEV *Expr = UI->getExpr();
 
     // Collect interesting types.
-    Types.insert(SE.getEffectiveSCEVType(Stride->getType()));
+    Types.insert(SE.getEffectiveSCEVType(Expr->getType()));
 
-    // Add the stride for this loop.
-    Strides.insert(Stride);
-
-    // Add strides for other mentioned loops.
-    for (const SCEVAddRecExpr *AR = dyn_cast(UI->getOffset());
-         AR; AR = dyn_cast(AR->getStart()))
-      Strides.insert(AR->getStepRecurrence(SE));
+    // Add strides for mentioned loops.
+    Worklist.push_back(Expr);
+    do {
+      const SCEV *S = Worklist.pop_back_val();
+      if (const SCEVAddRecExpr *AR = dyn_cast(S)) {
+        Strides.insert(AR->getStepRecurrence(SE));
+        Worklist.push_back(AR->getStart());
+      } else if (const SCEVAddExpr *Add = dyn_cast(S)) {
+        Worklist.insert(Worklist.end(), Add->op_begin(), Add->op_end());
+      }
+    } while (!Worklist.empty());
   }
 
   // Compute interesting factors from the set of interesting strides.
@@ -1776,8 +1798,7 @@
     LSRFixup &LF = getNewFixup();
     LF.UserInst = UI->getUser();
     LF.OperandValToReplace = UI->getOperandValToReplace();
-    if (UI->isUseOfPostIncrementedValue())
-      LF.PostIncLoop = L;
+    LF.PostIncLoops = UI->getPostIncLoops();
 
     LSRUse::KindType Kind = LSRUse::Basic;
     const Type *AccessTy = 0;
@@ -1786,7 +1807,7 @@
       AccessTy = getAccessType(LF.UserInst);
     }
 
-    const SCEV *S = IU.getCanonicalExpr(*UI);
+    const SCEV *S = UI->getExpr();
 
     // Equality (== and !=) ICmps are special. We can rewrite (i == N) as
     // (N - i == 0), and this allows (N - i) to be the expression that we work
@@ -1824,7 +1845,7 @@
     LF.LUIdx = P.first;
     LF.Offset = P.second;
     LSRUse &LU = Uses[LF.LUIdx];
-    LU.AllFixupsOutsideLoop &= !L->contains(LF.UserInst);
+    LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
 
     // If this is the first use of this LSRUse, give it a formula.
     if (LU.Formulae.empty()) {
@@ -1936,7 +1957,7 @@
         LF.LUIdx = P.first;
         LF.Offset = P.second;
         LSRUse &LU = Uses[LF.LUIdx];
-        LU.AllFixupsOutsideLoop &= L->contains(LF.UserInst);
+        LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
         InsertSupplementalFormula(U, LU, LF.LUIdx);
         CountRegisters(LU.Formulae.back(), Uses.size() - 1);
         break;
@@ -2783,8 +2804,8 @@
                            SmallVectorImpl &DeadInsts) const {
   const LSRUse &LU = Uses[LF.LUIdx];
 
-  // Then, collect some instructions which we will remain dominated by when
-  // expanding the replacement. These must be dominated by any operands that
+  // Then, collect some instructions which must be dominated by the
+  // expanding replacement. These must be dominated by any operands that
   // will be required in the expansion.
   SmallVector Inputs;
   if (Instruction *I = dyn_cast(LF.OperandValToReplace))
@@ -2793,8 +2814,8 @@
     if (Instruction *I =
           dyn_cast(cast(LF.UserInst)->getOperand(1)))
       Inputs.push_back(I);
-  if (LF.PostIncLoop) {
-    if (!L->contains(LF.UserInst))
+  if (LF.PostIncLoops.count(L)) {
+    if (LF.isUseFullyOutsideLoop(L))
       Inputs.push_back(L->getLoopLatch()->getTerminator());
     else
       Inputs.push_back(IVIncInsertPos);
@@ -2831,7 +2852,7 @@
 
   // Inform the Rewriter if we have a post-increment use, so that it can
   // perform an advantageous expansion.
-  Rewriter.setPostInc(LF.PostIncLoop);
+  Rewriter.setPostInc(LF.PostIncLoops);
 
   // This is the type that the user actually needs.
   const Type *OpTy = LF.OperandValToReplace->getType();
@@ -2855,24 +2876,11 @@
     const SCEV *Reg = *I;
     assert(!Reg->isZero() && "Zero allocated in a base register!");
 
-    // If we're expanding for a post-inc user for the add-rec's loop, make the
-    // post-inc adjustment.
-    const SCEV *Start = Reg;
-    while (const SCEVAddRecExpr *AR = dyn_cast(Start)) {
-      if (AR->getLoop() == LF.PostIncLoop) {
-        Reg = SE.getAddExpr(Reg, AR->getStepRecurrence(SE));
-        // If the user is inside the loop, insert the code after the increment
-        // so that it is dominated by its operand. If the original insert point
-        // was already dominated by the increment, keep it, because there may
-        // be loop-variant operands that need to be respected also.
-        if (L->contains(LF.UserInst) && !DT.dominates(IVIncInsertPos, IP)) {
-          IP = IVIncInsertPos;
-          while (isa(IP)) ++IP;
-        }
-        break;
-      }
-      Start = AR->getStart();
-    }
+    // If we're expanding for a post-inc user, make the post-inc adjustment.
+    PostIncLoopSet &Loops = const_cast(LF.PostIncLoops);
+    Reg = TransformForPostIncUse(Denormalize, Reg,
+                                 LF.UserInst, LF.OperandValToReplace,
+                                 Loops, SE, DT);
 
     Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, 0, IP)));
   }
@@ -2889,11 +2897,11 @@
   if (F.AM.Scale != 0) {
     const SCEV *ScaledS = F.ScaledReg;
 
-    // If we're expanding for a post-inc user for the add-rec's loop, make the
-    // post-inc adjustment.
-    if (const SCEVAddRecExpr *AR = dyn_cast(ScaledS))
-      if (AR->getLoop() == LF.PostIncLoop)
-        ScaledS = SE.getAddExpr(ScaledS, AR->getStepRecurrence(SE));
+    // If we're expanding for a post-inc user, make the post-inc adjustment.
+    PostIncLoopSet &Loops = const_cast(LF.PostIncLoops);
+    ScaledS = TransformForPostIncUse(Denormalize, ScaledS,
+                                     LF.UserInst, LF.OperandValToReplace,
+                                     Loops, SE, DT);
 
     if (LU.Kind == LSRUse::ICmpZero) {
       // An interesting way of "folding" with an icmp is to use a negated
@@ -2954,7 +2962,7 @@
   Value *FullV = Rewriter.expandCodeFor(FullS, Ty, IP);
 
   // We're done expanding now, so reset the rewriter.
-  Rewriter.setPostInc(0);
+  Rewriter.clearPostInc();
 
   // An ICmpZero Formula represents an ICmp which we're handling as a
   // comparison against zero. Now that we've expanded an expression for that

Added: llvm/trunk/test/CodeGen/X86/multiple-loop-post-inc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/multiple-loop-post-inc.ll?rev=100699&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/multiple-loop-post-inc.ll (added)
+++ llvm/trunk/test/CodeGen/X86/multiple-loop-post-inc.ll Wed Apr  7 17:27:08 2010
@@ -0,0 +1,277 @@
+; RUN: llc -asm-verbose=false -disable-branch-fold -disable-code-place -disable-tail-duplicate -march=x86-64 < %s | FileCheck %s
+; rdar://7236213
+
+; CodeGen shouldn't require any lea instructions inside the marked loop.
+; It should properly set up post-increment uses and do coalescing for
+; the induction variables.
+
+; CHECK: # Start
+; CHECK-NOT: lea
+; CHECK: # Stop
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+define void @foo(float* %I, i64 %IS, float* nocapture %Start, float* nocapture %Step, float* %O, i64 %OS, i64 %N) nounwind {
+entry:
+  %times4 = alloca float, align 4                 ;  [#uses=3]
+  %timesN = alloca float, align 4                 ;  [#uses=2]
+  %0 = load float* %Step, align 4                 ;  [#uses=8]
+  %1 = ptrtoint float* %I to i64                  ;  [#uses=1]
+  %2 = ptrtoint float* %O to i64                  ;  [#uses=1]
+  %tmp = xor i64 %2, %1                           ;  [#uses=1]
+  %tmp16 = and i64 %tmp, 15                       ;  [#uses=1]
+  %3 = icmp eq i64 %tmp16, 0                      ;  [#uses=1]
+  %4 = trunc i64 %IS to i32                       ;  [#uses=1]
+  %5 = xor i32 %4, 1                              ;  [#uses=1]
+  %6 = trunc i64 %OS to i32                       ;  [#uses=1]
+  %7 = xor i32 %6, 1                              ;  [#uses=1]
+  %8 = or i32 %7, %5                              ;  [#uses=1]
+  %9 = icmp eq i32 %8, 0                          ;  [#uses=1]
+  br i1 %9, label %bb, label %return
+
+bb:                                               ; preds = %entry
+  %10 = load float* %Start, align 4               ;  [#uses=1]
+  br label %bb2
+
+bb1:                                              ; preds = %bb3
+  %11 = load float* %I_addr.0, align 4            ;  [#uses=1]
+  %12 = fmul float %11, %x.0                      ;  [#uses=1]
+  store float %12, float* %O_addr.0, align 4
+  %13 = fadd float %x.0, %0                       ;  [#uses=1]
+  %indvar.next53 = add i64 %14, 1                 ;  [#uses=1]
+  br label %bb2
+
+bb2:                                              ; preds = %bb1, %bb
+  %14 = phi i64 [ %indvar.next53, %bb1 ], [ 0, %bb ] ;  [#uses=21]
+  %x.0 = phi float [ %13, %bb1 ], [ %10, %bb ]    ;  [#uses=6]
+  %N_addr.0 = sub i64 %N, %14                     ;  [#uses=4]
+  %O_addr.0 = getelementptr float* %O, i64 %14    ;  [#uses=4]
+  %I_addr.0 = getelementptr float* %I, i64 %14    ;  [#uses=3]
+  %15 = icmp slt i64 %N_addr.0, 1                 ;  [#uses=1]
+  br i1 %15, label %bb4, label %bb3
+
+bb3:                                              ; preds = %bb2
+  %16 = ptrtoint float* %O_addr.0 to i64          ;  [#uses=1]
+  %17 = and i64 %16, 15                           ;  [#uses=1]
+  %18 = icmp eq i64 %17, 0                        ;  [#uses=1]
+  br i1 %18, label %bb4, label %bb1
+
+bb4:                                              ; preds = %bb3, %bb2
+  %19 = fmul float %0, 4.000000e+00               ;  [#uses=1]
+  store float %19, float* %times4, align 4
+  %20 = fmul float %0, 1.600000e+01               ;  [#uses=1]
+  store float %20, float* %timesN, align 4
+  %21 = fmul float %0, 0.000000e+00               ;  [#uses=1]
+  %22 = fadd float %21, %x.0                      ;  [#uses=1]
+  %23 = fadd float %x.0, %0                       ;  [#uses=1]
+  %24 = fmul float %0, 2.000000e+00               ;  [#uses=1]
+  %25 = fadd float %24, %x.0                      ;  [#uses=1]
+  %26 = fmul float %0, 3.000000e+00               ;  [#uses=1]
+  %27 = fadd float %26, %x.0                      ;  [#uses=1]
+  %28 = insertelement <4 x float> undef, float %22, i32 0 ; <<4 x float>> [#uses=1]
+  %29 = insertelement <4 x float> %28, float %23, i32 1 ; <<4 x float>> [#uses=1]
+  %30 = insertelement <4 x float> %29, float %25, i32 2 ; <<4 x float>> [#uses=1]
+  %31 = insertelement <4 x float> %30, float %27, i32 3 ; <<4 x float>> [#uses=5]
+  %asmtmp.i = call <4 x float> asm "movss $1, $0\09\0Apshufd $$0, $0, $0", "=x,*m,~{dirflag},~{fpsr},~{flags}"(float* %times4) nounwind ; <<4 x float>> [#uses=3]
+  %32 = fadd <4 x float> %31, %asmtmp.i           ; <<4 x float>> [#uses=3]
+  %33 = fadd <4 x float> %32, %asmtmp.i           ; <<4 x float>> [#uses=3]
+  %34 = fadd <4 x float> %33, %asmtmp.i           ; <<4 x float>> [#uses=2]
+  %asmtmp.i18 = call <4 x float> asm "movss $1, $0\09\0Apshufd $$0, $0, $0", "=x,*m,~{dirflag},~{fpsr},~{flags}"(float* %timesN) nounwind ; <<4 x float>> [#uses=8]
+  %35 = icmp sgt i64 %N_addr.0, 15                ;  [#uses=2]
+  br i1 %3, label %bb6.preheader, label %bb8
+
+bb6.preheader:                                    ; preds = %bb4
+  br i1 %35, label %bb.nph43, label %bb7
+
+bb.nph43:                                         ; preds = %bb6.preheader
+  %tmp108 = add i64 %14, 16                       ;  [#uses=1]
+  %tmp111 = add i64 %14, 4                        ;  [#uses=1]
+  %tmp115 = add i64 %14, 8                        ;  [#uses=1]
+  %tmp119 = add i64 %14, 12                       ;  [#uses=1]
+  %tmp134 = add i64 %N, -16                       ;  [#uses=1]
+  %tmp135 = sub i64 %tmp134, %14                  ;  [#uses=1]
+  call void asm sideeffect "# Start.", "~{dirflag},~{fpsr},~{flags}"() nounwind
+  br label %bb5
+
+bb5:                                              ; preds = %bb.nph43, %bb5
+  %indvar102 = phi i64 [ 0, %bb.nph43 ], [ %indvar.next103, %bb5 ] ;  [#uses=3]
+  %vX3.041 = phi <4 x float> [ %34, %bb.nph43 ], [ %45, %bb5 ] ; <<4 x float>> [#uses=2]
+  %vX0.039 = phi <4 x float> [ %31, %bb.nph43 ], [ %41, %bb5 ] ; <<4 x float>> [#uses=2]
+  %vX2.037 = phi <4 x float> [ %33, %bb.nph43 ], [ %46, %bb5 ] ; <<4 x float>> [#uses=2]
+  %vX1.036 = phi <4 x float> [ %32, %bb.nph43 ], [ %47, %bb5 ] ; <<4 x float>> [#uses=2]
+  %tmp104 = shl i64 %indvar102, 4                 ;  [#uses=5]
+  %tmp105 = add i64 %14, %tmp104                  ;  [#uses=2]
+  %scevgep106 = getelementptr float* %I, i64 %tmp105 ;  [#uses=1]
+  %scevgep106107 = bitcast float* %scevgep106 to <4 x float>* ; <<4 x float>*> [#uses=1]
+  %tmp109 = add i64 %tmp108, %tmp104              ;  [#uses=2]
+  %tmp112 = add i64 %tmp111, %tmp104              ;  [#uses=2]
+  %scevgep113 = getelementptr float* %I, i64 %tmp112 ;  [#uses=1]
+  %scevgep113114 = bitcast float* %scevgep113 to <4 x float>* ; <<4 x float>*> [#uses=1]
+  %tmp116 = add i64 %tmp115, %tmp104              ;  [#uses=2]
+  %scevgep117 = getelementptr float* %I, i64 %tmp116 ;  [#uses=1]
+  %scevgep117118 = bitcast float* %scevgep117 to <4 x float>* ; <<4 x float>*> [#uses=1]
+  %tmp120 = add i64 %tmp119, %tmp104              ;  [#uses=2]
+  %scevgep121 = getelementptr float* %I, i64 %tmp120 ;  [#uses=1]
+  %scevgep121122 = bitcast float* %scevgep121 to <4 x float>* ; <<4 x float>*> [#uses=1]
+  %scevgep123 = getelementptr float* %O, i64 %tmp105 ;  [#uses=1]
+  %scevgep123124 = bitcast float* %scevgep123 to <4 x float>* ; <<4 x float>*> [#uses=1]
+  %scevgep126 = getelementptr float* %O, i64 %tmp112 ;  [#uses=1]
+  %scevgep126127 = bitcast float* %scevgep126 to <4 x float>* ; <<4 x float>*> [#uses=1]
+  %scevgep128 = getelementptr float* %O, i64 %tmp116 ;  [#uses=1]
+  %scevgep128129 = bitcast float* %scevgep128 to <4 x float>* ; <<4 x float>*> [#uses=1]
+  %scevgep130 = getelementptr float* %O, i64 %tmp120 ;  [#uses=1]
+  %scevgep130131 = bitcast float* %scevgep130 to <4 x float>* ; <<4 x float>*> [#uses=1]
+  %tmp132 = mul i64 %indvar102, -16               ;  [#uses=1]
+  %tmp136 = add i64 %tmp135, %tmp132              ;  [#uses=2]
+  %36 = load <4 x float>* %scevgep106107, align 16 ; <<4 x float>> [#uses=1]
+  %37 = load <4 x float>* %scevgep113114, align 16 ; <<4 x float>> [#uses=1]
+  %38 = load <4 x float>* %scevgep117118, align 16 ; <<4 x float>> [#uses=1]
+  %39 = load <4 x float>* %scevgep121122, align 16 ; <<4 x float>> [#uses=1]
+  %40 = fmul <4 x float> %36, %vX0.039            ; <<4 x float>> [#uses=1]
+  %41 = fadd <4 x float> %vX0.039, %asmtmp.i18    ; <<4 x float>> [#uses=2]
+  %42 = fmul <4 x float> %37, %vX1.036            ; <<4 x float>> [#uses=1]
+  %43 = fmul <4 x float> %38, %vX2.037            ; <<4 x float>> [#uses=1]
+  %44 = fmul <4 x float> %39, %vX3.041            ; <<4 x float>> [#uses=1]
+  store <4 x float> %40, <4 x float>* %scevgep123124, align 16
+  store <4 x float> %42, <4 x float>* %scevgep126127, align 16
+  store <4 x float> %43, <4 x float>* %scevgep128129, align 16
+  store <4 x float> %44, <4 x float>* %scevgep130131, align 16
+  %45 = fadd <4 x float> %vX3.041, %asmtmp.i18    ; <<4 x float>> [#uses=1]
+  %46 = fadd <4 x float> %vX2.037, %asmtmp.i18    ; <<4 x float>> [#uses=1]
+  %47 = fadd <4 x float> %vX1.036, %asmtmp.i18    ; <<4 x float>> [#uses=1]
+  %48 = icmp sgt i64 %tmp136, 15                  ;  [#uses=1]
+  %indvar.next103 = add i64 %indvar102, 1         ;  [#uses=1]
+  br i1 %48, label %bb5, label %bb6.bb7_crit_edge
+
+bb6.bb7_crit_edge:                                ; preds = %bb5
+  call void asm sideeffect "# Stop.", "~{dirflag},~{fpsr},~{flags}"() nounwind
+  %scevgep110 = getelementptr float* %I, i64 %tmp109 ;  [#uses=1]
+  %scevgep125 = getelementptr float* %O, i64 %tmp109 ;  [#uses=1]
+  br label %bb7
+
+bb7:                                              ; preds = %bb6.bb7_crit_edge, %bb6.preheader
+  %I_addr.1.lcssa = phi float* [ %scevgep110, %bb6.bb7_crit_edge ], [ %I_addr.0, %bb6.preheader ] ;  [#uses=1]
+  %O_addr.1.lcssa = phi float* [ %scevgep125, %bb6.bb7_crit_edge ], [ %O_addr.0, %bb6.preheader ] ;  [#uses=1]
+  %vX0.0.lcssa = phi <4 x float> [ %41, %bb6.bb7_crit_edge ], [ %31, %bb6.preheader ] ; <<4 x float>> [#uses=1]
+  %N_addr.1.lcssa = phi i64 [ %tmp136, %bb6.bb7_crit_edge ], [ %N_addr.0, %bb6.preheader ] ;  [#uses=1]
+  %asmtmp.i17 = call <4 x float> asm "movss $1, $0\09\0Apshufd $$0, $0, $0", "=x,*m,~{dirflag},~{fpsr},~{flags}"(float* %times4) nounwind ; <<4 x float>> [#uses=0]
+  br label %bb11
+
+bb8:                                              ; preds = %bb4
+  br i1 %35, label %bb.nph, label %bb11
+
+bb.nph:                                           ; preds = %bb8
+  %I_addr.0.sum = add i64 %14, -1                 ;  [#uses=1]
+  %49 = getelementptr inbounds float* %I, i64 %I_addr.0.sum ;  [#uses=1]
+  %50 = bitcast float* %49 to <4 x float>*        ; <<4 x float>*> [#uses=1]
+  %51 = load <4 x float>* %50, align 16           ; <<4 x float>> [#uses=1]
+  %tmp54 = add i64 %14, 16                        ;  [#uses=1]
+  %tmp56 = add i64 %14, 3                         ;  [#uses=1]
+  %tmp60 = add i64 %14, 7                         ;  [#uses=1]
+  %tmp64 = add i64 %14, 11                        ;  [#uses=1]
+  %tmp68 = add i64 %14, 15                        ;  [#uses=1]
+  %tmp76 = add i64 %14, 4                         ;  [#uses=1]
+  %tmp80 = add i64 %14, 8                         ;  [#uses=1]
+  %tmp84 = add i64 %14, 12                        ;  [#uses=1]
+  %tmp90 = add i64 %N, -16                        ;  [#uses=1]
+  %tmp91 = sub i64 %tmp90, %14                    ;  [#uses=1]
+  br label %bb9
+
+bb9:                                              ; preds = %bb.nph, %bb9
+  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %bb9 ] ;  [#uses=3]
+  %vX3.125 = phi <4 x float> [ %34, %bb.nph ], [ %69, %bb9 ] ; <<4 x float>> [#uses=2]
+  %vX0.223 = phi <4 x float> [ %31, %bb.nph ], [ %65, %bb9 ] ; <<4 x float>> [#uses=2]
+  %vX2.121 = phi <4 x float> [ %33, %bb.nph ], [ %70, %bb9 ] ; <<4 x float>> [#uses=2]
+  %vX1.120 = phi <4 x float> [ %32, %bb.nph ], [ %71, %bb9 ] ; <<4 x float>> [#uses=2]
+  %vI0.019 = phi <4 x float> [ %51, %bb.nph ], [ %55, %bb9 ] ; <<4 x float>> [#uses=1]
+  %tmp51 = shl i64 %indvar, 4                     ;  [#uses=9]
+  %tmp55 = add i64 %tmp54, %tmp51                 ;  [#uses=2]
+  %tmp57 = add i64 %tmp56, %tmp51                 ;  [#uses=1]
+  %scevgep58 = getelementptr float* %I, i64 %tmp57 ;  [#uses=1]
+  %scevgep5859 = bitcast float* %scevgep58 to <4 x float>* ; <<4 x float>*> [#uses=1]
+  %tmp61 = add i64 %tmp60, %tmp51                 ;  [#uses=1]
+  %scevgep62 = getelementptr float* %I, i64 %tmp61 ;  [#uses=1]
+  %scevgep6263 = bitcast float* %scevgep62 to <4 x float>* ; <<4 x float>*> [#uses=1]
+  %tmp65 = add i64 %tmp64, %tmp51                 ;  [#uses=1]
+  %scevgep66 = getelementptr float* %I, i64 %tmp65 ;  [#uses=1]
+  %scevgep6667 = bitcast float* %scevgep66 to <4 x float>* ; <<4 x float>*> [#uses=1]
+  %tmp69 = add i64 %tmp68, %tmp51                 ;  [#uses=1]
+  %scevgep70 = getelementptr float* %I, i64 %tmp69 ;  [#uses=1]
+  %scevgep7071 = bitcast float* %scevgep70 to <4 x float>* ; <<4 x float>*> [#uses=1]
+  %tmp72 = add i64 %14, %tmp51                    ;  [#uses=1]
+  %scevgep73 = getelementptr float* %O, i64 %tmp72 ;  [#uses=1]
+  %scevgep7374 = bitcast float* %scevgep73 to <4 x float>* ; <<4 x float>*> [#uses=1]
+  %tmp77 = add i64 %tmp76, %tmp51                 ;  [#uses=1]
+  %scevgep78 = getelementptr float* %O, i64 %tmp77 ;  [#uses=1]
+  %scevgep7879 = bitcast float* %scevgep78 to <4 x float>* ; <<4 x float>*> [#uses=1]
+  %tmp81 = add i64 %tmp80, %tmp51                 ;  [#uses=1]
+  %scevgep82 = getelementptr float* %O, i64 %tmp81 ;  [#uses=1]
+  %scevgep8283 = bitcast float* %scevgep82 to <4 x float>* ; <<4 x float>*> [#uses=1]
+  %tmp85 = add i64 %tmp84, %tmp51                 ;  [#uses=1]
+  %scevgep86 = getelementptr float* %O, i64 %tmp85 ;  [#uses=1]
+  %scevgep8687 = bitcast float* %scevgep86 to <4 x float>* ; <<4 x float>*> [#uses=1]
+  %tmp88 = mul i64 %indvar, -16                   ;  [#uses=1]
+  %tmp92 = add i64 %tmp91, %tmp88                 ;  [#uses=2]
+  %52 = load <4 x float>* %scevgep5859, align 16  ; <<4 x float>> [#uses=2]
+  %53 = load <4 x float>* %scevgep6263, align 16  ; <<4 x float>> [#uses=2]
+  %54 = load <4 x float>* %scevgep6667, align 16  ; <<4 x float>> [#uses=2]
+  %55 = load <4 x float>* %scevgep7071, align 16  ; <<4 x float>> [#uses=2]
+  %56 = shufflevector <4 x float> %vI0.019, <4 x float> %52, <4 x i32>  ; <<4 x float>> [#uses=1]
+  %57 = shufflevector <4 x float> %56, <4 x float> undef, <4 x i32>  ; <<4 x float>> [#uses=1]
+  %58 = shufflevector <4 x float> %52, <4 x float> %53, <4 x i32>  ; <<4 x float>> [#uses=1]
+  %59 = shufflevector <4 x float> %58, <4 x float> undef, <4 x i32>  ; <<4 x float>> [#uses=1]
+  %60 = shufflevector <4 x float> %53, <4 x float> %54, <4 x i32>  ; <<4 x float>> [#uses=1]
+  %61 = shufflevector <4 x float> %60, <4 x float> undef, <4 x i32>  ; <<4 x float>> [#uses=1]
+  %62 = shufflevector <4 x float> %54, <4 x float> %55, <4 x i32>  ; <<4 x float>> [#uses=1]
+  %63 = shufflevector <4 x float> %62, <4 x float> undef, <4 x i32>  ; <<4 x float>> [#uses=1]
+  %64 = fmul <4 x float> %57, %vX0.223            ; <<4 x float>> [#uses=1]
+  %65 = fadd <4 x float> %vX0.223, %asmtmp.i18    ; <<4 x float>> [#uses=2]
+  %66 = fmul <4 x float> %59, %vX1.120            ; <<4 x float>> [#uses=1]
+  %67 = fmul <4 x float> %61, %vX2.121            ; <<4 x float>> [#uses=1]
+  %68 = fmul <4 x float> %63, %vX3.125            ; <<4 x float>> [#uses=1]
+  store <4 x float> %64, <4 x float>* %scevgep7374, align 16
+  store <4 x float> %66, <4 x float>* %scevgep7879, align 16
+  store <4 x float> %67, <4 x float>* %scevgep8283, align 16
+  store <4 x float> %68, <4 x float>* %scevgep8687, align 16
+  %69 = fadd <4 x float> %vX3.125, %asmtmp.i18    ; <<4 x float>> [#uses=1]
+  %70 = fadd <4 x float> %vX2.121, %asmtmp.i18    ; <<4 x float>> [#uses=1]
+  %71 = fadd <4 x float> %vX1.120, %asmtmp.i18    ; <<4 x float>> [#uses=1]
+  %72 = icmp sgt i64 %tmp92, 15                   ;  [#uses=1]
+  %indvar.next = add i64 %indvar, 1               ;  [#uses=1]
+  br i1 %72, label %bb9, label %bb10.bb11.loopexit_crit_edge
+
+bb10.bb11.loopexit_crit_edge:                     ; preds = %bb9
+  %scevgep = getelementptr float* %I, i64 %tmp55  ;  [#uses=1]
+  %scevgep75 = getelementptr float* %O, i64 %tmp55 ;  [#uses=1]
+  br label %bb11
+
+bb11:                                             ; preds = %bb8, %bb10.bb11.loopexit_crit_edge, %bb7
+  %N_addr.2 = phi i64 [ %N_addr.1.lcssa, %bb7 ], [ %tmp92, %bb10.bb11.loopexit_crit_edge ], [ %N_addr.0, %bb8 ] ;  [#uses=2]
+  %vX0.1 = phi <4 x float> [ %vX0.0.lcssa, %bb7 ], [ %65, %bb10.bb11.loopexit_crit_edge ], [ %31, %bb8 ] ; <<4 x float>> [#uses=1]
+  %O_addr.2 = phi float* [ %O_addr.1.lcssa, %bb7 ], [ %scevgep75, %bb10.bb11.loopexit_crit_edge ], [ %O_addr.0, %bb8 ] ;  [#uses=1]
+  %I_addr.2 = phi float* [ %I_addr.1.lcssa, %bb7 ], [ %scevgep, %bb10.bb11.loopexit_crit_edge ], [ %I_addr.0, %bb8 ] ;  [#uses=1]
+  %73 = extractelement <4 x float> %vX0.1, i32 0  ;  [#uses=2]
+  %74 = icmp sgt i64 %N_addr.2, 0                 ;  [#uses=1]
+  br i1 %74, label %bb12, label %bb14
+
+bb12:                                             ; preds = %bb11, %bb12
+  %indvar94 = phi i64 [ %indvar.next95, %bb12 ], [ 0, %bb11 ] ;  [#uses=3]
+  %x.130 = phi float [ %77, %bb12 ], [ %73, %bb11 ] ;  [#uses=2]
+  %I_addr.433 = getelementptr float* %I_addr.2, i64 %indvar94 ;  [#uses=1]
+  %O_addr.432 = getelementptr float* %O_addr.2, i64 %indvar94 ;  [#uses=1]
+  %75 = load float* %I_addr.433, align 4          ;  [#uses=1]
+  %76 = fmul float %75, %x.130                    ;  [#uses=1]
+  store float %76, float* %O_addr.432, align 4
+  %77 = fadd float %x.130, %0                     ;  [#uses=2]
+  %indvar.next95 = add i64 %indvar94, 1           ;  [#uses=2]
+  %exitcond = icmp eq i64 %indvar.next95, %N_addr.2 ;  [#uses=1]
+  br i1 %exitcond, label %bb14, label %bb12
+
+bb14:                                             ; preds = %bb12, %bb11
+  %x.1.lcssa = phi float [ %73, %bb11 ], [ %77, %bb12 ] ;  [#uses=1]
+  store float %x.1.lcssa, float* %Start, align 4
+  ret void
+
+return:                                           ; preds = %entry
+  ret void
+}

Modified: llvm/trunk/test/Transforms/LoopStrengthReduce/quadradic-exit-value.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopStrengthReduce/quadradic-exit-value.ll?rev=100699&r1=100698&r2=100699&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopStrengthReduce/quadradic-exit-value.ll (original)
+++ llvm/trunk/test/Transforms/LoopStrengthReduce/quadradic-exit-value.ll Wed Apr  7 17:27:08 2010
@@ -1,4 +1,4 @@
-; RUN: opt < %s -analyze -iv-users | grep {\{1,+,3,+,2\}<%loop> (post-inc)}
+; RUN: opt < %s -analyze -iv-users | grep {\{1,+,3,+,2\}<%loop> (post-inc with loop %loop)}
 
 ; The value of %r is dependent on a polynomial iteration expression.
 




From sabre at nondot.org  Wed Apr  7 17:29:10 2010
From: sabre at nondot.org (Chris Lattner)
Date: Wed, 07 Apr 2010 22:29:10 -0000
Subject: [llvm-commits] [llvm] r100700 -
	/llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
Message-ID: <20100407222910.C99272A6C12C@llvm.org>

Author: lattner
Date: Wed Apr  7 17:29:10 2010
New Revision: 100700

URL: http://llvm.org/viewvc/llvm-project?rev=100700&view=rev
Log:
tidy up

Modified:
    llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp

Modified: llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp?rev=100700&r1=100699&r2=100700&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp (original)
+++ llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp Wed Apr  7 17:29:10 2010
@@ -466,7 +466,7 @@
   OS << '\t' << AP.MAI->getCommentString() << "DEBUG_VALUE: ";
 
   // cast away const; DIetc do not take const operands for some reason.
-  DIVariable V((MDNode*)(MI->getOperand(2).getMetadata()));
+  DIVariable V(const_cast<MDNode*>(MI->getOperand(2).getMetadata()));
   OS << V.getName() << " <- ";
 
   // Register or immediate value. Register 0 means undef.
@@ -486,7 +486,8 @@
     }
   } else if (MI->getOperand(0).isImm()) {
     OS << MI->getOperand(0).getImm();
-  } else if (MI->getOperand(0).isReg()) {
+  } else {
+    assert(MI->getOperand(0).isReg() && "Unknown operand type");
     if (MI->getOperand(0).getReg() == 0) {
       // Suppress offset, it is not meaningful here.
       OS << "undef";
@@ -495,9 +496,8 @@
       return true;
     }
     OS << AP.TM.getRegisterInfo()->getName(MI->getOperand(0).getReg());
-  } else
-    llvm_unreachable("Unknown operand type");
-
+  }
+  
   OS << '+' << MI->getOperand(1).getImm();
   // NOTE: Want this comment at start of line, don't emit with AddComment.
   AP.OutStreamer.EmitRawText(OS.str());




From sabre at nondot.org  Wed Apr  7 17:41:29 2010
From: sabre at nondot.org (Chris Lattner)
Date: Wed, 07 Apr 2010 22:41:29 -0000
Subject: [llvm-commits] [llvm] r100702 - in /llvm/trunk:
 include/llvm/Support/ErrorHandling.h lib/VMCore/PassManager.cpp
Message-ID: <20100407224129.3D7B42A6C12C@llvm.org>

Author: lattner
Date: Wed Apr  7 17:41:29 2010
New Revision: 100702

URL: http://llvm.org/viewvc/llvm-project?rev=100702&view=rev
Log:
minor tidying up

Modified:
    llvm/trunk/include/llvm/Support/ErrorHandling.h
    llvm/trunk/lib/VMCore/PassManager.cpp

Modified: llvm/trunk/include/llvm/Support/ErrorHandling.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Support/ErrorHandling.h?rev=100702&r1=100701&r2=100702&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Support/ErrorHandling.h (original)
+++ llvm/trunk/include/llvm/Support/ErrorHandling.h Wed Apr  7 17:41:29 2010
@@ -25,7 +25,7 @@
   typedef void (*llvm_error_handler_t)(void *user_data,
                                        const std::string& reason);
 
-  /// llvm_instal_error_handler - Installs a new error handler to be used
+  /// llvm_install_error_handler - Installs a new error handler to be used
   /// whenever a serious (non-recoverable) error is encountered by LLVM.
   ///
   /// If you are using llvm_start_multithreaded, you should register the handler

Modified: llvm/trunk/lib/VMCore/PassManager.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/PassManager.cpp?rev=100702&r1=100701&r2=100702&view=diff
==============================================================================
--- llvm/trunk/lib/VMCore/PassManager.cpp (original)
+++ llvm/trunk/lib/VMCore/PassManager.cpp Wed Apr  7 17:41:29 2010
@@ -1293,9 +1293,8 @@
 bool FunctionPassManager::run(Function &F) {
   if (F.isMaterializable()) {
     std::string errstr;
-    if (F.Materialize(&errstr)) {
+    if (F.Materialize(&errstr))
       llvm_report_error("Error reading bitcode file: " + errstr);
-    }
   }
   return FPM->run(F);
 }




From sabre at nondot.org  Wed Apr  7 17:44:07 2010
From: sabre at nondot.org (Chris Lattner)
Date: Wed, 07 Apr 2010 22:44:07 -0000
Subject: [llvm-commits] [llvm] r100703 - in /llvm/trunk:
 include/llvm/CodeGen/SlotIndexes.h lib/CodeGen/Spiller.cpp
Message-ID: <20100407224407.BF82D2A6C12C@llvm.org>

Author: lattner
Date: Wed Apr  7 17:44:07 2010
New Revision: 100703

URL: http://llvm.org/viewvc/llvm-project?rev=100703&view=rev
Log:
remove some unneeded errorhandling stuff.

Modified:
    llvm/trunk/include/llvm/CodeGen/SlotIndexes.h
    llvm/trunk/lib/CodeGen/Spiller.cpp

Modified: llvm/trunk/include/llvm/CodeGen/SlotIndexes.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/SlotIndexes.h?rev=100703&r1=100702&r2=100703&view=diff
==============================================================================
--- llvm/trunk/include/llvm/CodeGen/SlotIndexes.h (original)
+++ llvm/trunk/include/llvm/CodeGen/SlotIndexes.h Wed Apr  7 17:44:07 2010
@@ -28,7 +28,6 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/Support/Allocator.h"
-#include "llvm/Support/ErrorHandling.h"
 
 namespace llvm {
 
@@ -37,8 +36,6 @@
   /// SlotIndex & SlotIndexes classes for the public interface to this
   /// information.
   class IndexListEntry {
-  private:
-
     static const unsigned EMPTY_KEY_INDEX = ~0U & ~3U,
                           TOMBSTONE_KEY_INDEX = ~0U & ~7U;
 
@@ -66,10 +63,9 @@
   public:
 
     IndexListEntry(MachineInstr *mi, unsigned index) : mi(mi), index(index) {
-      if (index == EMPTY_KEY_INDEX || index == TOMBSTONE_KEY_INDEX) {
-        llvm_report_error("Attempt to create invalid index. "
-                          "Available indexes may have been exhausted?.");
-      }
+      assert(index != EMPTY_KEY_INDEX && index != TOMBSTONE_KEY_INDEX &&
+             "Attempt to create invalid index. "
+             "Available indexes may have been exhausted?.");
     }
 
     bool isValid() const {

Modified: llvm/trunk/lib/CodeGen/Spiller.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/Spiller.cpp?rev=100703&r1=100702&r2=100703&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/Spiller.cpp (original)
+++ llvm/trunk/lib/CodeGen/Spiller.cpp Wed Apr  7 17:44:07 2010
@@ -46,7 +46,6 @@
 /// Utility class for spillers.
 class SpillerBase : public Spiller {
 protected:
-
   MachineFunction *mf;
   LiveIntervals *lis;
   MachineFrameInfo *mfi;
@@ -160,9 +159,11 @@
 
     return added;
   }
-
 };
 
+} // end anonymous namespace
+
+namespace {
 
 /// Spills any live range using the spill-everywhere method with no attempt at
 /// folding.
@@ -178,9 +179,12 @@
     // Ignore spillIs - we don't use it.
     return trivialSpillEverywhere(li);
   }
-
 };
 
+} // end anonymous namespace
+
+namespace {
+
 /// Falls back on LiveIntervals::addIntervalsForSpills.
 class StandardSpiller : public Spiller {
 protected:
@@ -198,9 +202,12 @@
                                    SlotIndex*) {
     return lis->addIntervalsForSpills(*li, spillIs, loopInfo, *vrm);
   }
-
 };
 
+} // end anonymous namespace
+
+namespace {
+
 /// When a call to spill is placed this spiller will first try to break the
 /// interval up into its component values (one new interval per value).
 /// If this fails, or if a call is placed to spill a previously split interval
@@ -513,15 +520,16 @@
 
 };
 
-}
+} // end anonymous namespace
+
 
 llvm::Spiller* llvm::createSpiller(MachineFunction *mf, LiveIntervals *lis,
                                    const MachineLoopInfo *loopInfo,
                                    VirtRegMap *vrm) {
   switch (spillerOpt) {
-    case trivial: return new TrivialSpiller(mf, lis, vrm); break;
-    case standard: return new StandardSpiller(lis, loopInfo, vrm); break;
-    case splitting: return new SplittingSpiller(mf, lis, loopInfo, vrm); break;
-    default: llvm_unreachable("Unreachable!"); break;
+  default: assert(0 && "unknown spiller");
+  case trivial: return new TrivialSpiller(mf, lis, vrm);
+  case standard: return new StandardSpiller(lis, loopInfo, vrm);
+  case splitting: return new SplittingSpiller(mf, lis, loopInfo, vrm);
   }
 }




From sabre at nondot.org  Wed Apr  7 17:53:18 2010
From: sabre at nondot.org (Chris Lattner)
Date: Wed, 07 Apr 2010 22:53:18 -0000
Subject: [llvm-commits] [llvm] r100705 - in /llvm/trunk/test:
 Analysis/BasicAA/ Assembler/ Bitcode/ CodeGen/MSP430/ CodeGen/PowerPC/
 CodeGen/SystemZ/ CodeGen/X86/ DebugInfo/ FrontendC/ Transforms/DeadArgElim/
 Transforms/DeadStoreElimination/ Transforms/GVN/ Transforms/IndVarSimplify/
 Transforms/Inline/ Transforms/InstCombine/ Transforms/LoopIndexSplit/
 Transforms/LoopStrengthReduce/
Message-ID: <20100407225318.500542A6C12C@llvm.org>

Author: lattner
Date: Wed Apr  7 17:53:17 2010
New Revision: 100705

URL: http://llvm.org/viewvc/llvm-project?rev=100705&view=rev
Log:
add newlines at the end of files.

Modified:
    llvm/trunk/test/Analysis/BasicAA/cas.ll
    llvm/trunk/test/Assembler/metadata.ll
    llvm/trunk/test/Bitcode/sse41_pmulld.ll
    llvm/trunk/test/CodeGen/MSP430/2009-05-17-Rot.ll
    llvm/trunk/test/CodeGen/MSP430/2009-05-17-Shift.ll
    llvm/trunk/test/CodeGen/PowerPC/rlwimi-keep-rsh.ll
    llvm/trunk/test/CodeGen/SystemZ/00-RetVoid.ll
    llvm/trunk/test/CodeGen/SystemZ/01-RetArg.ll
    llvm/trunk/test/CodeGen/SystemZ/02-RetAdd.ll
    llvm/trunk/test/CodeGen/SystemZ/02-RetAddImm.ll
    llvm/trunk/test/CodeGen/SystemZ/02-RetAnd.ll
    llvm/trunk/test/CodeGen/SystemZ/02-RetNeg.ll
    llvm/trunk/test/CodeGen/SystemZ/02-RetOr.ll
    llvm/trunk/test/CodeGen/SystemZ/02-RetSub.ll
    llvm/trunk/test/CodeGen/SystemZ/02-RetSubImm.ll
    llvm/trunk/test/CodeGen/SystemZ/02-RetXor.ll
    llvm/trunk/test/CodeGen/SystemZ/02-RetXorImm.ll
    llvm/trunk/test/CodeGen/SystemZ/2009-07-11-FloatBitConvert.ll
    llvm/trunk/test/CodeGen/X86/dllexport.ll
    llvm/trunk/test/CodeGen/X86/vec_shuffle-36.ll
    llvm/trunk/test/DebugInfo/2009-10-16-Phi.ll
    llvm/trunk/test/FrontendC/2008-11-02-WeakAlias.c
    llvm/trunk/test/Transforms/DeadArgElim/2009-03-17-MRE-Invoke.ll
    llvm/trunk/test/Transforms/DeadStoreElimination/crash.ll
    llvm/trunk/test/Transforms/GVN/invariant-simple.ll
    llvm/trunk/test/Transforms/GVN/lifetime-simple.ll
    llvm/trunk/test/Transforms/IndVarSimplify/crash.ll
    llvm/trunk/test/Transforms/Inline/externally_available.ll
    llvm/trunk/test/Transforms/InstCombine/2007-12-28-IcmpSub2.ll
    llvm/trunk/test/Transforms/InstCombine/odr-linkage.ll
    llvm/trunk/test/Transforms/InstCombine/vec_shuffle.ll
    llvm/trunk/test/Transforms/LoopIndexSplit/2008-02-08-Crash.ll
    llvm/trunk/test/Transforms/LoopStrengthReduce/pr2537.ll

Modified: llvm/trunk/test/Analysis/BasicAA/cas.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/BasicAA/cas.ll?rev=100705&r1=100704&r2=100705&view=diff
==============================================================================
--- llvm/trunk/test/Analysis/BasicAA/cas.ll (original)
+++ llvm/trunk/test/Analysis/BasicAA/cas.ll Wed Apr  7 17:53:17 2010
@@ -12,4 +12,4 @@
   ret i32 %d
 }
 
-declare i32 @llvm.atomic.swap.i32.p0i32(i32*, i32) nounwind
\ No newline at end of file
+declare i32 @llvm.atomic.swap.i32.p0i32(i32*, i32) nounwind

Modified: llvm/trunk/test/Assembler/metadata.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Assembler/metadata.ll?rev=100705&r1=100704&r2=100705&view=diff
==============================================================================
--- llvm/trunk/test/Assembler/metadata.ll (original)
+++ llvm/trunk/test/Assembler/metadata.ll Wed Apr  7 17:53:17 2010
@@ -19,4 +19,4 @@
 !foo = !{ !0 }
 !bar = !{ !1 }
 
-; !foo = !{ !0, !"foo" }
\ No newline at end of file
+; !foo = !{ !0, !"foo" }

Modified: llvm/trunk/test/Bitcode/sse41_pmulld.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Bitcode/sse41_pmulld.ll?rev=100705&r1=100704&r2=100705&view=diff
==============================================================================
--- llvm/trunk/test/Bitcode/sse41_pmulld.ll (original)
+++ llvm/trunk/test/Bitcode/sse41_pmulld.ll Wed Apr  7 17:53:17 2010
@@ -1,2 +1,2 @@
 ; RUN: llvm-dis < %s.bc | not grep {i32 @llvm\\.pmulld}
-; RUN: llvm-dis < %s.bc | grep mul
\ No newline at end of file
+; RUN: llvm-dis < %s.bc | grep mul

Modified: llvm/trunk/test/CodeGen/MSP430/2009-05-17-Rot.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/MSP430/2009-05-17-Rot.ll?rev=100705&r1=100704&r2=100705&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/MSP430/2009-05-17-Rot.ll (original)
+++ llvm/trunk/test/CodeGen/MSP430/2009-05-17-Rot.ll Wed Apr  7 17:53:17 2010
@@ -14,4 +14,4 @@
 return:
         %6 = load i16* %retval
         ret i16 %6
-}
\ No newline at end of file
+}

Modified: llvm/trunk/test/CodeGen/MSP430/2009-05-17-Shift.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/MSP430/2009-05-17-Shift.ll?rev=100705&r1=100704&r2=100705&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/MSP430/2009-05-17-Shift.ll (original)
+++ llvm/trunk/test/CodeGen/MSP430/2009-05-17-Shift.ll Wed Apr  7 17:53:17 2010
@@ -12,4 +12,4 @@
         %3 = load i16* %retval
         ret i16 %3
 
-}
\ No newline at end of file
+}

Modified: llvm/trunk/test/CodeGen/PowerPC/rlwimi-keep-rsh.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/rlwimi-keep-rsh.ll?rev=100705&r1=100704&r2=100705&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/rlwimi-keep-rsh.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/rlwimi-keep-rsh.ll Wed Apr  7 17:53:17 2010
@@ -25,4 +25,4 @@
 return:
   ret void
 ; CHECK: blr
-}
\ No newline at end of file
+}

Modified: llvm/trunk/test/CodeGen/SystemZ/00-RetVoid.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/SystemZ/00-RetVoid.ll?rev=100705&r1=100704&r2=100705&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/SystemZ/00-RetVoid.ll (original)
+++ llvm/trunk/test/CodeGen/SystemZ/00-RetVoid.ll Wed Apr  7 17:53:17 2010
@@ -3,4 +3,4 @@
 define void @foo() {
 entry:
     ret void
-}
\ No newline at end of file
+}

Modified: llvm/trunk/test/CodeGen/SystemZ/01-RetArg.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/SystemZ/01-RetArg.ll?rev=100705&r1=100704&r2=100705&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/SystemZ/01-RetArg.ll (original)
+++ llvm/trunk/test/CodeGen/SystemZ/01-RetArg.ll Wed Apr  7 17:53:17 2010
@@ -3,4 +3,4 @@
 define i64 @foo(i64 %a, i64 %b) {
 entry:
     ret i64 %b
-}
\ No newline at end of file
+}

Modified: llvm/trunk/test/CodeGen/SystemZ/02-RetAdd.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/SystemZ/02-RetAdd.ll?rev=100705&r1=100704&r2=100705&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/SystemZ/02-RetAdd.ll (original)
+++ llvm/trunk/test/CodeGen/SystemZ/02-RetAdd.ll Wed Apr  7 17:53:17 2010
@@ -3,4 +3,4 @@
 entry:
     %c = add i64 %a, %b
     ret i64 %c
-}
\ No newline at end of file
+}

Modified: llvm/trunk/test/CodeGen/SystemZ/02-RetAddImm.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/SystemZ/02-RetAddImm.ll?rev=100705&r1=100704&r2=100705&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/SystemZ/02-RetAddImm.ll (original)
+++ llvm/trunk/test/CodeGen/SystemZ/02-RetAddImm.ll Wed Apr  7 17:53:17 2010
@@ -3,4 +3,4 @@
 entry:
     %c = add i64 %a, 1
     ret i64 %c
-}
\ No newline at end of file
+}

Modified: llvm/trunk/test/CodeGen/SystemZ/02-RetAnd.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/SystemZ/02-RetAnd.ll?rev=100705&r1=100704&r2=100705&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/SystemZ/02-RetAnd.ll (original)
+++ llvm/trunk/test/CodeGen/SystemZ/02-RetAnd.ll Wed Apr  7 17:53:17 2010
@@ -4,4 +4,4 @@
 entry:
     %c = and i64 %a, %b
     ret i64 %c
-}
\ No newline at end of file
+}

Modified: llvm/trunk/test/CodeGen/SystemZ/02-RetNeg.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/SystemZ/02-RetNeg.ll?rev=100705&r1=100704&r2=100705&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/SystemZ/02-RetNeg.ll (original)
+++ llvm/trunk/test/CodeGen/SystemZ/02-RetNeg.ll Wed Apr  7 17:53:17 2010
@@ -4,4 +4,4 @@
 entry:
     %c = sub i64 0, %a
     ret i64 %c
-}
\ No newline at end of file
+}

Modified: llvm/trunk/test/CodeGen/SystemZ/02-RetOr.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/SystemZ/02-RetOr.ll?rev=100705&r1=100704&r2=100705&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/SystemZ/02-RetOr.ll (original)
+++ llvm/trunk/test/CodeGen/SystemZ/02-RetOr.ll Wed Apr  7 17:53:17 2010
@@ -3,4 +3,4 @@
 entry:
     %c = or i64 %a, %b
     ret i64 %c
-}
\ No newline at end of file
+}

Modified: llvm/trunk/test/CodeGen/SystemZ/02-RetSub.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/SystemZ/02-RetSub.ll?rev=100705&r1=100704&r2=100705&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/SystemZ/02-RetSub.ll (original)
+++ llvm/trunk/test/CodeGen/SystemZ/02-RetSub.ll Wed Apr  7 17:53:17 2010
@@ -4,4 +4,4 @@
 entry:
     %c = sub i64 %a, %b
     ret i64 %c
-}
\ No newline at end of file
+}

Modified: llvm/trunk/test/CodeGen/SystemZ/02-RetSubImm.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/SystemZ/02-RetSubImm.ll?rev=100705&r1=100704&r2=100705&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/SystemZ/02-RetSubImm.ll (original)
+++ llvm/trunk/test/CodeGen/SystemZ/02-RetSubImm.ll Wed Apr  7 17:53:17 2010
@@ -4,4 +4,4 @@
 entry:
     %c = sub i64 %a, 1
     ret i64 %c
-}
\ No newline at end of file
+}

Modified: llvm/trunk/test/CodeGen/SystemZ/02-RetXor.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/SystemZ/02-RetXor.ll?rev=100705&r1=100704&r2=100705&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/SystemZ/02-RetXor.ll (original)
+++ llvm/trunk/test/CodeGen/SystemZ/02-RetXor.ll Wed Apr  7 17:53:17 2010
@@ -3,4 +3,4 @@
 entry:
     %c = xor i64 %a, %b
     ret i64 %c
-}
\ No newline at end of file
+}

Modified: llvm/trunk/test/CodeGen/SystemZ/02-RetXorImm.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/SystemZ/02-RetXorImm.ll?rev=100705&r1=100704&r2=100705&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/SystemZ/02-RetXorImm.ll (original)
+++ llvm/trunk/test/CodeGen/SystemZ/02-RetXorImm.ll Wed Apr  7 17:53:17 2010
@@ -3,4 +3,4 @@
 entry:
     %c = xor i64 %a, 1
     ret i64 %c
-}
\ No newline at end of file
+}

Modified: llvm/trunk/test/CodeGen/SystemZ/2009-07-11-FloatBitConvert.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/SystemZ/2009-07-11-FloatBitConvert.ll?rev=100705&r1=100704&r2=100705&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/SystemZ/2009-07-11-FloatBitConvert.ll (original)
+++ llvm/trunk/test/CodeGen/SystemZ/2009-07-11-FloatBitConvert.ll Wed Apr  7 17:53:17 2010
@@ -13,4 +13,4 @@
 entry:
     %b = bitcast float %a to i32
     ret i32 %b
-}
\ No newline at end of file
+}

Modified: llvm/trunk/test/CodeGen/X86/dllexport.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/dllexport.ll?rev=100705&r1=100704&r2=100705&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/dllexport.ll (original)
+++ llvm/trunk/test/CodeGen/X86/dllexport.ll Wed Apr  7 17:53:17 2010
@@ -9,4 +9,4 @@
 }
 
 ; CHECK: .section .drectve
-; CHECK: -export:@foo@0
\ No newline at end of file
+; CHECK: -export:@foo@0

Modified: llvm/trunk/test/CodeGen/X86/vec_shuffle-36.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_shuffle-36.ll?rev=100705&r1=100704&r2=100705&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_shuffle-36.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_shuffle-36.ll Wed Apr  7 17:53:17 2010
@@ -13,4 +13,4 @@
 ; CHECK: pshufd
   %tmp10 = shufflevector <8 x i16> %t0, <8 x i16> undef, <8 x i32> < i32 undef, i32 2, i32 2, i32 2, i32 2, i32 2, i32 undef, i32 undef >
   ret <8 x i16> %tmp10
-}
\ No newline at end of file
+}

Modified: llvm/trunk/test/DebugInfo/2009-10-16-Phi.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/DebugInfo/2009-10-16-Phi.ll?rev=100705&r1=100704&r2=100705&view=diff
==============================================================================
--- llvm/trunk/test/DebugInfo/2009-10-16-Phi.ll (original)
+++ llvm/trunk/test/DebugInfo/2009-10-16-Phi.ll Wed Apr  7 17:53:17 2010
@@ -10,4 +10,4 @@
    ret i32 %0
 }
 
-!0 = metadata !{i32 42}
\ No newline at end of file
+!0 = metadata !{i32 42}

Modified: llvm/trunk/test/FrontendC/2008-11-02-WeakAlias.c
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/FrontendC/2008-11-02-WeakAlias.c?rev=100705&r1=100704&r2=100705&view=diff
==============================================================================
--- llvm/trunk/test/FrontendC/2008-11-02-WeakAlias.c (original)
+++ llvm/trunk/test/FrontendC/2008-11-02-WeakAlias.c Wed Apr  7 17:53:17 2010
@@ -2,4 +2,4 @@
 // PR2691
 
 void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ")));
-void native_init_IRQ(void) {}
\ No newline at end of file
+void native_init_IRQ(void) {}

Modified: llvm/trunk/test/Transforms/DeadArgElim/2009-03-17-MRE-Invoke.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadArgElim/2009-03-17-MRE-Invoke.ll?rev=100705&r1=100704&r2=100705&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/DeadArgElim/2009-03-17-MRE-Invoke.ll (original)
+++ llvm/trunk/test/Transforms/DeadArgElim/2009-03-17-MRE-Invoke.ll Wed Apr  7 17:53:17 2010
@@ -23,4 +23,4 @@
   ret i32 %y
 T2:
   unreachable
-}
\ No newline at end of file
+}

Modified: llvm/trunk/test/Transforms/DeadStoreElimination/crash.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadStoreElimination/crash.ll?rev=100705&r1=100704&r2=100705&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/DeadStoreElimination/crash.ll (original)
+++ llvm/trunk/test/Transforms/DeadStoreElimination/crash.ll Wed Apr  7 17:53:17 2010
@@ -54,4 +54,4 @@
   store i32 4, i32* %P2
   store i32 4, i32* %Q2
   br label %dead
-}
\ No newline at end of file
+}

Modified: llvm/trunk/test/Transforms/GVN/invariant-simple.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/GVN/invariant-simple.ll?rev=100705&r1=100704&r2=100705&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/GVN/invariant-simple.ll (original)
+++ llvm/trunk/test/Transforms/GVN/invariant-simple.ll Wed Apr  7 17:53:17 2010
@@ -33,4 +33,4 @@
 declare i32 @foo(i8*) nounwind 
 declare i32 @bar(i8*) nounwind readonly
 declare {}* @llvm.invariant.start(i64 %S, i8* nocapture %P) readonly
-declare void @llvm.invariant.end({}* %S, i64 %SS, i8* nocapture %P)
\ No newline at end of file
+declare void @llvm.invariant.end({}* %S, i64 %SS, i8* nocapture %P)

Modified: llvm/trunk/test/Transforms/GVN/lifetime-simple.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/GVN/lifetime-simple.ll?rev=100705&r1=100704&r2=100705&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/GVN/lifetime-simple.ll (original)
+++ llvm/trunk/test/Transforms/GVN/lifetime-simple.ll Wed Apr  7 17:53:17 2010
@@ -17,4 +17,4 @@
 }
 
 declare {}* @llvm.lifetime.start(i64 %S, i8* nocapture %P) readonly
-declare void @llvm.lifetime.end(i64 %S, i8* nocapture %P)
\ No newline at end of file
+declare void @llvm.lifetime.end(i64 %S, i8* nocapture %P)

Modified: llvm/trunk/test/Transforms/IndVarSimplify/crash.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/IndVarSimplify/crash.ll?rev=100705&r1=100704&r2=100705&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/IndVarSimplify/crash.ll (original)
+++ llvm/trunk/test/Transforms/IndVarSimplify/crash.ll Wed Apr  7 17:53:17 2010
@@ -16,4 +16,4 @@
 
 ;