From sabre at nondot.org Mon Oct 5 00:00:37 2009 From: sabre at nondot.org (Chris Lattner) Date: Mon, 05 Oct 2009 05:00:37 -0000 Subject: [llvm-commits] [llvm] r83292 - /llvm/trunk/lib/Analysis/ConstantFolding.cpp Message-ID: <200910050500.n9550cAc023211@zion.cs.uiuc.edu> Author: lattner Date: Mon Oct 5 00:00:35 2009 New Revision: 83292 URL: http://llvm.org/viewvc/llvm-project?rev=83292&view=rev Log: code simplifications. Modified: llvm/trunk/lib/Analysis/ConstantFolding.cpp Modified: llvm/trunk/lib/Analysis/ConstantFolding.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/ConstantFolding.cpp?rev=83292&r1=83291&r2=83292&view=diff ============================================================================== --- llvm/trunk/lib/Analysis/ConstantFolding.cpp (original) +++ llvm/trunk/lib/Analysis/ConstantFolding.cpp Mon Oct 5 00:00:35 2009 @@ -678,7 +678,9 @@ case Intrinsic::ctlz: case Intrinsic::cttz: return true; - default: break; + default: + return false; + case 0: break; } if (!F->hasName()) return false; @@ -748,25 +750,24 @@ /// ConstantFoldCall - Attempt to constant fold a call to the specified function /// with the specified arguments, returning null if unsuccessful. - Constant * llvm::ConstantFoldCall(Function *F, - Constant* const* Operands, unsigned NumOperands) { + Constant *const *Operands, unsigned NumOperands) { if (!F->hasName()) return 0; LLVMContext &Context = F->getContext(); StringRef Name = F->getName(); - + const Type *Ty = F->getReturnType(); if (NumOperands == 1) { if (ConstantFP *Op = dyn_cast<ConstantFP>(Operands[0])) { - if (Ty!=Type::getFloatTy(F->getContext()) && - Ty!=Type::getDoubleTy(Context)) + if (Ty != Type::getFloatTy(F->getContext()) && + Ty != Type::getDoubleTy(Context)) return 0; /// Currently APFloat versions of these functions do not exist, so we use /// the host native double versions. Float versions are not called /// directly but for all these it is true (float)(f((double)arg)) == /// f(arg). 
Long double not supported yet. - double V = Ty==Type::getFloatTy(F->getContext()) ? + double V = Ty == Type::getFloatTy(Context) ? (double)Op->getValueAPF().convertToFloat(): Op->getValueAPF().convertToDouble(); switch (Name[0]) { @@ -832,7 +833,11 @@ default: break; } - } else if (ConstantInt *Op = dyn_cast<ConstantInt>(Operands[0])) { + return 0; + } + + + if (ConstantInt *Op = dyn_cast<ConstantInt>(Operands[0])) { if (Name.startswith("llvm.bswap")) return ConstantInt::get(Context, Op->getValue().byteSwap()); else if (Name.startswith("llvm.ctpop")) @@ -841,8 +846,13 @@ return ConstantInt::get(Ty, Op->getValue().countTrailingZeros()); else if (Name.startswith("llvm.ctlz")) return ConstantInt::get(Ty, Op->getValue().countLeadingZeros()); + return 0; } - } else if (NumOperands == 2) { + + return 0; + } + + if (NumOperands == 2) { if (ConstantFP *Op1 = dyn_cast<ConstantFP>(Operands[0])) { if (Ty!=Type::getFloatTy(F->getContext()) && Ty!=Type::getDoubleTy(Context)) @@ -855,22 +865,21 @@ (double)Op2->getValueAPF().convertToFloat(): Op2->getValueAPF().convertToDouble(); - if (Name == "pow") { + if (Name == "pow") return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty, Context); - } else if (Name == "fmod") { + if (Name == "fmod") return ConstantFoldBinaryFP(fmod, Op1V, Op2V, Ty, Context); - } else if (Name == "atan2") { + if (Name == "atan2") return ConstantFoldBinaryFP(atan2, Op1V, Op2V, Ty, Context); - } } else if (ConstantInt *Op2C = dyn_cast<ConstantInt>(Operands[1])) { - if (Name == "llvm.powi.f32") { + if (Name == "llvm.powi.f32") return ConstantFP::get(Context, APFloat((float)std::pow((float)Op1V, (int)Op2C->getZExtValue()))); - } else if (Name == "llvm.powi.f64") { + if (Name == "llvm.powi.f64") return ConstantFP::get(Context, APFloat((double)std::pow((double)Op1V, (int)Op2C->getZExtValue()))); - } } + return 0; } } return 0; From sabre at nondot.org Mon Oct 5 00:05:58 2009 From: sabre at nondot.org (Chris Lattner) Date: Mon, 05 Oct 2009 05:05:58 -0000 Subject: [llvm-commits] [llvm] r83293 - 
/llvm/trunk/include/llvm/Type.h Message-ID: <200910050505.n9555wYk023857@zion.cs.uiuc.edu> Author: lattner Date: Mon Oct 5 00:05:57 2009 New Revision: 83293 URL: http://llvm.org/viewvc/llvm-project?rev=83293&view=rev Log: add some helper functions. Modified: llvm/trunk/include/llvm/Type.h Modified: llvm/trunk/include/llvm/Type.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Type.h?rev=83293&r1=83292&r2=83293&view=diff ============================================================================== --- llvm/trunk/include/llvm/Type.h (original) +++ llvm/trunk/include/llvm/Type.h Mon Oct 5 00:05:57 2009 @@ -205,6 +205,12 @@ bool isFloatingPoint() const { return ID == FloatTyID || ID == DoubleTyID || ID == X86_FP80TyID || ID == FP128TyID || ID == PPC_FP128TyID; } + /// isFloatTy - Return true if this is 'float', a 32-bit IEEE fp type. + bool isFloatTy() const { return ID == FloatTyID; } + + /// isDoubleTy - Return true if this is 'double', a 64-bit IEEE fp type. + bool isDoubleTy() const { return ID == DoubleTyID; } + /// isFPOrFPVector - Return true if this is a FP type or a vector of FP types. /// bool isFPOrFPVector() const; From sabre at nondot.org Mon Oct 5 00:06:25 2009 From: sabre at nondot.org (Chris Lattner) Date: Mon, 05 Oct 2009 05:06:25 -0000 Subject: [llvm-commits] [llvm] r83294 - /llvm/trunk/lib/Analysis/ConstantFolding.cpp Message-ID: <200910050506.n9556PrA023921@zion.cs.uiuc.edu> Author: lattner Date: Mon Oct 5 00:06:24 2009 New Revision: 83294 URL: http://llvm.org/viewvc/llvm-project?rev=83294&view=rev Log: simplify this code a bunch. 
Modified: llvm/trunk/lib/Analysis/ConstantFolding.cpp Modified: llvm/trunk/lib/Analysis/ConstantFolding.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/ConstantFolding.cpp?rev=83294&r1=83293&r2=83294&view=diff ============================================================================== --- llvm/trunk/lib/Analysis/ConstantFolding.cpp (original) +++ llvm/trunk/lib/Analysis/ConstantFolding.cpp Mon Oct 5 00:06:24 2009 @@ -721,9 +721,9 @@ return 0; } - if (Ty == Type::getFloatTy(Context)) + if (Ty->isFloatTy()) return ConstantFP::get(Context, APFloat((float)V)); - if (Ty == Type::getDoubleTy(Context)) + if (Ty->isDoubleTy()) return ConstantFP::get(Context, APFloat(V)); llvm_unreachable("Can only constant fold float/double"); return 0; // dummy return to suppress warning @@ -740,9 +740,9 @@ return 0; } - if (Ty == Type::getFloatTy(Context)) + if (Ty->isFloatTy()) return ConstantFP::get(Context, APFloat((float)V)); - if (Ty == Type::getDoubleTy(Context)) + if (Ty->isDoubleTy()) return ConstantFP::get(Context, APFloat(V)); llvm_unreachable("Can only constant fold float/double"); return 0; // dummy return to suppress warning @@ -756,19 +756,17 @@ if (!F->hasName()) return 0; LLVMContext &Context = F->getContext(); StringRef Name = F->getName(); - + const Type *Ty = F->getReturnType(); if (NumOperands == 1) { if (ConstantFP *Op = dyn_cast<ConstantFP>(Operands[0])) { - if (Ty != Type::getFloatTy(F->getContext()) && - Ty != Type::getDoubleTy(Context)) + if (!Ty->isFloatTy() && !Ty->isDoubleTy()) return 0; /// Currently APFloat versions of these functions do not exist, so we use /// the host native double versions. Float versions are not called /// directly but for all these it is true (float)(f((double)arg)) == /// f(arg). Long double not supported yet. - double V = Ty == Type::getFloatTy(Context) ? - (double)Op->getValueAPF().convertToFloat(): + double V = Ty->isFloatTy() ? 
(double)Op->getValueAPF().convertToFloat() : Op->getValueAPF().convertToDouble(); switch (Name[0]) { case 'a': @@ -854,14 +852,16 @@ if (NumOperands == 2) { if (ConstantFP *Op1 = dyn_cast<ConstantFP>(Operands[0])) { - if (Ty!=Type::getFloatTy(F->getContext()) && - Ty!=Type::getDoubleTy(Context)) + if (!Ty->isFloatTy() && !Ty->isDoubleTy()) return 0; - double Op1V = Ty==Type::getFloatTy(F->getContext()) ? - (double)Op1->getValueAPF().convertToFloat(): + double Op1V = Ty->isFloatTy() ? + (double)Op1->getValueAPF().convertToFloat() : Op1->getValueAPF().convertToDouble(); if (ConstantFP *Op2 = dyn_cast<ConstantFP>(Operands[1])) { - double Op2V = Ty==Type::getFloatTy(F->getContext()) ? + if (Op2->getType() != Op1->getType()) + return 0; + + double Op2V = Ty->isFloatTy() ? (double)Op2->getValueAPF().convertToFloat(): Op2->getValueAPF().convertToDouble(); From sabre at nondot.org Mon Oct 5 00:26:05 2009 From: sabre at nondot.org (Chris Lattner) Date: Mon, 05 Oct 2009 05:26:05 -0000 Subject: [llvm-commits] [llvm] r83295 - in /llvm/trunk: lib/Analysis/ConstantFolding.cpp test/Transforms/ConstProp/overflow-ops.ll Message-ID: <200910050526.n955Q56b026361@zion.cs.uiuc.edu> Author: lattner Date: Mon Oct 5 00:26:04 2009 New Revision: 83295 URL: http://llvm.org/viewvc/llvm-project?rev=83295&view=rev Log: teach the optimizer how to constant fold uadd/usub intrinsics. 
Added: llvm/trunk/test/Transforms/ConstProp/overflow-ops.ll Modified: llvm/trunk/lib/Analysis/ConstantFolding.cpp Modified: llvm/trunk/lib/Analysis/ConstantFolding.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/ConstantFolding.cpp?rev=83295&r1=83294&r2=83295&view=diff ============================================================================== --- llvm/trunk/lib/Analysis/ConstantFolding.cpp (original) +++ llvm/trunk/lib/Analysis/ConstantFolding.cpp Mon Oct 5 00:26:04 2009 @@ -677,6 +677,8 @@ case Intrinsic::ctpop: case Intrinsic::ctlz: case Intrinsic::cttz: + case Intrinsic::uadd_with_overflow: + case Intrinsic::usub_with_overflow: return true; default: return false; @@ -756,7 +758,7 @@ if (!F->hasName()) return 0; LLVMContext &Context = F->getContext(); StringRef Name = F->getName(); - + const Type *Ty = F->getReturnType(); if (NumOperands == 1) { if (ConstantFP *Op = dyn_cast<ConstantFP>(Operands[0])) { @@ -881,6 +883,32 @@ } return 0; } + + + if (ConstantInt *Op1 = dyn_cast<ConstantInt>(Operands[0])) { + if (ConstantInt *Op2 = dyn_cast<ConstantInt>(Operands[1])) { + switch (F->getIntrinsicID()) { + default: break; + case Intrinsic::uadd_with_overflow: { + Constant *Res = ConstantExpr::getAdd(Op1, Op2); // result. + Constant *Ops[] = { + Res, ConstantExpr::getICmp(CmpInst::ICMP_ULT, Res, Op1) // overflow. + }; + return ConstantStruct::get(F->getContext(), Ops, 2, false); + } + case Intrinsic::usub_with_overflow: { + Constant *Res = ConstantExpr::getSub(Op1, Op2); // result. + Constant *Ops[] = { + Res, ConstantExpr::getICmp(CmpInst::ICMP_UGT, Res, Op1) // overflow. 
+ }; + return ConstantStruct::get(F->getContext(), Ops, 2, false); + } + } + } + + return 0; + } + return 0; } return 0; } Added: llvm/trunk/test/Transforms/ConstProp/overflow-ops.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ConstProp/overflow-ops.ll?rev=83295&view=auto ============================================================================== --- llvm/trunk/test/Transforms/ConstProp/overflow-ops.ll (added) +++ llvm/trunk/test/Transforms/ConstProp/overflow-ops.ll Mon Oct 5 00:26:04 2009 @@ -0,0 +1,53 @@ +; RUN: opt < %s -constprop -S | FileCheck %s + +%i8i1 = type {i8, i1} + +;;----------------------------- +;; uadd +;;----------------------------- + +define {i8, i1} @uadd_1() nounwind { +entry: + %t = call {i8, i1} @llvm.uadd.with.overflow.i8(i8 42, i8 100) + ret {i8, i1} %t + +; CHECK: @uadd_1 +; CHECK: ret %i8i1 { i8 -114, i1 false } +} + +define {i8, i1} @uadd_2() nounwind { +entry: + %t = call {i8, i1} @llvm.uadd.with.overflow.i8(i8 142, i8 120) + ret {i8, i1} %t + +; CHECK: @uadd_2 +; CHECK: ret %i8i1 { i8 6, i1 true } +} + + +;;----------------------------- +;; usub +;;----------------------------- + +define {i8, i1} @usub_1() nounwind { +entry: + %t = call {i8, i1} @llvm.usub.with.overflow.i8(i8 4, i8 2) + ret {i8, i1} %t + +; CHECK: @usub_1 +; CHECK: ret %i8i1 { i8 2, i1 false } +} + +define {i8, i1} @usub_2() nounwind { +entry: + %t = call {i8, i1} @llvm.usub.with.overflow.i8(i8 4, i8 6) + ret {i8, i1} %t + +; CHECK: @usub_2 +; CHECK: ret %i8i1 { i8 -2, i1 true } +} + + + +declare {i8, i1} @llvm.uadd.with.overflow.i8(i8, i8) +declare {i8, i1} @llvm.usub.with.overflow.i8(i8, i8) From sabre at nondot.org Mon Oct 5 00:48:40 2009 From: sabre at nondot.org (Chris Lattner) Date: Mon, 05 Oct 2009 05:48:40 -0000 Subject: [llvm-commits] [llvm] r83296 - /llvm/trunk/include/llvm/Type.h Message-ID: <200910050548.n955mel9029138@zion.cs.uiuc.edu> Author: lattner Date: Mon Oct 5 00:48:40 2009 New Revision: 83296 URL: 
http://llvm.org/viewvc/llvm-project?rev=83296&view=rev Log: add more type predicates. Modified: llvm/trunk/include/llvm/Type.h Modified: llvm/trunk/include/llvm/Type.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Type.h?rev=83296&r1=83295&r2=83296&view=diff ============================================================================== --- llvm/trunk/include/llvm/Type.h (original) +++ llvm/trunk/include/llvm/Type.h Mon Oct 5 00:48:40 2009 @@ -188,6 +188,30 @@ /// inline TypeID getTypeID() const { return ID; } + /// isVoidTy - Return true if this is 'void'. + bool isVoidTy() const { return ID == VoidTyID; } + + /// isFloatTy - Return true if this is 'float', a 32-bit IEEE fp type. + bool isFloatTy() const { return ID == FloatTyID; } + + /// isDoubleTy - Return true if this is 'double', a 64-bit IEEE fp type. + bool isDoubleTy() const { return ID == DoubleTyID; } + + /// isX86_FP80Ty - Return true if this is x86 long double. + bool isX86_FP80Ty() const { return ID == X86_FP80TyID; } + + /// isFP128Ty - Return true if this is 'fp128'. + bool isFP128Ty() const { return ID == FP128TyID; } + + /// isPPC_FP128Ty - Return true if this is powerpc long double. + bool isPPC_FP128Ty() const { return ID == PPC_FP128TyID; } + + /// isLabelTy - Return true if this is 'label'. + bool isLabelTy() const { return ID == LabelTyID; } + + /// isMetadataTy - Return true if this is 'metadata'. + bool isMetadataTy() const { return ID == MetadataTyID; } + /// getDescription - Return the string representation of the type. std::string getDescription() const; @@ -205,12 +229,6 @@ bool isFloatingPoint() const { return ID == FloatTyID || ID == DoubleTyID || ID == X86_FP80TyID || ID == FP128TyID || ID == PPC_FP128TyID; } - /// isFloatTy - Return true if this is 'float', a 32-bit IEEE fp type. - bool isFloatTy() const { return ID == FloatTyID; } - - /// isDoubleTy - Return true if this is 'double', a 64-bit IEEE fp type. 
- bool isDoubleTy() const { return ID == DoubleTyID; } - /// isFPOrFPVector - Return true if this is a FP type or a vector of FP types. /// bool isFPOrFPVector() const; From sabre at nondot.org Mon Oct 5 00:54:47 2009 From: sabre at nondot.org (Chris Lattner) Date: Mon, 05 Oct 2009 05:54:47 -0000 Subject: [llvm-commits] [llvm] r83297 - in /llvm/trunk/lib: AsmParser/ Bitcode/Reader/ Bitcode/Writer/ CodeGen/ CodeGen/AsmPrinter/ ExecutionEngine/ ExecutionEngine/Interpreter/ Target/ARM/ Target/X86/ Transforms/Scalar/ VMCore/ Message-ID: <200910050554.n955slO5029926@zion.cs.uiuc.edu> Author: lattner Date: Mon Oct 5 00:54:46 2009 New Revision: 83297 URL: http://llvm.org/viewvc/llvm-project?rev=83297&view=rev Log: strength reduce a ton of type equality tests to check the typeid (Through the new predicates I added) instead of going through a context and doing a pointer comparison. Besides being cheaper, this allows a smart compiler to turn the if sequence into a switch. Modified: llvm/trunk/lib/AsmParser/LLParser.cpp llvm/trunk/lib/Bitcode/Reader/BitcodeReader.cpp llvm/trunk/lib/Bitcode/Writer/BitcodeWriter.cpp llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp llvm/trunk/lib/CodeGen/ELFWriter.cpp llvm/trunk/lib/CodeGen/MachineInstr.cpp llvm/trunk/lib/ExecutionEngine/ExecutionEngine.cpp llvm/trunk/lib/ExecutionEngine/Interpreter/Execution.cpp llvm/trunk/lib/Target/ARM/ARMCodeEmitter.cpp llvm/trunk/lib/Target/X86/X86FastISel.cpp llvm/trunk/lib/Transforms/Scalar/MemCpyOptimizer.cpp llvm/trunk/lib/Transforms/Scalar/SCCP.cpp llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp llvm/trunk/lib/Transforms/Scalar/SimplifyLibCalls.cpp llvm/trunk/lib/VMCore/ConstantFold.cpp llvm/trunk/lib/VMCore/Constants.cpp llvm/trunk/lib/VMCore/Verifier.cpp Modified: llvm/trunk/lib/AsmParser/LLParser.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/AsmParser/LLParser.cpp?rev=83297&r1=83296&r2=83297&view=diff 
============================================================================== --- llvm/trunk/lib/AsmParser/LLParser.cpp (original) +++ llvm/trunk/lib/AsmParser/LLParser.cpp Mon Oct 5 00:54:46 2009 @@ -647,7 +647,7 @@ return true; } - if (isa(Ty) || Ty == Type::getLabelTy(Context)) + if (isa(Ty) || Ty->isLabelTy()) return Error(TyLoc, "invalid type for global variable"); GlobalVariable *GV = 0; @@ -1113,7 +1113,7 @@ if (!UpRefs.empty()) return Error(UpRefs.back().Loc, "invalid unresolved type up reference"); - if (!AllowVoid && Result.get() == Type::getVoidTy(Context)) + if (!AllowVoid && Result.get()->isVoidTy()) return Error(TypeLoc, "void type only allowed for function results"); return false; @@ -1275,9 +1275,9 @@ // TypeRec ::= TypeRec '*' case lltok::star: - if (Result.get() == Type::getLabelTy(Context)) + if (Result.get()->isLabelTy()) return TokError("basic block pointers are invalid"); - if (Result.get() == Type::getVoidTy(Context)) + if (Result.get()->isVoidTy()) return TokError("pointers to void are invalid; use i8* instead"); if (!PointerType::isValidElementType(Result.get())) return TokError("pointer to this type is invalid"); @@ -1287,9 +1287,9 @@ // TypeRec ::= TypeRec 'addrspace' '(' uint32 ')' '*' case lltok::kw_addrspace: { - if (Result.get() == Type::getLabelTy(Context)) + if (Result.get()->isLabelTy()) return TokError("basic block pointers are invalid"); - if (Result.get() == Type::getVoidTy(Context)) + if (Result.get()->isVoidTy()) return TokError("pointers to void are invalid; use i8* instead"); if (!PointerType::isValidElementType(Result.get())) return TokError("pointer to this type is invalid"); @@ -1380,7 +1380,7 @@ if ((inType ? ParseTypeRec(ArgTy) : ParseType(ArgTy)) || ParseOptionalAttrs(Attrs, 0)) return true; - if (ArgTy == Type::getVoidTy(Context)) + if (ArgTy->isVoidTy()) return Error(TypeLoc, "argument can not have void type"); if (Lex.getKind() == lltok::LocalVar || @@ -1406,7 +1406,7 @@ if ((inType ? 
ParseTypeRec(ArgTy) : ParseType(ArgTy)) || ParseOptionalAttrs(Attrs, 0)) return true; - if (ArgTy == Type::getVoidTy(Context)) + if (ArgTy->isVoidTy()) return Error(TypeLoc, "argument can not have void type"); if (Lex.getKind() == lltok::LocalVar || @@ -1484,7 +1484,7 @@ if (ParseTypeRec(Result)) return true; ParamsList.push_back(Result); - if (Result == Type::getVoidTy(Context)) + if (Result->isVoidTy()) return Error(EltTyLoc, "struct element can not have void type"); if (!StructType::isValidElementType(Result)) return Error(EltTyLoc, "invalid element type for struct"); @@ -1493,7 +1493,7 @@ EltTyLoc = Lex.getLoc(); if (ParseTypeRec(Result)) return true; - if (Result == Type::getVoidTy(Context)) + if (Result->isVoidTy()) return Error(EltTyLoc, "struct element can not have void type"); if (!StructType::isValidElementType(Result)) return Error(EltTyLoc, "invalid element type for struct"); @@ -1532,7 +1532,7 @@ PATypeHolder EltTy(Type::getVoidTy(Context)); if (ParseTypeRec(EltTy)) return true; - if (EltTy == Type::getVoidTy(Context)) + if (EltTy->isVoidTy()) return Error(TypeLoc, "array and vector element type cannot be void"); if (ParseToken(isVector ? lltok::greater : lltok::rsquare, @@ -1623,7 +1623,7 @@ // If we have the value in the symbol table or fwd-ref table, return it. if (Val) { if (Val->getType() == Ty) return Val; - if (Ty == Type::getLabelTy(F.getContext())) + if (Ty->isLabelTy()) P.Error(Loc, "'%" + Name + "' is not a basic block"); else P.Error(Loc, "'%" + Name + "' defined with type '" + @@ -1640,7 +1640,7 @@ // Otherwise, create a new forward reference for this value and remember it. Value *FwdVal; - if (Ty == Type::getLabelTy(F.getContext())) + if (Ty->isLabelTy()) FwdVal = BasicBlock::Create(F.getContext(), Name, &F); else FwdVal = new Argument(Ty, Name); @@ -1666,7 +1666,7 @@ // If we have the value in the symbol table or fwd-ref table, return it. 
if (Val) { if (Val->getType() == Ty) return Val; - if (Ty == Type::getLabelTy(F.getContext())) + if (Ty->isLabelTy()) P.Error(Loc, "'%" + utostr(ID) + "' is not a basic block"); else P.Error(Loc, "'%" + utostr(ID) + "' defined with type '" + @@ -1682,7 +1682,7 @@ // Otherwise, create a new forward reference for this value and remember it. Value *FwdVal; - if (Ty == Type::getLabelTy(F.getContext())) + if (Ty->isLabelTy()) FwdVal = BasicBlock::Create(F.getContext(), "", &F); else FwdVal = new Argument(Ty); @@ -1697,7 +1697,7 @@ const std::string &NameStr, LocTy NameLoc, Instruction *Inst) { // If this instruction has void type, it cannot have a name or ID specified. - if (Inst->getType() == Type::getVoidTy(F.getContext())) { + if (Inst->getType()->isVoidTy()) { if (NameID != -1 || !NameStr.empty()) return P.Error(NameLoc, "instructions returning void cannot have a name"); return false; @@ -2279,7 +2279,7 @@ // The lexer has no type info, so builds all float and double FP constants // as double. Fix this here. Long double does not need this. if (&ID.APFloatVal.getSemantics() == &APFloat::IEEEdouble && - Ty == Type::getFloatTy(Context)) { + Ty->isFloatTy()) { bool Ignored; ID.APFloatVal.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven, &Ignored); @@ -2298,7 +2298,7 @@ return false; case ValID::t_Undef: // FIXME: LabelTy should not be a first-class type. - if ((!Ty->isFirstClassType() || Ty == Type::getLabelTy(Context)) && + if ((!Ty->isFirstClassType() || Ty->isLabelTy()) && !isa(Ty)) return Error(ID.Loc, "invalid type for undef constant"); V = UndefValue::get(Ty); @@ -2310,7 +2310,7 @@ return false; case ValID::t_Zero: // FIXME: LabelTy should not be a first-class type. 
- if (!Ty->isFirstClassType() || Ty == Type::getLabelTy(Context)) + if (!Ty->isFirstClassType() || Ty->isLabelTy()) return Error(ID.Loc, "invalid type for null constant"); V = Constant::getNullValue(Ty); return false; @@ -2856,7 +2856,7 @@ PATypeHolder Ty(Type::getVoidTy(Context)); if (ParseType(Ty, true /*void allowed*/)) return true; - if (Ty == Type::getVoidTy(Context)) { + if (Ty->isVoidTy()) { if (EatIfPresent(lltok::comma)) if (ParseOptionalCustomMetadata()) return true; Inst = ReturnInst::Create(Context); Modified: llvm/trunk/lib/Bitcode/Reader/BitcodeReader.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Bitcode/Reader/BitcodeReader.cpp?rev=83297&r1=83296&r2=83297&view=diff ============================================================================== --- llvm/trunk/lib/Bitcode/Reader/BitcodeReader.cpp (original) +++ llvm/trunk/lib/Bitcode/Reader/BitcodeReader.cpp Mon Oct 5 00:54:46 2009 @@ -342,7 +342,7 @@ resize(Idx + 1); if (Value *V = MDValuePtrs[Idx]) { - assert(V->getType() == Type::getMetadataTy(Context) && "Type mismatch in value table!"); + assert(V->getType()->isMetadataTy() && "Type mismatch in value table!"); return V; } @@ -808,7 +808,7 @@ SmallVector Elts; for (unsigned i = 0; i != Size; i += 2) { const Type *Ty = getTypeByID(Record[i], false); - if (Ty == Type::getMetadataTy(Context)) + if (Ty->isMetadataTy()) Elts.push_back(MDValueList.getValueFwdRef(Record[i+1])); else if (Ty != Type::getVoidTy(Context)) Elts.push_back(ValueList.getValueFwdRef(Record[i+1], Ty)); @@ -967,19 +967,19 @@ case bitc::CST_CODE_FLOAT: { // FLOAT: [fpval] if (Record.empty()) return Error("Invalid FLOAT record"); - if (CurTy == Type::getFloatTy(Context)) + if (CurTy->isFloatTy()) V = ConstantFP::get(Context, APFloat(APInt(32, (uint32_t)Record[0]))); - else if (CurTy == Type::getDoubleTy(Context)) + else if (CurTy->isDoubleTy()) V = ConstantFP::get(Context, APFloat(APInt(64, Record[0]))); - else if (CurTy == Type::getX86_FP80Ty(Context)) { + else if 
(CurTy->isX86_FP80Ty()) { // Bits are not stored the same way as a normal i80 APInt, compensate. uint64_t Rearrange[2]; Rearrange[0] = (Record[1] & 0xffffLL) | (Record[0] << 16); Rearrange[1] = Record[0] >> 48; V = ConstantFP::get(Context, APFloat(APInt(80, 2, Rearrange))); - } else if (CurTy == Type::getFP128Ty(Context)) + } else if (CurTy->isFP128Ty()) V = ConstantFP::get(Context, APFloat(APInt(128, 2, &Record[0]), true)); - else if (CurTy == Type::getPPC_FP128Ty(Context)) + else if (CurTy->isPPC_FP128Ty()) V = ConstantFP::get(Context, APFloat(APInt(128, 2, &Record[0]))); else V = UndefValue::get(CurTy); Modified: llvm/trunk/lib/Bitcode/Writer/BitcodeWriter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Bitcode/Writer/BitcodeWriter.cpp?rev=83297&r1=83296&r2=83297&view=diff ============================================================================== --- llvm/trunk/lib/Bitcode/Writer/BitcodeWriter.cpp (original) +++ llvm/trunk/lib/Bitcode/Writer/BitcodeWriter.cpp Mon Oct 5 00:54:46 2009 @@ -729,18 +729,16 @@ } else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C)) { Code = bitc::CST_CODE_FLOAT; const Type *Ty = CFP->getType(); - if (Ty == Type::getFloatTy(Ty->getContext()) || - Ty == Type::getDoubleTy(Ty->getContext())) { + if (Ty->isFloatTy() || Ty->isDoubleTy()) { Record.push_back(CFP->getValueAPF().bitcastToAPInt().getZExtValue()); - } else if (Ty == Type::getX86_FP80Ty(Ty->getContext())) { + } else if (Ty->isX86_FP80Ty()) { // api needed to prevent premature destruction // bits are not in the same order as a normal i80 APInt, compensate. 
APInt api = CFP->getValueAPF().bitcastToAPInt(); const uint64_t *p = api.getRawData(); Record.push_back((p[1] << 48) | (p[0] >> 16)); Record.push_back(p[0] & 0xffffLL); - } else if (Ty == Type::getFP128Ty(Ty->getContext()) || - Ty == Type::getPPC_FP128Ty(Ty->getContext())) { + } else if (Ty->isFP128Ty() || Ty->isPPC_FP128Ty()) { APInt api = CFP->getValueAPF().bitcastToAPInt(); const uint64_t *p = api.getRawData(); Record.push_back(p[0]); Modified: llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp?rev=83297&r1=83296&r2=83297&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp (original) +++ llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp Mon Oct 5 00:54:46 2009 @@ -1006,7 +1006,7 @@ // precision... LLVMContext &Context = CFP->getContext(); const TargetData *TD = TM.getTargetData(); - if (CFP->getType() == Type::getDoubleTy(Context)) { + if (CFP->getType()->isDoubleTy()) { double Val = CFP->getValueAPF().convertToDouble(); // for comment only uint64_t i = CFP->getValueAPF().bitcastToAPInt().getZExtValue(); if (MAI->getData64bitsDirective(AddrSpace)) { @@ -1048,7 +1048,9 @@ O << '\n'; } return; - } else if (CFP->getType() == Type::getFloatTy(Context)) { + } + + if (CFP->getType()->isFloatTy()) { float Val = CFP->getValueAPF().convertToFloat(); // for comment only O << MAI->getData32bitsDirective(AddrSpace) << CFP->getValueAPF().bitcastToAPInt().getZExtValue(); @@ -1058,7 +1060,9 @@ } O << '\n'; return; - } else if (CFP->getType() == Type::getX86_FP80Ty(Context)) { + } + + if (CFP->getType()->isX86_FP80Ty()) { // all long double variants are printed as hex // api needed to prevent premature destruction APInt api = CFP->getValueAPF().bitcastToAPInt(); @@ -1143,7 +1147,9 @@ EmitZeros(TD->getTypeAllocSize(Type::getX86_FP80Ty(Context)) - TD->getTypeStoreSize(Type::getX86_FP80Ty(Context)), 
AddrSpace); return; - } else if (CFP->getType() == Type::getPPC_FP128Ty(Context)) { + } + + if (CFP->getType()->isPPC_FP128Ty()) { // all long double variants are printed as hex // api needed to prevent premature destruction APInt api = CFP->getValueAPF().bitcastToAPInt(); Modified: llvm/trunk/lib/CodeGen/ELFWriter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/ELFWriter.cpp?rev=83297&r1=83296&r2=83297&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/ELFWriter.cpp (original) +++ llvm/trunk/lib/CodeGen/ELFWriter.cpp Mon Oct 5 00:54:46 2009 @@ -457,16 +457,15 @@ return; } else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) { APInt Val = CFP->getValueAPF().bitcastToAPInt(); - if (CFP->getType() == Type::getDoubleTy(CV->getContext())) + if (CFP->getType()->isDoubleTy()) GblS.emitWord64(Val.getZExtValue()); - else if (CFP->getType() == Type::getFloatTy(CV->getContext())) + else if (CFP->getType()->isFloatTy()) GblS.emitWord32(Val.getZExtValue()); - else if (CFP->getType() == Type::getX86_FP80Ty(CV->getContext())) { - unsigned PadSize = - TD->getTypeAllocSize(Type::getX86_FP80Ty(CV->getContext()))- - TD->getTypeStoreSize(Type::getX86_FP80Ty(CV->getContext())); + else if (CFP->getType()->isX86_FP80Ty()) { + unsigned PadSize = TD->getTypeAllocSize(CFP->getType())- + TD->getTypeStoreSize(CFP->getType()); GblS.emitWordFP80(Val.getRawData(), PadSize); - } else if (CFP->getType() == Type::getPPC_FP128Ty(CV->getContext())) + } else if (CFP->getType()->isPPC_FP128Ty()) llvm_unreachable("PPC_FP128Ty global emission not implemented"); return; } else if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) { Modified: llvm/trunk/lib/CodeGen/MachineInstr.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MachineInstr.cpp?rev=83297&r1=83296&r2=83297&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/MachineInstr.cpp 
(original) +++ 
llvm/trunk/lib/CodeGen/MachineInstr.cpp Mon Oct 5 00:54:46 2009 @@ -239,7 +239,7 @@ OS << getImm(); break; case MachineOperand::MO_FPImmediate: - if (getFPImm()->getType() == Type::getFloatTy(getFPImm()->getContext())) + if (getFPImm()->getType()->isFloatTy()) OS << getFPImm()->getValueAPF().convertToFloat(); else OS << getFPImm()->getValueAPF().convertToDouble(); Modified: llvm/trunk/lib/ExecutionEngine/ExecutionEngine.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/ExecutionEngine/ExecutionEngine.cpp?rev=83297&r1=83296&r2=83297&view=diff ============================================================================== --- llvm/trunk/lib/ExecutionEngine/ExecutionEngine.cpp (original) +++ llvm/trunk/lib/ExecutionEngine/ExecutionEngine.cpp Mon Oct 5 00:54:46 2009 @@ -539,11 +539,11 @@ } case Instruction::UIToFP: { GenericValue GV = getConstantValue(Op0); - if (CE->getType() == Type::getFloatTy(CE->getContext())) + if (CE->getType()->isFloatTy()) GV.FloatVal = float(GV.IntVal.roundToDouble()); - else if (CE->getType() == Type::getDoubleTy(CE->getContext())) + else if (CE->getType()->isDoubleTy()) GV.DoubleVal = GV.IntVal.roundToDouble(); - else if (CE->getType() == Type::getX86_FP80Ty(Op0->getContext())) { + else if (CE->getType()->isX86_FP80Ty()) { const uint64_t zero[] = {0, 0}; APFloat apf = APFloat(APInt(80, 2, zero)); (void)apf.convertFromAPInt(GV.IntVal, @@ -555,11 +555,11 @@ } case Instruction::SIToFP: { GenericValue GV = getConstantValue(Op0); - if (CE->getType() == Type::getFloatTy(CE->getContext())) + if (CE->getType()->isFloatTy()) GV.FloatVal = float(GV.IntVal.signedRoundToDouble()); - else if (CE->getType() == Type::getDoubleTy(CE->getContext())) + else if (CE->getType()->isDoubleTy()) GV.DoubleVal = GV.IntVal.signedRoundToDouble(); - else if (CE->getType() == Type::getX86_FP80Ty(CE->getContext())) { + else if (CE->getType()->isX86_FP80Ty()) { const uint64_t zero[] = { 0, 0}; APFloat apf = APFloat(APInt(80, 2, zero)); 
(void)apf.convertFromAPInt(GV.IntVal, @@ -573,11 +573,11 @@ case Instruction::FPToSI: { GenericValue GV = getConstantValue(Op0); uint32_t BitWidth = cast(CE->getType())->getBitWidth(); - if (Op0->getType() == Type::getFloatTy(Op0->getContext())) + if (Op0->getType()->isFloatTy()) GV.IntVal = APIntOps::RoundFloatToAPInt(GV.FloatVal, BitWidth); - else if (Op0->getType() == Type::getDoubleTy(Op0->getContext())) + else if (Op0->getType()->isDoubleTy()) GV.IntVal = APIntOps::RoundDoubleToAPInt(GV.DoubleVal, BitWidth); - else if (Op0->getType() == Type::getX86_FP80Ty(Op0->getContext())) { + else if (Op0->getType()->isX86_FP80Ty()) { APFloat apf = APFloat(GV.IntVal); uint64_t v; bool ignored; @@ -610,9 +610,9 @@ default: llvm_unreachable("Invalid bitcast operand"); case Type::IntegerTyID: assert(DestTy->isFloatingPoint() && "invalid bitcast"); - if (DestTy == Type::getFloatTy(Op0->getContext())) + if (DestTy->isFloatTy()) GV.FloatVal = GV.IntVal.bitsToFloat(); - else if (DestTy == Type::getDoubleTy(DestTy->getContext())) + else if (DestTy->isDoubleTy()) GV.DoubleVal = GV.IntVal.bitsToDouble(); break; case Type::FloatTyID: Modified: llvm/trunk/lib/ExecutionEngine/Interpreter/Execution.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/ExecutionEngine/Interpreter/Execution.cpp?rev=83297&r1=83296&r2=83297&view=diff ============================================================================== --- llvm/trunk/lib/ExecutionEngine/Interpreter/Execution.cpp (original) +++ llvm/trunk/lib/ExecutionEngine/Interpreter/Execution.cpp Mon Oct 5 00:54:46 2009 @@ -365,7 +365,7 @@ } #define IMPLEMENT_UNORDERED(TY, X,Y) \ - if (TY == Type::getFloatTy(Ty->getContext())) { \ + if (TY->isFloatTy()) { \ if (X.FloatVal != X.FloatVal || Y.FloatVal != Y.FloatVal) { \ Dest.IntVal = APInt(1,true); \ return Dest; \ @@ -421,7 +421,7 @@ static GenericValue executeFCMP_ORD(GenericValue Src1, GenericValue Src2, const Type *Ty) { GenericValue Dest; - if (Ty == Type::getFloatTy(Ty->getContext())) 
+ if (Ty->isFloatTy()) Dest.IntVal = APInt(1,(Src1.FloatVal == Src1.FloatVal && Src2.FloatVal == Src2.FloatVal)); else @@ -433,7 +433,7 @@ static GenericValue executeFCMP_UNO(GenericValue Src1, GenericValue Src2, const Type *Ty) { GenericValue Dest; - if (Ty == Type::getFloatTy(Ty->getContext())) + if (Ty->isFloatTy()) Dest.IntVal = APInt(1,(Src1.FloatVal != Src1.FloatVal || Src2.FloatVal != Src2.FloatVal)); else @@ -970,8 +970,7 @@ GenericValue Interpreter::executeFPTruncInst(Value *SrcVal, const Type *DstTy, ExecutionContext &SF) { GenericValue Dest, Src = getOperandValue(SrcVal, SF); - assert(SrcVal->getType() == Type::getDoubleTy(SrcVal->getContext()) && - DstTy == Type::getFloatTy(SrcVal->getContext()) && + assert(SrcVal->getType()->isDoubleTy() && DstTy->isFloatTy() && "Invalid FPTrunc instruction"); Dest.FloatVal = (float) Src.DoubleVal; return Dest; @@ -980,8 +979,7 @@ GenericValue Interpreter::executeFPExtInst(Value *SrcVal, const Type *DstTy, ExecutionContext &SF) { GenericValue Dest, Src = getOperandValue(SrcVal, SF); - assert(SrcVal->getType() == Type::getFloatTy(SrcVal->getContext()) && - DstTy == Type::getDoubleTy(SrcVal->getContext()) && + assert(SrcVal->getType()->isFloatTy() && DstTy->isDoubleTy() && "Invalid FPTrunc instruction"); Dest.DoubleVal = (double) Src.FloatVal; return Dest; @@ -1072,22 +1070,22 @@ assert(isa(SrcTy) && "Invalid BitCast"); Dest.PointerVal = Src.PointerVal; } else if (DstTy->isInteger()) { - if (SrcTy == Type::getFloatTy(SrcVal->getContext())) { + if (SrcTy->isFloatTy()) { Dest.IntVal.zext(sizeof(Src.FloatVal) * CHAR_BIT); Dest.IntVal.floatToBits(Src.FloatVal); - } else if (SrcTy == Type::getDoubleTy(SrcVal->getContext())) { + } else if (SrcTy->isDoubleTy()) { Dest.IntVal.zext(sizeof(Src.DoubleVal) * CHAR_BIT); Dest.IntVal.doubleToBits(Src.DoubleVal); } else if (SrcTy->isInteger()) { Dest.IntVal = Src.IntVal; } else llvm_unreachable("Invalid BitCast"); - } else if (DstTy == Type::getFloatTy(SrcVal->getContext())) { + } else 
if (DstTy->isFloatTy()) { if (SrcTy->isInteger()) Dest.FloatVal = Src.IntVal.bitsToFloat(); else Dest.FloatVal = Src.FloatVal; - } else if (DstTy == Type::getDoubleTy(SrcVal->getContext())) { + } else if (DstTy->isDoubleTy()) { if (SrcTy->isInteger()) Dest.DoubleVal = Src.IntVal.bitsToDouble(); else Modified: llvm/trunk/lib/Target/ARM/ARMCodeEmitter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMCodeEmitter.cpp?rev=83297&r1=83296&r2=83297&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMCodeEmitter.cpp (original) +++ llvm/trunk/lib/Target/ARM/ARMCodeEmitter.cpp Mon Oct 5 00:54:46 2009 @@ -460,9 +460,9 @@ uint32_t Val = *(uint32_t*)CI->getValue().getRawData(); emitWordLE(Val); } else if (const ConstantFP *CFP = dyn_cast(CV)) { - if (CFP->getType() == Type::getFloatTy(CFP->getContext())) + if (CFP->getType()->isFloatTy()) emitWordLE(CFP->getValueAPF().bitcastToAPInt().getZExtValue()); - else if (CFP->getType() == Type::getDoubleTy(CFP->getContext())) + else if (CFP->getType()->isDoubleTy()) emitDWordLE(CFP->getValueAPF().bitcastToAPInt().getZExtValue()); else { llvm_unreachable("Unable to handle this constantpool entry!"); Modified: llvm/trunk/lib/Target/X86/X86FastISel.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86FastISel.cpp?rev=83297&r1=83296&r2=83297&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86FastISel.cpp (original) +++ llvm/trunk/lib/Target/X86/X86FastISel.cpp Mon Oct 5 00:54:46 2009 @@ -1058,9 +1058,9 @@ bool X86FastISel::X86SelectFPExt(Instruction *I) { // fpext from float to double. 
if (Subtarget->hasSSE2() && - I->getType() == Type::getDoubleTy(I->getContext())) { + I->getType()->isDoubleTy()) { Value *V = I->getOperand(0); - if (V->getType() == Type::getFloatTy(I->getContext())) { + if (V->getType()->isFloatTy()) { unsigned OpReg = getRegForValue(V); if (OpReg == 0) return false; unsigned ResultReg = createResultReg(X86::FR64RegisterClass); @@ -1075,9 +1075,9 @@ bool X86FastISel::X86SelectFPTrunc(Instruction *I) { if (Subtarget->hasSSE2()) { - if (I->getType() == Type::getFloatTy(I->getContext())) { + if (I->getType()->isFloatTy()) { Value *V = I->getOperand(0); - if (V->getType() == Type::getDoubleTy(I->getContext())) { + if (V->getType()->isDoubleTy()) { unsigned OpReg = getRegForValue(V); if (OpReg == 0) return false; unsigned ResultReg = createResultReg(X86::FR32RegisterClass); @@ -1244,7 +1244,7 @@ // Handle *simple* calls for now. const Type *RetTy = CS.getType(); EVT RetVT; - if (RetTy == Type::getVoidTy(I->getContext())) + if (RetTy->isVoidTy()) RetVT = MVT::isVoid; else if (!isTypeLegal(RetTy, RetVT, true)) return false; Modified: llvm/trunk/lib/Transforms/Scalar/MemCpyOptimizer.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/MemCpyOptimizer.cpp?rev=83297&r1=83296&r2=83297&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/Scalar/MemCpyOptimizer.cpp (original) +++ llvm/trunk/lib/Transforms/Scalar/MemCpyOptimizer.cpp Mon Oct 5 00:54:46 2009 @@ -38,16 +38,18 @@ /// true for all i8 values obviously, but is also true for i32 0, i32 -1, /// i16 0xF0F0, double 0.0 etc. If the value can't be handled with a repeated /// byte store (e.g. i16 0x1234), return null. -static Value *isBytewiseValue(Value *V, LLVMContext &Context) { +static Value *isBytewiseValue(Value *V) { + LLVMContext &Context = V->getContext(); + // All byte-wide stores are splatable, even of arbitrary variables. 
if (V->getType() == Type::getInt8Ty(Context)) return V; // Constant float and double values can be handled as integer values if the // corresponding integer value is "byteable". An important case is 0.0. if (ConstantFP *CFP = dyn_cast(V)) { - if (CFP->getType() == Type::getFloatTy(Context)) + if (CFP->getType()->isFloatTy()) V = ConstantExpr::getBitCast(CFP, Type::getInt32Ty(Context)); - if (CFP->getType() == Type::getDoubleTy(Context)) + if (CFP->getType()->isDoubleTy()) V = ConstantExpr::getBitCast(CFP, Type::getInt64Ty(Context)); // Don't handle long double formats, which have strange constraints. } @@ -349,7 +351,7 @@ // Ensure that the value being stored is something that can be memset'able a // byte at a time like "0" or "-1" or any width, as well as things like // 0xA0A0A0A0 and 0.0. - Value *ByteVal = isBytewiseValue(SI->getOperand(0), Context); + Value *ByteVal = isBytewiseValue(SI->getOperand(0)); if (!ByteVal) return false; @@ -390,7 +392,7 @@ if (NextStore->isVolatile()) break; // Check to see if this stored value is of the same byte-splattable value. - if (ByteVal != isBytewiseValue(NextStore->getOperand(0), Context)) + if (ByteVal != isBytewiseValue(NextStore->getOperand(0))) break; // Check to see if this store is to a constant offset from the start ptr. Modified: llvm/trunk/lib/Transforms/Scalar/SCCP.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/SCCP.cpp?rev=83297&r1=83296&r2=83297&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/Scalar/SCCP.cpp (original) +++ llvm/trunk/lib/Transforms/Scalar/SCCP.cpp Mon Oct 5 00:54:46 2009 @@ -1184,7 +1184,7 @@ if (F == 0 || !F->hasLocalLinkage()) { CallOverdefined: // Void return and not tracking callee, just bail. 
- if (I->getType() == Type::getVoidTy(I->getContext())) return; + if (I->getType()->isVoidTy()) return; // Otherwise, if we have a single return value case, and if the function is // a declaration, maybe we can constant fold it. @@ -1354,7 +1354,7 @@ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { // Look for instructions which produce undef values. - if (I->getType() == Type::getVoidTy(F.getContext())) continue; + if (I->getType()->isVoidTy()) continue; LatticeVal &LV = getValueState(I); if (!LV.isUndefined()) continue; @@ -1593,8 +1593,7 @@ // for (BasicBlock::iterator BI = BB->begin(), E = BB->end(); BI != E; ) { Instruction *Inst = BI++; - if (Inst->getType() == Type::getVoidTy(F.getContext()) || - isa(Inst)) + if (Inst->getType()->isVoidTy() || isa(Inst)) continue; LatticeVal &IV = Values[Inst]; @@ -1769,7 +1768,7 @@ } else { for (BasicBlock::iterator BI = BB->begin(), E = BB->end(); BI != E; ) { Instruction *Inst = BI++; - if (Inst->getType() == Type::getVoidTy(M.getContext())) + if (Inst->getType()->isVoidTy()) continue; LatticeVal &IV = Values[Inst]; @@ -1846,7 +1845,7 @@ for (DenseMap::const_iterator I = RV.begin(), E = RV.end(); I != E; ++I) if (!I->second.isOverdefined() && - I->first->getReturnType() != Type::getVoidTy(M.getContext())) { + !I->first->getReturnType()->isVoidTy()) { Function *F = I->first; for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) if (ReturnInst *RI = dyn_cast(BB->getTerminator())) Modified: llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp?rev=83297&r1=83296&r2=83297&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp (original) +++ llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp Mon Oct 5 00:54:46 2009 @@ -1297,8 +1297,7 @@ VecTy = VInTy; return; } - } else if (In == 
Type::getFloatTy(Context) || - In == Type::getDoubleTy(Context) || + } else if (In->isFloatTy() || In->isDoubleTy() || (isa(In) && In->getPrimitiveSizeInBits() >= 8 && isPowerOf2_32(In->getPrimitiveSizeInBits()))) { // If we're accessing something that could be an element of a vector, see Modified: llvm/trunk/lib/Transforms/Scalar/SimplifyLibCalls.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/SimplifyLibCalls.cpp?rev=83297&r1=83296&r2=83297&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/Scalar/SimplifyLibCalls.cpp (original) +++ llvm/trunk/lib/Transforms/Scalar/SimplifyLibCalls.cpp Mon Oct 5 00:54:46 2009 @@ -225,12 +225,12 @@ IRBuilder<> &B, const AttrListPtr &Attrs) { char NameBuffer[20]; - if (Op->getType() != Type::getDoubleTy(*Context)) { + if (!Op->getType()->isDoubleTy()) { // If we need to add a suffix, copy into NameBuffer. unsigned NameLen = strlen(Name); assert(NameLen < sizeof(NameBuffer)-2); memcpy(NameBuffer, Name, NameLen); - if (Op->getType() == Type::getFloatTy(*Context)) + if (Op->getType()->isFloatTy()) NameBuffer[NameLen] = 'f'; // floorf else NameBuffer[NameLen] = 'l'; // floorl @@ -622,7 +622,8 @@ if (!TD) return 0; uint64_t Len = GetStringLength(SrcStr); - if (Len == 0 || FT->getParamType(1) != Type::getInt32Ty(*Context)) // memchr needs i32. + if (Len == 0 || + FT->getParamType(1) != Type::getInt32Ty(*Context)) // memchr needs i32. return 0; return EmitMemChr(SrcStr, CI->getOperand(2), // include nul. 
@@ -1082,15 +1083,15 @@ if (LdExpArg) { const char *Name; - if (Op->getType() == Type::getFloatTy(*Context)) + if (Op->getType()->isFloatTy()) Name = "ldexpf"; - else if (Op->getType() == Type::getDoubleTy(*Context)) + else if (Op->getType()->isDoubleTy()) Name = "ldexp"; else Name = "ldexpl"; Constant *One = ConstantFP::get(*Context, APFloat(1.0f)); - if (Op->getType() != Type::getFloatTy(*Context)) + if (!Op->getType()->isFloatTy()) One = ConstantExpr::getFPExtend(One, Op->getType()); Module *M = Caller->getParent(); @@ -1112,13 +1113,13 @@ struct UnaryDoubleFPOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { const FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 1 || FT->getReturnType() != Type::getDoubleTy(*Context) || - FT->getParamType(0) != Type::getDoubleTy(*Context)) + if (FT->getNumParams() != 1 || !FT->getReturnType()->isDoubleTy() || + !FT->getParamType(0)->isDoubleTy()) return 0; // If this is something like 'floor((double)floatval)', convert to floorf. FPExtInst *Cast = dyn_cast(CI->getOperand(1)); - if (Cast == 0 || Cast->getOperand(0)->getType() != Type::getFloatTy(*Context)) + if (Cast == 0 || !Cast->getOperand(0)->getType()->isFloatTy()) return 0; // floor((double)floatval) -> (double)floorf(floatval) @@ -1260,7 +1261,7 @@ const FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() < 1 || !isa(FT->getParamType(0)) || !(isa(FT->getReturnType()) || - FT->getReturnType() == Type::getVoidTy(*Context))) + FT->getReturnType()->isVoidTy())) return 0; // Check for a fixed format string. 
Modified: llvm/trunk/lib/VMCore/ConstantFold.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/ConstantFold.cpp?rev=83297&r1=83296&r2=83297&view=diff ============================================================================== --- llvm/trunk/lib/VMCore/ConstantFold.cpp (original) +++ llvm/trunk/lib/VMCore/ConstantFold.cpp Mon Oct 5 00:54:46 2009 @@ -192,7 +192,7 @@ return UndefValue::get(DestTy); } // No compile-time operations on this type yet. - if (V->getType() == Type::getPPC_FP128Ty(Context) || DestTy == Type::getPPC_FP128Ty(Context)) + if (V->getType()->isPPC_FP128Ty() || DestTy->isPPC_FP128Ty()) return 0; // If the cast operand is a constant expression, there's a few things we can @@ -241,10 +241,10 @@ if (ConstantFP *FPC = dyn_cast(V)) { bool ignored; APFloat Val = FPC->getValueAPF(); - Val.convert(DestTy == Type::getFloatTy(Context) ? APFloat::IEEEsingle : - DestTy == Type::getDoubleTy(Context) ? APFloat::IEEEdouble : - DestTy == Type::getX86_FP80Ty(Context) ? APFloat::x87DoubleExtended : - DestTy == Type::getFP128Ty(Context) ? APFloat::IEEEquad : + Val.convert(DestTy->isFloatTy() ? APFloat::IEEEsingle : + DestTy->isDoubleTy() ? APFloat::IEEEdouble : + DestTy->isX86_FP80Ty() ? APFloat::x87DoubleExtended : + DestTy->isFP128Ty() ? APFloat::IEEEquad : APFloat::Bogus, APFloat::rmNearestTiesToEven, &ignored); return ConstantFP::get(Context, Val); @@ -584,7 +584,7 @@ unsigned Opcode, Constant *C1, Constant *C2) { // No compile-time operations on this type yet. - if (C1->getType() == Type::getPPC_FP128Ty(Context)) + if (C1->getType()->isPPC_FP128Ty()) return 0; // Handle UndefValue up front. @@ -1110,7 +1110,7 @@ "Cannot compare values of different types!"); // No compile-time operations on this type yet. 
- if (V1->getType() == Type::getPPC_FP128Ty(Context)) + if (V1->getType()->isPPC_FP128Ty()) return FCmpInst::BAD_FCMP_PREDICATE; // Handle degenerate case quickly @@ -1403,7 +1403,7 @@ return UndefValue::get(ResultTy); // No compile-time operations on this type yet. - if (C1->getType() == Type::getPPC_FP128Ty(Context)) + if (C1->getType()->isPPC_FP128Ty()) return 0; // icmp eq/ne(null,GV) -> false/true @@ -1837,7 +1837,8 @@ // This happens with pointers to member functions in C++. if (CE->getOpcode() == Instruction::IntToPtr && NumIdx == 1 && isa(CE->getOperand(0)) && isa(Idxs[0]) && - cast(CE->getType())->getElementType() == Type::getInt8Ty(Context)) { + cast(CE->getType())->getElementType() == + Type::getInt8Ty(Context)) { Constant *Base = CE->getOperand(0); Constant *Offset = Idxs[0]; Modified: llvm/trunk/lib/VMCore/Constants.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/Constants.cpp?rev=83297&r1=83296&r2=83297&view=diff ============================================================================== --- llvm/trunk/lib/VMCore/Constants.cpp (original) +++ llvm/trunk/lib/VMCore/Constants.cpp Mon Oct 5 00:54:46 2009 @@ -327,16 +327,16 @@ //===----------------------------------------------------------------------===// static const fltSemantics *TypeToFloatSemantics(const Type *Ty) { - if (Ty == Type::getFloatTy(Ty->getContext())) + if (Ty->isFloatTy()) return &APFloat::IEEEsingle; - if (Ty == Type::getDoubleTy(Ty->getContext())) + if (Ty->isDoubleTy()) return &APFloat::IEEEdouble; - if (Ty == Type::getX86_FP80Ty(Ty->getContext())) + if (Ty->isX86_FP80Ty()) return &APFloat::x87DoubleExtended; - else if (Ty == Type::getFP128Ty(Ty->getContext())) + else if (Ty->isFP128Ty()) return &APFloat::IEEEquad; - assert(Ty == Type::getPPC_FP128Ty(Ty->getContext()) && "Unknown FP format"); + assert(Ty->isPPC_FP128Ty() && "Unknown FP format"); return &APFloat::PPCDoubleDouble; } Modified: llvm/trunk/lib/VMCore/Verifier.cpp URL: 
http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/Verifier.cpp?rev=83297&r1=83296&r2=83297&view=diff ============================================================================== --- llvm/trunk/lib/VMCore/Verifier.cpp (original) +++ llvm/trunk/lib/VMCore/Verifier.cpp Mon Oct 5 00:54:46 2009 @@ -600,12 +600,11 @@ "# formal arguments must match # of arguments for function type!", &F, FT); Assert1(F.getReturnType()->isFirstClassType() || - F.getReturnType()->getTypeID() == Type::VoidTyID || + F.getReturnType()->isVoidTy() || isa(F.getReturnType()), "Functions cannot return aggregate values!", &F); - Assert1(!F.hasStructRetAttr() || - F.getReturnType()->getTypeID() == Type::VoidTyID, + Assert1(!F.hasStructRetAttr() || F.getReturnType()->isVoidTy(), "Invalid struct return type!", &F); const AttrListPtr &Attrs = F.getAttributes(); @@ -643,7 +642,7 @@ Assert1(I->getType()->isFirstClassType(), "Function arguments must have first-class types!", I); if (!isLLVMdotName) - Assert2(I->getType() != Type::getMetadataTy(F.getContext()), + Assert2(!I->getType()->isMetadataTy(), "Function takes metadata but isn't an intrinsic", I, &F); } @@ -738,7 +737,7 @@ void Verifier::visitReturnInst(ReturnInst &RI) { Function *F = RI.getParent()->getParent(); unsigned N = RI.getNumOperands(); - if (F->getReturnType()->getTypeID() == Type::VoidTyID) + if (F->getReturnType()->isVoidTy()) Assert2(N == 0, "Found return instr that returns non-void in Function of void " "return type!", &RI, F->getReturnType()); @@ -1103,7 +1102,7 @@ CS.getCalledFunction()->getName().substr(0, 5) != "llvm.") { for (FunctionType::param_iterator PI = FTy->param_begin(), PE = FTy->param_end(); PI != PE; ++PI) - Assert1(PI->get() != Type::getMetadataTy(I->getContext()), + Assert1(!PI->get()->isMetadataTy(), "Function has metadata parameter but isn't an intrinsic", I); } @@ -1329,18 +1328,18 @@ Assert1(BB->getTerminator() == &I, "Terminator not at end of block!", &I); // Check that void typed values don't have 
names - Assert1(I.getType() != Type::getVoidTy(I.getContext()) || !I.hasName(), + Assert1(!I.getType()->isVoidTy() || !I.hasName(), "Instruction has a name, but provides a void value!", &I); // Check that the return value of the instruction is either void or a legal // value type. - Assert1(I.getType()->getTypeID() == Type::VoidTyID || + Assert1(I.getType()->isVoidTy() || I.getType()->isFirstClassType(), "Instruction returns a non-scalar type!", &I); // Check that the instruction doesn't produce metadata. Calls are already // checked against the callee type. - Assert1(I.getType()->getTypeID() != Type::MetadataTyID || + Assert1(!I.getType()->isMetadataTy() || isa(I) || isa(I), "Invalid use of metadata!", &I); From sabre at nondot.org Mon Oct 5 01:16:42 2009 From: sabre at nondot.org (Chris Lattner) Date: Mon, 05 Oct 2009 06:16:42 -0000 Subject: [llvm-commits] [test-suite] r83298 - /test-suite/trunk/SingleSource/Benchmarks/Misc/oourafft.c Message-ID: <200910050616.n956GgTG032647@zion.cs.uiuc.edu> Author: lattner Date: Mon Oct 5 01:16:42 2009 New Revision: 83298 URL: http://llvm.org/viewvc/llvm-project?rev=83298&view=rev Log: improve portability of this test to work with FreeBSD (PR5133), patch by Pawel Worach! 
Modified: test-suite/trunk/SingleSource/Benchmarks/Misc/oourafft.c Modified: test-suite/trunk/SingleSource/Benchmarks/Misc/oourafft.c URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/SingleSource/Benchmarks/Misc/oourafft.c?rev=83298&r1=83297&r2=83298&view=diff ============================================================================== --- test-suite/trunk/SingleSource/Benchmarks/Misc/oourafft.c (original) +++ test-suite/trunk/SingleSource/Benchmarks/Misc/oourafft.c Mon Oct 5 01:16:42 2009 @@ -2,7 +2,7 @@ #include #include #include -#ifndef __APPLE__ // memalign +#if !defined(__APPLE__) && !defined(__FreeBSD__) // memalign #include #endif @@ -37,7 +37,7 @@ t_overhead = t_end - t_start; /* Prepare aux data */ -#ifndef __APPLE__ /* Darwin always 16-byte aligns malloc data */ +#if !defined(__APPLE__) && !defined(__FreeBSD__) /* Darwin always 16-byte aligns malloc data */ ip = memalign(16, sqrt(N)*sizeof(int)); w = memalign(16, 2*N*5/4*sizeof(double)); #else @@ -47,7 +47,7 @@ makewt(N >> 1, ip, w); /* Allocate buffers */ -#ifndef __APPLE__ /* Darwin always 16-byte aligns malloc data */ +#if !defined(__APPLE__) && !defined(__FreeBSD__) /* Darwin always 16-byte aligns malloc data */ ref = memalign(16, 2*N*sizeof(double)); cmp = memalign(16, 2*N*sizeof(double)); src = memalign(16, 2*N*sizeof(double)); From edwintorok at gmail.com Mon Oct 5 01:45:06 2009 From: edwintorok at gmail.com (=?ISO-8859-1?Q?T=F6r=F6k_Edwin?=) Date: Mon, 05 Oct 2009 09:45:06 +0300 Subject: [llvm-commits] [llvm] r83287 - /llvm/trunk/docs/ReleaseNotes-2.6.html In-Reply-To: <200910050212.n952Cf0l002259@zion.cs.uiuc.edu> References: <200910050212.n952Cf0l002259@zion.cs.uiuc.edu> Message-ID: <4AC995F2.2050101@gmail.com> On 2009-10-05 05:12, Chris Lattner wrote: > Author: lattner > Date: Sun Oct 4 21:12:39 2009 > New Revision: 83287 > > URL: http://llvm.org/viewvc/llvm-project?rev=83287&view=rev > Log: > add some completely unformated and probably incoherent notes about things > I saw while 
reading all the commits between the 2.5 and 2.6 release branches. > > Modified: > llvm/trunk/docs/ReleaseNotes-2.6.html > > > + llvm/Analysis/PointerTracking.h ==> dead?? > Hi Chris, I didn't abandon PointerTracking.{h,cpp}, I intend to submit more code, but I didn't make it in time for 2.6. If you want, you can drop it from the 2.6 release branch (as nothing uses it there), but please keep it in trunk. Best regards, --Edwin From sabre at nondot.org Mon Oct 5 02:02:04 2009 From: sabre at nondot.org (Chris Lattner) Date: Mon, 5 Oct 2009 00:02:04 -0700 Subject: [llvm-commits] [llvm] r83287 - /llvm/trunk/docs/ReleaseNotes-2.6.html In-Reply-To: <4AC995F2.2050101@gmail.com> References: <200910050212.n952Cf0l002259@zion.cs.uiuc.edu> <4AC995F2.2050101@gmail.com> Message-ID: <08F996F0-C2AA-4767-862C-A9C6225FF97B@nondot.org> On Oct 4, 2009, at 11:45 PM, Török Edwin wrote: > On 2009-10-05 05:12, Chris Lattner wrote: >> Author: lattner >> Date: Sun Oct 4 21:12:39 2009 >> New Revision: 83287 >> >> URL: http://llvm.org/viewvc/llvm-project?rev=83287&view=rev >> Log: >> add some completely unformated and probably incoherent notes about >> things >> I saw while reading all the commits between the 2.5 and 2.6 release >> branches. >> >> Modified: >> llvm/trunk/docs/ReleaseNotes-2.6.html >> >> >> + llvm/Analysis/PointerTracking.h ==> dead?? >> > > Hi Chris, > > I didn't abandon PointerTracking.{h,cpp}, I intend to submit more > code, > but I didn't make it in time for 2.6. > If you want, you can drop it from the 2.6 release branch (as nothing > uses it there), but please keep it in trunk. No worries, those were just notes to myself to investigate more. 
-Chris From sabre at nondot.org Mon Oct 5 02:07:29 2009 From: sabre at nondot.org (Chris Lattner) Date: Mon, 05 Oct 2009 07:07:29 -0000 Subject: [llvm-commits] [llvm] r83299 - /llvm/trunk/docs/ReleaseNotes-2.6.html Message-ID: <200910050707.n9577TAd006542@zion.cs.uiuc.edu> Author: lattner Date: Mon Oct 5 02:07:29 2009 New Revision: 83299 URL: http://llvm.org/viewvc/llvm-project?rev=83299&view=rev Log: callgraph changes came after the 2.6 branch. Modified: llvm/trunk/docs/ReleaseNotes-2.6.html Modified: llvm/trunk/docs/ReleaseNotes-2.6.html URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/docs/ReleaseNotes-2.6.html?rev=83299&r1=83298&r2=83299&view=diff ============================================================================== --- llvm/trunk/docs/ReleaseNotes-2.6.html (original) +++ llvm/trunk/docs/ReleaseNotes-2.6.html Mon Oct 5 02:07:29 2009 @@ -71,7 +71,6 @@ Inbounds for GEP SRoA improvements for vector unions, memset, arbitrary weird bitfield accesses etc. It now produces "strange" sized integers. pre-alloc splitter?? - Callgraph + SCCPassMgr?? X86: Support for softfloat modes, typically used by OS kernels. MC: MCSection, MCAsmInfo From baldrick at free.fr Mon Oct 5 05:31:05 2009 From: baldrick at free.fr (Duncan Sands) Date: Mon, 05 Oct 2009 12:31:05 +0200 Subject: [llvm-commits] [llvm] r83284 - /llvm/trunk/Makefile.rules In-Reply-To: <200910041754.n94HsbfJ005192@zion.cs.uiuc.edu> References: <200910041754.n94HsbfJ005192@zion.cs.uiuc.edu> Message-ID: <4AC9CAE9.1030103@free.fr> Hi Jakob > - $(Echo) "Compiling $*.cpp for $(BuildMode) build " $(PIC_FLAG) > + $(Echo) "Compiling $*.cpp for $(BuildMode) build" $(PIC_FLAG) if PIC_FLAG is not empty then doesn't this result in a missing space between "build" and the contents of PIC_FLAG? Ciao, Duncan. 
From baldrick at free.fr Mon Oct 5 06:10:47 2009 From: baldrick at free.fr (Duncan Sands) Date: Mon, 05 Oct 2009 11:10:47 -0000 Subject: [llvm-commits] [gcc-plugin] r83300 - /gcc-plugin/trunk/llvm-backend.cpp Message-ID: <200910051110.n95BAmsC018018@zion.cs.uiuc.edu> Author: baldrick Date: Mon Oct 5 06:10:46 2009 New Revision: 83300 URL: http://llvm.org/viewvc/llvm-project?rev=83300&view=rev Log: During the emit_variables pass, set TREE_ASM_WRITTEN on all global variables, including internal ones. Previously this was set for internal globals during function emission, but that's too late: GCC would already have output them. Since GCC output is thrown away this was harmless, but wasted time. Also, add an option for seeing what GCC is outputting exactly. Modified: gcc-plugin/trunk/llvm-backend.cpp Modified: gcc-plugin/trunk/llvm-backend.cpp URL: http://llvm.org/viewvc/llvm-project/gcc-plugin/trunk/llvm-backend.cpp?rev=83300&r1=83299&r2=83300&view=diff ============================================================================== --- gcc-plugin/trunk/llvm-backend.cpp (original) +++ gcc-plugin/trunk/llvm-backend.cpp Mon Oct 5 06:10:46 2009 @@ -120,6 +120,7 @@ static bool DisableLLVMOptimizations; static bool EnableGCCOptimizations; static bool EmitIR; +static bool SaveGCCOutput; std::vector > StaticCtors, StaticDtors; SmallSetVector AttributeUsedGlobals; @@ -1620,19 +1621,28 @@ if (!main_input_filename && !asm_file_name) { llvm_asm_file_name = "-"; } else if (!asm_file_name) { - int len = strlen (dump_base_name); - char *dumpname = XNEWVEC (char, len + 6); + int len = strlen(dump_base_name); + char *dumpname = XNEWVEC(char, len + 6); - memcpy (dumpname, dump_base_name, len + 1); - strip_off_ending (dumpname, len); - strcat (dumpname, ".s"); + memcpy(dumpname, dump_base_name, len + 1); + strip_off_ending(dumpname, len); + strcat(dumpname, ".s"); llvm_asm_file_name = dumpname; } else { llvm_asm_file_name = asm_file_name; } - // Redirect any GCC output to /dev/null. 
- asm_file_name = HOST_BIT_BUCKET; + if (!SaveGCCOutput) { + // Redirect any GCC output to /dev/null. + asm_file_name = HOST_BIT_BUCKET; + } else { + // Save GCC output to a special file. Good for seeing how much pointless + // output gcc is producing. + int len = strlen(llvm_asm_file_name); + char *name = XNEWVEC(char, len + 5); + memcpy(name, llvm_asm_file_name, len + 1); + asm_file_name = strcat(name, ".gcc"); + } } @@ -1676,10 +1686,15 @@ // this compilation unit or not. Global variables that are not externally // visible will be output when their user is, or discarded if unused. struct varpool_node *vnode; - FOR_EACH_STATIC_VARIABLE (vnode) + FOR_EACH_STATIC_VARIABLE (vnode) { if (TREE_PUBLIC(vnode->decl)) + // An externally visible global variable - output it. emit_global_to_llvm(vnode->decl); + // Mark all variables as written so gcc doesn't waste time outputting them. + TREE_ASM_WRITTEN(vnode->decl) = 1; + } + return 0; } @@ -2033,6 +2048,7 @@ { "disable-llvm-optzns", &DisableLLVMOptimizations }, { "enable-gcc-optzns", &EnableGCCOptimizations }, { "emit-ir", &EmitIR }, + { "save-gcc-output", &SaveGCCOutput }, { NULL, NULL } // Terminator. }; From baldrick at free.fr Mon Oct 5 08:05:18 2009 From: baldrick at free.fr (Duncan Sands) Date: Mon, 05 Oct 2009 13:05:18 -0000 Subject: [llvm-commits] [gcc-plugin] r83301 - in /gcc-plugin/trunk: llvm-abi.h llvm-backend.cpp llvm-convert.cpp llvm-types.cpp Message-ID: <200910051305.n95D5JB3032277@zion.cs.uiuc.edu> Author: baldrick Date: Mon Oct 5 08:05:17 2009 New Revision: 83301 URL: http://llvm.org/viewvc/llvm-project?rev=83301&view=rev Log: Use new predicates to test whether a type is void, float etc. 
Modified: gcc-plugin/trunk/llvm-abi.h gcc-plugin/trunk/llvm-backend.cpp gcc-plugin/trunk/llvm-convert.cpp gcc-plugin/trunk/llvm-types.cpp Modified: gcc-plugin/trunk/llvm-abi.h URL: http://llvm.org/viewvc/llvm-project/gcc-plugin/trunk/llvm-abi.h?rev=83301&r1=83300&r2=83301&view=diff ============================================================================== --- gcc-plugin/trunk/llvm-abi.h (original) +++ gcc-plugin/trunk/llvm-abi.h Mon Oct 5 08:05:17 2009 @@ -391,7 +391,7 @@ C.HandleScalarShadowResult(PointerType::getUnqual(Ty), false); else C.HandleScalarResult(Ty); - } else if (Ty->isSingleValueType() || Ty == Type::getVoidTy(getGlobalContext())) { + } else if (Ty->isSingleValueType() || Ty->isVoidTy()) { // Return scalar values normally. C.HandleScalarResult(Ty); } else if (doNotUseShadowReturn(type, fn)) { @@ -437,7 +437,7 @@ // Figure out if this field is zero bits wide, e.g. {} or [0 x int]. Do // not include variable sized fields here. std::vector Elts; - if (Ty == Type::getVoidTy(getGlobalContext())) { + if (Ty->isVoidTy()) { // Handle void explicitly as an opaque type. const Type *OpTy = OpaqueType::get(getGlobalContext()); C.HandleScalarArgument(OpTy, type); @@ -665,7 +665,7 @@ const Type* wordType = getTargetData().getPointerSize() == 4 ? 
Type::getInt32Ty(getGlobalContext()) : Type::getInt64Ty(getGlobalContext()); for (unsigned i=0, e=Elts.size(); i!=e; ++i) - if (OrigElts[i]==Type::getVoidTy(getGlobalContext())) + if (OrigElts[i]->isVoidTy()) Elts[i] = wordType; const StructType *STy = StructType::get(getGlobalContext(), Elts, false); @@ -687,7 +687,7 @@ } } for (unsigned i = 0, e = Elts.size(); i != e; ++i) { - if (OrigElts[i] != Type::getVoidTy(getGlobalContext())) { + if (!OrigElts[i]->isVoidTy()) { C.EnterField(i, STy); unsigned RealSize = 0; if (LastEltSizeDiff && i == (e - 1)) @@ -747,7 +747,7 @@ C.HandleScalarShadowResult(PointerType::getUnqual(Ty), false); else C.HandleScalarResult(Ty); - } else if (Ty->isSingleValueType() || Ty == Type::getVoidTy(getGlobalContext())) { + } else if (Ty->isSingleValueType() || Ty->isVoidTy()) { // Return scalar values normally. C.HandleScalarResult(Ty); } else if (doNotUseShadowReturn(type, fn)) { @@ -1110,7 +1110,7 @@ const Type* wordType = getTargetData().getPointerSize() == 4 ? 
Type::getInt32Ty(getGlobalContext()) : Type::getInt64Ty(getGlobalContext()); for (unsigned i=0, e=Elts.size(); i!=e; ++i) - if (OrigElts[i]==Type::getVoidTy(getGlobalContext())) + if (OrigElts[i]->isVoidTy()) Elts[i] = wordType; const StructType *STy = StructType::get(getGlobalContext(), Elts, false); @@ -1132,7 +1132,7 @@ } } for (unsigned i = 0, e = Elts.size(); i != e; ++i) { - if (OrigElts[i] != Type::getVoidTy(getGlobalContext())) { + if (!OrigElts[i]->isVoidTy()) { C.EnterField(i, STy); unsigned RealSize = 0; if (LastEltSizeDiff && i == (e - 1)) Modified: gcc-plugin/trunk/llvm-backend.cpp URL: http://llvm.org/viewvc/llvm-project/gcc-plugin/trunk/llvm-backend.cpp?rev=83301&r1=83300&r2=83301&view=diff ============================================================================== --- gcc-plugin/trunk/llvm-backend.cpp (original) +++ gcc-plugin/trunk/llvm-backend.cpp Mon Oct 5 08:05:17 2009 @@ -1409,7 +1409,7 @@ // If we have "extern void foo", make the global have type {} instead of // type void. - if (Ty == Type::getVoidTy(Context)) + if (Ty->isVoidTy()) Ty = StructType::get(Context); if (Name.empty()) { // Global has no name. Modified: gcc-plugin/trunk/llvm-convert.cpp URL: http://llvm.org/viewvc/llvm-project/gcc-plugin/trunk/llvm-convert.cpp?rev=83301&r1=83300&r2=83301&view=diff ============================================================================== --- gcc-plugin/trunk/llvm-convert.cpp (original) +++ gcc-plugin/trunk/llvm-convert.cpp Mon Oct 5 08:05:17 2009 @@ -380,7 +380,7 @@ // If this is GCC being sloppy about pointer types, insert a bitcast. // See PR1083 for an example. ArgVal = Builder.CreateBitCast(ArgVal, LLVMTy); - } else if (ArgVal->getType() == Type::getDoubleTy(Context)) { + } else if (ArgVal->getType()->isDoubleTy()) { // If this is a K&R float parameter, it got promoted to double. Insert // the truncation to float now. 
ArgVal = Builder.CreateFPTrunc(ArgVal, LLVMTy, @@ -796,7 +796,7 @@ SmallVector RetVals; // If the function returns a value, get it into a register and return it now. - if (Fn->getReturnType() != Type::getVoidTy(Context)) { + if (!Fn->getReturnType()->isVoidTy()) { if (!AGGREGATE_TYPE_P(TREE_TYPE(DECL_RESULT(FnDecl)))) { // If the DECL_RESULT is a scalar type, just load out the return value // and return it. @@ -2920,7 +2920,7 @@ if (Client.isShadowReturn()) return Client.EmitShadowResult(gimple_call_return_type(stmt), DestLoc); - if (Call->getType() == Type::getVoidTy(Context)) + if (Call->getType()->isVoidTy()) return 0; if (Client.isAggrReturn()) { @@ -5014,7 +5014,7 @@ // FIXME: HACK: Just ignore these. { const Type *Ty = ConvertType(gimple_call_return_type(stmt)); - if (Ty != Type::getVoidTy(Context)) + if (!Ty->isVoidTy()) Result = Constant::getNullValue(Ty); return true; } @@ -6147,7 +6147,7 @@ const Type *Ty = ConvertType(TREE_TYPE(exp)); // If we have "extern void foo", make the global have type {} instead of // type void. - if (Ty == Type::getVoidTy(Context)) Ty = StructType::get(Context); + if (Ty->isVoidTy()) Ty = StructType::get(Context); const PointerType *PTy = PointerType::getUnqual(Ty); unsigned Alignment = Ty->isSized() ? TD.getABITypeAlignment(Ty) : 1; if (DECL_ALIGN(exp)) { @@ -6987,7 +6987,7 @@ int UArr[2]; double V; }; - if (Ty==Type::getFloatTy(Context) || Ty==Type::getDoubleTy(Context)) { + if (Ty->isFloatTy() || Ty->isDoubleTy()) { REAL_VALUE_TO_TARGET_DOUBLE(TREE_REAL_CST(exp), RealArr); // Here's how this works: @@ -7013,9 +7013,8 @@ std::swap(UArr[0], UArr[1]); return - ConstantFP::get(Context, Ty==Type::getFloatTy(Context) ? - APFloat((float)V) : APFloat(V)); - } else if (Ty==Type::getX86_FP80Ty(Context)) { + ConstantFP::get(Context, Ty->isFloatTy() ? 
APFloat((float)V) : APFloat(V)); + } else if (Ty->isX86_FP80Ty()) { long RealArr[4]; uint64_t UArr[2]; REAL_VALUE_TO_TARGET_LONG_DOUBLE(TREE_REAL_CST(exp), RealArr); @@ -7023,7 +7022,7 @@ ((uint64_t)((uint32_t)RealArr[1]) << 32); UArr[1] = (uint16_t)RealArr[2]; return ConstantFP::get(Context, APFloat(APInt(80, 2, UArr))); - } else if (Ty==Type::getPPC_FP128Ty(Context)) { + } else if (Ty->isPPC_FP128Ty()) { long RealArr[4]; uint64_t UArr[2]; REAL_VALUE_TO_TARGET_LONG_DOUBLE(TREE_REAL_CST(exp), RealArr); @@ -7887,7 +7886,7 @@ // itself (allowed in GCC but not in LLVM) then the global is changed to have // the type of the initializer. Correct for this now. const Type *Ty = ConvertType(TREE_TYPE(exp)); - if (Ty == Type::getVoidTy(Context)) Ty = Type::getInt8Ty(Context); // void* -> i8*. + if (Ty->isVoidTy()) Ty = Type::getInt8Ty(Context); // void* -> i8*. return TheFolder->CreateBitCast(Val, Ty->getPointerTo()); } Modified: gcc-plugin/trunk/llvm-types.cpp URL: http://llvm.org/viewvc/llvm-project/gcc-plugin/trunk/llvm-types.cpp?rev=83301&r1=83300&r2=83301&view=diff ============================================================================== --- gcc-plugin/trunk/llvm-types.cpp (original) +++ gcc-plugin/trunk/llvm-types.cpp Mon Oct 5 08:05:17 2009 @@ -745,7 +745,7 @@ // Restore ConvertingStruct for the caller. ConvertingStruct = false; - if (Actual == Type::getVoidTy(Context)) + if (Actual->isVoidTy()) Actual = Type::getInt8Ty(Context); // void* -> sbyte* // Update the type, potentially updating TYPE_LLVM(type). 
@@ -780,7 +780,7 @@ Ty = ConvertType(TREE_TYPE(type)); } - if (Ty == Type::getVoidTy(Context)) + if (Ty->isVoidTy()) Ty = Type::getInt8Ty(Context); // void* -> sbyte* return TypeDB.setType(type, PointerType::getUnqual(Ty)); } From stoklund at 2pi.dk Mon Oct 5 09:42:04 2009 From: stoklund at 2pi.dk (Jakob Stoklund Olesen) Date: Mon, 5 Oct 2009 16:42:04 +0200 Subject: [llvm-commits] [llvm] r83284 - /llvm/trunk/Makefile.rules In-Reply-To: <4AC9CAE9.1030103@free.fr> References: <200910041754.n94HsbfJ005192@zion.cs.uiuc.edu> <4AC9CAE9.1030103@free.fr> Message-ID: <52E51955-40CA-41FD-8C8D-6DFA0E71DEFE@2pi.dk> On 05/10/2009, at 12.31, Duncan Sands wrote: > Hi Jakob > >> - $(Echo) "Compiling $*.cpp for $(BuildMode) build " $(PIC_FLAG) >> + $(Echo) "Compiling $*.cpp for $(BuildMode) build" $(PIC_FLAG) > > if PIC_FLAG is not empty then doesn't this result in a missing space > between "build" and the contents of PIC_FLAG? No, I tested that. The echo command will insert spaces between arguments: $ echo hello world hello world The shell will conveniently ignore empty arguments: $ echo hello $UNDEF | hexdump -C 00000000 68 65 6c 6c 6f 0a |hello.| So there are no trailing spaces. From baldrick at free.fr Mon Oct 5 09:56:39 2009 From: baldrick at free.fr (Duncan Sands) Date: Mon, 05 Oct 2009 16:56:39 +0200 Subject: [llvm-commits] [llvm] r83284 - /llvm/trunk/Makefile.rules In-Reply-To: <52E51955-40CA-41FD-8C8D-6DFA0E71DEFE@2pi.dk> References: <200910041754.n94HsbfJ005192@zion.cs.uiuc.edu> <4AC9CAE9.1030103@free.fr> <52E51955-40CA-41FD-8C8D-6DFA0E71DEFE@2pi.dk> Message-ID: <4ACA0927.4040603@free.fr> Hi Jakob, >> if PIC_FLAG is not empty then doesn't this result in a missing space >> between "build" and the contents of PIC_FLAG? > > No, I tested that. > > The echo command will insert spaces between arguments: I realised that just after hitting the send button, sorry for the noise. Jetlag and no coffee - a fatal combination! Ciao, Duncan. 
From gohman at apple.com Mon Oct 5 10:23:17 2009 From: gohman at apple.com (Dan Gohman) Date: Mon, 05 Oct 2009 15:23:17 -0000 Subject: [llvm-commits] [llvm] r83306 - /llvm/trunk/include/llvm/CodeGen/FastISel.h Message-ID: <200910051523.n95FNHdq017679@zion.cs.uiuc.edu> Author: djg Date: Mon Oct 5 10:23:17 2009 New Revision: 83306 URL: http://llvm.org/viewvc/llvm-project?rev=83306&view=rev Log: Fix a name in a comment. Modified: llvm/trunk/include/llvm/CodeGen/FastISel.h Modified: llvm/trunk/include/llvm/CodeGen/FastISel.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/FastISel.h?rev=83306&r1=83305&r2=83306&view=diff ============================================================================== --- llvm/trunk/include/llvm/CodeGen/FastISel.h (original) +++ llvm/trunk/include/llvm/CodeGen/FastISel.h Mon Oct 5 10:23:17 2009 @@ -91,7 +91,7 @@ /// bool SelectInstruction(Instruction *I); - /// SelectInstruction - Do "fast" instruction selection for the given + /// SelectOperator - Do "fast" instruction selection for the given /// LLVM IR operator (Instruction or ConstantExpr), and append /// generated machine instructions to the current block. Return true /// if selection was successful. From gohman at apple.com Mon Oct 5 10:42:09 2009 From: gohman at apple.com (Dan Gohman) Date: Mon, 05 Oct 2009 15:42:09 -0000 Subject: [llvm-commits] [llvm] r83307 - /llvm/trunk/lib/Target/X86/X86RegisterInfo.td Message-ID: <200910051542.n95Fg9CZ020242@zion.cs.uiuc.edu> Author: djg Date: Mon Oct 5 10:42:08 2009 New Revision: 83307 URL: http://llvm.org/viewvc/llvm-project?rev=83307&view=rev Log: Add RIP to GR64_NOREX. This fixed a MachineVerifier error when RIP is used in an operand which requires GR64_NOREX. 
Modified: llvm/trunk/lib/Target/X86/X86RegisterInfo.td Modified: llvm/trunk/lib/Target/X86/X86RegisterInfo.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86RegisterInfo.td?rev=83307&r1=83306&r2=83307&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86RegisterInfo.td (original) +++ llvm/trunk/lib/Target/X86/X86RegisterInfo.td Mon Oct 5 10:42:08 2009 @@ -555,7 +555,7 @@ } // GR64_NOREX - GR64 registers which do not require a REX prefix. def GR64_NOREX : RegisterClass<"X86", [i64], 64, - [RAX, RCX, RDX, RSI, RDI, RBX, RBP, RSP]> { + [RAX, RCX, RDX, RSI, RDI, RBX, RBP, RSP, RIP]> { let SubRegClassList = [GR8_NOREX, GR8_NOREX, GR16_NOREX, GR32_NOREX]; let MethodProtos = [{ iterator allocation_order_end(const MachineFunction &MF) const; @@ -567,11 +567,11 @@ const TargetRegisterInfo *RI = TM.getRegisterInfo(); // Does the function dedicate RBP to being a frame ptr? if (RI->hasFP(MF)) - // If so, don't allocate RSP or RBP. - return end() - 2; + // If so, don't allocate RIP, RSP or RBP. + return end() - 3; else - // If not, just don't allocate RSP. - return end() - 1; + // If not, just don't allocate RIP or RSP. + return end() - 2; } }]; } From gohman at apple.com Mon Oct 5 10:52:09 2009 From: gohman at apple.com (Dan Gohman) Date: Mon, 05 Oct 2009 15:52:09 -0000 Subject: [llvm-commits] [llvm] r83308 - in /llvm/trunk/lib/Target: Mips/MipsInstrInfo.h X86/X86InstrInfo.h Message-ID: <200910051552.n95Fq9Jt021611@zion.cs.uiuc.edu> Author: djg Date: Mon Oct 5 10:52:08 2009 New Revision: 83308 URL: http://llvm.org/viewvc/llvm-project?rev=83308&view=rev Log: Remove explicit enum integer values. They don't appear to be needed, and they make it less convenient to add new entries. 
Modified: llvm/trunk/lib/Target/Mips/MipsInstrInfo.h llvm/trunk/lib/Target/X86/X86InstrInfo.h Modified: llvm/trunk/lib/Target/Mips/MipsInstrInfo.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Mips/MipsInstrInfo.h?rev=83308&r1=83307&r2=83308&view=diff ============================================================================== --- llvm/trunk/lib/Target/Mips/MipsInstrInfo.h (original) +++ llvm/trunk/lib/Target/Mips/MipsInstrInfo.h Mon Oct 5 10:52:08 2009 @@ -139,25 +139,25 @@ //===------------------------------------------------------------------===// // Mips Specific MachineOperand flags. - MO_NO_FLAG = 0, + MO_NO_FLAG, /// MO_GOT - Represents the offset into the global offset table at which /// the address the relocation entry symbol resides during execution. - MO_GOT = 1, + MO_GOT, /// MO_GOT_CALL - Represents the offset into the global offset table at /// which the address of a call site relocation entry symbol resides /// during execution. This is different from the above since this flag /// can only be present in call instructions. - MO_GOT_CALL = 2, + MO_GOT_CALL, /// MO_GPREL - Represents the offset from the current gp value to be used /// for the relocatable object file being produced. - MO_GPREL = 3, + MO_GPREL, /// MO_ABS_HILO - Represents the hi or low part of an absolute symbol /// address. - MO_ABS_HILO = 4 + MO_ABS_HILO }; } Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.h?rev=83308&r1=83307&r2=83308&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86InstrInfo.h (original) +++ llvm/trunk/lib/Target/X86/X86InstrInfo.h Mon Oct 5 10:52:08 2009 @@ -74,31 +74,31 @@ //===------------------------------------------------------------------===// // X86 Specific MachineOperand flags. 
- MO_NO_FLAG = 0, + MO_NO_FLAG, /// MO_GOT_ABSOLUTE_ADDRESS - On a symbol operand, this represents a /// relocation of: /// SYMBOL_LABEL + [. - PICBASELABEL] - MO_GOT_ABSOLUTE_ADDRESS = 1, + MO_GOT_ABSOLUTE_ADDRESS, /// MO_PIC_BASE_OFFSET - On a symbol operand this indicates that the /// immediate should get the value of the symbol minus the PIC base label: /// SYMBOL_LABEL - PICBASELABEL - MO_PIC_BASE_OFFSET = 2, + MO_PIC_BASE_OFFSET, /// MO_GOT - On a symbol operand this indicates that the immediate is the /// offset to the GOT entry for the symbol name from the base of the GOT. /// /// See the X86-64 ELF ABI supplement for more details. /// SYMBOL_LABEL @GOT - MO_GOT = 3, + MO_GOT, /// MO_GOTOFF - On a symbol operand this indicates that the immediate is /// the offset to the location of the symbol name from the base of the GOT. /// /// See the X86-64 ELF ABI supplement for more details. /// SYMBOL_LABEL @GOTOFF - MO_GOTOFF = 4, + MO_GOTOFF, /// MO_GOTPCREL - On a symbol operand this indicates that the immediate is /// offset to the GOT entry for the symbol name from the current code @@ -106,75 +106,75 @@ /// /// See the X86-64 ELF ABI supplement for more details. /// SYMBOL_LABEL @GOTPCREL - MO_GOTPCREL = 5, + MO_GOTPCREL, /// MO_PLT - On a symbol operand this indicates that the immediate is /// offset to the PLT entry of symbol name from the current code location. /// /// See the X86-64 ELF ABI supplement for more details. /// SYMBOL_LABEL @PLT - MO_PLT = 6, + MO_PLT, /// MO_TLSGD - On a symbol operand this indicates that the immediate is /// some TLS offset. /// /// See 'ELF Handling for Thread-Local Storage' for more details. /// SYMBOL_LABEL @TLSGD - MO_TLSGD = 7, + MO_TLSGD, /// MO_GOTTPOFF - On a symbol operand this indicates that the immediate is /// some TLS offset. /// /// See 'ELF Handling for Thread-Local Storage' for more details. 
/// SYMBOL_LABEL @GOTTPOFF - MO_GOTTPOFF = 8, + MO_GOTTPOFF, /// MO_INDNTPOFF - On a symbol operand this indicates that the immediate is /// some TLS offset. /// /// See 'ELF Handling for Thread-Local Storage' for more details. /// SYMBOL_LABEL @INDNTPOFF - MO_INDNTPOFF = 9, + MO_INDNTPOFF, /// MO_TPOFF - On a symbol operand this indicates that the immediate is /// some TLS offset. /// /// See 'ELF Handling for Thread-Local Storage' for more details. /// SYMBOL_LABEL @TPOFF - MO_TPOFF = 10, + MO_TPOFF, /// MO_NTPOFF - On a symbol operand this indicates that the immediate is /// some TLS offset. /// /// See 'ELF Handling for Thread-Local Storage' for more details. /// SYMBOL_LABEL @NTPOFF - MO_NTPOFF = 11, + MO_NTPOFF, /// MO_DLLIMPORT - On a symbol operand "FOO", this indicates that the /// reference is actually to the "__imp_FOO" symbol. This is used for /// dllimport linkage on windows. - MO_DLLIMPORT = 12, + MO_DLLIMPORT, /// MO_DARWIN_STUB - On a symbol operand "FOO", this indicates that the /// reference is actually to the "FOO$stub" symbol. This is used for calls /// and jumps to external functions on Tiger and before. - MO_DARWIN_STUB = 13, + MO_DARWIN_STUB, /// MO_DARWIN_NONLAZY - On a symbol operand "FOO", this indicates that the /// reference is actually to the "FOO$non_lazy_ptr" symbol, which is a /// non-PIC-base-relative reference to a non-hidden dyld lazy pointer stub. - MO_DARWIN_NONLAZY = 14, + MO_DARWIN_NONLAZY, /// MO_DARWIN_NONLAZY_PIC_BASE - On a symbol operand "FOO", this indicates /// that the reference is actually to "FOO$non_lazy_ptr - PICBASE", which is /// a PIC-base-relative reference to a non-hidden dyld lazy pointer stub. - MO_DARWIN_NONLAZY_PIC_BASE = 15, + MO_DARWIN_NONLAZY_PIC_BASE, /// MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE - On a symbol operand "FOO", this /// indicates that the reference is actually to "FOO$non_lazy_ptr -PICBASE", /// which is a PIC-base-relative reference to a hidden dyld lazy pointer /// stub. 
- MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE = 16 + MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE }; } From gohman at apple.com Mon Oct 5 11:31:55 2009 From: gohman at apple.com (Dan Gohman) Date: Mon, 05 Oct 2009 16:31:55 -0000 Subject: [llvm-commits] [llvm] r83310 - /llvm/trunk/lib/Transforms/Scalar/InstructionCombining.cpp Message-ID: <200910051631.n95GVtmP026939@zion.cs.uiuc.edu> Author: djg Date: Mon Oct 5 11:31:55 2009 New Revision: 83310 URL: http://llvm.org/viewvc/llvm-project?rev=83310&view=rev Log: Use Use::operator= instead of Use::set, for consistency. Modified: llvm/trunk/lib/Transforms/Scalar/InstructionCombining.cpp Modified: llvm/trunk/lib/Transforms/Scalar/InstructionCombining.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/InstructionCombining.cpp?rev=83310&r1=83309&r2=83310&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/Scalar/InstructionCombining.cpp (original) +++ llvm/trunk/lib/Transforms/Scalar/InstructionCombining.cpp Mon Oct 5 11:31:55 2009 @@ -774,7 +774,7 @@ Value *NewVal = SimplifyDemandedUseBits(U.get(), DemandedMask, KnownZero, KnownOne, Depth); if (NewVal == 0) return false; - U.set(NewVal); + U = NewVal; return true; } @@ -12819,7 +12819,7 @@ if (Constant *NewC = ConstantFoldConstantExpression(CE, F.getContext(), TD)) if (NewC != CE) { - i->set(NewC); + *i = NewC; MadeIRChange = true; } } From gohman at apple.com Mon Oct 5 11:36:27 2009 From: gohman at apple.com (Dan Gohman) Date: Mon, 05 Oct 2009 16:36:27 -0000 Subject: [llvm-commits] [llvm] r83311 - in /llvm/trunk: include/llvm/Analysis/ConstantFolding.h lib/Analysis/ConstantFolding.cpp lib/Transforms/IPO/GlobalOpt.cpp lib/Transforms/Scalar/InstructionCombining.cpp lib/Transforms/Scalar/SCCP.cpp lib/Transforms/Utils/CloneFunction.cpp Message-ID: <200910051636.n95GaRcG027532@zion.cs.uiuc.edu> Author: djg Date: Mon Oct 5 11:36:26 2009 New Revision: 83311 URL: 
http://llvm.org/viewvc/llvm-project?rev=83311&view=rev Log: Remove an unnecessary LLVMContext argument in ConstantFoldLoadThroughGEPConstantExpr. Modified: llvm/trunk/include/llvm/Analysis/ConstantFolding.h llvm/trunk/lib/Analysis/ConstantFolding.cpp llvm/trunk/lib/Transforms/IPO/GlobalOpt.cpp llvm/trunk/lib/Transforms/Scalar/InstructionCombining.cpp llvm/trunk/lib/Transforms/Scalar/SCCP.cpp llvm/trunk/lib/Transforms/Utils/CloneFunction.cpp Modified: llvm/trunk/include/llvm/Analysis/ConstantFolding.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/ConstantFolding.h?rev=83311&r1=83310&r2=83311&view=diff ============================================================================== --- llvm/trunk/include/llvm/Analysis/ConstantFolding.h (original) +++ llvm/trunk/include/llvm/Analysis/ConstantFolding.h Mon Oct 5 11:36:26 2009 @@ -66,8 +66,7 @@ /// ConstantFoldLoadThroughGEPConstantExpr - Given a constant and a /// getelementptr constantexpr, return the constant value being addressed by the /// constant expression, or null if something is funny and we can't decide. -Constant *ConstantFoldLoadThroughGEPConstantExpr(Constant *C, ConstantExpr *CE, - LLVMContext &Context); +Constant *ConstantFoldLoadThroughGEPConstantExpr(Constant *C, ConstantExpr *CE); /// canConstantFoldCallTo - Return true if its even possible to fold a call to /// the specified function. Modified: llvm/trunk/lib/Analysis/ConstantFolding.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/ConstantFolding.cpp?rev=83311&r1=83310&r2=83311&view=diff ============================================================================== --- llvm/trunk/lib/Analysis/ConstantFolding.cpp (original) +++ llvm/trunk/lib/Analysis/ConstantFolding.cpp Mon Oct 5 11:36:26 2009 @@ -606,8 +606,7 @@ /// getelementptr constantexpr, return the constant value being addressed by the /// constant expression, or null if something is funny and we can't decide. 
Constant *llvm::ConstantFoldLoadThroughGEPConstantExpr(Constant *C, - ConstantExpr *CE, - LLVMContext &Context) { + ConstantExpr *CE) { if (CE->getOperand(1) != Constant::getNullValue(CE->getOperand(1)->getType())) return 0; // Do not allow stepping over the value! Modified: llvm/trunk/lib/Transforms/IPO/GlobalOpt.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/IPO/GlobalOpt.cpp?rev=83311&r1=83310&r2=83311&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/IPO/GlobalOpt.cpp (original) +++ llvm/trunk/lib/Transforms/IPO/GlobalOpt.cpp Mon Oct 5 11:36:26 2009 @@ -304,7 +304,7 @@ if (CE->getOpcode() == Instruction::GetElementPtr) { Constant *SubInit = 0; if (Init) - SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE, Context); + SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE); Changed |= CleanupConstantGlobalUsers(CE, SubInit, Context); } else if (CE->getOpcode() == Instruction::BitCast && isa(CE->getType())) { @@ -325,7 +325,7 @@ ConstantExpr *CE = dyn_cast_or_null(ConstantFoldInstruction(GEP, Context)); if (Init && CE && CE->getOpcode() == Instruction::GetElementPtr) - SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE, Context); + SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE); } Changed |= CleanupConstantGlobalUsers(GEP, SubInit, Context); @@ -2475,8 +2475,7 @@ if (!CE->isGEPWithNoNotionalOverIndexing()) return false; - return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE, - Context); + return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE); } return false; } @@ -2588,8 +2587,7 @@ isa(CE->getOperand(0))) { GlobalVariable *GV = cast(CE->getOperand(0)); if (GV->hasDefinitiveInitializer()) - return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE, - Context); + return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE); } return 0; // don't know how to evaluate. 
Modified: llvm/trunk/lib/Transforms/Scalar/InstructionCombining.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/InstructionCombining.cpp?rev=83311&r1=83310&r2=83311&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/Scalar/InstructionCombining.cpp (original) +++ llvm/trunk/lib/Transforms/Scalar/InstructionCombining.cpp Mon Oct 5 11:36:26 2009 @@ -11439,8 +11439,7 @@ if (GlobalVariable *GV = dyn_cast(CE->getOperand(0))) if (GV->isConstant() && GV->hasDefinitiveInitializer()) if (Constant *V = - ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE, - *Context)) + ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE)) return ReplaceInstUsesWith(LI, V); if (CE->getOperand(0)->isNullValue()) { // Insert a new store to null instruction before the load to indicate Modified: llvm/trunk/lib/Transforms/Scalar/SCCP.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/SCCP.cpp?rev=83311&r1=83310&r2=83311&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/Scalar/SCCP.cpp (original) +++ llvm/trunk/lib/Transforms/Scalar/SCCP.cpp Mon Oct 5 11:36:26 2009 @@ -1162,8 +1162,7 @@ if (GlobalVariable *GV = dyn_cast(CE->getOperand(0))) if (GV->isConstant() && GV->hasDefinitiveInitializer()) if (Constant *V = - ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE, - *Context)) { + ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE)) { markConstant(IV, &I, V); return; } Modified: llvm/trunk/lib/Transforms/Utils/CloneFunction.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Utils/CloneFunction.cpp?rev=83311&r1=83310&r2=83311&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/Utils/CloneFunction.cpp (original) +++ llvm/trunk/lib/Transforms/Utils/CloneFunction.cpp Mon Oct 
5 11:36:26 2009 @@ -346,7 +346,7 @@ if (GlobalVariable *GV = dyn_cast(CE->getOperand(0))) if (GV->isConstant() && GV->hasDefinitiveInitializer()) return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), - CE, Context); + CE); return ConstantFoldInstOperands(I->getOpcode(), I->getType(), &Ops[0], Ops.size(), Context, TD); From bob.wilson at apple.com Mon Oct 5 11:47:38 2009 From: bob.wilson at apple.com (Bob Wilson) Date: Mon, 05 Oct 2009 16:47:38 -0000 Subject: [llvm-commits] [llvm-gcc-4.2] r83312 - /llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp Message-ID: <200910051647.n95Glcxk028965@zion.cs.uiuc.edu> Author: bwilson Date: Mon Oct 5 11:47:38 2009 New Revision: 83312 URL: http://llvm.org/viewvc/llvm-project?rev=83312&view=rev Log: Change my previous optimization, for aggregates with only one element, to only apply to types with constant sizes. As Duncan explained on the mailing list, there may be other elements at variable offsets that do not show up in the llvm type. Modified: llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp Modified: llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp?rev=83312&r1=83311&r2=83312&view=diff ============================================================================== --- llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp (original) +++ llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp Mon Oct 5 11:47:38 2009 @@ -1323,8 +1323,8 @@ // If the type is small, copy the elements instead of using a block copy. 
const Type *LLVMTy = ConvertType(type); unsigned NumElts = CountAggregateElements(LLVMTy); - if (NumElts == 1 || - (TREE_CODE(TYPE_SIZE(type)) == INTEGER_CST && + if (TREE_CODE(TYPE_SIZE(type)) == INTEGER_CST && + (NumElts == 1 || TREE_INT_CST_LOW(TYPE_SIZE_UNIT(type)) < TARGET_LLVM_MIN_BYTES_COPY_BY_MEMCPY)) { From stuart at apple.com Mon Oct 5 11:54:05 2009 From: stuart at apple.com (Stuart Hastings) Date: Mon, 05 Oct 2009 16:54:05 -0000 Subject: [llvm-commits] [llvm-gcc-4.2] r83313 - /llvm-gcc-4.2/tags/Apple/llvmgcc42-2311/ Message-ID: <200910051654.n95Gs561029793@zion.cs.uiuc.edu> Author: stuart Date: Mon Oct 5 11:54:05 2009 New Revision: 83313 URL: http://llvm.org/viewvc/llvm-project?rev=83313&view=rev Log: Creating llvmgcc42-2311 tag. Added: llvm-gcc-4.2/tags/Apple/llvmgcc42-2311/ - copied from r83312, llvm-gcc-4.2/tags/Apple/llvmgcc42-2310.1/ From stuart at apple.com Mon Oct 5 11:54:54 2009 From: stuart at apple.com (Stuart Hastings) Date: Mon, 05 Oct 2009 16:54:54 -0000 Subject: [llvm-commits] [llvm] r83314 - /llvm/tags/Apple/llvmCore-2311/ Message-ID: <200910051654.n95GsscO029909@zion.cs.uiuc.edu> Author: stuart Date: Mon Oct 5 11:54:54 2009 New Revision: 83314 URL: http://llvm.org/viewvc/llvm-project?rev=83314&view=rev Log: Creating llvmCore-2311 tag. Added: llvm/tags/Apple/llvmCore-2311/ - copied from r83313, llvm/tags/Apple/llvmCore-2310.1/ From dpatel at apple.com Mon Oct 5 13:03:20 2009 From: dpatel at apple.com (Devang Patel) Date: Mon, 05 Oct 2009 18:03:20 -0000 Subject: [llvm-commits] [llvm] r83317 - /llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp Message-ID: <200910051803.n95I3KuR006248@zion.cs.uiuc.edu> Author: dpatel Date: Mon Oct 5 13:03:19 2009 New Revision: 83317 URL: http://llvm.org/viewvc/llvm-project?rev=83317&view=rev Log: Gracefully handle various scopes while recording source line info. 
Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp?rev=83317&r1=83316&r2=83317&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp (original) +++ llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp Mon Oct 5 13:03:19 2009 @@ -1958,16 +1958,33 @@ /// label. Returns a unique label ID used to generate a label and provide /// correspondence to the source line list. unsigned DwarfDebug::RecordSourceLine(unsigned Line, unsigned Col, - MDNode *Scope) { + MDNode *S) { if (!MMI) return 0; if (TimePassesIsEnabled) DebugTimer->startTimer(); - DICompileUnit CU(Scope); - unsigned Src = GetOrCreateSourceID(CU.getDirectory(), - CU.getFilename()); + const char *Dir = NULL; + const char *Fn = NULL; + + DIDescriptor Scope(S); + if (Scope.isCompileUnit()) { + DICompileUnit CU(S); + Dir = CU.getDirectory(); + Fn = CU.getFilename(); + } else if (Scope.isSubprogram()) { + DISubprogram SP(S); + Dir = SP.getDirectory(); + Fn = SP.getFilename(); + } else if (Scope.isLexicalBlock()) { + DILexicalBlock DB(S); + Dir = DB.getDirectory(); + Fn = DB.getFilename(); + } else + assert (0 && "Unexpected scope info"); + + unsigned Src = GetOrCreateSourceID(Dir, Fn); unsigned ID = MMI->NextLabelID(); Lines.push_back(SrcLineInfo(Line, Col, Src, ID)); From eocallaghan at auroraux.org Mon Oct 5 13:43:19 2009 From: eocallaghan at auroraux.org (Edward O'Callaghan) Date: Mon, 05 Oct 2009 18:43:19 -0000 Subject: [llvm-commits] [llvm] r83318 - in /llvm/trunk/lib/MC: MCDisassembler.cpp MCInstPrinter.cpp Message-ID: <200910051843.n95IhJdf011569@zion.cs.uiuc.edu> Author: evocallaghan Date: Mon Oct 5 13:43:19 2009 New Revision: 83318 URL: http://llvm.org/viewvc/llvm-project?rev=83318&view=rev Log: No newline at end of files. 
Modified: llvm/trunk/lib/MC/MCDisassembler.cpp llvm/trunk/lib/MC/MCInstPrinter.cpp Modified: llvm/trunk/lib/MC/MCDisassembler.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/MC/MCDisassembler.cpp?rev=83318&r1=83317&r2=83318&view=diff ============================================================================== --- llvm/trunk/lib/MC/MCDisassembler.cpp (original) +++ llvm/trunk/lib/MC/MCDisassembler.cpp Mon Oct 5 13:43:19 2009 @@ -11,4 +11,4 @@ using namespace llvm; MCDisassembler::~MCDisassembler() { -} \ No newline at end of file +} Modified: llvm/trunk/lib/MC/MCInstPrinter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/MC/MCInstPrinter.cpp?rev=83318&r1=83317&r2=83318&view=diff ============================================================================== --- llvm/trunk/lib/MC/MCInstPrinter.cpp (original) +++ llvm/trunk/lib/MC/MCInstPrinter.cpp Mon Oct 5 13:43:19 2009 @@ -11,4 +11,4 @@ using namespace llvm; MCInstPrinter::~MCInstPrinter() { -} \ No newline at end of file +} From jyasskin at google.com Mon Oct 5 14:59:22 2009 From: jyasskin at google.com (Jeffrey Yasskin) Date: Mon, 5 Oct 2009 12:59:22 -0700 Subject: [llvm-commits] [patch] Add a test for PR3043 In-Reply-To: References: Message-ID: Ping? (Sorry for being pushy, but the PR5116 fix that this makes safe is blocking our Q3 release.) On Sat, Oct 3, 2009 at 6:56 PM, Jeffrey Yasskin wrote: > In fixing bugs around stubs, it would be nice to have tests for the > bugs that have already been fixed so I don't reopen them. Here's one > for http://llvm.org/bugs/show_bug.cgi?id=3043. Since it's my first > addition to the dejagnu test suite, I'd appreciate any stylistic > pointers. I tested the test by disabling r59265, and indeed the test > fails. > > Evan, could you write a test for the bug > http://llvm.org/viewvc/llvm-project?view=rev&revision=60822 fixed? 
My > current fix for PR5116 simply reverts that change, and I want to know > if I'm breaking something or if the original bug has now been fixed > some other way. > > Thanks, > Jeffrey > From evan at fallingsnow.net Mon Oct 5 15:53:34 2009 From: evan at fallingsnow.net (Evan Phoenix) Date: Mon, 5 Oct 2009 13:53:34 -0700 Subject: [llvm-commits] [PATCH] Add signed math cases to overflow folding Message-ID: <06059EF3-7215-43FC-B518-22B778BDFC18@fallingsnow.net> Extend constant folding to understand the signed variants of the overflow intrinsics. - Evan Phoenix -------------- next part -------------- A non-text attachment was scrubbed... Name: llvm-signed-overflow-folding.diff Type: application/octet-stream Size: 4955 bytes Desc: not available Url : http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20091005/0b368987/attachment.obj From edwintorok at gmail.com Mon Oct 5 16:15:44 2009 From: edwintorok at gmail.com (Torok Edwin) Date: Mon, 05 Oct 2009 21:15:44 -0000 Subject: [llvm-commits] [llvm] r83324 - /llvm/trunk/lib/Analysis/MallocHelper.cpp Message-ID: <200910052115.n95LFiPj032104@zion.cs.uiuc.edu> Author: edwin Date: Mon Oct 5 16:15:43 2009 New Revision: 83324 URL: http://llvm.org/viewvc/llvm-project?rev=83324&view=rev Log: Don't treat malloc calls with non-matching prototype as malloc. Fixes second part of PR5130, miscompilation in FreeBSD kernel, where malloc takes 3 params, and *does* initialize memory. 
Modified: llvm/trunk/lib/Analysis/MallocHelper.cpp Modified: llvm/trunk/lib/Analysis/MallocHelper.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/MallocHelper.cpp?rev=83324&r1=83323&r2=83324&view=diff ============================================================================== --- llvm/trunk/lib/Analysis/MallocHelper.cpp (original) +++ llvm/trunk/lib/Analysis/MallocHelper.cpp Mon Oct 5 16:15:43 2009 @@ -34,12 +34,23 @@ return false; const Module* M = CI->getParent()->getParent()->getParent(); - Constant *MallocFunc = M->getFunction("malloc"); + Function *MallocFunc = M->getFunction("malloc"); if (CI->getOperand(0) != MallocFunc) return false; - return true; + // Check malloc prototype. + // FIXME: this will be obsolete when nobuiltin attribute will exist. + const FunctionType *FTy = MallocFunc->getFunctionType(); + if (FTy->getNumParams() != 1) + return false; + if (IntegerType *ITy = dyn_cast(FTy->param_begin()->get())) { + if (ITy->getBitWidth() != 32 && ITy->getBitWidth() != 64) + return false; + return true; + } + + return false; } /// extractMallocCall - Returns the corresponding CallInst if the instruction From clattner at apple.com Mon Oct 5 16:27:44 2009 From: clattner at apple.com (Chris Lattner) Date: Mon, 5 Oct 2009 14:27:44 -0700 Subject: [llvm-commits] [PATCH] Add signed math cases to overflow folding In-Reply-To: <06059EF3-7215-43FC-B518-22B778BDFC18@fallingsnow.net> References: <06059EF3-7215-43FC-B518-22B778BDFC18@fallingsnow.net> Message-ID: <69071CD8-A496-40E9-98D0-B38F2FC8FC06@apple.com> On Oct 5, 2009, at 1:53 PM, Evan Phoenix wrote: > Extend constant folding to understand the signed variants of the > overflow intrinsics. Looks great to me, please commit! 
-Chris From clattner at apple.com Mon Oct 5 16:28:23 2009 From: clattner at apple.com (Chris Lattner) Date: Mon, 5 Oct 2009 14:28:23 -0700 Subject: [llvm-commits] [llvm] r83324 - /llvm/trunk/lib/Analysis/MallocHelper.cpp In-Reply-To: <200910052115.n95LFiPj032104@zion.cs.uiuc.edu> References: <200910052115.n95LFiPj032104@zion.cs.uiuc.edu> Message-ID: <4495EE9D-3ED6-465D-8F49-81BB677765A3@apple.com> On Oct 5, 2009, at 2:15 PM, Torok Edwin wrote: > Author: edwin > Date: Mon Oct 5 16:15:43 2009 > New Revision: 83324 > > URL: http://llvm.org/viewvc/llvm-project?rev=83324&view=rev > Log: > Don't treat malloc calls with non-matching prototype as malloc. > Fixes second part of PR5130, miscompilation in FreeBSD kernel, where > malloc takes 3 params, > and *does* initialize memory. Thanks Edwin, please mention the PR in the FIXME so we can remember what this is working around. -Chris > > Modified: > llvm/trunk/lib/Analysis/MallocHelper.cpp > > Modified: llvm/trunk/lib/Analysis/MallocHelper.cpp > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/MallocHelper.cpp?rev=83324&r1=83323&r2=83324&view=diff > > ============================================================================== > --- llvm/trunk/lib/Analysis/MallocHelper.cpp (original) > +++ llvm/trunk/lib/Analysis/MallocHelper.cpp Mon Oct 5 16:15:43 2009 > @@ -34,12 +34,23 @@ > return false; > > const Module* M = CI->getParent()->getParent()->getParent(); > - Constant *MallocFunc = M->getFunction("malloc"); > + Function *MallocFunc = M->getFunction("malloc"); > > if (CI->getOperand(0) != MallocFunc) > return false; > > - return true; > + // Check malloc prototype. > + // FIXME: this will be obsolete when nobuiltin attribute will > exist.
> + const FunctionType *FTy = MallocFunc->getFunctionType(); > + if (FTy->getNumParams() != 1) > + return false; > + if (IntegerType *ITy = dyn_cast(FTy->param_begin()- > >get())) { > + if (ITy->getBitWidth() != 32 && ITy->getBitWidth() != 64) > + return false; > + return true; > + } > + > + return false; > } > > /// extractMallocCall - Returns the corresponding CallInst if the > instruction > > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits From echristo at apple.com Mon Oct 5 17:16:12 2009 From: echristo at apple.com (Eric Christopher) Date: Mon, 05 Oct 2009 22:16:12 -0000 Subject: [llvm-commits] [llvm-gcc-4.2] r83329 - /llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp Message-ID: <200910052216.n95MGCCh007859@zion.cs.uiuc.edu> Author: echristo Date: Mon Oct 5 17:16:12 2009 New Revision: 83329 URL: http://llvm.org/viewvc/llvm-project?rev=83329&view=rev Log: Use FUNCTION_BOUNDARY for default function alignment. Use BITS_PER_UNIT instead of 8 for computing alignment size - this is almost always 8 anyhow. Modified: llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp Modified: llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp?rev=83329&r1=83328&r2=83329&view=diff ============================================================================== --- llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp (original) +++ llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp Mon Oct 5 17:16:12 2009 @@ -504,8 +504,8 @@ handleVisibility(FnDecl, Fn); // Handle attribute "aligned". - if (DECL_ALIGN (FnDecl) != 8) - Fn->setAlignment(DECL_ALIGN (FnDecl) / 8); + if (DECL_ALIGN (FnDecl) != FUNCTION_BOUNDARY) + Fn->setAlignment(DECL_ALIGN (FnDecl) / BITS_PER_UNIT); // Handle functions in specified sections. 
if (DECL_SECTION_NAME(FnDecl)) @@ -1324,7 +1324,7 @@ const Type *LLVMTy = ConvertType(type); unsigned NumElts = CountAggregateElements(LLVMTy); if (TREE_CODE(TYPE_SIZE(type)) == INTEGER_CST && - (NumElts == 1 || + (NumElts == 1 || TREE_INT_CST_LOW(TYPE_SIZE_UNIT(type)) < TARGET_LLVM_MIN_BYTES_COPY_BY_MEMCPY)) { @@ -1614,7 +1614,7 @@ if (DECL_ALIGN(decl)) { unsigned TargetAlign = getTargetData().getABITypeAlignment(Ty); if (DECL_USER_ALIGN(decl) || 8 * TargetAlign < (unsigned)DECL_ALIGN(decl)) - Alignment = DECL_ALIGN(decl) / 8; + Alignment = DECL_ALIGN(decl) / BITS_PER_UNIT; } const char *Name; // Name of variable @@ -6628,7 +6628,8 @@ if (BitStart == 0 && // llvm pointer points to it. !isBitfield(FieldDecl) && // bitfield computation might offset pointer. DECL_ALIGN(FieldDecl)) - LVAlign = std::max(LVAlign, unsigned(DECL_ALIGN(FieldDecl)) / 8); + LVAlign = std::max(LVAlign, + unsigned(DECL_ALIGN(FieldDecl)) / BITS_PER_UNIT); #endif // If the FIELD_DECL has an annotate attribute on it, emit it. @@ -6838,7 +6839,7 @@ unsigned Alignment = Ty->isSized() ? TD.getABITypeAlignment(Ty) : 1; if (DECL_ALIGN(exp)) { if (DECL_USER_ALIGN(exp) || 8 * Alignment < (unsigned)DECL_ALIGN(exp)) - Alignment = DECL_ALIGN(exp) / 8; + Alignment = DECL_ALIGN(exp) / BITS_PER_UNIT; } return LValue(BitCastToType(Decl, PTy), Alignment); From clattner at apple.com Mon Oct 5 17:22:02 2009 From: clattner at apple.com (Chris Lattner) Date: Mon, 5 Oct 2009 15:22:02 -0700 Subject: [llvm-commits] [llvm-gcc-4.2] r83329 - /llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp In-Reply-To: <200910052216.n95MGCCh007859@zion.cs.uiuc.edu> References: <200910052216.n95MGCCh007859@zion.cs.uiuc.edu> Message-ID: <8871B393-AD07-4AEC-ACA5-64FB199B7409@apple.com> On Oct 5, 2009, at 3:16 PM, Eric Christopher wrote: > Author: echristo > Date: Mon Oct 5 17:16:12 2009 > New Revision: 83329 > > URL: http://llvm.org/viewvc/llvm-project?rev=83329&view=rev > Log: > Use FUNCTION_BOUNDARY for default function alignment. 
Use > BITS_PER_UNIT > instead of 8 for computing alignment size - this is almost always > 8 anyhow. The last time I did this, Duncan pointed out that it is actually wrong. LLVM does alignment in bytes, so if BITS_PER_UNIT is (e.g.) 32, then we'd get the wrong results. -Chris > > Modified: > llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp > > Modified: llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp > URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp?rev=83329&r1=83328&r2=83329&view=diff > > ============================================================================== > --- llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp (original) > +++ llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp Mon Oct 5 17:16:12 2009 > @@ -504,8 +504,8 @@ > handleVisibility(FnDecl, Fn); > > // Handle attribute "aligned". > - if (DECL_ALIGN (FnDecl) != 8) > - Fn->setAlignment(DECL_ALIGN (FnDecl) / 8); > + if (DECL_ALIGN (FnDecl) != FUNCTION_BOUNDARY) > + Fn->setAlignment(DECL_ALIGN (FnDecl) / BITS_PER_UNIT); > > // Handle functions in specified sections. > if (DECL_SECTION_NAME(FnDecl)) > @@ -1324,7 +1324,7 @@ > const Type *LLVMTy = ConvertType(type); > unsigned NumElts = CountAggregateElements(LLVMTy); > if (TREE_CODE(TYPE_SIZE(type)) == INTEGER_CST && > - (NumElts == 1 || > + (NumElts == 1 || > TREE_INT_CST_LOW(TYPE_SIZE_UNIT(type)) < > TARGET_LLVM_MIN_BYTES_COPY_BY_MEMCPY)) { > > @@ -1614,7 +1614,7 @@ > if (DECL_ALIGN(decl)) { > unsigned TargetAlign = getTargetData().getABITypeAlignment(Ty); > if (DECL_USER_ALIGN(decl) || 8 * TargetAlign < > (unsigned)DECL_ALIGN(decl)) > - Alignment = DECL_ALIGN(decl) / 8; > + Alignment = DECL_ALIGN(decl) / BITS_PER_UNIT; > } > > const char *Name; // Name of variable > @@ -6628,7 +6628,8 @@ > if (BitStart == 0 && // llvm pointer points to it. > !isBitfield(FieldDecl) && // bitfield computation might > offset pointer.
> DECL_ALIGN(FieldDecl)) > - LVAlign = std::max(LVAlign, unsigned(DECL_ALIGN(FieldDecl)) / > 8); > + LVAlign = std::max(LVAlign, > + unsigned(DECL_ALIGN(FieldDecl)) / BITS_PER_UNIT); > #endif > > // If the FIELD_DECL has an annotate attribute on it, emit it. > @@ -6838,7 +6839,7 @@ > unsigned Alignment = Ty->isSized() ? TD.getABITypeAlignment(Ty) : 1; > if (DECL_ALIGN(exp)) { > if (DECL_USER_ALIGN(exp) || 8 * Alignment < > (unsigned)DECL_ALIGN(exp)) > - Alignment = DECL_ALIGN(exp) / 8; > + Alignment = DECL_ALIGN(exp) / BITS_PER_UNIT; > } > > return LValue(BitCastToType(Decl, PTy), Alignment); > > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits From echristo at apple.com Mon Oct 5 17:23:21 2009 From: echristo at apple.com (Eric Christopher) Date: Mon, 5 Oct 2009 15:23:21 -0700 Subject: [llvm-commits] [llvm-gcc-4.2] r83329 - /llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp In-Reply-To: <8871B393-AD07-4AEC-ACA5-64FB199B7409@apple.com> References: <200910052216.n95MGCCh007859@zion.cs.uiuc.edu> <8871B393-AD07-4AEC-ACA5-64FB199B7409@apple.com> Message-ID: On Oct 5, 2009, at 3:22 PM, Chris Lattner wrote: > > On Oct 5, 2009, at 3:16 PM, Eric Christopher wrote: > >> Author: echristo >> Date: Mon Oct 5 17:16:12 2009 >> New Revision: 83329 >> >> URL: http://llvm.org/viewvc/llvm-project?rev=83329&view=rev >> Log: >> Use FUNCTION_BOUNDARY for default function alignment. Use >> BITS_PER_UNIT >> instead of 8 for computing alignment size - this is almost always >> 8 anyhow. > > The last time I did this, Duncan pointed out that it is actually > wrong. LLVM does alignment in bytes, so if BITS_PER_UNIT is (e.g.) > 32, then we'd get the wrong results. Hmm... I guess so. If this ever actually came up we'd probably see problems all over the place anyhow. 
I'll revert that part :) -eric From echristo at apple.com Mon Oct 5 17:25:32 2009 From: echristo at apple.com (Eric Christopher) Date: Mon, 05 Oct 2009 22:25:32 -0000 Subject: [llvm-commits] [llvm-gcc-4.2] r83332 - /llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp Message-ID: <200910052225.n95MPX8L009144@zion.cs.uiuc.edu> Author: echristo Date: Mon Oct 5 17:25:32 2009 New Revision: 83332 URL: http://llvm.org/viewvc/llvm-project?rev=83332&view=rev Log: Revert the BITS_PER_UNIT part of my last patch. llvm does alignment computation on bytes and would fail if BITS_PER_UNIT were anything other than 8. Modified: llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp Modified: llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp?rev=83332&r1=83331&r2=83332&view=diff ============================================================================== --- llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp (original) +++ llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp Mon Oct 5 17:25:32 2009 @@ -505,7 +505,7 @@ // Handle attribute "aligned". if (DECL_ALIGN (FnDecl) != FUNCTION_BOUNDARY) - Fn->setAlignment(DECL_ALIGN (FnDecl) / BITS_PER_UNIT); + Fn->setAlignment(DECL_ALIGN (FnDecl) / 8); // Handle functions in specified sections. if (DECL_SECTION_NAME(FnDecl)) @@ -1614,7 +1614,7 @@ if (DECL_ALIGN(decl)) { unsigned TargetAlign = getTargetData().getABITypeAlignment(Ty); if (DECL_USER_ALIGN(decl) || 8 * TargetAlign < (unsigned)DECL_ALIGN(decl)) - Alignment = DECL_ALIGN(decl) / BITS_PER_UNIT; + Alignment = DECL_ALIGN(decl) / 8; } const char *Name; // Name of variable @@ -6628,8 +6628,7 @@ if (BitStart == 0 && // llvm pointer points to it. !isBitfield(FieldDecl) && // bitfield computation might offset pointer. DECL_ALIGN(FieldDecl)) - LVAlign = std::max(LVAlign, - unsigned(DECL_ALIGN(FieldDecl)) / BITS_PER_UNIT); + LVAlign = std::max(LVAlign, unsigned(DECL_ALIGN(FieldDecl)) / 8); #endif // If the FIELD_DECL has an annotate attribute on it, emit it. 
@@ -6839,7 +6838,7 @@ unsigned Alignment = Ty->isSized() ? TD.getABITypeAlignment(Ty) : 1; if (DECL_ALIGN(exp)) { if (DECL_USER_ALIGN(exp) || 8 * Alignment < (unsigned)DECL_ALIGN(exp)) - Alignment = DECL_ALIGN(exp) / BITS_PER_UNIT; + Alignment = DECL_ALIGN(exp) / 8; } return LValue(BitCastToType(Decl, PTy), Alignment); From stuart at apple.com Mon Oct 5 17:26:25 2009 From: stuart at apple.com (Stuart Hastings) Date: Mon, 05 Oct 2009 22:26:25 -0000 Subject: [llvm-commits] [llvm-gcc-4.2] r83333 - /llvm-gcc-4.2/tags/Apple/llvmgcc42-2311/gcc/llvm-convert.cpp Message-ID: <200910052226.n95MQP2r009263@zion.cs.uiuc.edu> Author: stuart Date: Mon Oct 5 17:26:25 2009 New Revision: 83333 URL: http://llvm.org/viewvc/llvm-project?rev=83333&view=rev Log: Merge Evan's fix for into 2311. Modified: llvm-gcc-4.2/tags/Apple/llvmgcc42-2311/gcc/llvm-convert.cpp Modified: llvm-gcc-4.2/tags/Apple/llvmgcc42-2311/gcc/llvm-convert.cpp URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/tags/Apple/llvmgcc42-2311/gcc/llvm-convert.cpp?rev=83333&r1=83332&r2=83333&view=diff ============================================================================== --- llvm-gcc-4.2/tags/Apple/llvmgcc42-2311/gcc/llvm-convert.cpp (original) +++ llvm-gcc-4.2/tags/Apple/llvmgcc42-2311/gcc/llvm-convert.cpp Mon Oct 5 17:26:25 2009 @@ -503,6 +503,10 @@ // Handle visibility style handleVisibility(FnDecl, Fn); + // Handle attribute "aligned". + if (DECL_ALIGN (FnDecl) != 8) + Fn->setAlignment(DECL_ALIGN (FnDecl) / 8); + // Handle functions in specified sections. 
if (DECL_SECTION_NAME(FnDecl)) Fn->setSection(TREE_STRING_POINTER(DECL_SECTION_NAME(FnDecl))); From evan at fallingsnow.net Mon Oct 5 17:29:11 2009 From: evan at fallingsnow.net (Evan Phoenix) Date: Mon, 05 Oct 2009 22:29:11 -0000 Subject: [llvm-commits] [llvm] r83334 - /llvm/trunk/README.txt Message-ID: <200910052229.n95MTBcN009635@zion.cs.uiuc.edu> Author: evanphx Date: Mon Oct 5 17:29:11 2009 New Revision: 83334 URL: http://llvm.org/viewvc/llvm-project?rev=83334&view=rev Log: First test commit Modified: llvm/trunk/README.txt Modified: llvm/trunk/README.txt URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/README.txt?rev=83334&r1=83333&r2=83334&view=diff ============================================================================== --- llvm/trunk/README.txt (original) +++ llvm/trunk/README.txt Mon Oct 5 17:29:11 2009 @@ -1,9 +1,9 @@ Low Level Virtual Machine (LLVM) ================================ -This directory and its subdirectories contain source code for the Low Level +This directory and its subdirectories contain source code for the Low Level Virtual Machine, a toolkit for the construction of highly optimized compilers, -optimizers, and runtime environments. +optimizers, and runtime environments. LLVM is open source software. You may freely distribute it under the terms of the license agreement found in LICENSE.txt. 
From grosbach at apple.com Mon Oct 5 17:30:23 2009 From: grosbach at apple.com (Jim Grosbach) Date: Mon, 05 Oct 2009 22:30:23 -0000 Subject: [llvm-commits] [llvm] r83336 - in /llvm/trunk: include/llvm/Target/TargetRegisterInfo.h lib/CodeGen/RegisterScavenging.cpp lib/Target/ARM/ARMBaseRegisterInfo.cpp lib/Target/ARM/Thumb1RegisterInfo.cpp lib/Target/ARM/Thumb1RegisterInfo.h Message-ID: <200910052230.n95MUODr009829@zion.cs.uiuc.edu> Author: grosbach Date: Mon Oct 5 17:30:23 2009 New Revision: 83336 URL: http://llvm.org/viewvc/llvm-project?rev=83336&view=rev Log: In Thumb1, the register scavenger is not always able to use an emergency spill slot. When frame references are via the frame pointer, they will be negative, but Thumb1 load/store instructions only allow positive immediate offsets. Instead, Thumb1 will spill to R12. Modified: llvm/trunk/include/llvm/Target/TargetRegisterInfo.h llvm/trunk/lib/CodeGen/RegisterScavenging.cpp llvm/trunk/lib/Target/ARM/ARMBaseRegisterInfo.cpp llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.cpp llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.h Modified: llvm/trunk/include/llvm/Target/TargetRegisterInfo.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetRegisterInfo.h?rev=83336&r1=83335&r2=83336&view=diff ============================================================================== --- llvm/trunk/include/llvm/Target/TargetRegisterInfo.h (original) +++ llvm/trunk/include/llvm/Target/TargetRegisterInfo.h Mon Oct 5 17:30:23 2009 @@ -635,6 +635,24 @@ virtual void processFunctionBeforeFrameFinalized(MachineFunction &MF) const { } + /// saveScavengerRegister - Save the register so it can be used by the + /// register scavenger. Return true if the register was saved, false + /// otherwise. If this function does not save the register, the scavenger + /// will instead spill it to the emergency spill slot. 
+ /// + virtual bool saveScavengerRegister(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + const TargetRegisterClass *RC, + unsigned Reg) const {return false;} + + /// restoreScavengerRegister - Restore a register saved by + /// saveScavengerRegister(). + /// + virtual void restoreScavengerRegister(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + const TargetRegisterClass *RC, + unsigned Reg) const {} + /// eliminateFrameIndex - This method must be overriden to eliminate abstract /// frame indices from instructions which may use them. The instruction /// referenced by the iterator contains an MO_FrameIndex operand which must be Modified: llvm/trunk/lib/CodeGen/RegisterScavenging.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/RegisterScavenging.cpp?rev=83336&r1=83335&r2=83336&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/RegisterScavenging.cpp (original) +++ llvm/trunk/lib/CodeGen/RegisterScavenging.cpp Mon Oct 5 17:30:23 2009 @@ -268,9 +268,6 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC, MachineBasicBlock::iterator I, int SPAdj) { - assert(ScavengingFrameIndex >= 0 && - "Cannot scavenge a register without an emergency spill slot!"); - // Mask off the registers which are not in the TargetRegisterClass. BitVector Candidates(NumPhysRegs, false); CreateRegClassMask(RC, Candidates); @@ -301,14 +298,23 @@ // Avoid infinite regress ScavengedReg = SReg; - // Spill the scavenged register before I. - TII->storeRegToStackSlot(*MBB, I, SReg, true, ScavengingFrameIndex, RC); - MachineBasicBlock::iterator II = prior(I); - TRI->eliminateFrameIndex(II, SPAdj, this); + // If the target knows how to save/restore the register, let it do so; + // otherwise, use the emergency stack spill slot. + if (!TRI->saveScavengerRegister(*MBB, I, RC, SReg)) { + // Spill the scavenged register before I. 
+ assert(ScavengingFrameIndex >= 0 && + "Cannot scavenging register without an emergency spill slot!"); + TII->storeRegToStackSlot(*MBB, I, SReg, true, ScavengingFrameIndex, RC); + MachineBasicBlock::iterator II = prior(I); + TRI->eliminateFrameIndex(II, SPAdj, this); + + // Restore the scavenged register before its use (or first terminator). + TII->loadRegFromStackSlot(*MBB, UseMI, SReg, ScavengingFrameIndex, RC); + } else + TRI->restoreScavengerRegister(*MBB, UseMI, RC, SReg); - // Restore the scavenged register before its use (or first terminator). - TII->loadRegFromStackSlot(*MBB, UseMI, SReg, ScavengingFrameIndex, RC); ScavengeRestore = prior(UseMI); + // Doing this here leads to infinite regress. // ScavengedReg = SReg; ScavengedRC = RC; Modified: llvm/trunk/lib/Target/ARM/ARMBaseRegisterInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMBaseRegisterInfo.cpp?rev=83336&r1=83335&r2=83336&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMBaseRegisterInfo.cpp (original) +++ llvm/trunk/lib/Target/ARM/ARMBaseRegisterInfo.cpp Mon Oct 5 17:30:23 2009 @@ -660,8 +660,7 @@ // off the frame pointer, the effective stack size is 4 bytes larger // since the FP points to the stack slot of the previous FP. if (estimateStackSize(MF, MFI) + (hasFP(MF) ? 4 : 0) - >= estimateRSStackSizeLimit(MF) - || AFI->isThumb1OnlyFunction()) { + >= estimateRSStackSizeLimit(MF)) { // If any non-reserved CS register isn't spilled, just spill one or two // extra. That should take care of it! unsigned NumExtras = TargetAlign / 4; @@ -690,7 +689,8 @@ MF.getRegInfo().setPhysRegUsed(Extras[i]); AFI->setCSRegisterIsSpilled(Extras[i]); } - } else { + } else if (!AFI->isThumb1OnlyFunction()) { + // note: Thumb1 functions spill to R12, not the stack. // Reserve a slot closest to SP or frame pointer. 
const TargetRegisterClass *RC = ARM::GPRRegisterClass; RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), Modified: llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.cpp?rev=83336&r1=83335&r2=83336&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.cpp (original) +++ llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.cpp Mon Oct 5 17:30:23 2009 @@ -402,6 +402,31 @@ return 0; } +/// saveScavengerRegister - Save the register so it can be used by the +/// register scavenger. Return true. +bool Thumb1RegisterInfo::saveScavengerRegister(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + const TargetRegisterClass *RC, + unsigned Reg) const { + // Thumb1 can't use the emergency spill slot on the stack because + // ldr/str immediate offsets must be positive, and if we're referencing + // off the frame pointer (if, for example, there are alloca() calls in + // the function, the offset will be negative. Use R12 instead since that's + // a call clobbered register that we know won't be used in Thumb1 mode. + + TII.copyRegToReg(MBB, I, ARM::R12, Reg, ARM::GPRRegisterClass, RC); + return true; +} + +/// restoreScavengerRegister - restore a registers saved by +// saveScavengerRegister(). 
+void Thumb1RegisterInfo::restoreScavengerRegister(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + const TargetRegisterClass *RC, + unsigned Reg) const { + TII.copyRegToReg(MBB, I, Reg, ARM::R12, RC, ARM::GPRRegisterClass); +} + void Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, RegScavenger *RS) const{ unsigned i = 0; Modified: llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.h?rev=83336&r1=83335&r2=83336&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.h (original) +++ llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.h Mon Oct 5 17:30:23 2009 @@ -54,6 +54,14 @@ unsigned FrameReg, int Offset, unsigned MOVOpc, unsigned ADDriOpc, unsigned SUBriOpc) const; + bool saveScavengerRegister(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + const TargetRegisterClass *RC, + unsigned Reg) const; + void restoreScavengerRegister(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + const TargetRegisterClass *RC, + unsigned Reg) const; void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, RegScavenger *RS = NULL) const; From clattner at apple.com Mon Oct 5 17:32:58 2009 From: clattner at apple.com (Chris Lattner) Date: Mon, 5 Oct 2009 15:32:58 -0700 Subject: [llvm-commits] [patch] Add a test for PR3043 In-Reply-To: References: Message-ID: On Oct 3, 2009, at 6:56 PM, Jeffrey Yasskin wrote: > In fixing bugs around stubs, it would be nice to have tests for the > bugs that have already been fixed so I don't reopen them. Here's one > for http://llvm.org/bugs/show_bug.cgi?id=3043. Since it's my first > addition to the dejagnu test suite, I'd appreciate any stylistic > pointers. I tested the test by disabling r59265, and indeed the test > fails. 
> > Evan, could you write a test for the bug > http://llvm.org/viewvc/llvm-project?view=rev&revision=60822 fixed? My > current fix for PR5116 simply reverts that change, and I want to know > if I'm breaking something or if the original bug has now been fixed > some other way. This looks ok to me. However, we really prefer to minimize the number of executable tests that are in llvm/test. I understand that this is hard to impossible for JIT tests though. Please try to aggregate them together into fewer larger tests. -Chris From evan at fallingsnow.net Mon Oct 5 17:53:52 2009 From: evan at fallingsnow.net (Evan Phoenix) Date: Mon, 05 Oct 2009 22:53:52 -0000 Subject: [llvm-commits] [llvm] r83338 - in /llvm/trunk: lib/Analysis/ConstantFolding.cpp test/Transforms/ConstProp/overflow-ops.ll Message-ID: <200910052253.n95MrqV1012888@zion.cs.uiuc.edu> Author: evanphx Date: Mon Oct 5 17:53:52 2009 New Revision: 83338 URL: http://llvm.org/viewvc/llvm-project?rev=83338&view=rev Log: Extend ConstantFolding to understand signed overflow variants Modified: llvm/trunk/lib/Analysis/ConstantFolding.cpp llvm/trunk/test/Transforms/ConstProp/overflow-ops.ll Modified: llvm/trunk/lib/Analysis/ConstantFolding.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/ConstantFolding.cpp?rev=83338&r1=83337&r2=83338&view=diff ============================================================================== --- llvm/trunk/lib/Analysis/ConstantFolding.cpp (original) +++ llvm/trunk/lib/Analysis/ConstantFolding.cpp Mon Oct 5 17:53:52 2009 @@ -678,6 +678,8 @@ case Intrinsic::cttz: case Intrinsic::uadd_with_overflow: case Intrinsic::usub_with_overflow: + case Intrinsic::sadd_with_overflow: + case Intrinsic::ssub_with_overflow: return true; default: return false; @@ -902,6 +904,28 @@ }; return ConstantStruct::get(F->getContext(), Ops, 2, false); } + case Intrinsic::sadd_with_overflow: { + Constant *Res = ConstantExpr::getAdd(Op1, Op2); // result. 
+ Constant *Overflow = ConstantExpr::getSelect( + ConstantExpr::getICmp(CmpInst::ICMP_SGT, + ConstantInt::get(Op1->getType(), 0), Op1), + ConstantExpr::getICmp(CmpInst::ICMP_SGT, Res, Op2), + ConstantExpr::getICmp(CmpInst::ICMP_SLT, Res, Op2)); // overflow. + + Constant *Ops[] = { Res, Overflow }; + return ConstantStruct::get(F->getContext(), Ops, 2, false); + } + case Intrinsic::ssub_with_overflow: { + Constant *Res = ConstantExpr::getSub(Op1, Op2); // result. + Constant *Overflow = ConstantExpr::getSelect( + ConstantExpr::getICmp(CmpInst::ICMP_SGT, + ConstantInt::get(Op2->getType(), 0), Op2), + ConstantExpr::getICmp(CmpInst::ICMP_SLT, Res, Op1), + ConstantExpr::getICmp(CmpInst::ICMP_SGT, Res, Op1)); // overflow. + + Constant *Ops[] = { Res, Overflow }; + return ConstantStruct::get(F->getContext(), Ops, 2, false); + } } } Modified: llvm/trunk/test/Transforms/ConstProp/overflow-ops.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ConstProp/overflow-ops.ll?rev=83338&r1=83337&r2=83338&view=diff ============================================================================== --- llvm/trunk/test/Transforms/ConstProp/overflow-ops.ll (original) +++ llvm/trunk/test/Transforms/ConstProp/overflow-ops.ll Mon Oct 5 17:53:52 2009 @@ -24,7 +24,6 @@ ; CHECK: ret %i8i1 { i8 6, i1 true } } - ;;----------------------------- ;; usub ;;----------------------------- @@ -47,7 +46,127 @@ ; CHECK: ret %i8i1 { i8 -2, i1 true } } +;;----------------------------- +;; sadd +;;----------------------------- + +define {i8, i1} @sadd_1() nounwind { +entry: + %t = call {i8, i1} @llvm.sadd.with.overflow.i8(i8 42, i8 2) + ret {i8, i1} %t + +; CHECK: @sadd_1 +; CHECK: ret %i8i1 { i8 44, i1 false } +} + +define {i8, i1} @sadd_2() nounwind { +entry: + %t = call {i8, i1} @llvm.sadd.with.overflow.i8(i8 120, i8 10) + ret {i8, i1} %t + +; CHECK: @sadd_2 +; CHECK: ret %i8i1 { i8 -126, i1 true } +} + +define {i8, i1} @sadd_3() nounwind { +entry: + %t = call {i8, i1} 
@llvm.sadd.with.overflow.i8(i8 -120, i8 10) + ret {i8, i1} %t + +; CHECK: @sadd_3 +; CHECK: ret %i8i1 { i8 -110, i1 false } +} + +define {i8, i1} @sadd_4() nounwind { +entry: + %t = call {i8, i1} @llvm.sadd.with.overflow.i8(i8 -120, i8 -10) + ret {i8, i1} %t + +; CHECK: @sadd_4 +; CHECK: ret %i8i1 { i8 126, i1 true } +} + +define {i8, i1} @sadd_5() nounwind { +entry: + %t = call {i8, i1} @llvm.sadd.with.overflow.i8(i8 2, i8 -10) + ret {i8, i1} %t + +; CHECK: @sadd_5 +; CHECK: ret %i8i1 { i8 -8, i1 false } +} + + +;;----------------------------- +;; ssub +;;----------------------------- + +define {i8, i1} @ssub_1() nounwind { +entry: + %t = call {i8, i1} @llvm.ssub.with.overflow.i8(i8 4, i8 2) + ret {i8, i1} %t + +; CHECK: @ssub_1 +; CHECK: ret %i8i1 { i8 2, i1 false } +} + +define {i8, i1} @ssub_2() nounwind { +entry: + %t = call {i8, i1} @llvm.ssub.with.overflow.i8(i8 4, i8 6) + ret {i8, i1} %t + +; CHECK: @ssub_2 +; CHECK: ret %i8i1 { i8 -2, i1 false } +} + +define {i8, i1} @ssub_3() nounwind { +entry: + %t = call {i8, i1} @llvm.ssub.with.overflow.i8(i8 -10, i8 120) + ret {i8, i1} %t + +; CHECK: @ssub_3 +; CHECK: ret %i8i1 { i8 126, i1 true } +} + +define {i8, i1} @ssub_3b() nounwind { +entry: + %t = call {i8, i1} @llvm.ssub.with.overflow.i8(i8 -10, i8 10) + ret {i8, i1} %t + +; CHECK: @ssub_3b +; CHECK: ret %i8i1 { i8 -20, i1 false } +} + +define {i8, i1} @ssub_4() nounwind { +entry: + %t = call {i8, i1} @llvm.ssub.with.overflow.i8(i8 120, i8 -10) + ret {i8, i1} %t + +; CHECK: @ssub_4 +; CHECK: ret %i8i1 { i8 -126, i1 true } +} + +define {i8, i1} @ssub_4b() nounwind { +entry: + %t = call {i8, i1} @llvm.ssub.with.overflow.i8(i8 20, i8 -10) + ret {i8, i1} %t + +; CHECK: @ssub_4b +; CHECK: ret %i8i1 { i8 30, i1 false } +} + +define {i8, i1} @ssub_5() nounwind { +entry: + %t = call {i8, i1} @llvm.ssub.with.overflow.i8(i8 -20, i8 -10) + ret {i8, i1} %t + +; CHECK: @ssub_5 +; CHECK: ret %i8i1 { i8 -10, i1 false } +} + declare {i8, i1} @llvm.uadd.with.overflow.i8(i8, 
i8) declare {i8, i1} @llvm.usub.with.overflow.i8(i8, i8) + +declare {i8, i1} @llvm.sadd.with.overflow.i8(i8, i8) +declare {i8, i1} @llvm.ssub.with.overflow.i8(i8, i8) From snaroff at apple.com Mon Oct 5 17:56:49 2009 From: snaroff at apple.com (Steve Naroff) Date: Mon, 05 Oct 2009 22:56:49 -0000 Subject: [llvm-commits] [llvm] r83339 - /llvm/tags/cremebrulee/cremebrulee-10/ Message-ID: <200910052256.n95MuoLD013264@zion.cs.uiuc.edu> Author: snaroff Date: Mon Oct 5 17:56:49 2009 New Revision: 83339 URL: http://llvm.org/viewvc/llvm-project?rev=83339&view=rev Log: Tagging cremebrulee-10. Added: llvm/tags/cremebrulee/cremebrulee-10/ - copied from r83338, llvm/trunk/ From eocallaghan at auroraux.org Mon Oct 5 18:05:32 2009 From: eocallaghan at auroraux.org (Edward O'Callaghan) Date: Mon, 05 Oct 2009 23:05:32 -0000 Subject: [llvm-commits] [llvm] r83341 - /llvm/trunk/cmake/config-ix.cmake Message-ID: <200910052305.n95N5W4I014389@zion.cs.uiuc.edu> Author: evocallaghan Date: Mon Oct 5 18:05:32 2009 New Revision: 83341 URL: http://llvm.org/viewvc/llvm-project?rev=83341&view=rev Log: CMake misses a check for sbrk on NetBSD. 
Modified: llvm/trunk/cmake/config-ix.cmake Modified: llvm/trunk/cmake/config-ix.cmake URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/cmake/config-ix.cmake?rev=83341&r1=83340&r2=83341&view=diff ============================================================================== --- llvm/trunk/cmake/config-ix.cmake (original) +++ llvm/trunk/cmake/config-ix.cmake Mon Oct 5 18:05:32 2009 @@ -86,6 +86,7 @@ check_symbol_exists(mkstemp unistd.h HAVE_MKSTEMP) check_symbol_exists(mktemp unistd.h HAVE_MKTEMP) check_symbol_exists(pthread_mutex_lock pthread.h HAVE_PTHREAD_MUTEX_LOCK) +check_symbol_exists(sbrk unistd.h HAVE_SBRK) check_symbol_exists(strtoll stdlib.h HAVE_STRTOLL) check_symbol_exists(strerror string.h HAVE_STRERROR) check_symbol_exists(strerror_r string.h HAVE_STRERROR_R) From jyasskin at google.com Mon Oct 5 18:10:43 2009 From: jyasskin at google.com (Jeffrey Yasskin) Date: Mon, 5 Oct 2009 16:10:43 -0700 Subject: [llvm-commits] [patch] Add a test for PR3043 In-Reply-To: References: Message-ID: On Mon, Oct 5, 2009 at 3:32 PM, Chris Lattner wrote: > > On Oct 3, 2009, at 6:56 PM, Jeffrey Yasskin wrote: > >> In fixing bugs around stubs, it would be nice to have tests for the >> bugs that have already been fixed so I don't reopen them. Here's one >> for http://llvm.org/bugs/show_bug.cgi?id=3043. Since it's my first >> addition to the dejagnu test suite, I'd appreciate any stylistic >> pointers. I tested the test by disabling r59265, and indeed the test >> fails. >> >> Evan, could you write a test for the bug >> http://llvm.org/viewvc/llvm-project?view=rev&revision=60822 fixed? My >> current fix for PR5116 simply reverts that change, and I want to know >> if I'm breaking something or if the original bug has now been fixed >> some other way. > > This looks ok to me. However, we really prefer to minimize the number of > executable tests that are in llvm/test. I understand that this is hard to > impossible for JIT tests though.
Please try to aggregate them together into > fewer larger tests. Ah, ok. I'll rename this to stubs.ll and put all new stub-related tests in there. From dpatel at apple.com Mon Oct 5 18:22:08 2009 From: dpatel at apple.com (Devang Patel) Date: Mon, 05 Oct 2009 23:22:08 -0000 Subject: [llvm-commits] [llvm] r83343 - /llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp Message-ID: <200910052322.n95NM89v016462@zion.cs.uiuc.edu> Author: dpatel Date: Mon Oct 5 18:22:08 2009 New Revision: 83343 URL: http://llvm.org/viewvc/llvm-project?rev=83343&view=rev Log: Set address while constructing DIE. Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp?rev=83343&r1=83342&r2=83343&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp (original) +++ llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp Mon Oct 5 18:22:08 2009 @@ -1107,6 +1107,14 @@ if (!GV.isLocalToUnit()) AddUInt(GVDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); AddSourceLine(GVDie, &GV); + + // Add address. + DIEBlock *Block = new DIEBlock(); + AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr); + AddObjectLabel(Block, 0, dwarf::DW_FORM_udata, + Asm->Mang->getMangledName(GV.getGlobal())); + AddBlock(GVDie, dwarf::DW_AT_location, 0, Block); + return GVDie; } @@ -1582,13 +1590,6 @@ DIE *VariableDie = CreateGlobalVariableDIE(ModuleCU, DI_GV); - // Add address. - DIEBlock *Block = new DIEBlock(); - AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr); - AddObjectLabel(Block, 0, dwarf::DW_FORM_udata, - Asm->Mang->getMangledName(DI_GV.getGlobal())); - AddBlock(VariableDie, dwarf::DW_AT_location, 0, Block); - // Add to map.
Slot = VariableDie; From dpatel at apple.com Mon Oct 5 18:40:42 2009 From: dpatel at apple.com (Devang Patel) Date: Mon, 05 Oct 2009 23:40:42 -0000 Subject: [llvm-commits] [llvm] r83344 - in /llvm/trunk/lib/CodeGen/AsmPrinter: DwarfDebug.cpp DwarfDebug.h Message-ID: <200910052340.n95NegoD018771@zion.cs.uiuc.edu> Author: dpatel Date: Mon Oct 5 18:40:42 2009 New Revision: 83344 URL: http://llvm.org/viewvc/llvm-project?rev=83344&view=rev Log: Adjust context for the global variables that are not at file scope, e.g. void foo() { static int bar = 42; } Here, foo's DIE is parent of bar's DIE. Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp?rev=83344&r1=83343&r2=83344&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp (original) +++ llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp Mon Oct 5 18:40:42 2009 @@ -1477,6 +1477,17 @@ } ConstructDbgScope(RootScope, 0, 0, SPDie, ModuleCU); + // If there are global variables at this scope then add their dies. + for (SmallVector::iterator SGI = ScopedGVs.begin(), + SGE = ScopedGVs.end(); SGI != SGE; ++SGI) { + MDNode *N = dyn_cast_or_null(*SGI); + if (!N) continue; + DIGlobalVariable GV(N); + if (GV.getContext().getNode() == RootScope->getDesc().getNode()) { + DIE *ScopedGVDie = CreateGlobalVariableDIE(ModuleCU, GV); + SPDie->AddChild(ScopedGVDie); + } + } } /// ConstructDefaultDbgScope - Construct a default scope for the subprogram. @@ -1667,8 +1678,13 @@ // Create DIEs for each of the externally visible global variables. 
for (DebugInfoFinder::iterator I = DbgFinder.global_variable_begin(), - E = DbgFinder.global_variable_end(); I != E; ++I) - ConstructGlobalVariableDIE(*I); + E = DbgFinder.global_variable_end(); I != E; ++I) { + DIGlobalVariable GV(*I); + if (GV.getContext().getNode() != GV.getCompileUnit().getNode()) + ScopedGVs.push_back(*I); + else + ConstructGlobalVariableDIE(*I); + } // Create DIEs for each of the externally visible subprograms. for (DebugInfoFinder::iterator I = DbgFinder.subprogram_begin(), Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h?rev=83344&r1=83343&r2=83344&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h (original) +++ llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h Mon Oct 5 18:40:42 2009 @@ -141,6 +141,10 @@ /// DbgScopeMap - Tracks the scopes in the current function. DenseMap DbgScopeMap; + /// ScopedGVs - Tracks global variables that are not at file scope. + /// For example void f() { static int b = 42; } + SmallVector ScopedGVs; + typedef DenseMap > InsnToDbgScopeMapTy; From jyasskin at google.com Mon Oct 5 18:51:08 2009 From: jyasskin at google.com (Jeffrey Yasskin) Date: Mon, 05 Oct 2009 23:51:08 -0000 Subject: [llvm-commits] [llvm] r83346 - /llvm/trunk/test/ExecutionEngine/stubs.ll Message-ID: <200910052351.n95Np8xi020068@zion.cs.uiuc.edu> Author: jyasskin Date: Mon Oct 5 18:51:08 2009 New Revision: 83346 URL: http://llvm.org/viewvc/llvm-project?rev=83346&view=rev Log: Add a test for http://llvm.org/PR3043. 
Added: llvm/trunk/test/ExecutionEngine/stubs.ll Added: llvm/trunk/test/ExecutionEngine/stubs.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/ExecutionEngine/stubs.ll?rev=83346&view=auto ============================================================================== --- llvm/trunk/test/ExecutionEngine/stubs.ll (added) +++ llvm/trunk/test/ExecutionEngine/stubs.ll Mon Oct 5 18:51:08 2009 @@ -0,0 +1,35 @@ +; RUN: llvm-as < %s | lli -disable-lazy-compilation=false + +define i32 @main() nounwind { +entry: + call void @lazily_compiled_address_is_consistent() + ret i32 0 +} + +; Test PR3043: @test should have the same address before and after +; it's JIT-compiled. +@funcPtr = common global i1 ()* null, align 4 +@lcaic_failure = internal constant [46 x i8] c"@lazily_compiled_address_is_consistent failed\00" + +define void @lazily_compiled_address_is_consistent() nounwind { +entry: + store i1 ()* @test, i1 ()** @funcPtr + %pass = tail call i1 @test() ; [#uses=1] + br i1 %pass, label %pass_block, label %fail_block +pass_block: + ret void +fail_block: + call i32 @puts(i8* getelementptr([46 x i8]* @lcaic_failure, i32 0, i32 0)) + call void @exit(i32 1) + unreachable +} + +define i1 @test() nounwind { +entry: + %tmp = load i1 ()** @funcPtr + %eq = icmp eq i1 ()* %tmp, @test + ret i1 %eq +} + +declare i32 @puts(i8*) noreturn +declare void @exit(i32) noreturn From dpatel at apple.com Mon Oct 5 18:59:01 2009 From: dpatel at apple.com (Devang Patel) Date: Mon, 05 Oct 2009 23:59:01 -0000 Subject: [llvm-commits] [llvm] r83347 - /llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp Message-ID: <200910052359.n95Nx15W021137@zion.cs.uiuc.edu> Author: dpatel Date: Mon Oct 5 18:59:00 2009 New Revision: 83347 URL: http://llvm.org/viewvc/llvm-project?rev=83347&view=rev Log: If subprogram die is not available then construct new one. This can happen if debug info is processed lazily.
Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp?rev=83347&r1=83346&r2=83347&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp (original) +++ llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp Mon Oct 5 18:59:00 2009 @@ -1464,6 +1464,10 @@ // Get the subprogram die. DIE *SPDie = ModuleCU->getDieMapSlotFor(SPD.getNode()); + if (!SPDie) { + ConstructSubprogram(SPD.getNode()); + SPDie = ModuleCU->getDieMapSlotFor(SPD.getNode()); + } assert(SPDie && "Missing subprogram descriptor"); if (!AbstractScope) { From dpatel at apple.com Mon Oct 5 19:03:14 2009 From: dpatel at apple.com (Devang Patel) Date: Tue, 06 Oct 2009 00:03:14 -0000 Subject: [llvm-commits] [llvm] r83348 - /llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp Message-ID: <200910060003.n9603EFr021692@zion.cs.uiuc.edu> Author: dpatel Date: Mon Oct 5 19:03:14 2009 New Revision: 83348 URL: http://llvm.org/viewvc/llvm-project?rev=83348&view=rev Log: Existence of a compile unit for input source file is a good indicator to check debug info's presence in a module. Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp?rev=83348&r1=83347&r2=83348&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp (original) +++ llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp Mon Oct 5 19:03:14 2009 @@ -1671,15 +1671,6 @@ if (!ModuleCU) ModuleCU = CompileUnits[0]; - // If there is not any debug info available for any global variables and any - // subprograms then there is not any debug info to emit. 
- if (DbgFinder.global_variable_count() == 0 - && DbgFinder.subprogram_count() == 0) { - if (TimePassesIsEnabled) - DebugTimer->stopTimer(); - return; - } - // Create DIEs for each of the externally visible global variables. for (DebugInfoFinder::iterator I = DbgFinder.global_variable_begin(), E = DbgFinder.global_variable_end(); I != E; ++I) { @@ -1728,7 +1719,7 @@ /// EndModule - Emit all Dwarf sections that should come after the content. /// void DwarfDebug::EndModule() { - if (!ShouldEmitDwarfDebug()) + if (!ModuleCU) return; if (TimePassesIsEnabled) From dpatel at apple.com Mon Oct 5 19:09:08 2009 From: dpatel at apple.com (Devang Patel) Date: Tue, 06 Oct 2009 00:09:08 -0000 Subject: [llvm-commits] [llvm] r83349 - /llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp Message-ID: <200910060009.n960984m022455@zion.cs.uiuc.edu> Author: dpatel Date: Mon Oct 5 19:09:08 2009 New Revision: 83349 URL: http://llvm.org/viewvc/llvm-project?rev=83349&view=rev Log: Set default location for the function if it is not already set. This code is not yet enabled. 
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp?rev=83349&r1=83348&r2=83349&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp (original) +++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp Mon Oct 5 19:09:08 2009 @@ -747,6 +747,8 @@ DebugLoc Loc = ExtractDebugLocation(DILoc, MF.getDebugLocInfo()); FastIS->setCurDebugLoc(Loc); + if (MF.getDefaultDebugLoc().isUnknown()) + MF.setDefaultDebugLoc(Loc); } } From grosbach at apple.com Mon Oct 5 19:24:17 2009 From: grosbach at apple.com (Jim Grosbach) Date: Tue, 06 Oct 2009 00:24:17 -0000 Subject: [llvm-commits] [test-suite] r83351 - /test-suite/trunk/Makefile.programs Message-ID: <200910060024.n960OHNY024418@zion.cs.uiuc.edu> Author: grosbach Date: Mon Oct 5 19:24:17 2009 New Revision: 83351 URL: http://llvm.org/viewvc/llvm-project?rev=83351&view=rev Log: Add register scavenging for thumb1 to llcbeta Modified: test-suite/trunk/Makefile.programs Modified: test-suite/trunk/Makefile.programs URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/Makefile.programs?rev=83351&r1=83350&r2=83351&view=diff ============================================================================== --- test-suite/trunk/Makefile.programs (original) +++ test-suite/trunk/Makefile.programs Mon Oct 5 19:24:17 2009 @@ -249,7 +249,7 @@ #-new-coalescer-heuristic=true endif ifeq ($(ARCH),THUMB) -LLCBETAOPTION := -combiner-alias-analysis +LLCBETAOPTION := -combiner-alias-analysis -enable-frame-index-scavenging #--enable-thumb-reg-scavenging endif From jyasskin at google.com Mon Oct 5 19:35:55 2009 From: jyasskin at google.com (Jeffrey Yasskin) Date: Tue, 06 Oct 2009 00:35:55 -0000 Subject: [llvm-commits] [llvm] r83353 - in /llvm/trunk: 
lib/ExecutionEngine/JIT/JITEmitter.cpp unittests/ExecutionEngine/JIT/JITTest.cpp Message-ID: <200910060035.n960Zuca025893@zion.cs.uiuc.edu> Author: jyasskin Date: Mon Oct 5 19:35:55 2009 New Revision: 83353 URL: http://llvm.org/viewvc/llvm-project?rev=83353&view=rev Log: Fix http://llvm.org/PR5116 by rolling back r60822. This passes `make unittests check-lit` on both x86-64 Linux and x86-32 Darwin. Modified: llvm/trunk/lib/ExecutionEngine/JIT/JITEmitter.cpp llvm/trunk/unittests/ExecutionEngine/JIT/JITTest.cpp Modified: llvm/trunk/lib/ExecutionEngine/JIT/JITEmitter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/ExecutionEngine/JIT/JITEmitter.cpp?rev=83353&r1=83352&r2=83353&view=diff ============================================================================== --- llvm/trunk/lib/ExecutionEngine/JIT/JITEmitter.cpp (original) +++ llvm/trunk/lib/ExecutionEngine/JIT/JITEmitter.cpp Mon Oct 5 19:35:55 2009 @@ -644,7 +644,7 @@ // If we have already compiled the function, return a pointer to its body. Function *F = cast(V); void *ResultPtr; - if (!DoesntNeedStub && !TheJIT->isLazyCompilationDisabled()) { + if (!DoesntNeedStub) { // Return the function stub if it's already created. 
ResultPtr = Resolver.getFunctionStubIfAvailable(F); if (ResultPtr) Modified: llvm/trunk/unittests/ExecutionEngine/JIT/JITTest.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/unittests/ExecutionEngine/JIT/JITTest.cpp?rev=83353&r1=83352&r2=83353&view=diff ============================================================================== --- llvm/trunk/unittests/ExecutionEngine/JIT/JITTest.cpp (original) +++ llvm/trunk/unittests/ExecutionEngine/JIT/JITTest.cpp Mon Oct 5 19:35:55 2009 @@ -22,6 +22,7 @@ #include "llvm/Module.h" #include "llvm/ModuleProvider.h" #include "llvm/Support/IRBuilder.h" +#include "llvm/Support/TypeBuilder.h" #include "llvm/Target/TargetSelect.h" #include "llvm/Type.h" @@ -44,6 +45,21 @@ return F; } +class JITTest : public testing::Test { + protected: + virtual void SetUp() { + M = new Module("
", Context); + std::string Error; + TheJIT.reset(EngineBuilder(M).setEngineKind(EngineKind::JIT) + .setErrorStr(&Error).create()); + ASSERT_TRUE(TheJIT.get() != NULL) << Error; + } + + LLVMContext Context; + Module *M; // Owned by ExecutionEngine. + OwningPtr TheJIT; +}; + // Regression test for a bug. The JIT used to allocate globals inside the same // memory block used for the function, and when the function code was freed, // the global was left in the same place. This test allocates a function @@ -115,6 +131,43 @@ EXPECT_EQ(3, *GPtr); } +int PlusOne(int arg) { + return arg + 1; +} + +TEST_F(JITTest, FarCallToKnownFunction) { + // x86-64 can only make direct calls to functions within 32 bits of + // the current PC. To call anything farther away, we have to load + // the address into a register and call through the register. The + // current JIT does this by allocating a stub for any far call. + // There was a bug in which the JIT tried to emit a direct call when + // the target was already in the JIT's global mappings and lazy + // compilation was disabled. + + Function *KnownFunction = Function::Create( + TypeBuilder::get(Context), + GlobalValue::ExternalLinkage, "known", M); + TheJIT->addGlobalMapping(KnownFunction, (void*)(intptr_t)PlusOne); + + // int test() { return known(7); } + Function *TestFunction = Function::Create( + TypeBuilder::get(Context), + GlobalValue::ExternalLinkage, "test", M); + BasicBlock *Entry = BasicBlock::Create(Context, "entry", TestFunction); + IRBuilder<> Builder(Entry); + Value *result = Builder.CreateCall( + KnownFunction, + ConstantInt::get(TypeBuilder::get(Context), 7)); + Builder.CreateRet(result); + + TheJIT->EnableDlsymStubs(false); + TheJIT->DisableLazyCompilation(); + int (*TestFunctionPtr)() = reinterpret_cast( + (intptr_t)TheJIT->getPointerToFunction(TestFunction)); + // This used to crash in trying to call PlusOne(). 
+ EXPECT_EQ(8, TestFunctionPtr()); +} + // This code is copied from JITEventListenerTest, but it only runs once for all // the tests in this directory. Everything seems fine, but that's strange // behavior. From grosbach at apple.com Mon Oct 5 19:40:04 2009 From: grosbach at apple.com (Jim Grosbach) Date: Tue, 06 Oct 2009 00:40:04 -0000 Subject: [llvm-commits] [test-suite] r83354 - /test-suite/trunk/Makefile.programs Message-ID: <200910060040.n960e4Ku026478@zion.cs.uiuc.edu> Author: grosbach Date: Mon Oct 5 19:40:04 2009 New Revision: 83354 URL: http://llvm.org/viewvc/llvm-project?rev=83354&view=rev Log: remove -combiner-alias-analysis from thumb llcbeta Modified: test-suite/trunk/Makefile.programs Modified: test-suite/trunk/Makefile.programs URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/Makefile.programs?rev=83354&r1=83353&r2=83354&view=diff ============================================================================== --- test-suite/trunk/Makefile.programs (original) +++ test-suite/trunk/Makefile.programs Mon Oct 5 19:40:04 2009 @@ -249,7 +249,8 @@ #-new-coalescer-heuristic=true endif ifeq ($(ARCH),THUMB) -LLCBETAOPTION := -combiner-alias-analysis -enable-frame-index-scavenging +LLCBETAOPTION := -enable-frame-index-scavenging +#-combiner-alias-analysis #--enable-thumb-reg-scavenging endif From dpatel at apple.com Mon Oct 5 20:26:37 2009 From: dpatel at apple.com (Devang Patel) Date: Tue, 06 Oct 2009 01:26:37 -0000 Subject: [llvm-commits] [llvm] r83355 - in /llvm/trunk/lib/CodeGen/AsmPrinter: DwarfDebug.cpp DwarfDebug.h Message-ID: <200910060126.n961Qbig032263@zion.cs.uiuc.edu> Author: dpatel Date: Mon Oct 5 20:26:37 2009 New Revision: 83355 URL: http://llvm.org/viewvc/llvm-project?rev=83355&view=rev Log: Add utility routine to collect variable debug info. This is not yet used. 
Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp?rev=83355&r1=83354&r2=83355&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp (original) +++ llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp Mon Oct 5 20:26:37 2009 @@ -1782,6 +1782,20 @@ DebugTimer->stopTimer(); } +/// CollectVariableInfo - Populate DbgScope entries with variables' info. +void DwarfDebug::CollectVariableInfo() { + if (!MMI) return; + MachineModuleInfo::VariableDbgInfoMapTy &VMap = MMI->getVariableDbgInfo(); + for (MachineModuleInfo::VariableDbgInfoMapTy::iterator VI = VMap.begin(), + VE = VMap.end(); VI != VE; ++VI) { + MDNode *Var = VI->first; + DILocation VLoc(VI->second.first); + unsigned VSlot = VI->second.second; + DbgScope *Scope = getDbgScope(VLoc.getScope().getNode(), NULL); + Scope->AddVariable(new DbgVariable(DIVariable(Var), VSlot, false)); + } +} + /// ExtractScopeInformation - Scan machine instructions in this function /// and collect DbgScopes. Return true, if atleast one scope was found. bool DwarfDebug::ExtractScopeInformation(MachineFunction *MF) { Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h?rev=83355&r1=83354&r2=83355&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h (original) +++ llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h Mon Oct 5 20:26:37 2009 @@ -560,6 +560,9 @@ /// and collect DbgScopes. Return true, if atleast one scope was found. bool ExtractScopeInformation(MachineFunction *MF); + /// CollectVariableInfo - Populate DbgScope entries with variables' info. 
+ void CollectVariableInfo(); + void SetDbgScopeLabels(const MachineInstr *MI, unsigned Label); }; From dpatel at apple.com Mon Oct 5 20:31:35 2009 From: dpatel at apple.com (Devang Patel) Date: Tue, 06 Oct 2009 01:31:35 -0000 Subject: [llvm-commits] [llvm] r83356 - /llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h Message-ID: <200910060131.n961VZTT000444@zion.cs.uiuc.edu> Author: dpatel Date: Mon Oct 5 20:31:35 2009 New Revision: 83356 URL: http://llvm.org/viewvc/llvm-project?rev=83356&view=rev Log: Remove unintentional function decl. Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h?rev=83356&r1=83355&r2=83356&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h (original) +++ llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h Mon Oct 5 20:31:35 2009 @@ -563,7 +563,6 @@ /// CollectVariableInfo - Populate DbgScope entries with variables' info. void CollectVariableInfo(); - void SetDbgScopeLabels(const MachineInstr *MI, unsigned Label); }; } // End of namespace llvm From kremenek at apple.com Mon Oct 5 20:42:26 2009 From: kremenek at apple.com (Ted Kremenek) Date: Tue, 06 Oct 2009 01:42:26 -0000 Subject: [llvm-commits] [llvm] r83359 - /llvm/tags/checker/checker-224/ Message-ID: <200910060142.n961gQDN001781@zion.cs.uiuc.edu> Author: kremenek Date: Mon Oct 5 20:42:26 2009 New Revision: 83359 URL: http://llvm.org/viewvc/llvm-project?rev=83359&view=rev Log: Tagging checker-224. 
Added: llvm/tags/checker/checker-224/ - copied from r83358, llvm/trunk/ From dpatel at apple.com Mon Oct 5 20:50:42 2009 From: dpatel at apple.com (Devang Patel) Date: Tue, 06 Oct 2009 01:50:42 -0000 Subject: [llvm-commits] [llvm] r83361 - in /llvm/trunk/lib/CodeGen/AsmPrinter: DwarfDebug.cpp DwarfDebug.h Message-ID: <200910060150.n961ogCD002861@zion.cs.uiuc.edu> Author: dpatel Date: Mon Oct 5 20:50:42 2009 New Revision: 83361 URL: http://llvm.org/viewvc/llvm-project?rev=83361&view=rev Log: Add utility routine to set begin and end labels for DbgScopes. This will be used by processDebugLoc(). Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp?rev=83361&r1=83360&r2=83361&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp (original) +++ llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp Mon Oct 5 20:50:42 2009 @@ -1796,6 +1796,30 @@ } } +/// SetDbgScopeBeginLabels - Update DbgScope begin labels for the scopes that +/// start with this machine instruction. +void DwarfDebug::SetDbgScopeBeginLabels(const MachineInstr *MI, unsigned Label) { + InsnToDbgScopeMapTy::iterator I = DbgScopeBeginMap.find(MI); + if (I == DbgScopeBeginMap.end()) + return; + SmallVector &SD = I->second; + for (SmallVector::iterator SDI = SD.begin(), SDE = SD.end(); + SDI != SDE; ++SDI) + (*SDI)->setStartLabelID(Label); +} + +/// SetDbgScopeEndLabels - Update DbgScope end labels for the scopes that +/// end with this machine instruction. 
+void DwarfDebug::SetDbgScopeEndLabels(const MachineInstr *MI, unsigned Label) { + InsnToDbgScopeMapTy::iterator I = DbgScopeEndMap.find(MI); + if (I == DbgScopeBeginMap.end()) + return; + SmallVector &SD = I->second; + for (SmallVector::iterator SDI = SD.begin(), SDE = SD.end(); + SDI != SDE; ++SDI) + (*SDI)->setEndLabelID(Label); +} + /// ExtractScopeInformation - Scan machine instructions in this function /// and collect DbgScopes. Return true, if atleast one scope was found. bool DwarfDebug::ExtractScopeInformation(MachineFunction *MF) { Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h?rev=83361&r1=83360&r2=83361&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h (original) +++ llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h Mon Oct 5 20:50:42 2009 @@ -563,6 +563,13 @@ /// CollectVariableInfo - Populate DbgScope entries with variables' info. void CollectVariableInfo(); + /// SetDbgScopeBeginLabels - Update DbgScope begin labels for the scopes that + /// start with this machine instruction. + void SetDbgScopeBeginLabels(const MachineInstr *MI, unsigned Label); + + /// SetDbgScopeEndLabels - Update DbgScope end labels for the scopes that + /// end with this machine instruction. + void SetDbgScopeEndLabels(const MachineInstr *MI, unsigned Label); }; } // End of namespace llvm From dpatel at apple.com Mon Oct 5 21:01:32 2009 From: dpatel at apple.com (Devang Patel) Date: Tue, 06 Oct 2009 02:01:32 -0000 Subject: [llvm-commits] [llvm] r83362 - in /llvm/trunk/lib/CodeGen/AsmPrinter: DwarfDebug.cpp DwarfDebug.h Message-ID: <200910060201.n9621Wet004365@zion.cs.uiuc.edu> Author: dpatel Date: Mon Oct 5 21:01:32 2009 New Revision: 83362 URL: http://llvm.org/viewvc/llvm-project?rev=83362&view=rev Log: Remove dead code. 
Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp?rev=83362&r1=83361&r2=83362&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp (original) +++ llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp Mon Oct 5 21:01:32 2009 @@ -1989,24 +1989,6 @@ /// RecordSourceLine - Records location information and associates it with a /// label. Returns a unique label ID used to generate a label and provide /// correspondence to the source line list. -unsigned DwarfDebug::RecordSourceLine(Value *V, unsigned Line, unsigned Col) { - if (TimePassesIsEnabled) - DebugTimer->startTimer(); - - CompileUnit *Unit = CompileUnitMap[V]; - assert(Unit && "Unable to find CompileUnit"); - unsigned ID = MMI->NextLabelID(); - Lines.push_back(SrcLineInfo(Line, Col, Unit->getID(), ID)); - - if (TimePassesIsEnabled) - DebugTimer->stopTimer(); - - return ID; -} - -/// RecordSourceLine - Records location information and associates it with a -/// label. Returns a unique label ID used to generate a label and provide -/// correspondence to the source line list. unsigned DwarfDebug::RecordSourceLine(unsigned Line, unsigned Col, MDNode *S) { if (!MMI) Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h?rev=83362&r1=83361&r2=83362&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h (original) +++ llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h Mon Oct 5 21:01:32 2009 @@ -519,11 +519,6 @@ /// RecordSourceLine - Records location information and associates it with a /// label. 
Returns a unique label ID used to generate a label and provide /// correspondence to the source line list. - unsigned RecordSourceLine(Value *V, unsigned Line, unsigned Col); - - /// RecordSourceLine - Records location information and associates it with a - /// label. Returns a unique label ID used to generate a label and provide - /// correspondence to the source line list. unsigned RecordSourceLine(unsigned Line, unsigned Col, MDNode *Scope); /// getRecordSourceLineCount - Return the number of source lines in the debug From dpatel at apple.com Mon Oct 5 21:19:12 2009 From: dpatel at apple.com (Devang Patel) Date: Tue, 06 Oct 2009 02:19:12 -0000 Subject: [llvm-commits] [llvm] r83363 - in /llvm/trunk: include/llvm/CodeGen/ lib/CodeGen/AsmPrinter/ lib/Target/ARM/ lib/Target/ARM/AsmPrinter/ lib/Target/Alpha/ lib/Target/Alpha/AsmPrinter/ lib/Target/Blackfin/AsmPrinter/ lib/Target/CellSPU/AsmPrinter/ lib/Target/MSP430/AsmPrinter/ lib/Target/Mips/AsmPrinter/ lib/Target/PIC16/AsmPrinter/ lib/Target/PowerPC/ lib/Target/PowerPC/AsmPrinter/ lib/Target/Sparc/AsmPrinter/ lib/Target/SystemZ/AsmPrinter/ lib/Target/X86/ lib/Target/X86/AsmPrinter/ lib/Target/XCore/AsmPrinter/ Message-ID: <200910060219.n962JCpR006698@zion.cs.uiuc.edu> Author: dpatel Date: Mon Oct 5 21:19:11 2009 New Revision: 83363 URL: http://llvm.org/viewvc/llvm-project?rev=83363&view=rev Log: Update processDebugLoc() so that it can be used to process debug info before and after printing an instruction. 
Modified: llvm/trunk/include/llvm/CodeGen/AsmPrinter.h llvm/trunk/include/llvm/CodeGen/MachineCodeEmitter.h llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp llvm/trunk/lib/Target/ARM/ARMCodeEmitter.cpp llvm/trunk/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp llvm/trunk/lib/Target/Alpha/AlphaCodeEmitter.cpp llvm/trunk/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp llvm/trunk/lib/Target/Blackfin/AsmPrinter/BlackfinAsmPrinter.cpp llvm/trunk/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp llvm/trunk/lib/Target/MSP430/AsmPrinter/MSP430AsmPrinter.cpp llvm/trunk/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp llvm/trunk/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp llvm/trunk/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp llvm/trunk/lib/Target/PowerPC/PPCCodeEmitter.cpp llvm/trunk/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp llvm/trunk/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp llvm/trunk/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp llvm/trunk/lib/Target/X86/X86CodeEmitter.cpp llvm/trunk/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp Modified: llvm/trunk/include/llvm/CodeGen/AsmPrinter.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/AsmPrinter.h?rev=83363&r1=83362&r2=83363&view=diff ============================================================================== --- llvm/trunk/include/llvm/CodeGen/AsmPrinter.h (original) +++ llvm/trunk/include/llvm/CodeGen/AsmPrinter.h Mon Oct 5 21:19:11 2009 @@ -141,7 +141,7 @@ mutable const Function *LastFn; mutable unsigned Counter; - // Private state for processDebugLock() + // Private state for processDebugLoc() mutable DebugLocTuple PrevDLT; protected: @@ -357,8 +357,8 @@ virtual void EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV); /// processDebugLoc - Processes the debug information of each machine - /// instruction's DebugLoc. - void processDebugLoc(const MachineInstr *MI); + /// instruction's DebugLoc. 
+ void processDebugLoc(const MachineInstr *MI, bool BeforePrintingInsn); /// printInlineAsm - This method formats and prints the specified machine /// instruction that is an inline asm. Modified: llvm/trunk/include/llvm/CodeGen/MachineCodeEmitter.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/MachineCodeEmitter.h?rev=83363&r1=83362&r2=83363&view=diff ============================================================================== --- llvm/trunk/include/llvm/CodeGen/MachineCodeEmitter.h (original) +++ llvm/trunk/include/llvm/CodeGen/MachineCodeEmitter.h Mon Oct 5 21:19:11 2009 @@ -237,7 +237,7 @@ /// MachineInstruction. This is called before emitting any bytes associated /// with the instruction. Even if successive instructions have the same debug /// location, this method will be called for each one. - virtual void processDebugLoc(DebugLoc DL) {} + virtual void processDebugLoc(DebugLoc DL, bool BeforePrintintInsn) {} /// emitLabel - Emits a label virtual void emitLabel(uint64_t LabelID) = 0; Modified: llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp?rev=83363&r1=83362&r2=83363&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp (original) +++ llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp Mon Oct 5 21:19:11 2009 @@ -1353,18 +1353,20 @@ /// processDebugLoc - Processes the debug information of each machine /// instruction's DebugLoc. 
-void AsmPrinter::processDebugLoc(const MachineInstr *MI) { +void AsmPrinter::processDebugLoc(const MachineInstr *MI, + bool BeforePrintingInsn) { if (!MAI || !DW) return; DebugLoc DL = MI->getDebugLoc(); if (MAI->doesSupportDebugInformation() && DW->ShouldEmitDwarfDebug()) { if (!DL.isUnknown()) { DebugLocTuple CurDLT = MF->getDebugLocTuple(DL); - - if (CurDLT.CompileUnit != 0 && PrevDLT != CurDLT) { - printLabel(DW->RecordSourceLine(CurDLT.Line, CurDLT.Col, - CurDLT.CompileUnit)); - O << '\n'; + if (BeforePrintingInsn) { + if (CurDLT.CompileUnit != 0 && PrevDLT != CurDLT) { + printLabel(DW->RecordSourceLine(CurDLT.Line, CurDLT.Col, + CurDLT.CompileUnit)); + O << '\n'; + } } PrevDLT = CurDLT; Modified: llvm/trunk/lib/Target/ARM/ARMCodeEmitter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMCodeEmitter.cpp?rev=83363&r1=83362&r2=83363&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMCodeEmitter.cpp (original) +++ llvm/trunk/lib/Target/ARM/ARMCodeEmitter.cpp Mon Oct 5 21:19:11 2009 @@ -346,7 +346,7 @@ void Emitter::emitInstruction(const MachineInstr &MI) { DEBUG(errs() << "JIT: " << (void*)MCE.getCurrentPCValue() << ":\t" << MI); - MCE.processDebugLoc(MI.getDebugLoc()); + MCE.processDebugLoc(MI.getDebugLoc(), true); NumEmitted++; // Keep track of the # of mi's emitted switch (MI.getDesc().TSFlags & ARMII::FormMask) { @@ -409,6 +409,7 @@ emitMiscInstruction(MI); break; } + MCE.processDebugLoc(MI.getDebugLoc(), false); } template Modified: llvm/trunk/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp?rev=83363&r1=83362&r2=83363&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp (original) +++ llvm/trunk/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp Mon Oct 5 21:19:11 2009 @@ -1038,11 
+1038,12 @@ }} // Call the autogenerated instruction printer routines. - processDebugLoc(MI); + processDebugLoc(MI, true); printInstruction(MI); if (VerboseAsm && !MI->getDebugLoc().isUnknown()) EmitComments(*MI); O << '\n'; + processDebugLoc(MI, false); } void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) { Modified: llvm/trunk/lib/Target/Alpha/AlphaCodeEmitter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Alpha/AlphaCodeEmitter.cpp?rev=83363&r1=83362&r2=83363&view=diff ============================================================================== --- llvm/trunk/lib/Target/Alpha/AlphaCodeEmitter.cpp (original) +++ llvm/trunk/lib/Target/Alpha/AlphaCodeEmitter.cpp Mon Oct 5 21:19:11 2009 @@ -116,7 +116,7 @@ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ++I) { const MachineInstr &MI = *I; - MCE.processDebugLoc(MI.getDebugLoc()); + MCE.processDebugLoc(MI.getDebugLoc(), true); switch(MI.getOpcode()) { default: MCE.emitWordLE(getBinaryCodeForInstr(*I)); @@ -128,6 +128,7 @@ case TargetInstrInfo::KILL: break; //skip these } + MCE.processDebugLoc(MI.getDebugLoc(), false); } } Modified: llvm/trunk/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp?rev=83363&r1=83362&r2=83363&view=diff ============================================================================== --- llvm/trunk/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp (original) +++ llvm/trunk/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp Mon Oct 5 21:19:11 2009 @@ -177,13 +177,13 @@ II != E; ++II) { // Print the assembly for the instruction. 
++EmittedInsts; - processDebugLoc(II); - + processDebugLoc(II, true); printInstruction(II); if (VerboseAsm && !II->getDebugLoc().isUnknown()) EmitComments(*II); O << '\n'; + processDebugLoc(II, false); } } Modified: llvm/trunk/lib/Target/Blackfin/AsmPrinter/BlackfinAsmPrinter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Blackfin/AsmPrinter/BlackfinAsmPrinter.cpp?rev=83363&r1=83362&r2=83363&view=diff ============================================================================== --- llvm/trunk/lib/Target/Blackfin/AsmPrinter/BlackfinAsmPrinter.cpp (original) +++ llvm/trunk/lib/Target/Blackfin/AsmPrinter/BlackfinAsmPrinter.cpp Mon Oct 5 21:19:11 2009 @@ -146,13 +146,14 @@ for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end(); II != E; ++II) { // Print the assembly for the instruction. - processDebugLoc(II); + processDebugLoc(II, true); printInstruction(II); if (VerboseAsm && !II->getDebugLoc().isUnknown()) EmitComments(*II); O << '\n'; + processDebugLoc(II, false); ++EmittedInsts; } } Modified: llvm/trunk/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp?rev=83363&r1=83362&r2=83363&view=diff ============================================================================== --- llvm/trunk/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp (original) +++ llvm/trunk/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp Mon Oct 5 21:19:11 2009 @@ -405,11 +405,11 @@ /// void SPUAsmPrinter::printMachineInstruction(const MachineInstr *MI) { ++EmittedInsts; - processDebugLoc(MI); + processDebugLoc(MI, true); printInstruction(MI); - if (VerboseAsm && !MI->getDebugLoc().isUnknown()) EmitComments(*MI); + processDebugLoc(MI, false); O << '\n'; } Modified: llvm/trunk/lib/Target/MSP430/AsmPrinter/MSP430AsmPrinter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/MSP430/AsmPrinter/MSP430AsmPrinter.cpp?rev=83363&r1=83362&r2=83363&view=diff 
============================================================================== --- llvm/trunk/lib/Target/MSP430/AsmPrinter/MSP430AsmPrinter.cpp (original) +++ llvm/trunk/lib/Target/MSP430/AsmPrinter/MSP430AsmPrinter.cpp Mon Oct 5 21:19:11 2009 @@ -148,7 +148,7 @@ void MSP430AsmPrinter::printMachineInstruction(const MachineInstr *MI) { ++EmittedInsts; - processDebugLoc(MI); + processDebugLoc(MI, true); // Call the autogenerated instruction printer routines. printInstruction(MI); @@ -156,6 +156,8 @@ if (VerboseAsm && !MI->getDebugLoc().isUnknown()) EmitComments(*MI); O << '\n'; + + processDebugLoc(MI, false); } void MSP430AsmPrinter::printOperand(const MachineInstr *MI, int OpNum, Modified: llvm/trunk/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp?rev=83363&r1=83362&r2=83363&view=diff ============================================================================== --- llvm/trunk/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp (original) +++ llvm/trunk/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp Mon Oct 5 21:19:11 2009 @@ -278,7 +278,7 @@ for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end(); II != E; ++II) { - processDebugLoc(II); + processDebugLoc(II, true); // Print the assembly for the instruction. 
printInstruction(II); @@ -286,7 +286,8 @@ if (VerboseAsm && !II->getDebugLoc().isUnknown()) EmitComments(*II); O << '\n'; - + + processDebugLoc(II, false); ++EmittedInsts; } Modified: llvm/trunk/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp?rev=83363&r1=83362&r2=83363&view=diff ============================================================================== --- llvm/trunk/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp (original) +++ llvm/trunk/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp Mon Oct 5 21:19:11 2009 @@ -43,13 +43,12 @@ } bool PIC16AsmPrinter::printMachineInstruction(const MachineInstr *MI) { - processDebugLoc(MI); - + processDebugLoc(MI, true); printInstruction(MI); - if (VerboseAsm && !MI->getDebugLoc().isUnknown()) EmitComments(*MI); O << '\n'; + processDebugLoc(MI, false); return true; } Modified: llvm/trunk/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp?rev=83363&r1=83362&r2=83363&view=diff ============================================================================== --- llvm/trunk/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp (original) +++ llvm/trunk/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp Mon Oct 5 21:19:11 2009 @@ -545,7 +545,7 @@ void PPCAsmPrinter::printMachineInstruction(const MachineInstr *MI) { ++EmittedInsts; - processDebugLoc(MI); + processDebugLoc(MI, true); // Check for slwi/srwi mnemonics. 
if (MI->getOpcode() == PPC::RLWINM) { @@ -595,6 +595,8 @@ if (VerboseAsm && !MI->getDebugLoc().isUnknown()) EmitComments(*MI); O << '\n'; + + processDebugLoc(MI, false); } /// runOnMachineFunction - This uses the printMachineInstruction() Modified: llvm/trunk/lib/Target/PowerPC/PPCCodeEmitter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCCodeEmitter.cpp?rev=83363&r1=83362&r2=83363&view=diff ============================================================================== --- llvm/trunk/lib/Target/PowerPC/PPCCodeEmitter.cpp (original) +++ llvm/trunk/lib/Target/PowerPC/PPCCodeEmitter.cpp Mon Oct 5 21:19:11 2009 @@ -132,7 +132,7 @@ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ++I){ const MachineInstr &MI = *I; - MCE.processDebugLoc(MI.getDebugLoc()); + MCE.processDebugLoc(MI.getDebugLoc(), true); switch (MI.getOpcode()) { default: MCE.emitWordBE(getBinaryCodeForInstr(MI)); @@ -151,6 +151,7 @@ MCE.emitWordBE(0x48000005); // bl 1 break; } + MCE.processDebugLoc(MI.getDebugLoc(), false); } } Modified: llvm/trunk/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp?rev=83363&r1=83362&r2=83363&view=diff ============================================================================== --- llvm/trunk/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp (original) +++ llvm/trunk/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp Mon Oct 5 21:19:11 2009 @@ -124,13 +124,13 @@ for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end(); II != E; ++II) { // Print the assembly for the instruction. 
- processDebugLoc(II); + processDebugLoc(II, true); printInstruction(II); if (VerboseAsm && !II->getDebugLoc().isUnknown()) EmitComments(*II); O << '\n'; - + processDebugLoc(II, false); ++EmittedInsts; } } Modified: llvm/trunk/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp?rev=83363&r1=83362&r2=83363&view=diff ============================================================================== --- llvm/trunk/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp (original) +++ llvm/trunk/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp Mon Oct 5 21:19:11 2009 @@ -156,7 +156,7 @@ void SystemZAsmPrinter::printMachineInstruction(const MachineInstr *MI) { ++EmittedInsts; - processDebugLoc(MI); + processDebugLoc(MI, true); // Call the autogenerated instruction printer routines. printInstruction(MI); @@ -164,6 +164,8 @@ if (VerboseAsm && !MI->getDebugLoc().isUnknown()) EmitComments(*MI); O << '\n'; + + processDebugLoc(MI, false); } void SystemZAsmPrinter::printPCRelImmOperand(const MachineInstr *MI, int OpNum){ Modified: llvm/trunk/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp?rev=83363&r1=83362&r2=83363&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp (original) +++ llvm/trunk/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp Mon Oct 5 21:19:11 2009 @@ -653,13 +653,15 @@ void X86AsmPrinter::printMachineInstruction(const MachineInstr *MI) { ++EmittedInsts; - processDebugLoc(MI); + processDebugLoc(MI, true); printInstructionThroughMCStreamer(MI); if (VerboseAsm && !MI->getDebugLoc().isUnknown()) EmitComments(*MI); O << '\n'; + + processDebugLoc(MI, false); } void X86AsmPrinter::PrintGlobalVariable(const GlobalVariable* GVar) { Modified: llvm/trunk/lib/Target/X86/X86CodeEmitter.cpp URL: 
http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86CodeEmitter.cpp?rev=83363&r1=83362&r2=83363&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86CodeEmitter.cpp (original) +++ llvm/trunk/lib/Target/X86/X86CodeEmitter.cpp Mon Oct 5 21:19:11 2009 @@ -481,7 +481,7 @@ const TargetInstrDesc *Desc) { DEBUG(errs() << MI); - MCE.processDebugLoc(MI.getDebugLoc()); + MCE.processDebugLoc(MI.getDebugLoc(), true); unsigned Opcode = Desc->Opcode; @@ -859,6 +859,8 @@ #endif llvm_unreachable(0); } + + MCE.processDebugLoc(MI.getDebugLoc(), false); } // Adapt the Emitter / CodeEmitter interfaces to MCCodeEmitter. Modified: llvm/trunk/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp?rev=83363&r1=83362&r2=83363&view=diff ============================================================================== --- llvm/trunk/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp (original) +++ llvm/trunk/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp Mon Oct 5 21:19:11 2009 @@ -352,7 +352,7 @@ void XCoreAsmPrinter::printMachineInstruction(const MachineInstr *MI) { ++EmittedInsts; - processDebugLoc(MI); + processDebugLoc(MI, true); // Check for mov mnemonic unsigned src, dst, srcSR, dstSR; @@ -365,6 +365,8 @@ if (VerboseAsm && !MI->getDebugLoc().isUnknown()) EmitComments(*MI); O << '\n'; + + processDebugLoc(MI, false); } // Force static initialization. 
From dpatel at apple.com Mon Oct 5 22:04:59 2009 From: dpatel at apple.com (Devang Patel) Date: Tue, 06 Oct 2009 03:04:59 -0000 Subject: [llvm-commits] [llvm] r83364 - /llvm/trunk/lib/ExecutionEngine/JIT/JITEmitter.cpp Message-ID: <200910060304.n9634xi3012449@zion.cs.uiuc.edu> Author: dpatel Date: Mon Oct 5 22:04:58 2009 New Revision: 83364 URL: http://llvm.org/viewvc/llvm-project?rev=83364&view=rev Log: Update processDebugLoc() to handle requests to process debug info, before and after emitting instructions. Modified: llvm/trunk/lib/ExecutionEngine/JIT/JITEmitter.cpp Modified: llvm/trunk/lib/ExecutionEngine/JIT/JITEmitter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/ExecutionEngine/JIT/JITEmitter.cpp?rev=83364&r1=83363&r2=83364&view=diff ============================================================================== --- llvm/trunk/lib/ExecutionEngine/JIT/JITEmitter.cpp (original) +++ llvm/trunk/lib/ExecutionEngine/JIT/JITEmitter.cpp Mon Oct 5 22:04:58 2009 @@ -597,7 +597,7 @@ /// MachineRelocations that reference external functions by name. 
const StringMap<void*> &getExternalFnStubs() const { return ExtFnStubs; } - virtual void processDebugLoc(DebugLoc DL); + virtual void processDebugLoc(DebugLoc DL, bool BeforePrintingInsn); virtual void emitLabel(uint64_t LabelID) { if (LabelLocations.size() <= LabelID) @@ -708,18 +708,20 @@ FnRefs.insert(CurFn); } -void JITEmitter::processDebugLoc(DebugLoc DL) { +void JITEmitter::processDebugLoc(DebugLoc DL, bool BeforePrintingInsn) { if (!DL.isUnknown()) { DebugLocTuple CurDLT = EmissionDetails.MF->getDebugLocTuple(DL); - if (CurDLT.CompileUnit != 0 && PrevDLT != CurDLT) { - JITEvent_EmittedFunctionDetails::LineStart NextLine; - NextLine.Address = getCurrentPCValue(); - NextLine.Loc = DL; - EmissionDetails.LineStarts.push_back(NextLine); + if (BeforePrintingInsn) { + if (CurDLT.CompileUnit != 0 && PrevDLT != CurDLT) { + JITEvent_EmittedFunctionDetails::LineStart NextLine; + NextLine.Address = getCurrentPCValue(); + NextLine.Loc = DL; + EmissionDetails.LineStarts.push_back(NextLine); + } + + PrevDLT = CurDLT; } - - PrevDLT = CurDLT; } } From kremenek at apple.com Mon Oct 5 22:09:51 2009 From: kremenek at apple.com (Ted Kremenek) Date: Tue, 06 Oct 2009 03:09:51 -0000 Subject: [llvm-commits] [llvm] r83365 - /llvm/tags/checker/checker-224/ Message-ID: <200910060309.n9639p6R013198@zion.cs.uiuc.edu> Author: kremenek Date: Mon Oct 5 22:09:51 2009 New Revision: 83365 URL: http://llvm.org/viewvc/llvm-project?rev=83365&view=rev Log: Removing checker-224. Removed: llvm/tags/checker/checker-224/ From dpatel at apple.com Mon Oct 5 22:15:39 2009 From: dpatel at apple.com (Devang Patel) Date: Tue, 06 Oct 2009 03:15:39 -0000 Subject: [llvm-commits] [llvm] r83367 - /llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp Message-ID: <200910060315.n963Fd3D013924@zion.cs.uiuc.edu> Author: dpatel Date: Mon Oct 5 22:15:38 2009 New Revision: 83367 URL: http://llvm.org/viewvc/llvm-project?rev=83367&view=rev Log: Fix cut-n-pasto.
Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp?rev=83367&r1=83366&r2=83367&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp (original) +++ llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp Mon Oct 5 22:15:38 2009 @@ -1812,7 +1812,7 @@ /// end with this machine instruction. void DwarfDebug::SetDbgScopeEndLabels(const MachineInstr *MI, unsigned Label) { InsnToDbgScopeMapTy::iterator I = DbgScopeEndMap.find(MI); - if (I == DbgScopeBeginMap.end()) + if (I == DbgScopeEndMap.end()) return; SmallVector<DbgScope *, 2> &SD = I->second; for (SmallVector<DbgScope *, 2>::iterator SDI = SD.begin(), SDE = SD.end(); From kremenek at apple.com Mon Oct 5 22:54:52 2009 From: kremenek at apple.com (Ted Kremenek) Date: Tue, 06 Oct 2009 03:54:52 -0000 Subject: [llvm-commits] [llvm] r83370 - /llvm/tags/checker/checker-224/ Message-ID: <200910060354.n963sqPL018959@zion.cs.uiuc.edu> Author: kremenek Date: Mon Oct 5 22:54:52 2009 New Revision: 83370 URL: http://llvm.org/viewvc/llvm-project?rev=83370&view=rev Log: Tagging checker-224. Added: llvm/tags/checker/checker-224/ - copied from r83369, llvm/trunk/ From viridia at gmail.com Tue Oct 6 00:48:03 2009 From: viridia at gmail.com (Talin) Date: Mon, 5 Oct 2009 22:48:03 -0700 Subject: [llvm-commits] PATCH: Additional DebugFactory methods Message-ID: The attached patch adds some additional convenience methods to DebugFactory. Specifically, methods that take a uint64 for size, alignment or offset, can now also take a Constant *. This is very useful for target-independent frontends which can use things like ConstantExpr::getSizeof(). I've been using this for about a week in my own code and it seems to work fine, -- -- Talin -------------- next part -------------- An HTML attachment was scrubbed...
URL: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20091005/4955672b/attachment.html -------------- next part -------------- A non-text attachment was scrubbed... Name: debug_constants.patch Type: application/octet-stream Size: 7711 bytes Desc: not available Url : http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20091005/4955672b/attachment.obj From baldrick at free.fr Tue Oct 6 01:49:39 2009 From: baldrick at free.fr (Duncan Sands) Date: Tue, 06 Oct 2009 08:49:39 +0200 Subject: [llvm-commits] [llvm] r83324 - /llvm/trunk/lib/Analysis/MallocHelper.cpp In-Reply-To: <200910052115.n95LFiPj032104@zion.cs.uiuc.edu> References: <200910052115.n95LFiPj032104@zion.cs.uiuc.edu> Message-ID: <4ACAE883.7000804@free.fr> Hi Torok, > + // Check malloc prototype. > + // FIXME: this will be obsolete when nobuiltin attribute will exist. why would it be obsolete then? If LLVM sees "malloc" with a funky prototype it will always need to bail out, no? Ciao, Duncan. From clattner at apple.com Tue Oct 6 01:53:49 2009 From: clattner at apple.com (Chris Lattner) Date: Mon, 5 Oct 2009 23:53:49 -0700 Subject: [llvm-commits] [llvm] r83324 - /llvm/trunk/lib/Analysis/MallocHelper.cpp In-Reply-To: <4ACAE883.7000804@free.fr> References: <200910052115.n95LFiPj032104@zion.cs.uiuc.edu> <4ACAE883.7000804@free.fr> Message-ID: On Oct 5, 2009, at 11:49 PM, Duncan Sands wrote: > Hi Torok, > >> + // Check malloc prototype. >> + // FIXME: this will be obsolete when nobuiltin attribute will >> exist. > > why would it be obsolete then? If LLVM sees "malloc" with a funky > prototype it will always need to bail out, no? Why would it in this case? Though it doesn't make a lot of sense, it wouldn't cause the program to crash, and the code would have undefined behavior. 
-Chris From baldrick at free.fr Tue Oct 6 02:12:30 2009 From: baldrick at free.fr (Duncan Sands) Date: Tue, 06 Oct 2009 09:12:30 +0200 Subject: [llvm-commits] [llvm] r83324 - /llvm/trunk/lib/Analysis/MallocHelper.cpp In-Reply-To: References: <200910052115.n95LFiPj032104@zion.cs.uiuc.edu> <4ACAE883.7000804@free.fr> Message-ID: <4ACAEDDE.8000204@free.fr> Hi Chris, >>> + // Check malloc prototype. >>> + // FIXME: this will be obsolete when nobuiltin attribute will exist. >> >> why would it be obsolete then? If LLVM sees "malloc" with a funky >> prototype it will always need to bail out, no? > > Why would it in this case? Though it doesn't make a lot of sense, it > wouldn't cause the program to crash, and the code would have undefined > behavior. if the standard malloc for some operating system has an unusual prototype, for example it returns more info than usual malloc, then it is still a builtin, just a builtin that LLVM doesn't understand yet. I don't see why it shouldn't be marked with a "builtin" attribute. A "builtin" attribute presumably wouldn't be for marking functions that LLVM knows about, but for marking standard functions. LLVM may or may not know about any particular function or particular standard. That said, I appreciate that there are limits to this line of reasoning. Ciao, Duncan. 
From edwintorok at gmail.com Tue Oct 6 02:13:20 2009 From: edwintorok at gmail.com (=?ISO-8859-1?Q?T=F6r=F6k_Edwin?=) Date: Tue, 06 Oct 2009 10:13:20 +0300 Subject: [llvm-commits] [llvm] r83324 - /llvm/trunk/lib/Analysis/MallocHelper.cpp In-Reply-To: <4495EE9D-3ED6-465D-8F49-81BB677765A3@apple.com> References: <200910052115.n95LFiPj032104@zion.cs.uiuc.edu> <4495EE9D-3ED6-465D-8F49-81BB677765A3@apple.com> Message-ID: <4ACAEE10.70801@gmail.com> On 2009-10-06 00:28, Chris Lattner wrote: > > On Oct 5, 2009, at 2:15 PM, Torok Edwin wrote: > >> Author: edwin >> Date: Mon Oct 5 16:15:43 2009 >> New Revision: 83324 >> >> URL: http://llvm.org/viewvc/llvm-project?rev=83324&view=rev >> Log: >> Don't treat malloc calls with non-matching prototype as malloc. >> Fixes second part of PR5130, miscompilation in FreeBSD kernel, where >> malloc takes 3 params, >> and *does* initialize memory. > > Thanks Edwin, please mention the PR in the FIXME so we can remember > what this is working around. Done. Best regards, --Edwin From edwintorok at gmail.com Tue Oct 6 02:16:30 2009 From: edwintorok at gmail.com (=?ISO-8859-1?Q?T=F6r=F6k_Edwin?=) Date: Tue, 06 Oct 2009 10:16:30 +0300 Subject: [llvm-commits] [llvm] r83324 - /llvm/trunk/lib/Analysis/MallocHelper.cpp In-Reply-To: References: <200910052115.n95LFiPj032104@zion.cs.uiuc.edu> <4ACAE883.7000804@free.fr> Message-ID: <4ACAEECE.5030003@gmail.com> On 2009-10-06 09:53, Chris Lattner wrote: > > On Oct 5, 2009, at 11:49 PM, Duncan Sands wrote: > >> Hi Torok, >> >>> + // Check malloc prototype. >>> + // FIXME: this will be obsolete when nobuiltin attribute will exist. >> >> why would it be obsolete then? If LLVM sees "malloc" with a funky >> prototype it will always need to bail out, no? > > Why would it in this case? Though it doesn't make a lot of sense, it > wouldn't cause the program to crash, and the code would have undefined > behavior. 
Once the LLVM optimizers/backends can emit warnings with line numbers, I think we should emit a warning if we see something obviously wrong being done, like using malloc with a wrong proto without using -ffreestanding/nobuiltins, or calling a function with an obviously wrong calling convention, etc. Now that we have debug info when optimizing too, I think this is doable, maybe those warnings should be default off, and have a hook, so clang can print it as a diagnostic, etc. In fact I think we should emit a warning when we see a definition for malloc/some other special function without the nobuiltin attribute, or with a mismatching proto. What do you think? Best regards, --Edwin From baldrick at free.fr Tue Oct 6 03:36:40 2009 From: baldrick at free.fr (Duncan Sands) Date: Tue, 06 Oct 2009 08:36:40 -0000 Subject: [llvm-commits] [gcc-plugin] r83374 - /gcc-plugin/trunk/llvm-backend.cpp Message-ID: <200910060836.n968afHR002360@zion.cs.uiuc.edu> Author: baldrick Date: Tue Oct 6 03:36:40 2009 New Revision: 83374 URL: http://llvm.org/viewvc/llvm-project?rev=83374&view=rev Log: Map GCC's -fverbose-asm to --asm-verbose, rather than mapping -dA as now, since this seems more logical. 
Modified: gcc-plugin/trunk/llvm-backend.cpp Modified: gcc-plugin/trunk/llvm-backend.cpp URL: http://llvm.org/viewvc/llvm-project/gcc-plugin/trunk/llvm-backend.cpp?rev=83374&r1=83373&r2=83374&view=diff ============================================================================== --- gcc-plugin/trunk/llvm-backend.cpp (original) +++ gcc-plugin/trunk/llvm-backend.cpp Tue Oct 6 03:36:40 2009 @@ -329,7 +329,7 @@ Args.push_back("--disable-fp-elim"); if (!flag_zero_initialized_in_bss) Args.push_back("--nozero-initialized-in-bss"); - if (flag_debug_asm) + if (flag_verbose_asm) Args.push_back("--asm-verbose"); //TODO if (flag_debug_pass_structure) //TODO Args.push_back("--debug-pass=Structure"); From baldrick at free.fr Tue Oct 6 03:40:03 2009 From: baldrick at free.fr (Duncan Sands) Date: Tue, 06 Oct 2009 08:40:03 -0000 Subject: [llvm-commits] [gcc-plugin] r83375 - /gcc-plugin/trunk/llvm-convert.cpp Message-ID: <200910060840.n968e3u9002787@zion.cs.uiuc.edu> Author: baldrick Date: Tue Oct 6 03:40:03 2009 New Revision: 83375 URL: http://llvm.org/viewvc/llvm-project?rev=83375&view=rev Log: Only give basic blocks the GCC names if compiling with -fverbose-asm. These verbose names take up a lot of space in the bitcode. Modified: gcc-plugin/trunk/llvm-convert.cpp Modified: gcc-plugin/trunk/llvm-convert.cpp URL: http://llvm.org/viewvc/llvm-project/gcc-plugin/trunk/llvm-convert.cpp?rev=83375&r1=83374&r2=83375&view=diff ============================================================================== --- gcc-plugin/trunk/llvm-convert.cpp (original) +++ gcc-plugin/trunk/llvm-convert.cpp Tue Oct 6 03:40:03 2009 @@ -888,27 +888,32 @@ // while generating code must be nameless. That way, artificial blocks // can be easily identified. - // Give the basic block a name. If BB contains labels, name the LLVM basic - // block after the first label. 
- gimple stmt = first_stmt(bb); - if (stmt && gimple_code(stmt) == GIMPLE_LABEL) { - tree label = gimple_label_label(stmt); - if (tree name = DECL_NAME(label)) { - // If the label has a name then use it. - BB->setName(IDENTIFIER_POINTER(name)); - } else if (LABEL_DECL_UID(label) != -1) { - // If the label has a UID then use it. - Twine UID(LABEL_DECL_UID(label)); - BB->setName(""); + // Give the basic block a name. If the user specified -fverbose-asm then + // use the same naming scheme as GCC. + if (flag_verbose_asm) { + // If BB contains labels, name the LLVM basic block after the first label. + gimple stmt = first_stmt(bb); + if (stmt && gimple_code(stmt) == GIMPLE_LABEL) { + tree label = gimple_label_label(stmt); + if (tree name = DECL_NAME(label)) { + // If the label has a name then use it. + BB->setName(IDENTIFIER_POINTER(name)); + } else if (LABEL_DECL_UID(label) != -1) { + // If the label has a UID then use it. + Twine UID(LABEL_DECL_UID(label)); + BB->setName(""); + } else { + // Otherwise use the generic UID. + Twine UID(DECL_UID(label)); + BB->setName(""); + } } else { - // Otherwise use the generic UID. - Twine UID(DECL_UID(label)); - BB->setName(""); + // When there is no label, use the same naming scheme as the GCC tree dumps. + Twine Index(bb->index); + BB->setName(""); } } else { - // When there is no label, use the same naming scheme as the GCC tree dumps. - Twine Index(bb->index); - BB->setName(""); + BB->setName("bb"); } return BasicBlocks[bb] = BB; From baldrick at free.fr Tue Oct 6 04:07:13 2009 From: baldrick at free.fr (Duncan Sands) Date: Tue, 06 Oct 2009 09:07:13 -0000 Subject: [llvm-commits] [gcc-plugin] r83376 - /gcc-plugin/trunk/README Message-ID: <200910060907.n9697Dlj006726@zion.cs.uiuc.edu> Author: baldrick Date: Tue Oct 6 04:07:12 2009 New Revision: 83376 URL: http://llvm.org/viewvc/llvm-project?rev=83376&view=rev Log: Document some command line options that are helpful when working with the plugin. 
Modified: gcc-plugin/trunk/README Modified: gcc-plugin/trunk/README URL: http://llvm.org/viewvc/llvm-project/gcc-plugin/trunk/README?rev=83376&r1=83375&r2=83376&view=diff ============================================================================== --- gcc-plugin/trunk/README (original) +++ gcc-plugin/trunk/README Tue Oct 6 04:07:12 2009 @@ -50,3 +50,41 @@ -fplugin-arg-llvm-enable-gcc-optzns Search for the comment "Process any plugin arguments" in llvm-backend.cpp, the argument parsing code is just after this. + +------------------ +- USEFUL OPTIONS - +------------------ + +-fplugin-arg-llvm-emit-ir + Output LLVM IR rather than target assembler. You need to use -S with this, + since otherwise GCC will pass the output to the system assembler (these don't + usually understand LLVM IR). It would be nice to fix this and have the option + work with -c too but it's not clear how. + +-fstats + Output both LLVM and GCC statistics. + +-ftime-report + Output both LLVM and GCC timing information. + +-fverbose-asm + Annotate the target assembler with helpful comments. Gives values helpful + names in the LLVM IR. + +-fno-ident + If the ident global asm in the LLVM IR annoys you, use this to turn it off. + +-fdump-rtl-all + In the dump file, each function is output both as gimple and as LLVM IR. + +-fplugin-arg-llvm-disable-llvm-optzns + Do not perform any LLVM IR optimizations even if compiling at -O1, -O2 etc. + +-fplugin-arg-llvm-enable-gcc-optzns + Run the GCC tree optimizers as well as the LLVM IR optimizers. Normally the + GCC optimizers are disabled. + +-fplugin-arg-llvm-save-gcc-output + GCC assembler output is normally redirected to /dev/null so that it doesn't + clash with the LLVM output. This option causes GCC output to be written to + a file instead. Good for seeing which GCC output we've failed to turn off. 
From grosbach at apple.com Tue Oct 6 10:03:45 2009 From: grosbach at apple.com (Jim Grosbach) Date: Tue, 06 Oct 2009 15:03:45 -0000 Subject: [llvm-commits] [llvm] r83378 - /llvm/trunk/lib/CodeGen/RegisterScavenging.cpp Message-ID: <200910061503.n96F3jAt021094@zion.cs.uiuc.edu> Author: grosbach Date: Tue Oct 6 10:03:44 2009 New Revision: 83378 URL: http://llvm.org/viewvc/llvm-project?rev=83378&view=rev Log: grammar Modified: llvm/trunk/lib/CodeGen/RegisterScavenging.cpp Modified: llvm/trunk/lib/CodeGen/RegisterScavenging.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/RegisterScavenging.cpp?rev=83378&r1=83377&r2=83378&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/RegisterScavenging.cpp (original) +++ llvm/trunk/lib/CodeGen/RegisterScavenging.cpp Tue Oct 6 10:03:44 2009 @@ -303,7 +303,7 @@ if (!TRI->saveScavengerRegister(*MBB, I, RC, SReg)) { // Spill the scavenged register before I. assert(ScavengingFrameIndex >= 0 && - "Cannot scavenging register without an emergency spill slot!"); + "Cannot scavenge register without an emergency spill slot!"); TII->storeRegToStackSlot(*MBB, I, SReg, true, ScavengingFrameIndex, RC); MachineBasicBlock::iterator II = prior(I); TRI->eliminateFrameIndex(II, SPAdj, this); From baldrick at free.fr Tue Oct 6 10:40:36 2009 From: baldrick at free.fr (Duncan Sands) Date: Tue, 06 Oct 2009 15:40:36 -0000 Subject: [llvm-commits] [llvm] r83379 - in /llvm/trunk: include/llvm/ lib/Analysis/ lib/CodeGen/ lib/ExecutionEngine/ lib/Transforms/IPO/ lib/Transforms/Instrumentation/ lib/Transforms/Scalar/ lib/Transforms/Utils/ lib/VMCore/ tools/bugpoint/ unittests/Support/ unittests/Transforms/Utils/ Message-ID: <200910061540.n96FebGQ026156@zion.cs.uiuc.edu> Author: baldrick Date: Tue Oct 6 10:40:36 2009 New Revision: 83379 URL: http://llvm.org/viewvc/llvm-project?rev=83379&view=rev Log: Introduce and use convenience methods for getting pointer types where the 
element is of a basic builtin type. For example, to get an i8* use getInt8PtrTy. Modified: llvm/trunk/include/llvm/Type.h llvm/trunk/lib/Analysis/PointerTracking.cpp llvm/trunk/lib/Analysis/ScalarEvolutionExpander.cpp llvm/trunk/lib/CodeGen/DwarfEHPrepare.cpp llvm/trunk/lib/CodeGen/IntrinsicLowering.cpp llvm/trunk/lib/CodeGen/PseudoSourceValue.cpp llvm/trunk/lib/CodeGen/ShadowStackGC.cpp llvm/trunk/lib/CodeGen/SjLjEHPrepare.cpp llvm/trunk/lib/ExecutionEngine/ExecutionEngine.cpp llvm/trunk/lib/Transforms/IPO/ExtractGV.cpp llvm/trunk/lib/Transforms/IPO/LowerSetJmp.cpp llvm/trunk/lib/Transforms/IPO/RaiseAllocations.cpp llvm/trunk/lib/Transforms/Instrumentation/ProfilingUtils.cpp llvm/trunk/lib/Transforms/Scalar/InstructionCombining.cpp llvm/trunk/lib/Transforms/Scalar/MemCpyOptimizer.cpp llvm/trunk/lib/Transforms/Scalar/SimplifyLibCalls.cpp llvm/trunk/lib/Transforms/Utils/InlineFunction.cpp llvm/trunk/lib/Transforms/Utils/LowerAllocations.cpp llvm/trunk/lib/Transforms/Utils/LowerInvoke.cpp llvm/trunk/lib/VMCore/AutoUpgrade.cpp llvm/trunk/lib/VMCore/Instructions.cpp llvm/trunk/lib/VMCore/Type.cpp llvm/trunk/tools/bugpoint/Miscompilation.cpp llvm/trunk/unittests/Support/TypeBuilderTest.cpp llvm/trunk/unittests/Transforms/Utils/Cloning.cpp Modified: llvm/trunk/include/llvm/Type.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Type.h?rev=83379&r1=83378&r2=83379&view=diff ============================================================================== --- llvm/trunk/include/llvm/Type.h (original) +++ llvm/trunk/include/llvm/Type.h Tue Oct 6 10:40:36 2009 @@ -381,6 +381,21 @@ static const IntegerType *getInt32Ty(LLVMContext &C); static const IntegerType *getInt64Ty(LLVMContext &C); + //===--------------------------------------------------------------------===// + // Convenience methods for getting pointer types with one of the above builtin + // types as pointee. 
+ // + static const PointerType *getFloatPtrTy(LLVMContext &C, unsigned AS = 0); + static const PointerType *getDoublePtrTy(LLVMContext &C, unsigned AS = 0); + static const PointerType *getX86_FP80PtrTy(LLVMContext &C, unsigned AS = 0); + static const PointerType *getFP128PtrTy(LLVMContext &C, unsigned AS = 0); + static const PointerType *getPPC_FP128PtrTy(LLVMContext &C, unsigned AS = 0); + static const PointerType *getInt1PtrTy(LLVMContext &C, unsigned AS = 0); + static const PointerType *getInt8PtrTy(LLVMContext &C, unsigned AS = 0); + static const PointerType *getInt16PtrTy(LLVMContext &C, unsigned AS = 0); + static const PointerType *getInt32PtrTy(LLVMContext &C, unsigned AS = 0); + static const PointerType *getInt64PtrTy(LLVMContext &C, unsigned AS = 0); + /// Methods for support type inquiry through isa, cast, and dyn_cast: static inline bool classof(const Type *) { return true; } Modified: llvm/trunk/lib/Analysis/PointerTracking.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/PointerTracking.cpp?rev=83379&r1=83378&r2=83379&view=diff ============================================================================== --- llvm/trunk/lib/Analysis/PointerTracking.cpp (original) +++ llvm/trunk/lib/Analysis/PointerTracking.cpp Tue Oct 6 10:40:36 2009 @@ -48,7 +48,7 @@ } bool PointerTracking::doInitialization(Module &M) { - const Type *PTy = PointerType::getUnqual(Type::getInt8Ty(M.getContext())); + const Type *PTy = Type::getInt8PtrTy(M.getContext()); // Find calloc(i64, i64) or calloc(i32, i32). 
callocFunc = M.getFunction("calloc"); Modified: llvm/trunk/lib/Analysis/ScalarEvolutionExpander.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/ScalarEvolutionExpander.cpp?rev=83379&r1=83378&r2=83379&view=diff ============================================================================== --- llvm/trunk/lib/Analysis/ScalarEvolutionExpander.cpp (original) +++ llvm/trunk/lib/Analysis/ScalarEvolutionExpander.cpp Tue Oct 6 10:40:36 2009 @@ -464,7 +464,7 @@ if (!AnyNonZeroIndices) { // Cast the base to i8*. V = InsertNoopCastOfTo(V, - Type::getInt8Ty(Ty->getContext())->getPointerTo(PTy->getAddressSpace())); + Type::getInt8PtrTy(Ty->getContext(), PTy->getAddressSpace())); // Expand the operands for a plain byte offset. Value *Idx = expandCodeFor(SE.getAddExpr(Ops), Ty); Modified: llvm/trunk/lib/CodeGen/DwarfEHPrepare.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/DwarfEHPrepare.cpp?rev=83379&r1=83378&r2=83379&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/DwarfEHPrepare.cpp (original) +++ llvm/trunk/lib/CodeGen/DwarfEHPrepare.cpp Tue Oct 6 10:40:36 2009 @@ -236,7 +236,7 @@ if (!RewindFunction) { LLVMContext &Ctx = UnwindInsts[0]->getContext(); std::vector - Params(1, PointerType::getUnqual(Type::getInt8Ty(Ctx))); + Params(1, Type::getInt8PtrTy(Ctx)); FunctionType *FTy = FunctionType::get(Type::getVoidTy(Ctx), Params, false); const char *RewindName = TLI->getLibcallName(RTLIB::UNWIND_RESUME); Modified: llvm/trunk/lib/CodeGen/IntrinsicLowering.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/IntrinsicLowering.cpp?rev=83379&r1=83378&r2=83379&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/IntrinsicLowering.cpp (original) +++ llvm/trunk/lib/CodeGen/IntrinsicLowering.cpp Tue Oct 6 10:40:36 2009 @@ -103,22 +103,22 @@ break; case Intrinsic::memcpy: M.getOrInsertFunction("memcpy", - 
PointerType::getUnqual(Type::getInt8Ty(Context)), - PointerType::getUnqual(Type::getInt8Ty(Context)), - PointerType::getUnqual(Type::getInt8Ty(Context)), + Type::getInt8PtrTy(Context), + Type::getInt8PtrTy(Context), + Type::getInt8PtrTy(Context), TD.getIntPtrType(Context), (Type *)0); break; case Intrinsic::memmove: M.getOrInsertFunction("memmove", - PointerType::getUnqual(Type::getInt8Ty(Context)), - PointerType::getUnqual(Type::getInt8Ty(Context)), - PointerType::getUnqual(Type::getInt8Ty(Context)), + Type::getInt8PtrTy(Context), + Type::getInt8PtrTy(Context), + Type::getInt8PtrTy(Context), TD.getIntPtrType(Context), (Type *)0); break; case Intrinsic::memset: M.getOrInsertFunction("memset", - PointerType::getUnqual(Type::getInt8Ty(Context)), - PointerType::getUnqual(Type::getInt8Ty(Context)), + Type::getInt8PtrTy(Context), + Type::getInt8PtrTy(Context), Type::getInt32Ty(M.getContext()), TD.getIntPtrType(Context), (Type *)0); break; Modified: llvm/trunk/lib/CodeGen/PseudoSourceValue.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/PseudoSourceValue.cpp?rev=83379&r1=83378&r2=83379&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/PseudoSourceValue.cpp (original) +++ llvm/trunk/lib/CodeGen/PseudoSourceValue.cpp Tue Oct 6 10:40:36 2009 @@ -44,7 +44,7 @@ // static. For now, we can safely use the global context for the time being to // squeak by. 
PseudoSourceValue::PseudoSourceValue() : - Value(PointerType::getUnqual(Type::getInt8Ty(getGlobalContext())), + Value(Type::getInt8PtrTy(getGlobalContext()), PseudoSourceValueVal) {} void PseudoSourceValue::printCustom(raw_ostream &O) const { Modified: llvm/trunk/lib/CodeGen/ShadowStackGC.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/ShadowStackGC.cpp?rev=83379&r1=83378&r2=83379&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/ShadowStackGC.cpp (original) +++ llvm/trunk/lib/CodeGen/ShadowStackGC.cpp Tue Oct 6 10:40:36 2009 @@ -189,7 +189,7 @@ Constant *ShadowStackGC::GetFrameMap(Function &F) { // doInitialization creates the abstract type of this value. - Type *VoidPtr = PointerType::getUnqual(Type::getInt8Ty(F.getContext())); + const Type *VoidPtr = Type::getInt8PtrTy(F.getContext()); // Truncate the ShadowStackDescriptor if some metadata is null. unsigned NumMeta = 0; Modified: llvm/trunk/lib/CodeGen/SjLjEHPrepare.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SjLjEHPrepare.cpp?rev=83379&r1=83378&r2=83379&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SjLjEHPrepare.cpp (original) +++ llvm/trunk/lib/CodeGen/SjLjEHPrepare.cpp Tue Oct 6 10:40:36 2009 @@ -88,7 +88,7 @@ // Build the function context structure. // builtin_setjmp uses a five word jbuf const Type *VoidPtrTy = - PointerType::getUnqual(Type::getInt8Ty(M.getContext())); + Type::getInt8PtrTy(M.getContext()); const Type *Int32Ty = Type::getInt32Ty(M.getContext()); FunctionContextTy = StructType::get(M.getContext(), @@ -378,7 +378,7 @@ // the instruction hasn't already been removed. 
if (!I->getParent()) continue; Value *Val = new LoadInst(ExceptionAddr, "exception", true, I); - Type *Ty = PointerType::getUnqual(Type::getInt8Ty(F.getContext())); + const Type *Ty = Type::getInt8PtrTy(F.getContext()); Val = CastInst::Create(Instruction::IntToPtr, Val, Ty, "", I); I->replaceAllUsesWith(Val); @@ -455,8 +455,8 @@ // Call the setjmp instrinsic. It fills in the rest of the jmpbuf Value *SetjmpArg = CastInst::Create(Instruction::BitCast, FieldPtr, - Type::getInt8Ty(F.getContext())->getPointerTo(), "", - EntryBB->getTerminator()); + Type::getInt8PtrTy(F.getContext()), "", + EntryBB->getTerminator()); Value *DispatchVal = CallInst::Create(BuiltinSetjmpFn, SetjmpArg, "dispatch", EntryBB->getTerminator()); Modified: llvm/trunk/lib/ExecutionEngine/ExecutionEngine.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/ExecutionEngine/ExecutionEngine.cpp?rev=83379&r1=83378&r2=83379&view=diff ============================================================================== --- llvm/trunk/lib/ExecutionEngine/ExecutionEngine.cpp (original) +++ llvm/trunk/lib/ExecutionEngine/ExecutionEngine.cpp Tue Oct 6 10:40:36 2009 @@ -243,7 +243,7 @@ char *Result = new char[(InputArgv.size()+1)*PtrSize]; DEBUG(errs() << "JIT: ARGV = " << (void*)Result << "\n"); - const Type *SBytePtr = PointerType::getUnqual(Type::getInt8Ty(C)); + const Type *SBytePtr = Type::getInt8PtrTy(C); for (unsigned i = 0; i != InputArgv.size(); ++i) { unsigned Size = InputArgv[i].size()+1; Modified: llvm/trunk/lib/Transforms/IPO/ExtractGV.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/IPO/ExtractGV.cpp?rev=83379&r1=83378&r2=83379&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/IPO/ExtractGV.cpp (original) +++ llvm/trunk/lib/Transforms/IPO/ExtractGV.cpp Tue Oct 6 10:40:36 2009 @@ -102,7 +102,7 @@ { std::vector AUGs; const Type *SBP= - PointerType::getUnqual(Type::getInt8Ty(M.getContext())); + 
Type::getInt8PtrTy(M.getContext()); for (std::vector::iterator GI = Named.begin(), GE = Named.end(); GI != GE; ++GI) { (*GI)->setLinkage(GlobalValue::ExternalLinkage); Modified: llvm/trunk/lib/Transforms/IPO/LowerSetJmp.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/IPO/LowerSetJmp.cpp?rev=83379&r1=83378&r2=83379&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/IPO/LowerSetJmp.cpp (original) +++ llvm/trunk/lib/Transforms/IPO/LowerSetJmp.cpp Tue Oct 6 10:40:36 2009 @@ -201,7 +201,7 @@ // This function is always successful, unless it isn't. bool LowerSetJmp::doInitialization(Module& M) { - const Type *SBPTy = PointerType::getUnqual(Type::getInt8Ty(M.getContext())); + const Type *SBPTy = Type::getInt8PtrTy(M.getContext()); const Type *SBPPTy = PointerType::getUnqual(SBPTy); // N.B. See llvm/runtime/GCCLibraries/libexception/SJLJ-Exception.h for @@ -266,7 +266,7 @@ void LowerSetJmp::TransformLongJmpCall(CallInst* Inst) { const Type* SBPTy = - PointerType::getUnqual(Type::getInt8Ty(Inst->getContext())); + Type::getInt8PtrTy(Inst->getContext()); // Create the call to "__llvm_sjljeh_throw_longjmp". This takes the // same parameters as "longjmp", except that the buffer is cast to a @@ -319,7 +319,7 @@ // Fill in the alloca and call to initialize the SJ map. const Type *SBPTy = - PointerType::getUnqual(Type::getInt8Ty(Func->getContext())); + Type::getInt8PtrTy(Func->getContext()); AllocaInst* Map = new AllocaInst(SBPTy, 0, "SJMap", Inst); CallInst::Create(InitSJMap, Map, "", Inst); return SJMap[Func] = Map; @@ -389,7 +389,7 @@ // Add this setjmp to the setjmp map. 
const Type* SBPTy = - PointerType::getUnqual(Type::getInt8Ty(Inst->getContext())); + Type::getInt8PtrTy(Inst->getContext()); CastInst* BufPtr = new BitCastInst(Inst->getOperand(1), SBPTy, "SBJmpBuf", Inst); std::vector Args = Modified: llvm/trunk/lib/Transforms/IPO/RaiseAllocations.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/IPO/RaiseAllocations.cpp?rev=83379&r1=83378&r2=83379&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/IPO/RaiseAllocations.cpp (original) +++ llvm/trunk/lib/Transforms/IPO/RaiseAllocations.cpp Tue Oct 6 10:40:36 2009 @@ -77,7 +77,7 @@ // Get the expected prototype for malloc const FunctionType *Malloc1Type = - FunctionType::get(PointerType::getUnqual(Type::getInt8Ty(M.getContext())), + FunctionType::get(Type::getInt8PtrTy(M.getContext()), std::vector(1, Type::getInt64Ty(M.getContext())), false); @@ -229,7 +229,7 @@ Value *Source = *CS.arg_begin(); if (!isa(Source->getType())) Source = new IntToPtrInst(Source, - PointerType::getUnqual(Type::getInt8Ty(M.getContext())), + Type::getInt8PtrTy(M.getContext()), "FreePtrCast", I); new FreeInst(Source, I); Modified: llvm/trunk/lib/Transforms/Instrumentation/ProfilingUtils.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Instrumentation/ProfilingUtils.cpp?rev=83379&r1=83378&r2=83379&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/Instrumentation/ProfilingUtils.cpp (original) +++ llvm/trunk/lib/Transforms/Instrumentation/ProfilingUtils.cpp Tue Oct 6 10:40:36 2009 @@ -25,9 +25,9 @@ GlobalValue *Array) { LLVMContext &Context = MainFn->getContext(); const Type *ArgVTy = - PointerType::getUnqual(PointerType::getUnqual(Type::getInt8Ty(Context))); + PointerType::getUnqual(Type::getInt8PtrTy(Context)); const PointerType *UIntPtr = - PointerType::getUnqual(Type::getInt32Ty(Context)); + Type::getInt32PtrTy(Context); Module &M = 
*MainFn->getParent(); Constant *InitFn = M.getOrInsertFunction(FnName, Type::getInt32Ty(Context), Type::getInt32Ty(Context), Modified: llvm/trunk/lib/Transforms/Scalar/InstructionCombining.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/InstructionCombining.cpp?rev=83379&r1=83378&r2=83379&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/Scalar/InstructionCombining.cpp (original) +++ llvm/trunk/lib/Transforms/Scalar/InstructionCombining.cpp Tue Oct 6 10:40:36 2009 @@ -9947,7 +9947,7 @@ // If the call and callee calling conventions don't match, this call must // be unreachable, as the call is undefined. new StoreInst(ConstantInt::getTrue(*Context), - UndefValue::get(PointerType::getUnqual(Type::getInt1Ty(*Context))), + UndefValue::get(Type::getInt1PtrTy(*Context)), OldCall); if (!OldCall->use_empty()) OldCall->replaceAllUsesWith(UndefValue::get(OldCall->getType())); @@ -9961,7 +9961,7 @@ // undef so that we know that this code is not reachable, despite the fact // that we can't modify the CFG here. new StoreInst(ConstantInt::getTrue(*Context), - UndefValue::get(PointerType::getUnqual(Type::getInt1Ty(*Context))), + UndefValue::get(Type::getInt1PtrTy(*Context)), CS.getInstruction()); if (!CS.getInstruction()->use_empty()) @@ -11235,7 +11235,7 @@ if (isa(Op)) { // Insert a new store to null because we cannot modify the CFG here. 
new StoreInst(ConstantInt::getTrue(*Context), - UndefValue::get(PointerType::getUnqual(Type::getInt1Ty(*Context))), &FI); + UndefValue::get(Type::getInt1PtrTy(*Context)), &FI); return EraseInstFromFunction(FI); } Modified: llvm/trunk/lib/Transforms/Scalar/MemCpyOptimizer.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/MemCpyOptimizer.cpp?rev=83379&r1=83378&r2=83379&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/Scalar/MemCpyOptimizer.cpp (original) +++ llvm/trunk/lib/Transforms/Scalar/MemCpyOptimizer.cpp Tue Oct 6 10:40:36 2009 @@ -443,7 +443,7 @@ StartPtr = Range.StartPtr; // Cast the start ptr to be i8* as memset requires. - const Type *i8Ptr = PointerType::getUnqual(Type::getInt8Ty(Context)); + const Type *i8Ptr = Type::getInt8PtrTy(Context); if (StartPtr->getType() != i8Ptr) StartPtr = new BitCastInst(StartPtr, i8Ptr, StartPtr->getName(), InsertPt); Modified: llvm/trunk/lib/Transforms/Scalar/SimplifyLibCalls.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/SimplifyLibCalls.cpp?rev=83379&r1=83378&r2=83379&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/Scalar/SimplifyLibCalls.cpp (original) +++ llvm/trunk/lib/Transforms/Scalar/SimplifyLibCalls.cpp Tue Oct 6 10:40:36 2009 @@ -124,7 +124,7 @@ /// CastToCStr - Return V if it is an i8*, otherwise cast it to i8*. 
Value *LibCallOptimization::CastToCStr(Value *V, IRBuilder<> &B) { return - B.CreateBitCast(V, PointerType::getUnqual(Type::getInt8Ty(*Context)), "cstr"); + B.CreateBitCast(V, Type::getInt8PtrTy(*Context), "cstr"); } /// EmitStrLen - Emit a call to the strlen function to the builder, for the @@ -138,7 +138,7 @@ Constant *StrLen =M->getOrInsertFunction("strlen", AttrListPtr::get(AWI, 2), TD->getIntPtrType(*Context), - PointerType::getUnqual(Type::getInt8Ty(*Context)), + Type::getInt8PtrTy(*Context), NULL); CallInst *CI = B.CreateCall(StrLen, CastToCStr(Ptr, B), "strlen"); if (const Function *F = dyn_cast(StrLen->stripPointerCasts())) @@ -169,8 +169,8 @@ AWI = AttributeWithIndex::get(~0u, Attribute::ReadOnly | Attribute::NoUnwind); Value *MemChr = M->getOrInsertFunction("memchr", AttrListPtr::get(&AWI, 1), - PointerType::getUnqual(Type::getInt8Ty(*Context)), - PointerType::getUnqual(Type::getInt8Ty(*Context)), + Type::getInt8PtrTy(*Context), + Type::getInt8PtrTy(*Context), Type::getInt32Ty(*Context), TD->getIntPtrType(*Context), NULL); CallInst *CI = B.CreateCall3(MemChr, CastToCStr(Ptr, B), Val, Len, "memchr"); @@ -193,8 +193,8 @@ Value *MemCmp = M->getOrInsertFunction("memcmp", AttrListPtr::get(AWI, 3), Type::getInt32Ty(*Context), - PointerType::getUnqual(Type::getInt8Ty(*Context)), - PointerType::getUnqual(Type::getInt8Ty(*Context)), + Type::getInt8PtrTy(*Context), + Type::getInt8PtrTy(*Context), TD->getIntPtrType(*Context), NULL); CallInst *CI = B.CreateCall3(MemCmp, CastToCStr(Ptr1, B), CastToCStr(Ptr2, B), Len, "memcmp"); @@ -273,7 +273,7 @@ Value *PutS = M->getOrInsertFunction("puts", AttrListPtr::get(AWI, 2), Type::getInt32Ty(*Context), - PointerType::getUnqual(Type::getInt8Ty(*Context)), + Type::getInt8PtrTy(*Context), NULL); CallInst *CI = B.CreateCall(PutS, CastToCStr(Str, B), "puts"); if (const Function *F = dyn_cast(PutS->stripPointerCasts())) @@ -313,11 +313,11 @@ Constant *F; if (isa(File->getType())) F = M->getOrInsertFunction("fputs", 
AttrListPtr::get(AWI, 3), Type::getInt32Ty(*Context), - PointerType::getUnqual(Type::getInt8Ty(*Context)), + Type::getInt8PtrTy(*Context), File->getType(), NULL); else F = M->getOrInsertFunction("fputs", Type::getInt32Ty(*Context), - PointerType::getUnqual(Type::getInt8Ty(*Context)), + Type::getInt8PtrTy(*Context), File->getType(), NULL); CallInst *CI = B.CreateCall2(F, CastToCStr(Str, B), File, "fputs"); @@ -338,12 +338,12 @@ if (isa(File->getType())) F = M->getOrInsertFunction("fwrite", AttrListPtr::get(AWI, 3), TD->getIntPtrType(*Context), - PointerType::getUnqual(Type::getInt8Ty(*Context)), + Type::getInt8PtrTy(*Context), TD->getIntPtrType(*Context), TD->getIntPtrType(*Context), File->getType(), NULL); else F = M->getOrInsertFunction("fwrite", TD->getIntPtrType(*Context), - PointerType::getUnqual(Type::getInt8Ty(*Context)), + Type::getInt8PtrTy(*Context), TD->getIntPtrType(*Context), TD->getIntPtrType(*Context), File->getType(), NULL); CallInst *CI = B.CreateCall4(F, CastToCStr(Ptr, B), Size, @@ -510,7 +510,7 @@ // Verify the "strcat" function prototype. const FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 2 || - FT->getReturnType() != PointerType::getUnqual(Type::getInt8Ty(*Context)) || + FT->getReturnType() != Type::getInt8PtrTy(*Context) || FT->getParamType(0) != FT->getReturnType() || FT->getParamType(1) != FT->getReturnType()) return 0; @@ -560,7 +560,7 @@ // Verify the "strncat" function prototype. const FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 3 || - FT->getReturnType() != PointerType::getUnqual(Type::getInt8Ty(*Context)) || + FT->getReturnType() != Type::getInt8PtrTy(*Context) || FT->getParamType(0) != FT->getReturnType() || FT->getParamType(1) != FT->getReturnType() || !isa(FT->getParamType(2))) @@ -608,7 +608,7 @@ // Verify the "strchr" function prototype. 
const FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 2 || - FT->getReturnType() != PointerType::getUnqual(Type::getInt8Ty(*Context)) || + FT->getReturnType() != Type::getInt8PtrTy(*Context) || FT->getParamType(0) != FT->getReturnType()) return 0; @@ -666,7 +666,7 @@ const FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 2 || FT->getReturnType() != Type::getInt32Ty(*Context) || FT->getParamType(0) != FT->getParamType(1) || - FT->getParamType(0) != PointerType::getUnqual(Type::getInt8Ty(*Context))) + FT->getParamType(0) != Type::getInt8PtrTy(*Context)) return 0; Value *Str1P = CI->getOperand(1), *Str2P = CI->getOperand(2); @@ -713,7 +713,7 @@ const FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 3 || FT->getReturnType() != Type::getInt32Ty(*Context) || FT->getParamType(0) != FT->getParamType(1) || - FT->getParamType(0) != PointerType::getUnqual(Type::getInt8Ty(*Context)) || + FT->getParamType(0) != Type::getInt8PtrTy(*Context) || !isa(FT->getParamType(2))) return 0; @@ -759,7 +759,7 @@ const FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 2 || FT->getReturnType() != FT->getParamType(0) || FT->getParamType(0) != FT->getParamType(1) || - FT->getParamType(0) != PointerType::getUnqual(Type::getInt8Ty(*Context))) + FT->getParamType(0) != Type::getInt8PtrTy(*Context)) return 0; Value *Dst = CI->getOperand(1), *Src = CI->getOperand(2); @@ -789,7 +789,7 @@ const FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) || FT->getParamType(0) != FT->getParamType(1) || - FT->getParamType(0) != PointerType::getUnqual(Type::getInt8Ty(*Context)) || + FT->getParamType(0) != Type::getInt8PtrTy(*Context) || !isa(FT->getParamType(2))) return 0; @@ -837,7 +837,7 @@ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { const FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 1 || - 
FT->getParamType(0) != PointerType::getUnqual(Type::getInt8Ty(*Context)) || + FT->getParamType(0) != Type::getInt8PtrTy(*Context) || !isa(FT->getReturnType())) return 0; Modified: llvm/trunk/lib/Transforms/Utils/InlineFunction.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Utils/InlineFunction.cpp?rev=83379&r1=83378&r2=83379&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/Utils/InlineFunction.cpp (original) +++ llvm/trunk/lib/Transforms/Utils/InlineFunction.cpp Tue Oct 6 10:40:36 2009 @@ -316,7 +316,7 @@ !CalledFunc->onlyReadsMemory()) { const Type *AggTy = cast(I->getType())->getElementType(); const Type *VoidPtrTy = - PointerType::getUnqual(Type::getInt8Ty(Context)); + Type::getInt8PtrTy(Context); // Create the alloca. If we have TargetData, use nice alignment. unsigned Align = 1; Modified: llvm/trunk/lib/Transforms/Utils/LowerAllocations.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Utils/LowerAllocations.cpp?rev=83379&r1=83378&r2=83379&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/Utils/LowerAllocations.cpp (original) +++ llvm/trunk/lib/Transforms/Utils/LowerAllocations.cpp Tue Oct 6 10:40:36 2009 @@ -87,7 +87,7 @@ // This function is always successful. // bool LowerAllocations::doInitialization(Module &M) { - const Type *BPTy = PointerType::getUnqual(Type::getInt8Ty(M.getContext())); + const Type *BPTy = Type::getInt8PtrTy(M.getContext()); FreeFunc = M.getOrInsertFunction("free" , Type::getVoidTy(M.getContext()), BPTy, (Type *)0); return true; @@ -123,7 +123,7 @@ } else if (FreeInst *FI = dyn_cast(I)) { Value *PtrCast = new BitCastInst(FI->getOperand(0), - PointerType::getUnqual(Type::getInt8Ty(BB.getContext())), "", I); + Type::getInt8PtrTy(BB.getContext()), "", I); // Insert a call to the free function... 
CallInst::Create(FreeFunc, PtrCast, "", I)->setTailCall(); Modified: llvm/trunk/lib/Transforms/Utils/LowerInvoke.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Utils/LowerInvoke.cpp?rev=83379&r1=83378&r2=83379&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/Utils/LowerInvoke.cpp (original) +++ llvm/trunk/lib/Transforms/Utils/LowerInvoke.cpp Tue Oct 6 10:40:36 2009 @@ -116,7 +116,7 @@ // current module. bool LowerInvoke::doInitialization(Module &M) { const Type *VoidPtrTy = - PointerType::getUnqual(Type::getInt8Ty(M.getContext())); + Type::getInt8PtrTy(M.getContext()); AbortMessage = 0; if (ExpensiveEHSupport) { // Insert a type for the linked list of jump buffers. @@ -530,7 +530,7 @@ "TheJmpBuf", EntryBB->getTerminator()); JmpBufPtr = new BitCastInst(JmpBufPtr, - PointerType::getUnqual(Type::getInt8Ty(F.getContext())), + Type::getInt8PtrTy(F.getContext()), "tmp", EntryBB->getTerminator()); Value *SJRet = CallInst::Create(SetJmpFn, JmpBufPtr, "sjret", EntryBB->getTerminator()); @@ -585,7 +585,7 @@ Idx[0] = GetElementPtrInst::Create(BufPtr, Idx.begin(), Idx.end(), "JmpBuf", UnwindBlock); Idx[0] = new BitCastInst(Idx[0], - PointerType::getUnqual(Type::getInt8Ty(F.getContext())), + Type::getInt8PtrTy(F.getContext()), "tmp", UnwindBlock); Idx[1] = ConstantInt::get(Type::getInt32Ty(F.getContext()), 1); CallInst::Create(LongJmpFn, Idx.begin(), Idx.end(), "", UnwindBlock); Modified: llvm/trunk/lib/VMCore/AutoUpgrade.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/AutoUpgrade.cpp?rev=83379&r1=83378&r2=83379&view=diff ============================================================================== --- llvm/trunk/lib/VMCore/AutoUpgrade.cpp (original) +++ llvm/trunk/lib/VMCore/AutoUpgrade.cpp Tue Oct 6 10:40:36 2009 @@ -265,7 +265,7 @@ if (isLoadH || isLoadL) { Value *Op1 = UndefValue::get(Op0->getType()); Value *Addr = new BitCastInst(CI->getOperand(2), - 
PointerType::getUnqual(Type::getDoubleTy(C)), + Type::getDoublePtrTy(C), "upgraded.", CI); Value *Load = new LoadInst(Addr, "upgraded.", false, 8, CI); Value *Idx = ConstantInt::get(Type::getInt32Ty(C), 0); Modified: llvm/trunk/lib/VMCore/Instructions.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/Instructions.cpp?rev=83379&r1=83378&r2=83379&view=diff ============================================================================== --- llvm/trunk/lib/VMCore/Instructions.cpp (original) +++ llvm/trunk/lib/VMCore/Instructions.cpp Tue Oct 6 10:40:36 2009 @@ -498,7 +498,7 @@ // Create the call to Malloc. BasicBlock* BB = InsertBefore ? InsertBefore->getParent() : InsertAtEnd; Module* M = BB->getParent()->getParent(); - const Type *BPTy = PointerType::getUnqual(Type::getInt8Ty(BB->getContext())); + const Type *BPTy = Type::getInt8PtrTy(BB->getContext()); // prototype malloc as "void *malloc(size_t)" Constant *MallocF = M->getOrInsertFunction("malloc", BPTy, IntPtrTy, NULL); if (!cast(MallocF)->doesNotAlias(0)) Modified: llvm/trunk/lib/VMCore/Type.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/Type.cpp?rev=83379&r1=83378&r2=83379&view=diff ============================================================================== --- llvm/trunk/lib/VMCore/Type.cpp (original) +++ llvm/trunk/lib/VMCore/Type.cpp Tue Oct 6 10:40:36 2009 @@ -358,6 +358,46 @@ return &C.pImpl->Int64Ty; } +const PointerType *Type::getFloatPtrTy(LLVMContext &C, unsigned AS) { + return getFloatTy(C)->getPointerTo(AS); +} + +const PointerType *Type::getDoublePtrTy(LLVMContext &C, unsigned AS) { + return getDoubleTy(C)->getPointerTo(AS); +} + +const PointerType *Type::getX86_FP80PtrTy(LLVMContext &C, unsigned AS) { + return getX86_FP80Ty(C)->getPointerTo(AS); +} + +const PointerType *Type::getFP128PtrTy(LLVMContext &C, unsigned AS) { + return getFP128Ty(C)->getPointerTo(AS); +} + +const PointerType *Type::getPPC_FP128PtrTy(LLVMContext &C, unsigned AS) { + return 
getPPC_FP128Ty(C)->getPointerTo(AS); +} + +const PointerType *Type::getInt1PtrTy(LLVMContext &C, unsigned AS) { + return getInt1Ty(C)->getPointerTo(AS); +} + +const PointerType *Type::getInt8PtrTy(LLVMContext &C, unsigned AS) { + return getInt8Ty(C)->getPointerTo(AS); +} + +const PointerType *Type::getInt16PtrTy(LLVMContext &C, unsigned AS) { + return getInt16Ty(C)->getPointerTo(AS); +} + +const PointerType *Type::getInt32PtrTy(LLVMContext &C, unsigned AS) { + return getInt32Ty(C)->getPointerTo(AS); +} + +const PointerType *Type::getInt64PtrTy(LLVMContext &C, unsigned AS) { + return getInt64Ty(C)->getPointerTo(AS); +} + //===----------------------------------------------------------------------===// // Derived Type Constructors //===----------------------------------------------------------------------===// Modified: llvm/trunk/tools/bugpoint/Miscompilation.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/bugpoint/Miscompilation.cpp?rev=83379&r1=83378&r2=83379&view=diff ============================================================================== --- llvm/trunk/tools/bugpoint/Miscompilation.cpp (original) +++ llvm/trunk/tools/bugpoint/Miscompilation.cpp Tue Oct 6 10:40:36 2009 @@ -704,8 +704,8 @@ // Prototype: void *getPointerToNamedFunction(const char* Name) Constant *resolverFunc = Safe->getOrInsertFunction("getPointerToNamedFunction", - PointerType::getUnqual(Type::getInt8Ty(Safe->getContext())), - PointerType::getUnqual(Type::getInt8Ty(Safe->getContext())), + Type::getInt8PtrTy(Safe->getContext()), + Type::getInt8PtrTy(Safe->getContext()), (Type *)0); // Use the function we just added to get addresses of functions we need. 
Modified: llvm/trunk/unittests/Support/TypeBuilderTest.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/unittests/Support/TypeBuilderTest.cpp?rev=83379&r1=83378&r2=83379&view=diff ============================================================================== --- llvm/trunk/unittests/Support/TypeBuilderTest.cpp (original) +++ llvm/trunk/unittests/Support/TypeBuilderTest.cpp Tue Oct 6 10:40:36 2009 @@ -20,7 +20,7 @@ EXPECT_EQ(Type::getVoidTy(getGlobalContext()), (TypeBuilder::get(getGlobalContext()))); EXPECT_EQ(Type::getVoidTy(getGlobalContext()), (TypeBuilder::get(getGlobalContext()))); // Special case for C compatibility: - EXPECT_EQ(PointerType::getUnqual(Type::getInt8Ty(getGlobalContext())), + EXPECT_EQ(Type::getInt8PtrTy(getGlobalContext()), (TypeBuilder::get(getGlobalContext()))); } @@ -64,21 +64,21 @@ } TEST(TypeBuilderTest, Derived) { - EXPECT_EQ(PointerType::getUnqual(PointerType::getUnqual(Type::getInt8Ty(getGlobalContext()))), + EXPECT_EQ(PointerType::getUnqual(Type::getInt8PtrTy(getGlobalContext())), (TypeBuilder::get(getGlobalContext()))); EXPECT_EQ(ArrayType::get(Type::getInt8Ty(getGlobalContext()), 7), (TypeBuilder::get(getGlobalContext()))); EXPECT_EQ(ArrayType::get(Type::getInt8Ty(getGlobalContext()), 0), (TypeBuilder::get(getGlobalContext()))); - EXPECT_EQ(PointerType::getUnqual(PointerType::getUnqual(Type::getInt8Ty(getGlobalContext()))), + EXPECT_EQ(PointerType::getUnqual(Type::getInt8PtrTy(getGlobalContext())), (TypeBuilder**, false>::get(getGlobalContext()))); EXPECT_EQ(ArrayType::get(Type::getInt8Ty(getGlobalContext()), 7), (TypeBuilder[7], false>::get(getGlobalContext()))); EXPECT_EQ(ArrayType::get(Type::getInt8Ty(getGlobalContext()), 0), (TypeBuilder[], false>::get(getGlobalContext()))); - EXPECT_EQ(PointerType::getUnqual(PointerType::getUnqual(Type::getInt8Ty(getGlobalContext()))), + EXPECT_EQ(PointerType::getUnqual(Type::getInt8PtrTy(getGlobalContext())), (TypeBuilder**, true>::get(getGlobalContext()))); 
EXPECT_EQ(ArrayType::get(Type::getInt8Ty(getGlobalContext()), 7), (TypeBuilder[7], true>::get(getGlobalContext()))); @@ -107,7 +107,7 @@ EXPECT_EQ(Type::getInt8Ty(getGlobalContext()), (TypeBuilder, true>::get(getGlobalContext()))); - EXPECT_EQ(PointerType::getUnqual(Type::getInt8Ty(getGlobalContext())), + EXPECT_EQ(Type::getInt8PtrTy(getGlobalContext()), (TypeBuilder::get(getGlobalContext()))); } Modified: llvm/trunk/unittests/Transforms/Utils/Cloning.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/unittests/Transforms/Utils/Cloning.cpp?rev=83379&r1=83378&r2=83379&view=diff ============================================================================== --- llvm/trunk/unittests/Transforms/Utils/Cloning.cpp (original) +++ llvm/trunk/unittests/Transforms/Utils/Cloning.cpp Tue Oct 6 10:40:36 2009 @@ -64,7 +64,7 @@ TEST(CloneInstruction, Inbounds) { LLVMContext context; - Value *V = new Argument(Type::getInt32Ty(context)->getPointerTo()); + Value *V = new Argument(Type::getInt32PtrTy(context)); Constant *Z = Constant::getNullValue(Type::getInt32Ty(context)); std::vector ops; ops.push_back(Z); From richard at xmos.com Tue Oct 6 10:41:52 2009 From: richard at xmos.com (Richard Osborne) Date: Tue, 06 Oct 2009 15:41:52 -0000 Subject: [llvm-commits] [llvm] r83380 - /llvm/trunk/lib/Target/XCore/XCoreSubtarget.cpp Message-ID: <200910061541.n96Ffqvb026322@zion.cs.uiuc.edu> Author: friedgold Date: Tue Oct 6 10:41:52 2009 New Revision: 83380 URL: http://llvm.org/viewvc/llvm-project?rev=83380&view=rev Log: Default to the xs1b subtarget Modified: llvm/trunk/lib/Target/XCore/XCoreSubtarget.cpp Modified: llvm/trunk/lib/Target/XCore/XCoreSubtarget.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/XCore/XCoreSubtarget.cpp?rev=83380&r1=83379&r2=83380&view=diff ============================================================================== --- llvm/trunk/lib/Target/XCore/XCoreSubtarget.cpp (original) +++ llvm/trunk/lib/Target/XCore/XCoreSubtarget.cpp Tue Oct 6 
10:41:52 2009 @@ -20,7 +20,7 @@ : IsXS1A(false), IsXS1B(false) { - std::string CPU = "xs1a-generic"; + std::string CPU = "xs1b-generic"; // Parse features string. ParseSubtargetFeatures(FS, CPU); From richard at xmos.com Tue Oct 6 11:01:10 2009 From: richard at xmos.com (Richard Osborne) Date: Tue, 06 Oct 2009 16:01:10 -0000 Subject: [llvm-commits] [llvm] r83381 - in /llvm/trunk/lib/Target/XCore: XCore.td XCoreISelDAGToDAG.cpp XCoreISelLowering.cpp XCoreInstrInfo.td XCoreSubtarget.cpp XCoreSubtarget.h XCoreTargetObjectFile.cpp Message-ID: <200910061601.n96G1Acq028855@zion.cs.uiuc.edu> Author: friedgold Date: Tue Oct 6 11:01:09 2009 New Revision: 83381 URL: http://llvm.org/viewvc/llvm-project?rev=83381&view=rev Log: Remove xs1a subtarget. xs1a is a preproduction device used in early development boards which is no longer supported in the XMOS toolchain. Modified: llvm/trunk/lib/Target/XCore/XCore.td llvm/trunk/lib/Target/XCore/XCoreISelDAGToDAG.cpp llvm/trunk/lib/Target/XCore/XCoreISelLowering.cpp llvm/trunk/lib/Target/XCore/XCoreInstrInfo.td llvm/trunk/lib/Target/XCore/XCoreSubtarget.cpp llvm/trunk/lib/Target/XCore/XCoreSubtarget.h llvm/trunk/lib/Target/XCore/XCoreTargetObjectFile.cpp Modified: llvm/trunk/lib/Target/XCore/XCore.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/XCore/XCore.td?rev=83381&r1=83380&r2=83381&view=diff ============================================================================== --- llvm/trunk/lib/Target/XCore/XCore.td (original) +++ llvm/trunk/lib/Target/XCore/XCore.td Tue Oct 6 11:01:09 2009 @@ -33,10 +33,6 @@ // XCore Subtarget features. 
//===----------------------------------------------------------------------===// -def FeatureXS1A - : SubtargetFeature<"xs1a", "IsXS1A", "true", - "Enable XS1A instructions">; - def FeatureXS1B : SubtargetFeature<"xs1b", "IsXS1B", "true", "Enable XS1B instructions">; @@ -48,8 +44,7 @@ class Proc Features> : Processor; -def : Proc<"generic", [FeatureXS1A]>; -def : Proc<"xs1a-generic", [FeatureXS1A]>; +def : Proc<"generic", [FeatureXS1B]>; def : Proc<"xs1b-generic", [FeatureXS1B]>; //===----------------------------------------------------------------------===// Modified: llvm/trunk/lib/Target/XCore/XCoreISelDAGToDAG.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/XCore/XCoreISelDAGToDAG.cpp?rev=83381&r1=83380&r2=83381&view=diff ============================================================================== --- llvm/trunk/lib/Target/XCore/XCoreISelDAGToDAG.cpp (original) +++ llvm/trunk/lib/Target/XCore/XCoreISelDAGToDAG.cpp Tue Oct 6 11:01:09 2009 @@ -186,17 +186,14 @@ } case ISD::SMUL_LOHI: { // FIXME fold addition into the macc instruction - if (!Subtarget.isXS1A()) { - SDValue Zero(CurDAG->getMachineNode(XCore::LDC_ru6, dl, MVT::i32, - CurDAG->getTargetConstant(0, MVT::i32)), 0); - SDValue Ops[] = { Zero, Zero, Op.getOperand(0), Op.getOperand(1) }; - SDNode *ResNode = CurDAG->getMachineNode(XCore::MACCS_l4r, dl, - MVT::i32, MVT::i32, Ops, 4); - ReplaceUses(SDValue(N, 0), SDValue(ResNode, 1)); - ReplaceUses(SDValue(N, 1), SDValue(ResNode, 0)); - return NULL; - } - break; + SDValue Zero(CurDAG->getMachineNode(XCore::LDC_ru6, dl, MVT::i32, + CurDAG->getTargetConstant(0, MVT::i32)), 0); + SDValue Ops[] = { Zero, Zero, Op.getOperand(0), Op.getOperand(1) }; + SDNode *ResNode = CurDAG->getMachineNode(XCore::MACCS_l4r, dl, + MVT::i32, MVT::i32, Ops, 4); + ReplaceUses(SDValue(N, 0), SDValue(ResNode, 1)); + ReplaceUses(SDValue(N, 1), SDValue(ResNode, 0)); + return NULL; } case ISD::UMUL_LOHI: { // FIXME fold addition into the macc / lmul instruction @@ 
-211,22 +208,16 @@ return NULL; } case XCoreISD::LADD: { - if (!Subtarget.isXS1A()) { - SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1), - Op.getOperand(2) }; - return CurDAG->getMachineNode(XCore::LADD_l5r, dl, MVT::i32, MVT::i32, - Ops, 3); - } - break; + SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1), + Op.getOperand(2) }; + return CurDAG->getMachineNode(XCore::LADD_l5r, dl, MVT::i32, MVT::i32, + Ops, 3); } case XCoreISD::LSUB: { - if (!Subtarget.isXS1A()) { - SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1), - Op.getOperand(2) }; - return CurDAG->getMachineNode(XCore::LSUB_l5r, dl, MVT::i32, MVT::i32, - Ops, 3); - } - break; + SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1), + Op.getOperand(2) }; + return CurDAG->getMachineNode(XCore::LSUB_l5r, dl, MVT::i32, MVT::i32, + Ops, 3); } // Other cases are autogenerated. } Modified: llvm/trunk/lib/Target/XCore/XCoreISelLowering.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/XCore/XCoreISelLowering.cpp?rev=83381&r1=83380&r2=83381&view=diff ============================================================================== --- llvm/trunk/lib/Target/XCore/XCoreISelLowering.cpp (original) +++ llvm/trunk/lib/Target/XCore/XCoreISelLowering.cpp Tue Oct 6 11:01:09 2009 @@ -89,13 +89,8 @@ setOperationAction(ISD::SELECT_CC, MVT::Other, Expand); // 64bit - if (!Subtarget.isXS1A()) { - setOperationAction(ISD::ADD, MVT::i64, Custom); - setOperationAction(ISD::SUB, MVT::i64, Custom); - } - if (Subtarget.isXS1A()) { - setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); - } + setOperationAction(ISD::ADD, MVT::i64, Custom); + setOperationAction(ISD::SUB, MVT::i64, Custom); setOperationAction(ISD::MULHS, MVT::i32, Expand); setOperationAction(ISD::MULHU, MVT::i32, Expand); setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand); @@ -221,17 +216,16 @@ DebugLoc dl = GA.getDebugLoc(); if (isa(GV)) { return DAG.getNode(XCoreISD::PCRelativeWrapper, dl, MVT::i32, GA); - } else if (!Subtarget.isXS1A()) { - 
const GlobalVariable *GVar = dyn_cast(GV); - if (!GVar) { - // If GV is an alias then use the aliasee to determine constness - if (const GlobalAlias *GA = dyn_cast(GV)) - GVar = dyn_cast_or_null(GA->resolveAliasedGlobal()); - } - bool isConst = GVar && GVar->isConstant(); - if (isConst) { - return DAG.getNode(XCoreISD::CPRelativeWrapper, dl, MVT::i32, GA); - } + } + const GlobalVariable *GVar = dyn_cast(GV); + if (!GVar) { + // If GV is an alias then use the aliasee to determine constness + if (const GlobalAlias *GA = dyn_cast(GV)) + GVar = dyn_cast_or_null(GA->resolveAliasedGlobal()); + } + bool isConst = GVar && GVar->isConstant(); + if (isConst) { + return DAG.getNode(XCoreISD::CPRelativeWrapper, dl, MVT::i32, GA); } return DAG.getNode(XCoreISD::DPRelativeWrapper, dl, MVT::i32, GA); } @@ -297,21 +291,16 @@ ConstantPoolSDNode *CP = cast(Op); // FIXME there isn't really debug info here DebugLoc dl = CP->getDebugLoc(); - if (Subtarget.isXS1A()) { - llvm_unreachable("Lowering of constant pool unimplemented"); - return SDValue(); + EVT PtrVT = Op.getValueType(); + SDValue Res; + if (CP->isMachineConstantPoolEntry()) { + Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, + CP->getAlignment()); } else { - EVT PtrVT = Op.getValueType(); - SDValue Res; - if (CP->isMachineConstantPoolEntry()) { - Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, - CP->getAlignment()); - } else { - Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, - CP->getAlignment()); - } - return DAG.getNode(XCoreISD::CPRelativeWrapper, dl, MVT::i32, Res); + Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, + CP->getAlignment()); } + return DAG.getNode(XCoreISD::CPRelativeWrapper, dl, MVT::i32, Res); } SDValue XCoreTargetLowering:: @@ -524,7 +513,6 @@ assert(N->getValueType(0) == MVT::i64 && (N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) && "Unknown operand to lower!"); - assert(!Subtarget.isXS1A() && "Cannot custom lower ADD/SUB on xs1a"); DebugLoc dl 
= N->getDebugLoc(); // Extract components Modified: llvm/trunk/lib/Target/XCore/XCoreInstrInfo.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/XCore/XCoreInstrInfo.td?rev=83381&r1=83380&r2=83381&view=diff ============================================================================== --- llvm/trunk/lib/Target/XCore/XCoreInstrInfo.td (original) +++ llvm/trunk/lib/Target/XCore/XCoreInstrInfo.td Tue Oct 6 11:01:09 2009 @@ -26,10 +26,6 @@ // Feature predicates. //===----------------------------------------------------------------------===// -// HasXS1A - This predicate is true when the target processor supports XS1A -// instructions. -def HasXS1A : Predicate<"Subtarget.isXS1A()">; - // HasXS1B - This predicate is true when the target processor supports XS1B // instructions. def HasXS1B : Predicate<"Subtarget.isXS1B()">; @@ -142,9 +138,6 @@ return (uint32_t)N->getZExtValue() < (1 << 20); }]>; -// FIXME check subtarget. Currently we check if the immediate -// is in the common subset of legal immediate values for both -// XS1A and XS1B. def immMskBitp : PatLeaf<(imm), [{ uint32_t value = (uint32_t)N->getZExtValue(); if (!isMask_32(value)) { @@ -157,9 +150,6 @@ || msksize == 32; }]>; -// FIXME check subtarget. Currently we check if the immediate -// is in the common subset of legal immediate values for both -// XS1A and XS1B. def immBitp : PatLeaf<(imm), [{ uint32_t value = (uint32_t)N->getZExtValue(); return (value >= 1 && value <= 8) @@ -524,13 +514,6 @@ "lmul $dst1, $dst2, $src1, $src2, $src3, $src4", []>; -let Predicates = [HasXS1A] in -def MACC_l6r : _L6R<(outs GRRegs:$dst1, GRRegs:$dst2), - (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3, - GRRegs:$src4), - "macc $dst1, $dst2, $src1, $src2, $src3, $src4", - []>; - // Register - U6 //let Uses = [DP] in ... 
Modified: llvm/trunk/lib/Target/XCore/XCoreSubtarget.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/XCore/XCoreSubtarget.cpp?rev=83381&r1=83380&r2=83381&view=diff ============================================================================== --- llvm/trunk/lib/Target/XCore/XCoreSubtarget.cpp (original) +++ llvm/trunk/lib/Target/XCore/XCoreSubtarget.cpp Tue Oct 6 11:01:09 2009 @@ -17,8 +17,7 @@ using namespace llvm; XCoreSubtarget::XCoreSubtarget(const std::string &TT, const std::string &FS) - : IsXS1A(false), - IsXS1B(false) + : IsXS1B(false) { std::string CPU = "xs1b-generic"; Modified: llvm/trunk/lib/Target/XCore/XCoreSubtarget.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/XCore/XCoreSubtarget.h?rev=83381&r1=83380&r2=83381&view=diff ============================================================================== --- llvm/trunk/lib/Target/XCore/XCoreSubtarget.h (original) +++ llvm/trunk/lib/Target/XCore/XCoreSubtarget.h Tue Oct 6 11:01:09 2009 @@ -22,7 +22,6 @@ namespace llvm { class XCoreSubtarget : public TargetSubtarget { - bool IsXS1A; bool IsXS1B; public: @@ -31,7 +30,6 @@ /// XCoreSubtarget(const std::string &TT, const std::string &FS); - bool isXS1A() const { return IsXS1A; } bool isXS1B() const { return IsXS1B; } /// ParseSubtargetFeatures - Parses features string setting specified Modified: llvm/trunk/lib/Target/XCore/XCoreTargetObjectFile.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/XCore/XCoreTargetObjectFile.cpp?rev=83381&r1=83380&r2=83381&view=diff ============================================================================== --- llvm/trunk/lib/Target/XCore/XCoreTargetObjectFile.cpp (original) +++ llvm/trunk/lib/Target/XCore/XCoreTargetObjectFile.cpp Tue Oct 6 11:01:09 2009 @@ -52,20 +52,13 @@ // and can be placed in the standard data / bss sections. 
TLSDataSection = DataSection; TLSBSSSection = BSSSection; - - if (TM.getSubtarget().isXS1A()) - ReadOnlySection = // FIXME: Why is this a writable section for XS1A? - MCSectionXCore::Create(".dp.rodata", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE | - MCSectionXCore::SHF_DP_SECTION, - SectionKind::getDataRel(), false, getContext()); - else - ReadOnlySection = - MCSectionXCore::Create(".cp.rodata", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | - MCSectionXCore::SHF_CP_SECTION, - SectionKind::getReadOnlyWithRel(), false, - getContext()); + + ReadOnlySection = + MCSectionXCore::Create(".cp.rodata", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | + MCSectionXCore::SHF_CP_SECTION, + SectionKind::getReadOnlyWithRel(), false, + getContext()); // Dynamic linking is not supported. Data with relocations is placed in the // same section as data without relocations. From nicolas.geoffray at lip6.fr Tue Oct 6 11:15:38 2009 From: nicolas.geoffray at lip6.fr (Nicolas Geoffray) Date: Tue, 06 Oct 2009 18:15:38 +0200 Subject: [llvm-commits] [PATCH] Bug fix for comma-separated in CommandLine.cpp Message-ID: <4ACB6D2A.1080700@lip6.fr> Dear all, This simple patch fixes a bug in the CommaSeparated option in CommandLine. The current code does the following: For an arg written like this: -myarg=foo,bar It does myarg.push_back("foo") and myarg.push_back("foo,bar"). The patch fixes the issue. I'm not sure if it follows the logic of all options in CommandLine.cpp, so I defer to someone who knows the code better. Thanks, Nicolas -------------- next part -------------- A non-text attachment was scrubbed... 
Name: commasep.patch Type: text/x-patch Size: 401 bytes Desc: not available Url : http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20091006/4dabafeb/attachment.bin From richard at xmos.com Tue Oct 6 11:17:57 2009 From: richard at xmos.com (Richard Osborne) Date: Tue, 06 Oct 2009 16:17:57 -0000 Subject: [llvm-commits] [llvm] r83383 - in /llvm/trunk/lib/Target/XCore: XCore.td XCoreInstrInfo.td XCoreSubtarget.cpp XCoreSubtarget.h Message-ID: <200910061617.n96GHvAm031022@zion.cs.uiuc.edu> Author: friedgold Date: Tue Oct 6 11:17:57 2009 New Revision: 83383 URL: http://llvm.org/viewvc/llvm-project?rev=83383&view=rev Log: Remove xs1b predicate since it is no longer needed to differentiate between xs1a and xs1b. Modified: llvm/trunk/lib/Target/XCore/XCore.td llvm/trunk/lib/Target/XCore/XCoreInstrInfo.td llvm/trunk/lib/Target/XCore/XCoreSubtarget.cpp llvm/trunk/lib/Target/XCore/XCoreSubtarget.h Modified: llvm/trunk/lib/Target/XCore/XCore.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/XCore/XCore.td?rev=83383&r1=83382&r2=83383&view=diff ============================================================================== --- llvm/trunk/lib/Target/XCore/XCore.td (original) +++ llvm/trunk/lib/Target/XCore/XCore.td Tue Oct 6 11:17:57 2009 @@ -30,22 +30,14 @@ } //===----------------------------------------------------------------------===// -// XCore Subtarget features. -//===----------------------------------------------------------------------===// - -def FeatureXS1B - : SubtargetFeature<"xs1b", "IsXS1B", "true", - "Enable XS1B instructions">; - -//===----------------------------------------------------------------------===// // XCore processors supported. 
//===----------------------------------------------------------------------===// class Proc Features> : Processor; -def : Proc<"generic", [FeatureXS1B]>; -def : Proc<"xs1b-generic", [FeatureXS1B]>; +def : Proc<"generic", []>; +def : Proc<"xs1b-generic", []>; //===----------------------------------------------------------------------===// // Declare the target which we are implementing Modified: llvm/trunk/lib/Target/XCore/XCoreInstrInfo.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/XCore/XCoreInstrInfo.td?rev=83383&r1=83382&r2=83383&view=diff ============================================================================== --- llvm/trunk/lib/Target/XCore/XCoreInstrInfo.td (original) +++ llvm/trunk/lib/Target/XCore/XCoreInstrInfo.td Tue Oct 6 11:17:57 2009 @@ -23,14 +23,6 @@ include "XCoreInstrFormats.td" //===----------------------------------------------------------------------===// -// Feature predicates. -//===----------------------------------------------------------------------===// - -// HasXS1B - This predicate is true when the target processor supports XS1B -// instructions. -def HasXS1B : Predicate<"Subtarget.isXS1B()">; - -//===----------------------------------------------------------------------===// // XCore specific DAG Nodes. // @@ -473,7 +465,7 @@ } // Four operand long -let Predicates = [HasXS1B], Constraints = "$src1 = $dst1,$src2 = $dst2" in { +let Constraints = "$src1 = $dst1,$src2 = $dst2" in { def MACCU_l4r : _L4R<(outs GRRegs:$dst1, GRRegs:$dst2), (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3, GRRegs:$src4), @@ -489,7 +481,6 @@ // Five operand long -let Predicates = [HasXS1B] in { def LADD_l5r : _L5R<(outs GRRegs:$dst1, GRRegs:$dst2), (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3), "ladd $dst1, $dst2, $src1, $src2, $src3", @@ -504,7 +495,6 @@ (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3), "ldiv $dst1, $dst2, $src1, $src2, $src3", []>; -} // Six operand long @@ -661,13 +651,12 @@ } //let Uses = [CP] in ... 
-let Predicates = [HasXS1B], Defs = [R11], neverHasSideEffects = 1, - isReMaterializable = 1 in +let Defs = [R11], neverHasSideEffects = 1, isReMaterializable = 1 in def LDAWCP_u6: _FRU6<(outs), (ins MEMii:$a), "ldaw r11, cp[$a]", []>; -let Predicates = [HasXS1B], Defs = [R11], isReMaterializable = 1 in +let Defs = [R11], isReMaterializable = 1 in def LDAWCP_lu6: _FLRU6< (outs), (ins MEMii:$a), "ldaw r11, cp[$a]", Modified: llvm/trunk/lib/Target/XCore/XCoreSubtarget.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/XCore/XCoreSubtarget.cpp?rev=83383&r1=83382&r2=83383&view=diff ============================================================================== --- llvm/trunk/lib/Target/XCore/XCoreSubtarget.cpp (original) +++ llvm/trunk/lib/Target/XCore/XCoreSubtarget.cpp Tue Oct 6 11:17:57 2009 @@ -13,14 +13,8 @@ #include "XCoreSubtarget.h" #include "XCore.h" -#include "XCoreGenSubtarget.inc" using namespace llvm; XCoreSubtarget::XCoreSubtarget(const std::string &TT, const std::string &FS) - : IsXS1B(false) { - std::string CPU = "xs1b-generic"; - - // Parse features string. - ParseSubtargetFeatures(FS, CPU); } Modified: llvm/trunk/lib/Target/XCore/XCoreSubtarget.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/XCore/XCoreSubtarget.h?rev=83383&r1=83382&r2=83383&view=diff ============================================================================== --- llvm/trunk/lib/Target/XCore/XCoreSubtarget.h (original) +++ llvm/trunk/lib/Target/XCore/XCoreSubtarget.h Tue Oct 6 11:17:57 2009 @@ -22,15 +22,12 @@ namespace llvm { class XCoreSubtarget : public TargetSubtarget { - bool IsXS1B; public: /// This constructor initializes the data members to match that /// of the specified triple. /// XCoreSubtarget(const std::string &TT, const std::string &FS); - - bool isXS1B() const { return IsXS1B; } /// ParseSubtargetFeatures - Parses features string setting specified /// subtarget options. Definition of function is auto generated by tblgen. 
From bob.wilson at apple.com Tue Oct 6 11:45:06 2009 From: bob.wilson at apple.com (Bob Wilson) Date: Tue, 06 Oct 2009 16:45:06 -0000 Subject: [llvm-commits] [llvm-gcc-4.2] r83387 - in /llvm-gcc-4.2/trunk/gcc/config/arm: arm.c arm_neon.h neon-gen.ml Message-ID: <200910061645.n96Gj776002218@zion.cs.uiuc.edu> Author: bwilson Date: Tue Oct 6 11:45:06 2009 New Revision: 83387 URL: http://llvm.org/viewvc/llvm-project?rev=83387&view=rev Log: Fix the type checking problems introduced when we moved from inline functions to preprocessor macros. For each macro, assign non-immediate operands to local variables of the correct type so that the frontend will detect errors and give reasonable messages for them. The extra copies should be optimized away, and for unoptimized code, they should be roughly comparable to what we had before with inline functions. Also partially revert my previous patch to use types from <stdint.h> instead of having special builtin types. Sandeep reported that <stdint.h> from newlib has some unexpected types which did not match the types used for some of the NEON builtins. (Now I see why GCC did it that way!) When the header is used in C++, this means we need to use reinterpret_cast to avoid errors when converting between the internal types and the <stdint.h> types. 
Modified: llvm-gcc-4.2/trunk/gcc/config/arm/arm.c llvm-gcc-4.2/trunk/gcc/config/arm/arm_neon.h llvm-gcc-4.2/trunk/gcc/config/arm/neon-gen.ml Modified: llvm-gcc-4.2/trunk/gcc/config/arm/arm.c URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/gcc/config/arm/arm.c?rev=83387&r1=83386&r2=83387&view=diff ============================================================================== --- llvm-gcc-4.2/trunk/gcc/config/arm/arm.c (original) +++ llvm-gcc-4.2/trunk/gcc/config/arm/arm.c Tue Oct 6 11:45:06 2009 @@ -16862,6 +16862,8 @@ /* LLVM LOCAL begin pr5037 use standard type nodes */ tree neon_intQI_type_node = intQI_type_node; tree neon_intHI_type_node = intHI_type_node; + tree neon_polyQI_type_node = intQI_type_node; + tree neon_polyHI_type_node = intHI_type_node; tree neon_intSI_type_node = intSI_type_node; tree neon_intDI_type_node = intDI_type_node; tree neon_float_type_node = float_type_node; @@ -16918,7 +16920,11 @@ tree V2DI_type_node = build_vector_type_for_mode (neon_intDI_type_node, V2DImode); - /* LLVM LOCAL pr5037 removed unused type nodes */ + /* Unsigned integer types for various mode sizes. */ + tree intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode)); + tree intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode)); + tree intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode)); + tree intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode)); /* LLVM LOCAL begin multi-vector types */ #ifdef ENABLE_LLVM @@ -17804,7 +17810,33 @@ tree dreg_types[5], qreg_types[5]; /* APPLE LOCAL begin 7083296 Build without warnings. */ - /* LLVM LOCAL pr5037 removed builtin element types */ + /* Define typedefs which exactly correspond to the modes we are basing vector + types on. If you change these names you'll need to change + the table used by arm_mangle_type too. 
*/ + (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node, + "__builtin_neon_qi"); + (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node, + "__builtin_neon_hi"); + (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node, + "__builtin_neon_si"); + (*lang_hooks.types.register_builtin_type) (neon_float_type_node, + "__builtin_neon_sf"); + (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node, + "__builtin_neon_di"); + + (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node, + "__builtin_neon_poly8"); + (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node, + "__builtin_neon_poly16"); + + (*lang_hooks.types.register_builtin_type) (intUQI_type_node, + "__builtin_neon_uqi"); + (*lang_hooks.types.register_builtin_type) (intUHI_type_node, + "__builtin_neon_uhi"); + (*lang_hooks.types.register_builtin_type) (intUSI_type_node, + "__builtin_neon_usi"); + (*lang_hooks.types.register_builtin_type) (intUDI_type_node, + "__builtin_neon_udi"); /* LLVM LOCAL begin multi-vector types */ (*lang_hooks.types.register_builtin_type) (V8QI2_type_node, Modified: llvm-gcc-4.2/trunk/gcc/config/arm/arm_neon.h URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/gcc/config/arm/arm_neon.h?rev=83387&r1=83386&r2=83387&view=diff ============================================================================== --- llvm-gcc-4.2/trunk/gcc/config/arm/arm_neon.h (original) +++ llvm-gcc-4.2/trunk/gcc/config/arm/arm_neon.h Tue Oct 6 11:45:06 2009 @@ -39,36 +39,39 @@ #ifdef __cplusplus extern "C" { +#define __neon_ptr_cast(ty, ptr) reinterpret_cast(ptr) +#else +#define __neon_ptr_cast(ty, ptr) (ty)(ptr) #endif #include -typedef float float32_t; -typedef signed char poly8_t; -typedef signed short poly16_t; - -typedef int8_t __neon_int8x8_t __attribute__ ((__vector_size__ (8))); -typedef int16_t __neon_int16x4_t __attribute__ ((__vector_size__ (8))); -typedef int32_t __neon_int32x2_t __attribute__ ((__vector_size__ (8))); -typedef int64_t 
__neon_int64x1_t __attribute__ ((__vector_size__ (8))); -typedef float32_t __neon_float32x2_t __attribute__ ((__vector_size__ (8))); -typedef poly8_t __neon_poly8x8_t __attribute__ ((__vector_size__ (8))); -typedef poly16_t __neon_poly16x4_t __attribute__ ((__vector_size__ (8))); -typedef uint8_t __neon_uint8x8_t __attribute__ ((__vector_size__ (8))); -typedef uint16_t __neon_uint16x4_t __attribute__ ((__vector_size__ (8))); -typedef uint32_t __neon_uint32x2_t __attribute__ ((__vector_size__ (8))); -typedef uint64_t __neon_uint64x1_t __attribute__ ((__vector_size__ (8))); -typedef int8_t __neon_int8x16_t __attribute__ ((__vector_size__ (16))); -typedef int16_t __neon_int16x8_t __attribute__ ((__vector_size__ (16))); -typedef int32_t __neon_int32x4_t __attribute__ ((__vector_size__ (16))); -typedef int64_t __neon_int64x2_t __attribute__ ((__vector_size__ (16))); -typedef float32_t __neon_float32x4_t __attribute__ ((__vector_size__ (16))); -typedef poly8_t __neon_poly8x16_t __attribute__ ((__vector_size__ (16))); -typedef poly16_t __neon_poly16x8_t __attribute__ ((__vector_size__ (16))); -typedef uint8_t __neon_uint8x16_t __attribute__ ((__vector_size__ (16))); -typedef uint16_t __neon_uint16x8_t __attribute__ ((__vector_size__ (16))); -typedef uint32_t __neon_uint32x4_t __attribute__ ((__vector_size__ (16))); -typedef uint64_t __neon_uint64x2_t __attribute__ ((__vector_size__ (16))); +typedef __builtin_neon_qi __neon_int8x8_t __attribute__ ((__vector_size__ (8))); +typedef __builtin_neon_hi __neon_int16x4_t __attribute__ ((__vector_size__ (8))); +typedef __builtin_neon_si __neon_int32x2_t __attribute__ ((__vector_size__ (8))); +typedef __builtin_neon_di __neon_int64x1_t __attribute__ ((__vector_size__ (8))); +typedef __builtin_neon_sf __neon_float32x2_t __attribute__ ((__vector_size__ (8))); +typedef __builtin_neon_poly8 __neon_poly8x8_t __attribute__ ((__vector_size__ (8))); +typedef __builtin_neon_poly16 __neon_poly16x4_t __attribute__ ((__vector_size__ (8))); 
+typedef __builtin_neon_uqi __neon_uint8x8_t __attribute__ ((__vector_size__ (8))); +typedef __builtin_neon_uhi __neon_uint16x4_t __attribute__ ((__vector_size__ (8))); +typedef __builtin_neon_usi __neon_uint32x2_t __attribute__ ((__vector_size__ (8))); +typedef __builtin_neon_udi __neon_uint64x1_t __attribute__ ((__vector_size__ (8))); +typedef __builtin_neon_qi __neon_int8x16_t __attribute__ ((__vector_size__ (16))); +typedef __builtin_neon_hi __neon_int16x8_t __attribute__ ((__vector_size__ (16))); +typedef __builtin_neon_si __neon_int32x4_t __attribute__ ((__vector_size__ (16))); +typedef __builtin_neon_di __neon_int64x2_t __attribute__ ((__vector_size__ (16))); +typedef __builtin_neon_sf __neon_float32x4_t __attribute__ ((__vector_size__ (16))); +typedef __builtin_neon_poly8 __neon_poly8x16_t __attribute__ ((__vector_size__ (16))); +typedef __builtin_neon_poly16 __neon_poly16x8_t __attribute__ ((__vector_size__ (16))); +typedef __builtin_neon_uqi __neon_uint8x16_t __attribute__ ((__vector_size__ (16))); +typedef __builtin_neon_uhi __neon_uint16x8_t __attribute__ ((__vector_size__ (16))); +typedef __builtin_neon_usi __neon_uint32x4_t __attribute__ ((__vector_size__ (16))); +typedef __builtin_neon_udi __neon_uint64x2_t __attribute__ ((__vector_size__ (16))); + +typedef __builtin_neon_sf float32_t; +typedef __builtin_neon_poly8 poly8_t; +typedef __builtin_neon_poly16 poly16_t; typedef struct __simd64_int8_t { @@ -513,12786 +516,15905 @@ #define vadd_s8(__a, __b) \ ({ \ + int8x8_t __ax = __a; \ + int8x8_t __bx = __b; \ union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vaddv8qi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vaddv8qi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vadd_s16(__a, __b) \ ({ \ + int16x4_t __ax = __a; \ + int16x4_t __bx = __b; \ union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vaddv4hi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vaddv4hi (__ax.val, 
__bx.val, 1); \ __rv.__i; \ }) #define vadd_s32(__a, __b) \ ({ \ + int32x2_t __ax = __a; \ + int32x2_t __bx = __b; \ union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vaddv2si (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vaddv2si (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vadd_s64(__a, __b) \ ({ \ + int64x1_t __ax = __a; \ + int64x1_t __bx = __b; \ union { int64x1_t __i; __neon_int64x1_t __o; } __rv; \ - __rv.__o = __builtin_neon_vaddv1di (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vaddv1di (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vadd_f32(__a, __b) \ ({ \ + float32x2_t __ax = __a; \ + float32x2_t __bx = __b; \ union { float32x2_t __i; __neon_float32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vaddv2sf (__a.val, __b.val, 5); \ + __rv.__o = __builtin_neon_vaddv2sf (__ax.val, __bx.val, 5); \ __rv.__i; \ }) #define vadd_u8(__a, __b) \ ({ \ + uint8x8_t __ax = __a; \ + uint8x8_t __bx = __b; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vaddv8qi ((__neon_int8x8_t) __a.val, (__neon_int8x8_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vaddv8qi ((__neon_int8x8_t) __ax.val, (__neon_int8x8_t) __bx.val, 0); \ __rv.__i; \ }) #define vadd_u16(__a, __b) \ ({ \ + uint16x4_t __ax = __a; \ + uint16x4_t __bx = __b; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vaddv4hi ((__neon_int16x4_t) __a.val, (__neon_int16x4_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vaddv4hi ((__neon_int16x4_t) __ax.val, (__neon_int16x4_t) __bx.val, 0); \ __rv.__i; \ }) #define vadd_u32(__a, __b) \ ({ \ + uint32x2_t __ax = __a; \ + uint32x2_t __bx = __b; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vaddv2si ((__neon_int32x2_t) __a.val, (__neon_int32x2_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vaddv2si ((__neon_int32x2_t) __ax.val, (__neon_int32x2_t) __bx.val, 0); \ __rv.__i; \ }) #define vadd_u64(__a, __b) \ ({ \ + 
uint64x1_t __ax = __a; \ + uint64x1_t __bx = __b; \ union { uint64x1_t __i; __neon_int64x1_t __o; } __rv; \ - __rv.__o = __builtin_neon_vaddv1di ((__neon_int64x1_t) __a.val, (__neon_int64x1_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vaddv1di ((__neon_int64x1_t) __ax.val, (__neon_int64x1_t) __bx.val, 0); \ __rv.__i; \ }) #define vaddq_s8(__a, __b) \ ({ \ + int8x16_t __ax = __a; \ + int8x16_t __bx = __b; \ union { int8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vaddv16qi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vaddv16qi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vaddq_s16(__a, __b) \ ({ \ + int16x8_t __ax = __a; \ + int16x8_t __bx = __b; \ union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vaddv8hi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vaddv8hi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vaddq_s32(__a, __b) \ ({ \ + int32x4_t __ax = __a; \ + int32x4_t __bx = __b; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vaddv4si (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vaddv4si (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vaddq_s64(__a, __b) \ ({ \ + int64x2_t __ax = __a; \ + int64x2_t __bx = __b; \ union { int64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vaddv2di (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vaddv2di (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vaddq_f32(__a, __b) \ ({ \ + float32x4_t __ax = __a; \ + float32x4_t __bx = __b; \ union { float32x4_t __i; __neon_float32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vaddv4sf (__a.val, __b.val, 5); \ + __rv.__o = __builtin_neon_vaddv4sf (__ax.val, __bx.val, 5); \ __rv.__i; \ }) #define vaddq_u8(__a, __b) \ ({ \ + uint8x16_t __ax = __a; \ + uint8x16_t __bx = __b; \ union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vaddv16qi ((__neon_int8x16_t) __a.val, (__neon_int8x16_t) __b.val, 0); \ + 
__rv.__o = __builtin_neon_vaddv16qi ((__neon_int8x16_t) __ax.val, (__neon_int8x16_t) __bx.val, 0); \ __rv.__i; \ }) #define vaddq_u16(__a, __b) \ ({ \ + uint16x8_t __ax = __a; \ + uint16x8_t __bx = __b; \ union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vaddv8hi ((__neon_int16x8_t) __a.val, (__neon_int16x8_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vaddv8hi ((__neon_int16x8_t) __ax.val, (__neon_int16x8_t) __bx.val, 0); \ __rv.__i; \ }) #define vaddq_u32(__a, __b) \ ({ \ + uint32x4_t __ax = __a; \ + uint32x4_t __bx = __b; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vaddv4si ((__neon_int32x4_t) __a.val, (__neon_int32x4_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vaddv4si ((__neon_int32x4_t) __ax.val, (__neon_int32x4_t) __bx.val, 0); \ __rv.__i; \ }) #define vaddq_u64(__a, __b) \ ({ \ + uint64x2_t __ax = __a; \ + uint64x2_t __bx = __b; \ union { uint64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vaddv2di ((__neon_int64x2_t) __a.val, (__neon_int64x2_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vaddv2di ((__neon_int64x2_t) __ax.val, (__neon_int64x2_t) __bx.val, 0); \ __rv.__i; \ }) #define vaddl_s8(__a, __b) \ ({ \ + int8x8_t __ax = __a; \ + int8x8_t __bx = __b; \ union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vaddlv8qi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vaddlv8qi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vaddl_s16(__a, __b) \ ({ \ + int16x4_t __ax = __a; \ + int16x4_t __bx = __b; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vaddlv4hi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vaddlv4hi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vaddl_s32(__a, __b) \ ({ \ + int32x2_t __ax = __a; \ + int32x2_t __bx = __b; \ union { int64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vaddlv2si (__a.val, __b.val, 1); \ + __rv.__o = 
__builtin_neon_vaddlv2si (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vaddl_u8(__a, __b) \ ({ \ + uint8x8_t __ax = __a; \ + uint8x8_t __bx = __b; \ union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vaddlv8qi ((__neon_int8x8_t) __a.val, (__neon_int8x8_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vaddlv8qi ((__neon_int8x8_t) __ax.val, (__neon_int8x8_t) __bx.val, 0); \ __rv.__i; \ }) #define vaddl_u16(__a, __b) \ ({ \ + uint16x4_t __ax = __a; \ + uint16x4_t __bx = __b; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vaddlv4hi ((__neon_int16x4_t) __a.val, (__neon_int16x4_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vaddlv4hi ((__neon_int16x4_t) __ax.val, (__neon_int16x4_t) __bx.val, 0); \ __rv.__i; \ }) #define vaddl_u32(__a, __b) \ ({ \ + uint32x2_t __ax = __a; \ + uint32x2_t __bx = __b; \ union { uint64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vaddlv2si ((__neon_int32x2_t) __a.val, (__neon_int32x2_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vaddlv2si ((__neon_int32x2_t) __ax.val, (__neon_int32x2_t) __bx.val, 0); \ __rv.__i; \ }) #define vaddw_s8(__a, __b) \ ({ \ + int16x8_t __ax = __a; \ + int8x8_t __bx = __b; \ union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vaddwv8qi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vaddwv8qi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vaddw_s16(__a, __b) \ ({ \ + int32x4_t __ax = __a; \ + int16x4_t __bx = __b; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vaddwv4hi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vaddwv4hi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vaddw_s32(__a, __b) \ ({ \ + int64x2_t __ax = __a; \ + int32x2_t __bx = __b; \ union { int64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vaddwv2si (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vaddwv2si (__ax.val, __bx.val, 1); \ __rv.__i; 
\ }) #define vaddw_u8(__a, __b) \ ({ \ + uint16x8_t __ax = __a; \ + uint8x8_t __bx = __b; \ union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vaddwv8qi ((__neon_int16x8_t) __a.val, (__neon_int8x8_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vaddwv8qi ((__neon_int16x8_t) __ax.val, (__neon_int8x8_t) __bx.val, 0); \ __rv.__i; \ }) #define vaddw_u16(__a, __b) \ ({ \ + uint32x4_t __ax = __a; \ + uint16x4_t __bx = __b; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vaddwv4hi ((__neon_int32x4_t) __a.val, (__neon_int16x4_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vaddwv4hi ((__neon_int32x4_t) __ax.val, (__neon_int16x4_t) __bx.val, 0); \ __rv.__i; \ }) #define vaddw_u32(__a, __b) \ ({ \ + uint64x2_t __ax = __a; \ + uint32x2_t __bx = __b; \ union { uint64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vaddwv2si ((__neon_int64x2_t) __a.val, (__neon_int32x2_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vaddwv2si ((__neon_int64x2_t) __ax.val, (__neon_int32x2_t) __bx.val, 0); \ __rv.__i; \ }) #define vhadd_s8(__a, __b) \ ({ \ + int8x8_t __ax = __a; \ + int8x8_t __bx = __b; \ union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vhaddv8qi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vhaddv8qi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vhadd_s16(__a, __b) \ ({ \ + int16x4_t __ax = __a; \ + int16x4_t __bx = __b; \ union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vhaddv4hi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vhaddv4hi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vhadd_s32(__a, __b) \ ({ \ + int32x2_t __ax = __a; \ + int32x2_t __bx = __b; \ union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vhaddv2si (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vhaddv2si (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vhadd_u8(__a, __b) \ ({ \ + uint8x8_t __ax = 
__a; \ + uint8x8_t __bx = __b; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vhaddv8qi ((__neon_int8x8_t) __a.val, (__neon_int8x8_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vhaddv8qi ((__neon_int8x8_t) __ax.val, (__neon_int8x8_t) __bx.val, 0); \ __rv.__i; \ }) #define vhadd_u16(__a, __b) \ ({ \ + uint16x4_t __ax = __a; \ + uint16x4_t __bx = __b; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vhaddv4hi ((__neon_int16x4_t) __a.val, (__neon_int16x4_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vhaddv4hi ((__neon_int16x4_t) __ax.val, (__neon_int16x4_t) __bx.val, 0); \ __rv.__i; \ }) #define vhadd_u32(__a, __b) \ ({ \ + uint32x2_t __ax = __a; \ + uint32x2_t __bx = __b; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vhaddv2si ((__neon_int32x2_t) __a.val, (__neon_int32x2_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vhaddv2si ((__neon_int32x2_t) __ax.val, (__neon_int32x2_t) __bx.val, 0); \ __rv.__i; \ }) #define vhaddq_s8(__a, __b) \ ({ \ + int8x16_t __ax = __a; \ + int8x16_t __bx = __b; \ union { int8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vhaddv16qi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vhaddv16qi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vhaddq_s16(__a, __b) \ ({ \ + int16x8_t __ax = __a; \ + int16x8_t __bx = __b; \ union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vhaddv8hi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vhaddv8hi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vhaddq_s32(__a, __b) \ ({ \ + int32x4_t __ax = __a; \ + int32x4_t __bx = __b; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vhaddv4si (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vhaddv4si (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vhaddq_u8(__a, __b) \ ({ \ + uint8x16_t __ax = __a; \ + uint8x16_t __bx = __b; \ union { uint8x16_t 
__i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vhaddv16qi ((__neon_int8x16_t) __a.val, (__neon_int8x16_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vhaddv16qi ((__neon_int8x16_t) __ax.val, (__neon_int8x16_t) __bx.val, 0); \ __rv.__i; \ }) #define vhaddq_u16(__a, __b) \ ({ \ + uint16x8_t __ax = __a; \ + uint16x8_t __bx = __b; \ union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vhaddv8hi ((__neon_int16x8_t) __a.val, (__neon_int16x8_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vhaddv8hi ((__neon_int16x8_t) __ax.val, (__neon_int16x8_t) __bx.val, 0); \ __rv.__i; \ }) #define vhaddq_u32(__a, __b) \ ({ \ + uint32x4_t __ax = __a; \ + uint32x4_t __bx = __b; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vhaddv4si ((__neon_int32x4_t) __a.val, (__neon_int32x4_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vhaddv4si ((__neon_int32x4_t) __ax.val, (__neon_int32x4_t) __bx.val, 0); \ __rv.__i; \ }) #define vrhadd_s8(__a, __b) \ ({ \ + int8x8_t __ax = __a; \ + int8x8_t __bx = __b; \ union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vhaddv8qi (__a.val, __b.val, 3); \ + __rv.__o = __builtin_neon_vhaddv8qi (__ax.val, __bx.val, 3); \ __rv.__i; \ }) #define vrhadd_s16(__a, __b) \ ({ \ + int16x4_t __ax = __a; \ + int16x4_t __bx = __b; \ union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vhaddv4hi (__a.val, __b.val, 3); \ + __rv.__o = __builtin_neon_vhaddv4hi (__ax.val, __bx.val, 3); \ __rv.__i; \ }) #define vrhadd_s32(__a, __b) \ ({ \ + int32x2_t __ax = __a; \ + int32x2_t __bx = __b; \ union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vhaddv2si (__a.val, __b.val, 3); \ + __rv.__o = __builtin_neon_vhaddv2si (__ax.val, __bx.val, 3); \ __rv.__i; \ }) #define vrhadd_u8(__a, __b) \ ({ \ + uint8x8_t __ax = __a; \ + uint8x8_t __bx = __b; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = 
__builtin_neon_vhaddv8qi ((__neon_int8x8_t) __a.val, (__neon_int8x8_t) __b.val, 2); \ + __rv.__o = __builtin_neon_vhaddv8qi ((__neon_int8x8_t) __ax.val, (__neon_int8x8_t) __bx.val, 2); \ __rv.__i; \ }) #define vrhadd_u16(__a, __b) \ ({ \ + uint16x4_t __ax = __a; \ + uint16x4_t __bx = __b; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vhaddv4hi ((__neon_int16x4_t) __a.val, (__neon_int16x4_t) __b.val, 2); \ + __rv.__o = __builtin_neon_vhaddv4hi ((__neon_int16x4_t) __ax.val, (__neon_int16x4_t) __bx.val, 2); \ __rv.__i; \ }) #define vrhadd_u32(__a, __b) \ ({ \ + uint32x2_t __ax = __a; \ + uint32x2_t __bx = __b; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vhaddv2si ((__neon_int32x2_t) __a.val, (__neon_int32x2_t) __b.val, 2); \ + __rv.__o = __builtin_neon_vhaddv2si ((__neon_int32x2_t) __ax.val, (__neon_int32x2_t) __bx.val, 2); \ __rv.__i; \ }) #define vrhaddq_s8(__a, __b) \ ({ \ + int8x16_t __ax = __a; \ + int8x16_t __bx = __b; \ union { int8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vhaddv16qi (__a.val, __b.val, 3); \ + __rv.__o = __builtin_neon_vhaddv16qi (__ax.val, __bx.val, 3); \ __rv.__i; \ }) #define vrhaddq_s16(__a, __b) \ ({ \ + int16x8_t __ax = __a; \ + int16x8_t __bx = __b; \ union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vhaddv8hi (__a.val, __b.val, 3); \ + __rv.__o = __builtin_neon_vhaddv8hi (__ax.val, __bx.val, 3); \ __rv.__i; \ }) #define vrhaddq_s32(__a, __b) \ ({ \ + int32x4_t __ax = __a; \ + int32x4_t __bx = __b; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vhaddv4si (__a.val, __b.val, 3); \ + __rv.__o = __builtin_neon_vhaddv4si (__ax.val, __bx.val, 3); \ __rv.__i; \ }) #define vrhaddq_u8(__a, __b) \ ({ \ + uint8x16_t __ax = __a; \ + uint8x16_t __bx = __b; \ union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vhaddv16qi ((__neon_int8x16_t) 
__a.val, (__neon_int8x16_t) __b.val, 2); \ + __rv.__o = __builtin_neon_vhaddv16qi ((__neon_int8x16_t) __ax.val, (__neon_int8x16_t) __bx.val, 2); \ __rv.__i; \ }) #define vrhaddq_u16(__a, __b) \ ({ \ + uint16x8_t __ax = __a; \ + uint16x8_t __bx = __b; \ union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vhaddv8hi ((__neon_int16x8_t) __a.val, (__neon_int16x8_t) __b.val, 2); \ + __rv.__o = __builtin_neon_vhaddv8hi ((__neon_int16x8_t) __ax.val, (__neon_int16x8_t) __bx.val, 2); \ __rv.__i; \ }) #define vrhaddq_u32(__a, __b) \ ({ \ + uint32x4_t __ax = __a; \ + uint32x4_t __bx = __b; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vhaddv4si ((__neon_int32x4_t) __a.val, (__neon_int32x4_t) __b.val, 2); \ + __rv.__o = __builtin_neon_vhaddv4si ((__neon_int32x4_t) __ax.val, (__neon_int32x4_t) __bx.val, 2); \ __rv.__i; \ }) #define vqadd_s8(__a, __b) \ ({ \ + int8x8_t __ax = __a; \ + int8x8_t __bx = __b; \ union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqaddv8qi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vqaddv8qi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vqadd_s16(__a, __b) \ ({ \ + int16x4_t __ax = __a; \ + int16x4_t __bx = __b; \ union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqaddv4hi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vqaddv4hi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vqadd_s32(__a, __b) \ ({ \ + int32x2_t __ax = __a; \ + int32x2_t __bx = __b; \ union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqaddv2si (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vqaddv2si (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vqadd_s64(__a, __b) \ ({ \ + int64x1_t __ax = __a; \ + int64x1_t __bx = __b; \ union { int64x1_t __i; __neon_int64x1_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqaddv1di (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vqaddv1di (__ax.val, 
__bx.val, 1); \ __rv.__i; \ }) #define vqadd_u8(__a, __b) \ ({ \ + uint8x8_t __ax = __a; \ + uint8x8_t __bx = __b; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqaddv8qi ((__neon_int8x8_t) __a.val, (__neon_int8x8_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vqaddv8qi ((__neon_int8x8_t) __ax.val, (__neon_int8x8_t) __bx.val, 0); \ __rv.__i; \ }) #define vqadd_u16(__a, __b) \ ({ \ + uint16x4_t __ax = __a; \ + uint16x4_t __bx = __b; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqaddv4hi ((__neon_int16x4_t) __a.val, (__neon_int16x4_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vqaddv4hi ((__neon_int16x4_t) __ax.val, (__neon_int16x4_t) __bx.val, 0); \ __rv.__i; \ }) #define vqadd_u32(__a, __b) \ ({ \ + uint32x2_t __ax = __a; \ + uint32x2_t __bx = __b; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqaddv2si ((__neon_int32x2_t) __a.val, (__neon_int32x2_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vqaddv2si ((__neon_int32x2_t) __ax.val, (__neon_int32x2_t) __bx.val, 0); \ __rv.__i; \ }) #define vqadd_u64(__a, __b) \ ({ \ + uint64x1_t __ax = __a; \ + uint64x1_t __bx = __b; \ union { uint64x1_t __i; __neon_int64x1_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqaddv1di ((__neon_int64x1_t) __a.val, (__neon_int64x1_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vqaddv1di ((__neon_int64x1_t) __ax.val, (__neon_int64x1_t) __bx.val, 0); \ __rv.__i; \ }) #define vqaddq_s8(__a, __b) \ ({ \ + int8x16_t __ax = __a; \ + int8x16_t __bx = __b; \ union { int8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqaddv16qi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vqaddv16qi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vqaddq_s16(__a, __b) \ ({ \ + int16x8_t __ax = __a; \ + int16x8_t __bx = __b; \ union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqaddv8hi (__a.val, __b.val, 1); \ + __rv.__o = 
__builtin_neon_vqaddv8hi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vqaddq_s32(__a, __b) \ ({ \ + int32x4_t __ax = __a; \ + int32x4_t __bx = __b; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqaddv4si (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vqaddv4si (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vqaddq_s64(__a, __b) \ ({ \ + int64x2_t __ax = __a; \ + int64x2_t __bx = __b; \ union { int64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqaddv2di (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vqaddv2di (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vqaddq_u8(__a, __b) \ ({ \ + uint8x16_t __ax = __a; \ + uint8x16_t __bx = __b; \ union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqaddv16qi ((__neon_int8x16_t) __a.val, (__neon_int8x16_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vqaddv16qi ((__neon_int8x16_t) __ax.val, (__neon_int8x16_t) __bx.val, 0); \ __rv.__i; \ }) #define vqaddq_u16(__a, __b) \ ({ \ + uint16x8_t __ax = __a; \ + uint16x8_t __bx = __b; \ union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqaddv8hi ((__neon_int16x8_t) __a.val, (__neon_int16x8_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vqaddv8hi ((__neon_int16x8_t) __ax.val, (__neon_int16x8_t) __bx.val, 0); \ __rv.__i; \ }) #define vqaddq_u32(__a, __b) \ ({ \ + uint32x4_t __ax = __a; \ + uint32x4_t __bx = __b; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqaddv4si ((__neon_int32x4_t) __a.val, (__neon_int32x4_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vqaddv4si ((__neon_int32x4_t) __ax.val, (__neon_int32x4_t) __bx.val, 0); \ __rv.__i; \ }) #define vqaddq_u64(__a, __b) \ ({ \ + uint64x2_t __ax = __a; \ + uint64x2_t __bx = __b; \ union { uint64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqaddv2di ((__neon_int64x2_t) __a.val, (__neon_int64x2_t) __b.val, 0); \ + __rv.__o = 
__builtin_neon_vqaddv2di ((__neon_int64x2_t) __ax.val, (__neon_int64x2_t) __bx.val, 0); \ __rv.__i; \ }) #define vaddhn_s16(__a, __b) \ ({ \ + int16x8_t __ax = __a; \ + int16x8_t __bx = __b; \ union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vaddhnv8hi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vaddhnv8hi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vaddhn_s32(__a, __b) \ ({ \ + int32x4_t __ax = __a; \ + int32x4_t __bx = __b; \ union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vaddhnv4si (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vaddhnv4si (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vaddhn_s64(__a, __b) \ ({ \ + int64x2_t __ax = __a; \ + int64x2_t __bx = __b; \ union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vaddhnv2di (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vaddhnv2di (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vaddhn_u16(__a, __b) \ ({ \ + uint16x8_t __ax = __a; \ + uint16x8_t __bx = __b; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vaddhnv8hi ((__neon_int16x8_t) __a.val, (__neon_int16x8_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vaddhnv8hi ((__neon_int16x8_t) __ax.val, (__neon_int16x8_t) __bx.val, 0); \ __rv.__i; \ }) #define vaddhn_u32(__a, __b) \ ({ \ + uint32x4_t __ax = __a; \ + uint32x4_t __bx = __b; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vaddhnv4si ((__neon_int32x4_t) __a.val, (__neon_int32x4_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vaddhnv4si ((__neon_int32x4_t) __ax.val, (__neon_int32x4_t) __bx.val, 0); \ __rv.__i; \ }) #define vaddhn_u64(__a, __b) \ ({ \ + uint64x2_t __ax = __a; \ + uint64x2_t __bx = __b; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vaddhnv2di ((__neon_int64x2_t) __a.val, (__neon_int64x2_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vaddhnv2di 
((__neon_int64x2_t) __ax.val, (__neon_int64x2_t) __bx.val, 0); \ __rv.__i; \ }) #define vraddhn_s16(__a, __b) \ ({ \ + int16x8_t __ax = __a; \ + int16x8_t __bx = __b; \ union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vaddhnv8hi (__a.val, __b.val, 3); \ + __rv.__o = __builtin_neon_vaddhnv8hi (__ax.val, __bx.val, 3); \ __rv.__i; \ }) #define vraddhn_s32(__a, __b) \ ({ \ + int32x4_t __ax = __a; \ + int32x4_t __bx = __b; \ union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vaddhnv4si (__a.val, __b.val, 3); \ + __rv.__o = __builtin_neon_vaddhnv4si (__ax.val, __bx.val, 3); \ __rv.__i; \ }) #define vraddhn_s64(__a, __b) \ ({ \ + int64x2_t __ax = __a; \ + int64x2_t __bx = __b; \ union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vaddhnv2di (__a.val, __b.val, 3); \ + __rv.__o = __builtin_neon_vaddhnv2di (__ax.val, __bx.val, 3); \ __rv.__i; \ }) #define vraddhn_u16(__a, __b) \ ({ \ + uint16x8_t __ax = __a; \ + uint16x8_t __bx = __b; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vaddhnv8hi ((__neon_int16x8_t) __a.val, (__neon_int16x8_t) __b.val, 2); \ + __rv.__o = __builtin_neon_vaddhnv8hi ((__neon_int16x8_t) __ax.val, (__neon_int16x8_t) __bx.val, 2); \ __rv.__i; \ }) #define vraddhn_u32(__a, __b) \ ({ \ + uint32x4_t __ax = __a; \ + uint32x4_t __bx = __b; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vaddhnv4si ((__neon_int32x4_t) __a.val, (__neon_int32x4_t) __b.val, 2); \ + __rv.__o = __builtin_neon_vaddhnv4si ((__neon_int32x4_t) __ax.val, (__neon_int32x4_t) __bx.val, 2); \ __rv.__i; \ }) #define vraddhn_u64(__a, __b) \ ({ \ + uint64x2_t __ax = __a; \ + uint64x2_t __bx = __b; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vaddhnv2di ((__neon_int64x2_t) __a.val, (__neon_int64x2_t) __b.val, 2); \ + __rv.__o = __builtin_neon_vaddhnv2di ((__neon_int64x2_t) __ax.val, 
(__neon_int64x2_t) __bx.val, 2); \ __rv.__i; \ }) #define vmul_s8(__a, __b) \ ({ \ + int8x8_t __ax = __a; \ + int8x8_t __bx = __b; \ union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmulv8qi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vmulv8qi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vmul_s16(__a, __b) \ ({ \ + int16x4_t __ax = __a; \ + int16x4_t __bx = __b; \ union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmulv4hi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vmulv4hi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vmul_s32(__a, __b) \ ({ \ + int32x2_t __ax = __a; \ + int32x2_t __bx = __b; \ union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmulv2si (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vmulv2si (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vmul_f32(__a, __b) \ ({ \ + float32x2_t __ax = __a; \ + float32x2_t __bx = __b; \ union { float32x2_t __i; __neon_float32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmulv2sf (__a.val, __b.val, 5); \ + __rv.__o = __builtin_neon_vmulv2sf (__ax.val, __bx.val, 5); \ __rv.__i; \ }) #define vmul_u8(__a, __b) \ ({ \ + uint8x8_t __ax = __a; \ + uint8x8_t __bx = __b; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmulv8qi ((__neon_int8x8_t) __a.val, (__neon_int8x8_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vmulv8qi ((__neon_int8x8_t) __ax.val, (__neon_int8x8_t) __bx.val, 0); \ __rv.__i; \ }) #define vmul_u16(__a, __b) \ ({ \ + uint16x4_t __ax = __a; \ + uint16x4_t __bx = __b; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmulv4hi ((__neon_int16x4_t) __a.val, (__neon_int16x4_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vmulv4hi ((__neon_int16x4_t) __ax.val, (__neon_int16x4_t) __bx.val, 0); \ __rv.__i; \ }) #define vmul_u32(__a, __b) \ ({ \ + uint32x2_t __ax = __a; \ + uint32x2_t __bx = __b; \ union { 
uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmulv2si ((__neon_int32x2_t) __a.val, (__neon_int32x2_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vmulv2si ((__neon_int32x2_t) __ax.val, (__neon_int32x2_t) __bx.val, 0); \ __rv.__i; \ }) #define vmul_p8(__a, __b) \ ({ \ + poly8x8_t __ax = __a; \ + poly8x8_t __bx = __b; \ union { poly8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmulv8qi ((__neon_int8x8_t) __a.val, (__neon_int8x8_t) __b.val, 4); \ + __rv.__o = __builtin_neon_vmulv8qi ((__neon_int8x8_t) __ax.val, (__neon_int8x8_t) __bx.val, 4); \ __rv.__i; \ }) #define vmulq_s8(__a, __b) \ ({ \ + int8x16_t __ax = __a; \ + int8x16_t __bx = __b; \ union { int8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmulv16qi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vmulv16qi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vmulq_s16(__a, __b) \ ({ \ + int16x8_t __ax = __a; \ + int16x8_t __bx = __b; \ union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmulv8hi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vmulv8hi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vmulq_s32(__a, __b) \ ({ \ + int32x4_t __ax = __a; \ + int32x4_t __bx = __b; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmulv4si (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vmulv4si (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vmulq_f32(__a, __b) \ ({ \ + float32x4_t __ax = __a; \ + float32x4_t __bx = __b; \ union { float32x4_t __i; __neon_float32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmulv4sf (__a.val, __b.val, 5); \ + __rv.__o = __builtin_neon_vmulv4sf (__ax.val, __bx.val, 5); \ __rv.__i; \ }) #define vmulq_u8(__a, __b) \ ({ \ + uint8x16_t __ax = __a; \ + uint8x16_t __bx = __b; \ union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmulv16qi ((__neon_int8x16_t) __a.val, (__neon_int8x16_t) __b.val, 
0); \ + __rv.__o = __builtin_neon_vmulv16qi ((__neon_int8x16_t) __ax.val, (__neon_int8x16_t) __bx.val, 0); \ __rv.__i; \ }) #define vmulq_u16(__a, __b) \ ({ \ + uint16x8_t __ax = __a; \ + uint16x8_t __bx = __b; \ union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmulv8hi ((__neon_int16x8_t) __a.val, (__neon_int16x8_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vmulv8hi ((__neon_int16x8_t) __ax.val, (__neon_int16x8_t) __bx.val, 0); \ __rv.__i; \ }) #define vmulq_u32(__a, __b) \ ({ \ + uint32x4_t __ax = __a; \ + uint32x4_t __bx = __b; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmulv4si ((__neon_int32x4_t) __a.val, (__neon_int32x4_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vmulv4si ((__neon_int32x4_t) __ax.val, (__neon_int32x4_t) __bx.val, 0); \ __rv.__i; \ }) #define vmulq_p8(__a, __b) \ ({ \ + poly8x16_t __ax = __a; \ + poly8x16_t __bx = __b; \ union { poly8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmulv16qi ((__neon_int8x16_t) __a.val, (__neon_int8x16_t) __b.val, 4); \ + __rv.__o = __builtin_neon_vmulv16qi ((__neon_int8x16_t) __ax.val, (__neon_int8x16_t) __bx.val, 4); \ __rv.__i; \ }) #define vqdmulh_s16(__a, __b) \ ({ \ + int16x4_t __ax = __a; \ + int16x4_t __bx = __b; \ union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqdmulhv4hi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vqdmulhv4hi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vqdmulh_s32(__a, __b) \ ({ \ + int32x2_t __ax = __a; \ + int32x2_t __bx = __b; \ union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqdmulhv2si (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vqdmulhv2si (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vqdmulhq_s16(__a, __b) \ ({ \ + int16x8_t __ax = __a; \ + int16x8_t __bx = __b; \ union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqdmulhv8hi (__a.val, __b.val, 
1); \ + __rv.__o = __builtin_neon_vqdmulhv8hi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vqdmulhq_s32(__a, __b) \ ({ \ + int32x4_t __ax = __a; \ + int32x4_t __bx = __b; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqdmulhv4si (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vqdmulhv4si (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vqrdmulh_s16(__a, __b) \ ({ \ + int16x4_t __ax = __a; \ + int16x4_t __bx = __b; \ union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqdmulhv4hi (__a.val, __b.val, 3); \ + __rv.__o = __builtin_neon_vqdmulhv4hi (__ax.val, __bx.val, 3); \ __rv.__i; \ }) #define vqrdmulh_s32(__a, __b) \ ({ \ + int32x2_t __ax = __a; \ + int32x2_t __bx = __b; \ union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqdmulhv2si (__a.val, __b.val, 3); \ + __rv.__o = __builtin_neon_vqdmulhv2si (__ax.val, __bx.val, 3); \ __rv.__i; \ }) #define vqrdmulhq_s16(__a, __b) \ ({ \ + int16x8_t __ax = __a; \ + int16x8_t __bx = __b; \ union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqdmulhv8hi (__a.val, __b.val, 3); \ + __rv.__o = __builtin_neon_vqdmulhv8hi (__ax.val, __bx.val, 3); \ __rv.__i; \ }) #define vqrdmulhq_s32(__a, __b) \ ({ \ + int32x4_t __ax = __a; \ + int32x4_t __bx = __b; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqdmulhv4si (__a.val, __b.val, 3); \ + __rv.__o = __builtin_neon_vqdmulhv4si (__ax.val, __bx.val, 3); \ __rv.__i; \ }) #define vmull_s8(__a, __b) \ ({ \ + int8x8_t __ax = __a; \ + int8x8_t __bx = __b; \ union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmullv8qi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vmullv8qi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vmull_s16(__a, __b) \ ({ \ + int16x4_t __ax = __a; \ + int16x4_t __bx = __b; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = 
__builtin_neon_vmullv4hi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vmullv4hi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vmull_s32(__a, __b) \ ({ \ + int32x2_t __ax = __a; \ + int32x2_t __bx = __b; \ union { int64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmullv2si (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vmullv2si (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vmull_u8(__a, __b) \ ({ \ + uint8x8_t __ax = __a; \ + uint8x8_t __bx = __b; \ union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmullv8qi ((__neon_int8x8_t) __a.val, (__neon_int8x8_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vmullv8qi ((__neon_int8x8_t) __ax.val, (__neon_int8x8_t) __bx.val, 0); \ __rv.__i; \ }) #define vmull_u16(__a, __b) \ ({ \ + uint16x4_t __ax = __a; \ + uint16x4_t __bx = __b; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmullv4hi ((__neon_int16x4_t) __a.val, (__neon_int16x4_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vmullv4hi ((__neon_int16x4_t) __ax.val, (__neon_int16x4_t) __bx.val, 0); \ __rv.__i; \ }) #define vmull_u32(__a, __b) \ ({ \ + uint32x2_t __ax = __a; \ + uint32x2_t __bx = __b; \ union { uint64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmullv2si ((__neon_int32x2_t) __a.val, (__neon_int32x2_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vmullv2si ((__neon_int32x2_t) __ax.val, (__neon_int32x2_t) __bx.val, 0); \ __rv.__i; \ }) #define vmull_p8(__a, __b) \ ({ \ + poly8x8_t __ax = __a; \ + poly8x8_t __bx = __b; \ union { poly16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmullv8qi ((__neon_int8x8_t) __a.val, (__neon_int8x8_t) __b.val, 4); \ + __rv.__o = __builtin_neon_vmullv8qi ((__neon_int8x8_t) __ax.val, (__neon_int8x8_t) __bx.val, 4); \ __rv.__i; \ }) #define vqdmull_s16(__a, __b) \ ({ \ + int16x4_t __ax = __a; \ + int16x4_t __bx = __b; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; 
\ - __rv.__o = __builtin_neon_vqdmullv4hi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vqdmullv4hi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vqdmull_s32(__a, __b) \ ({ \ + int32x2_t __ax = __a; \ + int32x2_t __bx = __b; \ union { int64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqdmullv2si (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vqdmullv2si (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vmla_s8(__a, __b, __c) \ ({ \ + int8x8_t __ax = __a; \ + int8x8_t __bx = __b; \ + int8x8_t __cx = __c; \ union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmlav8qi (__a.val, __b.val, __c.val, 1); \ + __rv.__o = __builtin_neon_vmlav8qi (__ax.val, __bx.val, __cx.val, 1); \ __rv.__i; \ }) #define vmla_s16(__a, __b, __c) \ ({ \ + int16x4_t __ax = __a; \ + int16x4_t __bx = __b; \ + int16x4_t __cx = __c; \ union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmlav4hi (__a.val, __b.val, __c.val, 1); \ + __rv.__o = __builtin_neon_vmlav4hi (__ax.val, __bx.val, __cx.val, 1); \ __rv.__i; \ }) #define vmla_s32(__a, __b, __c) \ ({ \ + int32x2_t __ax = __a; \ + int32x2_t __bx = __b; \ + int32x2_t __cx = __c; \ union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmlav2si (__a.val, __b.val, __c.val, 1); \ + __rv.__o = __builtin_neon_vmlav2si (__ax.val, __bx.val, __cx.val, 1); \ __rv.__i; \ }) #define vmla_f32(__a, __b, __c) \ ({ \ + float32x2_t __ax = __a; \ + float32x2_t __bx = __b; \ + float32x2_t __cx = __c; \ union { float32x2_t __i; __neon_float32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmlav2sf (__a.val, __b.val, __c.val, 5); \ + __rv.__o = __builtin_neon_vmlav2sf (__ax.val, __bx.val, __cx.val, 5); \ __rv.__i; \ }) #define vmla_u8(__a, __b, __c) \ ({ \ + uint8x8_t __ax = __a; \ + uint8x8_t __bx = __b; \ + uint8x8_t __cx = __c; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmlav8qi ((__neon_int8x8_t) 
__a.val, (__neon_int8x8_t) __b.val, (__neon_int8x8_t) __c.val, 0); \ + __rv.__o = __builtin_neon_vmlav8qi ((__neon_int8x8_t) __ax.val, (__neon_int8x8_t) __bx.val, (__neon_int8x8_t) __cx.val, 0); \ __rv.__i; \ }) #define vmla_u16(__a, __b, __c) \ ({ \ + uint16x4_t __ax = __a; \ + uint16x4_t __bx = __b; \ + uint16x4_t __cx = __c; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmlav4hi ((__neon_int16x4_t) __a.val, (__neon_int16x4_t) __b.val, (__neon_int16x4_t) __c.val, 0); \ + __rv.__o = __builtin_neon_vmlav4hi ((__neon_int16x4_t) __ax.val, (__neon_int16x4_t) __bx.val, (__neon_int16x4_t) __cx.val, 0); \ __rv.__i; \ }) #define vmla_u32(__a, __b, __c) \ ({ \ + uint32x2_t __ax = __a; \ + uint32x2_t __bx = __b; \ + uint32x2_t __cx = __c; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmlav2si ((__neon_int32x2_t) __a.val, (__neon_int32x2_t) __b.val, (__neon_int32x2_t) __c.val, 0); \ + __rv.__o = __builtin_neon_vmlav2si ((__neon_int32x2_t) __ax.val, (__neon_int32x2_t) __bx.val, (__neon_int32x2_t) __cx.val, 0); \ __rv.__i; \ }) #define vmlaq_s8(__a, __b, __c) \ ({ \ + int8x16_t __ax = __a; \ + int8x16_t __bx = __b; \ + int8x16_t __cx = __c; \ union { int8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmlav16qi (__a.val, __b.val, __c.val, 1); \ + __rv.__o = __builtin_neon_vmlav16qi (__ax.val, __bx.val, __cx.val, 1); \ __rv.__i; \ }) #define vmlaq_s16(__a, __b, __c) \ ({ \ + int16x8_t __ax = __a; \ + int16x8_t __bx = __b; \ + int16x8_t __cx = __c; \ union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmlav8hi (__a.val, __b.val, __c.val, 1); \ + __rv.__o = __builtin_neon_vmlav8hi (__ax.val, __bx.val, __cx.val, 1); \ __rv.__i; \ }) #define vmlaq_s32(__a, __b, __c) \ ({ \ + int32x4_t __ax = __a; \ + int32x4_t __bx = __b; \ + int32x4_t __cx = __c; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmlav4si (__a.val, 
__b.val, __c.val, 1); \ + __rv.__o = __builtin_neon_vmlav4si (__ax.val, __bx.val, __cx.val, 1); \ __rv.__i; \ }) #define vmlaq_f32(__a, __b, __c) \ ({ \ + float32x4_t __ax = __a; \ + float32x4_t __bx = __b; \ + float32x4_t __cx = __c; \ union { float32x4_t __i; __neon_float32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmlav4sf (__a.val, __b.val, __c.val, 5); \ + __rv.__o = __builtin_neon_vmlav4sf (__ax.val, __bx.val, __cx.val, 5); \ __rv.__i; \ }) #define vmlaq_u8(__a, __b, __c) \ ({ \ + uint8x16_t __ax = __a; \ + uint8x16_t __bx = __b; \ + uint8x16_t __cx = __c; \ union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmlav16qi ((__neon_int8x16_t) __a.val, (__neon_int8x16_t) __b.val, (__neon_int8x16_t) __c.val, 0); \ + __rv.__o = __builtin_neon_vmlav16qi ((__neon_int8x16_t) __ax.val, (__neon_int8x16_t) __bx.val, (__neon_int8x16_t) __cx.val, 0); \ __rv.__i; \ }) #define vmlaq_u16(__a, __b, __c) \ ({ \ + uint16x8_t __ax = __a; \ + uint16x8_t __bx = __b; \ + uint16x8_t __cx = __c; \ union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmlav8hi ((__neon_int16x8_t) __a.val, (__neon_int16x8_t) __b.val, (__neon_int16x8_t) __c.val, 0); \ + __rv.__o = __builtin_neon_vmlav8hi ((__neon_int16x8_t) __ax.val, (__neon_int16x8_t) __bx.val, (__neon_int16x8_t) __cx.val, 0); \ __rv.__i; \ }) #define vmlaq_u32(__a, __b, __c) \ ({ \ + uint32x4_t __ax = __a; \ + uint32x4_t __bx = __b; \ + uint32x4_t __cx = __c; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmlav4si ((__neon_int32x4_t) __a.val, (__neon_int32x4_t) __b.val, (__neon_int32x4_t) __c.val, 0); \ + __rv.__o = __builtin_neon_vmlav4si ((__neon_int32x4_t) __ax.val, (__neon_int32x4_t) __bx.val, (__neon_int32x4_t) __cx.val, 0); \ __rv.__i; \ }) #define vmlal_s8(__a, __b, __c) \ ({ \ + int16x8_t __ax = __a; \ + int8x8_t __bx = __b; \ + int8x8_t __cx = __c; \ union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = 
__builtin_neon_vmlalv8qi (__a.val, __b.val, __c.val, 1); \ + __rv.__o = __builtin_neon_vmlalv8qi (__ax.val, __bx.val, __cx.val, 1); \ __rv.__i; \ }) #define vmlal_s16(__a, __b, __c) \ ({ \ + int32x4_t __ax = __a; \ + int16x4_t __bx = __b; \ + int16x4_t __cx = __c; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmlalv4hi (__a.val, __b.val, __c.val, 1); \ + __rv.__o = __builtin_neon_vmlalv4hi (__ax.val, __bx.val, __cx.val, 1); \ __rv.__i; \ }) #define vmlal_s32(__a, __b, __c) \ ({ \ + int64x2_t __ax = __a; \ + int32x2_t __bx = __b; \ + int32x2_t __cx = __c; \ union { int64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmlalv2si (__a.val, __b.val, __c.val, 1); \ + __rv.__o = __builtin_neon_vmlalv2si (__ax.val, __bx.val, __cx.val, 1); \ __rv.__i; \ }) #define vmlal_u8(__a, __b, __c) \ ({ \ + uint16x8_t __ax = __a; \ + uint8x8_t __bx = __b; \ + uint8x8_t __cx = __c; \ union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmlalv8qi ((__neon_int16x8_t) __a.val, (__neon_int8x8_t) __b.val, (__neon_int8x8_t) __c.val, 0); \ + __rv.__o = __builtin_neon_vmlalv8qi ((__neon_int16x8_t) __ax.val, (__neon_int8x8_t) __bx.val, (__neon_int8x8_t) __cx.val, 0); \ __rv.__i; \ }) #define vmlal_u16(__a, __b, __c) \ ({ \ + uint32x4_t __ax = __a; \ + uint16x4_t __bx = __b; \ + uint16x4_t __cx = __c; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmlalv4hi ((__neon_int32x4_t) __a.val, (__neon_int16x4_t) __b.val, (__neon_int16x4_t) __c.val, 0); \ + __rv.__o = __builtin_neon_vmlalv4hi ((__neon_int32x4_t) __ax.val, (__neon_int16x4_t) __bx.val, (__neon_int16x4_t) __cx.val, 0); \ __rv.__i; \ }) #define vmlal_u32(__a, __b, __c) \ ({ \ + uint64x2_t __ax = __a; \ + uint32x2_t __bx = __b; \ + uint32x2_t __cx = __c; \ union { uint64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmlalv2si ((__neon_int64x2_t) __a.val, (__neon_int32x2_t) __b.val, 
(__neon_int32x2_t) __c.val, 0); \ + __rv.__o = __builtin_neon_vmlalv2si ((__neon_int64x2_t) __ax.val, (__neon_int32x2_t) __bx.val, (__neon_int32x2_t) __cx.val, 0); \ __rv.__i; \ }) #define vqdmlal_s16(__a, __b, __c) \ ({ \ + int32x4_t __ax = __a; \ + int16x4_t __bx = __b; \ + int16x4_t __cx = __c; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqdmlalv4hi (__a.val, __b.val, __c.val, 1); \ + __rv.__o = __builtin_neon_vqdmlalv4hi (__ax.val, __bx.val, __cx.val, 1); \ __rv.__i; \ }) #define vqdmlal_s32(__a, __b, __c) \ ({ \ + int64x2_t __ax = __a; \ + int32x2_t __bx = __b; \ + int32x2_t __cx = __c; \ union { int64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqdmlalv2si (__a.val, __b.val, __c.val, 1); \ + __rv.__o = __builtin_neon_vqdmlalv2si (__ax.val, __bx.val, __cx.val, 1); \ __rv.__i; \ }) #define vmls_s8(__a, __b, __c) \ ({ \ + int8x8_t __ax = __a; \ + int8x8_t __bx = __b; \ + int8x8_t __cx = __c; \ union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmlsv8qi (__a.val, __b.val, __c.val, 1); \ + __rv.__o = __builtin_neon_vmlsv8qi (__ax.val, __bx.val, __cx.val, 1); \ __rv.__i; \ }) #define vmls_s16(__a, __b, __c) \ ({ \ + int16x4_t __ax = __a; \ + int16x4_t __bx = __b; \ + int16x4_t __cx = __c; \ union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmlsv4hi (__a.val, __b.val, __c.val, 1); \ + __rv.__o = __builtin_neon_vmlsv4hi (__ax.val, __bx.val, __cx.val, 1); \ __rv.__i; \ }) #define vmls_s32(__a, __b, __c) \ ({ \ + int32x2_t __ax = __a; \ + int32x2_t __bx = __b; \ + int32x2_t __cx = __c; \ union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmlsv2si (__a.val, __b.val, __c.val, 1); \ + __rv.__o = __builtin_neon_vmlsv2si (__ax.val, __bx.val, __cx.val, 1); \ __rv.__i; \ }) #define vmls_f32(__a, __b, __c) \ ({ \ + float32x2_t __ax = __a; \ + float32x2_t __bx = __b; \ + float32x2_t __cx = __c; \ union { float32x2_t 
__i; __neon_float32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmlsv2sf (__a.val, __b.val, __c.val, 5); \ + __rv.__o = __builtin_neon_vmlsv2sf (__ax.val, __bx.val, __cx.val, 5); \ __rv.__i; \ }) #define vmls_u8(__a, __b, __c) \ ({ \ + uint8x8_t __ax = __a; \ + uint8x8_t __bx = __b; \ + uint8x8_t __cx = __c; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmlsv8qi ((__neon_int8x8_t) __a.val, (__neon_int8x8_t) __b.val, (__neon_int8x8_t) __c.val, 0); \ + __rv.__o = __builtin_neon_vmlsv8qi ((__neon_int8x8_t) __ax.val, (__neon_int8x8_t) __bx.val, (__neon_int8x8_t) __cx.val, 0); \ __rv.__i; \ }) #define vmls_u16(__a, __b, __c) \ ({ \ + uint16x4_t __ax = __a; \ + uint16x4_t __bx = __b; \ + uint16x4_t __cx = __c; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmlsv4hi ((__neon_int16x4_t) __a.val, (__neon_int16x4_t) __b.val, (__neon_int16x4_t) __c.val, 0); \ + __rv.__o = __builtin_neon_vmlsv4hi ((__neon_int16x4_t) __ax.val, (__neon_int16x4_t) __bx.val, (__neon_int16x4_t) __cx.val, 0); \ __rv.__i; \ }) #define vmls_u32(__a, __b, __c) \ ({ \ + uint32x2_t __ax = __a; \ + uint32x2_t __bx = __b; \ + uint32x2_t __cx = __c; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmlsv2si ((__neon_int32x2_t) __a.val, (__neon_int32x2_t) __b.val, (__neon_int32x2_t) __c.val, 0); \ + __rv.__o = __builtin_neon_vmlsv2si ((__neon_int32x2_t) __ax.val, (__neon_int32x2_t) __bx.val, (__neon_int32x2_t) __cx.val, 0); \ __rv.__i; \ }) #define vmlsq_s8(__a, __b, __c) \ ({ \ + int8x16_t __ax = __a; \ + int8x16_t __bx = __b; \ + int8x16_t __cx = __c; \ union { int8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmlsv16qi (__a.val, __b.val, __c.val, 1); \ + __rv.__o = __builtin_neon_vmlsv16qi (__ax.val, __bx.val, __cx.val, 1); \ __rv.__i; \ }) #define vmlsq_s16(__a, __b, __c) \ ({ \ + int16x8_t __ax = __a; \ + int16x8_t __bx = __b; \ + int16x8_t __cx = __c; \ union { 
int16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmlsv8hi (__a.val, __b.val, __c.val, 1); \ + __rv.__o = __builtin_neon_vmlsv8hi (__ax.val, __bx.val, __cx.val, 1); \ __rv.__i; \ }) #define vmlsq_s32(__a, __b, __c) \ ({ \ + int32x4_t __ax = __a; \ + int32x4_t __bx = __b; \ + int32x4_t __cx = __c; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmlsv4si (__a.val, __b.val, __c.val, 1); \ + __rv.__o = __builtin_neon_vmlsv4si (__ax.val, __bx.val, __cx.val, 1); \ __rv.__i; \ }) #define vmlsq_f32(__a, __b, __c) \ ({ \ + float32x4_t __ax = __a; \ + float32x4_t __bx = __b; \ + float32x4_t __cx = __c; \ union { float32x4_t __i; __neon_float32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmlsv4sf (__a.val, __b.val, __c.val, 5); \ + __rv.__o = __builtin_neon_vmlsv4sf (__ax.val, __bx.val, __cx.val, 5); \ __rv.__i; \ }) #define vmlsq_u8(__a, __b, __c) \ ({ \ + uint8x16_t __ax = __a; \ + uint8x16_t __bx = __b; \ + uint8x16_t __cx = __c; \ union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmlsv16qi ((__neon_int8x16_t) __a.val, (__neon_int8x16_t) __b.val, (__neon_int8x16_t) __c.val, 0); \ + __rv.__o = __builtin_neon_vmlsv16qi ((__neon_int8x16_t) __ax.val, (__neon_int8x16_t) __bx.val, (__neon_int8x16_t) __cx.val, 0); \ __rv.__i; \ }) #define vmlsq_u16(__a, __b, __c) \ ({ \ + uint16x8_t __ax = __a; \ + uint16x8_t __bx = __b; \ + uint16x8_t __cx = __c; \ union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmlsv8hi ((__neon_int16x8_t) __a.val, (__neon_int16x8_t) __b.val, (__neon_int16x8_t) __c.val, 0); \ + __rv.__o = __builtin_neon_vmlsv8hi ((__neon_int16x8_t) __ax.val, (__neon_int16x8_t) __bx.val, (__neon_int16x8_t) __cx.val, 0); \ __rv.__i; \ }) #define vmlsq_u32(__a, __b, __c) \ ({ \ + uint32x4_t __ax = __a; \ + uint32x4_t __bx = __b; \ + uint32x4_t __cx = __c; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmlsv4si 
((__neon_int32x4_t) __a.val, (__neon_int32x4_t) __b.val, (__neon_int32x4_t) __c.val, 0); \ + __rv.__o = __builtin_neon_vmlsv4si ((__neon_int32x4_t) __ax.val, (__neon_int32x4_t) __bx.val, (__neon_int32x4_t) __cx.val, 0); \ __rv.__i; \ }) #define vmlsl_s8(__a, __b, __c) \ ({ \ + int16x8_t __ax = __a; \ + int8x8_t __bx = __b; \ + int8x8_t __cx = __c; \ union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmlslv8qi (__a.val, __b.val, __c.val, 1); \ + __rv.__o = __builtin_neon_vmlslv8qi (__ax.val, __bx.val, __cx.val, 1); \ __rv.__i; \ }) #define vmlsl_s16(__a, __b, __c) \ ({ \ + int32x4_t __ax = __a; \ + int16x4_t __bx = __b; \ + int16x4_t __cx = __c; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmlslv4hi (__a.val, __b.val, __c.val, 1); \ + __rv.__o = __builtin_neon_vmlslv4hi (__ax.val, __bx.val, __cx.val, 1); \ __rv.__i; \ }) #define vmlsl_s32(__a, __b, __c) \ ({ \ + int64x2_t __ax = __a; \ + int32x2_t __bx = __b; \ + int32x2_t __cx = __c; \ union { int64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmlslv2si (__a.val, __b.val, __c.val, 1); \ + __rv.__o = __builtin_neon_vmlslv2si (__ax.val, __bx.val, __cx.val, 1); \ __rv.__i; \ }) #define vmlsl_u8(__a, __b, __c) \ ({ \ + uint16x8_t __ax = __a; \ + uint8x8_t __bx = __b; \ + uint8x8_t __cx = __c; \ union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmlslv8qi ((__neon_int16x8_t) __a.val, (__neon_int8x8_t) __b.val, (__neon_int8x8_t) __c.val, 0); \ + __rv.__o = __builtin_neon_vmlslv8qi ((__neon_int16x8_t) __ax.val, (__neon_int8x8_t) __bx.val, (__neon_int8x8_t) __cx.val, 0); \ __rv.__i; \ }) #define vmlsl_u16(__a, __b, __c) \ ({ \ + uint32x4_t __ax = __a; \ + uint16x4_t __bx = __b; \ + uint16x4_t __cx = __c; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmlslv4hi ((__neon_int32x4_t) __a.val, (__neon_int16x4_t) __b.val, (__neon_int16x4_t) __c.val, 0); \ + 
__rv.__o = __builtin_neon_vmlslv4hi ((__neon_int32x4_t) __ax.val, (__neon_int16x4_t) __bx.val, (__neon_int16x4_t) __cx.val, 0); \ __rv.__i; \ }) #define vmlsl_u32(__a, __b, __c) \ ({ \ + uint64x2_t __ax = __a; \ + uint32x2_t __bx = __b; \ + uint32x2_t __cx = __c; \ union { uint64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmlslv2si ((__neon_int64x2_t) __a.val, (__neon_int32x2_t) __b.val, (__neon_int32x2_t) __c.val, 0); \ + __rv.__o = __builtin_neon_vmlslv2si ((__neon_int64x2_t) __ax.val, (__neon_int32x2_t) __bx.val, (__neon_int32x2_t) __cx.val, 0); \ __rv.__i; \ }) #define vqdmlsl_s16(__a, __b, __c) \ ({ \ + int32x4_t __ax = __a; \ + int16x4_t __bx = __b; \ + int16x4_t __cx = __c; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqdmlslv4hi (__a.val, __b.val, __c.val, 1); \ + __rv.__o = __builtin_neon_vqdmlslv4hi (__ax.val, __bx.val, __cx.val, 1); \ __rv.__i; \ }) #define vqdmlsl_s32(__a, __b, __c) \ ({ \ + int64x2_t __ax = __a; \ + int32x2_t __bx = __b; \ + int32x2_t __cx = __c; \ union { int64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqdmlslv2si (__a.val, __b.val, __c.val, 1); \ + __rv.__o = __builtin_neon_vqdmlslv2si (__ax.val, __bx.val, __cx.val, 1); \ __rv.__i; \ }) #define vsub_s8(__a, __b) \ ({ \ + int8x8_t __ax = __a; \ + int8x8_t __bx = __b; \ union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsubv8qi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vsubv8qi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vsub_s16(__a, __b) \ ({ \ + int16x4_t __ax = __a; \ + int16x4_t __bx = __b; \ union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsubv4hi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vsubv4hi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vsub_s32(__a, __b) \ ({ \ + int32x2_t __ax = __a; \ + int32x2_t __bx = __b; \ union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = 
__builtin_neon_vsubv2si (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vsubv2si (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vsub_s64(__a, __b) \ ({ \ + int64x1_t __ax = __a; \ + int64x1_t __bx = __b; \ union { int64x1_t __i; __neon_int64x1_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsubv1di (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vsubv1di (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vsub_f32(__a, __b) \ ({ \ + float32x2_t __ax = __a; \ + float32x2_t __bx = __b; \ union { float32x2_t __i; __neon_float32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsubv2sf (__a.val, __b.val, 5); \ + __rv.__o = __builtin_neon_vsubv2sf (__ax.val, __bx.val, 5); \ __rv.__i; \ }) #define vsub_u8(__a, __b) \ ({ \ + uint8x8_t __ax = __a; \ + uint8x8_t __bx = __b; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsubv8qi ((__neon_int8x8_t) __a.val, (__neon_int8x8_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vsubv8qi ((__neon_int8x8_t) __ax.val, (__neon_int8x8_t) __bx.val, 0); \ __rv.__i; \ }) #define vsub_u16(__a, __b) \ ({ \ + uint16x4_t __ax = __a; \ + uint16x4_t __bx = __b; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsubv4hi ((__neon_int16x4_t) __a.val, (__neon_int16x4_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vsubv4hi ((__neon_int16x4_t) __ax.val, (__neon_int16x4_t) __bx.val, 0); \ __rv.__i; \ }) #define vsub_u32(__a, __b) \ ({ \ + uint32x2_t __ax = __a; \ + uint32x2_t __bx = __b; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsubv2si ((__neon_int32x2_t) __a.val, (__neon_int32x2_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vsubv2si ((__neon_int32x2_t) __ax.val, (__neon_int32x2_t) __bx.val, 0); \ __rv.__i; \ }) #define vsub_u64(__a, __b) \ ({ \ + uint64x1_t __ax = __a; \ + uint64x1_t __bx = __b; \ union { uint64x1_t __i; __neon_int64x1_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsubv1di ((__neon_int64x1_t) __a.val, 
(__neon_int64x1_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vsubv1di ((__neon_int64x1_t) __ax.val, (__neon_int64x1_t) __bx.val, 0); \ __rv.__i; \ }) #define vsubq_s8(__a, __b) \ ({ \ + int8x16_t __ax = __a; \ + int8x16_t __bx = __b; \ union { int8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsubv16qi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vsubv16qi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vsubq_s16(__a, __b) \ ({ \ + int16x8_t __ax = __a; \ + int16x8_t __bx = __b; \ union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsubv8hi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vsubv8hi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vsubq_s32(__a, __b) \ ({ \ + int32x4_t __ax = __a; \ + int32x4_t __bx = __b; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsubv4si (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vsubv4si (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vsubq_s64(__a, __b) \ ({ \ + int64x2_t __ax = __a; \ + int64x2_t __bx = __b; \ union { int64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsubv2di (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vsubv2di (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vsubq_f32(__a, __b) \ ({ \ + float32x4_t __ax = __a; \ + float32x4_t __bx = __b; \ union { float32x4_t __i; __neon_float32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsubv4sf (__a.val, __b.val, 5); \ + __rv.__o = __builtin_neon_vsubv4sf (__ax.val, __bx.val, 5); \ __rv.__i; \ }) #define vsubq_u8(__a, __b) \ ({ \ + uint8x16_t __ax = __a; \ + uint8x16_t __bx = __b; \ union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsubv16qi ((__neon_int8x16_t) __a.val, (__neon_int8x16_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vsubv16qi ((__neon_int8x16_t) __ax.val, (__neon_int8x16_t) __bx.val, 0); \ __rv.__i; \ }) #define vsubq_u16(__a, __b) \ ({ \ + uint16x8_t __ax = __a; \ 
+ uint16x8_t __bx = __b; \ union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsubv8hi ((__neon_int16x8_t) __a.val, (__neon_int16x8_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vsubv8hi ((__neon_int16x8_t) __ax.val, (__neon_int16x8_t) __bx.val, 0); \ __rv.__i; \ }) #define vsubq_u32(__a, __b) \ ({ \ + uint32x4_t __ax = __a; \ + uint32x4_t __bx = __b; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsubv4si ((__neon_int32x4_t) __a.val, (__neon_int32x4_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vsubv4si ((__neon_int32x4_t) __ax.val, (__neon_int32x4_t) __bx.val, 0); \ __rv.__i; \ }) #define vsubq_u64(__a, __b) \ ({ \ + uint64x2_t __ax = __a; \ + uint64x2_t __bx = __b; \ union { uint64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsubv2di ((__neon_int64x2_t) __a.val, (__neon_int64x2_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vsubv2di ((__neon_int64x2_t) __ax.val, (__neon_int64x2_t) __bx.val, 0); \ __rv.__i; \ }) #define vsubl_s8(__a, __b) \ ({ \ + int8x8_t __ax = __a; \ + int8x8_t __bx = __b; \ union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsublv8qi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vsublv8qi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vsubl_s16(__a, __b) \ ({ \ + int16x4_t __ax = __a; \ + int16x4_t __bx = __b; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsublv4hi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vsublv4hi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vsubl_s32(__a, __b) \ ({ \ + int32x2_t __ax = __a; \ + int32x2_t __bx = __b; \ union { int64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsublv2si (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vsublv2si (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vsubl_u8(__a, __b) \ ({ \ + uint8x8_t __ax = __a; \ + uint8x8_t __bx = __b; \ union { uint16x8_t __i; 
__neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsublv8qi ((__neon_int8x8_t) __a.val, (__neon_int8x8_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vsublv8qi ((__neon_int8x8_t) __ax.val, (__neon_int8x8_t) __bx.val, 0); \ __rv.__i; \ }) #define vsubl_u16(__a, __b) \ ({ \ + uint16x4_t __ax = __a; \ + uint16x4_t __bx = __b; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsublv4hi ((__neon_int16x4_t) __a.val, (__neon_int16x4_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vsublv4hi ((__neon_int16x4_t) __ax.val, (__neon_int16x4_t) __bx.val, 0); \ __rv.__i; \ }) #define vsubl_u32(__a, __b) \ ({ \ + uint32x2_t __ax = __a; \ + uint32x2_t __bx = __b; \ union { uint64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsublv2si ((__neon_int32x2_t) __a.val, (__neon_int32x2_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vsublv2si ((__neon_int32x2_t) __ax.val, (__neon_int32x2_t) __bx.val, 0); \ __rv.__i; \ }) #define vsubw_s8(__a, __b) \ ({ \ + int16x8_t __ax = __a; \ + int8x8_t __bx = __b; \ union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsubwv8qi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vsubwv8qi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vsubw_s16(__a, __b) \ ({ \ + int32x4_t __ax = __a; \ + int16x4_t __bx = __b; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsubwv4hi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vsubwv4hi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vsubw_s32(__a, __b) \ ({ \ + int64x2_t __ax = __a; \ + int32x2_t __bx = __b; \ union { int64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsubwv2si (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vsubwv2si (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vsubw_u8(__a, __b) \ ({ \ + uint16x8_t __ax = __a; \ + uint8x8_t __bx = __b; \ union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = 
__builtin_neon_vsubwv8qi ((__neon_int16x8_t) __a.val, (__neon_int8x8_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vsubwv8qi ((__neon_int16x8_t) __ax.val, (__neon_int8x8_t) __bx.val, 0); \ __rv.__i; \ }) #define vsubw_u16(__a, __b) \ ({ \ + uint32x4_t __ax = __a; \ + uint16x4_t __bx = __b; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsubwv4hi ((__neon_int32x4_t) __a.val, (__neon_int16x4_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vsubwv4hi ((__neon_int32x4_t) __ax.val, (__neon_int16x4_t) __bx.val, 0); \ __rv.__i; \ }) #define vsubw_u32(__a, __b) \ ({ \ + uint64x2_t __ax = __a; \ + uint32x2_t __bx = __b; \ union { uint64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsubwv2si ((__neon_int64x2_t) __a.val, (__neon_int32x2_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vsubwv2si ((__neon_int64x2_t) __ax.val, (__neon_int32x2_t) __bx.val, 0); \ __rv.__i; \ }) #define vhsub_s8(__a, __b) \ ({ \ + int8x8_t __ax = __a; \ + int8x8_t __bx = __b; \ union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vhsubv8qi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vhsubv8qi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vhsub_s16(__a, __b) \ ({ \ + int16x4_t __ax = __a; \ + int16x4_t __bx = __b; \ union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vhsubv4hi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vhsubv4hi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vhsub_s32(__a, __b) \ ({ \ + int32x2_t __ax = __a; \ + int32x2_t __bx = __b; \ union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vhsubv2si (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vhsubv2si (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vhsub_u8(__a, __b) \ ({ \ + uint8x8_t __ax = __a; \ + uint8x8_t __bx = __b; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vhsubv8qi ((__neon_int8x8_t) __a.val, 
(__neon_int8x8_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vhsubv8qi ((__neon_int8x8_t) __ax.val, (__neon_int8x8_t) __bx.val, 0); \ __rv.__i; \ }) #define vhsub_u16(__a, __b) \ ({ \ + uint16x4_t __ax = __a; \ + uint16x4_t __bx = __b; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vhsubv4hi ((__neon_int16x4_t) __a.val, (__neon_int16x4_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vhsubv4hi ((__neon_int16x4_t) __ax.val, (__neon_int16x4_t) __bx.val, 0); \ __rv.__i; \ }) #define vhsub_u32(__a, __b) \ ({ \ + uint32x2_t __ax = __a; \ + uint32x2_t __bx = __b; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vhsubv2si ((__neon_int32x2_t) __a.val, (__neon_int32x2_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vhsubv2si ((__neon_int32x2_t) __ax.val, (__neon_int32x2_t) __bx.val, 0); \ __rv.__i; \ }) #define vhsubq_s8(__a, __b) \ ({ \ + int8x16_t __ax = __a; \ + int8x16_t __bx = __b; \ union { int8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vhsubv16qi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vhsubv16qi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vhsubq_s16(__a, __b) \ ({ \ + int16x8_t __ax = __a; \ + int16x8_t __bx = __b; \ union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vhsubv8hi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vhsubv8hi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vhsubq_s32(__a, __b) \ ({ \ + int32x4_t __ax = __a; \ + int32x4_t __bx = __b; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vhsubv4si (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vhsubv4si (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vhsubq_u8(__a, __b) \ ({ \ + uint8x16_t __ax = __a; \ + uint8x16_t __bx = __b; \ union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vhsubv16qi ((__neon_int8x16_t) __a.val, (__neon_int8x16_t) __b.val, 0); \ + __rv.__o = 
__builtin_neon_vhsubv16qi ((__neon_int8x16_t) __ax.val, (__neon_int8x16_t) __bx.val, 0); \ __rv.__i; \ }) #define vhsubq_u16(__a, __b) \ ({ \ + uint16x8_t __ax = __a; \ + uint16x8_t __bx = __b; \ union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vhsubv8hi ((__neon_int16x8_t) __a.val, (__neon_int16x8_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vhsubv8hi ((__neon_int16x8_t) __ax.val, (__neon_int16x8_t) __bx.val, 0); \ __rv.__i; \ }) #define vhsubq_u32(__a, __b) \ ({ \ + uint32x4_t __ax = __a; \ + uint32x4_t __bx = __b; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vhsubv4si ((__neon_int32x4_t) __a.val, (__neon_int32x4_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vhsubv4si ((__neon_int32x4_t) __ax.val, (__neon_int32x4_t) __bx.val, 0); \ __rv.__i; \ }) #define vqsub_s8(__a, __b) \ ({ \ + int8x8_t __ax = __a; \ + int8x8_t __bx = __b; \ union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqsubv8qi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vqsubv8qi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vqsub_s16(__a, __b) \ ({ \ + int16x4_t __ax = __a; \ + int16x4_t __bx = __b; \ union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqsubv4hi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vqsubv4hi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vqsub_s32(__a, __b) \ ({ \ + int32x2_t __ax = __a; \ + int32x2_t __bx = __b; \ union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqsubv2si (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vqsubv2si (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vqsub_s64(__a, __b) \ ({ \ + int64x1_t __ax = __a; \ + int64x1_t __bx = __b; \ union { int64x1_t __i; __neon_int64x1_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqsubv1di (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vqsubv1di (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vqsub_u8(__a, __b) \ 
({ \ + uint8x8_t __ax = __a; \ + uint8x8_t __bx = __b; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqsubv8qi ((__neon_int8x8_t) __a.val, (__neon_int8x8_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vqsubv8qi ((__neon_int8x8_t) __ax.val, (__neon_int8x8_t) __bx.val, 0); \ __rv.__i; \ }) #define vqsub_u16(__a, __b) \ ({ \ + uint16x4_t __ax = __a; \ + uint16x4_t __bx = __b; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqsubv4hi ((__neon_int16x4_t) __a.val, (__neon_int16x4_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vqsubv4hi ((__neon_int16x4_t) __ax.val, (__neon_int16x4_t) __bx.val, 0); \ __rv.__i; \ }) #define vqsub_u32(__a, __b) \ ({ \ + uint32x2_t __ax = __a; \ + uint32x2_t __bx = __b; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqsubv2si ((__neon_int32x2_t) __a.val, (__neon_int32x2_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vqsubv2si ((__neon_int32x2_t) __ax.val, (__neon_int32x2_t) __bx.val, 0); \ __rv.__i; \ }) #define vqsub_u64(__a, __b) \ ({ \ + uint64x1_t __ax = __a; \ + uint64x1_t __bx = __b; \ union { uint64x1_t __i; __neon_int64x1_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqsubv1di ((__neon_int64x1_t) __a.val, (__neon_int64x1_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vqsubv1di ((__neon_int64x1_t) __ax.val, (__neon_int64x1_t) __bx.val, 0); \ __rv.__i; \ }) #define vqsubq_s8(__a, __b) \ ({ \ + int8x16_t __ax = __a; \ + int8x16_t __bx = __b; \ union { int8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqsubv16qi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vqsubv16qi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vqsubq_s16(__a, __b) \ ({ \ + int16x8_t __ax = __a; \ + int16x8_t __bx = __b; \ union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqsubv8hi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vqsubv8hi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define 
vqsubq_s32(__a, __b) \ ({ \ + int32x4_t __ax = __a; \ + int32x4_t __bx = __b; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqsubv4si (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vqsubv4si (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vqsubq_s64(__a, __b) \ ({ \ + int64x2_t __ax = __a; \ + int64x2_t __bx = __b; \ union { int64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqsubv2di (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vqsubv2di (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vqsubq_u8(__a, __b) \ ({ \ + uint8x16_t __ax = __a; \ + uint8x16_t __bx = __b; \ union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqsubv16qi ((__neon_int8x16_t) __a.val, (__neon_int8x16_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vqsubv16qi ((__neon_int8x16_t) __ax.val, (__neon_int8x16_t) __bx.val, 0); \ __rv.__i; \ }) #define vqsubq_u16(__a, __b) \ ({ \ + uint16x8_t __ax = __a; \ + uint16x8_t __bx = __b; \ union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqsubv8hi ((__neon_int16x8_t) __a.val, (__neon_int16x8_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vqsubv8hi ((__neon_int16x8_t) __ax.val, (__neon_int16x8_t) __bx.val, 0); \ __rv.__i; \ }) #define vqsubq_u32(__a, __b) \ ({ \ + uint32x4_t __ax = __a; \ + uint32x4_t __bx = __b; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqsubv4si ((__neon_int32x4_t) __a.val, (__neon_int32x4_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vqsubv4si ((__neon_int32x4_t) __ax.val, (__neon_int32x4_t) __bx.val, 0); \ __rv.__i; \ }) #define vqsubq_u64(__a, __b) \ ({ \ + uint64x2_t __ax = __a; \ + uint64x2_t __bx = __b; \ union { uint64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqsubv2di ((__neon_int64x2_t) __a.val, (__neon_int64x2_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vqsubv2di ((__neon_int64x2_t) __ax.val, (__neon_int64x2_t) 
__bx.val, 0); \ __rv.__i; \ }) #define vsubhn_s16(__a, __b) \ ({ \ + int16x8_t __ax = __a; \ + int16x8_t __bx = __b; \ union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsubhnv8hi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vsubhnv8hi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vsubhn_s32(__a, __b) \ ({ \ + int32x4_t __ax = __a; \ + int32x4_t __bx = __b; \ union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsubhnv4si (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vsubhnv4si (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vsubhn_s64(__a, __b) \ ({ \ + int64x2_t __ax = __a; \ + int64x2_t __bx = __b; \ union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsubhnv2di (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vsubhnv2di (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vsubhn_u16(__a, __b) \ ({ \ + uint16x8_t __ax = __a; \ + uint16x8_t __bx = __b; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsubhnv8hi ((__neon_int16x8_t) __a.val, (__neon_int16x8_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vsubhnv8hi ((__neon_int16x8_t) __ax.val, (__neon_int16x8_t) __bx.val, 0); \ __rv.__i; \ }) #define vsubhn_u32(__a, __b) \ ({ \ + uint32x4_t __ax = __a; \ + uint32x4_t __bx = __b; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsubhnv4si ((__neon_int32x4_t) __a.val, (__neon_int32x4_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vsubhnv4si ((__neon_int32x4_t) __ax.val, (__neon_int32x4_t) __bx.val, 0); \ __rv.__i; \ }) #define vsubhn_u64(__a, __b) \ ({ \ + uint64x2_t __ax = __a; \ + uint64x2_t __bx = __b; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsubhnv2di ((__neon_int64x2_t) __a.val, (__neon_int64x2_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vsubhnv2di ((__neon_int64x2_t) __ax.val, (__neon_int64x2_t) __bx.val, 0); \ __rv.__i; \ }) #define 
vrsubhn_s16(__a, __b) \ ({ \ + int16x8_t __ax = __a; \ + int16x8_t __bx = __b; \ union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsubhnv8hi (__a.val, __b.val, 3); \ + __rv.__o = __builtin_neon_vsubhnv8hi (__ax.val, __bx.val, 3); \ __rv.__i; \ }) #define vrsubhn_s32(__a, __b) \ ({ \ + int32x4_t __ax = __a; \ + int32x4_t __bx = __b; \ union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsubhnv4si (__a.val, __b.val, 3); \ + __rv.__o = __builtin_neon_vsubhnv4si (__ax.val, __bx.val, 3); \ __rv.__i; \ }) #define vrsubhn_s64(__a, __b) \ ({ \ + int64x2_t __ax = __a; \ + int64x2_t __bx = __b; \ union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsubhnv2di (__a.val, __b.val, 3); \ + __rv.__o = __builtin_neon_vsubhnv2di (__ax.val, __bx.val, 3); \ __rv.__i; \ }) #define vrsubhn_u16(__a, __b) \ ({ \ + uint16x8_t __ax = __a; \ + uint16x8_t __bx = __b; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsubhnv8hi ((__neon_int16x8_t) __a.val, (__neon_int16x8_t) __b.val, 2); \ + __rv.__o = __builtin_neon_vsubhnv8hi ((__neon_int16x8_t) __ax.val, (__neon_int16x8_t) __bx.val, 2); \ __rv.__i; \ }) #define vrsubhn_u32(__a, __b) \ ({ \ + uint32x4_t __ax = __a; \ + uint32x4_t __bx = __b; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsubhnv4si ((__neon_int32x4_t) __a.val, (__neon_int32x4_t) __b.val, 2); \ + __rv.__o = __builtin_neon_vsubhnv4si ((__neon_int32x4_t) __ax.val, (__neon_int32x4_t) __bx.val, 2); \ __rv.__i; \ }) #define vrsubhn_u64(__a, __b) \ ({ \ + uint64x2_t __ax = __a; \ + uint64x2_t __bx = __b; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsubhnv2di ((__neon_int64x2_t) __a.val, (__neon_int64x2_t) __b.val, 2); \ + __rv.__o = __builtin_neon_vsubhnv2di ((__neon_int64x2_t) __ax.val, (__neon_int64x2_t) __bx.val, 2); \ __rv.__i; \ }) #define vceq_s8(__a, __b) \ ({ \ + 
int8x8_t __ax = __a; \ + int8x8_t __bx = __b; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vceqv8qi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vceqv8qi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vceq_s16(__a, __b) \ ({ \ + int16x4_t __ax = __a; \ + int16x4_t __bx = __b; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vceqv4hi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vceqv4hi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vceq_s32(__a, __b) \ ({ \ + int32x2_t __ax = __a; \ + int32x2_t __bx = __b; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vceqv2si (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vceqv2si (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vceq_f32(__a, __b) \ ({ \ + float32x2_t __ax = __a; \ + float32x2_t __bx = __b; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vceqv2sf (__a.val, __b.val, 5); \ + __rv.__o = __builtin_neon_vceqv2sf (__ax.val, __bx.val, 5); \ __rv.__i; \ }) #define vceq_u8(__a, __b) \ ({ \ + uint8x8_t __ax = __a; \ + uint8x8_t __bx = __b; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vceqv8qi ((__neon_int8x8_t) __a.val, (__neon_int8x8_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vceqv8qi ((__neon_int8x8_t) __ax.val, (__neon_int8x8_t) __bx.val, 0); \ __rv.__i; \ }) #define vceq_u16(__a, __b) \ ({ \ + uint16x4_t __ax = __a; \ + uint16x4_t __bx = __b; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vceqv4hi ((__neon_int16x4_t) __a.val, (__neon_int16x4_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vceqv4hi ((__neon_int16x4_t) __ax.val, (__neon_int16x4_t) __bx.val, 0); \ __rv.__i; \ }) #define vceq_u32(__a, __b) \ ({ \ + uint32x2_t __ax = __a; \ + uint32x2_t __bx = __b; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vceqv2si 
((__neon_int32x2_t) __a.val, (__neon_int32x2_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vceqv2si ((__neon_int32x2_t) __ax.val, (__neon_int32x2_t) __bx.val, 0); \ __rv.__i; \ }) #define vceq_p8(__a, __b) \ ({ \ + poly8x8_t __ax = __a; \ + poly8x8_t __bx = __b; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vceqv8qi ((__neon_int8x8_t) __a.val, (__neon_int8x8_t) __b.val, 4); \ + __rv.__o = __builtin_neon_vceqv8qi ((__neon_int8x8_t) __ax.val, (__neon_int8x8_t) __bx.val, 4); \ __rv.__i; \ }) #define vceqq_s8(__a, __b) \ ({ \ + int8x16_t __ax = __a; \ + int8x16_t __bx = __b; \ union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vceqv16qi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vceqv16qi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vceqq_s16(__a, __b) \ ({ \ + int16x8_t __ax = __a; \ + int16x8_t __bx = __b; \ union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vceqv8hi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vceqv8hi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vceqq_s32(__a, __b) \ ({ \ + int32x4_t __ax = __a; \ + int32x4_t __bx = __b; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vceqv4si (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vceqv4si (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vceqq_f32(__a, __b) \ ({ \ + float32x4_t __ax = __a; \ + float32x4_t __bx = __b; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vceqv4sf (__a.val, __b.val, 5); \ + __rv.__o = __builtin_neon_vceqv4sf (__ax.val, __bx.val, 5); \ __rv.__i; \ }) #define vceqq_u8(__a, __b) \ ({ \ + uint8x16_t __ax = __a; \ + uint8x16_t __bx = __b; \ union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vceqv16qi ((__neon_int8x16_t) __a.val, (__neon_int8x16_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vceqv16qi ((__neon_int8x16_t) __ax.val, 
(__neon_int8x16_t) __bx.val, 0); \ __rv.__i; \ }) #define vceqq_u16(__a, __b) \ ({ \ + uint16x8_t __ax = __a; \ + uint16x8_t __bx = __b; \ union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vceqv8hi ((__neon_int16x8_t) __a.val, (__neon_int16x8_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vceqv8hi ((__neon_int16x8_t) __ax.val, (__neon_int16x8_t) __bx.val, 0); \ __rv.__i; \ }) #define vceqq_u32(__a, __b) \ ({ \ + uint32x4_t __ax = __a; \ + uint32x4_t __bx = __b; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vceqv4si ((__neon_int32x4_t) __a.val, (__neon_int32x4_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vceqv4si ((__neon_int32x4_t) __ax.val, (__neon_int32x4_t) __bx.val, 0); \ __rv.__i; \ }) #define vceqq_p8(__a, __b) \ ({ \ + poly8x16_t __ax = __a; \ + poly8x16_t __bx = __b; \ union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vceqv16qi ((__neon_int8x16_t) __a.val, (__neon_int8x16_t) __b.val, 4); \ + __rv.__o = __builtin_neon_vceqv16qi ((__neon_int8x16_t) __ax.val, (__neon_int8x16_t) __bx.val, 4); \ __rv.__i; \ }) #define vcge_s8(__a, __b) \ ({ \ + int8x8_t __ax = __a; \ + int8x8_t __bx = __b; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcgev8qi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vcgev8qi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vcge_s16(__a, __b) \ ({ \ + int16x4_t __ax = __a; \ + int16x4_t __bx = __b; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcgev4hi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vcgev4hi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vcge_s32(__a, __b) \ ({ \ + int32x2_t __ax = __a; \ + int32x2_t __bx = __b; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcgev2si (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vcgev2si (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define 
vcge_f32(__a, __b) \ ({ \ + float32x2_t __ax = __a; \ + float32x2_t __bx = __b; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcgev2sf (__a.val, __b.val, 5); \ + __rv.__o = __builtin_neon_vcgev2sf (__ax.val, __bx.val, 5); \ __rv.__i; \ }) #define vcge_u8(__a, __b) \ ({ \ + uint8x8_t __ax = __a; \ + uint8x8_t __bx = __b; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcgev8qi ((__neon_int8x8_t) __a.val, (__neon_int8x8_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vcgev8qi ((__neon_int8x8_t) __ax.val, (__neon_int8x8_t) __bx.val, 0); \ __rv.__i; \ }) #define vcge_u16(__a, __b) \ ({ \ + uint16x4_t __ax = __a; \ + uint16x4_t __bx = __b; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcgev4hi ((__neon_int16x4_t) __a.val, (__neon_int16x4_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vcgev4hi ((__neon_int16x4_t) __ax.val, (__neon_int16x4_t) __bx.val, 0); \ __rv.__i; \ }) #define vcge_u32(__a, __b) \ ({ \ + uint32x2_t __ax = __a; \ + uint32x2_t __bx = __b; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcgev2si ((__neon_int32x2_t) __a.val, (__neon_int32x2_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vcgev2si ((__neon_int32x2_t) __ax.val, (__neon_int32x2_t) __bx.val, 0); \ __rv.__i; \ }) #define vcgeq_s8(__a, __b) \ ({ \ + int8x16_t __ax = __a; \ + int8x16_t __bx = __b; \ union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcgev16qi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vcgev16qi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vcgeq_s16(__a, __b) \ ({ \ + int16x8_t __ax = __a; \ + int16x8_t __bx = __b; \ union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcgev8hi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vcgev8hi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vcgeq_s32(__a, __b) \ ({ \ + int32x4_t __ax = __a; \ + int32x4_t __bx 
= __b; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcgev4si (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vcgev4si (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vcgeq_f32(__a, __b) \ ({ \ + float32x4_t __ax = __a; \ + float32x4_t __bx = __b; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcgev4sf (__a.val, __b.val, 5); \ + __rv.__o = __builtin_neon_vcgev4sf (__ax.val, __bx.val, 5); \ __rv.__i; \ }) #define vcgeq_u8(__a, __b) \ ({ \ + uint8x16_t __ax = __a; \ + uint8x16_t __bx = __b; \ union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcgev16qi ((__neon_int8x16_t) __a.val, (__neon_int8x16_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vcgev16qi ((__neon_int8x16_t) __ax.val, (__neon_int8x16_t) __bx.val, 0); \ __rv.__i; \ }) #define vcgeq_u16(__a, __b) \ ({ \ + uint16x8_t __ax = __a; \ + uint16x8_t __bx = __b; \ union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcgev8hi ((__neon_int16x8_t) __a.val, (__neon_int16x8_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vcgev8hi ((__neon_int16x8_t) __ax.val, (__neon_int16x8_t) __bx.val, 0); \ __rv.__i; \ }) #define vcgeq_u32(__a, __b) \ ({ \ + uint32x4_t __ax = __a; \ + uint32x4_t __bx = __b; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcgev4si ((__neon_int32x4_t) __a.val, (__neon_int32x4_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vcgev4si ((__neon_int32x4_t) __ax.val, (__neon_int32x4_t) __bx.val, 0); \ __rv.__i; \ }) #define vcle_s8(__a, __b) \ ({ \ + int8x8_t __ax = __a; \ + int8x8_t __bx = __b; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcgev8qi (__b.val, __a.val, 1); \ + __rv.__o = __builtin_neon_vcgev8qi (__bx.val, __ax.val, 1); \ __rv.__i; \ }) #define vcle_s16(__a, __b) \ ({ \ + int16x4_t __ax = __a; \ + int16x4_t __bx = __b; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; 
\ - __rv.__o = __builtin_neon_vcgev4hi (__b.val, __a.val, 1); \ + __rv.__o = __builtin_neon_vcgev4hi (__bx.val, __ax.val, 1); \ __rv.__i; \ }) #define vcle_s32(__a, __b) \ ({ \ + int32x2_t __ax = __a; \ + int32x2_t __bx = __b; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcgev2si (__b.val, __a.val, 1); \ + __rv.__o = __builtin_neon_vcgev2si (__bx.val, __ax.val, 1); \ __rv.__i; \ }) #define vcle_f32(__a, __b) \ ({ \ + float32x2_t __ax = __a; \ + float32x2_t __bx = __b; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcgev2sf (__b.val, __a.val, 5); \ + __rv.__o = __builtin_neon_vcgev2sf (__bx.val, __ax.val, 5); \ __rv.__i; \ }) #define vcle_u8(__a, __b) \ ({ \ + uint8x8_t __ax = __a; \ + uint8x8_t __bx = __b; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcgev8qi ((__neon_int8x8_t) __b.val, (__neon_int8x8_t) __a.val, 0); \ + __rv.__o = __builtin_neon_vcgev8qi ((__neon_int8x8_t) __bx.val, (__neon_int8x8_t) __ax.val, 0); \ __rv.__i; \ }) #define vcle_u16(__a, __b) \ ({ \ + uint16x4_t __ax = __a; \ + uint16x4_t __bx = __b; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcgev4hi ((__neon_int16x4_t) __b.val, (__neon_int16x4_t) __a.val, 0); \ + __rv.__o = __builtin_neon_vcgev4hi ((__neon_int16x4_t) __bx.val, (__neon_int16x4_t) __ax.val, 0); \ __rv.__i; \ }) #define vcle_u32(__a, __b) \ ({ \ + uint32x2_t __ax = __a; \ + uint32x2_t __bx = __b; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcgev2si ((__neon_int32x2_t) __b.val, (__neon_int32x2_t) __a.val, 0); \ + __rv.__o = __builtin_neon_vcgev2si ((__neon_int32x2_t) __bx.val, (__neon_int32x2_t) __ax.val, 0); \ __rv.__i; \ }) #define vcleq_s8(__a, __b) \ ({ \ + int8x16_t __ax = __a; \ + int8x16_t __bx = __b; \ union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcgev16qi (__b.val, __a.val, 1); \ + 
__rv.__o = __builtin_neon_vcgev16qi (__bx.val, __ax.val, 1); \ __rv.__i; \ }) #define vcleq_s16(__a, __b) \ ({ \ + int16x8_t __ax = __a; \ + int16x8_t __bx = __b; \ union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcgev8hi (__b.val, __a.val, 1); \ + __rv.__o = __builtin_neon_vcgev8hi (__bx.val, __ax.val, 1); \ __rv.__i; \ }) #define vcleq_s32(__a, __b) \ ({ \ + int32x4_t __ax = __a; \ + int32x4_t __bx = __b; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcgev4si (__b.val, __a.val, 1); \ + __rv.__o = __builtin_neon_vcgev4si (__bx.val, __ax.val, 1); \ __rv.__i; \ }) #define vcleq_f32(__a, __b) \ ({ \ + float32x4_t __ax = __a; \ + float32x4_t __bx = __b; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcgev4sf (__b.val, __a.val, 5); \ + __rv.__o = __builtin_neon_vcgev4sf (__bx.val, __ax.val, 5); \ __rv.__i; \ }) #define vcleq_u8(__a, __b) \ ({ \ + uint8x16_t __ax = __a; \ + uint8x16_t __bx = __b; \ union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcgev16qi ((__neon_int8x16_t) __b.val, (__neon_int8x16_t) __a.val, 0); \ + __rv.__o = __builtin_neon_vcgev16qi ((__neon_int8x16_t) __bx.val, (__neon_int8x16_t) __ax.val, 0); \ __rv.__i; \ }) #define vcleq_u16(__a, __b) \ ({ \ + uint16x8_t __ax = __a; \ + uint16x8_t __bx = __b; \ union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcgev8hi ((__neon_int16x8_t) __b.val, (__neon_int16x8_t) __a.val, 0); \ + __rv.__o = __builtin_neon_vcgev8hi ((__neon_int16x8_t) __bx.val, (__neon_int16x8_t) __ax.val, 0); \ __rv.__i; \ }) #define vcleq_u32(__a, __b) \ ({ \ + uint32x4_t __ax = __a; \ + uint32x4_t __bx = __b; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcgev4si ((__neon_int32x4_t) __b.val, (__neon_int32x4_t) __a.val, 0); \ + __rv.__o = __builtin_neon_vcgev4si ((__neon_int32x4_t) __bx.val, (__neon_int32x4_t) __ax.val, 
0); \ __rv.__i; \ }) #define vcgt_s8(__a, __b) \ ({ \ + int8x8_t __ax = __a; \ + int8x8_t __bx = __b; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcgtv8qi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vcgtv8qi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vcgt_s16(__a, __b) \ ({ \ + int16x4_t __ax = __a; \ + int16x4_t __bx = __b; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcgtv4hi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vcgtv4hi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vcgt_s32(__a, __b) \ ({ \ + int32x2_t __ax = __a; \ + int32x2_t __bx = __b; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcgtv2si (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vcgtv2si (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vcgt_f32(__a, __b) \ ({ \ + float32x2_t __ax = __a; \ + float32x2_t __bx = __b; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcgtv2sf (__a.val, __b.val, 5); \ + __rv.__o = __builtin_neon_vcgtv2sf (__ax.val, __bx.val, 5); \ __rv.__i; \ }) #define vcgt_u8(__a, __b) \ ({ \ + uint8x8_t __ax = __a; \ + uint8x8_t __bx = __b; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcgtv8qi ((__neon_int8x8_t) __a.val, (__neon_int8x8_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vcgtv8qi ((__neon_int8x8_t) __ax.val, (__neon_int8x8_t) __bx.val, 0); \ __rv.__i; \ }) #define vcgt_u16(__a, __b) \ ({ \ + uint16x4_t __ax = __a; \ + uint16x4_t __bx = __b; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcgtv4hi ((__neon_int16x4_t) __a.val, (__neon_int16x4_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vcgtv4hi ((__neon_int16x4_t) __ax.val, (__neon_int16x4_t) __bx.val, 0); \ __rv.__i; \ }) #define vcgt_u32(__a, __b) \ ({ \ + uint32x2_t __ax = __a; \ + uint32x2_t __bx = __b; \ union { uint32x2_t __i; __neon_int32x2_t __o; 
} __rv; \ - __rv.__o = __builtin_neon_vcgtv2si ((__neon_int32x2_t) __a.val, (__neon_int32x2_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vcgtv2si ((__neon_int32x2_t) __ax.val, (__neon_int32x2_t) __bx.val, 0); \ __rv.__i; \ }) #define vcgtq_s8(__a, __b) \ ({ \ + int8x16_t __ax = __a; \ + int8x16_t __bx = __b; \ union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcgtv16qi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vcgtv16qi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vcgtq_s16(__a, __b) \ ({ \ + int16x8_t __ax = __a; \ + int16x8_t __bx = __b; \ union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcgtv8hi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vcgtv8hi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vcgtq_s32(__a, __b) \ ({ \ + int32x4_t __ax = __a; \ + int32x4_t __bx = __b; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcgtv4si (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vcgtv4si (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vcgtq_f32(__a, __b) \ ({ \ + float32x4_t __ax = __a; \ + float32x4_t __bx = __b; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcgtv4sf (__a.val, __b.val, 5); \ + __rv.__o = __builtin_neon_vcgtv4sf (__ax.val, __bx.val, 5); \ __rv.__i; \ }) #define vcgtq_u8(__a, __b) \ ({ \ + uint8x16_t __ax = __a; \ + uint8x16_t __bx = __b; \ union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcgtv16qi ((__neon_int8x16_t) __a.val, (__neon_int8x16_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vcgtv16qi ((__neon_int8x16_t) __ax.val, (__neon_int8x16_t) __bx.val, 0); \ __rv.__i; \ }) #define vcgtq_u16(__a, __b) \ ({ \ + uint16x8_t __ax = __a; \ + uint16x8_t __bx = __b; \ union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcgtv8hi ((__neon_int16x8_t) __a.val, (__neon_int16x8_t) __b.val, 0); \ + __rv.__o = 
__builtin_neon_vcgtv8hi ((__neon_int16x8_t) __ax.val, (__neon_int16x8_t) __bx.val, 0); \ __rv.__i; \ }) #define vcgtq_u32(__a, __b) \ ({ \ + uint32x4_t __ax = __a; \ + uint32x4_t __bx = __b; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcgtv4si ((__neon_int32x4_t) __a.val, (__neon_int32x4_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vcgtv4si ((__neon_int32x4_t) __ax.val, (__neon_int32x4_t) __bx.val, 0); \ __rv.__i; \ }) #define vclt_s8(__a, __b) \ ({ \ + int8x8_t __ax = __a; \ + int8x8_t __bx = __b; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcgtv8qi (__b.val, __a.val, 1); \ + __rv.__o = __builtin_neon_vcgtv8qi (__bx.val, __ax.val, 1); \ __rv.__i; \ }) #define vclt_s16(__a, __b) \ ({ \ + int16x4_t __ax = __a; \ + int16x4_t __bx = __b; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcgtv4hi (__b.val, __a.val, 1); \ + __rv.__o = __builtin_neon_vcgtv4hi (__bx.val, __ax.val, 1); \ __rv.__i; \ }) #define vclt_s32(__a, __b) \ ({ \ + int32x2_t __ax = __a; \ + int32x2_t __bx = __b; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcgtv2si (__b.val, __a.val, 1); \ + __rv.__o = __builtin_neon_vcgtv2si (__bx.val, __ax.val, 1); \ __rv.__i; \ }) #define vclt_f32(__a, __b) \ ({ \ + float32x2_t __ax = __a; \ + float32x2_t __bx = __b; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcgtv2sf (__b.val, __a.val, 5); \ + __rv.__o = __builtin_neon_vcgtv2sf (__bx.val, __ax.val, 5); \ __rv.__i; \ }) #define vclt_u8(__a, __b) \ ({ \ + uint8x8_t __ax = __a; \ + uint8x8_t __bx = __b; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcgtv8qi ((__neon_int8x8_t) __b.val, (__neon_int8x8_t) __a.val, 0); \ + __rv.__o = __builtin_neon_vcgtv8qi ((__neon_int8x8_t) __bx.val, (__neon_int8x8_t) __ax.val, 0); \ __rv.__i; \ }) #define vclt_u16(__a, __b) \ ({ \ + uint16x4_t 
__ax = __a; \ + uint16x4_t __bx = __b; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcgtv4hi ((__neon_int16x4_t) __b.val, (__neon_int16x4_t) __a.val, 0); \ + __rv.__o = __builtin_neon_vcgtv4hi ((__neon_int16x4_t) __bx.val, (__neon_int16x4_t) __ax.val, 0); \ __rv.__i; \ }) #define vclt_u32(__a, __b) \ ({ \ + uint32x2_t __ax = __a; \ + uint32x2_t __bx = __b; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcgtv2si ((__neon_int32x2_t) __b.val, (__neon_int32x2_t) __a.val, 0); \ + __rv.__o = __builtin_neon_vcgtv2si ((__neon_int32x2_t) __bx.val, (__neon_int32x2_t) __ax.val, 0); \ __rv.__i; \ }) #define vcltq_s8(__a, __b) \ ({ \ + int8x16_t __ax = __a; \ + int8x16_t __bx = __b; \ union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcgtv16qi (__b.val, __a.val, 1); \ + __rv.__o = __builtin_neon_vcgtv16qi (__bx.val, __ax.val, 1); \ __rv.__i; \ }) #define vcltq_s16(__a, __b) \ ({ \ + int16x8_t __ax = __a; \ + int16x8_t __bx = __b; \ union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcgtv8hi (__b.val, __a.val, 1); \ + __rv.__o = __builtin_neon_vcgtv8hi (__bx.val, __ax.val, 1); \ __rv.__i; \ }) #define vcltq_s32(__a, __b) \ ({ \ + int32x4_t __ax = __a; \ + int32x4_t __bx = __b; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcgtv4si (__b.val, __a.val, 1); \ + __rv.__o = __builtin_neon_vcgtv4si (__bx.val, __ax.val, 1); \ __rv.__i; \ }) #define vcltq_f32(__a, __b) \ ({ \ + float32x4_t __ax = __a; \ + float32x4_t __bx = __b; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcgtv4sf (__b.val, __a.val, 5); \ + __rv.__o = __builtin_neon_vcgtv4sf (__bx.val, __ax.val, 5); \ __rv.__i; \ }) #define vcltq_u8(__a, __b) \ ({ \ + uint8x16_t __ax = __a; \ + uint8x16_t __bx = __b; \ union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcgtv16qi 
((__neon_int8x16_t) __b.val, (__neon_int8x16_t) __a.val, 0); \ + __rv.__o = __builtin_neon_vcgtv16qi ((__neon_int8x16_t) __bx.val, (__neon_int8x16_t) __ax.val, 0); \ __rv.__i; \ }) #define vcltq_u16(__a, __b) \ ({ \ + uint16x8_t __ax = __a; \ + uint16x8_t __bx = __b; \ union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcgtv8hi ((__neon_int16x8_t) __b.val, (__neon_int16x8_t) __a.val, 0); \ + __rv.__o = __builtin_neon_vcgtv8hi ((__neon_int16x8_t) __bx.val, (__neon_int16x8_t) __ax.val, 0); \ __rv.__i; \ }) #define vcltq_u32(__a, __b) \ ({ \ + uint32x4_t __ax = __a; \ + uint32x4_t __bx = __b; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcgtv4si ((__neon_int32x4_t) __b.val, (__neon_int32x4_t) __a.val, 0); \ + __rv.__o = __builtin_neon_vcgtv4si ((__neon_int32x4_t) __bx.val, (__neon_int32x4_t) __ax.val, 0); \ __rv.__i; \ }) #define vcage_f32(__a, __b) \ ({ \ + float32x2_t __ax = __a; \ + float32x2_t __bx = __b; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcagev2sf (__a.val, __b.val, 5); \ + __rv.__o = __builtin_neon_vcagev2sf (__ax.val, __bx.val, 5); \ __rv.__i; \ }) #define vcageq_f32(__a, __b) \ ({ \ + float32x4_t __ax = __a; \ + float32x4_t __bx = __b; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcagev4sf (__a.val, __b.val, 5); \ + __rv.__o = __builtin_neon_vcagev4sf (__ax.val, __bx.val, 5); \ __rv.__i; \ }) #define vcale_f32(__a, __b) \ ({ \ + float32x2_t __ax = __a; \ + float32x2_t __bx = __b; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcagev2sf (__b.val, __a.val, 5); \ + __rv.__o = __builtin_neon_vcagev2sf (__bx.val, __ax.val, 5); \ __rv.__i; \ }) #define vcaleq_f32(__a, __b) \ ({ \ + float32x4_t __ax = __a; \ + float32x4_t __bx = __b; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcagev4sf (__b.val, __a.val, 5); \ + __rv.__o = 
__builtin_neon_vcagev4sf (__bx.val, __ax.val, 5); \ __rv.__i; \ }) #define vcagt_f32(__a, __b) \ ({ \ + float32x2_t __ax = __a; \ + float32x2_t __bx = __b; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcagtv2sf (__a.val, __b.val, 5); \ + __rv.__o = __builtin_neon_vcagtv2sf (__ax.val, __bx.val, 5); \ __rv.__i; \ }) #define vcagtq_f32(__a, __b) \ ({ \ + float32x4_t __ax = __a; \ + float32x4_t __bx = __b; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcagtv4sf (__a.val, __b.val, 5); \ + __rv.__o = __builtin_neon_vcagtv4sf (__ax.val, __bx.val, 5); \ __rv.__i; \ }) #define vcalt_f32(__a, __b) \ ({ \ + float32x2_t __ax = __a; \ + float32x2_t __bx = __b; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcagtv2sf (__b.val, __a.val, 5); \ + __rv.__o = __builtin_neon_vcagtv2sf (__bx.val, __ax.val, 5); \ __rv.__i; \ }) #define vcaltq_f32(__a, __b) \ ({ \ + float32x4_t __ax = __a; \ + float32x4_t __bx = __b; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcagtv4sf (__b.val, __a.val, 5); \ + __rv.__o = __builtin_neon_vcagtv4sf (__bx.val, __ax.val, 5); \ __rv.__i; \ }) #define vtst_s8(__a, __b) \ ({ \ + int8x8_t __ax = __a; \ + int8x8_t __bx = __b; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vtstv8qi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vtstv8qi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vtst_s16(__a, __b) \ ({ \ + int16x4_t __ax = __a; \ + int16x4_t __bx = __b; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vtstv4hi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vtstv4hi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vtst_s32(__a, __b) \ ({ \ + int32x2_t __ax = __a; \ + int32x2_t __bx = __b; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vtstv2si (__a.val, __b.val, 1); \ + 
__rv.__o = __builtin_neon_vtstv2si (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vtst_u8(__a, __b) \ ({ \ + uint8x8_t __ax = __a; \ + uint8x8_t __bx = __b; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vtstv8qi ((__neon_int8x8_t) __a.val, (__neon_int8x8_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vtstv8qi ((__neon_int8x8_t) __ax.val, (__neon_int8x8_t) __bx.val, 0); \ __rv.__i; \ }) #define vtst_u16(__a, __b) \ ({ \ + uint16x4_t __ax = __a; \ + uint16x4_t __bx = __b; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vtstv4hi ((__neon_int16x4_t) __a.val, (__neon_int16x4_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vtstv4hi ((__neon_int16x4_t) __ax.val, (__neon_int16x4_t) __bx.val, 0); \ __rv.__i; \ }) #define vtst_u32(__a, __b) \ ({ \ + uint32x2_t __ax = __a; \ + uint32x2_t __bx = __b; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vtstv2si ((__neon_int32x2_t) __a.val, (__neon_int32x2_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vtstv2si ((__neon_int32x2_t) __ax.val, (__neon_int32x2_t) __bx.val, 0); \ __rv.__i; \ }) #define vtst_p8(__a, __b) \ ({ \ + poly8x8_t __ax = __a; \ + poly8x8_t __bx = __b; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vtstv8qi ((__neon_int8x8_t) __a.val, (__neon_int8x8_t) __b.val, 4); \ + __rv.__o = __builtin_neon_vtstv8qi ((__neon_int8x8_t) __ax.val, (__neon_int8x8_t) __bx.val, 4); \ __rv.__i; \ }) #define vtstq_s8(__a, __b) \ ({ \ + int8x16_t __ax = __a; \ + int8x16_t __bx = __b; \ union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vtstv16qi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vtstv16qi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vtstq_s16(__a, __b) \ ({ \ + int16x8_t __ax = __a; \ + int16x8_t __bx = __b; \ union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vtstv8hi (__a.val, __b.val, 1); \ + 
__rv.__o = __builtin_neon_vtstv8hi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vtstq_s32(__a, __b) \ ({ \ + int32x4_t __ax = __a; \ + int32x4_t __bx = __b; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vtstv4si (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vtstv4si (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vtstq_u8(__a, __b) \ ({ \ + uint8x16_t __ax = __a; \ + uint8x16_t __bx = __b; \ union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vtstv16qi ((__neon_int8x16_t) __a.val, (__neon_int8x16_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vtstv16qi ((__neon_int8x16_t) __ax.val, (__neon_int8x16_t) __bx.val, 0); \ __rv.__i; \ }) #define vtstq_u16(__a, __b) \ ({ \ + uint16x8_t __ax = __a; \ + uint16x8_t __bx = __b; \ union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vtstv8hi ((__neon_int16x8_t) __a.val, (__neon_int16x8_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vtstv8hi ((__neon_int16x8_t) __ax.val, (__neon_int16x8_t) __bx.val, 0); \ __rv.__i; \ }) #define vtstq_u32(__a, __b) \ ({ \ + uint32x4_t __ax = __a; \ + uint32x4_t __bx = __b; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vtstv4si ((__neon_int32x4_t) __a.val, (__neon_int32x4_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vtstv4si ((__neon_int32x4_t) __ax.val, (__neon_int32x4_t) __bx.val, 0); \ __rv.__i; \ }) #define vtstq_p8(__a, __b) \ ({ \ + poly8x16_t __ax = __a; \ + poly8x16_t __bx = __b; \ union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vtstv16qi ((__neon_int8x16_t) __a.val, (__neon_int8x16_t) __b.val, 4); \ + __rv.__o = __builtin_neon_vtstv16qi ((__neon_int8x16_t) __ax.val, (__neon_int8x16_t) __bx.val, 4); \ __rv.__i; \ }) #define vabd_s8(__a, __b) \ ({ \ + int8x8_t __ax = __a; \ + int8x8_t __bx = __b; \ union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vabdv8qi (__a.val, 
__b.val, 1); \ + __rv.__o = __builtin_neon_vabdv8qi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vabd_s16(__a, __b) \ ({ \ + int16x4_t __ax = __a; \ + int16x4_t __bx = __b; \ union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vabdv4hi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vabdv4hi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vabd_s32(__a, __b) \ ({ \ + int32x2_t __ax = __a; \ + int32x2_t __bx = __b; \ union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vabdv2si (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vabdv2si (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vabd_f32(__a, __b) \ ({ \ + float32x2_t __ax = __a; \ + float32x2_t __bx = __b; \ union { float32x2_t __i; __neon_float32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vabdv2sf (__a.val, __b.val, 5); \ + __rv.__o = __builtin_neon_vabdv2sf (__ax.val, __bx.val, 5); \ __rv.__i; \ }) #define vabd_u8(__a, __b) \ ({ \ + uint8x8_t __ax = __a; \ + uint8x8_t __bx = __b; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vabdv8qi ((__neon_int8x8_t) __a.val, (__neon_int8x8_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vabdv8qi ((__neon_int8x8_t) __ax.val, (__neon_int8x8_t) __bx.val, 0); \ __rv.__i; \ }) #define vabd_u16(__a, __b) \ ({ \ + uint16x4_t __ax = __a; \ + uint16x4_t __bx = __b; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vabdv4hi ((__neon_int16x4_t) __a.val, (__neon_int16x4_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vabdv4hi ((__neon_int16x4_t) __ax.val, (__neon_int16x4_t) __bx.val, 0); \ __rv.__i; \ }) #define vabd_u32(__a, __b) \ ({ \ + uint32x2_t __ax = __a; \ + uint32x2_t __bx = __b; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vabdv2si ((__neon_int32x2_t) __a.val, (__neon_int32x2_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vabdv2si ((__neon_int32x2_t) __ax.val, (__neon_int32x2_t) 
__bx.val, 0); \ __rv.__i; \ }) #define vabdq_s8(__a, __b) \ ({ \ + int8x16_t __ax = __a; \ + int8x16_t __bx = __b; \ union { int8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vabdv16qi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vabdv16qi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vabdq_s16(__a, __b) \ ({ \ + int16x8_t __ax = __a; \ + int16x8_t __bx = __b; \ union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vabdv8hi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vabdv8hi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vabdq_s32(__a, __b) \ ({ \ + int32x4_t __ax = __a; \ + int32x4_t __bx = __b; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vabdv4si (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vabdv4si (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vabdq_f32(__a, __b) \ ({ \ + float32x4_t __ax = __a; \ + float32x4_t __bx = __b; \ union { float32x4_t __i; __neon_float32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vabdv4sf (__a.val, __b.val, 5); \ + __rv.__o = __builtin_neon_vabdv4sf (__ax.val, __bx.val, 5); \ __rv.__i; \ }) #define vabdq_u8(__a, __b) \ ({ \ + uint8x16_t __ax = __a; \ + uint8x16_t __bx = __b; \ union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vabdv16qi ((__neon_int8x16_t) __a.val, (__neon_int8x16_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vabdv16qi ((__neon_int8x16_t) __ax.val, (__neon_int8x16_t) __bx.val, 0); \ __rv.__i; \ }) #define vabdq_u16(__a, __b) \ ({ \ + uint16x8_t __ax = __a; \ + uint16x8_t __bx = __b; \ union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vabdv8hi ((__neon_int16x8_t) __a.val, (__neon_int16x8_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vabdv8hi ((__neon_int16x8_t) __ax.val, (__neon_int16x8_t) __bx.val, 0); \ __rv.__i; \ }) #define vabdq_u32(__a, __b) \ ({ \ + uint32x4_t __ax = __a; \ + uint32x4_t __bx = __b; \ union { 
uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vabdv4si ((__neon_int32x4_t) __a.val, (__neon_int32x4_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vabdv4si ((__neon_int32x4_t) __ax.val, (__neon_int32x4_t) __bx.val, 0); \ __rv.__i; \ }) #define vabdl_s8(__a, __b) \ ({ \ + int8x8_t __ax = __a; \ + int8x8_t __bx = __b; \ union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vabdlv8qi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vabdlv8qi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vabdl_s16(__a, __b) \ ({ \ + int16x4_t __ax = __a; \ + int16x4_t __bx = __b; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vabdlv4hi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vabdlv4hi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vabdl_s32(__a, __b) \ ({ \ + int32x2_t __ax = __a; \ + int32x2_t __bx = __b; \ union { int64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vabdlv2si (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vabdlv2si (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vabdl_u8(__a, __b) \ ({ \ + uint8x8_t __ax = __a; \ + uint8x8_t __bx = __b; \ union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vabdlv8qi ((__neon_int8x8_t) __a.val, (__neon_int8x8_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vabdlv8qi ((__neon_int8x8_t) __ax.val, (__neon_int8x8_t) __bx.val, 0); \ __rv.__i; \ }) #define vabdl_u16(__a, __b) \ ({ \ + uint16x4_t __ax = __a; \ + uint16x4_t __bx = __b; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vabdlv4hi ((__neon_int16x4_t) __a.val, (__neon_int16x4_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vabdlv4hi ((__neon_int16x4_t) __ax.val, (__neon_int16x4_t) __bx.val, 0); \ __rv.__i; \ }) #define vabdl_u32(__a, __b) \ ({ \ + uint32x2_t __ax = __a; \ + uint32x2_t __bx = __b; \ union { uint64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = 
__builtin_neon_vabdlv2si ((__neon_int32x2_t) __a.val, (__neon_int32x2_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vabdlv2si ((__neon_int32x2_t) __ax.val, (__neon_int32x2_t) __bx.val, 0); \ __rv.__i; \ }) #define vaba_s8(__a, __b, __c) \ ({ \ + int8x8_t __ax = __a; \ + int8x8_t __bx = __b; \ + int8x8_t __cx = __c; \ union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vabav8qi (__a.val, __b.val, __c.val, 1); \ + __rv.__o = __builtin_neon_vabav8qi (__ax.val, __bx.val, __cx.val, 1); \ __rv.__i; \ }) #define vaba_s16(__a, __b, __c) \ ({ \ + int16x4_t __ax = __a; \ + int16x4_t __bx = __b; \ + int16x4_t __cx = __c; \ union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vabav4hi (__a.val, __b.val, __c.val, 1); \ + __rv.__o = __builtin_neon_vabav4hi (__ax.val, __bx.val, __cx.val, 1); \ __rv.__i; \ }) #define vaba_s32(__a, __b, __c) \ ({ \ + int32x2_t __ax = __a; \ + int32x2_t __bx = __b; \ + int32x2_t __cx = __c; \ union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vabav2si (__a.val, __b.val, __c.val, 1); \ + __rv.__o = __builtin_neon_vabav2si (__ax.val, __bx.val, __cx.val, 1); \ __rv.__i; \ }) #define vaba_u8(__a, __b, __c) \ ({ \ + uint8x8_t __ax = __a; \ + uint8x8_t __bx = __b; \ + uint8x8_t __cx = __c; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vabav8qi ((__neon_int8x8_t) __a.val, (__neon_int8x8_t) __b.val, (__neon_int8x8_t) __c.val, 0); \ + __rv.__o = __builtin_neon_vabav8qi ((__neon_int8x8_t) __ax.val, (__neon_int8x8_t) __bx.val, (__neon_int8x8_t) __cx.val, 0); \ __rv.__i; \ }) #define vaba_u16(__a, __b, __c) \ ({ \ + uint16x4_t __ax = __a; \ + uint16x4_t __bx = __b; \ + uint16x4_t __cx = __c; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vabav4hi ((__neon_int16x4_t) __a.val, (__neon_int16x4_t) __b.val, (__neon_int16x4_t) __c.val, 0); \ + __rv.__o = __builtin_neon_vabav4hi ((__neon_int16x4_t) 
__ax.val, (__neon_int16x4_t) __bx.val, (__neon_int16x4_t) __cx.val, 0); \ __rv.__i; \ }) #define vaba_u32(__a, __b, __c) \ ({ \ + uint32x2_t __ax = __a; \ + uint32x2_t __bx = __b; \ + uint32x2_t __cx = __c; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vabav2si ((__neon_int32x2_t) __a.val, (__neon_int32x2_t) __b.val, (__neon_int32x2_t) __c.val, 0); \ + __rv.__o = __builtin_neon_vabav2si ((__neon_int32x2_t) __ax.val, (__neon_int32x2_t) __bx.val, (__neon_int32x2_t) __cx.val, 0); \ __rv.__i; \ }) #define vabaq_s8(__a, __b, __c) \ ({ \ + int8x16_t __ax = __a; \ + int8x16_t __bx = __b; \ + int8x16_t __cx = __c; \ union { int8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vabav16qi (__a.val, __b.val, __c.val, 1); \ + __rv.__o = __builtin_neon_vabav16qi (__ax.val, __bx.val, __cx.val, 1); \ __rv.__i; \ }) #define vabaq_s16(__a, __b, __c) \ ({ \ + int16x8_t __ax = __a; \ + int16x8_t __bx = __b; \ + int16x8_t __cx = __c; \ union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vabav8hi (__a.val, __b.val, __c.val, 1); \ + __rv.__o = __builtin_neon_vabav8hi (__ax.val, __bx.val, __cx.val, 1); \ __rv.__i; \ }) #define vabaq_s32(__a, __b, __c) \ ({ \ + int32x4_t __ax = __a; \ + int32x4_t __bx = __b; \ + int32x4_t __cx = __c; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vabav4si (__a.val, __b.val, __c.val, 1); \ + __rv.__o = __builtin_neon_vabav4si (__ax.val, __bx.val, __cx.val, 1); \ __rv.__i; \ }) #define vabaq_u8(__a, __b, __c) \ ({ \ + uint8x16_t __ax = __a; \ + uint8x16_t __bx = __b; \ + uint8x16_t __cx = __c; \ union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vabav16qi ((__neon_int8x16_t) __a.val, (__neon_int8x16_t) __b.val, (__neon_int8x16_t) __c.val, 0); \ + __rv.__o = __builtin_neon_vabav16qi ((__neon_int8x16_t) __ax.val, (__neon_int8x16_t) __bx.val, (__neon_int8x16_t) __cx.val, 0); \ __rv.__i; \ }) #define 
vabaq_u16(__a, __b, __c) \ ({ \ + uint16x8_t __ax = __a; \ + uint16x8_t __bx = __b; \ + uint16x8_t __cx = __c; \ union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vabav8hi ((__neon_int16x8_t) __a.val, (__neon_int16x8_t) __b.val, (__neon_int16x8_t) __c.val, 0); \ + __rv.__o = __builtin_neon_vabav8hi ((__neon_int16x8_t) __ax.val, (__neon_int16x8_t) __bx.val, (__neon_int16x8_t) __cx.val, 0); \ __rv.__i; \ }) #define vabaq_u32(__a, __b, __c) \ ({ \ + uint32x4_t __ax = __a; \ + uint32x4_t __bx = __b; \ + uint32x4_t __cx = __c; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vabav4si ((__neon_int32x4_t) __a.val, (__neon_int32x4_t) __b.val, (__neon_int32x4_t) __c.val, 0); \ + __rv.__o = __builtin_neon_vabav4si ((__neon_int32x4_t) __ax.val, (__neon_int32x4_t) __bx.val, (__neon_int32x4_t) __cx.val, 0); \ __rv.__i; \ }) #define vabal_s8(__a, __b, __c) \ ({ \ + int16x8_t __ax = __a; \ + int8x8_t __bx = __b; \ + int8x8_t __cx = __c; \ union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vabalv8qi (__a.val, __b.val, __c.val, 1); \ + __rv.__o = __builtin_neon_vabalv8qi (__ax.val, __bx.val, __cx.val, 1); \ __rv.__i; \ }) #define vabal_s16(__a, __b, __c) \ ({ \ + int32x4_t __ax = __a; \ + int16x4_t __bx = __b; \ + int16x4_t __cx = __c; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vabalv4hi (__a.val, __b.val, __c.val, 1); \ + __rv.__o = __builtin_neon_vabalv4hi (__ax.val, __bx.val, __cx.val, 1); \ __rv.__i; \ }) #define vabal_s32(__a, __b, __c) \ ({ \ + int64x2_t __ax = __a; \ + int32x2_t __bx = __b; \ + int32x2_t __cx = __c; \ union { int64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vabalv2si (__a.val, __b.val, __c.val, 1); \ + __rv.__o = __builtin_neon_vabalv2si (__ax.val, __bx.val, __cx.val, 1); \ __rv.__i; \ }) #define vabal_u8(__a, __b, __c) \ ({ \ + uint16x8_t __ax = __a; \ + uint8x8_t __bx = __b; \ + uint8x8_t 
__cx = __c; \ union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vabalv8qi ((__neon_int16x8_t) __a.val, (__neon_int8x8_t) __b.val, (__neon_int8x8_t) __c.val, 0); \ + __rv.__o = __builtin_neon_vabalv8qi ((__neon_int16x8_t) __ax.val, (__neon_int8x8_t) __bx.val, (__neon_int8x8_t) __cx.val, 0); \ __rv.__i; \ }) #define vabal_u16(__a, __b, __c) \ ({ \ + uint32x4_t __ax = __a; \ + uint16x4_t __bx = __b; \ + uint16x4_t __cx = __c; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vabalv4hi ((__neon_int32x4_t) __a.val, (__neon_int16x4_t) __b.val, (__neon_int16x4_t) __c.val, 0); \ + __rv.__o = __builtin_neon_vabalv4hi ((__neon_int32x4_t) __ax.val, (__neon_int16x4_t) __bx.val, (__neon_int16x4_t) __cx.val, 0); \ __rv.__i; \ }) #define vabal_u32(__a, __b, __c) \ ({ \ + uint64x2_t __ax = __a; \ + uint32x2_t __bx = __b; \ + uint32x2_t __cx = __c; \ union { uint64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vabalv2si ((__neon_int64x2_t) __a.val, (__neon_int32x2_t) __b.val, (__neon_int32x2_t) __c.val, 0); \ + __rv.__o = __builtin_neon_vabalv2si ((__neon_int64x2_t) __ax.val, (__neon_int32x2_t) __bx.val, (__neon_int32x2_t) __cx.val, 0); \ __rv.__i; \ }) #define vmax_s8(__a, __b) \ ({ \ + int8x8_t __ax = __a; \ + int8x8_t __bx = __b; \ union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmaxv8qi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vmaxv8qi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vmax_s16(__a, __b) \ ({ \ + int16x4_t __ax = __a; \ + int16x4_t __bx = __b; \ union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmaxv4hi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vmaxv4hi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vmax_s32(__a, __b) \ ({ \ + int32x2_t __ax = __a; \ + int32x2_t __bx = __b; \ union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmaxv2si (__a.val, 
__b.val, 1); \ + __rv.__o = __builtin_neon_vmaxv2si (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vmax_f32(__a, __b) \ ({ \ + float32x2_t __ax = __a; \ + float32x2_t __bx = __b; \ union { float32x2_t __i; __neon_float32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmaxv2sf (__a.val, __b.val, 5); \ + __rv.__o = __builtin_neon_vmaxv2sf (__ax.val, __bx.val, 5); \ __rv.__i; \ }) #define vmax_u8(__a, __b) \ ({ \ + uint8x8_t __ax = __a; \ + uint8x8_t __bx = __b; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmaxv8qi ((__neon_int8x8_t) __a.val, (__neon_int8x8_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vmaxv8qi ((__neon_int8x8_t) __ax.val, (__neon_int8x8_t) __bx.val, 0); \ __rv.__i; \ }) #define vmax_u16(__a, __b) \ ({ \ + uint16x4_t __ax = __a; \ + uint16x4_t __bx = __b; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmaxv4hi ((__neon_int16x4_t) __a.val, (__neon_int16x4_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vmaxv4hi ((__neon_int16x4_t) __ax.val, (__neon_int16x4_t) __bx.val, 0); \ __rv.__i; \ }) #define vmax_u32(__a, __b) \ ({ \ + uint32x2_t __ax = __a; \ + uint32x2_t __bx = __b; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmaxv2si ((__neon_int32x2_t) __a.val, (__neon_int32x2_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vmaxv2si ((__neon_int32x2_t) __ax.val, (__neon_int32x2_t) __bx.val, 0); \ __rv.__i; \ }) #define vmaxq_s8(__a, __b) \ ({ \ + int8x16_t __ax = __a; \ + int8x16_t __bx = __b; \ union { int8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmaxv16qi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vmaxv16qi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vmaxq_s16(__a, __b) \ ({ \ + int16x8_t __ax = __a; \ + int16x8_t __bx = __b; \ union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmaxv8hi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vmaxv8hi (__ax.val, 
__bx.val, 1); \ __rv.__i; \ }) #define vmaxq_s32(__a, __b) \ ({ \ + int32x4_t __ax = __a; \ + int32x4_t __bx = __b; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmaxv4si (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vmaxv4si (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vmaxq_f32(__a, __b) \ ({ \ + float32x4_t __ax = __a; \ + float32x4_t __bx = __b; \ union { float32x4_t __i; __neon_float32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmaxv4sf (__a.val, __b.val, 5); \ + __rv.__o = __builtin_neon_vmaxv4sf (__ax.val, __bx.val, 5); \ __rv.__i; \ }) #define vmaxq_u8(__a, __b) \ ({ \ + uint8x16_t __ax = __a; \ + uint8x16_t __bx = __b; \ union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmaxv16qi ((__neon_int8x16_t) __a.val, (__neon_int8x16_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vmaxv16qi ((__neon_int8x16_t) __ax.val, (__neon_int8x16_t) __bx.val, 0); \ __rv.__i; \ }) #define vmaxq_u16(__a, __b) \ ({ \ + uint16x8_t __ax = __a; \ + uint16x8_t __bx = __b; \ union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmaxv8hi ((__neon_int16x8_t) __a.val, (__neon_int16x8_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vmaxv8hi ((__neon_int16x8_t) __ax.val, (__neon_int16x8_t) __bx.val, 0); \ __rv.__i; \ }) #define vmaxq_u32(__a, __b) \ ({ \ + uint32x4_t __ax = __a; \ + uint32x4_t __bx = __b; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmaxv4si ((__neon_int32x4_t) __a.val, (__neon_int32x4_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vmaxv4si ((__neon_int32x4_t) __ax.val, (__neon_int32x4_t) __bx.val, 0); \ __rv.__i; \ }) #define vmin_s8(__a, __b) \ ({ \ + int8x8_t __ax = __a; \ + int8x8_t __bx = __b; \ union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vminv8qi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vminv8qi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vmin_s16(__a, __b) \ 
({ \ + int16x4_t __ax = __a; \ + int16x4_t __bx = __b; \ union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vminv4hi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vminv4hi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vmin_s32(__a, __b) \ ({ \ + int32x2_t __ax = __a; \ + int32x2_t __bx = __b; \ union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vminv2si (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vminv2si (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vmin_f32(__a, __b) \ ({ \ + float32x2_t __ax = __a; \ + float32x2_t __bx = __b; \ union { float32x2_t __i; __neon_float32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vminv2sf (__a.val, __b.val, 5); \ + __rv.__o = __builtin_neon_vminv2sf (__ax.val, __bx.val, 5); \ __rv.__i; \ }) #define vmin_u8(__a, __b) \ ({ \ + uint8x8_t __ax = __a; \ + uint8x8_t __bx = __b; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vminv8qi ((__neon_int8x8_t) __a.val, (__neon_int8x8_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vminv8qi ((__neon_int8x8_t) __ax.val, (__neon_int8x8_t) __bx.val, 0); \ __rv.__i; \ }) #define vmin_u16(__a, __b) \ ({ \ + uint16x4_t __ax = __a; \ + uint16x4_t __bx = __b; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vminv4hi ((__neon_int16x4_t) __a.val, (__neon_int16x4_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vminv4hi ((__neon_int16x4_t) __ax.val, (__neon_int16x4_t) __bx.val, 0); \ __rv.__i; \ }) #define vmin_u32(__a, __b) \ ({ \ + uint32x2_t __ax = __a; \ + uint32x2_t __bx = __b; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vminv2si ((__neon_int32x2_t) __a.val, (__neon_int32x2_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vminv2si ((__neon_int32x2_t) __ax.val, (__neon_int32x2_t) __bx.val, 0); \ __rv.__i; \ }) #define vminq_s8(__a, __b) \ ({ \ + int8x16_t __ax = __a; \ + int8x16_t __bx = __b; \ union { 
int8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vminv16qi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vminv16qi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vminq_s16(__a, __b) \ ({ \ + int16x8_t __ax = __a; \ + int16x8_t __bx = __b; \ union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vminv8hi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vminv8hi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vminq_s32(__a, __b) \ ({ \ + int32x4_t __ax = __a; \ + int32x4_t __bx = __b; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vminv4si (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vminv4si (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vminq_f32(__a, __b) \ ({ \ + float32x4_t __ax = __a; \ + float32x4_t __bx = __b; \ union { float32x4_t __i; __neon_float32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vminv4sf (__a.val, __b.val, 5); \ + __rv.__o = __builtin_neon_vminv4sf (__ax.val, __bx.val, 5); \ __rv.__i; \ }) #define vminq_u8(__a, __b) \ ({ \ + uint8x16_t __ax = __a; \ + uint8x16_t __bx = __b; \ union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vminv16qi ((__neon_int8x16_t) __a.val, (__neon_int8x16_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vminv16qi ((__neon_int8x16_t) __ax.val, (__neon_int8x16_t) __bx.val, 0); \ __rv.__i; \ }) #define vminq_u16(__a, __b) \ ({ \ + uint16x8_t __ax = __a; \ + uint16x8_t __bx = __b; \ union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vminv8hi ((__neon_int16x8_t) __a.val, (__neon_int16x8_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vminv8hi ((__neon_int16x8_t) __ax.val, (__neon_int16x8_t) __bx.val, 0); \ __rv.__i; \ }) #define vminq_u32(__a, __b) \ ({ \ + uint32x4_t __ax = __a; \ + uint32x4_t __bx = __b; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vminv4si ((__neon_int32x4_t) __a.val, 
(__neon_int32x4_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vminv4si ((__neon_int32x4_t) __ax.val, (__neon_int32x4_t) __bx.val, 0); \ __rv.__i; \ }) #define vpadd_s8(__a, __b) \ ({ \ + int8x8_t __ax = __a; \ + int8x8_t __bx = __b; \ union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vpaddv8qi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vpaddv8qi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vpadd_s16(__a, __b) \ ({ \ + int16x4_t __ax = __a; \ + int16x4_t __bx = __b; \ union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vpaddv4hi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vpaddv4hi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vpadd_s32(__a, __b) \ ({ \ + int32x2_t __ax = __a; \ + int32x2_t __bx = __b; \ union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vpaddv2si (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vpaddv2si (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vpadd_f32(__a, __b) \ ({ \ + float32x2_t __ax = __a; \ + float32x2_t __bx = __b; \ union { float32x2_t __i; __neon_float32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vpaddv2sf (__a.val, __b.val, 5); \ + __rv.__o = __builtin_neon_vpaddv2sf (__ax.val, __bx.val, 5); \ __rv.__i; \ }) #define vpadd_u8(__a, __b) \ ({ \ + uint8x8_t __ax = __a; \ + uint8x8_t __bx = __b; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vpaddv8qi ((__neon_int8x8_t) __a.val, (__neon_int8x8_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vpaddv8qi ((__neon_int8x8_t) __ax.val, (__neon_int8x8_t) __bx.val, 0); \ __rv.__i; \ }) #define vpadd_u16(__a, __b) \ ({ \ + uint16x4_t __ax = __a; \ + uint16x4_t __bx = __b; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vpaddv4hi ((__neon_int16x4_t) __a.val, (__neon_int16x4_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vpaddv4hi ((__neon_int16x4_t) __ax.val, (__neon_int16x4_t) __bx.val, 0); \ 
__rv.__i; \ }) #define vpadd_u32(__a, __b) \ ({ \ + uint32x2_t __ax = __a; \ + uint32x2_t __bx = __b; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vpaddv2si ((__neon_int32x2_t) __a.val, (__neon_int32x2_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vpaddv2si ((__neon_int32x2_t) __ax.val, (__neon_int32x2_t) __bx.val, 0); \ __rv.__i; \ }) #define vpaddl_s8(__a) \ ({ \ + int8x8_t __ax = __a; \ union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vpaddlv8qi (__a.val, 1); \ + __rv.__o = __builtin_neon_vpaddlv8qi (__ax.val, 1); \ __rv.__i; \ }) #define vpaddl_s16(__a) \ ({ \ + int16x4_t __ax = __a; \ union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vpaddlv4hi (__a.val, 1); \ + __rv.__o = __builtin_neon_vpaddlv4hi (__ax.val, 1); \ __rv.__i; \ }) #define vpaddl_s32(__a) \ ({ \ + int32x2_t __ax = __a; \ union { int64x1_t __i; __neon_int64x1_t __o; } __rv; \ - __rv.__o = __builtin_neon_vpaddlv2si (__a.val, 1); \ + __rv.__o = __builtin_neon_vpaddlv2si (__ax.val, 1); \ __rv.__i; \ }) #define vpaddl_u8(__a) \ ({ \ + uint8x8_t __ax = __a; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vpaddlv8qi ((__neon_int8x8_t) __a.val, 0); \ + __rv.__o = __builtin_neon_vpaddlv8qi ((__neon_int8x8_t) __ax.val, 0); \ __rv.__i; \ }) #define vpaddl_u16(__a) \ ({ \ + uint16x4_t __ax = __a; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vpaddlv4hi ((__neon_int16x4_t) __a.val, 0); \ + __rv.__o = __builtin_neon_vpaddlv4hi ((__neon_int16x4_t) __ax.val, 0); \ __rv.__i; \ }) #define vpaddl_u32(__a) \ ({ \ + uint32x2_t __ax = __a; \ union { uint64x1_t __i; __neon_int64x1_t __o; } __rv; \ - __rv.__o = __builtin_neon_vpaddlv2si ((__neon_int32x2_t) __a.val, 0); \ + __rv.__o = __builtin_neon_vpaddlv2si ((__neon_int32x2_t) __ax.val, 0); \ __rv.__i; \ }) #define vpaddlq_s8(__a) \ ({ \ + int8x16_t __ax = __a; \ union { int16x8_t 
__i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vpaddlv16qi (__a.val, 1); \ + __rv.__o = __builtin_neon_vpaddlv16qi (__ax.val, 1); \ __rv.__i; \ }) #define vpaddlq_s16(__a) \ ({ \ + int16x8_t __ax = __a; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vpaddlv8hi (__a.val, 1); \ + __rv.__o = __builtin_neon_vpaddlv8hi (__ax.val, 1); \ __rv.__i; \ }) #define vpaddlq_s32(__a) \ ({ \ + int32x4_t __ax = __a; \ union { int64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vpaddlv4si (__a.val, 1); \ + __rv.__o = __builtin_neon_vpaddlv4si (__ax.val, 1); \ __rv.__i; \ }) #define vpaddlq_u8(__a) \ ({ \ + uint8x16_t __ax = __a; \ union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vpaddlv16qi ((__neon_int8x16_t) __a.val, 0); \ + __rv.__o = __builtin_neon_vpaddlv16qi ((__neon_int8x16_t) __ax.val, 0); \ __rv.__i; \ }) #define vpaddlq_u16(__a) \ ({ \ + uint16x8_t __ax = __a; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vpaddlv8hi ((__neon_int16x8_t) __a.val, 0); \ + __rv.__o = __builtin_neon_vpaddlv8hi ((__neon_int16x8_t) __ax.val, 0); \ __rv.__i; \ }) #define vpaddlq_u32(__a) \ ({ \ + uint32x4_t __ax = __a; \ union { uint64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vpaddlv4si ((__neon_int32x4_t) __a.val, 0); \ + __rv.__o = __builtin_neon_vpaddlv4si ((__neon_int32x4_t) __ax.val, 0); \ __rv.__i; \ }) #define vpadal_s8(__a, __b) \ ({ \ + int16x4_t __ax = __a; \ + int8x8_t __bx = __b; \ union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vpadalv8qi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vpadalv8qi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vpadal_s16(__a, __b) \ ({ \ + int32x2_t __ax = __a; \ + int16x4_t __bx = __b; \ union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vpadalv4hi (__a.val, __b.val, 1); \ + __rv.__o = 
__builtin_neon_vpadalv4hi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vpadal_s32(__a, __b) \ ({ \ + int64x1_t __ax = __a; \ + int32x2_t __bx = __b; \ union { int64x1_t __i; __neon_int64x1_t __o; } __rv; \ - __rv.__o = __builtin_neon_vpadalv2si (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vpadalv2si (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vpadal_u8(__a, __b) \ ({ \ + uint16x4_t __ax = __a; \ + uint8x8_t __bx = __b; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vpadalv8qi ((__neon_int16x4_t) __a.val, (__neon_int8x8_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vpadalv8qi ((__neon_int16x4_t) __ax.val, (__neon_int8x8_t) __bx.val, 0); \ __rv.__i; \ }) #define vpadal_u16(__a, __b) \ ({ \ + uint32x2_t __ax = __a; \ + uint16x4_t __bx = __b; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vpadalv4hi ((__neon_int32x2_t) __a.val, (__neon_int16x4_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vpadalv4hi ((__neon_int32x2_t) __ax.val, (__neon_int16x4_t) __bx.val, 0); \ __rv.__i; \ }) #define vpadal_u32(__a, __b) \ ({ \ + uint64x1_t __ax = __a; \ + uint32x2_t __bx = __b; \ union { uint64x1_t __i; __neon_int64x1_t __o; } __rv; \ - __rv.__o = __builtin_neon_vpadalv2si ((__neon_int64x1_t) __a.val, (__neon_int32x2_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vpadalv2si ((__neon_int64x1_t) __ax.val, (__neon_int32x2_t) __bx.val, 0); \ __rv.__i; \ }) #define vpadalq_s8(__a, __b) \ ({ \ + int16x8_t __ax = __a; \ + int8x16_t __bx = __b; \ union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vpadalv16qi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vpadalv16qi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vpadalq_s16(__a, __b) \ ({ \ + int32x4_t __ax = __a; \ + int16x8_t __bx = __b; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vpadalv8hi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vpadalv8hi 
(__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vpadalq_s32(__a, __b) \ ({ \ + int64x2_t __ax = __a; \ + int32x4_t __bx = __b; \ union { int64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vpadalv4si (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vpadalv4si (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vpadalq_u8(__a, __b) \ ({ \ + uint16x8_t __ax = __a; \ + uint8x16_t __bx = __b; \ union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vpadalv16qi ((__neon_int16x8_t) __a.val, (__neon_int8x16_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vpadalv16qi ((__neon_int16x8_t) __ax.val, (__neon_int8x16_t) __bx.val, 0); \ __rv.__i; \ }) #define vpadalq_u16(__a, __b) \ ({ \ + uint32x4_t __ax = __a; \ + uint16x8_t __bx = __b; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vpadalv8hi ((__neon_int32x4_t) __a.val, (__neon_int16x8_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vpadalv8hi ((__neon_int32x4_t) __ax.val, (__neon_int16x8_t) __bx.val, 0); \ __rv.__i; \ }) #define vpadalq_u32(__a, __b) \ ({ \ + uint64x2_t __ax = __a; \ + uint32x4_t __bx = __b; \ union { uint64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vpadalv4si ((__neon_int64x2_t) __a.val, (__neon_int32x4_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vpadalv4si ((__neon_int64x2_t) __ax.val, (__neon_int32x4_t) __bx.val, 0); \ __rv.__i; \ }) #define vpmax_s8(__a, __b) \ ({ \ + int8x8_t __ax = __a; \ + int8x8_t __bx = __b; \ union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vpmaxv8qi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vpmaxv8qi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vpmax_s16(__a, __b) \ ({ \ + int16x4_t __ax = __a; \ + int16x4_t __bx = __b; \ union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vpmaxv4hi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vpmaxv4hi (__ax.val, __bx.val, 1); \ __rv.__i; \ 
}) #define vpmax_s32(__a, __b) \ ({ \ + int32x2_t __ax = __a; \ + int32x2_t __bx = __b; \ union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vpmaxv2si (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vpmaxv2si (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vpmax_f32(__a, __b) \ ({ \ + float32x2_t __ax = __a; \ + float32x2_t __bx = __b; \ union { float32x2_t __i; __neon_float32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vpmaxv2sf (__a.val, __b.val, 5); \ + __rv.__o = __builtin_neon_vpmaxv2sf (__ax.val, __bx.val, 5); \ __rv.__i; \ }) #define vpmax_u8(__a, __b) \ ({ \ + uint8x8_t __ax = __a; \ + uint8x8_t __bx = __b; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vpmaxv8qi ((__neon_int8x8_t) __a.val, (__neon_int8x8_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vpmaxv8qi ((__neon_int8x8_t) __ax.val, (__neon_int8x8_t) __bx.val, 0); \ __rv.__i; \ }) #define vpmax_u16(__a, __b) \ ({ \ + uint16x4_t __ax = __a; \ + uint16x4_t __bx = __b; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vpmaxv4hi ((__neon_int16x4_t) __a.val, (__neon_int16x4_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vpmaxv4hi ((__neon_int16x4_t) __ax.val, (__neon_int16x4_t) __bx.val, 0); \ __rv.__i; \ }) #define vpmax_u32(__a, __b) \ ({ \ + uint32x2_t __ax = __a; \ + uint32x2_t __bx = __b; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vpmaxv2si ((__neon_int32x2_t) __a.val, (__neon_int32x2_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vpmaxv2si ((__neon_int32x2_t) __ax.val, (__neon_int32x2_t) __bx.val, 0); \ __rv.__i; \ }) #define vpmin_s8(__a, __b) \ ({ \ + int8x8_t __ax = __a; \ + int8x8_t __bx = __b; \ union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vpminv8qi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vpminv8qi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vpmin_s16(__a, __b) \ ({ \ + int16x4_t __ax = 
__a; \ + int16x4_t __bx = __b; \ union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vpminv4hi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vpminv4hi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vpmin_s32(__a, __b) \ ({ \ + int32x2_t __ax = __a; \ + int32x2_t __bx = __b; \ union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vpminv2si (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vpminv2si (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vpmin_f32(__a, __b) \ ({ \ + float32x2_t __ax = __a; \ + float32x2_t __bx = __b; \ union { float32x2_t __i; __neon_float32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vpminv2sf (__a.val, __b.val, 5); \ + __rv.__o = __builtin_neon_vpminv2sf (__ax.val, __bx.val, 5); \ __rv.__i; \ }) #define vpmin_u8(__a, __b) \ ({ \ + uint8x8_t __ax = __a; \ + uint8x8_t __bx = __b; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vpminv8qi ((__neon_int8x8_t) __a.val, (__neon_int8x8_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vpminv8qi ((__neon_int8x8_t) __ax.val, (__neon_int8x8_t) __bx.val, 0); \ __rv.__i; \ }) #define vpmin_u16(__a, __b) \ ({ \ + uint16x4_t __ax = __a; \ + uint16x4_t __bx = __b; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vpminv4hi ((__neon_int16x4_t) __a.val, (__neon_int16x4_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vpminv4hi ((__neon_int16x4_t) __ax.val, (__neon_int16x4_t) __bx.val, 0); \ __rv.__i; \ }) #define vpmin_u32(__a, __b) \ ({ \ + uint32x2_t __ax = __a; \ + uint32x2_t __bx = __b; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vpminv2si ((__neon_int32x2_t) __a.val, (__neon_int32x2_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vpminv2si ((__neon_int32x2_t) __ax.val, (__neon_int32x2_t) __bx.val, 0); \ __rv.__i; \ }) #define vrecps_f32(__a, __b) \ ({ \ + float32x2_t __ax = __a; \ + float32x2_t __bx = __b; \ union { 
float32x2_t __i; __neon_float32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vrecpsv2sf (__a.val, __b.val, 5); \ + __rv.__o = __builtin_neon_vrecpsv2sf (__ax.val, __bx.val, 5); \ __rv.__i; \ }) #define vrecpsq_f32(__a, __b) \ ({ \ + float32x4_t __ax = __a; \ + float32x4_t __bx = __b; \ union { float32x4_t __i; __neon_float32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vrecpsv4sf (__a.val, __b.val, 5); \ + __rv.__o = __builtin_neon_vrecpsv4sf (__ax.val, __bx.val, 5); \ __rv.__i; \ }) #define vrsqrts_f32(__a, __b) \ ({ \ + float32x2_t __ax = __a; \ + float32x2_t __bx = __b; \ union { float32x2_t __i; __neon_float32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vrsqrtsv2sf (__a.val, __b.val, 5); \ + __rv.__o = __builtin_neon_vrsqrtsv2sf (__ax.val, __bx.val, 5); \ __rv.__i; \ }) #define vrsqrtsq_f32(__a, __b) \ ({ \ + float32x4_t __ax = __a; \ + float32x4_t __bx = __b; \ union { float32x4_t __i; __neon_float32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vrsqrtsv4sf (__a.val, __b.val, 5); \ + __rv.__o = __builtin_neon_vrsqrtsv4sf (__ax.val, __bx.val, 5); \ __rv.__i; \ }) #define vshl_s8(__a, __b) \ ({ \ + int8x8_t __ax = __a; \ + int8x8_t __bx = __b; \ union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshlv8qi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vshlv8qi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vshl_s16(__a, __b) \ ({ \ + int16x4_t __ax = __a; \ + int16x4_t __bx = __b; \ union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshlv4hi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vshlv4hi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vshl_s32(__a, __b) \ ({ \ + int32x2_t __ax = __a; \ + int32x2_t __bx = __b; \ union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshlv2si (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vshlv2si (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vshl_s64(__a, __b) \ ({ \ + int64x1_t __ax = __a; \ + 
int64x1_t __bx = __b; \ union { int64x1_t __i; __neon_int64x1_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshlv1di (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vshlv1di (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vshl_u8(__a, __b) \ ({ \ + uint8x8_t __ax = __a; \ + int8x8_t __bx = __b; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshlv8qi ((__neon_int8x8_t) __a.val, __b.val, 0); \ + __rv.__o = __builtin_neon_vshlv8qi ((__neon_int8x8_t) __ax.val, __bx.val, 0); \ __rv.__i; \ }) #define vshl_u16(__a, __b) \ ({ \ + uint16x4_t __ax = __a; \ + int16x4_t __bx = __b; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshlv4hi ((__neon_int16x4_t) __a.val, __b.val, 0); \ + __rv.__o = __builtin_neon_vshlv4hi ((__neon_int16x4_t) __ax.val, __bx.val, 0); \ __rv.__i; \ }) #define vshl_u32(__a, __b) \ ({ \ + uint32x2_t __ax = __a; \ + int32x2_t __bx = __b; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshlv2si ((__neon_int32x2_t) __a.val, __b.val, 0); \ + __rv.__o = __builtin_neon_vshlv2si ((__neon_int32x2_t) __ax.val, __bx.val, 0); \ __rv.__i; \ }) #define vshl_u64(__a, __b) \ ({ \ + uint64x1_t __ax = __a; \ + int64x1_t __bx = __b; \ union { uint64x1_t __i; __neon_int64x1_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshlv1di ((__neon_int64x1_t) __a.val, __b.val, 0); \ + __rv.__o = __builtin_neon_vshlv1di ((__neon_int64x1_t) __ax.val, __bx.val, 0); \ __rv.__i; \ }) #define vshlq_s8(__a, __b) \ ({ \ + int8x16_t __ax = __a; \ + int8x16_t __bx = __b; \ union { int8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshlv16qi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vshlv16qi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vshlq_s16(__a, __b) \ ({ \ + int16x8_t __ax = __a; \ + int16x8_t __bx = __b; \ union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshlv8hi (__a.val, __b.val, 1); \ + __rv.__o 
= __builtin_neon_vshlv8hi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vshlq_s32(__a, __b) \ ({ \ + int32x4_t __ax = __a; \ + int32x4_t __bx = __b; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshlv4si (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vshlv4si (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vshlq_s64(__a, __b) \ ({ \ + int64x2_t __ax = __a; \ + int64x2_t __bx = __b; \ union { int64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshlv2di (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vshlv2di (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vshlq_u8(__a, __b) \ ({ \ + uint8x16_t __ax = __a; \ + int8x16_t __bx = __b; \ union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshlv16qi ((__neon_int8x16_t) __a.val, __b.val, 0); \ + __rv.__o = __builtin_neon_vshlv16qi ((__neon_int8x16_t) __ax.val, __bx.val, 0); \ __rv.__i; \ }) #define vshlq_u16(__a, __b) \ ({ \ + uint16x8_t __ax = __a; \ + int16x8_t __bx = __b; \ union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshlv8hi ((__neon_int16x8_t) __a.val, __b.val, 0); \ + __rv.__o = __builtin_neon_vshlv8hi ((__neon_int16x8_t) __ax.val, __bx.val, 0); \ __rv.__i; \ }) #define vshlq_u32(__a, __b) \ ({ \ + uint32x4_t __ax = __a; \ + int32x4_t __bx = __b; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshlv4si ((__neon_int32x4_t) __a.val, __b.val, 0); \ + __rv.__o = __builtin_neon_vshlv4si ((__neon_int32x4_t) __ax.val, __bx.val, 0); \ __rv.__i; \ }) #define vshlq_u64(__a, __b) \ ({ \ + uint64x2_t __ax = __a; \ + int64x2_t __bx = __b; \ union { uint64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshlv2di ((__neon_int64x2_t) __a.val, __b.val, 0); \ + __rv.__o = __builtin_neon_vshlv2di ((__neon_int64x2_t) __ax.val, __bx.val, 0); \ __rv.__i; \ }) #define vrshl_s8(__a, __b) \ ({ \ + int8x8_t __ax = __a; \ + int8x8_t 
__bx = __b; \ union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshlv8qi (__a.val, __b.val, 3); \ + __rv.__o = __builtin_neon_vshlv8qi (__ax.val, __bx.val, 3); \ __rv.__i; \ }) #define vrshl_s16(__a, __b) \ ({ \ + int16x4_t __ax = __a; \ + int16x4_t __bx = __b; \ union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshlv4hi (__a.val, __b.val, 3); \ + __rv.__o = __builtin_neon_vshlv4hi (__ax.val, __bx.val, 3); \ __rv.__i; \ }) #define vrshl_s32(__a, __b) \ ({ \ + int32x2_t __ax = __a; \ + int32x2_t __bx = __b; \ union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshlv2si (__a.val, __b.val, 3); \ + __rv.__o = __builtin_neon_vshlv2si (__ax.val, __bx.val, 3); \ __rv.__i; \ }) #define vrshl_s64(__a, __b) \ ({ \ + int64x1_t __ax = __a; \ + int64x1_t __bx = __b; \ union { int64x1_t __i; __neon_int64x1_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshlv1di (__a.val, __b.val, 3); \ + __rv.__o = __builtin_neon_vshlv1di (__ax.val, __bx.val, 3); \ __rv.__i; \ }) #define vrshl_u8(__a, __b) \ ({ \ + uint8x8_t __ax = __a; \ + int8x8_t __bx = __b; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshlv8qi ((__neon_int8x8_t) __a.val, __b.val, 2); \ + __rv.__o = __builtin_neon_vshlv8qi ((__neon_int8x8_t) __ax.val, __bx.val, 2); \ __rv.__i; \ }) #define vrshl_u16(__a, __b) \ ({ \ + uint16x4_t __ax = __a; \ + int16x4_t __bx = __b; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshlv4hi ((__neon_int16x4_t) __a.val, __b.val, 2); \ + __rv.__o = __builtin_neon_vshlv4hi ((__neon_int16x4_t) __ax.val, __bx.val, 2); \ __rv.__i; \ }) #define vrshl_u32(__a, __b) \ ({ \ + uint32x2_t __ax = __a; \ + int32x2_t __bx = __b; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshlv2si ((__neon_int32x2_t) __a.val, __b.val, 2); \ + __rv.__o = __builtin_neon_vshlv2si ((__neon_int32x2_t) __ax.val, __bx.val, 
2); \ __rv.__i; \ }) #define vrshl_u64(__a, __b) \ ({ \ + uint64x1_t __ax = __a; \ + int64x1_t __bx = __b; \ union { uint64x1_t __i; __neon_int64x1_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshlv1di ((__neon_int64x1_t) __a.val, __b.val, 2); \ + __rv.__o = __builtin_neon_vshlv1di ((__neon_int64x1_t) __ax.val, __bx.val, 2); \ __rv.__i; \ }) #define vrshlq_s8(__a, __b) \ ({ \ + int8x16_t __ax = __a; \ + int8x16_t __bx = __b; \ union { int8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshlv16qi (__a.val, __b.val, 3); \ + __rv.__o = __builtin_neon_vshlv16qi (__ax.val, __bx.val, 3); \ __rv.__i; \ }) #define vrshlq_s16(__a, __b) \ ({ \ + int16x8_t __ax = __a; \ + int16x8_t __bx = __b; \ union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshlv8hi (__a.val, __b.val, 3); \ + __rv.__o = __builtin_neon_vshlv8hi (__ax.val, __bx.val, 3); \ __rv.__i; \ }) #define vrshlq_s32(__a, __b) \ ({ \ + int32x4_t __ax = __a; \ + int32x4_t __bx = __b; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshlv4si (__a.val, __b.val, 3); \ + __rv.__o = __builtin_neon_vshlv4si (__ax.val, __bx.val, 3); \ __rv.__i; \ }) #define vrshlq_s64(__a, __b) \ ({ \ + int64x2_t __ax = __a; \ + int64x2_t __bx = __b; \ union { int64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshlv2di (__a.val, __b.val, 3); \ + __rv.__o = __builtin_neon_vshlv2di (__ax.val, __bx.val, 3); \ __rv.__i; \ }) #define vrshlq_u8(__a, __b) \ ({ \ + uint8x16_t __ax = __a; \ + int8x16_t __bx = __b; \ union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshlv16qi ((__neon_int8x16_t) __a.val, __b.val, 2); \ + __rv.__o = __builtin_neon_vshlv16qi ((__neon_int8x16_t) __ax.val, __bx.val, 2); \ __rv.__i; \ }) #define vrshlq_u16(__a, __b) \ ({ \ + uint16x8_t __ax = __a; \ + int16x8_t __bx = __b; \ union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshlv8hi 
((__neon_int16x8_t) __a.val, __b.val, 2); \ + __rv.__o = __builtin_neon_vshlv8hi ((__neon_int16x8_t) __ax.val, __bx.val, 2); \ __rv.__i; \ }) #define vrshlq_u32(__a, __b) \ ({ \ + uint32x4_t __ax = __a; \ + int32x4_t __bx = __b; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshlv4si ((__neon_int32x4_t) __a.val, __b.val, 2); \ + __rv.__o = __builtin_neon_vshlv4si ((__neon_int32x4_t) __ax.val, __bx.val, 2); \ __rv.__i; \ }) #define vrshlq_u64(__a, __b) \ ({ \ + uint64x2_t __ax = __a; \ + int64x2_t __bx = __b; \ union { uint64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshlv2di ((__neon_int64x2_t) __a.val, __b.val, 2); \ + __rv.__o = __builtin_neon_vshlv2di ((__neon_int64x2_t) __ax.val, __bx.val, 2); \ __rv.__i; \ }) #define vqshl_s8(__a, __b) \ ({ \ + int8x8_t __ax = __a; \ + int8x8_t __bx = __b; \ union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqshlv8qi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vqshlv8qi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vqshl_s16(__a, __b) \ ({ \ + int16x4_t __ax = __a; \ + int16x4_t __bx = __b; \ union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqshlv4hi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vqshlv4hi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vqshl_s32(__a, __b) \ ({ \ + int32x2_t __ax = __a; \ + int32x2_t __bx = __b; \ union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqshlv2si (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vqshlv2si (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vqshl_s64(__a, __b) \ ({ \ + int64x1_t __ax = __a; \ + int64x1_t __bx = __b; \ union { int64x1_t __i; __neon_int64x1_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqshlv1di (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vqshlv1di (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vqshl_u8(__a, __b) \ ({ \ + uint8x8_t __ax = __a; \ + int8x8_t 
__bx = __b; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqshlv8qi ((__neon_int8x8_t) __a.val, __b.val, 0); \ + __rv.__o = __builtin_neon_vqshlv8qi ((__neon_int8x8_t) __ax.val, __bx.val, 0); \ __rv.__i; \ }) #define vqshl_u16(__a, __b) \ ({ \ + uint16x4_t __ax = __a; \ + int16x4_t __bx = __b; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqshlv4hi ((__neon_int16x4_t) __a.val, __b.val, 0); \ + __rv.__o = __builtin_neon_vqshlv4hi ((__neon_int16x4_t) __ax.val, __bx.val, 0); \ __rv.__i; \ }) #define vqshl_u32(__a, __b) \ ({ \ + uint32x2_t __ax = __a; \ + int32x2_t __bx = __b; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqshlv2si ((__neon_int32x2_t) __a.val, __b.val, 0); \ + __rv.__o = __builtin_neon_vqshlv2si ((__neon_int32x2_t) __ax.val, __bx.val, 0); \ __rv.__i; \ }) #define vqshl_u64(__a, __b) \ ({ \ + uint64x1_t __ax = __a; \ + int64x1_t __bx = __b; \ union { uint64x1_t __i; __neon_int64x1_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqshlv1di ((__neon_int64x1_t) __a.val, __b.val, 0); \ + __rv.__o = __builtin_neon_vqshlv1di ((__neon_int64x1_t) __ax.val, __bx.val, 0); \ __rv.__i; \ }) #define vqshlq_s8(__a, __b) \ ({ \ + int8x16_t __ax = __a; \ + int8x16_t __bx = __b; \ union { int8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqshlv16qi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vqshlv16qi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vqshlq_s16(__a, __b) \ ({ \ + int16x8_t __ax = __a; \ + int16x8_t __bx = __b; \ union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqshlv8hi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vqshlv8hi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vqshlq_s32(__a, __b) \ ({ \ + int32x4_t __ax = __a; \ + int32x4_t __bx = __b; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqshlv4si (__a.val, __b.val, 1); 
\ + __rv.__o = __builtin_neon_vqshlv4si (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vqshlq_s64(__a, __b) \ ({ \ + int64x2_t __ax = __a; \ + int64x2_t __bx = __b; \ union { int64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqshlv2di (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vqshlv2di (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vqshlq_u8(__a, __b) \ ({ \ + uint8x16_t __ax = __a; \ + int8x16_t __bx = __b; \ union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqshlv16qi ((__neon_int8x16_t) __a.val, __b.val, 0); \ + __rv.__o = __builtin_neon_vqshlv16qi ((__neon_int8x16_t) __ax.val, __bx.val, 0); \ __rv.__i; \ }) #define vqshlq_u16(__a, __b) \ ({ \ + uint16x8_t __ax = __a; \ + int16x8_t __bx = __b; \ union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqshlv8hi ((__neon_int16x8_t) __a.val, __b.val, 0); \ + __rv.__o = __builtin_neon_vqshlv8hi ((__neon_int16x8_t) __ax.val, __bx.val, 0); \ __rv.__i; \ }) #define vqshlq_u32(__a, __b) \ ({ \ + uint32x4_t __ax = __a; \ + int32x4_t __bx = __b; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqshlv4si ((__neon_int32x4_t) __a.val, __b.val, 0); \ + __rv.__o = __builtin_neon_vqshlv4si ((__neon_int32x4_t) __ax.val, __bx.val, 0); \ __rv.__i; \ }) #define vqshlq_u64(__a, __b) \ ({ \ + uint64x2_t __ax = __a; \ + int64x2_t __bx = __b; \ union { uint64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqshlv2di ((__neon_int64x2_t) __a.val, __b.val, 0); \ + __rv.__o = __builtin_neon_vqshlv2di ((__neon_int64x2_t) __ax.val, __bx.val, 0); \ __rv.__i; \ }) #define vqrshl_s8(__a, __b) \ ({ \ + int8x8_t __ax = __a; \ + int8x8_t __bx = __b; \ union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqshlv8qi (__a.val, __b.val, 3); \ + __rv.__o = __builtin_neon_vqshlv8qi (__ax.val, __bx.val, 3); \ __rv.__i; \ }) #define vqrshl_s16(__a, __b) \ ({ \ + 
int16x4_t __ax = __a; \ + int16x4_t __bx = __b; \ union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqshlv4hi (__a.val, __b.val, 3); \ + __rv.__o = __builtin_neon_vqshlv4hi (__ax.val, __bx.val, 3); \ __rv.__i; \ }) #define vqrshl_s32(__a, __b) \ ({ \ + int32x2_t __ax = __a; \ + int32x2_t __bx = __b; \ union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqshlv2si (__a.val, __b.val, 3); \ + __rv.__o = __builtin_neon_vqshlv2si (__ax.val, __bx.val, 3); \ __rv.__i; \ }) #define vqrshl_s64(__a, __b) \ ({ \ + int64x1_t __ax = __a; \ + int64x1_t __bx = __b; \ union { int64x1_t __i; __neon_int64x1_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqshlv1di (__a.val, __b.val, 3); \ + __rv.__o = __builtin_neon_vqshlv1di (__ax.val, __bx.val, 3); \ __rv.__i; \ }) #define vqrshl_u8(__a, __b) \ ({ \ + uint8x8_t __ax = __a; \ + int8x8_t __bx = __b; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqshlv8qi ((__neon_int8x8_t) __a.val, __b.val, 2); \ + __rv.__o = __builtin_neon_vqshlv8qi ((__neon_int8x8_t) __ax.val, __bx.val, 2); \ __rv.__i; \ }) #define vqrshl_u16(__a, __b) \ ({ \ + uint16x4_t __ax = __a; \ + int16x4_t __bx = __b; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqshlv4hi ((__neon_int16x4_t) __a.val, __b.val, 2); \ + __rv.__o = __builtin_neon_vqshlv4hi ((__neon_int16x4_t) __ax.val, __bx.val, 2); \ __rv.__i; \ }) #define vqrshl_u32(__a, __b) \ ({ \ + uint32x2_t __ax = __a; \ + int32x2_t __bx = __b; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqshlv2si ((__neon_int32x2_t) __a.val, __b.val, 2); \ + __rv.__o = __builtin_neon_vqshlv2si ((__neon_int32x2_t) __ax.val, __bx.val, 2); \ __rv.__i; \ }) #define vqrshl_u64(__a, __b) \ ({ \ + uint64x1_t __ax = __a; \ + int64x1_t __bx = __b; \ union { uint64x1_t __i; __neon_int64x1_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqshlv1di ((__neon_int64x1_t) 
__a.val, __b.val, 2); \ + __rv.__o = __builtin_neon_vqshlv1di ((__neon_int64x1_t) __ax.val, __bx.val, 2); \ __rv.__i; \ }) #define vqrshlq_s8(__a, __b) \ ({ \ + int8x16_t __ax = __a; \ + int8x16_t __bx = __b; \ union { int8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqshlv16qi (__a.val, __b.val, 3); \ + __rv.__o = __builtin_neon_vqshlv16qi (__ax.val, __bx.val, 3); \ __rv.__i; \ }) #define vqrshlq_s16(__a, __b) \ ({ \ + int16x8_t __ax = __a; \ + int16x8_t __bx = __b; \ union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqshlv8hi (__a.val, __b.val, 3); \ + __rv.__o = __builtin_neon_vqshlv8hi (__ax.val, __bx.val, 3); \ __rv.__i; \ }) #define vqrshlq_s32(__a, __b) \ ({ \ + int32x4_t __ax = __a; \ + int32x4_t __bx = __b; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqshlv4si (__a.val, __b.val, 3); \ + __rv.__o = __builtin_neon_vqshlv4si (__ax.val, __bx.val, 3); \ __rv.__i; \ }) #define vqrshlq_s64(__a, __b) \ ({ \ + int64x2_t __ax = __a; \ + int64x2_t __bx = __b; \ union { int64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqshlv2di (__a.val, __b.val, 3); \ + __rv.__o = __builtin_neon_vqshlv2di (__ax.val, __bx.val, 3); \ __rv.__i; \ }) #define vqrshlq_u8(__a, __b) \ ({ \ + uint8x16_t __ax = __a; \ + int8x16_t __bx = __b; \ union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqshlv16qi ((__neon_int8x16_t) __a.val, __b.val, 2); \ + __rv.__o = __builtin_neon_vqshlv16qi ((__neon_int8x16_t) __ax.val, __bx.val, 2); \ __rv.__i; \ }) #define vqrshlq_u16(__a, __b) \ ({ \ + uint16x8_t __ax = __a; \ + int16x8_t __bx = __b; \ union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqshlv8hi ((__neon_int16x8_t) __a.val, __b.val, 2); \ + __rv.__o = __builtin_neon_vqshlv8hi ((__neon_int16x8_t) __ax.val, __bx.val, 2); \ __rv.__i; \ }) #define vqrshlq_u32(__a, __b) \ ({ \ + uint32x4_t __ax = __a; \ + 
int32x4_t __bx = __b; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqshlv4si ((__neon_int32x4_t) __a.val, __b.val, 2); \ + __rv.__o = __builtin_neon_vqshlv4si ((__neon_int32x4_t) __ax.val, __bx.val, 2); \ __rv.__i; \ }) #define vqrshlq_u64(__a, __b) \ ({ \ + uint64x2_t __ax = __a; \ + int64x2_t __bx = __b; \ union { uint64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqshlv2di ((__neon_int64x2_t) __a.val, __b.val, 2); \ + __rv.__o = __builtin_neon_vqshlv2di ((__neon_int64x2_t) __ax.val, __bx.val, 2); \ __rv.__i; \ }) #define vshr_n_s8(__a, __b) \ ({ \ + int8x8_t __ax = __a; \ union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshr_nv8qi (__a.val, __b, 1); \ + __rv.__o = __builtin_neon_vshr_nv8qi (__ax.val, __b, 1); \ __rv.__i; \ }) #define vshr_n_s16(__a, __b) \ ({ \ + int16x4_t __ax = __a; \ union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshr_nv4hi (__a.val, __b, 1); \ + __rv.__o = __builtin_neon_vshr_nv4hi (__ax.val, __b, 1); \ __rv.__i; \ }) #define vshr_n_s32(__a, __b) \ ({ \ + int32x2_t __ax = __a; \ union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshr_nv2si (__a.val, __b, 1); \ + __rv.__o = __builtin_neon_vshr_nv2si (__ax.val, __b, 1); \ __rv.__i; \ }) #define vshr_n_s64(__a, __b) \ ({ \ + int64x1_t __ax = __a; \ union { int64x1_t __i; __neon_int64x1_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshr_nv1di (__a.val, __b, 1); \ + __rv.__o = __builtin_neon_vshr_nv1di (__ax.val, __b, 1); \ __rv.__i; \ }) #define vshr_n_u8(__a, __b) \ ({ \ + uint8x8_t __ax = __a; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshr_nv8qi ((__neon_int8x8_t) __a.val, __b, 0); \ + __rv.__o = __builtin_neon_vshr_nv8qi ((__neon_int8x8_t) __ax.val, __b, 0); \ __rv.__i; \ }) #define vshr_n_u16(__a, __b) \ ({ \ + uint16x4_t __ax = __a; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; 
\ - __rv.__o = __builtin_neon_vshr_nv4hi ((__neon_int16x4_t) __a.val, __b, 0); \ + __rv.__o = __builtin_neon_vshr_nv4hi ((__neon_int16x4_t) __ax.val, __b, 0); \ __rv.__i; \ }) #define vshr_n_u32(__a, __b) \ ({ \ + uint32x2_t __ax = __a; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshr_nv2si ((__neon_int32x2_t) __a.val, __b, 0); \ + __rv.__o = __builtin_neon_vshr_nv2si ((__neon_int32x2_t) __ax.val, __b, 0); \ __rv.__i; \ }) #define vshr_n_u64(__a, __b) \ ({ \ + uint64x1_t __ax = __a; \ union { uint64x1_t __i; __neon_int64x1_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshr_nv1di ((__neon_int64x1_t) __a.val, __b, 0); \ + __rv.__o = __builtin_neon_vshr_nv1di ((__neon_int64x1_t) __ax.val, __b, 0); \ __rv.__i; \ }) #define vshrq_n_s8(__a, __b) \ ({ \ + int8x16_t __ax = __a; \ union { int8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshr_nv16qi (__a.val, __b, 1); \ + __rv.__o = __builtin_neon_vshr_nv16qi (__ax.val, __b, 1); \ __rv.__i; \ }) #define vshrq_n_s16(__a, __b) \ ({ \ + int16x8_t __ax = __a; \ union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshr_nv8hi (__a.val, __b, 1); \ + __rv.__o = __builtin_neon_vshr_nv8hi (__ax.val, __b, 1); \ __rv.__i; \ }) #define vshrq_n_s32(__a, __b) \ ({ \ + int32x4_t __ax = __a; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshr_nv4si (__a.val, __b, 1); \ + __rv.__o = __builtin_neon_vshr_nv4si (__ax.val, __b, 1); \ __rv.__i; \ }) #define vshrq_n_s64(__a, __b) \ ({ \ + int64x2_t __ax = __a; \ union { int64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshr_nv2di (__a.val, __b, 1); \ + __rv.__o = __builtin_neon_vshr_nv2di (__ax.val, __b, 1); \ __rv.__i; \ }) #define vshrq_n_u8(__a, __b) \ ({ \ + uint8x16_t __ax = __a; \ union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshr_nv16qi ((__neon_int8x16_t) __a.val, __b, 0); \ + __rv.__o = 
__builtin_neon_vshr_nv16qi ((__neon_int8x16_t) __ax.val, __b, 0); \ __rv.__i; \ }) #define vshrq_n_u16(__a, __b) \ ({ \ + uint16x8_t __ax = __a; \ union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshr_nv8hi ((__neon_int16x8_t) __a.val, __b, 0); \ + __rv.__o = __builtin_neon_vshr_nv8hi ((__neon_int16x8_t) __ax.val, __b, 0); \ __rv.__i; \ }) #define vshrq_n_u32(__a, __b) \ ({ \ + uint32x4_t __ax = __a; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshr_nv4si ((__neon_int32x4_t) __a.val, __b, 0); \ + __rv.__o = __builtin_neon_vshr_nv4si ((__neon_int32x4_t) __ax.val, __b, 0); \ __rv.__i; \ }) #define vshrq_n_u64(__a, __b) \ ({ \ + uint64x2_t __ax = __a; \ union { uint64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshr_nv2di ((__neon_int64x2_t) __a.val, __b, 0); \ + __rv.__o = __builtin_neon_vshr_nv2di ((__neon_int64x2_t) __ax.val, __b, 0); \ __rv.__i; \ }) #define vrshr_n_s8(__a, __b) \ ({ \ + int8x8_t __ax = __a; \ union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshr_nv8qi (__a.val, __b, 3); \ + __rv.__o = __builtin_neon_vshr_nv8qi (__ax.val, __b, 3); \ __rv.__i; \ }) #define vrshr_n_s16(__a, __b) \ ({ \ + int16x4_t __ax = __a; \ union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshr_nv4hi (__a.val, __b, 3); \ + __rv.__o = __builtin_neon_vshr_nv4hi (__ax.val, __b, 3); \ __rv.__i; \ }) #define vrshr_n_s32(__a, __b) \ ({ \ + int32x2_t __ax = __a; \ union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshr_nv2si (__a.val, __b, 3); \ + __rv.__o = __builtin_neon_vshr_nv2si (__ax.val, __b, 3); \ __rv.__i; \ }) #define vrshr_n_s64(__a, __b) \ ({ \ + int64x1_t __ax = __a; \ union { int64x1_t __i; __neon_int64x1_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshr_nv1di (__a.val, __b, 3); \ + __rv.__o = __builtin_neon_vshr_nv1di (__ax.val, __b, 3); \ __rv.__i; \ }) #define 
vrshr_n_u8(__a, __b) \ ({ \ + uint8x8_t __ax = __a; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshr_nv8qi ((__neon_int8x8_t) __a.val, __b, 2); \ + __rv.__o = __builtin_neon_vshr_nv8qi ((__neon_int8x8_t) __ax.val, __b, 2); \ __rv.__i; \ }) #define vrshr_n_u16(__a, __b) \ ({ \ + uint16x4_t __ax = __a; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshr_nv4hi ((__neon_int16x4_t) __a.val, __b, 2); \ + __rv.__o = __builtin_neon_vshr_nv4hi ((__neon_int16x4_t) __ax.val, __b, 2); \ __rv.__i; \ }) #define vrshr_n_u32(__a, __b) \ ({ \ + uint32x2_t __ax = __a; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshr_nv2si ((__neon_int32x2_t) __a.val, __b, 2); \ + __rv.__o = __builtin_neon_vshr_nv2si ((__neon_int32x2_t) __ax.val, __b, 2); \ __rv.__i; \ }) #define vrshr_n_u64(__a, __b) \ ({ \ + uint64x1_t __ax = __a; \ union { uint64x1_t __i; __neon_int64x1_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshr_nv1di ((__neon_int64x1_t) __a.val, __b, 2); \ + __rv.__o = __builtin_neon_vshr_nv1di ((__neon_int64x1_t) __ax.val, __b, 2); \ __rv.__i; \ }) #define vrshrq_n_s8(__a, __b) \ ({ \ + int8x16_t __ax = __a; \ union { int8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshr_nv16qi (__a.val, __b, 3); \ + __rv.__o = __builtin_neon_vshr_nv16qi (__ax.val, __b, 3); \ __rv.__i; \ }) #define vrshrq_n_s16(__a, __b) \ ({ \ + int16x8_t __ax = __a; \ union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshr_nv8hi (__a.val, __b, 3); \ + __rv.__o = __builtin_neon_vshr_nv8hi (__ax.val, __b, 3); \ __rv.__i; \ }) #define vrshrq_n_s32(__a, __b) \ ({ \ + int32x4_t __ax = __a; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshr_nv4si (__a.val, __b, 3); \ + __rv.__o = __builtin_neon_vshr_nv4si (__ax.val, __b, 3); \ __rv.__i; \ }) #define vrshrq_n_s64(__a, __b) \ ({ \ + int64x2_t __ax = __a; \ union { 
int64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshr_nv2di (__a.val, __b, 3); \ + __rv.__o = __builtin_neon_vshr_nv2di (__ax.val, __b, 3); \ __rv.__i; \ }) #define vrshrq_n_u8(__a, __b) \ ({ \ + uint8x16_t __ax = __a; \ union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshr_nv16qi ((__neon_int8x16_t) __a.val, __b, 2); \ + __rv.__o = __builtin_neon_vshr_nv16qi ((__neon_int8x16_t) __ax.val, __b, 2); \ __rv.__i; \ }) #define vrshrq_n_u16(__a, __b) \ ({ \ + uint16x8_t __ax = __a; \ union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshr_nv8hi ((__neon_int16x8_t) __a.val, __b, 2); \ + __rv.__o = __builtin_neon_vshr_nv8hi ((__neon_int16x8_t) __ax.val, __b, 2); \ __rv.__i; \ }) #define vrshrq_n_u32(__a, __b) \ ({ \ + uint32x4_t __ax = __a; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshr_nv4si ((__neon_int32x4_t) __a.val, __b, 2); \ + __rv.__o = __builtin_neon_vshr_nv4si ((__neon_int32x4_t) __ax.val, __b, 2); \ __rv.__i; \ }) #define vrshrq_n_u64(__a, __b) \ ({ \ + uint64x2_t __ax = __a; \ union { uint64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshr_nv2di ((__neon_int64x2_t) __a.val, __b, 2); \ + __rv.__o = __builtin_neon_vshr_nv2di ((__neon_int64x2_t) __ax.val, __b, 2); \ __rv.__i; \ }) #define vshrn_n_s16(__a, __b) \ ({ \ + int16x8_t __ax = __a; \ union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshrn_nv8hi (__a.val, __b, 1); \ + __rv.__o = __builtin_neon_vshrn_nv8hi (__ax.val, __b, 1); \ __rv.__i; \ }) #define vshrn_n_s32(__a, __b) \ ({ \ + int32x4_t __ax = __a; \ union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshrn_nv4si (__a.val, __b, 1); \ + __rv.__o = __builtin_neon_vshrn_nv4si (__ax.val, __b, 1); \ __rv.__i; \ }) #define vshrn_n_s64(__a, __b) \ ({ \ + int64x2_t __ax = __a; \ union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \ - 
__rv.__o = __builtin_neon_vshrn_nv2di (__a.val, __b, 1); \ + __rv.__o = __builtin_neon_vshrn_nv2di (__ax.val, __b, 1); \ __rv.__i; \ }) #define vshrn_n_u16(__a, __b) \ ({ \ + uint16x8_t __ax = __a; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshrn_nv8hi ((__neon_int16x8_t) __a.val, __b, 0); \ + __rv.__o = __builtin_neon_vshrn_nv8hi ((__neon_int16x8_t) __ax.val, __b, 0); \ __rv.__i; \ }) #define vshrn_n_u32(__a, __b) \ ({ \ + uint32x4_t __ax = __a; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshrn_nv4si ((__neon_int32x4_t) __a.val, __b, 0); \ + __rv.__o = __builtin_neon_vshrn_nv4si ((__neon_int32x4_t) __ax.val, __b, 0); \ __rv.__i; \ }) #define vshrn_n_u64(__a, __b) \ ({ \ + uint64x2_t __ax = __a; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshrn_nv2di ((__neon_int64x2_t) __a.val, __b, 0); \ + __rv.__o = __builtin_neon_vshrn_nv2di ((__neon_int64x2_t) __ax.val, __b, 0); \ __rv.__i; \ }) #define vrshrn_n_s16(__a, __b) \ ({ \ + int16x8_t __ax = __a; \ union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshrn_nv8hi (__a.val, __b, 3); \ + __rv.__o = __builtin_neon_vshrn_nv8hi (__ax.val, __b, 3); \ __rv.__i; \ }) #define vrshrn_n_s32(__a, __b) \ ({ \ + int32x4_t __ax = __a; \ union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshrn_nv4si (__a.val, __b, 3); \ + __rv.__o = __builtin_neon_vshrn_nv4si (__ax.val, __b, 3); \ __rv.__i; \ }) #define vrshrn_n_s64(__a, __b) \ ({ \ + int64x2_t __ax = __a; \ union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshrn_nv2di (__a.val, __b, 3); \ + __rv.__o = __builtin_neon_vshrn_nv2di (__ax.val, __b, 3); \ __rv.__i; \ }) #define vrshrn_n_u16(__a, __b) \ ({ \ + uint16x8_t __ax = __a; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshrn_nv8hi ((__neon_int16x8_t) __a.val, __b, 2); \ + 
__rv.__o = __builtin_neon_vshrn_nv8hi ((__neon_int16x8_t) __ax.val, __b, 2); \ __rv.__i; \ }) #define vrshrn_n_u32(__a, __b) \ ({ \ + uint32x4_t __ax = __a; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshrn_nv4si ((__neon_int32x4_t) __a.val, __b, 2); \ + __rv.__o = __builtin_neon_vshrn_nv4si ((__neon_int32x4_t) __ax.val, __b, 2); \ __rv.__i; \ }) #define vrshrn_n_u64(__a, __b) \ ({ \ + uint64x2_t __ax = __a; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshrn_nv2di ((__neon_int64x2_t) __a.val, __b, 2); \ + __rv.__o = __builtin_neon_vshrn_nv2di ((__neon_int64x2_t) __ax.val, __b, 2); \ __rv.__i; \ }) #define vqshrn_n_s16(__a, __b) \ ({ \ + int16x8_t __ax = __a; \ union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqshrn_nv8hi (__a.val, __b, 1); \ + __rv.__o = __builtin_neon_vqshrn_nv8hi (__ax.val, __b, 1); \ __rv.__i; \ }) #define vqshrn_n_s32(__a, __b) \ ({ \ + int32x4_t __ax = __a; \ union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqshrn_nv4si (__a.val, __b, 1); \ + __rv.__o = __builtin_neon_vqshrn_nv4si (__ax.val, __b, 1); \ __rv.__i; \ }) #define vqshrn_n_s64(__a, __b) \ ({ \ + int64x2_t __ax = __a; \ union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqshrn_nv2di (__a.val, __b, 1); \ + __rv.__o = __builtin_neon_vqshrn_nv2di (__ax.val, __b, 1); \ __rv.__i; \ }) #define vqshrn_n_u16(__a, __b) \ ({ \ + uint16x8_t __ax = __a; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqshrn_nv8hi ((__neon_int16x8_t) __a.val, __b, 0); \ + __rv.__o = __builtin_neon_vqshrn_nv8hi ((__neon_int16x8_t) __ax.val, __b, 0); \ __rv.__i; \ }) #define vqshrn_n_u32(__a, __b) \ ({ \ + uint32x4_t __ax = __a; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqshrn_nv4si ((__neon_int32x4_t) __a.val, __b, 0); \ + __rv.__o = 
__builtin_neon_vqshrn_nv4si ((__neon_int32x4_t) __ax.val, __b, 0); \ __rv.__i; \ }) #define vqshrn_n_u64(__a, __b) \ ({ \ + uint64x2_t __ax = __a; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqshrn_nv2di ((__neon_int64x2_t) __a.val, __b, 0); \ + __rv.__o = __builtin_neon_vqshrn_nv2di ((__neon_int64x2_t) __ax.val, __b, 0); \ __rv.__i; \ }) #define vqrshrn_n_s16(__a, __b) \ ({ \ + int16x8_t __ax = __a; \ union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqshrn_nv8hi (__a.val, __b, 3); \ + __rv.__o = __builtin_neon_vqshrn_nv8hi (__ax.val, __b, 3); \ __rv.__i; \ }) #define vqrshrn_n_s32(__a, __b) \ ({ \ + int32x4_t __ax = __a; \ union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqshrn_nv4si (__a.val, __b, 3); \ + __rv.__o = __builtin_neon_vqshrn_nv4si (__ax.val, __b, 3); \ __rv.__i; \ }) #define vqrshrn_n_s64(__a, __b) \ ({ \ + int64x2_t __ax = __a; \ union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqshrn_nv2di (__a.val, __b, 3); \ + __rv.__o = __builtin_neon_vqshrn_nv2di (__ax.val, __b, 3); \ __rv.__i; \ }) #define vqrshrn_n_u16(__a, __b) \ ({ \ + uint16x8_t __ax = __a; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqshrn_nv8hi ((__neon_int16x8_t) __a.val, __b, 2); \ + __rv.__o = __builtin_neon_vqshrn_nv8hi ((__neon_int16x8_t) __ax.val, __b, 2); \ __rv.__i; \ }) #define vqrshrn_n_u32(__a, __b) \ ({ \ + uint32x4_t __ax = __a; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqshrn_nv4si ((__neon_int32x4_t) __a.val, __b, 2); \ + __rv.__o = __builtin_neon_vqshrn_nv4si ((__neon_int32x4_t) __ax.val, __b, 2); \ __rv.__i; \ }) #define vqrshrn_n_u64(__a, __b) \ ({ \ + uint64x2_t __ax = __a; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqshrn_nv2di ((__neon_int64x2_t) __a.val, __b, 2); \ + __rv.__o = 
__builtin_neon_vqshrn_nv2di ((__neon_int64x2_t) __ax.val, __b, 2); \ __rv.__i; \ }) #define vqshrun_n_s16(__a, __b) \ ({ \ + int16x8_t __ax = __a; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqshrun_nv8hi (__a.val, __b, 1); \ + __rv.__o = __builtin_neon_vqshrun_nv8hi (__ax.val, __b, 1); \ __rv.__i; \ }) #define vqshrun_n_s32(__a, __b) \ ({ \ + int32x4_t __ax = __a; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqshrun_nv4si (__a.val, __b, 1); \ + __rv.__o = __builtin_neon_vqshrun_nv4si (__ax.val, __b, 1); \ __rv.__i; \ }) #define vqshrun_n_s64(__a, __b) \ ({ \ + int64x2_t __ax = __a; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqshrun_nv2di (__a.val, __b, 1); \ + __rv.__o = __builtin_neon_vqshrun_nv2di (__ax.val, __b, 1); \ __rv.__i; \ }) #define vqrshrun_n_s16(__a, __b) \ ({ \ + int16x8_t __ax = __a; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqshrun_nv8hi (__a.val, __b, 3); \ + __rv.__o = __builtin_neon_vqshrun_nv8hi (__ax.val, __b, 3); \ __rv.__i; \ }) #define vqrshrun_n_s32(__a, __b) \ ({ \ + int32x4_t __ax = __a; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqshrun_nv4si (__a.val, __b, 3); \ + __rv.__o = __builtin_neon_vqshrun_nv4si (__ax.val, __b, 3); \ __rv.__i; \ }) #define vqrshrun_n_s64(__a, __b) \ ({ \ + int64x2_t __ax = __a; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqshrun_nv2di (__a.val, __b, 3); \ + __rv.__o = __builtin_neon_vqshrun_nv2di (__ax.val, __b, 3); \ __rv.__i; \ }) #define vshl_n_s8(__a, __b) \ ({ \ + int8x8_t __ax = __a; \ union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshl_nv8qi (__a.val, __b, 1); \ + __rv.__o = __builtin_neon_vshl_nv8qi (__ax.val, __b, 1); \ __rv.__i; \ }) #define vshl_n_s16(__a, __b) \ ({ \ + int16x4_t __ax = __a; \ union { int16x4_t __i; 
__neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshl_nv4hi (__a.val, __b, 1); \ + __rv.__o = __builtin_neon_vshl_nv4hi (__ax.val, __b, 1); \ __rv.__i; \ }) #define vshl_n_s32(__a, __b) \ ({ \ + int32x2_t __ax = __a; \ union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshl_nv2si (__a.val, __b, 1); \ + __rv.__o = __builtin_neon_vshl_nv2si (__ax.val, __b, 1); \ __rv.__i; \ }) #define vshl_n_s64(__a, __b) \ ({ \ + int64x1_t __ax = __a; \ union { int64x1_t __i; __neon_int64x1_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshl_nv1di (__a.val, __b, 1); \ + __rv.__o = __builtin_neon_vshl_nv1di (__ax.val, __b, 1); \ __rv.__i; \ }) #define vshl_n_u8(__a, __b) \ ({ \ + uint8x8_t __ax = __a; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshl_nv8qi ((__neon_int8x8_t) __a.val, __b, 0); \ + __rv.__o = __builtin_neon_vshl_nv8qi ((__neon_int8x8_t) __ax.val, __b, 0); \ __rv.__i; \ }) #define vshl_n_u16(__a, __b) \ ({ \ + uint16x4_t __ax = __a; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshl_nv4hi ((__neon_int16x4_t) __a.val, __b, 0); \ + __rv.__o = __builtin_neon_vshl_nv4hi ((__neon_int16x4_t) __ax.val, __b, 0); \ __rv.__i; \ }) #define vshl_n_u32(__a, __b) \ ({ \ + uint32x2_t __ax = __a; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshl_nv2si ((__neon_int32x2_t) __a.val, __b, 0); \ + __rv.__o = __builtin_neon_vshl_nv2si ((__neon_int32x2_t) __ax.val, __b, 0); \ __rv.__i; \ }) #define vshl_n_u64(__a, __b) \ ({ \ + uint64x1_t __ax = __a; \ union { uint64x1_t __i; __neon_int64x1_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshl_nv1di ((__neon_int64x1_t) __a.val, __b, 0); \ + __rv.__o = __builtin_neon_vshl_nv1di ((__neon_int64x1_t) __ax.val, __b, 0); \ __rv.__i; \ }) #define vshlq_n_s8(__a, __b) \ ({ \ + int8x16_t __ax = __a; \ union { int8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshl_nv16qi 
(__a.val, __b, 1); \ + __rv.__o = __builtin_neon_vshl_nv16qi (__ax.val, __b, 1); \ __rv.__i; \ }) #define vshlq_n_s16(__a, __b) \ ({ \ + int16x8_t __ax = __a; \ union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshl_nv8hi (__a.val, __b, 1); \ + __rv.__o = __builtin_neon_vshl_nv8hi (__ax.val, __b, 1); \ __rv.__i; \ }) #define vshlq_n_s32(__a, __b) \ ({ \ + int32x4_t __ax = __a; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshl_nv4si (__a.val, __b, 1); \ + __rv.__o = __builtin_neon_vshl_nv4si (__ax.val, __b, 1); \ __rv.__i; \ }) #define vshlq_n_s64(__a, __b) \ ({ \ + int64x2_t __ax = __a; \ union { int64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshl_nv2di (__a.val, __b, 1); \ + __rv.__o = __builtin_neon_vshl_nv2di (__ax.val, __b, 1); \ __rv.__i; \ }) #define vshlq_n_u8(__a, __b) \ ({ \ + uint8x16_t __ax = __a; \ union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshl_nv16qi ((__neon_int8x16_t) __a.val, __b, 0); \ + __rv.__o = __builtin_neon_vshl_nv16qi ((__neon_int8x16_t) __ax.val, __b, 0); \ __rv.__i; \ }) #define vshlq_n_u16(__a, __b) \ ({ \ + uint16x8_t __ax = __a; \ union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshl_nv8hi ((__neon_int16x8_t) __a.val, __b, 0); \ + __rv.__o = __builtin_neon_vshl_nv8hi ((__neon_int16x8_t) __ax.val, __b, 0); \ __rv.__i; \ }) #define vshlq_n_u32(__a, __b) \ ({ \ + uint32x4_t __ax = __a; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshl_nv4si ((__neon_int32x4_t) __a.val, __b, 0); \ + __rv.__o = __builtin_neon_vshl_nv4si ((__neon_int32x4_t) __ax.val, __b, 0); \ __rv.__i; \ }) #define vshlq_n_u64(__a, __b) \ ({ \ + uint64x2_t __ax = __a; \ union { uint64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshl_nv2di ((__neon_int64x2_t) __a.val, __b, 0); \ + __rv.__o = __builtin_neon_vshl_nv2di 
((__neon_int64x2_t) __ax.val, __b, 0); \ __rv.__i; \ }) #define vqshl_n_s8(__a, __b) \ ({ \ + int8x8_t __ax = __a; \ union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqshl_nv8qi (__a.val, __b, 1); \ + __rv.__o = __builtin_neon_vqshl_nv8qi (__ax.val, __b, 1); \ __rv.__i; \ }) #define vqshl_n_s16(__a, __b) \ ({ \ + int16x4_t __ax = __a; \ union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqshl_nv4hi (__a.val, __b, 1); \ + __rv.__o = __builtin_neon_vqshl_nv4hi (__ax.val, __b, 1); \ __rv.__i; \ }) #define vqshl_n_s32(__a, __b) \ ({ \ + int32x2_t __ax = __a; \ union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqshl_nv2si (__a.val, __b, 1); \ + __rv.__o = __builtin_neon_vqshl_nv2si (__ax.val, __b, 1); \ __rv.__i; \ }) #define vqshl_n_s64(__a, __b) \ ({ \ + int64x1_t __ax = __a; \ union { int64x1_t __i; __neon_int64x1_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqshl_nv1di (__a.val, __b, 1); \ + __rv.__o = __builtin_neon_vqshl_nv1di (__ax.val, __b, 1); \ __rv.__i; \ }) #define vqshl_n_u8(__a, __b) \ ({ \ + uint8x8_t __ax = __a; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqshl_nv8qi ((__neon_int8x8_t) __a.val, __b, 0); \ + __rv.__o = __builtin_neon_vqshl_nv8qi ((__neon_int8x8_t) __ax.val, __b, 0); \ __rv.__i; \ }) #define vqshl_n_u16(__a, __b) \ ({ \ + uint16x4_t __ax = __a; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqshl_nv4hi ((__neon_int16x4_t) __a.val, __b, 0); \ + __rv.__o = __builtin_neon_vqshl_nv4hi ((__neon_int16x4_t) __ax.val, __b, 0); \ __rv.__i; \ }) #define vqshl_n_u32(__a, __b) \ ({ \ + uint32x2_t __ax = __a; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqshl_nv2si ((__neon_int32x2_t) __a.val, __b, 0); \ + __rv.__o = __builtin_neon_vqshl_nv2si ((__neon_int32x2_t) __ax.val, __b, 0); \ __rv.__i; \ }) #define vqshl_n_u64(__a, __b) \ ({ \ + 
uint64x1_t __ax = __a; \ union { uint64x1_t __i; __neon_int64x1_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqshl_nv1di ((__neon_int64x1_t) __a.val, __b, 0); \ + __rv.__o = __builtin_neon_vqshl_nv1di ((__neon_int64x1_t) __ax.val, __b, 0); \ __rv.__i; \ }) #define vqshlq_n_s8(__a, __b) \ ({ \ + int8x16_t __ax = __a; \ union { int8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqshl_nv16qi (__a.val, __b, 1); \ + __rv.__o = __builtin_neon_vqshl_nv16qi (__ax.val, __b, 1); \ __rv.__i; \ }) #define vqshlq_n_s16(__a, __b) \ ({ \ + int16x8_t __ax = __a; \ union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqshl_nv8hi (__a.val, __b, 1); \ + __rv.__o = __builtin_neon_vqshl_nv8hi (__ax.val, __b, 1); \ __rv.__i; \ }) #define vqshlq_n_s32(__a, __b) \ ({ \ + int32x4_t __ax = __a; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqshl_nv4si (__a.val, __b, 1); \ + __rv.__o = __builtin_neon_vqshl_nv4si (__ax.val, __b, 1); \ __rv.__i; \ }) #define vqshlq_n_s64(__a, __b) \ ({ \ + int64x2_t __ax = __a; \ union { int64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqshl_nv2di (__a.val, __b, 1); \ + __rv.__o = __builtin_neon_vqshl_nv2di (__ax.val, __b, 1); \ __rv.__i; \ }) #define vqshlq_n_u8(__a, __b) \ ({ \ + uint8x16_t __ax = __a; \ union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqshl_nv16qi ((__neon_int8x16_t) __a.val, __b, 0); \ + __rv.__o = __builtin_neon_vqshl_nv16qi ((__neon_int8x16_t) __ax.val, __b, 0); \ __rv.__i; \ }) #define vqshlq_n_u16(__a, __b) \ ({ \ + uint16x8_t __ax = __a; \ union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqshl_nv8hi ((__neon_int16x8_t) __a.val, __b, 0); \ + __rv.__o = __builtin_neon_vqshl_nv8hi ((__neon_int16x8_t) __ax.val, __b, 0); \ __rv.__i; \ }) #define vqshlq_n_u32(__a, __b) \ ({ \ + uint32x4_t __ax = __a; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; 
\ - __rv.__o = __builtin_neon_vqshl_nv4si ((__neon_int32x4_t) __a.val, __b, 0); \ + __rv.__o = __builtin_neon_vqshl_nv4si ((__neon_int32x4_t) __ax.val, __b, 0); \ __rv.__i; \ }) #define vqshlq_n_u64(__a, __b) \ ({ \ + uint64x2_t __ax = __a; \ union { uint64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqshl_nv2di ((__neon_int64x2_t) __a.val, __b, 0); \ + __rv.__o = __builtin_neon_vqshl_nv2di ((__neon_int64x2_t) __ax.val, __b, 0); \ __rv.__i; \ }) #define vqshlu_n_s8(__a, __b) \ ({ \ + int8x8_t __ax = __a; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqshlu_nv8qi (__a.val, __b, 1); \ + __rv.__o = __builtin_neon_vqshlu_nv8qi (__ax.val, __b, 1); \ __rv.__i; \ }) #define vqshlu_n_s16(__a, __b) \ ({ \ + int16x4_t __ax = __a; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqshlu_nv4hi (__a.val, __b, 1); \ + __rv.__o = __builtin_neon_vqshlu_nv4hi (__ax.val, __b, 1); \ __rv.__i; \ }) #define vqshlu_n_s32(__a, __b) \ ({ \ + int32x2_t __ax = __a; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqshlu_nv2si (__a.val, __b, 1); \ + __rv.__o = __builtin_neon_vqshlu_nv2si (__ax.val, __b, 1); \ __rv.__i; \ }) #define vqshlu_n_s64(__a, __b) \ ({ \ + int64x1_t __ax = __a; \ union { uint64x1_t __i; __neon_int64x1_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqshlu_nv1di (__a.val, __b, 1); \ + __rv.__o = __builtin_neon_vqshlu_nv1di (__ax.val, __b, 1); \ __rv.__i; \ }) #define vqshluq_n_s8(__a, __b) \ ({ \ + int8x16_t __ax = __a; \ union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqshlu_nv16qi (__a.val, __b, 1); \ + __rv.__o = __builtin_neon_vqshlu_nv16qi (__ax.val, __b, 1); \ __rv.__i; \ }) #define vqshluq_n_s16(__a, __b) \ ({ \ + int16x8_t __ax = __a; \ union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqshlu_nv8hi (__a.val, __b, 1); \ + __rv.__o = __builtin_neon_vqshlu_nv8hi 
(__ax.val, __b, 1); \ __rv.__i; \ }) #define vqshluq_n_s32(__a, __b) \ ({ \ + int32x4_t __ax = __a; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqshlu_nv4si (__a.val, __b, 1); \ + __rv.__o = __builtin_neon_vqshlu_nv4si (__ax.val, __b, 1); \ __rv.__i; \ }) #define vqshluq_n_s64(__a, __b) \ ({ \ + int64x2_t __ax = __a; \ union { uint64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqshlu_nv2di (__a.val, __b, 1); \ + __rv.__o = __builtin_neon_vqshlu_nv2di (__ax.val, __b, 1); \ __rv.__i; \ }) #define vshll_n_s8(__a, __b) \ ({ \ + int8x8_t __ax = __a; \ union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshll_nv8qi (__a.val, __b, 1); \ + __rv.__o = __builtin_neon_vshll_nv8qi (__ax.val, __b, 1); \ __rv.__i; \ }) #define vshll_n_s16(__a, __b) \ ({ \ + int16x4_t __ax = __a; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshll_nv4hi (__a.val, __b, 1); \ + __rv.__o = __builtin_neon_vshll_nv4hi (__ax.val, __b, 1); \ __rv.__i; \ }) #define vshll_n_s32(__a, __b) \ ({ \ + int32x2_t __ax = __a; \ union { int64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshll_nv2si (__a.val, __b, 1); \ + __rv.__o = __builtin_neon_vshll_nv2si (__ax.val, __b, 1); \ __rv.__i; \ }) #define vshll_n_u8(__a, __b) \ ({ \ + uint8x8_t __ax = __a; \ union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshll_nv8qi ((__neon_int8x8_t) __a.val, __b, 0); \ + __rv.__o = __builtin_neon_vshll_nv8qi ((__neon_int8x8_t) __ax.val, __b, 0); \ __rv.__i; \ }) #define vshll_n_u16(__a, __b) \ ({ \ + uint16x4_t __ax = __a; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshll_nv4hi ((__neon_int16x4_t) __a.val, __b, 0); \ + __rv.__o = __builtin_neon_vshll_nv4hi ((__neon_int16x4_t) __ax.val, __b, 0); \ __rv.__i; \ }) #define vshll_n_u32(__a, __b) \ ({ \ + uint32x2_t __ax = __a; \ union { uint64x2_t 
__i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vshll_nv2si ((__neon_int32x2_t) __a.val, __b, 0); \ + __rv.__o = __builtin_neon_vshll_nv2si ((__neon_int32x2_t) __ax.val, __b, 0); \ __rv.__i; \ }) #define vsra_n_s8(__a, __b, __c) \ ({ \ + int8x8_t __ax = __a; \ + int8x8_t __bx = __b; \ union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsra_nv8qi (__a.val, __b.val, __c, 1); \ + __rv.__o = __builtin_neon_vsra_nv8qi (__ax.val, __bx.val, __c, 1); \ __rv.__i; \ }) #define vsra_n_s16(__a, __b, __c) \ ({ \ + int16x4_t __ax = __a; \ + int16x4_t __bx = __b; \ union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsra_nv4hi (__a.val, __b.val, __c, 1); \ + __rv.__o = __builtin_neon_vsra_nv4hi (__ax.val, __bx.val, __c, 1); \ __rv.__i; \ }) #define vsra_n_s32(__a, __b, __c) \ ({ \ + int32x2_t __ax = __a; \ + int32x2_t __bx = __b; \ union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsra_nv2si (__a.val, __b.val, __c, 1); \ + __rv.__o = __builtin_neon_vsra_nv2si (__ax.val, __bx.val, __c, 1); \ __rv.__i; \ }) #define vsra_n_s64(__a, __b, __c) \ ({ \ + int64x1_t __ax = __a; \ + int64x1_t __bx = __b; \ union { int64x1_t __i; __neon_int64x1_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsra_nv1di (__a.val, __b.val, __c, 1); \ + __rv.__o = __builtin_neon_vsra_nv1di (__ax.val, __bx.val, __c, 1); \ __rv.__i; \ }) #define vsra_n_u8(__a, __b, __c) \ ({ \ + uint8x8_t __ax = __a; \ + uint8x8_t __bx = __b; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsra_nv8qi ((__neon_int8x8_t) __a.val, (__neon_int8x8_t) __b.val, __c, 0); \ + __rv.__o = __builtin_neon_vsra_nv8qi ((__neon_int8x8_t) __ax.val, (__neon_int8x8_t) __bx.val, __c, 0); \ __rv.__i; \ }) #define vsra_n_u16(__a, __b, __c) \ ({ \ + uint16x4_t __ax = __a; \ + uint16x4_t __bx = __b; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsra_nv4hi 
((__neon_int16x4_t) __a.val, (__neon_int16x4_t) __b.val, __c, 0); \ + __rv.__o = __builtin_neon_vsra_nv4hi ((__neon_int16x4_t) __ax.val, (__neon_int16x4_t) __bx.val, __c, 0); \ __rv.__i; \ }) #define vsra_n_u32(__a, __b, __c) \ ({ \ + uint32x2_t __ax = __a; \ + uint32x2_t __bx = __b; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsra_nv2si ((__neon_int32x2_t) __a.val, (__neon_int32x2_t) __b.val, __c, 0); \ + __rv.__o = __builtin_neon_vsra_nv2si ((__neon_int32x2_t) __ax.val, (__neon_int32x2_t) __bx.val, __c, 0); \ __rv.__i; \ }) #define vsra_n_u64(__a, __b, __c) \ ({ \ + uint64x1_t __ax = __a; \ + uint64x1_t __bx = __b; \ union { uint64x1_t __i; __neon_int64x1_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsra_nv1di ((__neon_int64x1_t) __a.val, (__neon_int64x1_t) __b.val, __c, 0); \ + __rv.__o = __builtin_neon_vsra_nv1di ((__neon_int64x1_t) __ax.val, (__neon_int64x1_t) __bx.val, __c, 0); \ __rv.__i; \ }) #define vsraq_n_s8(__a, __b, __c) \ ({ \ + int8x16_t __ax = __a; \ + int8x16_t __bx = __b; \ union { int8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsra_nv16qi (__a.val, __b.val, __c, 1); \ + __rv.__o = __builtin_neon_vsra_nv16qi (__ax.val, __bx.val, __c, 1); \ __rv.__i; \ }) #define vsraq_n_s16(__a, __b, __c) \ ({ \ + int16x8_t __ax = __a; \ + int16x8_t __bx = __b; \ union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsra_nv8hi (__a.val, __b.val, __c, 1); \ + __rv.__o = __builtin_neon_vsra_nv8hi (__ax.val, __bx.val, __c, 1); \ __rv.__i; \ }) #define vsraq_n_s32(__a, __b, __c) \ ({ \ + int32x4_t __ax = __a; \ + int32x4_t __bx = __b; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsra_nv4si (__a.val, __b.val, __c, 1); \ + __rv.__o = __builtin_neon_vsra_nv4si (__ax.val, __bx.val, __c, 1); \ __rv.__i; \ }) #define vsraq_n_s64(__a, __b, __c) \ ({ \ + int64x2_t __ax = __a; \ + int64x2_t __bx = __b; \ union { int64x2_t __i; 
__neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsra_nv2di (__a.val, __b.val, __c, 1); \ + __rv.__o = __builtin_neon_vsra_nv2di (__ax.val, __bx.val, __c, 1); \ __rv.__i; \ }) #define vsraq_n_u8(__a, __b, __c) \ ({ \ + uint8x16_t __ax = __a; \ + uint8x16_t __bx = __b; \ union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsra_nv16qi ((__neon_int8x16_t) __a.val, (__neon_int8x16_t) __b.val, __c, 0); \ + __rv.__o = __builtin_neon_vsra_nv16qi ((__neon_int8x16_t) __ax.val, (__neon_int8x16_t) __bx.val, __c, 0); \ __rv.__i; \ }) #define vsraq_n_u16(__a, __b, __c) \ ({ \ + uint16x8_t __ax = __a; \ + uint16x8_t __bx = __b; \ union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsra_nv8hi ((__neon_int16x8_t) __a.val, (__neon_int16x8_t) __b.val, __c, 0); \ + __rv.__o = __builtin_neon_vsra_nv8hi ((__neon_int16x8_t) __ax.val, (__neon_int16x8_t) __bx.val, __c, 0); \ __rv.__i; \ }) #define vsraq_n_u32(__a, __b, __c) \ ({ \ + uint32x4_t __ax = __a; \ + uint32x4_t __bx = __b; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsra_nv4si ((__neon_int32x4_t) __a.val, (__neon_int32x4_t) __b.val, __c, 0); \ + __rv.__o = __builtin_neon_vsra_nv4si ((__neon_int32x4_t) __ax.val, (__neon_int32x4_t) __bx.val, __c, 0); \ __rv.__i; \ }) #define vsraq_n_u64(__a, __b, __c) \ ({ \ + uint64x2_t __ax = __a; \ + uint64x2_t __bx = __b; \ union { uint64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsra_nv2di ((__neon_int64x2_t) __a.val, (__neon_int64x2_t) __b.val, __c, 0); \ + __rv.__o = __builtin_neon_vsra_nv2di ((__neon_int64x2_t) __ax.val, (__neon_int64x2_t) __bx.val, __c, 0); \ __rv.__i; \ }) #define vrsra_n_s8(__a, __b, __c) \ ({ \ + int8x8_t __ax = __a; \ + int8x8_t __bx = __b; \ union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsra_nv8qi (__a.val, __b.val, __c, 3); \ + __rv.__o = __builtin_neon_vsra_nv8qi (__ax.val, __bx.val, __c, 
3); \ __rv.__i; \ }) #define vrsra_n_s16(__a, __b, __c) \ ({ \ + int16x4_t __ax = __a; \ + int16x4_t __bx = __b; \ union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsra_nv4hi (__a.val, __b.val, __c, 3); \ + __rv.__o = __builtin_neon_vsra_nv4hi (__ax.val, __bx.val, __c, 3); \ __rv.__i; \ }) #define vrsra_n_s32(__a, __b, __c) \ ({ \ + int32x2_t __ax = __a; \ + int32x2_t __bx = __b; \ union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsra_nv2si (__a.val, __b.val, __c, 3); \ + __rv.__o = __builtin_neon_vsra_nv2si (__ax.val, __bx.val, __c, 3); \ __rv.__i; \ }) #define vrsra_n_s64(__a, __b, __c) \ ({ \ + int64x1_t __ax = __a; \ + int64x1_t __bx = __b; \ union { int64x1_t __i; __neon_int64x1_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsra_nv1di (__a.val, __b.val, __c, 3); \ + __rv.__o = __builtin_neon_vsra_nv1di (__ax.val, __bx.val, __c, 3); \ __rv.__i; \ }) #define vrsra_n_u8(__a, __b, __c) \ ({ \ + uint8x8_t __ax = __a; \ + uint8x8_t __bx = __b; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsra_nv8qi ((__neon_int8x8_t) __a.val, (__neon_int8x8_t) __b.val, __c, 2); \ + __rv.__o = __builtin_neon_vsra_nv8qi ((__neon_int8x8_t) __ax.val, (__neon_int8x8_t) __bx.val, __c, 2); \ __rv.__i; \ }) #define vrsra_n_u16(__a, __b, __c) \ ({ \ + uint16x4_t __ax = __a; \ + uint16x4_t __bx = __b; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsra_nv4hi ((__neon_int16x4_t) __a.val, (__neon_int16x4_t) __b.val, __c, 2); \ + __rv.__o = __builtin_neon_vsra_nv4hi ((__neon_int16x4_t) __ax.val, (__neon_int16x4_t) __bx.val, __c, 2); \ __rv.__i; \ }) #define vrsra_n_u32(__a, __b, __c) \ ({ \ + uint32x2_t __ax = __a; \ + uint32x2_t __bx = __b; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsra_nv2si ((__neon_int32x2_t) __a.val, (__neon_int32x2_t) __b.val, __c, 2); \ + __rv.__o = __builtin_neon_vsra_nv2si 
((__neon_int32x2_t) __ax.val, (__neon_int32x2_t) __bx.val, __c, 2); \ __rv.__i; \ }) #define vrsra_n_u64(__a, __b, __c) \ ({ \ + uint64x1_t __ax = __a; \ + uint64x1_t __bx = __b; \ union { uint64x1_t __i; __neon_int64x1_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsra_nv1di ((__neon_int64x1_t) __a.val, (__neon_int64x1_t) __b.val, __c, 2); \ + __rv.__o = __builtin_neon_vsra_nv1di ((__neon_int64x1_t) __ax.val, (__neon_int64x1_t) __bx.val, __c, 2); \ __rv.__i; \ }) #define vrsraq_n_s8(__a, __b, __c) \ ({ \ + int8x16_t __ax = __a; \ + int8x16_t __bx = __b; \ union { int8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsra_nv16qi (__a.val, __b.val, __c, 3); \ + __rv.__o = __builtin_neon_vsra_nv16qi (__ax.val, __bx.val, __c, 3); \ __rv.__i; \ }) #define vrsraq_n_s16(__a, __b, __c) \ ({ \ + int16x8_t __ax = __a; \ + int16x8_t __bx = __b; \ union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsra_nv8hi (__a.val, __b.val, __c, 3); \ + __rv.__o = __builtin_neon_vsra_nv8hi (__ax.val, __bx.val, __c, 3); \ __rv.__i; \ }) #define vrsraq_n_s32(__a, __b, __c) \ ({ \ + int32x4_t __ax = __a; \ + int32x4_t __bx = __b; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsra_nv4si (__a.val, __b.val, __c, 3); \ + __rv.__o = __builtin_neon_vsra_nv4si (__ax.val, __bx.val, __c, 3); \ __rv.__i; \ }) #define vrsraq_n_s64(__a, __b, __c) \ ({ \ + int64x2_t __ax = __a; \ + int64x2_t __bx = __b; \ union { int64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsra_nv2di (__a.val, __b.val, __c, 3); \ + __rv.__o = __builtin_neon_vsra_nv2di (__ax.val, __bx.val, __c, 3); \ __rv.__i; \ }) #define vrsraq_n_u8(__a, __b, __c) \ ({ \ + uint8x16_t __ax = __a; \ + uint8x16_t __bx = __b; \ union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsra_nv16qi ((__neon_int8x16_t) __a.val, (__neon_int8x16_t) __b.val, __c, 2); \ + __rv.__o = __builtin_neon_vsra_nv16qi 
((__neon_int8x16_t) __ax.val, (__neon_int8x16_t) __bx.val, __c, 2); \ __rv.__i; \ }) #define vrsraq_n_u16(__a, __b, __c) \ ({ \ + uint16x8_t __ax = __a; \ + uint16x8_t __bx = __b; \ union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsra_nv8hi ((__neon_int16x8_t) __a.val, (__neon_int16x8_t) __b.val, __c, 2); \ + __rv.__o = __builtin_neon_vsra_nv8hi ((__neon_int16x8_t) __ax.val, (__neon_int16x8_t) __bx.val, __c, 2); \ __rv.__i; \ }) #define vrsraq_n_u32(__a, __b, __c) \ ({ \ + uint32x4_t __ax = __a; \ + uint32x4_t __bx = __b; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsra_nv4si ((__neon_int32x4_t) __a.val, (__neon_int32x4_t) __b.val, __c, 2); \ + __rv.__o = __builtin_neon_vsra_nv4si ((__neon_int32x4_t) __ax.val, (__neon_int32x4_t) __bx.val, __c, 2); \ __rv.__i; \ }) #define vrsraq_n_u64(__a, __b, __c) \ ({ \ + uint64x2_t __ax = __a; \ + uint64x2_t __bx = __b; \ union { uint64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsra_nv2di ((__neon_int64x2_t) __a.val, (__neon_int64x2_t) __b.val, __c, 2); \ + __rv.__o = __builtin_neon_vsra_nv2di ((__neon_int64x2_t) __ax.val, (__neon_int64x2_t) __bx.val, __c, 2); \ __rv.__i; \ }) #define vsri_n_s8(__a, __b, __c) \ ({ \ + int8x8_t __ax = __a; \ + int8x8_t __bx = __b; \ union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsri_nv8qi (__a.val, __b.val, __c); \ + __rv.__o = __builtin_neon_vsri_nv8qi (__ax.val, __bx.val, __c); \ __rv.__i; \ }) #define vsri_n_s16(__a, __b, __c) \ ({ \ + int16x4_t __ax = __a; \ + int16x4_t __bx = __b; \ union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsri_nv4hi (__a.val, __b.val, __c); \ + __rv.__o = __builtin_neon_vsri_nv4hi (__ax.val, __bx.val, __c); \ __rv.__i; \ }) #define vsri_n_s32(__a, __b, __c) \ ({ \ + int32x2_t __ax = __a; \ + int32x2_t __bx = __b; \ union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = 
__builtin_neon_vsri_nv2si (__a.val, __b.val, __c); \ + __rv.__o = __builtin_neon_vsri_nv2si (__ax.val, __bx.val, __c); \ __rv.__i; \ }) #define vsri_n_s64(__a, __b, __c) \ ({ \ + int64x1_t __ax = __a; \ + int64x1_t __bx = __b; \ union { int64x1_t __i; __neon_int64x1_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsri_nv1di (__a.val, __b.val, __c); \ + __rv.__o = __builtin_neon_vsri_nv1di (__ax.val, __bx.val, __c); \ __rv.__i; \ }) #define vsri_n_u8(__a, __b, __c) \ ({ \ + uint8x8_t __ax = __a; \ + uint8x8_t __bx = __b; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsri_nv8qi ((__neon_int8x8_t) __a.val, (__neon_int8x8_t) __b.val, __c); \ + __rv.__o = __builtin_neon_vsri_nv8qi ((__neon_int8x8_t) __ax.val, (__neon_int8x8_t) __bx.val, __c); \ __rv.__i; \ }) #define vsri_n_u16(__a, __b, __c) \ ({ \ + uint16x4_t __ax = __a; \ + uint16x4_t __bx = __b; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsri_nv4hi ((__neon_int16x4_t) __a.val, (__neon_int16x4_t) __b.val, __c); \ + __rv.__o = __builtin_neon_vsri_nv4hi ((__neon_int16x4_t) __ax.val, (__neon_int16x4_t) __bx.val, __c); \ __rv.__i; \ }) #define vsri_n_u32(__a, __b, __c) \ ({ \ + uint32x2_t __ax = __a; \ + uint32x2_t __bx = __b; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsri_nv2si ((__neon_int32x2_t) __a.val, (__neon_int32x2_t) __b.val, __c); \ + __rv.__o = __builtin_neon_vsri_nv2si ((__neon_int32x2_t) __ax.val, (__neon_int32x2_t) __bx.val, __c); \ __rv.__i; \ }) #define vsri_n_u64(__a, __b, __c) \ ({ \ + uint64x1_t __ax = __a; \ + uint64x1_t __bx = __b; \ union { uint64x1_t __i; __neon_int64x1_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsri_nv1di ((__neon_int64x1_t) __a.val, (__neon_int64x1_t) __b.val, __c); \ + __rv.__o = __builtin_neon_vsri_nv1di ((__neon_int64x1_t) __ax.val, (__neon_int64x1_t) __bx.val, __c); \ __rv.__i; \ }) #define vsri_n_p8(__a, __b, __c) \ ({ \ + poly8x8_t __ax = __a; \ + 
poly8x8_t __bx = __b; \ union { poly8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsri_nv8qi ((__neon_int8x8_t) __a.val, (__neon_int8x8_t) __b.val, __c); \ + __rv.__o = __builtin_neon_vsri_nv8qi ((__neon_int8x8_t) __ax.val, (__neon_int8x8_t) __bx.val, __c); \ __rv.__i; \ }) #define vsri_n_p16(__a, __b, __c) \ ({ \ + poly16x4_t __ax = __a; \ + poly16x4_t __bx = __b; \ union { poly16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsri_nv4hi ((__neon_int16x4_t) __a.val, (__neon_int16x4_t) __b.val, __c); \ + __rv.__o = __builtin_neon_vsri_nv4hi ((__neon_int16x4_t) __ax.val, (__neon_int16x4_t) __bx.val, __c); \ __rv.__i; \ }) #define vsriq_n_s8(__a, __b, __c) \ ({ \ + int8x16_t __ax = __a; \ + int8x16_t __bx = __b; \ union { int8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsri_nv16qi (__a.val, __b.val, __c); \ + __rv.__o = __builtin_neon_vsri_nv16qi (__ax.val, __bx.val, __c); \ __rv.__i; \ }) #define vsriq_n_s16(__a, __b, __c) \ ({ \ + int16x8_t __ax = __a; \ + int16x8_t __bx = __b; \ union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsri_nv8hi (__a.val, __b.val, __c); \ + __rv.__o = __builtin_neon_vsri_nv8hi (__ax.val, __bx.val, __c); \ __rv.__i; \ }) #define vsriq_n_s32(__a, __b, __c) \ ({ \ + int32x4_t __ax = __a; \ + int32x4_t __bx = __b; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsri_nv4si (__a.val, __b.val, __c); \ + __rv.__o = __builtin_neon_vsri_nv4si (__ax.val, __bx.val, __c); \ __rv.__i; \ }) #define vsriq_n_s64(__a, __b, __c) \ ({ \ + int64x2_t __ax = __a; \ + int64x2_t __bx = __b; \ union { int64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsri_nv2di (__a.val, __b.val, __c); \ + __rv.__o = __builtin_neon_vsri_nv2di (__ax.val, __bx.val, __c); \ __rv.__i; \ }) #define vsriq_n_u8(__a, __b, __c) \ ({ \ + uint8x16_t __ax = __a; \ + uint8x16_t __bx = __b; \ union { uint8x16_t __i; 
__neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsri_nv16qi ((__neon_int8x16_t) __a.val, (__neon_int8x16_t) __b.val, __c); \ + __rv.__o = __builtin_neon_vsri_nv16qi ((__neon_int8x16_t) __ax.val, (__neon_int8x16_t) __bx.val, __c); \ __rv.__i; \ }) #define vsriq_n_u16(__a, __b, __c) \ ({ \ + uint16x8_t __ax = __a; \ + uint16x8_t __bx = __b; \ union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsri_nv8hi ((__neon_int16x8_t) __a.val, (__neon_int16x8_t) __b.val, __c); \ + __rv.__o = __builtin_neon_vsri_nv8hi ((__neon_int16x8_t) __ax.val, (__neon_int16x8_t) __bx.val, __c); \ __rv.__i; \ }) #define vsriq_n_u32(__a, __b, __c) \ ({ \ + uint32x4_t __ax = __a; \ + uint32x4_t __bx = __b; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsri_nv4si ((__neon_int32x4_t) __a.val, (__neon_int32x4_t) __b.val, __c); \ + __rv.__o = __builtin_neon_vsri_nv4si ((__neon_int32x4_t) __ax.val, (__neon_int32x4_t) __bx.val, __c); \ __rv.__i; \ }) #define vsriq_n_u64(__a, __b, __c) \ ({ \ + uint64x2_t __ax = __a; \ + uint64x2_t __bx = __b; \ union { uint64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsri_nv2di ((__neon_int64x2_t) __a.val, (__neon_int64x2_t) __b.val, __c); \ + __rv.__o = __builtin_neon_vsri_nv2di ((__neon_int64x2_t) __ax.val, (__neon_int64x2_t) __bx.val, __c); \ __rv.__i; \ }) #define vsriq_n_p8(__a, __b, __c) \ ({ \ + poly8x16_t __ax = __a; \ + poly8x16_t __bx = __b; \ union { poly8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsri_nv16qi ((__neon_int8x16_t) __a.val, (__neon_int8x16_t) __b.val, __c); \ + __rv.__o = __builtin_neon_vsri_nv16qi ((__neon_int8x16_t) __ax.val, (__neon_int8x16_t) __bx.val, __c); \ __rv.__i; \ }) #define vsriq_n_p16(__a, __b, __c) \ ({ \ + poly16x8_t __ax = __a; \ + poly16x8_t __bx = __b; \ union { poly16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsri_nv8hi ((__neon_int16x8_t) __a.val, 
(__neon_int16x8_t) __b.val, __c); \ + __rv.__o = __builtin_neon_vsri_nv8hi ((__neon_int16x8_t) __ax.val, (__neon_int16x8_t) __bx.val, __c); \ __rv.__i; \ }) #define vsli_n_s8(__a, __b, __c) \ ({ \ + int8x8_t __ax = __a; \ + int8x8_t __bx = __b; \ union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsli_nv8qi (__a.val, __b.val, __c); \ + __rv.__o = __builtin_neon_vsli_nv8qi (__ax.val, __bx.val, __c); \ __rv.__i; \ }) #define vsli_n_s16(__a, __b, __c) \ ({ \ + int16x4_t __ax = __a; \ + int16x4_t __bx = __b; \ union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsli_nv4hi (__a.val, __b.val, __c); \ + __rv.__o = __builtin_neon_vsli_nv4hi (__ax.val, __bx.val, __c); \ __rv.__i; \ }) #define vsli_n_s32(__a, __b, __c) \ ({ \ + int32x2_t __ax = __a; \ + int32x2_t __bx = __b; \ union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsli_nv2si (__a.val, __b.val, __c); \ + __rv.__o = __builtin_neon_vsli_nv2si (__ax.val, __bx.val, __c); \ __rv.__i; \ }) #define vsli_n_s64(__a, __b, __c) \ ({ \ + int64x1_t __ax = __a; \ + int64x1_t __bx = __b; \ union { int64x1_t __i; __neon_int64x1_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsli_nv1di (__a.val, __b.val, __c); \ + __rv.__o = __builtin_neon_vsli_nv1di (__ax.val, __bx.val, __c); \ __rv.__i; \ }) #define vsli_n_u8(__a, __b, __c) \ ({ \ + uint8x8_t __ax = __a; \ + uint8x8_t __bx = __b; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsli_nv8qi ((__neon_int8x8_t) __a.val, (__neon_int8x8_t) __b.val, __c); \ + __rv.__o = __builtin_neon_vsli_nv8qi ((__neon_int8x8_t) __ax.val, (__neon_int8x8_t) __bx.val, __c); \ __rv.__i; \ }) #define vsli_n_u16(__a, __b, __c) \ ({ \ + uint16x4_t __ax = __a; \ + uint16x4_t __bx = __b; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsli_nv4hi ((__neon_int16x4_t) __a.val, (__neon_int16x4_t) __b.val, __c); \ + __rv.__o = 
__builtin_neon_vsli_nv4hi ((__neon_int16x4_t) __ax.val, (__neon_int16x4_t) __bx.val, __c); \ __rv.__i; \ }) #define vsli_n_u32(__a, __b, __c) \ ({ \ + uint32x2_t __ax = __a; \ + uint32x2_t __bx = __b; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsli_nv2si ((__neon_int32x2_t) __a.val, (__neon_int32x2_t) __b.val, __c); \ + __rv.__o = __builtin_neon_vsli_nv2si ((__neon_int32x2_t) __ax.val, (__neon_int32x2_t) __bx.val, __c); \ __rv.__i; \ }) #define vsli_n_u64(__a, __b, __c) \ ({ \ + uint64x1_t __ax = __a; \ + uint64x1_t __bx = __b; \ union { uint64x1_t __i; __neon_int64x1_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsli_nv1di ((__neon_int64x1_t) __a.val, (__neon_int64x1_t) __b.val, __c); \ + __rv.__o = __builtin_neon_vsli_nv1di ((__neon_int64x1_t) __ax.val, (__neon_int64x1_t) __bx.val, __c); \ __rv.__i; \ }) #define vsli_n_p8(__a, __b, __c) \ ({ \ + poly8x8_t __ax = __a; \ + poly8x8_t __bx = __b; \ union { poly8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsli_nv8qi ((__neon_int8x8_t) __a.val, (__neon_int8x8_t) __b.val, __c); \ + __rv.__o = __builtin_neon_vsli_nv8qi ((__neon_int8x8_t) __ax.val, (__neon_int8x8_t) __bx.val, __c); \ __rv.__i; \ }) #define vsli_n_p16(__a, __b, __c) \ ({ \ + poly16x4_t __ax = __a; \ + poly16x4_t __bx = __b; \ union { poly16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsli_nv4hi ((__neon_int16x4_t) __a.val, (__neon_int16x4_t) __b.val, __c); \ + __rv.__o = __builtin_neon_vsli_nv4hi ((__neon_int16x4_t) __ax.val, (__neon_int16x4_t) __bx.val, __c); \ __rv.__i; \ }) #define vsliq_n_s8(__a, __b, __c) \ ({ \ + int8x16_t __ax = __a; \ + int8x16_t __bx = __b; \ union { int8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsli_nv16qi (__a.val, __b.val, __c); \ + __rv.__o = __builtin_neon_vsli_nv16qi (__ax.val, __bx.val, __c); \ __rv.__i; \ }) #define vsliq_n_s16(__a, __b, __c) \ ({ \ + int16x8_t __ax = __a; \ + int16x8_t __bx = __b; \ 
union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsli_nv8hi (__a.val, __b.val, __c); \ + __rv.__o = __builtin_neon_vsli_nv8hi (__ax.val, __bx.val, __c); \ __rv.__i; \ }) #define vsliq_n_s32(__a, __b, __c) \ ({ \ + int32x4_t __ax = __a; \ + int32x4_t __bx = __b; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsli_nv4si (__a.val, __b.val, __c); \ + __rv.__o = __builtin_neon_vsli_nv4si (__ax.val, __bx.val, __c); \ __rv.__i; \ }) #define vsliq_n_s64(__a, __b, __c) \ ({ \ + int64x2_t __ax = __a; \ + int64x2_t __bx = __b; \ union { int64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsli_nv2di (__a.val, __b.val, __c); \ + __rv.__o = __builtin_neon_vsli_nv2di (__ax.val, __bx.val, __c); \ __rv.__i; \ }) #define vsliq_n_u8(__a, __b, __c) \ ({ \ + uint8x16_t __ax = __a; \ + uint8x16_t __bx = __b; \ union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsli_nv16qi ((__neon_int8x16_t) __a.val, (__neon_int8x16_t) __b.val, __c); \ + __rv.__o = __builtin_neon_vsli_nv16qi ((__neon_int8x16_t) __ax.val, (__neon_int8x16_t) __bx.val, __c); \ __rv.__i; \ }) #define vsliq_n_u16(__a, __b, __c) \ ({ \ + uint16x8_t __ax = __a; \ + uint16x8_t __bx = __b; \ union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsli_nv8hi ((__neon_int16x8_t) __a.val, (__neon_int16x8_t) __b.val, __c); \ + __rv.__o = __builtin_neon_vsli_nv8hi ((__neon_int16x8_t) __ax.val, (__neon_int16x8_t) __bx.val, __c); \ __rv.__i; \ }) #define vsliq_n_u32(__a, __b, __c) \ ({ \ + uint32x4_t __ax = __a; \ + uint32x4_t __bx = __b; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsli_nv4si ((__neon_int32x4_t) __a.val, (__neon_int32x4_t) __b.val, __c); \ + __rv.__o = __builtin_neon_vsli_nv4si ((__neon_int32x4_t) __ax.val, (__neon_int32x4_t) __bx.val, __c); \ __rv.__i; \ }) #define vsliq_n_u64(__a, __b, __c) \ ({ \ + uint64x2_t __ax = __a; 
\ + uint64x2_t __bx = __b; \ union { uint64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsli_nv2di ((__neon_int64x2_t) __a.val, (__neon_int64x2_t) __b.val, __c); \ + __rv.__o = __builtin_neon_vsli_nv2di ((__neon_int64x2_t) __ax.val, (__neon_int64x2_t) __bx.val, __c); \ __rv.__i; \ }) #define vsliq_n_p8(__a, __b, __c) \ ({ \ + poly8x16_t __ax = __a; \ + poly8x16_t __bx = __b; \ union { poly8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsli_nv16qi ((__neon_int8x16_t) __a.val, (__neon_int8x16_t) __b.val, __c); \ + __rv.__o = __builtin_neon_vsli_nv16qi ((__neon_int8x16_t) __ax.val, (__neon_int8x16_t) __bx.val, __c); \ __rv.__i; \ }) #define vsliq_n_p16(__a, __b, __c) \ ({ \ + poly16x8_t __ax = __a; \ + poly16x8_t __bx = __b; \ union { poly16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vsli_nv8hi ((__neon_int16x8_t) __a.val, (__neon_int16x8_t) __b.val, __c); \ + __rv.__o = __builtin_neon_vsli_nv8hi ((__neon_int16x8_t) __ax.val, (__neon_int16x8_t) __bx.val, __c); \ __rv.__i; \ }) #define vabs_s8(__a) \ ({ \ + int8x8_t __ax = __a; \ union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vabsv8qi (__a.val, 1); \ + __rv.__o = __builtin_neon_vabsv8qi (__ax.val, 1); \ __rv.__i; \ }) #define vabs_s16(__a) \ ({ \ + int16x4_t __ax = __a; \ union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vabsv4hi (__a.val, 1); \ + __rv.__o = __builtin_neon_vabsv4hi (__ax.val, 1); \ __rv.__i; \ }) #define vabs_s32(__a) \ ({ \ + int32x2_t __ax = __a; \ union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vabsv2si (__a.val, 1); \ + __rv.__o = __builtin_neon_vabsv2si (__ax.val, 1); \ __rv.__i; \ }) #define vabs_f32(__a) \ ({ \ + float32x2_t __ax = __a; \ union { float32x2_t __i; __neon_float32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vabsv2sf (__a.val, 5); \ + __rv.__o = __builtin_neon_vabsv2sf (__ax.val, 5); \ __rv.__i; \ }) 
#define vabsq_s8(__a) \ ({ \ + int8x16_t __ax = __a; \ union { int8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vabsv16qi (__a.val, 1); \ + __rv.__o = __builtin_neon_vabsv16qi (__ax.val, 1); \ __rv.__i; \ }) #define vabsq_s16(__a) \ ({ \ + int16x8_t __ax = __a; \ union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vabsv8hi (__a.val, 1); \ + __rv.__o = __builtin_neon_vabsv8hi (__ax.val, 1); \ __rv.__i; \ }) #define vabsq_s32(__a) \ ({ \ + int32x4_t __ax = __a; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vabsv4si (__a.val, 1); \ + __rv.__o = __builtin_neon_vabsv4si (__ax.val, 1); \ __rv.__i; \ }) #define vabsq_f32(__a) \ ({ \ + float32x4_t __ax = __a; \ union { float32x4_t __i; __neon_float32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vabsv4sf (__a.val, 5); \ + __rv.__o = __builtin_neon_vabsv4sf (__ax.val, 5); \ __rv.__i; \ }) #define vqabs_s8(__a) \ ({ \ + int8x8_t __ax = __a; \ union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqabsv8qi (__a.val, 1); \ + __rv.__o = __builtin_neon_vqabsv8qi (__ax.val, 1); \ __rv.__i; \ }) #define vqabs_s16(__a) \ ({ \ + int16x4_t __ax = __a; \ union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqabsv4hi (__a.val, 1); \ + __rv.__o = __builtin_neon_vqabsv4hi (__ax.val, 1); \ __rv.__i; \ }) #define vqabs_s32(__a) \ ({ \ + int32x2_t __ax = __a; \ union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqabsv2si (__a.val, 1); \ + __rv.__o = __builtin_neon_vqabsv2si (__ax.val, 1); \ __rv.__i; \ }) #define vqabsq_s8(__a) \ ({ \ + int8x16_t __ax = __a; \ union { int8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqabsv16qi (__a.val, 1); \ + __rv.__o = __builtin_neon_vqabsv16qi (__ax.val, 1); \ __rv.__i; \ }) #define vqabsq_s16(__a) \ ({ \ + int16x8_t __ax = __a; \ union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o 
= __builtin_neon_vqabsv8hi (__a.val, 1); \ + __rv.__o = __builtin_neon_vqabsv8hi (__ax.val, 1); \ __rv.__i; \ }) #define vqabsq_s32(__a) \ ({ \ + int32x4_t __ax = __a; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqabsv4si (__a.val, 1); \ + __rv.__o = __builtin_neon_vqabsv4si (__ax.val, 1); \ __rv.__i; \ }) #define vneg_s8(__a) \ ({ \ + int8x8_t __ax = __a; \ union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vnegv8qi (__a.val, 1); \ + __rv.__o = __builtin_neon_vnegv8qi (__ax.val, 1); \ __rv.__i; \ }) #define vneg_s16(__a) \ ({ \ + int16x4_t __ax = __a; \ union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vnegv4hi (__a.val, 1); \ + __rv.__o = __builtin_neon_vnegv4hi (__ax.val, 1); \ __rv.__i; \ }) #define vneg_s32(__a) \ ({ \ + int32x2_t __ax = __a; \ union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vnegv2si (__a.val, 1); \ + __rv.__o = __builtin_neon_vnegv2si (__ax.val, 1); \ __rv.__i; \ }) #define vneg_f32(__a) \ ({ \ + float32x2_t __ax = __a; \ union { float32x2_t __i; __neon_float32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vnegv2sf (__a.val, 5); \ + __rv.__o = __builtin_neon_vnegv2sf (__ax.val, 5); \ __rv.__i; \ }) #define vnegq_s8(__a) \ ({ \ + int8x16_t __ax = __a; \ union { int8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vnegv16qi (__a.val, 1); \ + __rv.__o = __builtin_neon_vnegv16qi (__ax.val, 1); \ __rv.__i; \ }) #define vnegq_s16(__a) \ ({ \ + int16x8_t __ax = __a; \ union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vnegv8hi (__a.val, 1); \ + __rv.__o = __builtin_neon_vnegv8hi (__ax.val, 1); \ __rv.__i; \ }) #define vnegq_s32(__a) \ ({ \ + int32x4_t __ax = __a; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vnegv4si (__a.val, 1); \ + __rv.__o = __builtin_neon_vnegv4si (__ax.val, 1); \ __rv.__i; \ }) #define 
vnegq_f32(__a) \ ({ \ + float32x4_t __ax = __a; \ union { float32x4_t __i; __neon_float32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vnegv4sf (__a.val, 5); \ + __rv.__o = __builtin_neon_vnegv4sf (__ax.val, 5); \ __rv.__i; \ }) #define vqneg_s8(__a) \ ({ \ + int8x8_t __ax = __a; \ union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqnegv8qi (__a.val, 1); \ + __rv.__o = __builtin_neon_vqnegv8qi (__ax.val, 1); \ __rv.__i; \ }) #define vqneg_s16(__a) \ ({ \ + int16x4_t __ax = __a; \ union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqnegv4hi (__a.val, 1); \ + __rv.__o = __builtin_neon_vqnegv4hi (__ax.val, 1); \ __rv.__i; \ }) #define vqneg_s32(__a) \ ({ \ + int32x2_t __ax = __a; \ union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqnegv2si (__a.val, 1); \ + __rv.__o = __builtin_neon_vqnegv2si (__ax.val, 1); \ __rv.__i; \ }) #define vqnegq_s8(__a) \ ({ \ + int8x16_t __ax = __a; \ union { int8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqnegv16qi (__a.val, 1); \ + __rv.__o = __builtin_neon_vqnegv16qi (__ax.val, 1); \ __rv.__i; \ }) #define vqnegq_s16(__a) \ ({ \ + int16x8_t __ax = __a; \ union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqnegv8hi (__a.val, 1); \ + __rv.__o = __builtin_neon_vqnegv8hi (__ax.val, 1); \ __rv.__i; \ }) #define vqnegq_s32(__a) \ ({ \ + int32x4_t __ax = __a; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqnegv4si (__a.val, 1); \ + __rv.__o = __builtin_neon_vqnegv4si (__ax.val, 1); \ __rv.__i; \ }) #define vmvn_s8(__a) \ ({ \ + int8x8_t __ax = __a; \ union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmvnv8qi (__a.val, 1); \ + __rv.__o = __builtin_neon_vmvnv8qi (__ax.val, 1); \ __rv.__i; \ }) #define vmvn_s16(__a) \ ({ \ + int16x4_t __ax = __a; \ union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = 
__builtin_neon_vmvnv4hi (__a.val, 1); \ + __rv.__o = __builtin_neon_vmvnv4hi (__ax.val, 1); \ __rv.__i; \ }) #define vmvn_s32(__a) \ ({ \ + int32x2_t __ax = __a; \ union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmvnv2si (__a.val, 1); \ + __rv.__o = __builtin_neon_vmvnv2si (__ax.val, 1); \ __rv.__i; \ }) #define vmvn_u8(__a) \ ({ \ + uint8x8_t __ax = __a; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmvnv8qi ((__neon_int8x8_t) __a.val, 0); \ + __rv.__o = __builtin_neon_vmvnv8qi ((__neon_int8x8_t) __ax.val, 0); \ __rv.__i; \ }) #define vmvn_u16(__a) \ ({ \ + uint16x4_t __ax = __a; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmvnv4hi ((__neon_int16x4_t) __a.val, 0); \ + __rv.__o = __builtin_neon_vmvnv4hi ((__neon_int16x4_t) __ax.val, 0); \ __rv.__i; \ }) #define vmvn_u32(__a) \ ({ \ + uint32x2_t __ax = __a; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmvnv2si ((__neon_int32x2_t) __a.val, 0); \ + __rv.__o = __builtin_neon_vmvnv2si ((__neon_int32x2_t) __ax.val, 0); \ __rv.__i; \ }) #define vmvn_p8(__a) \ ({ \ + poly8x8_t __ax = __a; \ union { poly8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmvnv8qi ((__neon_int8x8_t) __a.val, 4); \ + __rv.__o = __builtin_neon_vmvnv8qi ((__neon_int8x8_t) __ax.val, 4); \ __rv.__i; \ }) #define vmvnq_s8(__a) \ ({ \ + int8x16_t __ax = __a; \ union { int8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmvnv16qi (__a.val, 1); \ + __rv.__o = __builtin_neon_vmvnv16qi (__ax.val, 1); \ __rv.__i; \ }) #define vmvnq_s16(__a) \ ({ \ + int16x8_t __ax = __a; \ union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmvnv8hi (__a.val, 1); \ + __rv.__o = __builtin_neon_vmvnv8hi (__ax.val, 1); \ __rv.__i; \ }) #define vmvnq_s32(__a) \ ({ \ + int32x4_t __ax = __a; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - 
__rv.__o = __builtin_neon_vmvnv4si (__a.val, 1); \ + __rv.__o = __builtin_neon_vmvnv4si (__ax.val, 1); \ __rv.__i; \ }) #define vmvnq_u8(__a) \ ({ \ + uint8x16_t __ax = __a; \ union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmvnv16qi ((__neon_int8x16_t) __a.val, 0); \ + __rv.__o = __builtin_neon_vmvnv16qi ((__neon_int8x16_t) __ax.val, 0); \ __rv.__i; \ }) #define vmvnq_u16(__a) \ ({ \ + uint16x8_t __ax = __a; \ union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmvnv8hi ((__neon_int16x8_t) __a.val, 0); \ + __rv.__o = __builtin_neon_vmvnv8hi ((__neon_int16x8_t) __ax.val, 0); \ __rv.__i; \ }) #define vmvnq_u32(__a) \ ({ \ + uint32x4_t __ax = __a; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmvnv4si ((__neon_int32x4_t) __a.val, 0); \ + __rv.__o = __builtin_neon_vmvnv4si ((__neon_int32x4_t) __ax.val, 0); \ __rv.__i; \ }) #define vmvnq_p8(__a) \ ({ \ + poly8x16_t __ax = __a; \ union { poly8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmvnv16qi ((__neon_int8x16_t) __a.val, 4); \ + __rv.__o = __builtin_neon_vmvnv16qi ((__neon_int8x16_t) __ax.val, 4); \ __rv.__i; \ }) #define vcls_s8(__a) \ ({ \ + int8x8_t __ax = __a; \ union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vclsv8qi (__a.val, 1); \ + __rv.__o = __builtin_neon_vclsv8qi (__ax.val, 1); \ __rv.__i; \ }) #define vcls_s16(__a) \ ({ \ + int16x4_t __ax = __a; \ union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vclsv4hi (__a.val, 1); \ + __rv.__o = __builtin_neon_vclsv4hi (__ax.val, 1); \ __rv.__i; \ }) #define vcls_s32(__a) \ ({ \ + int32x2_t __ax = __a; \ union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vclsv2si (__a.val, 1); \ + __rv.__o = __builtin_neon_vclsv2si (__ax.val, 1); \ __rv.__i; \ }) #define vclsq_s8(__a) \ ({ \ + int8x16_t __ax = __a; \ union { int8x16_t __i; 
__neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vclsv16qi (__a.val, 1); \ + __rv.__o = __builtin_neon_vclsv16qi (__ax.val, 1); \ __rv.__i; \ }) #define vclsq_s16(__a) \ ({ \ + int16x8_t __ax = __a; \ union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vclsv8hi (__a.val, 1); \ + __rv.__o = __builtin_neon_vclsv8hi (__ax.val, 1); \ __rv.__i; \ }) #define vclsq_s32(__a) \ ({ \ + int32x4_t __ax = __a; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vclsv4si (__a.val, 1); \ + __rv.__o = __builtin_neon_vclsv4si (__ax.val, 1); \ __rv.__i; \ }) #define vclz_s8(__a) \ ({ \ + int8x8_t __ax = __a; \ union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vclzv8qi (__a.val, 1); \ + __rv.__o = __builtin_neon_vclzv8qi (__ax.val, 1); \ __rv.__i; \ }) #define vclz_s16(__a) \ ({ \ + int16x4_t __ax = __a; \ union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vclzv4hi (__a.val, 1); \ + __rv.__o = __builtin_neon_vclzv4hi (__ax.val, 1); \ __rv.__i; \ }) #define vclz_s32(__a) \ ({ \ + int32x2_t __ax = __a; \ union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vclzv2si (__a.val, 1); \ + __rv.__o = __builtin_neon_vclzv2si (__ax.val, 1); \ __rv.__i; \ }) #define vclz_u8(__a) \ ({ \ + uint8x8_t __ax = __a; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vclzv8qi ((__neon_int8x8_t) __a.val, 0); \ + __rv.__o = __builtin_neon_vclzv8qi ((__neon_int8x8_t) __ax.val, 0); \ __rv.__i; \ }) #define vclz_u16(__a) \ ({ \ + uint16x4_t __ax = __a; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vclzv4hi ((__neon_int16x4_t) __a.val, 0); \ + __rv.__o = __builtin_neon_vclzv4hi ((__neon_int16x4_t) __ax.val, 0); \ __rv.__i; \ }) #define vclz_u32(__a) \ ({ \ + uint32x2_t __ax = __a; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = 
__builtin_neon_vclzv2si ((__neon_int32x2_t) __a.val, 0); \ + __rv.__o = __builtin_neon_vclzv2si ((__neon_int32x2_t) __ax.val, 0); \ __rv.__i; \ }) #define vclzq_s8(__a) \ ({ \ + int8x16_t __ax = __a; \ union { int8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vclzv16qi (__a.val, 1); \ + __rv.__o = __builtin_neon_vclzv16qi (__ax.val, 1); \ __rv.__i; \ }) #define vclzq_s16(__a) \ ({ \ + int16x8_t __ax = __a; \ union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vclzv8hi (__a.val, 1); \ + __rv.__o = __builtin_neon_vclzv8hi (__ax.val, 1); \ __rv.__i; \ }) #define vclzq_s32(__a) \ ({ \ + int32x4_t __ax = __a; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vclzv4si (__a.val, 1); \ + __rv.__o = __builtin_neon_vclzv4si (__ax.val, 1); \ __rv.__i; \ }) #define vclzq_u8(__a) \ ({ \ + uint8x16_t __ax = __a; \ union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vclzv16qi ((__neon_int8x16_t) __a.val, 0); \ + __rv.__o = __builtin_neon_vclzv16qi ((__neon_int8x16_t) __ax.val, 0); \ __rv.__i; \ }) #define vclzq_u16(__a) \ ({ \ + uint16x8_t __ax = __a; \ union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vclzv8hi ((__neon_int16x8_t) __a.val, 0); \ + __rv.__o = __builtin_neon_vclzv8hi ((__neon_int16x8_t) __ax.val, 0); \ __rv.__i; \ }) #define vclzq_u32(__a) \ ({ \ + uint32x4_t __ax = __a; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vclzv4si ((__neon_int32x4_t) __a.val, 0); \ + __rv.__o = __builtin_neon_vclzv4si ((__neon_int32x4_t) __ax.val, 0); \ __rv.__i; \ }) #define vcnt_s8(__a) \ ({ \ + int8x8_t __ax = __a; \ union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcntv8qi (__a.val, 1); \ + __rv.__o = __builtin_neon_vcntv8qi (__ax.val, 1); \ __rv.__i; \ }) #define vcnt_u8(__a) \ ({ \ + uint8x8_t __ax = __a; \ union { uint8x8_t __i; __neon_int8x8_t __o; } 
__rv; \ - __rv.__o = __builtin_neon_vcntv8qi ((__neon_int8x8_t) __a.val, 0); \ + __rv.__o = __builtin_neon_vcntv8qi ((__neon_int8x8_t) __ax.val, 0); \ __rv.__i; \ }) #define vcnt_p8(__a) \ ({ \ + poly8x8_t __ax = __a; \ union { poly8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcntv8qi ((__neon_int8x8_t) __a.val, 4); \ + __rv.__o = __builtin_neon_vcntv8qi ((__neon_int8x8_t) __ax.val, 4); \ __rv.__i; \ }) #define vcntq_s8(__a) \ ({ \ + int8x16_t __ax = __a; \ union { int8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcntv16qi (__a.val, 1); \ + __rv.__o = __builtin_neon_vcntv16qi (__ax.val, 1); \ __rv.__i; \ }) #define vcntq_u8(__a) \ ({ \ + uint8x16_t __ax = __a; \ union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcntv16qi ((__neon_int8x16_t) __a.val, 0); \ + __rv.__o = __builtin_neon_vcntv16qi ((__neon_int8x16_t) __ax.val, 0); \ __rv.__i; \ }) #define vcntq_p8(__a) \ ({ \ + poly8x16_t __ax = __a; \ union { poly8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcntv16qi ((__neon_int8x16_t) __a.val, 4); \ + __rv.__o = __builtin_neon_vcntv16qi ((__neon_int8x16_t) __ax.val, 4); \ __rv.__i; \ }) #define vrecpe_f32(__a) \ ({ \ + float32x2_t __ax = __a; \ union { float32x2_t __i; __neon_float32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vrecpev2sf (__a.val, 5); \ + __rv.__o = __builtin_neon_vrecpev2sf (__ax.val, 5); \ __rv.__i; \ }) #define vrecpe_u32(__a) \ ({ \ + uint32x2_t __ax = __a; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vrecpev2si ((__neon_int32x2_t) __a.val, 0); \ + __rv.__o = __builtin_neon_vrecpev2si ((__neon_int32x2_t) __ax.val, 0); \ __rv.__i; \ }) #define vrecpeq_f32(__a) \ ({ \ + float32x4_t __ax = __a; \ union { float32x4_t __i; __neon_float32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vrecpev4sf (__a.val, 5); \ + __rv.__o = __builtin_neon_vrecpev4sf (__ax.val, 5); \ __rv.__i; \ }) #define 
vrecpeq_u32(__a) \ ({ \ + uint32x4_t __ax = __a; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vrecpev4si ((__neon_int32x4_t) __a.val, 0); \ + __rv.__o = __builtin_neon_vrecpev4si ((__neon_int32x4_t) __ax.val, 0); \ __rv.__i; \ }) #define vrsqrte_f32(__a) \ ({ \ + float32x2_t __ax = __a; \ union { float32x2_t __i; __neon_float32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vrsqrtev2sf (__a.val, 5); \ + __rv.__o = __builtin_neon_vrsqrtev2sf (__ax.val, 5); \ __rv.__i; \ }) #define vrsqrte_u32(__a) \ ({ \ + uint32x2_t __ax = __a; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vrsqrtev2si ((__neon_int32x2_t) __a.val, 0); \ + __rv.__o = __builtin_neon_vrsqrtev2si ((__neon_int32x2_t) __ax.val, 0); \ __rv.__i; \ }) #define vrsqrteq_f32(__a) \ ({ \ + float32x4_t __ax = __a; \ union { float32x4_t __i; __neon_float32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vrsqrtev4sf (__a.val, 5); \ + __rv.__o = __builtin_neon_vrsqrtev4sf (__ax.val, 5); \ __rv.__i; \ }) #define vrsqrteq_u32(__a) \ ({ \ + uint32x4_t __ax = __a; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vrsqrtev4si ((__neon_int32x4_t) __a.val, 0); \ + __rv.__o = __builtin_neon_vrsqrtev4si ((__neon_int32x4_t) __ax.val, 0); \ __rv.__i; \ }) #define vget_lane_s8(__a, __b) \ - (int8_t)__builtin_neon_vget_lanev8qi (__a.val, __b, 1) + ({ \ + int8x8_t __ax = __a; \ + (int8_t)__builtin_neon_vget_lanev8qi (__ax.val, __b, 1); \ + }) #define vget_lane_s16(__a, __b) \ - (int16_t)__builtin_neon_vget_lanev4hi (__a.val, __b, 1) + ({ \ + int16x4_t __ax = __a; \ + (int16_t)__builtin_neon_vget_lanev4hi (__ax.val, __b, 1); \ + }) #define vget_lane_s32(__a, __b) \ - (int32_t)__builtin_neon_vget_lanev2si (__a.val, __b, 1) + ({ \ + int32x2_t __ax = __a; \ + (int32_t)__builtin_neon_vget_lanev2si (__ax.val, __b, 1); \ + }) #define vget_lane_f32(__a, __b) \ - (float32_t)__builtin_neon_vget_lanev2sf (__a.val, __b, 5) + ({ 
\ + float32x2_t __ax = __a; \ + (float32_t)__builtin_neon_vget_lanev2sf (__ax.val, __b, 5); \ + }) #define vget_lane_u8(__a, __b) \ - (uint8_t)__builtin_neon_vget_lanev8qi ((__neon_int8x8_t) __a.val, __b, 0) + ({ \ + uint8x8_t __ax = __a; \ + (uint8_t)__builtin_neon_vget_lanev8qi ((__neon_int8x8_t) __ax.val, __b, 0); \ + }) #define vget_lane_u16(__a, __b) \ - (uint16_t)__builtin_neon_vget_lanev4hi ((__neon_int16x4_t) __a.val, __b, 0) + ({ \ + uint16x4_t __ax = __a; \ + (uint16_t)__builtin_neon_vget_lanev4hi ((__neon_int16x4_t) __ax.val, __b, 0); \ + }) #define vget_lane_u32(__a, __b) \ - (uint32_t)__builtin_neon_vget_lanev2si ((__neon_int32x2_t) __a.val, __b, 0) + ({ \ + uint32x2_t __ax = __a; \ + (uint32_t)__builtin_neon_vget_lanev2si ((__neon_int32x2_t) __ax.val, __b, 0); \ + }) #define vget_lane_p8(__a, __b) \ - (poly8_t)__builtin_neon_vget_lanev8qi ((__neon_int8x8_t) __a.val, __b, 4) + ({ \ + poly8x8_t __ax = __a; \ + (poly8_t)__builtin_neon_vget_lanev8qi ((__neon_int8x8_t) __ax.val, __b, 4); \ + }) #define vget_lane_p16(__a, __b) \ - (poly16_t)__builtin_neon_vget_lanev4hi ((__neon_int16x4_t) __a.val, __b, 4) + ({ \ + poly16x4_t __ax = __a; \ + (poly16_t)__builtin_neon_vget_lanev4hi ((__neon_int16x4_t) __ax.val, __b, 4); \ + }) #define vget_lane_s64(__a, __b) \ - (int64_t)__builtin_neon_vget_lanev1di (__a.val, __b, 1) + ({ \ + int64x1_t __ax = __a; \ + (int64_t)__builtin_neon_vget_lanev1di (__ax.val, __b, 1); \ + }) #define vget_lane_u64(__a, __b) \ - (uint64_t)__builtin_neon_vget_lanev1di ((__neon_int64x1_t) __a.val, __b, 0) + ({ \ + uint64x1_t __ax = __a; \ + (uint64_t)__builtin_neon_vget_lanev1di ((__neon_int64x1_t) __ax.val, __b, 0); \ + }) #define vgetq_lane_s8(__a, __b) \ - (int8_t)__builtin_neon_vget_lanev16qi (__a.val, __b, 1) + ({ \ + int8x16_t __ax = __a; \ + (int8_t)__builtin_neon_vget_lanev16qi (__ax.val, __b, 1); \ + }) #define vgetq_lane_s16(__a, __b) \ - (int16_t)__builtin_neon_vget_lanev8hi (__a.val, __b, 1) + ({ \ + int16x8_t __ax = __a; \ + 
(int16_t)__builtin_neon_vget_lanev8hi (__ax.val, __b, 1); \ + }) #define vgetq_lane_s32(__a, __b) \ - (int32_t)__builtin_neon_vget_lanev4si (__a.val, __b, 1) + ({ \ + int32x4_t __ax = __a; \ + (int32_t)__builtin_neon_vget_lanev4si (__ax.val, __b, 1); \ + }) #define vgetq_lane_f32(__a, __b) \ - (float32_t)__builtin_neon_vget_lanev4sf (__a.val, __b, 5) + ({ \ + float32x4_t __ax = __a; \ + (float32_t)__builtin_neon_vget_lanev4sf (__ax.val, __b, 5); \ + }) #define vgetq_lane_u8(__a, __b) \ - (uint8_t)__builtin_neon_vget_lanev16qi ((__neon_int8x16_t) __a.val, __b, 0) + ({ \ + uint8x16_t __ax = __a; \ + (uint8_t)__builtin_neon_vget_lanev16qi ((__neon_int8x16_t) __ax.val, __b, 0); \ + }) #define vgetq_lane_u16(__a, __b) \ - (uint16_t)__builtin_neon_vget_lanev8hi ((__neon_int16x8_t) __a.val, __b, 0) + ({ \ + uint16x8_t __ax = __a; \ + (uint16_t)__builtin_neon_vget_lanev8hi ((__neon_int16x8_t) __ax.val, __b, 0); \ + }) #define vgetq_lane_u32(__a, __b) \ - (uint32_t)__builtin_neon_vget_lanev4si ((__neon_int32x4_t) __a.val, __b, 0) + ({ \ + uint32x4_t __ax = __a; \ + (uint32_t)__builtin_neon_vget_lanev4si ((__neon_int32x4_t) __ax.val, __b, 0); \ + }) #define vgetq_lane_p8(__a, __b) \ - (poly8_t)__builtin_neon_vget_lanev16qi ((__neon_int8x16_t) __a.val, __b, 4) + ({ \ + poly8x16_t __ax = __a; \ + (poly8_t)__builtin_neon_vget_lanev16qi ((__neon_int8x16_t) __ax.val, __b, 4); \ + }) #define vgetq_lane_p16(__a, __b) \ - (poly16_t)__builtin_neon_vget_lanev8hi ((__neon_int16x8_t) __a.val, __b, 4) + ({ \ + poly16x8_t __ax = __a; \ + (poly16_t)__builtin_neon_vget_lanev8hi ((__neon_int16x8_t) __ax.val, __b, 4); \ + }) #define vgetq_lane_s64(__a, __b) \ - (int64_t)__builtin_neon_vget_lanev2di (__a.val, __b, 1) + ({ \ + int64x2_t __ax = __a; \ + (int64_t)__builtin_neon_vget_lanev2di (__ax.val, __b, 1); \ + }) #define vgetq_lane_u64(__a, __b) \ - (uint64_t)__builtin_neon_vget_lanev2di ((__neon_int64x2_t) __a.val, __b, 0) + ({ \ + uint64x2_t __ax = __a; \ + 
(uint64_t)__builtin_neon_vget_lanev2di ((__neon_int64x2_t) __ax.val, __b, 0); \ + }) #define vset_lane_s8(__a, __b, __c) \ ({ \ + int8_t __ax = __a; \ + int8x8_t __bx = __b; \ union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vset_lanev8qi (__a, __b.val, __c); \ + __rv.__o = __builtin_neon_vset_lanev8qi ((__builtin_neon_qi) __ax, __bx.val, __c); \ __rv.__i; \ }) #define vset_lane_s16(__a, __b, __c) \ ({ \ + int16_t __ax = __a; \ + int16x4_t __bx = __b; \ union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vset_lanev4hi (__a, __b.val, __c); \ + __rv.__o = __builtin_neon_vset_lanev4hi ((__builtin_neon_hi) __ax, __bx.val, __c); \ __rv.__i; \ }) #define vset_lane_s32(__a, __b, __c) \ ({ \ + int32_t __ax = __a; \ + int32x2_t __bx = __b; \ union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vset_lanev2si (__a, __b.val, __c); \ + __rv.__o = __builtin_neon_vset_lanev2si ((__builtin_neon_si) __ax, __bx.val, __c); \ __rv.__i; \ }) #define vset_lane_f32(__a, __b, __c) \ ({ \ + float32_t __ax = __a; \ + float32x2_t __bx = __b; \ union { float32x2_t __i; __neon_float32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vset_lanev2sf (__a, __b.val, __c); \ + __rv.__o = __builtin_neon_vset_lanev2sf (__ax, __bx.val, __c); \ __rv.__i; \ }) #define vset_lane_u8(__a, __b, __c) \ ({ \ + uint8_t __ax = __a; \ + uint8x8_t __bx = __b; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vset_lanev8qi ((int8_t) __a, (__neon_int8x8_t) __b.val, __c); \ + __rv.__o = __builtin_neon_vset_lanev8qi ((__builtin_neon_qi) __ax, (__neon_int8x8_t) __bx.val, __c); \ __rv.__i; \ }) #define vset_lane_u16(__a, __b, __c) \ ({ \ + uint16_t __ax = __a; \ + uint16x4_t __bx = __b; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vset_lanev4hi ((int16_t) __a, (__neon_int16x4_t) __b.val, __c); \ + __rv.__o = __builtin_neon_vset_lanev4hi ((__builtin_neon_hi) 
__ax, (__neon_int16x4_t) __bx.val, __c); \ __rv.__i; \ }) #define vset_lane_u32(__a, __b, __c) \ ({ \ + uint32_t __ax = __a; \ + uint32x2_t __bx = __b; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vset_lanev2si ((int32_t) __a, (__neon_int32x2_t) __b.val, __c); \ + __rv.__o = __builtin_neon_vset_lanev2si ((__builtin_neon_si) __ax, (__neon_int32x2_t) __bx.val, __c); \ __rv.__i; \ }) #define vset_lane_p8(__a, __b, __c) \ ({ \ + poly8_t __ax = __a; \ + poly8x8_t __bx = __b; \ union { poly8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vset_lanev8qi ((int8_t) __a, (__neon_int8x8_t) __b.val, __c); \ + __rv.__o = __builtin_neon_vset_lanev8qi ((__builtin_neon_qi) __ax, (__neon_int8x8_t) __bx.val, __c); \ __rv.__i; \ }) #define vset_lane_p16(__a, __b, __c) \ ({ \ + poly16_t __ax = __a; \ + poly16x4_t __bx = __b; \ union { poly16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vset_lanev4hi ((int16_t) __a, (__neon_int16x4_t) __b.val, __c); \ + __rv.__o = __builtin_neon_vset_lanev4hi ((__builtin_neon_hi) __ax, (__neon_int16x4_t) __bx.val, __c); \ __rv.__i; \ }) #define vset_lane_s64(__a, __b, __c) \ ({ \ + int64_t __ax = __a; \ + int64x1_t __bx = __b; \ union { int64x1_t __i; __neon_int64x1_t __o; } __rv; \ - __rv.__o = __builtin_neon_vset_lanev1di (__a, __b.val, __c); \ + __rv.__o = __builtin_neon_vset_lanev1di ((__builtin_neon_di) __ax, __bx.val, __c); \ __rv.__i; \ }) #define vset_lane_u64(__a, __b, __c) \ ({ \ + uint64_t __ax = __a; \ + uint64x1_t __bx = __b; \ union { uint64x1_t __i; __neon_int64x1_t __o; } __rv; \ - __rv.__o = __builtin_neon_vset_lanev1di ((int64_t) __a, (__neon_int64x1_t) __b.val, __c); \ + __rv.__o = __builtin_neon_vset_lanev1di ((__builtin_neon_di) __ax, (__neon_int64x1_t) __bx.val, __c); \ __rv.__i; \ }) #define vsetq_lane_s8(__a, __b, __c) \ ({ \ + int8_t __ax = __a; \ + int8x16_t __bx = __b; \ union { int8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = 
__builtin_neon_vset_lanev16qi (__a, __b.val, __c); \ + __rv.__o = __builtin_neon_vset_lanev16qi ((__builtin_neon_qi) __ax, __bx.val, __c); \ __rv.__i; \ }) #define vsetq_lane_s16(__a, __b, __c) \ ({ \ + int16_t __ax = __a; \ + int16x8_t __bx = __b; \ union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vset_lanev8hi (__a, __b.val, __c); \ + __rv.__o = __builtin_neon_vset_lanev8hi ((__builtin_neon_hi) __ax, __bx.val, __c); \ __rv.__i; \ }) #define vsetq_lane_s32(__a, __b, __c) \ ({ \ + int32_t __ax = __a; \ + int32x4_t __bx = __b; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vset_lanev4si (__a, __b.val, __c); \ + __rv.__o = __builtin_neon_vset_lanev4si ((__builtin_neon_si) __ax, __bx.val, __c); \ __rv.__i; \ }) #define vsetq_lane_f32(__a, __b, __c) \ ({ \ + float32_t __ax = __a; \ + float32x4_t __bx = __b; \ union { float32x4_t __i; __neon_float32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vset_lanev4sf (__a, __b.val, __c); \ + __rv.__o = __builtin_neon_vset_lanev4sf (__ax, __bx.val, __c); \ __rv.__i; \ }) #define vsetq_lane_u8(__a, __b, __c) \ ({ \ + uint8_t __ax = __a; \ + uint8x16_t __bx = __b; \ union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vset_lanev16qi ((int8_t) __a, (__neon_int8x16_t) __b.val, __c); \ + __rv.__o = __builtin_neon_vset_lanev16qi ((__builtin_neon_qi) __ax, (__neon_int8x16_t) __bx.val, __c); \ __rv.__i; \ }) #define vsetq_lane_u16(__a, __b, __c) \ ({ \ + uint16_t __ax = __a; \ + uint16x8_t __bx = __b; \ union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vset_lanev8hi ((int16_t) __a, (__neon_int16x8_t) __b.val, __c); \ + __rv.__o = __builtin_neon_vset_lanev8hi ((__builtin_neon_hi) __ax, (__neon_int16x8_t) __bx.val, __c); \ __rv.__i; \ }) #define vsetq_lane_u32(__a, __b, __c) \ ({ \ + uint32_t __ax = __a; \ + uint32x4_t __bx = __b; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = 
__builtin_neon_vset_lanev4si ((int32_t) __a, (__neon_int32x4_t) __b.val, __c); \ + __rv.__o = __builtin_neon_vset_lanev4si ((__builtin_neon_si) __ax, (__neon_int32x4_t) __bx.val, __c); \ __rv.__i; \ }) #define vsetq_lane_p8(__a, __b, __c) \ ({ \ + poly8_t __ax = __a; \ + poly8x16_t __bx = __b; \ union { poly8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vset_lanev16qi ((int8_t) __a, (__neon_int8x16_t) __b.val, __c); \ + __rv.__o = __builtin_neon_vset_lanev16qi ((__builtin_neon_qi) __ax, (__neon_int8x16_t) __bx.val, __c); \ __rv.__i; \ }) #define vsetq_lane_p16(__a, __b, __c) \ ({ \ + poly16_t __ax = __a; \ + poly16x8_t __bx = __b; \ union { poly16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vset_lanev8hi ((int16_t) __a, (__neon_int16x8_t) __b.val, __c); \ + __rv.__o = __builtin_neon_vset_lanev8hi ((__builtin_neon_hi) __ax, (__neon_int16x8_t) __bx.val, __c); \ __rv.__i; \ }) #define vsetq_lane_s64(__a, __b, __c) \ ({ \ + int64_t __ax = __a; \ + int64x2_t __bx = __b; \ union { int64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vset_lanev2di (__a, __b.val, __c); \ + __rv.__o = __builtin_neon_vset_lanev2di ((__builtin_neon_di) __ax, __bx.val, __c); \ __rv.__i; \ }) #define vsetq_lane_u64(__a, __b, __c) \ ({ \ + uint64_t __ax = __a; \ + uint64x2_t __bx = __b; \ union { uint64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vset_lanev2di ((int64_t) __a, (__neon_int64x2_t) __b.val, __c); \ + __rv.__o = __builtin_neon_vset_lanev2di ((__builtin_neon_di) __ax, (__neon_int64x2_t) __bx.val, __c); \ __rv.__i; \ }) #define vcreate_s8(__a) \ ({ \ + uint64_t __ax = __a; \ union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcreatev8qi ((int64_t) __a); \ + __rv.__o = __builtin_neon_vcreatev8qi ((__builtin_neon_di) __ax); \ __rv.__i; \ }) #define vcreate_s16(__a) \ ({ \ + uint64_t __ax = __a; \ union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = 
__builtin_neon_vcreatev4hi ((int64_t) __a); \ + __rv.__o = __builtin_neon_vcreatev4hi ((__builtin_neon_di) __ax); \ __rv.__i; \ }) #define vcreate_s32(__a) \ ({ \ + uint64_t __ax = __a; \ union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcreatev2si ((int64_t) __a); \ + __rv.__o = __builtin_neon_vcreatev2si ((__builtin_neon_di) __ax); \ __rv.__i; \ }) #define vcreate_s64(__a) \ ({ \ + uint64_t __ax = __a; \ union { int64x1_t __i; __neon_int64x1_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcreatev1di ((int64_t) __a); \ + __rv.__o = __builtin_neon_vcreatev1di ((__builtin_neon_di) __ax); \ __rv.__i; \ }) #define vcreate_f32(__a) \ ({ \ + uint64_t __ax = __a; \ union { float32x2_t __i; __neon_float32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcreatev2sf ((int64_t) __a); \ + __rv.__o = __builtin_neon_vcreatev2sf ((__builtin_neon_di) __ax); \ __rv.__i; \ }) #define vcreate_u8(__a) \ ({ \ + uint64_t __ax = __a; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcreatev8qi ((int64_t) __a); \ + __rv.__o = __builtin_neon_vcreatev8qi ((__builtin_neon_di) __ax); \ __rv.__i; \ }) #define vcreate_u16(__a) \ ({ \ + uint64_t __ax = __a; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcreatev4hi ((int64_t) __a); \ + __rv.__o = __builtin_neon_vcreatev4hi ((__builtin_neon_di) __ax); \ __rv.__i; \ }) #define vcreate_u32(__a) \ ({ \ + uint64_t __ax = __a; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcreatev2si ((int64_t) __a); \ + __rv.__o = __builtin_neon_vcreatev2si ((__builtin_neon_di) __ax); \ __rv.__i; \ }) #define vcreate_u64(__a) \ ({ \ + uint64_t __ax = __a; \ union { uint64x1_t __i; __neon_int64x1_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcreatev1di ((int64_t) __a); \ + __rv.__o = __builtin_neon_vcreatev1di ((__builtin_neon_di) __ax); \ __rv.__i; \ }) #define vcreate_p8(__a) \ ({ \ + uint64_t __ax = __a; \ union { 
poly8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcreatev8qi ((int64_t) __a); \ + __rv.__o = __builtin_neon_vcreatev8qi ((__builtin_neon_di) __ax); \ __rv.__i; \ }) #define vcreate_p16(__a) \ ({ \ + uint64_t __ax = __a; \ union { poly16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcreatev4hi ((int64_t) __a); \ + __rv.__o = __builtin_neon_vcreatev4hi ((__builtin_neon_di) __ax); \ __rv.__i; \ }) #define vdup_n_s8(__a) \ ({ \ + int8_t __ax = __a; \ union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vdup_nv8qi (__a); \ + __rv.__o = __builtin_neon_vdup_nv8qi ((__builtin_neon_qi) __ax); \ __rv.__i; \ }) #define vdup_n_s16(__a) \ ({ \ + int16_t __ax = __a; \ union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vdup_nv4hi (__a); \ + __rv.__o = __builtin_neon_vdup_nv4hi ((__builtin_neon_hi) __ax); \ __rv.__i; \ }) #define vdup_n_s32(__a) \ ({ \ + int32_t __ax = __a; \ union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vdup_nv2si (__a); \ + __rv.__o = __builtin_neon_vdup_nv2si ((__builtin_neon_si) __ax); \ __rv.__i; \ }) #define vdup_n_f32(__a) \ ({ \ + float32_t __ax = __a; \ union { float32x2_t __i; __neon_float32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vdup_nv2sf (__a); \ + __rv.__o = __builtin_neon_vdup_nv2sf (__ax); \ __rv.__i; \ }) #define vdup_n_u8(__a) \ ({ \ + uint8_t __ax = __a; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vdup_nv8qi ((int8_t) __a); \ + __rv.__o = __builtin_neon_vdup_nv8qi ((__builtin_neon_qi) __ax); \ __rv.__i; \ }) #define vdup_n_u16(__a) \ ({ \ + uint16_t __ax = __a; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vdup_nv4hi ((int16_t) __a); \ + __rv.__o = __builtin_neon_vdup_nv4hi ((__builtin_neon_hi) __ax); \ __rv.__i; \ }) #define vdup_n_u32(__a) \ ({ \ + uint32_t __ax = __a; \ union { uint32x2_t __i; __neon_int32x2_t 
__o; } __rv; \ - __rv.__o = __builtin_neon_vdup_nv2si ((int32_t) __a); \ + __rv.__o = __builtin_neon_vdup_nv2si ((__builtin_neon_si) __ax); \ __rv.__i; \ }) #define vdup_n_p8(__a) \ ({ \ + poly8_t __ax = __a; \ union { poly8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vdup_nv8qi ((int8_t) __a); \ + __rv.__o = __builtin_neon_vdup_nv8qi ((__builtin_neon_qi) __ax); \ __rv.__i; \ }) #define vdup_n_p16(__a) \ ({ \ + poly16_t __ax = __a; \ union { poly16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vdup_nv4hi ((int16_t) __a); \ + __rv.__o = __builtin_neon_vdup_nv4hi ((__builtin_neon_hi) __ax); \ __rv.__i; \ }) #define vdup_n_s64(__a) \ ({ \ + int64_t __ax = __a; \ union { int64x1_t __i; __neon_int64x1_t __o; } __rv; \ - __rv.__o = __builtin_neon_vdup_nv1di (__a); \ + __rv.__o = __builtin_neon_vdup_nv1di ((__builtin_neon_di) __ax); \ __rv.__i; \ }) #define vdup_n_u64(__a) \ ({ \ + uint64_t __ax = __a; \ union { uint64x1_t __i; __neon_int64x1_t __o; } __rv; \ - __rv.__o = __builtin_neon_vdup_nv1di ((int64_t) __a); \ + __rv.__o = __builtin_neon_vdup_nv1di ((__builtin_neon_di) __ax); \ __rv.__i; \ }) #define vdupq_n_s8(__a) \ ({ \ + int8_t __ax = __a; \ union { int8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vdup_nv16qi (__a); \ + __rv.__o = __builtin_neon_vdup_nv16qi ((__builtin_neon_qi) __ax); \ __rv.__i; \ }) #define vdupq_n_s16(__a) \ ({ \ + int16_t __ax = __a; \ union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vdup_nv8hi (__a); \ + __rv.__o = __builtin_neon_vdup_nv8hi ((__builtin_neon_hi) __ax); \ __rv.__i; \ }) #define vdupq_n_s32(__a) \ ({ \ + int32_t __ax = __a; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vdup_nv4si (__a); \ + __rv.__o = __builtin_neon_vdup_nv4si ((__builtin_neon_si) __ax); \ __rv.__i; \ }) #define vdupq_n_f32(__a) \ ({ \ + float32_t __ax = __a; \ union { float32x4_t __i; __neon_float32x4_t __o; } __rv; 
\ - __rv.__o = __builtin_neon_vdup_nv4sf (__a); \ + __rv.__o = __builtin_neon_vdup_nv4sf (__ax); \ __rv.__i; \ }) #define vdupq_n_u8(__a) \ ({ \ + uint8_t __ax = __a; \ union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vdup_nv16qi ((int8_t) __a); \ + __rv.__o = __builtin_neon_vdup_nv16qi ((__builtin_neon_qi) __ax); \ __rv.__i; \ }) #define vdupq_n_u16(__a) \ ({ \ + uint16_t __ax = __a; \ union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vdup_nv8hi ((int16_t) __a); \ + __rv.__o = __builtin_neon_vdup_nv8hi ((__builtin_neon_hi) __ax); \ __rv.__i; \ }) #define vdupq_n_u32(__a) \ ({ \ + uint32_t __ax = __a; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vdup_nv4si ((int32_t) __a); \ + __rv.__o = __builtin_neon_vdup_nv4si ((__builtin_neon_si) __ax); \ __rv.__i; \ }) #define vdupq_n_p8(__a) \ ({ \ + poly8_t __ax = __a; \ union { poly8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vdup_nv16qi ((int8_t) __a); \ + __rv.__o = __builtin_neon_vdup_nv16qi ((__builtin_neon_qi) __ax); \ __rv.__i; \ }) #define vdupq_n_p16(__a) \ ({ \ + poly16_t __ax = __a; \ union { poly16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vdup_nv8hi ((int16_t) __a); \ + __rv.__o = __builtin_neon_vdup_nv8hi ((__builtin_neon_hi) __ax); \ __rv.__i; \ }) #define vdupq_n_s64(__a) \ ({ \ + int64_t __ax = __a; \ union { int64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vdup_nv2di (__a); \ + __rv.__o = __builtin_neon_vdup_nv2di ((__builtin_neon_di) __ax); \ __rv.__i; \ }) #define vdupq_n_u64(__a) \ ({ \ + uint64_t __ax = __a; \ union { uint64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vdup_nv2di ((int64_t) __a); \ + __rv.__o = __builtin_neon_vdup_nv2di ((__builtin_neon_di) __ax); \ __rv.__i; \ }) #define vmov_n_s8(__a) \ ({ \ + int8_t __ax = __a; \ union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \ - 
__rv.__o = __builtin_neon_vdup_nv8qi (__a); \ + __rv.__o = __builtin_neon_vdup_nv8qi ((__builtin_neon_qi) __ax); \ __rv.__i; \ }) #define vmov_n_s16(__a) \ ({ \ + int16_t __ax = __a; \ union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vdup_nv4hi (__a); \ + __rv.__o = __builtin_neon_vdup_nv4hi ((__builtin_neon_hi) __ax); \ __rv.__i; \ }) #define vmov_n_s32(__a) \ ({ \ + int32_t __ax = __a; \ union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vdup_nv2si (__a); \ + __rv.__o = __builtin_neon_vdup_nv2si ((__builtin_neon_si) __ax); \ __rv.__i; \ }) #define vmov_n_f32(__a) \ ({ \ + float32_t __ax = __a; \ union { float32x2_t __i; __neon_float32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vdup_nv2sf (__a); \ + __rv.__o = __builtin_neon_vdup_nv2sf (__ax); \ __rv.__i; \ }) #define vmov_n_u8(__a) \ ({ \ + uint8_t __ax = __a; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vdup_nv8qi ((int8_t) __a); \ + __rv.__o = __builtin_neon_vdup_nv8qi ((__builtin_neon_qi) __ax); \ __rv.__i; \ }) #define vmov_n_u16(__a) \ ({ \ + uint16_t __ax = __a; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vdup_nv4hi ((int16_t) __a); \ + __rv.__o = __builtin_neon_vdup_nv4hi ((__builtin_neon_hi) __ax); \ __rv.__i; \ }) #define vmov_n_u32(__a) \ ({ \ + uint32_t __ax = __a; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vdup_nv2si ((int32_t) __a); \ + __rv.__o = __builtin_neon_vdup_nv2si ((__builtin_neon_si) __ax); \ __rv.__i; \ }) #define vmov_n_p8(__a) \ ({ \ + poly8_t __ax = __a; \ union { poly8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vdup_nv8qi ((int8_t) __a); \ + __rv.__o = __builtin_neon_vdup_nv8qi ((__builtin_neon_qi) __ax); \ __rv.__i; \ }) #define vmov_n_p16(__a) \ ({ \ + poly16_t __ax = __a; \ union { poly16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vdup_nv4hi 
((int16_t) __a); \ + __rv.__o = __builtin_neon_vdup_nv4hi ((__builtin_neon_hi) __ax); \ __rv.__i; \ }) #define vmov_n_s64(__a) \ ({ \ + int64_t __ax = __a; \ union { int64x1_t __i; __neon_int64x1_t __o; } __rv; \ - __rv.__o = __builtin_neon_vdup_nv1di (__a); \ + __rv.__o = __builtin_neon_vdup_nv1di ((__builtin_neon_di) __ax); \ __rv.__i; \ }) #define vmov_n_u64(__a) \ ({ \ + uint64_t __ax = __a; \ union { uint64x1_t __i; __neon_int64x1_t __o; } __rv; \ - __rv.__o = __builtin_neon_vdup_nv1di ((int64_t) __a); \ + __rv.__o = __builtin_neon_vdup_nv1di ((__builtin_neon_di) __ax); \ __rv.__i; \ }) #define vmovq_n_s8(__a) \ ({ \ + int8_t __ax = __a; \ union { int8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vdup_nv16qi (__a); \ + __rv.__o = __builtin_neon_vdup_nv16qi ((__builtin_neon_qi) __ax); \ __rv.__i; \ }) #define vmovq_n_s16(__a) \ ({ \ + int16_t __ax = __a; \ union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vdup_nv8hi (__a); \ + __rv.__o = __builtin_neon_vdup_nv8hi ((__builtin_neon_hi) __ax); \ __rv.__i; \ }) #define vmovq_n_s32(__a) \ ({ \ + int32_t __ax = __a; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vdup_nv4si (__a); \ + __rv.__o = __builtin_neon_vdup_nv4si ((__builtin_neon_si) __ax); \ __rv.__i; \ }) #define vmovq_n_f32(__a) \ ({ \ + float32_t __ax = __a; \ union { float32x4_t __i; __neon_float32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vdup_nv4sf (__a); \ + __rv.__o = __builtin_neon_vdup_nv4sf (__ax); \ __rv.__i; \ }) #define vmovq_n_u8(__a) \ ({ \ + uint8_t __ax = __a; \ union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vdup_nv16qi ((int8_t) __a); \ + __rv.__o = __builtin_neon_vdup_nv16qi ((__builtin_neon_qi) __ax); \ __rv.__i; \ }) #define vmovq_n_u16(__a) \ ({ \ + uint16_t __ax = __a; \ union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vdup_nv8hi ((int16_t) __a); \ + __rv.__o = 
__builtin_neon_vdup_nv8hi ((__builtin_neon_hi) __ax); \ __rv.__i; \ }) #define vmovq_n_u32(__a) \ ({ \ + uint32_t __ax = __a; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vdup_nv4si ((int32_t) __a); \ + __rv.__o = __builtin_neon_vdup_nv4si ((__builtin_neon_si) __ax); \ __rv.__i; \ }) #define vmovq_n_p8(__a) \ ({ \ + poly8_t __ax = __a; \ union { poly8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vdup_nv16qi ((int8_t) __a); \ + __rv.__o = __builtin_neon_vdup_nv16qi ((__builtin_neon_qi) __ax); \ __rv.__i; \ }) #define vmovq_n_p16(__a) \ ({ \ + poly16_t __ax = __a; \ union { poly16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vdup_nv8hi ((int16_t) __a); \ + __rv.__o = __builtin_neon_vdup_nv8hi ((__builtin_neon_hi) __ax); \ __rv.__i; \ }) #define vmovq_n_s64(__a) \ ({ \ + int64_t __ax = __a; \ union { int64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vdup_nv2di (__a); \ + __rv.__o = __builtin_neon_vdup_nv2di ((__builtin_neon_di) __ax); \ __rv.__i; \ }) #define vmovq_n_u64(__a) \ ({ \ + uint64_t __ax = __a; \ union { uint64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vdup_nv2di ((int64_t) __a); \ + __rv.__o = __builtin_neon_vdup_nv2di ((__builtin_neon_di) __ax); \ __rv.__i; \ }) #define vdup_lane_s8(__a, __b) \ ({ \ + int8x8_t __ax = __a; \ union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vdup_lanev8qi (__a.val, __b); \ + __rv.__o = __builtin_neon_vdup_lanev8qi (__ax.val, __b); \ __rv.__i; \ }) #define vdup_lane_s16(__a, __b) \ ({ \ + int16x4_t __ax = __a; \ union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vdup_lanev4hi (__a.val, __b); \ + __rv.__o = __builtin_neon_vdup_lanev4hi (__ax.val, __b); \ __rv.__i; \ }) #define vdup_lane_s32(__a, __b) \ ({ \ + int32x2_t __ax = __a; \ union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = 
__builtin_neon_vdup_lanev2si (__a.val, __b); \ + __rv.__o = __builtin_neon_vdup_lanev2si (__ax.val, __b); \ __rv.__i; \ }) #define vdup_lane_f32(__a, __b) \ ({ \ + float32x2_t __ax = __a; \ union { float32x2_t __i; __neon_float32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vdup_lanev2sf (__a.val, __b); \ + __rv.__o = __builtin_neon_vdup_lanev2sf (__ax.val, __b); \ __rv.__i; \ }) #define vdup_lane_u8(__a, __b) \ ({ \ + uint8x8_t __ax = __a; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vdup_lanev8qi ((__neon_int8x8_t) __a.val, __b); \ + __rv.__o = __builtin_neon_vdup_lanev8qi ((__neon_int8x8_t) __ax.val, __b); \ __rv.__i; \ }) #define vdup_lane_u16(__a, __b) \ ({ \ + uint16x4_t __ax = __a; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vdup_lanev4hi ((__neon_int16x4_t) __a.val, __b); \ + __rv.__o = __builtin_neon_vdup_lanev4hi ((__neon_int16x4_t) __ax.val, __b); \ __rv.__i; \ }) #define vdup_lane_u32(__a, __b) \ ({ \ + uint32x2_t __ax = __a; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vdup_lanev2si ((__neon_int32x2_t) __a.val, __b); \ + __rv.__o = __builtin_neon_vdup_lanev2si ((__neon_int32x2_t) __ax.val, __b); \ __rv.__i; \ }) #define vdup_lane_p8(__a, __b) \ ({ \ + poly8x8_t __ax = __a; \ union { poly8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vdup_lanev8qi ((__neon_int8x8_t) __a.val, __b); \ + __rv.__o = __builtin_neon_vdup_lanev8qi ((__neon_int8x8_t) __ax.val, __b); \ __rv.__i; \ }) #define vdup_lane_p16(__a, __b) \ ({ \ + poly16x4_t __ax = __a; \ union { poly16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vdup_lanev4hi ((__neon_int16x4_t) __a.val, __b); \ + __rv.__o = __builtin_neon_vdup_lanev4hi ((__neon_int16x4_t) __ax.val, __b); \ __rv.__i; \ }) #define vdup_lane_s64(__a, __b) \ ({ \ + int64x1_t __ax = __a; \ union { int64x1_t __i; __neon_int64x1_t __o; } __rv; \ - __rv.__o = 
__builtin_neon_vdup_lanev1di (__a.val, __b); \ + __rv.__o = __builtin_neon_vdup_lanev1di (__ax.val, __b); \ __rv.__i; \ }) #define vdup_lane_u64(__a, __b) \ ({ \ + uint64x1_t __ax = __a; \ union { uint64x1_t __i; __neon_int64x1_t __o; } __rv; \ - __rv.__o = __builtin_neon_vdup_lanev1di ((__neon_int64x1_t) __a.val, __b); \ + __rv.__o = __builtin_neon_vdup_lanev1di ((__neon_int64x1_t) __ax.val, __b); \ __rv.__i; \ }) #define vdupq_lane_s8(__a, __b) \ ({ \ + int8x8_t __ax = __a; \ union { int8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vdup_lanev16qi (__a.val, __b); \ + __rv.__o = __builtin_neon_vdup_lanev16qi (__ax.val, __b); \ __rv.__i; \ }) #define vdupq_lane_s16(__a, __b) \ ({ \ + int16x4_t __ax = __a; \ union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vdup_lanev8hi (__a.val, __b); \ + __rv.__o = __builtin_neon_vdup_lanev8hi (__ax.val, __b); \ __rv.__i; \ }) #define vdupq_lane_s32(__a, __b) \ ({ \ + int32x2_t __ax = __a; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vdup_lanev4si (__a.val, __b); \ + __rv.__o = __builtin_neon_vdup_lanev4si (__ax.val, __b); \ __rv.__i; \ }) #define vdupq_lane_f32(__a, __b) \ ({ \ + float32x2_t __ax = __a; \ union { float32x4_t __i; __neon_float32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vdup_lanev4sf (__a.val, __b); \ + __rv.__o = __builtin_neon_vdup_lanev4sf (__ax.val, __b); \ __rv.__i; \ }) #define vdupq_lane_u8(__a, __b) \ ({ \ + uint8x8_t __ax = __a; \ union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vdup_lanev16qi ((__neon_int8x8_t) __a.val, __b); \ + __rv.__o = __builtin_neon_vdup_lanev16qi ((__neon_int8x8_t) __ax.val, __b); \ __rv.__i; \ }) #define vdupq_lane_u16(__a, __b) \ ({ \ + uint16x4_t __ax = __a; \ union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vdup_lanev8hi ((__neon_int16x4_t) __a.val, __b); \ + __rv.__o = __builtin_neon_vdup_lanev8hi 
((__neon_int16x4_t) __ax.val, __b); \ __rv.__i; \ }) #define vdupq_lane_u32(__a, __b) \ ({ \ + uint32x2_t __ax = __a; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vdup_lanev4si ((__neon_int32x2_t) __a.val, __b); \ + __rv.__o = __builtin_neon_vdup_lanev4si ((__neon_int32x2_t) __ax.val, __b); \ __rv.__i; \ }) #define vdupq_lane_p8(__a, __b) \ ({ \ + poly8x8_t __ax = __a; \ union { poly8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vdup_lanev16qi ((__neon_int8x8_t) __a.val, __b); \ + __rv.__o = __builtin_neon_vdup_lanev16qi ((__neon_int8x8_t) __ax.val, __b); \ __rv.__i; \ }) #define vdupq_lane_p16(__a, __b) \ ({ \ + poly16x4_t __ax = __a; \ union { poly16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vdup_lanev8hi ((__neon_int16x4_t) __a.val, __b); \ + __rv.__o = __builtin_neon_vdup_lanev8hi ((__neon_int16x4_t) __ax.val, __b); \ __rv.__i; \ }) #define vdupq_lane_s64(__a, __b) \ ({ \ + int64x1_t __ax = __a; \ union { int64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vdup_lanev2di (__a.val, __b); \ + __rv.__o = __builtin_neon_vdup_lanev2di (__ax.val, __b); \ __rv.__i; \ }) #define vdupq_lane_u64(__a, __b) \ ({ \ + uint64x1_t __ax = __a; \ union { uint64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vdup_lanev2di ((__neon_int64x1_t) __a.val, __b); \ + __rv.__o = __builtin_neon_vdup_lanev2di ((__neon_int64x1_t) __ax.val, __b); \ __rv.__i; \ }) #define vcombine_s8(__a, __b) \ ({ \ + int8x8_t __ax = __a; \ + int8x8_t __bx = __b; \ union { int8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcombinev8qi (__a.val, __b.val); \ + __rv.__o = __builtin_neon_vcombinev8qi (__ax.val, __bx.val); \ __rv.__i; \ }) #define vcombine_s16(__a, __b) \ ({ \ + int16x4_t __ax = __a; \ + int16x4_t __bx = __b; \ union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcombinev4hi (__a.val, __b.val); \ + __rv.__o 
= __builtin_neon_vcombinev4hi (__ax.val, __bx.val); \ __rv.__i; \ }) #define vcombine_s32(__a, __b) \ ({ \ + int32x2_t __ax = __a; \ + int32x2_t __bx = __b; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcombinev2si (__a.val, __b.val); \ + __rv.__o = __builtin_neon_vcombinev2si (__ax.val, __bx.val); \ __rv.__i; \ }) #define vcombine_s64(__a, __b) \ ({ \ + int64x1_t __ax = __a; \ + int64x1_t __bx = __b; \ union { int64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcombinev1di (__a.val, __b.val); \ + __rv.__o = __builtin_neon_vcombinev1di (__ax.val, __bx.val); \ __rv.__i; \ }) #define vcombine_f32(__a, __b) \ ({ \ + float32x2_t __ax = __a; \ + float32x2_t __bx = __b; \ union { float32x4_t __i; __neon_float32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcombinev2sf (__a.val, __b.val); \ + __rv.__o = __builtin_neon_vcombinev2sf (__ax.val, __bx.val); \ __rv.__i; \ }) #define vcombine_u8(__a, __b) \ ({ \ + uint8x8_t __ax = __a; \ + uint8x8_t __bx = __b; \ union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcombinev8qi ((__neon_int8x8_t) __a.val, (__neon_int8x8_t) __b.val); \ + __rv.__o = __builtin_neon_vcombinev8qi ((__neon_int8x8_t) __ax.val, (__neon_int8x8_t) __bx.val); \ __rv.__i; \ }) #define vcombine_u16(__a, __b) \ ({ \ + uint16x4_t __ax = __a; \ + uint16x4_t __bx = __b; \ union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcombinev4hi ((__neon_int16x4_t) __a.val, (__neon_int16x4_t) __b.val); \ + __rv.__o = __builtin_neon_vcombinev4hi ((__neon_int16x4_t) __ax.val, (__neon_int16x4_t) __bx.val); \ __rv.__i; \ }) #define vcombine_u32(__a, __b) \ ({ \ + uint32x2_t __ax = __a; \ + uint32x2_t __bx = __b; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcombinev2si ((__neon_int32x2_t) __a.val, (__neon_int32x2_t) __b.val); \ + __rv.__o = __builtin_neon_vcombinev2si ((__neon_int32x2_t) __ax.val, 
(__neon_int32x2_t) __bx.val); \ __rv.__i; \ }) #define vcombine_u64(__a, __b) \ ({ \ + uint64x1_t __ax = __a; \ + uint64x1_t __bx = __b; \ union { uint64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcombinev1di ((__neon_int64x1_t) __a.val, (__neon_int64x1_t) __b.val); \ + __rv.__o = __builtin_neon_vcombinev1di ((__neon_int64x1_t) __ax.val, (__neon_int64x1_t) __bx.val); \ __rv.__i; \ }) #define vcombine_p8(__a, __b) \ ({ \ + poly8x8_t __ax = __a; \ + poly8x8_t __bx = __b; \ union { poly8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcombinev8qi ((__neon_int8x8_t) __a.val, (__neon_int8x8_t) __b.val); \ + __rv.__o = __builtin_neon_vcombinev8qi ((__neon_int8x8_t) __ax.val, (__neon_int8x8_t) __bx.val); \ __rv.__i; \ }) #define vcombine_p16(__a, __b) \ ({ \ + poly16x4_t __ax = __a; \ + poly16x4_t __bx = __b; \ union { poly16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcombinev4hi ((__neon_int16x4_t) __a.val, (__neon_int16x4_t) __b.val); \ + __rv.__o = __builtin_neon_vcombinev4hi ((__neon_int16x4_t) __ax.val, (__neon_int16x4_t) __bx.val); \ __rv.__i; \ }) #define vget_high_s8(__a) \ ({ \ + int8x16_t __ax = __a; \ union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vget_highv16qi (__a.val); \ + __rv.__o = __builtin_neon_vget_highv16qi (__ax.val); \ __rv.__i; \ }) #define vget_high_s16(__a) \ ({ \ + int16x8_t __ax = __a; \ union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vget_highv8hi (__a.val); \ + __rv.__o = __builtin_neon_vget_highv8hi (__ax.val); \ __rv.__i; \ }) #define vget_high_s32(__a) \ ({ \ + int32x4_t __ax = __a; \ union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vget_highv4si (__a.val); \ + __rv.__o = __builtin_neon_vget_highv4si (__ax.val); \ __rv.__i; \ }) #define vget_high_s64(__a) \ ({ \ + int64x2_t __ax = __a; \ union { int64x1_t __i; __neon_int64x1_t __o; } __rv; \ - __rv.__o = 
__builtin_neon_vget_highv2di (__a.val); \ + __rv.__o = __builtin_neon_vget_highv2di (__ax.val); \ __rv.__i; \ }) #define vget_high_f32(__a) \ ({ \ + float32x4_t __ax = __a; \ union { float32x2_t __i; __neon_float32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vget_highv4sf (__a.val); \ + __rv.__o = __builtin_neon_vget_highv4sf (__ax.val); \ __rv.__i; \ }) #define vget_high_u8(__a) \ ({ \ + uint8x16_t __ax = __a; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vget_highv16qi ((__neon_int8x16_t) __a.val); \ + __rv.__o = __builtin_neon_vget_highv16qi ((__neon_int8x16_t) __ax.val); \ __rv.__i; \ }) #define vget_high_u16(__a) \ ({ \ + uint16x8_t __ax = __a; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vget_highv8hi ((__neon_int16x8_t) __a.val); \ + __rv.__o = __builtin_neon_vget_highv8hi ((__neon_int16x8_t) __ax.val); \ __rv.__i; \ }) #define vget_high_u32(__a) \ ({ \ + uint32x4_t __ax = __a; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vget_highv4si ((__neon_int32x4_t) __a.val); \ + __rv.__o = __builtin_neon_vget_highv4si ((__neon_int32x4_t) __ax.val); \ __rv.__i; \ }) #define vget_high_u64(__a) \ ({ \ + uint64x2_t __ax = __a; \ union { uint64x1_t __i; __neon_int64x1_t __o; } __rv; \ - __rv.__o = __builtin_neon_vget_highv2di ((__neon_int64x2_t) __a.val); \ + __rv.__o = __builtin_neon_vget_highv2di ((__neon_int64x2_t) __ax.val); \ __rv.__i; \ }) #define vget_high_p8(__a) \ ({ \ + poly8x16_t __ax = __a; \ union { poly8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vget_highv16qi ((__neon_int8x16_t) __a.val); \ + __rv.__o = __builtin_neon_vget_highv16qi ((__neon_int8x16_t) __ax.val); \ __rv.__i; \ }) #define vget_high_p16(__a) \ ({ \ + poly16x8_t __ax = __a; \ union { poly16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vget_highv8hi ((__neon_int16x8_t) __a.val); \ + __rv.__o = __builtin_neon_vget_highv8hi 
((__neon_int16x8_t) __ax.val); \ __rv.__i; \ }) #define vget_low_s8(__a) \ ({ \ + int8x16_t __ax = __a; \ union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vget_lowv16qi (__a.val); \ + __rv.__o = __builtin_neon_vget_lowv16qi (__ax.val); \ __rv.__i; \ }) #define vget_low_s16(__a) \ ({ \ + int16x8_t __ax = __a; \ union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vget_lowv8hi (__a.val); \ + __rv.__o = __builtin_neon_vget_lowv8hi (__ax.val); \ __rv.__i; \ }) #define vget_low_s32(__a) \ ({ \ + int32x4_t __ax = __a; \ union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vget_lowv4si (__a.val); \ + __rv.__o = __builtin_neon_vget_lowv4si (__ax.val); \ __rv.__i; \ }) #define vget_low_s64(__a) \ ({ \ + int64x2_t __ax = __a; \ union { int64x1_t __i; __neon_int64x1_t __o; } __rv; \ - __rv.__o = __builtin_neon_vget_lowv2di (__a.val); \ + __rv.__o = __builtin_neon_vget_lowv2di (__ax.val); \ __rv.__i; \ }) #define vget_low_f32(__a) \ ({ \ + float32x4_t __ax = __a; \ union { float32x2_t __i; __neon_float32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vget_lowv4sf (__a.val); \ + __rv.__o = __builtin_neon_vget_lowv4sf (__ax.val); \ __rv.__i; \ }) #define vget_low_u8(__a) \ ({ \ + uint8x16_t __ax = __a; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vget_lowv16qi ((__neon_int8x16_t) __a.val); \ + __rv.__o = __builtin_neon_vget_lowv16qi ((__neon_int8x16_t) __ax.val); \ __rv.__i; \ }) #define vget_low_u16(__a) \ ({ \ + uint16x8_t __ax = __a; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vget_lowv8hi ((__neon_int16x8_t) __a.val); \ + __rv.__o = __builtin_neon_vget_lowv8hi ((__neon_int16x8_t) __ax.val); \ __rv.__i; \ }) #define vget_low_u32(__a) \ ({ \ + uint32x4_t __ax = __a; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vget_lowv4si ((__neon_int32x4_t) __a.val); \ + __rv.__o = 
__builtin_neon_vget_lowv4si ((__neon_int32x4_t) __ax.val); \ __rv.__i; \ }) #define vget_low_u64(__a) \ ({ \ + uint64x2_t __ax = __a; \ union { uint64x1_t __i; __neon_int64x1_t __o; } __rv; \ - __rv.__o = __builtin_neon_vget_lowv2di ((__neon_int64x2_t) __a.val); \ + __rv.__o = __builtin_neon_vget_lowv2di ((__neon_int64x2_t) __ax.val); \ __rv.__i; \ }) #define vget_low_p8(__a) \ ({ \ + poly8x16_t __ax = __a; \ union { poly8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vget_lowv16qi ((__neon_int8x16_t) __a.val); \ + __rv.__o = __builtin_neon_vget_lowv16qi ((__neon_int8x16_t) __ax.val); \ __rv.__i; \ }) #define vget_low_p16(__a) \ ({ \ + poly16x8_t __ax = __a; \ union { poly16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vget_lowv8hi ((__neon_int16x8_t) __a.val); \ + __rv.__o = __builtin_neon_vget_lowv8hi ((__neon_int16x8_t) __ax.val); \ __rv.__i; \ }) #define vcvt_s32_f32(__a) \ ({ \ + float32x2_t __ax = __a; \ union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcvtv2sf (__a.val, 1); \ + __rv.__o = __builtin_neon_vcvtv2sf (__ax.val, 1); \ __rv.__i; \ }) #define vcvt_f32_s32(__a) \ ({ \ + int32x2_t __ax = __a; \ union { float32x2_t __i; __neon_float32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcvtv2si (__a.val, 1); \ + __rv.__o = __builtin_neon_vcvtv2si (__ax.val, 1); \ __rv.__i; \ }) #define vcvt_f32_u32(__a) \ ({ \ + uint32x2_t __ax = __a; \ union { float32x2_t __i; __neon_float32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcvtv2si ((__neon_int32x2_t) __a.val, 0); \ + __rv.__o = __builtin_neon_vcvtv2si ((__neon_int32x2_t) __ax.val, 0); \ __rv.__i; \ }) #define vcvt_u32_f32(__a) \ ({ \ + float32x2_t __ax = __a; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcvtv2sf (__a.val, 0); \ + __rv.__o = __builtin_neon_vcvtv2sf (__ax.val, 0); \ __rv.__i; \ }) #define vcvtq_s32_f32(__a) \ ({ \ + float32x4_t __ax = __a; \ union { int32x4_t __i; 
__neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcvtv4sf (__a.val, 1); \ + __rv.__o = __builtin_neon_vcvtv4sf (__ax.val, 1); \ __rv.__i; \ }) #define vcvtq_f32_s32(__a) \ ({ \ + int32x4_t __ax = __a; \ union { float32x4_t __i; __neon_float32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcvtv4si (__a.val, 1); \ + __rv.__o = __builtin_neon_vcvtv4si (__ax.val, 1); \ __rv.__i; \ }) #define vcvtq_f32_u32(__a) \ ({ \ + uint32x4_t __ax = __a; \ union { float32x4_t __i; __neon_float32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcvtv4si ((__neon_int32x4_t) __a.val, 0); \ + __rv.__o = __builtin_neon_vcvtv4si ((__neon_int32x4_t) __ax.val, 0); \ __rv.__i; \ }) #define vcvtq_u32_f32(__a) \ ({ \ + float32x4_t __ax = __a; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcvtv4sf (__a.val, 0); \ + __rv.__o = __builtin_neon_vcvtv4sf (__ax.val, 0); \ __rv.__i; \ }) #define vcvt_n_s32_f32(__a, __b) \ ({ \ + float32x2_t __ax = __a; \ union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcvt_nv2sf (__a.val, __b, 1); \ + __rv.__o = __builtin_neon_vcvt_nv2sf (__ax.val, __b, 1); \ __rv.__i; \ }) #define vcvt_n_f32_s32(__a, __b) \ ({ \ + int32x2_t __ax = __a; \ union { float32x2_t __i; __neon_float32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcvt_nv2si (__a.val, __b, 1); \ + __rv.__o = __builtin_neon_vcvt_nv2si (__ax.val, __b, 1); \ __rv.__i; \ }) #define vcvt_n_f32_u32(__a, __b) \ ({ \ + uint32x2_t __ax = __a; \ union { float32x2_t __i; __neon_float32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcvt_nv2si ((__neon_int32x2_t) __a.val, __b, 0); \ + __rv.__o = __builtin_neon_vcvt_nv2si ((__neon_int32x2_t) __ax.val, __b, 0); \ __rv.__i; \ }) #define vcvt_n_u32_f32(__a, __b) \ ({ \ + float32x2_t __ax = __a; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcvt_nv2sf (__a.val, __b, 0); \ + __rv.__o = __builtin_neon_vcvt_nv2sf (__ax.val, __b, 0); \ __rv.__i; \ }) 
#define vcvtq_n_s32_f32(__a, __b) \ ({ \ + float32x4_t __ax = __a; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcvt_nv4sf (__a.val, __b, 1); \ + __rv.__o = __builtin_neon_vcvt_nv4sf (__ax.val, __b, 1); \ __rv.__i; \ }) #define vcvtq_n_f32_s32(__a, __b) \ ({ \ + int32x4_t __ax = __a; \ union { float32x4_t __i; __neon_float32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcvt_nv4si (__a.val, __b, 1); \ + __rv.__o = __builtin_neon_vcvt_nv4si (__ax.val, __b, 1); \ __rv.__i; \ }) #define vcvtq_n_f32_u32(__a, __b) \ ({ \ + uint32x4_t __ax = __a; \ union { float32x4_t __i; __neon_float32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcvt_nv4si ((__neon_int32x4_t) __a.val, __b, 0); \ + __rv.__o = __builtin_neon_vcvt_nv4si ((__neon_int32x4_t) __ax.val, __b, 0); \ __rv.__i; \ }) #define vcvtq_n_u32_f32(__a, __b) \ ({ \ + float32x4_t __ax = __a; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vcvt_nv4sf (__a.val, __b, 0); \ + __rv.__o = __builtin_neon_vcvt_nv4sf (__ax.val, __b, 0); \ __rv.__i; \ }) #define vmovn_s16(__a) \ ({ \ + int16x8_t __ax = __a; \ union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmovnv8hi (__a.val, 1); \ + __rv.__o = __builtin_neon_vmovnv8hi (__ax.val, 1); \ __rv.__i; \ }) #define vmovn_s32(__a) \ ({ \ + int32x4_t __ax = __a; \ union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmovnv4si (__a.val, 1); \ + __rv.__o = __builtin_neon_vmovnv4si (__ax.val, 1); \ __rv.__i; \ }) #define vmovn_s64(__a) \ ({ \ + int64x2_t __ax = __a; \ union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmovnv2di (__a.val, 1); \ + __rv.__o = __builtin_neon_vmovnv2di (__ax.val, 1); \ __rv.__i; \ }) #define vmovn_u16(__a) \ ({ \ + uint16x8_t __ax = __a; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmovnv8hi ((__neon_int16x8_t) __a.val, 0); \ + __rv.__o = 
__builtin_neon_vmovnv8hi ((__neon_int16x8_t) __ax.val, 0); \ __rv.__i; \ }) #define vmovn_u32(__a) \ ({ \ + uint32x4_t __ax = __a; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmovnv4si ((__neon_int32x4_t) __a.val, 0); \ + __rv.__o = __builtin_neon_vmovnv4si ((__neon_int32x4_t) __ax.val, 0); \ __rv.__i; \ }) #define vmovn_u64(__a) \ ({ \ + uint64x2_t __ax = __a; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmovnv2di ((__neon_int64x2_t) __a.val, 0); \ + __rv.__o = __builtin_neon_vmovnv2di ((__neon_int64x2_t) __ax.val, 0); \ __rv.__i; \ }) #define vqmovn_s16(__a) \ ({ \ + int16x8_t __ax = __a; \ union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqmovnv8hi (__a.val, 1); \ + __rv.__o = __builtin_neon_vqmovnv8hi (__ax.val, 1); \ __rv.__i; \ }) #define vqmovn_s32(__a) \ ({ \ + int32x4_t __ax = __a; \ union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqmovnv4si (__a.val, 1); \ + __rv.__o = __builtin_neon_vqmovnv4si (__ax.val, 1); \ __rv.__i; \ }) #define vqmovn_s64(__a) \ ({ \ + int64x2_t __ax = __a; \ union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqmovnv2di (__a.val, 1); \ + __rv.__o = __builtin_neon_vqmovnv2di (__ax.val, 1); \ __rv.__i; \ }) #define vqmovn_u16(__a) \ ({ \ + uint16x8_t __ax = __a; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqmovnv8hi ((__neon_int16x8_t) __a.val, 0); \ + __rv.__o = __builtin_neon_vqmovnv8hi ((__neon_int16x8_t) __ax.val, 0); \ __rv.__i; \ }) #define vqmovn_u32(__a) \ ({ \ + uint32x4_t __ax = __a; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqmovnv4si ((__neon_int32x4_t) __a.val, 0); \ + __rv.__o = __builtin_neon_vqmovnv4si ((__neon_int32x4_t) __ax.val, 0); \ __rv.__i; \ }) #define vqmovn_u64(__a) \ ({ \ + uint64x2_t __ax = __a; \ union { uint32x2_t __i; __neon_int32x2_t __o; } 
__rv; \ - __rv.__o = __builtin_neon_vqmovnv2di ((__neon_int64x2_t) __a.val, 0); \ + __rv.__o = __builtin_neon_vqmovnv2di ((__neon_int64x2_t) __ax.val, 0); \ __rv.__i; \ }) #define vqmovun_s16(__a) \ ({ \ + int16x8_t __ax = __a; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqmovunv8hi (__a.val, 1); \ + __rv.__o = __builtin_neon_vqmovunv8hi (__ax.val, 1); \ __rv.__i; \ }) #define vqmovun_s32(__a) \ ({ \ + int32x4_t __ax = __a; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqmovunv4si (__a.val, 1); \ + __rv.__o = __builtin_neon_vqmovunv4si (__ax.val, 1); \ __rv.__i; \ }) #define vqmovun_s64(__a) \ ({ \ + int64x2_t __ax = __a; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqmovunv2di (__a.val, 1); \ + __rv.__o = __builtin_neon_vqmovunv2di (__ax.val, 1); \ __rv.__i; \ }) #define vmovl_s8(__a) \ ({ \ + int8x8_t __ax = __a; \ union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmovlv8qi (__a.val, 1); \ + __rv.__o = __builtin_neon_vmovlv8qi (__ax.val, 1); \ __rv.__i; \ }) #define vmovl_s16(__a) \ ({ \ + int16x4_t __ax = __a; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmovlv4hi (__a.val, 1); \ + __rv.__o = __builtin_neon_vmovlv4hi (__ax.val, 1); \ __rv.__i; \ }) #define vmovl_s32(__a) \ ({ \ + int32x2_t __ax = __a; \ union { int64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmovlv2si (__a.val, 1); \ + __rv.__o = __builtin_neon_vmovlv2si (__ax.val, 1); \ __rv.__i; \ }) #define vmovl_u8(__a) \ ({ \ + uint8x8_t __ax = __a; \ union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmovlv8qi ((__neon_int8x8_t) __a.val, 0); \ + __rv.__o = __builtin_neon_vmovlv8qi ((__neon_int8x8_t) __ax.val, 0); \ __rv.__i; \ }) #define vmovl_u16(__a) \ ({ \ + uint16x4_t __ax = __a; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = 
__builtin_neon_vmovlv4hi ((__neon_int16x4_t) __a.val, 0); \ + __rv.__o = __builtin_neon_vmovlv4hi ((__neon_int16x4_t) __ax.val, 0); \ __rv.__i; \ }) #define vmovl_u32(__a) \ ({ \ + uint32x2_t __ax = __a; \ union { uint64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmovlv2si ((__neon_int32x2_t) __a.val, 0); \ + __rv.__o = __builtin_neon_vmovlv2si ((__neon_int32x2_t) __ax.val, 0); \ __rv.__i; \ }) #define vtbl1_s8(__a, __b) \ ({ \ + int8x8_t __ax = __a; \ + int8x8_t __bx = __b; \ union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vtbl1v8qi (__a.val, __b.val); \ + __rv.__o = __builtin_neon_vtbl1v8qi (__ax.val, __bx.val); \ __rv.__i; \ }) #define vtbl1_u8(__a, __b) \ ({ \ + uint8x8_t __ax = __a; \ + uint8x8_t __bx = __b; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vtbl1v8qi ((__neon_int8x8_t) __a.val, (__neon_int8x8_t) __b.val); \ + __rv.__o = __builtin_neon_vtbl1v8qi ((__neon_int8x8_t) __ax.val, (__neon_int8x8_t) __bx.val); \ __rv.__i; \ }) #define vtbl1_p8(__a, __b) \ ({ \ + poly8x8_t __ax = __a; \ + uint8x8_t __bx = __b; \ union { poly8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vtbl1v8qi ((__neon_int8x8_t) __a.val, (__neon_int8x8_t) __b.val); \ + __rv.__o = __builtin_neon_vtbl1v8qi ((__neon_int8x8_t) __ax.val, (__neon_int8x8_t) __bx.val); \ __rv.__i; \ }) #define vtbl2_s8(__a, __b) \ ({ \ union { int8x8x2_t __i; __neon_int8x8x2_t __o; } __au = { __a }; \ + int8x8_t __bx = __b; \ union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vtbl2v8qi (__au.__o, __b.val); \ + __rv.__o = __builtin_neon_vtbl2v8qi (__au.__o, __bx.val); \ __rv.__i; \ }) #define vtbl2_u8(__a, __b) \ ({ \ union { uint8x8x2_t __i; __neon_int8x8x2_t __o; } __au = { __a }; \ + uint8x8_t __bx = __b; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vtbl2v8qi (__au.__o, (__neon_int8x8_t) __b.val); \ + __rv.__o = 
__builtin_neon_vtbl2v8qi (__au.__o, (__neon_int8x8_t) __bx.val); \ __rv.__i; \ }) #define vtbl2_p8(__a, __b) \ ({ \ union { poly8x8x2_t __i; __neon_int8x8x2_t __o; } __au = { __a }; \ + uint8x8_t __bx = __b; \ union { poly8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vtbl2v8qi (__au.__o, (__neon_int8x8_t) __b.val); \ + __rv.__o = __builtin_neon_vtbl2v8qi (__au.__o, (__neon_int8x8_t) __bx.val); \ __rv.__i; \ }) #define vtbl3_s8(__a, __b) \ ({ \ union { int8x8x3_t __i; __neon_int8x8x3_t __o; } __au = { __a }; \ + int8x8_t __bx = __b; \ union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vtbl3v8qi (__au.__o, __b.val); \ + __rv.__o = __builtin_neon_vtbl3v8qi (__au.__o, __bx.val); \ __rv.__i; \ }) #define vtbl3_u8(__a, __b) \ ({ \ union { uint8x8x3_t __i; __neon_int8x8x3_t __o; } __au = { __a }; \ + uint8x8_t __bx = __b; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vtbl3v8qi (__au.__o, (__neon_int8x8_t) __b.val); \ + __rv.__o = __builtin_neon_vtbl3v8qi (__au.__o, (__neon_int8x8_t) __bx.val); \ __rv.__i; \ }) #define vtbl3_p8(__a, __b) \ ({ \ union { poly8x8x3_t __i; __neon_int8x8x3_t __o; } __au = { __a }; \ + uint8x8_t __bx = __b; \ union { poly8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vtbl3v8qi (__au.__o, (__neon_int8x8_t) __b.val); \ + __rv.__o = __builtin_neon_vtbl3v8qi (__au.__o, (__neon_int8x8_t) __bx.val); \ __rv.__i; \ }) #define vtbl4_s8(__a, __b) \ ({ \ union { int8x8x4_t __i; __neon_int8x8x4_t __o; } __au = { __a }; \ + int8x8_t __bx = __b; \ union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vtbl4v8qi (__au.__o, __b.val); \ + __rv.__o = __builtin_neon_vtbl4v8qi (__au.__o, __bx.val); \ __rv.__i; \ }) #define vtbl4_u8(__a, __b) \ ({ \ union { uint8x8x4_t __i; __neon_int8x8x4_t __o; } __au = { __a }; \ + uint8x8_t __bx = __b; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = 
__builtin_neon_vtbl4v8qi (__au.__o, (__neon_int8x8_t) __b.val); \ + __rv.__o = __builtin_neon_vtbl4v8qi (__au.__o, (__neon_int8x8_t) __bx.val); \ __rv.__i; \ }) #define vtbl4_p8(__a, __b) \ ({ \ union { poly8x8x4_t __i; __neon_int8x8x4_t __o; } __au = { __a }; \ + uint8x8_t __bx = __b; \ union { poly8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vtbl4v8qi (__au.__o, (__neon_int8x8_t) __b.val); \ + __rv.__o = __builtin_neon_vtbl4v8qi (__au.__o, (__neon_int8x8_t) __bx.val); \ __rv.__i; \ }) #define vtbx1_s8(__a, __b, __c) \ ({ \ + int8x8_t __ax = __a; \ + int8x8_t __bx = __b; \ + int8x8_t __cx = __c; \ union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vtbx1v8qi (__a.val, __b.val, __c.val); \ + __rv.__o = __builtin_neon_vtbx1v8qi (__ax.val, __bx.val, __cx.val); \ __rv.__i; \ }) #define vtbx1_u8(__a, __b, __c) \ ({ \ + uint8x8_t __ax = __a; \ + uint8x8_t __bx = __b; \ + uint8x8_t __cx = __c; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vtbx1v8qi ((__neon_int8x8_t) __a.val, (__neon_int8x8_t) __b.val, (__neon_int8x8_t) __c.val); \ + __rv.__o = __builtin_neon_vtbx1v8qi ((__neon_int8x8_t) __ax.val, (__neon_int8x8_t) __bx.val, (__neon_int8x8_t) __cx.val); \ __rv.__i; \ }) #define vtbx1_p8(__a, __b, __c) \ ({ \ + poly8x8_t __ax = __a; \ + poly8x8_t __bx = __b; \ + uint8x8_t __cx = __c; \ union { poly8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vtbx1v8qi ((__neon_int8x8_t) __a.val, (__neon_int8x8_t) __b.val, (__neon_int8x8_t) __c.val); \ + __rv.__o = __builtin_neon_vtbx1v8qi ((__neon_int8x8_t) __ax.val, (__neon_int8x8_t) __bx.val, (__neon_int8x8_t) __cx.val); \ __rv.__i; \ }) #define vtbx2_s8(__a, __b, __c) \ ({ \ + int8x8_t __ax = __a; \ union { int8x8x2_t __i; __neon_int8x8x2_t __o; } __bu = { __b }; \ + int8x8_t __cx = __c; \ union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vtbx2v8qi (__a.val, __bu.__o, __c.val); \ + __rv.__o 
= __builtin_neon_vtbx2v8qi (__ax.val, __bu.__o, __cx.val); \ __rv.__i; \ }) #define vtbx2_u8(__a, __b, __c) \ ({ \ + uint8x8_t __ax = __a; \ union { uint8x8x2_t __i; __neon_int8x8x2_t __o; } __bu = { __b }; \ + uint8x8_t __cx = __c; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vtbx2v8qi ((__neon_int8x8_t) __a.val, __bu.__o, (__neon_int8x8_t) __c.val); \ + __rv.__o = __builtin_neon_vtbx2v8qi ((__neon_int8x8_t) __ax.val, __bu.__o, (__neon_int8x8_t) __cx.val); \ __rv.__i; \ }) #define vtbx2_p8(__a, __b, __c) \ ({ \ + poly8x8_t __ax = __a; \ union { poly8x8x2_t __i; __neon_int8x8x2_t __o; } __bu = { __b }; \ + uint8x8_t __cx = __c; \ union { poly8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vtbx2v8qi ((__neon_int8x8_t) __a.val, __bu.__o, (__neon_int8x8_t) __c.val); \ + __rv.__o = __builtin_neon_vtbx2v8qi ((__neon_int8x8_t) __ax.val, __bu.__o, (__neon_int8x8_t) __cx.val); \ __rv.__i; \ }) #define vtbx3_s8(__a, __b, __c) \ ({ \ + int8x8_t __ax = __a; \ union { int8x8x3_t __i; __neon_int8x8x3_t __o; } __bu = { __b }; \ + int8x8_t __cx = __c; \ union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vtbx3v8qi (__a.val, __bu.__o, __c.val); \ + __rv.__o = __builtin_neon_vtbx3v8qi (__ax.val, __bu.__o, __cx.val); \ __rv.__i; \ }) #define vtbx3_u8(__a, __b, __c) \ ({ \ + uint8x8_t __ax = __a; \ union { uint8x8x3_t __i; __neon_int8x8x3_t __o; } __bu = { __b }; \ + uint8x8_t __cx = __c; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vtbx3v8qi ((__neon_int8x8_t) __a.val, __bu.__o, (__neon_int8x8_t) __c.val); \ + __rv.__o = __builtin_neon_vtbx3v8qi ((__neon_int8x8_t) __ax.val, __bu.__o, (__neon_int8x8_t) __cx.val); \ __rv.__i; \ }) #define vtbx3_p8(__a, __b, __c) \ ({ \ + poly8x8_t __ax = __a; \ union { poly8x8x3_t __i; __neon_int8x8x3_t __o; } __bu = { __b }; \ + uint8x8_t __cx = __c; \ union { poly8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = 
__builtin_neon_vtbx3v8qi ((__neon_int8x8_t) __a.val, __bu.__o, (__neon_int8x8_t) __c.val); \ + __rv.__o = __builtin_neon_vtbx3v8qi ((__neon_int8x8_t) __ax.val, __bu.__o, (__neon_int8x8_t) __cx.val); \ __rv.__i; \ }) #define vtbx4_s8(__a, __b, __c) \ ({ \ + int8x8_t __ax = __a; \ union { int8x8x4_t __i; __neon_int8x8x4_t __o; } __bu = { __b }; \ + int8x8_t __cx = __c; \ union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vtbx4v8qi (__a.val, __bu.__o, __c.val); \ + __rv.__o = __builtin_neon_vtbx4v8qi (__ax.val, __bu.__o, __cx.val); \ __rv.__i; \ }) #define vtbx4_u8(__a, __b, __c) \ ({ \ + uint8x8_t __ax = __a; \ union { uint8x8x4_t __i; __neon_int8x8x4_t __o; } __bu = { __b }; \ + uint8x8_t __cx = __c; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vtbx4v8qi ((__neon_int8x8_t) __a.val, __bu.__o, (__neon_int8x8_t) __c.val); \ + __rv.__o = __builtin_neon_vtbx4v8qi ((__neon_int8x8_t) __ax.val, __bu.__o, (__neon_int8x8_t) __cx.val); \ __rv.__i; \ }) #define vtbx4_p8(__a, __b, __c) \ ({ \ + poly8x8_t __ax = __a; \ union { poly8x8x4_t __i; __neon_int8x8x4_t __o; } __bu = { __b }; \ + uint8x8_t __cx = __c; \ union { poly8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vtbx4v8qi ((__neon_int8x8_t) __a.val, __bu.__o, (__neon_int8x8_t) __c.val); \ + __rv.__o = __builtin_neon_vtbx4v8qi ((__neon_int8x8_t) __ax.val, __bu.__o, (__neon_int8x8_t) __cx.val); \ __rv.__i; \ }) #define vmul_lane_s16(__a, __b, __c) \ ({ \ + int16x4_t __ax = __a; \ + int16x4_t __bx = __b; \ union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmul_lanev4hi (__a.val, __b.val, __c, 1); \ + __rv.__o = __builtin_neon_vmul_lanev4hi (__ax.val, __bx.val, __c, 1); \ __rv.__i; \ }) #define vmul_lane_s32(__a, __b, __c) \ ({ \ + int32x2_t __ax = __a; \ + int32x2_t __bx = __b; \ union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmul_lanev2si (__a.val, __b.val, __c, 1); 
\ + __rv.__o = __builtin_neon_vmul_lanev2si (__ax.val, __bx.val, __c, 1); \ __rv.__i; \ }) #define vmul_lane_f32(__a, __b, __c) \ ({ \ + float32x2_t __ax = __a; \ + float32x2_t __bx = __b; \ union { float32x2_t __i; __neon_float32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmul_lanev2sf (__a.val, __b.val, __c, 5); \ + __rv.__o = __builtin_neon_vmul_lanev2sf (__ax.val, __bx.val, __c, 5); \ __rv.__i; \ }) #define vmul_lane_u16(__a, __b, __c) \ ({ \ + uint16x4_t __ax = __a; \ + uint16x4_t __bx = __b; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmul_lanev4hi ((__neon_int16x4_t) __a.val, (__neon_int16x4_t) __b.val, __c, 0); \ + __rv.__o = __builtin_neon_vmul_lanev4hi ((__neon_int16x4_t) __ax.val, (__neon_int16x4_t) __bx.val, __c, 0); \ __rv.__i; \ }) #define vmul_lane_u32(__a, __b, __c) \ ({ \ + uint32x2_t __ax = __a; \ + uint32x2_t __bx = __b; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmul_lanev2si ((__neon_int32x2_t) __a.val, (__neon_int32x2_t) __b.val, __c, 0); \ + __rv.__o = __builtin_neon_vmul_lanev2si ((__neon_int32x2_t) __ax.val, (__neon_int32x2_t) __bx.val, __c, 0); \ __rv.__i; \ }) #define vmulq_lane_s16(__a, __b, __c) \ ({ \ + int16x8_t __ax = __a; \ + int16x4_t __bx = __b; \ union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmul_lanev8hi (__a.val, __b.val, __c, 1); \ + __rv.__o = __builtin_neon_vmul_lanev8hi (__ax.val, __bx.val, __c, 1); \ __rv.__i; \ }) #define vmulq_lane_s32(__a, __b, __c) \ ({ \ + int32x4_t __ax = __a; \ + int32x2_t __bx = __b; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmul_lanev4si (__a.val, __b.val, __c, 1); \ + __rv.__o = __builtin_neon_vmul_lanev4si (__ax.val, __bx.val, __c, 1); \ __rv.__i; \ }) #define vmulq_lane_f32(__a, __b, __c) \ ({ \ + float32x4_t __ax = __a; \ + float32x2_t __bx = __b; \ union { float32x4_t __i; __neon_float32x4_t __o; } __rv; \ - __rv.__o = 
__builtin_neon_vmul_lanev4sf (__a.val, __b.val, __c, 5); \ + __rv.__o = __builtin_neon_vmul_lanev4sf (__ax.val, __bx.val, __c, 5); \ __rv.__i; \ }) #define vmulq_lane_u16(__a, __b, __c) \ ({ \ + uint16x8_t __ax = __a; \ + uint16x4_t __bx = __b; \ union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmul_lanev8hi ((__neon_int16x8_t) __a.val, (__neon_int16x4_t) __b.val, __c, 0); \ + __rv.__o = __builtin_neon_vmul_lanev8hi ((__neon_int16x8_t) __ax.val, (__neon_int16x4_t) __bx.val, __c, 0); \ __rv.__i; \ }) #define vmulq_lane_u32(__a, __b, __c) \ ({ \ + uint32x4_t __ax = __a; \ + uint32x2_t __bx = __b; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmul_lanev4si ((__neon_int32x4_t) __a.val, (__neon_int32x2_t) __b.val, __c, 0); \ + __rv.__o = __builtin_neon_vmul_lanev4si ((__neon_int32x4_t) __ax.val, (__neon_int32x2_t) __bx.val, __c, 0); \ __rv.__i; \ }) #define vmla_lane_s16(__a, __b, __c, __d) \ ({ \ + int16x4_t __ax = __a; \ + int16x4_t __bx = __b; \ + int16x4_t __cx = __c; \ union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmla_lanev4hi (__a.val, __b.val, __c.val, __d, 1); \ + __rv.__o = __builtin_neon_vmla_lanev4hi (__ax.val, __bx.val, __cx.val, __d, 1); \ __rv.__i; \ }) #define vmla_lane_s32(__a, __b, __c, __d) \ ({ \ + int32x2_t __ax = __a; \ + int32x2_t __bx = __b; \ + int32x2_t __cx = __c; \ union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmla_lanev2si (__a.val, __b.val, __c.val, __d, 1); \ + __rv.__o = __builtin_neon_vmla_lanev2si (__ax.val, __bx.val, __cx.val, __d, 1); \ __rv.__i; \ }) #define vmla_lane_f32(__a, __b, __c, __d) \ ({ \ + float32x2_t __ax = __a; \ + float32x2_t __bx = __b; \ + float32x2_t __cx = __c; \ union { float32x2_t __i; __neon_float32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmla_lanev2sf (__a.val, __b.val, __c.val, __d, 5); \ + __rv.__o = __builtin_neon_vmla_lanev2sf (__ax.val, __bx.val, 
__cx.val, __d, 5); \ __rv.__i; \ }) #define vmla_lane_u16(__a, __b, __c, __d) \ ({ \ + uint16x4_t __ax = __a; \ + uint16x4_t __bx = __b; \ + uint16x4_t __cx = __c; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmla_lanev4hi ((__neon_int16x4_t) __a.val, (__neon_int16x4_t) __b.val, (__neon_int16x4_t) __c.val, __d, 0); \ + __rv.__o = __builtin_neon_vmla_lanev4hi ((__neon_int16x4_t) __ax.val, (__neon_int16x4_t) __bx.val, (__neon_int16x4_t) __cx.val, __d, 0); \ __rv.__i; \ }) #define vmla_lane_u32(__a, __b, __c, __d) \ ({ \ + uint32x2_t __ax = __a; \ + uint32x2_t __bx = __b; \ + uint32x2_t __cx = __c; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmla_lanev2si ((__neon_int32x2_t) __a.val, (__neon_int32x2_t) __b.val, (__neon_int32x2_t) __c.val, __d, 0); \ + __rv.__o = __builtin_neon_vmla_lanev2si ((__neon_int32x2_t) __ax.val, (__neon_int32x2_t) __bx.val, (__neon_int32x2_t) __cx.val, __d, 0); \ __rv.__i; \ }) #define vmlaq_lane_s16(__a, __b, __c, __d) \ ({ \ + int16x8_t __ax = __a; \ + int16x8_t __bx = __b; \ + int16x4_t __cx = __c; \ union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmla_lanev8hi (__a.val, __b.val, __c.val, __d, 1); \ + __rv.__o = __builtin_neon_vmla_lanev8hi (__ax.val, __bx.val, __cx.val, __d, 1); \ __rv.__i; \ }) #define vmlaq_lane_s32(__a, __b, __c, __d) \ ({ \ + int32x4_t __ax = __a; \ + int32x4_t __bx = __b; \ + int32x2_t __cx = __c; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmla_lanev4si (__a.val, __b.val, __c.val, __d, 1); \ + __rv.__o = __builtin_neon_vmla_lanev4si (__ax.val, __bx.val, __cx.val, __d, 1); \ __rv.__i; \ }) #define vmlaq_lane_f32(__a, __b, __c, __d) \ ({ \ + float32x4_t __ax = __a; \ + float32x4_t __bx = __b; \ + float32x2_t __cx = __c; \ union { float32x4_t __i; __neon_float32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmla_lanev4sf (__a.val, __b.val, __c.val, __d, 5); \ + 
__rv.__o = __builtin_neon_vmla_lanev4sf (__ax.val, __bx.val, __cx.val, __d, 5); \ __rv.__i; \ }) #define vmlaq_lane_u16(__a, __b, __c, __d) \ ({ \ + uint16x8_t __ax = __a; \ + uint16x8_t __bx = __b; \ + uint16x4_t __cx = __c; \ union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmla_lanev8hi ((__neon_int16x8_t) __a.val, (__neon_int16x8_t) __b.val, (__neon_int16x4_t) __c.val, __d, 0); \ + __rv.__o = __builtin_neon_vmla_lanev8hi ((__neon_int16x8_t) __ax.val, (__neon_int16x8_t) __bx.val, (__neon_int16x4_t) __cx.val, __d, 0); \ __rv.__i; \ }) #define vmlaq_lane_u32(__a, __b, __c, __d) \ ({ \ + uint32x4_t __ax = __a; \ + uint32x4_t __bx = __b; \ + uint32x2_t __cx = __c; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmla_lanev4si ((__neon_int32x4_t) __a.val, (__neon_int32x4_t) __b.val, (__neon_int32x2_t) __c.val, __d, 0); \ + __rv.__o = __builtin_neon_vmla_lanev4si ((__neon_int32x4_t) __ax.val, (__neon_int32x4_t) __bx.val, (__neon_int32x2_t) __cx.val, __d, 0); \ __rv.__i; \ }) #define vmlal_lane_s16(__a, __b, __c, __d) \ ({ \ + int32x4_t __ax = __a; \ + int16x4_t __bx = __b; \ + int16x4_t __cx = __c; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmlal_lanev4hi (__a.val, __b.val, __c.val, __d, 1); \ + __rv.__o = __builtin_neon_vmlal_lanev4hi (__ax.val, __bx.val, __cx.val, __d, 1); \ __rv.__i; \ }) #define vmlal_lane_s32(__a, __b, __c, __d) \ ({ \ + int64x2_t __ax = __a; \ + int32x2_t __bx = __b; \ + int32x2_t __cx = __c; \ union { int64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmlal_lanev2si (__a.val, __b.val, __c.val, __d, 1); \ + __rv.__o = __builtin_neon_vmlal_lanev2si (__ax.val, __bx.val, __cx.val, __d, 1); \ __rv.__i; \ }) #define vmlal_lane_u16(__a, __b, __c, __d) \ ({ \ + uint32x4_t __ax = __a; \ + uint16x4_t __bx = __b; \ + uint16x4_t __cx = __c; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = 
__builtin_neon_vmlal_lanev4hi ((__neon_int32x4_t) __a.val, (__neon_int16x4_t) __b.val, (__neon_int16x4_t) __c.val, __d, 0); \ + __rv.__o = __builtin_neon_vmlal_lanev4hi ((__neon_int32x4_t) __ax.val, (__neon_int16x4_t) __bx.val, (__neon_int16x4_t) __cx.val, __d, 0); \ __rv.__i; \ }) #define vmlal_lane_u32(__a, __b, __c, __d) \ ({ \ + uint64x2_t __ax = __a; \ + uint32x2_t __bx = __b; \ + uint32x2_t __cx = __c; \ union { uint64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmlal_lanev2si ((__neon_int64x2_t) __a.val, (__neon_int32x2_t) __b.val, (__neon_int32x2_t) __c.val, __d, 0); \ + __rv.__o = __builtin_neon_vmlal_lanev2si ((__neon_int64x2_t) __ax.val, (__neon_int32x2_t) __bx.val, (__neon_int32x2_t) __cx.val, __d, 0); \ __rv.__i; \ }) #define vqdmlal_lane_s16(__a, __b, __c, __d) \ ({ \ + int32x4_t __ax = __a; \ + int16x4_t __bx = __b; \ + int16x4_t __cx = __c; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqdmlal_lanev4hi (__a.val, __b.val, __c.val, __d, 1); \ + __rv.__o = __builtin_neon_vqdmlal_lanev4hi (__ax.val, __bx.val, __cx.val, __d, 1); \ __rv.__i; \ }) #define vqdmlal_lane_s32(__a, __b, __c, __d) \ ({ \ + int64x2_t __ax = __a; \ + int32x2_t __bx = __b; \ + int32x2_t __cx = __c; \ union { int64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqdmlal_lanev2si (__a.val, __b.val, __c.val, __d, 1); \ + __rv.__o = __builtin_neon_vqdmlal_lanev2si (__ax.val, __bx.val, __cx.val, __d, 1); \ __rv.__i; \ }) #define vmls_lane_s16(__a, __b, __c, __d) \ ({ \ + int16x4_t __ax = __a; \ + int16x4_t __bx = __b; \ + int16x4_t __cx = __c; \ union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmls_lanev4hi (__a.val, __b.val, __c.val, __d, 1); \ + __rv.__o = __builtin_neon_vmls_lanev4hi (__ax.val, __bx.val, __cx.val, __d, 1); \ __rv.__i; \ }) #define vmls_lane_s32(__a, __b, __c, __d) \ ({ \ + int32x2_t __ax = __a; \ + int32x2_t __bx = __b; \ + int32x2_t __cx = __c; \ 
union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmls_lanev2si (__a.val, __b.val, __c.val, __d, 1); \ + __rv.__o = __builtin_neon_vmls_lanev2si (__ax.val, __bx.val, __cx.val, __d, 1); \ __rv.__i; \ }) #define vmls_lane_f32(__a, __b, __c, __d) \ ({ \ + float32x2_t __ax = __a; \ + float32x2_t __bx = __b; \ + float32x2_t __cx = __c; \ union { float32x2_t __i; __neon_float32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmls_lanev2sf (__a.val, __b.val, __c.val, __d, 5); \ + __rv.__o = __builtin_neon_vmls_lanev2sf (__ax.val, __bx.val, __cx.val, __d, 5); \ __rv.__i; \ }) #define vmls_lane_u16(__a, __b, __c, __d) \ ({ \ + uint16x4_t __ax = __a; \ + uint16x4_t __bx = __b; \ + uint16x4_t __cx = __c; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmls_lanev4hi ((__neon_int16x4_t) __a.val, (__neon_int16x4_t) __b.val, (__neon_int16x4_t) __c.val, __d, 0); \ + __rv.__o = __builtin_neon_vmls_lanev4hi ((__neon_int16x4_t) __ax.val, (__neon_int16x4_t) __bx.val, (__neon_int16x4_t) __cx.val, __d, 0); \ __rv.__i; \ }) #define vmls_lane_u32(__a, __b, __c, __d) \ ({ \ + uint32x2_t __ax = __a; \ + uint32x2_t __bx = __b; \ + uint32x2_t __cx = __c; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmls_lanev2si ((__neon_int32x2_t) __a.val, (__neon_int32x2_t) __b.val, (__neon_int32x2_t) __c.val, __d, 0); \ + __rv.__o = __builtin_neon_vmls_lanev2si ((__neon_int32x2_t) __ax.val, (__neon_int32x2_t) __bx.val, (__neon_int32x2_t) __cx.val, __d, 0); \ __rv.__i; \ }) #define vmlsq_lane_s16(__a, __b, __c, __d) \ ({ \ + int16x8_t __ax = __a; \ + int16x8_t __bx = __b; \ + int16x4_t __cx = __c; \ union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmls_lanev8hi (__a.val, __b.val, __c.val, __d, 1); \ + __rv.__o = __builtin_neon_vmls_lanev8hi (__ax.val, __bx.val, __cx.val, __d, 1); \ __rv.__i; \ }) #define vmlsq_lane_s32(__a, __b, __c, __d) \ ({ \ + int32x4_t __ax = 
__a; \ + int32x4_t __bx = __b; \ + int32x2_t __cx = __c; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmls_lanev4si (__a.val, __b.val, __c.val, __d, 1); \ + __rv.__o = __builtin_neon_vmls_lanev4si (__ax.val, __bx.val, __cx.val, __d, 1); \ __rv.__i; \ }) #define vmlsq_lane_f32(__a, __b, __c, __d) \ ({ \ + float32x4_t __ax = __a; \ + float32x4_t __bx = __b; \ + float32x2_t __cx = __c; \ union { float32x4_t __i; __neon_float32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmls_lanev4sf (__a.val, __b.val, __c.val, __d, 5); \ + __rv.__o = __builtin_neon_vmls_lanev4sf (__ax.val, __bx.val, __cx.val, __d, 5); \ __rv.__i; \ }) #define vmlsq_lane_u16(__a, __b, __c, __d) \ ({ \ + uint16x8_t __ax = __a; \ + uint16x8_t __bx = __b; \ + uint16x4_t __cx = __c; \ union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmls_lanev8hi ((__neon_int16x8_t) __a.val, (__neon_int16x8_t) __b.val, (__neon_int16x4_t) __c.val, __d, 0); \ + __rv.__o = __builtin_neon_vmls_lanev8hi ((__neon_int16x8_t) __ax.val, (__neon_int16x8_t) __bx.val, (__neon_int16x4_t) __cx.val, __d, 0); \ __rv.__i; \ }) #define vmlsq_lane_u32(__a, __b, __c, __d) \ ({ \ + uint32x4_t __ax = __a; \ + uint32x4_t __bx = __b; \ + uint32x2_t __cx = __c; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmls_lanev4si ((__neon_int32x4_t) __a.val, (__neon_int32x4_t) __b.val, (__neon_int32x2_t) __c.val, __d, 0); \ + __rv.__o = __builtin_neon_vmls_lanev4si ((__neon_int32x4_t) __ax.val, (__neon_int32x4_t) __bx.val, (__neon_int32x2_t) __cx.val, __d, 0); \ __rv.__i; \ }) #define vmlsl_lane_s16(__a, __b, __c, __d) \ ({ \ + int32x4_t __ax = __a; \ + int16x4_t __bx = __b; \ + int16x4_t __cx = __c; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmlsl_lanev4hi (__a.val, __b.val, __c.val, __d, 1); \ + __rv.__o = __builtin_neon_vmlsl_lanev4hi (__ax.val, __bx.val, __cx.val, __d, 1); \ __rv.__i; \ }) 
#define vmlsl_lane_s32(__a, __b, __c, __d) \ ({ \ + int64x2_t __ax = __a; \ + int32x2_t __bx = __b; \ + int32x2_t __cx = __c; \ union { int64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmlsl_lanev2si (__a.val, __b.val, __c.val, __d, 1); \ + __rv.__o = __builtin_neon_vmlsl_lanev2si (__ax.val, __bx.val, __cx.val, __d, 1); \ __rv.__i; \ }) #define vmlsl_lane_u16(__a, __b, __c, __d) \ ({ \ + uint32x4_t __ax = __a; \ + uint16x4_t __bx = __b; \ + uint16x4_t __cx = __c; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmlsl_lanev4hi ((__neon_int32x4_t) __a.val, (__neon_int16x4_t) __b.val, (__neon_int16x4_t) __c.val, __d, 0); \ + __rv.__o = __builtin_neon_vmlsl_lanev4hi ((__neon_int32x4_t) __ax.val, (__neon_int16x4_t) __bx.val, (__neon_int16x4_t) __cx.val, __d, 0); \ __rv.__i; \ }) #define vmlsl_lane_u32(__a, __b, __c, __d) \ ({ \ + uint64x2_t __ax = __a; \ + uint32x2_t __bx = __b; \ + uint32x2_t __cx = __c; \ union { uint64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmlsl_lanev2si ((__neon_int64x2_t) __a.val, (__neon_int32x2_t) __b.val, (__neon_int32x2_t) __c.val, __d, 0); \ + __rv.__o = __builtin_neon_vmlsl_lanev2si ((__neon_int64x2_t) __ax.val, (__neon_int32x2_t) __bx.val, (__neon_int32x2_t) __cx.val, __d, 0); \ __rv.__i; \ }) #define vqdmlsl_lane_s16(__a, __b, __c, __d) \ ({ \ + int32x4_t __ax = __a; \ + int16x4_t __bx = __b; \ + int16x4_t __cx = __c; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqdmlsl_lanev4hi (__a.val, __b.val, __c.val, __d, 1); \ + __rv.__o = __builtin_neon_vqdmlsl_lanev4hi (__ax.val, __bx.val, __cx.val, __d, 1); \ __rv.__i; \ }) #define vqdmlsl_lane_s32(__a, __b, __c, __d) \ ({ \ + int64x2_t __ax = __a; \ + int32x2_t __bx = __b; \ + int32x2_t __cx = __c; \ union { int64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqdmlsl_lanev2si (__a.val, __b.val, __c.val, __d, 1); \ + __rv.__o = 
__builtin_neon_vqdmlsl_lanev2si (__ax.val, __bx.val, __cx.val, __d, 1); \ __rv.__i; \ }) #define vmull_lane_s16(__a, __b, __c) \ ({ \ + int16x4_t __ax = __a; \ + int16x4_t __bx = __b; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmull_lanev4hi (__a.val, __b.val, __c, 1); \ + __rv.__o = __builtin_neon_vmull_lanev4hi (__ax.val, __bx.val, __c, 1); \ __rv.__i; \ }) #define vmull_lane_s32(__a, __b, __c) \ ({ \ + int32x2_t __ax = __a; \ + int32x2_t __bx = __b; \ union { int64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmull_lanev2si (__a.val, __b.val, __c, 1); \ + __rv.__o = __builtin_neon_vmull_lanev2si (__ax.val, __bx.val, __c, 1); \ __rv.__i; \ }) #define vmull_lane_u16(__a, __b, __c) \ ({ \ + uint16x4_t __ax = __a; \ + uint16x4_t __bx = __b; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmull_lanev4hi ((__neon_int16x4_t) __a.val, (__neon_int16x4_t) __b.val, __c, 0); \ + __rv.__o = __builtin_neon_vmull_lanev4hi ((__neon_int16x4_t) __ax.val, (__neon_int16x4_t) __bx.val, __c, 0); \ __rv.__i; \ }) #define vmull_lane_u32(__a, __b, __c) \ ({ \ + uint32x2_t __ax = __a; \ + uint32x2_t __bx = __b; \ union { uint64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmull_lanev2si ((__neon_int32x2_t) __a.val, (__neon_int32x2_t) __b.val, __c, 0); \ + __rv.__o = __builtin_neon_vmull_lanev2si ((__neon_int32x2_t) __ax.val, (__neon_int32x2_t) __bx.val, __c, 0); \ __rv.__i; \ }) #define vqdmull_lane_s16(__a, __b, __c) \ ({ \ + int16x4_t __ax = __a; \ + int16x4_t __bx = __b; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqdmull_lanev4hi (__a.val, __b.val, __c, 1); \ + __rv.__o = __builtin_neon_vqdmull_lanev4hi (__ax.val, __bx.val, __c, 1); \ __rv.__i; \ }) #define vqdmull_lane_s32(__a, __b, __c) \ ({ \ + int32x2_t __ax = __a; \ + int32x2_t __bx = __b; \ union { int64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = 
__builtin_neon_vqdmull_lanev2si (__a.val, __b.val, __c, 1); \ + __rv.__o = __builtin_neon_vqdmull_lanev2si (__ax.val, __bx.val, __c, 1); \ __rv.__i; \ }) #define vqdmulhq_lane_s16(__a, __b, __c) \ ({ \ + int16x8_t __ax = __a; \ + int16x4_t __bx = __b; \ union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqdmulh_lanev8hi (__a.val, __b.val, __c, 1); \ + __rv.__o = __builtin_neon_vqdmulh_lanev8hi (__ax.val, __bx.val, __c, 1); \ __rv.__i; \ }) #define vqdmulhq_lane_s32(__a, __b, __c) \ ({ \ + int32x4_t __ax = __a; \ + int32x2_t __bx = __b; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqdmulh_lanev4si (__a.val, __b.val, __c, 1); \ + __rv.__o = __builtin_neon_vqdmulh_lanev4si (__ax.val, __bx.val, __c, 1); \ __rv.__i; \ }) #define vqdmulh_lane_s16(__a, __b, __c) \ ({ \ + int16x4_t __ax = __a; \ + int16x4_t __bx = __b; \ union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqdmulh_lanev4hi (__a.val, __b.val, __c, 1); \ + __rv.__o = __builtin_neon_vqdmulh_lanev4hi (__ax.val, __bx.val, __c, 1); \ __rv.__i; \ }) #define vqdmulh_lane_s32(__a, __b, __c) \ ({ \ + int32x2_t __ax = __a; \ + int32x2_t __bx = __b; \ union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqdmulh_lanev2si (__a.val, __b.val, __c, 1); \ + __rv.__o = __builtin_neon_vqdmulh_lanev2si (__ax.val, __bx.val, __c, 1); \ __rv.__i; \ }) #define vqrdmulhq_lane_s16(__a, __b, __c) \ ({ \ + int16x8_t __ax = __a; \ + int16x4_t __bx = __b; \ union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqdmulh_lanev8hi (__a.val, __b.val, __c, 3); \ + __rv.__o = __builtin_neon_vqdmulh_lanev8hi (__ax.val, __bx.val, __c, 3); \ __rv.__i; \ }) #define vqrdmulhq_lane_s32(__a, __b, __c) \ ({ \ + int32x4_t __ax = __a; \ + int32x2_t __bx = __b; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqdmulh_lanev4si (__a.val, __b.val, __c, 3); \ + 
__rv.__o = __builtin_neon_vqdmulh_lanev4si (__ax.val, __bx.val, __c, 3); \ __rv.__i; \ }) #define vqrdmulh_lane_s16(__a, __b, __c) \ ({ \ + int16x4_t __ax = __a; \ + int16x4_t __bx = __b; \ union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqdmulh_lanev4hi (__a.val, __b.val, __c, 3); \ + __rv.__o = __builtin_neon_vqdmulh_lanev4hi (__ax.val, __bx.val, __c, 3); \ __rv.__i; \ }) #define vqrdmulh_lane_s32(__a, __b, __c) \ ({ \ + int32x2_t __ax = __a; \ + int32x2_t __bx = __b; \ union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqdmulh_lanev2si (__a.val, __b.val, __c, 3); \ + __rv.__o = __builtin_neon_vqdmulh_lanev2si (__ax.val, __bx.val, __c, 3); \ __rv.__i; \ }) #define vmul_n_s16(__a, __b) \ ({ \ + int16x4_t __ax = __a; \ + int16_t __bx = __b; \ union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmul_nv4hi (__a.val, __b, 1); \ + __rv.__o = __builtin_neon_vmul_nv4hi (__ax.val, (__builtin_neon_hi) __bx, 1); \ __rv.__i; \ }) #define vmul_n_s32(__a, __b) \ ({ \ + int32x2_t __ax = __a; \ + int32_t __bx = __b; \ union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmul_nv2si (__a.val, __b, 1); \ + __rv.__o = __builtin_neon_vmul_nv2si (__ax.val, (__builtin_neon_si) __bx, 1); \ __rv.__i; \ }) #define vmul_n_f32(__a, __b) \ ({ \ + float32x2_t __ax = __a; \ + float32_t __bx = __b; \ union { float32x2_t __i; __neon_float32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmul_nv2sf (__a.val, __b, 5); \ + __rv.__o = __builtin_neon_vmul_nv2sf (__ax.val, __bx, 5); \ __rv.__i; \ }) #define vmul_n_u16(__a, __b) \ ({ \ + uint16x4_t __ax = __a; \ + uint16_t __bx = __b; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmul_nv4hi ((__neon_int16x4_t) __a.val, (int16_t) __b, 0); \ + __rv.__o = __builtin_neon_vmul_nv4hi ((__neon_int16x4_t) __ax.val, (__builtin_neon_hi) __bx, 0); \ __rv.__i; \ }) #define vmul_n_u32(__a, __b) \ ({ 
\ + uint32x2_t __ax = __a; \ + uint32_t __bx = __b; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmul_nv2si ((__neon_int32x2_t) __a.val, (int32_t) __b, 0); \ + __rv.__o = __builtin_neon_vmul_nv2si ((__neon_int32x2_t) __ax.val, (__builtin_neon_si) __bx, 0); \ __rv.__i; \ }) #define vmulq_n_s16(__a, __b) \ ({ \ + int16x8_t __ax = __a; \ + int16_t __bx = __b; \ union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmul_nv8hi (__a.val, __b, 1); \ + __rv.__o = __builtin_neon_vmul_nv8hi (__ax.val, (__builtin_neon_hi) __bx, 1); \ __rv.__i; \ }) #define vmulq_n_s32(__a, __b) \ ({ \ + int32x4_t __ax = __a; \ + int32_t __bx = __b; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmul_nv4si (__a.val, __b, 1); \ + __rv.__o = __builtin_neon_vmul_nv4si (__ax.val, (__builtin_neon_si) __bx, 1); \ __rv.__i; \ }) #define vmulq_n_f32(__a, __b) \ ({ \ + float32x4_t __ax = __a; \ + float32_t __bx = __b; \ union { float32x4_t __i; __neon_float32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmul_nv4sf (__a.val, __b, 5); \ + __rv.__o = __builtin_neon_vmul_nv4sf (__ax.val, __bx, 5); \ __rv.__i; \ }) #define vmulq_n_u16(__a, __b) \ ({ \ + uint16x8_t __ax = __a; \ + uint16_t __bx = __b; \ union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmul_nv8hi ((__neon_int16x8_t) __a.val, (int16_t) __b, 0); \ + __rv.__o = __builtin_neon_vmul_nv8hi ((__neon_int16x8_t) __ax.val, (__builtin_neon_hi) __bx, 0); \ __rv.__i; \ }) #define vmulq_n_u32(__a, __b) \ ({ \ + uint32x4_t __ax = __a; \ + uint32_t __bx = __b; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmul_nv4si ((__neon_int32x4_t) __a.val, (int32_t) __b, 0); \ + __rv.__o = __builtin_neon_vmul_nv4si ((__neon_int32x4_t) __ax.val, (__builtin_neon_si) __bx, 0); \ __rv.__i; \ }) #define vmull_n_s16(__a, __b) \ ({ \ + int16x4_t __ax = __a; \ + int16_t __bx = __b; \ union { 
int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmull_nv4hi (__a.val, __b, 1); \ + __rv.__o = __builtin_neon_vmull_nv4hi (__ax.val, (__builtin_neon_hi) __bx, 1); \ __rv.__i; \ }) #define vmull_n_s32(__a, __b) \ ({ \ + int32x2_t __ax = __a; \ + int32_t __bx = __b; \ union { int64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmull_nv2si (__a.val, __b, 1); \ + __rv.__o = __builtin_neon_vmull_nv2si (__ax.val, (__builtin_neon_si) __bx, 1); \ __rv.__i; \ }) #define vmull_n_u16(__a, __b) \ ({ \ + uint16x4_t __ax = __a; \ + uint16_t __bx = __b; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmull_nv4hi ((__neon_int16x4_t) __a.val, (int16_t) __b, 0); \ + __rv.__o = __builtin_neon_vmull_nv4hi ((__neon_int16x4_t) __ax.val, (__builtin_neon_hi) __bx, 0); \ __rv.__i; \ }) #define vmull_n_u32(__a, __b) \ ({ \ + uint32x2_t __ax = __a; \ + uint32_t __bx = __b; \ union { uint64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmull_nv2si ((__neon_int32x2_t) __a.val, (int32_t) __b, 0); \ + __rv.__o = __builtin_neon_vmull_nv2si ((__neon_int32x2_t) __ax.val, (__builtin_neon_si) __bx, 0); \ __rv.__i; \ }) #define vqdmull_n_s16(__a, __b) \ ({ \ + int16x4_t __ax = __a; \ + int16_t __bx = __b; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqdmull_nv4hi (__a.val, __b, 1); \ + __rv.__o = __builtin_neon_vqdmull_nv4hi (__ax.val, (__builtin_neon_hi) __bx, 1); \ __rv.__i; \ }) #define vqdmull_n_s32(__a, __b) \ ({ \ + int32x2_t __ax = __a; \ + int32_t __bx = __b; \ union { int64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqdmull_nv2si (__a.val, __b, 1); \ + __rv.__o = __builtin_neon_vqdmull_nv2si (__ax.val, (__builtin_neon_si) __bx, 1); \ __rv.__i; \ }) #define vqdmulhq_n_s16(__a, __b) \ ({ \ + int16x8_t __ax = __a; \ + int16_t __bx = __b; \ union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = 
__builtin_neon_vqdmulh_nv8hi (__a.val, __b, 1); \ + __rv.__o = __builtin_neon_vqdmulh_nv8hi (__ax.val, (__builtin_neon_hi) __bx, 1); \ __rv.__i; \ }) #define vqdmulhq_n_s32(__a, __b) \ ({ \ + int32x4_t __ax = __a; \ + int32_t __bx = __b; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqdmulh_nv4si (__a.val, __b, 1); \ + __rv.__o = __builtin_neon_vqdmulh_nv4si (__ax.val, (__builtin_neon_si) __bx, 1); \ __rv.__i; \ }) #define vqdmulh_n_s16(__a, __b) \ ({ \ + int16x4_t __ax = __a; \ + int16_t __bx = __b; \ union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqdmulh_nv4hi (__a.val, __b, 1); \ + __rv.__o = __builtin_neon_vqdmulh_nv4hi (__ax.val, (__builtin_neon_hi) __bx, 1); \ __rv.__i; \ }) #define vqdmulh_n_s32(__a, __b) \ ({ \ + int32x2_t __ax = __a; \ + int32_t __bx = __b; \ union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqdmulh_nv2si (__a.val, __b, 1); \ + __rv.__o = __builtin_neon_vqdmulh_nv2si (__ax.val, (__builtin_neon_si) __bx, 1); \ __rv.__i; \ }) #define vqrdmulhq_n_s16(__a, __b) \ ({ \ + int16x8_t __ax = __a; \ + int16_t __bx = __b; \ union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqdmulh_nv8hi (__a.val, __b, 3); \ + __rv.__o = __builtin_neon_vqdmulh_nv8hi (__ax.val, (__builtin_neon_hi) __bx, 3); \ __rv.__i; \ }) #define vqrdmulhq_n_s32(__a, __b) \ ({ \ + int32x4_t __ax = __a; \ + int32_t __bx = __b; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqdmulh_nv4si (__a.val, __b, 3); \ + __rv.__o = __builtin_neon_vqdmulh_nv4si (__ax.val, (__builtin_neon_si) __bx, 3); \ __rv.__i; \ }) #define vqrdmulh_n_s16(__a, __b) \ ({ \ + int16x4_t __ax = __a; \ + int16_t __bx = __b; \ union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqdmulh_nv4hi (__a.val, __b, 3); \ + __rv.__o = __builtin_neon_vqdmulh_nv4hi (__ax.val, (__builtin_neon_hi) __bx, 3); \ __rv.__i; \ }) 
#define vqrdmulh_n_s32(__a, __b) \ ({ \ + int32x2_t __ax = __a; \ + int32_t __bx = __b; \ union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqdmulh_nv2si (__a.val, __b, 3); \ + __rv.__o = __builtin_neon_vqdmulh_nv2si (__ax.val, (__builtin_neon_si) __bx, 3); \ __rv.__i; \ }) #define vmla_n_s16(__a, __b, __c) \ ({ \ + int16x4_t __ax = __a; \ + int16x4_t __bx = __b; \ + int16_t __cx = __c; \ union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmla_nv4hi (__a.val, __b.val, __c, 1); \ + __rv.__o = __builtin_neon_vmla_nv4hi (__ax.val, __bx.val, (__builtin_neon_hi) __cx, 1); \ __rv.__i; \ }) #define vmla_n_s32(__a, __b, __c) \ ({ \ + int32x2_t __ax = __a; \ + int32x2_t __bx = __b; \ + int32_t __cx = __c; \ union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmla_nv2si (__a.val, __b.val, __c, 1); \ + __rv.__o = __builtin_neon_vmla_nv2si (__ax.val, __bx.val, (__builtin_neon_si) __cx, 1); \ __rv.__i; \ }) #define vmla_n_f32(__a, __b, __c) \ ({ \ + float32x2_t __ax = __a; \ + float32x2_t __bx = __b; \ + float32_t __cx = __c; \ union { float32x2_t __i; __neon_float32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmla_nv2sf (__a.val, __b.val, __c, 5); \ + __rv.__o = __builtin_neon_vmla_nv2sf (__ax.val, __bx.val, __cx, 5); \ __rv.__i; \ }) #define vmla_n_u16(__a, __b, __c) \ ({ \ + uint16x4_t __ax = __a; \ + uint16x4_t __bx = __b; \ + uint16_t __cx = __c; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmla_nv4hi ((__neon_int16x4_t) __a.val, (__neon_int16x4_t) __b.val, (int16_t) __c, 0); \ + __rv.__o = __builtin_neon_vmla_nv4hi ((__neon_int16x4_t) __ax.val, (__neon_int16x4_t) __bx.val, (__builtin_neon_hi) __cx, 0); \ __rv.__i; \ }) #define vmla_n_u32(__a, __b, __c) \ ({ \ + uint32x2_t __ax = __a; \ + uint32x2_t __bx = __b; \ + uint32_t __cx = __c; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmla_nv2si 
((__neon_int32x2_t) __a.val, (__neon_int32x2_t) __b.val, (int32_t) __c, 0); \ + __rv.__o = __builtin_neon_vmla_nv2si ((__neon_int32x2_t) __ax.val, (__neon_int32x2_t) __bx.val, (__builtin_neon_si) __cx, 0); \ __rv.__i; \ }) #define vmlaq_n_s16(__a, __b, __c) \ ({ \ + int16x8_t __ax = __a; \ + int16x8_t __bx = __b; \ + int16_t __cx = __c; \ union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmla_nv8hi (__a.val, __b.val, __c, 1); \ + __rv.__o = __builtin_neon_vmla_nv8hi (__ax.val, __bx.val, (__builtin_neon_hi) __cx, 1); \ __rv.__i; \ }) #define vmlaq_n_s32(__a, __b, __c) \ ({ \ + int32x4_t __ax = __a; \ + int32x4_t __bx = __b; \ + int32_t __cx = __c; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmla_nv4si (__a.val, __b.val, __c, 1); \ + __rv.__o = __builtin_neon_vmla_nv4si (__ax.val, __bx.val, (__builtin_neon_si) __cx, 1); \ __rv.__i; \ }) #define vmlaq_n_f32(__a, __b, __c) \ ({ \ + float32x4_t __ax = __a; \ + float32x4_t __bx = __b; \ + float32_t __cx = __c; \ union { float32x4_t __i; __neon_float32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmla_nv4sf (__a.val, __b.val, __c, 5); \ + __rv.__o = __builtin_neon_vmla_nv4sf (__ax.val, __bx.val, __cx, 5); \ __rv.__i; \ }) #define vmlaq_n_u16(__a, __b, __c) \ ({ \ + uint16x8_t __ax = __a; \ + uint16x8_t __bx = __b; \ + uint16_t __cx = __c; \ union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmla_nv8hi ((__neon_int16x8_t) __a.val, (__neon_int16x8_t) __b.val, (int16_t) __c, 0); \ + __rv.__o = __builtin_neon_vmla_nv8hi ((__neon_int16x8_t) __ax.val, (__neon_int16x8_t) __bx.val, (__builtin_neon_hi) __cx, 0); \ __rv.__i; \ }) #define vmlaq_n_u32(__a, __b, __c) \ ({ \ + uint32x4_t __ax = __a; \ + uint32x4_t __bx = __b; \ + uint32_t __cx = __c; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmla_nv4si ((__neon_int32x4_t) __a.val, (__neon_int32x4_t) __b.val, (int32_t) __c, 0); \ + 
__rv.__o = __builtin_neon_vmla_nv4si ((__neon_int32x4_t) __ax.val, (__neon_int32x4_t) __bx.val, (__builtin_neon_si) __cx, 0); \ __rv.__i; \ }) #define vmlal_n_s16(__a, __b, __c) \ ({ \ + int32x4_t __ax = __a; \ + int16x4_t __bx = __b; \ + int16_t __cx = __c; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmlal_nv4hi (__a.val, __b.val, __c, 1); \ + __rv.__o = __builtin_neon_vmlal_nv4hi (__ax.val, __bx.val, (__builtin_neon_hi) __cx, 1); \ __rv.__i; \ }) #define vmlal_n_s32(__a, __b, __c) \ ({ \ + int64x2_t __ax = __a; \ + int32x2_t __bx = __b; \ + int32_t __cx = __c; \ union { int64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmlal_nv2si (__a.val, __b.val, __c, 1); \ + __rv.__o = __builtin_neon_vmlal_nv2si (__ax.val, __bx.val, (__builtin_neon_si) __cx, 1); \ __rv.__i; \ }) #define vmlal_n_u16(__a, __b, __c) \ ({ \ + uint32x4_t __ax = __a; \ + uint16x4_t __bx = __b; \ + uint16_t __cx = __c; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmlal_nv4hi ((__neon_int32x4_t) __a.val, (__neon_int16x4_t) __b.val, (int16_t) __c, 0); \ + __rv.__o = __builtin_neon_vmlal_nv4hi ((__neon_int32x4_t) __ax.val, (__neon_int16x4_t) __bx.val, (__builtin_neon_hi) __cx, 0); \ __rv.__i; \ }) #define vmlal_n_u32(__a, __b, __c) \ ({ \ + uint64x2_t __ax = __a; \ + uint32x2_t __bx = __b; \ + uint32_t __cx = __c; \ union { uint64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmlal_nv2si ((__neon_int64x2_t) __a.val, (__neon_int32x2_t) __b.val, (int32_t) __c, 0); \ + __rv.__o = __builtin_neon_vmlal_nv2si ((__neon_int64x2_t) __ax.val, (__neon_int32x2_t) __bx.val, (__builtin_neon_si) __cx, 0); \ __rv.__i; \ }) #define vqdmlal_n_s16(__a, __b, __c) \ ({ \ + int32x4_t __ax = __a; \ + int16x4_t __bx = __b; \ + int16_t __cx = __c; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqdmlal_nv4hi (__a.val, __b.val, __c, 1); \ + __rv.__o = 
__builtin_neon_vqdmlal_nv4hi (__ax.val, __bx.val, (__builtin_neon_hi) __cx, 1); \ __rv.__i; \ }) #define vqdmlal_n_s32(__a, __b, __c) \ ({ \ + int64x2_t __ax = __a; \ + int32x2_t __bx = __b; \ + int32_t __cx = __c; \ union { int64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqdmlal_nv2si (__a.val, __b.val, __c, 1); \ + __rv.__o = __builtin_neon_vqdmlal_nv2si (__ax.val, __bx.val, (__builtin_neon_si) __cx, 1); \ __rv.__i; \ }) #define vmls_n_s16(__a, __b, __c) \ ({ \ + int16x4_t __ax = __a; \ + int16x4_t __bx = __b; \ + int16_t __cx = __c; \ union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmls_nv4hi (__a.val, __b.val, __c, 1); \ + __rv.__o = __builtin_neon_vmls_nv4hi (__ax.val, __bx.val, (__builtin_neon_hi) __cx, 1); \ __rv.__i; \ }) #define vmls_n_s32(__a, __b, __c) \ ({ \ + int32x2_t __ax = __a; \ + int32x2_t __bx = __b; \ + int32_t __cx = __c; \ union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmls_nv2si (__a.val, __b.val, __c, 1); \ + __rv.__o = __builtin_neon_vmls_nv2si (__ax.val, __bx.val, (__builtin_neon_si) __cx, 1); \ __rv.__i; \ }) #define vmls_n_f32(__a, __b, __c) \ ({ \ + float32x2_t __ax = __a; \ + float32x2_t __bx = __b; \ + float32_t __cx = __c; \ union { float32x2_t __i; __neon_float32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmls_nv2sf (__a.val, __b.val, __c, 5); \ + __rv.__o = __builtin_neon_vmls_nv2sf (__ax.val, __bx.val, __cx, 5); \ __rv.__i; \ }) #define vmls_n_u16(__a, __b, __c) \ ({ \ + uint16x4_t __ax = __a; \ + uint16x4_t __bx = __b; \ + uint16_t __cx = __c; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmls_nv4hi ((__neon_int16x4_t) __a.val, (__neon_int16x4_t) __b.val, (int16_t) __c, 0); \ + __rv.__o = __builtin_neon_vmls_nv4hi ((__neon_int16x4_t) __ax.val, (__neon_int16x4_t) __bx.val, (__builtin_neon_hi) __cx, 0); \ __rv.__i; \ }) #define vmls_n_u32(__a, __b, __c) \ ({ \ + uint32x2_t __ax = __a; \ + 
uint32x2_t __bx = __b; \ + uint32_t __cx = __c; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmls_nv2si ((__neon_int32x2_t) __a.val, (__neon_int32x2_t) __b.val, (int32_t) __c, 0); \ + __rv.__o = __builtin_neon_vmls_nv2si ((__neon_int32x2_t) __ax.val, (__neon_int32x2_t) __bx.val, (__builtin_neon_si) __cx, 0); \ __rv.__i; \ }) #define vmlsq_n_s16(__a, __b, __c) \ ({ \ + int16x8_t __ax = __a; \ + int16x8_t __bx = __b; \ + int16_t __cx = __c; \ union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmls_nv8hi (__a.val, __b.val, __c, 1); \ + __rv.__o = __builtin_neon_vmls_nv8hi (__ax.val, __bx.val, (__builtin_neon_hi) __cx, 1); \ __rv.__i; \ }) #define vmlsq_n_s32(__a, __b, __c) \ ({ \ + int32x4_t __ax = __a; \ + int32x4_t __bx = __b; \ + int32_t __cx = __c; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmls_nv4si (__a.val, __b.val, __c, 1); \ + __rv.__o = __builtin_neon_vmls_nv4si (__ax.val, __bx.val, (__builtin_neon_si) __cx, 1); \ __rv.__i; \ }) #define vmlsq_n_f32(__a, __b, __c) \ ({ \ + float32x4_t __ax = __a; \ + float32x4_t __bx = __b; \ + float32_t __cx = __c; \ union { float32x4_t __i; __neon_float32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmls_nv4sf (__a.val, __b.val, __c, 5); \ + __rv.__o = __builtin_neon_vmls_nv4sf (__ax.val, __bx.val, __cx, 5); \ __rv.__i; \ }) #define vmlsq_n_u16(__a, __b, __c) \ ({ \ + uint16x8_t __ax = __a; \ + uint16x8_t __bx = __b; \ + uint16_t __cx = __c; \ union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmls_nv8hi ((__neon_int16x8_t) __a.val, (__neon_int16x8_t) __b.val, (int16_t) __c, 0); \ + __rv.__o = __builtin_neon_vmls_nv8hi ((__neon_int16x8_t) __ax.val, (__neon_int16x8_t) __bx.val, (__builtin_neon_hi) __cx, 0); \ __rv.__i; \ }) #define vmlsq_n_u32(__a, __b, __c) \ ({ \ + uint32x4_t __ax = __a; \ + uint32x4_t __bx = __b; \ + uint32_t __cx = __c; \ union { uint32x4_t __i; 
__neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmls_nv4si ((__neon_int32x4_t) __a.val, (__neon_int32x4_t) __b.val, (int32_t) __c, 0); \ + __rv.__o = __builtin_neon_vmls_nv4si ((__neon_int32x4_t) __ax.val, (__neon_int32x4_t) __bx.val, (__builtin_neon_si) __cx, 0); \ __rv.__i; \ }) #define vmlsl_n_s16(__a, __b, __c) \ ({ \ + int32x4_t __ax = __a; \ + int16x4_t __bx = __b; \ + int16_t __cx = __c; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmlsl_nv4hi (__a.val, __b.val, __c, 1); \ + __rv.__o = __builtin_neon_vmlsl_nv4hi (__ax.val, __bx.val, (__builtin_neon_hi) __cx, 1); \ __rv.__i; \ }) #define vmlsl_n_s32(__a, __b, __c) \ ({ \ + int64x2_t __ax = __a; \ + int32x2_t __bx = __b; \ + int32_t __cx = __c; \ union { int64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmlsl_nv2si (__a.val, __b.val, __c, 1); \ + __rv.__o = __builtin_neon_vmlsl_nv2si (__ax.val, __bx.val, (__builtin_neon_si) __cx, 1); \ __rv.__i; \ }) #define vmlsl_n_u16(__a, __b, __c) \ ({ \ + uint32x4_t __ax = __a; \ + uint16x4_t __bx = __b; \ + uint16_t __cx = __c; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmlsl_nv4hi ((__neon_int32x4_t) __a.val, (__neon_int16x4_t) __b.val, (int16_t) __c, 0); \ + __rv.__o = __builtin_neon_vmlsl_nv4hi ((__neon_int32x4_t) __ax.val, (__neon_int16x4_t) __bx.val, (__builtin_neon_hi) __cx, 0); \ __rv.__i; \ }) #define vmlsl_n_u32(__a, __b, __c) \ ({ \ + uint64x2_t __ax = __a; \ + uint32x2_t __bx = __b; \ + uint32_t __cx = __c; \ union { uint64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vmlsl_nv2si ((__neon_int64x2_t) __a.val, (__neon_int32x2_t) __b.val, (int32_t) __c, 0); \ + __rv.__o = __builtin_neon_vmlsl_nv2si ((__neon_int64x2_t) __ax.val, (__neon_int32x2_t) __bx.val, (__builtin_neon_si) __cx, 0); \ __rv.__i; \ }) #define vqdmlsl_n_s16(__a, __b, __c) \ ({ \ + int32x4_t __ax = __a; \ + int16x4_t __bx = __b; \ + int16_t __cx = __c; \ 
union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqdmlsl_nv4hi (__a.val, __b.val, __c, 1); \ + __rv.__o = __builtin_neon_vqdmlsl_nv4hi (__ax.val, __bx.val, (__builtin_neon_hi) __cx, 1); \ __rv.__i; \ }) #define vqdmlsl_n_s32(__a, __b, __c) \ ({ \ + int64x2_t __ax = __a; \ + int32x2_t __bx = __b; \ + int32_t __cx = __c; \ union { int64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vqdmlsl_nv2si (__a.val, __b.val, __c, 1); \ + __rv.__o = __builtin_neon_vqdmlsl_nv2si (__ax.val, __bx.val, (__builtin_neon_si) __cx, 1); \ __rv.__i; \ }) #define vext_s8(__a, __b, __c) \ ({ \ + int8x8_t __ax = __a; \ + int8x8_t __bx = __b; \ union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vextv8qi (__a.val, __b.val, __c); \ + __rv.__o = __builtin_neon_vextv8qi (__ax.val, __bx.val, __c); \ __rv.__i; \ }) #define vext_s16(__a, __b, __c) \ ({ \ + int16x4_t __ax = __a; \ + int16x4_t __bx = __b; \ union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vextv4hi (__a.val, __b.val, __c); \ + __rv.__o = __builtin_neon_vextv4hi (__ax.val, __bx.val, __c); \ __rv.__i; \ }) #define vext_s32(__a, __b, __c) \ ({ \ + int32x2_t __ax = __a; \ + int32x2_t __bx = __b; \ union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vextv2si (__a.val, __b.val, __c); \ + __rv.__o = __builtin_neon_vextv2si (__ax.val, __bx.val, __c); \ __rv.__i; \ }) #define vext_s64(__a, __b, __c) \ ({ \ + int64x1_t __ax = __a; \ + int64x1_t __bx = __b; \ union { int64x1_t __i; __neon_int64x1_t __o; } __rv; \ - __rv.__o = __builtin_neon_vextv1di (__a.val, __b.val, __c); \ + __rv.__o = __builtin_neon_vextv1di (__ax.val, __bx.val, __c); \ __rv.__i; \ }) #define vext_f32(__a, __b, __c) \ ({ \ + float32x2_t __ax = __a; \ + float32x2_t __bx = __b; \ union { float32x2_t __i; __neon_float32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vextv2sf (__a.val, __b.val, __c); \ + __rv.__o = 
__builtin_neon_vextv2sf (__ax.val, __bx.val, __c); \ __rv.__i; \ }) #define vext_u8(__a, __b, __c) \ ({ \ + uint8x8_t __ax = __a; \ + uint8x8_t __bx = __b; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vextv8qi ((__neon_int8x8_t) __a.val, (__neon_int8x8_t) __b.val, __c); \ + __rv.__o = __builtin_neon_vextv8qi ((__neon_int8x8_t) __ax.val, (__neon_int8x8_t) __bx.val, __c); \ __rv.__i; \ }) #define vext_u16(__a, __b, __c) \ ({ \ + uint16x4_t __ax = __a; \ + uint16x4_t __bx = __b; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vextv4hi ((__neon_int16x4_t) __a.val, (__neon_int16x4_t) __b.val, __c); \ + __rv.__o = __builtin_neon_vextv4hi ((__neon_int16x4_t) __ax.val, (__neon_int16x4_t) __bx.val, __c); \ __rv.__i; \ }) #define vext_u32(__a, __b, __c) \ ({ \ + uint32x2_t __ax = __a; \ + uint32x2_t __bx = __b; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vextv2si ((__neon_int32x2_t) __a.val, (__neon_int32x2_t) __b.val, __c); \ + __rv.__o = __builtin_neon_vextv2si ((__neon_int32x2_t) __ax.val, (__neon_int32x2_t) __bx.val, __c); \ __rv.__i; \ }) #define vext_u64(__a, __b, __c) \ ({ \ + uint64x1_t __ax = __a; \ + uint64x1_t __bx = __b; \ union { uint64x1_t __i; __neon_int64x1_t __o; } __rv; \ - __rv.__o = __builtin_neon_vextv1di ((__neon_int64x1_t) __a.val, (__neon_int64x1_t) __b.val, __c); \ + __rv.__o = __builtin_neon_vextv1di ((__neon_int64x1_t) __ax.val, (__neon_int64x1_t) __bx.val, __c); \ __rv.__i; \ }) #define vext_p8(__a, __b, __c) \ ({ \ + poly8x8_t __ax = __a; \ + poly8x8_t __bx = __b; \ union { poly8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vextv8qi ((__neon_int8x8_t) __a.val, (__neon_int8x8_t) __b.val, __c); \ + __rv.__o = __builtin_neon_vextv8qi ((__neon_int8x8_t) __ax.val, (__neon_int8x8_t) __bx.val, __c); \ __rv.__i; \ }) #define vext_p16(__a, __b, __c) \ ({ \ + poly16x4_t __ax = __a; \ + poly16x4_t __bx = __b; \ union 
{ poly16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vextv4hi ((__neon_int16x4_t) __a.val, (__neon_int16x4_t) __b.val, __c); \ + __rv.__o = __builtin_neon_vextv4hi ((__neon_int16x4_t) __ax.val, (__neon_int16x4_t) __bx.val, __c); \ __rv.__i; \ }) #define vextq_s8(__a, __b, __c) \ ({ \ + int8x16_t __ax = __a; \ + int8x16_t __bx = __b; \ union { int8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vextv16qi (__a.val, __b.val, __c); \ + __rv.__o = __builtin_neon_vextv16qi (__ax.val, __bx.val, __c); \ __rv.__i; \ }) #define vextq_s16(__a, __b, __c) \ ({ \ + int16x8_t __ax = __a; \ + int16x8_t __bx = __b; \ union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vextv8hi (__a.val, __b.val, __c); \ + __rv.__o = __builtin_neon_vextv8hi (__ax.val, __bx.val, __c); \ __rv.__i; \ }) #define vextq_s32(__a, __b, __c) \ ({ \ + int32x4_t __ax = __a; \ + int32x4_t __bx = __b; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vextv4si (__a.val, __b.val, __c); \ + __rv.__o = __builtin_neon_vextv4si (__ax.val, __bx.val, __c); \ __rv.__i; \ }) #define vextq_s64(__a, __b, __c) \ ({ \ + int64x2_t __ax = __a; \ + int64x2_t __bx = __b; \ union { int64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vextv2di (__a.val, __b.val, __c); \ + __rv.__o = __builtin_neon_vextv2di (__ax.val, __bx.val, __c); \ __rv.__i; \ }) #define vextq_f32(__a, __b, __c) \ ({ \ + float32x4_t __ax = __a; \ + float32x4_t __bx = __b; \ union { float32x4_t __i; __neon_float32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vextv4sf (__a.val, __b.val, __c); \ + __rv.__o = __builtin_neon_vextv4sf (__ax.val, __bx.val, __c); \ __rv.__i; \ }) #define vextq_u8(__a, __b, __c) \ ({ \ + uint8x16_t __ax = __a; \ + uint8x16_t __bx = __b; \ union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vextv16qi ((__neon_int8x16_t) __a.val, (__neon_int8x16_t) __b.val, __c); \ + 
__rv.__o = __builtin_neon_vextv16qi ((__neon_int8x16_t) __ax.val, (__neon_int8x16_t) __bx.val, __c); \ __rv.__i; \ }) #define vextq_u16(__a, __b, __c) \ ({ \ + uint16x8_t __ax = __a; \ + uint16x8_t __bx = __b; \ union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vextv8hi ((__neon_int16x8_t) __a.val, (__neon_int16x8_t) __b.val, __c); \ + __rv.__o = __builtin_neon_vextv8hi ((__neon_int16x8_t) __ax.val, (__neon_int16x8_t) __bx.val, __c); \ __rv.__i; \ }) #define vextq_u32(__a, __b, __c) \ ({ \ + uint32x4_t __ax = __a; \ + uint32x4_t __bx = __b; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vextv4si ((__neon_int32x4_t) __a.val, (__neon_int32x4_t) __b.val, __c); \ + __rv.__o = __builtin_neon_vextv4si ((__neon_int32x4_t) __ax.val, (__neon_int32x4_t) __bx.val, __c); \ __rv.__i; \ }) #define vextq_u64(__a, __b, __c) \ ({ \ + uint64x2_t __ax = __a; \ + uint64x2_t __bx = __b; \ union { uint64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vextv2di ((__neon_int64x2_t) __a.val, (__neon_int64x2_t) __b.val, __c); \ + __rv.__o = __builtin_neon_vextv2di ((__neon_int64x2_t) __ax.val, (__neon_int64x2_t) __bx.val, __c); \ __rv.__i; \ }) #define vextq_p8(__a, __b, __c) \ ({ \ + poly8x16_t __ax = __a; \ + poly8x16_t __bx = __b; \ union { poly8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vextv16qi ((__neon_int8x16_t) __a.val, (__neon_int8x16_t) __b.val, __c); \ + __rv.__o = __builtin_neon_vextv16qi ((__neon_int8x16_t) __ax.val, (__neon_int8x16_t) __bx.val, __c); \ __rv.__i; \ }) #define vextq_p16(__a, __b, __c) \ ({ \ + poly16x8_t __ax = __a; \ + poly16x8_t __bx = __b; \ union { poly16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vextv8hi ((__neon_int16x8_t) __a.val, (__neon_int16x8_t) __b.val, __c); \ + __rv.__o = __builtin_neon_vextv8hi ((__neon_int16x8_t) __ax.val, (__neon_int16x8_t) __bx.val, __c); \ __rv.__i; \ }) #define vrev64_s8(__a) \ 
({ \ + int8x8_t __ax = __a; \ union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vrev64v8qi (__a.val, 1); \ + __rv.__o = __builtin_neon_vrev64v8qi (__ax.val, 1); \ __rv.__i; \ }) #define vrev64_s16(__a) \ ({ \ + int16x4_t __ax = __a; \ union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vrev64v4hi (__a.val, 1); \ + __rv.__o = __builtin_neon_vrev64v4hi (__ax.val, 1); \ __rv.__i; \ }) #define vrev64_s32(__a) \ ({ \ + int32x2_t __ax = __a; \ union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vrev64v2si (__a.val, 1); \ + __rv.__o = __builtin_neon_vrev64v2si (__ax.val, 1); \ __rv.__i; \ }) #define vrev64_f32(__a) \ ({ \ + float32x2_t __ax = __a; \ union { float32x2_t __i; __neon_float32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vrev64v2sf (__a.val, 5); \ + __rv.__o = __builtin_neon_vrev64v2sf (__ax.val, 5); \ __rv.__i; \ }) #define vrev64_u8(__a) \ ({ \ + uint8x8_t __ax = __a; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vrev64v8qi ((__neon_int8x8_t) __a.val, 0); \ + __rv.__o = __builtin_neon_vrev64v8qi ((__neon_int8x8_t) __ax.val, 0); \ __rv.__i; \ }) #define vrev64_u16(__a) \ ({ \ + uint16x4_t __ax = __a; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vrev64v4hi ((__neon_int16x4_t) __a.val, 0); \ + __rv.__o = __builtin_neon_vrev64v4hi ((__neon_int16x4_t) __ax.val, 0); \ __rv.__i; \ }) #define vrev64_u32(__a) \ ({ \ + uint32x2_t __ax = __a; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vrev64v2si ((__neon_int32x2_t) __a.val, 0); \ + __rv.__o = __builtin_neon_vrev64v2si ((__neon_int32x2_t) __ax.val, 0); \ __rv.__i; \ }) #define vrev64_p8(__a) \ ({ \ + poly8x8_t __ax = __a; \ union { poly8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vrev64v8qi ((__neon_int8x8_t) __a.val, 4); \ + __rv.__o = __builtin_neon_vrev64v8qi ((__neon_int8x8_t) 
__ax.val, 4); \ __rv.__i; \ }) #define vrev64_p16(__a) \ ({ \ + poly16x4_t __ax = __a; \ union { poly16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vrev64v4hi ((__neon_int16x4_t) __a.val, 4); \ + __rv.__o = __builtin_neon_vrev64v4hi ((__neon_int16x4_t) __ax.val, 4); \ __rv.__i; \ }) #define vrev64q_s8(__a) \ ({ \ + int8x16_t __ax = __a; \ union { int8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vrev64v16qi (__a.val, 1); \ + __rv.__o = __builtin_neon_vrev64v16qi (__ax.val, 1); \ __rv.__i; \ }) #define vrev64q_s16(__a) \ ({ \ + int16x8_t __ax = __a; \ union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vrev64v8hi (__a.val, 1); \ + __rv.__o = __builtin_neon_vrev64v8hi (__ax.val, 1); \ __rv.__i; \ }) #define vrev64q_s32(__a) \ ({ \ + int32x4_t __ax = __a; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vrev64v4si (__a.val, 1); \ + __rv.__o = __builtin_neon_vrev64v4si (__ax.val, 1); \ __rv.__i; \ }) #define vrev64q_f32(__a) \ ({ \ + float32x4_t __ax = __a; \ union { float32x4_t __i; __neon_float32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vrev64v4sf (__a.val, 5); \ + __rv.__o = __builtin_neon_vrev64v4sf (__ax.val, 5); \ __rv.__i; \ }) #define vrev64q_u8(__a) \ ({ \ + uint8x16_t __ax = __a; \ union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vrev64v16qi ((__neon_int8x16_t) __a.val, 0); \ + __rv.__o = __builtin_neon_vrev64v16qi ((__neon_int8x16_t) __ax.val, 0); \ __rv.__i; \ }) #define vrev64q_u16(__a) \ ({ \ + uint16x8_t __ax = __a; \ union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vrev64v8hi ((__neon_int16x8_t) __a.val, 0); \ + __rv.__o = __builtin_neon_vrev64v8hi ((__neon_int16x8_t) __ax.val, 0); \ __rv.__i; \ }) #define vrev64q_u32(__a) \ ({ \ + uint32x4_t __ax = __a; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vrev64v4si 
((__neon_int32x4_t) __a.val, 0); \ + __rv.__o = __builtin_neon_vrev64v4si ((__neon_int32x4_t) __ax.val, 0); \ __rv.__i; \ }) #define vrev64q_p8(__a) \ ({ \ + poly8x16_t __ax = __a; \ union { poly8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vrev64v16qi ((__neon_int8x16_t) __a.val, 4); \ + __rv.__o = __builtin_neon_vrev64v16qi ((__neon_int8x16_t) __ax.val, 4); \ __rv.__i; \ }) #define vrev64q_p16(__a) \ ({ \ + poly16x8_t __ax = __a; \ union { poly16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vrev64v8hi ((__neon_int16x8_t) __a.val, 4); \ + __rv.__o = __builtin_neon_vrev64v8hi ((__neon_int16x8_t) __ax.val, 4); \ __rv.__i; \ }) #define vrev32_s8(__a) \ ({ \ + int8x8_t __ax = __a; \ union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vrev32v8qi (__a.val, 1); \ + __rv.__o = __builtin_neon_vrev32v8qi (__ax.val, 1); \ __rv.__i; \ }) #define vrev32_s16(__a) \ ({ \ + int16x4_t __ax = __a; \ union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vrev32v4hi (__a.val, 1); \ + __rv.__o = __builtin_neon_vrev32v4hi (__ax.val, 1); \ __rv.__i; \ }) #define vrev32_u8(__a) \ ({ \ + uint8x8_t __ax = __a; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vrev32v8qi ((__neon_int8x8_t) __a.val, 0); \ + __rv.__o = __builtin_neon_vrev32v8qi ((__neon_int8x8_t) __ax.val, 0); \ __rv.__i; \ }) #define vrev32_u16(__a) \ ({ \ + uint16x4_t __ax = __a; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vrev32v4hi ((__neon_int16x4_t) __a.val, 0); \ + __rv.__o = __builtin_neon_vrev32v4hi ((__neon_int16x4_t) __ax.val, 0); \ __rv.__i; \ }) #define vrev32_p8(__a) \ ({ \ + poly8x8_t __ax = __a; \ union { poly8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vrev32v8qi ((__neon_int8x8_t) __a.val, 4); \ + __rv.__o = __builtin_neon_vrev32v8qi ((__neon_int8x8_t) __ax.val, 4); \ __rv.__i; \ }) #define 
vrev32_p16(__a) \ ({ \ + poly16x4_t __ax = __a; \ union { poly16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vrev32v4hi ((__neon_int16x4_t) __a.val, 4); \ + __rv.__o = __builtin_neon_vrev32v4hi ((__neon_int16x4_t) __ax.val, 4); \ __rv.__i; \ }) #define vrev32q_s8(__a) \ ({ \ + int8x16_t __ax = __a; \ union { int8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vrev32v16qi (__a.val, 1); \ + __rv.__o = __builtin_neon_vrev32v16qi (__ax.val, 1); \ __rv.__i; \ }) #define vrev32q_s16(__a) \ ({ \ + int16x8_t __ax = __a; \ union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vrev32v8hi (__a.val, 1); \ + __rv.__o = __builtin_neon_vrev32v8hi (__ax.val, 1); \ __rv.__i; \ }) #define vrev32q_u8(__a) \ ({ \ + uint8x16_t __ax = __a; \ union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vrev32v16qi ((__neon_int8x16_t) __a.val, 0); \ + __rv.__o = __builtin_neon_vrev32v16qi ((__neon_int8x16_t) __ax.val, 0); \ __rv.__i; \ }) #define vrev32q_u16(__a) \ ({ \ + uint16x8_t __ax = __a; \ union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vrev32v8hi ((__neon_int16x8_t) __a.val, 0); \ + __rv.__o = __builtin_neon_vrev32v8hi ((__neon_int16x8_t) __ax.val, 0); \ __rv.__i; \ }) #define vrev32q_p8(__a) \ ({ \ + poly8x16_t __ax = __a; \ union { poly8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vrev32v16qi ((__neon_int8x16_t) __a.val, 4); \ + __rv.__o = __builtin_neon_vrev32v16qi ((__neon_int8x16_t) __ax.val, 4); \ __rv.__i; \ }) #define vrev32q_p16(__a) \ ({ \ + poly16x8_t __ax = __a; \ union { poly16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vrev32v8hi ((__neon_int16x8_t) __a.val, 4); \ + __rv.__o = __builtin_neon_vrev32v8hi ((__neon_int16x8_t) __ax.val, 4); \ __rv.__i; \ }) #define vrev16_s8(__a) \ ({ \ + int8x8_t __ax = __a; \ union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = 
__builtin_neon_vrev16v8qi (__a.val, 1); \ + __rv.__o = __builtin_neon_vrev16v8qi (__ax.val, 1); \ __rv.__i; \ }) #define vrev16_u8(__a) \ ({ \ + uint8x8_t __ax = __a; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vrev16v8qi ((__neon_int8x8_t) __a.val, 0); \ + __rv.__o = __builtin_neon_vrev16v8qi ((__neon_int8x8_t) __ax.val, 0); \ __rv.__i; \ }) #define vrev16_p8(__a) \ ({ \ + poly8x8_t __ax = __a; \ union { poly8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vrev16v8qi ((__neon_int8x8_t) __a.val, 4); \ + __rv.__o = __builtin_neon_vrev16v8qi ((__neon_int8x8_t) __ax.val, 4); \ __rv.__i; \ }) #define vrev16q_s8(__a) \ ({ \ + int8x16_t __ax = __a; \ union { int8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vrev16v16qi (__a.val, 1); \ + __rv.__o = __builtin_neon_vrev16v16qi (__ax.val, 1); \ __rv.__i; \ }) #define vrev16q_u8(__a) \ ({ \ + uint8x16_t __ax = __a; \ union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vrev16v16qi ((__neon_int8x16_t) __a.val, 0); \ + __rv.__o = __builtin_neon_vrev16v16qi ((__neon_int8x16_t) __ax.val, 0); \ __rv.__i; \ }) #define vrev16q_p8(__a) \ ({ \ + poly8x16_t __ax = __a; \ union { poly8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vrev16v16qi ((__neon_int8x16_t) __a.val, 4); \ + __rv.__o = __builtin_neon_vrev16v16qi ((__neon_int8x16_t) __ax.val, 4); \ __rv.__i; \ }) #define vbsl_s8(__a, __b, __c) \ ({ \ + uint8x8_t __ax = __a; \ + int8x8_t __bx = __b; \ + int8x8_t __cx = __c; \ union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vbslv8qi ((__neon_int8x8_t) __a.val, __b.val, __c.val); \ + __rv.__o = __builtin_neon_vbslv8qi ((__neon_int8x8_t) __ax.val, __bx.val, __cx.val); \ __rv.__i; \ }) #define vbsl_s16(__a, __b, __c) \ ({ \ + uint16x4_t __ax = __a; \ + int16x4_t __bx = __b; \ + int16x4_t __cx = __c; \ union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = 
__builtin_neon_vbslv4hi ((__neon_int16x4_t) __a.val, __b.val, __c.val); \ + __rv.__o = __builtin_neon_vbslv4hi ((__neon_int16x4_t) __ax.val, __bx.val, __cx.val); \ __rv.__i; \ }) #define vbsl_s32(__a, __b, __c) \ ({ \ + uint32x2_t __ax = __a; \ + int32x2_t __bx = __b; \ + int32x2_t __cx = __c; \ union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vbslv2si ((__neon_int32x2_t) __a.val, __b.val, __c.val); \ + __rv.__o = __builtin_neon_vbslv2si ((__neon_int32x2_t) __ax.val, __bx.val, __cx.val); \ __rv.__i; \ }) #define vbsl_s64(__a, __b, __c) \ ({ \ + uint64x1_t __ax = __a; \ + int64x1_t __bx = __b; \ + int64x1_t __cx = __c; \ union { int64x1_t __i; __neon_int64x1_t __o; } __rv; \ - __rv.__o = __builtin_neon_vbslv1di ((__neon_int64x1_t) __a.val, __b.val, __c.val); \ + __rv.__o = __builtin_neon_vbslv1di ((__neon_int64x1_t) __ax.val, __bx.val, __cx.val); \ __rv.__i; \ }) #define vbsl_f32(__a, __b, __c) \ ({ \ + uint32x2_t __ax = __a; \ + float32x2_t __bx = __b; \ + float32x2_t __cx = __c; \ union { float32x2_t __i; __neon_float32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vbslv2sf ((__neon_int32x2_t) __a.val, __b.val, __c.val); \ + __rv.__o = __builtin_neon_vbslv2sf ((__neon_int32x2_t) __ax.val, __bx.val, __cx.val); \ __rv.__i; \ }) #define vbsl_u8(__a, __b, __c) \ ({ \ + uint8x8_t __ax = __a; \ + uint8x8_t __bx = __b; \ + uint8x8_t __cx = __c; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vbslv8qi ((__neon_int8x8_t) __a.val, (__neon_int8x8_t) __b.val, (__neon_int8x8_t) __c.val); \ + __rv.__o = __builtin_neon_vbslv8qi ((__neon_int8x8_t) __ax.val, (__neon_int8x8_t) __bx.val, (__neon_int8x8_t) __cx.val); \ __rv.__i; \ }) #define vbsl_u16(__a, __b, __c) \ ({ \ + uint16x4_t __ax = __a; \ + uint16x4_t __bx = __b; \ + uint16x4_t __cx = __c; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vbslv4hi ((__neon_int16x4_t) __a.val, (__neon_int16x4_t) __b.val, 
(__neon_int16x4_t) __c.val); \ + __rv.__o = __builtin_neon_vbslv4hi ((__neon_int16x4_t) __ax.val, (__neon_int16x4_t) __bx.val, (__neon_int16x4_t) __cx.val); \ __rv.__i; \ }) #define vbsl_u32(__a, __b, __c) \ ({ \ + uint32x2_t __ax = __a; \ + uint32x2_t __bx = __b; \ + uint32x2_t __cx = __c; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vbslv2si ((__neon_int32x2_t) __a.val, (__neon_int32x2_t) __b.val, (__neon_int32x2_t) __c.val); \ + __rv.__o = __builtin_neon_vbslv2si ((__neon_int32x2_t) __ax.val, (__neon_int32x2_t) __bx.val, (__neon_int32x2_t) __cx.val); \ __rv.__i; \ }) #define vbsl_u64(__a, __b, __c) \ ({ \ + uint64x1_t __ax = __a; \ + uint64x1_t __bx = __b; \ + uint64x1_t __cx = __c; \ union { uint64x1_t __i; __neon_int64x1_t __o; } __rv; \ - __rv.__o = __builtin_neon_vbslv1di ((__neon_int64x1_t) __a.val, (__neon_int64x1_t) __b.val, (__neon_int64x1_t) __c.val); \ + __rv.__o = __builtin_neon_vbslv1di ((__neon_int64x1_t) __ax.val, (__neon_int64x1_t) __bx.val, (__neon_int64x1_t) __cx.val); \ __rv.__i; \ }) #define vbsl_p8(__a, __b, __c) \ ({ \ + uint8x8_t __ax = __a; \ + poly8x8_t __bx = __b; \ + poly8x8_t __cx = __c; \ union { poly8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vbslv8qi ((__neon_int8x8_t) __a.val, (__neon_int8x8_t) __b.val, (__neon_int8x8_t) __c.val); \ + __rv.__o = __builtin_neon_vbslv8qi ((__neon_int8x8_t) __ax.val, (__neon_int8x8_t) __bx.val, (__neon_int8x8_t) __cx.val); \ __rv.__i; \ }) #define vbsl_p16(__a, __b, __c) \ ({ \ + uint16x4_t __ax = __a; \ + poly16x4_t __bx = __b; \ + poly16x4_t __cx = __c; \ union { poly16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vbslv4hi ((__neon_int16x4_t) __a.val, (__neon_int16x4_t) __b.val, (__neon_int16x4_t) __c.val); \ + __rv.__o = __builtin_neon_vbslv4hi ((__neon_int16x4_t) __ax.val, (__neon_int16x4_t) __bx.val, (__neon_int16x4_t) __cx.val); \ __rv.__i; \ }) #define vbslq_s8(__a, __b, __c) \ ({ \ + uint8x16_t __ax = 
__a; \ + int8x16_t __bx = __b; \ + int8x16_t __cx = __c; \ union { int8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vbslv16qi ((__neon_int8x16_t) __a.val, __b.val, __c.val); \ + __rv.__o = __builtin_neon_vbslv16qi ((__neon_int8x16_t) __ax.val, __bx.val, __cx.val); \ __rv.__i; \ }) #define vbslq_s16(__a, __b, __c) \ ({ \ + uint16x8_t __ax = __a; \ + int16x8_t __bx = __b; \ + int16x8_t __cx = __c; \ union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vbslv8hi ((__neon_int16x8_t) __a.val, __b.val, __c.val); \ + __rv.__o = __builtin_neon_vbslv8hi ((__neon_int16x8_t) __ax.val, __bx.val, __cx.val); \ __rv.__i; \ }) #define vbslq_s32(__a, __b, __c) \ ({ \ + uint32x4_t __ax = __a; \ + int32x4_t __bx = __b; \ + int32x4_t __cx = __c; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vbslv4si ((__neon_int32x4_t) __a.val, __b.val, __c.val); \ + __rv.__o = __builtin_neon_vbslv4si ((__neon_int32x4_t) __ax.val, __bx.val, __cx.val); \ __rv.__i; \ }) #define vbslq_s64(__a, __b, __c) \ ({ \ + uint64x2_t __ax = __a; \ + int64x2_t __bx = __b; \ + int64x2_t __cx = __c; \ union { int64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vbslv2di ((__neon_int64x2_t) __a.val, __b.val, __c.val); \ + __rv.__o = __builtin_neon_vbslv2di ((__neon_int64x2_t) __ax.val, __bx.val, __cx.val); \ __rv.__i; \ }) #define vbslq_f32(__a, __b, __c) \ ({ \ + uint32x4_t __ax = __a; \ + float32x4_t __bx = __b; \ + float32x4_t __cx = __c; \ union { float32x4_t __i; __neon_float32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vbslv4sf ((__neon_int32x4_t) __a.val, __b.val, __c.val); \ + __rv.__o = __builtin_neon_vbslv4sf ((__neon_int32x4_t) __ax.val, __bx.val, __cx.val); \ __rv.__i; \ }) #define vbslq_u8(__a, __b, __c) \ ({ \ + uint8x16_t __ax = __a; \ + uint8x16_t __bx = __b; \ + uint8x16_t __cx = __c; \ union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vbslv16qi 
((__neon_int8x16_t) __a.val, (__neon_int8x16_t) __b.val, (__neon_int8x16_t) __c.val); \ + __rv.__o = __builtin_neon_vbslv16qi ((__neon_int8x16_t) __ax.val, (__neon_int8x16_t) __bx.val, (__neon_int8x16_t) __cx.val); \ __rv.__i; \ }) #define vbslq_u16(__a, __b, __c) \ ({ \ + uint16x8_t __ax = __a; \ + uint16x8_t __bx = __b; \ + uint16x8_t __cx = __c; \ union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vbslv8hi ((__neon_int16x8_t) __a.val, (__neon_int16x8_t) __b.val, (__neon_int16x8_t) __c.val); \ + __rv.__o = __builtin_neon_vbslv8hi ((__neon_int16x8_t) __ax.val, (__neon_int16x8_t) __bx.val, (__neon_int16x8_t) __cx.val); \ __rv.__i; \ }) #define vbslq_u32(__a, __b, __c) \ ({ \ + uint32x4_t __ax = __a; \ + uint32x4_t __bx = __b; \ + uint32x4_t __cx = __c; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vbslv4si ((__neon_int32x4_t) __a.val, (__neon_int32x4_t) __b.val, (__neon_int32x4_t) __c.val); \ + __rv.__o = __builtin_neon_vbslv4si ((__neon_int32x4_t) __ax.val, (__neon_int32x4_t) __bx.val, (__neon_int32x4_t) __cx.val); \ __rv.__i; \ }) #define vbslq_u64(__a, __b, __c) \ ({ \ + uint64x2_t __ax = __a; \ + uint64x2_t __bx = __b; \ + uint64x2_t __cx = __c; \ union { uint64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vbslv2di ((__neon_int64x2_t) __a.val, (__neon_int64x2_t) __b.val, (__neon_int64x2_t) __c.val); \ + __rv.__o = __builtin_neon_vbslv2di ((__neon_int64x2_t) __ax.val, (__neon_int64x2_t) __bx.val, (__neon_int64x2_t) __cx.val); \ __rv.__i; \ }) #define vbslq_p8(__a, __b, __c) \ ({ \ + uint8x16_t __ax = __a; \ + poly8x16_t __bx = __b; \ + poly8x16_t __cx = __c; \ union { poly8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vbslv16qi ((__neon_int8x16_t) __a.val, (__neon_int8x16_t) __b.val, (__neon_int8x16_t) __c.val); \ + __rv.__o = __builtin_neon_vbslv16qi ((__neon_int8x16_t) __ax.val, (__neon_int8x16_t) __bx.val, (__neon_int8x16_t) __cx.val); \ 
__rv.__i; \ }) #define vbslq_p16(__a, __b, __c) \ ({ \ + uint16x8_t __ax = __a; \ + poly16x8_t __bx = __b; \ + poly16x8_t __cx = __c; \ union { poly16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vbslv8hi ((__neon_int16x8_t) __a.val, (__neon_int16x8_t) __b.val, (__neon_int16x8_t) __c.val); \ + __rv.__o = __builtin_neon_vbslv8hi ((__neon_int16x8_t) __ax.val, (__neon_int16x8_t) __bx.val, (__neon_int16x8_t) __cx.val); \ __rv.__i; \ }) #define vtrn_s8(__a, __b) \ ({ \ + int8x8_t __ax = __a; \ + int8x8_t __bx = __b; \ union { int8x8x2_t __i; __neon_int8x8x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vtrnv8qi (__a.val, __b.val); \ + __rv.__o = __builtin_neon_vtrnv8qi (__ax.val, __bx.val); \ __rv.__i; \ }) #define vtrn_s16(__a, __b) \ ({ \ + int16x4_t __ax = __a; \ + int16x4_t __bx = __b; \ union { int16x4x2_t __i; __neon_int16x4x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vtrnv4hi (__a.val, __b.val); \ + __rv.__o = __builtin_neon_vtrnv4hi (__ax.val, __bx.val); \ __rv.__i; \ }) #define vtrn_s32(__a, __b) \ ({ \ + int32x2_t __ax = __a; \ + int32x2_t __bx = __b; \ union { int32x2x2_t __i; __neon_int32x2x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vtrnv2si (__a.val, __b.val); \ + __rv.__o = __builtin_neon_vtrnv2si (__ax.val, __bx.val); \ __rv.__i; \ }) #define vtrn_f32(__a, __b) \ ({ \ + float32x2_t __ax = __a; \ + float32x2_t __bx = __b; \ union { float32x2x2_t __i; __neon_float32x2x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vtrnv2sf (__a.val, __b.val); \ + __rv.__o = __builtin_neon_vtrnv2sf (__ax.val, __bx.val); \ __rv.__i; \ }) #define vtrn_u8(__a, __b) \ ({ \ + uint8x8_t __ax = __a; \ + uint8x8_t __bx = __b; \ union { uint8x8x2_t __i; __neon_int8x8x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vtrnv8qi ((__neon_int8x8_t) __a.val, (__neon_int8x8_t) __b.val); \ + __rv.__o = __builtin_neon_vtrnv8qi ((__neon_int8x8_t) __ax.val, (__neon_int8x8_t) __bx.val); \ __rv.__i; \ }) #define vtrn_u16(__a, __b) \ ({ \ + uint16x4_t __ax = __a; \ + 
uint16x4_t __bx = __b; \ union { uint16x4x2_t __i; __neon_int16x4x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vtrnv4hi ((__neon_int16x4_t) __a.val, (__neon_int16x4_t) __b.val); \ + __rv.__o = __builtin_neon_vtrnv4hi ((__neon_int16x4_t) __ax.val, (__neon_int16x4_t) __bx.val); \ __rv.__i; \ }) #define vtrn_u32(__a, __b) \ ({ \ + uint32x2_t __ax = __a; \ + uint32x2_t __bx = __b; \ union { uint32x2x2_t __i; __neon_int32x2x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vtrnv2si ((__neon_int32x2_t) __a.val, (__neon_int32x2_t) __b.val); \ + __rv.__o = __builtin_neon_vtrnv2si ((__neon_int32x2_t) __ax.val, (__neon_int32x2_t) __bx.val); \ __rv.__i; \ }) #define vtrn_p8(__a, __b) \ ({ \ + poly8x8_t __ax = __a; \ + poly8x8_t __bx = __b; \ union { poly8x8x2_t __i; __neon_int8x8x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vtrnv8qi ((__neon_int8x8_t) __a.val, (__neon_int8x8_t) __b.val); \ + __rv.__o = __builtin_neon_vtrnv8qi ((__neon_int8x8_t) __ax.val, (__neon_int8x8_t) __bx.val); \ __rv.__i; \ }) #define vtrn_p16(__a, __b) \ ({ \ + poly16x4_t __ax = __a; \ + poly16x4_t __bx = __b; \ union { poly16x4x2_t __i; __neon_int16x4x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vtrnv4hi ((__neon_int16x4_t) __a.val, (__neon_int16x4_t) __b.val); \ + __rv.__o = __builtin_neon_vtrnv4hi ((__neon_int16x4_t) __ax.val, (__neon_int16x4_t) __bx.val); \ __rv.__i; \ }) #define vtrnq_s8(__a, __b) \ ({ \ + int8x16_t __ax = __a; \ + int8x16_t __bx = __b; \ union { int8x16x2_t __i; __neon_int8x16x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vtrnv16qi (__a.val, __b.val); \ + __rv.__o = __builtin_neon_vtrnv16qi (__ax.val, __bx.val); \ __rv.__i; \ }) #define vtrnq_s16(__a, __b) \ ({ \ + int16x8_t __ax = __a; \ + int16x8_t __bx = __b; \ union { int16x8x2_t __i; __neon_int16x8x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vtrnv8hi (__a.val, __b.val); \ + __rv.__o = __builtin_neon_vtrnv8hi (__ax.val, __bx.val); \ __rv.__i; \ }) #define vtrnq_s32(__a, __b) \ ({ \ + int32x4_t __ax = __a; \ + int32x4_t 
__bx = __b; \ union { int32x4x2_t __i; __neon_int32x4x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vtrnv4si (__a.val, __b.val); \ + __rv.__o = __builtin_neon_vtrnv4si (__ax.val, __bx.val); \ __rv.__i; \ }) #define vtrnq_f32(__a, __b) \ ({ \ + float32x4_t __ax = __a; \ + float32x4_t __bx = __b; \ union { float32x4x2_t __i; __neon_float32x4x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vtrnv4sf (__a.val, __b.val); \ + __rv.__o = __builtin_neon_vtrnv4sf (__ax.val, __bx.val); \ __rv.__i; \ }) #define vtrnq_u8(__a, __b) \ ({ \ + uint8x16_t __ax = __a; \ + uint8x16_t __bx = __b; \ union { uint8x16x2_t __i; __neon_int8x16x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vtrnv16qi ((__neon_int8x16_t) __a.val, (__neon_int8x16_t) __b.val); \ + __rv.__o = __builtin_neon_vtrnv16qi ((__neon_int8x16_t) __ax.val, (__neon_int8x16_t) __bx.val); \ __rv.__i; \ }) #define vtrnq_u16(__a, __b) \ ({ \ + uint16x8_t __ax = __a; \ + uint16x8_t __bx = __b; \ union { uint16x8x2_t __i; __neon_int16x8x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vtrnv8hi ((__neon_int16x8_t) __a.val, (__neon_int16x8_t) __b.val); \ + __rv.__o = __builtin_neon_vtrnv8hi ((__neon_int16x8_t) __ax.val, (__neon_int16x8_t) __bx.val); \ __rv.__i; \ }) #define vtrnq_u32(__a, __b) \ ({ \ + uint32x4_t __ax = __a; \ + uint32x4_t __bx = __b; \ union { uint32x4x2_t __i; __neon_int32x4x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vtrnv4si ((__neon_int32x4_t) __a.val, (__neon_int32x4_t) __b.val); \ + __rv.__o = __builtin_neon_vtrnv4si ((__neon_int32x4_t) __ax.val, (__neon_int32x4_t) __bx.val); \ __rv.__i; \ }) #define vtrnq_p8(__a, __b) \ ({ \ + poly8x16_t __ax = __a; \ + poly8x16_t __bx = __b; \ union { poly8x16x2_t __i; __neon_int8x16x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vtrnv16qi ((__neon_int8x16_t) __a.val, (__neon_int8x16_t) __b.val); \ + __rv.__o = __builtin_neon_vtrnv16qi ((__neon_int8x16_t) __ax.val, (__neon_int8x16_t) __bx.val); \ __rv.__i; \ }) #define vtrnq_p16(__a, __b) \ ({ \ + poly16x8_t __ax = __a; 
\ + poly16x8_t __bx = __b; \ union { poly16x8x2_t __i; __neon_int16x8x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vtrnv8hi ((__neon_int16x8_t) __a.val, (__neon_int16x8_t) __b.val); \ + __rv.__o = __builtin_neon_vtrnv8hi ((__neon_int16x8_t) __ax.val, (__neon_int16x8_t) __bx.val); \ __rv.__i; \ }) #define vzip_s8(__a, __b) \ ({ \ + int8x8_t __ax = __a; \ + int8x8_t __bx = __b; \ union { int8x8x2_t __i; __neon_int8x8x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vzipv8qi (__a.val, __b.val); \ + __rv.__o = __builtin_neon_vzipv8qi (__ax.val, __bx.val); \ __rv.__i; \ }) #define vzip_s16(__a, __b) \ ({ \ + int16x4_t __ax = __a; \ + int16x4_t __bx = __b; \ union { int16x4x2_t __i; __neon_int16x4x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vzipv4hi (__a.val, __b.val); \ + __rv.__o = __builtin_neon_vzipv4hi (__ax.val, __bx.val); \ __rv.__i; \ }) #define vzip_s32(__a, __b) \ ({ \ + int32x2_t __ax = __a; \ + int32x2_t __bx = __b; \ union { int32x2x2_t __i; __neon_int32x2x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vzipv2si (__a.val, __b.val); \ + __rv.__o = __builtin_neon_vzipv2si (__ax.val, __bx.val); \ __rv.__i; \ }) #define vzip_f32(__a, __b) \ ({ \ + float32x2_t __ax = __a; \ + float32x2_t __bx = __b; \ union { float32x2x2_t __i; __neon_float32x2x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vzipv2sf (__a.val, __b.val); \ + __rv.__o = __builtin_neon_vzipv2sf (__ax.val, __bx.val); \ __rv.__i; \ }) #define vzip_u8(__a, __b) \ ({ \ + uint8x8_t __ax = __a; \ + uint8x8_t __bx = __b; \ union { uint8x8x2_t __i; __neon_int8x8x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vzipv8qi ((__neon_int8x8_t) __a.val, (__neon_int8x8_t) __b.val); \ + __rv.__o = __builtin_neon_vzipv8qi ((__neon_int8x8_t) __ax.val, (__neon_int8x8_t) __bx.val); \ __rv.__i; \ }) #define vzip_u16(__a, __b) \ ({ \ + uint16x4_t __ax = __a; \ + uint16x4_t __bx = __b; \ union { uint16x4x2_t __i; __neon_int16x4x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vzipv4hi ((__neon_int16x4_t) __a.val, 
(__neon_int16x4_t) __b.val); \ + __rv.__o = __builtin_neon_vzipv4hi ((__neon_int16x4_t) __ax.val, (__neon_int16x4_t) __bx.val); \ __rv.__i; \ }) #define vzip_u32(__a, __b) \ ({ \ + uint32x2_t __ax = __a; \ + uint32x2_t __bx = __b; \ union { uint32x2x2_t __i; __neon_int32x2x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vzipv2si ((__neon_int32x2_t) __a.val, (__neon_int32x2_t) __b.val); \ + __rv.__o = __builtin_neon_vzipv2si ((__neon_int32x2_t) __ax.val, (__neon_int32x2_t) __bx.val); \ __rv.__i; \ }) #define vzip_p8(__a, __b) \ ({ \ + poly8x8_t __ax = __a; \ + poly8x8_t __bx = __b; \ union { poly8x8x2_t __i; __neon_int8x8x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vzipv8qi ((__neon_int8x8_t) __a.val, (__neon_int8x8_t) __b.val); \ + __rv.__o = __builtin_neon_vzipv8qi ((__neon_int8x8_t) __ax.val, (__neon_int8x8_t) __bx.val); \ __rv.__i; \ }) #define vzip_p16(__a, __b) \ ({ \ + poly16x4_t __ax = __a; \ + poly16x4_t __bx = __b; \ union { poly16x4x2_t __i; __neon_int16x4x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vzipv4hi ((__neon_int16x4_t) __a.val, (__neon_int16x4_t) __b.val); \ + __rv.__o = __builtin_neon_vzipv4hi ((__neon_int16x4_t) __ax.val, (__neon_int16x4_t) __bx.val); \ __rv.__i; \ }) #define vzipq_s8(__a, __b) \ ({ \ + int8x16_t __ax = __a; \ + int8x16_t __bx = __b; \ union { int8x16x2_t __i; __neon_int8x16x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vzipv16qi (__a.val, __b.val); \ + __rv.__o = __builtin_neon_vzipv16qi (__ax.val, __bx.val); \ __rv.__i; \ }) #define vzipq_s16(__a, __b) \ ({ \ + int16x8_t __ax = __a; \ + int16x8_t __bx = __b; \ union { int16x8x2_t __i; __neon_int16x8x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vzipv8hi (__a.val, __b.val); \ + __rv.__o = __builtin_neon_vzipv8hi (__ax.val, __bx.val); \ __rv.__i; \ }) #define vzipq_s32(__a, __b) \ ({ \ + int32x4_t __ax = __a; \ + int32x4_t __bx = __b; \ union { int32x4x2_t __i; __neon_int32x4x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vzipv4si (__a.val, __b.val); \ + __rv.__o = 
__builtin_neon_vzipv4si (__ax.val, __bx.val); \ __rv.__i; \ }) #define vzipq_f32(__a, __b) \ ({ \ + float32x4_t __ax = __a; \ + float32x4_t __bx = __b; \ union { float32x4x2_t __i; __neon_float32x4x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vzipv4sf (__a.val, __b.val); \ + __rv.__o = __builtin_neon_vzipv4sf (__ax.val, __bx.val); \ __rv.__i; \ }) #define vzipq_u8(__a, __b) \ ({ \ + uint8x16_t __ax = __a; \ + uint8x16_t __bx = __b; \ union { uint8x16x2_t __i; __neon_int8x16x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vzipv16qi ((__neon_int8x16_t) __a.val, (__neon_int8x16_t) __b.val); \ + __rv.__o = __builtin_neon_vzipv16qi ((__neon_int8x16_t) __ax.val, (__neon_int8x16_t) __bx.val); \ __rv.__i; \ }) #define vzipq_u16(__a, __b) \ ({ \ + uint16x8_t __ax = __a; \ + uint16x8_t __bx = __b; \ union { uint16x8x2_t __i; __neon_int16x8x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vzipv8hi ((__neon_int16x8_t) __a.val, (__neon_int16x8_t) __b.val); \ + __rv.__o = __builtin_neon_vzipv8hi ((__neon_int16x8_t) __ax.val, (__neon_int16x8_t) __bx.val); \ __rv.__i; \ }) #define vzipq_u32(__a, __b) \ ({ \ + uint32x4_t __ax = __a; \ + uint32x4_t __bx = __b; \ union { uint32x4x2_t __i; __neon_int32x4x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vzipv4si ((__neon_int32x4_t) __a.val, (__neon_int32x4_t) __b.val); \ + __rv.__o = __builtin_neon_vzipv4si ((__neon_int32x4_t) __ax.val, (__neon_int32x4_t) __bx.val); \ __rv.__i; \ }) #define vzipq_p8(__a, __b) \ ({ \ + poly8x16_t __ax = __a; \ + poly8x16_t __bx = __b; \ union { poly8x16x2_t __i; __neon_int8x16x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vzipv16qi ((__neon_int8x16_t) __a.val, (__neon_int8x16_t) __b.val); \ + __rv.__o = __builtin_neon_vzipv16qi ((__neon_int8x16_t) __ax.val, (__neon_int8x16_t) __bx.val); \ __rv.__i; \ }) #define vzipq_p16(__a, __b) \ ({ \ + poly16x8_t __ax = __a; \ + poly16x8_t __bx = __b; \ union { poly16x8x2_t __i; __neon_int16x8x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vzipv8hi ((__neon_int16x8_t) 
__a.val, (__neon_int16x8_t) __b.val); \ + __rv.__o = __builtin_neon_vzipv8hi ((__neon_int16x8_t) __ax.val, (__neon_int16x8_t) __bx.val); \ __rv.__i; \ }) #define vuzp_s8(__a, __b) \ ({ \ + int8x8_t __ax = __a; \ + int8x8_t __bx = __b; \ union { int8x8x2_t __i; __neon_int8x8x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vuzpv8qi (__a.val, __b.val); \ + __rv.__o = __builtin_neon_vuzpv8qi (__ax.val, __bx.val); \ __rv.__i; \ }) #define vuzp_s16(__a, __b) \ ({ \ + int16x4_t __ax = __a; \ + int16x4_t __bx = __b; \ union { int16x4x2_t __i; __neon_int16x4x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vuzpv4hi (__a.val, __b.val); \ + __rv.__o = __builtin_neon_vuzpv4hi (__ax.val, __bx.val); \ __rv.__i; \ }) #define vuzp_s32(__a, __b) \ ({ \ + int32x2_t __ax = __a; \ + int32x2_t __bx = __b; \ union { int32x2x2_t __i; __neon_int32x2x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vuzpv2si (__a.val, __b.val); \ + __rv.__o = __builtin_neon_vuzpv2si (__ax.val, __bx.val); \ __rv.__i; \ }) #define vuzp_f32(__a, __b) \ ({ \ + float32x2_t __ax = __a; \ + float32x2_t __bx = __b; \ union { float32x2x2_t __i; __neon_float32x2x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vuzpv2sf (__a.val, __b.val); \ + __rv.__o = __builtin_neon_vuzpv2sf (__ax.val, __bx.val); \ __rv.__i; \ }) #define vuzp_u8(__a, __b) \ ({ \ + uint8x8_t __ax = __a; \ + uint8x8_t __bx = __b; \ union { uint8x8x2_t __i; __neon_int8x8x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vuzpv8qi ((__neon_int8x8_t) __a.val, (__neon_int8x8_t) __b.val); \ + __rv.__o = __builtin_neon_vuzpv8qi ((__neon_int8x8_t) __ax.val, (__neon_int8x8_t) __bx.val); \ __rv.__i; \ }) #define vuzp_u16(__a, __b) \ ({ \ + uint16x4_t __ax = __a; \ + uint16x4_t __bx = __b; \ union { uint16x4x2_t __i; __neon_int16x4x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vuzpv4hi ((__neon_int16x4_t) __a.val, (__neon_int16x4_t) __b.val); \ + __rv.__o = __builtin_neon_vuzpv4hi ((__neon_int16x4_t) __ax.val, (__neon_int16x4_t) __bx.val); \ __rv.__i; \ }) #define 
vuzp_u32(__a, __b) \ ({ \ + uint32x2_t __ax = __a; \ + uint32x2_t __bx = __b; \ union { uint32x2x2_t __i; __neon_int32x2x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vuzpv2si ((__neon_int32x2_t) __a.val, (__neon_int32x2_t) __b.val); \ + __rv.__o = __builtin_neon_vuzpv2si ((__neon_int32x2_t) __ax.val, (__neon_int32x2_t) __bx.val); \ __rv.__i; \ }) #define vuzp_p8(__a, __b) \ ({ \ + poly8x8_t __ax = __a; \ + poly8x8_t __bx = __b; \ union { poly8x8x2_t __i; __neon_int8x8x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vuzpv8qi ((__neon_int8x8_t) __a.val, (__neon_int8x8_t) __b.val); \ + __rv.__o = __builtin_neon_vuzpv8qi ((__neon_int8x8_t) __ax.val, (__neon_int8x8_t) __bx.val); \ __rv.__i; \ }) #define vuzp_p16(__a, __b) \ ({ \ + poly16x4_t __ax = __a; \ + poly16x4_t __bx = __b; \ union { poly16x4x2_t __i; __neon_int16x4x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vuzpv4hi ((__neon_int16x4_t) __a.val, (__neon_int16x4_t) __b.val); \ + __rv.__o = __builtin_neon_vuzpv4hi ((__neon_int16x4_t) __ax.val, (__neon_int16x4_t) __bx.val); \ __rv.__i; \ }) #define vuzpq_s8(__a, __b) \ ({ \ + int8x16_t __ax = __a; \ + int8x16_t __bx = __b; \ union { int8x16x2_t __i; __neon_int8x16x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vuzpv16qi (__a.val, __b.val); \ + __rv.__o = __builtin_neon_vuzpv16qi (__ax.val, __bx.val); \ __rv.__i; \ }) #define vuzpq_s16(__a, __b) \ ({ \ + int16x8_t __ax = __a; \ + int16x8_t __bx = __b; \ union { int16x8x2_t __i; __neon_int16x8x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vuzpv8hi (__a.val, __b.val); \ + __rv.__o = __builtin_neon_vuzpv8hi (__ax.val, __bx.val); \ __rv.__i; \ }) #define vuzpq_s32(__a, __b) \ ({ \ + int32x4_t __ax = __a; \ + int32x4_t __bx = __b; \ union { int32x4x2_t __i; __neon_int32x4x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vuzpv4si (__a.val, __b.val); \ + __rv.__o = __builtin_neon_vuzpv4si (__ax.val, __bx.val); \ __rv.__i; \ }) #define vuzpq_f32(__a, __b) \ ({ \ + float32x4_t __ax = __a; \ + float32x4_t __bx = __b; \ 
union { float32x4x2_t __i; __neon_float32x4x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vuzpv4sf (__a.val, __b.val); \ + __rv.__o = __builtin_neon_vuzpv4sf (__ax.val, __bx.val); \ __rv.__i; \ }) #define vuzpq_u8(__a, __b) \ ({ \ + uint8x16_t __ax = __a; \ + uint8x16_t __bx = __b; \ union { uint8x16x2_t __i; __neon_int8x16x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vuzpv16qi ((__neon_int8x16_t) __a.val, (__neon_int8x16_t) __b.val); \ + __rv.__o = __builtin_neon_vuzpv16qi ((__neon_int8x16_t) __ax.val, (__neon_int8x16_t) __bx.val); \ __rv.__i; \ }) #define vuzpq_u16(__a, __b) \ ({ \ + uint16x8_t __ax = __a; \ + uint16x8_t __bx = __b; \ union { uint16x8x2_t __i; __neon_int16x8x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vuzpv8hi ((__neon_int16x8_t) __a.val, (__neon_int16x8_t) __b.val); \ + __rv.__o = __builtin_neon_vuzpv8hi ((__neon_int16x8_t) __ax.val, (__neon_int16x8_t) __bx.val); \ __rv.__i; \ }) #define vuzpq_u32(__a, __b) \ ({ \ + uint32x4_t __ax = __a; \ + uint32x4_t __bx = __b; \ union { uint32x4x2_t __i; __neon_int32x4x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vuzpv4si ((__neon_int32x4_t) __a.val, (__neon_int32x4_t) __b.val); \ + __rv.__o = __builtin_neon_vuzpv4si ((__neon_int32x4_t) __ax.val, (__neon_int32x4_t) __bx.val); \ __rv.__i; \ }) #define vuzpq_p8(__a, __b) \ ({ \ + poly8x16_t __ax = __a; \ + poly8x16_t __bx = __b; \ union { poly8x16x2_t __i; __neon_int8x16x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vuzpv16qi ((__neon_int8x16_t) __a.val, (__neon_int8x16_t) __b.val); \ + __rv.__o = __builtin_neon_vuzpv16qi ((__neon_int8x16_t) __ax.val, (__neon_int8x16_t) __bx.val); \ __rv.__i; \ }) #define vuzpq_p16(__a, __b) \ ({ \ + poly16x8_t __ax = __a; \ + poly16x8_t __bx = __b; \ union { poly16x8x2_t __i; __neon_int16x8x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vuzpv8hi ((__neon_int16x8_t) __a.val, (__neon_int16x8_t) __b.val); \ + __rv.__o = __builtin_neon_vuzpv8hi ((__neon_int16x8_t) __ax.val, (__neon_int16x8_t) __bx.val); \ __rv.__i; \ }) 
#define vld1_s8(__a) \ ({ \ + const int8_t * __ax = __a; \ union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld1v8qi (__a); \ + __rv.__o = __builtin_neon_vld1v8qi (__neon_ptr_cast(const __builtin_neon_qi *, __ax)); \ __rv.__i; \ }) #define vld1_s16(__a) \ ({ \ + const int16_t * __ax = __a; \ union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld1v4hi (__a); \ + __rv.__o = __builtin_neon_vld1v4hi (__neon_ptr_cast(const __builtin_neon_hi *, __ax)); \ __rv.__i; \ }) #define vld1_s32(__a) \ ({ \ + const int32_t * __ax = __a; \ union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld1v2si (__a); \ + __rv.__o = __builtin_neon_vld1v2si (__neon_ptr_cast(const __builtin_neon_si *, __ax)); \ __rv.__i; \ }) #define vld1_s64(__a) \ ({ \ + const int64_t * __ax = __a; \ union { int64x1_t __i; __neon_int64x1_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld1v1di (__a); \ + __rv.__o = __builtin_neon_vld1v1di (__neon_ptr_cast(const __builtin_neon_di *, __ax)); \ __rv.__i; \ }) #define vld1_f32(__a) \ ({ \ + const float32_t * __ax = __a; \ union { float32x2_t __i; __neon_float32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld1v2sf (__a); \ + __rv.__o = __builtin_neon_vld1v2sf (__ax); \ __rv.__i; \ }) #define vld1_u8(__a) \ ({ \ + const uint8_t * __ax = __a; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld1v8qi ((const int8_t *) __a); \ + __rv.__o = __builtin_neon_vld1v8qi (__neon_ptr_cast(const __builtin_neon_qi *, __ax)); \ __rv.__i; \ }) #define vld1_u16(__a) \ ({ \ + const uint16_t * __ax = __a; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld1v4hi ((const int16_t *) __a); \ + __rv.__o = __builtin_neon_vld1v4hi (__neon_ptr_cast(const __builtin_neon_hi *, __ax)); \ __rv.__i; \ }) #define vld1_u32(__a) \ ({ \ + const uint32_t * __ax = __a; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = 
__builtin_neon_vld1v2si ((const int32_t *) __a); \ + __rv.__o = __builtin_neon_vld1v2si (__neon_ptr_cast(const __builtin_neon_si *, __ax)); \ __rv.__i; \ }) #define vld1_u64(__a) \ ({ \ + const uint64_t * __ax = __a; \ union { uint64x1_t __i; __neon_int64x1_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld1v1di ((const int64_t *) __a); \ + __rv.__o = __builtin_neon_vld1v1di (__neon_ptr_cast(const __builtin_neon_di *, __ax)); \ __rv.__i; \ }) #define vld1_p8(__a) \ ({ \ + const poly8_t * __ax = __a; \ union { poly8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld1v8qi ((const int8_t *) __a); \ + __rv.__o = __builtin_neon_vld1v8qi (__neon_ptr_cast(const __builtin_neon_qi *, __ax)); \ __rv.__i; \ }) #define vld1_p16(__a) \ ({ \ + const poly16_t * __ax = __a; \ union { poly16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld1v4hi ((const int16_t *) __a); \ + __rv.__o = __builtin_neon_vld1v4hi (__neon_ptr_cast(const __builtin_neon_hi *, __ax)); \ __rv.__i; \ }) #define vld1q_s8(__a) \ ({ \ + const int8_t * __ax = __a; \ union { int8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld1v16qi (__a); \ + __rv.__o = __builtin_neon_vld1v16qi (__neon_ptr_cast(const __builtin_neon_qi *, __ax)); \ __rv.__i; \ }) #define vld1q_s16(__a) \ ({ \ + const int16_t * __ax = __a; \ union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld1v8hi (__a); \ + __rv.__o = __builtin_neon_vld1v8hi (__neon_ptr_cast(const __builtin_neon_hi *, __ax)); \ __rv.__i; \ }) #define vld1q_s32(__a) \ ({ \ + const int32_t * __ax = __a; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld1v4si (__a); \ + __rv.__o = __builtin_neon_vld1v4si (__neon_ptr_cast(const __builtin_neon_si *, __ax)); \ __rv.__i; \ }) #define vld1q_s64(__a) \ ({ \ + const int64_t * __ax = __a; \ union { int64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld1v2di (__a); \ + __rv.__o = 
__builtin_neon_vld1v2di (__neon_ptr_cast(const __builtin_neon_di *, __ax)); \ __rv.__i; \ }) #define vld1q_f32(__a) \ ({ \ + const float32_t * __ax = __a; \ union { float32x4_t __i; __neon_float32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld1v4sf (__a); \ + __rv.__o = __builtin_neon_vld1v4sf (__ax); \ __rv.__i; \ }) #define vld1q_u8(__a) \ ({ \ + const uint8_t * __ax = __a; \ union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld1v16qi ((const int8_t *) __a); \ + __rv.__o = __builtin_neon_vld1v16qi (__neon_ptr_cast(const __builtin_neon_qi *, __ax)); \ __rv.__i; \ }) #define vld1q_u16(__a) \ ({ \ + const uint16_t * __ax = __a; \ union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld1v8hi ((const int16_t *) __a); \ + __rv.__o = __builtin_neon_vld1v8hi (__neon_ptr_cast(const __builtin_neon_hi *, __ax)); \ __rv.__i; \ }) #define vld1q_u32(__a) \ ({ \ + const uint32_t * __ax = __a; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld1v4si ((const int32_t *) __a); \ + __rv.__o = __builtin_neon_vld1v4si (__neon_ptr_cast(const __builtin_neon_si *, __ax)); \ __rv.__i; \ }) #define vld1q_u64(__a) \ ({ \ + const uint64_t * __ax = __a; \ union { uint64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld1v2di ((const int64_t *) __a); \ + __rv.__o = __builtin_neon_vld1v2di (__neon_ptr_cast(const __builtin_neon_di *, __ax)); \ __rv.__i; \ }) #define vld1q_p8(__a) \ ({ \ + const poly8_t * __ax = __a; \ union { poly8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld1v16qi ((const int8_t *) __a); \ + __rv.__o = __builtin_neon_vld1v16qi (__neon_ptr_cast(const __builtin_neon_qi *, __ax)); \ __rv.__i; \ }) #define vld1q_p16(__a) \ ({ \ + const poly16_t * __ax = __a; \ union { poly16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld1v8hi ((const int16_t *) __a); \ + __rv.__o = __builtin_neon_vld1v8hi 
(__neon_ptr_cast(const __builtin_neon_hi *, __ax)); \ __rv.__i; \ }) #define vld1_lane_s8(__a, __b, __c) \ ({ \ + const int8_t * __ax = __a; \ + int8x8_t __bx = __b; \ union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld1_lanev8qi (__a, __b.val, __c); \ + __rv.__o = __builtin_neon_vld1_lanev8qi (__neon_ptr_cast(const __builtin_neon_qi *, __ax), __bx.val, __c); \ __rv.__i; \ }) #define vld1_lane_s16(__a, __b, __c) \ ({ \ + const int16_t * __ax = __a; \ + int16x4_t __bx = __b; \ union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld1_lanev4hi (__a, __b.val, __c); \ + __rv.__o = __builtin_neon_vld1_lanev4hi (__neon_ptr_cast(const __builtin_neon_hi *, __ax), __bx.val, __c); \ __rv.__i; \ }) #define vld1_lane_s32(__a, __b, __c) \ ({ \ + const int32_t * __ax = __a; \ + int32x2_t __bx = __b; \ union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld1_lanev2si (__a, __b.val, __c); \ + __rv.__o = __builtin_neon_vld1_lanev2si (__neon_ptr_cast(const __builtin_neon_si *, __ax), __bx.val, __c); \ __rv.__i; \ }) #define vld1_lane_f32(__a, __b, __c) \ ({ \ + const float32_t * __ax = __a; \ + float32x2_t __bx = __b; \ union { float32x2_t __i; __neon_float32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld1_lanev2sf (__a, __b.val, __c); \ + __rv.__o = __builtin_neon_vld1_lanev2sf (__ax, __bx.val, __c); \ __rv.__i; \ }) #define vld1_lane_u8(__a, __b, __c) \ ({ \ + const uint8_t * __ax = __a; \ + uint8x8_t __bx = __b; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld1_lanev8qi ((const int8_t *) __a, (__neon_int8x8_t) __b.val, __c); \ + __rv.__o = __builtin_neon_vld1_lanev8qi (__neon_ptr_cast(const __builtin_neon_qi *, __ax), (__neon_int8x8_t) __bx.val, __c); \ __rv.__i; \ }) #define vld1_lane_u16(__a, __b, __c) \ ({ \ + const uint16_t * __ax = __a; \ + uint16x4_t __bx = __b; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = 
__builtin_neon_vld1_lanev4hi ((const int16_t *) __a, (__neon_int16x4_t) __b.val, __c); \ + __rv.__o = __builtin_neon_vld1_lanev4hi (__neon_ptr_cast(const __builtin_neon_hi *, __ax), (__neon_int16x4_t) __bx.val, __c); \ __rv.__i; \ }) #define vld1_lane_u32(__a, __b, __c) \ ({ \ + const uint32_t * __ax = __a; \ + uint32x2_t __bx = __b; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld1_lanev2si ((const int32_t *) __a, (__neon_int32x2_t) __b.val, __c); \ + __rv.__o = __builtin_neon_vld1_lanev2si (__neon_ptr_cast(const __builtin_neon_si *, __ax), (__neon_int32x2_t) __bx.val, __c); \ __rv.__i; \ }) #define vld1_lane_p8(__a, __b, __c) \ ({ \ + const poly8_t * __ax = __a; \ + poly8x8_t __bx = __b; \ union { poly8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld1_lanev8qi ((const int8_t *) __a, (__neon_int8x8_t) __b.val, __c); \ + __rv.__o = __builtin_neon_vld1_lanev8qi (__neon_ptr_cast(const __builtin_neon_qi *, __ax), (__neon_int8x8_t) __bx.val, __c); \ __rv.__i; \ }) #define vld1_lane_p16(__a, __b, __c) \ ({ \ + const poly16_t * __ax = __a; \ + poly16x4_t __bx = __b; \ union { poly16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld1_lanev4hi ((const int16_t *) __a, (__neon_int16x4_t) __b.val, __c); \ + __rv.__o = __builtin_neon_vld1_lanev4hi (__neon_ptr_cast(const __builtin_neon_hi *, __ax), (__neon_int16x4_t) __bx.val, __c); \ __rv.__i; \ }) #define vld1_lane_s64(__a, __b, __c) \ ({ \ + const int64_t * __ax = __a; \ + int64x1_t __bx = __b; \ union { int64x1_t __i; __neon_int64x1_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld1_lanev1di (__a, __b.val, __c); \ + __rv.__o = __builtin_neon_vld1_lanev1di (__neon_ptr_cast(const __builtin_neon_di *, __ax), __bx.val, __c); \ __rv.__i; \ }) #define vld1_lane_u64(__a, __b, __c) \ ({ \ + const uint64_t * __ax = __a; \ + uint64x1_t __bx = __b; \ union { uint64x1_t __i; __neon_int64x1_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld1_lanev1di 
((const int64_t *) __a, (__neon_int64x1_t) __b.val, __c); \ + __rv.__o = __builtin_neon_vld1_lanev1di (__neon_ptr_cast(const __builtin_neon_di *, __ax), (__neon_int64x1_t) __bx.val, __c); \ __rv.__i; \ }) #define vld1q_lane_s8(__a, __b, __c) \ ({ \ + const int8_t * __ax = __a; \ + int8x16_t __bx = __b; \ union { int8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld1_lanev16qi (__a, __b.val, __c); \ + __rv.__o = __builtin_neon_vld1_lanev16qi (__neon_ptr_cast(const __builtin_neon_qi *, __ax), __bx.val, __c); \ __rv.__i; \ }) #define vld1q_lane_s16(__a, __b, __c) \ ({ \ + const int16_t * __ax = __a; \ + int16x8_t __bx = __b; \ union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld1_lanev8hi (__a, __b.val, __c); \ + __rv.__o = __builtin_neon_vld1_lanev8hi (__neon_ptr_cast(const __builtin_neon_hi *, __ax), __bx.val, __c); \ __rv.__i; \ }) #define vld1q_lane_s32(__a, __b, __c) \ ({ \ + const int32_t * __ax = __a; \ + int32x4_t __bx = __b; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld1_lanev4si (__a, __b.val, __c); \ + __rv.__o = __builtin_neon_vld1_lanev4si (__neon_ptr_cast(const __builtin_neon_si *, __ax), __bx.val, __c); \ __rv.__i; \ }) #define vld1q_lane_f32(__a, __b, __c) \ ({ \ + const float32_t * __ax = __a; \ + float32x4_t __bx = __b; \ union { float32x4_t __i; __neon_float32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld1_lanev4sf (__a, __b.val, __c); \ + __rv.__o = __builtin_neon_vld1_lanev4sf (__ax, __bx.val, __c); \ __rv.__i; \ }) #define vld1q_lane_u8(__a, __b, __c) \ ({ \ + const uint8_t * __ax = __a; \ + uint8x16_t __bx = __b; \ union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld1_lanev16qi ((const int8_t *) __a, (__neon_int8x16_t) __b.val, __c); \ + __rv.__o = __builtin_neon_vld1_lanev16qi (__neon_ptr_cast(const __builtin_neon_qi *, __ax), (__neon_int8x16_t) __bx.val, __c); \ __rv.__i; \ }) #define vld1q_lane_u16(__a, 
__b, __c) \ ({ \ + const uint16_t * __ax = __a; \ + uint16x8_t __bx = __b; \ union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld1_lanev8hi ((const int16_t *) __a, (__neon_int16x8_t) __b.val, __c); \ + __rv.__o = __builtin_neon_vld1_lanev8hi (__neon_ptr_cast(const __builtin_neon_hi *, __ax), (__neon_int16x8_t) __bx.val, __c); \ __rv.__i; \ }) #define vld1q_lane_u32(__a, __b, __c) \ ({ \ + const uint32_t * __ax = __a; \ + uint32x4_t __bx = __b; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld1_lanev4si ((const int32_t *) __a, (__neon_int32x4_t) __b.val, __c); \ + __rv.__o = __builtin_neon_vld1_lanev4si (__neon_ptr_cast(const __builtin_neon_si *, __ax), (__neon_int32x4_t) __bx.val, __c); \ __rv.__i; \ }) #define vld1q_lane_p8(__a, __b, __c) \ ({ \ + const poly8_t * __ax = __a; \ + poly8x16_t __bx = __b; \ union { poly8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld1_lanev16qi ((const int8_t *) __a, (__neon_int8x16_t) __b.val, __c); \ + __rv.__o = __builtin_neon_vld1_lanev16qi (__neon_ptr_cast(const __builtin_neon_qi *, __ax), (__neon_int8x16_t) __bx.val, __c); \ __rv.__i; \ }) #define vld1q_lane_p16(__a, __b, __c) \ ({ \ + const poly16_t * __ax = __a; \ + poly16x8_t __bx = __b; \ union { poly16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld1_lanev8hi ((const int16_t *) __a, (__neon_int16x8_t) __b.val, __c); \ + __rv.__o = __builtin_neon_vld1_lanev8hi (__neon_ptr_cast(const __builtin_neon_hi *, __ax), (__neon_int16x8_t) __bx.val, __c); \ __rv.__i; \ }) #define vld1q_lane_s64(__a, __b, __c) \ ({ \ + const int64_t * __ax = __a; \ + int64x2_t __bx = __b; \ union { int64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld1_lanev2di (__a, __b.val, __c); \ + __rv.__o = __builtin_neon_vld1_lanev2di (__neon_ptr_cast(const __builtin_neon_di *, __ax), __bx.val, __c); \ __rv.__i; \ }) #define vld1q_lane_u64(__a, __b, __c) \ ({ \ + 
const uint64_t * __ax = __a; \ + uint64x2_t __bx = __b; \ union { uint64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld1_lanev2di ((const int64_t *) __a, (__neon_int64x2_t) __b.val, __c); \ + __rv.__o = __builtin_neon_vld1_lanev2di (__neon_ptr_cast(const __builtin_neon_di *, __ax), (__neon_int64x2_t) __bx.val, __c); \ __rv.__i; \ }) #define vld1_dup_s8(__a) \ ({ \ + const int8_t * __ax = __a; \ union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld1_dupv8qi (__a); \ + __rv.__o = __builtin_neon_vld1_dupv8qi (__neon_ptr_cast(const __builtin_neon_qi *, __ax)); \ __rv.__i; \ }) #define vld1_dup_s16(__a) \ ({ \ + const int16_t * __ax = __a; \ union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld1_dupv4hi (__a); \ + __rv.__o = __builtin_neon_vld1_dupv4hi (__neon_ptr_cast(const __builtin_neon_hi *, __ax)); \ __rv.__i; \ }) #define vld1_dup_s32(__a) \ ({ \ + const int32_t * __ax = __a; \ union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld1_dupv2si (__a); \ + __rv.__o = __builtin_neon_vld1_dupv2si (__neon_ptr_cast(const __builtin_neon_si *, __ax)); \ __rv.__i; \ }) #define vld1_dup_f32(__a) \ ({ \ + const float32_t * __ax = __a; \ union { float32x2_t __i; __neon_float32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld1_dupv2sf (__a); \ + __rv.__o = __builtin_neon_vld1_dupv2sf (__ax); \ __rv.__i; \ }) #define vld1_dup_u8(__a) \ ({ \ + const uint8_t * __ax = __a; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld1_dupv8qi ((const int8_t *) __a); \ + __rv.__o = __builtin_neon_vld1_dupv8qi (__neon_ptr_cast(const __builtin_neon_qi *, __ax)); \ __rv.__i; \ }) #define vld1_dup_u16(__a) \ ({ \ + const uint16_t * __ax = __a; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld1_dupv4hi ((const int16_t *) __a); \ + __rv.__o = __builtin_neon_vld1_dupv4hi (__neon_ptr_cast(const 
__builtin_neon_hi *, __ax)); \ __rv.__i; \ }) #define vld1_dup_u32(__a) \ ({ \ + const uint32_t * __ax = __a; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld1_dupv2si ((const int32_t *) __a); \ + __rv.__o = __builtin_neon_vld1_dupv2si (__neon_ptr_cast(const __builtin_neon_si *, __ax)); \ __rv.__i; \ }) #define vld1_dup_p8(__a) \ ({ \ + const poly8_t * __ax = __a; \ union { poly8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld1_dupv8qi ((const int8_t *) __a); \ + __rv.__o = __builtin_neon_vld1_dupv8qi (__neon_ptr_cast(const __builtin_neon_qi *, __ax)); \ __rv.__i; \ }) #define vld1_dup_p16(__a) \ ({ \ + const poly16_t * __ax = __a; \ union { poly16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld1_dupv4hi ((const int16_t *) __a); \ + __rv.__o = __builtin_neon_vld1_dupv4hi (__neon_ptr_cast(const __builtin_neon_hi *, __ax)); \ __rv.__i; \ }) #define vld1_dup_s64(__a) \ ({ \ + const int64_t * __ax = __a; \ union { int64x1_t __i; __neon_int64x1_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld1_dupv1di (__a); \ + __rv.__o = __builtin_neon_vld1_dupv1di (__neon_ptr_cast(const __builtin_neon_di *, __ax)); \ __rv.__i; \ }) #define vld1_dup_u64(__a) \ ({ \ + const uint64_t * __ax = __a; \ union { uint64x1_t __i; __neon_int64x1_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld1_dupv1di ((const int64_t *) __a); \ + __rv.__o = __builtin_neon_vld1_dupv1di (__neon_ptr_cast(const __builtin_neon_di *, __ax)); \ __rv.__i; \ }) #define vld1q_dup_s8(__a) \ ({ \ + const int8_t * __ax = __a; \ union { int8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld1_dupv16qi (__a); \ + __rv.__o = __builtin_neon_vld1_dupv16qi (__neon_ptr_cast(const __builtin_neon_qi *, __ax)); \ __rv.__i; \ }) #define vld1q_dup_s16(__a) \ ({ \ + const int16_t * __ax = __a; \ union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld1_dupv8hi (__a); \ + __rv.__o = 
__builtin_neon_vld1_dupv8hi (__neon_ptr_cast(const __builtin_neon_hi *, __ax)); \ __rv.__i; \ }) #define vld1q_dup_s32(__a) \ ({ \ + const int32_t * __ax = __a; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld1_dupv4si (__a); \ + __rv.__o = __builtin_neon_vld1_dupv4si (__neon_ptr_cast(const __builtin_neon_si *, __ax)); \ __rv.__i; \ }) #define vld1q_dup_f32(__a) \ ({ \ + const float32_t * __ax = __a; \ union { float32x4_t __i; __neon_float32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld1_dupv4sf (__a); \ + __rv.__o = __builtin_neon_vld1_dupv4sf (__ax); \ __rv.__i; \ }) #define vld1q_dup_u8(__a) \ ({ \ + const uint8_t * __ax = __a; \ union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld1_dupv16qi ((const int8_t *) __a); \ + __rv.__o = __builtin_neon_vld1_dupv16qi (__neon_ptr_cast(const __builtin_neon_qi *, __ax)); \ __rv.__i; \ }) #define vld1q_dup_u16(__a) \ ({ \ + const uint16_t * __ax = __a; \ union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld1_dupv8hi ((const int16_t *) __a); \ + __rv.__o = __builtin_neon_vld1_dupv8hi (__neon_ptr_cast(const __builtin_neon_hi *, __ax)); \ __rv.__i; \ }) #define vld1q_dup_u32(__a) \ ({ \ + const uint32_t * __ax = __a; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld1_dupv4si ((const int32_t *) __a); \ + __rv.__o = __builtin_neon_vld1_dupv4si (__neon_ptr_cast(const __builtin_neon_si *, __ax)); \ __rv.__i; \ }) #define vld1q_dup_p8(__a) \ ({ \ + const poly8_t * __ax = __a; \ union { poly8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld1_dupv16qi ((const int8_t *) __a); \ + __rv.__o = __builtin_neon_vld1_dupv16qi (__neon_ptr_cast(const __builtin_neon_qi *, __ax)); \ __rv.__i; \ }) #define vld1q_dup_p16(__a) \ ({ \ + const poly16_t * __ax = __a; \ union { poly16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld1_dupv8hi ((const 
int16_t *) __a); \ + __rv.__o = __builtin_neon_vld1_dupv8hi (__neon_ptr_cast(const __builtin_neon_hi *, __ax)); \ __rv.__i; \ }) #define vld1q_dup_s64(__a) \ ({ \ + const int64_t * __ax = __a; \ union { int64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld1_dupv2di (__a); \ + __rv.__o = __builtin_neon_vld1_dupv2di (__neon_ptr_cast(const __builtin_neon_di *, __ax)); \ __rv.__i; \ }) #define vld1q_dup_u64(__a) \ ({ \ + const uint64_t * __ax = __a; \ union { uint64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld1_dupv2di ((const int64_t *) __a); \ + __rv.__o = __builtin_neon_vld1_dupv2di (__neon_ptr_cast(const __builtin_neon_di *, __ax)); \ __rv.__i; \ }) #define vst1_s8(__a, __b) \ - __builtin_neon_vst1v8qi (__a, __b.val) + ({ \ + int8_t * __ax = __a; \ + int8x8_t __bx = __b; \ + __builtin_neon_vst1v8qi (__neon_ptr_cast(__builtin_neon_qi *, __ax), __bx.val); \ + }) #define vst1_s16(__a, __b) \ - __builtin_neon_vst1v4hi (__a, __b.val) + ({ \ + int16_t * __ax = __a; \ + int16x4_t __bx = __b; \ + __builtin_neon_vst1v4hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), __bx.val); \ + }) #define vst1_s32(__a, __b) \ - __builtin_neon_vst1v2si (__a, __b.val) + ({ \ + int32_t * __ax = __a; \ + int32x2_t __bx = __b; \ + __builtin_neon_vst1v2si (__neon_ptr_cast(__builtin_neon_si *, __ax), __bx.val); \ + }) #define vst1_s64(__a, __b) \ - __builtin_neon_vst1v1di (__a, __b.val) + ({ \ + int64_t * __ax = __a; \ + int64x1_t __bx = __b; \ + __builtin_neon_vst1v1di (__neon_ptr_cast(__builtin_neon_di *, __ax), __bx.val); \ + }) #define vst1_f32(__a, __b) \ - __builtin_neon_vst1v2sf (__a, __b.val) + ({ \ + float32_t * __ax = __a; \ + float32x2_t __bx = __b; \ + __builtin_neon_vst1v2sf (__ax, __bx.val); \ + }) #define vst1_u8(__a, __b) \ - __builtin_neon_vst1v8qi ((int8_t *) __a, (__neon_int8x8_t) __b.val) + ({ \ + uint8_t * __ax = __a; \ + uint8x8_t __bx = __b; \ + __builtin_neon_vst1v8qi (__neon_ptr_cast(__builtin_neon_qi *, __ax), 
(__neon_int8x8_t) __bx.val); \ + }) #define vst1_u16(__a, __b) \ - __builtin_neon_vst1v4hi ((int16_t *) __a, (__neon_int16x4_t) __b.val) + ({ \ + uint16_t * __ax = __a; \ + uint16x4_t __bx = __b; \ + __builtin_neon_vst1v4hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), (__neon_int16x4_t) __bx.val); \ + }) #define vst1_u32(__a, __b) \ - __builtin_neon_vst1v2si ((int32_t *) __a, (__neon_int32x2_t) __b.val) + ({ \ + uint32_t * __ax = __a; \ + uint32x2_t __bx = __b; \ + __builtin_neon_vst1v2si (__neon_ptr_cast(__builtin_neon_si *, __ax), (__neon_int32x2_t) __bx.val); \ + }) #define vst1_u64(__a, __b) \ - __builtin_neon_vst1v1di ((int64_t *) __a, (__neon_int64x1_t) __b.val) + ({ \ + uint64_t * __ax = __a; \ + uint64x1_t __bx = __b; \ + __builtin_neon_vst1v1di (__neon_ptr_cast(__builtin_neon_di *, __ax), (__neon_int64x1_t) __bx.val); \ + }) #define vst1_p8(__a, __b) \ - __builtin_neon_vst1v8qi ((int8_t *) __a, (__neon_int8x8_t) __b.val) + ({ \ + poly8_t * __ax = __a; \ + poly8x8_t __bx = __b; \ + __builtin_neon_vst1v8qi (__neon_ptr_cast(__builtin_neon_qi *, __ax), (__neon_int8x8_t) __bx.val); \ + }) #define vst1_p16(__a, __b) \ - __builtin_neon_vst1v4hi ((int16_t *) __a, (__neon_int16x4_t) __b.val) + ({ \ + poly16_t * __ax = __a; \ + poly16x4_t __bx = __b; \ + __builtin_neon_vst1v4hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), (__neon_int16x4_t) __bx.val); \ + }) #define vst1q_s8(__a, __b) \ - __builtin_neon_vst1v16qi (__a, __b.val) + ({ \ + int8_t * __ax = __a; \ + int8x16_t __bx = __b; \ + __builtin_neon_vst1v16qi (__neon_ptr_cast(__builtin_neon_qi *, __ax), __bx.val); \ + }) #define vst1q_s16(__a, __b) \ - __builtin_neon_vst1v8hi (__a, __b.val) + ({ \ + int16_t * __ax = __a; \ + int16x8_t __bx = __b; \ + __builtin_neon_vst1v8hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), __bx.val); \ + }) #define vst1q_s32(__a, __b) \ - __builtin_neon_vst1v4si (__a, __b.val) + ({ \ + int32_t * __ax = __a; \ + int32x4_t __bx = __b; \ + __builtin_neon_vst1v4si 
(__neon_ptr_cast(__builtin_neon_si *, __ax), __bx.val); \ + }) #define vst1q_s64(__a, __b) \ - __builtin_neon_vst1v2di (__a, __b.val) + ({ \ + int64_t * __ax = __a; \ + int64x2_t __bx = __b; \ + __builtin_neon_vst1v2di (__neon_ptr_cast(__builtin_neon_di *, __ax), __bx.val); \ + }) #define vst1q_f32(__a, __b) \ - __builtin_neon_vst1v4sf (__a, __b.val) + ({ \ + float32_t * __ax = __a; \ + float32x4_t __bx = __b; \ + __builtin_neon_vst1v4sf (__ax, __bx.val); \ + }) #define vst1q_u8(__a, __b) \ - __builtin_neon_vst1v16qi ((int8_t *) __a, (__neon_int8x16_t) __b.val) + ({ \ + uint8_t * __ax = __a; \ + uint8x16_t __bx = __b; \ + __builtin_neon_vst1v16qi (__neon_ptr_cast(__builtin_neon_qi *, __ax), (__neon_int8x16_t) __bx.val); \ + }) #define vst1q_u16(__a, __b) \ - __builtin_neon_vst1v8hi ((int16_t *) __a, (__neon_int16x8_t) __b.val) + ({ \ + uint16_t * __ax = __a; \ + uint16x8_t __bx = __b; \ + __builtin_neon_vst1v8hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), (__neon_int16x8_t) __bx.val); \ + }) #define vst1q_u32(__a, __b) \ - __builtin_neon_vst1v4si ((int32_t *) __a, (__neon_int32x4_t) __b.val) + ({ \ + uint32_t * __ax = __a; \ + uint32x4_t __bx = __b; \ + __builtin_neon_vst1v4si (__neon_ptr_cast(__builtin_neon_si *, __ax), (__neon_int32x4_t) __bx.val); \ + }) #define vst1q_u64(__a, __b) \ - __builtin_neon_vst1v2di ((int64_t *) __a, (__neon_int64x2_t) __b.val) + ({ \ + uint64_t * __ax = __a; \ + uint64x2_t __bx = __b; \ + __builtin_neon_vst1v2di (__neon_ptr_cast(__builtin_neon_di *, __ax), (__neon_int64x2_t) __bx.val); \ + }) #define vst1q_p8(__a, __b) \ - __builtin_neon_vst1v16qi ((int8_t *) __a, (__neon_int8x16_t) __b.val) + ({ \ + poly8_t * __ax = __a; \ + poly8x16_t __bx = __b; \ + __builtin_neon_vst1v16qi (__neon_ptr_cast(__builtin_neon_qi *, __ax), (__neon_int8x16_t) __bx.val); \ + }) #define vst1q_p16(__a, __b) \ - __builtin_neon_vst1v8hi ((int16_t *) __a, (__neon_int16x8_t) __b.val) + ({ \ + poly16_t * __ax = __a; \ + poly16x8_t __bx = __b; \ + 
__builtin_neon_vst1v8hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), (__neon_int16x8_t) __bx.val); \ + }) #define vst1_lane_s8(__a, __b, __c) \ - __builtin_neon_vst1_lanev8qi (__a, __b.val, __c) + ({ \ + int8_t * __ax = __a; \ + int8x8_t __bx = __b; \ + __builtin_neon_vst1_lanev8qi (__neon_ptr_cast(__builtin_neon_qi *, __ax), __bx.val, __c); \ + }) #define vst1_lane_s16(__a, __b, __c) \ - __builtin_neon_vst1_lanev4hi (__a, __b.val, __c) + ({ \ + int16_t * __ax = __a; \ + int16x4_t __bx = __b; \ + __builtin_neon_vst1_lanev4hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), __bx.val, __c); \ + }) #define vst1_lane_s32(__a, __b, __c) \ - __builtin_neon_vst1_lanev2si (__a, __b.val, __c) + ({ \ + int32_t * __ax = __a; \ + int32x2_t __bx = __b; \ + __builtin_neon_vst1_lanev2si (__neon_ptr_cast(__builtin_neon_si *, __ax), __bx.val, __c); \ + }) #define vst1_lane_f32(__a, __b, __c) \ - __builtin_neon_vst1_lanev2sf (__a, __b.val, __c) + ({ \ + float32_t * __ax = __a; \ + float32x2_t __bx = __b; \ + __builtin_neon_vst1_lanev2sf (__ax, __bx.val, __c); \ + }) #define vst1_lane_u8(__a, __b, __c) \ - __builtin_neon_vst1_lanev8qi ((int8_t *) __a, (__neon_int8x8_t) __b.val, __c) + ({ \ + uint8_t * __ax = __a; \ + uint8x8_t __bx = __b; \ + __builtin_neon_vst1_lanev8qi (__neon_ptr_cast(__builtin_neon_qi *, __ax), (__neon_int8x8_t) __bx.val, __c); \ + }) #define vst1_lane_u16(__a, __b, __c) \ - __builtin_neon_vst1_lanev4hi ((int16_t *) __a, (__neon_int16x4_t) __b.val, __c) + ({ \ + uint16_t * __ax = __a; \ + uint16x4_t __bx = __b; \ + __builtin_neon_vst1_lanev4hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), (__neon_int16x4_t) __bx.val, __c); \ + }) #define vst1_lane_u32(__a, __b, __c) \ - __builtin_neon_vst1_lanev2si ((int32_t *) __a, (__neon_int32x2_t) __b.val, __c) + ({ \ + uint32_t * __ax = __a; \ + uint32x2_t __bx = __b; \ + __builtin_neon_vst1_lanev2si (__neon_ptr_cast(__builtin_neon_si *, __ax), (__neon_int32x2_t) __bx.val, __c); \ + }) #define vst1_lane_p8(__a, __b, __c) \ - 
__builtin_neon_vst1_lanev8qi ((int8_t *) __a, (__neon_int8x8_t) __b.val, __c) + ({ \ + poly8_t * __ax = __a; \ + poly8x8_t __bx = __b; \ + __builtin_neon_vst1_lanev8qi (__neon_ptr_cast(__builtin_neon_qi *, __ax), (__neon_int8x8_t) __bx.val, __c); \ + }) #define vst1_lane_p16(__a, __b, __c) \ - __builtin_neon_vst1_lanev4hi ((int16_t *) __a, (__neon_int16x4_t) __b.val, __c) + ({ \ + poly16_t * __ax = __a; \ + poly16x4_t __bx = __b; \ + __builtin_neon_vst1_lanev4hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), (__neon_int16x4_t) __bx.val, __c); \ + }) #define vst1_lane_s64(__a, __b, __c) \ - __builtin_neon_vst1_lanev1di (__a, __b.val, __c) + ({ \ + int64_t * __ax = __a; \ + int64x1_t __bx = __b; \ + __builtin_neon_vst1_lanev1di (__neon_ptr_cast(__builtin_neon_di *, __ax), __bx.val, __c); \ + }) #define vst1_lane_u64(__a, __b, __c) \ - __builtin_neon_vst1_lanev1di ((int64_t *) __a, (__neon_int64x1_t) __b.val, __c) + ({ \ + uint64_t * __ax = __a; \ + uint64x1_t __bx = __b; \ + __builtin_neon_vst1_lanev1di (__neon_ptr_cast(__builtin_neon_di *, __ax), (__neon_int64x1_t) __bx.val, __c); \ + }) #define vst1q_lane_s8(__a, __b, __c) \ - __builtin_neon_vst1_lanev16qi (__a, __b.val, __c) + ({ \ + int8_t * __ax = __a; \ + int8x16_t __bx = __b; \ + __builtin_neon_vst1_lanev16qi (__neon_ptr_cast(__builtin_neon_qi *, __ax), __bx.val, __c); \ + }) #define vst1q_lane_s16(__a, __b, __c) \ - __builtin_neon_vst1_lanev8hi (__a, __b.val, __c) + ({ \ + int16_t * __ax = __a; \ + int16x8_t __bx = __b; \ + __builtin_neon_vst1_lanev8hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), __bx.val, __c); \ + }) #define vst1q_lane_s32(__a, __b, __c) \ - __builtin_neon_vst1_lanev4si (__a, __b.val, __c) + ({ \ + int32_t * __ax = __a; \ + int32x4_t __bx = __b; \ + __builtin_neon_vst1_lanev4si (__neon_ptr_cast(__builtin_neon_si *, __ax), __bx.val, __c); \ + }) #define vst1q_lane_f32(__a, __b, __c) \ - __builtin_neon_vst1_lanev4sf (__a, __b.val, __c) + ({ \ + float32_t * __ax = __a; \ + float32x4_t __bx = 
__b; \ + __builtin_neon_vst1_lanev4sf (__ax, __bx.val, __c); \ + }) #define vst1q_lane_u8(__a, __b, __c) \ - __builtin_neon_vst1_lanev16qi ((int8_t *) __a, (__neon_int8x16_t) __b.val, __c) + ({ \ + uint8_t * __ax = __a; \ + uint8x16_t __bx = __b; \ + __builtin_neon_vst1_lanev16qi (__neon_ptr_cast(__builtin_neon_qi *, __ax), (__neon_int8x16_t) __bx.val, __c); \ + }) #define vst1q_lane_u16(__a, __b, __c) \ - __builtin_neon_vst1_lanev8hi ((int16_t *) __a, (__neon_int16x8_t) __b.val, __c) + ({ \ + uint16_t * __ax = __a; \ + uint16x8_t __bx = __b; \ + __builtin_neon_vst1_lanev8hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), (__neon_int16x8_t) __bx.val, __c); \ + }) #define vst1q_lane_u32(__a, __b, __c) \ - __builtin_neon_vst1_lanev4si ((int32_t *) __a, (__neon_int32x4_t) __b.val, __c) + ({ \ + uint32_t * __ax = __a; \ + uint32x4_t __bx = __b; \ + __builtin_neon_vst1_lanev4si (__neon_ptr_cast(__builtin_neon_si *, __ax), (__neon_int32x4_t) __bx.val, __c); \ + }) #define vst1q_lane_p8(__a, __b, __c) \ - __builtin_neon_vst1_lanev16qi ((int8_t *) __a, (__neon_int8x16_t) __b.val, __c) + ({ \ + poly8_t * __ax = __a; \ + poly8x16_t __bx = __b; \ + __builtin_neon_vst1_lanev16qi (__neon_ptr_cast(__builtin_neon_qi *, __ax), (__neon_int8x16_t) __bx.val, __c); \ + }) #define vst1q_lane_p16(__a, __b, __c) \ - __builtin_neon_vst1_lanev8hi ((int16_t *) __a, (__neon_int16x8_t) __b.val, __c) + ({ \ + poly16_t * __ax = __a; \ + poly16x8_t __bx = __b; \ + __builtin_neon_vst1_lanev8hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), (__neon_int16x8_t) __bx.val, __c); \ + }) #define vst1q_lane_s64(__a, __b, __c) \ - __builtin_neon_vst1_lanev2di (__a, __b.val, __c) + ({ \ + int64_t * __ax = __a; \ + int64x2_t __bx = __b; \ + __builtin_neon_vst1_lanev2di (__neon_ptr_cast(__builtin_neon_di *, __ax), __bx.val, __c); \ + }) #define vst1q_lane_u64(__a, __b, __c) \ - __builtin_neon_vst1_lanev2di ((int64_t *) __a, (__neon_int64x2_t) __b.val, __c) + ({ \ + uint64_t * __ax = __a; \ + uint64x2_t __bx = 
__b; \ + __builtin_neon_vst1_lanev2di (__neon_ptr_cast(__builtin_neon_di *, __ax), (__neon_int64x2_t) __bx.val, __c); \ + }) #define vld2_s8(__a) \ ({ \ + const int8_t * __ax = __a; \ union { int8x8x2_t __i; __neon_int8x8x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld2v8qi (__a); \ + __rv.__o = __builtin_neon_vld2v8qi (__neon_ptr_cast(const __builtin_neon_qi *, __ax)); \ __rv.__i; \ }) #define vld2_s16(__a) \ ({ \ + const int16_t * __ax = __a; \ union { int16x4x2_t __i; __neon_int16x4x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld2v4hi (__a); \ + __rv.__o = __builtin_neon_vld2v4hi (__neon_ptr_cast(const __builtin_neon_hi *, __ax)); \ __rv.__i; \ }) #define vld2_s32(__a) \ ({ \ + const int32_t * __ax = __a; \ union { int32x2x2_t __i; __neon_int32x2x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld2v2si (__a); \ + __rv.__o = __builtin_neon_vld2v2si (__neon_ptr_cast(const __builtin_neon_si *, __ax)); \ __rv.__i; \ }) #define vld2_f32(__a) \ ({ \ + const float32_t * __ax = __a; \ union { float32x2x2_t __i; __neon_float32x2x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld2v2sf (__a); \ + __rv.__o = __builtin_neon_vld2v2sf (__ax); \ __rv.__i; \ }) #define vld2_u8(__a) \ ({ \ + const uint8_t * __ax = __a; \ union { uint8x8x2_t __i; __neon_int8x8x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld2v8qi ((const int8_t *) __a); \ + __rv.__o = __builtin_neon_vld2v8qi (__neon_ptr_cast(const __builtin_neon_qi *, __ax)); \ __rv.__i; \ }) #define vld2_u16(__a) \ ({ \ + const uint16_t * __ax = __a; \ union { uint16x4x2_t __i; __neon_int16x4x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld2v4hi ((const int16_t *) __a); \ + __rv.__o = __builtin_neon_vld2v4hi (__neon_ptr_cast(const __builtin_neon_hi *, __ax)); \ __rv.__i; \ }) #define vld2_u32(__a) \ ({ \ + const uint32_t * __ax = __a; \ union { uint32x2x2_t __i; __neon_int32x2x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld2v2si ((const int32_t *) __a); \ + __rv.__o = __builtin_neon_vld2v2si (__neon_ptr_cast(const 
__builtin_neon_si *, __ax)); \ __rv.__i; \ }) #define vld2_p8(__a) \ ({ \ + const poly8_t * __ax = __a; \ union { poly8x8x2_t __i; __neon_int8x8x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld2v8qi ((const int8_t *) __a); \ + __rv.__o = __builtin_neon_vld2v8qi (__neon_ptr_cast(const __builtin_neon_qi *, __ax)); \ __rv.__i; \ }) #define vld2_p16(__a) \ ({ \ + const poly16_t * __ax = __a; \ union { poly16x4x2_t __i; __neon_int16x4x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld2v4hi ((const int16_t *) __a); \ + __rv.__o = __builtin_neon_vld2v4hi (__neon_ptr_cast(const __builtin_neon_hi *, __ax)); \ __rv.__i; \ }) #define vld2_s64(__a) \ ({ \ + const int64_t * __ax = __a; \ union { int64x1x2_t __i; __neon_int64x1x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld2v1di (__a); \ + __rv.__o = __builtin_neon_vld2v1di (__neon_ptr_cast(const __builtin_neon_di *, __ax)); \ __rv.__i; \ }) #define vld2_u64(__a) \ ({ \ + const uint64_t * __ax = __a; \ union { uint64x1x2_t __i; __neon_int64x1x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld2v1di ((const int64_t *) __a); \ + __rv.__o = __builtin_neon_vld2v1di (__neon_ptr_cast(const __builtin_neon_di *, __ax)); \ __rv.__i; \ }) #define vld2q_s8(__a) \ ({ \ + const int8_t * __ax = __a; \ union { int8x16x2_t __i; __neon_int8x16x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld2v16qi (__a); \ + __rv.__o = __builtin_neon_vld2v16qi (__neon_ptr_cast(const __builtin_neon_qi *, __ax)); \ __rv.__i; \ }) #define vld2q_s16(__a) \ ({ \ + const int16_t * __ax = __a; \ union { int16x8x2_t __i; __neon_int16x8x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld2v8hi (__a); \ + __rv.__o = __builtin_neon_vld2v8hi (__neon_ptr_cast(const __builtin_neon_hi *, __ax)); \ __rv.__i; \ }) #define vld2q_s32(__a) \ ({ \ + const int32_t * __ax = __a; \ union { int32x4x2_t __i; __neon_int32x4x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld2v4si (__a); \ + __rv.__o = __builtin_neon_vld2v4si (__neon_ptr_cast(const __builtin_neon_si *, __ax)); \ 
__rv.__i; \ }) #define vld2q_f32(__a) \ ({ \ + const float32_t * __ax = __a; \ union { float32x4x2_t __i; __neon_float32x4x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld2v4sf (__a); \ + __rv.__o = __builtin_neon_vld2v4sf (__ax); \ __rv.__i; \ }) #define vld2q_u8(__a) \ ({ \ + const uint8_t * __ax = __a; \ union { uint8x16x2_t __i; __neon_int8x16x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld2v16qi ((const int8_t *) __a); \ + __rv.__o = __builtin_neon_vld2v16qi (__neon_ptr_cast(const __builtin_neon_qi *, __ax)); \ __rv.__i; \ }) #define vld2q_u16(__a) \ ({ \ + const uint16_t * __ax = __a; \ union { uint16x8x2_t __i; __neon_int16x8x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld2v8hi ((const int16_t *) __a); \ + __rv.__o = __builtin_neon_vld2v8hi (__neon_ptr_cast(const __builtin_neon_hi *, __ax)); \ __rv.__i; \ }) #define vld2q_u32(__a) \ ({ \ + const uint32_t * __ax = __a; \ union { uint32x4x2_t __i; __neon_int32x4x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld2v4si ((const int32_t *) __a); \ + __rv.__o = __builtin_neon_vld2v4si (__neon_ptr_cast(const __builtin_neon_si *, __ax)); \ __rv.__i; \ }) #define vld2q_p8(__a) \ ({ \ + const poly8_t * __ax = __a; \ union { poly8x16x2_t __i; __neon_int8x16x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld2v16qi ((const int8_t *) __a); \ + __rv.__o = __builtin_neon_vld2v16qi (__neon_ptr_cast(const __builtin_neon_qi *, __ax)); \ __rv.__i; \ }) #define vld2q_p16(__a) \ ({ \ + const poly16_t * __ax = __a; \ union { poly16x8x2_t __i; __neon_int16x8x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld2v8hi ((const int16_t *) __a); \ + __rv.__o = __builtin_neon_vld2v8hi (__neon_ptr_cast(const __builtin_neon_hi *, __ax)); \ __rv.__i; \ }) #define vld2_lane_s8(__a, __b, __c) \ ({ \ + const int8_t * __ax = __a; \ union { int8x8x2_t __i; __neon_int8x8x2_t __o; } __bu = { __b }; \ union { int8x8x2_t __i; __neon_int8x8x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld2_lanev8qi (__a, __bu.__o, __c); \ + __rv.__o = 
__builtin_neon_vld2_lanev8qi (__neon_ptr_cast(const __builtin_neon_qi *, __ax), __bu.__o, __c); \ __rv.__i; \ }) #define vld2_lane_s16(__a, __b, __c) \ ({ \ + const int16_t * __ax = __a; \ union { int16x4x2_t __i; __neon_int16x4x2_t __o; } __bu = { __b }; \ union { int16x4x2_t __i; __neon_int16x4x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld2_lanev4hi (__a, __bu.__o, __c); \ + __rv.__o = __builtin_neon_vld2_lanev4hi (__neon_ptr_cast(const __builtin_neon_hi *, __ax), __bu.__o, __c); \ __rv.__i; \ }) #define vld2_lane_s32(__a, __b, __c) \ ({ \ + const int32_t * __ax = __a; \ union { int32x2x2_t __i; __neon_int32x2x2_t __o; } __bu = { __b }; \ union { int32x2x2_t __i; __neon_int32x2x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld2_lanev2si (__a, __bu.__o, __c); \ + __rv.__o = __builtin_neon_vld2_lanev2si (__neon_ptr_cast(const __builtin_neon_si *, __ax), __bu.__o, __c); \ __rv.__i; \ }) #define vld2_lane_f32(__a, __b, __c) \ ({ \ + const float32_t * __ax = __a; \ union { float32x2x2_t __i; __neon_float32x2x2_t __o; } __bu = { __b }; \ union { float32x2x2_t __i; __neon_float32x2x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld2_lanev2sf (__a, __bu.__o, __c); \ + __rv.__o = __builtin_neon_vld2_lanev2sf (__ax, __bu.__o, __c); \ __rv.__i; \ }) #define vld2_lane_u8(__a, __b, __c) \ ({ \ + const uint8_t * __ax = __a; \ union { uint8x8x2_t __i; __neon_int8x8x2_t __o; } __bu = { __b }; \ union { uint8x8x2_t __i; __neon_int8x8x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld2_lanev8qi ((const int8_t *) __a, __bu.__o, __c); \ + __rv.__o = __builtin_neon_vld2_lanev8qi (__neon_ptr_cast(const __builtin_neon_qi *, __ax), __bu.__o, __c); \ __rv.__i; \ }) #define vld2_lane_u16(__a, __b, __c) \ ({ \ + const uint16_t * __ax = __a; \ union { uint16x4x2_t __i; __neon_int16x4x2_t __o; } __bu = { __b }; \ union { uint16x4x2_t __i; __neon_int16x4x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld2_lanev4hi ((const int16_t *) __a, __bu.__o, __c); \ + __rv.__o = 
__builtin_neon_vld2_lanev4hi (__neon_ptr_cast(const __builtin_neon_hi *, __ax), __bu.__o, __c); \ __rv.__i; \ }) #define vld2_lane_u32(__a, __b, __c) \ ({ \ + const uint32_t * __ax = __a; \ union { uint32x2x2_t __i; __neon_int32x2x2_t __o; } __bu = { __b }; \ union { uint32x2x2_t __i; __neon_int32x2x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld2_lanev2si ((const int32_t *) __a, __bu.__o, __c); \ + __rv.__o = __builtin_neon_vld2_lanev2si (__neon_ptr_cast(const __builtin_neon_si *, __ax), __bu.__o, __c); \ __rv.__i; \ }) #define vld2_lane_p8(__a, __b, __c) \ ({ \ + const poly8_t * __ax = __a; \ union { poly8x8x2_t __i; __neon_int8x8x2_t __o; } __bu = { __b }; \ union { poly8x8x2_t __i; __neon_int8x8x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld2_lanev8qi ((const int8_t *) __a, __bu.__o, __c); \ + __rv.__o = __builtin_neon_vld2_lanev8qi (__neon_ptr_cast(const __builtin_neon_qi *, __ax), __bu.__o, __c); \ __rv.__i; \ }) #define vld2_lane_p16(__a, __b, __c) \ ({ \ + const poly16_t * __ax = __a; \ union { poly16x4x2_t __i; __neon_int16x4x2_t __o; } __bu = { __b }; \ union { poly16x4x2_t __i; __neon_int16x4x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld2_lanev4hi ((const int16_t *) __a, __bu.__o, __c); \ + __rv.__o = __builtin_neon_vld2_lanev4hi (__neon_ptr_cast(const __builtin_neon_hi *, __ax), __bu.__o, __c); \ __rv.__i; \ }) #define vld2q_lane_s16(__a, __b, __c) \ ({ \ + const int16_t * __ax = __a; \ union { int16x8x2_t __i; __neon_int16x8x2_t __o; } __bu = { __b }; \ union { int16x8x2_t __i; __neon_int16x8x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld2_lanev8hi (__a, __bu.__o, __c); \ + __rv.__o = __builtin_neon_vld2_lanev8hi (__neon_ptr_cast(const __builtin_neon_hi *, __ax), __bu.__o, __c); \ __rv.__i; \ }) #define vld2q_lane_s32(__a, __b, __c) \ ({ \ + const int32_t * __ax = __a; \ union { int32x4x2_t __i; __neon_int32x4x2_t __o; } __bu = { __b }; \ union { int32x4x2_t __i; __neon_int32x4x2_t __o; } __rv; \ - __rv.__o = 
__builtin_neon_vld2_lanev4si (__a, __bu.__o, __c); \ + __rv.__o = __builtin_neon_vld2_lanev4si (__neon_ptr_cast(const __builtin_neon_si *, __ax), __bu.__o, __c); \ __rv.__i; \ }) #define vld2q_lane_f32(__a, __b, __c) \ ({ \ + const float32_t * __ax = __a; \ union { float32x4x2_t __i; __neon_float32x4x2_t __o; } __bu = { __b }; \ union { float32x4x2_t __i; __neon_float32x4x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld2_lanev4sf (__a, __bu.__o, __c); \ + __rv.__o = __builtin_neon_vld2_lanev4sf (__ax, __bu.__o, __c); \ __rv.__i; \ }) #define vld2q_lane_u16(__a, __b, __c) \ ({ \ + const uint16_t * __ax = __a; \ union { uint16x8x2_t __i; __neon_int16x8x2_t __o; } __bu = { __b }; \ union { uint16x8x2_t __i; __neon_int16x8x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld2_lanev8hi ((const int16_t *) __a, __bu.__o, __c); \ + __rv.__o = __builtin_neon_vld2_lanev8hi (__neon_ptr_cast(const __builtin_neon_hi *, __ax), __bu.__o, __c); \ __rv.__i; \ }) #define vld2q_lane_u32(__a, __b, __c) \ ({ \ + const uint32_t * __ax = __a; \ union { uint32x4x2_t __i; __neon_int32x4x2_t __o; } __bu = { __b }; \ union { uint32x4x2_t __i; __neon_int32x4x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld2_lanev4si ((const int32_t *) __a, __bu.__o, __c); \ + __rv.__o = __builtin_neon_vld2_lanev4si (__neon_ptr_cast(const __builtin_neon_si *, __ax), __bu.__o, __c); \ __rv.__i; \ }) #define vld2q_lane_p16(__a, __b, __c) \ ({ \ + const poly16_t * __ax = __a; \ union { poly16x8x2_t __i; __neon_int16x8x2_t __o; } __bu = { __b }; \ union { poly16x8x2_t __i; __neon_int16x8x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld2_lanev8hi ((const int16_t *) __a, __bu.__o, __c); \ + __rv.__o = __builtin_neon_vld2_lanev8hi (__neon_ptr_cast(const __builtin_neon_hi *, __ax), __bu.__o, __c); \ __rv.__i; \ }) #define vld2_dup_s8(__a) \ ({ \ + const int8_t * __ax = __a; \ union { int8x8x2_t __i; __neon_int8x8x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld2_dupv8qi (__a); \ + __rv.__o = 
__builtin_neon_vld2_dupv8qi (__neon_ptr_cast(const __builtin_neon_qi *, __ax)); \ __rv.__i; \ }) #define vld2_dup_s16(__a) \ ({ \ + const int16_t * __ax = __a; \ union { int16x4x2_t __i; __neon_int16x4x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld2_dupv4hi (__a); \ + __rv.__o = __builtin_neon_vld2_dupv4hi (__neon_ptr_cast(const __builtin_neon_hi *, __ax)); \ __rv.__i; \ }) #define vld2_dup_s32(__a) \ ({ \ + const int32_t * __ax = __a; \ union { int32x2x2_t __i; __neon_int32x2x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld2_dupv2si (__a); \ + __rv.__o = __builtin_neon_vld2_dupv2si (__neon_ptr_cast(const __builtin_neon_si *, __ax)); \ __rv.__i; \ }) #define vld2_dup_f32(__a) \ ({ \ + const float32_t * __ax = __a; \ union { float32x2x2_t __i; __neon_float32x2x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld2_dupv2sf (__a); \ + __rv.__o = __builtin_neon_vld2_dupv2sf (__ax); \ __rv.__i; \ }) #define vld2_dup_u8(__a) \ ({ \ + const uint8_t * __ax = __a; \ union { uint8x8x2_t __i; __neon_int8x8x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld2_dupv8qi ((const int8_t *) __a); \ + __rv.__o = __builtin_neon_vld2_dupv8qi (__neon_ptr_cast(const __builtin_neon_qi *, __ax)); \ __rv.__i; \ }) #define vld2_dup_u16(__a) \ ({ \ + const uint16_t * __ax = __a; \ union { uint16x4x2_t __i; __neon_int16x4x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld2_dupv4hi ((const int16_t *) __a); \ + __rv.__o = __builtin_neon_vld2_dupv4hi (__neon_ptr_cast(const __builtin_neon_hi *, __ax)); \ __rv.__i; \ }) #define vld2_dup_u32(__a) \ ({ \ + const uint32_t * __ax = __a; \ union { uint32x2x2_t __i; __neon_int32x2x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld2_dupv2si ((const int32_t *) __a); \ + __rv.__o = __builtin_neon_vld2_dupv2si (__neon_ptr_cast(const __builtin_neon_si *, __ax)); \ __rv.__i; \ }) #define vld2_dup_p8(__a) \ ({ \ + const poly8_t * __ax = __a; \ union { poly8x8x2_t __i; __neon_int8x8x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld2_dupv8qi ((const int8_t *) 
__a); \ + __rv.__o = __builtin_neon_vld2_dupv8qi (__neon_ptr_cast(const __builtin_neon_qi *, __ax)); \ __rv.__i; \ }) #define vld2_dup_p16(__a) \ ({ \ + const poly16_t * __ax = __a; \ union { poly16x4x2_t __i; __neon_int16x4x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld2_dupv4hi ((const int16_t *) __a); \ + __rv.__o = __builtin_neon_vld2_dupv4hi (__neon_ptr_cast(const __builtin_neon_hi *, __ax)); \ __rv.__i; \ }) #define vld2_dup_s64(__a) \ ({ \ + const int64_t * __ax = __a; \ union { int64x1x2_t __i; __neon_int64x1x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld2_dupv1di (__a); \ + __rv.__o = __builtin_neon_vld2_dupv1di (__neon_ptr_cast(const __builtin_neon_di *, __ax)); \ __rv.__i; \ }) #define vld2_dup_u64(__a) \ ({ \ + const uint64_t * __ax = __a; \ union { uint64x1x2_t __i; __neon_int64x1x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld2_dupv1di ((const int64_t *) __a); \ + __rv.__o = __builtin_neon_vld2_dupv1di (__neon_ptr_cast(const __builtin_neon_di *, __ax)); \ __rv.__i; \ }) #define vst2_s8(__a, __b) \ ({ \ + int8_t * __ax = __a; \ union { int8x8x2_t __i; __neon_int8x8x2_t __o; } __bu = { __b }; \ - __builtin_neon_vst2v8qi (__a, __bu.__o); \ + __builtin_neon_vst2v8qi (__neon_ptr_cast(__builtin_neon_qi *, __ax), __bu.__o); \ }) #define vst2_s16(__a, __b) \ ({ \ + int16_t * __ax = __a; \ union { int16x4x2_t __i; __neon_int16x4x2_t __o; } __bu = { __b }; \ - __builtin_neon_vst2v4hi (__a, __bu.__o); \ + __builtin_neon_vst2v4hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), __bu.__o); \ }) #define vst2_s32(__a, __b) \ ({ \ + int32_t * __ax = __a; \ union { int32x2x2_t __i; __neon_int32x2x2_t __o; } __bu = { __b }; \ - __builtin_neon_vst2v2si (__a, __bu.__o); \ + __builtin_neon_vst2v2si (__neon_ptr_cast(__builtin_neon_si *, __ax), __bu.__o); \ }) #define vst2_f32(__a, __b) \ ({ \ + float32_t * __ax = __a; \ union { float32x2x2_t __i; __neon_float32x2x2_t __o; } __bu = { __b }; \ - __builtin_neon_vst2v2sf (__a, __bu.__o); \ + __builtin_neon_vst2v2sf 
(__ax, __bu.__o); \ }) #define vst2_u8(__a, __b) \ ({ \ + uint8_t * __ax = __a; \ union { uint8x8x2_t __i; __neon_int8x8x2_t __o; } __bu = { __b }; \ - __builtin_neon_vst2v8qi ((int8_t *) __a, __bu.__o); \ + __builtin_neon_vst2v8qi (__neon_ptr_cast(__builtin_neon_qi *, __ax), __bu.__o); \ }) #define vst2_u16(__a, __b) \ ({ \ + uint16_t * __ax = __a; \ union { uint16x4x2_t __i; __neon_int16x4x2_t __o; } __bu = { __b }; \ - __builtin_neon_vst2v4hi ((int16_t *) __a, __bu.__o); \ + __builtin_neon_vst2v4hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), __bu.__o); \ }) #define vst2_u32(__a, __b) \ ({ \ + uint32_t * __ax = __a; \ union { uint32x2x2_t __i; __neon_int32x2x2_t __o; } __bu = { __b }; \ - __builtin_neon_vst2v2si ((int32_t *) __a, __bu.__o); \ + __builtin_neon_vst2v2si (__neon_ptr_cast(__builtin_neon_si *, __ax), __bu.__o); \ }) #define vst2_p8(__a, __b) \ ({ \ + poly8_t * __ax = __a; \ union { poly8x8x2_t __i; __neon_int8x8x2_t __o; } __bu = { __b }; \ - __builtin_neon_vst2v8qi ((int8_t *) __a, __bu.__o); \ + __builtin_neon_vst2v8qi (__neon_ptr_cast(__builtin_neon_qi *, __ax), __bu.__o); \ }) #define vst2_p16(__a, __b) \ ({ \ + poly16_t * __ax = __a; \ union { poly16x4x2_t __i; __neon_int16x4x2_t __o; } __bu = { __b }; \ - __builtin_neon_vst2v4hi ((int16_t *) __a, __bu.__o); \ + __builtin_neon_vst2v4hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), __bu.__o); \ }) #define vst2_s64(__a, __b) \ ({ \ + int64_t * __ax = __a; \ union { int64x1x2_t __i; __neon_int64x1x2_t __o; } __bu = { __b }; \ - __builtin_neon_vst2v1di (__a, __bu.__o); \ + __builtin_neon_vst2v1di (__neon_ptr_cast(__builtin_neon_di *, __ax), __bu.__o); \ }) #define vst2_u64(__a, __b) \ ({ \ + uint64_t * __ax = __a; \ union { uint64x1x2_t __i; __neon_int64x1x2_t __o; } __bu = { __b }; \ - __builtin_neon_vst2v1di ((int64_t *) __a, __bu.__o); \ + __builtin_neon_vst2v1di (__neon_ptr_cast(__builtin_neon_di *, __ax), __bu.__o); \ }) #define vst2q_s8(__a, __b) \ ({ \ + int8_t * __ax = __a; \ union { 
int8x16x2_t __i; __neon_int8x16x2_t __o; } __bu = { __b }; \ - __builtin_neon_vst2v16qi (__a, __bu.__o); \ + __builtin_neon_vst2v16qi (__neon_ptr_cast(__builtin_neon_qi *, __ax), __bu.__o); \ }) #define vst2q_s16(__a, __b) \ ({ \ + int16_t * __ax = __a; \ union { int16x8x2_t __i; __neon_int16x8x2_t __o; } __bu = { __b }; \ - __builtin_neon_vst2v8hi (__a, __bu.__o); \ + __builtin_neon_vst2v8hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), __bu.__o); \ }) #define vst2q_s32(__a, __b) \ ({ \ + int32_t * __ax = __a; \ union { int32x4x2_t __i; __neon_int32x4x2_t __o; } __bu = { __b }; \ - __builtin_neon_vst2v4si (__a, __bu.__o); \ + __builtin_neon_vst2v4si (__neon_ptr_cast(__builtin_neon_si *, __ax), __bu.__o); \ }) #define vst2q_f32(__a, __b) \ ({ \ + float32_t * __ax = __a; \ union { float32x4x2_t __i; __neon_float32x4x2_t __o; } __bu = { __b }; \ - __builtin_neon_vst2v4sf (__a, __bu.__o); \ + __builtin_neon_vst2v4sf (__ax, __bu.__o); \ }) #define vst2q_u8(__a, __b) \ ({ \ + uint8_t * __ax = __a; \ union { uint8x16x2_t __i; __neon_int8x16x2_t __o; } __bu = { __b }; \ - __builtin_neon_vst2v16qi ((int8_t *) __a, __bu.__o); \ + __builtin_neon_vst2v16qi (__neon_ptr_cast(__builtin_neon_qi *, __ax), __bu.__o); \ }) #define vst2q_u16(__a, __b) \ ({ \ + uint16_t * __ax = __a; \ union { uint16x8x2_t __i; __neon_int16x8x2_t __o; } __bu = { __b }; \ - __builtin_neon_vst2v8hi ((int16_t *) __a, __bu.__o); \ + __builtin_neon_vst2v8hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), __bu.__o); \ }) #define vst2q_u32(__a, __b) \ ({ \ + uint32_t * __ax = __a; \ union { uint32x4x2_t __i; __neon_int32x4x2_t __o; } __bu = { __b }; \ - __builtin_neon_vst2v4si ((int32_t *) __a, __bu.__o); \ + __builtin_neon_vst2v4si (__neon_ptr_cast(__builtin_neon_si *, __ax), __bu.__o); \ }) #define vst2q_p8(__a, __b) \ ({ \ + poly8_t * __ax = __a; \ union { poly8x16x2_t __i; __neon_int8x16x2_t __o; } __bu = { __b }; \ - __builtin_neon_vst2v16qi ((int8_t *) __a, __bu.__o); \ + __builtin_neon_vst2v16qi 
(__neon_ptr_cast(__builtin_neon_qi *, __ax), __bu.__o); \ }) #define vst2q_p16(__a, __b) \ ({ \ + poly16_t * __ax = __a; \ union { poly16x8x2_t __i; __neon_int16x8x2_t __o; } __bu = { __b }; \ - __builtin_neon_vst2v8hi ((int16_t *) __a, __bu.__o); \ + __builtin_neon_vst2v8hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), __bu.__o); \ }) #define vst2_lane_s8(__a, __b, __c) \ ({ \ + int8_t * __ax = __a; \ union { int8x8x2_t __i; __neon_int8x8x2_t __o; } __bu = { __b }; \ - __builtin_neon_vst2_lanev8qi (__a, __bu.__o, __c); \ + __builtin_neon_vst2_lanev8qi (__neon_ptr_cast(__builtin_neon_qi *, __ax), __bu.__o, __c); \ }) #define vst2_lane_s16(__a, __b, __c) \ ({ \ + int16_t * __ax = __a; \ union { int16x4x2_t __i; __neon_int16x4x2_t __o; } __bu = { __b }; \ - __builtin_neon_vst2_lanev4hi (__a, __bu.__o, __c); \ + __builtin_neon_vst2_lanev4hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), __bu.__o, __c); \ }) #define vst2_lane_s32(__a, __b, __c) \ ({ \ + int32_t * __ax = __a; \ union { int32x2x2_t __i; __neon_int32x2x2_t __o; } __bu = { __b }; \ - __builtin_neon_vst2_lanev2si (__a, __bu.__o, __c); \ + __builtin_neon_vst2_lanev2si (__neon_ptr_cast(__builtin_neon_si *, __ax), __bu.__o, __c); \ }) #define vst2_lane_f32(__a, __b, __c) \ ({ \ + float32_t * __ax = __a; \ union { float32x2x2_t __i; __neon_float32x2x2_t __o; } __bu = { __b }; \ - __builtin_neon_vst2_lanev2sf (__a, __bu.__o, __c); \ + __builtin_neon_vst2_lanev2sf (__ax, __bu.__o, __c); \ }) #define vst2_lane_u8(__a, __b, __c) \ ({ \ + uint8_t * __ax = __a; \ union { uint8x8x2_t __i; __neon_int8x8x2_t __o; } __bu = { __b }; \ - __builtin_neon_vst2_lanev8qi ((int8_t *) __a, __bu.__o, __c); \ + __builtin_neon_vst2_lanev8qi (__neon_ptr_cast(__builtin_neon_qi *, __ax), __bu.__o, __c); \ }) #define vst2_lane_u16(__a, __b, __c) \ ({ \ + uint16_t * __ax = __a; \ union { uint16x4x2_t __i; __neon_int16x4x2_t __o; } __bu = { __b }; \ - __builtin_neon_vst2_lanev4hi ((int16_t *) __a, __bu.__o, __c); \ + 
__builtin_neon_vst2_lanev4hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), __bu.__o, __c); \ }) #define vst2_lane_u32(__a, __b, __c) \ ({ \ + uint32_t * __ax = __a; \ union { uint32x2x2_t __i; __neon_int32x2x2_t __o; } __bu = { __b }; \ - __builtin_neon_vst2_lanev2si ((int32_t *) __a, __bu.__o, __c); \ + __builtin_neon_vst2_lanev2si (__neon_ptr_cast(__builtin_neon_si *, __ax), __bu.__o, __c); \ }) #define vst2_lane_p8(__a, __b, __c) \ ({ \ + poly8_t * __ax = __a; \ union { poly8x8x2_t __i; __neon_int8x8x2_t __o; } __bu = { __b }; \ - __builtin_neon_vst2_lanev8qi ((int8_t *) __a, __bu.__o, __c); \ + __builtin_neon_vst2_lanev8qi (__neon_ptr_cast(__builtin_neon_qi *, __ax), __bu.__o, __c); \ }) #define vst2_lane_p16(__a, __b, __c) \ ({ \ + poly16_t * __ax = __a; \ union { poly16x4x2_t __i; __neon_int16x4x2_t __o; } __bu = { __b }; \ - __builtin_neon_vst2_lanev4hi ((int16_t *) __a, __bu.__o, __c); \ + __builtin_neon_vst2_lanev4hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), __bu.__o, __c); \ }) #define vst2q_lane_s16(__a, __b, __c) \ ({ \ + int16_t * __ax = __a; \ union { int16x8x2_t __i; __neon_int16x8x2_t __o; } __bu = { __b }; \ - __builtin_neon_vst2_lanev8hi (__a, __bu.__o, __c); \ + __builtin_neon_vst2_lanev8hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), __bu.__o, __c); \ }) #define vst2q_lane_s32(__a, __b, __c) \ ({ \ + int32_t * __ax = __a; \ union { int32x4x2_t __i; __neon_int32x4x2_t __o; } __bu = { __b }; \ - __builtin_neon_vst2_lanev4si (__a, __bu.__o, __c); \ + __builtin_neon_vst2_lanev4si (__neon_ptr_cast(__builtin_neon_si *, __ax), __bu.__o, __c); \ }) #define vst2q_lane_f32(__a, __b, __c) \ ({ \ + float32_t * __ax = __a; \ union { float32x4x2_t __i; __neon_float32x4x2_t __o; } __bu = { __b }; \ - __builtin_neon_vst2_lanev4sf (__a, __bu.__o, __c); \ + __builtin_neon_vst2_lanev4sf (__ax, __bu.__o, __c); \ }) #define vst2q_lane_u16(__a, __b, __c) \ ({ \ + uint16_t * __ax = __a; \ union { uint16x8x2_t __i; __neon_int16x8x2_t __o; } __bu = { __b }; \ - 
__builtin_neon_vst2_lanev8hi ((int16_t *) __a, __bu.__o, __c); \ + __builtin_neon_vst2_lanev8hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), __bu.__o, __c); \ }) #define vst2q_lane_u32(__a, __b, __c) \ ({ \ + uint32_t * __ax = __a; \ union { uint32x4x2_t __i; __neon_int32x4x2_t __o; } __bu = { __b }; \ - __builtin_neon_vst2_lanev4si ((int32_t *) __a, __bu.__o, __c); \ + __builtin_neon_vst2_lanev4si (__neon_ptr_cast(__builtin_neon_si *, __ax), __bu.__o, __c); \ }) #define vst2q_lane_p16(__a, __b, __c) \ ({ \ + poly16_t * __ax = __a; \ union { poly16x8x2_t __i; __neon_int16x8x2_t __o; } __bu = { __b }; \ - __builtin_neon_vst2_lanev8hi ((int16_t *) __a, __bu.__o, __c); \ + __builtin_neon_vst2_lanev8hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), __bu.__o, __c); \ }) #define vld3_s8(__a) \ ({ \ + const int8_t * __ax = __a; \ union { int8x8x3_t __i; __neon_int8x8x3_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld3v8qi (__a); \ + __rv.__o = __builtin_neon_vld3v8qi (__neon_ptr_cast(const __builtin_neon_qi *, __ax)); \ __rv.__i; \ }) #define vld3_s16(__a) \ ({ \ + const int16_t * __ax = __a; \ union { int16x4x3_t __i; __neon_int16x4x3_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld3v4hi (__a); \ + __rv.__o = __builtin_neon_vld3v4hi (__neon_ptr_cast(const __builtin_neon_hi *, __ax)); \ __rv.__i; \ }) #define vld3_s32(__a) \ ({ \ + const int32_t * __ax = __a; \ union { int32x2x3_t __i; __neon_int32x2x3_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld3v2si (__a); \ + __rv.__o = __builtin_neon_vld3v2si (__neon_ptr_cast(const __builtin_neon_si *, __ax)); \ __rv.__i; \ }) #define vld3_f32(__a) \ ({ \ + const float32_t * __ax = __a; \ union { float32x2x3_t __i; __neon_float32x2x3_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld3v2sf (__a); \ + __rv.__o = __builtin_neon_vld3v2sf (__ax); \ __rv.__i; \ }) #define vld3_u8(__a) \ ({ \ + const uint8_t * __ax = __a; \ union { uint8x8x3_t __i; __neon_int8x8x3_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld3v8qi ((const int8_t *) __a); 
\ + __rv.__o = __builtin_neon_vld3v8qi (__neon_ptr_cast(const __builtin_neon_qi *, __ax)); \ __rv.__i; \ }) #define vld3_u16(__a) \ ({ \ + const uint16_t * __ax = __a; \ union { uint16x4x3_t __i; __neon_int16x4x3_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld3v4hi ((const int16_t *) __a); \ + __rv.__o = __builtin_neon_vld3v4hi (__neon_ptr_cast(const __builtin_neon_hi *, __ax)); \ __rv.__i; \ }) #define vld3_u32(__a) \ ({ \ + const uint32_t * __ax = __a; \ union { uint32x2x3_t __i; __neon_int32x2x3_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld3v2si ((const int32_t *) __a); \ + __rv.__o = __builtin_neon_vld3v2si (__neon_ptr_cast(const __builtin_neon_si *, __ax)); \ __rv.__i; \ }) #define vld3_p8(__a) \ ({ \ + const poly8_t * __ax = __a; \ union { poly8x8x3_t __i; __neon_int8x8x3_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld3v8qi ((const int8_t *) __a); \ + __rv.__o = __builtin_neon_vld3v8qi (__neon_ptr_cast(const __builtin_neon_qi *, __ax)); \ __rv.__i; \ }) #define vld3_p16(__a) \ ({ \ + const poly16_t * __ax = __a; \ union { poly16x4x3_t __i; __neon_int16x4x3_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld3v4hi ((const int16_t *) __a); \ + __rv.__o = __builtin_neon_vld3v4hi (__neon_ptr_cast(const __builtin_neon_hi *, __ax)); \ __rv.__i; \ }) #define vld3_s64(__a) \ ({ \ + const int64_t * __ax = __a; \ union { int64x1x3_t __i; __neon_int64x1x3_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld3v1di (__a); \ + __rv.__o = __builtin_neon_vld3v1di (__neon_ptr_cast(const __builtin_neon_di *, __ax)); \ __rv.__i; \ }) #define vld3_u64(__a) \ ({ \ + const uint64_t * __ax = __a; \ union { uint64x1x3_t __i; __neon_int64x1x3_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld3v1di ((const int64_t *) __a); \ + __rv.__o = __builtin_neon_vld3v1di (__neon_ptr_cast(const __builtin_neon_di *, __ax)); \ __rv.__i; \ }) #define vld3q_s8(__a) \ ({ \ + const int8_t * __ax = __a; \ union { int8x16x3_t __i; __neon_int8x16x3_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld3v16qi (__a); 
\ + __rv.__o = __builtin_neon_vld3v16qi (__neon_ptr_cast(const __builtin_neon_qi *, __ax)); \ __rv.__i; \ }) #define vld3q_s16(__a) \ ({ \ + const int16_t * __ax = __a; \ union { int16x8x3_t __i; __neon_int16x8x3_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld3v8hi (__a); \ + __rv.__o = __builtin_neon_vld3v8hi (__neon_ptr_cast(const __builtin_neon_hi *, __ax)); \ __rv.__i; \ }) #define vld3q_s32(__a) \ ({ \ + const int32_t * __ax = __a; \ union { int32x4x3_t __i; __neon_int32x4x3_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld3v4si (__a); \ + __rv.__o = __builtin_neon_vld3v4si (__neon_ptr_cast(const __builtin_neon_si *, __ax)); \ __rv.__i; \ }) #define vld3q_f32(__a) \ ({ \ + const float32_t * __ax = __a; \ union { float32x4x3_t __i; __neon_float32x4x3_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld3v4sf (__a); \ + __rv.__o = __builtin_neon_vld3v4sf (__ax); \ __rv.__i; \ }) #define vld3q_u8(__a) \ ({ \ + const uint8_t * __ax = __a; \ union { uint8x16x3_t __i; __neon_int8x16x3_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld3v16qi ((const int8_t *) __a); \ + __rv.__o = __builtin_neon_vld3v16qi (__neon_ptr_cast(const __builtin_neon_qi *, __ax)); \ __rv.__i; \ }) #define vld3q_u16(__a) \ ({ \ + const uint16_t * __ax = __a; \ union { uint16x8x3_t __i; __neon_int16x8x3_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld3v8hi ((const int16_t *) __a); \ + __rv.__o = __builtin_neon_vld3v8hi (__neon_ptr_cast(const __builtin_neon_hi *, __ax)); \ __rv.__i; \ }) #define vld3q_u32(__a) \ ({ \ + const uint32_t * __ax = __a; \ union { uint32x4x3_t __i; __neon_int32x4x3_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld3v4si ((const int32_t *) __a); \ + __rv.__o = __builtin_neon_vld3v4si (__neon_ptr_cast(const __builtin_neon_si *, __ax)); \ __rv.__i; \ }) #define vld3q_p8(__a) \ ({ \ + const poly8_t * __ax = __a; \ union { poly8x16x3_t __i; __neon_int8x16x3_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld3v16qi ((const int8_t *) __a); \ + __rv.__o = __builtin_neon_vld3v16qi 
(__neon_ptr_cast(const __builtin_neon_qi *, __ax)); \ __rv.__i; \ }) #define vld3q_p16(__a) \ ({ \ + const poly16_t * __ax = __a; \ union { poly16x8x3_t __i; __neon_int16x8x3_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld3v8hi ((const int16_t *) __a); \ + __rv.__o = __builtin_neon_vld3v8hi (__neon_ptr_cast(const __builtin_neon_hi *, __ax)); \ __rv.__i; \ }) #define vld3_lane_s8(__a, __b, __c) \ ({ \ + const int8_t * __ax = __a; \ union { int8x8x3_t __i; __neon_int8x8x3_t __o; } __bu = { __b }; \ union { int8x8x3_t __i; __neon_int8x8x3_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld3_lanev8qi (__a, __bu.__o, __c); \ + __rv.__o = __builtin_neon_vld3_lanev8qi (__neon_ptr_cast(const __builtin_neon_qi *, __ax), __bu.__o, __c); \ __rv.__i; \ }) #define vld3_lane_s16(__a, __b, __c) \ ({ \ + const int16_t * __ax = __a; \ union { int16x4x3_t __i; __neon_int16x4x3_t __o; } __bu = { __b }; \ union { int16x4x3_t __i; __neon_int16x4x3_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld3_lanev4hi (__a, __bu.__o, __c); \ + __rv.__o = __builtin_neon_vld3_lanev4hi (__neon_ptr_cast(const __builtin_neon_hi *, __ax), __bu.__o, __c); \ __rv.__i; \ }) #define vld3_lane_s32(__a, __b, __c) \ ({ \ + const int32_t * __ax = __a; \ union { int32x2x3_t __i; __neon_int32x2x3_t __o; } __bu = { __b }; \ union { int32x2x3_t __i; __neon_int32x2x3_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld3_lanev2si (__a, __bu.__o, __c); \ + __rv.__o = __builtin_neon_vld3_lanev2si (__neon_ptr_cast(const __builtin_neon_si *, __ax), __bu.__o, __c); \ __rv.__i; \ }) #define vld3_lane_f32(__a, __b, __c) \ ({ \ + const float32_t * __ax = __a; \ union { float32x2x3_t __i; __neon_float32x2x3_t __o; } __bu = { __b }; \ union { float32x2x3_t __i; __neon_float32x2x3_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld3_lanev2sf (__a, __bu.__o, __c); \ + __rv.__o = __builtin_neon_vld3_lanev2sf (__ax, __bu.__o, __c); \ __rv.__i; \ }) #define vld3_lane_u8(__a, __b, __c) \ ({ \ + const uint8_t * __ax = __a; \ union { 
uint8x8x3_t __i; __neon_int8x8x3_t __o; } __bu = { __b }; \ union { uint8x8x3_t __i; __neon_int8x8x3_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld3_lanev8qi ((const int8_t *) __a, __bu.__o, __c); \ + __rv.__o = __builtin_neon_vld3_lanev8qi (__neon_ptr_cast(const __builtin_neon_qi *, __ax), __bu.__o, __c); \ __rv.__i; \ }) #define vld3_lane_u16(__a, __b, __c) \ ({ \ + const uint16_t * __ax = __a; \ union { uint16x4x3_t __i; __neon_int16x4x3_t __o; } __bu = { __b }; \ union { uint16x4x3_t __i; __neon_int16x4x3_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld3_lanev4hi ((const int16_t *) __a, __bu.__o, __c); \ + __rv.__o = __builtin_neon_vld3_lanev4hi (__neon_ptr_cast(const __builtin_neon_hi *, __ax), __bu.__o, __c); \ __rv.__i; \ }) #define vld3_lane_u32(__a, __b, __c) \ ({ \ + const uint32_t * __ax = __a; \ union { uint32x2x3_t __i; __neon_int32x2x3_t __o; } __bu = { __b }; \ union { uint32x2x3_t __i; __neon_int32x2x3_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld3_lanev2si ((const int32_t *) __a, __bu.__o, __c); \ + __rv.__o = __builtin_neon_vld3_lanev2si (__neon_ptr_cast(const __builtin_neon_si *, __ax), __bu.__o, __c); \ __rv.__i; \ }) #define vld3_lane_p8(__a, __b, __c) \ ({ \ + const poly8_t * __ax = __a; \ union { poly8x8x3_t __i; __neon_int8x8x3_t __o; } __bu = { __b }; \ union { poly8x8x3_t __i; __neon_int8x8x3_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld3_lanev8qi ((const int8_t *) __a, __bu.__o, __c); \ + __rv.__o = __builtin_neon_vld3_lanev8qi (__neon_ptr_cast(const __builtin_neon_qi *, __ax), __bu.__o, __c); \ __rv.__i; \ }) #define vld3_lane_p16(__a, __b, __c) \ ({ \ + const poly16_t * __ax = __a; \ union { poly16x4x3_t __i; __neon_int16x4x3_t __o; } __bu = { __b }; \ union { poly16x4x3_t __i; __neon_int16x4x3_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld3_lanev4hi ((const int16_t *) __a, __bu.__o, __c); \ + __rv.__o = __builtin_neon_vld3_lanev4hi (__neon_ptr_cast(const __builtin_neon_hi *, __ax), __bu.__o, __c); \ __rv.__i; \ }) #define 
vld3q_lane_s16(__a, __b, __c) \ ({ \ + const int16_t * __ax = __a; \ union { int16x8x3_t __i; __neon_int16x8x3_t __o; } __bu = { __b }; \ union { int16x8x3_t __i; __neon_int16x8x3_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld3_lanev8hi (__a, __bu.__o, __c); \ + __rv.__o = __builtin_neon_vld3_lanev8hi (__neon_ptr_cast(const __builtin_neon_hi *, __ax), __bu.__o, __c); \ __rv.__i; \ }) #define vld3q_lane_s32(__a, __b, __c) \ ({ \ + const int32_t * __ax = __a; \ union { int32x4x3_t __i; __neon_int32x4x3_t __o; } __bu = { __b }; \ union { int32x4x3_t __i; __neon_int32x4x3_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld3_lanev4si (__a, __bu.__o, __c); \ + __rv.__o = __builtin_neon_vld3_lanev4si (__neon_ptr_cast(const __builtin_neon_si *, __ax), __bu.__o, __c); \ __rv.__i; \ }) #define vld3q_lane_f32(__a, __b, __c) \ ({ \ + const float32_t * __ax = __a; \ union { float32x4x3_t __i; __neon_float32x4x3_t __o; } __bu = { __b }; \ union { float32x4x3_t __i; __neon_float32x4x3_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld3_lanev4sf (__a, __bu.__o, __c); \ + __rv.__o = __builtin_neon_vld3_lanev4sf (__ax, __bu.__o, __c); \ __rv.__i; \ }) #define vld3q_lane_u16(__a, __b, __c) \ ({ \ + const uint16_t * __ax = __a; \ union { uint16x8x3_t __i; __neon_int16x8x3_t __o; } __bu = { __b }; \ union { uint16x8x3_t __i; __neon_int16x8x3_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld3_lanev8hi ((const int16_t *) __a, __bu.__o, __c); \ + __rv.__o = __builtin_neon_vld3_lanev8hi (__neon_ptr_cast(const __builtin_neon_hi *, __ax), __bu.__o, __c); \ __rv.__i; \ }) #define vld3q_lane_u32(__a, __b, __c) \ ({ \ + const uint32_t * __ax = __a; \ union { uint32x4x3_t __i; __neon_int32x4x3_t __o; } __bu = { __b }; \ union { uint32x4x3_t __i; __neon_int32x4x3_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld3_lanev4si ((const int32_t *) __a, __bu.__o, __c); \ + __rv.__o = __builtin_neon_vld3_lanev4si (__neon_ptr_cast(const __builtin_neon_si *, __ax), __bu.__o, __c); \ __rv.__i; \ }) #define 
vld3q_lane_p16(__a, __b, __c) \ ({ \ + const poly16_t * __ax = __a; \ union { poly16x8x3_t __i; __neon_int16x8x3_t __o; } __bu = { __b }; \ union { poly16x8x3_t __i; __neon_int16x8x3_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld3_lanev8hi ((const int16_t *) __a, __bu.__o, __c); \ + __rv.__o = __builtin_neon_vld3_lanev8hi (__neon_ptr_cast(const __builtin_neon_hi *, __ax), __bu.__o, __c); \ __rv.__i; \ }) #define vld3_dup_s8(__a) \ ({ \ + const int8_t * __ax = __a; \ union { int8x8x3_t __i; __neon_int8x8x3_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld3_dupv8qi (__a); \ + __rv.__o = __builtin_neon_vld3_dupv8qi (__neon_ptr_cast(const __builtin_neon_qi *, __ax)); \ __rv.__i; \ }) #define vld3_dup_s16(__a) \ ({ \ + const int16_t * __ax = __a; \ union { int16x4x3_t __i; __neon_int16x4x3_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld3_dupv4hi (__a); \ + __rv.__o = __builtin_neon_vld3_dupv4hi (__neon_ptr_cast(const __builtin_neon_hi *, __ax)); \ __rv.__i; \ }) #define vld3_dup_s32(__a) \ ({ \ + const int32_t * __ax = __a; \ union { int32x2x3_t __i; __neon_int32x2x3_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld3_dupv2si (__a); \ + __rv.__o = __builtin_neon_vld3_dupv2si (__neon_ptr_cast(const __builtin_neon_si *, __ax)); \ __rv.__i; \ }) #define vld3_dup_f32(__a) \ ({ \ + const float32_t * __ax = __a; \ union { float32x2x3_t __i; __neon_float32x2x3_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld3_dupv2sf (__a); \ + __rv.__o = __builtin_neon_vld3_dupv2sf (__ax); \ __rv.__i; \ }) #define vld3_dup_u8(__a) \ ({ \ + const uint8_t * __ax = __a; \ union { uint8x8x3_t __i; __neon_int8x8x3_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld3_dupv8qi ((const int8_t *) __a); \ + __rv.__o = __builtin_neon_vld3_dupv8qi (__neon_ptr_cast(const __builtin_neon_qi *, __ax)); \ __rv.__i; \ }) #define vld3_dup_u16(__a) \ ({ \ + const uint16_t * __ax = __a; \ union { uint16x4x3_t __i; __neon_int16x4x3_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld3_dupv4hi ((const int16_t *) __a); \ + 
__rv.__o = __builtin_neon_vld3_dupv4hi (__neon_ptr_cast(const __builtin_neon_hi *, __ax)); \ __rv.__i; \ }) #define vld3_dup_u32(__a) \ ({ \ + const uint32_t * __ax = __a; \ union { uint32x2x3_t __i; __neon_int32x2x3_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld3_dupv2si ((const int32_t *) __a); \ + __rv.__o = __builtin_neon_vld3_dupv2si (__neon_ptr_cast(const __builtin_neon_si *, __ax)); \ __rv.__i; \ }) #define vld3_dup_p8(__a) \ ({ \ + const poly8_t * __ax = __a; \ union { poly8x8x3_t __i; __neon_int8x8x3_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld3_dupv8qi ((const int8_t *) __a); \ + __rv.__o = __builtin_neon_vld3_dupv8qi (__neon_ptr_cast(const __builtin_neon_qi *, __ax)); \ __rv.__i; \ }) #define vld3_dup_p16(__a) \ ({ \ + const poly16_t * __ax = __a; \ union { poly16x4x3_t __i; __neon_int16x4x3_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld3_dupv4hi ((const int16_t *) __a); \ + __rv.__o = __builtin_neon_vld3_dupv4hi (__neon_ptr_cast(const __builtin_neon_hi *, __ax)); \ __rv.__i; \ }) #define vld3_dup_s64(__a) \ ({ \ + const int64_t * __ax = __a; \ union { int64x1x3_t __i; __neon_int64x1x3_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld3_dupv1di (__a); \ + __rv.__o = __builtin_neon_vld3_dupv1di (__neon_ptr_cast(const __builtin_neon_di *, __ax)); \ __rv.__i; \ }) #define vld3_dup_u64(__a) \ ({ \ + const uint64_t * __ax = __a; \ union { uint64x1x3_t __i; __neon_int64x1x3_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld3_dupv1di ((const int64_t *) __a); \ + __rv.__o = __builtin_neon_vld3_dupv1di (__neon_ptr_cast(const __builtin_neon_di *, __ax)); \ __rv.__i; \ }) #define vst3_s8(__a, __b) \ ({ \ + int8_t * __ax = __a; \ union { int8x8x3_t __i; __neon_int8x8x3_t __o; } __bu = { __b }; \ - __builtin_neon_vst3v8qi (__a, __bu.__o); \ + __builtin_neon_vst3v8qi (__neon_ptr_cast(__builtin_neon_qi *, __ax), __bu.__o); \ }) #define vst3_s16(__a, __b) \ ({ \ + int16_t * __ax = __a; \ union { int16x4x3_t __i; __neon_int16x4x3_t __o; } __bu = { __b }; \ - 
__builtin_neon_vst3v4hi (__a, __bu.__o); \ + __builtin_neon_vst3v4hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), __bu.__o); \ }) #define vst3_s32(__a, __b) \ ({ \ + int32_t * __ax = __a; \ union { int32x2x3_t __i; __neon_int32x2x3_t __o; } __bu = { __b }; \ - __builtin_neon_vst3v2si (__a, __bu.__o); \ + __builtin_neon_vst3v2si (__neon_ptr_cast(__builtin_neon_si *, __ax), __bu.__o); \ }) #define vst3_f32(__a, __b) \ ({ \ + float32_t * __ax = __a; \ union { float32x2x3_t __i; __neon_float32x2x3_t __o; } __bu = { __b }; \ - __builtin_neon_vst3v2sf (__a, __bu.__o); \ + __builtin_neon_vst3v2sf (__ax, __bu.__o); \ }) #define vst3_u8(__a, __b) \ ({ \ + uint8_t * __ax = __a; \ union { uint8x8x3_t __i; __neon_int8x8x3_t __o; } __bu = { __b }; \ - __builtin_neon_vst3v8qi ((int8_t *) __a, __bu.__o); \ + __builtin_neon_vst3v8qi (__neon_ptr_cast(__builtin_neon_qi *, __ax), __bu.__o); \ }) #define vst3_u16(__a, __b) \ ({ \ + uint16_t * __ax = __a; \ union { uint16x4x3_t __i; __neon_int16x4x3_t __o; } __bu = { __b }; \ - __builtin_neon_vst3v4hi ((int16_t *) __a, __bu.__o); \ + __builtin_neon_vst3v4hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), __bu.__o); \ }) #define vst3_u32(__a, __b) \ ({ \ + uint32_t * __ax = __a; \ union { uint32x2x3_t __i; __neon_int32x2x3_t __o; } __bu = { __b }; \ - __builtin_neon_vst3v2si ((int32_t *) __a, __bu.__o); \ + __builtin_neon_vst3v2si (__neon_ptr_cast(__builtin_neon_si *, __ax), __bu.__o); \ }) #define vst3_p8(__a, __b) \ ({ \ + poly8_t * __ax = __a; \ union { poly8x8x3_t __i; __neon_int8x8x3_t __o; } __bu = { __b }; \ - __builtin_neon_vst3v8qi ((int8_t *) __a, __bu.__o); \ + __builtin_neon_vst3v8qi (__neon_ptr_cast(__builtin_neon_qi *, __ax), __bu.__o); \ }) #define vst3_p16(__a, __b) \ ({ \ + poly16_t * __ax = __a; \ union { poly16x4x3_t __i; __neon_int16x4x3_t __o; } __bu = { __b }; \ - __builtin_neon_vst3v4hi ((int16_t *) __a, __bu.__o); \ + __builtin_neon_vst3v4hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), __bu.__o); \ }) #define 
vst3_s64(__a, __b) \ ({ \ + int64_t * __ax = __a; \ union { int64x1x3_t __i; __neon_int64x1x3_t __o; } __bu = { __b }; \ - __builtin_neon_vst3v1di (__a, __bu.__o); \ + __builtin_neon_vst3v1di (__neon_ptr_cast(__builtin_neon_di *, __ax), __bu.__o); \ }) #define vst3_u64(__a, __b) \ ({ \ + uint64_t * __ax = __a; \ union { uint64x1x3_t __i; __neon_int64x1x3_t __o; } __bu = { __b }; \ - __builtin_neon_vst3v1di ((int64_t *) __a, __bu.__o); \ + __builtin_neon_vst3v1di (__neon_ptr_cast(__builtin_neon_di *, __ax), __bu.__o); \ }) #define vst3q_s8(__a, __b) \ ({ \ + int8_t * __ax = __a; \ union { int8x16x3_t __i; __neon_int8x16x3_t __o; } __bu = { __b }; \ - __builtin_neon_vst3v16qi (__a, __bu.__o); \ + __builtin_neon_vst3v16qi (__neon_ptr_cast(__builtin_neon_qi *, __ax), __bu.__o); \ }) #define vst3q_s16(__a, __b) \ ({ \ + int16_t * __ax = __a; \ union { int16x8x3_t __i; __neon_int16x8x3_t __o; } __bu = { __b }; \ - __builtin_neon_vst3v8hi (__a, __bu.__o); \ + __builtin_neon_vst3v8hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), __bu.__o); \ }) #define vst3q_s32(__a, __b) \ ({ \ + int32_t * __ax = __a; \ union { int32x4x3_t __i; __neon_int32x4x3_t __o; } __bu = { __b }; \ - __builtin_neon_vst3v4si (__a, __bu.__o); \ + __builtin_neon_vst3v4si (__neon_ptr_cast(__builtin_neon_si *, __ax), __bu.__o); \ }) #define vst3q_f32(__a, __b) \ ({ \ + float32_t * __ax = __a; \ union { float32x4x3_t __i; __neon_float32x4x3_t __o; } __bu = { __b }; \ - __builtin_neon_vst3v4sf (__a, __bu.__o); \ + __builtin_neon_vst3v4sf (__ax, __bu.__o); \ }) #define vst3q_u8(__a, __b) \ ({ \ + uint8_t * __ax = __a; \ union { uint8x16x3_t __i; __neon_int8x16x3_t __o; } __bu = { __b }; \ - __builtin_neon_vst3v16qi ((int8_t *) __a, __bu.__o); \ + __builtin_neon_vst3v16qi (__neon_ptr_cast(__builtin_neon_qi *, __ax), __bu.__o); \ }) #define vst3q_u16(__a, __b) \ ({ \ + uint16_t * __ax = __a; \ union { uint16x8x3_t __i; __neon_int16x8x3_t __o; } __bu = { __b }; \ - __builtin_neon_vst3v8hi ((int16_t *) __a, 
__bu.__o); \ + __builtin_neon_vst3v8hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), __bu.__o); \ }) #define vst3q_u32(__a, __b) \ ({ \ + uint32_t * __ax = __a; \ union { uint32x4x3_t __i; __neon_int32x4x3_t __o; } __bu = { __b }; \ - __builtin_neon_vst3v4si ((int32_t *) __a, __bu.__o); \ + __builtin_neon_vst3v4si (__neon_ptr_cast(__builtin_neon_si *, __ax), __bu.__o); \ }) #define vst3q_p8(__a, __b) \ ({ \ + poly8_t * __ax = __a; \ union { poly8x16x3_t __i; __neon_int8x16x3_t __o; } __bu = { __b }; \ - __builtin_neon_vst3v16qi ((int8_t *) __a, __bu.__o); \ + __builtin_neon_vst3v16qi (__neon_ptr_cast(__builtin_neon_qi *, __ax), __bu.__o); \ }) #define vst3q_p16(__a, __b) \ ({ \ + poly16_t * __ax = __a; \ union { poly16x8x3_t __i; __neon_int16x8x3_t __o; } __bu = { __b }; \ - __builtin_neon_vst3v8hi ((int16_t *) __a, __bu.__o); \ + __builtin_neon_vst3v8hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), __bu.__o); \ }) #define vst3_lane_s8(__a, __b, __c) \ ({ \ + int8_t * __ax = __a; \ union { int8x8x3_t __i; __neon_int8x8x3_t __o; } __bu = { __b }; \ - __builtin_neon_vst3_lanev8qi (__a, __bu.__o, __c); \ + __builtin_neon_vst3_lanev8qi (__neon_ptr_cast(__builtin_neon_qi *, __ax), __bu.__o, __c); \ }) #define vst3_lane_s16(__a, __b, __c) \ ({ \ + int16_t * __ax = __a; \ union { int16x4x3_t __i; __neon_int16x4x3_t __o; } __bu = { __b }; \ - __builtin_neon_vst3_lanev4hi (__a, __bu.__o, __c); \ + __builtin_neon_vst3_lanev4hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), __bu.__o, __c); \ }) #define vst3_lane_s32(__a, __b, __c) \ ({ \ + int32_t * __ax = __a; \ union { int32x2x3_t __i; __neon_int32x2x3_t __o; } __bu = { __b }; \ - __builtin_neon_vst3_lanev2si (__a, __bu.__o, __c); \ + __builtin_neon_vst3_lanev2si (__neon_ptr_cast(__builtin_neon_si *, __ax), __bu.__o, __c); \ }) #define vst3_lane_f32(__a, __b, __c) \ ({ \ + float32_t * __ax = __a; \ union { float32x2x3_t __i; __neon_float32x2x3_t __o; } __bu = { __b }; \ - __builtin_neon_vst3_lanev2sf (__a, __bu.__o, __c); \ + 
__builtin_neon_vst3_lanev2sf (__ax, __bu.__o, __c); \ }) #define vst3_lane_u8(__a, __b, __c) \ ({ \ + uint8_t * __ax = __a; \ union { uint8x8x3_t __i; __neon_int8x8x3_t __o; } __bu = { __b }; \ - __builtin_neon_vst3_lanev8qi ((int8_t *) __a, __bu.__o, __c); \ + __builtin_neon_vst3_lanev8qi (__neon_ptr_cast(__builtin_neon_qi *, __ax), __bu.__o, __c); \ }) #define vst3_lane_u16(__a, __b, __c) \ ({ \ + uint16_t * __ax = __a; \ union { uint16x4x3_t __i; __neon_int16x4x3_t __o; } __bu = { __b }; \ - __builtin_neon_vst3_lanev4hi ((int16_t *) __a, __bu.__o, __c); \ + __builtin_neon_vst3_lanev4hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), __bu.__o, __c); \ }) #define vst3_lane_u32(__a, __b, __c) \ ({ \ + uint32_t * __ax = __a; \ union { uint32x2x3_t __i; __neon_int32x2x3_t __o; } __bu = { __b }; \ - __builtin_neon_vst3_lanev2si ((int32_t *) __a, __bu.__o, __c); \ + __builtin_neon_vst3_lanev2si (__neon_ptr_cast(__builtin_neon_si *, __ax), __bu.__o, __c); \ }) #define vst3_lane_p8(__a, __b, __c) \ ({ \ + poly8_t * __ax = __a; \ union { poly8x8x3_t __i; __neon_int8x8x3_t __o; } __bu = { __b }; \ - __builtin_neon_vst3_lanev8qi ((int8_t *) __a, __bu.__o, __c); \ + __builtin_neon_vst3_lanev8qi (__neon_ptr_cast(__builtin_neon_qi *, __ax), __bu.__o, __c); \ }) #define vst3_lane_p16(__a, __b, __c) \ ({ \ + poly16_t * __ax = __a; \ union { poly16x4x3_t __i; __neon_int16x4x3_t __o; } __bu = { __b }; \ - __builtin_neon_vst3_lanev4hi ((int16_t *) __a, __bu.__o, __c); \ + __builtin_neon_vst3_lanev4hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), __bu.__o, __c); \ }) #define vst3q_lane_s16(__a, __b, __c) \ ({ \ + int16_t * __ax = __a; \ union { int16x8x3_t __i; __neon_int16x8x3_t __o; } __bu = { __b }; \ - __builtin_neon_vst3_lanev8hi (__a, __bu.__o, __c); \ + __builtin_neon_vst3_lanev8hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), __bu.__o, __c); \ }) #define vst3q_lane_s32(__a, __b, __c) \ ({ \ + int32_t * __ax = __a; \ union { int32x4x3_t __i; __neon_int32x4x3_t __o; } __bu = { __b 
}; \ - __builtin_neon_vst3_lanev4si (__a, __bu.__o, __c); \ + __builtin_neon_vst3_lanev4si (__neon_ptr_cast(__builtin_neon_si *, __ax), __bu.__o, __c); \ }) #define vst3q_lane_f32(__a, __b, __c) \ ({ \ + float32_t * __ax = __a; \ union { float32x4x3_t __i; __neon_float32x4x3_t __o; } __bu = { __b }; \ - __builtin_neon_vst3_lanev4sf (__a, __bu.__o, __c); \ + __builtin_neon_vst3_lanev4sf (__ax, __bu.__o, __c); \ }) #define vst3q_lane_u16(__a, __b, __c) \ ({ \ + uint16_t * __ax = __a; \ union { uint16x8x3_t __i; __neon_int16x8x3_t __o; } __bu = { __b }; \ - __builtin_neon_vst3_lanev8hi ((int16_t *) __a, __bu.__o, __c); \ + __builtin_neon_vst3_lanev8hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), __bu.__o, __c); \ }) #define vst3q_lane_u32(__a, __b, __c) \ ({ \ + uint32_t * __ax = __a; \ union { uint32x4x3_t __i; __neon_int32x4x3_t __o; } __bu = { __b }; \ - __builtin_neon_vst3_lanev4si ((int32_t *) __a, __bu.__o, __c); \ + __builtin_neon_vst3_lanev4si (__neon_ptr_cast(__builtin_neon_si *, __ax), __bu.__o, __c); \ }) #define vst3q_lane_p16(__a, __b, __c) \ ({ \ + poly16_t * __ax = __a; \ union { poly16x8x3_t __i; __neon_int16x8x3_t __o; } __bu = { __b }; \ - __builtin_neon_vst3_lanev8hi ((int16_t *) __a, __bu.__o, __c); \ + __builtin_neon_vst3_lanev8hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), __bu.__o, __c); \ }) #define vld4_s8(__a) \ ({ \ + const int8_t * __ax = __a; \ union { int8x8x4_t __i; __neon_int8x8x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld4v8qi (__a); \ + __rv.__o = __builtin_neon_vld4v8qi (__neon_ptr_cast(const __builtin_neon_qi *, __ax)); \ __rv.__i; \ }) #define vld4_s16(__a) \ ({ \ + const int16_t * __ax = __a; \ union { int16x4x4_t __i; __neon_int16x4x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld4v4hi (__a); \ + __rv.__o = __builtin_neon_vld4v4hi (__neon_ptr_cast(const __builtin_neon_hi *, __ax)); \ __rv.__i; \ }) #define vld4_s32(__a) \ ({ \ + const int32_t * __ax = __a; \ union { int32x2x4_t __i; __neon_int32x2x4_t __o; } __rv; \ - 
__rv.__o = __builtin_neon_vld4v2si (__a); \ + __rv.__o = __builtin_neon_vld4v2si (__neon_ptr_cast(const __builtin_neon_si *, __ax)); \ __rv.__i; \ }) #define vld4_f32(__a) \ ({ \ + const float32_t * __ax = __a; \ union { float32x2x4_t __i; __neon_float32x2x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld4v2sf (__a); \ + __rv.__o = __builtin_neon_vld4v2sf (__ax); \ __rv.__i; \ }) #define vld4_u8(__a) \ ({ \ + const uint8_t * __ax = __a; \ union { uint8x8x4_t __i; __neon_int8x8x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld4v8qi ((const int8_t *) __a); \ + __rv.__o = __builtin_neon_vld4v8qi (__neon_ptr_cast(const __builtin_neon_qi *, __ax)); \ __rv.__i; \ }) #define vld4_u16(__a) \ ({ \ + const uint16_t * __ax = __a; \ union { uint16x4x4_t __i; __neon_int16x4x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld4v4hi ((const int16_t *) __a); \ + __rv.__o = __builtin_neon_vld4v4hi (__neon_ptr_cast(const __builtin_neon_hi *, __ax)); \ __rv.__i; \ }) #define vld4_u32(__a) \ ({ \ + const uint32_t * __ax = __a; \ union { uint32x2x4_t __i; __neon_int32x2x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld4v2si ((const int32_t *) __a); \ + __rv.__o = __builtin_neon_vld4v2si (__neon_ptr_cast(const __builtin_neon_si *, __ax)); \ __rv.__i; \ }) #define vld4_p8(__a) \ ({ \ + const poly8_t * __ax = __a; \ union { poly8x8x4_t __i; __neon_int8x8x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld4v8qi ((const int8_t *) __a); \ + __rv.__o = __builtin_neon_vld4v8qi (__neon_ptr_cast(const __builtin_neon_qi *, __ax)); \ __rv.__i; \ }) #define vld4_p16(__a) \ ({ \ + const poly16_t * __ax = __a; \ union { poly16x4x4_t __i; __neon_int16x4x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld4v4hi ((const int16_t *) __a); \ + __rv.__o = __builtin_neon_vld4v4hi (__neon_ptr_cast(const __builtin_neon_hi *, __ax)); \ __rv.__i; \ }) #define vld4_s64(__a) \ ({ \ + const int64_t * __ax = __a; \ union { int64x1x4_t __i; __neon_int64x1x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld4v1di (__a); \ 
+ __rv.__o = __builtin_neon_vld4v1di (__neon_ptr_cast(const __builtin_neon_di *, __ax)); \ __rv.__i; \ }) #define vld4_u64(__a) \ ({ \ + const uint64_t * __ax = __a; \ union { uint64x1x4_t __i; __neon_int64x1x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld4v1di ((const int64_t *) __a); \ + __rv.__o = __builtin_neon_vld4v1di (__neon_ptr_cast(const __builtin_neon_di *, __ax)); \ __rv.__i; \ }) #define vld4q_s8(__a) \ ({ \ + const int8_t * __ax = __a; \ union { int8x16x4_t __i; __neon_int8x16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld4v16qi (__a); \ + __rv.__o = __builtin_neon_vld4v16qi (__neon_ptr_cast(const __builtin_neon_qi *, __ax)); \ __rv.__i; \ }) #define vld4q_s16(__a) \ ({ \ + const int16_t * __ax = __a; \ union { int16x8x4_t __i; __neon_int16x8x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld4v8hi (__a); \ + __rv.__o = __builtin_neon_vld4v8hi (__neon_ptr_cast(const __builtin_neon_hi *, __ax)); \ __rv.__i; \ }) #define vld4q_s32(__a) \ ({ \ + const int32_t * __ax = __a; \ union { int32x4x4_t __i; __neon_int32x4x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld4v4si (__a); \ + __rv.__o = __builtin_neon_vld4v4si (__neon_ptr_cast(const __builtin_neon_si *, __ax)); \ __rv.__i; \ }) #define vld4q_f32(__a) \ ({ \ + const float32_t * __ax = __a; \ union { float32x4x4_t __i; __neon_float32x4x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld4v4sf (__a); \ + __rv.__o = __builtin_neon_vld4v4sf (__ax); \ __rv.__i; \ }) #define vld4q_u8(__a) \ ({ \ + const uint8_t * __ax = __a; \ union { uint8x16x4_t __i; __neon_int8x16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld4v16qi ((const int8_t *) __a); \ + __rv.__o = __builtin_neon_vld4v16qi (__neon_ptr_cast(const __builtin_neon_qi *, __ax)); \ __rv.__i; \ }) #define vld4q_u16(__a) \ ({ \ + const uint16_t * __ax = __a; \ union { uint16x8x4_t __i; __neon_int16x8x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld4v8hi ((const int16_t *) __a); \ + __rv.__o = __builtin_neon_vld4v8hi (__neon_ptr_cast(const 
__builtin_neon_hi *, __ax)); \ __rv.__i; \ }) #define vld4q_u32(__a) \ ({ \ + const uint32_t * __ax = __a; \ union { uint32x4x4_t __i; __neon_int32x4x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld4v4si ((const int32_t *) __a); \ + __rv.__o = __builtin_neon_vld4v4si (__neon_ptr_cast(const __builtin_neon_si *, __ax)); \ __rv.__i; \ }) #define vld4q_p8(__a) \ ({ \ + const poly8_t * __ax = __a; \ union { poly8x16x4_t __i; __neon_int8x16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld4v16qi ((const int8_t *) __a); \ + __rv.__o = __builtin_neon_vld4v16qi (__neon_ptr_cast(const __builtin_neon_qi *, __ax)); \ __rv.__i; \ }) #define vld4q_p16(__a) \ ({ \ + const poly16_t * __ax = __a; \ union { poly16x8x4_t __i; __neon_int16x8x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld4v8hi ((const int16_t *) __a); \ + __rv.__o = __builtin_neon_vld4v8hi (__neon_ptr_cast(const __builtin_neon_hi *, __ax)); \ __rv.__i; \ }) #define vld4_lane_s8(__a, __b, __c) \ ({ \ + const int8_t * __ax = __a; \ union { int8x8x4_t __i; __neon_int8x8x4_t __o; } __bu = { __b }; \ union { int8x8x4_t __i; __neon_int8x8x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld4_lanev8qi (__a, __bu.__o, __c); \ + __rv.__o = __builtin_neon_vld4_lanev8qi (__neon_ptr_cast(const __builtin_neon_qi *, __ax), __bu.__o, __c); \ __rv.__i; \ }) #define vld4_lane_s16(__a, __b, __c) \ ({ \ + const int16_t * __ax = __a; \ union { int16x4x4_t __i; __neon_int16x4x4_t __o; } __bu = { __b }; \ union { int16x4x4_t __i; __neon_int16x4x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld4_lanev4hi (__a, __bu.__o, __c); \ + __rv.__o = __builtin_neon_vld4_lanev4hi (__neon_ptr_cast(const __builtin_neon_hi *, __ax), __bu.__o, __c); \ __rv.__i; \ }) #define vld4_lane_s32(__a, __b, __c) \ ({ \ + const int32_t * __ax = __a; \ union { int32x2x4_t __i; __neon_int32x2x4_t __o; } __bu = { __b }; \ union { int32x2x4_t __i; __neon_int32x2x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld4_lanev2si (__a, __bu.__o, __c); \ + __rv.__o = 
__builtin_neon_vld4_lanev2si (__neon_ptr_cast(const __builtin_neon_si *, __ax), __bu.__o, __c); \ __rv.__i; \ }) #define vld4_lane_f32(__a, __b, __c) \ ({ \ + const float32_t * __ax = __a; \ union { float32x2x4_t __i; __neon_float32x2x4_t __o; } __bu = { __b }; \ union { float32x2x4_t __i; __neon_float32x2x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld4_lanev2sf (__a, __bu.__o, __c); \ + __rv.__o = __builtin_neon_vld4_lanev2sf (__ax, __bu.__o, __c); \ __rv.__i; \ }) #define vld4_lane_u8(__a, __b, __c) \ ({ \ + const uint8_t * __ax = __a; \ union { uint8x8x4_t __i; __neon_int8x8x4_t __o; } __bu = { __b }; \ union { uint8x8x4_t __i; __neon_int8x8x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld4_lanev8qi ((const int8_t *) __a, __bu.__o, __c); \ + __rv.__o = __builtin_neon_vld4_lanev8qi (__neon_ptr_cast(const __builtin_neon_qi *, __ax), __bu.__o, __c); \ __rv.__i; \ }) #define vld4_lane_u16(__a, __b, __c) \ ({ \ + const uint16_t * __ax = __a; \ union { uint16x4x4_t __i; __neon_int16x4x4_t __o; } __bu = { __b }; \ union { uint16x4x4_t __i; __neon_int16x4x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld4_lanev4hi ((const int16_t *) __a, __bu.__o, __c); \ + __rv.__o = __builtin_neon_vld4_lanev4hi (__neon_ptr_cast(const __builtin_neon_hi *, __ax), __bu.__o, __c); \ __rv.__i; \ }) #define vld4_lane_u32(__a, __b, __c) \ ({ \ + const uint32_t * __ax = __a; \ union { uint32x2x4_t __i; __neon_int32x2x4_t __o; } __bu = { __b }; \ union { uint32x2x4_t __i; __neon_int32x2x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld4_lanev2si ((const int32_t *) __a, __bu.__o, __c); \ + __rv.__o = __builtin_neon_vld4_lanev2si (__neon_ptr_cast(const __builtin_neon_si *, __ax), __bu.__o, __c); \ __rv.__i; \ }) #define vld4_lane_p8(__a, __b, __c) \ ({ \ + const poly8_t * __ax = __a; \ union { poly8x8x4_t __i; __neon_int8x8x4_t __o; } __bu = { __b }; \ union { poly8x8x4_t __i; __neon_int8x8x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld4_lanev8qi ((const int8_t *) __a, __bu.__o, 
__c); \ + __rv.__o = __builtin_neon_vld4_lanev8qi (__neon_ptr_cast(const __builtin_neon_qi *, __ax), __bu.__o, __c); \ __rv.__i; \ }) #define vld4_lane_p16(__a, __b, __c) \ ({ \ + const poly16_t * __ax = __a; \ union { poly16x4x4_t __i; __neon_int16x4x4_t __o; } __bu = { __b }; \ union { poly16x4x4_t __i; __neon_int16x4x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld4_lanev4hi ((const int16_t *) __a, __bu.__o, __c); \ + __rv.__o = __builtin_neon_vld4_lanev4hi (__neon_ptr_cast(const __builtin_neon_hi *, __ax), __bu.__o, __c); \ __rv.__i; \ }) #define vld4q_lane_s16(__a, __b, __c) \ ({ \ + const int16_t * __ax = __a; \ union { int16x8x4_t __i; __neon_int16x8x4_t __o; } __bu = { __b }; \ union { int16x8x4_t __i; __neon_int16x8x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld4_lanev8hi (__a, __bu.__o, __c); \ + __rv.__o = __builtin_neon_vld4_lanev8hi (__neon_ptr_cast(const __builtin_neon_hi *, __ax), __bu.__o, __c); \ __rv.__i; \ }) #define vld4q_lane_s32(__a, __b, __c) \ ({ \ + const int32_t * __ax = __a; \ union { int32x4x4_t __i; __neon_int32x4x4_t __o; } __bu = { __b }; \ union { int32x4x4_t __i; __neon_int32x4x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld4_lanev4si (__a, __bu.__o, __c); \ + __rv.__o = __builtin_neon_vld4_lanev4si (__neon_ptr_cast(const __builtin_neon_si *, __ax), __bu.__o, __c); \ __rv.__i; \ }) #define vld4q_lane_f32(__a, __b, __c) \ ({ \ + const float32_t * __ax = __a; \ union { float32x4x4_t __i; __neon_float32x4x4_t __o; } __bu = { __b }; \ union { float32x4x4_t __i; __neon_float32x4x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld4_lanev4sf (__a, __bu.__o, __c); \ + __rv.__o = __builtin_neon_vld4_lanev4sf (__ax, __bu.__o, __c); \ __rv.__i; \ }) #define vld4q_lane_u16(__a, __b, __c) \ ({ \ + const uint16_t * __ax = __a; \ union { uint16x8x4_t __i; __neon_int16x8x4_t __o; } __bu = { __b }; \ union { uint16x8x4_t __i; __neon_int16x8x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld4_lanev8hi ((const int16_t *) __a, __bu.__o, __c); \ 
+ __rv.__o = __builtin_neon_vld4_lanev8hi (__neon_ptr_cast(const __builtin_neon_hi *, __ax), __bu.__o, __c); \ __rv.__i; \ }) #define vld4q_lane_u32(__a, __b, __c) \ ({ \ + const uint32_t * __ax = __a; \ union { uint32x4x4_t __i; __neon_int32x4x4_t __o; } __bu = { __b }; \ union { uint32x4x4_t __i; __neon_int32x4x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld4_lanev4si ((const int32_t *) __a, __bu.__o, __c); \ + __rv.__o = __builtin_neon_vld4_lanev4si (__neon_ptr_cast(const __builtin_neon_si *, __ax), __bu.__o, __c); \ __rv.__i; \ }) #define vld4q_lane_p16(__a, __b, __c) \ ({ \ + const poly16_t * __ax = __a; \ union { poly16x8x4_t __i; __neon_int16x8x4_t __o; } __bu = { __b }; \ union { poly16x8x4_t __i; __neon_int16x8x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld4_lanev8hi ((const int16_t *) __a, __bu.__o, __c); \ + __rv.__o = __builtin_neon_vld4_lanev8hi (__neon_ptr_cast(const __builtin_neon_hi *, __ax), __bu.__o, __c); \ __rv.__i; \ }) #define vld4_dup_s8(__a) \ ({ \ + const int8_t * __ax = __a; \ union { int8x8x4_t __i; __neon_int8x8x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld4_dupv8qi (__a); \ + __rv.__o = __builtin_neon_vld4_dupv8qi (__neon_ptr_cast(const __builtin_neon_qi *, __ax)); \ __rv.__i; \ }) #define vld4_dup_s16(__a) \ ({ \ + const int16_t * __ax = __a; \ union { int16x4x4_t __i; __neon_int16x4x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld4_dupv4hi (__a); \ + __rv.__o = __builtin_neon_vld4_dupv4hi (__neon_ptr_cast(const __builtin_neon_hi *, __ax)); \ __rv.__i; \ }) #define vld4_dup_s32(__a) \ ({ \ + const int32_t * __ax = __a; \ union { int32x2x4_t __i; __neon_int32x2x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld4_dupv2si (__a); \ + __rv.__o = __builtin_neon_vld4_dupv2si (__neon_ptr_cast(const __builtin_neon_si *, __ax)); \ __rv.__i; \ }) #define vld4_dup_f32(__a) \ ({ \ + const float32_t * __ax = __a; \ union { float32x2x4_t __i; __neon_float32x2x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld4_dupv2sf (__a); \ + 
__rv.__o = __builtin_neon_vld4_dupv2sf (__ax); \ __rv.__i; \ }) #define vld4_dup_u8(__a) \ ({ \ + const uint8_t * __ax = __a; \ union { uint8x8x4_t __i; __neon_int8x8x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld4_dupv8qi ((const int8_t *) __a); \ + __rv.__o = __builtin_neon_vld4_dupv8qi (__neon_ptr_cast(const __builtin_neon_qi *, __ax)); \ __rv.__i; \ }) #define vld4_dup_u16(__a) \ ({ \ + const uint16_t * __ax = __a; \ union { uint16x4x4_t __i; __neon_int16x4x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld4_dupv4hi ((const int16_t *) __a); \ + __rv.__o = __builtin_neon_vld4_dupv4hi (__neon_ptr_cast(const __builtin_neon_hi *, __ax)); \ __rv.__i; \ }) #define vld4_dup_u32(__a) \ ({ \ + const uint32_t * __ax = __a; \ union { uint32x2x4_t __i; __neon_int32x2x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld4_dupv2si ((const int32_t *) __a); \ + __rv.__o = __builtin_neon_vld4_dupv2si (__neon_ptr_cast(const __builtin_neon_si *, __ax)); \ __rv.__i; \ }) #define vld4_dup_p8(__a) \ ({ \ + const poly8_t * __ax = __a; \ union { poly8x8x4_t __i; __neon_int8x8x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld4_dupv8qi ((const int8_t *) __a); \ + __rv.__o = __builtin_neon_vld4_dupv8qi (__neon_ptr_cast(const __builtin_neon_qi *, __ax)); \ __rv.__i; \ }) #define vld4_dup_p16(__a) \ ({ \ + const poly16_t * __ax = __a; \ union { poly16x4x4_t __i; __neon_int16x4x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld4_dupv4hi ((const int16_t *) __a); \ + __rv.__o = __builtin_neon_vld4_dupv4hi (__neon_ptr_cast(const __builtin_neon_hi *, __ax)); \ __rv.__i; \ }) #define vld4_dup_s64(__a) \ ({ \ + const int64_t * __ax = __a; \ union { int64x1x4_t __i; __neon_int64x1x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vld4_dupv1di (__a); \ + __rv.__o = __builtin_neon_vld4_dupv1di (__neon_ptr_cast(const __builtin_neon_di *, __ax)); \ __rv.__i; \ }) #define vld4_dup_u64(__a) \ ({ \ + const uint64_t * __ax = __a; \ union { uint64x1x4_t __i; __neon_int64x1x4_t __o; } __rv; \ - __rv.__o = 
__builtin_neon_vld4_dupv1di ((const int64_t *) __a); \ + __rv.__o = __builtin_neon_vld4_dupv1di (__neon_ptr_cast(const __builtin_neon_di *, __ax)); \ __rv.__i; \ }) #define vst4_s8(__a, __b) \ ({ \ + int8_t * __ax = __a; \ union { int8x8x4_t __i; __neon_int8x8x4_t __o; } __bu = { __b }; \ - __builtin_neon_vst4v8qi (__a, __bu.__o); \ + __builtin_neon_vst4v8qi (__neon_ptr_cast(__builtin_neon_qi *, __ax), __bu.__o); \ }) #define vst4_s16(__a, __b) \ ({ \ + int16_t * __ax = __a; \ union { int16x4x4_t __i; __neon_int16x4x4_t __o; } __bu = { __b }; \ - __builtin_neon_vst4v4hi (__a, __bu.__o); \ + __builtin_neon_vst4v4hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), __bu.__o); \ }) #define vst4_s32(__a, __b) \ ({ \ + int32_t * __ax = __a; \ union { int32x2x4_t __i; __neon_int32x2x4_t __o; } __bu = { __b }; \ - __builtin_neon_vst4v2si (__a, __bu.__o); \ + __builtin_neon_vst4v2si (__neon_ptr_cast(__builtin_neon_si *, __ax), __bu.__o); \ }) #define vst4_f32(__a, __b) \ ({ \ + float32_t * __ax = __a; \ union { float32x2x4_t __i; __neon_float32x2x4_t __o; } __bu = { __b }; \ - __builtin_neon_vst4v2sf (__a, __bu.__o); \ + __builtin_neon_vst4v2sf (__ax, __bu.__o); \ }) #define vst4_u8(__a, __b) \ ({ \ + uint8_t * __ax = __a; \ union { uint8x8x4_t __i; __neon_int8x8x4_t __o; } __bu = { __b }; \ - __builtin_neon_vst4v8qi ((int8_t *) __a, __bu.__o); \ + __builtin_neon_vst4v8qi (__neon_ptr_cast(__builtin_neon_qi *, __ax), __bu.__o); \ }) #define vst4_u16(__a, __b) \ ({ \ + uint16_t * __ax = __a; \ union { uint16x4x4_t __i; __neon_int16x4x4_t __o; } __bu = { __b }; \ - __builtin_neon_vst4v4hi ((int16_t *) __a, __bu.__o); \ + __builtin_neon_vst4v4hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), __bu.__o); \ }) #define vst4_u32(__a, __b) \ ({ \ + uint32_t * __ax = __a; \ union { uint32x2x4_t __i; __neon_int32x2x4_t __o; } __bu = { __b }; \ - __builtin_neon_vst4v2si ((int32_t *) __a, __bu.__o); \ + __builtin_neon_vst4v2si (__neon_ptr_cast(__builtin_neon_si *, __ax), __bu.__o); \ }) 
#define vst4_p8(__a, __b) \ ({ \ + poly8_t * __ax = __a; \ union { poly8x8x4_t __i; __neon_int8x8x4_t __o; } __bu = { __b }; \ - __builtin_neon_vst4v8qi ((int8_t *) __a, __bu.__o); \ + __builtin_neon_vst4v8qi (__neon_ptr_cast(__builtin_neon_qi *, __ax), __bu.__o); \ }) #define vst4_p16(__a, __b) \ ({ \ + poly16_t * __ax = __a; \ union { poly16x4x4_t __i; __neon_int16x4x4_t __o; } __bu = { __b }; \ - __builtin_neon_vst4v4hi ((int16_t *) __a, __bu.__o); \ + __builtin_neon_vst4v4hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), __bu.__o); \ }) #define vst4_s64(__a, __b) \ ({ \ + int64_t * __ax = __a; \ union { int64x1x4_t __i; __neon_int64x1x4_t __o; } __bu = { __b }; \ - __builtin_neon_vst4v1di (__a, __bu.__o); \ + __builtin_neon_vst4v1di (__neon_ptr_cast(__builtin_neon_di *, __ax), __bu.__o); \ }) #define vst4_u64(__a, __b) \ ({ \ + uint64_t * __ax = __a; \ union { uint64x1x4_t __i; __neon_int64x1x4_t __o; } __bu = { __b }; \ - __builtin_neon_vst4v1di ((int64_t *) __a, __bu.__o); \ + __builtin_neon_vst4v1di (__neon_ptr_cast(__builtin_neon_di *, __ax), __bu.__o); \ }) #define vst4q_s8(__a, __b) \ ({ \ + int8_t * __ax = __a; \ union { int8x16x4_t __i; __neon_int8x16x4_t __o; } __bu = { __b }; \ - __builtin_neon_vst4v16qi (__a, __bu.__o); \ + __builtin_neon_vst4v16qi (__neon_ptr_cast(__builtin_neon_qi *, __ax), __bu.__o); \ }) #define vst4q_s16(__a, __b) \ ({ \ + int16_t * __ax = __a; \ union { int16x8x4_t __i; __neon_int16x8x4_t __o; } __bu = { __b }; \ - __builtin_neon_vst4v8hi (__a, __bu.__o); \ + __builtin_neon_vst4v8hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), __bu.__o); \ }) #define vst4q_s32(__a, __b) \ ({ \ + int32_t * __ax = __a; \ union { int32x4x4_t __i; __neon_int32x4x4_t __o; } __bu = { __b }; \ - __builtin_neon_vst4v4si (__a, __bu.__o); \ + __builtin_neon_vst4v4si (__neon_ptr_cast(__builtin_neon_si *, __ax), __bu.__o); \ }) #define vst4q_f32(__a, __b) \ ({ \ + float32_t * __ax = __a; \ union { float32x4x4_t __i; __neon_float32x4x4_t __o; } __bu = { __b 
}; \ - __builtin_neon_vst4v4sf (__a, __bu.__o); \ + __builtin_neon_vst4v4sf (__ax, __bu.__o); \ }) #define vst4q_u8(__a, __b) \ ({ \ + uint8_t * __ax = __a; \ union { uint8x16x4_t __i; __neon_int8x16x4_t __o; } __bu = { __b }; \ - __builtin_neon_vst4v16qi ((int8_t *) __a, __bu.__o); \ + __builtin_neon_vst4v16qi (__neon_ptr_cast(__builtin_neon_qi *, __ax), __bu.__o); \ }) #define vst4q_u16(__a, __b) \ ({ \ + uint16_t * __ax = __a; \ union { uint16x8x4_t __i; __neon_int16x8x4_t __o; } __bu = { __b }; \ - __builtin_neon_vst4v8hi ((int16_t *) __a, __bu.__o); \ + __builtin_neon_vst4v8hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), __bu.__o); \ }) #define vst4q_u32(__a, __b) \ ({ \ + uint32_t * __ax = __a; \ union { uint32x4x4_t __i; __neon_int32x4x4_t __o; } __bu = { __b }; \ - __builtin_neon_vst4v4si ((int32_t *) __a, __bu.__o); \ + __builtin_neon_vst4v4si (__neon_ptr_cast(__builtin_neon_si *, __ax), __bu.__o); \ }) #define vst4q_p8(__a, __b) \ ({ \ + poly8_t * __ax = __a; \ union { poly8x16x4_t __i; __neon_int8x16x4_t __o; } __bu = { __b }; \ - __builtin_neon_vst4v16qi ((int8_t *) __a, __bu.__o); \ + __builtin_neon_vst4v16qi (__neon_ptr_cast(__builtin_neon_qi *, __ax), __bu.__o); \ }) #define vst4q_p16(__a, __b) \ ({ \ + poly16_t * __ax = __a; \ union { poly16x8x4_t __i; __neon_int16x8x4_t __o; } __bu = { __b }; \ - __builtin_neon_vst4v8hi ((int16_t *) __a, __bu.__o); \ + __builtin_neon_vst4v8hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), __bu.__o); \ }) #define vst4_lane_s8(__a, __b, __c) \ ({ \ + int8_t * __ax = __a; \ union { int8x8x4_t __i; __neon_int8x8x4_t __o; } __bu = { __b }; \ - __builtin_neon_vst4_lanev8qi (__a, __bu.__o, __c); \ + __builtin_neon_vst4_lanev8qi (__neon_ptr_cast(__builtin_neon_qi *, __ax), __bu.__o, __c); \ }) #define vst4_lane_s16(__a, __b, __c) \ ({ \ + int16_t * __ax = __a; \ union { int16x4x4_t __i; __neon_int16x4x4_t __o; } __bu = { __b }; \ - __builtin_neon_vst4_lanev4hi (__a, __bu.__o, __c); \ + __builtin_neon_vst4_lanev4hi 
(__neon_ptr_cast(__builtin_neon_hi *, __ax), __bu.__o, __c); \ }) #define vst4_lane_s32(__a, __b, __c) \ ({ \ + int32_t * __ax = __a; \ union { int32x2x4_t __i; __neon_int32x2x4_t __o; } __bu = { __b }; \ - __builtin_neon_vst4_lanev2si (__a, __bu.__o, __c); \ + __builtin_neon_vst4_lanev2si (__neon_ptr_cast(__builtin_neon_si *, __ax), __bu.__o, __c); \ }) #define vst4_lane_f32(__a, __b, __c) \ ({ \ + float32_t * __ax = __a; \ union { float32x2x4_t __i; __neon_float32x2x4_t __o; } __bu = { __b }; \ - __builtin_neon_vst4_lanev2sf (__a, __bu.__o, __c); \ + __builtin_neon_vst4_lanev2sf (__ax, __bu.__o, __c); \ }) #define vst4_lane_u8(__a, __b, __c) \ ({ \ + uint8_t * __ax = __a; \ union { uint8x8x4_t __i; __neon_int8x8x4_t __o; } __bu = { __b }; \ - __builtin_neon_vst4_lanev8qi ((int8_t *) __a, __bu.__o, __c); \ + __builtin_neon_vst4_lanev8qi (__neon_ptr_cast(__builtin_neon_qi *, __ax), __bu.__o, __c); \ }) #define vst4_lane_u16(__a, __b, __c) \ ({ \ + uint16_t * __ax = __a; \ union { uint16x4x4_t __i; __neon_int16x4x4_t __o; } __bu = { __b }; \ - __builtin_neon_vst4_lanev4hi ((int16_t *) __a, __bu.__o, __c); \ + __builtin_neon_vst4_lanev4hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), __bu.__o, __c); \ }) #define vst4_lane_u32(__a, __b, __c) \ ({ \ + uint32_t * __ax = __a; \ union { uint32x2x4_t __i; __neon_int32x2x4_t __o; } __bu = { __b }; \ - __builtin_neon_vst4_lanev2si ((int32_t *) __a, __bu.__o, __c); \ + __builtin_neon_vst4_lanev2si (__neon_ptr_cast(__builtin_neon_si *, __ax), __bu.__o, __c); \ }) #define vst4_lane_p8(__a, __b, __c) \ ({ \ + poly8_t * __ax = __a; \ union { poly8x8x4_t __i; __neon_int8x8x4_t __o; } __bu = { __b }; \ - __builtin_neon_vst4_lanev8qi ((int8_t *) __a, __bu.__o, __c); \ + __builtin_neon_vst4_lanev8qi (__neon_ptr_cast(__builtin_neon_qi *, __ax), __bu.__o, __c); \ }) #define vst4_lane_p16(__a, __b, __c) \ ({ \ + poly16_t * __ax = __a; \ union { poly16x4x4_t __i; __neon_int16x4x4_t __o; } __bu = { __b }; \ - __builtin_neon_vst4_lanev4hi 
((int16_t *) __a, __bu.__o, __c); \ + __builtin_neon_vst4_lanev4hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), __bu.__o, __c); \ }) #define vst4q_lane_s16(__a, __b, __c) \ ({ \ + int16_t * __ax = __a; \ union { int16x8x4_t __i; __neon_int16x8x4_t __o; } __bu = { __b }; \ - __builtin_neon_vst4_lanev8hi (__a, __bu.__o, __c); \ + __builtin_neon_vst4_lanev8hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), __bu.__o, __c); \ }) #define vst4q_lane_s32(__a, __b, __c) \ ({ \ + int32_t * __ax = __a; \ union { int32x4x4_t __i; __neon_int32x4x4_t __o; } __bu = { __b }; \ - __builtin_neon_vst4_lanev4si (__a, __bu.__o, __c); \ + __builtin_neon_vst4_lanev4si (__neon_ptr_cast(__builtin_neon_si *, __ax), __bu.__o, __c); \ }) #define vst4q_lane_f32(__a, __b, __c) \ ({ \ + float32_t * __ax = __a; \ union { float32x4x4_t __i; __neon_float32x4x4_t __o; } __bu = { __b }; \ - __builtin_neon_vst4_lanev4sf (__a, __bu.__o, __c); \ + __builtin_neon_vst4_lanev4sf (__ax, __bu.__o, __c); \ }) #define vst4q_lane_u16(__a, __b, __c) \ ({ \ + uint16_t * __ax = __a; \ union { uint16x8x4_t __i; __neon_int16x8x4_t __o; } __bu = { __b }; \ - __builtin_neon_vst4_lanev8hi ((int16_t *) __a, __bu.__o, __c); \ + __builtin_neon_vst4_lanev8hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), __bu.__o, __c); \ }) #define vst4q_lane_u32(__a, __b, __c) \ ({ \ + uint32_t * __ax = __a; \ union { uint32x4x4_t __i; __neon_int32x4x4_t __o; } __bu = { __b }; \ - __builtin_neon_vst4_lanev4si ((int32_t *) __a, __bu.__o, __c); \ + __builtin_neon_vst4_lanev4si (__neon_ptr_cast(__builtin_neon_si *, __ax), __bu.__o, __c); \ }) #define vst4q_lane_p16(__a, __b, __c) \ ({ \ + poly16_t * __ax = __a; \ union { poly16x8x4_t __i; __neon_int16x8x4_t __o; } __bu = { __b }; \ - __builtin_neon_vst4_lanev8hi ((int16_t *) __a, __bu.__o, __c); \ + __builtin_neon_vst4_lanev8hi (__neon_ptr_cast(__builtin_neon_hi *, __ax), __bu.__o, __c); \ }) #define vand_s8(__a, __b) \ ({ \ + int8x8_t __ax = __a; \ + int8x8_t __bx = __b; \ union { int8x8_t __i; 
__neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vandv8qi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vandv8qi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vand_s16(__a, __b) \ ({ \ + int16x4_t __ax = __a; \ + int16x4_t __bx = __b; \ union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vandv4hi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vandv4hi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vand_s32(__a, __b) \ ({ \ + int32x2_t __ax = __a; \ + int32x2_t __bx = __b; \ union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vandv2si (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vandv2si (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vand_s64(__a, __b) \ ({ \ + int64x1_t __ax = __a; \ + int64x1_t __bx = __b; \ union { int64x1_t __i; __neon_int64x1_t __o; } __rv; \ - __rv.__o = __builtin_neon_vandv1di (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vandv1di (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vand_u8(__a, __b) \ ({ \ + uint8x8_t __ax = __a; \ + uint8x8_t __bx = __b; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vandv8qi ((__neon_int8x8_t) __a.val, (__neon_int8x8_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vandv8qi ((__neon_int8x8_t) __ax.val, (__neon_int8x8_t) __bx.val, 0); \ __rv.__i; \ }) #define vand_u16(__a, __b) \ ({ \ + uint16x4_t __ax = __a; \ + uint16x4_t __bx = __b; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vandv4hi ((__neon_int16x4_t) __a.val, (__neon_int16x4_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vandv4hi ((__neon_int16x4_t) __ax.val, (__neon_int16x4_t) __bx.val, 0); \ __rv.__i; \ }) #define vand_u32(__a, __b) \ ({ \ + uint32x2_t __ax = __a; \ + uint32x2_t __bx = __b; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vandv2si ((__neon_int32x2_t) __a.val, (__neon_int32x2_t) __b.val, 0); \ + __rv.__o = 
__builtin_neon_vandv2si ((__neon_int32x2_t) __ax.val, (__neon_int32x2_t) __bx.val, 0); \ __rv.__i; \ }) #define vand_u64(__a, __b) \ ({ \ + uint64x1_t __ax = __a; \ + uint64x1_t __bx = __b; \ union { uint64x1_t __i; __neon_int64x1_t __o; } __rv; \ - __rv.__o = __builtin_neon_vandv1di ((__neon_int64x1_t) __a.val, (__neon_int64x1_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vandv1di ((__neon_int64x1_t) __ax.val, (__neon_int64x1_t) __bx.val, 0); \ __rv.__i; \ }) #define vandq_s8(__a, __b) \ ({ \ + int8x16_t __ax = __a; \ + int8x16_t __bx = __b; \ union { int8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vandv16qi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vandv16qi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vandq_s16(__a, __b) \ ({ \ + int16x8_t __ax = __a; \ + int16x8_t __bx = __b; \ union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vandv8hi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vandv8hi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vandq_s32(__a, __b) \ ({ \ + int32x4_t __ax = __a; \ + int32x4_t __bx = __b; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vandv4si (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vandv4si (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vandq_s64(__a, __b) \ ({ \ + int64x2_t __ax = __a; \ + int64x2_t __bx = __b; \ union { int64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vandv2di (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vandv2di (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vandq_u8(__a, __b) \ ({ \ + uint8x16_t __ax = __a; \ + uint8x16_t __bx = __b; \ union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vandv16qi ((__neon_int8x16_t) __a.val, (__neon_int8x16_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vandv16qi ((__neon_int8x16_t) __ax.val, (__neon_int8x16_t) __bx.val, 0); \ __rv.__i; \ }) #define vandq_u16(__a, __b) \ ({ \ + 
uint16x8_t __ax = __a; \ + uint16x8_t __bx = __b; \ union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vandv8hi ((__neon_int16x8_t) __a.val, (__neon_int16x8_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vandv8hi ((__neon_int16x8_t) __ax.val, (__neon_int16x8_t) __bx.val, 0); \ __rv.__i; \ }) #define vandq_u32(__a, __b) \ ({ \ + uint32x4_t __ax = __a; \ + uint32x4_t __bx = __b; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vandv4si ((__neon_int32x4_t) __a.val, (__neon_int32x4_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vandv4si ((__neon_int32x4_t) __ax.val, (__neon_int32x4_t) __bx.val, 0); \ __rv.__i; \ }) #define vandq_u64(__a, __b) \ ({ \ + uint64x2_t __ax = __a; \ + uint64x2_t __bx = __b; \ union { uint64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vandv2di ((__neon_int64x2_t) __a.val, (__neon_int64x2_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vandv2di ((__neon_int64x2_t) __ax.val, (__neon_int64x2_t) __bx.val, 0); \ __rv.__i; \ }) #define vorr_s8(__a, __b) \ ({ \ + int8x8_t __ax = __a; \ + int8x8_t __bx = __b; \ union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vorrv8qi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vorrv8qi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vorr_s16(__a, __b) \ ({ \ + int16x4_t __ax = __a; \ + int16x4_t __bx = __b; \ union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vorrv4hi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vorrv4hi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vorr_s32(__a, __b) \ ({ \ + int32x2_t __ax = __a; \ + int32x2_t __bx = __b; \ union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vorrv2si (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vorrv2si (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vorr_s64(__a, __b) \ ({ \ + int64x1_t __ax = __a; \ + int64x1_t __bx = __b; \ union { int64x1_t __i; 
__neon_int64x1_t __o; } __rv; \ - __rv.__o = __builtin_neon_vorrv1di (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vorrv1di (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vorr_u8(__a, __b) \ ({ \ + uint8x8_t __ax = __a; \ + uint8x8_t __bx = __b; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vorrv8qi ((__neon_int8x8_t) __a.val, (__neon_int8x8_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vorrv8qi ((__neon_int8x8_t) __ax.val, (__neon_int8x8_t) __bx.val, 0); \ __rv.__i; \ }) #define vorr_u16(__a, __b) \ ({ \ + uint16x4_t __ax = __a; \ + uint16x4_t __bx = __b; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vorrv4hi ((__neon_int16x4_t) __a.val, (__neon_int16x4_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vorrv4hi ((__neon_int16x4_t) __ax.val, (__neon_int16x4_t) __bx.val, 0); \ __rv.__i; \ }) #define vorr_u32(__a, __b) \ ({ \ + uint32x2_t __ax = __a; \ + uint32x2_t __bx = __b; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vorrv2si ((__neon_int32x2_t) __a.val, (__neon_int32x2_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vorrv2si ((__neon_int32x2_t) __ax.val, (__neon_int32x2_t) __bx.val, 0); \ __rv.__i; \ }) #define vorr_u64(__a, __b) \ ({ \ + uint64x1_t __ax = __a; \ + uint64x1_t __bx = __b; \ union { uint64x1_t __i; __neon_int64x1_t __o; } __rv; \ - __rv.__o = __builtin_neon_vorrv1di ((__neon_int64x1_t) __a.val, (__neon_int64x1_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vorrv1di ((__neon_int64x1_t) __ax.val, (__neon_int64x1_t) __bx.val, 0); \ __rv.__i; \ }) #define vorrq_s8(__a, __b) \ ({ \ + int8x16_t __ax = __a; \ + int8x16_t __bx = __b; \ union { int8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vorrv16qi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vorrv16qi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vorrq_s16(__a, __b) \ ({ \ + int16x8_t __ax = __a; \ + int16x8_t __bx = __b; \ union { int16x8_t 
__i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vorrv8hi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vorrv8hi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vorrq_s32(__a, __b) \ ({ \ + int32x4_t __ax = __a; \ + int32x4_t __bx = __b; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vorrv4si (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vorrv4si (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vorrq_s64(__a, __b) \ ({ \ + int64x2_t __ax = __a; \ + int64x2_t __bx = __b; \ union { int64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vorrv2di (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vorrv2di (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vorrq_u8(__a, __b) \ ({ \ + uint8x16_t __ax = __a; \ + uint8x16_t __bx = __b; \ union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vorrv16qi ((__neon_int8x16_t) __a.val, (__neon_int8x16_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vorrv16qi ((__neon_int8x16_t) __ax.val, (__neon_int8x16_t) __bx.val, 0); \ __rv.__i; \ }) #define vorrq_u16(__a, __b) \ ({ \ + uint16x8_t __ax = __a; \ + uint16x8_t __bx = __b; \ union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vorrv8hi ((__neon_int16x8_t) __a.val, (__neon_int16x8_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vorrv8hi ((__neon_int16x8_t) __ax.val, (__neon_int16x8_t) __bx.val, 0); \ __rv.__i; \ }) #define vorrq_u32(__a, __b) \ ({ \ + uint32x4_t __ax = __a; \ + uint32x4_t __bx = __b; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vorrv4si ((__neon_int32x4_t) __a.val, (__neon_int32x4_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vorrv4si ((__neon_int32x4_t) __ax.val, (__neon_int32x4_t) __bx.val, 0); \ __rv.__i; \ }) #define vorrq_u64(__a, __b) \ ({ \ + uint64x2_t __ax = __a; \ + uint64x2_t __bx = __b; \ union { uint64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = 
__builtin_neon_vorrv2di ((__neon_int64x2_t) __a.val, (__neon_int64x2_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vorrv2di ((__neon_int64x2_t) __ax.val, (__neon_int64x2_t) __bx.val, 0); \ __rv.__i; \ }) #define veor_s8(__a, __b) \ ({ \ + int8x8_t __ax = __a; \ + int8x8_t __bx = __b; \ union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_veorv8qi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_veorv8qi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define veor_s16(__a, __b) \ ({ \ + int16x4_t __ax = __a; \ + int16x4_t __bx = __b; \ union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_veorv4hi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_veorv4hi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define veor_s32(__a, __b) \ ({ \ + int32x2_t __ax = __a; \ + int32x2_t __bx = __b; \ union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_veorv2si (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_veorv2si (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define veor_s64(__a, __b) \ ({ \ + int64x1_t __ax = __a; \ + int64x1_t __bx = __b; \ union { int64x1_t __i; __neon_int64x1_t __o; } __rv; \ - __rv.__o = __builtin_neon_veorv1di (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_veorv1di (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define veor_u8(__a, __b) \ ({ \ + uint8x8_t __ax = __a; \ + uint8x8_t __bx = __b; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_veorv8qi ((__neon_int8x8_t) __a.val, (__neon_int8x8_t) __b.val, 0); \ + __rv.__o = __builtin_neon_veorv8qi ((__neon_int8x8_t) __ax.val, (__neon_int8x8_t) __bx.val, 0); \ __rv.__i; \ }) #define veor_u16(__a, __b) \ ({ \ + uint16x4_t __ax = __a; \ + uint16x4_t __bx = __b; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_veorv4hi ((__neon_int16x4_t) __a.val, (__neon_int16x4_t) __b.val, 0); \ + __rv.__o = __builtin_neon_veorv4hi ((__neon_int16x4_t) __ax.val, 
(__neon_int16x4_t) __bx.val, 0); \ __rv.__i; \ }) #define veor_u32(__a, __b) \ ({ \ + uint32x2_t __ax = __a; \ + uint32x2_t __bx = __b; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_veorv2si ((__neon_int32x2_t) __a.val, (__neon_int32x2_t) __b.val, 0); \ + __rv.__o = __builtin_neon_veorv2si ((__neon_int32x2_t) __ax.val, (__neon_int32x2_t) __bx.val, 0); \ __rv.__i; \ }) #define veor_u64(__a, __b) \ ({ \ + uint64x1_t __ax = __a; \ + uint64x1_t __bx = __b; \ union { uint64x1_t __i; __neon_int64x1_t __o; } __rv; \ - __rv.__o = __builtin_neon_veorv1di ((__neon_int64x1_t) __a.val, (__neon_int64x1_t) __b.val, 0); \ + __rv.__o = __builtin_neon_veorv1di ((__neon_int64x1_t) __ax.val, (__neon_int64x1_t) __bx.val, 0); \ __rv.__i; \ }) #define veorq_s8(__a, __b) \ ({ \ + int8x16_t __ax = __a; \ + int8x16_t __bx = __b; \ union { int8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_veorv16qi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_veorv16qi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define veorq_s16(__a, __b) \ ({ \ + int16x8_t __ax = __a; \ + int16x8_t __bx = __b; \ union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_veorv8hi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_veorv8hi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define veorq_s32(__a, __b) \ ({ \ + int32x4_t __ax = __a; \ + int32x4_t __bx = __b; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_veorv4si (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_veorv4si (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define veorq_s64(__a, __b) \ ({ \ + int64x2_t __ax = __a; \ + int64x2_t __bx = __b; \ union { int64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_veorv2di (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_veorv2di (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define veorq_u8(__a, __b) \ ({ \ + uint8x16_t __ax = __a; \ + uint8x16_t __bx = __b; \ union 
{ uint8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_veorv16qi ((__neon_int8x16_t) __a.val, (__neon_int8x16_t) __b.val, 0); \ + __rv.__o = __builtin_neon_veorv16qi ((__neon_int8x16_t) __ax.val, (__neon_int8x16_t) __bx.val, 0); \ __rv.__i; \ }) #define veorq_u16(__a, __b) \ ({ \ + uint16x8_t __ax = __a; \ + uint16x8_t __bx = __b; \ union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_veorv8hi ((__neon_int16x8_t) __a.val, (__neon_int16x8_t) __b.val, 0); \ + __rv.__o = __builtin_neon_veorv8hi ((__neon_int16x8_t) __ax.val, (__neon_int16x8_t) __bx.val, 0); \ __rv.__i; \ }) #define veorq_u32(__a, __b) \ ({ \ + uint32x4_t __ax = __a; \ + uint32x4_t __bx = __b; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_veorv4si ((__neon_int32x4_t) __a.val, (__neon_int32x4_t) __b.val, 0); \ + __rv.__o = __builtin_neon_veorv4si ((__neon_int32x4_t) __ax.val, (__neon_int32x4_t) __bx.val, 0); \ __rv.__i; \ }) #define veorq_u64(__a, __b) \ ({ \ + uint64x2_t __ax = __a; \ + uint64x2_t __bx = __b; \ union { uint64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_veorv2di ((__neon_int64x2_t) __a.val, (__neon_int64x2_t) __b.val, 0); \ + __rv.__o = __builtin_neon_veorv2di ((__neon_int64x2_t) __ax.val, (__neon_int64x2_t) __bx.val, 0); \ __rv.__i; \ }) #define vbic_s8(__a, __b) \ ({ \ + int8x8_t __ax = __a; \ + int8x8_t __bx = __b; \ union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vbicv8qi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vbicv8qi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vbic_s16(__a, __b) \ ({ \ + int16x4_t __ax = __a; \ + int16x4_t __bx = __b; \ union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vbicv4hi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vbicv4hi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vbic_s32(__a, __b) \ ({ \ + int32x2_t __ax = __a; \ + int32x2_t __bx = __b; \ 
union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vbicv2si (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vbicv2si (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vbic_s64(__a, __b) \ ({ \ + int64x1_t __ax = __a; \ + int64x1_t __bx = __b; \ union { int64x1_t __i; __neon_int64x1_t __o; } __rv; \ - __rv.__o = __builtin_neon_vbicv1di (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vbicv1di (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vbic_u8(__a, __b) \ ({ \ + uint8x8_t __ax = __a; \ + uint8x8_t __bx = __b; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vbicv8qi ((__neon_int8x8_t) __a.val, (__neon_int8x8_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vbicv8qi ((__neon_int8x8_t) __ax.val, (__neon_int8x8_t) __bx.val, 0); \ __rv.__i; \ }) #define vbic_u16(__a, __b) \ ({ \ + uint16x4_t __ax = __a; \ + uint16x4_t __bx = __b; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vbicv4hi ((__neon_int16x4_t) __a.val, (__neon_int16x4_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vbicv4hi ((__neon_int16x4_t) __ax.val, (__neon_int16x4_t) __bx.val, 0); \ __rv.__i; \ }) #define vbic_u32(__a, __b) \ ({ \ + uint32x2_t __ax = __a; \ + uint32x2_t __bx = __b; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vbicv2si ((__neon_int32x2_t) __a.val, (__neon_int32x2_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vbicv2si ((__neon_int32x2_t) __ax.val, (__neon_int32x2_t) __bx.val, 0); \ __rv.__i; \ }) #define vbic_u64(__a, __b) \ ({ \ + uint64x1_t __ax = __a; \ + uint64x1_t __bx = __b; \ union { uint64x1_t __i; __neon_int64x1_t __o; } __rv; \ - __rv.__o = __builtin_neon_vbicv1di ((__neon_int64x1_t) __a.val, (__neon_int64x1_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vbicv1di ((__neon_int64x1_t) __ax.val, (__neon_int64x1_t) __bx.val, 0); \ __rv.__i; \ }) #define vbicq_s8(__a, __b) \ ({ \ + int8x16_t __ax = __a; \ + int8x16_t __bx = __b; \ 
union { int8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vbicv16qi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vbicv16qi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vbicq_s16(__a, __b) \ ({ \ + int16x8_t __ax = __a; \ + int16x8_t __bx = __b; \ union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vbicv8hi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vbicv8hi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vbicq_s32(__a, __b) \ ({ \ + int32x4_t __ax = __a; \ + int32x4_t __bx = __b; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vbicv4si (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vbicv4si (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vbicq_s64(__a, __b) \ ({ \ + int64x2_t __ax = __a; \ + int64x2_t __bx = __b; \ union { int64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vbicv2di (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vbicv2di (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vbicq_u8(__a, __b) \ ({ \ + uint8x16_t __ax = __a; \ + uint8x16_t __bx = __b; \ union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vbicv16qi ((__neon_int8x16_t) __a.val, (__neon_int8x16_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vbicv16qi ((__neon_int8x16_t) __ax.val, (__neon_int8x16_t) __bx.val, 0); \ __rv.__i; \ }) #define vbicq_u16(__a, __b) \ ({ \ + uint16x8_t __ax = __a; \ + uint16x8_t __bx = __b; \ union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vbicv8hi ((__neon_int16x8_t) __a.val, (__neon_int16x8_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vbicv8hi ((__neon_int16x8_t) __ax.val, (__neon_int16x8_t) __bx.val, 0); \ __rv.__i; \ }) #define vbicq_u32(__a, __b) \ ({ \ + uint32x4_t __ax = __a; \ + uint32x4_t __bx = __b; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vbicv4si ((__neon_int32x4_t) __a.val, 
(__neon_int32x4_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vbicv4si ((__neon_int32x4_t) __ax.val, (__neon_int32x4_t) __bx.val, 0); \ __rv.__i; \ }) #define vbicq_u64(__a, __b) \ ({ \ + uint64x2_t __ax = __a; \ + uint64x2_t __bx = __b; \ union { uint64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vbicv2di ((__neon_int64x2_t) __a.val, (__neon_int64x2_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vbicv2di ((__neon_int64x2_t) __ax.val, (__neon_int64x2_t) __bx.val, 0); \ __rv.__i; \ }) #define vorn_s8(__a, __b) \ ({ \ + int8x8_t __ax = __a; \ + int8x8_t __bx = __b; \ union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vornv8qi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vornv8qi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vorn_s16(__a, __b) \ ({ \ + int16x4_t __ax = __a; \ + int16x4_t __bx = __b; \ union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vornv4hi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vornv4hi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vorn_s32(__a, __b) \ ({ \ + int32x2_t __ax = __a; \ + int32x2_t __bx = __b; \ union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vornv2si (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vornv2si (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vorn_s64(__a, __b) \ ({ \ + int64x1_t __ax = __a; \ + int64x1_t __bx = __b; \ union { int64x1_t __i; __neon_int64x1_t __o; } __rv; \ - __rv.__o = __builtin_neon_vornv1di (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vornv1di (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vorn_u8(__a, __b) \ ({ \ + uint8x8_t __ax = __a; \ + uint8x8_t __bx = __b; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vornv8qi ((__neon_int8x8_t) __a.val, (__neon_int8x8_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vornv8qi ((__neon_int8x8_t) __ax.val, (__neon_int8x8_t) __bx.val, 0); \ __rv.__i; \ }) #define 
vorn_u16(__a, __b) \ ({ \ + uint16x4_t __ax = __a; \ + uint16x4_t __bx = __b; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vornv4hi ((__neon_int16x4_t) __a.val, (__neon_int16x4_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vornv4hi ((__neon_int16x4_t) __ax.val, (__neon_int16x4_t) __bx.val, 0); \ __rv.__i; \ }) #define vorn_u32(__a, __b) \ ({ \ + uint32x2_t __ax = __a; \ + uint32x2_t __bx = __b; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vornv2si ((__neon_int32x2_t) __a.val, (__neon_int32x2_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vornv2si ((__neon_int32x2_t) __ax.val, (__neon_int32x2_t) __bx.val, 0); \ __rv.__i; \ }) #define vorn_u64(__a, __b) \ ({ \ + uint64x1_t __ax = __a; \ + uint64x1_t __bx = __b; \ union { uint64x1_t __i; __neon_int64x1_t __o; } __rv; \ - __rv.__o = __builtin_neon_vornv1di ((__neon_int64x1_t) __a.val, (__neon_int64x1_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vornv1di ((__neon_int64x1_t) __ax.val, (__neon_int64x1_t) __bx.val, 0); \ __rv.__i; \ }) #define vornq_s8(__a, __b) \ ({ \ + int8x16_t __ax = __a; \ + int8x16_t __bx = __b; \ union { int8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vornv16qi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vornv16qi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vornq_s16(__a, __b) \ ({ \ + int16x8_t __ax = __a; \ + int16x8_t __bx = __b; \ union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vornv8hi (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vornv8hi (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vornq_s32(__a, __b) \ ({ \ + int32x4_t __ax = __a; \ + int32x4_t __bx = __b; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vornv4si (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vornv4si (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vornq_s64(__a, __b) \ ({ \ + int64x2_t __ax = __a; \ + int64x2_t 
__bx = __b; \ union { int64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vornv2di (__a.val, __b.val, 1); \ + __rv.__o = __builtin_neon_vornv2di (__ax.val, __bx.val, 1); \ __rv.__i; \ }) #define vornq_u8(__a, __b) \ ({ \ + uint8x16_t __ax = __a; \ + uint8x16_t __bx = __b; \ union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vornv16qi ((__neon_int8x16_t) __a.val, (__neon_int8x16_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vornv16qi ((__neon_int8x16_t) __ax.val, (__neon_int8x16_t) __bx.val, 0); \ __rv.__i; \ }) #define vornq_u16(__a, __b) \ ({ \ + uint16x8_t __ax = __a; \ + uint16x8_t __bx = __b; \ union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vornv8hi ((__neon_int16x8_t) __a.val, (__neon_int16x8_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vornv8hi ((__neon_int16x8_t) __ax.val, (__neon_int16x8_t) __bx.val, 0); \ __rv.__i; \ }) #define vornq_u32(__a, __b) \ ({ \ + uint32x4_t __ax = __a; \ + uint32x4_t __bx = __b; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vornv4si ((__neon_int32x4_t) __a.val, (__neon_int32x4_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vornv4si ((__neon_int32x4_t) __ax.val, (__neon_int32x4_t) __bx.val, 0); \ __rv.__i; \ }) #define vornq_u64(__a, __b) \ ({ \ + uint64x2_t __ax = __a; \ + uint64x2_t __bx = __b; \ union { uint64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vornv2di ((__neon_int64x2_t) __a.val, (__neon_int64x2_t) __b.val, 0); \ + __rv.__o = __builtin_neon_vornv2di ((__neon_int64x2_t) __ax.val, (__neon_int64x2_t) __bx.val, 0); \ __rv.__i; \ }) #define vreinterpret_p8_s8(__a) \ ({ \ + int8x8_t __ax = __a; \ union { poly8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv8qiv8qi (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv8qiv8qi (__ax.val); \ __rv.__i; \ }) #define vreinterpret_p8_s16(__a) \ ({ \ + int16x4_t __ax = __a; \ union { 
poly8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv8qiv4hi (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv8qiv4hi (__ax.val); \ __rv.__i; \ }) #define vreinterpret_p8_s32(__a) \ ({ \ + int32x2_t __ax = __a; \ union { poly8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv8qiv2si (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv8qiv2si (__ax.val); \ __rv.__i; \ }) #define vreinterpret_p8_s64(__a) \ ({ \ + int64x1_t __ax = __a; \ union { poly8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv8qiv1di (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv8qiv1di (__ax.val); \ __rv.__i; \ }) #define vreinterpret_p8_f32(__a) \ ({ \ + float32x2_t __ax = __a; \ union { poly8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv8qiv2sf (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv8qiv2sf (__ax.val); \ __rv.__i; \ }) #define vreinterpret_p8_u8(__a) \ ({ \ + uint8x8_t __ax = __a; \ union { poly8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv8qiv8qi ((__neon_int8x8_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv8qiv8qi ((__neon_int8x8_t) __ax.val); \ __rv.__i; \ }) #define vreinterpret_p8_u16(__a) \ ({ \ + uint16x4_t __ax = __a; \ union { poly8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv8qiv4hi ((__neon_int16x4_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv8qiv4hi ((__neon_int16x4_t) __ax.val); \ __rv.__i; \ }) #define vreinterpret_p8_u32(__a) \ ({ \ + uint32x2_t __ax = __a; \ union { poly8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv8qiv2si ((__neon_int32x2_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv8qiv2si ((__neon_int32x2_t) __ax.val); \ __rv.__i; \ }) #define vreinterpret_p8_u64(__a) \ ({ \ + uint64x1_t __ax = __a; \ union { poly8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = 
__builtin_neon_vreinterpretv8qiv1di ((__neon_int64x1_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv8qiv1di ((__neon_int64x1_t) __ax.val); \ __rv.__i; \ }) #define vreinterpret_p8_p16(__a) \ ({ \ + poly16x4_t __ax = __a; \ union { poly8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv8qiv4hi ((__neon_int16x4_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv8qiv4hi ((__neon_int16x4_t) __ax.val); \ __rv.__i; \ }) #define vreinterpretq_p8_s8(__a) \ ({ \ + int8x16_t __ax = __a; \ union { poly8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv16qiv16qi (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv16qiv16qi (__ax.val); \ __rv.__i; \ }) #define vreinterpretq_p8_s16(__a) \ ({ \ + int16x8_t __ax = __a; \ union { poly8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv16qiv8hi (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv16qiv8hi (__ax.val); \ __rv.__i; \ }) #define vreinterpretq_p8_s32(__a) \ ({ \ + int32x4_t __ax = __a; \ union { poly8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv16qiv4si (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv16qiv4si (__ax.val); \ __rv.__i; \ }) #define vreinterpretq_p8_s64(__a) \ ({ \ + int64x2_t __ax = __a; \ union { poly8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv16qiv2di (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv16qiv2di (__ax.val); \ __rv.__i; \ }) #define vreinterpretq_p8_f32(__a) \ ({ \ + float32x4_t __ax = __a; \ union { poly8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv16qiv4sf (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv16qiv4sf (__ax.val); \ __rv.__i; \ }) #define vreinterpretq_p8_u8(__a) \ ({ \ + uint8x16_t __ax = __a; \ union { poly8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv16qiv16qi ((__neon_int8x16_t) __a.val); \ + 
__rv.__o = __builtin_neon_vreinterpretv16qiv16qi ((__neon_int8x16_t) __ax.val); \ __rv.__i; \ }) #define vreinterpretq_p8_u16(__a) \ ({ \ + uint16x8_t __ax = __a; \ union { poly8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv16qiv8hi ((__neon_int16x8_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv16qiv8hi ((__neon_int16x8_t) __ax.val); \ __rv.__i; \ }) #define vreinterpretq_p8_u32(__a) \ ({ \ + uint32x4_t __ax = __a; \ union { poly8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv16qiv4si ((__neon_int32x4_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv16qiv4si ((__neon_int32x4_t) __ax.val); \ __rv.__i; \ }) #define vreinterpretq_p8_u64(__a) \ ({ \ + uint64x2_t __ax = __a; \ union { poly8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv16qiv2di ((__neon_int64x2_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv16qiv2di ((__neon_int64x2_t) __ax.val); \ __rv.__i; \ }) #define vreinterpretq_p8_p16(__a) \ ({ \ + poly16x8_t __ax = __a; \ union { poly8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv16qiv8hi ((__neon_int16x8_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv16qiv8hi ((__neon_int16x8_t) __ax.val); \ __rv.__i; \ }) #define vreinterpret_p16_s8(__a) \ ({ \ + int8x8_t __ax = __a; \ union { poly16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv4hiv8qi (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv4hiv8qi (__ax.val); \ __rv.__i; \ }) #define vreinterpret_p16_s16(__a) \ ({ \ + int16x4_t __ax = __a; \ union { poly16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv4hiv4hi (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv4hiv4hi (__ax.val); \ __rv.__i; \ }) #define vreinterpret_p16_s32(__a) \ ({ \ + int32x2_t __ax = __a; \ union { poly16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = 
__builtin_neon_vreinterpretv4hiv2si (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv4hiv2si (__ax.val); \ __rv.__i; \ }) #define vreinterpret_p16_s64(__a) \ ({ \ + int64x1_t __ax = __a; \ union { poly16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv4hiv1di (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv4hiv1di (__ax.val); \ __rv.__i; \ }) #define vreinterpret_p16_f32(__a) \ ({ \ + float32x2_t __ax = __a; \ union { poly16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv4hiv2sf (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv4hiv2sf (__ax.val); \ __rv.__i; \ }) #define vreinterpret_p16_u8(__a) \ ({ \ + uint8x8_t __ax = __a; \ union { poly16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv4hiv8qi ((__neon_int8x8_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv4hiv8qi ((__neon_int8x8_t) __ax.val); \ __rv.__i; \ }) #define vreinterpret_p16_u16(__a) \ ({ \ + uint16x4_t __ax = __a; \ union { poly16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv4hiv4hi ((__neon_int16x4_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv4hiv4hi ((__neon_int16x4_t) __ax.val); \ __rv.__i; \ }) #define vreinterpret_p16_u32(__a) \ ({ \ + uint32x2_t __ax = __a; \ union { poly16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv4hiv2si ((__neon_int32x2_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv4hiv2si ((__neon_int32x2_t) __ax.val); \ __rv.__i; \ }) #define vreinterpret_p16_u64(__a) \ ({ \ + uint64x1_t __ax = __a; \ union { poly16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv4hiv1di ((__neon_int64x1_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv4hiv1di ((__neon_int64x1_t) __ax.val); \ __rv.__i; \ }) #define vreinterpret_p16_p8(__a) \ ({ \ + poly8x8_t __ax = __a; \ union { poly16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = 
__builtin_neon_vreinterpretv4hiv8qi ((__neon_int8x8_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv4hiv8qi ((__neon_int8x8_t) __ax.val); \ __rv.__i; \ }) #define vreinterpretq_p16_s8(__a) \ ({ \ + int8x16_t __ax = __a; \ union { poly16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv8hiv16qi (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv8hiv16qi (__ax.val); \ __rv.__i; \ }) #define vreinterpretq_p16_s16(__a) \ ({ \ + int16x8_t __ax = __a; \ union { poly16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv8hiv8hi (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv8hiv8hi (__ax.val); \ __rv.__i; \ }) #define vreinterpretq_p16_s32(__a) \ ({ \ + int32x4_t __ax = __a; \ union { poly16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv8hiv4si (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv8hiv4si (__ax.val); \ __rv.__i; \ }) #define vreinterpretq_p16_s64(__a) \ ({ \ + int64x2_t __ax = __a; \ union { poly16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv8hiv2di (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv8hiv2di (__ax.val); \ __rv.__i; \ }) #define vreinterpretq_p16_f32(__a) \ ({ \ + float32x4_t __ax = __a; \ union { poly16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv8hiv4sf (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv8hiv4sf (__ax.val); \ __rv.__i; \ }) #define vreinterpretq_p16_u8(__a) \ ({ \ + uint8x16_t __ax = __a; \ union { poly16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv8hiv16qi ((__neon_int8x16_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv8hiv16qi ((__neon_int8x16_t) __ax.val); \ __rv.__i; \ }) #define vreinterpretq_p16_u16(__a) \ ({ \ + uint16x8_t __ax = __a; \ union { poly16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv8hiv8hi ((__neon_int16x8_t) __a.val); \ + 
__rv.__o = __builtin_neon_vreinterpretv8hiv8hi ((__neon_int16x8_t) __ax.val); \ __rv.__i; \ }) #define vreinterpretq_p16_u32(__a) \ ({ \ + uint32x4_t __ax = __a; \ union { poly16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv8hiv4si ((__neon_int32x4_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv8hiv4si ((__neon_int32x4_t) __ax.val); \ __rv.__i; \ }) #define vreinterpretq_p16_u64(__a) \ ({ \ + uint64x2_t __ax = __a; \ union { poly16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv8hiv2di ((__neon_int64x2_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv8hiv2di ((__neon_int64x2_t) __ax.val); \ __rv.__i; \ }) #define vreinterpretq_p16_p8(__a) \ ({ \ + poly8x16_t __ax = __a; \ union { poly16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv8hiv16qi ((__neon_int8x16_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv8hiv16qi ((__neon_int8x16_t) __ax.val); \ __rv.__i; \ }) #define vreinterpret_f32_s8(__a) \ ({ \ + int8x8_t __ax = __a; \ union { float32x2_t __i; __neon_float32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv2sfv8qi (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv2sfv8qi (__ax.val); \ __rv.__i; \ }) #define vreinterpret_f32_s16(__a) \ ({ \ + int16x4_t __ax = __a; \ union { float32x2_t __i; __neon_float32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv2sfv4hi (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv2sfv4hi (__ax.val); \ __rv.__i; \ }) #define vreinterpret_f32_s32(__a) \ ({ \ + int32x2_t __ax = __a; \ union { float32x2_t __i; __neon_float32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv2sfv2si (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv2sfv2si (__ax.val); \ __rv.__i; \ }) #define vreinterpret_f32_s64(__a) \ ({ \ + int64x1_t __ax = __a; \ union { float32x2_t __i; __neon_float32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv2sfv1di (__a.val); \ + __rv.__o 
= __builtin_neon_vreinterpretv2sfv1di (__ax.val); \ __rv.__i; \ }) #define vreinterpret_f32_u8(__a) \ ({ \ + uint8x8_t __ax = __a; \ union { float32x2_t __i; __neon_float32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv2sfv8qi ((__neon_int8x8_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv2sfv8qi ((__neon_int8x8_t) __ax.val); \ __rv.__i; \ }) #define vreinterpret_f32_u16(__a) \ ({ \ + uint16x4_t __ax = __a; \ union { float32x2_t __i; __neon_float32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv2sfv4hi ((__neon_int16x4_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv2sfv4hi ((__neon_int16x4_t) __ax.val); \ __rv.__i; \ }) #define vreinterpret_f32_u32(__a) \ ({ \ + uint32x2_t __ax = __a; \ union { float32x2_t __i; __neon_float32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv2sfv2si ((__neon_int32x2_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv2sfv2si ((__neon_int32x2_t) __ax.val); \ __rv.__i; \ }) #define vreinterpret_f32_u64(__a) \ ({ \ + uint64x1_t __ax = __a; \ union { float32x2_t __i; __neon_float32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv2sfv1di ((__neon_int64x1_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv2sfv1di ((__neon_int64x1_t) __ax.val); \ __rv.__i; \ }) #define vreinterpret_f32_p8(__a) \ ({ \ + poly8x8_t __ax = __a; \ union { float32x2_t __i; __neon_float32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv2sfv8qi ((__neon_int8x8_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv2sfv8qi ((__neon_int8x8_t) __ax.val); \ __rv.__i; \ }) #define vreinterpret_f32_p16(__a) \ ({ \ + poly16x4_t __ax = __a; \ union { float32x2_t __i; __neon_float32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv2sfv4hi ((__neon_int16x4_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv2sfv4hi ((__neon_int16x4_t) __ax.val); \ __rv.__i; \ }) #define vreinterpretq_f32_s8(__a) \ ({ \ + int8x16_t __ax = __a; \ union { float32x4_t __i; __neon_float32x4_t 
__o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv4sfv16qi (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv4sfv16qi (__ax.val); \ __rv.__i; \ }) #define vreinterpretq_f32_s16(__a) \ ({ \ + int16x8_t __ax = __a; \ union { float32x4_t __i; __neon_float32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv4sfv8hi (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv4sfv8hi (__ax.val); \ __rv.__i; \ }) #define vreinterpretq_f32_s32(__a) \ ({ \ + int32x4_t __ax = __a; \ union { float32x4_t __i; __neon_float32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv4sfv4si (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv4sfv4si (__ax.val); \ __rv.__i; \ }) #define vreinterpretq_f32_s64(__a) \ ({ \ + int64x2_t __ax = __a; \ union { float32x4_t __i; __neon_float32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv4sfv2di (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv4sfv2di (__ax.val); \ __rv.__i; \ }) #define vreinterpretq_f32_u8(__a) \ ({ \ + uint8x16_t __ax = __a; \ union { float32x4_t __i; __neon_float32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv4sfv16qi ((__neon_int8x16_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv4sfv16qi ((__neon_int8x16_t) __ax.val); \ __rv.__i; \ }) #define vreinterpretq_f32_u16(__a) \ ({ \ + uint16x8_t __ax = __a; \ union { float32x4_t __i; __neon_float32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv4sfv8hi ((__neon_int16x8_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv4sfv8hi ((__neon_int16x8_t) __ax.val); \ __rv.__i; \ }) #define vreinterpretq_f32_u32(__a) \ ({ \ + uint32x4_t __ax = __a; \ union { float32x4_t __i; __neon_float32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv4sfv4si ((__neon_int32x4_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv4sfv4si ((__neon_int32x4_t) __ax.val); \ __rv.__i; \ }) #define vreinterpretq_f32_u64(__a) \ ({ \ + uint64x2_t __ax = __a; \ union { float32x4_t __i; __neon_float32x4_t __o; } 
__rv; \ - __rv.__o = __builtin_neon_vreinterpretv4sfv2di ((__neon_int64x2_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv4sfv2di ((__neon_int64x2_t) __ax.val); \ __rv.__i; \ }) #define vreinterpretq_f32_p8(__a) \ ({ \ + poly8x16_t __ax = __a; \ union { float32x4_t __i; __neon_float32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv4sfv16qi ((__neon_int8x16_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv4sfv16qi ((__neon_int8x16_t) __ax.val); \ __rv.__i; \ }) #define vreinterpretq_f32_p16(__a) \ ({ \ + poly16x8_t __ax = __a; \ union { float32x4_t __i; __neon_float32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv4sfv8hi ((__neon_int16x8_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv4sfv8hi ((__neon_int16x8_t) __ax.val); \ __rv.__i; \ }) #define vreinterpret_s64_s8(__a) \ ({ \ + int8x8_t __ax = __a; \ union { int64x1_t __i; __neon_int64x1_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv1div8qi (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv1div8qi (__ax.val); \ __rv.__i; \ }) #define vreinterpret_s64_s16(__a) \ ({ \ + int16x4_t __ax = __a; \ union { int64x1_t __i; __neon_int64x1_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv1div4hi (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv1div4hi (__ax.val); \ __rv.__i; \ }) #define vreinterpret_s64_s32(__a) \ ({ \ + int32x2_t __ax = __a; \ union { int64x1_t __i; __neon_int64x1_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv1div2si (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv1div2si (__ax.val); \ __rv.__i; \ }) #define vreinterpret_s64_f32(__a) \ ({ \ + float32x2_t __ax = __a; \ union { int64x1_t __i; __neon_int64x1_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv1div2sf (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv1div2sf (__ax.val); \ __rv.__i; \ }) #define vreinterpret_s64_u8(__a) \ ({ \ + uint8x8_t __ax = __a; \ union { int64x1_t __i; __neon_int64x1_t __o; } __rv; \ - __rv.__o = 
__builtin_neon_vreinterpretv1div8qi ((__neon_int8x8_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv1div8qi ((__neon_int8x8_t) __ax.val); \ __rv.__i; \ }) #define vreinterpret_s64_u16(__a) \ ({ \ + uint16x4_t __ax = __a; \ union { int64x1_t __i; __neon_int64x1_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv1div4hi ((__neon_int16x4_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv1div4hi ((__neon_int16x4_t) __ax.val); \ __rv.__i; \ }) #define vreinterpret_s64_u32(__a) \ ({ \ + uint32x2_t __ax = __a; \ union { int64x1_t __i; __neon_int64x1_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv1div2si ((__neon_int32x2_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv1div2si ((__neon_int32x2_t) __ax.val); \ __rv.__i; \ }) #define vreinterpret_s64_u64(__a) \ ({ \ + uint64x1_t __ax = __a; \ union { int64x1_t __i; __neon_int64x1_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv1div1di ((__neon_int64x1_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv1div1di ((__neon_int64x1_t) __ax.val); \ __rv.__i; \ }) #define vreinterpret_s64_p8(__a) \ ({ \ + poly8x8_t __ax = __a; \ union { int64x1_t __i; __neon_int64x1_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv1div8qi ((__neon_int8x8_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv1div8qi ((__neon_int8x8_t) __ax.val); \ __rv.__i; \ }) #define vreinterpret_s64_p16(__a) \ ({ \ + poly16x4_t __ax = __a; \ union { int64x1_t __i; __neon_int64x1_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv1div4hi ((__neon_int16x4_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv1div4hi ((__neon_int16x4_t) __ax.val); \ __rv.__i; \ }) #define vreinterpretq_s64_s8(__a) \ ({ \ + int8x16_t __ax = __a; \ union { int64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv2div16qi (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv2div16qi (__ax.val); \ __rv.__i; \ }) #define vreinterpretq_s64_s16(__a) \ ({ \ + int16x8_t __ax = __a; \ 
union { int64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv2div8hi (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv2div8hi (__ax.val); \ __rv.__i; \ }) #define vreinterpretq_s64_s32(__a) \ ({ \ + int32x4_t __ax = __a; \ union { int64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv2div4si (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv2div4si (__ax.val); \ __rv.__i; \ }) #define vreinterpretq_s64_f32(__a) \ ({ \ + float32x4_t __ax = __a; \ union { int64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv2div4sf (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv2div4sf (__ax.val); \ __rv.__i; \ }) #define vreinterpretq_s64_u8(__a) \ ({ \ + uint8x16_t __ax = __a; \ union { int64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv2div16qi ((__neon_int8x16_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv2div16qi ((__neon_int8x16_t) __ax.val); \ __rv.__i; \ }) #define vreinterpretq_s64_u16(__a) \ ({ \ + uint16x8_t __ax = __a; \ union { int64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv2div8hi ((__neon_int16x8_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv2div8hi ((__neon_int16x8_t) __ax.val); \ __rv.__i; \ }) #define vreinterpretq_s64_u32(__a) \ ({ \ + uint32x4_t __ax = __a; \ union { int64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv2div4si ((__neon_int32x4_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv2div4si ((__neon_int32x4_t) __ax.val); \ __rv.__i; \ }) #define vreinterpretq_s64_u64(__a) \ ({ \ + uint64x2_t __ax = __a; \ union { int64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv2div2di ((__neon_int64x2_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv2div2di ((__neon_int64x2_t) __ax.val); \ __rv.__i; \ }) #define vreinterpretq_s64_p8(__a) \ ({ \ + poly8x16_t __ax = __a; \ 
union { int64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv2div16qi ((__neon_int8x16_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv2div16qi ((__neon_int8x16_t) __ax.val); \ __rv.__i; \ }) #define vreinterpretq_s64_p16(__a) \ ({ \ + poly16x8_t __ax = __a; \ union { int64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv2div8hi ((__neon_int16x8_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv2div8hi ((__neon_int16x8_t) __ax.val); \ __rv.__i; \ }) #define vreinterpret_u64_s8(__a) \ ({ \ + int8x8_t __ax = __a; \ union { uint64x1_t __i; __neon_int64x1_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv1div8qi (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv1div8qi (__ax.val); \ __rv.__i; \ }) #define vreinterpret_u64_s16(__a) \ ({ \ + int16x4_t __ax = __a; \ union { uint64x1_t __i; __neon_int64x1_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv1div4hi (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv1div4hi (__ax.val); \ __rv.__i; \ }) #define vreinterpret_u64_s32(__a) \ ({ \ + int32x2_t __ax = __a; \ union { uint64x1_t __i; __neon_int64x1_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv1div2si (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv1div2si (__ax.val); \ __rv.__i; \ }) #define vreinterpret_u64_s64(__a) \ ({ \ + int64x1_t __ax = __a; \ union { uint64x1_t __i; __neon_int64x1_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv1div1di (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv1div1di (__ax.val); \ __rv.__i; \ }) #define vreinterpret_u64_f32(__a) \ ({ \ + float32x2_t __ax = __a; \ union { uint64x1_t __i; __neon_int64x1_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv1div2sf (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv1div2sf (__ax.val); \ __rv.__i; \ }) #define vreinterpret_u64_u8(__a) \ ({ \ + uint8x8_t __ax = __a; \ union { uint64x1_t __i; __neon_int64x1_t __o; } __rv; \ - __rv.__o = 
__builtin_neon_vreinterpretv1div8qi ((__neon_int8x8_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv1div8qi ((__neon_int8x8_t) __ax.val); \ __rv.__i; \ }) #define vreinterpret_u64_u16(__a) \ ({ \ + uint16x4_t __ax = __a; \ union { uint64x1_t __i; __neon_int64x1_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv1div4hi ((__neon_int16x4_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv1div4hi ((__neon_int16x4_t) __ax.val); \ __rv.__i; \ }) #define vreinterpret_u64_u32(__a) \ ({ \ + uint32x2_t __ax = __a; \ union { uint64x1_t __i; __neon_int64x1_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv1div2si ((__neon_int32x2_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv1div2si ((__neon_int32x2_t) __ax.val); \ __rv.__i; \ }) #define vreinterpret_u64_p8(__a) \ ({ \ + poly8x8_t __ax = __a; \ union { uint64x1_t __i; __neon_int64x1_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv1div8qi ((__neon_int8x8_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv1div8qi ((__neon_int8x8_t) __ax.val); \ __rv.__i; \ }) #define vreinterpret_u64_p16(__a) \ ({ \ + poly16x4_t __ax = __a; \ union { uint64x1_t __i; __neon_int64x1_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv1div4hi ((__neon_int16x4_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv1div4hi ((__neon_int16x4_t) __ax.val); \ __rv.__i; \ }) #define vreinterpretq_u64_s8(__a) \ ({ \ + int8x16_t __ax = __a; \ union { uint64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv2div16qi (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv2div16qi (__ax.val); \ __rv.__i; \ }) #define vreinterpretq_u64_s16(__a) \ ({ \ + int16x8_t __ax = __a; \ union { uint64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv2div8hi (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv2div8hi (__ax.val); \ __rv.__i; \ }) #define vreinterpretq_u64_s32(__a) \ ({ \ + int32x4_t __ax = __a; \ union { uint64x2_t __i; 
__neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv2div4si (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv2div4si (__ax.val); \ __rv.__i; \ }) #define vreinterpretq_u64_s64(__a) \ ({ \ + int64x2_t __ax = __a; \ union { uint64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv2div2di (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv2div2di (__ax.val); \ __rv.__i; \ }) #define vreinterpretq_u64_f32(__a) \ ({ \ + float32x4_t __ax = __a; \ union { uint64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv2div4sf (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv2div4sf (__ax.val); \ __rv.__i; \ }) #define vreinterpretq_u64_u8(__a) \ ({ \ + uint8x16_t __ax = __a; \ union { uint64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv2div16qi ((__neon_int8x16_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv2div16qi ((__neon_int8x16_t) __ax.val); \ __rv.__i; \ }) #define vreinterpretq_u64_u16(__a) \ ({ \ + uint16x8_t __ax = __a; \ union { uint64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv2div8hi ((__neon_int16x8_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv2div8hi ((__neon_int16x8_t) __ax.val); \ __rv.__i; \ }) #define vreinterpretq_u64_u32(__a) \ ({ \ + uint32x4_t __ax = __a; \ union { uint64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv2div4si ((__neon_int32x4_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv2div4si ((__neon_int32x4_t) __ax.val); \ __rv.__i; \ }) #define vreinterpretq_u64_p8(__a) \ ({ \ + poly8x16_t __ax = __a; \ union { uint64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv2div16qi ((__neon_int8x16_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv2div16qi ((__neon_int8x16_t) __ax.val); \ __rv.__i; \ }) #define vreinterpretq_u64_p16(__a) \ ({ \ + poly16x8_t __ax = __a; \ union { 
uint64x2_t __i; __neon_int64x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv2div8hi ((__neon_int16x8_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv2div8hi ((__neon_int16x8_t) __ax.val); \ __rv.__i; \ }) #define vreinterpret_s8_s16(__a) \ ({ \ + int16x4_t __ax = __a; \ union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv8qiv4hi (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv8qiv4hi (__ax.val); \ __rv.__i; \ }) #define vreinterpret_s8_s32(__a) \ ({ \ + int32x2_t __ax = __a; \ union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv8qiv2si (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv8qiv2si (__ax.val); \ __rv.__i; \ }) #define vreinterpret_s8_s64(__a) \ ({ \ + int64x1_t __ax = __a; \ union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv8qiv1di (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv8qiv1di (__ax.val); \ __rv.__i; \ }) #define vreinterpret_s8_f32(__a) \ ({ \ + float32x2_t __ax = __a; \ union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv8qiv2sf (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv8qiv2sf (__ax.val); \ __rv.__i; \ }) #define vreinterpret_s8_u8(__a) \ ({ \ + uint8x8_t __ax = __a; \ union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv8qiv8qi ((__neon_int8x8_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv8qiv8qi ((__neon_int8x8_t) __ax.val); \ __rv.__i; \ }) #define vreinterpret_s8_u16(__a) \ ({ \ + uint16x4_t __ax = __a; \ union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv8qiv4hi ((__neon_int16x4_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv8qiv4hi ((__neon_int16x4_t) __ax.val); \ __rv.__i; \ }) #define vreinterpret_s8_u32(__a) \ ({ \ + uint32x2_t __ax = __a; \ union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = 
__builtin_neon_vreinterpretv8qiv2si ((__neon_int32x2_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv8qiv2si ((__neon_int32x2_t) __ax.val); \ __rv.__i; \ }) #define vreinterpret_s8_u64(__a) \ ({ \ + uint64x1_t __ax = __a; \ union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv8qiv1di ((__neon_int64x1_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv8qiv1di ((__neon_int64x1_t) __ax.val); \ __rv.__i; \ }) #define vreinterpret_s8_p8(__a) \ ({ \ + poly8x8_t __ax = __a; \ union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv8qiv8qi ((__neon_int8x8_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv8qiv8qi ((__neon_int8x8_t) __ax.val); \ __rv.__i; \ }) #define vreinterpret_s8_p16(__a) \ ({ \ + poly16x4_t __ax = __a; \ union { int8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv8qiv4hi ((__neon_int16x4_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv8qiv4hi ((__neon_int16x4_t) __ax.val); \ __rv.__i; \ }) #define vreinterpretq_s8_s16(__a) \ ({ \ + int16x8_t __ax = __a; \ union { int8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv16qiv8hi (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv16qiv8hi (__ax.val); \ __rv.__i; \ }) #define vreinterpretq_s8_s32(__a) \ ({ \ + int32x4_t __ax = __a; \ union { int8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv16qiv4si (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv16qiv4si (__ax.val); \ __rv.__i; \ }) #define vreinterpretq_s8_s64(__a) \ ({ \ + int64x2_t __ax = __a; \ union { int8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv16qiv2di (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv16qiv2di (__ax.val); \ __rv.__i; \ }) #define vreinterpretq_s8_f32(__a) \ ({ \ + float32x4_t __ax = __a; \ union { int8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = 
__builtin_neon_vreinterpretv16qiv4sf (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv16qiv4sf (__ax.val); \ __rv.__i; \ }) #define vreinterpretq_s8_u8(__a) \ ({ \ + uint8x16_t __ax = __a; \ union { int8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv16qiv16qi ((__neon_int8x16_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv16qiv16qi ((__neon_int8x16_t) __ax.val); \ __rv.__i; \ }) #define vreinterpretq_s8_u16(__a) \ ({ \ + uint16x8_t __ax = __a; \ union { int8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv16qiv8hi ((__neon_int16x8_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv16qiv8hi ((__neon_int16x8_t) __ax.val); \ __rv.__i; \ }) #define vreinterpretq_s8_u32(__a) \ ({ \ + uint32x4_t __ax = __a; \ union { int8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv16qiv4si ((__neon_int32x4_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv16qiv4si ((__neon_int32x4_t) __ax.val); \ __rv.__i; \ }) #define vreinterpretq_s8_u64(__a) \ ({ \ + uint64x2_t __ax = __a; \ union { int8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv16qiv2di ((__neon_int64x2_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv16qiv2di ((__neon_int64x2_t) __ax.val); \ __rv.__i; \ }) #define vreinterpretq_s8_p8(__a) \ ({ \ + poly8x16_t __ax = __a; \ union { int8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv16qiv16qi ((__neon_int8x16_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv16qiv16qi ((__neon_int8x16_t) __ax.val); \ __rv.__i; \ }) #define vreinterpretq_s8_p16(__a) \ ({ \ + poly16x8_t __ax = __a; \ union { int8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv16qiv8hi ((__neon_int16x8_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv16qiv8hi ((__neon_int16x8_t) __ax.val); \ __rv.__i; \ }) #define vreinterpret_s16_s8(__a) \ ({ \ + int8x8_t 
__ax = __a; \ union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv4hiv8qi (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv4hiv8qi (__ax.val); \ __rv.__i; \ }) #define vreinterpret_s16_s32(__a) \ ({ \ + int32x2_t __ax = __a; \ union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv4hiv2si (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv4hiv2si (__ax.val); \ __rv.__i; \ }) #define vreinterpret_s16_s64(__a) \ ({ \ + int64x1_t __ax = __a; \ union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv4hiv1di (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv4hiv1di (__ax.val); \ __rv.__i; \ }) #define vreinterpret_s16_f32(__a) \ ({ \ + float32x2_t __ax = __a; \ union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv4hiv2sf (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv4hiv2sf (__ax.val); \ __rv.__i; \ }) #define vreinterpret_s16_u8(__a) \ ({ \ + uint8x8_t __ax = __a; \ union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv4hiv8qi ((__neon_int8x8_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv4hiv8qi ((__neon_int8x8_t) __ax.val); \ __rv.__i; \ }) #define vreinterpret_s16_u16(__a) \ ({ \ + uint16x4_t __ax = __a; \ union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv4hiv4hi ((__neon_int16x4_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv4hiv4hi ((__neon_int16x4_t) __ax.val); \ __rv.__i; \ }) #define vreinterpret_s16_u32(__a) \ ({ \ + uint32x2_t __ax = __a; \ union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv4hiv2si ((__neon_int32x2_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv4hiv2si ((__neon_int32x2_t) __ax.val); \ __rv.__i; \ }) #define vreinterpret_s16_u64(__a) \ ({ \ + uint64x1_t __ax = __a; \ union { int16x4_t __i; 
__neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv4hiv1di ((__neon_int64x1_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv4hiv1di ((__neon_int64x1_t) __ax.val); \ __rv.__i; \ }) #define vreinterpret_s16_p8(__a) \ ({ \ + poly8x8_t __ax = __a; \ union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv4hiv8qi ((__neon_int8x8_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv4hiv8qi ((__neon_int8x8_t) __ax.val); \ __rv.__i; \ }) #define vreinterpret_s16_p16(__a) \ ({ \ + poly16x4_t __ax = __a; \ union { int16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv4hiv4hi ((__neon_int16x4_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv4hiv4hi ((__neon_int16x4_t) __ax.val); \ __rv.__i; \ }) #define vreinterpretq_s16_s8(__a) \ ({ \ + int8x16_t __ax = __a; \ union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv8hiv16qi (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv8hiv16qi (__ax.val); \ __rv.__i; \ }) #define vreinterpretq_s16_s32(__a) \ ({ \ + int32x4_t __ax = __a; \ union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv8hiv4si (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv8hiv4si (__ax.val); \ __rv.__i; \ }) #define vreinterpretq_s16_s64(__a) \ ({ \ + int64x2_t __ax = __a; \ union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv8hiv2di (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv8hiv2di (__ax.val); \ __rv.__i; \ }) #define vreinterpretq_s16_f32(__a) \ ({ \ + float32x4_t __ax = __a; \ union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv8hiv4sf (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv8hiv4sf (__ax.val); \ __rv.__i; \ }) #define vreinterpretq_s16_u8(__a) \ ({ \ + uint8x16_t __ax = __a; \ union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = 
__builtin_neon_vreinterpretv8hiv16qi ((__neon_int8x16_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv8hiv16qi ((__neon_int8x16_t) __ax.val); \ __rv.__i; \ }) #define vreinterpretq_s16_u16(__a) \ ({ \ + uint16x8_t __ax = __a; \ union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv8hiv8hi ((__neon_int16x8_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv8hiv8hi ((__neon_int16x8_t) __ax.val); \ __rv.__i; \ }) #define vreinterpretq_s16_u32(__a) \ ({ \ + uint32x4_t __ax = __a; \ union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv8hiv4si ((__neon_int32x4_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv8hiv4si ((__neon_int32x4_t) __ax.val); \ __rv.__i; \ }) #define vreinterpretq_s16_u64(__a) \ ({ \ + uint64x2_t __ax = __a; \ union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv8hiv2di ((__neon_int64x2_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv8hiv2di ((__neon_int64x2_t) __ax.val); \ __rv.__i; \ }) #define vreinterpretq_s16_p8(__a) \ ({ \ + poly8x16_t __ax = __a; \ union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv8hiv16qi ((__neon_int8x16_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv8hiv16qi ((__neon_int8x16_t) __ax.val); \ __rv.__i; \ }) #define vreinterpretq_s16_p16(__a) \ ({ \ + poly16x8_t __ax = __a; \ union { int16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv8hiv8hi ((__neon_int16x8_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv8hiv8hi ((__neon_int16x8_t) __ax.val); \ __rv.__i; \ }) #define vreinterpret_s32_s8(__a) \ ({ \ + int8x8_t __ax = __a; \ union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv2siv8qi (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv2siv8qi (__ax.val); \ __rv.__i; \ }) #define vreinterpret_s32_s16(__a) \ ({ \ + int16x4_t __ax = 
__a; \ union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv2siv4hi (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv2siv4hi (__ax.val); \ __rv.__i; \ }) #define vreinterpret_s32_s64(__a) \ ({ \ + int64x1_t __ax = __a; \ union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv2siv1di (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv2siv1di (__ax.val); \ __rv.__i; \ }) #define vreinterpret_s32_f32(__a) \ ({ \ + float32x2_t __ax = __a; \ union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv2siv2sf (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv2siv2sf (__ax.val); \ __rv.__i; \ }) #define vreinterpret_s32_u8(__a) \ ({ \ + uint8x8_t __ax = __a; \ union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv2siv8qi ((__neon_int8x8_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv2siv8qi ((__neon_int8x8_t) __ax.val); \ __rv.__i; \ }) #define vreinterpret_s32_u16(__a) \ ({ \ + uint16x4_t __ax = __a; \ union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv2siv4hi ((__neon_int16x4_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv2siv4hi ((__neon_int16x4_t) __ax.val); \ __rv.__i; \ }) #define vreinterpret_s32_u32(__a) \ ({ \ + uint32x2_t __ax = __a; \ union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv2siv2si ((__neon_int32x2_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv2siv2si ((__neon_int32x2_t) __ax.val); \ __rv.__i; \ }) #define vreinterpret_s32_u64(__a) \ ({ \ + uint64x1_t __ax = __a; \ union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv2siv1di ((__neon_int64x1_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv2siv1di ((__neon_int64x1_t) __ax.val); \ __rv.__i; \ }) #define vreinterpret_s32_p8(__a) \ ({ \ + poly8x8_t __ax = __a; \ union 
{ int32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv2siv8qi ((__neon_int8x8_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv2siv8qi ((__neon_int8x8_t) __ax.val); \ __rv.__i; \ }) #define vreinterpret_s32_p16(__a) \ ({ \ + poly16x4_t __ax = __a; \ union { int32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv2siv4hi ((__neon_int16x4_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv2siv4hi ((__neon_int16x4_t) __ax.val); \ __rv.__i; \ }) #define vreinterpretq_s32_s8(__a) \ ({ \ + int8x16_t __ax = __a; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv4siv16qi (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv4siv16qi (__ax.val); \ __rv.__i; \ }) #define vreinterpretq_s32_s16(__a) \ ({ \ + int16x8_t __ax = __a; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv4siv8hi (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv4siv8hi (__ax.val); \ __rv.__i; \ }) #define vreinterpretq_s32_s64(__a) \ ({ \ + int64x2_t __ax = __a; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv4siv2di (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv4siv2di (__ax.val); \ __rv.__i; \ }) #define vreinterpretq_s32_f32(__a) \ ({ \ + float32x4_t __ax = __a; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv4siv4sf (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv4siv4sf (__ax.val); \ __rv.__i; \ }) #define vreinterpretq_s32_u8(__a) \ ({ \ + uint8x16_t __ax = __a; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv4siv16qi ((__neon_int8x16_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv4siv16qi ((__neon_int8x16_t) __ax.val); \ __rv.__i; \ }) #define vreinterpretq_s32_u16(__a) \ ({ \ + uint16x8_t __ax = __a; \ union { int32x4_t __i; __neon_int32x4_t __o; } 
__rv; \ - __rv.__o = __builtin_neon_vreinterpretv4siv8hi ((__neon_int16x8_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv4siv8hi ((__neon_int16x8_t) __ax.val); \ __rv.__i; \ }) #define vreinterpretq_s32_u32(__a) \ ({ \ + uint32x4_t __ax = __a; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv4siv4si ((__neon_int32x4_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv4siv4si ((__neon_int32x4_t) __ax.val); \ __rv.__i; \ }) #define vreinterpretq_s32_u64(__a) \ ({ \ + uint64x2_t __ax = __a; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv4siv2di ((__neon_int64x2_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv4siv2di ((__neon_int64x2_t) __ax.val); \ __rv.__i; \ }) #define vreinterpretq_s32_p8(__a) \ ({ \ + poly8x16_t __ax = __a; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv4siv16qi ((__neon_int8x16_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv4siv16qi ((__neon_int8x16_t) __ax.val); \ __rv.__i; \ }) #define vreinterpretq_s32_p16(__a) \ ({ \ + poly16x8_t __ax = __a; \ union { int32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv4siv8hi ((__neon_int16x8_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv4siv8hi ((__neon_int16x8_t) __ax.val); \ __rv.__i; \ }) #define vreinterpret_u8_s8(__a) \ ({ \ + int8x8_t __ax = __a; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv8qiv8qi (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv8qiv8qi (__ax.val); \ __rv.__i; \ }) #define vreinterpret_u8_s16(__a) \ ({ \ + int16x4_t __ax = __a; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv8qiv4hi (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv8qiv4hi (__ax.val); \ __rv.__i; \ }) #define vreinterpret_u8_s32(__a) \ ({ \ + int32x2_t __ax = __a; \ union { uint8x8_t 
__i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv8qiv2si (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv8qiv2si (__ax.val); \ __rv.__i; \ }) #define vreinterpret_u8_s64(__a) \ ({ \ + int64x1_t __ax = __a; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv8qiv1di (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv8qiv1di (__ax.val); \ __rv.__i; \ }) #define vreinterpret_u8_f32(__a) \ ({ \ + float32x2_t __ax = __a; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv8qiv2sf (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv8qiv2sf (__ax.val); \ __rv.__i; \ }) #define vreinterpret_u8_u16(__a) \ ({ \ + uint16x4_t __ax = __a; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv8qiv4hi ((__neon_int16x4_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv8qiv4hi ((__neon_int16x4_t) __ax.val); \ __rv.__i; \ }) #define vreinterpret_u8_u32(__a) \ ({ \ + uint32x2_t __ax = __a; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv8qiv2si ((__neon_int32x2_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv8qiv2si ((__neon_int32x2_t) __ax.val); \ __rv.__i; \ }) #define vreinterpret_u8_u64(__a) \ ({ \ + uint64x1_t __ax = __a; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv8qiv1di ((__neon_int64x1_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv8qiv1di ((__neon_int64x1_t) __ax.val); \ __rv.__i; \ }) #define vreinterpret_u8_p8(__a) \ ({ \ + poly8x8_t __ax = __a; \ union { uint8x8_t __i; __neon_int8x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv8qiv8qi ((__neon_int8x8_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv8qiv8qi ((__neon_int8x8_t) __ax.val); \ __rv.__i; \ }) #define vreinterpret_u8_p16(__a) \ ({ \ + poly16x4_t __ax = __a; \ union { uint8x8_t __i; __neon_int8x8_t 
__o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv8qiv4hi ((__neon_int16x4_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv8qiv4hi ((__neon_int16x4_t) __ax.val); \ __rv.__i; \ }) #define vreinterpretq_u8_s8(__a) \ ({ \ + int8x16_t __ax = __a; \ union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv16qiv16qi (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv16qiv16qi (__ax.val); \ __rv.__i; \ }) #define vreinterpretq_u8_s16(__a) \ ({ \ + int16x8_t __ax = __a; \ union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv16qiv8hi (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv16qiv8hi (__ax.val); \ __rv.__i; \ }) #define vreinterpretq_u8_s32(__a) \ ({ \ + int32x4_t __ax = __a; \ union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv16qiv4si (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv16qiv4si (__ax.val); \ __rv.__i; \ }) #define vreinterpretq_u8_s64(__a) \ ({ \ + int64x2_t __ax = __a; \ union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv16qiv2di (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv16qiv2di (__ax.val); \ __rv.__i; \ }) #define vreinterpretq_u8_f32(__a) \ ({ \ + float32x4_t __ax = __a; \ union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv16qiv4sf (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv16qiv4sf (__ax.val); \ __rv.__i; \ }) #define vreinterpretq_u8_u16(__a) \ ({ \ + uint16x8_t __ax = __a; \ union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv16qiv8hi ((__neon_int16x8_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv16qiv8hi ((__neon_int16x8_t) __ax.val); \ __rv.__i; \ }) #define vreinterpretq_u8_u32(__a) \ ({ \ + uint32x4_t __ax = __a; \ union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv16qiv4si 
((__neon_int32x4_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv16qiv4si ((__neon_int32x4_t) __ax.val); \ __rv.__i; \ }) #define vreinterpretq_u8_u64(__a) \ ({ \ + uint64x2_t __ax = __a; \ union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv16qiv2di ((__neon_int64x2_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv16qiv2di ((__neon_int64x2_t) __ax.val); \ __rv.__i; \ }) #define vreinterpretq_u8_p8(__a) \ ({ \ + poly8x16_t __ax = __a; \ union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv16qiv16qi ((__neon_int8x16_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv16qiv16qi ((__neon_int8x16_t) __ax.val); \ __rv.__i; \ }) #define vreinterpretq_u8_p16(__a) \ ({ \ + poly16x8_t __ax = __a; \ union { uint8x16_t __i; __neon_int8x16_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv16qiv8hi ((__neon_int16x8_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv16qiv8hi ((__neon_int16x8_t) __ax.val); \ __rv.__i; \ }) #define vreinterpret_u16_s8(__a) \ ({ \ + int8x8_t __ax = __a; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv4hiv8qi (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv4hiv8qi (__ax.val); \ __rv.__i; \ }) #define vreinterpret_u16_s16(__a) \ ({ \ + int16x4_t __ax = __a; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv4hiv4hi (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv4hiv4hi (__ax.val); \ __rv.__i; \ }) #define vreinterpret_u16_s32(__a) \ ({ \ + int32x2_t __ax = __a; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv4hiv2si (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv4hiv2si (__ax.val); \ __rv.__i; \ }) #define vreinterpret_u16_s64(__a) \ ({ \ + int64x1_t __ax = __a; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = 
__builtin_neon_vreinterpretv4hiv1di (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv4hiv1di (__ax.val); \ __rv.__i; \ }) #define vreinterpret_u16_f32(__a) \ ({ \ + float32x2_t __ax = __a; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv4hiv2sf (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv4hiv2sf (__ax.val); \ __rv.__i; \ }) #define vreinterpret_u16_u8(__a) \ ({ \ + uint8x8_t __ax = __a; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv4hiv8qi ((__neon_int8x8_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv4hiv8qi ((__neon_int8x8_t) __ax.val); \ __rv.__i; \ }) #define vreinterpret_u16_u32(__a) \ ({ \ + uint32x2_t __ax = __a; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv4hiv2si ((__neon_int32x2_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv4hiv2si ((__neon_int32x2_t) __ax.val); \ __rv.__i; \ }) #define vreinterpret_u16_u64(__a) \ ({ \ + uint64x1_t __ax = __a; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv4hiv1di ((__neon_int64x1_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv4hiv1di ((__neon_int64x1_t) __ax.val); \ __rv.__i; \ }) #define vreinterpret_u16_p8(__a) \ ({ \ + poly8x8_t __ax = __a; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv4hiv8qi ((__neon_int8x8_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv4hiv8qi ((__neon_int8x8_t) __ax.val); \ __rv.__i; \ }) #define vreinterpret_u16_p16(__a) \ ({ \ + poly16x4_t __ax = __a; \ union { uint16x4_t __i; __neon_int16x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv4hiv4hi ((__neon_int16x4_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv4hiv4hi ((__neon_int16x4_t) __ax.val); \ __rv.__i; \ }) #define vreinterpretq_u16_s8(__a) \ ({ \ + int8x16_t __ax = __a; \ union { uint16x8_t __i; 
__neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv8hiv16qi (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv8hiv16qi (__ax.val); \ __rv.__i; \ }) #define vreinterpretq_u16_s16(__a) \ ({ \ + int16x8_t __ax = __a; \ union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv8hiv8hi (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv8hiv8hi (__ax.val); \ __rv.__i; \ }) #define vreinterpretq_u16_s32(__a) \ ({ \ + int32x4_t __ax = __a; \ union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv8hiv4si (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv8hiv4si (__ax.val); \ __rv.__i; \ }) #define vreinterpretq_u16_s64(__a) \ ({ \ + int64x2_t __ax = __a; \ union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv8hiv2di (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv8hiv2di (__ax.val); \ __rv.__i; \ }) #define vreinterpretq_u16_f32(__a) \ ({ \ + float32x4_t __ax = __a; \ union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv8hiv4sf (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv8hiv4sf (__ax.val); \ __rv.__i; \ }) #define vreinterpretq_u16_u8(__a) \ ({ \ + uint8x16_t __ax = __a; \ union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv8hiv16qi ((__neon_int8x16_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv8hiv16qi ((__neon_int8x16_t) __ax.val); \ __rv.__i; \ }) #define vreinterpretq_u16_u32(__a) \ ({ \ + uint32x4_t __ax = __a; \ union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv8hiv4si ((__neon_int32x4_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv8hiv4si ((__neon_int32x4_t) __ax.val); \ __rv.__i; \ }) #define vreinterpretq_u16_u64(__a) \ ({ \ + uint64x2_t __ax = __a; \ union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = 
__builtin_neon_vreinterpretv8hiv2di ((__neon_int64x2_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv8hiv2di ((__neon_int64x2_t) __ax.val); \ __rv.__i; \ }) #define vreinterpretq_u16_p8(__a) \ ({ \ + poly8x16_t __ax = __a; \ union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv8hiv16qi ((__neon_int8x16_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv8hiv16qi ((__neon_int8x16_t) __ax.val); \ __rv.__i; \ }) #define vreinterpretq_u16_p16(__a) \ ({ \ + poly16x8_t __ax = __a; \ union { uint16x8_t __i; __neon_int16x8_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv8hiv8hi ((__neon_int16x8_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv8hiv8hi ((__neon_int16x8_t) __ax.val); \ __rv.__i; \ }) #define vreinterpret_u32_s8(__a) \ ({ \ + int8x8_t __ax = __a; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv2siv8qi (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv2siv8qi (__ax.val); \ __rv.__i; \ }) #define vreinterpret_u32_s16(__a) \ ({ \ + int16x4_t __ax = __a; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv2siv4hi (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv2siv4hi (__ax.val); \ __rv.__i; \ }) #define vreinterpret_u32_s32(__a) \ ({ \ + int32x2_t __ax = __a; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv2siv2si (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv2siv2si (__ax.val); \ __rv.__i; \ }) #define vreinterpret_u32_s64(__a) \ ({ \ + int64x1_t __ax = __a; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv2siv1di (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv2siv1di (__ax.val); \ __rv.__i; \ }) #define vreinterpret_u32_f32(__a) \ ({ \ + float32x2_t __ax = __a; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv2siv2sf 
(__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv2siv2sf (__ax.val); \ __rv.__i; \ }) #define vreinterpret_u32_u8(__a) \ ({ \ + uint8x8_t __ax = __a; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv2siv8qi ((__neon_int8x8_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv2siv8qi ((__neon_int8x8_t) __ax.val); \ __rv.__i; \ }) #define vreinterpret_u32_u16(__a) \ ({ \ + uint16x4_t __ax = __a; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv2siv4hi ((__neon_int16x4_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv2siv4hi ((__neon_int16x4_t) __ax.val); \ __rv.__i; \ }) #define vreinterpret_u32_u64(__a) \ ({ \ + uint64x1_t __ax = __a; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv2siv1di ((__neon_int64x1_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv2siv1di ((__neon_int64x1_t) __ax.val); \ __rv.__i; \ }) #define vreinterpret_u32_p8(__a) \ ({ \ + poly8x8_t __ax = __a; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv2siv8qi ((__neon_int8x8_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv2siv8qi ((__neon_int8x8_t) __ax.val); \ __rv.__i; \ }) #define vreinterpret_u32_p16(__a) \ ({ \ + poly16x4_t __ax = __a; \ union { uint32x2_t __i; __neon_int32x2_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv2siv4hi ((__neon_int16x4_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv2siv4hi ((__neon_int16x4_t) __ax.val); \ __rv.__i; \ }) #define vreinterpretq_u32_s8(__a) \ ({ \ + int8x16_t __ax = __a; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv4siv16qi (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv4siv16qi (__ax.val); \ __rv.__i; \ }) #define vreinterpretq_u32_s16(__a) \ ({ \ + int16x8_t __ax = __a; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = 
__builtin_neon_vreinterpretv4siv8hi (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv4siv8hi (__ax.val); \ __rv.__i; \ }) #define vreinterpretq_u32_s32(__a) \ ({ \ + int32x4_t __ax = __a; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv4siv4si (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv4siv4si (__ax.val); \ __rv.__i; \ }) #define vreinterpretq_u32_s64(__a) \ ({ \ + int64x2_t __ax = __a; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv4siv2di (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv4siv2di (__ax.val); \ __rv.__i; \ }) #define vreinterpretq_u32_f32(__a) \ ({ \ + float32x4_t __ax = __a; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv4siv4sf (__a.val); \ + __rv.__o = __builtin_neon_vreinterpretv4siv4sf (__ax.val); \ __rv.__i; \ }) #define vreinterpretq_u32_u8(__a) \ ({ \ + uint8x16_t __ax = __a; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv4siv16qi ((__neon_int8x16_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv4siv16qi ((__neon_int8x16_t) __ax.val); \ __rv.__i; \ }) #define vreinterpretq_u32_u16(__a) \ ({ \ + uint16x8_t __ax = __a; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv4siv8hi ((__neon_int16x8_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv4siv8hi ((__neon_int16x8_t) __ax.val); \ __rv.__i; \ }) #define vreinterpretq_u32_u64(__a) \ ({ \ + uint64x2_t __ax = __a; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv4siv2di ((__neon_int64x2_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv4siv2di ((__neon_int64x2_t) __ax.val); \ __rv.__i; \ }) #define vreinterpretq_u32_p8(__a) \ ({ \ + poly8x16_t __ax = __a; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = 
__builtin_neon_vreinterpretv4siv16qi ((__neon_int8x16_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv4siv16qi ((__neon_int8x16_t) __ax.val); \ __rv.__i; \ }) #define vreinterpretq_u32_p16(__a) \ ({ \ + poly16x8_t __ax = __a; \ union { uint32x4_t __i; __neon_int32x4_t __o; } __rv; \ - __rv.__o = __builtin_neon_vreinterpretv4siv8hi ((__neon_int16x8_t) __a.val); \ + __rv.__o = __builtin_neon_vreinterpretv4siv8hi ((__neon_int16x8_t) __ax.val); \ __rv.__i; \ }) Modified: llvm-gcc-4.2/trunk/gcc/config/arm/neon-gen.ml URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/gcc/config/arm/neon-gen.ml?rev=83387&r1=83386&r2=83387&view=diff ============================================================================== --- llvm-gcc-4.2/trunk/gcc/config/arm/neon-gen.ml (original) +++ llvm-gcc-4.2/trunk/gcc/config/arm/neon-gen.ml Tue Oct 6 11:45:06 2009 @@ -72,19 +72,6 @@ amounts, etc.) can be checked for validity. GCC can check them after inlining, but LLVM does inlining separately. - This is not ideal for error messages. In the simple cases, llvm-gcc will - use the GCC builtin names instead of the user-visible ARM intrinsic names. - In cases where the macros use unions to convert argument types, the error - messages may not show any context information at all. - - The problems with error messages could be avoided if the compiler - recognized the intrinsics directly, but that is not trivial. The - user-visible intrinsics need to use the types defined by ARM that - distinguish the vector element signedness, whereas the LLVM intrinsics do - not care about signedness and also use different struct types (multiple - fields instead of arrays) that match the capabilities of tablegen-defined - intrinsics. 
- Some macros translate to simple intrinsic calls and should not end with semicolons, but for others, which use GCC's statement-expressions to include unions that convert argument and/or return types, the semicolons @@ -133,14 +120,15 @@ | T_uint32x4 -> T_int32x4 | T_uint64x1 -> T_int64x1 | T_uint64x2 -> T_int64x2 - (* LLVM LOCAL begin use standard type names *) - | T_uint8 -> T_int8 - | T_uint16 -> T_int16 - | T_uint32 -> T_int32 - | T_uint64 -> T_int64 - | T_poly8 -> T_int8 - | T_poly16 -> T_int16 - (* LLVM LOCAL end use standard type names *) + (* Cast to types defined by mode in arm.c, not random types pulled in from + the header in use. This fixes incompatible pointer errors when + compiling with C++. *) + | T_uint8 | T_int8 -> T_intQI + | T_uint16 | T_int16 -> T_intHI + | T_uint32 | T_int32 -> T_intSI + | T_uint64 | T_int64 -> T_intDI + | T_poly8 -> T_intQI + | T_poly16 -> T_intHI | T_arrayof (n, elt) -> T_arrayof (n, signed_ctype elt) | T_ptrto elt -> T_ptrto (signed_ctype elt) | T_const elt -> T_const (signed_ctype elt) @@ -165,7 +153,11 @@ let add_cast_with_prefix ctype cval stype_prefix = let stype = signed_ctype ctype in if ctype <> stype then - Printf.sprintf "(%s%s) %s" stype_prefix (string_of_vectype stype) cval + match stype with + T_ptrto elt -> + Printf.sprintf "__neon_ptr_cast(%s%s, %s)" stype_prefix (string_of_vectype stype) cval + | _ -> + Printf.sprintf "(%s%s) %s" stype_prefix (string_of_vectype stype) cval else cval @@ -237,9 +229,17 @@ | T_float32x2 | T_float32x4 | T_poly8x8 | T_poly8x16 | T_poly16x4 | T_poly16x8 -> - add_cast_with_prefix t (p ^ ".val") "__neon_" + let decl = Printf.sprintf "%s %s = %s" + (string_of_vectype t) (p ^ "x") p in + pdecls := decl :: !pdecls; + add_cast_with_prefix t (p ^ "x.val") "__neon_" + | T_immediate (lo, hi) -> p + | _ -> + let decl = Printf.sprintf "%s %s = %s" + (string_of_vectype t) (p ^ "x") p in + pdecls := decl :: !pdecls; + add_cast t (p ^ "x") in (* LLVM LOCAL end Extract vector operand from wrapper 
struct. *) - | _ -> add_cast t p in let plist = match ps with Arity0 _ -> [] | Arity1 (_, t1) -> [ptype t1 "__a"] @@ -353,50 +353,51 @@ FIXME: It's probably better to use stdint.h names here. *) -(* LLVM LOCAL begin Use stdint.h types for elements and add wrapper structs. *) let deftypes () = - (* Extra types not in . *) - Format.printf "typedef float float32_t;\n"; - Format.printf "typedef signed char poly8_t;\n"; - Format.printf "typedef signed short poly16_t;\n"; - Format.print_newline (); let typeinfo = [ (* Doubleword vector types. *) - "int8_t ", "int", 8, 8; - "int16_t", "int", 16, 4; - "int32_t", "int", 32, 2; - "int64_t", "int", 64, 1; - "float32_t", "float", 32, 2; - "poly8_t", "poly", 8, 8; - "poly16_t", "poly", 16, 4; - "uint8_t", "uint", 8, 8; - "uint16_t", "uint", 16, 4; - "uint32_t", "uint", 32, 2; - "uint64_t", "uint", 64, 1; + "__builtin_neon_qi", "int", 8, 8; + "__builtin_neon_hi", "int", 16, 4; + "__builtin_neon_si", "int", 32, 2; + "__builtin_neon_di", "int", 64, 1; + "__builtin_neon_sf", "float", 32, 2; + "__builtin_neon_poly8", "poly", 8, 8; + "__builtin_neon_poly16", "poly", 16, 4; + "__builtin_neon_uqi", "uint", 8, 8; + "__builtin_neon_uhi", "uint", 16, 4; + "__builtin_neon_usi", "uint", 32, 2; + "__builtin_neon_udi", "uint", 64, 1; (* Quadword vector types. 
*) - "int8_t", "int", 8, 16; - "int16_t", "int", 16, 8; - "int32_t", "int", 32, 4; - "int64_t", "int", 64, 2; - "float32_t", "float", 32, 4; - "poly8_t", "poly", 8, 16; - "poly16_t", "poly", 16, 8; - "uint8_t", "uint", 8, 16; - "uint16_t", "uint", 16, 8; - "uint32_t", "uint", 32, 4; - "uint64_t", "uint", 64, 2 + "__builtin_neon_qi", "int", 8, 16; + "__builtin_neon_hi", "int", 16, 8; + "__builtin_neon_si", "int", 32, 4; + "__builtin_neon_di", "int", 64, 2; + "__builtin_neon_sf", "float", 32, 4; + "__builtin_neon_poly8", "poly", 8, 16; + "__builtin_neon_poly16", "poly", 16, 8; + "__builtin_neon_uqi", "uint", 8, 16; + "__builtin_neon_uhi", "uint", 16, 8; + "__builtin_neon_usi", "uint", 32, 4; + "__builtin_neon_udi", "uint", 64, 2 ] in List.iter (fun (cbase, abase, esize, enum) -> let attr = match enum with (* LLVM LOCAL no special case for enum == 1 so int64x1_t is a vector *) - _ -> Printf.sprintf " \t__attribute__ ((__vector_size__ (%d)))" + _ -> Printf.sprintf "\t__attribute__ ((__vector_size__ (%d)))" (esize * enum / 8) in + (* LLVM LOCAL Add "__neon_" prefix. *) Format.printf "typedef %s __neon_%s%dx%d_t%s;@\n" cbase abase esize enum attr) typeinfo; Format.print_newline (); + (* Extra types not in <stdint.h>. *) + Format.printf "typedef __builtin_neon_sf float32_t;\n"; + Format.printf "typedef __builtin_neon_poly8 poly8_t;\n"; + Format.printf "typedef __builtin_neon_poly16 poly16_t;\n" +(* LLVM LOCAL begin Define containerized vector types. *) + ; List.iter (fun (cbase, abase, esize, enum) -> let typename = @@ -411,7 +412,7 @@ Format.printf " %s;" typename; end_function sfmt) typeinfo -(* LLVM LOCAL end Use stdint.h types for elements and add wrapper structs. *) +(* LLVM LOCAL end Define containerized vector types. *) (* Output structs containing arrays, for load & store instructions etc. 
*) @@ -491,6 +492,11 @@ ""; "#ifdef __cplusplus"; "extern \"C\" {"; +(* LLVM LOCAL begin Use reinterpret_cast for pointers in C++ *) +"#define __neon_ptr_cast(ty, ptr) reinterpret_cast<ty>(ptr)"; +"#else"; +"#define __neon_ptr_cast(ty, ptr) (ty)(ptr)"; +(* LLVM LOCAL end Use reinterpret_cast for pointers in C++ *) "#endif"; ""; "#include <stdint.h>"; From sabre at nondot.org Tue Oct 6 11:59:46 2009 From: sabre at nondot.org (Chris Lattner) Date: Tue, 06 Oct 2009 16:59:46 -0000 Subject: [llvm-commits] [llvm] r83390 - in /llvm/trunk: include/llvm/LinkAllPasses.h include/llvm/Transforms/Scalar.h lib/Transforms/Scalar/PredicateSimplifier.cpp test/Transforms/PredicateSimplifier/ Message-ID: <200910061659.n96GxkZt004157@zion.cs.uiuc.edu> Author: lattner Date: Tue Oct 6 11:59:46 2009 New Revision: 83390 URL: http://llvm.org/viewvc/llvm-project?rev=83390&view=rev Log: remove predicate simplifier, it never got the last bugs beaten out of it, and jump threading, condprop and gvn are now getting most of the benefit. This was approved by Nicholas and Nicolas. 
Removed: llvm/trunk/lib/Transforms/Scalar/PredicateSimplifier.cpp llvm/trunk/test/Transforms/PredicateSimplifier/ Modified: llvm/trunk/include/llvm/LinkAllPasses.h llvm/trunk/include/llvm/Transforms/Scalar.h Modified: llvm/trunk/include/llvm/LinkAllPasses.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/LinkAllPasses.h?rev=83390&r1=83389&r2=83390&view=diff ============================================================================== --- llvm/trunk/include/llvm/LinkAllPasses.h (original) +++ llvm/trunk/include/llvm/LinkAllPasses.h Tue Oct 6 11:59:46 2009 @@ -118,7 +118,6 @@ (void) llvm::createRSProfilingPass(); (void) llvm::createIndMemRemPass(); (void) llvm::createInstCountPass(); - (void) llvm::createPredicateSimplifierPass(); (void) llvm::createCodeGenLICMPass(); (void) llvm::createCodeGenPreparePass(); (void) llvm::createGVNPass(); Modified: llvm/trunk/include/llvm/Transforms/Scalar.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Transforms/Scalar.h?rev=83390&r1=83389&r2=83390&view=diff ============================================================================== --- llvm/trunk/include/llvm/Transforms/Scalar.h (original) +++ llvm/trunk/include/llvm/Transforms/Scalar.h Tue Oct 6 11:59:46 2009 @@ -278,13 +278,6 @@ //===----------------------------------------------------------------------===// // -// PredicateSimplifier - This pass collapses duplicate variables into one -// canonical form, and tries to simplify expressions along the way. -// -FunctionPass *createPredicateSimplifierPass(); - -//===----------------------------------------------------------------------===// -// // GVN - This pass performs global value numbering and redundant load // elimination cotemporaneously. 
// Removed: llvm/trunk/lib/Transforms/Scalar/PredicateSimplifier.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/PredicateSimplifier.cpp?rev=83389&view=auto ============================================================================== --- llvm/trunk/lib/Transforms/Scalar/PredicateSimplifier.cpp (original) +++ llvm/trunk/lib/Transforms/Scalar/PredicateSimplifier.cpp (removed) @@ -1,2704 +0,0 @@ -//===-- PredicateSimplifier.cpp - Path Sensitive Simplifier ---------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Path-sensitive optimizer. In a branch where x == y, replace uses of -// x with y. Permits further optimization, such as the elimination of -// the unreachable call: -// -// void test(int *p, int *q) -// { -// if (p != q) -// return; -// -// if (*p != *q) -// foo(); // unreachable -// } -// -//===----------------------------------------------------------------------===// -// -// The InequalityGraph focusses on four properties; equals, not equals, -// less-than and less-than-or-equals-to. The greater-than forms are also held -// just to allow walking from a lesser node to a greater one. These properties -// are stored in a lattice; LE can become LT or EQ, NE can become LT or GT. -// -// These relationships define a graph between values of the same type. Each -// Value is stored in a map table that retrieves the associated Node. This -// is how EQ relationships are stored; the map contains pointers from equal -// Value to the same node. The node contains a most canonical Value* form -// and the list of known relationships with other nodes. -// -// If two nodes are known to be inequal, then they will contain pointers to -// each other with an "NE" relationship. 
If node getNode(%x) is less than -// getNode(%y), then the %x node will contain <%y, GT> and %y will contain -// <%x, LT>. This allows us to tie nodes together into a graph like this: -// -// %a < %b < %c < %d -// -// with four nodes representing the properties. The InequalityGraph provides -// querying with "isRelatedBy" and mutators "addEquality" and "addInequality". -// To find a relationship, we start with one of the nodes any binary search -// through its list to find where the relationships with the second node start. -// Then we iterate through those to find the first relationship that dominates -// our context node. -// -// To create these properties, we wait until a branch or switch instruction -// implies that a particular value is true (or false). The VRPSolver is -// responsible for analyzing the variable and seeing what new inferences -// can be made from each property. For example: -// -// %P = icmp ne i32* %ptr, null -// %a = and i1 %P, %Q -// br i1 %a label %cond_true, label %cond_false -// -// For the true branch, the VRPSolver will start with %a EQ true and look at -// the definition of %a and find that it can infer that %P and %Q are both -// true. From %P being true, it can infer that %ptr NE null. For the false -// branch it can't infer anything from the "and" instruction. -// -// Besides branches, we can also infer properties from instruction that may -// have undefined behaviour in certain cases. For example, the dividend of -// a division may never be zero. After the division instruction, we may assume -// that the dividend is not equal to zero. -// -//===----------------------------------------------------------------------===// -// -// The ValueRanges class stores the known integer bounds of a Value. When we -// encounter i8 %a u< %b, the ValueRanges stores that %a = [1, 255] and -// %b = [0, 254]. -// -// It never stores an empty range, because that means that the code is -// unreachable. 
It never stores a single-element range since that's an equality -// relationship and better stored in the InequalityGraph, nor an empty range -// since that is better stored in UnreachableBlocks. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "predsimplify" -#include "llvm/Transforms/Scalar.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Instructions.h" -#include "llvm/Pass.h" -#include "llvm/ADT/DepthFirstIterator.h" -#include "llvm/ADT/SetOperations.h" -#include "llvm/ADT/SetVector.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/Analysis/Dominators.h" -#include "llvm/Assembly/Writer.h" -#include "llvm/Support/CFG.h" -#include "llvm/Support/ConstantRange.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/InstVisitor.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Transforms/Utils/Local.h" -#include -#include -#include -using namespace llvm; - -STATISTIC(NumVarsReplaced, "Number of argument substitutions"); -STATISTIC(NumInstruction , "Number of instructions removed"); -STATISTIC(NumSimple , "Number of simple replacements"); -STATISTIC(NumBlocks , "Number of blocks marked unreachable"); -STATISTIC(NumSnuggle , "Number of comparisons snuggled"); - -static const ConstantRange empty(1, false); - -namespace { - class DomTreeDFS { - public: - class Node { - friend class DomTreeDFS; - public: - typedef std::vector::iterator iterator; - typedef std::vector::const_iterator const_iterator; - - unsigned getDFSNumIn() const { return DFSin; } - unsigned getDFSNumOut() const { return DFSout; } - - BasicBlock *getBlock() const { return BB; } - - iterator begin() { return Children.begin(); } - iterator end() { return Children.end(); } - - const_iterator begin() const { return Children.begin(); } - const_iterator end() const { return Children.end(); } - - bool dominates(const Node *N) const { - 
return DFSin <= N->DFSin && DFSout >= N->DFSout; - } - - bool DominatedBy(const Node *N) const { - return N->dominates(this); - } - - /// Sorts by the number of descendants. With this, you can iterate - /// through a sorted list and the first matching entry is the most - /// specific match for your basic block. The order provided is stable; - /// DomTreeDFS::Nodes with the same number of descendants are sorted by - /// DFS in number. - bool operator<(const Node &N) const { - unsigned spread = DFSout - DFSin; - unsigned N_spread = N.DFSout - N.DFSin; - if (spread == N_spread) return DFSin < N.DFSin; - return spread < N_spread; - } - bool operator>(const Node &N) const { return N < *this; } - - private: - unsigned DFSin, DFSout; - BasicBlock *BB; - - std::vector Children; - }; - - // XXX: this may be slow. Instead of using "new" for each node, consider - // putting them in a vector to keep them contiguous. - explicit DomTreeDFS(DominatorTree *DT) { - std::stack > S; - - Entry = new Node; - Entry->BB = DT->getRootNode()->getBlock(); - S.push(std::make_pair(Entry, DT->getRootNode())); - - NodeMap[Entry->BB] = Entry; - - while (!S.empty()) { - std::pair &Pair = S.top(); - Node *N = Pair.first; - DomTreeNode *DTNode = Pair.second; - S.pop(); - - for (DomTreeNode::iterator I = DTNode->begin(), E = DTNode->end(); - I != E; ++I) { - Node *NewNode = new Node; - NewNode->BB = (*I)->getBlock(); - N->Children.push_back(NewNode); - S.push(std::make_pair(NewNode, *I)); - - NodeMap[NewNode->BB] = NewNode; - } - } - - renumber(); - -#ifndef NDEBUG - DEBUG(dump()); -#endif - } - -#ifndef NDEBUG - virtual -#endif - ~DomTreeDFS() { - std::stack S; - - S.push(Entry); - while (!S.empty()) { - Node *N = S.top(); S.pop(); - - for (Node::iterator I = N->begin(), E = N->end(); I != E; ++I) - S.push(*I); - - delete N; - } - } - - /// getRootNode - This returns the entry node for the CFG of the function. 
- Node *getRootNode() const { return Entry; } - - /// getNodeForBlock - return the node for the specified basic block. - Node *getNodeForBlock(BasicBlock *BB) const { - if (!NodeMap.count(BB)) return 0; - return const_cast(this)->NodeMap[BB]; - } - - /// dominates - returns true if the basic block for I1 dominates that of - /// the basic block for I2. If the instructions belong to the same basic - /// block, the instruction first instruction sequentially in the block is - /// considered dominating. - bool dominates(Instruction *I1, Instruction *I2) { - BasicBlock *BB1 = I1->getParent(), - *BB2 = I2->getParent(); - if (BB1 == BB2) { - if (isa(I1)) return false; - if (isa(I2)) return true; - if ( isa(I1) && !isa(I2)) return true; - if (!isa(I1) && isa(I2)) return false; - - for (BasicBlock::const_iterator I = BB2->begin(), E = BB2->end(); - I != E; ++I) { - if (&*I == I1) return true; - else if (&*I == I2) return false; - } - assert(!"Instructions not found in parent BasicBlock?"); - } else { - Node *Node1 = getNodeForBlock(BB1), - *Node2 = getNodeForBlock(BB2); - return Node1 && Node2 && Node1->dominates(Node2); - } - return false; // Not reached - } - - private: - /// renumber - calculates the depth first search numberings and applies - /// them onto the nodes. 
- void renumber() { - std::stack > S; - unsigned n = 0; - - Entry->DFSin = ++n; - S.push(std::make_pair(Entry, Entry->begin())); - - while (!S.empty()) { - std::pair &Pair = S.top(); - Node *N = Pair.first; - Node::iterator &I = Pair.second; - - if (I == N->end()) { - N->DFSout = ++n; - S.pop(); - } else { - Node *Next = *I++; - Next->DFSin = ++n; - S.push(std::make_pair(Next, Next->begin())); - } - } - } - -#ifndef NDEBUG - virtual void dump() const { - dump(errs()); - } - - void dump(raw_ostream &os) const { - os << "Predicate simplifier DomTreeDFS: \n"; - dump(Entry, 0, os); - os << "\n\n"; - } - - void dump(Node *N, int depth, raw_ostream &os) const { - ++depth; - for (int i = 0; i < depth; ++i) { os << " "; } - os << "[" << depth << "] "; - - os << N->getBlock()->getNameStr() << " (" << N->getDFSNumIn() - << ", " << N->getDFSNumOut() << ")\n"; - - for (Node::iterator I = N->begin(), E = N->end(); I != E; ++I) - dump(*I, depth, os); - } -#endif - - Node *Entry; - std::map NodeMap; - }; - - // SLT SGT ULT UGT EQ - // 0 1 0 1 0 -- GT 10 - // 0 1 0 1 1 -- GE 11 - // 0 1 1 0 0 -- SGTULT 12 - // 0 1 1 0 1 -- SGEULE 13 - // 0 1 1 1 0 -- SGT 14 - // 0 1 1 1 1 -- SGE 15 - // 1 0 0 1 0 -- SLTUGT 18 - // 1 0 0 1 1 -- SLEUGE 19 - // 1 0 1 0 0 -- LT 20 - // 1 0 1 0 1 -- LE 21 - // 1 0 1 1 0 -- SLT 22 - // 1 0 1 1 1 -- SLE 23 - // 1 1 0 1 0 -- UGT 26 - // 1 1 0 1 1 -- UGE 27 - // 1 1 1 0 0 -- ULT 28 - // 1 1 1 0 1 -- ULE 29 - // 1 1 1 1 0 -- NE 30 - enum LatticeBits { - EQ_BIT = 1, UGT_BIT = 2, ULT_BIT = 4, SGT_BIT = 8, SLT_BIT = 16 - }; - enum LatticeVal { - GT = SGT_BIT | UGT_BIT, - GE = GT | EQ_BIT, - LT = SLT_BIT | ULT_BIT, - LE = LT | EQ_BIT, - NE = SLT_BIT | SGT_BIT | ULT_BIT | UGT_BIT, - SGTULT = SGT_BIT | ULT_BIT, - SGEULE = SGTULT | EQ_BIT, - SLTUGT = SLT_BIT | UGT_BIT, - SLEUGE = SLTUGT | EQ_BIT, - ULT = SLT_BIT | SGT_BIT | ULT_BIT, - UGT = SLT_BIT | SGT_BIT | UGT_BIT, - SLT = SLT_BIT | ULT_BIT | UGT_BIT, - SGT = SGT_BIT | ULT_BIT | UGT_BIT, - SLE = SLT | EQ_BIT, 
- SGE = SGT | EQ_BIT, - ULE = ULT | EQ_BIT, - UGE = UGT | EQ_BIT - }; - -#ifndef NDEBUG - /// validPredicate - determines whether a given value is actually a lattice - /// value. Only used in assertions or debugging. - static bool validPredicate(LatticeVal LV) { - switch (LV) { - case GT: case GE: case LT: case LE: case NE: - case SGTULT: case SGT: case SGEULE: - case SLTUGT: case SLT: case SLEUGE: - case ULT: case UGT: - case SLE: case SGE: case ULE: case UGE: - return true; - default: - return false; - } - } -#endif - - /// reversePredicate - reverse the direction of the inequality - static LatticeVal reversePredicate(LatticeVal LV) { - unsigned reverse = LV ^ (SLT_BIT|SGT_BIT|ULT_BIT|UGT_BIT); //preserve EQ_BIT - - if ((reverse & (SLT_BIT|SGT_BIT)) == 0) - reverse |= (SLT_BIT|SGT_BIT); - - if ((reverse & (ULT_BIT|UGT_BIT)) == 0) - reverse |= (ULT_BIT|UGT_BIT); - - LatticeVal Rev = static_cast(reverse); - assert(validPredicate(Rev) && "Failed reversing predicate."); - return Rev; - } - - /// ValueNumbering stores the scope-specific value numbers for a given Value. - class ValueNumbering { - - /// VNPair is a tuple of {Value, index number, DomTreeDFS::Node}. It - /// includes the comparison operators necessary to allow you to store it - /// in a sorted vector. 
- class VNPair { - public: - Value *V; - unsigned index; - DomTreeDFS::Node *Subtree; - - VNPair(Value *V, unsigned index, DomTreeDFS::Node *Subtree) - : V(V), index(index), Subtree(Subtree) {} - - bool operator==(const VNPair &RHS) const { - return V == RHS.V && Subtree == RHS.Subtree; - } - - bool operator<(const VNPair &RHS) const { - if (V != RHS.V) return V < RHS.V; - return *Subtree < *RHS.Subtree; - } - - bool operator<(Value *RHS) const { - return V < RHS; - } - - bool operator>(Value *RHS) const { - return V > RHS; - } - - friend bool operator<(Value *RHS, const VNPair &pair) { - return pair.operator>(RHS); - } - }; - - typedef std::vector VNMapType; - VNMapType VNMap; - - /// The canonical choice for value number at index. - std::vector Values; - - DomTreeDFS *DTDFS; - - public: -#ifndef NDEBUG - virtual ~ValueNumbering() {} - virtual void dump() { - print(errs()); - } - - void print(raw_ostream &os) { - for (unsigned i = 1; i <= Values.size(); ++i) { - os << i << " = "; - WriteAsOperand(os, Values[i-1]); - os << " {"; - for (unsigned j = 0; j < VNMap.size(); ++j) { - if (VNMap[j].index == i) { - WriteAsOperand(os, VNMap[j].V); - os << " (" << VNMap[j].Subtree->getDFSNumIn() << ") "; - } - } - os << "}\n"; - } - } -#endif - - /// compare - returns true if V1 is a better canonical value than V2. - bool compare(Value *V1, Value *V2) const { - if (isa(V1)) - return !isa(V2); - else if (isa(V2)) - return false; - else if (isa(V1)) - return !isa(V2); - else if (isa(V2)) - return false; - - Instruction *I1 = dyn_cast(V1); - Instruction *I2 = dyn_cast(V2); - - if (!I1 || !I2) - return V1->getNumUses() < V2->getNumUses(); - - return DTDFS->dominates(I1, I2); - } - - ValueNumbering(DomTreeDFS *DTDFS) : DTDFS(DTDFS) {} - - /// valueNumber - finds the value number for V under the Subtree. If - /// there is no value number, returns zero. 
- unsigned valueNumber(Value *V, DomTreeDFS::Node *Subtree) { - if (!(isa(V) || isa(V) || isa(V)) || - V->getType() == Type::getVoidTy(V->getContext())) return 0; - - VNMapType::iterator E = VNMap.end(); - VNPair pair(V, 0, Subtree); - VNMapType::iterator I = std::lower_bound(VNMap.begin(), E, pair); - while (I != E && I->V == V) { - if (I->Subtree->dominates(Subtree)) - return I->index; - ++I; - } - return 0; - } - - /// getOrInsertVN - always returns a value number, creating it if necessary. - unsigned getOrInsertVN(Value *V, DomTreeDFS::Node *Subtree) { - if (unsigned n = valueNumber(V, Subtree)) - return n; - else - return newVN(V); - } - - /// newVN - creates a new value number. Value V must not already have a - /// value number assigned. - unsigned newVN(Value *V) { - assert((isa(V) || isa(V) || isa(V)) && - "Bad Value for value numbering."); - assert(V->getType() != Type::getVoidTy(V->getContext()) && - "Won't value number a void value"); - - Values.push_back(V); - - VNPair pair = VNPair(V, Values.size(), DTDFS->getRootNode()); - VNMapType::iterator I = std::lower_bound(VNMap.begin(), VNMap.end(), pair); - assert((I == VNMap.end() || value(I->index) != V) && - "Attempt to create a duplicate value number."); - VNMap.insert(I, pair); - - return Values.size(); - } - - /// value - returns the Value associated with a value number. - Value *value(unsigned index) const { - assert(index != 0 && "Zero index is reserved for not found."); - assert(index <= Values.size() && "Index out of range."); - return Values[index-1]; - } - - /// canonicalize - return a Value that is equal to V under Subtree. - Value *canonicalize(Value *V, DomTreeDFS::Node *Subtree) { - if (isa(V)) return V; - - if (unsigned n = valueNumber(V, Subtree)) - return value(n); - else - return V; - } - - /// addEquality - adds that value V belongs to the set of equivalent - /// values defined by value number n under Subtree. 
- void addEquality(unsigned n, Value *V, DomTreeDFS::Node *Subtree) { - assert(canonicalize(value(n), Subtree) == value(n) && - "Node's 'canonical' choice isn't best within this subtree."); - - // Suppose that we are given "%x -> node #1 (%y)". The problem is that - // we may already have "%z -> node #2 (%x)" somewhere above us in the - // graph. We need to find those edges and add "%z -> node #1 (%y)" - // to keep the lookups canonical. - - std::vector ToRepoint(1, V); - - if (unsigned Conflict = valueNumber(V, Subtree)) { - for (VNMapType::iterator I = VNMap.begin(), E = VNMap.end(); - I != E; ++I) { - if (I->index == Conflict && I->Subtree->dominates(Subtree)) - ToRepoint.push_back(I->V); - } - } - - for (std::vector::iterator VI = ToRepoint.begin(), - VE = ToRepoint.end(); VI != VE; ++VI) { - Value *V = *VI; - - VNPair pair(V, n, Subtree); - VNMapType::iterator B = VNMap.begin(), E = VNMap.end(); - VNMapType::iterator I = std::lower_bound(B, E, pair); - if (I != E && I->V == V && I->Subtree == Subtree) - I->index = n; // Update best choice - else - VNMap.insert(I, pair); // New Value - - // XXX: we currently don't have to worry about updating values with - // more specific Subtrees, but we will need to for PHI node support. - -#ifndef NDEBUG - Value *V_n = value(n); - if (isa(V) && isa(V_n)) { - assert(V == V_n && "Constant equals different constant?"); - } -#endif - } - } - - /// remove - removes all references to value V. - void remove(Value *V) { - VNMapType::iterator B = VNMap.begin(), E = VNMap.end(); - VNPair pair(V, 0, DTDFS->getRootNode()); - VNMapType::iterator J = std::upper_bound(B, E, pair); - VNMapType::iterator I = J; - - while (I != B && (I == E || I->V == V)) --I; - - VNMap.erase(I, J); - } - }; - - /// The InequalityGraph stores the relationships between values. - /// Each Value in the graph is assigned to a Node. Nodes are pointer - /// comparable for equality. The caller is expected to maintain the logical - /// consistency of the system. 
- /// - /// The InequalityGraph class may invalidate Node*s after any mutator call. - /// @brief The InequalityGraph stores the relationships between values. - class InequalityGraph { - ValueNumbering &VN; - DomTreeDFS::Node *TreeRoot; - - InequalityGraph(); // DO NOT IMPLEMENT - InequalityGraph(InequalityGraph &); // DO NOT IMPLEMENT - public: - InequalityGraph(ValueNumbering &VN, DomTreeDFS::Node *TreeRoot) - : VN(VN), TreeRoot(TreeRoot) {} - - class Node; - - /// An Edge is contained inside a Node making one end of the edge implicit - /// and contains a pointer to the other end. The edge contains a lattice - /// value specifying the relationship and an DomTreeDFS::Node specifying - /// the root in the dominator tree to which this edge applies. - class Edge { - public: - Edge(unsigned T, LatticeVal V, DomTreeDFS::Node *ST) - : To(T), LV(V), Subtree(ST) {} - - unsigned To; - LatticeVal LV; - DomTreeDFS::Node *Subtree; - - bool operator<(const Edge &edge) const { - if (To != edge.To) return To < edge.To; - return *Subtree < *edge.Subtree; - } - - bool operator<(unsigned to) const { - return To < to; - } - - bool operator>(unsigned to) const { - return To > to; - } - - friend bool operator<(unsigned to, const Edge &edge) { - return edge.operator>(to); - } - }; - - /// A single node in the InequalityGraph. This stores the canonical Value - /// for the node, as well as the relationships with the neighbours. - /// - /// @brief A single node in the InequalityGraph. - class Node { - friend class InequalityGraph; - - typedef SmallVector RelationsType; - RelationsType Relations; - - // TODO: can this idea improve performance? 
- //friend class std::vector; - //Node(Node &N) { RelationsType.swap(N.RelationsType); } - - public: - typedef RelationsType::iterator iterator; - typedef RelationsType::const_iterator const_iterator; - -#ifndef NDEBUG - virtual ~Node() {} - virtual void dump() const { - dump(errs()); - } - private: - void dump(raw_ostream &os) const { - static const std::string names[32] = - { "000000", "000001", "000002", "000003", "000004", "000005", - "000006", "000007", "000008", "000009", " >", " >=", - " s>u<", "s>=u<=", " s>", " s>=", "000016", "000017", - " s", "s<=u>=", " <", " <=", " s<", " s<=", - "000024", "000025", " u>", " u>=", " u<", " u<=", - " !=", "000031" }; - for (Node::const_iterator NI = begin(), NE = end(); NI != NE; ++NI) { - os << names[NI->LV] << " " << NI->To - << " (" << NI->Subtree->getDFSNumIn() << "), "; - } - } - public: -#endif - - iterator begin() { return Relations.begin(); } - iterator end() { return Relations.end(); } - const_iterator begin() const { return Relations.begin(); } - const_iterator end() const { return Relations.end(); } - - iterator find(unsigned n, DomTreeDFS::Node *Subtree) { - iterator E = end(); - for (iterator I = std::lower_bound(begin(), E, n); - I != E && I->To == n; ++I) { - if (Subtree->DominatedBy(I->Subtree)) - return I; - } - return E; - } - - const_iterator find(unsigned n, DomTreeDFS::Node *Subtree) const { - const_iterator E = end(); - for (const_iterator I = std::lower_bound(begin(), E, n); - I != E && I->To == n; ++I) { - if (Subtree->DominatedBy(I->Subtree)) - return I; - } - return E; - } - - /// update - updates the lattice value for a given node, creating a new - /// entry if one doesn't exist. The new lattice value must not be - /// inconsistent with any previously existing value. 
- void update(unsigned n, LatticeVal R, DomTreeDFS::Node *Subtree) { - assert(validPredicate(R) && "Invalid predicate."); - - Edge edge(n, R, Subtree); - iterator B = begin(), E = end(); - iterator I = std::lower_bound(B, E, edge); - - iterator J = I; - while (J != E && J->To == n) { - if (Subtree->DominatedBy(J->Subtree)) - break; - ++J; - } - - if (J != E && J->To == n) { - edge.LV = static_cast(J->LV & R); - assert(validPredicate(edge.LV) && "Invalid union of lattice values."); - - if (edge.LV == J->LV) - return; // This update adds nothing new. - } - - if (I != B) { - // We also have to tighten any edge beneath our update. - for (iterator K = I - 1; K->To == n; --K) { - if (K->Subtree->DominatedBy(Subtree)) { - LatticeVal LV = static_cast(K->LV & edge.LV); - assert(validPredicate(LV) && "Invalid union of lattice values"); - K->LV = LV; - } - if (K == B) break; - } - } - - // Insert new edge at Subtree if it isn't already there. - if (I == E || I->To != n || Subtree != I->Subtree) - Relations.insert(I, edge); - } - }; - - private: - - std::vector Nodes; - - public: - /// node - returns the node object at a given value number. The pointer - /// returned may be invalidated on the next call to node(). - Node *node(unsigned index) { - assert(VN.value(index)); // This triggers the necessary checks. - if (Nodes.size() < index) Nodes.resize(index); - return &Nodes[index-1]; - } - - /// isRelatedBy - true iff n1 op n2 - bool isRelatedBy(unsigned n1, unsigned n2, DomTreeDFS::Node *Subtree, - LatticeVal LV) { - if (n1 == n2) return LV & EQ_BIT; - - Node *N1 = node(n1); - Node::iterator I = N1->find(n2, Subtree), E = N1->end(); - if (I != E) return (I->LV & LV) == I->LV; - - return false; - } - - // The add* methods assume that your input is logically valid and may - // assertion-fail or infinitely loop if you attempt a contradiction. - - /// addInequality - Sets n1 op n2. - /// It is also an error to call this on an inequality that is already true. 
- void addInequality(unsigned n1, unsigned n2, DomTreeDFS::Node *Subtree, - LatticeVal LV1) { - assert(n1 != n2 && "A node can't be inequal to itself."); - - if (LV1 != NE) - assert(!isRelatedBy(n1, n2, Subtree, reversePredicate(LV1)) && - "Contradictory inequality."); - - // Suppose we're adding %n1 < %n2. Find all the %a < %n1 and - // add %a < %n2 too. This keeps the graph fully connected. - if (LV1 != NE) { - // Break up the relationship into signed and unsigned comparison parts. - // If the signed parts of %a op1 %n1 match that of %n1 op2 %n2, and - // op1 and op2 aren't NE, then add %a op3 %n2. The new relationship - // should have the EQ_BIT iff it's set for both op1 and op2. - - unsigned LV1_s = LV1 & (SLT_BIT|SGT_BIT); - unsigned LV1_u = LV1 & (ULT_BIT|UGT_BIT); - - for (Node::iterator I = node(n1)->begin(), E = node(n1)->end(); I != E; ++I) { - if (I->LV != NE && I->To != n2) { - - DomTreeDFS::Node *Local_Subtree = NULL; - if (Subtree->DominatedBy(I->Subtree)) - Local_Subtree = Subtree; - else if (I->Subtree->DominatedBy(Subtree)) - Local_Subtree = I->Subtree; - - if (Local_Subtree) { - unsigned new_relationship = 0; - LatticeVal ILV = reversePredicate(I->LV); - unsigned ILV_s = ILV & (SLT_BIT|SGT_BIT); - unsigned ILV_u = ILV & (ULT_BIT|UGT_BIT); - - if (LV1_s != (SLT_BIT|SGT_BIT) && ILV_s == LV1_s) - new_relationship |= ILV_s; - if (LV1_u != (ULT_BIT|UGT_BIT) && ILV_u == LV1_u) - new_relationship |= ILV_u; - - if (new_relationship) { - if ((new_relationship & (SLT_BIT|SGT_BIT)) == 0) - new_relationship |= (SLT_BIT|SGT_BIT); - if ((new_relationship & (ULT_BIT|UGT_BIT)) == 0) - new_relationship |= (ULT_BIT|UGT_BIT); - if ((LV1 & EQ_BIT) && (ILV & EQ_BIT)) - new_relationship |= EQ_BIT; - - LatticeVal NewLV = static_cast(new_relationship); - - node(I->To)->update(n2, NewLV, Local_Subtree); - node(n2)->update(I->To, reversePredicate(NewLV), Local_Subtree); - } - } - } - } - - for (Node::iterator I = node(n2)->begin(), E = node(n2)->end(); I != E; ++I) { - if 
(I->LV != NE && I->To != n1) { - DomTreeDFS::Node *Local_Subtree = NULL; - if (Subtree->DominatedBy(I->Subtree)) - Local_Subtree = Subtree; - else if (I->Subtree->DominatedBy(Subtree)) - Local_Subtree = I->Subtree; - - if (Local_Subtree) { - unsigned new_relationship = 0; - unsigned ILV_s = I->LV & (SLT_BIT|SGT_BIT); - unsigned ILV_u = I->LV & (ULT_BIT|UGT_BIT); - - if (LV1_s != (SLT_BIT|SGT_BIT) && ILV_s == LV1_s) - new_relationship |= ILV_s; - - if (LV1_u != (ULT_BIT|UGT_BIT) && ILV_u == LV1_u) - new_relationship |= ILV_u; - - if (new_relationship) { - if ((new_relationship & (SLT_BIT|SGT_BIT)) == 0) - new_relationship |= (SLT_BIT|SGT_BIT); - if ((new_relationship & (ULT_BIT|UGT_BIT)) == 0) - new_relationship |= (ULT_BIT|UGT_BIT); - if ((LV1 & EQ_BIT) && (I->LV & EQ_BIT)) - new_relationship |= EQ_BIT; - - LatticeVal NewLV = static_cast(new_relationship); - - node(n1)->update(I->To, NewLV, Local_Subtree); - node(I->To)->update(n1, reversePredicate(NewLV), Local_Subtree); - } - } - } - } - } - - node(n1)->update(n2, LV1, Subtree); - node(n2)->update(n1, reversePredicate(LV1), Subtree); - } - - /// remove - removes a node from the graph by removing all references to - /// and from it. - void remove(unsigned n) { - Node *N = node(n); - for (Node::iterator NI = N->begin(), NE = N->end(); NI != NE; ++NI) { - Node::iterator Iter = node(NI->To)->find(n, TreeRoot); - do { - node(NI->To)->Relations.erase(Iter); - Iter = node(NI->To)->find(n, TreeRoot); - } while (Iter != node(NI->To)->end()); - } - N->Relations.clear(); - } - -#ifndef NDEBUG - virtual ~InequalityGraph() {} - virtual void dump() { - dump(errs()); - } - - void dump(raw_ostream &os) { - for (unsigned i = 1; i <= Nodes.size(); ++i) { - os << i << " = {"; - node(i)->dump(os); - os << "}\n"; - } - } -#endif - }; - - class VRPSolver; - - /// ValueRanges tracks the known integer ranges and anti-ranges of the nodes - /// in the InequalityGraph. 
- class ValueRanges { - ValueNumbering &VN; - TargetData *TD; - LLVMContext *Context; - - class ScopedRange { - typedef std::vector > - RangeListType; - RangeListType RangeList; - - static bool swo(const std::pair &LHS, - const std::pair &RHS) { - return *LHS.first < *RHS.first; - } - - public: -#ifndef NDEBUG - virtual ~ScopedRange() {} - virtual void dump() const { - dump(errs()); - } - - void dump(raw_ostream &os) const { - os << "{"; - for (const_iterator I = begin(), E = end(); I != E; ++I) { - os << &I->second << " (" << I->first->getDFSNumIn() << "), "; - } - os << "}"; - } -#endif - - typedef RangeListType::iterator iterator; - typedef RangeListType::const_iterator const_iterator; - - iterator begin() { return RangeList.begin(); } - iterator end() { return RangeList.end(); } - const_iterator begin() const { return RangeList.begin(); } - const_iterator end() const { return RangeList.end(); } - - iterator find(DomTreeDFS::Node *Subtree) { - iterator E = end(); - iterator I = std::lower_bound(begin(), E, - std::make_pair(Subtree, empty), swo); - - while (I != E && !I->first->dominates(Subtree)) ++I; - return I; - } - - const_iterator find(DomTreeDFS::Node *Subtree) const { - const_iterator E = end(); - const_iterator I = std::lower_bound(begin(), E, - std::make_pair(Subtree, empty), swo); - - while (I != E && !I->first->dominates(Subtree)) ++I; - return I; - } - - void update(const ConstantRange &CR, DomTreeDFS::Node *Subtree) { - assert(!CR.isEmptySet() && "Empty ConstantRange."); - assert(!CR.isSingleElement() && "Refusing to store single element."); - - iterator E = end(); - iterator I = - std::lower_bound(begin(), E, std::make_pair(Subtree, empty), swo); - - if (I != end() && I->first == Subtree) { - ConstantRange CR2 = I->second.intersectWith(CR); - assert(!CR2.isEmptySet() && !CR2.isSingleElement() && - "Invalid union of ranges."); - I->second = CR2; - } else - RangeList.insert(I, std::make_pair(Subtree, CR)); - } - }; - - std::vector Ranges; - - void 
update(unsigned n, const ConstantRange &CR, DomTreeDFS::Node *Subtree){ - if (CR.isFullSet()) return; - if (Ranges.size() < n) Ranges.resize(n); - Ranges[n-1].update(CR, Subtree); - } - - /// create - Creates a ConstantRange that matches the given LatticeVal - /// relation with a given integer. - ConstantRange create(LatticeVal LV, const ConstantRange &CR) { - assert(!CR.isEmptySet() && "Can't deal with empty set."); - - if (LV == NE) - return ConstantRange::makeICmpRegion(ICmpInst::ICMP_NE, CR); - - unsigned LV_s = LV & (SGT_BIT|SLT_BIT); - unsigned LV_u = LV & (UGT_BIT|ULT_BIT); - bool hasEQ = LV & EQ_BIT; - - ConstantRange Range(CR.getBitWidth()); - - if (LV_s == SGT_BIT) { - Range = Range.intersectWith(ConstantRange::makeICmpRegion( - hasEQ ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_SGT, CR)); - } else if (LV_s == SLT_BIT) { - Range = Range.intersectWith(ConstantRange::makeICmpRegion( - hasEQ ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_SLT, CR)); - } - - if (LV_u == UGT_BIT) { - Range = Range.intersectWith(ConstantRange::makeICmpRegion( - hasEQ ? ICmpInst::ICMP_UGE : ICmpInst::ICMP_UGT, CR)); - } else if (LV_u == ULT_BIT) { - Range = Range.intersectWith(ConstantRange::makeICmpRegion( - hasEQ ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_ULT, CR)); - } - - return Range; - } - -#ifndef NDEBUG - bool isCanonical(Value *V, DomTreeDFS::Node *Subtree) { - return V == VN.canonicalize(V, Subtree); - } -#endif - - public: - - ValueRanges(ValueNumbering &VN, TargetData *TD, LLVMContext *C) : - VN(VN), TD(TD), Context(C) {} - -#ifndef NDEBUG - virtual ~ValueRanges() {} - - virtual void dump() const { - dump(errs()); - } - - void dump(raw_ostream &os) const { - for (unsigned i = 0, e = Ranges.size(); i != e; ++i) { - os << (i+1) << " = "; - Ranges[i].dump(os); - os << "\n"; - } - } -#endif - - /// range - looks up the ConstantRange associated with a value number. 
- ConstantRange range(unsigned n, DomTreeDFS::Node *Subtree) { - assert(VN.value(n)); // performs range checks - - if (n <= Ranges.size()) { - ScopedRange::iterator I = Ranges[n-1].find(Subtree); - if (I != Ranges[n-1].end()) return I->second; - } - - Value *V = VN.value(n); - ConstantRange CR = range(V); - return CR; - } - - /// range - determine a range from a Value without performing any lookups. - ConstantRange range(Value *V) const { - if (ConstantInt *C = dyn_cast(V)) - return ConstantRange(C->getValue()); - else if (isa(V)) - return ConstantRange(APInt::getNullValue(typeToWidth(V->getType()))); - else - return ConstantRange(typeToWidth(V->getType())); - } - - // typeToWidth - returns the number of bits necessary to store a value of - // this type, or zero if unknown. - uint32_t typeToWidth(const Type *Ty) const { - if (TD) - return TD->getTypeSizeInBits(Ty); - else - return Ty->getPrimitiveSizeInBits(); - } - - static bool isRelatedBy(const ConstantRange &CR1, const ConstantRange &CR2, - LatticeVal LV) { - switch (LV) { - default: assert(!"Impossible lattice value!"); - case NE: - return CR1.intersectWith(CR2).isEmptySet(); - case ULT: - return CR1.getUnsignedMax().ult(CR2.getUnsignedMin()); - case ULE: - return CR1.getUnsignedMax().ule(CR2.getUnsignedMin()); - case UGT: - return CR1.getUnsignedMin().ugt(CR2.getUnsignedMax()); - case UGE: - return CR1.getUnsignedMin().uge(CR2.getUnsignedMax()); - case SLT: - return CR1.getSignedMax().slt(CR2.getSignedMin()); - case SLE: - return CR1.getSignedMax().sle(CR2.getSignedMin()); - case SGT: - return CR1.getSignedMin().sgt(CR2.getSignedMax()); - case SGE: - return CR1.getSignedMin().sge(CR2.getSignedMax()); - case LT: - return CR1.getUnsignedMax().ult(CR2.getUnsignedMin()) && - CR1.getSignedMax().slt(CR2.getUnsignedMin()); - case LE: - return CR1.getUnsignedMax().ule(CR2.getUnsignedMin()) && - CR1.getSignedMax().sle(CR2.getUnsignedMin()); - case GT: - return CR1.getUnsignedMin().ugt(CR2.getUnsignedMax()) && - 
CR1.getSignedMin().sgt(CR2.getSignedMax()); - case GE: - return CR1.getUnsignedMin().uge(CR2.getUnsignedMax()) && - CR1.getSignedMin().sge(CR2.getSignedMax()); - case SLTUGT: - return CR1.getSignedMax().slt(CR2.getSignedMin()) && - CR1.getUnsignedMin().ugt(CR2.getUnsignedMax()); - case SLEUGE: - return CR1.getSignedMax().sle(CR2.getSignedMin()) && - CR1.getUnsignedMin().uge(CR2.getUnsignedMax()); - case SGTULT: - return CR1.getSignedMin().sgt(CR2.getSignedMax()) && - CR1.getUnsignedMax().ult(CR2.getUnsignedMin()); - case SGEULE: - return CR1.getSignedMin().sge(CR2.getSignedMax()) && - CR1.getUnsignedMax().ule(CR2.getUnsignedMin()); - } - } - - bool isRelatedBy(unsigned n1, unsigned n2, DomTreeDFS::Node *Subtree, - LatticeVal LV) { - ConstantRange CR1 = range(n1, Subtree); - ConstantRange CR2 = range(n2, Subtree); - - // True iff all values in CR1 are LV to all values in CR2. - return isRelatedBy(CR1, CR2, LV); - } - - void addToWorklist(Value *V, Constant *C, ICmpInst::Predicate Pred, - VRPSolver *VRP); - void markBlock(VRPSolver *VRP); - - void mergeInto(Value **I, unsigned n, unsigned New, - DomTreeDFS::Node *Subtree, VRPSolver *VRP) { - ConstantRange CR_New = range(New, Subtree); - ConstantRange Merged = CR_New; - - for (; n != 0; ++I, --n) { - unsigned i = VN.valueNumber(*I, Subtree); - ConstantRange CR_Kill = i ? range(i, Subtree) : range(*I); - if (CR_Kill.isFullSet()) continue; - Merged = Merged.intersectWith(CR_Kill); - } - - if (Merged.isFullSet() || Merged == CR_New) return; - - applyRange(New, Merged, Subtree, VRP); - } - - void applyRange(unsigned n, const ConstantRange &CR, - DomTreeDFS::Node *Subtree, VRPSolver *VRP) { - ConstantRange Merged = CR.intersectWith(range(n, Subtree)); - if (Merged.isEmptySet()) { - markBlock(VRP); - return; - } - - if (const APInt *I = Merged.getSingleElement()) { - Value *V = VN.value(n); // XXX: redesign worklist. 
- const Type *Ty = V->getType(); - if (Ty->isInteger()) { - addToWorklist(V, ConstantInt::get(*Context, *I), - ICmpInst::ICMP_EQ, VRP); - return; - } else if (const PointerType *PTy = dyn_cast(Ty)) { - assert(*I == 0 && "Pointer is null but not zero?"); - addToWorklist(V, ConstantPointerNull::get(PTy), - ICmpInst::ICMP_EQ, VRP); - return; - } - } - - update(n, Merged, Subtree); - } - - void addNotEquals(unsigned n1, unsigned n2, DomTreeDFS::Node *Subtree, - VRPSolver *VRP) { - ConstantRange CR1 = range(n1, Subtree); - ConstantRange CR2 = range(n2, Subtree); - - uint32_t W = CR1.getBitWidth(); - - if (const APInt *I = CR1.getSingleElement()) { - if (CR2.isFullSet()) { - ConstantRange NewCR2(CR1.getUpper(), CR1.getLower()); - applyRange(n2, NewCR2, Subtree, VRP); - } else if (*I == CR2.getLower()) { - APInt NewLower(CR2.getLower() + 1), - NewUpper(CR2.getUpper()); - if (NewLower == NewUpper) - NewLower = NewUpper = APInt::getMinValue(W); - - ConstantRange NewCR2(NewLower, NewUpper); - applyRange(n2, NewCR2, Subtree, VRP); - } else if (*I == CR2.getUpper() - 1) { - APInt NewLower(CR2.getLower()), - NewUpper(CR2.getUpper() - 1); - if (NewLower == NewUpper) - NewLower = NewUpper = APInt::getMinValue(W); - - ConstantRange NewCR2(NewLower, NewUpper); - applyRange(n2, NewCR2, Subtree, VRP); - } - } - - if (const APInt *I = CR2.getSingleElement()) { - if (CR1.isFullSet()) { - ConstantRange NewCR1(CR2.getUpper(), CR2.getLower()); - applyRange(n1, NewCR1, Subtree, VRP); - } else if (*I == CR1.getLower()) { - APInt NewLower(CR1.getLower() + 1), - NewUpper(CR1.getUpper()); - if (NewLower == NewUpper) - NewLower = NewUpper = APInt::getMinValue(W); - - ConstantRange NewCR1(NewLower, NewUpper); - applyRange(n1, NewCR1, Subtree, VRP); - } else if (*I == CR1.getUpper() - 1) { - APInt NewLower(CR1.getLower()), - NewUpper(CR1.getUpper() - 1); - if (NewLower == NewUpper) - NewLower = NewUpper = APInt::getMinValue(W); - - ConstantRange NewCR1(NewLower, NewUpper); - applyRange(n1, 
NewCR1, Subtree, VRP); - } - } - } - - void addInequality(unsigned n1, unsigned n2, DomTreeDFS::Node *Subtree, - LatticeVal LV, VRPSolver *VRP) { - assert(!isRelatedBy(n1, n2, Subtree, LV) && "Asked to do useless work."); - - if (LV == NE) { - addNotEquals(n1, n2, Subtree, VRP); - return; - } - - ConstantRange CR1 = range(n1, Subtree); - ConstantRange CR2 = range(n2, Subtree); - - if (!CR1.isSingleElement()) { - ConstantRange NewCR1 = CR1.intersectWith(create(LV, CR2)); - if (NewCR1 != CR1) - applyRange(n1, NewCR1, Subtree, VRP); - } - - if (!CR2.isSingleElement()) { - ConstantRange NewCR2 = CR2.intersectWith( - create(reversePredicate(LV), CR1)); - if (NewCR2 != CR2) - applyRange(n2, NewCR2, Subtree, VRP); - } - } - }; - - /// UnreachableBlocks keeps tracks of blocks that are for one reason or - /// another discovered to be unreachable. This is used to cull the graph when - /// analyzing instructions, and to mark blocks with the "unreachable" - /// terminator instruction after the function has executed. - class UnreachableBlocks { - private: - std::vector DeadBlocks; - - public: - /// mark - mark a block as dead - void mark(BasicBlock *BB) { - std::vector::iterator E = DeadBlocks.end(); - std::vector::iterator I = - std::lower_bound(DeadBlocks.begin(), E, BB); - - if (I == E || *I != BB) DeadBlocks.insert(I, BB); - } - - /// isDead - returns whether a block is known to be dead already - bool isDead(BasicBlock *BB) { - std::vector::iterator E = DeadBlocks.end(); - std::vector::iterator I = - std::lower_bound(DeadBlocks.begin(), E, BB); - - return I != E && *I == BB; - } - - /// kill - replace the dead blocks' terminator with an UnreachableInst. 
- bool kill() { - bool modified = false; - for (std::vector::iterator I = DeadBlocks.begin(), - E = DeadBlocks.end(); I != E; ++I) { - BasicBlock *BB = *I; - - DEBUG(errs() << "unreachable block: " << BB->getName() << "\n"); - - for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); - SI != SE; ++SI) { - BasicBlock *Succ = *SI; - Succ->removePredecessor(BB); - } - - TerminatorInst *TI = BB->getTerminator(); - TI->replaceAllUsesWith(UndefValue::get(TI->getType())); - TI->eraseFromParent(); - new UnreachableInst(BB->getContext(), BB); - ++NumBlocks; - modified = true; - } - DeadBlocks.clear(); - return modified; - } - }; - - /// VRPSolver keeps track of how changes to one variable affect other - /// variables, and forwards changes along to the InequalityGraph. It - /// also maintains the correct choice for "canonical" in the IG. - /// @brief VRPSolver calculates inferences from a new relationship. - class VRPSolver { - private: - friend class ValueRanges; - - struct Operation { - Value *LHS, *RHS; - ICmpInst::Predicate Op; - - BasicBlock *ContextBB; // XXX use a DomTreeDFS::Node instead - Instruction *ContextInst; - }; - std::deque WorkList; - - ValueNumbering &VN; - InequalityGraph &IG; - UnreachableBlocks &UB; - ValueRanges &VR; - DomTreeDFS *DTDFS; - DomTreeDFS::Node *Top; - BasicBlock *TopBB; - Instruction *TopInst; - bool &modified; - LLVMContext *Context; - - typedef InequalityGraph::Node Node; - - // below - true if the Instruction is dominated by the current context - // block or instruction - bool below(Instruction *I) { - BasicBlock *BB = I->getParent(); - if (TopInst && TopInst->getParent() == BB) { - if (isa(TopInst)) return false; - if (isa(I)) return true; - if ( isa(TopInst) && !isa(I)) return true; - if (!isa(TopInst) && isa(I)) return false; - - for (BasicBlock::const_iterator Iter = BB->begin(), E = BB->end(); - Iter != E; ++Iter) { - if (&*Iter == TopInst) return true; - else if (&*Iter == I) return false; - } - assert(!"Instructions not found 
in parent BasicBlock?"); - } else { - DomTreeDFS::Node *Node = DTDFS->getNodeForBlock(BB); - if (!Node) return false; - return Top->dominates(Node); - } - return false; // Not reached - } - - // aboveOrBelow - true if the Instruction either dominates or is dominated - // by the current context block or instruction - bool aboveOrBelow(Instruction *I) { - BasicBlock *BB = I->getParent(); - DomTreeDFS::Node *Node = DTDFS->getNodeForBlock(BB); - if (!Node) return false; - - return Top == Node || Top->dominates(Node) || Node->dominates(Top); - } - - bool makeEqual(Value *V1, Value *V2) { - DEBUG(errs() << "makeEqual(" << *V1 << ", " << *V2 << ")\n"); - DEBUG(errs() << "context is "); - DEBUG(if (TopInst) - errs() << "I: " << *TopInst << "\n"; - else - errs() << "BB: " << TopBB->getName() - << "(" << Top->getDFSNumIn() << ")\n"); - - assert(V1->getType() == V2->getType() && - "Can't make two values with different types equal."); - - if (V1 == V2) return true; - - if (isa(V1) && isa(V2)) - return false; - - unsigned n1 = VN.valueNumber(V1, Top), n2 = VN.valueNumber(V2, Top); - - if (n1 && n2) { - if (n1 == n2) return true; - if (IG.isRelatedBy(n1, n2, Top, NE)) return false; - } - - if (n1) assert(V1 == VN.value(n1) && "Value isn't canonical."); - if (n2) assert(V2 == VN.value(n2) && "Value isn't canonical."); - - assert(!VN.compare(V2, V1) && "Please order parameters to makeEqual."); - - assert(!isa(V2) && "Tried to remove a constant."); - - SetVector Remove; - if (n2) Remove.insert(n2); - - if (n1 && n2) { - // Suppose we're being told that %x == %y, and %x <= %z and %y >= %z. - // We can't just merge %x and %y because the relationship with %z would - // be EQ and that's invalid. What we're doing is looking for any nodes - // %z such that %x <= %z and %y >= %z, and vice versa. - - Node::iterator end = IG.node(n2)->end(); - - // Find the intersection between N1 and N2 which is dominated by - // Top. If we find %x where N1 <= %x <= N2 (or >=) then add %x to - // Remove. 
- for (Node::iterator I = IG.node(n1)->begin(), E = IG.node(n1)->end(); - I != E; ++I) { - if (!(I->LV & EQ_BIT) || !Top->DominatedBy(I->Subtree)) continue; - - unsigned ILV_s = I->LV & (SLT_BIT|SGT_BIT); - unsigned ILV_u = I->LV & (ULT_BIT|UGT_BIT); - Node::iterator NI = IG.node(n2)->find(I->To, Top); - if (NI != end) { - LatticeVal NILV = reversePredicate(NI->LV); - unsigned NILV_s = NILV & (SLT_BIT|SGT_BIT); - unsigned NILV_u = NILV & (ULT_BIT|UGT_BIT); - - if ((ILV_s != (SLT_BIT|SGT_BIT) && ILV_s == NILV_s) || - (ILV_u != (ULT_BIT|UGT_BIT) && ILV_u == NILV_u)) - Remove.insert(I->To); - } - } - - // See if one of the nodes about to be removed is actually a better - // canonical choice than n1. - unsigned orig_n1 = n1; - SetVector::iterator DontRemove = Remove.end(); - for (SetVector::iterator I = Remove.begin()+1 /* skip n2 */, - E = Remove.end(); I != E; ++I) { - unsigned n = *I; - Value *V = VN.value(n); - if (VN.compare(V, V1)) { - V1 = V; - n1 = n; - DontRemove = I; - } - } - if (DontRemove != Remove.end()) { - unsigned n = *DontRemove; - Remove.remove(n); - Remove.insert(orig_n1); - } - } - - // We'd like to allow makeEqual on two values to perform a simple - // substitution without creating nodes in the IG whenever possible. - // - // The first iteration through this loop operates on V2 before going - // through the Remove list and operating on those too. If all of the - // iterations performed simple replacements then we exit early. - bool mergeIGNode = false; - unsigned i = 0; - for (Value *R = V2; i == 0 || i < Remove.size(); ++i) { - if (i) R = VN.value(Remove[i]); // skip n2. - - // Try to replace the whole instruction. If we can, we're done. 
- Instruction *I2 = dyn_cast(R); - if (I2 && below(I2)) { - std::vector ToNotify; - for (Value::use_iterator UI = I2->use_begin(), UE = I2->use_end(); - UI != UE;) { - Use &TheUse = UI.getUse(); - ++UI; - Instruction *I = cast(TheUse.getUser()); - ToNotify.push_back(I); - } - - DEBUG(errs() << "Simply removing " << *I2 - << ", replacing with " << *V1 << "\n"); - I2->replaceAllUsesWith(V1); - // leave it dead; it'll get erased later. - ++NumInstruction; - modified = true; - - for (std::vector::iterator II = ToNotify.begin(), - IE = ToNotify.end(); II != IE; ++II) { - opsToDef(*II); - } - - continue; - } - - // Otherwise, replace all dominated uses. - for (Value::use_iterator UI = R->use_begin(), UE = R->use_end(); - UI != UE;) { - Use &TheUse = UI.getUse(); - ++UI; - if (Instruction *I = dyn_cast(TheUse.getUser())) { - if (below(I)) { - TheUse.set(V1); - modified = true; - ++NumVarsReplaced; - opsToDef(I); - } - } - } - - // If that killed the instruction, stop here. - if (I2 && isInstructionTriviallyDead(I2)) { - DEBUG(errs() << "Killed all uses of " << *I2 - << ", replacing with " << *V1 << "\n"); - continue; - } - - // If we make it to here, then we will need to create a node for N1. - // Otherwise, we can skip out early! - mergeIGNode = true; - } - - if (!isa(V1)) { - if (Remove.empty()) { - VR.mergeInto(&V2, 1, VN.getOrInsertVN(V1, Top), Top, this); - } else { - std::vector RemoveVals; - RemoveVals.reserve(Remove.size()); - - for (SetVector::iterator I = Remove.begin(), - E = Remove.end(); I != E; ++I) { - Value *V = VN.value(*I); - if (!V->use_empty()) - RemoveVals.push_back(V); - } - VR.mergeInto(&RemoveVals[0], RemoveVals.size(), - VN.getOrInsertVN(V1, Top), Top, this); - } - } - - if (mergeIGNode) { - // Create N1. - if (!n1) n1 = VN.getOrInsertVN(V1, Top); - IG.node(n1); // Ensure that IG.Nodes won't get resized - - // Migrate relationships from removed nodes to N1. 
- for (SetVector::iterator I = Remove.begin(), E = Remove.end(); - I != E; ++I) { - unsigned n = *I; - for (Node::iterator NI = IG.node(n)->begin(), NE = IG.node(n)->end(); - NI != NE; ++NI) { - if (NI->Subtree->DominatedBy(Top)) { - if (NI->To == n1) { - assert((NI->LV & EQ_BIT) && "Node inequal to itself."); - continue; - } - if (Remove.count(NI->To)) - continue; - - IG.node(NI->To)->update(n1, reversePredicate(NI->LV), Top); - IG.node(n1)->update(NI->To, NI->LV, Top); - } - } - } - - // Point V2 (and all items in Remove) to N1. - if (!n2) - VN.addEquality(n1, V2, Top); - else { - for (SetVector::iterator I = Remove.begin(), - E = Remove.end(); I != E; ++I) { - VN.addEquality(n1, VN.value(*I), Top); - } - } - - // If !Remove.empty() then V2 = Remove[0]->getValue(). - // Even when Remove is empty, we still want to process V2. - i = 0; - for (Value *R = V2; i == 0 || i < Remove.size(); ++i) { - if (i) R = VN.value(Remove[i]); // skip n2. - - if (Instruction *I2 = dyn_cast(R)) { - if (aboveOrBelow(I2)) - defToOps(I2); - } - for (Value::use_iterator UI = V2->use_begin(), UE = V2->use_end(); - UI != UE;) { - Use &TheUse = UI.getUse(); - ++UI; - if (Instruction *I = dyn_cast(TheUse.getUser())) { - if (aboveOrBelow(I)) - opsToDef(I); - } - } - } - } - - // re-opsToDef all dominated users of V1. - if (Instruction *I = dyn_cast(V1)) { - for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); - UI != UE;) { - Use &TheUse = UI.getUse(); - ++UI; - Value *V = TheUse.getUser(); - if (!V->use_empty()) { - Instruction *Inst = cast(V); - if (aboveOrBelow(Inst)) - opsToDef(Inst); - } - } - } - - return true; - } - - /// cmpInstToLattice - converts an CmpInst::Predicate to lattice value - /// Requires that the lattice value be valid; does not accept ICMP_EQ. 
- static LatticeVal cmpInstToLattice(ICmpInst::Predicate Pred) { - switch (Pred) { - case ICmpInst::ICMP_EQ: - assert(!"No matching lattice value."); - return static_cast(EQ_BIT); - default: - assert(!"Invalid 'icmp' predicate."); - case ICmpInst::ICMP_NE: - return NE; - case ICmpInst::ICMP_UGT: - return UGT; - case ICmpInst::ICMP_UGE: - return UGE; - case ICmpInst::ICMP_ULT: - return ULT; - case ICmpInst::ICMP_ULE: - return ULE; - case ICmpInst::ICMP_SGT: - return SGT; - case ICmpInst::ICMP_SGE: - return SGE; - case ICmpInst::ICMP_SLT: - return SLT; - case ICmpInst::ICMP_SLE: - return SLE; - } - } - - public: - VRPSolver(ValueNumbering &VN, InequalityGraph &IG, UnreachableBlocks &UB, - ValueRanges &VR, DomTreeDFS *DTDFS, bool &modified, - BasicBlock *TopBB) - : VN(VN), - IG(IG), - UB(UB), - VR(VR), - DTDFS(DTDFS), - Top(DTDFS->getNodeForBlock(TopBB)), - TopBB(TopBB), - TopInst(NULL), - modified(modified), - Context(&TopBB->getContext()) - { - assert(Top && "VRPSolver created for unreachable basic block."); - } - - VRPSolver(ValueNumbering &VN, InequalityGraph &IG, UnreachableBlocks &UB, - ValueRanges &VR, DomTreeDFS *DTDFS, bool &modified, - Instruction *TopInst) - : VN(VN), - IG(IG), - UB(UB), - VR(VR), - DTDFS(DTDFS), - Top(DTDFS->getNodeForBlock(TopInst->getParent())), - TopBB(TopInst->getParent()), - TopInst(TopInst), - modified(modified), - Context(&TopInst->getContext()) - { - assert(Top && "VRPSolver created for unreachable basic block."); - assert(Top->getBlock() == TopInst->getParent() && "Context mismatch."); - } - - bool isRelatedBy(Value *V1, Value *V2, ICmpInst::Predicate Pred) const { - if (Constant *C1 = dyn_cast(V1)) - if (Constant *C2 = dyn_cast(V2)) - return ConstantExpr::getCompare(Pred, C1, C2) == - ConstantInt::getTrue(*Context); - - unsigned n1 = VN.valueNumber(V1, Top); - unsigned n2 = VN.valueNumber(V2, Top); - - if (n1 && n2) { - if (n1 == n2) return Pred == ICmpInst::ICMP_EQ || - Pred == ICmpInst::ICMP_ULE || - Pred == ICmpInst::ICMP_UGE 
|| - Pred == ICmpInst::ICMP_SLE || - Pred == ICmpInst::ICMP_SGE; - if (Pred == ICmpInst::ICMP_EQ) return false; - if (IG.isRelatedBy(n1, n2, Top, cmpInstToLattice(Pred))) return true; - if (VR.isRelatedBy(n1, n2, Top, cmpInstToLattice(Pred))) return true; - } - - if ((n1 && !n2 && isa(V2)) || - (n2 && !n1 && isa(V1))) { - ConstantRange CR1 = n1 ? VR.range(n1, Top) : VR.range(V1); - ConstantRange CR2 = n2 ? VR.range(n2, Top) : VR.range(V2); - - if (Pred == ICmpInst::ICMP_EQ) - return CR1.isSingleElement() && - CR1.getSingleElement() == CR2.getSingleElement(); - - return VR.isRelatedBy(CR1, CR2, cmpInstToLattice(Pred)); - } - if (Pred == ICmpInst::ICMP_EQ) return V1 == V2; - return false; - } - - /// add - adds a new property to the work queue - void add(Value *V1, Value *V2, ICmpInst::Predicate Pred, - Instruction *I = NULL) { - DEBUG(errs() << "adding " << *V1 << " " << Pred << " " << *V2); - if (I) - DEBUG(errs() << " context: " << *I); - else - DEBUG(errs() << " default context (" << Top->getDFSNumIn() << ")"); - DEBUG(errs() << "\n"); - - assert(V1->getType() == V2->getType() && - "Can't relate two values with different types."); - - WorkList.push_back(Operation()); - Operation &O = WorkList.back(); - O.LHS = V1, O.RHS = V2, O.Op = Pred, O.ContextInst = I; - O.ContextBB = I ? I->getParent() : TopBB; - } - - /// defToOps - Given an instruction definition that we've learned something - /// new about, find any new relationships between its operands. - void defToOps(Instruction *I) { - Instruction *NewContext = below(I) ? I : TopInst; - Value *Canonical = VN.canonicalize(I, Top); - - if (BinaryOperator *BO = dyn_cast(I)) { - const Type *Ty = BO->getType(); - assert(!Ty->isFPOrFPVector() && "Float in work queue!"); - - Value *Op0 = VN.canonicalize(BO->getOperand(0), Top); - Value *Op1 = VN.canonicalize(BO->getOperand(1), Top); - - // TODO: "and i32 -1, %x" EQ %y then %x EQ %y. 
- - switch (BO->getOpcode()) { - case Instruction::And: { - // "and i32 %a, %b" EQ -1 then %a EQ -1 and %b EQ -1 - ConstantInt *CI = cast(Constant::getAllOnesValue(Ty)); - if (Canonical == CI) { - add(CI, Op0, ICmpInst::ICMP_EQ, NewContext); - add(CI, Op1, ICmpInst::ICMP_EQ, NewContext); - } - } break; - case Instruction::Or: { - // "or i32 %a, %b" EQ 0 then %a EQ 0 and %b EQ 0 - Constant *Zero = Constant::getNullValue(Ty); - if (Canonical == Zero) { - add(Zero, Op0, ICmpInst::ICMP_EQ, NewContext); - add(Zero, Op1, ICmpInst::ICMP_EQ, NewContext); - } - } break; - case Instruction::Xor: { - // "xor i32 %c, %a" EQ %b then %a EQ %c ^ %b - // "xor i32 %c, %a" EQ %c then %a EQ 0 - // "xor i32 %c, %a" NE %c then %a NE 0 - // Repeat the above, with order of operands reversed. - Value *LHS = Op0; - Value *RHS = Op1; - if (!isa(LHS)) std::swap(LHS, RHS); - - if (ConstantInt *CI = dyn_cast(Canonical)) { - if (ConstantInt *Arg = dyn_cast(LHS)) { - add(RHS, - ConstantInt::get(*Context, CI->getValue() ^ Arg->getValue()), - ICmpInst::ICMP_EQ, NewContext); - } - } - if (Canonical == LHS) { - if (isa(Canonical)) - add(RHS, Constant::getNullValue(Ty), ICmpInst::ICMP_EQ, - NewContext); - } else if (isRelatedBy(LHS, Canonical, ICmpInst::ICMP_NE)) { - add(RHS, Constant::getNullValue(Ty), ICmpInst::ICMP_NE, - NewContext); - } - } break; - default: - break; - } - } else if (ICmpInst *IC = dyn_cast(I)) { - // "icmp ult i32 %a, %y" EQ true then %a u< y - // etc. 
- - if (Canonical == ConstantInt::getTrue(*Context)) { - add(IC->getOperand(0), IC->getOperand(1), IC->getPredicate(), - NewContext); - } else if (Canonical == ConstantInt::getFalse(*Context)) { - add(IC->getOperand(0), IC->getOperand(1), - ICmpInst::getInversePredicate(IC->getPredicate()), NewContext); - } - } else if (SelectInst *SI = dyn_cast(I)) { - if (I->getType()->isFPOrFPVector()) return; - - // Given: "%a = select i1 %x, i32 %b, i32 %c" - // %a EQ %b and %b NE %c then %x EQ true - // %a EQ %c and %b NE %c then %x EQ false - - Value *True = SI->getTrueValue(); - Value *False = SI->getFalseValue(); - if (isRelatedBy(True, False, ICmpInst::ICMP_NE)) { - if (Canonical == VN.canonicalize(True, Top) || - isRelatedBy(Canonical, False, ICmpInst::ICMP_NE)) - add(SI->getCondition(), ConstantInt::getTrue(*Context), - ICmpInst::ICMP_EQ, NewContext); - else if (Canonical == VN.canonicalize(False, Top) || - isRelatedBy(Canonical, True, ICmpInst::ICMP_NE)) - add(SI->getCondition(), ConstantInt::getFalse(*Context), - ICmpInst::ICMP_EQ, NewContext); - } - } else if (GetElementPtrInst *GEPI = dyn_cast(I)) { - for (GetElementPtrInst::op_iterator OI = GEPI->idx_begin(), - OE = GEPI->idx_end(); OI != OE; ++OI) { - ConstantInt *Op = dyn_cast(VN.canonicalize(*OI, Top)); - if (!Op || !Op->isZero()) return; - } - // TODO: The GEPI indices are all zero. Copy from definition to operand, - // jumping the type plane as needed. 
- if (isRelatedBy(GEPI, Constant::getNullValue(GEPI->getType()), - ICmpInst::ICMP_NE)) { - Value *Ptr = GEPI->getPointerOperand(); - add(Ptr, Constant::getNullValue(Ptr->getType()), ICmpInst::ICMP_NE, - NewContext); - } - } else if (CastInst *CI = dyn_cast(I)) { - const Type *SrcTy = CI->getSrcTy(); - - unsigned ci = VN.getOrInsertVN(CI, Top); - uint32_t W = VR.typeToWidth(SrcTy); - if (!W) return; - ConstantRange CR = VR.range(ci, Top); - - if (CR.isFullSet()) return; - - switch (CI->getOpcode()) { - default: break; - case Instruction::ZExt: - case Instruction::SExt: - VR.applyRange(VN.getOrInsertVN(CI->getOperand(0), Top), - CR.truncate(W), Top, this); - break; - case Instruction::BitCast: - VR.applyRange(VN.getOrInsertVN(CI->getOperand(0), Top), - CR, Top, this); - break; - } - } - } - - /// opsToDef - A new relationship was discovered involving one of this - /// instruction's operands. Find any new relationship involving the - /// definition, or another operand. - void opsToDef(Instruction *I) { - Instruction *NewContext = below(I) ? 
I : TopInst; - - if (BinaryOperator *BO = dyn_cast(I)) { - Value *Op0 = VN.canonicalize(BO->getOperand(0), Top); - Value *Op1 = VN.canonicalize(BO->getOperand(1), Top); - - if (ConstantInt *CI0 = dyn_cast(Op0)) - if (ConstantInt *CI1 = dyn_cast(Op1)) { - add(BO, ConstantExpr::get(BO->getOpcode(), CI0, CI1), - ICmpInst::ICMP_EQ, NewContext); - return; - } - - // "%y = and i1 true, %x" then %x EQ %y - // "%y = or i1 false, %x" then %x EQ %y - // "%x = add i32 %y, 0" then %x EQ %y - // "%x = mul i32 %y, 0" then %x EQ 0 - - Instruction::BinaryOps Opcode = BO->getOpcode(); - const Type *Ty = BO->getType(); - assert(!Ty->isFPOrFPVector() && "Float in work queue!"); - - Constant *Zero = Constant::getNullValue(Ty); - Constant *One = ConstantInt::get(Ty, 1); - ConstantInt *AllOnes = cast(Constant::getAllOnesValue(Ty)); - - switch (Opcode) { - default: break; - case Instruction::LShr: - case Instruction::AShr: - case Instruction::Shl: - if (Op1 == Zero) { - add(BO, Op0, ICmpInst::ICMP_EQ, NewContext); - return; - } - break; - case Instruction::Sub: - if (Op1 == Zero) { - add(BO, Op0, ICmpInst::ICMP_EQ, NewContext); - return; - } - if (ConstantInt *CI0 = dyn_cast(Op0)) { - unsigned n_ci0 = VN.getOrInsertVN(Op1, Top); - ConstantRange CR = VR.range(n_ci0, Top); - if (!CR.isFullSet()) { - CR.subtract(CI0->getValue()); - unsigned n_bo = VN.getOrInsertVN(BO, Top); - VR.applyRange(n_bo, CR, Top, this); - return; - } - } - if (ConstantInt *CI1 = dyn_cast(Op1)) { - unsigned n_ci1 = VN.getOrInsertVN(Op0, Top); - ConstantRange CR = VR.range(n_ci1, Top); - if (!CR.isFullSet()) { - CR.subtract(CI1->getValue()); - unsigned n_bo = VN.getOrInsertVN(BO, Top); - VR.applyRange(n_bo, CR, Top, this); - return; - } - } - break; - case Instruction::Or: - if (Op0 == AllOnes || Op1 == AllOnes) { - add(BO, AllOnes, ICmpInst::ICMP_EQ, NewContext); - return; - } - if (Op0 == Zero) { - add(BO, Op1, ICmpInst::ICMP_EQ, NewContext); - return; - } else if (Op1 == Zero) { - add(BO, Op0, ICmpInst::ICMP_EQ, 
NewContext); - return; - } - break; - case Instruction::Add: - if (ConstantInt *CI0 = dyn_cast(Op0)) { - unsigned n_ci0 = VN.getOrInsertVN(Op1, Top); - ConstantRange CR = VR.range(n_ci0, Top); - if (!CR.isFullSet()) { - CR.subtract(-CI0->getValue()); - unsigned n_bo = VN.getOrInsertVN(BO, Top); - VR.applyRange(n_bo, CR, Top, this); - return; - } - } - if (ConstantInt *CI1 = dyn_cast(Op1)) { - unsigned n_ci1 = VN.getOrInsertVN(Op0, Top); - ConstantRange CR = VR.range(n_ci1, Top); - if (!CR.isFullSet()) { - CR.subtract(-CI1->getValue()); - unsigned n_bo = VN.getOrInsertVN(BO, Top); - VR.applyRange(n_bo, CR, Top, this); - return; - } - } - // fall-through - case Instruction::Xor: - if (Op0 == Zero) { - add(BO, Op1, ICmpInst::ICMP_EQ, NewContext); - return; - } else if (Op1 == Zero) { - add(BO, Op0, ICmpInst::ICMP_EQ, NewContext); - return; - } - break; - case Instruction::And: - if (Op0 == AllOnes) { - add(BO, Op1, ICmpInst::ICMP_EQ, NewContext); - return; - } else if (Op1 == AllOnes) { - add(BO, Op0, ICmpInst::ICMP_EQ, NewContext); - return; - } - if (Op0 == Zero || Op1 == Zero) { - add(BO, Zero, ICmpInst::ICMP_EQ, NewContext); - return; - } - break; - case Instruction::Mul: - if (Op0 == Zero || Op1 == Zero) { - add(BO, Zero, ICmpInst::ICMP_EQ, NewContext); - return; - } - if (Op0 == One) { - add(BO, Op1, ICmpInst::ICMP_EQ, NewContext); - return; - } else if (Op1 == One) { - add(BO, Op0, ICmpInst::ICMP_EQ, NewContext); - return; - } - break; - } - - // "%x = add i32 %y, %z" and %x EQ %y then %z EQ 0 - // "%x = add i32 %y, %z" and %x EQ %z then %y EQ 0 - // "%x = shl i32 %y, %z" and %x EQ %y and %y NE 0 then %z EQ 0 - // "%x = udiv i32 %y, %z" and %x EQ %y and %y NE 0 then %z EQ 1 - - Value *Known = Op0, *Unknown = Op1, - *TheBO = VN.canonicalize(BO, Top); - if (Known != TheBO) std::swap(Known, Unknown); - if (Known == TheBO) { - switch (Opcode) { - default: break; - case Instruction::LShr: - case Instruction::AShr: - case Instruction::Shl: - if (!isRelatedBy(Known, 
Zero, ICmpInst::ICMP_NE)) break; - // otherwise, fall-through. - case Instruction::Sub: - if (Unknown == Op0) break; - // otherwise, fall-through. - case Instruction::Xor: - case Instruction::Add: - add(Unknown, Zero, ICmpInst::ICMP_EQ, NewContext); - break; - case Instruction::UDiv: - case Instruction::SDiv: - if (Unknown == Op1) break; - if (isRelatedBy(Known, Zero, ICmpInst::ICMP_NE)) - add(Unknown, One, ICmpInst::ICMP_EQ, NewContext); - break; - } - } - - // TODO: "%a = add i32 %b, 1" and %b > %z then %a >= %z. - - } else if (ICmpInst *IC = dyn_cast(I)) { - // "%a = icmp ult i32 %b, %c" and %b u< %c then %a EQ true - // "%a = icmp ult i32 %b, %c" and %b u>= %c then %a EQ false - // etc. - - Value *Op0 = VN.canonicalize(IC->getOperand(0), Top); - Value *Op1 = VN.canonicalize(IC->getOperand(1), Top); - - ICmpInst::Predicate Pred = IC->getPredicate(); - if (isRelatedBy(Op0, Op1, Pred)) - add(IC, ConstantInt::getTrue(*Context), ICmpInst::ICMP_EQ, NewContext); - else if (isRelatedBy(Op0, Op1, ICmpInst::getInversePredicate(Pred))) - add(IC, ConstantInt::getFalse(*Context), - ICmpInst::ICMP_EQ, NewContext); - - } else if (SelectInst *SI = dyn_cast(I)) { - if (I->getType()->isFPOrFPVector()) return; - - // Given: "%a = select i1 %x, i32 %b, i32 %c" - // %x EQ true then %a EQ %b - // %x EQ false then %a EQ %c - // %b EQ %c then %a EQ %b - - Value *Canonical = VN.canonicalize(SI->getCondition(), Top); - if (Canonical == ConstantInt::getTrue(*Context)) { - add(SI, SI->getTrueValue(), ICmpInst::ICMP_EQ, NewContext); - } else if (Canonical == ConstantInt::getFalse(*Context)) { - add(SI, SI->getFalseValue(), ICmpInst::ICMP_EQ, NewContext); - } else if (VN.canonicalize(SI->getTrueValue(), Top) == - VN.canonicalize(SI->getFalseValue(), Top)) { - add(SI, SI->getTrueValue(), ICmpInst::ICMP_EQ, NewContext); - } - } else if (CastInst *CI = dyn_cast(I)) { - const Type *DestTy = CI->getDestTy(); - if (DestTy->isFPOrFPVector()) return; - - Value *Op = 
VN.canonicalize(CI->getOperand(0), Top); - Instruction::CastOps Opcode = CI->getOpcode(); - - if (Constant *C = dyn_cast(Op)) { - add(CI, ConstantExpr::getCast(Opcode, C, DestTy), - ICmpInst::ICMP_EQ, NewContext); - } - - uint32_t W = VR.typeToWidth(DestTy); - unsigned ci = VN.getOrInsertVN(CI, Top); - ConstantRange CR = VR.range(VN.getOrInsertVN(Op, Top), Top); - - if (!CR.isFullSet()) { - switch (Opcode) { - default: break; - case Instruction::ZExt: - VR.applyRange(ci, CR.zeroExtend(W), Top, this); - break; - case Instruction::SExt: - VR.applyRange(ci, CR.signExtend(W), Top, this); - break; - case Instruction::Trunc: { - ConstantRange Result = CR.truncate(W); - if (!Result.isFullSet()) - VR.applyRange(ci, Result, Top, this); - } break; - case Instruction::BitCast: - VR.applyRange(ci, CR, Top, this); - break; - // TODO: other casts? - } - } - } else if (GetElementPtrInst *GEPI = dyn_cast(I)) { - for (GetElementPtrInst::op_iterator OI = GEPI->idx_begin(), - OE = GEPI->idx_end(); OI != OE; ++OI) { - ConstantInt *Op = dyn_cast(VN.canonicalize(*OI, Top)); - if (!Op || !Op->isZero()) return; - } - // TODO: The GEPI indices are all zero. Copy from operand to definition, - // jumping the type plane as needed. 
- Value *Ptr = GEPI->getPointerOperand(); - if (isRelatedBy(Ptr, Constant::getNullValue(Ptr->getType()), - ICmpInst::ICMP_NE)) { - add(GEPI, Constant::getNullValue(GEPI->getType()), ICmpInst::ICMP_NE, - NewContext); - } - } - } - - /// solve - process the work queue - void solve() { - //DEBUG(errs() << "WorkList entry, size: " << WorkList.size() << "\n"); - while (!WorkList.empty()) { - //DEBUG(errs() << "WorkList size: " << WorkList.size() << "\n"); - - Operation &O = WorkList.front(); - TopInst = O.ContextInst; - TopBB = O.ContextBB; - Top = DTDFS->getNodeForBlock(TopBB); // XXX move this into Context - - O.LHS = VN.canonicalize(O.LHS, Top); - O.RHS = VN.canonicalize(O.RHS, Top); - - assert(O.LHS == VN.canonicalize(O.LHS, Top) && "Canonicalize isn't."); - assert(O.RHS == VN.canonicalize(O.RHS, Top) && "Canonicalize isn't."); - - DEBUG(errs() << "solving " << *O.LHS << " " << O.Op << " " << *O.RHS; - if (O.ContextInst) - errs() << " context inst: " << *O.ContextInst; - else - errs() << " context block: " << O.ContextBB->getName(); - errs() << "\n"; - - VN.dump(); - IG.dump(); - VR.dump();); - - // If they're both Constant, skip it. Check for contradiction and mark - // the BB as unreachable if so. 
- if (Constant *CI_L = dyn_cast(O.LHS)) { - if (Constant *CI_R = dyn_cast(O.RHS)) { - if (ConstantExpr::getCompare(O.Op, CI_L, CI_R) == - ConstantInt::getFalse(*Context)) - UB.mark(TopBB); - - WorkList.pop_front(); - continue; - } - } - - if (VN.compare(O.LHS, O.RHS)) { - std::swap(O.LHS, O.RHS); - O.Op = ICmpInst::getSwappedPredicate(O.Op); - } - - if (O.Op == ICmpInst::ICMP_EQ) { - if (!makeEqual(O.RHS, O.LHS)) - UB.mark(TopBB); - } else { - LatticeVal LV = cmpInstToLattice(O.Op); - - if ((LV & EQ_BIT) && - isRelatedBy(O.LHS, O.RHS, ICmpInst::getSwappedPredicate(O.Op))) { - if (!makeEqual(O.RHS, O.LHS)) - UB.mark(TopBB); - } else { - if (isRelatedBy(O.LHS, O.RHS, ICmpInst::getInversePredicate(O.Op))){ - UB.mark(TopBB); - WorkList.pop_front(); - continue; - } - - unsigned n1 = VN.getOrInsertVN(O.LHS, Top); - unsigned n2 = VN.getOrInsertVN(O.RHS, Top); - - if (n1 == n2) { - if (O.Op != ICmpInst::ICMP_UGE && O.Op != ICmpInst::ICMP_ULE && - O.Op != ICmpInst::ICMP_SGE && O.Op != ICmpInst::ICMP_SLE) - UB.mark(TopBB); - - WorkList.pop_front(); - continue; - } - - if (VR.isRelatedBy(n1, n2, Top, LV) || - IG.isRelatedBy(n1, n2, Top, LV)) { - WorkList.pop_front(); - continue; - } - - VR.addInequality(n1, n2, Top, LV, this); - if ((!isa(O.RHS) && !isa(O.LHS)) || - LV == NE) - IG.addInequality(n1, n2, Top, LV); - - if (Instruction *I1 = dyn_cast(O.LHS)) { - if (aboveOrBelow(I1)) - defToOps(I1); - } - if (isa(O.LHS) || isa(O.LHS)) { - for (Value::use_iterator UI = O.LHS->use_begin(), - UE = O.LHS->use_end(); UI != UE;) { - Use &TheUse = UI.getUse(); - ++UI; - Instruction *I = cast(TheUse.getUser()); - if (aboveOrBelow(I)) - opsToDef(I); - } - } - if (Instruction *I2 = dyn_cast(O.RHS)) { - if (aboveOrBelow(I2)) - defToOps(I2); - } - if (isa(O.RHS) || isa(O.RHS)) { - for (Value::use_iterator UI = O.RHS->use_begin(), - UE = O.RHS->use_end(); UI != UE;) { - Use &TheUse = UI.getUse(); - ++UI; - Instruction *I = cast(TheUse.getUser()); - if (aboveOrBelow(I)) - opsToDef(I); - } - } 
- } - } - WorkList.pop_front(); - } - } - }; - - void ValueRanges::addToWorklist(Value *V, Constant *C, - ICmpInst::Predicate Pred, VRPSolver *VRP) { - VRP->add(V, C, Pred, VRP->TopInst); - } - - void ValueRanges::markBlock(VRPSolver *VRP) { - VRP->UB.mark(VRP->TopBB); - } - - /// PredicateSimplifier - This class is a simplifier that replaces - /// one equivalent variable with another. It also tracks what - /// can't be equal and will solve setcc instructions when possible. - /// @brief Root of the predicate simplifier optimization. - class PredicateSimplifier : public FunctionPass { - DomTreeDFS *DTDFS; - bool modified; - ValueNumbering *VN; - InequalityGraph *IG; - UnreachableBlocks UB; - ValueRanges *VR; - - std::vector WorkList; - - LLVMContext *Context; - public: - static char ID; // Pass identification, replacement for typeid - PredicateSimplifier() : FunctionPass(&ID) {} - - bool runOnFunction(Function &F); - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequiredID(BreakCriticalEdgesID); - AU.addRequired(); - } - - private: - /// Forwards - Adds new properties to VRPSolver and uses them to - /// simplify instructions. Because new properties sometimes apply to - /// a transition from one BasicBlock to another, this will use the - /// PredicateSimplifier::proceedToSuccessor(s) interface to enter the - /// basic block. - /// @brief Performs abstract execution of the program. 
- class Forwards : public InstVisitor { - friend class InstVisitor; - PredicateSimplifier *PS; - DomTreeDFS::Node *DTNode; - - public: - ValueNumbering &VN; - InequalityGraph &IG; - UnreachableBlocks &UB; - ValueRanges &VR; - - Forwards(PredicateSimplifier *PS, DomTreeDFS::Node *DTNode) - : PS(PS), DTNode(DTNode), VN(*PS->VN), IG(*PS->IG), UB(PS->UB), - VR(*PS->VR) {} - - void visitTerminatorInst(TerminatorInst &TI); - void visitBranchInst(BranchInst &BI); - void visitSwitchInst(SwitchInst &SI); - - void visitAllocaInst(AllocaInst &AI); - void visitLoadInst(LoadInst &LI); - void visitStoreInst(StoreInst &SI); - - void visitSExtInst(SExtInst &SI); - void visitZExtInst(ZExtInst &ZI); - - void visitBinaryOperator(BinaryOperator &BO); - void visitICmpInst(ICmpInst &IC); - }; - - // Used by terminator instructions to proceed from the current basic - // block to the next. Verifies that "current" dominates "next", - // then calls visitBasicBlock. - void proceedToSuccessors(DomTreeDFS::Node *Current) { - for (DomTreeDFS::Node::iterator I = Current->begin(), - E = Current->end(); I != E; ++I) { - WorkList.push_back(*I); - } - } - - void proceedToSuccessor(DomTreeDFS::Node *Next) { - WorkList.push_back(Next); - } - - // Visits each instruction in the basic block. - void visitBasicBlock(DomTreeDFS::Node *Node) { - BasicBlock *BB = Node->getBlock(); - DEBUG(errs() << "Entering Basic Block: " << BB->getName() - << " (" << Node->getDFSNumIn() << ")\n"); - for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) { - visitInstruction(I++, Node); - } - } - - // Tries to simplify each Instruction and add new properties. - void visitInstruction(Instruction *I, DomTreeDFS::Node *DT) { - DEBUG(errs() << "Considering instruction " << *I << "\n"); - DEBUG(VN->dump()); - DEBUG(IG->dump()); - DEBUG(VR->dump()); - - // Sometimes instructions are killed in earlier analysis. 
- if (isInstructionTriviallyDead(I)) { - ++NumSimple; - modified = true; - if (unsigned n = VN->valueNumber(I, DTDFS->getRootNode())) - if (VN->value(n) == I) IG->remove(n); - VN->remove(I); - I->eraseFromParent(); - return; - } - -#ifndef NDEBUG - // Try to replace the whole instruction. - Value *V = VN->canonicalize(I, DT); - assert(V == I && "Late instruction canonicalization."); - if (V != I) { - modified = true; - ++NumInstruction; - DEBUG(errs() << "Removing " << *I << ", replacing with " << *V << "\n"); - if (unsigned n = VN->valueNumber(I, DTDFS->getRootNode())) - if (VN->value(n) == I) IG->remove(n); - VN->remove(I); - I->replaceAllUsesWith(V); - I->eraseFromParent(); - return; - } - - // Try to substitute operands. - for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { - Value *Oper = I->getOperand(i); - Value *V = VN->canonicalize(Oper, DT); - assert(V == Oper && "Late operand canonicalization."); - if (V != Oper) { - modified = true; - ++NumVarsReplaced; - DEBUG(errs() << "Resolving " << *I); - I->setOperand(i, V); - DEBUG(errs() << " into " << *I); - } - } -#endif - - std::string name = I->getParent()->getName(); - DEBUG(errs() << "push (%" << name << ")\n"); - Forwards visit(this, DT); - visit.visit(*I); - DEBUG(errs() << "pop (%" << name << ")\n"); - } - }; - - bool PredicateSimplifier::runOnFunction(Function &F) { - DominatorTree *DT = &getAnalysis(); - DTDFS = new DomTreeDFS(DT); - TargetData *TD = getAnalysisIfAvailable(); - - // FIXME: PredicateSimplifier should still be able to do basic - // optimizations without TargetData. But for now, just exit if - // it's not available. 
- if (!TD) return false; - - Context = &F.getContext(); - - DEBUG(errs() << "Entering Function: " << F.getName() << "\n"); - - modified = false; - DomTreeDFS::Node *Root = DTDFS->getRootNode(); - VN = new ValueNumbering(DTDFS); - IG = new InequalityGraph(*VN, Root); - VR = new ValueRanges(*VN, TD, Context); - WorkList.push_back(Root); - - do { - DomTreeDFS::Node *DTNode = WorkList.back(); - WorkList.pop_back(); - if (!UB.isDead(DTNode->getBlock())) visitBasicBlock(DTNode); - } while (!WorkList.empty()); - - delete DTDFS; - delete VR; - delete IG; - delete VN; - - modified |= UB.kill(); - - return modified; - } - - void PredicateSimplifier::Forwards::visitTerminatorInst(TerminatorInst &TI) { - PS->proceedToSuccessors(DTNode); - } - - void PredicateSimplifier::Forwards::visitBranchInst(BranchInst &BI) { - if (BI.isUnconditional()) { - PS->proceedToSuccessors(DTNode); - return; - } - - Value *Condition = BI.getCondition(); - BasicBlock *TrueDest = BI.getSuccessor(0); - BasicBlock *FalseDest = BI.getSuccessor(1); - - if (isa(Condition) || TrueDest == FalseDest) { - PS->proceedToSuccessors(DTNode); - return; - } - - LLVMContext *Context = &BI.getContext(); - - for (DomTreeDFS::Node::iterator I = DTNode->begin(), E = DTNode->end(); - I != E; ++I) { - BasicBlock *Dest = (*I)->getBlock(); - DEBUG(errs() << "Branch thinking about %" << Dest->getName() - << "(" << PS->DTDFS->getNodeForBlock(Dest)->getDFSNumIn() << ")\n"); - - if (Dest == TrueDest) { - DEBUG(errs() << "(" << DTNode->getBlock()->getName() - << ") true set:\n"); - VRPSolver VRP(VN, IG, UB, VR, PS->DTDFS, PS->modified, Dest); - VRP.add(ConstantInt::getTrue(*Context), Condition, ICmpInst::ICMP_EQ); - VRP.solve(); - DEBUG(VN.dump()); - DEBUG(IG.dump()); - DEBUG(VR.dump()); - } else if (Dest == FalseDest) { - DEBUG(errs() << "(" << DTNode->getBlock()->getName() - << ") false set:\n"); - VRPSolver VRP(VN, IG, UB, VR, PS->DTDFS, PS->modified, Dest); - VRP.add(ConstantInt::getFalse(*Context), Condition, 
ICmpInst::ICMP_EQ); - VRP.solve(); - DEBUG(VN.dump()); - DEBUG(IG.dump()); - DEBUG(VR.dump()); - } - - PS->proceedToSuccessor(*I); - } - } - - void PredicateSimplifier::Forwards::visitSwitchInst(SwitchInst &SI) { - Value *Condition = SI.getCondition(); - - // Set the EQProperty in each of the cases BBs, and the NEProperties - // in the default BB. - - for (DomTreeDFS::Node::iterator I = DTNode->begin(), E = DTNode->end(); - I != E; ++I) { - BasicBlock *BB = (*I)->getBlock(); - DEBUG(errs() << "Switch thinking about BB %" << BB->getName() - << "(" << PS->DTDFS->getNodeForBlock(BB)->getDFSNumIn() << ")\n"); - - VRPSolver VRP(VN, IG, UB, VR, PS->DTDFS, PS->modified, BB); - if (BB == SI.getDefaultDest()) { - for (unsigned i = 1, e = SI.getNumCases(); i < e; ++i) - if (SI.getSuccessor(i) != BB) - VRP.add(Condition, SI.getCaseValue(i), ICmpInst::ICMP_NE); - VRP.solve(); - } else if (ConstantInt *CI = SI.findCaseDest(BB)) { - VRP.add(Condition, CI, ICmpInst::ICMP_EQ); - VRP.solve(); - } - PS->proceedToSuccessor(*I); - } - } - - void PredicateSimplifier::Forwards::visitAllocaInst(AllocaInst &AI) { - VRPSolver VRP(VN, IG, UB, VR, PS->DTDFS, PS->modified, &AI); - VRP.add(Constant::getNullValue(AI.getType()), - &AI, ICmpInst::ICMP_NE); - VRP.solve(); - } - - void PredicateSimplifier::Forwards::visitLoadInst(LoadInst &LI) { - Value *Ptr = LI.getPointerOperand(); - // avoid "load i8* null" -> null NE null. 
- if (isa(Ptr)) return; - - VRPSolver VRP(VN, IG, UB, VR, PS->DTDFS, PS->modified, &LI); - VRP.add(Constant::getNullValue(Ptr->getType()), - Ptr, ICmpInst::ICMP_NE); - VRP.solve(); - } - - void PredicateSimplifier::Forwards::visitStoreInst(StoreInst &SI) { - Value *Ptr = SI.getPointerOperand(); - if (isa(Ptr)) return; - - VRPSolver VRP(VN, IG, UB, VR, PS->DTDFS, PS->modified, &SI); - VRP.add(Constant::getNullValue(Ptr->getType()), - Ptr, ICmpInst::ICMP_NE); - VRP.solve(); - } - - void PredicateSimplifier::Forwards::visitSExtInst(SExtInst &SI) { - VRPSolver VRP(VN, IG, UB, VR, PS->DTDFS, PS->modified, &SI); - LLVMContext &Context = SI.getContext(); - uint32_t SrcBitWidth = cast(SI.getSrcTy())->getBitWidth(); - uint32_t DstBitWidth = cast(SI.getDestTy())->getBitWidth(); - APInt Min(APInt::getHighBitsSet(DstBitWidth, DstBitWidth-SrcBitWidth+1)); - APInt Max(APInt::getLowBitsSet(DstBitWidth, SrcBitWidth-1)); - VRP.add(ConstantInt::get(Context, Min), &SI, ICmpInst::ICMP_SLE); - VRP.add(ConstantInt::get(Context, Max), &SI, ICmpInst::ICMP_SGE); - VRP.solve(); - } - - void PredicateSimplifier::Forwards::visitZExtInst(ZExtInst &ZI) { - VRPSolver VRP(VN, IG, UB, VR, PS->DTDFS, PS->modified, &ZI); - LLVMContext &Context = ZI.getContext(); - uint32_t SrcBitWidth = cast(ZI.getSrcTy())->getBitWidth(); - uint32_t DstBitWidth = cast(ZI.getDestTy())->getBitWidth(); - APInt Max(APInt::getLowBitsSet(DstBitWidth, SrcBitWidth)); - VRP.add(ConstantInt::get(Context, Max), &ZI, ICmpInst::ICMP_UGE); - VRP.solve(); - } - - void PredicateSimplifier::Forwards::visitBinaryOperator(BinaryOperator &BO) { - Instruction::BinaryOps ops = BO.getOpcode(); - - switch (ops) { - default: break; - case Instruction::URem: - case Instruction::SRem: - case Instruction::UDiv: - case Instruction::SDiv: { - Value *Divisor = BO.getOperand(1); - VRPSolver VRP(VN, IG, UB, VR, PS->DTDFS, PS->modified, &BO); - VRP.add(Constant::getNullValue(Divisor->getType()), - Divisor, ICmpInst::ICMP_NE); - VRP.solve(); - break; 
- } - } - - switch (ops) { - default: break; - case Instruction::Shl: { - VRPSolver VRP(VN, IG, UB, VR, PS->DTDFS, PS->modified, &BO); - VRP.add(&BO, BO.getOperand(0), ICmpInst::ICMP_UGE); - VRP.solve(); - } break; - case Instruction::AShr: { - VRPSolver VRP(VN, IG, UB, VR, PS->DTDFS, PS->modified, &BO); - VRP.add(&BO, BO.getOperand(0), ICmpInst::ICMP_SLE); - VRP.solve(); - } break; - case Instruction::LShr: - case Instruction::UDiv: { - VRPSolver VRP(VN, IG, UB, VR, PS->DTDFS, PS->modified, &BO); - VRP.add(&BO, BO.getOperand(0), ICmpInst::ICMP_ULE); - VRP.solve(); - } break; - case Instruction::URem: { - VRPSolver VRP(VN, IG, UB, VR, PS->DTDFS, PS->modified, &BO); - VRP.add(&BO, BO.getOperand(1), ICmpInst::ICMP_ULE); - VRP.solve(); - } break; - case Instruction::And: { - VRPSolver VRP(VN, IG, UB, VR, PS->DTDFS, PS->modified, &BO); - VRP.add(&BO, BO.getOperand(0), ICmpInst::ICMP_ULE); - VRP.add(&BO, BO.getOperand(1), ICmpInst::ICMP_ULE); - VRP.solve(); - } break; - case Instruction::Or: { - VRPSolver VRP(VN, IG, UB, VR, PS->DTDFS, PS->modified, &BO); - VRP.add(&BO, BO.getOperand(0), ICmpInst::ICMP_UGE); - VRP.add(&BO, BO.getOperand(1), ICmpInst::ICMP_UGE); - VRP.solve(); - } break; - } - } - - void PredicateSimplifier::Forwards::visitICmpInst(ICmpInst &IC) { - // If possible, squeeze the ICmp predicate into something simpler. - // Eg., if x = [0, 4) and we're being asked icmp uge %x, 3 then change - // the predicate to eq. - - // XXX: once we do full PHI handling, modifying the instruction in the - // Forwards visitor will cause missed optimizations. 
- - ICmpInst::Predicate Pred = IC.getPredicate(); - - switch (Pred) { - default: break; - case ICmpInst::ICMP_ULE: Pred = ICmpInst::ICMP_ULT; break; - case ICmpInst::ICMP_UGE: Pred = ICmpInst::ICMP_UGT; break; - case ICmpInst::ICMP_SLE: Pred = ICmpInst::ICMP_SLT; break; - case ICmpInst::ICMP_SGE: Pred = ICmpInst::ICMP_SGT; break; - } - if (Pred != IC.getPredicate()) { - VRPSolver VRP(VN, IG, UB, VR, PS->DTDFS, PS->modified, &IC); - if (VRP.isRelatedBy(IC.getOperand(1), IC.getOperand(0), - ICmpInst::ICMP_NE)) { - ++NumSnuggle; - PS->modified = true; - IC.setPredicate(Pred); - } - } - - Pred = IC.getPredicate(); - - LLVMContext &Context = IC.getContext(); - - if (ConstantInt *Op1 = dyn_cast(IC.getOperand(1))) { - ConstantInt *NextVal = 0; - switch (Pred) { - default: break; - case ICmpInst::ICMP_SLT: - case ICmpInst::ICMP_ULT: - if (Op1->getValue() != 0) - NextVal = ConstantInt::get(Context, Op1->getValue()-1); - break; - case ICmpInst::ICMP_SGT: - case ICmpInst::ICMP_UGT: - if (!Op1->getValue().isAllOnesValue()) - NextVal = ConstantInt::get(Context, Op1->getValue()+1); - break; - } - - if (NextVal) { - VRPSolver VRP(VN, IG, UB, VR, PS->DTDFS, PS->modified, &IC); - if (VRP.isRelatedBy(IC.getOperand(0), NextVal, - ICmpInst::getInversePredicate(Pred))) { - ICmpInst *NewIC = new ICmpInst(&IC, ICmpInst::ICMP_EQ, - IC.getOperand(0), NextVal, ""); - NewIC->takeName(&IC); - IC.replaceAllUsesWith(NewIC); - - // XXX: prove this isn't necessary - if (unsigned n = VN.valueNumber(&IC, PS->DTDFS->getRootNode())) - if (VN.value(n) == &IC) IG.remove(n); - VN.remove(&IC); - - IC.eraseFromParent(); - ++NumSnuggle; - PS->modified = true; - } - } - } - } -} - -char PredicateSimplifier::ID = 0; -static RegisterPass -X("predsimplify", "Predicate Simplifier"); - -FunctionPass *llvm::createPredicateSimplifierPass() { - return new PredicateSimplifier(); -} From clattner at apple.com Tue Oct 6 12:10:23 2009 From: clattner at apple.com (Chris Lattner) Date: Tue, 6 Oct 2009 10:10:23 -0700 
Subject: [llvm-commits] [PATCH] Bug fix for comma-separated in CommandLine.cpp In-Reply-To: <4ACB6D2A.1080700@lip6.fr> References: <4ACB6D2A.1080700@lip6.fr> Message-ID: <98E3AAB8-940D-4689-8994-F9D3DB6312E8@apple.com> On Oct 6, 2009, at 9:15 AM, Nicolas Geoffray wrote: > Dear all, > > This simple patch fixes a bug in the CommaSeparated option in > CommandLine. The current code does the following: > > For an arg written like this: -myarg=foo,bar > It does myarg.push_back("foo") and myargs.push_back("foo,bar"). > > The patch fixes the issue. I'm not sure if it follows the logic of > all options in CommandLine.cpp, so I refer to someone who better > knows the code. Yes, this was introduced by a relatively recent refactoring of that code I did, please commit. -Chris > > > Thanks, > Nicolas > Index: lib/Support/CommandLine.cpp > =================================================================== > --- lib/Support/CommandLine.cpp (revision 83305) > +++ lib/Support/CommandLine.cpp (working copy) > @@ -643,6 +643,7 @@ > // Check for another comma. > Pos = Val.find(','); > } > + Value = Val; > } > > // If this is a named positional argument, just remember that it > is the > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits From clattner at apple.com Tue Oct 6 12:11:35 2009 From: clattner at apple.com (Chris Lattner) Date: Tue, 6 Oct 2009 10:11:35 -0700 Subject: [llvm-commits] [llvm] r83324 - /llvm/trunk/lib/Analysis/MallocHelper.cpp In-Reply-To: <4ACAEECE.5030003@gmail.com> References: <200910052115.n95LFiPj032104@zion.cs.uiuc.edu> <4ACAE883.7000804@free.fr> <4ACAEECE.5030003@gmail.com> Message-ID: <64FCF3A4-9D7D-47A1-83BD-32671B601639@apple.com> On Oct 6, 2009, at 12:16 AM, Török Edwin wrote: > On 2009-10-06 09:53, Chris Lattner wrote: >> >> On Oct 5, 2009, at 11:49 PM, Duncan Sands wrote: >> >>> Hi Torok, >>> >>>> + // Check malloc prototype. 
>>>> + // FIXME: this will be obsolete when nobuiltin attribute will >>>> exist. >>> >>> why would it be obsolete then? If LLVM sees "malloc" with a funky >>> prototype it will always need to bail out, no? >> >> Why would it in this case? Though it doesn't make a lot of sense, it >> wouldn't cause the program to crash, and the code would have >> undefined >> behavior. > > Once the LLVM optimizers/backends can emit warnings with line > numbers, I > think we should emit a warning if we see something obviously wrong > being > done, > like using malloc with a wrong proto without using > -ffreestanding/nobuiltins, or calling a function with an obviously > wrong > calling convention, etc. The front-end should generate this warning. -Chris From clattner at apple.com Tue Oct 6 12:12:46 2009 From: clattner at apple.com (Chris Lattner) Date: Tue, 6 Oct 2009 10:12:46 -0700 Subject: [llvm-commits] [llvm] r83324 - /llvm/trunk/lib/Analysis/MallocHelper.cpp In-Reply-To: <4ACAEDDE.8000204@free.fr> References: <200910052115.n95LFiPj032104@zion.cs.uiuc.edu> <4ACAE883.7000804@free.fr> <4ACAEDDE.8000204@free.fr> Message-ID: On Oct 6, 2009, at 12:12 AM, Duncan Sands wrote: > Hi Chris, > >>>> + // Check malloc prototype. >>>> + // FIXME: this will be obsolete when nobuiltin attribute will >>>> exist. >>> >>> why would it be obsolete then? If LLVM sees "malloc" with a funky >>> prototype it will always need to bail out, no? >> Why would it in this case? Though it doesn't make a lot of sense, >> it wouldn't cause the program to crash, and the code would have >> undefined behavior. > > if the standard malloc for some operating system has an unusual > prototype, for example it returns more info than usual malloc, then > it is still a builtin, just a builtin that LLVM doesn't understand > yet. I don't see why it shouldn't be marked with a "builtin" > attribute. > A "builtin" attribute presumably wouldn't be for marking functions > that LLVM knows about, but for marking standard functions. 
LLVM may > or may not know about any particular function or particular standard. > That said, I appreciate that there are limits to this line of > reasoning. I think it only makes sense to consider something "builtin" if it follows the C standard. You can't define a 'malloc' with a different prototype than what the spec indicates (at least without using -ffree-standing). -Chris From baldrick at free.fr Tue Oct 6 12:21:13 2009 From: baldrick at free.fr (Duncan Sands) Date: Tue, 06 Oct 2009 19:21:13 +0200 Subject: [llvm-commits] [llvm] r83324 - /llvm/trunk/lib/Analysis/MallocHelper.cpp In-Reply-To: References: <200910052115.n95LFiPj032104@zion.cs.uiuc.edu> <4ACAE883.7000804@free.fr> <4ACAEDDE.8000204@free.fr> Message-ID: <4ACB7C89.5030704@free.fr> Hi Chris, > I think it only makes sense to consider something "builtin" if it > follows the C standard. You can't define a 'malloc' with a different > prototype than what the spec indicates (at least without using > -ffree-standing). other languages don't have this kind of restriction, so I think it is best to always check prototypes. Since we are now doing this, I guess further discussion can wait until someone tries to remove the prototype checking. Ciao, Duncan. From jyasskin at google.com Tue Oct 6 12:25:50 2009 From: jyasskin at google.com (Jeffrey Yasskin) Date: Tue, 06 Oct 2009 17:25:50 -0000 Subject: [llvm-commits] [llvm] r83391 - /llvm/trunk/lib/Support/Triple.cpp Message-ID: <200910061725.n96HPoKI007518@zion.cs.uiuc.edu> Author: jyasskin Date: Tue Oct 6 12:25:50 2009 New Revision: 83391 URL: http://llvm.org/viewvc/llvm-project?rev=83391&view=rev Log: Fix PR5112, a miscompilation on gcc-4.0.3. Patch by Collin Winter! 
Modified: llvm/trunk/lib/Support/Triple.cpp Modified: llvm/trunk/lib/Support/Triple.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/Triple.cpp?rev=83391&r1=83390&r2=83391&view=diff ============================================================================== --- llvm/trunk/lib/Support/Triple.cpp (original) +++ llvm/trunk/lib/Support/Triple.cpp Tue Oct 6 12:25:50 2009 @@ -390,7 +390,10 @@ } void Triple::setArchName(const StringRef &Str) { - setTriple(Str + "-" + getVendorName() + "-" + getOSAndEnvironmentName()); + // Work around a miscompilation bug in gcc 4.0.3. + Twine a = getVendorName() + "-" + getOSAndEnvironmentName(); + Twine b = Str + "-" + a; + setTriple(b); } void Triple::setVendorName(const StringRef &Str) { From jyasskin at google.com Tue Oct 6 12:35:06 2009 From: jyasskin at google.com (Jeffrey Yasskin) Date: Tue, 6 Oct 2009 10:35:06 -0700 Subject: [llvm-commits] [llvm] r83391 - /llvm/trunk/lib/Support/Triple.cpp In-Reply-To: <200910061725.n96HPoKI007518@zion.cs.uiuc.edu> References: <200910061725.n96HPoKI007518@zion.cs.uiuc.edu> Message-ID: Please consider pulling this into the 2.6 release. If it's too late, it won't hurt Unladen since we rely on some post-2.6 fixes anyway. On Tue, Oct 6, 2009 at 10:25 AM, Jeffrey Yasskin wrote: > Author: jyasskin > Date: Tue Oct 6 12:25:50 2009 > New Revision: 83391 > > URL: http://llvm.org/viewvc/llvm-project?rev=83391&view=rev > Log: > Fix PR5112, a miscompilation on gcc-4.0.3. Patch by Collin Winter! > > Modified: > 
llvm/trunk/lib/Support/Triple.cpp > > Modified: llvm/trunk/lib/Support/Triple.cpp > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/Triple.cpp?rev=83391&r1=83390&r2=83391&view=diff > > ============================================================================== > --- llvm/trunk/lib/Support/Triple.cpp (original) > +++ llvm/trunk/lib/Support/Triple.cpp Tue Oct 6 12:25:50 2009 > @@ -390,7 +390,10 @@ > } > > void Triple::setArchName(const StringRef &Str) { > - setTriple(Str + "-" + getVendorName() + "-" + getOSAndEnvironmentName()); > + // Work around a miscompilation bug in gcc 4.0.3. > + Twine a = getVendorName() + "-" + getOSAndEnvironmentName(); > + Twine b = Str + "-" + a; > + setTriple(b); > } > > void Triple::setVendorName(const StringRef &Str) { > > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits > From gohman at apple.com Tue Oct 6 12:38:38 2009 From: gohman at apple.com (Dan Gohman) Date: Tue, 06 Oct 2009 17:38:38 -0000 Subject: [llvm-commits] [llvm] r83392 - in /llvm/trunk: lib/CodeGen/AsmPrinter/ lib/Target/ARM/AsmPrinter/ lib/Target/Alpha/AsmPrinter/ lib/Target/Blackfin/AsmPrinter/ lib/Target/CellSPU/AsmPrinter/ lib/Target/MSP430/AsmPrinter/ lib/Target/Mips/AsmPrinter/ lib/Target/PIC16/AsmPrinter/ lib/Target/PowerPC/AsmPrinter/ lib/Target/Sparc/AsmPrinter/ lib/Target/SystemZ/AsmPrinter/ lib/Target/X86/AsmPrinter/ lib/Target/XCore/AsmPrinter/ test/CodeGen/X86/ Message-ID: <200910061738.n96Hcdj9009289@zion.cs.uiuc.edu> Author: djg Date: Tue Oct 6 12:38:38 2009 New Revision: 83392 URL: http://llvm.org/viewvc/llvm-project?rev=83392&view=rev Log: Instead of printing unnecessary basic block labels as labels in verbose-asm mode, print comments instead. This eliminates a non-comment difference between verbose-asm mode and non-verbose-asm mode. 
Also, factor the relevant code out of all the targets and into target-independent code. Modified: llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp llvm/trunk/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp llvm/trunk/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp llvm/trunk/lib/Target/Blackfin/AsmPrinter/BlackfinAsmPrinter.cpp llvm/trunk/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp llvm/trunk/lib/Target/MSP430/AsmPrinter/MSP430AsmPrinter.cpp llvm/trunk/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp llvm/trunk/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp llvm/trunk/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp llvm/trunk/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp llvm/trunk/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp llvm/trunk/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp llvm/trunk/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp llvm/trunk/test/CodeGen/X86/2006-10-19-SwitchUnnecessaryBranching.ll llvm/trunk/test/CodeGen/X86/pic.ll Modified: llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp?rev=83392&r1=83391&r2=83392&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp (original) +++ llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp Tue Oct 6 12:38:38 2009 @@ -1623,8 +1623,15 @@ if (unsigned Align = MBB->getAlignment()) EmitAlignment(Log2_32(Align)); - GetMBBSymbol(MBB->getNumber())->print(O, MAI); - O << ':'; + if (MBB->pred_empty() || MBB->isOnlyReachableByFallthrough()) { + if (VerboseAsm) + O << MAI->getCommentString() << " BB#" << MBB->getNumber() << ':'; + } else { + GetMBBSymbol(MBB->getNumber())->print(O, MAI); + O << ':'; + if (!VerboseAsm) + O << '\n'; + } if (VerboseAsm) { if (const BasicBlock *BB = MBB->getBasicBlock()) @@ -1635,6 +1642,7 @@ } EmitComments(*MBB); + O << '\n'; } } Modified: llvm/trunk/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp URL: 
http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp?rev=83392&r1=83391&r2=83392&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp (original) +++ llvm/trunk/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp Tue Oct 6 12:38:38 2009 @@ -285,7 +285,6 @@ // Print a label for the basic block. if (I != MF.begin()) { EmitBasicBlockStart(I); - O << '\n'; } for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end(); II != E; ++II) { Modified: llvm/trunk/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp?rev=83392&r1=83391&r2=83392&view=diff ============================================================================== --- llvm/trunk/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp (original) +++ llvm/trunk/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp Tue Oct 6 12:38:38 2009 @@ -171,7 +171,6 @@ I != E; ++I) { if (I != MF.begin()) { EmitBasicBlockStart(I); - O << '\n'; } for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end(); II != E; ++II) { Modified: llvm/trunk/lib/Target/Blackfin/AsmPrinter/BlackfinAsmPrinter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Blackfin/AsmPrinter/BlackfinAsmPrinter.cpp?rev=83392&r1=83391&r2=83392&view=diff ============================================================================== --- llvm/trunk/lib/Target/Blackfin/AsmPrinter/BlackfinAsmPrinter.cpp (original) +++ llvm/trunk/lib/Target/Blackfin/AsmPrinter/BlackfinAsmPrinter.cpp Tue Oct 6 12:38:38 2009 @@ -135,13 +135,7 @@ for (MachineFunction::const_iterator I = MF.begin(), E = MF.end(); I != E; ++I) { // Print a label for the basic block. - if (!VerboseAsm && (I->pred_empty() || I->isOnlyReachableByFallthrough())) { - // This is an entry block or a block that's only reachable via a - // fallthrough edge. 
In non-VerboseAsm mode, don't print the label. - } else { - EmitBasicBlockStart(I); - O << '\n'; - } + EmitBasicBlockStart(I); for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end(); II != E; ++II) { Modified: llvm/trunk/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp?rev=83392&r1=83391&r2=83392&view=diff ============================================================================== --- llvm/trunk/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp (original) +++ llvm/trunk/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp Tue Oct 6 12:38:38 2009 @@ -460,7 +460,6 @@ // Print a label for the basic block. if (I != MF.begin()) { EmitBasicBlockStart(I); - O << '\n'; } for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end(); II != E; ++II) { Modified: llvm/trunk/lib/Target/MSP430/AsmPrinter/MSP430AsmPrinter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/MSP430/AsmPrinter/MSP430AsmPrinter.cpp?rev=83392&r1=83391&r2=83392&view=diff ============================================================================== --- llvm/trunk/lib/Target/MSP430/AsmPrinter/MSP430AsmPrinter.cpp (original) +++ llvm/trunk/lib/Target/MSP430/AsmPrinter/MSP430AsmPrinter.cpp Tue Oct 6 12:38:38 2009 @@ -124,13 +124,7 @@ for (MachineFunction::const_iterator I = MF.begin(), E = MF.end(); I != E; ++I) { // Print a label for the basic block. - if (!VerboseAsm && (I->pred_empty() || I->isOnlyReachableByFallthrough())) { - // This is an entry block or a block that's only reachable via a - // fallthrough edge. In non-VerboseAsm mode, don't print the label. 
- } else { - EmitBasicBlockStart(I); - O << '\n'; - } + EmitBasicBlockStart(I); for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end(); II != E; ++II) Modified: llvm/trunk/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp?rev=83392&r1=83391&r2=83392&view=diff ============================================================================== --- llvm/trunk/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp (original) +++ llvm/trunk/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp Tue Oct 6 12:38:38 2009 @@ -273,7 +273,6 @@ // Print a label for the basic block. if (I != MF.begin()) { EmitBasicBlockStart(I); - O << '\n'; } for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end(); Modified: llvm/trunk/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp?rev=83392&r1=83391&r2=83392&view=diff ============================================================================== --- llvm/trunk/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp (original) +++ llvm/trunk/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp Tue Oct 6 12:38:38 2009 @@ -98,7 +98,6 @@ // Print a label for the basic block. if (I != MF.begin()) { EmitBasicBlockStart(I); - O << '\n'; } // Print a basic block. Modified: llvm/trunk/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp?rev=83392&r1=83391&r2=83392&view=diff ============================================================================== --- llvm/trunk/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp (original) +++ llvm/trunk/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp Tue Oct 6 12:38:38 2009 @@ -660,7 +660,6 @@ // Print a label for the basic block. 
if (I != MF.begin()) { EmitBasicBlockStart(I); - O << '\n'; } for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end(); II != E; ++II) { @@ -844,7 +843,6 @@ // Print a label for the basic block. if (I != MF.begin()) { EmitBasicBlockStart(I); - O << '\n'; } for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end(); II != IE; ++II) { Modified: llvm/trunk/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp?rev=83392&r1=83391&r2=83392&view=diff ============================================================================== --- llvm/trunk/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp (original) +++ llvm/trunk/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp Tue Oct 6 12:38:38 2009 @@ -119,7 +119,6 @@ // Print a label for the basic block. if (I != MF.begin()) { EmitBasicBlockStart(I); - O << '\n'; } for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end(); II != E; ++II) { Modified: llvm/trunk/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp?rev=83392&r1=83391&r2=83392&view=diff ============================================================================== --- llvm/trunk/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp (original) +++ llvm/trunk/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp Tue Oct 6 12:38:38 2009 @@ -129,13 +129,7 @@ for (MachineFunction::const_iterator I = MF.begin(), E = MF.end(); I != E; ++I) { // Print a label for the basic block. - if (!VerboseAsm && (I->pred_empty() || I->isOnlyReachableByFallthrough())) { - // This is an entry block or a block that's only reachable via a - // fallthrough edge. In non-VerboseAsm mode, don't print the label. 
- } else { - EmitBasicBlockStart(I); - O << '\n'; - } + EmitBasicBlockStart(I); for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end(); II != E; ++II) Modified: llvm/trunk/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp?rev=83392&r1=83391&r2=83392&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp (original) +++ llvm/trunk/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp Tue Oct 6 12:38:38 2009 @@ -165,13 +165,7 @@ for (MachineFunction::const_iterator I = MF.begin(), E = MF.end(); I != E; ++I) { // Print a label for the basic block. - if (!VerboseAsm && (I->pred_empty() || I->isOnlyReachableByFallthrough())) { - // This is an entry block or a block that's only reachable via a - // fallthrough edge. In non-VerboseAsm mode, don't print the label. - } else { - EmitBasicBlockStart(I); - O << '\n'; - } + EmitBasicBlockStart(I); for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end(); II != IE; ++II) { // Print the assembly for the instruction. Modified: llvm/trunk/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp?rev=83392&r1=83391&r2=83392&view=diff ============================================================================== --- llvm/trunk/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp (original) +++ llvm/trunk/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp Tue Oct 6 12:38:38 2009 @@ -274,7 +274,6 @@ // Print a label for the basic block. 
if (I != MF.begin()) { EmitBasicBlockStart(I); - O << '\n'; } for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end(); Modified: llvm/trunk/test/CodeGen/X86/2006-10-19-SwitchUnnecessaryBranching.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/2006-10-19-SwitchUnnecessaryBranching.ll?rev=83392&r1=83391&r2=83392&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/X86/2006-10-19-SwitchUnnecessaryBranching.ll (original) +++ llvm/trunk/test/CodeGen/X86/2006-10-19-SwitchUnnecessaryBranching.ll Tue Oct 6 12:38:38 2009 @@ -7,7 +7,7 @@ entry: ; CHECK: cmpl $2 ; CHECK-NEXT: je -; CHECK-NEXT: LBB1_1 +; CHECK-NEXT: %entry switch i32 %argc, label %UnifiedReturnBlock [ i32 1, label %bb Modified: llvm/trunk/test/CodeGen/X86/pic.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pic.ll?rev=83392&r1=83391&r2=83392&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/X86/pic.ll (original) +++ llvm/trunk/test/CodeGen/X86/pic.ll Tue Oct 6 12:38:38 2009 @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=i686-pc-linux-gnu -relocation-model=pic | FileCheck %s -check-prefix=LINUX +; RUN: llc < %s -mtriple=i686-pc-linux-gnu -relocation-model=pic -asm-verbose=false | FileCheck %s -check-prefix=LINUX @ptr = external global i32* @dst = external global i32 @@ -12,7 +12,6 @@ ret void ; LINUX: test1: -; LINUX: .LBB1_0: ; LINUX: call .L1$pb ; LINUX-NEXT: .L1$pb: ; LINUX-NEXT: popl From gohman at apple.com Tue Oct 6 12:43:57 2009 From: gohman at apple.com (Dan Gohman) Date: Tue, 06 Oct 2009 17:43:57 -0000 Subject: [llvm-commits] [llvm] r83393 - /llvm/trunk/include/llvm/LLVMContext.h Message-ID: <200910061743.n96Hhv9A009976@zion.cs.uiuc.edu> Author: djg Date: Tue Oct 6 12:43:57 2009 New Revision: 83393 URL: http://llvm.org/viewvc/llvm-project?rev=83393&view=rev Log: Make LLVMContext's pImpl member const. 
Modified: llvm/trunk/include/llvm/LLVMContext.h Modified: llvm/trunk/include/llvm/LLVMContext.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/LLVMContext.h?rev=83393&r1=83392&r2=83393&view=diff ============================================================================== --- llvm/trunk/include/llvm/LLVMContext.h (original) +++ llvm/trunk/include/llvm/LLVMContext.h Tue Oct 6 12:43:57 2009 @@ -19,6 +19,7 @@ class LLVMContextImpl; class MetadataContext; + /// This is an important class for using LLVM in a threaded context. It /// (opaquely) owns and manages the core "global" data of LLVM's core /// infrastructure, including the type and constant uniquing tables. @@ -28,8 +29,9 @@ // DO NOT IMPLEMENT LLVMContext(LLVMContext&); void operator=(LLVMContext&); + public: - LLVMContextImpl* pImpl; + LLVMContextImpl* const pImpl; MetadataContext &getMetadata(); bool RemoveDeadMetadata(); LLVMContext(); From edwintorok at gmail.com Tue Oct 6 12:46:42 2009 From: edwintorok at gmail.com (=?ISO-8859-1?Q?T=F6r=F6k_Edwin?=) Date: Tue, 06 Oct 2009 20:46:42 +0300 Subject: [llvm-commits] [llvm] r83391 - /llvm/trunk/lib/Support/Triple.cpp In-Reply-To: References: <200910061725.n96HPoKI007518@zion.cs.uiuc.edu> Message-ID: <4ACB8282.2010108@gmail.com> On 2009-10-06 20:35, Jeffrey Yasskin wrote: > Please consider pulling this into the 2.6 release. If it's too late, > it won't hurt Unladen since we rely on some post-2.6 fixes anyway. > > On Tue, Oct 6, 2009 at 10:25 AM, Jeffrey Yasskin wrote: > >> Author: jyasskin >> Date: Tue Oct 6 12:25:50 2009 >> New Revision: 83391 >> >> URL: http://llvm.org/viewvc/llvm-project?rev=83391&view=rev >> Log: >> Fix PR5112, a miscompilation on gcc-4.0.3. Patch by Collin Winter! 
>> >> Modified: >> llvm/trunk/lib/Support/Triple.cpp >> >> Modified: llvm/trunk/lib/Support/Triple.cpp >> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/Triple.cpp?rev=83391&r1=83390&r2=83391&view=diff >> >> ============================================================================== >> --- llvm/trunk/lib/Support/Triple.cpp (original) >> +++ llvm/trunk/lib/Support/Triple.cpp Tue Oct 6 12:25:50 2009 >> @@ -390,7 +390,10 @@ >> } >> >> void Triple::setArchName(const StringRef &Str) { >> - setTriple(Str + "-" + getVendorName() + "-" + getOSAndEnvironmentName()); >> + // Work around a miscompilation bug in gcc 4.0.3. >> Who uses 4.0.3 still? That a very buggy release. Best regards, --Edwin From gohman at apple.com Tue Oct 6 12:51:06 2009 From: gohman at apple.com (Dan Gohman) Date: Tue, 06 Oct 2009 17:51:06 -0000 Subject: [llvm-commits] [llvm-gcc-4.2] r83394 - /llvm-gcc-4.2/trunk/gcc/llvm-linker-hack.cpp Message-ID: <200910061751.n96Hp68d010927@zion.cs.uiuc.edu> Author: djg Date: Tue Oct 6 12:51:06 2009 New Revision: 83394 URL: http://llvm.org/viewvc/llvm-project?rev=83394&view=rev Log: The PredicateSimplifier pass was removed. 
Modified: llvm-gcc-4.2/trunk/gcc/llvm-linker-hack.cpp Modified: llvm-gcc-4.2/trunk/gcc/llvm-linker-hack.cpp URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/gcc/llvm-linker-hack.cpp?rev=83394&r1=83393&r2=83394&view=diff ============================================================================== --- llvm-gcc-4.2/trunk/gcc/llvm-linker-hack.cpp (original) +++ llvm-gcc-4.2/trunk/gcc/llvm-linker-hack.cpp Tue Oct 6 12:51:06 2009 @@ -75,7 +75,6 @@ llvm::createAggressiveDCEPass(); llvm::createConstantMergePass(); llvm::createIndVarSimplifyPass(); - llvm::createPredicateSimplifierPass(); llvm::createCondPropagationPass(); llvm::createGlobalOptimizerPass(); llvm::createJumpThreadingPass(); From collinwinter at google.com Tue Oct 6 13:07:54 2009 From: collinwinter at google.com (Collin Winter) Date: Tue, 6 Oct 2009 11:07:54 -0700 Subject: [llvm-commits] [llvm] r83391 - /llvm/trunk/lib/Support/Triple.cpp In-Reply-To: <4ACB8282.2010108@gmail.com> References: <200910061725.n96HPoKI007518@zion.cs.uiuc.edu> <4ACB8282.2010108@gmail.com> Message-ID: <3c8293b60910061107o6866dd3bu31d62b71f12e9850@mail.gmail.com> Hi, 2009/10/6 Török Edwin : > On 2009-10-06 20:35, Jeffrey Yasskin wrote: >> On Tue, Oct 6, 2009 at 10:25 AM, Jeffrey Yasskin wrote: >>> void Triple::setArchName(const StringRef &Str) { >>> - setTriple(Str + "-" + getVendorName() + "-" + getOSAndEnvironmentName()); >>> + // Work around a miscompilation bug in gcc 4.0.3. >>> > > Who uses 4.0.3 still? That a very buggy release. It's the compiler that shipped with Ubuntu Dapper, which some of our project's buildbot slaves are running on. The problem may impact other versions of gcc, but since 4.0.3 isn't on the "known-bad gcc versions" list, we went ahead and tracked down the problematic code snippet. 
Collin Winter From wendling at apple.com Tue Oct 6 13:15:19 2009 From: wendling at apple.com (Bill Wendling) Date: Tue, 6 Oct 2009 11:15:19 -0700 Subject: [llvm-commits] [llvm-gcc-4.2] r83332 - /llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp In-Reply-To: <200910052225.n95MPX8L009144@zion.cs.uiuc.edu> References: <200910052225.n95MPX8L009144@zion.cs.uiuc.edu> Message-ID: On Oct 5, 2009, at 3:25 PM, Eric Christopher wrote: > Author: echristo > Date: Mon Oct 5 17:25:32 2009 > New Revision: 83332 > > URL: http://llvm.org/viewvc/llvm-project?rev=83332&view=rev > Log: > Revert the BITS_PER_UNIT part of my last patch. llvm does alignment > computation on bytes and would fail if BITS_PER_UNIT were anything > other than 8. > It might be worthwhile to put a comment around these so that future people won't make the same changes. -bw > Modified: > llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp > > Modified: llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp > URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp?rev=83332&r1=83331&r2=83332&view=diff > > ============================================================================== > --- llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp (original) > +++ llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp Mon Oct 5 17:25:32 2009 > @@ -505,7 +505,7 @@ > > // Handle attribute "aligned". > if (DECL_ALIGN (FnDecl) != FUNCTION_BOUNDARY) > - Fn->setAlignment(DECL_ALIGN (FnDecl) / BITS_PER_UNIT); > + Fn->setAlignment(DECL_ALIGN (FnDecl) / 8); > > // Handle functions in specified sections. > if (DECL_SECTION_NAME(FnDecl)) > @@ -1614,7 +1614,7 @@ > if (DECL_ALIGN(decl)) { > unsigned TargetAlign = getTargetData().getABITypeAlignment(Ty); > if (DECL_USER_ALIGN(decl) || 8 * TargetAlign < (unsigned) > DECL_ALIGN(decl)) > - Alignment = DECL_ALIGN(decl) / BITS_PER_UNIT; > + Alignment = DECL_ALIGN(decl) / 8; > } > > const char *Name; // Name of variable > @@ -6628,8 +6628,7 @@ > if (BitStart == 0 && // llvm pointer points to it. 
> !isBitfield(FieldDecl) && // bitfield computation might > offset pointer. > DECL_ALIGN(FieldDecl)) > - LVAlign = std::max(LVAlign, > - unsigned(DECL_ALIGN(FieldDecl)) / BITS_PER_UNIT); > + LVAlign = std::max(LVAlign, unsigned(DECL_ALIGN(FieldDecl)) / > 8); > #endif > > // If the FIELD_DECL has an annotate attribute on it, emit it. > @@ -6839,7 +6838,7 @@ > unsigned Alignment = Ty->isSized() ? TD.getABITypeAlignment(Ty) : 1; > if (DECL_ALIGN(exp)) { > if (DECL_USER_ALIGN(exp) || 8 * Alignment < (unsigned)DECL_ALIGN > (exp)) > - Alignment = DECL_ALIGN(exp) / BITS_PER_UNIT; > + Alignment = DECL_ALIGN(exp) / 8; > } > > return LValue(BitCastToType(Decl, PTy), Alignment); > > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits From dpatel at apple.com Tue Oct 6 13:37:31 2009 From: dpatel at apple.com (Devang Patel) Date: Tue, 06 Oct 2009 18:37:31 -0000 Subject: [llvm-commits] [llvm] r83400 - in /llvm/trunk: include/llvm/CodeGen/DwarfWriter.h lib/CodeGen/AsmPrinter/AsmPrinter.cpp lib/CodeGen/AsmPrinter/DwarfDebug.cpp lib/CodeGen/AsmPrinter/DwarfWriter.cpp lib/CodeGen/SelectionDAG/FastISel.cpp Message-ID: <200910061837.n96IbVn7017630@zion.cs.uiuc.edu> Author: dpatel Date: Tue Oct 6 13:37:31 2009 New Revision: 83400 URL: http://llvm.org/viewvc/llvm-project?rev=83400&view=rev Log: Add support to handle debug info attached to an instruction. This is not yet enabled. 
Modified: llvm/trunk/include/llvm/CodeGen/DwarfWriter.h llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp llvm/trunk/lib/CodeGen/AsmPrinter/DwarfWriter.cpp llvm/trunk/lib/CodeGen/SelectionDAG/FastISel.cpp Modified: llvm/trunk/include/llvm/CodeGen/DwarfWriter.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/DwarfWriter.h?rev=83400&r1=83399&r2=83400&view=diff ============================================================================== --- llvm/trunk/include/llvm/CodeGen/DwarfWriter.h (original) +++ llvm/trunk/include/llvm/CodeGen/DwarfWriter.h Tue Oct 6 13:37:31 2009 @@ -110,9 +110,10 @@ /// RecordInlinedFnEnd - Indicate the end of inlined subroutine. unsigned RecordInlinedFnEnd(DISubprogram SP); + void SetDbgScopeBeginLabels(const MachineInstr *MI, unsigned L); + void SetDbgScopeEndLabels(const MachineInstr *MI, unsigned L); }; - } // end llvm namespace #endif Modified: llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp?rev=83400&r1=83399&r2=83400&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp (original) +++ llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp Tue Oct 6 13:37:31 2009 @@ -1363,12 +1363,18 @@ DebugLocTuple CurDLT = MF->getDebugLocTuple(DL); if (BeforePrintingInsn) { if (CurDLT.CompileUnit != 0 && PrevDLT != CurDLT) { - printLabel(DW->RecordSourceLine(CurDLT.Line, CurDLT.Col, - CurDLT.CompileUnit)); - O << '\n'; + unsigned L = DW->RecordSourceLine(CurDLT.Line, CurDLT.Col, + CurDLT.CompileUnit); + printLabel(L); +#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN + DW->SetDbgScopeBeginLabels(MI, L); +#endif + } else { +#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN + DW->SetDbgScopeEndLabels(MI, 0); +#endif } - } - + } PrevDLT = CurDLT; } } Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp URL: 
http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp?rev=83400&r1=83399&r2=83400&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp (original) +++ llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp Tue Oct 6 13:37:31 2009 @@ -1322,7 +1322,6 @@ Parent->AddScope(Slot); else // First function is top level function. - // FIXME - Dpatel - What is FunctionDbgScope ? if (!FunctionDbgScope) FunctionDbgScope = Slot; @@ -1901,6 +1900,12 @@ if (TimePassesIsEnabled) DebugTimer->startTimer(); +#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN + if (!ExtractScopeInformation(MF)) + return; + CollectVariableInfo(); +#endif + // Begin accumulating function debug information. MMI->BeginFunction(MF); Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfWriter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/DwarfWriter.cpp?rev=83400&r1=83399&r2=83400&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/DwarfWriter.cpp (original) +++ llvm/trunk/lib/CodeGen/AsmPrinter/DwarfWriter.cpp Tue Oct 6 13:37:31 2009 @@ -119,3 +119,9 @@ return DD->RecordInlinedFnEnd(SP); } +void DwarfWriter::SetDbgScopeBeginLabels(const MachineInstr *MI, unsigned L) { + DD->SetDbgScopeEndLabels(MI, L); +} +void DwarfWriter::SetDbgScopeEndLabels(const MachineInstr *MI, unsigned L) { + DD->SetDbgScopeBeginLabels(MI, L); +} Modified: llvm/trunk/lib/CodeGen/SelectionDAG/FastISel.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/FastISel.cpp?rev=83400&r1=83399&r2=83400&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/FastISel.cpp (original) +++ llvm/trunk/lib/CodeGen/SelectionDAG/FastISel.cpp Tue Oct 6 13:37:31 2009 @@ -425,7 +425,9 @@ if (AllocaLocation) MMI->setVariableDbgInfo(DI->getVariable(), 
AllocaLocation, FI); } +#ifndef ATTACH_DEBUG_INFO_TO_AN_INSN DW->RecordVariable(DI->getVariable(), FI); +#endif return true; } case Intrinsic::eh_exception: { From jyasskin at google.com Tue Oct 6 14:06:17 2009 From: jyasskin at google.com (Jeffrey Yasskin) Date: Tue, 06 Oct 2009 19:06:17 -0000 Subject: [llvm-commits] [llvm] r83401 - /llvm/trunk/unittests/ExecutionEngine/JIT/JITTest.cpp Message-ID: <200910061906.n96J6HUn021563@zion.cs.uiuc.edu> Author: jyasskin Date: Tue Oct 6 14:06:16 2009 New Revision: 83401 URL: http://llvm.org/viewvc/llvm-project?rev=83401&view=rev Log: Fix illegal cross-type aliasing. Found by baldrick on a newer gcc. Modified: llvm/trunk/unittests/ExecutionEngine/JIT/JITTest.cpp Modified: llvm/trunk/unittests/ExecutionEngine/JIT/JITTest.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/unittests/ExecutionEngine/JIT/JITTest.cpp?rev=83401&r1=83400&r2=83401&view=diff ============================================================================== --- llvm/trunk/unittests/ExecutionEngine/JIT/JITTest.cpp (original) +++ llvm/trunk/unittests/ExecutionEngine/JIT/JITTest.cpp Tue Oct 6 14:06:16 2009 @@ -99,9 +99,8 @@ // Get the pointer to the native code to force it to JIT the function and // allocate space for the global. - void (*F1Ptr)(); - // Hack to avoid ISO C++ warning about casting function pointers. - *(void**)(void*)&F1Ptr = JIT->getPointerToFunction(F1); + void (*F1Ptr)() = + reinterpret_cast((intptr_t)JIT->getPointerToFunction(F1)); // Since F1 was codegen'd, a pointer to G should be available. int32_t *GPtr = (int32_t*)JIT->getPointerToGlobalIfAvailable(G); @@ -115,9 +114,8 @@ // Make a second function identical to the first, referring to the same // global. Function *F2 = makeReturnGlobal("F2", G, M); - // Hack to avoid ISO C++ warning about casting function pointers. 
- void (*F2Ptr)(); - *(void**)(void*)&F2Ptr = JIT->getPointerToFunction(F2); + void (*F2Ptr)() = + reinterpret_cast((intptr_t)JIT->getPointerToFunction(F2)); // F2() should increment G. F2Ptr(); From baldrick at free.fr Tue Oct 6 14:10:58 2009 From: baldrick at free.fr (Duncan Sands) Date: Tue, 06 Oct 2009 19:10:58 -0000 Subject: [llvm-commits] [gcc-plugin] r83402 - in /gcc-plugin/trunk: llvm-abi.h llvm-backend.cpp llvm-convert.cpp llvm-types.cpp x86/llvm-target.cpp Message-ID: <200910061910.n96JAwrC022175@zion.cs.uiuc.edu> Author: baldrick Date: Tue Oct 6 14:10:58 2009 New Revision: 83402 URL: http://llvm.org/viewvc/llvm-project?rev=83402&view=rev Log: Simplify the creation of pointer types. Modified: gcc-plugin/trunk/llvm-abi.h gcc-plugin/trunk/llvm-backend.cpp gcc-plugin/trunk/llvm-convert.cpp gcc-plugin/trunk/llvm-types.cpp gcc-plugin/trunk/x86/llvm-target.cpp Modified: gcc-plugin/trunk/llvm-abi.h URL: http://llvm.org/viewvc/llvm-project/gcc-plugin/trunk/llvm-abi.h?rev=83402&r1=83401&r2=83402&view=diff ============================================================================== --- gcc-plugin/trunk/llvm-abi.h (original) +++ gcc-plugin/trunk/llvm-abi.h Tue Oct 6 14:10:58 2009 @@ -388,7 +388,7 @@ if (ScalarType) C.HandleAggregateResultAsScalar(ConvertType(ScalarType)); else if (LLVM_SHOULD_RETURN_VECTOR_AS_SHADOW(type, isBuiltin)) - C.HandleScalarShadowResult(PointerType::getUnqual(Ty), false); + C.HandleScalarShadowResult(Ty->getPointerTo(), false); else C.HandleScalarResult(Ty); } else if (Ty->isSingleValueType() || Ty->isVoidTy()) { @@ -420,7 +420,7 @@ // FIXME: should return the hidden first argument for some targets // (e.g. ELF i386). - C.HandleAggregateShadowResult(PointerType::getUnqual(Ty), false); + C.HandleAggregateShadowResult(Ty->getPointerTo(), false); } } @@ -443,7 +443,7 @@ C.HandleScalarArgument(OpTy, type); ScalarElts.push_back(OpTy); } else if (isPassedByInvisibleReference(type)) { // variable size -> by-ref. 
- const Type *PtrTy = PointerType::getUnqual(Ty); + const Type *PtrTy = Ty->getPointerTo(); C.HandleByInvisibleReferenceArgument(PtrTy, type); ScalarElts.push_back(PtrTy); } else if (isa(Ty)) { @@ -744,7 +744,7 @@ if (ScalarType) C.HandleAggregateResultAsScalar(ConvertType(ScalarType)); else if (LLVM_SHOULD_RETURN_VECTOR_AS_SHADOW(type, isBuiltin)) - C.HandleScalarShadowResult(PointerType::getUnqual(Ty), false); + C.HandleScalarShadowResult(Ty->getPointerTo(), false); else C.HandleScalarResult(Ty); } else if (Ty->isSingleValueType() || Ty->isVoidTy()) { @@ -776,7 +776,7 @@ // FIXME: should return the hidden first argument for some targets // (e.g. ELF i386). - C.HandleAggregateShadowResult(PointerType::getUnqual(Ty), false); + C.HandleAggregateShadowResult(Ty->getPointerTo(), false); } } @@ -810,7 +810,7 @@ // not include variable sized fields here. std::vector Elts; if (isPassedByInvisibleReference(type)) { // variable size -> by-ref. - const Type *PtrTy = PointerType::getUnqual(Ty); + const Type *PtrTy = Ty->getPointerTo(); C.HandleByInvisibleReferenceArgument(PtrTy, type); ScalarElts.push_back(PtrTy); Modified: gcc-plugin/trunk/llvm-backend.cpp URL: http://llvm.org/viewvc/llvm-project/gcc-plugin/trunk/llvm-backend.cpp?rev=83402&r1=83401&r2=83402&view=diff ============================================================================== --- gcc-plugin/trunk/llvm-backend.cpp (original) +++ gcc-plugin/trunk/llvm-backend.cpp Tue Oct 6 14:10:58 2009 @@ -803,7 +803,7 @@ const Type *FPTy = FunctionType::get(Type::getVoidTy(Context), std::vector(), false); - FPTy = PointerType::getUnqual(FPTy); + FPTy = FPTy->getPointerTo(); for (unsigned i = 0, e = Tors.size(); i != e; ++i) { StructInit[0] = ConstantInt::get(Type::getInt32Ty(Context), Tors[i].second); @@ -955,7 +955,7 @@ Constant *lineNo = ConstantInt::get(Type::getInt32Ty(Context), DECL_SOURCE_LINE(decl)); Constant *file = ConvertMetadataStringToGV(DECL_SOURCE_FILE(decl)); - const Type *SBP= 
PointerType::getUnqual(Type::getInt8Ty(Context)); + const Type *SBP = Type::getInt8PtrTy(Context); file = TheFolder->CreateBitCast(file, SBP); // There may be multiple annotate attributes. Pass return of lookup_attr @@ -1830,7 +1830,7 @@ if (!AttributeUsedGlobals.empty()) { std::vector AUGs; - const Type *SBP= PointerType::getUnqual(Type::getInt8Ty(Context)); + const Type *SBP = Type::getInt8PtrTy(Context); for (SmallSetVector::iterator AI = AttributeUsedGlobals.begin(), AE = AttributeUsedGlobals.end(); AI != AE; ++AI) { @@ -1849,7 +1849,7 @@ if (!AttributeCompilerUsedGlobals.empty()) { std::vector ACUGs; - const Type *SBP= PointerType::getUnqual(Type::getInt8Ty(Context)); + const Type *SBP = Type::getInt8PtrTy(Context); for (SmallSetVector::iterator AI = AttributeCompilerUsedGlobals.begin(), AE = AttributeCompilerUsedGlobals.end(); AI != AE; ++AI) { Modified: gcc-plugin/trunk/llvm-convert.cpp URL: http://llvm.org/viewvc/llvm-project/gcc-plugin/trunk/llvm-convert.cpp?rev=83402&r1=83401&r2=83402&view=diff ============================================================================== --- gcc-plugin/trunk/llvm-convert.cpp (original) +++ gcc-plugin/trunk/llvm-convert.cpp Tue Oct 6 14:10:58 2009 @@ -280,7 +280,7 @@ Builder.CreateStore(ArgVal, Loc); } else { // This cast only involves pointers, therefore BitCast. - Loc = Builder.CreateBitCast(Loc, PointerType::getUnqual(LLVMTy)); + Loc = Builder.CreateBitCast(Loc, LLVMTy->getPointerTo()); Builder.CreateStore(ArgVal, Loc); } } @@ -425,7 +425,7 @@ Value *Loc = LocStack.back(); // This cast only involves pointers, therefore BitCast. 
- Loc = Builder.CreateBitCast(Loc, PointerType::getUnqual(StructTy)); + Loc = Builder.CreateBitCast(Loc, StructTy->getPointerTo()); Loc = Builder.CreateStructGEP(Loc, FieldNo); LocStack.push_back(Loc); @@ -807,7 +807,7 @@ } else { Value *RetVal = DECL_LOCAL(DECL_RESULT(FnDecl)); if (const StructType *STy = dyn_cast(Fn->getReturnType())) { - Value *R1 = Builder.CreateBitCast(RetVal, PointerType::getUnqual(STy)); + Value *R1 = Builder.CreateBitCast(RetVal, STy->getPointerTo()); llvm::Value *Idxs[2]; Idxs[0] = ConstantInt::get(llvm::Type::getInt32Ty(Context), 0); @@ -824,13 +824,12 @@ // pointer and loading. The load does not necessarily start at the // beginning of the aggregate (x86-64). if (ReturnOffset) { - RetVal = Builder.CreateBitCast(RetVal, - PointerType::getUnqual(Type::getInt8Ty(Context))); + RetVal = Builder.CreateBitCast(RetVal, Type::getInt8PtrTy(Context)); RetVal = Builder.CreateGEP(RetVal, ConstantInt::get(TD.getIntPtrType(Context), ReturnOffset)); } RetVal = Builder.CreateBitCast(RetVal, - PointerType::getUnqual(Fn->getReturnType())); + Fn->getReturnType()->getPointerTo()); RetVal = Builder.CreateLoad(RetVal, "retval"); RetVals.push_back(RetVal); } @@ -1450,10 +1449,8 @@ !TheTypeConverter->GCCTypeOverlapsWithLLVMTypePadding(type, LLVMTy) && // Don't copy tons of tiny elements. CountAggregateElements(LLVMTy) <= 8) { - DestLoc.Ptr = Builder.CreateBitCast(DestLoc.Ptr, - PointerType::getUnqual(LLVMTy)); - SrcLoc.Ptr = Builder.CreateBitCast(SrcLoc.Ptr, - PointerType::getUnqual(LLVMTy)); + DestLoc.Ptr = Builder.CreateBitCast(DestLoc.Ptr, LLVMTy->getPointerTo()); + SrcLoc.Ptr = Builder.CreateBitCast(SrcLoc.Ptr, LLVMTy->getPointerTo()); CopyAggregate(DestLoc, SrcLoc, Builder, type); return; } @@ -1505,8 +1502,7 @@ if (!TheTypeConverter->GCCTypeOverlapsWithLLVMTypePadding(type, LLVMTy) && // Don't zero tons of tiny elements. 
CountAggregateElements(LLVMTy) <= 8) { - DestLoc.Ptr = Builder.CreateBitCast(DestLoc.Ptr, - PointerType::getUnqual(LLVMTy)); + DestLoc.Ptr = Builder.CreateBitCast(DestLoc.Ptr, LLVMTy->getPointerTo()); ZeroAggregate(DestLoc, Builder); return; } @@ -1518,7 +1514,7 @@ Value *TreeToLLVM::EmitMemCpy(Value *DestPtr, Value *SrcPtr, Value *Size, unsigned Align) { - const Type *SBP = PointerType::getUnqual(Type::getInt8Ty(Context)); + const Type *SBP = Type::getInt8PtrTy(Context); const Type *IntPtr = TD.getIntPtrType(Context); Value *Ops[4] = { Builder.CreateBitCast(DestPtr, SBP), @@ -1534,7 +1530,7 @@ Value *TreeToLLVM::EmitMemMove(Value *DestPtr, Value *SrcPtr, Value *Size, unsigned Align) { - const Type *SBP = PointerType::getUnqual(Type::getInt8Ty(Context)); + const Type *SBP = Type::getInt8PtrTy(Context); const Type *IntPtr = TD.getIntPtrType(Context); Value *Ops[4] = { Builder.CreateBitCast(DestPtr, SBP), @@ -1550,7 +1546,7 @@ Value *TreeToLLVM::EmitMemSet(Value *DestPtr, Value *SrcVal, Value *Size, unsigned Align) { - const Type *SBP = PointerType::getUnqual(Type::getInt8Ty(Context)); + const Type *SBP = Type::getInt8PtrTy(Context); const Type *IntPtr = TD.getIntPtrType(Context); Value *Ops[4] = { Builder.CreateBitCast(DestPtr, SBP), @@ -1575,8 +1571,8 @@ // The idea is that it's a pointer to type "Value" // which is opaque* but the routine expects i8** and i8*. 
- const PointerType *Ty = PointerType::getUnqual(Type::getInt8Ty(Context)); - V = Builder.CreateBitCast(V, PointerType::getUnqual(Ty)); + const PointerType *Ty = Type::getInt8PtrTy(Context); + V = Builder.CreateBitCast(V, Ty->getPointerTo()); Value *Ops[2] = { V, @@ -1602,7 +1598,7 @@ Constant *lineNo = ConstantInt::get(Type::getInt32Ty(Context), DECL_SOURCE_LINE(decl)); Constant *file = ConvertMetadataStringToGV(DECL_SOURCE_FILE(decl)); - const Type *SBP= PointerType::getUnqual(Type::getInt8Ty(Context)); + const Type *SBP = Type::getInt8PtrTy(Context); file = Builder.getFolder().CreateBitCast(file, SBP); // There may be multiple annotate attributes. Pass return of lookup_attr @@ -1622,7 +1618,7 @@ // Assert its a string, and then get that string. assert(TREE_CODE(val) == STRING_CST && "Annotate attribute arg should always be a string"); - const Type *SBP = PointerType::getUnqual(Type::getInt8Ty(Context)); + const Type *SBP = Type::getInt8PtrTy(Context); Constant *strGV = TreeConstantToLLVM::EmitLV_STRING_CST(val); Value *Ops[4] = { Builder.CreateBitCast(V, SBP), @@ -1797,7 +1793,7 @@ const Type *IntTy = ConvertType(integer_type_node); - ExceptionValue = CreateTemporary(Type::getInt8Ty(Context)->getPointerTo()); + ExceptionValue = CreateTemporary(Type::getInt8PtrTy(Context)); ExceptionValue->setName("eh_exception"); ExceptionSelectorValue = CreateTemporary(IntTy); @@ -1860,7 +1856,7 @@ //FIXME assert(llvm_eh_personality_libfunc //FIXME && "no exception handling personality function!"); //FIXME Args.push_back(Builder.CreateBitCast(DECL_LOCAL(llvm_eh_personality_libfunc), -//FIXME PointerType::getUnqual(Type::getInt8Ty(Context)))); +//FIXME Type::getInt8PtrTy(Context))); //FIXME //FIXME // Add selections for each handler. //FIXME foreach_reachable_handler(i, false, false, AddHandler, &Handlers); @@ -1892,7 +1888,7 @@ //FIXME if (!TypeList) { //FIXME // Catch-all - push a null pointer. 
//FIXME Args.push_back( -//FIXME Constant::getNullValue(PointerType::getUnqual(Type::getInt8Ty(Context))) +//FIXME Constant::getNullValue(Type::getInt8PtrTy(Context)) //FIXME ); //FIXME } else { //FIXME // Add the type infos. @@ -1919,7 +1915,7 @@ //FIXME if (catch_all_type == NULL_TREE) //FIXME // Use a C++ style null catch-all object. //FIXME CatchAll = Constant::getNullValue( -//FIXME PointerType::getUnqual(Type::getInt8Ty(Context))); +//FIXME Type::getInt8PtrTy(Context)); //FIXME else //FIXME // This language has a type that catches all others. //FIXME CatchAll = Emit(catch_all_type, 0); @@ -1984,7 +1980,7 @@ //FIXME for (; TypeList; TypeList = TREE_CHAIN (TypeList)) { //FIXME Value *TType = Emit(lookup_type_for_runtime(TREE_VALUE(TypeList)), 0); //FIXME TType = Builder.CreateBitCast(TType, -//FIXME PointerType::getUnqual(Type::getInt8Ty(Context))); +//FIXME Type::getInt8PtrTy(Context)); //FIXME //FIXME // Call get eh type id. //FIXME Value *TypeID = Builder.CreateCall(FuncEHGetTypeID, TType, "eh_typeid"); @@ -2362,7 +2358,7 @@ if (!LV.isBitfield()) { if (!DestLoc) { // Scalar value: emit a load. - Value *Ptr = Builder.CreateBitCast(LV.Ptr, PointerType::getUnqual(Ty)); + Value *Ptr = Builder.CreateBitCast(LV.Ptr, Ty->getPointerTo()); LoadInst *LI = Builder.CreateLoad(Ptr, isVolatile); LI->setAlignment(Alignment); return LI; @@ -2492,7 +2488,7 @@ // If this is a direct call to a function using a static chain then we need // to ensure the function type is the one just calculated: it has an extra // parameter for the chain. - Callee = Builder.CreateBitCast(Callee, PointerType::getUnqual(Ty)); + Callee = Builder.CreateBitCast(Callee, Ty->getPointerTo()); Value *Result = EmitCallOf(Callee, stmt, DestLoc, PAL); @@ -2595,7 +2591,7 @@ Value *Loc = LocStack.back(); if (Loc) { // An address. Convert to the right type and load the value out. 
- Loc = Builder.CreateBitCast(Loc, PointerType::getUnqual(Ty)); + Loc = Builder.CreateBitCast(Loc, Ty->getPointerTo()); return Builder.CreateLoad(Loc, "val"); } else { // A value - just return it. @@ -2738,7 +2734,7 @@ /// reference with an additional parameter attribute "ByVal". void HandleByValArgument(const llvm::Type *LLVMTy, tree type) { Value *Loc = getAddress(); - assert(PointerType::getUnqual(LLVMTy) == Loc->getType()); + assert(LLVMTy->getPointerTo() == Loc->getType()); CallOperands.push_back(Loc); } @@ -2746,7 +2742,7 @@ /// argument is passed as a first class aggregate. void HandleFCAArgument(const llvm::Type *LLVMTy, tree type) { Value *Loc = getAddress(); - assert(PointerType::getUnqual(LLVMTy) == Loc->getType()); + assert(LLVMTy->getPointerTo() == Loc->getType()); CallOperands.push_back(Builder.CreateLoad(Loc)); } @@ -2755,7 +2751,7 @@ /// LLVM Struct, StructTy is the LLVM type of the struct we are entering. void EnterField(unsigned FieldNo, const llvm::Type *StructTy) { Value *Loc = getAddress(); - Loc = Builder.CreateBitCast(Loc, PointerType::getUnqual(StructTy)); + Loc = Builder.CreateBitCast(Loc, StructTy->getPointerTo()); pushAddress(Builder.CreateStructGEP(Loc, FieldNo, "elt")); } void ExitField() { @@ -2946,8 +2942,7 @@ Value *Ptr = DestLoc->Ptr; if (Client.Offset) { - Ptr = Builder.CreateBitCast(Ptr, - PointerType::getUnqual(Type::getInt8Ty(Context))); + Ptr = Builder.CreateBitCast(Ptr, Type::getInt8PtrTy(Context)); Ptr = Builder.CreateGEP(Ptr, ConstantInt::get(TD.getIntPtrType(Context), Client.Offset)); } @@ -2969,7 +2964,7 @@ } else if (AGGREGATE_TYPE_P(TREE_TYPE(op))) { // Aggregate to aggregate copy. 
MemRef NewLoc = *DestLoc; - NewLoc.Ptr = Builder.CreateBitCast(DestLoc->Ptr,PointerType::getUnqual(Ty)); + NewLoc.Ptr = Builder.CreateBitCast(DestLoc->Ptr,Ty->getPointerTo()); Value *OpVal = Emit(op, &NewLoc); (void)OpVal; assert(OpVal == 0 && "Shouldn't cast scalar to aggregate!"); @@ -3005,7 +3000,7 @@ // Make the destination look like the source type. const Type *OpTy = ConvertType(TREE_TYPE(Op)); - Target.Ptr = Builder.CreateBitCast(Target.Ptr,PointerType::getUnqual(OpTy)); + Target.Ptr = Builder.CreateBitCast(Target.Ptr, OpTy->getPointerTo()); // Needs to be in sync with EmitLV. switch (TREE_CODE(Op)) { @@ -3046,7 +3041,7 @@ // Target holds the temporary created above. const Type *ExpTy = ConvertType(TREE_TYPE(exp)); return Builder.CreateLoad(Builder.CreateBitCast(Target.Ptr, - PointerType::getUnqual(ExpTy))); + ExpTy->getPointerTo())); } if (DestLoc) { @@ -3625,7 +3620,7 @@ Value *Idx = EmitGimpleReg(op1); // The offset in bytes. // Convert the pointer into an i8* and add the offset to it. - Ptr = Builder.CreateBitCast(Ptr, Type::getInt8Ty(Context)->getPointerTo()); + Ptr = Builder.CreateBitCast(Ptr, Type::getInt8PtrTy(Context)); Value *GEP = POINTER_TYPE_OVERFLOW_UNDEFINED ? Builder.CreateInBoundsGEP(Ptr, Idx) : Builder.CreateGEP(Ptr, Idx); @@ -4243,7 +4238,7 @@ }; const Type* Ty[2]; Ty[0] = ResultTy; - Ty[1] = PointerType::getUnqual(ResultTy); + Ty[1] = ResultTy->getPointerTo(); C[0] = Builder.CreateBitCast(C[0], Ty[1]); C[1] = Builder.CreateIntCast(C[1], Ty[0], "cast"); // The gcc builtins are also full memory barriers. 
@@ -4267,7 +4262,7 @@ }; const Type* Ty[2]; Ty[0] = ResultTy; - Ty[1] = PointerType::getUnqual(ResultTy); + Ty[1] = ResultTy->getPointerTo(); C[0] = Builder.CreateBitCast(C[0], Ty[1]); C[1] = Builder.CreateIntCast(C[1], Ty[0], "cast"); C[2] = Builder.CreateIntCast(C[2], Ty[0], "cast"); @@ -4578,7 +4573,7 @@ //TODO location_t locus = gimple_location(stmt); //TODO Constant *lineNo = ConstantInt::get(Type::getInt32Ty, LOCATION_LINE(locus)); //TODO Constant *file = ConvertMetadataStringToGV(LOCATION_FILE(locus)); -//TODO const Type *SBP= PointerType::getUnqual(Type::getInt8Ty(Context)); +//TODO const Type *SBP= Type::getInt8PtrTy(Context); //TODO file = Builder.getFolder().CreateBitCast(file, SBP); //TODO //TODO // Get arguments. @@ -4750,7 +4745,7 @@ }; const Type* Ty[2]; Ty[0] = ResultTy; - Ty[1] = PointerType::getUnqual(ResultTy); + Ty[1] = ResultTy->getPointerTo(); C[0] = Builder.CreateBitCast(C[0], Ty[1]); C[1] = Builder.CreateIntCast(C[1], Ty[0], "cast"); @@ -4787,7 +4782,7 @@ }; const Type* Ty[2]; Ty[0] = ResultTy; - Ty[1] = PointerType::getUnqual(ResultTy); + Ty[1] = ResultTy->getPointerTo(); C[0] = Builder.CreateBitCast(C[0], Ty[1]); C[1] = Builder.CreateIntCast(C[1], Ty[0], "cast"); @@ -4824,7 +4819,7 @@ }; const Type* Ty[2]; Ty[0] = ResultTy; - Ty[1] = PointerType::getUnqual(ResultTy); + Ty[1] = ResultTy->getPointerTo(); C[0] = Builder.CreateBitCast(C[0], Ty[1]); C[1] = Builder.CreateIntCast(C[1], Ty[0], "cast"); @@ -4861,7 +4856,7 @@ }; const Type* Ty[2]; Ty[0] = ResultTy; - Ty[1] = PointerType::getUnqual(ResultTy); + Ty[1] = ResultTy->getPointerTo(); C[0] = Builder.CreateBitCast(C[0], Ty[1]); C[1] = Builder.CreateIntCast(C[1], Ty[0], "cast"); @@ -4898,7 +4893,7 @@ }; const Type* Ty[2]; Ty[0] = ResultTy; - Ty[1] = PointerType::getUnqual(ResultTy); + Ty[1] = ResultTy->getPointerTo(); C[0] = Builder.CreateBitCast(C[0], Ty[1]); C[1] = Builder.CreateIntCast(C[1], Ty[0], "cast"); @@ -4935,7 +4930,7 @@ }; const Type* Ty[2]; Ty[0] = ResultTy; - Ty[1] = 
PointerType::getUnqual(ResultTy); + Ty[1] = ResultTy->getPointerTo(); C[0] = Builder.CreateBitCast(C[0], Ty[1]); C[1] = Builder.CreateIntCast(C[1], Ty[0], "cast"); @@ -5245,8 +5240,7 @@ if (Locality == 0) Locality = ConstantInt::get(Type::getInt32Ty(Context), 3); - Ptr = Builder.CreateBitCast(Ptr, - PointerType::getUnqual(Type::getInt8Ty(Context))); + Ptr = Builder.CreateBitCast(Ptr, Type::getInt8PtrTy(Context)); Value *Ops[3] = { Ptr, ReadWrite, Locality }; Builder.CreateCall(Intrinsic::getDeclaration(TheModule, Intrinsic::prefetch), @@ -5289,8 +5283,7 @@ // Unfortunately, these constants are defined as RTL expressions and // should be handled separately. - Result = Builder.CreateBitCast(Ptr, - PointerType::getUnqual(Type::getInt8Ty(Context))); + Result = Builder.CreateBitCast(Ptr, Type::getInt8PtrTy(Context)); return true; } @@ -5304,8 +5297,7 @@ // needed for: MIPS, Sparc. Unfortunately, these constants are defined // as RTL expressions and should be handled separately. - Result = Builder.CreateBitCast(Ptr, - PointerType::getUnqual(Type::getInt8Ty(Context))); + Result = Builder.CreateBitCast(Ptr, Type::getInt8PtrTy(Context)); return true; } @@ -5409,8 +5401,7 @@ Intrinsic::eh_return_i32 : Intrinsic::eh_return_i64); Offset = Builder.CreateIntCast(Offset, IntPtr, true); - Handler = Builder.CreateBitCast(Handler, - PointerType::getUnqual(Type::getInt8Ty(Context))); + Handler = Builder.CreateBitCast(Handler, Type::getInt8PtrTy(Context)); SmallVector Args; Args.push_back(Offset); @@ -5439,7 +5430,7 @@ Value *Addr = Builder.CreateBitCast(Emit(gimple_call_arg(stmt, 0), 0), - PointerType::getUnqual(Type::getInt8Ty(Context))); + Type::getInt8PtrTy(Context)); Constant *Size, *Idx; for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) { @@ -5504,8 +5495,7 @@ return false; Value *Ptr = Emit(gimple_call_arg(stmt, 0), 0); - Ptr = Builder.CreateBitCast(Ptr, - PointerType::getUnqual(Type::getInt8Ty(Context))); + Ptr = Builder.CreateBitCast(Ptr, Type::getInt8PtrTy(Context)); 
Builder.CreateCall(Intrinsic::getDeclaration(TheModule, Intrinsic::stackrestore), Ptr); @@ -5548,16 +5538,14 @@ Constant *va_start = Intrinsic::getDeclaration(TheModule, Intrinsic::vastart); Value *ArgVal = Emit(gimple_call_arg(stmt, 0), 0); - ArgVal = Builder.CreateBitCast(ArgVal, - PointerType::getUnqual(Type::getInt8Ty(Context))); + ArgVal = Builder.CreateBitCast(ArgVal, Type::getInt8PtrTy(Context)); Builder.CreateCall(va_start, ArgVal); return true; } bool TreeToLLVM::EmitBuiltinVAEnd(gimple stmt) { Value *Arg = Emit(gimple_call_arg(stmt, 0), 0); - Arg = Builder.CreateBitCast(Arg, - PointerType::getUnqual(Type::getInt8Ty(Context))); + Arg = Builder.CreateBitCast(Arg, Type::getInt8PtrTy(Context)); Builder.CreateCall(Intrinsic::getDeclaration(TheModule, Intrinsic::vaend), Arg); return true; @@ -5582,7 +5570,7 @@ Arg2 = Emit(Arg2T, 0); } - static const Type *VPTy = PointerType::getUnqual(Type::getInt8Ty(Context)); + static const Type *VPTy = Type::getInt8PtrTy(Context); // FIXME: This ignores alignment and volatility of the arguments. 
SmallVector Args; @@ -5599,7 +5587,7 @@ VOID_TYPE)) return false; - static const Type *VPTy = PointerType::getUnqual(Type::getInt8Ty(Context)); + static const Type *VPTy = Type::getInt8PtrTy(Context); Value *Tramp = Emit(gimple_call_arg(stmt, 0), 0); Tramp = Builder.CreateBitCast(Tramp, VPTy); @@ -5721,7 +5709,7 @@ Value *TreeToLLVM::EmitFieldAnnotation(Value *FieldPtr, tree FieldDecl) { tree AnnotateAttr = lookup_attribute("annotate", DECL_ATTRIBUTES(FieldDecl)); - const Type *SBP = PointerType::getUnqual(Type::getInt8Ty(Context)); + const Type *SBP = Type::getInt8PtrTy(Context); Function *Fn = Intrinsic::getDeclaration(TheModule, Intrinsic::ptr_annotation, @@ -5836,8 +5824,7 @@ // much nicer in cases like: // float foo(int w, float A[][w], int g) { return A[g][0]; } - ArrayAddr = Builder.CreateBitCast(ArrayAddr, - PointerType::getUnqual(Type::getInt8Ty(Context))); + ArrayAddr = Builder.CreateBitCast(ArrayAddr, Type::getInt8PtrTy(Context)); if (VOID_TYPE_P(TREE_TYPE(ArrayTreeType))) return LValue(Builder.CreateGEP(ArrayAddr, IndexVal), 1); @@ -5878,7 +5865,7 @@ if (unsigned UnitOffset = BitStart / ValueSizeInBits) { // TODO: If Ptr.Ptr is a struct type or something, we can do much better // than this. e.g. check out when compiling unwind-dw2-fde-darwin.c. - Ptr.Ptr = Builder.CreateBitCast(Ptr.Ptr, PointerType::getUnqual(ValTy)); + Ptr.Ptr = Builder.CreateBitCast(Ptr.Ptr, ValTy->getPointerTo()); Ptr.Ptr = Builder.CreateGEP(Ptr.Ptr, ConstantInt::get(Type::getInt32Ty(Context), UnitOffset)); @@ -5887,11 +5874,11 @@ // If this is referring to the whole field, return the whole thing. 
if (BitStart == 0 && BitSize == ValueSizeInBits) { - return LValue(Builder.CreateBitCast(Ptr.Ptr, PointerType::getUnqual(ValTy)), + return LValue(Builder.CreateBitCast(Ptr.Ptr, ValTy->getPointerTo()), Ptr.getAlignment()); } - return LValue(Builder.CreateBitCast(Ptr.Ptr, PointerType::getUnqual(ValTy)), + return LValue(Builder.CreateBitCast(Ptr.Ptr, ValTy->getPointerTo()), 1, BitStart, BitSize); } @@ -5914,7 +5901,7 @@ StructAddrLV.BitStart == 0) && "structs cannot be bitfields!"); StructAddrLV.Ptr = Builder.CreateBitCast(StructAddrLV.Ptr, - PointerType::getUnqual(StructTy)); + StructTy->getPointerTo()); const Type *FieldTy = ConvertType(getDeclaredType(FieldDecl)); // BitStart - This is the actual offset of the field from the start of the @@ -5985,7 +5972,7 @@ Value *Ptr = Builder.CreatePtrToInt(StructAddrLV.Ptr, Offset->getType()); Ptr = Builder.CreateAdd(Ptr, Offset); - FieldPtr = Builder.CreateIntToPtr(Ptr, PointerType::getUnqual(FieldTy)); + FieldPtr = Builder.CreateIntToPtr(Ptr, FieldTy->getPointerTo()); } if (isBitfield(FieldDecl)) { @@ -6025,7 +6012,7 @@ // If this is a bitfield, the field may span multiple fields in the LLVM // type. As such, cast the pointer to be a pointer to the declared type. - FieldPtr = Builder.CreateBitCast(FieldPtr, PointerType::getUnqual(FieldTy)); + FieldPtr = Builder.CreateBitCast(FieldPtr, FieldTy->getPointerTo()); unsigned LLVMValueBitSize = FieldTy->getPrimitiveSizeInBits(); // Finally, because bitfields can span LLVM fields, and because the start @@ -6055,8 +6042,7 @@ Constant *Offset = ConstantInt::get(TD.getIntPtrType(Context), ByteOffset); FieldPtr = Builder.CreatePtrToInt(FieldPtr, Offset->getType()); FieldPtr = Builder.CreateAdd(FieldPtr, Offset); - FieldPtr = Builder.CreateIntToPtr(FieldPtr, - PointerType::getUnqual(FieldTy)); + FieldPtr = Builder.CreateIntToPtr(FieldPtr, FieldTy->getPointerTo()); // Adjust bitstart to account for the pointer movement. 
BitStart -= ByteOffset*8; @@ -6077,7 +6063,7 @@ } else { // Make sure we return a pointer to the right type. const Type *EltTy = ConvertType(TREE_TYPE(exp)); - FieldPtr = Builder.CreateBitCast(FieldPtr, PointerType::getUnqual(EltTy)); + FieldPtr = Builder.CreateBitCast(FieldPtr, EltTy->getPointerTo()); } assert(BitStart == 0 && @@ -6115,7 +6101,7 @@ if (Decl == 0) { if (errorcount || sorrycount) { const Type *Ty = ConvertType(TREE_TYPE(exp)); - const PointerType *PTy = PointerType::getUnqual(Ty); + const PointerType *PTy = Ty->getPointerTo(); LValue LV(ConstantPointerNull::get(PTy), 1); return LV; } @@ -6153,7 +6139,7 @@ // If we have "extern void foo", make the global have type {} instead of // type void. if (Ty->isVoidTy()) Ty = StructType::get(Context); - const PointerType *PTy = PointerType::getUnqual(Ty); + const PointerType *PTy = Ty->getPointerTo(); unsigned Alignment = Ty->isSized() ? TD.getABITypeAlignment(Ty) : 1; if (DECL_ALIGN(exp)) { if (DECL_USER_ALIGN(exp) || 8 * Alignment < (unsigned)DECL_ALIGN(exp)) @@ -6486,7 +6472,7 @@ TySize == 32 || TySize == 64) { LLVMTy = IntegerType::get(Context, TySize); Op = Builder.CreateLoad(Builder.CreateBitCast(LV.Ptr, - PointerType::getUnqual(LLVMTy))); + LLVMTy->getPointerTo())); } else { // Otherwise, emit our value as a lvalue and let the codegen deal with // it. @@ -7169,7 +7155,7 @@ Constant *Idx = Convert(TREE_OPERAND(exp, 1)); // The offset in bytes. // Convert the pointer into an i8* and add the offset to it. - Ptr = TheFolder->CreateBitCast(Ptr, Type::getInt8Ty(Context)->getPointerTo()); + Ptr = TheFolder->CreateBitCast(Ptr, Type::getInt8PtrTy(Context)); Constant *GEP = POINTER_TYPE_OVERFLOW_UNDEFINED ? 
TheFolder->CreateInBoundsGetElementPtr(Ptr, &Idx, 1) : TheFolder->CreateGetElementPtr(Ptr, &Idx, 1); @@ -7912,7 +7898,7 @@ BasicBlock *BB = TheTreeToLLVM->getLabelDeclBlock(exp); Constant *C = TheTreeToLLVM->getIndirectGotoBlockNumber(BB); return - TheFolder->CreateIntToPtr(C, PointerType::getUnqual(Type::getInt8Ty(Context))); + TheFolder->CreateIntToPtr(C, Type::getInt8PtrTy(Context)); } Constant *TreeConstantToLLVM::EmitLV_COMPLEX_CST(tree exp) { @@ -7998,7 +7984,7 @@ tree FieldDecl = TREE_OPERAND(exp, 1); StructAddrLV = TheFolder->CreateBitCast(StructAddrLV, - PointerType::getUnqual(StructTy)); + StructTy->getPointerTo()); const Type *FieldTy = ConvertType(getDeclaredType(FieldDecl)); // BitStart - This is the actual offset of the field from the start of the @@ -8040,14 +8026,12 @@ } Constant *Ptr = TheFolder->CreatePtrToInt(StructAddrLV, Offset->getType()); Ptr = TheFolder->CreateAdd(Ptr, Offset); - FieldPtr = TheFolder->CreateIntToPtr(Ptr, - PointerType::getUnqual(FieldTy)); + FieldPtr = TheFolder->CreateIntToPtr(Ptr, FieldTy->getPointerTo()); } // Make sure we return a result of the right type. 
- if (PointerType::getUnqual(FieldTy) != FieldPtr->getType()) - FieldPtr = TheFolder->CreateBitCast(FieldPtr, - PointerType::getUnqual(FieldTy)); + if (FieldTy->getPointerTo() != FieldPtr->getType()) + FieldPtr = TheFolder->CreateBitCast(FieldPtr, FieldTy->getPointerTo()); assert(BitStart == 0 && "It's a bitfield reference or we didn't get to the field!"); Modified: gcc-plugin/trunk/llvm-types.cpp URL: http://llvm.org/viewvc/llvm-project/gcc-plugin/trunk/llvm-types.cpp?rev=83402&r1=83401&r2=83402&view=diff ============================================================================== --- gcc-plugin/trunk/llvm-types.cpp (original) +++ gcc-plugin/trunk/llvm-types.cpp Tue Oct 6 14:10:58 2009 @@ -782,7 +782,7 @@ if (Ty->isVoidTy()) Ty = Type::getInt8Ty(Context); // void* -> sbyte* - return TypeDB.setType(type, PointerType::getUnqual(Ty)); + return TypeDB.setType(type, Ty->getPointerTo()); } case METHOD_TYPE: @@ -955,7 +955,7 @@ /// argument is passed by value. It is lowered to a parameter passed by /// reference with an additional parameter attribute "ByVal". 
void HandleByValArgument(const llvm::Type *LLVMTy, tree type) { - HandleScalarArgument(PointerType::getUnqual(LLVMTy), type); + HandleScalarArgument(LLVMTy->getPointerTo(), type); } /// HandleFCAArgument - This callback is invoked if the aggregate function Modified: gcc-plugin/trunk/x86/llvm-target.cpp URL: http://llvm.org/viewvc/llvm-project/gcc-plugin/trunk/x86/llvm-target.cpp?rev=83402&r1=83401&r2=83402&view=diff ============================================================================== --- gcc-plugin/trunk/x86/llvm-target.cpp (original) +++ gcc-plugin/trunk/x86/llvm-target.cpp Tue Oct 6 14:10:58 2009 @@ -292,7 +292,7 @@ return true; } case IX86_BUILTIN_LOADQ: { - PointerType *i64Ptr = PointerType::getUnqual(Type::getInt64Ty(Context)); + const PointerType *i64Ptr = Type::getInt64PtrTy(Context); Ops[0] = Builder.CreateBitCast(Ops[0], i64Ptr, "tmp"); Ops[0] = Builder.CreateLoad(Ops[0], "tmp"); Value *Zero = ConstantInt::get(Type::getInt64Ty(Context), 0); @@ -304,7 +304,7 @@ } case IX86_BUILTIN_LOADUPS: { VectorType *v4f32 = VectorType::get(Type::getFloatTy(Context), 4); - PointerType *v4f32Ptr = PointerType::getUnqual(v4f32); + const PointerType *v4f32Ptr = v4f32->getPointerTo(); Value *BC = Builder.CreateBitCast(Ops[0], v4f32Ptr, "tmp"); LoadInst *LI = Builder.CreateLoad(BC, "tmp"); LI->setAlignment(1); @@ -313,7 +313,7 @@ } case IX86_BUILTIN_LOADUPD: { VectorType *v2f64 = VectorType::get(Type::getDoubleTy(Context), 2); - PointerType *v2f64Ptr = PointerType::getUnqual(v2f64); + const PointerType *v2f64Ptr = v2f64->getPointerTo(); Value *BC = Builder.CreateBitCast(Ops[0], v2f64Ptr, "tmp"); LoadInst *LI = Builder.CreateLoad(BC, "tmp"); LI->setAlignment(1); @@ -322,7 +322,7 @@ } case IX86_BUILTIN_LOADDQU: { VectorType *v16i8 = VectorType::get(Type::getInt8Ty(Context), 16); - PointerType *v16i8Ptr = PointerType::getUnqual(v16i8); + const PointerType *v16i8Ptr = v16i8->getPointerTo(); Value *BC = Builder.CreateBitCast(Ops[0], v16i8Ptr, "tmp"); LoadInst *LI = 
Builder.CreateLoad(BC, "tmp"); LI->setAlignment(1); @@ -331,7 +331,7 @@ } case IX86_BUILTIN_STOREUPS: { VectorType *v4f32 = VectorType::get(Type::getFloatTy(Context), 4); - PointerType *v4f32Ptr = PointerType::getUnqual(v4f32); + const PointerType *v4f32Ptr = v4f32->getPointerTo(); Value *BC = Builder.CreateBitCast(Ops[0], v4f32Ptr, "tmp"); StoreInst *SI = Builder.CreateStore(Ops[1], BC); SI->setAlignment(1); @@ -340,7 +340,7 @@ } case IX86_BUILTIN_STOREUPD: { VectorType *v2f64 = VectorType::get(Type::getDoubleTy(Context), 2); - PointerType *v2f64Ptr = PointerType::getUnqual(v2f64); + const PointerType *v2f64Ptr = v2f64->getPointerTo(); Value *BC = Builder.CreateBitCast(Ops[0], v2f64Ptr, "tmp"); StoreInst *SI = Builder.CreateStore(Ops[1], BC); SI->setAlignment(1); @@ -349,7 +349,7 @@ } case IX86_BUILTIN_STOREDQU: { VectorType *v16i8 = VectorType::get(Type::getInt8Ty(Context), 16); - PointerType *v16i8Ptr = PointerType::getUnqual(v16i8); + const PointerType *v16i8Ptr = v16i8->getPointerTo(); Value *BC = Builder.CreateBitCast(Ops[0], v16i8Ptr, "tmp"); StoreInst *SI = Builder.CreateStore(Ops[1], BC); SI->setAlignment(1); @@ -357,7 +357,7 @@ return true; } case IX86_BUILTIN_LOADHPS: { - PointerType *f64Ptr = PointerType::getUnqual(Type::getDoubleTy(Context)); + const PointerType *f64Ptr = Type::getDoublePtrTy(Context); Ops[1] = Builder.CreateBitCast(Ops[1], f64Ptr, "tmp"); Value *Load = Builder.CreateLoad(Ops[1], "tmp"); Ops[1] = BuildVector(Load, UndefValue::get(Type::getDoubleTy(Context)), NULL); @@ -367,7 +367,7 @@ return true; } case IX86_BUILTIN_LOADLPS: { - PointerType *f64Ptr = PointerType::getUnqual(Type::getDoubleTy(Context)); + const PointerType *f64Ptr = Type::getDoublePtrTy(Context); Ops[1] = Builder.CreateBitCast(Ops[1], f64Ptr, "tmp"); Value *Load = Builder.CreateLoad(Ops[1], "tmp"); Ops[1] = BuildVector(Load, UndefValue::get(Type::getDoubleTy(Context)), NULL); @@ -394,7 +394,7 @@ } case IX86_BUILTIN_STOREHPS: { VectorType *v2f64 = 
VectorType::get(Type::getDoubleTy(Context), 2); - PointerType *f64Ptr = PointerType::getUnqual(Type::getDoubleTy(Context)); + const PointerType *f64Ptr = Type::getDoublePtrTy(Context); Ops[0] = Builder.CreateBitCast(Ops[0], f64Ptr, "tmp"); Value *Idx = ConstantInt::get(Type::getInt32Ty(Context), 1); Ops[1] = Builder.CreateBitCast(Ops[1], v2f64, "tmp"); @@ -404,7 +404,7 @@ } case IX86_BUILTIN_STORELPS: { VectorType *v2f64 = VectorType::get(Type::getDoubleTy(Context), 2); - PointerType *f64Ptr = PointerType::getUnqual(Type::getDoubleTy(Context)); + const PointerType *f64Ptr = Type::getDoublePtrTy(Context); Ops[0] = Builder.CreateBitCast(Ops[0], f64Ptr, "tmp"); Value *Idx = ConstantInt::get(Type::getInt32Ty(Context), 0); Ops[1] = Builder.CreateBitCast(Ops[1], v2f64, "tmp"); @@ -605,8 +605,7 @@ Intrinsic::getDeclaration(TheModule, Intrinsic::x86_sse_ldmxcsr); Value *Ptr = CreateTemporary(Type::getInt32Ty(Context)); Builder.CreateStore(Ops[0], Ptr); - Ptr = Builder.CreateBitCast(Ptr, - PointerType::getUnqual(Type::getInt8Ty(Context)), "tmp"); + Ptr = Builder.CreateBitCast(Ptr, Type::getInt8PtrTy(Context), "tmp"); Result = Builder.CreateCall(ldmxcsr, Ptr); return true; } @@ -614,8 +613,7 @@ Function *stmxcsr = Intrinsic::getDeclaration(TheModule, Intrinsic::x86_sse_stmxcsr); Value *Ptr = CreateTemporary(Type::getInt32Ty(Context)); - Value *BPtr = Builder.CreateBitCast(Ptr, - PointerType::getUnqual(Type::getInt8Ty(Context)), "tmp"); + Value *BPtr = Builder.CreateBitCast(Ptr, Type::getInt8PtrTy(Context), "tmp"); Builder.CreateCall(stmxcsr, BPtr); Result = Builder.CreateLoad(Ptr, "tmp"); From baldrick at free.fr Tue Oct 6 14:39:28 2009 From: baldrick at free.fr (Duncan Sands) Date: Tue, 06 Oct 2009 19:39:28 -0000 Subject: [llvm-commits] [gcc-plugin] r83403 - /gcc-plugin/trunk/x86/llvm-target.cpp Message-ID: <200910061939.n96JdTON026134@zion.cs.uiuc.edu> Author: baldrick Date: Tue Oct 6 14:39:28 2009 New Revision: 83403 URL: 
http://llvm.org/viewvc/llvm-project?rev=83403&view=rev Log: Remove useless value names ("tmp") - these are not helpful, and they fatten the bitcode. Modified: gcc-plugin/trunk/x86/llvm-target.cpp Modified: gcc-plugin/trunk/x86/llvm-target.cpp URL: http://llvm.org/viewvc/llvm-project/gcc-plugin/trunk/x86/llvm-target.cpp?rev=83403&r1=83402&r2=83403&view=diff ============================================================================== --- gcc-plugin/trunk/x86/llvm-target.cpp (original) +++ gcc-plugin/trunk/x86/llvm-target.cpp Tue Oct 6 14:39:28 2009 @@ -75,7 +75,7 @@ case IX86_BUILTIN_PADDW128: case IX86_BUILTIN_PADDD128: case IX86_BUILTIN_PADDQ128: - Result = Builder.CreateAdd(Ops[0], Ops[1], "tmp"); + Result = Builder.CreateAdd(Ops[0], Ops[1]); return true; case IX86_BUILTIN_SUBPS: case IX86_BUILTIN_SUBPD: @@ -87,34 +87,34 @@ case IX86_BUILTIN_PSUBW128: case IX86_BUILTIN_PSUBD128: case IX86_BUILTIN_PSUBQ128: - Result = Builder.CreateSub(Ops[0], Ops[1], "tmp"); + Result = Builder.CreateSub(Ops[0], Ops[1]); return true; case IX86_BUILTIN_MULPS: case IX86_BUILTIN_MULPD: case IX86_BUILTIN_PMULLW: case IX86_BUILTIN_PMULLW128: - Result = Builder.CreateMul(Ops[0], Ops[1], "tmp"); + Result = Builder.CreateMul(Ops[0], Ops[1]); return true; case IX86_BUILTIN_DIVPS: case IX86_BUILTIN_DIVPD: - Result = Builder.CreateFDiv(Ops[0], Ops[1], "tmp"); + Result = Builder.CreateFDiv(Ops[0], Ops[1]); return true; case IX86_BUILTIN_PAND: case IX86_BUILTIN_PAND128: - Result = Builder.CreateAnd(Ops[0], Ops[1], "tmp"); + Result = Builder.CreateAnd(Ops[0], Ops[1]); return true; case IX86_BUILTIN_PANDN: case IX86_BUILTIN_PANDN128: - Ops[0] = Builder.CreateNot(Ops[0], "tmp"); - Result = Builder.CreateAnd(Ops[0], Ops[1], "tmp"); + Ops[0] = Builder.CreateNot(Ops[0]); + Result = Builder.CreateAnd(Ops[0], Ops[1]); return true; case IX86_BUILTIN_POR: case IX86_BUILTIN_POR128: - Result = Builder.CreateOr(Ops[0], Ops[1], "tmp"); + Result = Builder.CreateOr(Ops[0], Ops[1]); return true; case
IX86_BUILTIN_PXOR: case IX86_BUILTIN_PXOR128: - Result = Builder.CreateXor(Ops[0], Ops[1], "tmp"); + Result = Builder.CreateXor(Ops[0], Ops[1]); return true; case IX86_BUILTIN_ANDPS: case IX86_BUILTIN_ORPS: @@ -133,27 +133,27 @@ VectorType::get(Type::getInt64Ty(Context), 2), "tmp"); - Ops[1] = Builder.CreateBitCast(Ops[1], Ops[0]->getType(), "tmp"); + Ops[1] = Builder.CreateBitCast(Ops[1], Ops[0]->getType()); switch (FnCode) { case IX86_BUILTIN_ANDPS: case IX86_BUILTIN_ANDPD: - Result = Builder.CreateAnd(Ops[0], Ops[1], "tmp"); + Result = Builder.CreateAnd(Ops[0], Ops[1]); break; case IX86_BUILTIN_ORPS: case IX86_BUILTIN_ORPD: - Result = Builder.CreateOr (Ops[0], Ops[1], "tmp"); + Result = Builder.CreateOr (Ops[0], Ops[1]); break; case IX86_BUILTIN_XORPS: case IX86_BUILTIN_XORPD: - Result = Builder.CreateXor(Ops[0], Ops[1], "tmp"); + Result = Builder.CreateXor(Ops[0], Ops[1]); break; case IX86_BUILTIN_ANDNPS: case IX86_BUILTIN_ANDNPD: - Ops[0] = Builder.CreateNot(Ops[0], "tmp"); - Result = Builder.CreateAnd(Ops[0], Ops[1], "tmp"); + Ops[0] = Builder.CreateNot(Ops[0]); + Result = Builder.CreateAnd(Ops[0], Ops[1]); break; } - Result = Builder.CreateBitCast(Result, ResultType, "tmp"); + Result = Builder.CreateBitCast(Result, ResultType); return true; case IX86_BUILTIN_SHUFPS: if (ConstantInt *Elt = dyn_cast(Ops[2])) { @@ -293,20 +293,20 @@ } case IX86_BUILTIN_LOADQ: { const PointerType *i64Ptr = Type::getInt64PtrTy(Context); - Ops[0] = Builder.CreateBitCast(Ops[0], i64Ptr, "tmp"); - Ops[0] = Builder.CreateLoad(Ops[0], "tmp"); + Ops[0] = Builder.CreateBitCast(Ops[0], i64Ptr); + Ops[0] = Builder.CreateLoad(Ops[0]); Value *Zero = ConstantInt::get(Type::getInt64Ty(Context), 0); Result = BuildVector(Zero, Zero, NULL); Value *Idx = ConstantInt::get(Type::getInt32Ty(Context), 0); - Result = Builder.CreateInsertElement(Result, Ops[0], Idx, "tmp"); - Result = Builder.CreateBitCast(Result, ResultType, "tmp"); + Result = Builder.CreateInsertElement(Result, Ops[0], Idx); + Result 
= Builder.CreateBitCast(Result, ResultType); return true; } case IX86_BUILTIN_LOADUPS: { VectorType *v4f32 = VectorType::get(Type::getFloatTy(Context), 4); const PointerType *v4f32Ptr = v4f32->getPointerTo(); - Value *BC = Builder.CreateBitCast(Ops[0], v4f32Ptr, "tmp"); - LoadInst *LI = Builder.CreateLoad(BC, "tmp"); + Value *BC = Builder.CreateBitCast(Ops[0], v4f32Ptr); + LoadInst *LI = Builder.CreateLoad(BC); LI->setAlignment(1); Result = LI; return true; @@ -314,8 +314,8 @@ case IX86_BUILTIN_LOADUPD: { VectorType *v2f64 = VectorType::get(Type::getDoubleTy(Context), 2); const PointerType *v2f64Ptr = v2f64->getPointerTo(); - Value *BC = Builder.CreateBitCast(Ops[0], v2f64Ptr, "tmp"); - LoadInst *LI = Builder.CreateLoad(BC, "tmp"); + Value *BC = Builder.CreateBitCast(Ops[0], v2f64Ptr); + LoadInst *LI = Builder.CreateLoad(BC); LI->setAlignment(1); Result = LI; return true; @@ -323,8 +323,8 @@ case IX86_BUILTIN_LOADDQU: { VectorType *v16i8 = VectorType::get(Type::getInt8Ty(Context), 16); const PointerType *v16i8Ptr = v16i8->getPointerTo(); - Value *BC = Builder.CreateBitCast(Ops[0], v16i8Ptr, "tmp"); - LoadInst *LI = Builder.CreateLoad(BC, "tmp"); + Value *BC = Builder.CreateBitCast(Ops[0], v16i8Ptr); + LoadInst *LI = Builder.CreateLoad(BC); LI->setAlignment(1); Result = LI; return true; @@ -332,7 +332,7 @@ case IX86_BUILTIN_STOREUPS: { VectorType *v4f32 = VectorType::get(Type::getFloatTy(Context), 4); const PointerType *v4f32Ptr = v4f32->getPointerTo(); - Value *BC = Builder.CreateBitCast(Ops[0], v4f32Ptr, "tmp"); + Value *BC = Builder.CreateBitCast(Ops[0], v4f32Ptr); StoreInst *SI = Builder.CreateStore(Ops[1], BC); SI->setAlignment(1); Result = SI; @@ -341,7 +341,7 @@ case IX86_BUILTIN_STOREUPD: { VectorType *v2f64 = VectorType::get(Type::getDoubleTy(Context), 2); const PointerType *v2f64Ptr = v2f64->getPointerTo(); - Value *BC = Builder.CreateBitCast(Ops[0], v2f64Ptr, "tmp"); + Value *BC = Builder.CreateBitCast(Ops[0], v2f64Ptr); StoreInst *SI = 
Builder.CreateStore(Ops[1], BC); SI->setAlignment(1); Result = SI; @@ -350,7 +350,7 @@ case IX86_BUILTIN_STOREDQU: { VectorType *v16i8 = VectorType::get(Type::getInt8Ty(Context), 16); const PointerType *v16i8Ptr = v16i8->getPointerTo(); - Value *BC = Builder.CreateBitCast(Ops[0], v16i8Ptr, "tmp"); + Value *BC = Builder.CreateBitCast(Ops[0], v16i8Ptr); StoreInst *SI = Builder.CreateStore(Ops[1], BC); SI->setAlignment(1); Result = SI; @@ -358,57 +358,57 @@ } case IX86_BUILTIN_LOADHPS: { const PointerType *f64Ptr = Type::getDoublePtrTy(Context); - Ops[1] = Builder.CreateBitCast(Ops[1], f64Ptr, "tmp"); - Value *Load = Builder.CreateLoad(Ops[1], "tmp"); + Ops[1] = Builder.CreateBitCast(Ops[1], f64Ptr); + Value *Load = Builder.CreateLoad(Ops[1]); Ops[1] = BuildVector(Load, UndefValue::get(Type::getDoubleTy(Context)), NULL); - Ops[1] = Builder.CreateBitCast(Ops[1], ResultType, "tmp"); + Ops[1] = Builder.CreateBitCast(Ops[1], ResultType); Result = BuildVectorShuffle(Ops[0], Ops[1], 0, 1, 4, 5); - Result = Builder.CreateBitCast(Result, ResultType, "tmp"); + Result = Builder.CreateBitCast(Result, ResultType); return true; } case IX86_BUILTIN_LOADLPS: { const PointerType *f64Ptr = Type::getDoublePtrTy(Context); - Ops[1] = Builder.CreateBitCast(Ops[1], f64Ptr, "tmp"); - Value *Load = Builder.CreateLoad(Ops[1], "tmp"); + Ops[1] = Builder.CreateBitCast(Ops[1], f64Ptr); + Value *Load = Builder.CreateLoad(Ops[1]); Ops[1] = BuildVector(Load, UndefValue::get(Type::getDoubleTy(Context)), NULL); - Ops[1] = Builder.CreateBitCast(Ops[1], ResultType, "tmp"); + Ops[1] = Builder.CreateBitCast(Ops[1], ResultType); Result = BuildVectorShuffle(Ops[0], Ops[1], 4, 5, 2, 3); - Result = Builder.CreateBitCast(Result, ResultType, "tmp"); + Result = Builder.CreateBitCast(Result, ResultType); return true; } case IX86_BUILTIN_LOADHPD: { - Value *Load = Builder.CreateLoad(Ops[1], "tmp"); + Value *Load = Builder.CreateLoad(Ops[1]); Ops[1] = BuildVector(Load, UndefValue::get(Type::getDoubleTy(Context)), 
NULL); - Ops[1] = Builder.CreateBitCast(Ops[1], ResultType, "tmp"); + Ops[1] = Builder.CreateBitCast(Ops[1], ResultType); Result = BuildVectorShuffle(Ops[0], Ops[1], 0, 2); - Result = Builder.CreateBitCast(Result, ResultType, "tmp"); + Result = Builder.CreateBitCast(Result, ResultType); return true; } case IX86_BUILTIN_LOADLPD: { - Value *Load = Builder.CreateLoad(Ops[1], "tmp"); + Value *Load = Builder.CreateLoad(Ops[1]); Ops[1] = BuildVector(Load, UndefValue::get(Type::getDoubleTy(Context)), NULL); - Ops[1] = Builder.CreateBitCast(Ops[1], ResultType, "tmp"); + Ops[1] = Builder.CreateBitCast(Ops[1], ResultType); Result = BuildVectorShuffle(Ops[0], Ops[1], 2, 1); - Result = Builder.CreateBitCast(Result, ResultType, "tmp"); + Result = Builder.CreateBitCast(Result, ResultType); return true; } case IX86_BUILTIN_STOREHPS: { VectorType *v2f64 = VectorType::get(Type::getDoubleTy(Context), 2); const PointerType *f64Ptr = Type::getDoublePtrTy(Context); - Ops[0] = Builder.CreateBitCast(Ops[0], f64Ptr, "tmp"); + Ops[0] = Builder.CreateBitCast(Ops[0], f64Ptr); Value *Idx = ConstantInt::get(Type::getInt32Ty(Context), 1); - Ops[1] = Builder.CreateBitCast(Ops[1], v2f64, "tmp"); - Ops[1] = Builder.CreateExtractElement(Ops[1], Idx, "tmp"); + Ops[1] = Builder.CreateBitCast(Ops[1], v2f64); + Ops[1] = Builder.CreateExtractElement(Ops[1], Idx); Result = Builder.CreateStore(Ops[1], Ops[0]); return true; } case IX86_BUILTIN_STORELPS: { VectorType *v2f64 = VectorType::get(Type::getDoubleTy(Context), 2); const PointerType *f64Ptr = Type::getDoublePtrTy(Context); - Ops[0] = Builder.CreateBitCast(Ops[0], f64Ptr, "tmp"); + Ops[0] = Builder.CreateBitCast(Ops[0], f64Ptr); Value *Idx = ConstantInt::get(Type::getInt32Ty(Context), 0); - Ops[1] = Builder.CreateBitCast(Ops[1], v2f64, "tmp"); - Ops[1] = Builder.CreateExtractElement(Ops[1], Idx, "tmp"); + Ops[1] = Builder.CreateBitCast(Ops[1], v2f64); + Ops[1] = Builder.CreateExtractElement(Ops[1], Idx); Result = Builder.CreateStore(Ops[1], Ops[0]); 
return true; } @@ -424,13 +424,13 @@ case IX86_BUILTIN_VEC_INIT_V4HI: // Sometimes G++ promotes arguments to int. for (unsigned i = 0; i != 4; ++i) - Ops[i] = Builder.CreateIntCast(Ops[i], Type::getInt16Ty(Context), false, "tmp"); + Ops[i] = Builder.CreateIntCast(Ops[i], Type::getInt16Ty(Context), false); Result = BuildVector(Ops[0], Ops[1], Ops[2], Ops[3], NULL); return true; case IX86_BUILTIN_VEC_INIT_V8QI: // Sometimes G++ promotes arguments to int. for (unsigned i = 0; i != 8; ++i) - Ops[i] = Builder.CreateIntCast(Ops[i], Type::getInt8Ty(Context), false, "tmp"); + Ops[i] = Builder.CreateIntCast(Ops[i], Type::getInt8Ty(Context), false); Result = BuildVector(Ops[0], Ops[1], Ops[2], Ops[3], Ops[4], Ops[5], Ops[6], Ops[7], NULL); return true; @@ -442,24 +442,24 @@ case IX86_BUILTIN_VEC_EXT_V4SF: case IX86_BUILTIN_VEC_EXT_V8HI: case IX86_BUILTIN_VEC_EXT_V16QI: - Result = Builder.CreateExtractElement(Ops[0], Ops[1], "tmp"); + Result = Builder.CreateExtractElement(Ops[0], Ops[1]); return true; case IX86_BUILTIN_VEC_SET_V16QI: // Sometimes G++ promotes arguments to int. - Ops[1] = Builder.CreateIntCast(Ops[1], Type::getInt8Ty(Context), false, "tmp"); - Result = Builder.CreateInsertElement(Ops[0], Ops[1], Ops[2], "tmp"); + Ops[1] = Builder.CreateIntCast(Ops[1], Type::getInt8Ty(Context), false); + Result = Builder.CreateInsertElement(Ops[0], Ops[1], Ops[2]); return true; case IX86_BUILTIN_VEC_SET_V4HI: case IX86_BUILTIN_VEC_SET_V8HI: // GCC sometimes doesn't produce the right element type. 
- Ops[1] = Builder.CreateIntCast(Ops[1], Type::getInt16Ty(Context), false, "tmp"); - Result = Builder.CreateInsertElement(Ops[0], Ops[1], Ops[2], "tmp"); + Ops[1] = Builder.CreateIntCast(Ops[1], Type::getInt16Ty(Context), false); + Result = Builder.CreateInsertElement(Ops[0], Ops[1], Ops[2]); return true; case IX86_BUILTIN_VEC_SET_V4SI: - Result = Builder.CreateInsertElement(Ops[0], Ops[1], Ops[2], "tmp"); + Result = Builder.CreateInsertElement(Ops[0], Ops[1], Ops[2]); return true; case IX86_BUILTIN_VEC_SET_V2DI: - Result = Builder.CreateInsertElement(Ops[0], Ops[1], Ops[2], "tmp"); + Result = Builder.CreateInsertElement(Ops[0], Ops[1], Ops[2]); return true; case IX86_BUILTIN_CMPEQPS: case IX86_BUILTIN_CMPLTPS: @@ -497,8 +497,8 @@ Value *Arg1 = Ops[1]; if (flip) std::swap(Arg0, Arg1); Value *CallOps[3] = { Arg0, Arg1, Pred }; - Result = Builder.CreateCall(cmpps, CallOps, CallOps+3, "tmp"); - Result = Builder.CreateBitCast(Result, ResultType, "tmp"); + Result = Builder.CreateCall(cmpps, CallOps, CallOps+3); + Result = Builder.CreateBitCast(Result, ResultType); return true; } case IX86_BUILTIN_CMPEQSS: @@ -527,8 +527,8 @@ } Value *Pred = ConstantInt::get(Type::getInt8Ty(Context), PredCode); Value *CallOps[3] = { Ops[0], Ops[1], Pred }; - Result = Builder.CreateCall(cmpss, CallOps, CallOps+3, "tmp"); - Result = Builder.CreateBitCast(Result, ResultType, "tmp"); + Result = Builder.CreateCall(cmpss, CallOps, CallOps+3); + Result = Builder.CreateBitCast(Result, ResultType); return true; } case IX86_BUILTIN_CMPEQPD: @@ -568,8 +568,8 @@ if (flip) std::swap(Arg0, Arg1); Value *CallOps[3] = { Arg0, Arg1, Pred }; - Result = Builder.CreateCall(cmppd, CallOps, CallOps+3, "tmp"); - Result = Builder.CreateBitCast(Result, ResultType, "tmp"); + Result = Builder.CreateCall(cmppd, CallOps, CallOps+3); + Result = Builder.CreateBitCast(Result, ResultType); return true; } case IX86_BUILTIN_CMPEQSD: @@ -596,8 +596,8 @@ } Value *Pred = ConstantInt::get(Type::getInt8Ty(Context), PredCode); 
Value *CallOps[3] = { Ops[0], Ops[1], Pred }; - Result = Builder.CreateCall(cmpsd, CallOps, CallOps+3, "tmp"); - Result = Builder.CreateBitCast(Result, ResultType, "tmp"); + Result = Builder.CreateCall(cmpsd, CallOps, CallOps+3); + Result = Builder.CreateBitCast(Result, ResultType); return true; } case IX86_BUILTIN_LDMXCSR: { @@ -605,7 +605,7 @@ Intrinsic::getDeclaration(TheModule, Intrinsic::x86_sse_ldmxcsr); Value *Ptr = CreateTemporary(Type::getInt32Ty(Context)); Builder.CreateStore(Ops[0], Ptr); - Ptr = Builder.CreateBitCast(Ptr, Type::getInt8PtrTy(Context), "tmp"); + Ptr = Builder.CreateBitCast(Ptr, Type::getInt8PtrTy(Context)); Result = Builder.CreateCall(ldmxcsr, Ptr); return true; } @@ -613,10 +613,10 @@ Function *stmxcsr = Intrinsic::getDeclaration(TheModule, Intrinsic::x86_sse_stmxcsr); Value *Ptr = CreateTemporary(Type::getInt32Ty(Context)); - Value *BPtr = Builder.CreateBitCast(Ptr, Type::getInt8PtrTy(Context), "tmp"); + Value *BPtr = Builder.CreateBitCast(Ptr, Type::getInt8PtrTy(Context)); Builder.CreateCall(stmxcsr, BPtr); - Result = Builder.CreateLoad(Ptr, "tmp"); + Result = Builder.CreateLoad(Ptr); return true; } } From kremenek at apple.com Tue Oct 6 14:45:38 2009 From: kremenek at apple.com (Ted Kremenek) Date: Tue, 06 Oct 2009 19:45:38 -0000 Subject: [llvm-commits] [llvm] r83404 - /llvm/trunk/lib/Transforms/Scalar/CMakeLists.txt Message-ID: <200910061945.n96JjcUC026957@zion.cs.uiuc.edu> Author: kremenek Date: Tue Oct 6 14:45:38 2009 New Revision: 83404 URL: http://llvm.org/viewvc/llvm-project?rev=83404&view=rev Log: Update CMake file. 
Modified: llvm/trunk/lib/Transforms/Scalar/CMakeLists.txt Modified: llvm/trunk/lib/Transforms/Scalar/CMakeLists.txt URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/CMakeLists.txt?rev=83404&r1=83403&r2=83404&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/Scalar/CMakeLists.txt (original) +++ llvm/trunk/lib/Transforms/Scalar/CMakeLists.txt Tue Oct 6 14:45:38 2009 @@ -19,7 +19,6 @@ LoopUnroll.cpp LoopUnswitch.cpp MemCpyOptimizer.cpp - PredicateSimplifier.cpp Reassociate.cpp Reg2Mem.cpp SCCP.cpp From nicolas.geoffray at lip6.fr Tue Oct 6 14:55:54 2009 From: nicolas.geoffray at lip6.fr (Nicolas Geoffray) Date: Tue, 06 Oct 2009 19:55:54 -0000 Subject: [llvm-commits] [llvm] r83405 - /llvm/trunk/lib/Support/CommandLine.cpp Message-ID: <200910061955.n96Jts80028349@zion.cs.uiuc.edu> Author: geoffray Date: Tue Oct 6 14:55:53 2009 New Revision: 83405 URL: http://llvm.org/viewvc/llvm-project?rev=83405&view=rev Log: Bugfix for the CommaSeparated option. The original code was adding the whole string at the end of the list, instead of the last comma-separated string. Modified: llvm/trunk/lib/Support/CommandLine.cpp Modified: llvm/trunk/lib/Support/CommandLine.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/CommandLine.cpp?rev=83405&r1=83404&r2=83405&view=diff ============================================================================== --- llvm/trunk/lib/Support/CommandLine.cpp (original) +++ llvm/trunk/lib/Support/CommandLine.cpp Tue Oct 6 14:55:53 2009 @@ -643,6 +643,7 @@ // Check for another comma. 
Pos = Val.find(','); } + Value = Val; } // If this is a named positional argument, just remember that it is the From bob.wilson at apple.com Tue Oct 6 15:18:46 2009 From: bob.wilson at apple.com (Bob Wilson) Date: Tue, 06 Oct 2009 20:18:46 -0000 Subject: [llvm-commits] [llvm] r83407 - /llvm/trunk/lib/Target/ARM/ARMInstrInfo.td Message-ID: <200910062018.n96KIkxC031524@zion.cs.uiuc.edu> Author: bwilson Date: Tue Oct 6 15:18:46 2009 New Revision: 83407 URL: http://llvm.org/viewvc/llvm-project?rev=83407&view=rev Log: Fix a comment typo. Patch by Johnny Chen. Modified: llvm/trunk/lib/Target/ARM/ARMInstrInfo.td Modified: llvm/trunk/lib/Target/ARM/ARMInstrInfo.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrInfo.td?rev=83407&r1=83406&r2=83407&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrInfo.td (original) +++ llvm/trunk/lib/Target/ARM/ARMInstrInfo.td Tue Oct 6 15:18:46 2009 @@ -384,7 +384,7 @@ } /// AI1_bin_s_irs - Similar to AsI1_bin_irs except it sets the 's' bit so the -/// instruction modifies the CSPR register. +/// instruction modifies the CPSR register. let Defs = [CPSR] in { multiclass AI1_bin_s_irs opcod, string opc, PatFrag opnode, bit Commutable = 0> { From clattner at apple.com Tue Oct 6 15:21:45 2009 From: clattner at apple.com (Chris Lattner) Date: Tue, 6 Oct 2009 13:21:45 -0700 Subject: [llvm-commits] [llvm-gcc-4.2] r83394 - /llvm-gcc-4.2/trunk/gcc/llvm-linker-hack.cpp In-Reply-To: <200910061751.n96Hp68d010927@zion.cs.uiuc.edu> References: <200910061751.n96Hp68d010927@zion.cs.uiuc.edu> Message-ID: <6FA43509-CEC6-404B-8246-183FF3D748DD@apple.com> Thanks Dan! On Oct 6, 2009, at 10:51 AM, Dan Gohman wrote: > Author: djg > Date: Tue Oct 6 12:51:06 2009 > New Revision: 83394 > > URL: http://llvm.org/viewvc/llvm-project?rev=83394&view=rev > Log: > The PredicateSimplifier pass was removed. 
> > Modified: > llvm-gcc-4.2/trunk/gcc/llvm-linker-hack.cpp > > Modified: llvm-gcc-4.2/trunk/gcc/llvm-linker-hack.cpp > URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/gcc/llvm-linker-hack.cpp?rev=83394&r1=83393&r2=83394&view=diff > > ============================================================================== > --- llvm-gcc-4.2/trunk/gcc/llvm-linker-hack.cpp (original) > +++ llvm-gcc-4.2/trunk/gcc/llvm-linker-hack.cpp Tue Oct 6 12:51:06 > 2009 > @@ -75,7 +75,6 @@ > llvm::createAggressiveDCEPass(); > llvm::createConstantMergePass(); > llvm::createIndVarSimplifyPass(); > - llvm::createPredicateSimplifierPass(); > llvm::createCondPropagationPass(); > llvm::createGlobalOptimizerPass(); > llvm::createJumpThreadingPass(); > > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits From clattner at apple.com Tue Oct 6 15:22:32 2009 From: clattner at apple.com (Chris Lattner) Date: Tue, 6 Oct 2009 13:22:32 -0700 Subject: [llvm-commits] [llvm] r83391 - /llvm/trunk/lib/Support/Triple.cpp In-Reply-To: <3c8293b60910061107o6866dd3bu31d62b71f12e9850@mail.gmail.com> References: <200910061725.n96HPoKI007518@zion.cs.uiuc.edu> <4ACB8282.2010108@gmail.com> <3c8293b60910061107o6866dd3bu31d62b71f12e9850@mail.gmail.com> Message-ID: On Oct 6, 2009, at 11:07 AM, Collin Winter wrote: > Hi, > > 2009/10/6 Török Edwin : >> On 2009-10-06 20:35, Jeffrey Yasskin wrote: >>> On Tue, Oct 6, 2009 at 10:25 AM, Jeffrey Yasskin >> > wrote: >>>> void Triple::setArchName(const StringRef &Str) { >>>> - setTriple(Str + "-" + getVendorName() + "-" + >>>> getOSAndEnvironmentName()); >>>> + // Work around a miscompilation bug in gcc 4.0.3. >>>> >> >> Who uses 4.0.3 still? That's a very buggy release. > > It's the compiler that shipped with Ubuntu Dapper, which some of our > project's buildbot slaves are running on.
The problem may impact other > versions of gcc, but since 4.0.3 isn't listed on the "known-bad gcc > versions", we went ahead and tracked down the problematic code > snippet. IMO, if compiler bugs are easy to work around, it is good to do so. That increases the audience that can use llvm without installing other random compilers to make it work. -Chris From clattner at apple.com Tue Oct 6 15:23:23 2009 From: clattner at apple.com (Chris Lattner) Date: Tue, 6 Oct 2009 13:23:23 -0700 Subject: [llvm-commits] [llvm] r83391 - /llvm/trunk/lib/Support/Triple.cpp In-Reply-To: <200910061725.n96HPoKI007518@zion.cs.uiuc.edu> References: <200910061725.n96HPoKI007518@zion.cs.uiuc.edu> Message-ID: On Oct 6, 2009, at 10:25 AM, Jeffrey Yasskin wrote: > Author: jyasskin > Date: Tue Oct 6 12:25:50 2009 > New Revision: 83391 > > URL: http://llvm.org/viewvc/llvm-project?rev=83391&view=rev > Log: > Fix PR5112, a miscompilation on gcc-4.0.3. Patch by Collin Winter! > > Modified: > llvm/trunk/lib/Support/Triple.cpp > > Modified: llvm/trunk/lib/Support/Triple.cpp > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/Triple.cpp?rev=83391&r1=83390&r2=83391&view=diff > > ============================================================================== > --- llvm/trunk/lib/Support/Triple.cpp (original) > +++ llvm/trunk/lib/Support/Triple.cpp Tue Oct 6 12:25:50 2009 > @@ -390,7 +390,10 @@ > } > > void Triple::setArchName(const StringRef &Str) { > - setTriple(Str + "-" + getVendorName() + "-" + > getOSAndEnvironmentName()); > + // Work around a miscompilation bug in gcc 4.0.3. > + Twine a = getVendorName() + "-" + getOSAndEnvironmentName(); > + Twine b = Str + "-" + a; > + setTriple(b); > } Jeffrey, I don't think this patch is safe. Twines are constructed based on temporaries. I think that the temporary returned by getVendorName() is destroyed at the ";" and the twine refers to the dangling pointer. Can you just build up a temporary std::string instead?
-Chris From clattner at apple.com Tue Oct 6 15:24:32 2009 From: clattner at apple.com (Chris Lattner) Date: Tue, 6 Oct 2009 13:24:32 -0700 Subject: [llvm-commits] [llvm] r83392 - in /llvm/trunk: lib/CodeGen/AsmPrinter/ lib/Target/ARM/AsmPrinter/ lib/Target/Alpha/AsmPrinter/ lib/Target/Blackfin/AsmPrinter/ lib/Target/CellSPU/AsmPrinter/ lib/Target/MSP430/AsmPrinter/ lib/Target/Mips/AsmPrinter/ lib/Target/PIC16/AsmPrinter/ lib/Target/PowerPC/AsmPrinter/ lib/Target/Sparc/AsmPrinter/ lib/Target/SystemZ/AsmPrinter/ lib/Target/X86/AsmPrinter/ lib/Target/XCore/AsmPrinter/ test/CodeGen/X86/ In-Reply-To: <200910061738.n96Hcdj9009289@zion.cs.uiuc.edu> References: <200910061738.n96Hcdj9009289@zion.cs.uiuc.edu> Message-ID: On Oct 6, 2009, at 10:38 AM, Dan Gohman wrote: > Author: djg > Date: Tue Oct 6 12:38:38 2009 > New Revision: 83392 > > URL: http://llvm.org/viewvc/llvm-project?rev=83392&view=rev > Log: > Instead of printing unnecessary basic block labels as labels in > verbose-asm mode, print comments instead. This eliminates a non- > comment > difference between verbose-asm mode and non-verbose-asm mode. > > Also, factor out the relevant code out of all the targets and into > target-independent code. Very nice, thank you Dan! I definitely appreciate sinking common code down to AsmPrinter. -Chris From echristo at apple.com Tue Oct 6 15:50:41 2009 From: echristo at apple.com (Eric Christopher) Date: Tue, 6 Oct 2009 13:50:41 -0700 Subject: [llvm-commits] [llvm-gcc-4.2] r83332 - /llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp In-Reply-To: References: <200910052225.n95MPX8L009144@zion.cs.uiuc.edu> Message-ID: <84953757-4583-427C-9A0C-80BE5A2AD6D9@apple.com> On Oct 6, 2009, at 11:15 AM, Bill Wendling wrote: >> Revert the BITS_PER_UNIT part of my last patch. llvm does alignment >> computation on bytes and would fail if BITS_PER_UNIT were anything >> other than 8. >> > It might be worthwhile to put a comment around these so that future > people won't make the same changes. 
Yeah, I thought about that, but the problem is that it's very pervasive and all over the place. -eric From eocallaghan at auroraux.org Tue Oct 6 16:01:17 2009 From: eocallaghan at auroraux.org (Edward O'Callaghan) Date: Tue, 06 Oct 2009 21:01:17 -0000 Subject: [llvm-commits] [compiler-rt] r83413 - in /compiler-rt/trunk: lib/sparc64/ www/index.html Message-ID: <200910062101.n96L1HgF005286@zion.cs.uiuc.edu> Author: evocallaghan Date: Tue Oct 6 16:01:17 2009 New Revision: 83413 URL: http://llvm.org/viewvc/llvm-project?rev=83413&view=rev Log: Update compiler-rt online docs to reflect recent work and testing. Added: compiler-rt/trunk/lib/sparc64/ Modified: compiler-rt/trunk/www/index.html Modified: compiler-rt/trunk/www/index.html URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/www/index.html?rev=83413&r1=83412&r2=83413&view=diff ============================================================================== --- compiler-rt/trunk/www/index.html (original) +++ compiler-rt/trunk/www/index.html Tue Oct 6 16:01:17 2009 @@ -65,6 +65,8 @@
  • i386
  • X86-64
  • +
  • SPARC64
  • +
  • ARM
  • PowerPC
  • PowerPC 64
@@ -83,6 +85,10 @@ All + NetBSD + All + + Linux All From jyasskin at google.com Tue Oct 6 16:01:47 2009 From: jyasskin at google.com (Jeffrey Yasskin) Date: Tue, 6 Oct 2009 14:01:47 -0700 Subject: [llvm-commits] [llvm] r83391 - /llvm/trunk/lib/Support/Triple.cpp In-Reply-To: References: <200910061725.n96HPoKI007518@zion.cs.uiuc.edu> Message-ID: On Tue, Oct 6, 2009 at 1:23 PM, Chris Lattner wrote: > > On Oct 6, 2009, at 10:25 AM, Jeffrey Yasskin wrote: > >> Author: jyasskin >> Date: Tue Oct 6 12:25:50 2009 >> New Revision: 83391 >> >> URL: http://llvm.org/viewvc/llvm-project?rev=83391&view=rev >> Log: >> Fix PR5112, a miscompilation on gcc-4.0.3. Patch by Collin Winter! >> >> Modified: >> llvm/trunk/lib/Support/Triple.cpp >> >> Modified: llvm/trunk/lib/Support/Triple.cpp >> URL: >> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/Triple.cpp?rev=83391&r1=83390&r2=83391&view=diff >> >> ============================================================================== >> --- llvm/trunk/lib/Support/Triple.cpp (original) >> +++ llvm/trunk/lib/Support/Triple.cpp Tue Oct 6 12:25:50 2009 >> @@ -390,7 +390,10 @@ >> } >> >> void Triple::setArchName(const StringRef &Str) { >> - setTriple(Str + "-" + getVendorName() + "-" + >> getOSAndEnvironmentName()); >> + // Work around a miscompilation bug in gcc 4.0.3. >> + Twine a = getVendorName() + "-" + getOSAndEnvironmentName(); >> + Twine b = Str + "-" + a; >> + setTriple(b); >> } > > Jeffrey, I don't think this patch is safe. Twines are constructed based on > temporaries. I think that the temporary returned by getVendorName() is > destroyed at the ";" and the twine refers to the dangling pointer. Can you > just build up a temporary std::string instead? Oops. SmallString<64> it is. Collin, could you check that this still works around the gcc-4.0.3 bug? -------------- next part -------------- A non-text attachment was scrubbed...
Name: fix_triple_again.patch Type: application/octet-stream Size: 876 bytes Desc: not available Url : http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20091006/b4802aff/attachment.obj From eocallaghan at auroraux.org Tue Oct 6 16:03:44 2009 From: eocallaghan at auroraux.org (Edward O'Callaghan) Date: Tue, 06 Oct 2009 21:03:44 -0000 Subject: [llvm-commits] [compiler-rt] r83414 - /compiler-rt/trunk/www/index.html Message-ID: <200910062103.n96L3iQk005606@zion.cs.uiuc.edu> Author: evocallaghan Date: Tue Oct 6 16:03:43 2009 New Revision: 83414 URL: http://llvm.org/viewvc/llvm-project?rev=83414&view=rev Log: Improve doc readability. Modified: compiler-rt/trunk/www/index.html Modified: compiler-rt/trunk/www/index.html URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/www/index.html?rev=83414&r1=83413&r2=83414&view=diff ============================================================================== --- compiler-rt/trunk/www/index.html (original) +++ compiler-rt/trunk/www/index.html Tue Oct 6 16:03:43 2009 @@ -111,13 +111,7 @@
  • lib/ Is a generic portable implementations.
  • -
  • lib/(arch) has optimized version for the following supported architectures: -
      -
    • i386
    • -
    • X86-64
    • -
    • PowerPC
    • -
    • PowerPC 64
    • -
  • +
  • lib/(arch) has optimized version for the supported architectures.
From clattner at apple.com Tue Oct 6 16:05:31 2009 From: clattner at apple.com (Chris Lattner) Date: Tue, 6 Oct 2009 14:05:31 -0700 Subject: [llvm-commits] [llvm] r83391 - /llvm/trunk/lib/Support/Triple.cpp In-Reply-To: References: <200910061725.n96HPoKI007518@zion.cs.uiuc.edu> Message-ID: >> Jeffrey, I don't think this patch is safe. Twines are constructed >> based on >> temporaries. I think that the temporary returned by >> getVendorName() is >> destroyed at the ";" and the twine refers to the dangling pointer. >> Can you >> just build up a temporary std::string instead? > > Oops. SmallString<64> it is. Collin, could you check that this still > works around the gcc-4.0.3 bug? > FWIW, this patch looks great to me (for real this time ;-) -Chris From bob.wilson at apple.com Tue Oct 6 16:16:20 2009 From: bob.wilson at apple.com (Bob Wilson) Date: Tue, 06 Oct 2009 21:16:20 -0000 Subject: [llvm-commits] [llvm] r83415 - in /llvm/trunk/test/CodeGen/ARM: vld2.ll vld3.ll vld4.ll vldlane.ll vtbl.ll vtrn.ll vuzp.ll vzip.ll Message-ID: <200910062116.n96LGKX2007398@zion.cs.uiuc.edu> Author: bwilson Date: Tue Oct 6 16:16:19 2009 New Revision: 83415 URL: http://llvm.org/viewvc/llvm-project?rev=83415&view=rev Log: Update NEON struct names to match llvm-gcc changes. (This is not required for correctness but might help with sanity.)
Modified: llvm/trunk/test/CodeGen/ARM/vld2.ll llvm/trunk/test/CodeGen/ARM/vld3.ll llvm/trunk/test/CodeGen/ARM/vld4.ll llvm/trunk/test/CodeGen/ARM/vldlane.ll llvm/trunk/test/CodeGen/ARM/vtbl.ll llvm/trunk/test/CodeGen/ARM/vtrn.ll llvm/trunk/test/CodeGen/ARM/vuzp.ll llvm/trunk/test/CodeGen/ARM/vzip.ll Modified: llvm/trunk/test/CodeGen/ARM/vld2.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vld2.ll?rev=83415&r1=83414&r2=83415&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/ARM/vld2.ll (original) +++ llvm/trunk/test/CodeGen/ARM/vld2.ll Tue Oct 6 16:16:19 2009 @@ -1,16 +1,16 @@ ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -%struct.__builtin_neon_v8qi2 = type { <8 x i8>, <8 x i8> } -%struct.__builtin_neon_v4hi2 = type { <4 x i16>, <4 x i16> } -%struct.__builtin_neon_v2si2 = type { <2 x i32>, <2 x i32> } -%struct.__builtin_neon_v2sf2 = type { <2 x float>, <2 x float> } +%struct.__neon_int8x8x2_t = type { <8 x i8>, <8 x i8> } +%struct.__neon_int16x4x2_t = type { <4 x i16>, <4 x i16> } +%struct.__neon_int32x2x2_t = type { <2 x i32>, <2 x i32> } +%struct.__neon_float32x2x2_t = type { <2 x float>, <2 x float> } define <8 x i8> @vld2i8(i8* %A) nounwind { ;CHECK: vld2i8: ;CHECK: vld2.8 - %tmp1 = call %struct.__builtin_neon_v8qi2 @llvm.arm.neon.vld2.v8i8(i8* %A) - %tmp2 = extractvalue %struct.__builtin_neon_v8qi2 %tmp1, 0 - %tmp3 = extractvalue %struct.__builtin_neon_v8qi2 %tmp1, 1 + %tmp1 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2.v8i8(i8* %A) + %tmp2 = extractvalue %struct.__neon_int8x8x2_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp1, 1 %tmp4 = add <8 x i8> %tmp2, %tmp3 ret <8 x i8> %tmp4 } @@ -18,9 +18,9 @@ define <4 x i16> @vld2i16(i16* %A) nounwind { ;CHECK: vld2i16: ;CHECK: vld2.16 - %tmp1 = call %struct.__builtin_neon_v4hi2 @llvm.arm.neon.vld2.v4i16(i16* %A) - %tmp2 = extractvalue %struct.__builtin_neon_v4hi2 %tmp1, 0 - %tmp3 = extractvalue 
%struct.__builtin_neon_v4hi2 %tmp1, 1 + %tmp1 = call %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2.v4i16(i16* %A) + %tmp2 = extractvalue %struct.__neon_int16x4x2_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int16x4x2_t %tmp1, 1 %tmp4 = add <4 x i16> %tmp2, %tmp3 ret <4 x i16> %tmp4 } @@ -28,9 +28,9 @@ define <2 x i32> @vld2i32(i32* %A) nounwind { ;CHECK: vld2i32: ;CHECK: vld2.32 - %tmp1 = call %struct.__builtin_neon_v2si2 @llvm.arm.neon.vld2.v2i32(i32* %A) - %tmp2 = extractvalue %struct.__builtin_neon_v2si2 %tmp1, 0 - %tmp3 = extractvalue %struct.__builtin_neon_v2si2 %tmp1, 1 + %tmp1 = call %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2.v2i32(i32* %A) + %tmp2 = extractvalue %struct.__neon_int32x2x2_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int32x2x2_t %tmp1, 1 %tmp4 = add <2 x i32> %tmp2, %tmp3 ret <2 x i32> %tmp4 } @@ -38,14 +38,14 @@ define <2 x float> @vld2f(float* %A) nounwind { ;CHECK: vld2f: ;CHECK: vld2.32 - %tmp1 = call %struct.__builtin_neon_v2sf2 @llvm.arm.neon.vld2.v2f32(float* %A) - %tmp2 = extractvalue %struct.__builtin_neon_v2sf2 %tmp1, 0 - %tmp3 = extractvalue %struct.__builtin_neon_v2sf2 %tmp1, 1 + %tmp1 = call %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2.v2f32(float* %A) + %tmp2 = extractvalue %struct.__neon_float32x2x2_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_float32x2x2_t %tmp1, 1 %tmp4 = add <2 x float> %tmp2, %tmp3 ret <2 x float> %tmp4 } -declare %struct.__builtin_neon_v8qi2 @llvm.arm.neon.vld2.v8i8(i8*) nounwind readonly -declare %struct.__builtin_neon_v4hi2 @llvm.arm.neon.vld2.v4i16(i8*) nounwind readonly -declare %struct.__builtin_neon_v2si2 @llvm.arm.neon.vld2.v2i32(i8*) nounwind readonly -declare %struct.__builtin_neon_v2sf2 @llvm.arm.neon.vld2.v2f32(i8*) nounwind readonly +declare %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2.v8i8(i8*) nounwind readonly +declare %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2.v4i16(i8*) nounwind readonly +declare %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2.v2i32(i8*) nounwind 
readonly +declare %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2.v2f32(i8*) nounwind readonly Modified: llvm/trunk/test/CodeGen/ARM/vld3.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vld3.ll?rev=83415&r1=83414&r2=83415&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/ARM/vld3.ll (original) +++ llvm/trunk/test/CodeGen/ARM/vld3.ll Tue Oct 6 16:16:19 2009 @@ -1,16 +1,16 @@ ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -%struct.__builtin_neon_v8qi3 = type { <8 x i8>, <8 x i8>, <8 x i8> } -%struct.__builtin_neon_v4hi3 = type { <4 x i16>, <4 x i16>, <4 x i16> } -%struct.__builtin_neon_v2si3 = type { <2 x i32>, <2 x i32>, <2 x i32> } -%struct.__builtin_neon_v2sf3 = type { <2 x float>, <2 x float>, <2 x float> } +%struct.__neon_int8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> } +%struct.__neon_int16x4x3_t = type { <4 x i16>, <4 x i16>, <4 x i16> } +%struct.__neon_int32x2x3_t = type { <2 x i32>, <2 x i32>, <2 x i32> } +%struct.__neon_float32x2x3_t = type { <2 x float>, <2 x float>, <2 x float> } define <8 x i8> @vld3i8(i8* %A) nounwind { ;CHECK: vld3i8: ;CHECK: vld3.8 - %tmp1 = call %struct.__builtin_neon_v8qi3 @llvm.arm.neon.vld3.v8i8(i8* %A) - %tmp2 = extractvalue %struct.__builtin_neon_v8qi3 %tmp1, 0 - %tmp3 = extractvalue %struct.__builtin_neon_v8qi3 %tmp1, 2 + %tmp1 = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A) + %tmp2 = extractvalue %struct.__neon_int8x8x3_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp1, 2 %tmp4 = add <8 x i8> %tmp2, %tmp3 ret <8 x i8> %tmp4 } @@ -18,9 +18,9 @@ define <4 x i16> @vld3i16(i16* %A) nounwind { ;CHECK: vld3i16: ;CHECK: vld3.16 - %tmp1 = call %struct.__builtin_neon_v4hi3 @llvm.arm.neon.vld3.v4i16(i16* %A) - %tmp2 = extractvalue %struct.__builtin_neon_v4hi3 %tmp1, 0 - %tmp3 = extractvalue %struct.__builtin_neon_v4hi3 %tmp1, 2 + %tmp1 = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3.v4i16(i16* %A) 
+ %tmp2 = extractvalue %struct.__neon_int16x4x3_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int16x4x3_t %tmp1, 2 %tmp4 = add <4 x i16> %tmp2, %tmp3 ret <4 x i16> %tmp4 } @@ -28,9 +28,9 @@ define <2 x i32> @vld3i32(i32* %A) nounwind { ;CHECK: vld3i32: ;CHECK: vld3.32 - %tmp1 = call %struct.__builtin_neon_v2si3 @llvm.arm.neon.vld3.v2i32(i32* %A) - %tmp2 = extractvalue %struct.__builtin_neon_v2si3 %tmp1, 0 - %tmp3 = extractvalue %struct.__builtin_neon_v2si3 %tmp1, 2 + %tmp1 = call %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3.v2i32(i32* %A) + %tmp2 = extractvalue %struct.__neon_int32x2x3_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int32x2x3_t %tmp1, 2 %tmp4 = add <2 x i32> %tmp2, %tmp3 ret <2 x i32> %tmp4 } @@ -38,14 +38,14 @@ define <2 x float> @vld3f(float* %A) nounwind { ;CHECK: vld3f: ;CHECK: vld3.32 - %tmp1 = call %struct.__builtin_neon_v2sf3 @llvm.arm.neon.vld3.v2f32(float* %A) - %tmp2 = extractvalue %struct.__builtin_neon_v2sf3 %tmp1, 0 - %tmp3 = extractvalue %struct.__builtin_neon_v2sf3 %tmp1, 2 + %tmp1 = call %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3.v2f32(float* %A) + %tmp2 = extractvalue %struct.__neon_float32x2x3_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_float32x2x3_t %tmp1, 2 %tmp4 = add <2 x float> %tmp2, %tmp3 ret <2 x float> %tmp4 } -declare %struct.__builtin_neon_v8qi3 @llvm.arm.neon.vld3.v8i8(i8*) nounwind readonly -declare %struct.__builtin_neon_v4hi3 @llvm.arm.neon.vld3.v4i16(i8*) nounwind readonly -declare %struct.__builtin_neon_v2si3 @llvm.arm.neon.vld3.v2i32(i8*) nounwind readonly -declare %struct.__builtin_neon_v2sf3 @llvm.arm.neon.vld3.v2f32(i8*) nounwind readonly +declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8*) nounwind readonly +declare %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3.v4i16(i8*) nounwind readonly +declare %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3.v2i32(i8*) nounwind readonly +declare %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3.v2f32(i8*) nounwind readonly Modified: 
llvm/trunk/test/CodeGen/ARM/vld4.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vld4.ll?rev=83415&r1=83414&r2=83415&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/ARM/vld4.ll (original) +++ llvm/trunk/test/CodeGen/ARM/vld4.ll Tue Oct 6 16:16:19 2009 @@ -1,16 +1,16 @@ ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -%struct.__builtin_neon_v8qi4 = type { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } -%struct.__builtin_neon_v4hi4 = type { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } -%struct.__builtin_neon_v2si4 = type { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } -%struct.__builtin_neon_v2sf4 = type { <2 x float>, <2 x float>, <2 x float>, <2 x float> } +%struct.__neon_int8x8x4_t = type { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } +%struct.__neon_int16x4x4_t = type { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } +%struct.__neon_int32x2x4_t = type { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } +%struct.__neon_float32x2x4_t = type { <2 x float>, <2 x float>, <2 x float>, <2 x float> } define <8 x i8> @vld4i8(i8* %A) nounwind { ;CHECK: vld4i8: ;CHECK: vld4.8 - %tmp1 = call %struct.__builtin_neon_v8qi4 @llvm.arm.neon.vld4.v8i8(i8* %A) - %tmp2 = extractvalue %struct.__builtin_neon_v8qi4 %tmp1, 0 - %tmp3 = extractvalue %struct.__builtin_neon_v8qi4 %tmp1, 2 + %tmp1 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4.v8i8(i8* %A) + %tmp2 = extractvalue %struct.__neon_int8x8x4_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp1, 2 %tmp4 = add <8 x i8> %tmp2, %tmp3 ret <8 x i8> %tmp4 } @@ -18,9 +18,9 @@ define <4 x i16> @vld4i16(i16* %A) nounwind { ;CHECK: vld4i16: ;CHECK: vld4.16 - %tmp1 = call %struct.__builtin_neon_v4hi4 @llvm.arm.neon.vld4.v4i16(i16* %A) - %tmp2 = extractvalue %struct.__builtin_neon_v4hi4 %tmp1, 0 - %tmp3 = extractvalue %struct.__builtin_neon_v4hi4 %tmp1, 2 + %tmp1 = call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4.v4i16(i16* %A) + %tmp2 = 
extractvalue %struct.__neon_int16x4x4_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int16x4x4_t %tmp1, 2 %tmp4 = add <4 x i16> %tmp2, %tmp3 ret <4 x i16> %tmp4 } @@ -28,9 +28,9 @@ define <2 x i32> @vld4i32(i32* %A) nounwind { ;CHECK: vld4i32: ;CHECK: vld4.32 - %tmp1 = call %struct.__builtin_neon_v2si4 @llvm.arm.neon.vld4.v2i32(i32* %A) - %tmp2 = extractvalue %struct.__builtin_neon_v2si4 %tmp1, 0 - %tmp3 = extractvalue %struct.__builtin_neon_v2si4 %tmp1, 2 + %tmp1 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4.v2i32(i32* %A) + %tmp2 = extractvalue %struct.__neon_int32x2x4_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int32x2x4_t %tmp1, 2 %tmp4 = add <2 x i32> %tmp2, %tmp3 ret <2 x i32> %tmp4 } @@ -38,14 +38,14 @@ define <2 x float> @vld4f(float* %A) nounwind { ;CHECK: vld4f: ;CHECK: vld4.32 - %tmp1 = call %struct.__builtin_neon_v2sf4 @llvm.arm.neon.vld4.v2f32(float* %A) - %tmp2 = extractvalue %struct.__builtin_neon_v2sf4 %tmp1, 0 - %tmp3 = extractvalue %struct.__builtin_neon_v2sf4 %tmp1, 2 + %tmp1 = call %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4.v2f32(float* %A) + %tmp2 = extractvalue %struct.__neon_float32x2x4_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_float32x2x4_t %tmp1, 2 %tmp4 = add <2 x float> %tmp2, %tmp3 ret <2 x float> %tmp4 } -declare %struct.__builtin_neon_v8qi4 @llvm.arm.neon.vld4.v8i8(i8*) nounwind readonly -declare %struct.__builtin_neon_v4hi4 @llvm.arm.neon.vld4.v4i16(i8*) nounwind readonly -declare %struct.__builtin_neon_v2si4 @llvm.arm.neon.vld4.v2i32(i8*) nounwind readonly -declare %struct.__builtin_neon_v2sf4 @llvm.arm.neon.vld4.v2f32(i8*) nounwind readonly +declare %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4.v8i8(i8*) nounwind readonly +declare %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4.v4i16(i8*) nounwind readonly +declare %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4.v2i32(i8*) nounwind readonly +declare %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4.v2f32(i8*) nounwind readonly Modified: 
llvm/trunk/test/CodeGen/ARM/vldlane.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vldlane.ll?rev=83415&r1=83414&r2=83415&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/ARM/vldlane.ll (original) +++ llvm/trunk/test/CodeGen/ARM/vldlane.ll Tue Oct 6 16:16:19 2009 @@ -1,17 +1,17 @@ ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -%struct.__builtin_neon_v8qi2 = type { <8 x i8>, <8 x i8> } -%struct.__builtin_neon_v4hi2 = type { <4 x i16>, <4 x i16> } -%struct.__builtin_neon_v2si2 = type { <2 x i32>, <2 x i32> } -%struct.__builtin_neon_v2sf2 = type { <2 x float>, <2 x float> } +%struct.__neon_int8x8x2_t = type { <8 x i8>, <8 x i8> } +%struct.__neon_int16x4x2_t = type { <4 x i16>, <4 x i16> } +%struct.__neon_int32x2x2_t = type { <2 x i32>, <2 x i32> } +%struct.__neon_float32x2x2_t = type { <2 x float>, <2 x float> } define <8 x i8> @vld2lanei8(i8* %A, <8 x i8>* %B) nounwind { ;CHECK: vld2lanei8: ;CHECK: vld2.8 %tmp1 = load <8 x i8>* %B - %tmp2 = call %struct.__builtin_neon_v8qi2 @llvm.arm.neon.vld2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1) - %tmp3 = extractvalue %struct.__builtin_neon_v8qi2 %tmp2, 0 - %tmp4 = extractvalue %struct.__builtin_neon_v8qi2 %tmp2, 1 + %tmp2 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1) + %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 1 %tmp5 = add <8 x i8> %tmp3, %tmp4 ret <8 x i8> %tmp5 } @@ -20,9 +20,9 @@ ;CHECK: vld2lanei16: ;CHECK: vld2.16 %tmp1 = load <4 x i16>* %B - %tmp2 = call %struct.__builtin_neon_v4hi2 @llvm.arm.neon.vld2lane.v4i16(i16* %A, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1) - %tmp3 = extractvalue %struct.__builtin_neon_v4hi2 %tmp2, 0 - %tmp4 = extractvalue %struct.__builtin_neon_v4hi2 %tmp2, 1 + %tmp2 = call %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2lane.v4i16(i16* %A, <4 x i16> 
%tmp1, <4 x i16> %tmp1, i32 1) + %tmp3 = extractvalue %struct.__neon_int16x4x2_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int16x4x2_t %tmp2, 1 %tmp5 = add <4 x i16> %tmp3, %tmp4 ret <4 x i16> %tmp5 } @@ -31,9 +31,9 @@ ;CHECK: vld2lanei32: ;CHECK: vld2.32 %tmp1 = load <2 x i32>* %B - %tmp2 = call %struct.__builtin_neon_v2si2 @llvm.arm.neon.vld2lane.v2i32(i32* %A, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1) - %tmp3 = extractvalue %struct.__builtin_neon_v2si2 %tmp2, 0 - %tmp4 = extractvalue %struct.__builtin_neon_v2si2 %tmp2, 1 + %tmp2 = call %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32(i32* %A, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1) + %tmp3 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 1 %tmp5 = add <2 x i32> %tmp3, %tmp4 ret <2 x i32> %tmp5 } @@ -42,31 +42,31 @@ ;CHECK: vld2lanef: ;CHECK: vld2.32 %tmp1 = load <2 x float>* %B - %tmp2 = call %struct.__builtin_neon_v2sf2 @llvm.arm.neon.vld2lane.v2f32(float* %A, <2 x float> %tmp1, <2 x float> %tmp1, i32 1) - %tmp3 = extractvalue %struct.__builtin_neon_v2sf2 %tmp2, 0 - %tmp4 = extractvalue %struct.__builtin_neon_v2sf2 %tmp2, 1 + %tmp2 = call %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2lane.v2f32(float* %A, <2 x float> %tmp1, <2 x float> %tmp1, i32 1) + %tmp3 = extractvalue %struct.__neon_float32x2x2_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_float32x2x2_t %tmp2, 1 %tmp5 = add <2 x float> %tmp3, %tmp4 ret <2 x float> %tmp5 } -declare %struct.__builtin_neon_v8qi2 @llvm.arm.neon.vld2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32) nounwind readonly -declare %struct.__builtin_neon_v4hi2 @llvm.arm.neon.vld2lane.v4i16(i8*, <4 x i16>, <4 x i16>, i32) nounwind readonly -declare %struct.__builtin_neon_v2si2 @llvm.arm.neon.vld2lane.v2i32(i8*, <2 x i32>, <2 x i32>, i32) nounwind readonly -declare %struct.__builtin_neon_v2sf2 @llvm.arm.neon.vld2lane.v2f32(i8*, <2 x float>, <2 x float>, i32) nounwind readonly - -%struct.__builtin_neon_v8qi3 = type { <8 x 
i8>, <8 x i8>, <8 x i8> } -%struct.__builtin_neon_v4hi3 = type { <4 x i16>, <4 x i16>, <4 x i16> } -%struct.__builtin_neon_v2si3 = type { <2 x i32>, <2 x i32>, <2 x i32> } -%struct.__builtin_neon_v2sf3 = type { <2 x float>, <2 x float>, <2 x float> } +declare %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32) nounwind readonly +declare %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2lane.v4i16(i8*, <4 x i16>, <4 x i16>, i32) nounwind readonly +declare %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32(i8*, <2 x i32>, <2 x i32>, i32) nounwind readonly +declare %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2lane.v2f32(i8*, <2 x float>, <2 x float>, i32) nounwind readonly + +%struct.__neon_int8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> } +%struct.__neon_int16x4x3_t = type { <4 x i16>, <4 x i16>, <4 x i16> } +%struct.__neon_int32x2x3_t = type { <2 x i32>, <2 x i32>, <2 x i32> } +%struct.__neon_float32x2x3_t = type { <2 x float>, <2 x float>, <2 x float> } define <8 x i8> @vld3lanei8(i8* %A, <8 x i8>* %B) nounwind { ;CHECK: vld3lanei8: ;CHECK: vld3.8 %tmp1 = load <8 x i8>* %B - %tmp2 = call %struct.__builtin_neon_v8qi3 @llvm.arm.neon.vld3lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1) - %tmp3 = extractvalue %struct.__builtin_neon_v8qi3 %tmp2, 0 - %tmp4 = extractvalue %struct.__builtin_neon_v8qi3 %tmp2, 1 - %tmp5 = extractvalue %struct.__builtin_neon_v8qi3 %tmp2, 2 + %tmp2 = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1) + %tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 1 + %tmp5 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 2 %tmp6 = add <8 x i8> %tmp3, %tmp4 %tmp7 = add <8 x i8> %tmp5, %tmp6 ret <8 x i8> %tmp7 @@ -76,10 +76,10 @@ ;CHECK: vld3lanei16: ;CHECK: vld3.16 %tmp1 = load <4 x i16>* %B - %tmp2 = call %struct.__builtin_neon_v4hi3 
@llvm.arm.neon.vld3lane.v4i16(i16* %A, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1) - %tmp3 = extractvalue %struct.__builtin_neon_v4hi3 %tmp2, 0 - %tmp4 = extractvalue %struct.__builtin_neon_v4hi3 %tmp2, 1 - %tmp5 = extractvalue %struct.__builtin_neon_v4hi3 %tmp2, 2 + %tmp2 = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i16* %A, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1) + %tmp3 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 1 + %tmp5 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 2 %tmp6 = add <4 x i16> %tmp3, %tmp4 %tmp7 = add <4 x i16> %tmp5, %tmp6 ret <4 x i16> %tmp7 @@ -89,10 +89,10 @@ ;CHECK: vld3lanei32: ;CHECK: vld3.32 %tmp1 = load <2 x i32>* %B - %tmp2 = call %struct.__builtin_neon_v2si3 @llvm.arm.neon.vld3lane.v2i32(i32* %A, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1) - %tmp3 = extractvalue %struct.__builtin_neon_v2si3 %tmp2, 0 - %tmp4 = extractvalue %struct.__builtin_neon_v2si3 %tmp2, 1 - %tmp5 = extractvalue %struct.__builtin_neon_v2si3 %tmp2, 2 + %tmp2 = call %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3lane.v2i32(i32* %A, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1) + %tmp3 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 1 + %tmp5 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 2 %tmp6 = add <2 x i32> %tmp3, %tmp4 %tmp7 = add <2 x i32> %tmp5, %tmp6 ret <2 x i32> %tmp7 @@ -102,34 +102,34 @@ ;CHECK: vld3lanef: ;CHECK: vld3.32 %tmp1 = load <2 x float>* %B - %tmp2 = call %struct.__builtin_neon_v2sf3 @llvm.arm.neon.vld3lane.v2f32(float* %A, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1) - %tmp3 = extractvalue %struct.__builtin_neon_v2sf3 %tmp2, 0 - %tmp4 = extractvalue %struct.__builtin_neon_v2sf3 %tmp2, 1 - %tmp5 = extractvalue %struct.__builtin_neon_v2sf3 %tmp2, 2 + %tmp2 = call %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3lane.v2f32(float* 
%A, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1) + %tmp3 = extractvalue %struct.__neon_float32x2x3_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_float32x2x3_t %tmp2, 1 + %tmp5 = extractvalue %struct.__neon_float32x2x3_t %tmp2, 2 %tmp6 = add <2 x float> %tmp3, %tmp4 %tmp7 = add <2 x float> %tmp5, %tmp6 ret <2 x float> %tmp7 } -declare %struct.__builtin_neon_v8qi3 @llvm.arm.neon.vld3lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32) nounwind readonly -declare %struct.__builtin_neon_v4hi3 @llvm.arm.neon.vld3lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32) nounwind readonly -declare %struct.__builtin_neon_v2si3 @llvm.arm.neon.vld3lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32) nounwind readonly -declare %struct.__builtin_neon_v2sf3 @llvm.arm.neon.vld3lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32) nounwind readonly - -%struct.__builtin_neon_v8qi4 = type { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } -%struct.__builtin_neon_v4hi4 = type { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } -%struct.__builtin_neon_v2si4 = type { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } -%struct.__builtin_neon_v2sf4 = type { <2 x float>, <2 x float>, <2 x float>, <2 x float> } +declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32) nounwind readonly +declare %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32) nounwind readonly +declare %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32) nounwind readonly +declare %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32) nounwind readonly + +%struct.__neon_int8x8x4_t = type { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } +%struct.__neon_int16x4x4_t = type { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } +%struct.__neon_int32x2x4_t = type { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } +%struct.__neon_float32x2x4_t = type 
{ <2 x float>, <2 x float>, <2 x float>, <2 x float> } define <8 x i8> @vld4lanei8(i8* %A, <8 x i8>* %B) nounwind { ;CHECK: vld4lanei8: ;CHECK: vld4.8 %tmp1 = load <8 x i8>* %B - %tmp2 = call %struct.__builtin_neon_v8qi4 @llvm.arm.neon.vld4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1) - %tmp3 = extractvalue %struct.__builtin_neon_v8qi4 %tmp2, 0 - %tmp4 = extractvalue %struct.__builtin_neon_v8qi4 %tmp2, 1 - %tmp5 = extractvalue %struct.__builtin_neon_v8qi4 %tmp2, 2 - %tmp6 = extractvalue %struct.__builtin_neon_v8qi4 %tmp2, 3 + %tmp2 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1) + %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 1 + %tmp5 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 2 + %tmp6 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 3 %tmp7 = add <8 x i8> %tmp3, %tmp4 %tmp8 = add <8 x i8> %tmp5, %tmp6 %tmp9 = add <8 x i8> %tmp7, %tmp8 @@ -140,11 +140,11 @@ ;CHECK: vld4lanei16: ;CHECK: vld4.16 %tmp1 = load <4 x i16>* %B - %tmp2 = call %struct.__builtin_neon_v4hi4 @llvm.arm.neon.vld4lane.v4i16(i16* %A, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1) - %tmp3 = extractvalue %struct.__builtin_neon_v4hi4 %tmp2, 0 - %tmp4 = extractvalue %struct.__builtin_neon_v4hi4 %tmp2, 1 - %tmp5 = extractvalue %struct.__builtin_neon_v4hi4 %tmp2, 2 - %tmp6 = extractvalue %struct.__builtin_neon_v4hi4 %tmp2, 3 + %tmp2 = call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i16* %A, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1) + %tmp3 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 1 + %tmp5 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 2 + %tmp6 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 3 %tmp7 = add <4 x i16> %tmp3, %tmp4 %tmp8 = add <4 x i16> %tmp5, 
%tmp6 %tmp9 = add <4 x i16> %tmp7, %tmp8 @@ -155,11 +155,11 @@ ;CHECK: vld4lanei32: ;CHECK: vld4.32 %tmp1 = load <2 x i32>* %B - %tmp2 = call %struct.__builtin_neon_v2si4 @llvm.arm.neon.vld4lane.v2i32(i32* %A, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1) - %tmp3 = extractvalue %struct.__builtin_neon_v2si4 %tmp2, 0 - %tmp4 = extractvalue %struct.__builtin_neon_v2si4 %tmp2, 1 - %tmp5 = extractvalue %struct.__builtin_neon_v2si4 %tmp2, 2 - %tmp6 = extractvalue %struct.__builtin_neon_v2si4 %tmp2, 3 + %tmp2 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i32* %A, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1) + %tmp3 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 1 + %tmp5 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 2 + %tmp6 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 3 %tmp7 = add <2 x i32> %tmp3, %tmp4 %tmp8 = add <2 x i32> %tmp5, %tmp6 %tmp9 = add <2 x i32> %tmp7, %tmp8 @@ -170,18 +170,18 @@ ;CHECK: vld4lanef: ;CHECK: vld4.32 %tmp1 = load <2 x float>* %B - %tmp2 = call %struct.__builtin_neon_v2sf4 @llvm.arm.neon.vld4lane.v2f32(float* %A, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1) - %tmp3 = extractvalue %struct.__builtin_neon_v2sf4 %tmp2, 0 - %tmp4 = extractvalue %struct.__builtin_neon_v2sf4 %tmp2, 1 - %tmp5 = extractvalue %struct.__builtin_neon_v2sf4 %tmp2, 2 - %tmp6 = extractvalue %struct.__builtin_neon_v2sf4 %tmp2, 3 + %tmp2 = call %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4lane.v2f32(float* %A, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1) + %tmp3 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 1 + %tmp5 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 2 + %tmp6 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 3 %tmp7 = add <2 x float> %tmp3, %tmp4 %tmp8 = add <2 x float> 
%tmp5, %tmp6 %tmp9 = add <2 x float> %tmp7, %tmp8 ret <2 x float> %tmp9 } -declare %struct.__builtin_neon_v8qi4 @llvm.arm.neon.vld4lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32) nounwind readonly -declare %struct.__builtin_neon_v4hi4 @llvm.arm.neon.vld4lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32) nounwind readonly -declare %struct.__builtin_neon_v2si4 @llvm.arm.neon.vld4lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32) nounwind readonly -declare %struct.__builtin_neon_v2sf4 @llvm.arm.neon.vld4lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32) nounwind readonly +declare %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32) nounwind readonly +declare %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32) nounwind readonly +declare %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32) nounwind readonly +declare %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32) nounwind readonly Modified: llvm/trunk/test/CodeGen/ARM/vtbl.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vtbl.ll?rev=83415&r1=83414&r2=83415&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/ARM/vtbl.ll (original) +++ llvm/trunk/test/CodeGen/ARM/vtbl.ll Tue Oct 6 16:16:19 2009 @@ -1,8 +1,8 @@ ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -%struct.__builtin_neon_v8qi2 = type { <8 x i8>, <8 x i8> } -%struct.__builtin_neon_v8qi3 = type { <8 x i8>, <8 x i8>, <8 x i8> } -%struct.__builtin_neon_v8qi4 = type { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } +%struct.__neon_int8x8x2_t = type { <8 x i8>, <8 x i8> } +%struct.__neon_int8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> } +%struct.__neon_int8x8x4_t = type { <8 x i8>, 
<8 x i8>, <8 x i8>, <8 x i8> } define <8 x i8> @vtbl1(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK: vtbl1: @@ -13,38 +13,38 @@ ret <8 x i8> %tmp3 } -define <8 x i8> @vtbl2(<8 x i8>* %A, %struct.__builtin_neon_v8qi2* %B) nounwind { +define <8 x i8> @vtbl2(<8 x i8>* %A, %struct.__neon_int8x8x2_t* %B) nounwind { ;CHECK: vtbl2: ;CHECK: vtbl.8 %tmp1 = load <8 x i8>* %A - %tmp2 = load %struct.__builtin_neon_v8qi2* %B - %tmp3 = extractvalue %struct.__builtin_neon_v8qi2 %tmp2, 0 - %tmp4 = extractvalue %struct.__builtin_neon_v8qi2 %tmp2, 1 + %tmp2 = load %struct.__neon_int8x8x2_t* %B + %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 1 %tmp5 = call <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8> %tmp1, <8 x i8> %tmp3, <8 x i8> %tmp4) ret <8 x i8> %tmp5 } -define <8 x i8> @vtbl3(<8 x i8>* %A, %struct.__builtin_neon_v8qi3* %B) nounwind { +define <8 x i8> @vtbl3(<8 x i8>* %A, %struct.__neon_int8x8x3_t* %B) nounwind { ;CHECK: vtbl3: ;CHECK: vtbl.8 %tmp1 = load <8 x i8>* %A - %tmp2 = load %struct.__builtin_neon_v8qi3* %B - %tmp3 = extractvalue %struct.__builtin_neon_v8qi3 %tmp2, 0 - %tmp4 = extractvalue %struct.__builtin_neon_v8qi3 %tmp2, 1 - %tmp5 = extractvalue %struct.__builtin_neon_v8qi3 %tmp2, 2 + %tmp2 = load %struct.__neon_int8x8x3_t* %B + %tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 1 + %tmp5 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 2 %tmp6 = call <8 x i8> @llvm.arm.neon.vtbl3(<8 x i8> %tmp1, <8 x i8> %tmp3, <8 x i8> %tmp4, <8 x i8> %tmp5) ret <8 x i8> %tmp6 } -define <8 x i8> @vtbl4(<8 x i8>* %A, %struct.__builtin_neon_v8qi4* %B) nounwind { +define <8 x i8> @vtbl4(<8 x i8>* %A, %struct.__neon_int8x8x4_t* %B) nounwind { ;CHECK: vtbl4: ;CHECK: vtbl.8 %tmp1 = load <8 x i8>* %A - %tmp2 = load %struct.__builtin_neon_v8qi4* %B - %tmp3 = extractvalue %struct.__builtin_neon_v8qi4 %tmp2, 0 - %tmp4 = extractvalue %struct.__builtin_neon_v8qi4 %tmp2, 1 - 
%tmp5 = extractvalue %struct.__builtin_neon_v8qi4 %tmp2, 2 - %tmp6 = extractvalue %struct.__builtin_neon_v8qi4 %tmp2, 3 + %tmp2 = load %struct.__neon_int8x8x4_t* %B + %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 1 + %tmp5 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 2 + %tmp6 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 3 %tmp7 = call <8 x i8> @llvm.arm.neon.vtbl4(<8 x i8> %tmp1, <8 x i8> %tmp3, <8 x i8> %tmp4, <8 x i8> %tmp5, <8 x i8> %tmp6) ret <8 x i8> %tmp7 } @@ -59,40 +59,40 @@ ret <8 x i8> %tmp4 } -define <8 x i8> @vtbx2(<8 x i8>* %A, %struct.__builtin_neon_v8qi2* %B, <8 x i8>* %C) nounwind { +define <8 x i8> @vtbx2(<8 x i8>* %A, %struct.__neon_int8x8x2_t* %B, <8 x i8>* %C) nounwind { ;CHECK: vtbx2: ;CHECK: vtbx.8 %tmp1 = load <8 x i8>* %A - %tmp2 = load %struct.__builtin_neon_v8qi2* %B - %tmp3 = extractvalue %struct.__builtin_neon_v8qi2 %tmp2, 0 - %tmp4 = extractvalue %struct.__builtin_neon_v8qi2 %tmp2, 1 + %tmp2 = load %struct.__neon_int8x8x2_t* %B + %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 1 %tmp5 = load <8 x i8>* %C %tmp6 = call <8 x i8> @llvm.arm.neon.vtbx2(<8 x i8> %tmp1, <8 x i8> %tmp3, <8 x i8> %tmp4, <8 x i8> %tmp5) ret <8 x i8> %tmp6 } -define <8 x i8> @vtbx3(<8 x i8>* %A, %struct.__builtin_neon_v8qi3* %B, <8 x i8>* %C) nounwind { +define <8 x i8> @vtbx3(<8 x i8>* %A, %struct.__neon_int8x8x3_t* %B, <8 x i8>* %C) nounwind { ;CHECK: vtbx3: ;CHECK: vtbx.8 %tmp1 = load <8 x i8>* %A - %tmp2 = load %struct.__builtin_neon_v8qi3* %B - %tmp3 = extractvalue %struct.__builtin_neon_v8qi3 %tmp2, 0 - %tmp4 = extractvalue %struct.__builtin_neon_v8qi3 %tmp2, 1 - %tmp5 = extractvalue %struct.__builtin_neon_v8qi3 %tmp2, 2 + %tmp2 = load %struct.__neon_int8x8x3_t* %B + %tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 1 + %tmp5 = extractvalue 
%struct.__neon_int8x8x3_t %tmp2, 2 %tmp6 = load <8 x i8>* %C %tmp7 = call <8 x i8> @llvm.arm.neon.vtbx3(<8 x i8> %tmp1, <8 x i8> %tmp3, <8 x i8> %tmp4, <8 x i8> %tmp5, <8 x i8> %tmp6) ret <8 x i8> %tmp7 } -define <8 x i8> @vtbx4(<8 x i8>* %A, %struct.__builtin_neon_v8qi4* %B, <8 x i8>* %C) nounwind { +define <8 x i8> @vtbx4(<8 x i8>* %A, %struct.__neon_int8x8x4_t* %B, <8 x i8>* %C) nounwind { ;CHECK: vtbx4: ;CHECK: vtbx.8 %tmp1 = load <8 x i8>* %A - %tmp2 = load %struct.__builtin_neon_v8qi4* %B - %tmp3 = extractvalue %struct.__builtin_neon_v8qi4 %tmp2, 0 - %tmp4 = extractvalue %struct.__builtin_neon_v8qi4 %tmp2, 1 - %tmp5 = extractvalue %struct.__builtin_neon_v8qi4 %tmp2, 2 - %tmp6 = extractvalue %struct.__builtin_neon_v8qi4 %tmp2, 3 + %tmp2 = load %struct.__neon_int8x8x4_t* %B + %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 1 + %tmp5 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 2 + %tmp6 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 3 %tmp7 = load <8 x i8>* %C %tmp8 = call <8 x i8> @llvm.arm.neon.vtbx4(<8 x i8> %tmp1, <8 x i8> %tmp3, <8 x i8> %tmp4, <8 x i8> %tmp5, <8 x i8> %tmp6, <8 x i8> %tmp7) ret <8 x i8> %tmp8 Modified: llvm/trunk/test/CodeGen/ARM/vtrn.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vtrn.ll?rev=83415&r1=83414&r2=83415&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/ARM/vtrn.ll (original) +++ llvm/trunk/test/CodeGen/ARM/vtrn.ll Tue Oct 6 16:16:19 2009 @@ -1,15 +1,5 @@ ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -%struct.__builtin_neon_v8qi2 = type { <8 x i8>, <8 x i8> } -%struct.__builtin_neon_v4hi2 = type { <4 x i16>, <4 x i16> } -%struct.__builtin_neon_v2si2 = type { <2 x i32>, <2 x i32> } -%struct.__builtin_neon_v2sf2 = type { <2 x float>, <2 x float> } - -%struct.__builtin_neon_v16qi2 = type { <16 x i8>, <16 x i8> } -%struct.__builtin_neon_v8hi2 = type { <8 x i16>, <8 
x i16> } -%struct.__builtin_neon_v4si2 = type { <4 x i32>, <4 x i32> } -%struct.__builtin_neon_v4sf2 = type { <4 x float>, <4 x float> } - define <8 x i8> @vtrni8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK: vtrni8: ;CHECK: vtrn.8 Modified: llvm/trunk/test/CodeGen/ARM/vuzp.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vuzp.ll?rev=83415&r1=83414&r2=83415&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/ARM/vuzp.ll (original) +++ llvm/trunk/test/CodeGen/ARM/vuzp.ll Tue Oct 6 16:16:19 2009 @@ -1,15 +1,5 @@ ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -%struct.__builtin_neon_v8qi2 = type { <8 x i8>, <8 x i8> } -%struct.__builtin_neon_v4hi2 = type { <4 x i16>, <4 x i16> } -%struct.__builtin_neon_v2si2 = type { <2 x i32>, <2 x i32> } -%struct.__builtin_neon_v2sf2 = type { <2 x float>, <2 x float> } - -%struct.__builtin_neon_v16qi2 = type { <16 x i8>, <16 x i8> } -%struct.__builtin_neon_v8hi2 = type { <8 x i16>, <8 x i16> } -%struct.__builtin_neon_v4si2 = type { <4 x i32>, <4 x i32> } -%struct.__builtin_neon_v4sf2 = type { <4 x float>, <4 x float> } - define <8 x i8> @vuzpi8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK: vuzpi8: ;CHECK: vuzp.8 Modified: llvm/trunk/test/CodeGen/ARM/vzip.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vzip.ll?rev=83415&r1=83414&r2=83415&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/ARM/vzip.ll (original) +++ llvm/trunk/test/CodeGen/ARM/vzip.ll Tue Oct 6 16:16:19 2009 @@ -1,15 +1,5 @@ ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -%struct.__builtin_neon_v8qi2 = type { <8 x i8>, <8 x i8> } -%struct.__builtin_neon_v4hi2 = type { <4 x i16>, <4 x i16> } -%struct.__builtin_neon_v2si2 = type { <2 x i32>, <2 x i32> } -%struct.__builtin_neon_v2sf2 = type { <2 x float>, <2 x float> } - -%struct.__builtin_neon_v16qi2 = type { <16 x i8>, <16 x 
i8> } -%struct.__builtin_neon_v8hi2 = type { <8 x i16>, <8 x i16> } -%struct.__builtin_neon_v4si2 = type { <4 x i32>, <4 x i32> } -%struct.__builtin_neon_v4sf2 = type { <4 x float>, <4 x float> } - define <8 x i8> @vzipi8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK: vzipi8: ;CHECK: vzip.8 From collinwinter at google.com Tue Oct 6 16:16:52 2009 From: collinwinter at google.com (Collin Winter) Date: Tue, 6 Oct 2009 14:16:52 -0700 Subject: [llvm-commits] [llvm] r83391 - /llvm/trunk/lib/Support/Triple.cpp In-Reply-To: References: <200910061725.n96HPoKI007518@zion.cs.uiuc.edu> Message-ID: <3c8293b60910061416v7f77e021r7d04339d1a08a458@mail.gmail.com> On Tue, Oct 6, 2009 at 2:01 PM, Jeffrey Yasskin wrote: > On Tue, Oct 6, 2009 at 1:23 PM, Chris Lattner wrote: >> >> On Oct 6, 2009, at 10:25 AM, Jeffrey Yasskin wrote: >> >>> Author: jyasskin >>> Date: Tue Oct ?6 12:25:50 2009 >>> New Revision: 83391 >>> >>> URL: http://llvm.org/viewvc/llvm-project?rev=83391&view=rev >>> Log: >>> Fix PR5112, a miscompilation on gcc-4.0.3. ?Patch by Collin Winter! >>> >>> Modified: >>> ? llvm/trunk/lib/Support/Triple.cpp >>> >>> Modified: llvm/trunk/lib/Support/Triple.cpp >>> URL: >>> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/Triple.cpp?rev=83391&r1=83390&r2=83391&view=diff >>> >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> ====================================================================== >>> --- llvm/trunk/lib/Support/Triple.cpp (original) >>> +++ llvm/trunk/lib/Support/Triple.cpp Tue Oct ?6 12:25:50 2009 >>> @@ -390,7 +390,10 @@ >>> } >>> >>> void Triple::setArchName(const StringRef &Str) { >>> - ?setTriple(Str + "-" + getVendorName() + "-" + >>> getOSAndEnvironmentName()); >>> + ?// Work around a miscompilation bug in gcc 4.0.3. >>> + ?Twine a = getVendorName() + "-" + getOSAndEnvironmentName(); >>> + ?Twine b = Str + "-" + a; >>> + ?setTriple(b); >>> } >> >> Jeffrey, I don't think this patch is safe. ?Twines are constructed based on >> temporaries. 
I think that the temporary returned by getVendorName() is >> destroyed at the ";" and the twine refers to the dangling pointer. Can you >> just build up a temporary std::string instead? > > Oops. SmallString<64> it is. Collin, could you check that this still > works around the gcc-4.0.3 bug? Still fixes the bug, thanks. Collin From jyasskin at google.com Tue Oct 6 16:45:26 2009 From: jyasskin at google.com (Jeffrey Yasskin) Date: Tue, 06 Oct 2009 21:45:26 -0000 Subject: [llvm-commits] [llvm] r83417 - /llvm/trunk/lib/Support/Triple.cpp Message-ID: <200910062145.n96LjQsf011221@zion.cs.uiuc.edu> Author: jyasskin Date: Tue Oct 6 16:45:26 2009 New Revision: 83417 URL: http://llvm.org/viewvc/llvm-project?rev=83417&view=rev Log: r83391 was completely broken since Twines keep references to their inputs, and some of the inputs were temporaries. Here's a real fix for the miscompilation. Thanks to sabre for pointing out the problem. Modified: llvm/trunk/lib/Support/Triple.cpp Modified: llvm/trunk/lib/Support/Triple.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/Triple.cpp?rev=83417&r1=83416&r2=83417&view=diff ============================================================================== --- llvm/trunk/lib/Support/Triple.cpp (original) +++ llvm/trunk/lib/Support/Triple.cpp Tue Oct 6 16:45:26 2009 @@ -9,6 +9,7 @@ #include "llvm/ADT/Triple.h" +#include "llvm/ADT/SmallString.h" #include "llvm/ADT/Twine.h" #include #include @@ -390,10 +391,14 @@ } void Triple::setArchName(const StringRef &Str) { - // Work around a miscompilation bug in gcc 4.0.3. - Twine a = getVendorName() + "-" + getOSAndEnvironmentName(); - Twine b = Str + "-" + a; - setTriple(b); + // Work around a miscompilation bug for Twines in gcc 4.0.3. 
+ SmallString<64> Triple; + Triple += Str; + Triple += "-"; + Triple += getVendorName(); + Triple += "-"; + Triple += getOSAndEnvironmentName(); + setTriple(Triple.str()); } void Triple::setVendorName(const StringRef &Str) { From jyasskin at google.com Tue Oct 6 16:49:22 2009 From: jyasskin at google.com (Jeffrey Yasskin) Date: Tue, 6 Oct 2009 14:49:22 -0700 Subject: [llvm-commits] [llvm] r83417 - /llvm/trunk/lib/Support/Triple.cpp In-Reply-To: <200910062145.n96LjQsf011221@zion.cs.uiuc.edu> References: <200910062145.n96LjQsf011221@zion.cs.uiuc.edu> Message-ID: Here's the real twine fix, in case you want it for the 2.6 branch. On Tue, Oct 6, 2009 at 2:45 PM, Jeffrey Yasskin wrote: > Author: jyasskin > Date: Tue Oct ?6 16:45:26 2009 > New Revision: 83417 > > URL: http://llvm.org/viewvc/llvm-project?rev=83417&view=rev > Log: > r83391 was completely broken since Twines keep references to their inputs, and > some of the inputs were temporaries. ?Here's a real fix for the miscompilation. > Thanks to sabre for pointing out the problem. > > Modified: > ? ?llvm/trunk/lib/Support/Triple.cpp > > Modified: llvm/trunk/lib/Support/Triple.cpp > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/Triple.cpp?rev=83417&r1=83416&r2=83417&view=diff > > ============================================================================== > --- llvm/trunk/lib/Support/Triple.cpp (original) > +++ llvm/trunk/lib/Support/Triple.cpp Tue Oct ?6 16:45:26 2009 > @@ -9,6 +9,7 @@ > > ?#include "llvm/ADT/Triple.h" > > +#include "llvm/ADT/SmallString.h" > ?#include "llvm/ADT/Twine.h" > ?#include > ?#include > @@ -390,10 +391,14 @@ > ?} > > ?void Triple::setArchName(const StringRef &Str) { > - ?// Work around a miscompilation bug in gcc 4.0.3. > - ?Twine a = getVendorName() + "-" + getOSAndEnvironmentName(); > - ?Twine b = Str + "-" + a; > - ?setTriple(b); > + ?// Work around a miscompilation bug for Twines in gcc 4.0.3. 
> + ?SmallString<64> Triple; > + ?Triple += Str; > + ?Triple += "-"; > + ?Triple += getVendorName(); > + ?Triple += "-"; > + ?Triple += getOSAndEnvironmentName(); > + ?setTriple(Triple.str()); > ?} > > ?void Triple::setVendorName(const StringRef &Str) { > > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits > From bob.wilson at apple.com Tue Oct 6 17:01:16 2009 From: bob.wilson at apple.com (Bob Wilson) Date: Tue, 06 Oct 2009 22:01:16 -0000 Subject: [llvm-commits] [llvm] r83421 - /llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp Message-ID: <200910062201.n96M1Gaa013369@zion.cs.uiuc.edu> Author: bwilson Date: Tue Oct 6 17:01:15 2009 New Revision: 83421 URL: http://llvm.org/viewvc/llvm-project?rev=83421&view=rev Log: Use copyRegToReg hook to copy registers. Modified: llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp Modified: llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp?rev=83421&r1=83420&r2=83421&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp (original) +++ llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp Tue Oct 6 17:01:15 2009 @@ -163,9 +163,8 @@ if (MO.isUse()) { // Insert a copy from VirtReg. - AddDefaultPred(BuildMI(MBB, MBBI, MI->getDebugLoc(), - TII->get(ARM::FCPYD), MO.getReg()) - .addReg(VirtReg)); + TII->copyRegToReg(MBB, MBBI, MO.getReg(), VirtReg, + ARM::DPRRegisterClass, ARM::DPRRegisterClass); if (MO.isKill()) { MachineInstr *CopyMI = prior(MBBI); CopyMI->findRegisterUseOperand(VirtReg)->setIsKill(); @@ -173,9 +172,8 @@ MO.setIsKill(); } else if (MO.isDef() && !MO.isDead()) { // Add a copy to VirtReg. 
- AddDefaultPred(BuildMI(MBB, NextI, MI->getDebugLoc(), - TII->get(ARM::FCPYD), VirtReg) - .addReg(MO.getReg())); + TII->copyRegToReg(MBB, NextI, VirtReg, MO.getReg(), + ARM::DPRRegisterClass, ARM::DPRRegisterClass); } } } From bob.wilson at apple.com Tue Oct 6 17:01:59 2009 From: bob.wilson at apple.com (Bob Wilson) Date: Tue, 06 Oct 2009 22:01:59 -0000 Subject: [llvm-commits] [llvm] r83422 - in /llvm/trunk/lib/Target/ARM: ARMISelDAGToDAG.cpp ARMInstrNEON.td ARMRegisterInfo.h NEONPreAllocPass.cpp Message-ID: <200910062202.n96M20L7013475@zion.cs.uiuc.edu> Author: bwilson Date: Tue Oct 6 17:01:59 2009 New Revision: 83422 URL: http://llvm.org/viewvc/llvm-project?rev=83422&view=rev Log: Add codegen support for NEON vld2 operations on quad registers. Modified: llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp llvm/trunk/lib/Target/ARM/ARMInstrNEON.td llvm/trunk/lib/Target/ARM/ARMRegisterInfo.h llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp Modified: llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp?rev=83422&r1=83421&r2=83422&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp (original) +++ llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp Tue Oct 6 17:01:59 2009 @@ -130,6 +130,10 @@ virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode, std::vector &OutOps); + + /// PairDRegs - Insert a pair of double registers into an implicit def to + /// form a quad register. + SDNode *PairDRegs(EVT VT, SDValue V0, SDValue V1); }; } @@ -923,6 +927,20 @@ return 0; } +/// PairDRegs - Insert a pair of double registers into an implicit def to +/// form a quad register. 
+SDNode *ARMDAGToDAGISel::PairDRegs(EVT VT, SDValue V0, SDValue V1) { + DebugLoc dl = V0.getNode()->getDebugLoc(); + SDValue Undef = + SDValue(CurDAG->getMachineNode(TargetInstrInfo::IMPLICIT_DEF, dl, VT), 0); + SDValue SubReg0 = CurDAG->getTargetConstant(ARM::DSUBREG_0, MVT::i32); + SDValue SubReg1 = CurDAG->getTargetConstant(ARM::DSUBREG_1, MVT::i32); + SDNode *Pair = CurDAG->getMachineNode(TargetInstrInfo::INSERT_SUBREG, dl, + VT, Undef, V0, SubReg0); + return CurDAG->getMachineNode(TargetInstrInfo::INSERT_SUBREG, dl, + VT, SDValue(Pair, 0), V1, SubReg1); +} + SDNode *ARMDAGToDAGISel::Select(SDValue Op) { SDNode *N = Op.getNode(); DebugLoc dl = N->getDebugLoc(); @@ -1332,16 +1350,33 @@ SDValue MemAddr, MemUpdate, MemOpc; if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc)) return NULL; + EVT RegVT = VT; switch (VT.getSimpleVT().SimpleTy) { default: llvm_unreachable("unhandled vld2 type"); case MVT::v8i8: Opc = ARM::VLD2d8; break; case MVT::v4i16: Opc = ARM::VLD2d16; break; case MVT::v2f32: case MVT::v2i32: Opc = ARM::VLD2d32; break; + case MVT::v16i8: Opc = ARM::VLD2q8; RegVT = MVT::v8i8; break; + case MVT::v8i16: Opc = ARM::VLD2q16; RegVT = MVT::v4i16; break; + case MVT::v4f32: Opc = ARM::VLD2q32; RegVT = MVT::v2f32; break; + case MVT::v4i32: Opc = ARM::VLD2q32; RegVT = MVT::v2i32; break; } SDValue Chain = N->getOperand(0); const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Chain }; - return CurDAG->getMachineNode(Opc, dl, VT, VT, MVT::Other, Ops, 4); + if (RegVT == VT) + return CurDAG->getMachineNode(Opc, dl, VT, VT, MVT::Other, Ops, 4); + + // Quad registers are loaded as pairs of double registers. 
+ std::vector ResTys(4, RegVT); + ResTys.push_back(MVT::Other); + SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 4); + SDNode *Q0 = PairDRegs(VT, SDValue(VLd, 0), SDValue(VLd, 1)); + SDNode *Q1 = PairDRegs(VT, SDValue(VLd, 2), SDValue(VLd, 3)); + ReplaceUses(SDValue(N, 0), SDValue(Q0, 0)); + ReplaceUses(SDValue(N, 1), SDValue(Q1, 0)); + ReplaceUses(SDValue(N, 2), SDValue(VLd, 4)); + return NULL; } case Intrinsic::arm_neon_vld3: { Modified: llvm/trunk/lib/Target/ARM/ARMInstrNEON.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrNEON.td?rev=83422&r1=83421&r2=83422&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrNEON.td (original) +++ llvm/trunk/lib/Target/ARM/ARMInstrNEON.td Tue Oct 6 17:01:59 2009 @@ -182,11 +182,20 @@ class VLD2D : NLdSt<(outs DPR:$dst1, DPR:$dst2), (ins addrmode6:$addr), IIC_VLD2, !strconcat(OpcodeStr, "\t\\{$dst1,$dst2\\}, $addr"), "", []>; +class VLD2Q + : NLdSt<(outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4), + (ins addrmode6:$addr), IIC_VLD2, + !strconcat(OpcodeStr, "\t\\{$dst1,$dst2,$dst3,$dst4\\}, $addr"), + "", []>; def VLD2d8 : VLD2D<"vld2.8">; def VLD2d16 : VLD2D<"vld2.16">; def VLD2d32 : VLD2D<"vld2.32">; +def VLD2q8 : VLD2Q<"vld2.8">; +def VLD2q16 : VLD2Q<"vld2.16">; +def VLD2q32 : VLD2Q<"vld2.32">; + // VLD3 : Vector Load (multiple 3-element structures) class VLD3D : NLdSt<(outs DPR:$dst1, DPR:$dst2, DPR:$dst3), (ins addrmode6:$addr), Modified: llvm/trunk/lib/Target/ARM/ARMRegisterInfo.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMRegisterInfo.h?rev=83422&r1=83421&r2=83422&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMRegisterInfo.h (original) +++ llvm/trunk/lib/Target/ARM/ARMRegisterInfo.h Tue Oct 6 17:01:59 2009 @@ -23,6 +23,16 @@ class ARMBaseInstrInfo; class Type; +namespace ARM { + /// SubregIndex - The index of 
various subregister classes. Note that + /// these indices must be kept in sync with the class indices in the + /// ARMRegisterInfo.td file. + enum SubregIndex { + SSUBREG_0 = 1, SSUBREG_1 = 2, SSUBREG_2 = 3, SSUBREG_3 = 4, + DSUBREG_0 = 5, DSUBREG_1 = 6 + }; +} + struct ARMRegisterInfo : public ARMBaseRegisterInfo { public: ARMRegisterInfo(const ARMBaseInstrInfo &tii, const ARMSubtarget &STI); Modified: llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp?rev=83422&r1=83421&r2=83422&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp (original) +++ llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp Tue Oct 6 17:01:59 2009 @@ -52,6 +52,13 @@ NumRegs = 2; return true; + case ARM::VLD2q8: + case ARM::VLD2q16: + case ARM::VLD2q32: + FirstOpnd = 0; + NumRegs = 4; + return true; + case ARM::VLD3d8: case ARM::VLD3d16: case ARM::VLD3d32: From enderby at apple.com Tue Oct 6 17:26:42 2009 From: enderby at apple.com (Kevin Enderby) Date: Tue, 06 Oct 2009 22:26:42 -0000 Subject: [llvm-commits] [llvm] r83424 - /llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp Message-ID: <200910062226.n96MQgaB016790@zion.cs.uiuc.edu> Author: enderby Date: Tue Oct 6 17:26:42 2009 New Revision: 83424 URL: http://llvm.org/viewvc/llvm-project?rev=83424&view=rev Log: Added bits of the ARM target assembler to llvm-mc to parse some load instruction operands. Some parsing of arm memory operands for preindexing and postindexing forms including with register controlled shifts. This is a work in progress. 
Modified: llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp Modified: llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp?rev=83424&r1=83423&r2=83424&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp (original) +++ llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp Tue Oct 6 17:26:42 2009 @@ -23,6 +23,15 @@ namespace { struct ARMOperand; +// The shift types for register controlled shifts in arm memory addressing +enum ShiftType { + Lsl, + Lsr, + Asr, + Ror, + Rrx +}; + class ARMAsmParser : public TargetAsmParser { MCAsmParser &Parser; @@ -35,8 +44,31 @@ bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); } + bool ParseRegister(ARMOperand &Op); + + bool ParseMemory(ARMOperand &Op); + + bool ParseShift(enum ShiftType *St, const MCExpr *ShiftAmount); + + bool ParseOperand(ARMOperand &Op); + bool ParseDirectiveWord(unsigned Size, SMLoc L); + // TODO - For now hacked versions of the next two are in here in this file to + // allow some parser testing until the table gen versions are implemented. + + /// @name Auto-generated Match Functions + /// { + bool MatchInstruction(SmallVectorImpl &Operands, + MCInst &Inst); + + /// MatchRegisterName - Match the given string to a register name, or 0 if + /// there is no match. + unsigned MatchRegisterName(const StringRef &Name); + + /// } + + public: ARMAsmParser(const Target &T, MCAsmParser &_Parser) : TargetAsmParser(T), Parser(_Parser) {} @@ -48,9 +80,380 @@ } // end anonymous namespace +namespace { + +/// ARMOperand - Instances of this class represent a parsed ARM machine +/// instruction. 
+struct ARMOperand { + enum { + Token, + Register, + Memory + } Kind; + + + union { + struct { + const char *Data; + unsigned Length; + } Tok; + + struct { + unsigned RegNum; + } Reg; + + // This is for all forms of ARM address expressions + struct { + unsigned BaseRegNum; + bool OffsetIsReg; + const MCExpr *Offset; // used when OffsetIsReg is false + unsigned OffsetRegNum; // used when OffsetIsReg is true + bool OffsetRegShifted; // only used when OffsetIsReg is true + enum ShiftType ShiftType; // used when OffsetRegShifted is true + const MCExpr *ShiftAmount; // used when OffsetRegShifted is true + bool Preindexed; + bool Postindexed; + bool Negative; // only used when OffsetIsReg is true + bool Writeback; + } Mem; + + }; + + StringRef getToken() const { + assert(Kind == Token && "Invalid access!"); + return StringRef(Tok.Data, Tok.Length); + } + + unsigned getReg() const { + assert(Kind == Register && "Invalid access!"); + return Reg.RegNum; + } + + bool isToken() const {return Kind == Token; } + + bool isReg() const { return Kind == Register; } + + void addRegOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateReg(getReg())); + } + + static ARMOperand CreateToken(StringRef Str) { + ARMOperand Res; + Res.Kind = Token; + Res.Tok.Data = Str.data(); + Res.Tok.Length = Str.size(); + return Res; + } + + static ARMOperand CreateReg(unsigned RegNum) { + ARMOperand Res; + Res.Kind = Register; + Res.Reg.RegNum = RegNum; + return Res; + } + + static ARMOperand CreateMem(unsigned BaseRegNum, bool OffsetIsReg, + const MCExpr *Offset, unsigned OffsetRegNum, + bool OffsetRegShifted, enum ShiftType ShiftType, + const MCExpr *ShiftAmount, bool Preindexed, + bool Postindexed, bool Negative, bool Writeback) { + ARMOperand Res; + Res.Kind = Memory; + Res.Mem.BaseRegNum = BaseRegNum; + Res.Mem.OffsetIsReg = OffsetIsReg; + Res.Mem.Offset = Offset; + Res.Mem.OffsetRegNum = OffsetRegNum; + 
Res.Mem.OffsetRegShifted = OffsetRegShifted; + Res.Mem.ShiftType = ShiftType; + Res.Mem.ShiftAmount = ShiftAmount; + Res.Mem.Preindexed = Preindexed; + Res.Mem.Postindexed = Postindexed; + Res.Mem.Negative = Negative; + Res.Mem.Writeback = Writeback; + return Res; + } +}; + +} // end anonymous namespace. + +// Try to parse a register name. The token must be an Identifier when called, +// and if it is a register name a Reg operand is created, the token is eaten +// and false is returned. Else true is returned and no token is eaten. +// TODO this is likely to change to allow different register types and or to +// parse for a specific register type. +bool ARMAsmParser::ParseRegister(ARMOperand &Op) { + const AsmToken &Tok = getLexer().getTok(); + assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier"); + + // FIXME: Validate register for the current architecture; we have to do + // validation later, so maybe there is no need for this here. + unsigned RegNum; + + RegNum = MatchRegisterName(Tok.getString()); + if (RegNum == 0) + return true; + + Op = ARMOperand::CreateReg(RegNum); + getLexer().Lex(); // Eat identifier token. + + return false; +} + +// Try to parse an arm memory expression. It must start with a '[' token. +// TODO Only preindexing and postindexing addressing are started, unindexed +// with option, etc are still to do. +bool ARMAsmParser::ParseMemory(ARMOperand &Op) { + const AsmToken &LBracTok = getLexer().getTok(); + assert(LBracTok.is(AsmToken::LBrac) && "Token is not an Left Bracket"); + getLexer().Lex(); // Eat left bracket token. + + const AsmToken &BaseRegTok = getLexer().getTok(); + if (BaseRegTok.isNot(AsmToken::Identifier)) + return Error(BaseRegTok.getLoc(), "register expected"); + unsigned BaseRegNum = MatchRegisterName(BaseRegTok.getString()); + if (BaseRegNum == 0) + return Error(BaseRegTok.getLoc(), "register expected"); + getLexer().Lex(); // Eat identifier token. 
+ + bool Preindexed = false; + bool Postindexed = false; + bool OffsetIsReg = false; + bool Negative = false; + bool Writeback = false; + + // First look for preindexed address forms: + // [Rn, +/-Rm] + // [Rn, #offset] + // [Rn, +/-Rm, shift] + // that is after the "[Rn" we now have see if the next token is a comma. + const AsmToken &Tok = getLexer().getTok(); + if (Tok.is(AsmToken::Comma)) { + Preindexed = true; + getLexer().Lex(); // Eat comma token. + + const AsmToken &NextTok = getLexer().getTok(); + if (NextTok.is(AsmToken::Plus)) + getLexer().Lex(); // Eat plus token. + else if (NextTok.is(AsmToken::Minus)) { + Negative = true; + getLexer().Lex(); // Eat minus token + } + + // See if there is a register following the "[Rn," we have so far. + const AsmToken &OffsetRegTok = getLexer().getTok(); + unsigned OffsetRegNum = MatchRegisterName(OffsetRegTok.getString()); + bool OffsetRegShifted = false; + enum ShiftType ShiftType; + const MCExpr *ShiftAmount; + const MCExpr *Offset; + if (OffsetRegNum != 0) { + OffsetIsReg = true; + getLexer().Lex(); // Eat identifier token for the offset register. + // Look for a comma then a shift + const AsmToken &Tok = getLexer().getTok(); + if (Tok.is(AsmToken::Comma)) { + getLexer().Lex(); // Eat comma token. + + const AsmToken &Tok = getLexer().getTok(); + if (ParseShift(&ShiftType, ShiftAmount)) + return Error(Tok.getLoc(), "shift expected"); + OffsetRegShifted = true; + } + } + else { // "[Rn," we have so far was not followed by "Rm" + // Look for #offset following the "[Rn," + const AsmToken &HashTok = getLexer().getTok(); + if (HashTok.isNot(AsmToken::Hash)) + return Error(HashTok.getLoc(), "'#' expected"); + getLexer().Lex(); // Eat hash token. + + if (getParser().ParseExpression(Offset)) + return true; + } + const AsmToken &RBracTok = getLexer().getTok(); + if (RBracTok.isNot(AsmToken::RBrac)) + return Error(RBracTok.getLoc(), "']' expected"); + getLexer().Lex(); // Eat right bracket token. 
+ + const AsmToken &ExclaimTok = getLexer().getTok(); + if (ExclaimTok.is(AsmToken::Exclaim)) { + Writeback = true; + getLexer().Lex(); // Eat exclaim token + } + Op = ARMOperand::CreateMem(BaseRegNum, OffsetIsReg, Offset, OffsetRegNum, + OffsetRegShifted, ShiftType, ShiftAmount, + Preindexed, Postindexed, Negative, Writeback); + return false; + } + // The "[Rn" we have so far was not followed by a comma. + else if (Tok.is(AsmToken::RBrac)) { + // This is a post indexing addressing forms: + // [Rn], #offset + // [Rn], +/-Rm + // [Rn], +/-Rm, shift + // that is a ']' follows after the "[Rn". + Postindexed = true; + Writeback = true; + getLexer().Lex(); // Eat right bracket token. + + const AsmToken &CommaTok = getLexer().getTok(); + if (CommaTok.isNot(AsmToken::Comma)) + return Error(CommaTok.getLoc(), "',' expected"); + getLexer().Lex(); // Eat comma token. + + const AsmToken &NextTok = getLexer().getTok(); + if (NextTok.is(AsmToken::Plus)) + getLexer().Lex(); // Eat plus token. + else if (NextTok.is(AsmToken::Minus)) { + Negative = true; + getLexer().Lex(); // Eat minus token + } + + // See if there is a register following the "[Rn]," we have so far. + const AsmToken &OffsetRegTok = getLexer().getTok(); + unsigned OffsetRegNum = MatchRegisterName(OffsetRegTok.getString()); + bool OffsetRegShifted = false; + enum ShiftType ShiftType; + const MCExpr *ShiftAmount; + const MCExpr *Offset; + if (OffsetRegNum != 0) { + OffsetIsReg = true; + getLexer().Lex(); // Eat identifier token for the offset register. + // Look for a comma then a shift + const AsmToken &Tok = getLexer().getTok(); + if (Tok.is(AsmToken::Comma)) { + getLexer().Lex(); // Eat comma token. 
+ + const AsmToken &Tok = getLexer().getTok(); + if (ParseShift(&ShiftType, ShiftAmount)) + return Error(Tok.getLoc(), "shift expected"); + OffsetRegShifted = true; + } + } + else { // "[Rn]," we have so far was not followed by "Rm" + // Look for #offset following the "[Rn]," + const AsmToken &HashTok = getLexer().getTok(); + if (HashTok.isNot(AsmToken::Hash)) + return Error(HashTok.getLoc(), "'#' expected"); + getLexer().Lex(); // Eat hash token. + + if (getParser().ParseExpression(Offset)) + return true; + } + Op = ARMOperand::CreateMem(BaseRegNum, OffsetIsReg, Offset, OffsetRegNum, + OffsetRegShifted, ShiftType, ShiftAmount, + Preindexed, Postindexed, Negative, Writeback); + return false; + } + + return true; +} + +/// ParseShift as one of these two: +/// ( lsl | lsr | asr | ror ) , # shift_amount +/// rrx +/// and returns true if it parses a shift otherwise it returns false. +bool ARMAsmParser::ParseShift(ShiftType *St, const MCExpr *ShiftAmount) { + const AsmToken &Tok = getLexer().getTok(); + if (Tok.isNot(AsmToken::Identifier)) + return true; + const StringRef &ShiftName = Tok.getString(); + if (ShiftName == "lsl" || ShiftName == "LSL") + *St = Lsl; + else if (ShiftName == "lsr" || ShiftName == "LSR") + *St = Lsr; + else if (ShiftName == "asr" || ShiftName == "ASR") + *St = Asr; + else if (ShiftName == "ror" || ShiftName == "ROR") + *St = Ror; + else if (ShiftName == "rrx" || ShiftName == "RRX") + *St = Rrx; + else + return true; + getLexer().Lex(); // Eat shift type token. + + // For all but a Rotate right there must be a '#' and a shift amount + if (*St != Rrx) { + // Look for # following the shift type + const AsmToken &HashTok = getLexer().getTok(); + if (HashTok.isNot(AsmToken::Hash)) + return Error(HashTok.getLoc(), "'#' expected"); + getLexer().Lex(); // Eat hash token. 
+ + if (getParser().ParseExpression(ShiftAmount)) + return true; + } + + return false; +} + +// A hack to allow some testing +unsigned ARMAsmParser::MatchRegisterName(const StringRef &Name) { + if (Name == "r1") + return 1; + else if (Name == "r2") + return 2; + else if (Name == "r3") + return 3; + return 0; +} + +// A hack to allow some testing +bool ARMAsmParser::MatchInstruction(SmallVectorImpl &Operands, + MCInst &Inst) { + struct ARMOperand Op0 = Operands[0]; + assert(Op0.Kind == ARMOperand::Token && "First operand not a Token"); + const StringRef &Mnemonic = Op0.getToken(); + if (Mnemonic == "add" || + Mnemonic == "ldr") + return false; + + return true; +} + +// TODO - this is a work in progress +bool ARMAsmParser::ParseOperand(ARMOperand &Op) { + switch (getLexer().getKind()) { + case AsmToken::Identifier: + if (!ParseRegister(Op)) + return false; + // TODO parse other operands that start with an identifier + return true; + case AsmToken::LBrac: + if (!ParseMemory(Op)) + return false; + default: + return true; + } +} + bool ARMAsmParser::ParseInstruction(const StringRef &Name, MCInst &Inst) { + SmallVector Operands; + + Operands.push_back(ARMOperand::CreateToken(Name)); + SMLoc Loc = getLexer().getTok().getLoc(); - Error(Loc, "ARMAsmParser::ParseInstruction currently unimplemented"); + if (getLexer().isNot(AsmToken::EndOfStatement)) { + + // Read the first operand. + Operands.push_back(ARMOperand()); + if (ParseOperand(Operands.back())) + return true; + + while (getLexer().is(AsmToken::Comma)) { + getLexer().Lex(); // Eat the comma. + + // Parse and remember the operand. 
+ Operands.push_back(ARMOperand()); + if (ParseOperand(Operands.back())) + return true; + } + } + if (!MatchInstruction(Operands, Inst)) + return false; + + Error(Loc, "ARMAsmParser::ParseInstruction only partly implemented"); return true; } From dpatel at apple.com Tue Oct 6 17:33:40 2009 From: dpatel at apple.com (Devang Patel) Date: Tue, 06 Oct 2009 22:33:40 -0000 Subject: [llvm-commits] [llvm-gcc-4.2] r83425 - in /llvm-gcc-4.2/trunk/gcc: llvm-convert.cpp llvm-debug.cpp llvm-debug.h Message-ID: <200910062233.n96MXeA7017725@zion.cs.uiuc.edu> Author: dpatel Date: Tue Oct 6 17:33:39 2009 New Revision: 83425 URL: http://llvm.org/viewvc/llvm-project?rev=83425&view=rev Log: - Remove EmitRegionStart(). - s/EmitRegionEnd/EmitFunctionEnd/g Modified: llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp llvm-gcc-4.2/trunk/gcc/llvm-debug.cpp llvm-gcc-4.2/trunk/gcc/llvm-debug.h Modified: llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp?rev=83425&r1=83424&r2=83425&view=diff ============================================================================== --- llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp (original) +++ llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp Tue Oct 6 17:33:39 2009 @@ -696,7 +696,7 @@ } if (TheDebugInfo) { TheDebugInfo->EmitStopPoint(Fn, Builder.GetInsertBlock()); - TheDebugInfo->EmitRegionEnd(Builder.GetInsertBlock(), true); + TheDebugInfo->EmitFunctionEnd(Builder.GetInsertBlock(), true); } if (RetVals.empty()) Builder.CreateRetVoid(); Modified: llvm-gcc-4.2/trunk/gcc/llvm-debug.cpp URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/gcc/llvm-debug.cpp?rev=83425&r1=83424&r2=83425&view=diff ============================================================================== --- llvm-gcc-4.2/trunk/gcc/llvm-debug.cpp (original) +++ llvm-gcc-4.2/trunk/gcc/llvm-debug.cpp Tue Oct 6 17:33:39 2009 @@ -288,20 +288,9 @@ return getOrCreateCompileUnit(main_input_filename); } -/// EmitRegionStart- Constructs the debug 
code for entering a declarative -/// region - "llvm.dbg.region.start." -void DebugInfo::EmitRegionStart(BasicBlock *CurBB) { - llvm::DIDescriptor D; - if (!RegionStack.empty()) - D = RegionStack.back(); - D = DebugFactory.CreateLexicalBlock(D); - RegionStack.push_back(D); - DebugFactory.InsertRegionStart(D, CurBB); -} - -/// EmitRegionEnd - Constructs the debug code for exiting a declarative +/// EmitFunctionEnd - Constructs the debug code for exiting a declarative /// region - "llvm.dbg.region.end." -void DebugInfo::EmitRegionEnd(BasicBlock *CurBB, bool EndFunction) { +void DebugInfo::EmitFunctionEnd(BasicBlock *CurBB, bool EndFunction) { assert(!RegionStack.empty() && "Region stack mismatch, stack empty!"); DebugFactory.InsertRegionEnd(RegionStack.back(), CurBB); RegionStack.pop_back(); Modified: llvm-gcc-4.2/trunk/gcc/llvm-debug.h URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/gcc/llvm-debug.h?rev=83425&r1=83424&r2=83425&view=diff ============================================================================== --- llvm-gcc-4.2/trunk/gcc/llvm-debug.h (original) +++ llvm-gcc-4.2/trunk/gcc/llvm-debug.h Tue Oct 6 17:33:39 2009 @@ -84,13 +84,9 @@ /// "llvm.dbg.func.start." void EmitFunctionStart(tree_node *FnDecl, Function *Fn, BasicBlock *CurBB); - /// EmitRegionStart- Constructs the debug code for entering a declarative - /// region - "llvm.dbg.region.start." - void EmitRegionStart(BasicBlock *CurBB); - - /// EmitRegionEnd - Constructs the debug code for exiting a declarative + /// EmitFunctionEnd - Constructs the debug code for exiting a declarative /// region - "llvm.dbg.region.end." - void EmitRegionEnd(BasicBlock *CurBB, bool EndFunction); + void EmitFunctionEnd(BasicBlock *CurBB, bool EndFunction); /// EmitDeclare - Constructs the debug code for allocation of a new variable. /// region - "llvm.dbg.declare." 
From dpatel at apple.com Tue Oct 6 18:14:05 2009 From: dpatel at apple.com (Devang Patel) Date: Tue, 06 Oct 2009 23:14:05 -0000 Subject: [llvm-commits] [llvm-gcc-4.2] r83430 - in /llvm-gcc-4.2/trunk/gcc: llvm-convert.cpp llvm-debug.cpp llvm-debug.h Message-ID: <200910062314.n96NE55Z022949@zion.cs.uiuc.edu> Author: dpatel Date: Tue Oct 6 18:14:03 2009 New Revision: 83430 URL: http://llvm.org/viewvc/llvm-project?rev=83430&view=rev Log: Add support to attach debug info to an instruction. This is not yet enabled. Modified: llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp llvm-gcc-4.2/trunk/gcc/llvm-debug.cpp llvm-gcc-4.2/trunk/gcc/llvm-debug.h Modified: llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp?rev=83430&r1=83429&r2=83430&view=diff ============================================================================== --- llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp (original) +++ llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp Tue Oct 6 18:14:03 2009 @@ -695,7 +695,7 @@ } } if (TheDebugInfo) { - TheDebugInfo->EmitStopPoint(Fn, Builder.GetInsertBlock()); + TheDebugInfo->EmitStopPoint(Fn, Builder.GetInsertBlock(), Builder); TheDebugInfo->EmitFunctionEnd(Builder.GetInsertBlock(), true); } if (RetVals.empty()) @@ -792,7 +792,7 @@ TheDebugInfo->setLocationLine(EXPR_LINENO(exp)); } - TheDebugInfo->EmitStopPoint(Fn, Builder.GetInsertBlock()); + TheDebugInfo->EmitStopPoint(Fn, Builder.GetInsertBlock(), Builder); } switch (TREE_CODE(exp)) { Modified: llvm-gcc-4.2/trunk/gcc/llvm-debug.cpp URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/gcc/llvm-debug.cpp?rev=83430&r1=83429&r2=83430&view=diff ============================================================================== --- llvm-gcc-4.2/trunk/gcc/llvm-debug.cpp (original) +++ llvm-gcc-4.2/trunk/gcc/llvm-debug.cpp Tue Oct 6 18:14:03 2009 @@ -250,8 +250,9 @@ Fn->hasInternalLinkage(), true /*definition*/); +#ifndef ATTACH_DEBUG_INFO_TO_AN_INSN 
DebugFactory.InsertSubprogramStart(SP, CurBB); - +#endif // Push function on region stack. RegionStack.push_back(SP); RegionMap[FnDecl] = SP; @@ -292,7 +293,9 @@ /// region - "llvm.dbg.region.end." void DebugInfo::EmitFunctionEnd(BasicBlock *CurBB, bool EndFunction) { assert(!RegionStack.empty() && "Region stack mismatch, stack empty!"); +#ifndef ATTACH_DEBUG_INFO_TO_AN_INSN DebugFactory.InsertRegionEnd(RegionStack.back(), CurBB); +#endif RegionStack.pop_back(); // Blocks get erased; clearing these is needed for determinism, and also // a good idea if the next function gets inlined. @@ -356,8 +359,8 @@ /// EmitStopPoint - Emit a call to llvm.dbg.stoppoint to indicate a change of /// source line - "llvm.dbg.stoppoint." Now enabled at -O. -void DebugInfo::EmitStopPoint(Function *Fn, BasicBlock *CurBB) { - +void DebugInfo::EmitStopPoint(Function *Fn, BasicBlock *CurBB, + LLVMBuilder &Builder) { // Don't bother if things are the same as last time. if (PrevLineNo == CurLineNo && PrevBB == CurBB && @@ -373,10 +376,20 @@ // Don't set/allow source line breakpoints in Apple Block prologue code // or in Apple Block helper functions. if (!isPartOfAppleBlockPrologue(CurLineNo) - && !isCopyOrDestroyHelper(cfun->decl)) + && !isCopyOrDestroyHelper(cfun->decl)) { +#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN + llvm::DIDescriptor DR = RegionStack.back(); + llvm::DIScope DS = llvm::DIScope(DR.getNode()); + llvm::DILocation DO(NULL); + llvm::DILocation DL = + DebugFactory.CreateLocation(CurLineNo, 0 /* column */, DS, DO); + Builder.SetCurrentDebugLocation(DL.getNode()); +#else DebugFactory.InsertStopPoint(getOrCreateCompileUnit(CurFullPath), CurLineNo, 0 /*column no. */, CurBB); +#endif + } } /// EmitGlobalVariable - Emit information about a global variable. 
Modified: llvm-gcc-4.2/trunk/gcc/llvm-debug.h URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/gcc/llvm-debug.h?rev=83430&r1=83429&r2=83430&view=diff ============================================================================== --- llvm-gcc-4.2/trunk/gcc/llvm-debug.h (original) +++ llvm-gcc-4.2/trunk/gcc/llvm-debug.h Tue Oct 6 18:14:03 2009 @@ -28,6 +28,7 @@ #ifndef LLVM_DEBUG_H #define LLVM_DEBUG_H +#include "llvm-internal.h" #include "llvm/Analysis/DebugInfo.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/ValueHandle.h" @@ -96,7 +97,7 @@ /// EmitStopPoint - Emit a call to llvm.dbg.stoppoint to indicate a change of /// source line. - void EmitStopPoint(Function *Fn, BasicBlock *CurBB); + void EmitStopPoint(Function *Fn, BasicBlock *CurBB, LLVMBuilder &Builder); /// EmitGlobalVariable - Emit information about a global variable. /// From echristo at apple.com Tue Oct 6 19:02:19 2009 From: echristo at apple.com (Eric Christopher) Date: Wed, 07 Oct 2009 00:02:19 -0000 Subject: [llvm-commits] [llvm] r83434 - /llvm/trunk/lib/Transforms/Utils/InlineCost.cpp Message-ID: <200910070002.n9702JdZ029127@zion.cs.uiuc.edu> Author: echristo Date: Tue Oct 6 19:02:18 2009 New Revision: 83434 URL: http://llvm.org/viewvc/llvm-project?rev=83434&view=rev Log: While we still have a MallocInst treat it as a call like any other for inlining. When MallocInst goes away this code will be subsumed as part of calls and work just fine... Modified: llvm/trunk/lib/Transforms/Utils/InlineCost.cpp Modified: llvm/trunk/lib/Transforms/Utils/InlineCost.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Utils/InlineCost.cpp?rev=83434&r1=83433&r2=83434&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/Utils/InlineCost.cpp (original) +++ llvm/trunk/lib/Transforms/Utils/InlineCost.cpp Tue Oct 6 19:02:18 2009 @@ -135,6 +135,10 @@ NumInsts += 5; } + // This, too, is a call. 
+ if (isa<MallocInst>(II)) + NumInsts += 5; + if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) { if (!AI->isStaticAlloca()) this->usesDynamicAlloca = true; From asl at math.spbu.ru Tue Oct 6 19:06:35 2009 From: asl at math.spbu.ru (Anton Korobeynikov) Date: Wed, 07 Oct 2009 00:06:35 -0000 Subject: [llvm-commits] [llvm] r83435 - in /llvm/trunk/lib/Target/ARM: ARMBaseInstrInfo.cpp ARMISelLowering.cpp Message-ID: <200910070006.n9706ZWW029683@zion.cs.uiuc.edu> Author: asl Date: Tue Oct 6 19:06:35 2009 New Revision: 83435 URL: http://llvm.org/viewvc/llvm-project?rev=83435&view=rev Log: Add PseudoSourceValues for constpool stuff on ELF (Darwin should use something similar) and register spills. Modified: llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.cpp llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Modified: llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.cpp?rev=83435&r1=83434&r2=83435&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.cpp (original) +++ llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.cpp Tue Oct 6 19:06:35 2009 @@ -21,6 +21,8 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" @@ -665,27 +667,35 @@ const TargetRegisterClass *RC) const { DebugLoc DL = DebugLoc::getUnknownLoc(); if (I != MBB.end()) DL = I->getDebugLoc(); + MachineFunction &MF = *MBB.getParent(); + MachineFrameInfo &MFI = *MF.getFrameInfo(); + + MachineMemOperand *MMO = + MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FI), + MachineMemOperand::MOStore, 0, + MFI.getObjectSize(FI), + MFI.getObjectAlignment(FI)); if (RC == ARM::GPRRegisterClass) { 
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STR)) .addReg(SrcReg, getKillRegState(isKill)) - .addFrameIndex(FI).addReg(0).addImm(0)); + .addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)); } else if (RC == ARM::DPRRegisterClass || RC == ARM::DPR_VFP2RegisterClass || RC == ARM::DPR_8RegisterClass) { AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FSTD)) .addReg(SrcReg, getKillRegState(isKill)) - .addFrameIndex(FI).addImm(0)); + .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); } else if (RC == ARM::SPRRegisterClass) { AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FSTS)) .addReg(SrcReg, getKillRegState(isKill)) - .addFrameIndex(FI).addImm(0)); + .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); } else { assert((RC == ARM::QPRRegisterClass || RC == ARM::QPR_VFP2RegisterClass) && "Unknown regclass!"); // FIXME: Neon instructions should support predicates BuildMI(MBB, I, DL, get(ARM::VSTRQ)).addReg(SrcReg, getKillRegState(isKill)) - .addFrameIndex(FI).addImm(0); + .addFrameIndex(FI).addImm(0).addMemOperand(MMO); } } @@ -695,23 +705,31 @@ const TargetRegisterClass *RC) const { DebugLoc DL = DebugLoc::getUnknownLoc(); if (I != MBB.end()) DL = I->getDebugLoc(); + MachineFunction &MF = *MBB.getParent(); + MachineFrameInfo &MFI = *MF.getFrameInfo(); + + MachineMemOperand *MMO = + MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FI), + MachineMemOperand::MOLoad, 0, + MFI.getObjectSize(FI), + MFI.getObjectAlignment(FI)); if (RC == ARM::GPRRegisterClass) { AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDR), DestReg) - .addFrameIndex(FI).addReg(0).addImm(0)); + .addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)); } else if (RC == ARM::DPRRegisterClass || RC == ARM::DPR_VFP2RegisterClass || RC == ARM::DPR_8RegisterClass) { AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FLDD), DestReg) - .addFrameIndex(FI).addImm(0)); + .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); } else if (RC == ARM::SPRRegisterClass) { AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FLDS), DestReg) 
- .addFrameIndex(FI).addImm(0)); + .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); } else { assert((RC == ARM::QPRRegisterClass || RC == ARM::QPR_VFP2RegisterClass) && "Unknown regclass!"); // FIXME: Neon instructions should support predicates - BuildMI(MBB, I, DL, get(ARM::VLDRQ), DestReg).addFrameIndex(FI).addImm(0); + BuildMI(MBB, I, DL, get(ARM::VLDRQ), DestReg).addFrameIndex(FI).addImm(0).addMemOperand(MMO); } } Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp?rev=83435&r1=83434&r2=83435&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp (original) +++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Tue Oct 6 19:06:35 2009 @@ -1303,17 +1303,20 @@ SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), - CPAddr, NULL, 0); + CPAddr, + PseudoSourceValue::getConstantPool(), 0); SDValue Chain = Result.getValue(1); SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT); Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, GOT); if (!UseGOTOFF) - Result = DAG.getLoad(PtrVT, dl, Chain, Result, NULL, 0); + Result = DAG.getLoad(PtrVT, dl, Chain, Result, + PseudoSourceValue::getGOT(), 0); return Result; } else { SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4); CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); - return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, NULL, 0); + return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, + PseudoSourceValue::getConstantPool(), 0); } } @@ -1360,7 +1363,8 @@ ARMPCLabelIndex, PCAdj); SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); - SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, NULL, 0); + SDValue Result = 
DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, + PseudoSourceValue::getConstantPool(), 0); SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32); return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); } From evan.cheng at apple.com Tue Oct 6 19:32:49 2009 From: evan.cheng at apple.com (Evan Cheng) Date: Tue, 6 Oct 2009 17:32:49 -0700 Subject: [llvm-commits] [llvm] r83434 - /llvm/trunk/lib/Transforms/Utils/InlineCost.cpp In-Reply-To: <200910070002.n9702JdZ029127@zion.cs.uiuc.edu> References: <200910070002.n9702JdZ029127@zion.cs.uiuc.edu> Message-ID: How about FreeInst? Evan On Oct 6, 2009, at 5:02 PM, Eric Christopher wrote: > Author: echristo > Date: Tue Oct 6 19:02:18 2009 > New Revision: 83434 > > URL: http://llvm.org/viewvc/llvm-project?rev=83434&view=rev > Log: > While we still have a MallocInst treat it as a call like any other > for inlining. > > When MallocInst goes away this code will be subsumed as part of > calls and work just fine... > > Modified: > llvm/trunk/lib/Transforms/Utils/InlineCost.cpp > > Modified: llvm/trunk/lib/Transforms/Utils/InlineCost.cpp > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Utils/InlineCost.cpp?rev=83434&r1=83433&r2=83434&view=diff > > ============================================================================== > --- llvm/trunk/lib/Transforms/Utils/InlineCost.cpp (original) > +++ llvm/trunk/lib/Transforms/Utils/InlineCost.cpp Tue Oct 6 > 19:02:18 2009 > @@ -135,6 +135,10 @@ > NumInsts += 5; > } > > + // This, too, is a call.
> + if (isa(II)) > + NumInsts += 5; > + > if (const AllocaInst *AI = dyn_cast(II)) { > if (!AI->isStaticAlloca()) > this->usesDynamicAlloca = true; > > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits From gohman at apple.com Tue Oct 6 19:33:10 2009 From: gohman at apple.com (Dan Gohman) Date: Wed, 07 Oct 2009 00:33:10 -0000 Subject: [llvm-commits] [llvm] r83437 - /llvm/trunk/lib/CodeGen/CodePlacementOpt.cpp Message-ID: <200910070033.n970XBle000548@zion.cs.uiuc.edu> Author: djg Date: Tue Oct 6 19:33:10 2009 New Revision: 83437 URL: http://llvm.org/viewvc/llvm-project?rev=83437&view=rev Log: Fix this comment. The loop header is the loop entry point. Modified: llvm/trunk/lib/CodeGen/CodePlacementOpt.cpp Modified: llvm/trunk/lib/CodeGen/CodePlacementOpt.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/CodePlacementOpt.cpp?rev=83437&r1=83436&r2=83437&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/CodePlacementOpt.cpp (original) +++ llvm/trunk/lib/CodeGen/CodePlacementOpt.cpp Tue Oct 6 19:33:10 2009 @@ -95,11 +95,11 @@ /// ... /// jmp B /// -/// C: --> new loop header +/// C: /// ... /// /// -/// B: +/// B: --> loop header /// ... /// jcc C, [exit] /// From echristo at apple.com Tue Oct 6 19:33:21 2009 From: echristo at apple.com (Eric Christopher) Date: Tue, 6 Oct 2009 17:33:21 -0700 Subject: [llvm-commits] [llvm] r83434 - /llvm/trunk/lib/Transforms/Utils/InlineCost.cpp In-Reply-To: References: <200910070002.n9702JdZ029127@zion.cs.uiuc.edu> Message-ID: On Oct 6, 2009, at 5:32 PM, Evan Cheng wrote: > How about FreeInst? Ha. Probably. 
I'll add that :) -eric From evan.cheng at apple.com Tue Oct 6 19:34:42 2009 From: evan.cheng at apple.com (Evan Cheng) Date: Tue, 6 Oct 2009 17:34:42 -0700 Subject: [llvm-commits] [llvm] r83435 - in /llvm/trunk/lib/Target/ARM: ARMBaseInstrInfo.cpp ARMISelLowering.cpp In-Reply-To: <200910070006.n9706ZWW029683@zion.cs.uiuc.edu> References: <200910070006.n9706ZWW029683@zion.cs.uiuc.edu> Message-ID: <441292C4-16F3-4F3E-9CB2-8ACF73D71D9E@apple.com> This patch is not ELF specific, right? Evan On Oct 6, 2009, at 5:06 PM, Anton Korobeynikov wrote: > Author: asl > Date: Tue Oct 6 19:06:35 2009 > New Revision: 83435 > > URL: http://llvm.org/viewvc/llvm-project?rev=83435&view=rev > Log: > Add PseudoSourceValues for constpool stuff on ELF (Darwin should use > something similar) > and register spills. > > Modified: > llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.cpp > llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp > > Modified: llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.cpp > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.cpp?rev=83435&r1=83434&r2=83435&view=diff > > ============================================================================== > --- llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.cpp (original) > +++ llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.cpp Tue Oct 6 > 19:06:35 2009 > @@ -21,6 +21,8 @@ > #include "llvm/CodeGen/MachineFrameInfo.h" > #include "llvm/CodeGen/MachineInstrBuilder.h" > #include "llvm/CodeGen/MachineJumpTableInfo.h" > +#include "llvm/CodeGen/MachineMemOperand.h" > +#include "llvm/CodeGen/PseudoSourceValue.h" > #include "llvm/MC/MCAsmInfo.h" > #include "llvm/Support/CommandLine.h" > #include "llvm/Support/ErrorHandling.h" > @@ -665,27 +667,35 @@ > const TargetRegisterClass *RC) const { > DebugLoc DL = DebugLoc::getUnknownLoc(); > if (I != MBB.end()) DL = I->getDebugLoc(); > + MachineFunction &MF = *MBB.getParent(); > + MachineFrameInfo &MFI = *MF.getFrameInfo(); > + > + MachineMemOperand *MMO =
> + MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FI), > + MachineMemOperand::MOStore, 0, > + MFI.getObjectSize(FI), > + MFI.getObjectAlignment(FI)); > > if (RC == ARM::GPRRegisterClass) { > AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STR)) > .addReg(SrcReg, getKillRegState(isKill)) > - .addFrameIndex(FI).addReg(0).addImm(0)); > + > .addFrameIndex > (FI).addReg(0).addImm(0).addMemOperand(MMO)); > } else if (RC == ARM::DPRRegisterClass || > RC == ARM::DPR_VFP2RegisterClass || > RC == ARM::DPR_8RegisterClass) { > AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FSTD)) > .addReg(SrcReg, getKillRegState(isKill)) > - .addFrameIndex(FI).addImm(0)); > + .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); > } else if (RC == ARM::SPRRegisterClass) { > AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FSTS)) > .addReg(SrcReg, getKillRegState(isKill)) > - .addFrameIndex(FI).addImm(0)); > + .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); > } else { > assert((RC == ARM::QPRRegisterClass || > RC == ARM::QPR_VFP2RegisterClass) && "Unknown regclass!"); > // FIXME: Neon instructions should support predicates > BuildMI(MBB, I, DL, get(ARM::VSTRQ)).addReg(SrcReg, > getKillRegState(isKill)) > - .addFrameIndex(FI).addImm(0); > + .addFrameIndex(FI).addImm(0).addMemOperand(MMO); > } > } > > @@ -695,23 +705,31 @@ > const TargetRegisterClass *RC) const { > DebugLoc DL = DebugLoc::getUnknownLoc(); > if (I != MBB.end()) DL = I->getDebugLoc(); > + MachineFunction &MF = *MBB.getParent(); > + MachineFrameInfo &MFI = *MF.getFrameInfo(); > + > + MachineMemOperand *MMO = > + MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FI), > + MachineMemOperand::MOLoad, 0, > + MFI.getObjectSize(FI), > + MFI.getObjectAlignment(FI)); > > if (RC == ARM::GPRRegisterClass) { > AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDR), DestReg) > - .addFrameIndex(FI).addReg(0).addImm(0)); > + > .addFrameIndex > (FI).addReg(0).addImm(0).addMemOperand(MMO)); > } else if (RC == ARM::DPRRegisterClass || > RC == 
ARM::DPR_VFP2RegisterClass || > RC == ARM::DPR_8RegisterClass) { > AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FLDD), DestReg) > - .addFrameIndex(FI).addImm(0)); > + .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); > } else if (RC == ARM::SPRRegisterClass) { > AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FLDS), DestReg) > - .addFrameIndex(FI).addImm(0)); > + .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); > } else { > assert((RC == ARM::QPRRegisterClass || > RC == ARM::QPR_VFP2RegisterClass) && "Unknown regclass!"); > // FIXME: Neon instructions should support predicates > - BuildMI(MBB, I, DL, get(ARM::VLDRQ), > DestReg).addFrameIndex(FI).addImm(0); > + BuildMI(MBB, I, DL, get(ARM::VLDRQ), > DestReg).addFrameIndex(FI).addImm(0).addMemOperand(MMO); > } > } > > > Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp?rev=83435&r1=83434&r2=83435&view=diff > > ============================================================================== > --- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp (original) > +++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Tue Oct 6 > 19:06:35 2009 > @@ -1303,17 +1303,20 @@ > SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); > CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); > SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), > - CPAddr, NULL, 0); > + CPAddr, > + > PseudoSourceValue::getConstantPool(), 0); > SDValue Chain = Result.getValue(1); > SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT); > Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, GOT); > if (!UseGOTOFF) > - Result = DAG.getLoad(PtrVT, dl, Chain, Result, NULL, 0); > + Result = DAG.getLoad(PtrVT, dl, Chain, Result, > + PseudoSourceValue::getGOT(), 0); > return Result; > } else { > SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4); > CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); > - return DAG.getLoad(PtrVT, dl,
DAG.getEntryNode(), CPAddr, NULL, > 0); > + return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, > + PseudoSourceValue::getConstantPool(), 0); > } > } > > @@ -1360,7 +1363,8 @@ > > ARMPCLabelIndex, PCAdj); > SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); > CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); > - SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), > CPAddr, NULL, 0); > + SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, > + > PseudoSourceValue::getConstantPool(), 0); > SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32); > return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); > } > > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits From jyasskin at google.com Tue Oct 6 19:44:01 2009 From: jyasskin at google.com (Jeffrey Yasskin) Date: Tue, 6 Oct 2009 17:44:01 -0700 Subject: [llvm-commits] [llvm] r83010 - /llvm/trunk/lib/VMCore/AsmWriter.cpp In-Reply-To: <200909282056.n8SKu0ca012458@zion.cs.uiuc.edu> References: <200909282056.n8SKu0ca012458@zion.cs.uiuc.edu> Message-ID: This patch assumes that the Module* parameter to AssemblyWriter() is non-null, but Value::print() in this file can pass NULL. This crashes if you run `opt -instcombine -debug-only=instcombine` on any input where instcombine will replace values. Let me know if you need a more precise test case. On Mon, Sep 28, 2009 at 1:56 PM, Devang Patel wrote: > Author: dpatel > Date: Mon Sep 28 15:56:00 2009 > New Revision: 83010 > > URL: http://llvm.org/viewvc/llvm-project?rev=83010&view=rev > Log: > Do not hardcode metadata names. 
> > Modified: > llvm/trunk/lib/VMCore/AsmWriter.cpp > > Modified: llvm/trunk/lib/VMCore/AsmWriter.cpp > URL: > http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/AsmWriter.cpp?rev=83010&r1=83009&r2=83010&view=diff > > > ============================================================================== > --- llvm/trunk/lib/VMCore/AsmWriter.cpp (original) > +++ llvm/trunk/lib/VMCore/AsmWriter.cpp Mon Sep 28 15:56:00 2009 > @@ -1266,6 +1266,7 @@ > TypePrinting TypePrinter; > AssemblyAnnotationWriter *AnnotationWriter; > std::vector NumberedTypes; > + DenseMap MDNames; > > public: > inline AssemblyWriter(formatted_raw_ostream &o, SlotTracker &Mac, > @@ -1273,6 +1274,14 @@ > AssemblyAnnotationWriter *AAW) > : Out(o), Machine(Mac), TheModule(M), AnnotationWriter(AAW) { > AddModuleTypesToPrinter(TypePrinter, NumberedTypes, M); > + // FIXME: Provide MDPrinter > + Metadata &TheMetadata = M->getContext().getMetadata(); > + const StringMap *Names = TheMetadata.getHandlerNames(); > + for (StringMapConstIterator I = Names->begin(), > + E = Names->end(); I != E; ++I) { > + const StringMapEntry &Entry = *I; > + MDNames[I->second] = Entry.getKeyData(); > + } > } > > void write(const Module *M) { printModule(M); } > @@ -1991,11 +2000,16 @@ > Out << ", align " << cast(I).getAlignment(); > } > > - // Print DebugInfo > + // Print Metadata info > Metadata &TheMetadata = I.getContext().getMetadata(); > - unsigned MDDbgKind = TheMetadata.getMDKind("dbg"); > - if (const MDNode *Dbg = TheMetadata.getMD(MDDbgKind, &I)) > - Out << ", dbg !" << Machine.getMetadataSlot(Dbg); > + const Metadata::MDMapTy *MDMap = TheMetadata.getMDs(&I); > + if (MDMap) > + for (Metadata::MDMapTy::const_iterator MI = MDMap->begin(), > + ME = MDMap->end(); MI != ME; ++MI) > + if (const MDNode *MD = dyn_cast_or_null(MI->second)) > + Out << ", " << MDNames[MI->first] > + << " !" 
<< Machine.getMetadataSlot(MD); > + > printInfoComment(I); > } > > > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits > -------------- next part -------------- An HTML attachment was scrubbed... URL: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20091006/6bb9bde5/attachment.html From asl at math.spbu.ru Tue Oct 6 19:44:51 2009 From: asl at math.spbu.ru (Anton Korobeynikov) Date: Wed, 07 Oct 2009 04:44:51 +0400 Subject: [llvm-commits] [llvm] r83435 - in /llvm/trunk/lib/Target/ARM: ARMBaseInstrInfo.cpp ARMISelLowering.cpp In-Reply-To: <441292C4-16F3-4F3E-9CB2-8ACF73D71D9E@apple.com> References: <200910070006.n9706ZWW029683@zion.cs.uiuc.edu> <441292C4-16F3-4F3E-9CB2-8ACF73D71D9E@apple.com> Message-ID: <1254876291.32230.64.camel@aslstation> Hello, Evan > This patch is not ELF specific, right? InstrInfo part is generic, lowering part touches only ELF-specific code. -- With best regards, Anton Korobeynikov. Faculty of Mathematics & Mechanics, Saint Petersburg State University. From echristo at apple.com Tue Oct 6 19:54:08 2009 From: echristo at apple.com (Eric Christopher) Date: Wed, 07 Oct 2009 00:54:08 -0000 Subject: [llvm-commits] [llvm] r83441 - /llvm/trunk/lib/Transforms/Utils/InlineCost.cpp Message-ID: <200910070054.n970s86s003200@zion.cs.uiuc.edu> Author: echristo Date: Tue Oct 6 19:54:08 2009 New Revision: 83441 URL: http://llvm.org/viewvc/llvm-project?rev=83441&view=rev Log: Add FreeInst to the "is a call" check for Insts that are calls, but not intrinsics. 
Modified: llvm/trunk/lib/Transforms/Utils/InlineCost.cpp Modified: llvm/trunk/lib/Transforms/Utils/InlineCost.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Utils/InlineCost.cpp?rev=83441&r1=83440&r2=83441&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/Utils/InlineCost.cpp (original) +++ llvm/trunk/lib/Transforms/Utils/InlineCost.cpp Tue Oct 6 19:54:08 2009 @@ -135,8 +135,8 @@ NumInsts += 5; } - // This, too, is a call. - if (isa<MallocInst>(II)) + // These, too, are calls. + if (isa<MallocInst>(II) || isa<FreeInst>(II)) NumInsts += 5; if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) { From gohman at apple.com Tue Oct 6 22:00:19 2009 From: gohman at apple.com (Dan Gohman) Date: Wed, 07 Oct 2009 03:00:19 -0000 Subject: [llvm-commits] [llvm] r83449 - /llvm/trunk/include/llvm/CodeGen/SelectionDAGNodes.h Message-ID: <200910070300.n9730JZq019452@zion.cs.uiuc.edu> Author: djg Date: Tue Oct 6 22:00:18 2009 New Revision: 83449 URL: http://llvm.org/viewvc/llvm-project?rev=83449&view=rev Log: INTRINSIC_W_CHAIN and INTRINSIC_VOID do not use MemSDNode. They may access memory, but they don't carry a MachineMemOperand.
Modified: llvm/trunk/include/llvm/CodeGen/SelectionDAGNodes.h Modified: llvm/trunk/include/llvm/CodeGen/SelectionDAGNodes.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/SelectionDAGNodes.h?rev=83449&r1=83448&r2=83449&view=diff ============================================================================== --- llvm/trunk/include/llvm/CodeGen/SelectionDAGNodes.h (original) +++ llvm/trunk/include/llvm/CodeGen/SelectionDAGNodes.h Tue Oct 6 22:00:18 2009 @@ -1599,8 +1599,6 @@ N->getOpcode() == ISD::ATOMIC_LOAD_MAX || N->getOpcode() == ISD::ATOMIC_LOAD_UMIN || N->getOpcode() == ISD::ATOMIC_LOAD_UMAX || - N->getOpcode() == ISD::INTRINSIC_W_CHAIN || - N->getOpcode() == ISD::INTRINSIC_VOID || N->isTargetMemoryOpcode(); } }; From kennethuil at gmail.com Tue Oct 6 21:35:25 2009 From: kennethuil at gmail.com (Kenneth Uildriks) Date: Tue, 6 Oct 2009 21:35:25 -0500 Subject: [llvm-commits] [PATCH] Additions to C-bindings Message-ID: <400d33ea0910061935p5eba5545k9220d95d1419f0cf@mail.gmail.com> This patch adds a few functions to the C bindings to expose use-def chains, function and parameter attribute retrieval, getting const opcode and const values of constants, checking for a global initializer, and ReplaceAllUsesWith. -------------- next part -------------- A non-text attachment was scrubbed... 
Name: cbindings.patch Type: text/x-patch Size: 7270 bytes Desc: not available Url : http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20091006/8d32651d/attachment.bin From echristo at apple.com Tue Oct 6 23:50:55 2009 From: echristo at apple.com (Eric Christopher) Date: Tue, 6 Oct 2009 21:50:55 -0700 Subject: [llvm-commits] [PATCH] Additions to C-bindings In-Reply-To: <400d33ea0910061935p5eba5545k9220d95d1419f0cf@mail.gmail.com> References: <400d33ea0910061935p5eba5545k9220d95d1419f0cf@mail.gmail.com> Message-ID: On Oct 6, 2009, at 7:35 PM, Kenneth Uildriks wrote: > This patch adds a few functions to the C bindings to expose use-def > chains, function and parameter attribute retrieval, getting const > opcode and const values of constants, checking for a global > initializer, and ReplaceAllUsesWith. > The C bindings are guaranteed to be fairly stable, is there some reason you need all of these exposed? -eric From baldrick at free.fr Wed Oct 7 01:56:33 2009 From: baldrick at free.fr (Duncan Sands) Date: Wed, 07 Oct 2009 06:56:33 -0000 Subject: [llvm-commits] [gcc-plugin] r83453 - /gcc-plugin/trunk/TODO Message-ID: <200910070656.n976uXan007143@zion.cs.uiuc.edu> Author: baldrick Date: Wed Oct 7 01:56:32 2009 New Revision: 83453 URL: http://llvm.org/viewvc/llvm-project?rev=83453&view=rev Log: Add a note that GCC generates tons of debug info when compiling with -g, and it would be good to turn this off. Modified: gcc-plugin/trunk/TODO Modified: gcc-plugin/trunk/TODO URL: http://llvm.org/viewvc/llvm-project/gcc-plugin/trunk/TODO?rev=83453&r1=83452&r2=83453&view=diff ============================================================================== --- gcc-plugin/trunk/TODO (original) +++ gcc-plugin/trunk/TODO Wed Oct 7 01:56:32 2009 @@ -36,6 +36,9 @@ Consider using separate caches for types and globals. +Work out how to stop GCC from outputting debug info for global variables +when compiling with -g.
The output is all thrown away, so harmless, but it +would be more efficient not to produce any in the first place. Correctness ----------- From baldrick at free.fr Wed Oct 7 02:35:20 2009 From: baldrick at free.fr (Duncan Sands) Date: Wed, 07 Oct 2009 07:35:20 -0000 Subject: [llvm-commits] [llvm] r83454 - in /llvm/trunk: include/llvm/Type.h lib/VMCore/Type.cpp Message-ID: <200910070735.n977ZKHs005298@zion.cs.uiuc.edu> Author: baldrick Date: Wed Oct 7 02:35:19 2009 New Revision: 83454 URL: http://llvm.org/viewvc/llvm-project?rev=83454&view=rev Log: Make getPointerTo return a const PointerType* rather than an unqualified PointerType* because it seems more correct. Modified: llvm/trunk/include/llvm/Type.h llvm/trunk/lib/VMCore/Type.cpp Modified: llvm/trunk/include/llvm/Type.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Type.h?rev=83454&r1=83453&r2=83454&view=diff ============================================================================== --- llvm/trunk/include/llvm/Type.h (original) +++ llvm/trunk/include/llvm/Type.h Wed Oct 7 02:35:19 2009 @@ -430,7 +430,7 @@ /// getPointerTo - Return a pointer to the current type. This is equivalent /// to PointerType::get(Foo, AddrSpace). 
- PointerType *getPointerTo(unsigned AddrSpace = 0) const; + const PointerType *getPointerTo(unsigned AddrSpace = 0) const; private: /// isSizedDerivedType - Derived types like structures and arrays are sized Modified: llvm/trunk/lib/VMCore/Type.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/Type.cpp?rev=83454&r1=83453&r2=83454&view=diff ============================================================================== --- llvm/trunk/lib/VMCore/Type.cpp (original) +++ llvm/trunk/lib/VMCore/Type.cpp Wed Oct 7 02:35:19 2009 @@ -951,7 +951,7 @@ return PT; } -PointerType *Type::getPointerTo(unsigned addrs) const { +const PointerType *Type::getPointerTo(unsigned addrs) const { return PointerType::get(this, addrs); } From baldrick at free.fr Wed Oct 7 03:46:22 2009 From: baldrick at free.fr (Duncan Sands) Date: Wed, 07 Oct 2009 08:46:22 -0000 Subject: [llvm-commits] [gcc-plugin] r83455 - in /gcc-plugin/trunk: llvm-convert.cpp llvm-debug.cpp llvm-debug.h Message-ID: <200910070846.n978kMX8027271@zion.cs.uiuc.edu> Author: baldrick Date: Wed Oct 7 03:46:21 2009 New Revision: 83455 URL: http://llvm.org/viewvc/llvm-project?rev=83455&view=rev Log: Give LLVM values corresponding to GCC SSA names the same name as they would get in a gcc dump file. Because this fattens up the bitcode considerably it is only turned on if the -fverbose-asm flag is passed to GCC. Modified: gcc-plugin/trunk/llvm-convert.cpp gcc-plugin/trunk/llvm-debug.cpp gcc-plugin/trunk/llvm-debug.h Modified: gcc-plugin/trunk/llvm-convert.cpp URL: http://llvm.org/viewvc/llvm-project/gcc-plugin/trunk/llvm-convert.cpp?rev=83455&r1=83454&r2=83455&view=diff ============================================================================== --- gcc-plugin/trunk/llvm-convert.cpp (original) +++ gcc-plugin/trunk/llvm-convert.cpp Wed Oct 7 03:46:21 2009 @@ -155,6 +155,51 @@ return align ? align : 1; } +/// NameValue - Try to name the given value after the given GCC tree node. 
If +/// the GCC tree node has no sensible name then it does nothing. If the value +/// already has a name then it is not changed. +static void NameValue(Value *V, tree t, Twine Prefix = Twine(), + Twine Postfix = Twine()) { + // If the value already has a name, do not change it. + if (V->hasName()) + return; + + // No sensible name - give up, discarding any pre- and post-fixes. + if (!t) + return; + + switch (TREE_CODE(t)) { + default: + // Unhandled case - give up. + return; + + case CONST_DECL: + case FIELD_DECL: + case FUNCTION_DECL: + case NAMESPACE_DECL: + case PARM_DECL: + case VAR_DECL: { + if (DECL_NAME(t)) { + V->setName(Prefix + IDENTIFIER_POINTER(DECL_NAME(t)) + Postfix); + return; + } + const char *Annotation = TREE_CODE(t) == CONST_DECL ? "C." : "D."; + Twine UID(DECL_UID(t)); + V->setName(Prefix + Annotation + UID + Postfix); + return; + } + + case RESULT_DECL: + V->setName(Prefix + "" + Postfix); + return; + + case SSA_NAME: + Twine NameVersion(SSA_NAME_VERSION(t)); + NameValue(V, SSA_NAME_VAR(t), Prefix, "_" + NameVersion + Postfix); + return; + } +} + //===----------------------------------------------------------------------===// // ... High-Level Methods ... //===----------------------------------------------------------------------===// @@ -956,6 +1001,8 @@ assert(TREE_CODE(name) == SSA_NAME && "PHI result not an SSA name!"); assert(SSANames.find(name) == SSANames.end() && "Multiply defined SSA name!"); + if (flag_verbose_asm) + NameValue(PHI, name); SSANames[name] = PHI; // The phi operands will be populated later - remember the phi node. @@ -1695,22 +1742,13 @@ Alignment = DECL_ALIGN(decl) / 8; } - const char *Name; // Name of variable - if (DECL_NAME(decl)) - Name = IDENTIFIER_POINTER(DECL_NAME(decl)); - else if (TREE_CODE(decl) == RESULT_DECL) - Name = "retval"; - else - Name = ""; - // Insert an alloca for this variable. AllocaInst *AI; - if (!Size) { // Fixed size alloca -> entry block. + if (!Size) // Fixed size alloca -> entry block. 
AI = CreateTemporary(Ty); - AI->setName(Name); - } else { - AI = Builder.CreateAlloca(Ty, Size, Name); - } + else + AI = Builder.CreateAlloca(Ty, Size); + NameValue(AI, decl); AI->setAlignment(Alignment); @@ -1734,11 +1772,11 @@ if (TheDebugInfo) { if (DECL_NAME(decl)) { TheDebugInfo->EmitDeclare(decl, dwarf::DW_TAG_auto_variable, - Name, TREE_TYPE(decl), AI, + AI->getName(), TREE_TYPE(decl), AI, Builder.GetInsertBlock()); } else if (TREE_CODE(decl) == RESULT_DECL) { TheDebugInfo->EmitDeclare(decl, dwarf::DW_TAG_return_variable, - Name, TREE_TYPE(decl), AI, + AI->getName(), TREE_TYPE(decl), AI, Builder.GetInsertBlock()); } } @@ -2136,21 +2174,19 @@ unsigned Alignment = DECL_ALIGN(var); assert(Alignment != 0 && "Parameter with unknown alignment!"); - const char *ParameterName = - DECL_NAME(var) ? IDENTIFIER_POINTER(DECL_NAME(var)) : "anon"; - const Type *Ty = ConvertType(TREE_TYPE(reg)); // Perform the load in the entry block, after all parameters have been set up // with their initial values, and before any modifications to their values. - LoadInst *LI = new LoadInst(DECL_LOCAL_IF_SET(var), ParameterName, - SSAInsertionPoint); + LoadInst *LI = new LoadInst(DECL_LOCAL_IF_SET(var), "", SSAInsertionPoint); LI->setAlignment(Alignment); // Potentially perform a useless type conversion (useless_type_conversion_p). Value *Def = LI; if (LI->getType() != Ty) Def = new BitCastInst(Def, Ty, "", SSAInsertionPoint); + if (flag_verbose_asm) + NameValue(Def, reg); return SSANames[reg] = Def; } @@ -8050,6 +8086,8 @@ // If this is the definition of an ssa name, record it in the SSANames map. 
if (TREE_CODE(lhs) == SSA_NAME) { assert(SSANames.find(lhs) == SSANames.end() &&"Multiply defined SSA name!"); + if (flag_verbose_asm) + NameValue(RHS, lhs); SSANames[lhs] = RHS; return; } Modified: gcc-plugin/trunk/llvm-debug.cpp URL: http://llvm.org/viewvc/llvm-project/gcc-plugin/trunk/llvm-debug.cpp?rev=83455&r1=83454&r2=83455&view=diff ============================================================================== --- gcc-plugin/trunk/llvm-debug.cpp (original) +++ gcc-plugin/trunk/llvm-debug.cpp Wed Oct 7 03:46:21 2009 @@ -309,7 +309,7 @@ /// EmitDeclare - Constructs the debug code for allocation of a new variable. /// region - "llvm.dbg.declare." -void DebugInfo::EmitDeclare(tree decl, unsigned Tag, const char *Name, +void DebugInfo::EmitDeclare(tree decl, unsigned Tag, StringRef Name, tree type, Value *AI, BasicBlock *CurBB) { // Do not emit variable declaration info, for now. Modified: gcc-plugin/trunk/llvm-debug.h URL: http://llvm.org/viewvc/llvm-project/gcc-plugin/trunk/llvm-debug.h?rev=83455&r1=83454&r2=83455&view=diff ============================================================================== --- gcc-plugin/trunk/llvm-debug.h (original) +++ gcc-plugin/trunk/llvm-debug.h Wed Oct 7 03:46:21 2009 @@ -90,7 +90,7 @@ /// EmitDeclare - Constructs the debug code for allocation of a new variable. /// region - "llvm.dbg.declare." - void EmitDeclare(tree_node *decl, unsigned Tag, const char *Name, + void EmitDeclare(tree_node *decl, unsigned Tag, StringRef Name, tree_node *type, Value *AI, BasicBlock *CurBB); From baldrick at free.fr Wed Oct 7 03:59:24 2009 From: baldrick at free.fr (Duncan Sands) Date: Wed, 07 Oct 2009 08:59:24 -0000 Subject: [llvm-commits] [gcc-plugin] r83456 - /gcc-plugin/trunk/README Message-ID: <200910070859.n978xOq0028885@zion.cs.uiuc.edu> Author: baldrick Date: Wed Oct 7 03:59:24 2009 New Revision: 83456 URL: http://llvm.org/viewvc/llvm-project?rev=83456&view=rev Log: Advertise -fverbose-asm more. 
Modified: gcc-plugin/trunk/README Modified: gcc-plugin/trunk/README URL: http://llvm.org/viewvc/llvm-project/gcc-plugin/trunk/README?rev=83456&r1=83455&r2=83456&view=diff ============================================================================== --- gcc-plugin/trunk/README (original) +++ gcc-plugin/trunk/README Wed Oct 7 03:59:24 2009 @@ -59,7 +59,12 @@ Output LLVM IR rather than target assembler. You need to use -S with this, since otherwise GCC will pass the output to the system assembler (these don't usually understand LLVM IR). It would be nice to fix this and have the option - work with -c too but it's not clear how. + work with -c too but it's not clear how. If you plan to read the IR then you + probably want to use the -fverbose-asm flag as well (see below). + +-fverbose-asm + Annotate the target assembler with helpful comments. Turns on the generation + of helpful names (the same as in GCC tree dumps) in the LLVM IR. -fstats Output both LLVM and GCC statistics. @@ -67,10 +72,6 @@ -ftime-report Output both LLVM and GCC timing information. --fverbose-asm - Annotate the target assembler with helpful comments. Gives values helpful - names in the LLVM IR. - -fno-ident If the ident global asm in the LLVM IR annoys you, use this to turn it off. From edwintorok at gmail.com Wed Oct 7 04:22:55 2009 From: edwintorok at gmail.com (Torok Edwin) Date: Wed, 07 Oct 2009 09:22:55 -0000 Subject: [llvm-commits] [llvm] r83457 - /llvm/trunk/lib/Analysis/MallocHelper.cpp Message-ID: <200910070922.n979MtJu032325@zion.cs.uiuc.edu> Author: edwin Date: Wed Oct 7 04:22:55 2009 New Revision: 83457 URL: http://llvm.org/viewvc/llvm-project?rev=83457&view=rev Log: Add PR to this FIXME, looks like I didn't commit this change after all. 
Modified: llvm/trunk/lib/Analysis/MallocHelper.cpp Modified: llvm/trunk/lib/Analysis/MallocHelper.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/MallocHelper.cpp?rev=83457&r1=83456&r2=83457&view=diff ============================================================================== --- llvm/trunk/lib/Analysis/MallocHelper.cpp (original) +++ llvm/trunk/lib/Analysis/MallocHelper.cpp Wed Oct 7 04:22:55 2009 @@ -40,7 +40,8 @@ return false; // Check malloc prototype. - // FIXME: this will be obsolete when nobuiltin attribute will exist. + // FIXME: workaround for PR5130, this will be obsolete when a nobuiltin + // attribute will exist. const FunctionType *FTy = MallocFunc->getFunctionType(); if (FTy->getNumParams() != 1) return false; From edwintorok at gmail.com Wed Oct 7 04:23:56 2009 From: edwintorok at gmail.com (Torok Edwin) Date: Wed, 07 Oct 2009 09:23:56 -0000 Subject: [llvm-commits] [llvm] r83458 - /llvm/trunk/include/llvm/ADT/DenseMap.h Message-ID: <200910070923.n979NuLv032463@zion.cs.uiuc.edu> Author: edwin Date: Wed Oct 7 04:23:56 2009 New Revision: 83458 URL: http://llvm.org/viewvc/llvm-project?rev=83458&view=rev Log: Add a comment explaining how DenseMap::insert works, because it is not intuitive. It does NOT update the value if the key is already in the map, it also returns false if the key is already in the map, regardless if the value matched. Modified: llvm/trunk/include/llvm/ADT/DenseMap.h Modified: llvm/trunk/include/llvm/ADT/DenseMap.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/ADT/DenseMap.h?rev=83458&r1=83457&r2=83458&view=diff ============================================================================== --- llvm/trunk/include/llvm/ADT/DenseMap.h (original) +++ llvm/trunk/include/llvm/ADT/DenseMap.h Wed Oct 7 04:23:56 2009 @@ -145,6 +145,9 @@ return ValueT(); } + // Inserts key,value pair into the map if the key isn't already in the map. 
+ // If the key is already in the map, it returns false and doesn't update the + // value. std::pair<iterator, bool> insert(const std::pair<KeyT, ValueT> &KV) { BucketT *TheBucket; if (LookupBucketFor(KV.first, TheBucket)) From baldrick at free.fr Wed Oct 7 04:57:42 2009 From: baldrick at free.fr (Duncan Sands) Date: Wed, 07 Oct 2009 09:57:42 -0000 Subject: [llvm-commits] [gcc-plugin] r83459 - /gcc-plugin/trunk/llvm-convert.cpp Message-ID: <200910070957.n979vgqT004252@zion.cs.uiuc.edu> Author: baldrick Date: Wed Oct 7 04:57:41 2009 New Revision: 83459 URL: http://llvm.org/viewvc/llvm-project?rev=83459&view=rev Log: When not doing verbose naming, give basic blocks more useful names: if the GCC basic block is <bb 8>, name the LLVM basic block "8" etc. This is better than calling everything "bb". Also, since the length of identifiers is known, make use of this. Modified: gcc-plugin/trunk/llvm-convert.cpp Modified: gcc-plugin/trunk/llvm-convert.cpp URL: http://llvm.org/viewvc/llvm-project/gcc-plugin/trunk/llvm-convert.cpp?rev=83459&r1=83458&r2=83459&view=diff ============================================================================== --- gcc-plugin/trunk/llvm-convert.cpp (original) +++ gcc-plugin/trunk/llvm-convert.cpp Wed Oct 7 04:57:41 2009 @@ -180,7 +180,9 @@ case PARM_DECL: case VAR_DECL: { if (DECL_NAME(t)) { - V->setName(Prefix + IDENTIFIER_POINTER(DECL_NAME(t)) + Postfix); + StringRef Ident(IDENTIFIER_POINTER(DECL_NAME(t)), + IDENTIFIER_LENGTH(DECL_NAME(t))); + V->setName(Prefix + Ident + Postfix); return; } const char *Annotation = TREE_CODE(t) == CONST_DECL ? "C." : "D."; @@ -941,7 +943,8 @@ tree label = gimple_label_label(stmt); if (tree name = DECL_NAME(label)) { // If the label has a name then use it. - BB->setName(IDENTIFIER_POINTER(name)); + StringRef Ident(IDENTIFIER_POINTER(name), IDENTIFIER_LENGTH(name)); + BB->setName(Ident); } else if (LABEL_DECL_UID(label) != -1) { // If the label has a UID then use it.
Twine UID(LABEL_DECL_UID(label)); @@ -957,7 +960,8 @@ BB->setName(""); } } else { - BB->setName("bb"); + Twine Index(bb->index); + BB->setName(Index); } return BasicBlocks[bb] = BB; From kennethuil at gmail.com Wed Oct 7 06:52:52 2009 From: kennethuil at gmail.com (Kenneth Uildriks) Date: Wed, 7 Oct 2009 06:52:52 -0500 Subject: [llvm-commits] [PATCH] Additions to C-bindings In-Reply-To: References: <400d33ea0910061935p5eba5545k9220d95d1419f0cf@mail.gmail.com> Message-ID: <400d33ea0910070452j1fd69311n8caa49e79d3c7967@mail.gmail.com> On Tue, Oct 6, 2009 at 11:50 PM, Eric Christopher wrote: > > On Oct 6, 2009, at 7:35 PM, Kenneth Uildriks wrote: > >> This patch adds a few functions to the C bindings to expose use-def >> chains, function and parameter attribute retrieval, getting const >> opcode and const values of constants, checking for a global >> initializer, and ReplaceAllUsesWith. >> > > The C bindings are guaranteed to be fairly stable, is there some reason you > need all of these exposed? > > -eric > I wanted to call them from generated code and from my language. From kennethuil at gmail.com Wed Oct 7 08:06:36 2009 From: kennethuil at gmail.com (Kenneth Uildriks) Date: Wed, 7 Oct 2009 08:06:36 -0500 Subject: [llvm-commits] [PATCH] Additions to C-bindings In-Reply-To: <400d33ea0910070452j1fd69311n8caa49e79d3c7967@mail.gmail.com> References: <400d33ea0910061935p5eba5545k9220d95d1419f0cf@mail.gmail.com> <400d33ea0910070452j1fd69311n8caa49e79d3c7967@mail.gmail.com> Message-ID: <400d33ea0910070606t4d3fbf61nd07c20263cb80e4c@mail.gmail.com> On Wed, Oct 7, 2009 at 6:52 AM, Kenneth Uildriks wrote: > On Tue, Oct 6, 2009 at 11:50 PM, Eric Christopher wrote: >> >> On Oct 6, 2009, at 7:35 PM, Kenneth Uildriks wrote: >> >>> This patch adds a few functions to the C bindings to expose use-def >>> chains, function and parameter attribute retrieval, getting const >>> opcode and const values of constants, checking for a global >>> initializer, and ReplaceAllUsesWith. 
>>> >> >> The C bindings are guaranteed to be fairly stable, is there some reason you >> need all of these exposed? >> >> -eric >> > > I wanted to call them from generated code and from my language. > Not only do I want to end up with a self-hosting compiler, but the design of my compiler requires that much of the code generation code is actually generated and JITted when the compiler is running. This code generation code needs to call a C interface to do its work. From baldrick at free.fr Wed Oct 7 09:10:44 2009 From: baldrick at free.fr (Duncan Sands) Date: Wed, 07 Oct 2009 14:10:44 -0000 Subject: [llvm-commits] [gcc-plugin] r83460 - /gcc-plugin/trunk/llvm-types.cpp Message-ID: <200910071410.n97EAiol004191@zion.cs.uiuc.edu> Author: baldrick Date: Wed Oct 7 09:10:43 2009 New Revision: 83460 URL: http://llvm.org/viewvc/llvm-project?rev=83460&view=rev Log: Change the type naming scheme to something closer to what GCC does. Also, give all types names rather than just a select few. Modified: gcc-plugin/trunk/llvm-types.cpp Modified: gcc-plugin/trunk/llvm-types.cpp URL: http://llvm.org/viewvc/llvm-project/gcc-plugin/trunk/llvm-types.cpp?rev=83460&r1=83459&r2=83460&view=diff ============================================================================== --- gcc-plugin/trunk/llvm-types.cpp (original) +++ gcc-plugin/trunk/llvm-types.cpp Wed Oct 7 09:10:43 2009 @@ -199,60 +199,72 @@ TREE_CODE(TYPE_SIZE(Type)) != INTEGER_CST; } -/// GetTypeName - Return a fully qualified (with namespace prefixes) name for -/// the specified type.
-static std::string GetTypeName(const char *Prefix, tree type) { - const char *Name = "anon"; - if (TYPE_NAME(type)) { - if (TREE_CODE(TYPE_NAME(type)) == IDENTIFIER_NODE) - Name = IDENTIFIER_POINTER(TYPE_NAME(type)); - else if (DECL_NAME(TYPE_NAME(type))) - Name = IDENTIFIER_POINTER(DECL_NAME(TYPE_NAME(type))); - } - - std::string ContextStr; - tree Context = TYPE_CONTEXT(type); - while (Context) { - switch (TREE_CODE(Context)) { - case TRANSLATION_UNIT_DECL: Context = 0; break; // Done. - case RECORD_TYPE: - case NAMESPACE_DECL: - if (TREE_CODE(Context) == RECORD_TYPE) { - if (TYPE_NAME(Context)) { - std::string NameFrag; - if (TREE_CODE(TYPE_NAME(Context)) == IDENTIFIER_NODE) { - NameFrag = IDENTIFIER_POINTER(TYPE_NAME(Context)); - } else { - NameFrag = IDENTIFIER_POINTER(DECL_NAME(TYPE_NAME(Context))); - } +/// NameType - Try to name the given type after the given GCC tree node. If +/// the GCC tree node has no sensible name then it does nothing. +static void NameType(const Type *Ty, tree t, Twine Prefix = Twine(), + Twine Postfix = Twine()) { + // No sensible name - give up, discarding any pre- and post-fixes. + if (!t) + return; - ContextStr = NameFrag + "::" + ContextStr; - Context = TYPE_CONTEXT(Context); - break; - } - // Anonymous record, fall through. - } else if (DECL_NAME(Context) - /*&& DECL_NAME(Context) != anonymous_namespace_name*/){ - assert(TREE_CODE(DECL_NAME(Context)) == IDENTIFIER_NODE); - std::string NamespaceName = IDENTIFIER_POINTER(DECL_NAME(Context)); - ContextStr = NamespaceName + "::" + ContextStr; - Context = DECL_CONTEXT(Context); - break; + switch (TREE_CODE(t)) { + default: + // Unhandled case - give up. + return; + + case ARRAY_TYPE: + // If the element type is E, name the array E[] (regardless of the number + // of dimensions). 
+ for (; TREE_CODE(t) == ARRAY_TYPE; t = TREE_TYPE(t)) ; + NameType(Ty, t, Prefix, "[]" + Postfix); + return; + + case BOOLEAN_TYPE: + case COMPLEX_TYPE: + case ENUMERAL_TYPE: + case FIXED_POINT_TYPE: + case FUNCTION_TYPE: + case INTEGER_TYPE: + case METHOD_TYPE: + case QUAL_UNION_TYPE: + case REAL_TYPE: + case RECORD_TYPE: + case UNION_TYPE: + case VECTOR_TYPE: { + // If the type has a name then use that, otherwise bail out. + if (!TYPE_NAME(t)) + return; // Unnamed type. + + tree identifier = NULL_TREE; + if (TREE_CODE(TYPE_NAME(t)) == IDENTIFIER_NODE) + identifier = TYPE_NAME(t); + else if (TREE_CODE(TYPE_NAME(t)) == TYPE_DECL) + identifier = DECL_NAME(TYPE_NAME(t)); + + if (identifier) { + const char *Class = ""; + if (TREE_CODE(t) == ENUMERAL_TYPE) + Class = "enum "; + if (TREE_CODE(t) == RECORD_TYPE) + Class = "struct "; + else if (TREE_CODE(t) == UNION_TYPE) + Class = "union "; + StringRef Ident(IDENTIFIER_POINTER(identifier), + IDENTIFIER_LENGTH(identifier)); + TheModule->addTypeName((Prefix + Class + Ident + Postfix).str(), Ty); } - // FALL THROUGH for anonymous namespaces and records! - - default: { - // If this is a structure type defined inside of a function or other block - // scope, make sure to make the type name unique by putting a unique ID - // in it. - static unsigned UniqueID = 0; - ContextStr = "." + utostr(UniqueID++); - Context = 0; // Stop looking at context - break; + return; } - } - } - return Prefix + ContextStr + Name; + + case POINTER_TYPE: + // If the element type is E, LLVM already calls this E*. + return; + + case REFERENCE_TYPE: + // If the element type is E, name the reference E&. 
+ NameType(Ty, TREE_TYPE(t), Prefix, "&" + Postfix); + return; + } } /// isSequentialCompatible - Return true if the specified gcc array or pointer @@ -634,170 +646,190 @@ const Type *TypeConverter::ConvertType(tree orig_type) { if (orig_type == error_mark_node) return Type::getInt32Ty(Context); - + // LLVM doesn't care about variants such as const, volatile, or restrict. tree type = TYPE_MAIN_VARIANT(orig_type); + const Type *Ty; switch (TREE_CODE(type)) { default: - fprintf(stderr, "Unknown type to convert:\n"); debug_tree(type); - abort(); - case VOID_TYPE: return SET_TYPE_LLVM(type, Type::getVoidTy(Context)); - case RECORD_TYPE: return ConvertRECORD(type, orig_type); + llvm_unreachable("Unknown type to convert!"); + + case VOID_TYPE: + Ty = SET_TYPE_LLVM(type, Type::getVoidTy(Context)); + break; + + case RECORD_TYPE: + Ty = ConvertRECORD(type, orig_type); + break; + case QUAL_UNION_TYPE: - case UNION_TYPE: return ConvertUNION(type, orig_type); + case UNION_TYPE: + Ty = ConvertUNION(type, orig_type); + break; + case BOOLEAN_TYPE: { - if (const Type *Ty = GET_TYPE_LLVM(type)) + if ((Ty = GET_TYPE_LLVM(type))) return Ty; - return SET_TYPE_LLVM(type, IntegerType::get(Context, TYPE_PRECISION(type))); + Ty = SET_TYPE_LLVM(type, IntegerType::get(Context, TYPE_PRECISION(type))); + break; } + case ENUMERAL_TYPE: // Use of an enum that is implicitly declared? if (TYPE_SIZE(orig_type) == 0) { // If we already compiled this type, use the old type. - if (const Type *Ty = GET_TYPE_LLVM(orig_type)) + if ((Ty = GET_TYPE_LLVM(orig_type))) return Ty; - const Type *Ty = OpaqueType::get(Context); - TheModule->addTypeName(GetTypeName("enum.", orig_type), Ty); - return TypeDB.setType(orig_type, Ty); + Ty = OpaqueType::get(Context); + Ty = TypeDB.setType(orig_type, Ty); + break; } // FALL THROUGH. 
type = orig_type; case INTEGER_TYPE: { - if (const Type *Ty = GET_TYPE_LLVM(type)) return Ty; + if ((Ty = GET_TYPE_LLVM(type))) return Ty; // The ARM port defines __builtin_neon_xi as a 511-bit type because GCC's // type precision field has only 9 bits. Treat this as a special case. int precision = TYPE_PRECISION(type) == 511 ? 512 : TYPE_PRECISION(type); - return SET_TYPE_LLVM(type, IntegerType::get(Context, precision)); + Ty = SET_TYPE_LLVM(type, IntegerType::get(Context, precision)); + break; } + case REAL_TYPE: - if (const Type *Ty = GET_TYPE_LLVM(type)) return Ty; + if ((Ty = GET_TYPE_LLVM(type))) return Ty; switch (TYPE_PRECISION(type)) { default: - fprintf(stderr, "Unknown FP type!\n"); debug_tree(type); - abort(); - case 32: return SET_TYPE_LLVM(type, Type::getFloatTy(Context)); - case 64: return SET_TYPE_LLVM(type, Type::getDoubleTy(Context)); - case 80: return SET_TYPE_LLVM(type, Type::getX86_FP80Ty(Context)); + llvm_unreachable("Unknown FP type!"); + case 32: Ty = SET_TYPE_LLVM(type, Type::getFloatTy(Context)); break; + case 64: Ty = SET_TYPE_LLVM(type, Type::getDoubleTy(Context)); break; + case 80: Ty = SET_TYPE_LLVM(type, Type::getX86_FP80Ty(Context)); break; case 128: #ifdef TARGET_POWERPC - return SET_TYPE_LLVM(type, Type::getPPC_FP128Ty(Context)); + Ty = SET_TYPE_LLVM(type, Type::getPPC_FP128Ty(Context)); #elif defined(TARGET_ZARCH) || defined(TARGET_CPU_sparc) // FIXME: Use some generic define. // This is for IEEE double extended, e.g. Sparc - return SET_TYPE_LLVM(type, Type::getFP128Ty(Context)); + Ty = SET_TYPE_LLVM(type, Type::getFP128Ty(Context)); #else // 128-bit long doubles map onto { double, double }. 
- return SET_TYPE_LLVM(type, - StructType::get(Context, Type::getDoubleTy(Context), - Type::getDoubleTy(Context), NULL)); + Ty = SET_TYPE_LLVM(type, + StructType::get(Context, Type::getDoubleTy(Context), + Type::getDoubleTy(Context), NULL)); #endif + break; } - + break; + case COMPLEX_TYPE: { - if (const Type *Ty = GET_TYPE_LLVM(type)) return Ty; - const Type *Ty = ConvertType(TREE_TYPE(type)); + if ((Ty = GET_TYPE_LLVM(type))) return Ty; + Ty = ConvertType(TREE_TYPE(type)); assert(!Ty->isAbstract() && "should use TypeDB.setType()"); Ty = StructType::get(Context, Ty, Ty, NULL); - TheModule->addTypeName(GetTypeName("cpx.", orig_type), Ty); - return SET_TYPE_LLVM(type, Ty); + Ty = SET_TYPE_LLVM(type, Ty); + break; } + case VECTOR_TYPE: { - if (const Type *Ty = GET_TYPE_LLVM(type)) return Ty; - const Type *Ty = ConvertType(TREE_TYPE(type)); + if ((Ty = GET_TYPE_LLVM(type))) return Ty; + Ty = ConvertType(TREE_TYPE(type)); assert(!Ty->isAbstract() && "should use TypeDB.setType()"); Ty = VectorType::get(Ty, TYPE_VECTOR_SUBPARTS(type)); - return SET_TYPE_LLVM(type, Ty); + Ty = SET_TYPE_LLVM(type, Ty); + break; } - + case POINTER_TYPE: case REFERENCE_TYPE: - if (const PointerType *Ty = cast_or_null(GET_TYPE_LLVM(type))){ + if (const PointerType *PTy = cast_or_null(GET_TYPE_LLVM(type))){ // We already converted this type. If this isn't a case where we have to // reparse it, just return it. if (PointersToReresolve.empty() || PointersToReresolve.back() != type || ConvertingStruct) - return Ty; - + return PTy; + // Okay, we know that we're !ConvertingStruct and that type is on the end // of the vector. Remove this entry from the PointersToReresolve list and // get the pointee type. Note that this order is important in case the // pointee type uses this pointer. - assert(isa(Ty->getElementType()) && "Not a deferred ref!"); - + assert(isa(PTy->getElementType()) && "Not a deferred ref!"); + // We are actively resolving this pointer. 
We want to pop this value from // the stack, as we are no longer resolving it. However, we don't want to // make it look like we are now resolving the previous pointer on the // stack, so pop this value and push a null. PointersToReresolve.back() = 0; - - + + // Do not do any nested resolution. We know that there is a higher-level // loop processing deferred pointers, let it handle anything new. ConvertingStruct = true; - - // Note that we know that Ty cannot be resolved or invalidated here. + + // Note that we know that PTy cannot be resolved or invalidated here. const Type *Actual = ConvertType(TREE_TYPE(type)); - assert(GET_TYPE_LLVM(type) == Ty && "Pointer invalidated!"); + assert(GET_TYPE_LLVM(type) == PTy && "Pointer invalidated!"); // Restore ConvertingStruct for the caller. ConvertingStruct = false; - + if (Actual->isVoidTy()) Actual = Type::getInt8Ty(Context); // void* -> sbyte* - + // Update the type, potentially updating TYPE_LLVM(type). - const OpaqueType *OT = cast(Ty->getElementType()); + const OpaqueType *OT = cast(PTy->getElementType()); const_cast(OT)->refineAbstractTypeTo(Actual); - return GET_TYPE_LLVM(type); + Ty = GET_TYPE_LLVM(type); + break; } else { - const Type *Ty; - // If we are converting a struct, and if we haven't converted the pointee // type, add this pointer to PointersToReresolve and return an opaque*. if (ConvertingStruct) { - // If the pointee type has not already been converted to LLVM, create + // If the pointee type has not already been converted to LLVM, create // a new opaque type and remember it in the database. Ty = GET_TYPE_LLVM(TYPE_MAIN_VARIANT(TREE_TYPE(type))); if (Ty == 0) { PointersToReresolve.push_back(type); - return TypeDB.setType(type, - PointerType::getUnqual(OpaqueType::get(Context))); + Ty = TypeDB.setType(type, + PointerType::getUnqual(OpaqueType::get(Context))); + break; } - // A type has already been computed. However, this may be some sort of - // recursive struct. 
We don't want to call ConvertType on it, because - // this will try to resolve it, and not adding the type to the - // PointerToReresolve collection is just an optimization. Instead, - // we'll use the type returned by GET_TYPE_LLVM directly, even if this + // A type has already been computed. However, this may be some sort of + // recursive struct. We don't want to call ConvertType on it, because + // this will try to resolve it, and not adding the type to the + // PointerToReresolve collection is just an optimization. Instead, + // we'll use the type returned by GET_TYPE_LLVM directly, even if this // may be resolved further in the future. } else { - // If we're not in a struct, just call ConvertType. If it has already - // been converted, this will return the precomputed value, otherwise + // If we're not in a struct, just call ConvertType. If it has already + // been converted, this will return the precomputed value, otherwise // this will compute and return the new type. Ty = ConvertType(TREE_TYPE(type)); } - + if (Ty->isVoidTy()) Ty = Type::getInt8Ty(Context); // void* -> sbyte* - return TypeDB.setType(type, Ty->getPointerTo()); + Ty = TypeDB.setType(type, Ty->getPointerTo()); + break; } - + case METHOD_TYPE: case FUNCTION_TYPE: { - if (const Type *Ty = GET_TYPE_LLVM(type)) + if ((Ty = GET_TYPE_LLVM(type))) return Ty; - + // No declaration to pass through, passing NULL. 
CallingConv::ID CallingConv; AttrListPtr PAL; - return TypeDB.setType(type, ConvertFunctionType(type, NULL, NULL, - CallingConv, PAL)); + Ty = TypeDB.setType(type, ConvertFunctionType(type, NULL, NULL, + CallingConv, PAL)); + break; } + case ARRAY_TYPE: { - if (const Type *Ty = GET_TYPE_LLVM(type)) + if ((Ty = GET_TYPE_LLVM(type))) return Ty; uint64_t ElementSize; @@ -844,18 +876,23 @@ NumElements /= ElementSize; } - return TypeDB.setType(type, ArrayType::get(ElementTy, NumElements)); + Ty = TypeDB.setType(type, ArrayType::get(ElementTy, NumElements)); + break; } + case OFFSET_TYPE: // Handle OFFSET_TYPE specially. This is used for pointers to members, // which are really just integer offsets. As such, return the appropriate // integer directly. switch (getTargetData().getPointerSize()) { default: assert(0 && "Unknown pointer size!"); - case 4: return Type::getInt32Ty(Context); - case 8: return Type::getInt64Ty(Context); + case 4: Ty = Type::getInt32Ty(Context); break; + case 8: Ty = Type::getInt64Ty(Context); break; } } + + NameType(Ty, orig_type); + return Ty; } //===----------------------------------------------------------------------===// @@ -1847,7 +1884,6 @@ if (TYPE_SIZE(type) == 0) { // Forward declaration? const Type *Ty = OpaqueType::get(Context); - TheModule->addTypeName(GetTypeName("struct.", orig_type), Ty); return TypeDB.setType(type, Ty); } @@ -1974,10 +2010,6 @@ if (OldTy) const_cast(OldTy)->refineAbstractTypeTo(ResultTy); - // Finally, set the name for the type. - TheModule->addTypeName(GetTypeName("struct.", orig_type), - GET_TYPE_LLVM(type)); - // We have finished converting this struct. See if the is the outer-most // struct being converted by ConvertType. ConvertingStruct = OldConvertingStruct; @@ -2014,7 +2046,6 @@ if (TYPE_SIZE(type) == 0) { // Forward declaraion? 
const Type *Ty = OpaqueType::get(Context); - TheModule->addTypeName(GetTypeName("union.", orig_type), Ty); return TypeDB.setType(type, Ty); } @@ -2156,10 +2187,6 @@ if (OldTy) const_cast(OldTy)->refineAbstractTypeTo(ResultTy); - // Finally, set the name for the type. - TheModule->addTypeName(GetTypeName("struct.", orig_type), - GET_TYPE_LLVM(type)); - // We have finished converting this union. See if the is the outer-most // union being converted by ConvertType. ConvertingStruct = OldConvertingStruct; From stuart at apple.com Wed Oct 7 11:11:19 2009 From: stuart at apple.com (Stuart Hastings) Date: Wed, 07 Oct 2009 16:11:19 -0000 Subject: [llvm-commits] [llvm] r83461 - /llvm/tags/Apple/llvmCore-2311.1/ Message-ID: <200910071611.n97GBJkA020225@zion.cs.uiuc.edu> Author: stuart Date: Wed Oct 7 11:11:18 2009 New Revision: 83461 URL: http://llvm.org/viewvc/llvm-project?rev=83461&view=rev Log: llvmCore-2311.1 Added: llvm/tags/Apple/llvmCore-2311.1/ - copied from r83460, llvm/tags/Apple/llvmCore-2311/ From dpatel at apple.com Wed Oct 7 11:37:55 2009 From: dpatel at apple.com (Devang Patel) Date: Wed, 07 Oct 2009 16:37:55 -0000 Subject: [llvm-commits] [llvm] r83462 - /llvm/trunk/lib/VMCore/AsmWriter.cpp Message-ID: <200910071637.n97GbtBd023781@zion.cs.uiuc.edu> Author: dpatel Date: Wed Oct 7 11:37:55 2009 New Revision: 83462 URL: http://llvm.org/viewvc/llvm-project?rev=83462&view=rev Log: Do not assume that the module is set. 
Modified: llvm/trunk/lib/VMCore/AsmWriter.cpp Modified: llvm/trunk/lib/VMCore/AsmWriter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/AsmWriter.cpp?rev=83462&r1=83461&r2=83462&view=diff ============================================================================== --- llvm/trunk/lib/VMCore/AsmWriter.cpp (original) +++ llvm/trunk/lib/VMCore/AsmWriter.cpp Wed Oct 7 11:37:55 2009 @@ -1303,12 +1303,14 @@ : Out(o), Machine(Mac), TheModule(M), AnnotationWriter(AAW) { AddModuleTypesToPrinter(TypePrinter, NumberedTypes, M); // FIXME: Provide MDPrinter - MetadataContext &TheMetadata = M->getContext().getMetadata(); - const StringMap *Names = TheMetadata.getHandlerNames(); - for (StringMapConstIterator I = Names->begin(), - E = Names->end(); I != E; ++I) { - const StringMapEntry &Entry = *I; - MDNames[I->second] = Entry.getKeyData(); + if (M) { + MetadataContext &TheMetadata = M->getContext().getMetadata(); + const StringMap *Names = TheMetadata.getHandlerNames(); + for (StringMapConstIterator I = Names->begin(), + E = Names->end(); I != E; ++I) { + const StringMapEntry &Entry = *I; + MDNames[I->second] = Entry.getKeyData(); + } } } @@ -2029,15 +2031,16 @@ } // Print Metadata info - MetadataContext &TheMetadata = I.getContext().getMetadata(); - const MetadataContext::MDMapTy *MDMap = TheMetadata.getMDs(&I); - if (MDMap) - for (MetadataContext::MDMapTy::const_iterator MI = MDMap->begin(), - ME = MDMap->end(); MI != ME; ++MI) - if (const MDNode *MD = dyn_cast_or_null(MI->second)) - Out << ", !" << MDNames[MI->first] - << " !" << Machine.getMetadataSlot(MD); - + if (!MDNames.empty()) { + MetadataContext &TheMetadata = I.getContext().getMetadata(); + const MetadataContext::MDMapTy *MDMap = TheMetadata.getMDs(&I); + if (MDMap) + for (MetadataContext::MDMapTy::const_iterator MI = MDMap->begin(), + ME = MDMap->end(); MI != ME; ++MI) + if (const MDNode *MD = dyn_cast_or_null(MI->second)) + Out << ", !" << MDNames[MI->first] + << " !" 
<< Machine.getMetadataSlot(MD); + } printInfoComment(I); } From dpatel at apple.com Wed Oct 7 11:38:39 2009 From: dpatel at apple.com (Devang Patel) Date: Wed, 7 Oct 2009 09:38:39 -0700 Subject: [llvm-commits] [llvm] r83010 - /llvm/trunk/lib/VMCore/AsmWriter.cpp In-Reply-To: References: <200909282056.n8SKu0ca012458@zion.cs.uiuc.edu> Message-ID: On Oct 6, 2009, at 5:44 PM, Jeffrey Yasskin wrote: > This patch assumes that the Module* parameter to AssemblyWriter() is > non-null, but Value::print() in this file can pass NULL. This > crashes if you run `opt -instcombine -debug-only=instcombine` on any > input where instcombine will replace values. Yup. Fixed in r83462. - Devang > Let me know if you need a more precise test case. > > On Mon, Sep 28, 2009 at 1:56 PM, Devang Patel > wrote: > Author: dpatel > Date: Mon Sep 28 15:56:00 2009 > New Revision: 83010 > > URL: http://llvm.org/viewvc/llvm-project?rev=83010&view=rev > Log: > Do not hardcode metadata names. > > Modified: > llvm/trunk/lib/VMCore/AsmWriter.cpp > > Modified: llvm/trunk/lib/VMCore/AsmWriter.cpp > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/AsmWriter.cpp?rev=83010&r1=83009&r2=83010&view=diff > > = > = > = > = > = > = > = > = > ====================================================================== > --- llvm/trunk/lib/VMCore/AsmWriter.cpp (original) > +++ llvm/trunk/lib/VMCore/AsmWriter.cpp Mon Sep 28 15:56:00 2009 > @@ -1266,6 +1266,7 @@ > TypePrinting TypePrinter; > AssemblyAnnotationWriter *AnnotationWriter; > std::vector NumberedTypes; > + DenseMap MDNames; > > public: > inline AssemblyWriter(formatted_raw_ostream &o, SlotTracker &Mac, > @@ -1273,6 +1274,14 @@ > AssemblyAnnotationWriter *AAW) > : Out(o), Machine(Mac), TheModule(M), AnnotationWriter(AAW) { > AddModuleTypesToPrinter(TypePrinter, NumberedTypes, M); > + // FIXME: Provide MDPrinter > + Metadata &TheMetadata = M->getContext().getMetadata(); > + const StringMap *Names = TheMetadata.getHandlerNames(); > + for 
(StringMapConstIterator I = Names->begin(), > + E = Names->end(); I != E; ++I) { > + const StringMapEntry &Entry = *I; > + MDNames[I->second] = Entry.getKeyData(); > + } > } > > void write(const Module *M) { printModule(M); } > @@ -1991,11 +2000,16 @@ > Out << ", align " << cast(I).getAlignment(); > } > > - // Print DebugInfo > + // Print Metadata info > Metadata &TheMetadata = I.getContext().getMetadata(); > - unsigned MDDbgKind = TheMetadata.getMDKind("dbg"); > - if (const MDNode *Dbg = TheMetadata.getMD(MDDbgKind, &I)) > - Out << ", dbg !" << Machine.getMetadataSlot(Dbg); > + const Metadata::MDMapTy *MDMap = TheMetadata.getMDs(&I); > + if (MDMap) > + for (Metadata::MDMapTy::const_iterator MI = MDMap->begin(), > + ME = MDMap->end(); MI != ME; ++MI) > + if (const MDNode *MD = dyn_cast_or_null(MI->second)) > + Out << ", " << MDNames[MI->first] > + << " !" << Machine.getMetadataSlot(MD); > + > printInfoComment(I); > } > > > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits > -------------- next part -------------- An HTML attachment was scrubbed... 
URL: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20091007/1fa6fc01/attachment.html From stuart at apple.com Wed Oct 7 12:05:26 2009 From: stuart at apple.com (Stuart Hastings) Date: Wed, 07 Oct 2009 17:05:26 -0000 Subject: [llvm-commits] [llvm-gcc-4.2] r83465 - /llvm-gcc-4.2/tags/Apple/llvmgcc42-2311.1/ Message-ID: <200910071705.n97H5Qif027244@zion.cs.uiuc.edu> Author: stuart Date: Wed Oct 7 12:05:26 2009 New Revision: 83465 URL: http://llvm.org/viewvc/llvm-project?rev=83465&view=rev Log: llvmgcc42-2311.1 Added: llvm-gcc-4.2/tags/Apple/llvmgcc42-2311.1/ - copied from r83464, llvm-gcc-4.2/tags/Apple/llvmgcc42-2311/ From grosbach at apple.com Wed Oct 7 12:12:56 2009 From: grosbach at apple.com (Jim Grosbach) Date: Wed, 07 Oct 2009 17:12:56 -0000 Subject: [llvm-commits] [llvm] r83467 - in /llvm/trunk: include/llvm/CodeGen/ include/llvm/Target/ lib/CodeGen/ lib/Target/ARM/ lib/Target/Alpha/ lib/Target/Blackfin/ lib/Target/CellSPU/ lib/Target/MSP430/ lib/Target/Mips/ lib/Target/PIC16/ lib/Target/PowerPC/ lib/Target/Sparc/ lib/Target/SystemZ/ lib/Target/X86/ lib/Target/XCore/ Message-ID: <200910071712.n97HCvGJ028253@zion.cs.uiuc.edu> Author: grosbach Date: Wed Oct 7 12:12:56 2009 New Revision: 83467 URL: http://llvm.org/viewvc/llvm-project?rev=83467&view=rev Log: Add register-reuse to frame-index register scavenging. When a target uses a virtual register to eliminate a frame index, it can return that register and the constant stored there to PEI to track. When scavenging to allocate for those registers, PEI then tracks the last-used register and value, and if it is still available and matches the value for the next index, reuses the existing value rather than recomputing it, and removes the re-materialization instructions. Fancier tracking and adjustment of scavenger allocations to keep more values live for longer is possible, but not yet implemented and would likely be better done via a different, less special-purpose, approach to the problem.
eliminateFrameIndex() is modified so the target implementations can return the registers they wish to be tracked for reuse. ARM Thumb1 implements and utilizes the new mechanism. All other targets are simply modified to adjust for the changed eliminateFrameIndex() prototype. Modified: llvm/trunk/include/llvm/CodeGen/RegisterScavenging.h llvm/trunk/include/llvm/Target/TargetRegisterInfo.h llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp llvm/trunk/lib/CodeGen/PrologEpilogInserter.h llvm/trunk/lib/CodeGen/RegisterScavenging.cpp llvm/trunk/lib/Target/ARM/ARMBaseRegisterInfo.cpp llvm/trunk/lib/Target/ARM/ARMBaseRegisterInfo.h llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.cpp llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.h llvm/trunk/lib/Target/Alpha/AlphaRegisterInfo.cpp llvm/trunk/lib/Target/Alpha/AlphaRegisterInfo.h llvm/trunk/lib/Target/Blackfin/BlackfinRegisterInfo.cpp llvm/trunk/lib/Target/Blackfin/BlackfinRegisterInfo.h llvm/trunk/lib/Target/CellSPU/SPURegisterInfo.cpp llvm/trunk/lib/Target/CellSPU/SPURegisterInfo.h llvm/trunk/lib/Target/MSP430/MSP430RegisterInfo.cpp llvm/trunk/lib/Target/MSP430/MSP430RegisterInfo.h llvm/trunk/lib/Target/Mips/MipsRegisterInfo.cpp llvm/trunk/lib/Target/Mips/MipsRegisterInfo.h llvm/trunk/lib/Target/PIC16/PIC16RegisterInfo.cpp llvm/trunk/lib/Target/PIC16/PIC16RegisterInfo.h llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.cpp llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.h llvm/trunk/lib/Target/Sparc/SparcRegisterInfo.cpp llvm/trunk/lib/Target/Sparc/SparcRegisterInfo.h llvm/trunk/lib/Target/SystemZ/SystemZRegisterInfo.cpp llvm/trunk/lib/Target/SystemZ/SystemZRegisterInfo.h llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp llvm/trunk/lib/Target/X86/X86RegisterInfo.h llvm/trunk/lib/Target/XCore/XCoreRegisterInfo.cpp llvm/trunk/lib/Target/XCore/XCoreRegisterInfo.h Modified: llvm/trunk/include/llvm/CodeGen/RegisterScavenging.h URL: 
http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/RegisterScavenging.h?rev=83467&r1=83466&r2=83467&view=diff ============================================================================== --- llvm/trunk/include/llvm/CodeGen/RegisterScavenging.h (original) +++ llvm/trunk/include/llvm/CodeGen/RegisterScavenging.h Wed Oct 7 12:12:56 2009 @@ -117,6 +117,9 @@ return scavengeRegister(RegClass, MBBI, SPAdj); } + /// setUsed - Tell the scavenger a register is used. + /// + void setUsed(unsigned Reg); private: /// isReserved - Returns true if a register is reserved. It is never "unused". bool isReserved(unsigned Reg) const { return ReservedRegs.test(Reg); } @@ -131,7 +134,6 @@ /// setUsed / setUnused - Mark the state of one or a number of registers. /// - void setUsed(unsigned Reg); void setUsed(BitVector &Regs) { RegsAvailable &= ~Regs; } Modified: llvm/trunk/include/llvm/Target/TargetRegisterInfo.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetRegisterInfo.h?rev=83467&r1=83466&r2=83467&view=diff ============================================================================== --- llvm/trunk/include/llvm/Target/TargetRegisterInfo.h (original) +++ llvm/trunk/include/llvm/Target/TargetRegisterInfo.h Wed Oct 7 12:12:56 2009 @@ -660,8 +660,13 @@ /// specified instruction, as long as it keeps the iterator pointing the the /// finished product. SPAdj is the SP adjustment due to call frame setup /// instruction. - virtual void eliminateFrameIndex(MachineBasicBlock::iterator MI, - int SPAdj, RegScavenger *RS=NULL) const = 0; + /// + /// When -enable-frame-index-scavenging is enabled, the virtual register + /// allocated for this frame index is returned and its value is stored in + /// *Value. + virtual unsigned eliminateFrameIndex(MachineBasicBlock::iterator MI, + int SPAdj, int *Value = NULL, + RegScavenger *RS=NULL) const = 0; /// emitProlog/emitEpilog - These methods insert prolog and epilog code into /// the function. 
Modified: llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp?rev=83467&r1=83466&r2=83467&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp (original) +++ llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp Wed Oct 7 12:12:56 2009 @@ -655,6 +655,11 @@ int FrameSetupOpcode = TRI.getCallFrameSetupOpcode(); int FrameDestroyOpcode = TRI.getCallFrameDestroyOpcode(); + // Pre-allocate space for frame index mappings. If more space is needed, + // the map will be grown later. + if (FrameIndexVirtualScavenging) + FrameConstantRegMap.grow(Fn.getRegInfo().getLastVirtReg() + 128); + for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) { int SPAdj = 0; // SP offset due to call frame setup / destroy. @@ -703,9 +708,17 @@ // If this instruction has a FrameIndex operand, we need to // use that target machine register info object to eliminate // it. - - TRI.eliminateFrameIndex(MI, SPAdj, FrameIndexVirtualScavenging ? - NULL : RS); + int Value; + unsigned VReg = + TRI.eliminateFrameIndex(MI, SPAdj, &Value, + FrameIndexVirtualScavenging ? NULL : RS); + if (VReg) { + assert (FrameIndexVirtualScavenging && + "Not scavenging, but virtual returned from " + "eliminateFrameIndex()!"); + FrameConstantRegMap.grow(VReg); + FrameConstantRegMap[VReg] = FrameConstantEntry(Value, SPAdj); + } // Reset the iterator if we were at the beginning of the BB. if (AtBeginning) { @@ -727,6 +740,35 @@ } } +/// findLastUseReg - find the killing use of the specified register within +/// the instruciton range. Return the operand number of the kill in Operand. 
+static MachineBasicBlock::iterator +findLastUseReg(MachineBasicBlock::iterator I, MachineBasicBlock::iterator ME, + unsigned Reg, unsigned *Operand) { + // Scan forward to find the last use of this virtual register + for (++I; I != ME; ++I) { + MachineInstr *MI = I; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) + if (MI->getOperand(i).isReg()) { + unsigned OpReg = MI->getOperand(i).getReg(); + if (OpReg == 0 || !TargetRegisterInfo::isVirtualRegister(OpReg)) + continue; + assert (OpReg == Reg + && "overlapping use of scavenged index register!"); + // If this is the killing use, we're done + if (MI->getOperand(i).isKill()) { + if (Operand) + *Operand = i; + return I; + } + } + } + // If we hit the end of the basic block, there was no kill of + // the virtual register, which is wrong. + assert (0 && "scavenged index register never killed!"); + return ME; +} + /// scavengeFrameVirtualRegs - Replace all frame index virtual registers /// with physical registers. Use the register scavenger to find an /// appropriate register to use. @@ -738,12 +780,21 @@ unsigned CurrentVirtReg = 0; unsigned CurrentScratchReg = 0; + unsigned PrevScratchReg = 0; + int PrevValue; + MachineInstr *PrevLastUseMI; + unsigned PrevLastUseOp; + // The instruction stream may change in the loop, so check BB->end() + // directly. for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) { MachineInstr *MI = I; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) + // Likewise, call getNumOperands() each iteration, as the MI may change + // inside the loop (with 'i' updated accordingly). + for (unsigned i = 0; i != MI->getNumOperands(); ++i) if (MI->getOperand(i).isReg()) { - unsigned Reg = MI->getOperand(i).getReg(); + MachineOperand &MO = MI->getOperand(i); + unsigned Reg = MO.getReg(); if (Reg == 0) continue; if (!TargetRegisterInfo::isVirtualRegister(Reg)) { @@ -751,33 +802,81 @@ // seeing any references to it. 
assert (Reg != CurrentScratchReg && "overlapping use of scavenged frame index register!"); + + // If we have a previous scratch reg, check and see if anything + // here kills whatever value is in there. + if (Reg == PrevScratchReg) { + if (MO.isUse()) { + // Two-address operands implicitly kill + if (MO.isKill() || MI->isRegTiedToDefOperand(i)) + PrevScratchReg = 0; + } else { + assert (MO.isDef()); + PrevScratchReg = 0; + } + } continue; } // If we already have a scratch for this virtual register, use it if (Reg != CurrentVirtReg) { - // When we first encounter a new virtual register, it - // must be a definition. - assert(MI->getOperand(i).isDef() && - "frame index virtual missing def!"); - // We can't have nested virtual register live ranges because - // there's only a guarantee of one scavenged register at a time. - assert (CurrentVirtReg == 0 && - "overlapping frame index virtual registers!"); - CurrentVirtReg = Reg; - const TargetRegisterClass *RC = Fn.getRegInfo().getRegClass(Reg); - CurrentScratchReg = RS->FindUnusedReg(RC); - if (CurrentScratchReg == 0) - // No register is "free". Scavenge a register. - // FIXME: Track SPAdj. Zero won't always be right - CurrentScratchReg = RS->scavengeRegister(RC, I, 0); + int Value = FrameConstantRegMap[Reg].first; + int SPAdj = FrameConstantRegMap[Reg].second; + + // If the scratch register from the last allocation is still + // available, see if the value matches. If it does, just re-use it. + if (PrevScratchReg && Value == PrevValue) { + // FIXME: This assumes that the instructions in the live range + // for the virtual register are exclusively for the purpose + // of populating the value in the register. That reasonable + // for these frame index registers, but it's still a very, very + // strong assumption. 
Perhaps this implies that the frame index + // elimination should be before register allocation, with + // conservative heuristics since we'll know less then, and + // the reuse calculations done directly when doing the code-gen? + + // Find the last use of the new virtual register. Remove all + // instruction between here and there, and update the current + // instruction to reference the last use insn instead. + MachineBasicBlock::iterator LastUseMI = + findLastUseReg(I, BB->end(), Reg, &i); + // Remove all instructions up 'til the last use, since they're + // just calculating the value we already have. + BB->erase(I, LastUseMI); + MI = I = LastUseMI; + + CurrentScratchReg = PrevScratchReg; + // Extend the live range of the register + PrevLastUseMI->getOperand(PrevLastUseOp).setIsKill(false); + RS->setUsed(CurrentScratchReg); + } else { + // When we first encounter a new virtual register, it + // must be a definition. + assert(MI->getOperand(i).isDef() && + "frame index virtual missing def!"); + // We can't have nested virtual register live ranges because + // there's only a guarantee of one scavenged register at a time. + assert (CurrentVirtReg == 0 && + "overlapping frame index virtual registers!"); + CurrentVirtReg = Reg; + const TargetRegisterClass *RC = Fn.getRegInfo().getRegClass(Reg); + CurrentScratchReg = RS->FindUnusedReg(RC); + if (CurrentScratchReg == 0) + // No register is "free". Scavenge a register. + CurrentScratchReg = RS->scavengeRegister(RC, I, SPAdj); + + PrevValue = Value; + } } assert (CurrentScratchReg && "Missing scratch register!"); MI->getOperand(i).setReg(CurrentScratchReg); // If this is the last use of the register, stop tracking it. 
- if (MI->getOperand(i).isKill()) + if (MI->getOperand(i).isKill()) { + PrevScratchReg = CurrentScratchReg; + PrevLastUseMI = MI; CurrentScratchReg = CurrentVirtReg = 0; + } } RS->forward(MI); } Modified: llvm/trunk/lib/CodeGen/PrologEpilogInserter.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/PrologEpilogInserter.h?rev=83467&r1=83466&r2=83467&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/PrologEpilogInserter.h (original) +++ llvm/trunk/lib/CodeGen/PrologEpilogInserter.h Wed Oct 7 12:12:56 2009 @@ -27,6 +27,8 @@ #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/ADT/SparseBitVector.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/IndexedMap.h" +#include "llvm/Target/TargetRegisterInfo.h" namespace llvm { class RegScavenger; @@ -93,6 +95,12 @@ // functions. bool ShrinkWrapThisFunction; + // When using the scavenger post-pass to resolve frame reference + // materialization registers, maintain a map of the registers to + // the constant value and SP adjustment associated with it. + typedef std::pair FrameConstantEntry; + IndexedMap FrameConstantRegMap; + #ifndef NDEBUG // Machine function handle. MachineFunction* MF; Modified: llvm/trunk/lib/CodeGen/RegisterScavenging.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/RegisterScavenging.cpp?rev=83467&r1=83466&r2=83467&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/RegisterScavenging.cpp (original) +++ llvm/trunk/lib/CodeGen/RegisterScavenging.cpp Wed Oct 7 12:12:56 2009 @@ -306,7 +306,7 @@ "Cannot scavenge register without an emergency spill slot!"); TII->storeRegToStackSlot(*MBB, I, SReg, true, ScavengingFrameIndex, RC); MachineBasicBlock::iterator II = prior(I); - TRI->eliminateFrameIndex(II, SPAdj, this); + TRI->eliminateFrameIndex(II, SPAdj, NULL, this); // Restore the scavenged register before its use (or first terminator). 
TII->loadRegFromStackSlot(*MBB, UseMI, SReg, ScavengingFrameIndex, RC); Modified: llvm/trunk/lib/Target/ARM/ARMBaseRegisterInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMBaseRegisterInfo.cpp?rev=83467&r1=83466&r2=83467&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMBaseRegisterInfo.cpp (original) +++ llvm/trunk/lib/Target/ARM/ARMBaseRegisterInfo.cpp Wed Oct 7 12:12:56 2009 @@ -1023,9 +1023,10 @@ return Reg; } -void +unsigned ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS) const { + int SPAdj, int *Value, + RegScavenger *RS) const { unsigned i = 0; MachineInstr &MI = *II; MachineBasicBlock &MBB = *MI.getParent(); @@ -1067,7 +1068,7 @@ Done = rewriteT2FrameIndex(MI, i, FrameReg, Offset, TII); } if (Done) - return; + return 0; // If we get here, the immediate doesn't fit into the instruction. We folded // as much as possible above, handle the rest, providing a register that is @@ -1102,6 +1103,7 @@ } MI.getOperand(i).ChangeToRegister(ScratchReg, false, false, true); } + return 0; } /// Move iterator pass the next bunch of callee save load / store ops for Modified: llvm/trunk/lib/Target/ARM/ARMBaseRegisterInfo.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMBaseRegisterInfo.h?rev=83467&r1=83466&r2=83467&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMBaseRegisterInfo.h (original) +++ llvm/trunk/lib/Target/ARM/ARMBaseRegisterInfo.h Wed Oct 7 12:12:56 2009 @@ -128,8 +128,9 @@ MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const; - virtual void eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS = NULL) const; + virtual unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, int *Value = NULL, + RegScavenger *RS = NULL) const; virtual void 
emitPrologue(MachineFunction &MF) const; virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; Modified: llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.cpp?rev=83467&r1=83466&r2=83467&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.cpp (original) +++ llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.cpp Wed Oct 7 12:12:56 2009 @@ -427,8 +427,11 @@ TII.copyRegToReg(MBB, I, Reg, ARM::R12, RC, ARM::GPRRegisterClass); } -void Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS) const{ +unsigned +Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, int *Value, + RegScavenger *RS) const{ + unsigned VReg = 0; unsigned i = 0; MachineInstr &MI = *II; MachineBasicBlock &MBB = *MI.getParent(); @@ -484,7 +487,7 @@ MI.setDesc(TII.get(ARM::tMOVgpr2tgpr)); MI.getOperand(i).ChangeToRegister(FrameReg, false); MI.RemoveOperand(i+1); - return; + return 0; } // Common case: small offset, fits into instruction. 
@@ -500,7 +503,7 @@ MI.getOperand(i).ChangeToRegister(FrameReg, false); MI.getOperand(i+1).ChangeToImmediate(Offset / Scale); } - return; + return 0; } unsigned DestReg = MI.getOperand(0).getReg(); @@ -512,7 +515,7 @@ emitThumbRegPlusImmediate(MBB, II, DestReg, FrameReg, Offset, TII, *this, dl); MBB.erase(II); - return; + return 0; } if (Offset > 0) { @@ -545,7 +548,7 @@ AddDefaultPred(MIB); } } - return; + return 0; } else { unsigned ImmIdx = 0; int InstrOffs = 0; @@ -575,7 +578,7 @@ // Replace the FrameIndex with sp MI.getOperand(i).ChangeToRegister(FrameReg, false); ImmOp.ChangeToImmediate(ImmedOffset); - return; + return 0; } bool isThumSpillRestore = Opcode == ARM::tRestore || Opcode == ARM::tSpill; @@ -633,22 +636,24 @@ MI.addOperand(MachineOperand::CreateReg(0, false)); } else if (Desc.mayStore()) { if (FrameIndexVirtualScavenging) { - unsigned TmpReg = - MF.getRegInfo().createVirtualRegister(ARM::tGPRRegisterClass); + VReg = MF.getRegInfo().createVirtualRegister(ARM::tGPRRegisterClass); + assert (Value && "Frame index virtual allocated, but Value arg is NULL!"); + *Value = Offset; bool UseRR = false; + if (Opcode == ARM::tSpill) { if (FrameReg == ARM::SP) - emitThumbRegPlusImmInReg(MBB, II, TmpReg, FrameReg, + emitThumbRegPlusImmInReg(MBB, II, VReg, FrameReg, Offset, false, TII, *this, dl); else { - emitLoadConstPool(MBB, II, dl, TmpReg, 0, Offset); + emitLoadConstPool(MBB, II, dl, VReg, 0, Offset); UseRR = true; } } else - emitThumbRegPlusImmediate(MBB, II, TmpReg, FrameReg, Offset, TII, + emitThumbRegPlusImmediate(MBB, II, VReg, FrameReg, Offset, TII, *this, dl); MI.setDesc(TII.get(ARM::tSTR)); - MI.getOperand(i).ChangeToRegister(TmpReg, false, false, true); + MI.getOperand(i).ChangeToRegister(VReg, false, false, true); if (UseRR) // Use [reg, reg] addrmode. MI.addOperand(MachineOperand::CreateReg(FrameReg, false)); else // tSTR has an extra register operand. 
@@ -707,6 +712,7 @@ MachineInstrBuilder MIB(&MI); AddDefaultPred(MIB); } + return VReg; } void Thumb1RegisterInfo::emitPrologue(MachineFunction &MF) const { Modified: llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.h?rev=83467&r1=83466&r2=83467&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.h (original) +++ llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.h Wed Oct 7 12:12:56 2009 @@ -62,8 +62,9 @@ MachineBasicBlock::iterator I, const TargetRegisterClass *RC, unsigned Reg) const; - void eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS = NULL) const; + unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, int *Value = NULL, + RegScavenger *RS = NULL) const; void emitPrologue(MachineFunction &MF) const; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; Modified: llvm/trunk/lib/Target/Alpha/AlphaRegisterInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Alpha/AlphaRegisterInfo.cpp?rev=83467&r1=83466&r2=83467&view=diff ============================================================================== --- llvm/trunk/lib/Target/Alpha/AlphaRegisterInfo.cpp (original) +++ llvm/trunk/lib/Target/Alpha/AlphaRegisterInfo.cpp Wed Oct 7 12:12:56 2009 @@ -151,8 +151,10 @@ //variable locals //<- SP -void AlphaRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS) const { +unsigned +AlphaRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, int *Value, + RegScavenger *RS) const { assert(SPAdj == 0 && "Unexpected"); unsigned i = 0; @@ -197,6 +199,7 @@ } else { MI.getOperand(i).ChangeToImmediate(Offset); } + return 0; } Modified: llvm/trunk/lib/Target/Alpha/AlphaRegisterInfo.h URL: 
http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Alpha/AlphaRegisterInfo.h?rev=83467&r1=83466&r2=83467&view=diff ============================================================================== --- llvm/trunk/lib/Target/Alpha/AlphaRegisterInfo.h (original) +++ llvm/trunk/lib/Target/Alpha/AlphaRegisterInfo.h Wed Oct 7 12:12:56 2009 @@ -41,8 +41,9 @@ MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const; - void eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS = NULL) const; + unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, int *Value = NULL, + RegScavenger *RS = NULL) const; //void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; Modified: llvm/trunk/lib/Target/Blackfin/BlackfinRegisterInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Blackfin/BlackfinRegisterInfo.cpp?rev=83467&r1=83466&r2=83467&view=diff ============================================================================== --- llvm/trunk/lib/Target/Blackfin/BlackfinRegisterInfo.cpp (original) +++ llvm/trunk/lib/Target/Blackfin/BlackfinRegisterInfo.cpp Wed Oct 7 12:12:56 2009 @@ -219,9 +219,10 @@ return Reg; } -void BlackfinRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, - RegScavenger *RS) const { +unsigned +BlackfinRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, int *Value, + RegScavenger *RS) const { MachineInstr &MI = *II; MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); @@ -258,20 +259,20 @@ MI.setDesc(TII.get(isStore ? BF::STORE32p_uimm6m4 : BF::LOAD32p_uimm6m4)); - return; + return 0; } if (BaseReg == BF::FP && isUint<7>(-Offset)) { MI.setDesc(TII.get(isStore ? BF::STORE32fp_nimm7m4 : BF::LOAD32fp_nimm7m4)); MI.getOperand(FIPos+1).setImm(-Offset); - return; + return 0; } if (isInt<18>(Offset)) { MI.setDesc(TII.get(isStore ? 
BF::STORE32p_imm18m4 : BF::LOAD32p_imm18m4)); - return; + return 0; } // Use RegScavenger to calculate proper offset... MI.dump(); @@ -356,6 +357,7 @@ llvm_unreachable("Cannot eliminate frame index"); break; } + return 0; } void BlackfinRegisterInfo:: Modified: llvm/trunk/lib/Target/Blackfin/BlackfinRegisterInfo.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Blackfin/BlackfinRegisterInfo.h?rev=83467&r1=83466&r2=83467&view=diff ============================================================================== --- llvm/trunk/lib/Target/Blackfin/BlackfinRegisterInfo.h (original) +++ llvm/trunk/lib/Target/Blackfin/BlackfinRegisterInfo.h Wed Oct 7 12:12:56 2009 @@ -64,8 +64,9 @@ MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const; - void eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS = NULL) const; + unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, int *Value = NULL, + RegScavenger *RS = NULL) const; void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS) const; Modified: llvm/trunk/lib/Target/CellSPU/SPURegisterInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/CellSPU/SPURegisterInfo.cpp?rev=83467&r1=83466&r2=83467&view=diff ============================================================================== --- llvm/trunk/lib/Target/CellSPU/SPURegisterInfo.cpp (original) +++ llvm/trunk/lib/Target/CellSPU/SPURegisterInfo.cpp Wed Oct 7 12:12:56 2009 @@ -326,9 +326,9 @@ MBB.erase(I); } -void +unsigned SPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, - RegScavenger *RS) const + int *Value, RegScavenger *RS) const { unsigned i = 0; MachineInstr &MI = *II; @@ -371,6 +371,7 @@ } else { MO.ChangeToImmediate(Offset); } + return 0; } /// determineFrameLayout - Determine the size of the frame and maximum call Modified: llvm/trunk/lib/Target/CellSPU/SPURegisterInfo.h URL: 
http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/CellSPU/SPURegisterInfo.h?rev=83467&r1=83466&r2=83467&view=diff ============================================================================== --- llvm/trunk/lib/Target/CellSPU/SPURegisterInfo.h (original) +++ llvm/trunk/lib/Target/CellSPU/SPURegisterInfo.h Wed Oct 7 12:12:56 2009 @@ -63,8 +63,9 @@ MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const; //! Convert frame indicies into machine operands - void eliminateFrameIndex(MachineBasicBlock::iterator II, int, - RegScavenger *RS) const; + unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, + int *Value = NULL, + RegScavenger *RS = NULL) const; //! Determine the frame's layour void determineFrameLayout(MachineFunction &MF) const; Modified: llvm/trunk/lib/Target/MSP430/MSP430RegisterInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/MSP430/MSP430RegisterInfo.cpp?rev=83467&r1=83466&r2=83467&view=diff ============================================================================== --- llvm/trunk/lib/Target/MSP430/MSP430RegisterInfo.cpp (original) +++ llvm/trunk/lib/Target/MSP430/MSP430RegisterInfo.cpp Wed Oct 7 12:12:56 2009 @@ -147,9 +147,10 @@ MBB.erase(I); } -void +unsigned MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS) const { + int SPAdj, int *Value, + RegScavenger *RS) const { assert(SPAdj == 0 && "Unexpected"); unsigned i = 0; @@ -187,7 +188,7 @@ MI.getOperand(i).ChangeToRegister(BasePtr, false); if (Offset == 0) - return; + return 0; // We need to materialize the offset via add instruction. 
unsigned DstReg = MI.getOperand(0).getReg(); @@ -198,11 +199,12 @@ BuildMI(MBB, next(II), dl, TII.get(MSP430::ADD16ri), DstReg) .addReg(DstReg).addImm(Offset); - return; + return 0; } MI.getOperand(i).ChangeToRegister(BasePtr, false); MI.getOperand(i+1).ChangeToImmediate(Offset); + return 0; } void Modified: llvm/trunk/lib/Target/MSP430/MSP430RegisterInfo.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/MSP430/MSP430RegisterInfo.h?rev=83467&r1=83466&r2=83467&view=diff ============================================================================== --- llvm/trunk/lib/Target/MSP430/MSP430RegisterInfo.h (original) +++ llvm/trunk/lib/Target/MSP430/MSP430RegisterInfo.h Wed Oct 7 12:12:56 2009 @@ -49,8 +49,9 @@ MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const; - void eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS = NULL) const; + unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, int *Value = NULL, + RegScavenger *RS = NULL) const; void emitPrologue(MachineFunction &MF) const; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; Modified: llvm/trunk/lib/Target/Mips/MipsRegisterInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Mips/MipsRegisterInfo.cpp?rev=83467&r1=83466&r2=83467&view=diff ============================================================================== --- llvm/trunk/lib/Target/Mips/MipsRegisterInfo.cpp (original) +++ llvm/trunk/lib/Target/Mips/MipsRegisterInfo.cpp Wed Oct 7 12:12:56 2009 @@ -348,9 +348,9 @@ // FrameIndex represent objects inside a abstract stack. // We must replace FrameIndex with an stack/frame pointer // direct reference. 
-void MipsRegisterInfo:: -eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, - RegScavenger *RS) const +unsigned MipsRegisterInfo:: +eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, + int *Value, RegScavenger *RS) const { MachineInstr &MI = *II; MachineFunction &MF = *MI.getParent()->getParent(); @@ -382,6 +382,7 @@ MI.getOperand(i-1).ChangeToImmediate(Offset); MI.getOperand(i).ChangeToRegister(getFrameRegister(MF), false); + return 0; } void MipsRegisterInfo:: Modified: llvm/trunk/lib/Target/Mips/MipsRegisterInfo.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Mips/MipsRegisterInfo.h?rev=83467&r1=83466&r2=83467&view=diff ============================================================================== --- llvm/trunk/lib/Target/Mips/MipsRegisterInfo.h (original) +++ llvm/trunk/lib/Target/Mips/MipsRegisterInfo.h Wed Oct 7 12:12:56 2009 @@ -54,8 +54,9 @@ MachineBasicBlock::iterator I) const; /// Stack Frame Processing Methods - void eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS = NULL) const; + unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, int *Value = NULL, + RegScavenger *RS = NULL) const; void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; Modified: llvm/trunk/lib/Target/PIC16/PIC16RegisterInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PIC16/PIC16RegisterInfo.cpp?rev=83467&r1=83466&r2=83467&view=diff ============================================================================== --- llvm/trunk/lib/Target/PIC16/PIC16RegisterInfo.cpp (original) +++ llvm/trunk/lib/Target/PIC16/PIC16RegisterInfo.cpp Wed Oct 7 12:12:56 2009 @@ -51,10 +51,13 @@ return false; } -void PIC16RegisterInfo:: +unsigned PIC16RegisterInfo:: eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, - RegScavenger *RS) const -{ /* NOT YET IMPLEMENTED */ } + int *Value, RegScavenger *RS) const +{ + /* NOT YET IMPLEMENTED */ + return 0; +} void 
PIC16RegisterInfo::emitPrologue(MachineFunction &MF) const { /* NOT YET IMPLEMENTED */ } Modified: llvm/trunk/lib/Target/PIC16/PIC16RegisterInfo.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PIC16/PIC16RegisterInfo.h?rev=83467&r1=83466&r2=83467&view=diff ============================================================================== --- llvm/trunk/lib/Target/PIC16/PIC16RegisterInfo.h (original) +++ llvm/trunk/lib/Target/PIC16/PIC16RegisterInfo.h Wed Oct 7 12:12:56 2009 @@ -48,8 +48,9 @@ virtual BitVector getReservedRegs(const MachineFunction &MF) const; virtual bool hasFP(const MachineFunction &MF) const; - virtual void eliminateFrameIndex(MachineBasicBlock::iterator MI, - int SPAdj, RegScavenger *RS=NULL) const; + virtual unsigned eliminateFrameIndex(MachineBasicBlock::iterator MI, + int SPAdj, int *Value = NULL, + RegScavenger *RS=NULL) const; void eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, Modified: llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.cpp?rev=83467&r1=83466&r2=83467&view=diff ============================================================================== --- llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.cpp (original) +++ llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.cpp Wed Oct 7 12:12:56 2009 @@ -699,8 +699,10 @@ MBB.erase(II); } -void PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS) const { +unsigned +PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, int *Value, + RegScavenger *RS) const { assert(SPAdj == 0 && "Unexpected"); // Get the instruction. @@ -739,14 +741,14 @@ if (FPSI && FrameIndex == FPSI && (OpC == PPC::DYNALLOC || OpC == PPC::DYNALLOC8)) { lowerDynamicAlloc(II, SPAdj, RS); - return; + return 0; } // Special case for pseudo-op SPILL_CR. if (EnableRegisterScavenging) // FIXME (64-bit): Enable by default. 
if (OpC == PPC::SPILL_CR) { lowerCRSpilling(II, FrameIndex, SPAdj, RS); - return; + return 0; } // Replace the FrameIndex with base register with GPR1 (SP) or GPR31 (FP). @@ -788,7 +790,7 @@ if (isIXAddr) Offset >>= 2; // The actual encoded value has the low two bits zero. MI.getOperand(OffsetOperandNo).ChangeToImmediate(Offset); - return; + return 0; } // The offset doesn't fit into a single register, scavenge one to build the @@ -828,6 +830,7 @@ unsigned StackReg = MI.getOperand(FIOperandNo).getReg(); MI.getOperand(OperandBase).ChangeToRegister(StackReg, false); MI.getOperand(OperandBase + 1).ChangeToRegister(SReg, false); + return 0; } /// VRRegNo - Map from a numbered VR register to its enum value. Modified: llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.h?rev=83467&r1=83466&r2=83467&view=diff ============================================================================== --- llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.h (original) +++ llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.h Wed Oct 7 12:12:56 2009 @@ -66,8 +66,9 @@ int SPAdj, RegScavenger *RS) const; void lowerCRSpilling(MachineBasicBlock::iterator II, unsigned FrameIndex, int SPAdj, RegScavenger *RS) const; - void eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS = NULL) const; + unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, int *Value = NULL, + RegScavenger *RS = NULL) const; /// determineFrameLayout - Determine the size of the frame and maximum call /// frame size. 
Modified: llvm/trunk/lib/Target/Sparc/SparcRegisterInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Sparc/SparcRegisterInfo.cpp?rev=83467&r1=83466&r2=83467&view=diff ============================================================================== --- llvm/trunk/lib/Target/Sparc/SparcRegisterInfo.cpp (original) +++ llvm/trunk/lib/Target/Sparc/SparcRegisterInfo.cpp Wed Oct 7 12:12:56 2009 @@ -76,8 +76,10 @@ MBB.erase(I); } -void SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS) const { +unsigned +SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, int *Value, + RegScavenger *RS) const { assert(SPAdj == 0 && "Unexpected"); unsigned i = 0; @@ -113,6 +115,7 @@ MI.getOperand(i).ChangeToRegister(SP::G1, false); MI.getOperand(i+1).ChangeToImmediate(Offset & ((1 << 10)-1)); } + return 0; } void SparcRegisterInfo:: Modified: llvm/trunk/lib/Target/Sparc/SparcRegisterInfo.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Sparc/SparcRegisterInfo.h?rev=83467&r1=83466&r2=83467&view=diff ============================================================================== --- llvm/trunk/lib/Target/Sparc/SparcRegisterInfo.h (original) +++ llvm/trunk/lib/Target/Sparc/SparcRegisterInfo.h Wed Oct 7 12:12:56 2009 @@ -43,8 +43,9 @@ MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const; - void eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS = NULL) const; + unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, int *Value = NULL, + RegScavenger *RS = NULL) const; void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; Modified: llvm/trunk/lib/Target/SystemZ/SystemZRegisterInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SystemZ/SystemZRegisterInfo.cpp?rev=83467&r1=83466&r2=83467&view=diff ============================================================================== --- 
llvm/trunk/lib/Target/SystemZ/SystemZRegisterInfo.cpp (original) +++ llvm/trunk/lib/Target/SystemZ/SystemZRegisterInfo.cpp Wed Oct 7 12:12:56 2009 @@ -107,8 +107,10 @@ return Offset; } -void SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS) const { +unsigned +SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, int *Value, + RegScavenger *RS) const { assert(SPAdj == 0 && "Unxpected"); unsigned i = 0; @@ -136,6 +138,7 @@ MI.setDesc(TII.getMemoryInstr(MI.getOpcode(), Offset)); MI.getOperand(i+1).ChangeToImmediate(Offset); + return 0; } void Modified: llvm/trunk/lib/Target/SystemZ/SystemZRegisterInfo.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SystemZ/SystemZRegisterInfo.h?rev=83467&r1=83466&r2=83467&view=diff ============================================================================== --- llvm/trunk/lib/Target/SystemZ/SystemZRegisterInfo.h (original) +++ llvm/trunk/lib/Target/SystemZ/SystemZRegisterInfo.h Wed Oct 7 12:12:56 2009 @@ -55,8 +55,9 @@ MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const; - void eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS = NULL) const; + unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, int *Value = NULL, + RegScavenger *RS = NULL) const; void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, Modified: llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp?rev=83467&r1=83466&r2=83467&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp (original) +++ llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp Wed Oct 7 12:12:56 2009 @@ -579,8 +579,10 @@ MBB.erase(I); } -void X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS) const{ +unsigned 
+X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, int *Value, + RegScavenger *RS) const{ assert(SPAdj == 0 && "Unexpected"); unsigned i = 0; @@ -617,6 +619,7 @@ (uint64_t)MI.getOperand(i+3).getOffset(); MI.getOperand(i+3).setOffset(Offset); } + return 0; } void Modified: llvm/trunk/lib/Target/X86/X86RegisterInfo.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86RegisterInfo.h?rev=83467&r1=83466&r2=83467&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86RegisterInfo.h (original) +++ llvm/trunk/lib/Target/X86/X86RegisterInfo.h Wed Oct 7 12:12:56 2009 @@ -139,8 +139,9 @@ MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const; - void eliminateFrameIndex(MachineBasicBlock::iterator MI, - int SPAdj, RegScavenger *RS = NULL) const; + unsigned eliminateFrameIndex(MachineBasicBlock::iterator MI, + int SPAdj, int *Value = NULL, + RegScavenger *RS = NULL) const; void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS = NULL) const; Modified: llvm/trunk/lib/Target/XCore/XCoreRegisterInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/XCore/XCoreRegisterInfo.cpp?rev=83467&r1=83466&r2=83467&view=diff ============================================================================== --- llvm/trunk/lib/Target/XCore/XCoreRegisterInfo.cpp (original) +++ llvm/trunk/lib/Target/XCore/XCoreRegisterInfo.cpp Wed Oct 7 12:12:56 2009 @@ -171,8 +171,10 @@ MBB.erase(I); } -void XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS) const { +unsigned +XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, int *Value, + RegScavenger *RS) const { assert(SPAdj == 0 && "Unexpected"); MachineInstr &MI = *II; DebugLoc dl = MI.getDebugLoc(); @@ -311,6 +313,7 @@ } // Erase old instruction. 
MBB.erase(II); + return 0; } void Modified: llvm/trunk/lib/Target/XCore/XCoreRegisterInfo.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/XCore/XCoreRegisterInfo.h?rev=83467&r1=83466&r2=83467&view=diff ============================================================================== --- llvm/trunk/lib/Target/XCore/XCoreRegisterInfo.h (original) +++ llvm/trunk/lib/Target/XCore/XCoreRegisterInfo.h Wed Oct 7 12:12:56 2009 @@ -57,8 +57,9 @@ MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const; - void eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS = NULL) const; + unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, int *Value = NULL, + RegScavenger *RS = NULL) const; void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS = NULL) const; From bob.wilson at apple.com Wed Oct 7 12:19:14 2009 From: bob.wilson at apple.com (Bob Wilson) Date: Wed, 07 Oct 2009 17:19:14 -0000 Subject: [llvm-commits] [llvm] r83468 - /llvm/trunk/test/CodeGen/ARM/vld2.ll Message-ID: <200910071719.n97HJEdQ029116@zion.cs.uiuc.edu> Author: bwilson Date: Wed Oct 7 12:19:13 2009 New Revision: 83468 URL: http://llvm.org/viewvc/llvm-project?rev=83468&view=rev Log: Add tests for vld2 of 128-bit vectors. 
Modified: llvm/trunk/test/CodeGen/ARM/vld2.ll Modified: llvm/trunk/test/CodeGen/ARM/vld2.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vld2.ll?rev=83468&r1=83467&r2=83468&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/ARM/vld2.ll (original) +++ llvm/trunk/test/CodeGen/ARM/vld2.ll Wed Oct 7 12:19:13 2009 @@ -5,6 +5,11 @@ %struct.__neon_int32x2x2_t = type { <2 x i32>, <2 x i32> } %struct.__neon_float32x2x2_t = type { <2 x float>, <2 x float> } +%struct.__neon_int8x16x2_t = type { <16 x i8>, <16 x i8> } +%struct.__neon_int16x8x2_t = type { <8 x i16>, <8 x i16> } +%struct.__neon_int32x4x2_t = type { <4 x i32>, <4 x i32> } +%struct.__neon_float32x4x2_t = type { <4 x float>, <4 x float> } + define <8 x i8> @vld2i8(i8* %A) nounwind { ;CHECK: vld2i8: ;CHECK: vld2.8 @@ -45,7 +50,52 @@ ret <2 x float> %tmp4 } +define <16 x i8> @vld2Qi8(i8* %A) nounwind { +;CHECK: vld2Qi8: +;CHECK: vld2.8 + %tmp1 = call %struct.__neon_int8x16x2_t @llvm.arm.neon.vld2.v16i8(i8* %A) + %tmp2 = extractvalue %struct.__neon_int8x16x2_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int8x16x2_t %tmp1, 1 + %tmp4 = add <16 x i8> %tmp2, %tmp3 + ret <16 x i8> %tmp4 +} + +define <8 x i16> @vld2Qi16(i16* %A) nounwind { +;CHECK: vld2Qi16: +;CHECK: vld2.16 + %tmp1 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2.v8i16(i16* %A) + %tmp2 = extractvalue %struct.__neon_int16x8x2_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int16x8x2_t %tmp1, 1 + %tmp4 = add <8 x i16> %tmp2, %tmp3 + ret <8 x i16> %tmp4 +} + +define <4 x i32> @vld2Qi32(i32* %A) nounwind { +;CHECK: vld2Qi32: +;CHECK: vld2.32 + %tmp1 = call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i32* %A) + %tmp2 = extractvalue %struct.__neon_int32x4x2_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int32x4x2_t %tmp1, 1 + %tmp4 = add <4 x i32> %tmp2, %tmp3 + ret <4 x i32> %tmp4 +} + +define <4 x float> @vld2Qf(float* %A) nounwind { +;CHECK: vld2Qf: 
+;CHECK: vld2.32 + %tmp1 = call %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2.v4f32(float* %A) + %tmp2 = extractvalue %struct.__neon_float32x4x2_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_float32x4x2_t %tmp1, 1 + %tmp4 = add <4 x float> %tmp2, %tmp3 + ret <4 x float> %tmp4 +} + declare %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2.v8i8(i8*) nounwind readonly declare %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2.v4i16(i8*) nounwind readonly declare %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2.v2i32(i8*) nounwind readonly declare %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2.v2f32(i8*) nounwind readonly + +declare %struct.__neon_int8x16x2_t @llvm.arm.neon.vld2.v16i8(i8*) nounwind readonly +declare %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2.v8i16(i8*) nounwind readonly +declare %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8*) nounwind readonly +declare %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2.v4f32(i8*) nounwind readonly From bob.wilson at apple.com Wed Oct 7 12:23:09 2009 From: bob.wilson at apple.com (Bob Wilson) Date: Wed, 07 Oct 2009 17:23:09 -0000 Subject: [llvm-commits] [llvm] r83470 - /llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp Message-ID: <200910071723.n97HN9UU029622@zion.cs.uiuc.edu> Author: bwilson Date: Wed Oct 7 12:23:09 2009 New Revision: 83470 URL: http://llvm.org/viewvc/llvm-project?rev=83470&view=rev Log: Rearrange code for selecting vld2 intrinsics. No functionality change. This is just to be more consistent with the forthcoming code for vld3/4. 
Modified: llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp Modified: llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp?rev=83470&r1=83469&r2=83470&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp (original) +++ llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp Wed Oct 7 12:23:09 2009 @@ -1350,13 +1350,22 @@ SDValue MemAddr, MemUpdate, MemOpc; if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc)) return NULL; - EVT RegVT = VT; + if (VT.is64BitVector()) { + switch (VT.getSimpleVT().SimpleTy) { + default: llvm_unreachable("unhandled vld2 type"); + case MVT::v8i8: Opc = ARM::VLD2d8; break; + case MVT::v4i16: Opc = ARM::VLD2d16; break; + case MVT::v2f32: + case MVT::v2i32: Opc = ARM::VLD2d32; break; + } + SDValue Chain = N->getOperand(0); + const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Chain }; + return CurDAG->getMachineNode(Opc, dl, VT, VT, MVT::Other, Ops, 4); + } + // Quad registers are loaded as pairs of double registers. + EVT RegVT; switch (VT.getSimpleVT().SimpleTy) { default: llvm_unreachable("unhandled vld2 type"); - case MVT::v8i8: Opc = ARM::VLD2d8; break; - case MVT::v4i16: Opc = ARM::VLD2d16; break; - case MVT::v2f32: - case MVT::v2i32: Opc = ARM::VLD2d32; break; case MVT::v16i8: Opc = ARM::VLD2q8; RegVT = MVT::v8i8; break; case MVT::v8i16: Opc = ARM::VLD2q16; RegVT = MVT::v4i16; break; case MVT::v4f32: Opc = ARM::VLD2q32; RegVT = MVT::v2f32; break; @@ -1364,10 +1373,6 @@ } SDValue Chain = N->getOperand(0); const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Chain }; - if (RegVT == VT) - return CurDAG->getMachineNode(Opc, dl, VT, VT, MVT::Other, Ops, 4); - - // Quad registers are loaded as pairs of double registers. 
std::vector<EVT> ResTys(4, RegVT); ResTys.push_back(MVT::Other); SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 4); From bob.wilson at apple.com Wed Oct 7 12:24:55 2009 From: bob.wilson at apple.com (Bob Wilson) Date: Wed, 07 Oct 2009 17:24:55 -0000 Subject: [llvm-commits] [llvm] r83471 - in /llvm/trunk: lib/Target/ARM/ARMISelDAGToDAG.cpp lib/Target/ARM/ARMInstrNEON.td lib/Target/ARM/NEONPreAllocPass.cpp test/CodeGen/ARM/vld3.ll Message-ID: <200910071724.n97HOtNv029864@zion.cs.uiuc.edu> Author: bwilson Date: Wed Oct 7 12:24:55 2009 New Revision: 83471 URL: http://llvm.org/viewvc/llvm-project?rev=83471&view=rev Log: Add codegen support for NEON vld3 intrinsics with 128-bit vectors. Modified: llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp llvm/trunk/lib/Target/ARM/ARMInstrNEON.td llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp llvm/trunk/test/CodeGen/ARM/vld3.ll Modified: llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp?rev=83471&r1=83470&r2=83471&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp (original) +++ llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp Wed Oct 7 12:24:55 2009 @@ -444,7 +444,7 @@ SDValue &Addr, SDValue &Update, SDValue &Opc) { Addr = N; - // The optional writeback is handled in ARMLoadStoreOpt. + // Default to no writeback. 
Update = CurDAG->getRegister(0, MVT::i32); Opc = CurDAG->getTargetConstant(ARM_AM::getAM6Opc(false), MVT::i32); return true; @@ -1388,16 +1388,57 @@ SDValue MemAddr, MemUpdate, MemOpc; if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc)) return NULL; + if (VT.is64BitVector()) { + switch (VT.getSimpleVT().SimpleTy) { + default: llvm_unreachable("unhandled vld3 type"); + case MVT::v8i8: Opc = ARM::VLD3d8; break; + case MVT::v4i16: Opc = ARM::VLD3d16; break; + case MVT::v2f32: + case MVT::v2i32: Opc = ARM::VLD3d32; break; + } + SDValue Chain = N->getOperand(0); + const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Chain }; + return CurDAG->getMachineNode(Opc, dl, VT, VT, VT, MVT::Other, Ops, 4); + } + // Quad registers are loaded with two separate instructions, where one + // loads the even registers and the other loads the odd registers. + EVT RegVT = VT; + unsigned Opc2 = 0; switch (VT.getSimpleVT().SimpleTy) { default: llvm_unreachable("unhandled vld3 type"); - case MVT::v8i8: Opc = ARM::VLD3d8; break; - case MVT::v4i16: Opc = ARM::VLD3d16; break; - case MVT::v2f32: - case MVT::v2i32: Opc = ARM::VLD3d32; break; + case MVT::v16i8: + Opc = ARM::VLD3q8a; Opc2 = ARM::VLD3q8b; RegVT = MVT::v8i8; break; + case MVT::v8i16: + Opc = ARM::VLD3q16a; Opc2 = ARM::VLD3q16b; RegVT = MVT::v4i16; break; + case MVT::v4f32: + Opc = ARM::VLD3q32a; Opc2 = ARM::VLD3q32b; RegVT = MVT::v2f32; break; + case MVT::v4i32: + Opc = ARM::VLD3q32a; Opc2 = ARM::VLD3q32b; RegVT = MVT::v2i32; break; } SDValue Chain = N->getOperand(0); - const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Chain }; - return CurDAG->getMachineNode(Opc, dl, VT, VT, VT, MVT::Other, Ops, 4); + // Enable writeback to the address register. 
+ MemOpc = CurDAG->getTargetConstant(ARM_AM::getAM6Opc(true), MVT::i32); + + std::vector<EVT> ResTys(3, RegVT); + ResTys.push_back(MemAddr.getValueType()); + ResTys.push_back(MVT::Other); + + const SDValue OpsA[] = { MemAddr, MemUpdate, MemOpc, Chain }; + SDNode *VLdA = CurDAG->getMachineNode(Opc, dl, ResTys, OpsA, 4); + Chain = SDValue(VLdA, 4); + + const SDValue OpsB[] = { SDValue(VLdA, 3), MemUpdate, MemOpc, Chain }; + SDNode *VLdB = CurDAG->getMachineNode(Opc2, dl, ResTys, OpsB, 4); + Chain = SDValue(VLdB, 4); + + SDNode *Q0 = PairDRegs(VT, SDValue(VLdA, 0), SDValue(VLdB, 0)); + SDNode *Q1 = PairDRegs(VT, SDValue(VLdA, 1), SDValue(VLdB, 1)); + SDNode *Q2 = PairDRegs(VT, SDValue(VLdA, 2), SDValue(VLdB, 2)); + ReplaceUses(SDValue(N, 0), SDValue(Q0, 0)); + ReplaceUses(SDValue(N, 1), SDValue(Q1, 0)); + ReplaceUses(SDValue(N, 2), SDValue(Q2, 0)); + ReplaceUses(SDValue(N, 3), Chain); + return NULL; } case Intrinsic::arm_neon_vld4: { Modified: llvm/trunk/lib/Target/ARM/ARMInstrNEON.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrNEON.td?rev=83471&r1=83470&r2=83471&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrNEON.td (original) +++ llvm/trunk/lib/Target/ARM/ARMInstrNEON.td Wed Oct 7 12:24:55 2009 @@ -201,11 +201,26 @@ : NLdSt<(outs DPR:$dst1, DPR:$dst2, DPR:$dst3), (ins addrmode6:$addr), IIC_VLD3, !strconcat(OpcodeStr, "\t\\{$dst1,$dst2,$dst3\\}, $addr"), "", []>; +class VLD3WB<string OpcodeStr> + : NLdSt<(outs DPR:$dst1, DPR:$dst2, DPR:$dst3, GPR:$wb), + (ins addrmode6:$addr), IIC_VLD3, + !strconcat(OpcodeStr, "\t\\{$dst1,$dst2,$dst3\\}, $addr"), + "$addr.addr = $wb", []>; def VLD3d8 : VLD3D<"vld3.8">; def VLD3d16 : VLD3D<"vld3.16">; def VLD3d32 : VLD3D<"vld3.32">; +// vld3 to double-spaced even registers. +def VLD3q8a : VLD3WB<"vld3.8">; +def VLD3q16a : VLD3WB<"vld3.16">; +def VLD3q32a : VLD3WB<"vld3.32">; + +// vld3 to double-spaced odd registers. 
+def VLD3q8b : VLD3WB<"vld3.8">; +def VLD3q16b : VLD3WB<"vld3.16">; +def VLD3q32b : VLD3WB<"vld3.32">; + // VLD4 : Vector Load (multiple 4-element structures) class VLD4D : NLdSt<(outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4), Modified: llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp?rev=83471&r1=83470&r2=83471&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp (original) +++ llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp Wed Oct 7 12:24:55 2009 @@ -36,8 +36,12 @@ char NEONPreAllocPass::ID = 0; } -static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, - unsigned &NumRegs) { +static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, + unsigned &Offset, unsigned &Stride) { + // Default to unit stride with no offset. + Stride = 1; + Offset = 0; + switch (Opcode) { default: break; @@ -69,6 +73,24 @@ NumRegs = 3; return true; + case ARM::VLD3q8a: + case ARM::VLD3q16a: + case ARM::VLD3q32a: + FirstOpnd = 0; + NumRegs = 3; + Offset = 0; + Stride = 2; + return true; + + case ARM::VLD3q8b: + case ARM::VLD3q16b: + case ARM::VLD3q32b: + FirstOpnd = 0; + NumRegs = 3; + Offset = 1; + Stride = 2; + return true; + case ARM::VLD4d8: case ARM::VLD4d16: case ARM::VLD4d32: @@ -149,8 +171,8 @@ MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); for (; MBBI != E; ++MBBI) { MachineInstr *MI = &*MBBI; - unsigned FirstOpnd, NumRegs; - if (!isNEONMultiRegOp(MI->getOpcode(), FirstOpnd, NumRegs)) + unsigned FirstOpnd, NumRegs, Offset, Stride; + if (!isNEONMultiRegOp(MI->getOpcode(), FirstOpnd, NumRegs, Offset, Stride)) continue; MachineBasicBlock::iterator NextI = next(MBBI); @@ -164,9 +186,10 @@ // For now, just assign a fixed set of adjacent registers. // This leaves plenty of room for future improvements. 
static const unsigned NEONDRegs[] = { - ARM::D0, ARM::D1, ARM::D2, ARM::D3 + ARM::D0, ARM::D1, ARM::D2, ARM::D3, + ARM::D4, ARM::D5, ARM::D6, ARM::D7 }; - MO.setReg(NEONDRegs[R]); + MO.setReg(NEONDRegs[Offset + R * Stride]); if (MO.isUse()) { // Insert a copy from VirtReg. Modified: llvm/trunk/test/CodeGen/ARM/vld3.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vld3.ll?rev=83471&r1=83470&r2=83471&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/ARM/vld3.ll (original) +++ llvm/trunk/test/CodeGen/ARM/vld3.ll Wed Oct 7 12:24:55 2009 @@ -5,6 +5,11 @@ %struct.__neon_int32x2x3_t = type { <2 x i32>, <2 x i32>, <2 x i32> } %struct.__neon_float32x2x3_t = type { <2 x float>, <2 x float>, <2 x float> } +%struct.__neon_int8x16x3_t = type { <16 x i8>, <16 x i8>, <16 x i8> } +%struct.__neon_int16x8x3_t = type { <8 x i16>, <8 x i16>, <8 x i16> } +%struct.__neon_int32x4x3_t = type { <4 x i32>, <4 x i32>, <4 x i32> } +%struct.__neon_float32x4x3_t = type { <4 x float>, <4 x float>, <4 x float> } + define <8 x i8> @vld3i8(i8* %A) nounwind { ;CHECK: vld3i8: ;CHECK: vld3.8 @@ -45,7 +50,56 @@ ret <2 x float> %tmp4 } +define <16 x i8> @vld3Qi8(i8* %A) nounwind { +;CHECK: vld3Qi8: +;CHECK: vld3.8 +;CHECK: vld3.8 + %tmp1 = call %struct.__neon_int8x16x3_t @llvm.arm.neon.vld3.v16i8(i8* %A) + %tmp2 = extractvalue %struct.__neon_int8x16x3_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int8x16x3_t %tmp1, 2 + %tmp4 = add <16 x i8> %tmp2, %tmp3 + ret <16 x i8> %tmp4 +} + +define <8 x i16> @vld3Qi16(i16* %A) nounwind { +;CHECK: vld3Qi16: +;CHECK: vld3.16 +;CHECK: vld3.16 + %tmp1 = call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3.v8i16(i16* %A) + %tmp2 = extractvalue %struct.__neon_int16x8x3_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int16x8x3_t %tmp1, 2 + %tmp4 = add <8 x i16> %tmp2, %tmp3 + ret <8 x i16> %tmp4 +} + +define <4 x i32> @vld3Qi32(i32* %A) nounwind { +;CHECK: vld3Qi32: +;CHECK: 
vld3.32 +;CHECK: vld3.32 + %tmp1 = call %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3.v4i32(i32* %A) + %tmp2 = extractvalue %struct.__neon_int32x4x3_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int32x4x3_t %tmp1, 2 + %tmp4 = add <4 x i32> %tmp2, %tmp3 + ret <4 x i32> %tmp4 +} + +define <4 x float> @vld3Qf(float* %A) nounwind { +;CHECK: vld3Qf: +;CHECK: vld3.32 +;CHECK: vld3.32 + %tmp1 = call %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3.v4f32(float* %A) + %tmp2 = extractvalue %struct.__neon_float32x4x3_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_float32x4x3_t %tmp1, 2 + %tmp4 = add <4 x float> %tmp2, %tmp3 + ret <4 x float> %tmp4 +} + declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8*) nounwind readonly declare %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3.v4i16(i8*) nounwind readonly declare %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3.v2i32(i8*) nounwind readonly declare %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3.v2f32(i8*) nounwind readonly + +declare %struct.__neon_int8x16x3_t @llvm.arm.neon.vld3.v16i8(i8*) nounwind readonly +declare %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3.v8i16(i8*) nounwind readonly +declare %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3.v4i32(i8*) nounwind readonly +declare %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3.v4f32(i8*) nounwind readonly From gohman at apple.com Wed Oct 7 12:36:00 2009 From: gohman at apple.com (Dan Gohman) Date: Wed, 07 Oct 2009 17:36:00 -0000 Subject: [llvm-commits] [llvm] r83474 - /llvm/trunk/lib/CodeGen/MachineVerifier.cpp Message-ID: <200910071736.n97Ha0eV031283@zion.cs.uiuc.edu> Author: djg Date: Wed Oct 7 12:36:00 2009 New Revision: 83474 URL: http://llvm.org/viewvc/llvm-project?rev=83474&view=rev Log: Add a few simple MachineVerifier checks for MachineMemOperands. 
Modified: llvm/trunk/lib/CodeGen/MachineVerifier.cpp Modified: llvm/trunk/lib/CodeGen/MachineVerifier.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MachineVerifier.cpp?rev=83474&r1=83473&r2=83474&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/MachineVerifier.cpp (original) +++ llvm/trunk/lib/CodeGen/MachineVerifier.cpp Wed Oct 7 12:36:00 2009 @@ -27,6 +27,7 @@ #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/Target/TargetMachine.h" @@ -457,6 +458,15 @@ *OS << TI.getNumOperands() << " operands expected, but " << MI->getNumExplicitOperands() << " given.\n"; } + + // Check the MachineMemOperands for basic consistency. + for (MachineInstr::mmo_iterator I = MI->memoperands_begin(), + E = MI->memoperands_end(); I != E; ++I) { + if ((*I)->isLoad() && !TI.mayLoad()) + report("Missing mayLoad flag", MI); + if ((*I)->isStore() && !TI.mayStore()) + report("Missing mayStore flag", MI); + } } void From gohman at apple.com Wed Oct 7 12:38:06 2009 From: gohman at apple.com (Dan Gohman) Date: Wed, 07 Oct 2009 17:38:06 -0000 Subject: [llvm-commits] [llvm] r83475 - in /llvm/trunk: include/llvm/CodeGen/MachineInstr.h include/llvm/Target/TargetInstrInfo.h lib/CodeGen/MachineInstr.cpp lib/CodeGen/MachineLICM.cpp lib/CodeGen/MachineSink.cpp lib/CodeGen/ScheduleDAGInstrs.cpp lib/Target/SystemZ/SystemZInstrInfo.cpp lib/Target/SystemZ/SystemZInstrInfo.h lib/Target/X86/X86InstrInfo.cpp lib/Target/X86/X86InstrInfo.h lib/Target/XCore/XCoreInstrInfo.cpp lib/Target/XCore/XCoreInstrInfo.h Message-ID: <200910071738.n97Hc7EG031574@zion.cs.uiuc.edu> Author: djg Date: Wed Oct 7 12:38:06 2009 New Revision: 83475 URL: http://llvm.org/viewvc/llvm-project?rev=83475&view=rev Log: Replace 
TargetInstrInfo::isInvariantLoad and its target-specific implementations with a new MachineInstr::isInvariantLoad, which uses MachineMemOperands and is target-independent. This brings MachineLICM and other functionality to targets which previously lacked an isInvariantLoad implementation. Modified: llvm/trunk/include/llvm/CodeGen/MachineInstr.h llvm/trunk/include/llvm/Target/TargetInstrInfo.h llvm/trunk/lib/CodeGen/MachineInstr.cpp llvm/trunk/lib/CodeGen/MachineLICM.cpp llvm/trunk/lib/CodeGen/MachineSink.cpp llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp llvm/trunk/lib/Target/SystemZ/SystemZInstrInfo.cpp llvm/trunk/lib/Target/SystemZ/SystemZInstrInfo.h llvm/trunk/lib/Target/X86/X86InstrInfo.cpp llvm/trunk/lib/Target/X86/X86InstrInfo.h llvm/trunk/lib/Target/XCore/XCoreInstrInfo.cpp llvm/trunk/lib/Target/XCore/XCoreInstrInfo.h Modified: llvm/trunk/include/llvm/CodeGen/MachineInstr.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/MachineInstr.h?rev=83475&r1=83474&r2=83475&view=diff ============================================================================== --- llvm/trunk/include/llvm/CodeGen/MachineInstr.h (original) +++ llvm/trunk/include/llvm/CodeGen/MachineInstr.h Wed Oct 7 12:38:06 2009 @@ -26,6 +26,7 @@ namespace llvm { +class AliasAnalysis; class TargetInstrDesc; class TargetInstrInfo; class TargetRegisterInfo; @@ -286,6 +287,13 @@ /// have no volatile memory references. bool hasVolatileMemoryRef() const; + /// isInvariantLoad - Return true if this instruction is loading from a + /// location whose value is invariant across the function. For example, + /// loading a value from the constant pool or from from the argument area of + /// a function if it does not change. This should only return true of *all* + /// loads the instruction does are invariant (if it does multiple loads). 
+ bool isInvariantLoad(AliasAnalysis *AA = 0) const; + // // Debugging support // Modified: llvm/trunk/include/llvm/Target/TargetInstrInfo.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetInstrInfo.h?rev=83475&r1=83474&r2=83475&view=diff ============================================================================== --- llvm/trunk/include/llvm/Target/TargetInstrInfo.h (original) +++ llvm/trunk/include/llvm/Target/TargetInstrInfo.h Wed Oct 7 12:38:06 2009 @@ -157,16 +157,6 @@ unsigned DestReg, unsigned SubIdx, const MachineInstr *Orig) const = 0; - /// isInvariantLoad - Return true if the specified instruction (which is - /// marked mayLoad) is loading from a location whose value is invariant across - /// the function. For example, loading a value from the constant pool or from - /// from the argument area of a function if it does not change. This should - /// only return true of *all* loads the instruction does are invariant (if it - /// does multiple loads). - virtual bool isInvariantLoad(const MachineInstr *MI) const { - return false; - } - /// convertToThreeAddress - This method must be implemented by targets that /// set the M_CONVERTIBLE_TO_3_ADDR flag. 
When this flag is set, the target /// may be able to convert a two-address instruction into one or more true Modified: llvm/trunk/lib/CodeGen/MachineInstr.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MachineInstr.cpp?rev=83475&r1=83474&r2=83475&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/MachineInstr.cpp (original) +++ llvm/trunk/lib/CodeGen/MachineInstr.cpp Wed Oct 7 12:38:06 2009 @@ -24,6 +24,7 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetInstrDesc.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/DebugInfo.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/LeakDetector.h" @@ -946,7 +947,7 @@ // destination. The check for isInvariantLoad gives the targe the chance to // classify the load as always returning a constant, e.g. a constant pool // load. - if (TID->mayLoad() && !TII->isInvariantLoad(this)) + if (TID->mayLoad() && !isInvariantLoad()) // Otherwise, this is a real load. If there is a store between the load and // end of block, or if the load is volatile, we can't move it. return !SawStore && !hasVolatileMemoryRef(); @@ -1005,6 +1006,46 @@ return false; } +/// isInvariantLoad - Return true if this instruction is loading from a +/// location whose value is invariant across the function. For example, +/// loading a value from the constant pool or from from the argument area +/// of a function if it does not change. This should only return true of +/// *all* loads the instruction does are invariant (if it does multiple loads). +bool MachineInstr::isInvariantLoad(AliasAnalysis *AA) const { + // If the instruction doesn't load at all, it isn't an invariant load. + if (!TID->mayLoad()) + return false; + + // If the instruction has lost its memoperands, conservatively assume that + // it may not be an invariant load. 
+ if (memoperands_empty()) + return false; + + const MachineFrameInfo *MFI = getParent()->getParent()->getFrameInfo(); + + for (mmo_iterator I = memoperands_begin(), + E = memoperands_end(); I != E; ++I) { + if ((*I)->isVolatile()) return false; + if ((*I)->isStore()) return false; + + if (const Value *V = (*I)->getValue()) { + // A load from a constant PseudoSourceValue is invariant. + if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) + if (PSV->isConstant(MFI)) + continue; + // If we have an AliasAnalysis, ask it whether the memory is constant. + if (AA && AA->pointsToConstantMemory(V)) + continue; + } + + // Otherwise assume conservatively. + return false; + } + + // Everything checks out. + return true; +} + void MachineInstr::dump() const { errs() << " " << *this; } Modified: llvm/trunk/lib/CodeGen/MachineLICM.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MachineLICM.cpp?rev=83475&r1=83474&r2=83475&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/MachineLICM.cpp (original) +++ llvm/trunk/lib/CodeGen/MachineLICM.cpp Wed Oct 7 12:38:06 2009 @@ -28,6 +28,7 @@ #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Statistic.h" #include "llvm/Support/Compiler.h" @@ -47,6 +48,7 @@ BitVector AllocatableSet; // Various analyses that we use... + AliasAnalysis *AA; // Alias analysis info. MachineLoopInfo *LI; // Current MachineLoopInfo MachineDominatorTree *DT; // Machine dominator tree for the cur loop MachineRegisterInfo *RegInfo; // Machine register information @@ -72,6 +74,7 @@ AU.setPreservesCFG(); AU.addRequired<MachineLoopInfo>(); AU.addRequired<MachineDominatorTree>(); + AU.addRequired<AliasAnalysis>(); AU.addPreserved<MachineLoopInfo>(); AU.addPreserved<MachineDominatorTree>(); MachineFunctionPass::getAnalysisUsage(AU); @@ -144,6 +147,7 @@ // Get our Loop information... 
LI = &getAnalysis<MachineLoopInfo>(); DT = &getAnalysis<MachineDominatorTree>(); + AA = &getAnalysis<AliasAnalysis>(); for (MachineLoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) { @@ -214,7 +218,7 @@ // Okay, this instruction does a load. As a refinement, we allow the target // to decide whether the loaded value is actually a constant. If so, we can // actually use it as a load. - if (!TII->isInvariantLoad(&I)) + if (!I.isInvariantLoad(AA)) // FIXME: we should be able to sink loads with no other side effects if // there is nothing that can change memory from here until the end of // block. This is a trivial form of alias analysis. @@ -259,8 +263,6 @@ // Don't hoist an instruction that uses or defines a physical register. if (TargetRegisterInfo::isPhysicalRegister(Reg)) { - // If this is a physical register use, we can't move it. If it is a def, - // we can move it, but only if the def is dead. if (MO.isUse()) { // If the physreg has no defs anywhere, it's just an ambient register // and we can freely move its uses. Alternatively, if it's allocatable, Modified: llvm/trunk/lib/CodeGen/MachineSink.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MachineSink.cpp?rev=83475&r1=83474&r2=83475&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/MachineSink.cpp (original) +++ llvm/trunk/lib/CodeGen/MachineSink.cpp Wed Oct 7 12:38:06 2009 @@ -178,8 +178,6 @@ if (Reg == 0) continue; if (TargetRegisterInfo::isPhysicalRegister(Reg)) { - // If this is a physical register use, we can't move it. If it is a def, - // we can move it, but only if the def is dead. if (MO.isUse()) { // If the physreg has no defs anywhere, it's just an ambient register // and we can freely move its uses. 
Alternatively, if it's allocatable, Modified: llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp?rev=83475&r1=83474&r2=83475&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp (original) +++ llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp Wed Oct 7 12:38:06 2009 @@ -375,7 +375,7 @@ // Treat all other stores conservatively. goto new_chain; } else if (TID.mayLoad()) { - if (TII->isInvariantLoad(MI)) { + if (MI->isInvariantLoad()) { // Invariant load, no chain dependencies needed! } else if (const Value *V = getUnderlyingObjectForInstr(MI)) { // A load from a specific PseudoSourceValue. Add precise dependencies. Modified: llvm/trunk/lib/Target/SystemZ/SystemZInstrInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SystemZ/SystemZInstrInfo.cpp?rev=83475&r1=83474&r2=83475&view=diff ============================================================================== --- llvm/trunk/lib/Target/SystemZ/SystemZInstrInfo.cpp (original) +++ llvm/trunk/lib/Target/SystemZ/SystemZInstrInfo.cpp Wed Oct 7 12:38:06 2009 @@ -266,30 +266,6 @@ return 0; } -bool SystemZInstrInfo::isInvariantLoad(const MachineInstr *MI) const { - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); - // Loads from constant pools are trivially invariant. - if (MO.isCPI()) - return true; - - if (MO.isGlobal()) - return isGVStub(MO.getGlobal(), TM); - - // If this is a load from an invariant stack slot, the load is a constant. - if (MO.isFI()) { - const MachineFrameInfo &MFI = - *MI->getParent()->getParent()->getFrameInfo(); - int Idx = MO.getIndex(); - return MFI.isFixedObjectIndex(Idx) && MFI.isImmutableObjectIndex(Idx); - } - } - - // All other instances of these instructions are presumed to have other - // issues. 
- return false; -} - bool SystemZInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Modified: llvm/trunk/lib/Target/SystemZ/SystemZInstrInfo.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SystemZ/SystemZInstrInfo.h?rev=83475&r1=83474&r2=83475&view=diff ============================================================================== --- llvm/trunk/lib/Target/SystemZ/SystemZInstrInfo.h (original) +++ llvm/trunk/lib/Target/SystemZ/SystemZInstrInfo.h Wed Oct 7 12:38:06 2009 @@ -70,7 +70,6 @@ unsigned &SrcSubIdx, unsigned &DstSubIdx) const; unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const; unsigned isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const; - bool isInvariantLoad(const MachineInstr *MI) const; virtual void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=83475&r1=83474&r2=83475&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original) +++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Wed Oct 7 12:38:06 2009 @@ -958,43 +958,6 @@ NewMI->getOperand(0).setSubReg(SubIdx); } -/// isInvariantLoad - Return true if the specified instruction (which is marked -/// mayLoad) is loading from a location whose value is invariant across the -/// function. For example, loading a value from the constant pool or from -/// from the argument area of a function if it does not change. This should -/// only return true of *all* loads the instruction does are invariant (if it -/// does multiple loads). -bool X86InstrInfo::isInvariantLoad(const MachineInstr *MI) const { - // This code cares about loads from three cases: constant pool entries, - // invariant argument slots, and global stubs. 
In order to handle these cases - // for all of the myriad of X86 instructions, we just scan for a CP/FI/GV - // operand and base our analysis on it. This is safe because the address of - // none of these three cases is ever used as anything other than a load base - // and X86 doesn't have any instructions that load from multiple places. - - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); - // Loads from constant pools are trivially invariant. - if (MO.isCPI()) - return true; - - if (MO.isGlobal()) - return isGlobalStubReference(MO.getTargetFlags()); - - // If this is a load from an invariant stack slot, the load is a constant. - if (MO.isFI()) { - const MachineFrameInfo &MFI = - *MI->getParent()->getParent()->getFrameInfo(); - int Idx = MO.getIndex(); - return MFI.isFixedObjectIndex(Idx) && MFI.isImmutableObjectIndex(Idx); - } - } - - // All other instances of these instructions are presumed to have other - // issues. - return false; -} - /// hasLiveCondCodeDef - True if MI has a condition code def, e.g. EFLAGS, that /// is not marked dead. static bool hasLiveCondCodeDef(MachineInstr *MI) { Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.h?rev=83475&r1=83474&r2=83475&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86InstrInfo.h (original) +++ llvm/trunk/lib/Target/X86/X86InstrInfo.h Wed Oct 7 12:38:06 2009 @@ -456,8 +456,6 @@ unsigned DestReg, unsigned SubIdx, const MachineInstr *Orig) const; - bool isInvariantLoad(const MachineInstr *MI) const; - /// convertToThreeAddress - This method must be implemented by targets that /// set the M_CONVERTIBLE_TO_3_ADDR flag. 
When this flag is set, the target /// may be able to convert a two-address instruction into a true Modified: llvm/trunk/lib/Target/XCore/XCoreInstrInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/XCore/XCoreInstrInfo.cpp?rev=83475&r1=83474&r2=83475&view=diff ============================================================================== --- llvm/trunk/lib/Target/XCore/XCoreInstrInfo.cpp (original) +++ llvm/trunk/lib/Target/XCore/XCoreInstrInfo.cpp Wed Oct 7 12:38:06 2009 @@ -116,30 +116,6 @@ return 0; } -/// isInvariantLoad - Return true if the specified instruction (which is marked -/// mayLoad) is loading from a location whose value is invariant across the -/// function. For example, loading a value from the constant pool or from -/// from the argument area of a function if it does not change. This should -/// only return true of *all* loads the instruction does are invariant (if it -/// does multiple loads). -bool -XCoreInstrInfo::isInvariantLoad(const MachineInstr *MI) const { - // Loads from constants pools and loads from invariant argument slots are - // invariant - int Opcode = MI->getOpcode(); - if (Opcode == XCore::LDWCP_ru6 || Opcode == XCore::LDWCP_lru6) { - return MI->getOperand(1).isCPI(); - } - int FrameIndex; - if (isLoadFromStackSlot(MI, FrameIndex)) { - const MachineFrameInfo &MFI = - *MI->getParent()->getParent()->getFrameInfo(); - return MFI.isFixedObjectIndex(FrameIndex) && - MFI.isImmutableObjectIndex(FrameIndex); - } - return false; -} - //===----------------------------------------------------------------------===// // Branch Analysis //===----------------------------------------------------------------------===// Modified: llvm/trunk/lib/Target/XCore/XCoreInstrInfo.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/XCore/XCoreInstrInfo.h?rev=83475&r1=83474&r2=83475&view=diff ============================================================================== --- llvm/trunk/lib/Target/XCore/XCoreInstrInfo.h 
(original) +++ llvm/trunk/lib/Target/XCore/XCoreInstrInfo.h Wed Oct 7 12:38:06 2009 @@ -52,8 +52,6 @@ virtual unsigned isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const; - virtual bool isInvariantLoad(const MachineInstr *MI) const; - virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl &Cond, From gohman at apple.com Wed Oct 7 12:47:20 2009 From: gohman at apple.com (Dan Gohman) Date: Wed, 07 Oct 2009 17:47:20 -0000 Subject: [llvm-commits] [llvm] r83476 - /llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp Message-ID: <200910071747.n97HlKp6000397@zion.cs.uiuc.edu> Author: djg Date: Wed Oct 7 12:47:20 2009 New Revision: 83476 URL: http://llvm.org/viewvc/llvm-project?rev=83476&view=rev Log: Replace some code for aggressive-remat with MachineInstr::isInvariantLoad, and teach it how to recognize invariant physical registers. Modified: llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp Modified: llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp?rev=83476&r1=83475&r2=83476&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp (original) +++ llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp Wed Oct 7 12:47:20 2009 @@ -1428,32 +1428,16 @@ if (!EnableAggressiveRemat) return false; - // If the instruction accesses memory but the memoperands have been lost, - // we can't analyze it. const TargetInstrDesc &TID = MI->getDesc(); - if ((TID.mayLoad() || TID.mayStore()) && MI->memoperands_empty()) - return false; // Avoid instructions obviously unsafe for remat. 
- if (TID.hasUnmodeledSideEffects() || TID.isNotDuplicable()) + if (TID.hasUnmodeledSideEffects() || TID.isNotDuplicable() || + TID.mayStore()) return false; - // If the instruction accesses memory and the memory could be non-constant, - // assume the instruction is not rematerializable. - for (MachineInstr::mmo_iterator I = MI->memoperands_begin(), - E = MI->memoperands_end(); I != E; ++I){ - const MachineMemOperand *MMO = *I; - if (MMO->isVolatile() || MMO->isStore()) - return false; - const Value *V = MMO->getValue(); - if (!V) - return false; - if (const PseudoSourceValue *PSV = dyn_cast(V)) { - if (!PSV->isConstant(mf_->getFrameInfo())) - return false; - } else if (!aa_->pointsToConstantMemory(V)) - return false; - } + // Avoid instructions which load from potentially varying memory. + if (TID.mayLoad() && !MI->isInvariantLoad(aa_)) + return false; // If any of the registers accessed are non-constant, conservatively assume // the instruction is not rematerializable. @@ -1464,8 +1448,29 @@ unsigned Reg = MO.getReg(); if (Reg == 0) continue; - if (TargetRegisterInfo::isPhysicalRegister(Reg)) - return false; + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + if (MO.isUse()) { + // If the physreg has no defs anywhere, it's just an ambient register + // and we can freely move its uses. Alternatively, if it's allocatable, + // it could get allocated to something with a def during allocation. + if (!mri_->def_empty(Reg)) + return false; + if (allocatableRegs_.test(Reg)) + return false; + // Check for a def among the register's aliases too. + for (const unsigned *Alias = tri_->getAliasSet(Reg); *Alias; ++Alias) { + unsigned AliasReg = *Alias; + if (!mri_->def_empty(AliasReg)) + return false; + if (allocatableRegs_.test(AliasReg)) + return false; + } + } else { + // A physreg def. We can't remat it. + return false; + } + continue; + } // Only allow one def, and that in the first operand. 
if (MO.isDef() != (i == 0)) From devang.patel at gmail.com Wed Oct 7 12:47:58 2009 From: devang.patel at gmail.com (Devang Patel) Date: Wed, 7 Oct 2009 10:47:58 -0700 Subject: [llvm-commits] [llvm] r83475 - in /llvm/trunk: include/llvm/CodeGen/MachineInstr.h include/llvm/Target/TargetInstrInfo.h lib/CodeGen/MachineInstr.cpp lib/CodeGen/MachineLICM.cpp lib/CodeGen/MachineSink.cpp lib/CodeGen/ScheduleDAGInstrs.cpp lib/Ta Message-ID: <352a1fb20910071047t7b85196ar6185e8b1f45906ff@mail.gmail.com> On Wed, Oct 7, 2009 at 10:38 AM, Dan Gohman wrote: > Author: djg > Date: Wed Oct ?7 12:38:06 2009 > New Revision: 83475 > > URL: http://llvm.org/viewvc/llvm-project?rev=83475&view=rev > Log: > Replace TargetInstrInfo::isInvariantLoad and its target-specific > implementations with a new MachineInstr::isInvariantLoad, which uses > MachineMemOperands and is target-independent. This brings MachineLICM > and other functionality to targets which previously lacked an > isInvariantLoad implementation. > > Modified: > ? ?llvm/trunk/include/llvm/CodeGen/MachineInstr.h > ? ?llvm/trunk/include/llvm/Target/TargetInstrInfo.h > ? ?llvm/trunk/lib/CodeGen/MachineInstr.cpp > ? ?llvm/trunk/lib/CodeGen/MachineLICM.cpp > ? ?llvm/trunk/lib/CodeGen/MachineSink.cpp > ? ?llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp > ? ?llvm/trunk/lib/Target/SystemZ/SystemZInstrInfo.cpp > ? ?llvm/trunk/lib/Target/SystemZ/SystemZInstrInfo.h > ? ?llvm/trunk/lib/Target/X86/X86InstrInfo.cpp > ? ?llvm/trunk/lib/Target/X86/X86InstrInfo.h > ? ?llvm/trunk/lib/Target/XCore/XCoreInstrInfo.cpp > ? 
?llvm/trunk/lib/Target/XCore/XCoreInstrInfo.h > > Modified: llvm/trunk/include/llvm/CodeGen/MachineInstr.h > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/MachineInstr.h?rev=83475&r1=83474&r2=83475&view=diff > > ============================================================================== > --- llvm/trunk/include/llvm/CodeGen/MachineInstr.h (original) > +++ llvm/trunk/include/llvm/CodeGen/MachineInstr.h Wed Oct ?7 12:38:06 2009 > @@ -26,6 +26,7 @@ > > ?namespace llvm { > > +class AliasAnalysis; > ?class TargetInstrDesc; > ?class TargetInstrInfo; > ?class TargetRegisterInfo; > @@ -286,6 +287,13 @@ > ? /// have no volatile memory references. > ? bool hasVolatileMemoryRef() const; > > + ?/// isInvariantLoad - Return true if this instruction is loading from a > + ?/// location whose value is invariant across the function. ?For example, > + ?/// loading a value from the constant pool or from from the argument area of > + ?/// a function if it does not change. ?This should only return true of *all* > + ?/// loads the instruction does are invariant (if it does multiple loads). > + ?bool isInvariantLoad(AliasAnalysis *AA = 0) const; > + > ? // > ? // Debugging support > ? // > > Modified: llvm/trunk/include/llvm/Target/TargetInstrInfo.h > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetInstrInfo.h?rev=83475&r1=83474&r2=83475&view=diff > > ============================================================================== > --- llvm/trunk/include/llvm/Target/TargetInstrInfo.h (original) > +++ llvm/trunk/include/llvm/Target/TargetInstrInfo.h Wed Oct ?7 12:38:06 2009 > @@ -157,16 +157,6 @@ > ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?unsigned DestReg, unsigned SubIdx, > ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?const MachineInstr *Orig) const = 0; > > - ?/// isInvariantLoad - Return true if the specified instruction (which is > - ?/// marked mayLoad) is loading from a location whose value is invariant across > - ?/// the function. 
?For example, loading a value from the constant pool or from > - ?/// from the argument area of a function if it does not change. ?This should > - ?/// only return true of *all* loads the instruction does are invariant (if it > - ?/// does multiple loads). > - ?virtual bool isInvariantLoad(const MachineInstr *MI) const { > - ? ?return false; > - ?} > - > ? /// convertToThreeAddress - This method must be implemented by targets that > ? /// set the M_CONVERTIBLE_TO_3_ADDR flag. ?When this flag is set, the target > ? /// may be able to convert a two-address instruction into one or more true > > Modified: llvm/trunk/lib/CodeGen/MachineInstr.cpp > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MachineInstr.cpp?rev=83475&r1=83474&r2=83475&view=diff > > ============================================================================== > --- llvm/trunk/lib/CodeGen/MachineInstr.cpp (original) > +++ llvm/trunk/lib/CodeGen/MachineInstr.cpp Wed Oct ?7 12:38:06 2009 > @@ -24,6 +24,7 @@ > ?#include "llvm/Target/TargetInstrInfo.h" > ?#include "llvm/Target/TargetInstrDesc.h" > ?#include "llvm/Target/TargetRegisterInfo.h" > +#include "llvm/Analysis/AliasAnalysis.h" > ?#include "llvm/Analysis/DebugInfo.h" > ?#include "llvm/Support/ErrorHandling.h" > ?#include "llvm/Support/LeakDetector.h" > @@ -946,7 +947,7 @@ > ? // destination. The check for isInvariantLoad gives the targe the chance to > ? // classify the load as always returning a constant, e.g. a constant pool > ? // load. > - ?if (TID->mayLoad() && !TII->isInvariantLoad(this)) > + ?if (TID->mayLoad() && !isInvariantLoad()) Now, isInvariantLoad() checks mayLoad() so this (and other instances) can be simplified. > ? ? // Otherwise, this is a real load. ?If there is a store between the load and > ? ? // end of block, or if the load is volatile, we can't move it. > ? ? return !SawStore && !hasVolatileMemoryRef(); > @@ -1005,6 +1006,46 @@ > ? 
return false; > ?} > > +/// isInvariantLoad - Return true if this instruction is loading from a > +/// location whose value is invariant across the function. ?For example, > +/// loading a value from the constant pool or from from the argument area > +/// of a function if it does not change. ?This should only return true of > +/// *all* loads the instruction does are invariant (if it does multiple loads). This routine returns false if the instruction does not do any load. If intentional then I'd explicitly put it in comments here. - Devang > +bool MachineInstr::isInvariantLoad(AliasAnalysis *AA) const { > + ?// If the instruction doesn't load at all, it isn't an invariant load. > + ?if (!TID->mayLoad()) > + ? ?return false; > + > + ?// If the instruction has lost its memoperands, conservatively assume that > + ?// it may not be an invariant load. > + ?if (memoperands_empty()) > + ? ?return false; > + ?const MachineFrameInfo *MFI = getParent()->getParent()->getFrameInfo(); > + > + ?for (mmo_iterator I = memoperands_begin(), > + ? ? ? E = memoperands_end(); I != E; ++I) { > + ? ?if ((*I)->isVolatile()) return false; > + ? ?if ((*I)->isStore()) return false; > + > + ? ?if (const Value *V = (*I)->getValue()) { > + ? ? ?// A load from a constant PseudoSourceValue is invariant. > + ? ? ?if (const PseudoSourceValue *PSV = dyn_cast(V)) > + ? ? ? ?if (PSV->isConstant(MFI)) > + ? ? ? ? ?continue; > + ? ? ?// If we have an AliasAnalysis, ask it whether the memory is constant. > + ? ? ?if (AA && AA->pointsToConstantMemory(V)) > + ? ? ? ?continue; > + ? ?} > + > + ? ?// Otherwise assume conservatively. > + ? ?return false; > + ?} > + > + ?// Everything checks out. > + ?return true; > +} > + > ?void MachineInstr::dump() const { > ? errs() << " ?"
<< *this; > ?} > > Modified: llvm/trunk/lib/CodeGen/MachineLICM.cpp > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MachineLICM.cpp?rev=83475&r1=83474&r2=83475&view=diff > > ============================================================================== > --- llvm/trunk/lib/CodeGen/MachineLICM.cpp (original) > +++ llvm/trunk/lib/CodeGen/MachineLICM.cpp Wed Oct ?7 12:38:06 2009 > @@ -28,6 +28,7 @@ > ?#include "llvm/Target/TargetRegisterInfo.h" > ?#include "llvm/Target/TargetInstrInfo.h" > ?#include "llvm/Target/TargetMachine.h" > +#include "llvm/Analysis/AliasAnalysis.h" > ?#include "llvm/ADT/DenseMap.h" > ?#include "llvm/ADT/Statistic.h" > ?#include "llvm/Support/Compiler.h" > @@ -47,6 +48,7 @@ > ? ? BitVector AllocatableSet; > > ? ? // Various analyses that we use... > + ? ?AliasAnalysis ? ? ? ?*AA; ? ? ?// Alias analysis info. > ? ? MachineLoopInfo ? ? ?*LI; ? ? ?// Current MachineLoopInfo > ? ? MachineDominatorTree *DT; ? ? ?// Machine dominator tree for the cur loop > ? ? MachineRegisterInfo ?*RegInfo; // Machine register information > @@ -72,6 +74,7 @@ > ? ? ? AU.setPreservesCFG(); > ? ? ? AU.addRequired(); > ? ? ? AU.addRequired(); > + ? ? ?AU.addRequired(); > ? ? ? AU.addPreserved(); > ? ? ? AU.addPreserved(); > ? ? ? MachineFunctionPass::getAnalysisUsage(AU); > @@ -144,6 +147,7 @@ > ? // Get our Loop information... > ? LI = &getAnalysis(); > ? DT = &getAnalysis(); > + ?AA = &getAnalysis(); > > ? for (MachineLoopInfo::iterator > ? ? ? ? ?I = LI->begin(), E = LI->end(); I != E; ++I) { > @@ -214,7 +218,7 @@ > ? ? // Okay, this instruction does a load. As a refinement, we allow the target > ? ? // to decide whether the loaded value is actually a constant. If so, we can > ? ? // actually use it as a load. > - ? ?if (!TII->isInvariantLoad(&I)) > + ? ?if (!I.isInvariantLoad(AA)) > ? ? ? // FIXME: we should be able to sink loads with no other side effects if > ? ? ? // there is nothing that can change memory from here until the end of > ? ? ? 
// block. This is a trivial form of alias analysis. > @@ -259,8 +263,6 @@ > > ? ? // Don't hoist an instruction that uses or defines a physical register. > ? ? if (TargetRegisterInfo::isPhysicalRegister(Reg)) { > - ? ? ?// If this is a physical register use, we can't move it. ?If it is a def, > - ? ? ?// we can move it, but only if the def is dead. > ? ? ? if (MO.isUse()) { > ? ? ? ? // If the physreg has no defs anywhere, it's just an ambient register > ? ? ? ? // and we can freely move its uses. Alternatively, if it's allocatable, > > Modified: llvm/trunk/lib/CodeGen/MachineSink.cpp > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MachineSink.cpp?rev=83475&r1=83474&r2=83475&view=diff > > ============================================================================== > --- llvm/trunk/lib/CodeGen/MachineSink.cpp (original) > +++ llvm/trunk/lib/CodeGen/MachineSink.cpp Wed Oct ?7 12:38:06 2009 > @@ -178,8 +178,6 @@ > ? ? if (Reg == 0) continue; > > ? ? if (TargetRegisterInfo::isPhysicalRegister(Reg)) { > - ? ? ?// If this is a physical register use, we can't move it. ?If it is a def, > - ? ? ?// we can move it, but only if the def is dead. > ? ? ? if (MO.isUse()) { > ? ? ? ? // If the physreg has no defs anywhere, it's just an ambient register > ? ? ? ? // and we can freely move its uses. Alternatively, if it's allocatable, > > Modified: llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp?rev=83475&r1=83474&r2=83475&view=diff > > ============================================================================== > --- llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp (original) > +++ llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp Wed Oct ?7 12:38:06 2009 > @@ -375,7 +375,7 @@ > ? ? ? ? // Treat all other stores conservatively. > ? ? ? ? goto new_chain; > ? ? } else if (TID.mayLoad()) { > - ? ? ?if (TII->isInvariantLoad(MI)) { > + ? ? ?if (MI->isInvariantLoad()) { > ? ? ? ? 
// Invariant load, no chain dependencies needed! > ? ? ? } else if (const Value *V = getUnderlyingObjectForInstr(MI)) { > ? ? ? ? // A load from a specific PseudoSourceValue. Add precise dependencies. > > Modified: llvm/trunk/lib/Target/SystemZ/SystemZInstrInfo.cpp > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SystemZ/SystemZInstrInfo.cpp?rev=83475&r1=83474&r2=83475&view=diff > > ============================================================================== > --- llvm/trunk/lib/Target/SystemZ/SystemZInstrInfo.cpp (original) > +++ llvm/trunk/lib/Target/SystemZ/SystemZInstrInfo.cpp Wed Oct ?7 12:38:06 2009 > @@ -266,30 +266,6 @@ > ? return 0; > ?} > > -bool SystemZInstrInfo::isInvariantLoad(const MachineInstr *MI) const { > - ?for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { > - ? ?const MachineOperand &MO = MI->getOperand(i); > - ? ?// Loads from constant pools are trivially invariant. > - ? ?if (MO.isCPI()) > - ? ? ?return true; > - > - ? ?if (MO.isGlobal()) > - ? ? ?return isGVStub(MO.getGlobal(), TM); > - > - ? ?// If this is a load from an invariant stack slot, the load is a constant. > - ? ?if (MO.isFI()) { > - ? ? ?const MachineFrameInfo &MFI = > - ? ? ? ?*MI->getParent()->getParent()->getFrameInfo(); > - ? ? ?int Idx = MO.getIndex(); > - ? ? ?return MFI.isFixedObjectIndex(Idx) && MFI.isImmutableObjectIndex(Idx); > - ? ?} > - ?} > - > - ?// All other instances of these instructions are presumed to have other > - ?// issues. > - ?return false; > -} > - > ?bool > ?SystemZInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB, > ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? 
?MachineBasicBlock::iterator MI, > > Modified: llvm/trunk/lib/Target/SystemZ/SystemZInstrInfo.h > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SystemZ/SystemZInstrInfo.h?rev=83475&r1=83474&r2=83475&view=diff > > ============================================================================== > --- llvm/trunk/lib/Target/SystemZ/SystemZInstrInfo.h (original) > +++ llvm/trunk/lib/Target/SystemZ/SystemZInstrInfo.h Wed Oct ?7 12:38:06 2009 > @@ -70,7 +70,6 @@ > ? ? ? ? ? ? ? ? ? ?unsigned &SrcSubIdx, unsigned &DstSubIdx) const; > ? unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const; > ? unsigned isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const; > - ?bool isInvariantLoad(const MachineInstr *MI) const; > > ? virtual void storeRegToStackSlot(MachineBasicBlock &MBB, > ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?MachineBasicBlock::iterator MI, > > Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=83475&r1=83474&r2=83475&view=diff > > ============================================================================== > --- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original) > +++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Wed Oct ?7 12:38:06 2009 > @@ -958,43 +958,6 @@ > ? NewMI->getOperand(0).setSubReg(SubIdx); > ?} > > -/// isInvariantLoad - Return true if the specified instruction (which is marked > -/// mayLoad) is loading from a location whose value is invariant across the > -/// function. ?For example, loading a value from the constant pool or from > -/// from the argument area of a function if it does not change. ?This should > -/// only return true of *all* loads the instruction does are invariant (if it > -/// does multiple loads). > -bool X86InstrInfo::isInvariantLoad(const MachineInstr *MI) const { > - ?// This code cares about loads from three cases: constant pool entries, > - ?// invariant argument slots, and global stubs. 
?In order to handle these cases > - ?// for all of the myriad of X86 instructions, we just scan for a CP/FI/GV > - ?// operand and base our analysis on it. ?This is safe because the address of > - ?// none of these three cases is ever used as anything other than a load base > - ?// and X86 doesn't have any instructions that load from multiple places. > - > - ?for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { > - ? ?const MachineOperand &MO = MI->getOperand(i); > - ? ?// Loads from constant pools are trivially invariant. > - ? ?if (MO.isCPI()) > - ? ? ?return true; > - > - ? ?if (MO.isGlobal()) > - ? ? ?return isGlobalStubReference(MO.getTargetFlags()); > - > - ? ?// If this is a load from an invariant stack slot, the load is a constant. > - ? ?if (MO.isFI()) { > - ? ? ?const MachineFrameInfo &MFI = > - ? ? ? ?*MI->getParent()->getParent()->getFrameInfo(); > - ? ? ?int Idx = MO.getIndex(); > - ? ? ?return MFI.isFixedObjectIndex(Idx) && MFI.isImmutableObjectIndex(Idx); > - ? ?} > - ?} > - > - ?// All other instances of these instructions are presumed to have other > - ?// issues. > - ?return false; > -} > - > ?/// hasLiveCondCodeDef - True if MI has a condition code def, e.g. EFLAGS, that > ?/// is not marked dead. > ?static bool hasLiveCondCodeDef(MachineInstr *MI) { > > Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.h > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.h?rev=83475&r1=83474&r2=83475&view=diff > > ============================================================================== > --- llvm/trunk/lib/Target/X86/X86InstrInfo.h (original) > +++ llvm/trunk/lib/Target/X86/X86InstrInfo.h Wed Oct ?7 12:38:06 2009 > @@ -456,8 +456,6 @@ > ? ? ? ? ? ? ? ? ? ? ?unsigned DestReg, unsigned SubIdx, > ? ? ? ? ? ? ? ? ? ? ?const MachineInstr *Orig) const; > > - ?bool isInvariantLoad(const MachineInstr *MI) const; > - > ? /// convertToThreeAddress - This method must be implemented by targets that > ? 
/// set the M_CONVERTIBLE_TO_3_ADDR flag. ?When this flag is set, the target > ? /// may be able to convert a two-address instruction into a true > > Modified: llvm/trunk/lib/Target/XCore/XCoreInstrInfo.cpp > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/XCore/XCoreInstrInfo.cpp?rev=83475&r1=83474&r2=83475&view=diff > > ============================================================================== > --- llvm/trunk/lib/Target/XCore/XCoreInstrInfo.cpp (original) > +++ llvm/trunk/lib/Target/XCore/XCoreInstrInfo.cpp Wed Oct ?7 12:38:06 2009 > @@ -116,30 +116,6 @@ > ? return 0; > ?} > > -/// isInvariantLoad - Return true if the specified instruction (which is marked > -/// mayLoad) is loading from a location whose value is invariant across the > -/// function. ?For example, loading a value from the constant pool or from > -/// from the argument area of a function if it does not change. ?This should > -/// only return true of *all* loads the instruction does are invariant (if it > -/// does multiple loads). > -bool > -XCoreInstrInfo::isInvariantLoad(const MachineInstr *MI) const { > - ?// Loads from constants pools and loads from invariant argument slots are > - ?// invariant > - ?int Opcode = MI->getOpcode(); > - ?if (Opcode == XCore::LDWCP_ru6 || Opcode == XCore::LDWCP_lru6) { > - ? ?return MI->getOperand(1).isCPI(); > - ?} > - ?int FrameIndex; > - ?if (isLoadFromStackSlot(MI, FrameIndex)) { > - ? ?const MachineFrameInfo &MFI = > - ? ? ?*MI->getParent()->getParent()->getFrameInfo(); > - ? ?return MFI.isFixedObjectIndex(FrameIndex) && > - ? ? ? ? ? 
MFI.isImmutableObjectIndex(FrameIndex); > - ?} > - ?return false; > -} > - > ?//===----------------------------------------------------------------------===// > ?// Branch Analysis > ?//===----------------------------------------------------------------------===// > > Modified: llvm/trunk/lib/Target/XCore/XCoreInstrInfo.h > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/XCore/XCoreInstrInfo.h?rev=83475&r1=83474&r2=83475&view=diff > > ============================================================================== > --- llvm/trunk/lib/Target/XCore/XCoreInstrInfo.h (original) > +++ llvm/trunk/lib/Target/XCore/XCoreInstrInfo.h Wed Oct ?7 12:38:06 2009 > @@ -52,8 +52,6 @@ > ? virtual unsigned isStoreToStackSlot(const MachineInstr *MI, > ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? int &FrameIndex) const; > > - ?virtual bool isInvariantLoad(const MachineInstr *MI) const; > - > ? virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, > ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?MachineBasicBlock *&FBB, > ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?SmallVectorImpl &Cond, > > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits > -- - Devang From enderby at apple.com Wed Oct 7 13:01:35 2009 From: enderby at apple.com (Kevin Enderby) Date: Wed, 07 Oct 2009 18:01:35 -0000 Subject: [llvm-commits] [llvm] r83477 - /llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp Message-ID: <200910071801.n97I1Z2M002204@zion.cs.uiuc.edu> Author: enderby Date: Wed Oct 7 13:01:35 2009 New Revision: 83477 URL: http://llvm.org/viewvc/llvm-project?rev=83477&view=rev Log: Add another bit of the ARM target assembler to llvm-mc to parse registers with writeback, things like "sp!", etc. Also added some more stuff to the temporarily hacked methods ARMAsmParser::MatchRegisterName and ARMAsmParser::MatchInstruction to allow more parser testing. 
Modified: llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp Modified: llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp?rev=83477&r1=83476&r2=83477&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp (original) +++ llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp Wed Oct 7 13:01:35 2009 @@ -100,6 +100,7 @@ struct { unsigned RegNum; + bool Writeback; } Reg; // This is for all forms of ARM address expressions @@ -146,10 +147,11 @@ return Res; } - static ARMOperand CreateReg(unsigned RegNum) { + static ARMOperand CreateReg(unsigned RegNum, bool Writeback) { ARMOperand Res; Res.Kind = Register; Res.Reg.RegNum = RegNum; + Res.Reg.Writeback = Writeback; return Res; } @@ -193,10 +195,17 @@ RegNum = MatchRegisterName(Tok.getString()); if (RegNum == 0) return true; - - Op = ARMOperand::CreateReg(RegNum); getLexer().Lex(); // Eat identifier token. 
+ bool Writeback = false; + const AsmToken &ExclaimTok = getLexer().getTok(); + if (ExclaimTok.is(AsmToken::Exclaim)) { + Writeback = true; + getLexer().Lex(); // Eat exclaim token + } + + Op = ARMOperand::CreateReg(RegNum, Writeback); + return false; } @@ -396,6 +405,8 @@ return 2; else if (Name == "r3") return 3; + else if (Name == "sp") + return 13; return 0; } @@ -406,6 +417,9 @@ assert(Op0.Kind == ARMOperand::Token && "First operand not a Token"); const StringRef &Mnemonic = Op0.getToken(); if (Mnemonic == "add" || + Mnemonic == "stmfd" || + Mnemonic == "str" || + Mnemonic == "ldmfd" || Mnemonic == "ldr") return false; From dpatel at apple.com Wed Oct 7 13:06:10 2009 From: dpatel at apple.com (Devang Patel) Date: Wed, 07 Oct 2009 18:06:10 -0000 Subject: [llvm-commits] [llvm-gcc-4.2] r83478 - /llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp Message-ID: <200910071806.n97I6A07002867@zion.cs.uiuc.edu> Author: dpatel Date: Wed Oct 7 13:06:10 2009 New Revision: 83478 URL: http://llvm.org/viewvc/llvm-project?rev=83478&view=rev Log: Attach location info to AllocaInst. Modified: llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp Modified: llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp?rev=83478&r1=83477&r2=83478&view=diff ============================================================================== --- llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp (original) +++ llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp Wed Oct 7 13:06:10 2009 @@ -1630,6 +1630,12 @@ if (!Size) { // Fixed size alloca -> entry block. 
AI = CreateTemporary(Ty); AI->setName(Name); +#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN + if (TheDebugInfo) { + TheDebugInfo->EmitStopPoint(Fn, Builder.GetInsertBlock(), Builder); + Builder.SetDebugLocation(AI); + } +#endif } else { AI = Builder.CreateAlloca(Ty, Size, Name); } From bob.wilson at apple.com Wed Oct 7 13:09:32 2009 From: bob.wilson at apple.com (Bob Wilson) Date: Wed, 07 Oct 2009 18:09:32 -0000 Subject: [llvm-commits] [llvm] r83479 - in /llvm/trunk: lib/Target/ARM/ARMISelDAGToDAG.cpp lib/Target/ARM/ARMInstrNEON.td lib/Target/ARM/NEONPreAllocPass.cpp test/CodeGen/ARM/vld4.ll Message-ID: <200910071809.n97I9WZn003299@zion.cs.uiuc.edu> Author: bwilson Date: Wed Oct 7 13:09:32 2009 New Revision: 83479 URL: http://llvm.org/viewvc/llvm-project?rev=83479&view=rev Log: Add codegen support for NEON vld4 intrinsics with 128-bit vectors. Modified: llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp llvm/trunk/lib/Target/ARM/ARMInstrNEON.td llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp llvm/trunk/test/CodeGen/ARM/vld4.ll Modified: llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp?rev=83479&r1=83478&r2=83479&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp (original) +++ llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp Wed Oct 7 13:09:32 2009 @@ -1445,18 +1445,61 @@ SDValue MemAddr, MemUpdate, MemOpc; if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc)) return NULL; + if (VT.is64BitVector()) { + switch (VT.getSimpleVT().SimpleTy) { + default: llvm_unreachable("unhandled vld4 type"); + case MVT::v8i8: Opc = ARM::VLD4d8; break; + case MVT::v4i16: Opc = ARM::VLD4d16; break; + case MVT::v2f32: + case MVT::v2i32: Opc = ARM::VLD4d32; break; + } + SDValue Chain = N->getOperand(0); + const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Chain }; + std::vector ResTys(4, VT); + 
ResTys.push_back(MVT::Other); + return CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 4); + } + // Quad registers are loaded with two separate instructions, where one + // loads the even registers and the other loads the odd registers. + EVT RegVT = VT; + unsigned Opc2 = 0; switch (VT.getSimpleVT().SimpleTy) { default: llvm_unreachable("unhandled vld4 type"); - case MVT::v8i8: Opc = ARM::VLD4d8; break; - case MVT::v4i16: Opc = ARM::VLD4d16; break; - case MVT::v2f32: - case MVT::v2i32: Opc = ARM::VLD4d32; break; + case MVT::v16i8: + Opc = ARM::VLD4q8a; Opc2 = ARM::VLD4q8b; RegVT = MVT::v8i8; break; + case MVT::v8i16: + Opc = ARM::VLD4q16a; Opc2 = ARM::VLD4q16b; RegVT = MVT::v4i16; break; + case MVT::v4f32: + Opc = ARM::VLD4q32a; Opc2 = ARM::VLD4q32b; RegVT = MVT::v2f32; break; + case MVT::v4i32: + Opc = ARM::VLD4q32a; Opc2 = ARM::VLD4q32b; RegVT = MVT::v2i32; break; } SDValue Chain = N->getOperand(0); - const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Chain }; - std::vector ResTys(4, VT); + // Enable writeback to the address register. 
+ MemOpc = CurDAG->getTargetConstant(ARM_AM::getAM6Opc(true), MVT::i32); + + std::vector ResTys(4, RegVT); + ResTys.push_back(MemAddr.getValueType()); ResTys.push_back(MVT::Other); - return CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 4); + + const SDValue OpsA[] = { MemAddr, MemUpdate, MemOpc, Chain }; + SDNode *VLdA = CurDAG->getMachineNode(Opc, dl, ResTys, OpsA, 4); + Chain = SDValue(VLdA, 5); + + const SDValue OpsB[] = { SDValue(VLdA, 4), MemUpdate, MemOpc, Chain }; + SDNode *VLdB = CurDAG->getMachineNode(Opc2, dl, ResTys, OpsB, 4); + Chain = SDValue(VLdB, 5); + + SDNode *Q0 = PairDRegs(VT, SDValue(VLdA, 0), SDValue(VLdB, 0)); + SDNode *Q1 = PairDRegs(VT, SDValue(VLdA, 1), SDValue(VLdB, 1)); + SDNode *Q2 = PairDRegs(VT, SDValue(VLdA, 2), SDValue(VLdB, 2)); + SDNode *Q3 = PairDRegs(VT, SDValue(VLdA, 3), SDValue(VLdB, 3)); + ReplaceUses(SDValue(N, 0), SDValue(Q0, 0)); + ReplaceUses(SDValue(N, 1), SDValue(Q1, 0)); + ReplaceUses(SDValue(N, 2), SDValue(Q2, 0)); + ReplaceUses(SDValue(N, 3), SDValue(Q3, 0)); + ReplaceUses(SDValue(N, 4), Chain); + return NULL; } case Intrinsic::arm_neon_vld2lane: { Modified: llvm/trunk/lib/Target/ARM/ARMInstrNEON.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrNEON.td?rev=83479&r1=83478&r2=83479&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrNEON.td (original) +++ llvm/trunk/lib/Target/ARM/ARMInstrNEON.td Wed Oct 7 13:09:32 2009 @@ -227,11 +227,26 @@ (ins addrmode6:$addr), IIC_VLD4, !strconcat(OpcodeStr, "\t\\{$dst1,$dst2,$dst3,$dst4\\}, $addr"), "", []>; +class VLD4WB + : NLdSt<(outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), + (ins addrmode6:$addr), IIC_VLD4, + !strconcat(OpcodeStr, "\t\\{$dst1,$dst2,$dst3,$dst4\\}, $addr"), + "$addr.addr = $wb", []>; def VLD4d8 : VLD4D<"vld4.8">; def VLD4d16 : VLD4D<"vld4.16">; def VLD4d32 : VLD4D<"vld4.32">; +// vld4 to double-spaced even registers. 
+def VLD4q8a : VLD4WB<"vld4.8">; +def VLD4q16a : VLD4WB<"vld4.16">; +def VLD4q32a : VLD4WB<"vld4.32">; + +// vld4 to double-spaced odd registers. +def VLD4q8b : VLD4WB<"vld4.8">; +def VLD4q16b : VLD4WB<"vld4.16">; +def VLD4q32b : VLD4WB<"vld4.32">; + // VLD2LN : Vector Load (single 2-element structure to one lane) class VLD2LND : NLdSt<(outs DPR:$dst1, DPR:$dst2), Modified: llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp?rev=83479&r1=83478&r2=83479&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp (original) +++ llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp Wed Oct 7 13:09:32 2009 @@ -101,6 +101,24 @@ NumRegs = 4; return true; + case ARM::VLD4q8a: + case ARM::VLD4q16a: + case ARM::VLD4q32a: + FirstOpnd = 0; + NumRegs = 4; + Offset = 0; + Stride = 2; + return true; + + case ARM::VLD4q8b: + case ARM::VLD4q16b: + case ARM::VLD4q32b: + FirstOpnd = 0; + NumRegs = 4; + Offset = 1; + Stride = 2; + return true; + case ARM::VST2d8: case ARM::VST2d16: case ARM::VST2d32: Modified: llvm/trunk/test/CodeGen/ARM/vld4.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vld4.ll?rev=83479&r1=83478&r2=83479&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/ARM/vld4.ll (original) +++ llvm/trunk/test/CodeGen/ARM/vld4.ll Wed Oct 7 13:09:32 2009 @@ -5,6 +5,11 @@ %struct.__neon_int32x2x4_t = type { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %struct.__neon_float32x2x4_t = type { <2 x float>, <2 x float>, <2 x float>, <2 x float> } +%struct.__neon_int8x16x4_t = type { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } +%struct.__neon_int16x8x4_t = type { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } +%struct.__neon_int32x4x4_t = type { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } +%struct.__neon_float32x4x4_t = type { <4 x float>, <4 x 
float>, <4 x float>, <4 x float> } + define <8 x i8> @vld4i8(i8* %A) nounwind { ;CHECK: vld4i8: ;CHECK: vld4.8 @@ -45,7 +50,56 @@ ret <2 x float> %tmp4 } +define <16 x i8> @vld4Qi8(i8* %A) nounwind { +;CHECK: vld4Qi8: +;CHECK: vld4.8 +;CHECK: vld4.8 + %tmp1 = call %struct.__neon_int8x16x4_t @llvm.arm.neon.vld4.v16i8(i8* %A) + %tmp2 = extractvalue %struct.__neon_int8x16x4_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int8x16x4_t %tmp1, 2 + %tmp4 = add <16 x i8> %tmp2, %tmp3 + ret <16 x i8> %tmp4 +} + +define <8 x i16> @vld4Qi16(i16* %A) nounwind { +;CHECK: vld4Qi16: +;CHECK: vld4.16 +;CHECK: vld4.16 + %tmp1 = call %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4.v8i16(i16* %A) + %tmp2 = extractvalue %struct.__neon_int16x8x4_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int16x8x4_t %tmp1, 2 + %tmp4 = add <8 x i16> %tmp2, %tmp3 + ret <8 x i16> %tmp4 +} + +define <4 x i32> @vld4Qi32(i32* %A) nounwind { +;CHECK: vld4Qi32: +;CHECK: vld4.32 +;CHECK: vld4.32 + %tmp1 = call %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4.v4i32(i32* %A) + %tmp2 = extractvalue %struct.__neon_int32x4x4_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int32x4x4_t %tmp1, 2 + %tmp4 = add <4 x i32> %tmp2, %tmp3 + ret <4 x i32> %tmp4 +} + +define <4 x float> @vld4Qf(float* %A) nounwind { +;CHECK: vld4Qf: +;CHECK: vld4.32 +;CHECK: vld4.32 + %tmp1 = call %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4.v4f32(float* %A) + %tmp2 = extractvalue %struct.__neon_float32x4x4_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_float32x4x4_t %tmp1, 2 + %tmp4 = add <4 x float> %tmp2, %tmp3 + ret <4 x float> %tmp4 +} + declare %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4.v8i8(i8*) nounwind readonly declare %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4.v4i16(i8*) nounwind readonly declare %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4.v2i32(i8*) nounwind readonly declare %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4.v2f32(i8*) nounwind readonly + +declare %struct.__neon_int8x16x4_t 
@llvm.arm.neon.vld4.v16i8(i8*) nounwind readonly +declare %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4.v8i16(i8*) nounwind readonly +declare %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4.v4i32(i8*) nounwind readonly +declare %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4.v4f32(i8*) nounwind readonly From evan.cheng at apple.com Wed Oct 7 13:38:00 2009 From: evan.cheng at apple.com (Evan Cheng) Date: Wed, 7 Oct 2009 11:38:00 -0700 Subject: [llvm-commits] [llvm] r83467 - in /llvm/trunk: include/llvm/CodeGen/ include/llvm/Target/ lib/CodeGen/ lib/Target/ARM/ lib/Target/Alpha/ lib/Target/Blackfin/ lib/Target/CellSPU/ lib/Target/MSP430/ lib/Target/Mips/ lib/Target/PIC16/ lib/Target/PowerPC/ lib/Target/Sparc/ lib/Target/SystemZ/ lib/Target/X86/ lib/Target/XCore/ In-Reply-To: <200910071712.n97HCvGJ028253@zion.cs.uiuc.edu> References: <200910071712.n97HCvGJ028253@zion.cs.uiuc.edu> Message-ID: On Oct 7, 2009, at 10:12 AM, Jim Grosbach wrote: > Author: grosbach > Date: Wed Oct 7 12:12:56 2009 > New Revision: 83467 > > URL: http://llvm.org/viewvc/llvm-project?rev=83467&view=rev > Log: > Add register-reuse to frame-index register scavenging. When a target > uses > a virtual register to eliminate a frame index, it can return that > register > and the constant stored there to PEI to track. When scavenging to > allocate > for those registers, PEI then tracks the last-used register and > value, and > if it is still available and matches the value for the next index, > reuses > the existing value instead, and removes the re-materialization > instructions. > Fancier tracking and adjustment of scavenger allocations to keep more > values live for longer is possible, but not yet implemented and > would likely > be better done via a different, less special-purpose, approach to the > problem. Hi Jim, Is PEI responsible for allocating physical registers to the virtual registers created during eliminateFrameIndex()? That's ok for now. 
But the question is when that is moved to a separate pass does this reuse scheme still work? More comments below. Evan > > eliminateFrameIndex() is modified so the target implementations can > return > the registers they wish to be tracked for reuse. > > ARM Thumb1 implements and utilizes the new mechanism. All other > targets are > simply modified to adjust for the changed eliminateFrameIndex() > prototype. > > > Modified: > llvm/trunk/include/llvm/CodeGen/RegisterScavenging.h > llvm/trunk/include/llvm/Target/TargetRegisterInfo.h > llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp > llvm/trunk/lib/CodeGen/PrologEpilogInserter.h > llvm/trunk/lib/CodeGen/RegisterScavenging.cpp > llvm/trunk/lib/Target/ARM/ARMBaseRegisterInfo.cpp > llvm/trunk/lib/Target/ARM/ARMBaseRegisterInfo.h > llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.cpp > llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.h > llvm/trunk/lib/Target/Alpha/AlphaRegisterInfo.cpp > llvm/trunk/lib/Target/Alpha/AlphaRegisterInfo.h > llvm/trunk/lib/Target/Blackfin/BlackfinRegisterInfo.cpp > llvm/trunk/lib/Target/Blackfin/BlackfinRegisterInfo.h > llvm/trunk/lib/Target/CellSPU/SPURegisterInfo.cpp > llvm/trunk/lib/Target/CellSPU/SPURegisterInfo.h > llvm/trunk/lib/Target/MSP430/MSP430RegisterInfo.cpp > llvm/trunk/lib/Target/MSP430/MSP430RegisterInfo.h > llvm/trunk/lib/Target/Mips/MipsRegisterInfo.cpp > llvm/trunk/lib/Target/Mips/MipsRegisterInfo.h > llvm/trunk/lib/Target/PIC16/PIC16RegisterInfo.cpp > llvm/trunk/lib/Target/PIC16/PIC16RegisterInfo.h > llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.cpp > llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.h > llvm/trunk/lib/Target/Sparc/SparcRegisterInfo.cpp > llvm/trunk/lib/Target/Sparc/SparcRegisterInfo.h > llvm/trunk/lib/Target/SystemZ/SystemZRegisterInfo.cpp > llvm/trunk/lib/Target/SystemZ/SystemZRegisterInfo.h > llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp > llvm/trunk/lib/Target/X86/X86RegisterInfo.h > llvm/trunk/lib/Target/XCore/XCoreRegisterInfo.cpp > 
llvm/trunk/lib/Target/XCore/XCoreRegisterInfo.h > > Modified: llvm/trunk/include/llvm/CodeGen/RegisterScavenging.h > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/RegisterScavenging.h?rev=83467&r1=83466&r2=83467&view=diff > > = > = > = > = > = > = > = > = > ====================================================================== > --- llvm/trunk/include/llvm/CodeGen/RegisterScavenging.h (original) > +++ llvm/trunk/include/llvm/CodeGen/RegisterScavenging.h Wed Oct 7 > 12:12:56 2009 > @@ -117,6 +117,9 @@ > return scavengeRegister(RegClass, MBBI, SPAdj); > } > > + /// setUsed - Tell the scavenger a register is used. > + /// > + void setUsed(unsigned Reg); I'd prefer not to do this if it can be helped. RS should be entirely responsible for tracking the liveness. Why is this needed? > private: > /// isReserved - Returns true if a register is reserved. It is > never "unused". > bool isReserved(unsigned Reg) const { return ReservedRegs.test > (Reg); } > @@ -131,7 +134,6 @@ > > /// setUsed / setUnused - Mark the state of one or a number of > registers. > /// > - void setUsed(unsigned Reg); > void setUsed(BitVector &Regs) { > RegsAvailable &= ~Regs; > } > > Modified: llvm/trunk/include/llvm/Target/TargetRegisterInfo.h > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetRegisterInfo.h?rev=83467&r1=83466&r2=83467&view=diff > > = > = > = > = > = > = > = > = > ====================================================================== > --- llvm/trunk/include/llvm/Target/TargetRegisterInfo.h (original) > +++ llvm/trunk/include/llvm/Target/TargetRegisterInfo.h Wed Oct 7 > 12:12:56 2009 > @@ -660,8 +660,13 @@ > /// specified instruction, as long as it keeps the iterator > pointing the the > /// finished product. SPAdj is the SP adjustment due to call frame > setup > /// instruction. 
> - virtual void eliminateFrameIndex(MachineBasicBlock::iterator MI, > - int SPAdj, RegScavenger > *RS=NULL) const = 0; > + /// > + /// When -enable-frame-index-scavenging is enabled, the virtual > register > + /// allocated for this frame index is returned and its value is > stored in > + /// *Value. > + virtual unsigned eliminateFrameIndex(MachineBasicBlock::iterator > MI, > + int SPAdj, int *Value = NULL, > + RegScavenger *RS=NULL) const > = 0; > > /// emitProlog/emitEpilog - These methods insert prolog and epilog > code into > /// the function. > > Modified: llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp?rev=83467&r1=83466&r2=83467&view=diff > > = > = > = > = > = > = > = > = > ====================================================================== > --- llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp (original) > +++ llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp Wed Oct 7 > 12:12:56 2009 > @@ -655,6 +655,11 @@ > int FrameSetupOpcode = TRI.getCallFrameSetupOpcode(); > int FrameDestroyOpcode = TRI.getCallFrameDestroyOpcode(); > > + // Pre-allocate space for frame index mappings. If more space is > needed, > + // the map will be grown later. > + if (FrameIndexVirtualScavenging) > + FrameConstantRegMap.grow(Fn.getRegInfo().getLastVirtReg() + 128); > + > for (MachineFunction::iterator BB = Fn.begin(), > E = Fn.end(); BB != E; ++BB) { > int SPAdj = 0; // SP offset due to call frame setup / destroy. > @@ -703,9 +708,17 @@ > // If this instruction has a FrameIndex operand, we need to > // use that target machine register info object to eliminate > // it. > - > - TRI.eliminateFrameIndex(MI, SPAdj, > FrameIndexVirtualScavenging ? > - NULL : RS); > + int Value; > + unsigned VReg = > + TRI.eliminateFrameIndex(MI, SPAdj, &Value, > + FrameIndexVirtualScavenging ? 
> NULL : RS); > + if (VReg) { > + assert (FrameIndexVirtualScavenging && > + "Not scavenging, but virtual returned from " > + "eliminateFrameIndex()!"); > + FrameConstantRegMap.grow(VReg); > + FrameConstantRegMap[VReg] = FrameConstantEntry(Value, > SPAdj); > + } > > // Reset the iterator if we were at the beginning of the BB. > if (AtBeginning) { > @@ -727,6 +740,35 @@ > } > } > > +/// findLastUseReg - find the killing use of the specified register > within > +/// the instruciton range. Return the operand number of the kill in > Operand. > +static MachineBasicBlock::iterator > +findLastUseReg(MachineBasicBlock::iterator I, > MachineBasicBlock::iterator ME, > + unsigned Reg, unsigned *Operand) { > + // Scan forward to find the last use of this virtual register > + for (++I; I != ME; ++I) { > + MachineInstr *MI = I; > + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) > + if (MI->getOperand(i).isReg()) { > + unsigned OpReg = MI->getOperand(i).getReg(); > + if (OpReg == 0 || !TargetRegisterInfo::isVirtualRegister > (OpReg)) > + continue; > + assert (OpReg == Reg > + && "overlapping use of scavenged index register!"); > + // If this is the killing use, we're done > + if (MI->getOperand(i).isKill()) { > + if (Operand) > + *Operand = i; > + return I; > + } > + } > + } > + // If we hit the end of the basic block, there was no kill of > + // the virtual register, which is wrong. > + assert (0 && "scavenged index register never killed!"); > + return ME; > +} > + > /// scavengeFrameVirtualRegs - Replace all frame index virtual > registers > /// with physical registers. Use the register scavenger to find an > /// appropriate register to use. > @@ -738,12 +780,21 @@ > > unsigned CurrentVirtReg = 0; > unsigned CurrentScratchReg = 0; > + unsigned PrevScratchReg = 0; > + int PrevValue; > + MachineInstr *PrevLastUseMI; > + unsigned PrevLastUseOp; > > + // The instruction stream may change in the loop, so check BB- > >end() > + // directly. 
> for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); > ++I) { > MachineInstr *MI = I; > - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) > + // Likewise, call getNumOperands() each iteration, as the MI > may change > + // inside the loop (with 'i' updated accordingly). Why not just re-compute the end limit when MI is updated? > + for (unsigned i = 0; i != MI->getNumOperands(); ++i) > if (MI->getOperand(i).isReg()) { > - unsigned Reg = MI->getOperand(i).getReg(); > + MachineOperand &MO = MI->getOperand(i); > + unsigned Reg = MO.getReg(); > if (Reg == 0) > continue; > if (!TargetRegisterInfo::isVirtualRegister(Reg)) { > @@ -751,33 +802,81 @@ > // seeing any references to it. > assert (Reg != CurrentScratchReg > && "overlapping use of scavenged frame index > register!"); > + > + // If we have a previous scratch reg, check and see if > anything > + // here kills whatever value is in there. > + if (Reg == PrevScratchReg) { > + if (MO.isUse()) { > + // Two-address operands implicitly kill > + if (MO.isKill() || MI->isRegTiedToDefOperand(i)) > + PrevScratchReg = 0; > + } else { > + assert (MO.isDef()); > + PrevScratchReg = 0; > + } > + } > continue; > } > > // If we already have a scratch for this virtual register, > use it > if (Reg != CurrentVirtReg) { > - // When we first encounter a new virtual register, it > - // must be a definition. > - assert(MI->getOperand(i).isDef() && > - "frame index virtual missing def!"); > - // We can't have nested virtual register live ranges > because > - // there's only a guarantee of one scavenged register > at a time. > - assert (CurrentVirtReg == 0 && > - "overlapping frame index virtual registers!"); > - CurrentVirtReg = Reg; > - const TargetRegisterClass *RC = Fn.getRegInfo > ().getRegClass(Reg); > - CurrentScratchReg = RS->FindUnusedReg(RC); > - if (CurrentScratchReg == 0) > - // No register is "free". Scavenge a register. > - // FIXME: Track SPAdj. 
Zero won't always be right > - CurrentScratchReg = RS->scavengeRegister(RC, I, 0); > + int Value = FrameConstantRegMap[Reg].first; > + int SPAdj = FrameConstantRegMap[Reg].second; > + > + // If the scratch register from the last allocation is > still > + // available, see if the value matches. If it does, > just re-use it. > + if (PrevScratchReg && Value == PrevValue) { This means the reuse can only happen when you have consecutive uses of the same frame indices. That seems very restrictive. The implementation makes it difficult to separate the allocation phase from PEI. You're trying to solve two problems at the same time (allocation and reuse). I'd much rather see the first part done and well tested first before you try to solve the reuse problem. > + // FIXME: This assumes that the instructions in the > live range > + // for the virtual register are exclusively for the > purpose > + // of populating the value in the register. That > reasonable That -> That's. > + // for these frame index registers, but it's still a > very, very > + // strong assumption. Perhaps this implies that the > frame index > + // elimination should be before register allocation, > with > + // conservative heuristics since we'll know less > then, and > + // the reuse calculations done directly when doing > the code-gen? This can be solved later. > + > + // Find the last use of the new virtual register. > Remove all > + // instruction between here and there, and update the > current > + // instruction to reference the last use insn instead. > + MachineBasicBlock::iterator LastUseMI = > + findLastUseReg(I, BB->end(), Reg, &i); > + // Remove all instructions up 'til the last use, > since they're > + // just calculating the value we already have. > + BB->erase(I, LastUseMI); > + MI = I = LastUseMI; Rather than doing this, you could simply continue to iterate forward until you have reached the kill. 
> + > + CurrentScratchReg = PrevScratchReg; > + // Extend the live range of the register > + PrevLastUseMI->getOperand(PrevLastUseOp).setIsKill > (false); > + RS->setUsed(CurrentScratchReg); > + } else { > + // When we first encounter a new virtual register, it > + // must be a definition. > + assert(MI->getOperand(i).isDef() && > + "frame index virtual missing def!"); > + // We can't have nested virtual register live ranges > because > + // there's only a guarantee of one scavenged register > at a time. > + assert (CurrentVirtReg == 0 && > + "overlapping frame index virtual registers!"); > + CurrentVirtReg = Reg; > + const TargetRegisterClass *RC = Fn.getRegInfo > ().getRegClass(Reg); > + CurrentScratchReg = RS->FindUnusedReg(RC); > + if (CurrentScratchReg == 0) > + // No register is "free". Scavenge a register. > + CurrentScratchReg = RS->scavengeRegister(RC, I, > SPAdj); > + > + PrevValue = Value; > + } > } > assert (CurrentScratchReg && "Missing scratch register!"); > MI->getOperand(i).setReg(CurrentScratchReg); > > // If this is the last use of the register, stop tracking it. 
> - if (MI->getOperand(i).isKill()) > + if (MI->getOperand(i).isKill()) { > + PrevScratchReg = CurrentScratchReg; > + PrevLastUseMI = MI; > CurrentScratchReg = CurrentVirtReg = 0; > + } > } > RS->forward(MI); > } > > Modified: llvm/trunk/lib/CodeGen/PrologEpilogInserter.h > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/PrologEpilogInserter.h?rev=83467&r1=83466&r2=83467&view=diff > > = > = > = > = > = > = > = > = > ====================================================================== > --- llvm/trunk/lib/CodeGen/PrologEpilogInserter.h (original) > +++ llvm/trunk/lib/CodeGen/PrologEpilogInserter.h Wed Oct 7 > 12:12:56 2009 > @@ -27,6 +27,8 @@ > #include "llvm/CodeGen/MachineLoopInfo.h" > #include "llvm/ADT/SparseBitVector.h" > #include "llvm/ADT/DenseMap.h" > +#include "llvm/ADT/IndexedMap.h" > +#include "llvm/Target/TargetRegisterInfo.h" > > namespace llvm { > class RegScavenger; > @@ -93,6 +95,12 @@ > // functions. > bool ShrinkWrapThisFunction; > > + // When using the scavenger post-pass to resolve frame reference > + // materialization registers, maintain a map of the registers to > + // the constant value and SP adjustment associated with it. > + typedef std::pair FrameConstantEntry; > + IndexedMap > FrameConstantRegMap; > + > #ifndef NDEBUG > // Machine function handle. 
> MachineFunction* MF; > > Modified: llvm/trunk/lib/CodeGen/RegisterScavenging.cpp > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/RegisterScavenging.cpp?rev=83467&r1=83466&r2=83467&view=diff > > = > = > = > = > = > = > = > = > ====================================================================== > --- llvm/trunk/lib/CodeGen/RegisterScavenging.cpp (original) > +++ llvm/trunk/lib/CodeGen/RegisterScavenging.cpp Wed Oct 7 > 12:12:56 2009 > @@ -306,7 +306,7 @@ > "Cannot scavenge register without an emergency spill > slot!"); > TII->storeRegToStackSlot(*MBB, I, SReg, true, > ScavengingFrameIndex, RC); > MachineBasicBlock::iterator II = prior(I); > - TRI->eliminateFrameIndex(II, SPAdj, this); > + TRI->eliminateFrameIndex(II, SPAdj, NULL, this); > > // Restore the scavenged register before its use (or first > terminator). > TII->loadRegFromStackSlot(*MBB, UseMI, SReg, > ScavengingFrameIndex, RC); > > Modified: llvm/trunk/lib/Target/ARM/ARMBaseRegisterInfo.cpp > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMBaseRegisterInfo.cpp?rev=83467&r1=83466&r2=83467&view=diff > > = > = > = > = > = > = > = > = > ====================================================================== > --- llvm/trunk/lib/Target/ARM/ARMBaseRegisterInfo.cpp (original) > +++ llvm/trunk/lib/Target/ARM/ARMBaseRegisterInfo.cpp Wed Oct 7 > 12:12:56 2009 > @@ -1023,9 +1023,10 @@ > return Reg; > } > > -void > +unsigned > ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator > II, > - int SPAdj, RegScavenger > *RS) const { > + int SPAdj, int *Value, > + RegScavenger *RS) const { > unsigned i = 0; > MachineInstr &MI = *II; > MachineBasicBlock &MBB = *MI.getParent(); > @@ -1067,7 +1068,7 @@ > Done = rewriteT2FrameIndex(MI, i, FrameReg, Offset, TII); > } > if (Done) > - return; > + return 0; > > // If we get here, the immediate doesn't fit into the instruction. 
> We folded > // as much as possible above, handle the rest, providing a register > that is > @@ -1102,6 +1103,7 @@ > } > MI.getOperand(i).ChangeToRegister(ScratchReg, false, false, true); > } > + return 0; > } > > /// Move iterator pass the next bunch of callee save load / store > ops for > > Modified: llvm/trunk/lib/Target/ARM/ARMBaseRegisterInfo.h > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMBaseRegisterInfo.h?rev=83467&r1=83466&r2=83467&view=diff > > = > = > = > = > = > = > = > = > ====================================================================== > --- llvm/trunk/lib/Target/ARM/ARMBaseRegisterInfo.h (original) > +++ llvm/trunk/lib/Target/ARM/ARMBaseRegisterInfo.h Wed Oct 7 > 12:12:56 2009 > @@ -128,8 +128,9 @@ > MachineBasicBlock &MBB, > > MachineBasicBlock::iterator I) const; > > - virtual void eliminateFrameIndex(MachineBasicBlock::iterator II, > - int SPAdj, RegScavenger *RS = > NULL) const; > + virtual unsigned eliminateFrameIndex(MachineBasicBlock::iterator > II, > + int SPAdj, int *Value = NULL, > + RegScavenger *RS = NULL) > const; > > virtual void emitPrologue(MachineFunction &MF) const; > virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock > &MBB) const; > > Modified: llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.cpp > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.cpp?rev=83467&r1=83466&r2=83467&view=diff > > = > = > = > = > = > = > = > = > ====================================================================== > --- llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.cpp (original) > +++ llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.cpp Wed Oct 7 > 12:12:56 2009 > @@ -427,8 +427,11 @@ > TII.copyRegToReg(MBB, I, Reg, ARM::R12, RC, ARM::GPRRegisterClass); > } > > -void Thumb1RegisterInfo::eliminateFrameIndex > (MachineBasicBlock::iterator II, > - int SPAdj, > RegScavenger *RS) const{ > +unsigned > +Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator > II, > + int 
SPAdj, int *Value, > + RegScavenger *RS) const{ > + unsigned VReg = 0; > unsigned i = 0; > MachineInstr &MI = *II; > MachineBasicBlock &MBB = *MI.getParent(); > @@ -484,7 +487,7 @@ > MI.setDesc(TII.get(ARM::tMOVgpr2tgpr)); > MI.getOperand(i).ChangeToRegister(FrameReg, false); > MI.RemoveOperand(i+1); > - return; > + return 0; > } > > // Common case: small offset, fits into instruction. > @@ -500,7 +503,7 @@ > MI.getOperand(i).ChangeToRegister(FrameReg, false); > MI.getOperand(i+1).ChangeToImmediate(Offset / Scale); > } > - return; > + return 0; > } > > unsigned DestReg = MI.getOperand(0).getReg(); > @@ -512,7 +515,7 @@ > emitThumbRegPlusImmediate(MBB, II, DestReg, FrameReg, Offset, > TII, > *this, dl); > MBB.erase(II); > - return; > + return 0; > } > > if (Offset > 0) { > @@ -545,7 +548,7 @@ > AddDefaultPred(MIB); > } > } > - return; > + return 0; > } else { > unsigned ImmIdx = 0; > int InstrOffs = 0; > @@ -575,7 +578,7 @@ > // Replace the FrameIndex with sp > MI.getOperand(i).ChangeToRegister(FrameReg, false); > ImmOp.ChangeToImmediate(ImmedOffset); > - return; > + return 0; > } > > bool isThumSpillRestore = Opcode == ARM::tRestore || Opcode == > ARM::tSpill; > @@ -633,22 +636,24 @@ > MI.addOperand(MachineOperand::CreateReg(0, false)); > } else if (Desc.mayStore()) { > if (FrameIndexVirtualScavenging) { > - unsigned TmpReg = > - MF.getRegInfo().createVirtualRegister > (ARM::tGPRRegisterClass); > + VReg = MF.getRegInfo().createVirtualRegister > (ARM::tGPRRegisterClass); > + assert (Value && "Frame index virtual allocated, but Value > arg is NULL!"); > + *Value = Offset; > bool UseRR = false; > + > if (Opcode == ARM::tSpill) { > if (FrameReg == ARM::SP) > - emitThumbRegPlusImmInReg(MBB, II, TmpReg, FrameReg, > + emitThumbRegPlusImmInReg(MBB, II, VReg, FrameReg, > Offset, false, TII, *this, dl); > else { > - emitLoadConstPool(MBB, II, dl, TmpReg, 0, Offset); > + emitLoadConstPool(MBB, II, dl, VReg, 0, Offset); > UseRR = true; > } > } else > - 
emitThumbRegPlusImmediate(MBB, II, TmpReg, FrameReg, > Offset, TII, > + emitThumbRegPlusImmediate(MBB, II, VReg, FrameReg, Offset, > TII, > *this, dl); > MI.setDesc(TII.get(ARM::tSTR)); > - MI.getOperand(i).ChangeToRegister(TmpReg, false, false, true); > + MI.getOperand(i).ChangeToRegister(VReg, false, false, true); > if (UseRR) // Use [reg, reg] addrmode. > MI.addOperand(MachineOperand::CreateReg(FrameReg, false)); > else // tSTR has an extra register operand. > @@ -707,6 +712,7 @@ > MachineInstrBuilder MIB(&MI); > AddDefaultPred(MIB); > } > + return VReg; > } > > void Thumb1RegisterInfo::emitPrologue(MachineFunction &MF) const { > > Modified: llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.h > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.h?rev=83467&r1=83466&r2=83467&view=diff > > = > = > = > = > = > = > = > = > ====================================================================== > --- llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.h (original) > +++ llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.h Wed Oct 7 > 12:12:56 2009 > @@ -62,8 +62,9 @@ > MachineBasicBlock::iterator I, > const TargetRegisterClass *RC, > unsigned Reg) const; > - void eliminateFrameIndex(MachineBasicBlock::iterator II, > - int SPAdj, RegScavenger *RS = NULL) const; > + unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, > + int SPAdj, int *Value = NULL, > + RegScavenger *RS = NULL) const; > > void emitPrologue(MachineFunction &MF) const; > void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; > > Modified: llvm/trunk/lib/Target/Alpha/AlphaRegisterInfo.cpp > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Alpha/AlphaRegisterInfo.cpp?rev=83467&r1=83466&r2=83467&view=diff > > = > = > = > = > = > = > = > = > ====================================================================== > --- llvm/trunk/lib/Target/Alpha/AlphaRegisterInfo.cpp (original) > +++ llvm/trunk/lib/Target/Alpha/AlphaRegisterInfo.cpp Wed Oct 7 > 12:12:56 
2009 > @@ -151,8 +151,10 @@ > //variable locals > //<- SP > > -void AlphaRegisterInfo::eliminateFrameIndex > (MachineBasicBlock::iterator II, > - int SPAdj, RegScavenger > *RS) const { > +unsigned > +AlphaRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator > II, > + int SPAdj, int *Value, > + RegScavenger *RS) const { > assert(SPAdj == 0 && "Unexpected"); > > unsigned i = 0; > @@ -197,6 +199,7 @@ > } else { > MI.getOperand(i).ChangeToImmediate(Offset); > } > + return 0; > } > > > > Modified: llvm/trunk/lib/Target/Alpha/AlphaRegisterInfo.h > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Alpha/AlphaRegisterInfo.h?rev=83467&r1=83466&r2=83467&view=diff > > = > = > = > = > = > = > = > = > ====================================================================== > --- llvm/trunk/lib/Target/Alpha/AlphaRegisterInfo.h (original) > +++ llvm/trunk/lib/Target/Alpha/AlphaRegisterInfo.h Wed Oct 7 > 12:12:56 2009 > @@ -41,8 +41,9 @@ > MachineBasicBlock &MBB, > MachineBasicBlock::iterator I) > const; > > - void eliminateFrameIndex(MachineBasicBlock::iterator II, > - int SPAdj, RegScavenger *RS = NULL) const; > + unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, > + int SPAdj, int *Value = NULL, > + RegScavenger *RS = NULL) const; > > //void processFunctionBeforeFrameFinalized(MachineFunction &MF) > const; > > > Modified: llvm/trunk/lib/Target/Blackfin/BlackfinRegisterInfo.cpp > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Blackfin/BlackfinRegisterInfo.cpp?rev=83467&r1=83466&r2=83467&view=diff > > = > = > = > = > = > = > = > = > ====================================================================== > --- llvm/trunk/lib/Target/Blackfin/BlackfinRegisterInfo.cpp (original) > +++ llvm/trunk/lib/Target/Blackfin/BlackfinRegisterInfo.cpp Wed Oct > 7 12:12:56 2009 > @@ -219,9 +219,10 @@ > return Reg; > } > > -void BlackfinRegisterInfo::eliminateFrameIndex > (MachineBasicBlock::iterator II, > - int SPAdj, > - RegScavenger *RS) > const { > 
+unsigned > +BlackfinRegisterInfo::eliminateFrameIndex > (MachineBasicBlock::iterator II, > + int SPAdj, int *Value, > + RegScavenger *RS) const { > MachineInstr &MI = *II; > MachineBasicBlock &MBB = *MI.getParent(); > MachineFunction &MF = *MBB.getParent(); > @@ -258,20 +259,20 @@ > MI.setDesc(TII.get(isStore > ? BF::STORE32p_uimm6m4 > : BF::LOAD32p_uimm6m4)); > - return; > + return 0; > } > if (BaseReg == BF::FP && isUint<7>(-Offset)) { > MI.setDesc(TII.get(isStore > ? BF::STORE32fp_nimm7m4 > : BF::LOAD32fp_nimm7m4)); > MI.getOperand(FIPos+1).setImm(-Offset); > - return; > + return 0; > } > if (isInt<18>(Offset)) { > MI.setDesc(TII.get(isStore > ? BF::STORE32p_imm18m4 > : BF::LOAD32p_imm18m4)); > - return; > + return 0; > } > // Use RegScavenger to calculate proper offset... > MI.dump(); > @@ -356,6 +357,7 @@ > llvm_unreachable("Cannot eliminate frame index"); > break; > } > + return 0; > } > > void BlackfinRegisterInfo:: > > Modified: llvm/trunk/lib/Target/Blackfin/BlackfinRegisterInfo.h > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Blackfin/BlackfinRegisterInfo.h?rev=83467&r1=83466&r2=83467&view=diff > > = > = > = > = > = > = > = > = > ====================================================================== > --- llvm/trunk/lib/Target/Blackfin/BlackfinRegisterInfo.h (original) > +++ llvm/trunk/lib/Target/Blackfin/BlackfinRegisterInfo.h Wed Oct 7 > 12:12:56 2009 > @@ -64,8 +64,9 @@ > MachineBasicBlock &MBB, > MachineBasicBlock::iterator I) > const; > > - void eliminateFrameIndex(MachineBasicBlock::iterator II, > - int SPAdj, RegScavenger *RS = NULL) > const; > + unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, > + int SPAdj, int *Value = NULL, > + RegScavenger *RS = NULL) const; > > void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, > RegScavenger *RS) const; > > Modified: llvm/trunk/lib/Target/CellSPU/SPURegisterInfo.cpp > URL: 
http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/CellSPU/SPURegisterInfo.cpp?rev=83467&r1=83466&r2=83467&view=diff > > = > = > = > = > = > = > = > = > ====================================================================== > --- llvm/trunk/lib/Target/CellSPU/SPURegisterInfo.cpp (original) > +++ llvm/trunk/lib/Target/CellSPU/SPURegisterInfo.cpp Wed Oct 7 > 12:12:56 2009 > @@ -326,9 +326,9 @@ > MBB.erase(I); > } > > -void > +unsigned > SPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, > int SPAdj, > - RegScavenger *RS) const > + int *Value, RegScavenger *RS) > const > { > unsigned i = 0; > MachineInstr &MI = *II; > @@ -371,6 +371,7 @@ > } else { > MO.ChangeToImmediate(Offset); > } > + return 0; > } > > /// determineFrameLayout - Determine the size of the frame and > maximum call > > Modified: llvm/trunk/lib/Target/CellSPU/SPURegisterInfo.h > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/CellSPU/SPURegisterInfo.h?rev=83467&r1=83466&r2=83467&view=diff > > = > = > = > = > = > = > = > = > ====================================================================== > --- llvm/trunk/lib/Target/CellSPU/SPURegisterInfo.h (original) > +++ llvm/trunk/lib/Target/CellSPU/SPURegisterInfo.h Wed Oct 7 > 12:12:56 2009 > @@ -63,8 +63,9 @@ > MachineBasicBlock &MBB, > MachineBasicBlock::iterator I) > const; > //! Convert frame indicies into machine operands > - void eliminateFrameIndex(MachineBasicBlock::iterator II, int, > - RegScavenger *RS) const; > + unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, > int SPAdj, > + int *Value = NULL, > + RegScavenger *RS = NULL) const; > //! 
Determine the frame's layour > void determineFrameLayout(MachineFunction &MF) const; > > > Modified: llvm/trunk/lib/Target/MSP430/MSP430RegisterInfo.cpp > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/MSP430/MSP430RegisterInfo.cpp?rev=83467&r1=83466&r2=83467&view=diff > > = > = > = > = > = > = > = > = > ====================================================================== > --- llvm/trunk/lib/Target/MSP430/MSP430RegisterInfo.cpp (original) > +++ llvm/trunk/lib/Target/MSP430/MSP430RegisterInfo.cpp Wed Oct 7 > 12:12:56 2009 > @@ -147,9 +147,10 @@ > MBB.erase(I); > } > > -void > +unsigned > MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator > II, > - int SPAdj, RegScavenger > *RS) const { > + int SPAdj, int *Value, > + RegScavenger *RS) const { > assert(SPAdj == 0 && "Unexpected"); > > unsigned i = 0; > @@ -187,7 +188,7 @@ > MI.getOperand(i).ChangeToRegister(BasePtr, false); > > if (Offset == 0) > - return; > + return 0; > > // We need to materialize the offset via add instruction. 
> unsigned DstReg = MI.getOperand(0).getReg(); > @@ -198,11 +199,12 @@ > BuildMI(MBB, next(II), dl, TII.get(MSP430::ADD16ri), DstReg) > .addReg(DstReg).addImm(Offset); > > - return; > + return 0; > } > > MI.getOperand(i).ChangeToRegister(BasePtr, false); > MI.getOperand(i+1).ChangeToImmediate(Offset); > + return 0; > } > > void > > Modified: llvm/trunk/lib/Target/MSP430/MSP430RegisterInfo.h > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/MSP430/MSP430RegisterInfo.h?rev=83467&r1=83466&r2=83467&view=diff > > = > = > = > = > = > = > = > = > ====================================================================== > --- llvm/trunk/lib/Target/MSP430/MSP430RegisterInfo.h (original) > +++ llvm/trunk/lib/Target/MSP430/MSP430RegisterInfo.h Wed Oct 7 > 12:12:56 2009 > @@ -49,8 +49,9 @@ > MachineBasicBlock &MBB, > MachineBasicBlock::iterator I) > const; > > - void eliminateFrameIndex(MachineBasicBlock::iterator II, > - int SPAdj, RegScavenger *RS = NULL) const; > + unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, > + int SPAdj, int *Value = NULL, > + RegScavenger *RS = NULL) const; > > void emitPrologue(MachineFunction &MF) const; > void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; > > Modified: llvm/trunk/lib/Target/Mips/MipsRegisterInfo.cpp > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Mips/MipsRegisterInfo.cpp?rev=83467&r1=83466&r2=83467&view=diff > > = > = > = > = > = > = > = > = > ====================================================================== > --- llvm/trunk/lib/Target/Mips/MipsRegisterInfo.cpp (original) > +++ llvm/trunk/lib/Target/Mips/MipsRegisterInfo.cpp Wed Oct 7 > 12:12:56 2009 > @@ -348,9 +348,9 @@ > // FrameIndex represent objects inside a abstract stack. > // We must replace FrameIndex with an stack/frame pointer > // direct reference. 
> -void MipsRegisterInfo:: > -eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, > - RegScavenger *RS) const > +unsigned MipsRegisterInfo:: > +eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, > + int *Value, RegScavenger *RS) const > { > MachineInstr &MI = *II; > MachineFunction &MF = *MI.getParent()->getParent(); > @@ -382,6 +382,7 @@ > > MI.getOperand(i-1).ChangeToImmediate(Offset); > MI.getOperand(i).ChangeToRegister(getFrameRegister(MF), false); > + return 0; > } > > void MipsRegisterInfo:: > > Modified: llvm/trunk/lib/Target/Mips/MipsRegisterInfo.h > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Mips/MipsRegisterInfo.h?rev=83467&r1=83466&r2=83467&view=diff > > = > = > = > = > = > = > = > = > ====================================================================== > --- llvm/trunk/lib/Target/Mips/MipsRegisterInfo.h (original) > +++ llvm/trunk/lib/Target/Mips/MipsRegisterInfo.h Wed Oct 7 > 12:12:56 2009 > @@ -54,8 +54,9 @@ > MachineBasicBlock::iterator I) > const; > > /// Stack Frame Processing Methods > - void eliminateFrameIndex(MachineBasicBlock::iterator II, > - int SPAdj, RegScavenger *RS = NULL) const; > + unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, > + int SPAdj, int *Value = NULL, > + RegScavenger *RS = NULL) const; > > void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; > > > Modified: llvm/trunk/lib/Target/PIC16/PIC16RegisterInfo.cpp > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PIC16/PIC16RegisterInfo.cpp?rev=83467&r1=83466&r2=83467&view=diff > > = > = > = > = > = > = > = > = > ====================================================================== > --- llvm/trunk/lib/Target/PIC16/PIC16RegisterInfo.cpp (original) > +++ llvm/trunk/lib/Target/PIC16/PIC16RegisterInfo.cpp Wed Oct 7 > 12:12:56 2009 > @@ -51,10 +51,13 @@ > return false; > } > > -void PIC16RegisterInfo:: > +unsigned PIC16RegisterInfo:: > eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, 
> - RegScavenger *RS) const > -{ /* NOT YET IMPLEMENTED */ } > + int *Value, RegScavenger *RS) const > +{ > + /* NOT YET IMPLEMENTED */ > + return 0; > +} > > void PIC16RegisterInfo::emitPrologue(MachineFunction &MF) const > { /* NOT YET IMPLEMENTED */ } > > Modified: llvm/trunk/lib/Target/PIC16/PIC16RegisterInfo.h > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PIC16/PIC16RegisterInfo.h?rev=83467&r1=83466&r2=83467&view=diff > > = > = > = > = > = > = > = > = > ====================================================================== > --- llvm/trunk/lib/Target/PIC16/PIC16RegisterInfo.h (original) > +++ llvm/trunk/lib/Target/PIC16/PIC16RegisterInfo.h Wed Oct 7 > 12:12:56 2009 > @@ -48,8 +48,9 @@ > virtual BitVector getReservedRegs(const MachineFunction &MF) const; > virtual bool hasFP(const MachineFunction &MF) const; > > - virtual void eliminateFrameIndex(MachineBasicBlock::iterator MI, > - int SPAdj, RegScavenger *RS=NULL) const; > + virtual unsigned eliminateFrameIndex(MachineBasicBlock::iterator > MI, > + int SPAdj, int *Value = NULL, > + RegScavenger *RS=NULL) const; > > void eliminateCallFramePseudoInstr(MachineFunction &MF, > MachineBasicBlock &MBB, > > Modified: llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.cpp > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.cpp?rev=83467&r1=83466&r2=83467&view=diff > > = > = > = > = > = > = > = > = > ====================================================================== > --- llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.cpp (original) > +++ llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.cpp Wed Oct 7 > 12:12:56 2009 > @@ -699,8 +699,10 @@ > MBB.erase(II); > } > > -void PPCRegisterInfo::eliminateFrameIndex > (MachineBasicBlock::iterator II, > - int SPAdj, RegScavenger > *RS) const { > +unsigned > +PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, > + int SPAdj, int *Value, > + RegScavenger *RS) const { > assert(SPAdj == 0 && "Unexpected"); > > // Get 
the instruction. > @@ -739,14 +741,14 @@ > if (FPSI && FrameIndex == FPSI && > (OpC == PPC::DYNALLOC || OpC == PPC::DYNALLOC8)) { > lowerDynamicAlloc(II, SPAdj, RS); > - return; > + return 0; > } > > // Special case for pseudo-op SPILL_CR. > if (EnableRegisterScavenging) // FIXME (64-bit): Enable by default. > if (OpC == PPC::SPILL_CR) { > lowerCRSpilling(II, FrameIndex, SPAdj, RS); > - return; > + return 0; > } > > // Replace the FrameIndex with base register with GPR1 (SP) or > GPR31 (FP). > @@ -788,7 +790,7 @@ > if (isIXAddr) > Offset >>= 2; // The actual encoded value has the low two > bits zero. > MI.getOperand(OffsetOperandNo).ChangeToImmediate(Offset); > - return; > + return 0; > } > > // The offset doesn't fit into a single register, scavenge one to > build the > @@ -828,6 +830,7 @@ > unsigned StackReg = MI.getOperand(FIOperandNo).getReg(); > MI.getOperand(OperandBase).ChangeToRegister(StackReg, false); > MI.getOperand(OperandBase + 1).ChangeToRegister(SReg, false); > + return 0; > } > > /// VRRegNo - Map from a numbered VR register to its enum value. 
> > Modified: llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.h > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.h?rev=83467&r1=83466&r2=83467&view=diff > > = > = > = > = > = > = > = > = > ====================================================================== > --- llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.h (original) > +++ llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.h Wed Oct 7 > 12:12:56 2009 > @@ -66,8 +66,9 @@ > int SPAdj, RegScavenger *RS) const; > void lowerCRSpilling(MachineBasicBlock::iterator II, unsigned > FrameIndex, > int SPAdj, RegScavenger *RS) const; > - void eliminateFrameIndex(MachineBasicBlock::iterator II, > - int SPAdj, RegScavenger *RS = NULL) const; > + unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, > + int SPAdj, int *Value = NULL, > + RegScavenger *RS = NULL) const; > > /// determineFrameLayout - Determine the size of the frame and > maximum call > /// frame size. > > Modified: llvm/trunk/lib/Target/Sparc/SparcRegisterInfo.cpp > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Sparc/SparcRegisterInfo.cpp?rev=83467&r1=83466&r2=83467&view=diff > > = > = > = > = > = > = > = > = > ====================================================================== > --- llvm/trunk/lib/Target/Sparc/SparcRegisterInfo.cpp (original) > +++ llvm/trunk/lib/Target/Sparc/SparcRegisterInfo.cpp Wed Oct 7 > 12:12:56 2009 > @@ -76,8 +76,10 @@ > MBB.erase(I); > } > > -void SparcRegisterInfo::eliminateFrameIndex > (MachineBasicBlock::iterator II, > - int SPAdj, RegScavenger > *RS) const { > +unsigned > +SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator > II, > + int SPAdj, int *Value, > + RegScavenger *RS) const { > assert(SPAdj == 0 && "Unexpected"); > > unsigned i = 0; > @@ -113,6 +115,7 @@ > MI.getOperand(i).ChangeToRegister(SP::G1, false); > MI.getOperand(i+1).ChangeToImmediate(Offset & ((1 << 10)-1)); > } > + return 0; > } > > void SparcRegisterInfo:: > > Modified: 
llvm/trunk/lib/Target/Sparc/SparcRegisterInfo.h > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Sparc/SparcRegisterInfo.h?rev=83467&r1=83466&r2=83467&view=diff > > = > = > = > = > = > = > = > = > ====================================================================== > --- llvm/trunk/lib/Target/Sparc/SparcRegisterInfo.h (original) > +++ llvm/trunk/lib/Target/Sparc/SparcRegisterInfo.h Wed Oct 7 > 12:12:56 2009 > @@ -43,8 +43,9 @@ > MachineBasicBlock &MBB, > MachineBasicBlock::iterator I) > const; > > - void eliminateFrameIndex(MachineBasicBlock::iterator II, > - int SPAdj, RegScavenger *RS = NULL) const; > + unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, > + int SPAdj, int *Value = NULL, > + RegScavenger *RS = NULL) const; > > void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; > > > Modified: llvm/trunk/lib/Target/SystemZ/SystemZRegisterInfo.cpp > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SystemZ/SystemZRegisterInfo.cpp?rev=83467&r1=83466&r2=83467&view=diff > > = > = > = > = > = > = > = > = > ====================================================================== > --- llvm/trunk/lib/Target/SystemZ/SystemZRegisterInfo.cpp (original) > +++ llvm/trunk/lib/Target/SystemZ/SystemZRegisterInfo.cpp Wed Oct 7 > 12:12:56 2009 > @@ -107,8 +107,10 @@ > return Offset; > } > > -void SystemZRegisterInfo::eliminateFrameIndex > (MachineBasicBlock::iterator II, > - int SPAdj, RegScavenger > *RS) const { > +unsigned > +SystemZRegisterInfo::eliminateFrameIndex > (MachineBasicBlock::iterator II, > + int SPAdj, int *Value, > + RegScavenger *RS) const { > assert(SPAdj == 0 && "Unxpected"); > > unsigned i = 0; > @@ -136,6 +138,7 @@ > MI.setDesc(TII.getMemoryInstr(MI.getOpcode(), Offset)); > > MI.getOperand(i+1).ChangeToImmediate(Offset); > + return 0; > } > > void > > Modified: llvm/trunk/lib/Target/SystemZ/SystemZRegisterInfo.h > URL: 
http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SystemZ/SystemZRegisterInfo.h?rev=83467&r1=83466&r2=83467&view=diff > > = > = > = > = > = > = > = > = > ====================================================================== > --- llvm/trunk/lib/Target/SystemZ/SystemZRegisterInfo.h (original) > +++ llvm/trunk/lib/Target/SystemZ/SystemZRegisterInfo.h Wed Oct 7 > 12:12:56 2009 > @@ -55,8 +55,9 @@ > MachineBasicBlock &MBB, > MachineBasicBlock::iterator I) > const; > > - void eliminateFrameIndex(MachineBasicBlock::iterator II, > - int SPAdj, RegScavenger *RS = NULL) const; > + unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, > + int SPAdj, int *Value = NULL, > + RegScavenger *RS = NULL) const; > > > void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, > > Modified: llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp?rev=83467&r1=83466&r2=83467&view=diff > > = > = > = > = > = > = > = > = > ====================================================================== > --- llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp (original) > +++ llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp Wed Oct 7 > 12:12:56 2009 > @@ -579,8 +579,10 @@ > MBB.erase(I); > } > > -void X86RegisterInfo::eliminateFrameIndex > (MachineBasicBlock::iterator II, > - int SPAdj, RegScavenger > *RS) const{ > +unsigned > +X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, > + int SPAdj, int *Value, > + RegScavenger *RS) const{ > assert(SPAdj == 0 && "Unexpected"); > > unsigned i = 0; > @@ -617,6 +619,7 @@ > (uint64_t)MI.getOperand(i+3).getOffset(); > MI.getOperand(i+3).setOffset(Offset); > } > + return 0; > } > > void > > Modified: llvm/trunk/lib/Target/X86/X86RegisterInfo.h > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86RegisterInfo.h?rev=83467&r1=83466&r2=83467&view=diff > > = > = > = > = > = > = > = > = > 
====================================================================== > --- llvm/trunk/lib/Target/X86/X86RegisterInfo.h (original) > +++ llvm/trunk/lib/Target/X86/X86RegisterInfo.h Wed Oct 7 12:12:56 > 2009 > @@ -139,8 +139,9 @@ > MachineBasicBlock &MBB, > MachineBasicBlock::iterator MI) > const; > > - void eliminateFrameIndex(MachineBasicBlock::iterator MI, > - int SPAdj, RegScavenger *RS = NULL) const; > + unsigned eliminateFrameIndex(MachineBasicBlock::iterator MI, > + int SPAdj, int *Value = NULL, > + RegScavenger *RS = NULL) const; > > void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, > RegScavenger *RS = NULL) > const; > > Modified: llvm/trunk/lib/Target/XCore/XCoreRegisterInfo.cpp > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/XCore/XCoreRegisterInfo.cpp?rev=83467&r1=83466&r2=83467&view=diff > > = > = > = > = > = > = > = > = > ====================================================================== > --- llvm/trunk/lib/Target/XCore/XCoreRegisterInfo.cpp (original) > +++ llvm/trunk/lib/Target/XCore/XCoreRegisterInfo.cpp Wed Oct 7 > 12:12:56 2009 > @@ -171,8 +171,10 @@ > MBB.erase(I); > } > > -void XCoreRegisterInfo::eliminateFrameIndex > (MachineBasicBlock::iterator II, > - int SPAdj, RegScavenger > *RS) const { > +unsigned > +XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator > II, > + int SPAdj, int *Value, > + RegScavenger *RS) const { > assert(SPAdj == 0 && "Unexpected"); > MachineInstr &MI = *II; > DebugLoc dl = MI.getDebugLoc(); > @@ -311,6 +313,7 @@ > } > // Erase old instruction. 
> MBB.erase(II); > + return 0; > } > > void > > Modified: llvm/trunk/lib/Target/XCore/XCoreRegisterInfo.h > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/XCore/XCoreRegisterInfo.h?rev=83467&r1=83466&r2=83467&view=diff > > = > = > = > = > = > = > = > = > ====================================================================== > --- llvm/trunk/lib/Target/XCore/XCoreRegisterInfo.h (original) > +++ llvm/trunk/lib/Target/XCore/XCoreRegisterInfo.h Wed Oct 7 > 12:12:56 2009 > @@ -57,8 +57,9 @@ > MachineBasicBlock &MBB, > MachineBasicBlock::iterator I) > const; > > - void eliminateFrameIndex(MachineBasicBlock::iterator II, > - int SPAdj, RegScavenger *RS = NULL) const; > + unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, > + int SPAdj, int *Value = NULL, > + RegScavenger *RS = NULL) const; > > void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, > RegScavenger *RS = > NULL) const; > > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits From resistor at mac.com Wed Oct 7 13:40:18 2009 From: resistor at mac.com (Owen Anderson) Date: Wed, 07 Oct 2009 18:40:18 -0000 Subject: [llvm-commits] [llvm] r83480 - /llvm/trunk/lib/CodeGen/LazyLiveness.cpp Message-ID: <200910071840.n97IeIQ3007255@zion.cs.uiuc.edu> Author: resistor Date: Wed Oct 7 13:40:17 2009 New Revision: 83480 URL: http://llvm.org/viewvc/llvm-project?rev=83480&view=rev Log: Remove LazyLiveness from the tree. It doesn't work right now, and I'm not going to have the time to finish it any time soon. If someone's interested in it, they can resurrect it from SVN history.
Removed: llvm/trunk/lib/CodeGen/LazyLiveness.cpp Removed: llvm/trunk/lib/CodeGen/LazyLiveness.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/LazyLiveness.cpp?rev=83479&view=auto ============================================================================== --- llvm/trunk/lib/CodeGen/LazyLiveness.cpp (original) +++ llvm/trunk/lib/CodeGen/LazyLiveness.cpp (removed) @@ -1,168 +0,0 @@ -//===- LazyLiveness.cpp - Lazy, CFG-invariant liveness information --------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This pass implements a lazy liveness analysis as per "Fast Liveness Checking -// for SSA-form Programs," by Boissinot, et al. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "lazyliveness" -#include "llvm/CodeGen/LazyLiveness.h" -#include "llvm/CodeGen/MachineDominators.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/ADT/DepthFirstIterator.h" -#include "llvm/ADT/PostOrderIterator.h" -using namespace llvm; - -char LazyLiveness::ID = 0; -static RegisterPass X("lazy-liveness", "Lazy Liveness Analysis"); - -void LazyLiveness::computeBackedgeChain(MachineFunction& mf, - MachineBasicBlock* MBB) { - SparseBitVector<128> tmp = rv[MBB]; - tmp.set(preorder[MBB]); - tmp &= backedge_source; - calculated.set(preorder[MBB]); - - for (SparseBitVector<128>::iterator I = tmp.begin(); I != tmp.end(); ++I) { - assert(rev_preorder.size() > *I && "Unknown block!"); - - MachineBasicBlock* SrcMBB = rev_preorder[*I]; - - for (MachineBasicBlock::succ_iterator SI = SrcMBB->succ_begin(), - SE = SrcMBB->succ_end(); SI != SE; ++SI) { - MachineBasicBlock* TgtMBB = *SI; - - if (backedges.count(std::make_pair(SrcMBB, 
TgtMBB)) && - !rv[MBB].test(preorder[TgtMBB])) { - if (!calculated.test(preorder[TgtMBB])) - computeBackedgeChain(mf, TgtMBB); - - tv[MBB].set(preorder[TgtMBB]); - SparseBitVector<128> right = tv[TgtMBB]; - tv[MBB] |= right; - } - } - - tv[MBB].reset(preorder[MBB]); - } -} - -bool LazyLiveness::runOnMachineFunction(MachineFunction &mf) { - rv.clear(); - tv.clear(); - backedges.clear(); - backedge_source.clear(); - backedge_target.clear(); - calculated.clear(); - preorder.clear(); - rev_preorder.clear(); - - rv.resize(mf.size()); - tv.resize(mf.size()); - preorder.resize(mf.size()); - rev_preorder.reserve(mf.size()); - - MRI = &mf.getRegInfo(); - MachineDominatorTree& MDT = getAnalysis(); - - // Step 0: Compute preorder numbering for all MBBs. - unsigned num = 0; - for (df_iterator DI = df_begin(MDT.getRootNode()), - DE = df_end(MDT.getRootNode()); DI != DE; ++DI) { - preorder[(*DI)->getBlock()] = num++; - rev_preorder.push_back((*DI)->getBlock()); - } - - // Step 1: Compute the transitive closure of the CFG, ignoring backedges. - for (po_iterator POI = po_begin(&*mf.begin()), - POE = po_end(&*mf.begin()); POI != POE; ++POI) { - MachineBasicBlock* MBB = *POI; - SparseBitVector<128>& entry = rv[MBB]; - entry.set(preorder[MBB]); - - for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), - SE = MBB->succ_end(); SI != SE; ++SI) { - DenseMap >::iterator SII = - rv.find(*SI); - - // Because we're iterating in postorder, any successor that does not yet - // have an rv entry must be on a backedge. 
- if (SII != rv.end()) { - entry |= SII->second; - } else { - backedges.insert(std::make_pair(MBB, *SI)); - backedge_source.set(preorder[MBB]); - backedge_target.set(preorder[*SI]); - } - } - } - - for (SparseBitVector<128>::iterator I = backedge_source.begin(); - I != backedge_source.end(); ++I) - computeBackedgeChain(mf, rev_preorder[*I]); - - for (po_iterator POI = po_begin(&*mf.begin()), - POE = po_end(&*mf.begin()); POI != POE; ++POI) - if (!backedge_target.test(preorder[*POI])) - for (MachineBasicBlock::succ_iterator SI = (*POI)->succ_begin(), - SE = (*POI)->succ_end(); SI != SE; ++SI) - if (!backedges.count(std::make_pair(*POI, *SI)) && tv.count(*SI)) { - SparseBitVector<128> right = tv[*SI]; - tv[*POI] |= right; - } - - for (po_iterator POI = po_begin(&*mf.begin()), - POE = po_end(&*mf.begin()); POI != POE; ++POI) - tv[*POI].set(preorder[*POI]); - - return false; -} - -bool LazyLiveness::vregLiveIntoMBB(unsigned vreg, MachineBasicBlock* MBB) { - MachineDominatorTree& MDT = getAnalysis(); - - MachineBasicBlock* DefMBB = MRI->def_begin(vreg)->getParent(); - unsigned def = preorder[DefMBB]; - unsigned max_dom = 0; - for (df_iterator DI = df_begin(MDT[DefMBB]), - DE = df_end(MDT[DefMBB]); DI != DE; ++DI) { - if (preorder[DI->getBlock()] > max_dom) { - max_dom = preorder[(*DI)->getBlock()]; - } - } - - if (preorder[MBB] <= def || max_dom < preorder[MBB]) - return false; - - SparseBitVector<128>::iterator I = tv[MBB].begin(); - while (I != tv[MBB].end() && *I <= def) ++I; - while (I != tv[MBB].end() && *I < max_dom) { - for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(vreg), - UE = MachineRegisterInfo::use_end(); UI != UE; ++UI) { - MachineBasicBlock* UseMBB = UI->getParent(); - if (rv[rev_preorder[*I]].test(preorder[UseMBB])) - return true; - - unsigned t_dom = 0; - for (df_iterator DI = - df_begin(MDT[rev_preorder[*I]]), DE = df_end(MDT[rev_preorder[*I]]); - DI != DE; ++DI) - if (preorder[DI->getBlock()] > t_dom) { - max_dom = 
preorder[(*DI)->getBlock()]; - } - I = tv[MBB].begin(); - while (I != tv[MBB].end() && *I < t_dom) ++I; - } - } - - return false; -} From grosbach at apple.com Wed Oct 7 13:44:24 2009 From: grosbach at apple.com (Jim Grosbach) Date: Wed, 07 Oct 2009 18:44:24 -0000 Subject: [llvm-commits] [llvm] r83481 - /llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp Message-ID: <200910071844.n97IiOia007768@zion.cs.uiuc.edu> Author: grosbach Date: Wed Oct 7 13:44:24 2009 New Revision: 83481 URL: http://llvm.org/viewvc/llvm-project?rev=83481&view=rev Log: add initializers for clarity. Add missing assignment of PrevLastUseOp. Modified: llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp Modified: llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp?rev=83481&r1=83480&r2=83481&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp (original) +++ llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp Wed Oct 7 13:44:24 2009 @@ -782,8 +782,8 @@ unsigned CurrentScratchReg = 0; unsigned PrevScratchReg = 0; int PrevValue; - MachineInstr *PrevLastUseMI; - unsigned PrevLastUseOp; + MachineInstr *PrevLastUseMI = NULL; + unsigned PrevLastUseOp = 0; // The instruction stream may change in the loop, so check BB->end() // directly. 
@@ -875,6 +875,7 @@ if (MI->getOperand(i).isKill()) { PrevScratchReg = CurrentScratchReg; PrevLastUseMI = MI; + PrevLastUseOp = i; CurrentScratchReg = CurrentVirtReg = 0; } } From bob.wilson at apple.com Wed Oct 7 13:47:39 2009 From: bob.wilson at apple.com (Bob Wilson) Date: Wed, 07 Oct 2009 18:47:39 -0000 Subject: [llvm-commits] [llvm] r83482 - in /llvm/trunk: lib/Target/ARM/ARMISelDAGToDAG.cpp lib/Target/ARM/ARMInstrNEON.td lib/Target/ARM/NEONPreAllocPass.cpp test/CodeGen/ARM/vst2.ll Message-ID: <200910071847.n97IldtV008177@zion.cs.uiuc.edu> Author: bwilson Date: Wed Oct 7 13:47:39 2009 New Revision: 83482 URL: http://llvm.org/viewvc/llvm-project?rev=83482&view=rev Log: Add codegen support for NEON vst2 intrinsics with 128-bit vectors. Modified: llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp llvm/trunk/lib/Target/ARM/ARMInstrNEON.td llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp llvm/trunk/test/CodeGen/ARM/vst2.ll Modified: llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp?rev=83482&r1=83481&r2=83482&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp (original) +++ llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp Wed Oct 7 13:47:39 2009 @@ -1563,17 +1563,41 @@ SDValue MemAddr, MemUpdate, MemOpc; if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc)) return NULL; - switch (N->getOperand(3).getValueType().getSimpleVT().SimpleTy) { + VT = N->getOperand(3).getValueType(); + if (VT.is64BitVector()) { + switch (VT.getSimpleVT().SimpleTy) { + default: llvm_unreachable("unhandled vst2 type"); + case MVT::v8i8: Opc = ARM::VST2d8; break; + case MVT::v4i16: Opc = ARM::VST2d16; break; + case MVT::v2f32: + case MVT::v2i32: Opc = ARM::VST2d32; break; + } + SDValue Chain = N->getOperand(0); + const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, + N->getOperand(3), N->getOperand(4), Chain }; + 
return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 6); + } + // Quad registers are stored as pairs of double registers. + EVT RegVT; + switch (VT.getSimpleVT().SimpleTy) { default: llvm_unreachable("unhandled vst2 type"); - case MVT::v8i8: Opc = ARM::VST2d8; break; - case MVT::v4i16: Opc = ARM::VST2d16; break; - case MVT::v2f32: - case MVT::v2i32: Opc = ARM::VST2d32; break; - } - SDValue Chain = N->getOperand(0); + case MVT::v16i8: Opc = ARM::VST2q8; RegVT = MVT::v8i8; break; + case MVT::v8i16: Opc = ARM::VST2q16; RegVT = MVT::v4i16; break; + case MVT::v4f32: Opc = ARM::VST2q32; RegVT = MVT::v2f32; break; + case MVT::v4i32: Opc = ARM::VST2q32; RegVT = MVT::v2i32; break; + } + SDValue Chain = N->getOperand(0); + SDValue D0 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT, + N->getOperand(3)); + SDValue D1 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT, + N->getOperand(3)); + SDValue D2 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT, + N->getOperand(4)); + SDValue D3 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT, + N->getOperand(4)); const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, - N->getOperand(3), N->getOperand(4), Chain }; - return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 6); + D0, D1, D2, D3, Chain }; + return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 8); } case Intrinsic::arm_neon_vst3: { Modified: llvm/trunk/lib/Target/ARM/ARMInstrNEON.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrNEON.td?rev=83482&r1=83481&r2=83482&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrNEON.td (original) +++ llvm/trunk/lib/Target/ARM/ARMInstrNEON.td Wed Oct 7 13:47:39 2009 @@ -316,11 +316,20 @@ class VST2D : NLdSt<(outs), (ins addrmode6:$addr, DPR:$src1, DPR:$src2), IIC_VST, !strconcat(OpcodeStr, "\t\\{$src1,$src2\\}, $addr"), "", []>; +class VST2Q + : NLdSt<(outs), (ins addrmode6:$addr, DPR:$src1, 
DPR:$src2, DPR:$src3, + DPR:$src4), IIC_VST, + !strconcat(OpcodeStr, "\t\\{$src1,$src2,$src3,$src4\\}, $addr"), + "", []>; def VST2d8 : VST2D<"vst2.8">; def VST2d16 : VST2D<"vst2.16">; def VST2d32 : VST2D<"vst2.32">; +def VST2q8 : VST2Q<"vst2.8">; +def VST2q16 : VST2Q<"vst2.16">; +def VST2q32 : VST2Q<"vst2.32">; + // VST3 : Vector Store (multiple 3-element structures) class VST3D : NLdSt<(outs), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3), Modified: llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp?rev=83482&r1=83481&r2=83482&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp (original) +++ llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp Wed Oct 7 13:47:39 2009 @@ -129,6 +129,13 @@ NumRegs = 2; return true; + case ARM::VST2q8: + case ARM::VST2q16: + case ARM::VST2q32: + FirstOpnd = 3; + NumRegs = 4; + return true; + case ARM::VST3d8: case ARM::VST3d16: case ARM::VST3d32: Modified: llvm/trunk/test/CodeGen/ARM/vst2.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vst2.ll?rev=83482&r1=83481&r2=83482&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/ARM/vst2.ll (original) +++ llvm/trunk/test/CodeGen/ARM/vst2.ll Wed Oct 7 13:47:39 2009 @@ -32,7 +32,44 @@ ret void } +define void @vst2Qi8(i8* %A, <16 x i8>* %B) nounwind { +;CHECK: vst2Qi8: +;CHECK: vst2.8 + %tmp1 = load <16 x i8>* %B + call void @llvm.arm.neon.vst2.v16i8(i8* %A, <16 x i8> %tmp1, <16 x i8> %tmp1) + ret void +} + +define void @vst2Qi16(i16* %A, <8 x i16>* %B) nounwind { +;CHECK: vst2Qi16: +;CHECK: vst2.16 + %tmp1 = load <8 x i16>* %B + call void @llvm.arm.neon.vst2.v8i16(i16* %A, <8 x i16> %tmp1, <8 x i16> %tmp1) + ret void +} + +define void @vst2Qi32(i32* %A, <4 x i32>* %B) nounwind { +;CHECK: vst2Qi32: +;CHECK: vst2.32 + %tmp1 = load <4 
x i32>* %B + call void @llvm.arm.neon.vst2.v4i32(i32* %A, <4 x i32> %tmp1, <4 x i32> %tmp1) + ret void +} + +define void @vst2Qf(float* %A, <4 x float>* %B) nounwind { +;CHECK: vst2Qf: +;CHECK: vst2.32 + %tmp1 = load <4 x float>* %B + call void @llvm.arm.neon.vst2.v4f32(float* %A, <4 x float> %tmp1, <4 x float> %tmp1) + ret void +} + declare void @llvm.arm.neon.vst2.v8i8(i8*, <8 x i8>, <8 x i8>) nounwind declare void @llvm.arm.neon.vst2.v4i16(i8*, <4 x i16>, <4 x i16>) nounwind declare void @llvm.arm.neon.vst2.v2i32(i8*, <2 x i32>, <2 x i32>) nounwind declare void @llvm.arm.neon.vst2.v2f32(i8*, <2 x float>, <2 x float>) nounwind + +declare void @llvm.arm.neon.vst2.v16i8(i8*, <16 x i8>, <16 x i8>) nounwind +declare void @llvm.arm.neon.vst2.v8i16(i8*, <8 x i16>, <8 x i16>) nounwind +declare void @llvm.arm.neon.vst2.v4i32(i8*, <4 x i32>, <4 x i32>) nounwind +declare void @llvm.arm.neon.vst2.v4f32(i8*, <4 x float>, <4 x float>) nounwind From grosbach at apple.com Wed Oct 7 14:08:36 2009 From: grosbach at apple.com (Jim Grosbach) Date: Wed, 07 Oct 2009 19:08:36 -0000 Subject: [llvm-commits] [llvm] r83483 - /llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp Message-ID: <200910071908.n97J8a0h010903@zion.cs.uiuc.edu> Author: grosbach Date: Wed Oct 7 14:08:36 2009 New Revision: 83483 URL: http://llvm.org/viewvc/llvm-project?rev=83483&view=rev Log: grammar Modified: llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp Modified: llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp?rev=83483&r1=83482&r2=83483&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp (original) +++ llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp Wed Oct 7 14:08:36 2009 @@ -828,7 +828,7 @@ if (PrevScratchReg && Value == PrevValue) { // FIXME: This assumes that the instructions in the live range // for the virtual register are exclusively 
for the purpose - // of populating the value in the register. That reasonable + // of populating the value in the register. That's reasonable // for these frame index registers, but it's still a very, very // strong assumption. Perhaps this implies that the frame index // elimination should be before register allocation, with From grosbach at apple.com Wed Oct 7 14:12:10 2009 From: grosbach at apple.com (Jim Grosbach) Date: Wed, 7 Oct 2009 12:12:10 -0700 Subject: [llvm-commits] [llvm] r83467 - in /llvm/trunk: include/llvm/CodeGen/ include/llvm/Target/ lib/CodeGen/ lib/Target/ARM/ lib/Target/Alpha/ lib/Target/Blackfin/ lib/Target/CellSPU/ lib/Target/MSP430/ lib/Target/Mips/ lib/Target/PIC16/ lib/Target/PowerPC/ lib/Target/Sparc/ lib/Target/SystemZ/ lib/Target/X86/ lib/Target/XCore/ In-Reply-To: References: <200910071712.n97HCvGJ028253@zion.cs.uiuc.edu> Message-ID: <38EE6921-502A-4F02-BD8E-C36062039025@apple.com> On Oct 7, 2009, at 11:38 AM, Evan Cheng wrote: > > On Oct 7, 2009, at 10:12 AM, Jim Grosbach wrote: > >> Author: grosbach >> Date: Wed Oct 7 12:12:56 2009 >> New Revision: 83467 >> >> URL: http://llvm.org/viewvc/llvm-project?rev=83467&view=rev >> Log: >> Add register-reuse to frame-index register scavenging. When a >> target uses >> a virtual register to eliminate a frame index, it can return that >> register >> and the constant stored there to PEI to track. When scavenging to >> allocate >> for those registers, PEI then tracks the last-used register and >> value, and >> if it is still available and matches the value for the next index, >> reuses >> the existing value rather and removes the re-materialization >> instructions. >> Fancier tracking and adjustment of scavenger allocations to keep more >> values live for longer is possible, but not yet implemented and >> would likely >> be better done via a different, less special-purpose, approach to the >> problem. 
> > Hi Jim, > > Is PEI responsible for allocating physical registers to the virtual > registers created during eliminateFrameIndex()? That's ok for now. > But the question is when that is moved to a separate pass does this > reuse scheme still work? The allocation is currently done in PEI, yes. There's nothing preventing moving it out, however, given a bit of analysis data passed between them. > > More comments below. > > Evan > >> >> eliminateFrameIndex() is modified so the target implementations can >> return >> the registers they wish to be tracked for reuse. >> >> ARM Thumb1 implements and utilizes the new mechanism. All other >> targets are >> simply modified to adjust for the changed eliminateFrameIndex() >> prototype. >> >> >> Modified: >> llvm/trunk/include/llvm/CodeGen/RegisterScavenging.h >> llvm/trunk/include/llvm/Target/TargetRegisterInfo.h >> llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp >> llvm/trunk/lib/CodeGen/PrologEpilogInserter.h >> llvm/trunk/lib/CodeGen/RegisterScavenging.cpp >> llvm/trunk/lib/Target/ARM/ARMBaseRegisterInfo.cpp >> llvm/trunk/lib/Target/ARM/ARMBaseRegisterInfo.h >> llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.cpp >> llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.h >> llvm/trunk/lib/Target/Alpha/AlphaRegisterInfo.cpp >> llvm/trunk/lib/Target/Alpha/AlphaRegisterInfo.h >> llvm/trunk/lib/Target/Blackfin/BlackfinRegisterInfo.cpp >> llvm/trunk/lib/Target/Blackfin/BlackfinRegisterInfo.h >> llvm/trunk/lib/Target/CellSPU/SPURegisterInfo.cpp >> llvm/trunk/lib/Target/CellSPU/SPURegisterInfo.h >> llvm/trunk/lib/Target/MSP430/MSP430RegisterInfo.cpp >> llvm/trunk/lib/Target/MSP430/MSP430RegisterInfo.h >> llvm/trunk/lib/Target/Mips/MipsRegisterInfo.cpp >> llvm/trunk/lib/Target/Mips/MipsRegisterInfo.h >> llvm/trunk/lib/Target/PIC16/PIC16RegisterInfo.cpp >> llvm/trunk/lib/Target/PIC16/PIC16RegisterInfo.h >> llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.cpp >> llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.h >>
llvm/trunk/lib/Target/Sparc/SparcRegisterInfo.cpp >> llvm/trunk/lib/Target/Sparc/SparcRegisterInfo.h >> llvm/trunk/lib/Target/SystemZ/SystemZRegisterInfo.cpp >> llvm/trunk/lib/Target/SystemZ/SystemZRegisterInfo.h >> llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp >> llvm/trunk/lib/Target/X86/X86RegisterInfo.h >> llvm/trunk/lib/Target/XCore/XCoreRegisterInfo.cpp >> llvm/trunk/lib/Target/XCore/XCoreRegisterInfo.h >> >> Modified: llvm/trunk/include/llvm/CodeGen/RegisterScavenging.h >> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/RegisterScavenging.h?rev=83467&r1=83466&r2=83467&view=diff >> >> = >> = >> = >> = >> = >> = >> = >> = >> = >> ===================================================================== >> --- llvm/trunk/include/llvm/CodeGen/RegisterScavenging.h (original) >> +++ llvm/trunk/include/llvm/CodeGen/RegisterScavenging.h Wed Oct 7 >> 12:12:56 2009 >> @@ -117,6 +117,9 @@ >> return scavengeRegister(RegClass, MBBI, SPAdj); >> } >> >> + /// setUsed - Tell the scavenger a register is used. >> + /// >> + void setUsed(unsigned Reg); > > I'd prefer not to do this if it can be helped. RS should be entirely > responsible for tracking the liveness. Why is this needed? When we recognize that we can reuse a register, we reach back to the previously killing use and clear the kill flag, since the live range for the register has been extended. The scavenger state needs to be updated to know that the register still has a live value in it at this point, so we also need to flag it as used. Otherwise, the scavenger state isn't reflective of the new code. > >> private: >> /// isReserved - Returns true if a register is reserved. It is >> never "unused". >> bool isReserved(unsigned Reg) const { return ReservedRegs.test >> (Reg); } >> @@ -131,7 +134,6 @@ >> >> /// setUsed / setUnused - Mark the state of one or a number of >> registers.
>> /// >> - void setUsed(unsigned Reg); >> void setUsed(BitVector &Regs) { >> RegsAvailable &= ~Regs; >> } >> >> Modified: llvm/trunk/include/llvm/Target/TargetRegisterInfo.h >> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetRegisterInfo.h?rev=83467&r1=83466&r2=83467&view=diff >> >> = >> = >> = >> = >> = >> = >> = >> = >> = >> ===================================================================== >> --- llvm/trunk/include/llvm/Target/TargetRegisterInfo.h (original) >> +++ llvm/trunk/include/llvm/Target/TargetRegisterInfo.h Wed Oct 7 >> 12:12:56 2009 >> @@ -660,8 +660,13 @@ >> /// specified instruction, as long as it keeps the iterator >> pointing the the >> /// finished product. SPAdj is the SP adjustment due to call frame >> setup >> /// instruction. >> - virtual void eliminateFrameIndex(MachineBasicBlock::iterator MI, >> - int SPAdj, RegScavenger >> *RS=NULL) const = 0; >> + /// >> + /// When -enable-frame-index-scavenging is enabled, the virtual >> register >> + /// allocated for this frame index is returned and its value is >> stored in >> + /// *Value. >> + virtual unsigned eliminateFrameIndex(MachineBasicBlock::iterator >> MI, >> + int SPAdj, int *Value = NULL, >> + RegScavenger *RS=NULL) >> const = 0; >> >> /// emitProlog/emitEpilog - These methods insert prolog and epilog >> code into >> /// the function. 
>> >> Modified: llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp >> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp?rev=83467&r1=83466&r2=83467&view=diff >> >> = >> = >> = >> = >> = >> = >> = >> = >> = >> ===================================================================== >> --- llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp (original) >> +++ llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp Wed Oct 7 >> 12:12:56 2009 >> @@ -655,6 +655,11 @@ >> int FrameSetupOpcode = TRI.getCallFrameSetupOpcode(); >> int FrameDestroyOpcode = TRI.getCallFrameDestroyOpcode(); >> >> + // Pre-allocate space for frame index mappings. If more space is >> needed, >> + // the map will be grown later. >> + if (FrameIndexVirtualScavenging) >> + FrameConstantRegMap.grow(Fn.getRegInfo().getLastVirtReg() + >> 128); >> + >> for (MachineFunction::iterator BB = Fn.begin(), >> E = Fn.end(); BB != E; ++BB) { >> int SPAdj = 0; // SP offset due to call frame setup / destroy. >> @@ -703,9 +708,17 @@ >> // If this instruction has a FrameIndex operand, we need to >> // use that target machine register info object to eliminate >> // it. >> - >> - TRI.eliminateFrameIndex(MI, SPAdj, >> FrameIndexVirtualScavenging ? >> - NULL : RS); >> + int Value; >> + unsigned VReg = >> + TRI.eliminateFrameIndex(MI, SPAdj, &Value, >> + FrameIndexVirtualScavenging ? >> NULL : RS); >> + if (VReg) { >> + assert (FrameIndexVirtualScavenging && >> + "Not scavenging, but virtual returned from " >> + "eliminateFrameIndex()!"); >> + FrameConstantRegMap.grow(VReg); >> + FrameConstantRegMap[VReg] = FrameConstantEntry(Value, >> SPAdj); >> + } >> >> // Reset the iterator if we were at the beginning of the BB. >> if (AtBeginning) { >> @@ -727,6 +740,35 @@ >> } >> } >> >> +/// findLastUseReg - find the killing use of the specified >> register within >> +/// the instruciton range. Return the operand number of the kill >> in Operand. 
>> +static MachineBasicBlock::iterator >> +findLastUseReg(MachineBasicBlock::iterator I, >> MachineBasicBlock::iterator ME, >> + unsigned Reg, unsigned *Operand) { >> + // Scan forward to find the last use of this virtual register >> + for (++I; I != ME; ++I) { >> + MachineInstr *MI = I; >> + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) >> + if (MI->getOperand(i).isReg()) { >> + unsigned OpReg = MI->getOperand(i).getReg(); >> + if (OpReg == 0 || !TargetRegisterInfo::isVirtualRegister >> (OpReg)) >> + continue; >> + assert (OpReg == Reg >> + && "overlapping use of scavenged index register!"); >> + // If this is the killing use, we're done >> + if (MI->getOperand(i).isKill()) { >> + if (Operand) >> + *Operand = i; >> + return I; >> + } >> + } >> + } >> + // If we hit the end of the basic block, there was no kill of >> + // the virtual register, which is wrong. >> + assert (0 && "scavenged index register never killed!"); >> + return ME; >> +} >> + >> /// scavengeFrameVirtualRegs - Replace all frame index virtual >> registers >> /// with physical registers. Use the register scavenger to find an >> /// appropriate register to use. >> @@ -738,12 +780,21 @@ >> >> unsigned CurrentVirtReg = 0; >> unsigned CurrentScratchReg = 0; >> + unsigned PrevScratchReg = 0; >> + int PrevValue; >> + MachineInstr *PrevLastUseMI; >> + unsigned PrevLastUseOp; >> >> + // The instruction stream may change in the loop, so check BB- >> >end() >> + // directly. >> for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); >> ++I) { >> MachineInstr *MI = I; >> - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) >> + // Likewise, call getNumOperands() each iteration, as the MI >> may change >> + // inside the loop (with 'i' updated accordingly). > > Why not just re-compute the end limit when MI is updated? We could. To me the code just reads more clearly this way. i.e., stylistic choice. If you prefer, I'll change it. No big deal to me either way. 
> >> + for (unsigned i = 0; i != MI->getNumOperands(); ++i) >> if (MI->getOperand(i).isReg()) { >> - unsigned Reg = MI->getOperand(i).getReg(); >> + MachineOperand &MO = MI->getOperand(i); >> + unsigned Reg = MO.getReg(); >> if (Reg == 0) >> continue; >> if (!TargetRegisterInfo::isVirtualRegister(Reg)) { >> @@ -751,33 +802,81 @@ >> // seeing any references to it. >> assert (Reg != CurrentScratchReg >> && "overlapping use of scavenged frame index >> register!"); >> + >> + // If we have a previous scratch reg, check and see if >> anything >> + // here kills whatever value is in there. >> + if (Reg == PrevScratchReg) { >> + if (MO.isUse()) { >> + // Two-address operands implicitly kill >> + if (MO.isKill() || MI->isRegTiedToDefOperand(i)) >> + PrevScratchReg = 0; >> + } else { >> + assert (MO.isDef()); >> + PrevScratchReg = 0; >> + } >> + } >> continue; >> } >> >> // If we already have a scratch for this virtual register, >> use it >> if (Reg != CurrentVirtReg) { >> - // When we first encounter a new virtual register, it >> - // must be a definition. >> - assert(MI->getOperand(i).isDef() && >> - "frame index virtual missing def!"); >> - // We can't have nested virtual register live ranges >> because >> - // there's only a guarantee of one scavenged register >> at a time. >> - assert (CurrentVirtReg == 0 && >> - "overlapping frame index virtual registers!"); >> - CurrentVirtReg = Reg; >> - const TargetRegisterClass *RC = Fn.getRegInfo >> ().getRegClass(Reg); >> - CurrentScratchReg = RS->FindUnusedReg(RC); >> - if (CurrentScratchReg == 0) >> - // No register is "free". Scavenge a register. >> - // FIXME: Track SPAdj. Zero won't always be right >> - CurrentScratchReg = RS->scavengeRegister(RC, I, 0); >> + int Value = FrameConstantRegMap[Reg].first; >> + int SPAdj = FrameConstantRegMap[Reg].second; >> + >> + // If the scratch register from the last allocation is >> still >> + // available, see if the value matches. If it does, >> just re-use it. 
>> + if (PrevScratchReg && Value == PrevValue) { > > This means the reuse can only happen when you have consecutive uses > of the same frame indices. That seems very restrictive. The > implementation makes it difficult to separate the allocation phase > from PEI. The former is currently true, yes. It seemed a reasonable subset of the general problem to solve. Doing more would require tracking the liveness of multiple values, and I'm concerned about worst case performance if I do too much of that sort of thing when iterating over the instruction list. I'm not sure I follow why it is more difficult to separate into another pass due to implementation. There are some inherent complications that make it tricky, however, yes. Specifically, I think the constant value references need help from the target. I'll think about this a bit more and see if that's always true, or if there's something we can do about it. If that can be simplified, it would be great. > > >> + // FIXME: This assumes that the instructions in the >> live range >> + // for the virtual register are exclusively for the >> purpose >> + // of populating the value in the register. That >> reasonable > > That -> That's. Woops. Thanks. Fixed. > >> + // for these frame index registers, but it's still a >> very, very >> + // strong assumption. Perhaps this implies that the >> frame index >> + // elimination should be before register allocation, >> with >> + // conservative heuristics since we'll know less >> then, and >> + // the reuse calculations done directly when doing >> the code-gen? > > This can be solved later. Agreed. Just putting it as a fixme to remind us. > >> + >> + // Find the last use of the new virtual register. >> Remove all >> + // instruction between here and there, and update >> the current >> + // instruction to reference the last use insn instead. 
>> + MachineBasicBlock::iterator LastUseMI = >> + findLastUseReg(I, BB->end(), Reg, &i); > >> + // Remove all instructions up 'til the last use, >> since they're >> + // just calculating the value we already have. >> + BB->erase(I, LastUseMI); >> + MI = I = LastUseMI; > > Rather than doing this, you could simply continue to iterate forward > until you have reached the kill. There's a couple ways I think the code can be adjusted to have a cleaner flow. This is definitely a good example. > >> + >> + CurrentScratchReg = PrevScratchReg; >> + // Extend the live range of the register >> + PrevLastUseMI->getOperand(PrevLastUseOp).setIsKill >> (false); >> + RS->setUsed(CurrentScratchReg); >> + } else { >> + // When we first encounter a new virtual register, it >> + // must be a definition. >> + assert(MI->getOperand(i).isDef() && >> + "frame index virtual missing def!"); >> + // We can't have nested virtual register live ranges >> because >> + // there's only a guarantee of one scavenged >> register at a time. >> + assert (CurrentVirtReg == 0 && >> + "overlapping frame index virtual registers!"); >> + CurrentVirtReg = Reg; >> + const TargetRegisterClass *RC = Fn.getRegInfo >> ().getRegClass(Reg); >> + CurrentScratchReg = RS->FindUnusedReg(RC); >> + if (CurrentScratchReg == 0) >> + // No register is "free". Scavenge a register. >> + CurrentScratchReg = RS->scavengeRegister(RC, I, >> SPAdj); >> + >> + PrevValue = Value; >> + } >> } >> assert (CurrentScratchReg && "Missing scratch register!"); >> MI->getOperand(i).setReg(CurrentScratchReg); >> >> // If this is the last use of the register, stop tracking it. 
>> - if (MI->getOperand(i).isKill()) >> + if (MI->getOperand(i).isKill()) { >> + PrevScratchReg = CurrentScratchReg; >> + PrevLastUseMI = MI; >> CurrentScratchReg = CurrentVirtReg = 0; >> + } >> } >> RS->forward(MI); >> } >> >> Modified: llvm/trunk/lib/CodeGen/PrologEpilogInserter.h >> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/PrologEpilogInserter.h?rev=83467&r1=83466&r2=83467&view=diff >> >> = >> = >> = >> = >> = >> = >> = >> = >> = >> ===================================================================== >> --- llvm/trunk/lib/CodeGen/PrologEpilogInserter.h (original) >> +++ llvm/trunk/lib/CodeGen/PrologEpilogInserter.h Wed Oct 7 >> 12:12:56 2009 >> @@ -27,6 +27,8 @@ >> #include "llvm/CodeGen/MachineLoopInfo.h" >> #include "llvm/ADT/SparseBitVector.h" >> #include "llvm/ADT/DenseMap.h" >> +#include "llvm/ADT/IndexedMap.h" >> +#include "llvm/Target/TargetRegisterInfo.h" >> >> namespace llvm { >> class RegScavenger; >> @@ -93,6 +95,12 @@ >> // functions. >> bool ShrinkWrapThisFunction; >> >> + // When using the scavenger post-pass to resolve frame reference >> + // materialization registers, maintain a map of the registers to >> + // the constant value and SP adjustment associated with it. >> + typedef std::pair FrameConstantEntry; >> + IndexedMap >> FrameConstantRegMap; >> + >> #ifndef NDEBUG >> // Machine function handle. 
>> MachineFunction* MF; >> >> Modified: llvm/trunk/lib/CodeGen/RegisterScavenging.cpp >> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/RegisterScavenging.cpp?rev=83467&r1=83466&r2=83467&view=diff >> >> = >> = >> = >> = >> = >> = >> = >> = >> = >> ===================================================================== >> --- llvm/trunk/lib/CodeGen/RegisterScavenging.cpp (original) >> +++ llvm/trunk/lib/CodeGen/RegisterScavenging.cpp Wed Oct 7 >> 12:12:56 2009 >> @@ -306,7 +306,7 @@ >> "Cannot scavenge register without an emergency spill >> slot!"); >> TII->storeRegToStackSlot(*MBB, I, SReg, true, >> ScavengingFrameIndex, RC); >> MachineBasicBlock::iterator II = prior(I); >> - TRI->eliminateFrameIndex(II, SPAdj, this); >> + TRI->eliminateFrameIndex(II, SPAdj, NULL, this); >> >> // Restore the scavenged register before its use (or first >> terminator). >> TII->loadRegFromStackSlot(*MBB, UseMI, SReg, >> ScavengingFrameIndex, RC); >> >> Modified: llvm/trunk/lib/Target/ARM/ARMBaseRegisterInfo.cpp >> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMBaseRegisterInfo.cpp?rev=83467&r1=83466&r2=83467&view=diff >> >> = >> = >> = >> = >> = >> = >> = >> = >> = >> ===================================================================== >> --- llvm/trunk/lib/Target/ARM/ARMBaseRegisterInfo.cpp (original) >> +++ llvm/trunk/lib/Target/ARM/ARMBaseRegisterInfo.cpp Wed Oct 7 >> 12:12:56 2009 >> @@ -1023,9 +1023,10 @@ >> return Reg; >> } >> >> -void >> +unsigned >> ARMBaseRegisterInfo::eliminateFrameIndex >> (MachineBasicBlock::iterator II, >> - int SPAdj, RegScavenger >> *RS) const { >> + int SPAdj, int *Value, >> + RegScavenger *RS) const { >> unsigned i = 0; >> MachineInstr &MI = *II; >> MachineBasicBlock &MBB = *MI.getParent(); >> @@ -1067,7 +1068,7 @@ >> Done = rewriteT2FrameIndex(MI, i, FrameReg, Offset, TII); >> } >> if (Done) >> - return; >> + return 0; >> >> // If we get here, the immediate doesn't fit into the instruction. 
>> We folded >> // as much as possible above, handle the rest, providing a register >> that is >> @@ -1102,6 +1103,7 @@ >> } >> MI.getOperand(i).ChangeToRegister(ScratchReg, false, false, true); >> } >> + return 0; >> } >> >> /// Move iterator pass the next bunch of callee save load / store >> ops for >> >> Modified: llvm/trunk/lib/Target/ARM/ARMBaseRegisterInfo.h >> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMBaseRegisterInfo.h?rev=83467&r1=83466&r2=83467&view=diff >> >> = >> = >> = >> = >> = >> = >> = >> = >> = >> ===================================================================== >> --- llvm/trunk/lib/Target/ARM/ARMBaseRegisterInfo.h (original) >> +++ llvm/trunk/lib/Target/ARM/ARMBaseRegisterInfo.h Wed Oct 7 >> 12:12:56 2009 >> @@ -128,8 +128,9 @@ >> MachineBasicBlock &MBB, >> >> MachineBasicBlock::iterator I) const; >> >> - virtual void eliminateFrameIndex(MachineBasicBlock::iterator II, >> - int SPAdj, RegScavenger *RS = >> NULL) const; >> + virtual unsigned eliminateFrameIndex(MachineBasicBlock::iterator >> II, >> + int SPAdj, int *Value = NULL, >> + RegScavenger *RS = NULL) >> const; >> >> virtual void emitPrologue(MachineFunction &MF) const; >> virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock >> &MBB) const; >> >> Modified: llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.cpp >> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.cpp?rev=83467&r1=83466&r2=83467&view=diff >> >> = >> = >> = >> = >> = >> = >> = >> = >> = >> ===================================================================== >> --- llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.cpp (original) >> +++ llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.cpp Wed Oct 7 >> 12:12:56 2009 >> @@ -427,8 +427,11 @@ >> TII.copyRegToReg(MBB, I, Reg, ARM::R12, RC, ARM::GPRRegisterClass); >> } >> >> -void Thumb1RegisterInfo::eliminateFrameIndex >> (MachineBasicBlock::iterator II, >> - int SPAdj, >> RegScavenger *RS) const{ >> +unsigned >> 
+Thumb1RegisterInfo::eliminateFrameIndex >> (MachineBasicBlock::iterator II, >> + int SPAdj, int *Value, >> + RegScavenger *RS) const{ >> + unsigned VReg = 0; >> unsigned i = 0; >> MachineInstr &MI = *II; >> MachineBasicBlock &MBB = *MI.getParent(); >> @@ -484,7 +487,7 @@ >> MI.setDesc(TII.get(ARM::tMOVgpr2tgpr)); >> MI.getOperand(i).ChangeToRegister(FrameReg, false); >> MI.RemoveOperand(i+1); >> - return; >> + return 0; >> } >> >> // Common case: small offset, fits into instruction. >> @@ -500,7 +503,7 @@ >> MI.getOperand(i).ChangeToRegister(FrameReg, false); >> MI.getOperand(i+1).ChangeToImmediate(Offset / Scale); >> } >> - return; >> + return 0; >> } >> >> unsigned DestReg = MI.getOperand(0).getReg(); >> @@ -512,7 +515,7 @@ >> emitThumbRegPlusImmediate(MBB, II, DestReg, FrameReg, Offset, >> TII, >> *this, dl); >> MBB.erase(II); >> - return; >> + return 0; >> } >> >> if (Offset > 0) { >> @@ -545,7 +548,7 @@ >> AddDefaultPred(MIB); >> } >> } >> - return; >> + return 0; >> } else { >> unsigned ImmIdx = 0; >> int InstrOffs = 0; >> @@ -575,7 +578,7 @@ >> // Replace the FrameIndex with sp >> MI.getOperand(i).ChangeToRegister(FrameReg, false); >> ImmOp.ChangeToImmediate(ImmedOffset); >> - return; >> + return 0; >> } >> >> bool isThumSpillRestore = Opcode == ARM::tRestore || Opcode == >> ARM::tSpill; >> @@ -633,22 +636,24 @@ >> MI.addOperand(MachineOperand::CreateReg(0, false)); >> } else if (Desc.mayStore()) { >> if (FrameIndexVirtualScavenging) { >> - unsigned TmpReg = >> - MF.getRegInfo().createVirtualRegister >> (ARM::tGPRRegisterClass); >> + VReg = MF.getRegInfo().createVirtualRegister >> (ARM::tGPRRegisterClass); >> + assert (Value && "Frame index virtual allocated, but Value >> arg is NULL!"); >> + *Value = Offset; >> bool UseRR = false; >> + >> if (Opcode == ARM::tSpill) { >> if (FrameReg == ARM::SP) >> - emitThumbRegPlusImmInReg(MBB, II, TmpReg, FrameReg, >> + emitThumbRegPlusImmInReg(MBB, II, VReg, FrameReg, >> Offset, false, TII, *this, dl); >> else { >> - 
emitLoadConstPool(MBB, II, dl, TmpReg, 0, Offset); >> + emitLoadConstPool(MBB, II, dl, VReg, 0, Offset); >> UseRR = true; >> } >> } else >> - emitThumbRegPlusImmediate(MBB, II, TmpReg, FrameReg, >> Offset, TII, >> + emitThumbRegPlusImmediate(MBB, II, VReg, FrameReg, Offset, >> TII, >> *this, dl); >> MI.setDesc(TII.get(ARM::tSTR)); >> - MI.getOperand(i).ChangeToRegister(TmpReg, false, false, true); >> + MI.getOperand(i).ChangeToRegister(VReg, false, false, true); >> if (UseRR) // Use [reg, reg] addrmode. >> MI.addOperand(MachineOperand::CreateReg(FrameReg, false)); >> else // tSTR has an extra register operand. >> @@ -707,6 +712,7 @@ >> MachineInstrBuilder MIB(&MI); >> AddDefaultPred(MIB); >> } >> + return VReg; >> } >> >> void Thumb1RegisterInfo::emitPrologue(MachineFunction &MF) const { >> >> Modified: llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.h >> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.h?rev=83467&r1=83466&r2=83467&view=diff >> >> = >> = >> = >> = >> = >> = >> = >> = >> = >> ===================================================================== >> --- llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.h (original) >> +++ llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.h Wed Oct 7 >> 12:12:56 2009 >> @@ -62,8 +62,9 @@ >> MachineBasicBlock::iterator I, >> const TargetRegisterClass *RC, >> unsigned Reg) const; >> - void eliminateFrameIndex(MachineBasicBlock::iterator II, >> - int SPAdj, RegScavenger *RS = NULL) >> const; >> + unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, >> + int SPAdj, int *Value = NULL, >> + RegScavenger *RS = NULL) const; >> >> void emitPrologue(MachineFunction &MF) const; >> void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; >> >> Modified: llvm/trunk/lib/Target/Alpha/AlphaRegisterInfo.cpp >> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Alpha/AlphaRegisterInfo.cpp?rev=83467&r1=83466&r2=83467&view=diff >> >> = >> = >> = >> = >> = >> = >> = >> = >> = >> 
===================================================================== >> --- llvm/trunk/lib/Target/Alpha/AlphaRegisterInfo.cpp (original) >> +++ llvm/trunk/lib/Target/Alpha/AlphaRegisterInfo.cpp Wed Oct 7 >> 12:12:56 2009 >> @@ -151,8 +151,10 @@ >> //variable locals >> //<- SP >> >> -void AlphaRegisterInfo::eliminateFrameIndex >> (MachineBasicBlock::iterator II, >> - int SPAdj, >> RegScavenger *RS) const { >> +unsigned >> +AlphaRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator >> II, >> + int SPAdj, int *Value, >> + RegScavenger *RS) const { >> assert(SPAdj == 0 && "Unexpected"); >> >> unsigned i = 0; >> @@ -197,6 +199,7 @@ >> } else { >> MI.getOperand(i).ChangeToImmediate(Offset); >> } >> + return 0; >> } >> >> >> >> Modified: llvm/trunk/lib/Target/Alpha/AlphaRegisterInfo.h >> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Alpha/AlphaRegisterInfo.h?rev=83467&r1=83466&r2=83467&view=diff >> >> = >> = >> = >> = >> = >> = >> = >> = >> = >> ===================================================================== >> --- llvm/trunk/lib/Target/Alpha/AlphaRegisterInfo.h (original) >> +++ llvm/trunk/lib/Target/Alpha/AlphaRegisterInfo.h Wed Oct 7 >> 12:12:56 2009 >> @@ -41,8 +41,9 @@ >> MachineBasicBlock &MBB, >> MachineBasicBlock::iterator I) >> const; >> >> - void eliminateFrameIndex(MachineBasicBlock::iterator II, >> - int SPAdj, RegScavenger *RS = NULL) >> const; >> + unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, >> + int SPAdj, int *Value = NULL, >> + RegScavenger *RS = NULL) const; >> >> //void processFunctionBeforeFrameFinalized(MachineFunction &MF) >> const; >> >> >> Modified: llvm/trunk/lib/Target/Blackfin/BlackfinRegisterInfo.cpp >> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Blackfin/BlackfinRegisterInfo.cpp?rev=83467&r1=83466&r2=83467&view=diff >> >> = >> = >> = >> = >> = >> = >> = >> = >> = >> ===================================================================== >> --- 
llvm/trunk/lib/Target/Blackfin/BlackfinRegisterInfo.cpp >> (original) >> +++ llvm/trunk/lib/Target/Blackfin/BlackfinRegisterInfo.cpp Wed >> Oct 7 12:12:56 2009 >> @@ -219,9 +219,10 @@ >> return Reg; >> } >> >> -void BlackfinRegisterInfo::eliminateFrameIndex >> (MachineBasicBlock::iterator II, >> - int SPAdj, >> - RegScavenger *RS) >> const { >> +unsigned >> +BlackfinRegisterInfo::eliminateFrameIndex >> (MachineBasicBlock::iterator II, >> + int SPAdj, int *Value, >> + RegScavenger *RS) const { >> MachineInstr &MI = *II; >> MachineBasicBlock &MBB = *MI.getParent(); >> MachineFunction &MF = *MBB.getParent(); >> @@ -258,20 +259,20 @@ >> MI.setDesc(TII.get(isStore >> ? BF::STORE32p_uimm6m4 >> : BF::LOAD32p_uimm6m4)); >> - return; >> + return 0; >> } >> if (BaseReg == BF::FP && isUint<7>(-Offset)) { >> MI.setDesc(TII.get(isStore >> ? BF::STORE32fp_nimm7m4 >> : BF::LOAD32fp_nimm7m4)); >> MI.getOperand(FIPos+1).setImm(-Offset); >> - return; >> + return 0; >> } >> if (isInt<18>(Offset)) { >> MI.setDesc(TII.get(isStore >> ? BF::STORE32p_imm18m4 >> : BF::LOAD32p_imm18m4)); >> - return; >> + return 0; >> } >> // Use RegScavenger to calculate proper offset... 
>> MI.dump(); >> @@ -356,6 +357,7 @@ >> llvm_unreachable("Cannot eliminate frame index"); >> break; >> } >> + return 0; >> } >> >> void BlackfinRegisterInfo:: >> >> Modified: llvm/trunk/lib/Target/Blackfin/BlackfinRegisterInfo.h >> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Blackfin/BlackfinRegisterInfo.h?rev=83467&r1=83466&r2=83467&view=diff >> >> = >> = >> = >> = >> = >> = >> = >> = >> = >> ===================================================================== >> --- llvm/trunk/lib/Target/Blackfin/BlackfinRegisterInfo.h (original) >> +++ llvm/trunk/lib/Target/Blackfin/BlackfinRegisterInfo.h Wed Oct >> 7 12:12:56 2009 >> @@ -64,8 +64,9 @@ >> MachineBasicBlock &MBB, >> MachineBasicBlock::iterator I) >> const; >> >> - void eliminateFrameIndex(MachineBasicBlock::iterator II, >> - int SPAdj, RegScavenger *RS = NULL) >> const; >> + unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, >> + int SPAdj, int *Value = NULL, >> + RegScavenger *RS = NULL) const; >> >> void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, >> RegScavenger *RS) const; >> >> Modified: llvm/trunk/lib/Target/CellSPU/SPURegisterInfo.cpp >> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/CellSPU/SPURegisterInfo.cpp?rev=83467&r1=83466&r2=83467&view=diff >> >> = >> = >> = >> = >> = >> = >> = >> = >> = >> ===================================================================== >> --- llvm/trunk/lib/Target/CellSPU/SPURegisterInfo.cpp (original) >> +++ llvm/trunk/lib/Target/CellSPU/SPURegisterInfo.cpp Wed Oct 7 >> 12:12:56 2009 >> @@ -326,9 +326,9 @@ >> MBB.erase(I); >> } >> >> -void >> +unsigned >> SPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator >> II, int SPAdj, >> - RegScavenger *RS) const >> + int *Value, RegScavenger *RS) >> const >> { >> unsigned i = 0; >> MachineInstr &MI = *II; >> @@ -371,6 +371,7 @@ >> } else { >> MO.ChangeToImmediate(Offset); >> } >> + return 0; >> } >> >> /// determineFrameLayout - Determine the size of the frame 
and >> maximum call >> >> Modified: llvm/trunk/lib/Target/CellSPU/SPURegisterInfo.h >> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/CellSPU/SPURegisterInfo.h?rev=83467&r1=83466&r2=83467&view=diff >> >> = >> = >> = >> = >> = >> = >> = >> = >> = >> ===================================================================== >> --- llvm/trunk/lib/Target/CellSPU/SPURegisterInfo.h (original) >> +++ llvm/trunk/lib/Target/CellSPU/SPURegisterInfo.h Wed Oct 7 >> 12:12:56 2009 >> @@ -63,8 +63,9 @@ >> MachineBasicBlock &MBB, >> MachineBasicBlock::iterator I) >> const; >> //! Convert frame indicies into machine operands >> - void eliminateFrameIndex(MachineBasicBlock::iterator II, int, >> - RegScavenger *RS) const; >> + unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, >> int SPAdj, >> + int *Value = NULL, >> + RegScavenger *RS = NULL) const; >> //! Determine the frame's layour >> void determineFrameLayout(MachineFunction &MF) const; >> >> >> Modified: llvm/trunk/lib/Target/MSP430/MSP430RegisterInfo.cpp >> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/MSP430/MSP430RegisterInfo.cpp?rev=83467&r1=83466&r2=83467&view=diff >> >> = >> = >> = >> = >> = >> = >> = >> = >> = >> ===================================================================== >> --- llvm/trunk/lib/Target/MSP430/MSP430RegisterInfo.cpp (original) >> +++ llvm/trunk/lib/Target/MSP430/MSP430RegisterInfo.cpp Wed Oct 7 >> 12:12:56 2009 >> @@ -147,9 +147,10 @@ >> MBB.erase(I); >> } >> >> -void >> +unsigned >> MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator >> II, >> - int SPAdj, RegScavenger >> *RS) const { >> + int SPAdj, int *Value, >> + RegScavenger *RS) const { >> assert(SPAdj == 0 && "Unexpected"); >> >> unsigned i = 0; >> @@ -187,7 +188,7 @@ >> MI.getOperand(i).ChangeToRegister(BasePtr, false); >> >> if (Offset == 0) >> - return; >> + return 0; >> >> // We need to materialize the offset via add instruction. 
>> unsigned DstReg = MI.getOperand(0).getReg(); >> @@ -198,11 +199,12 @@ >> BuildMI(MBB, next(II), dl, TII.get(MSP430::ADD16ri), DstReg) >> .addReg(DstReg).addImm(Offset); >> >> - return; >> + return 0; >> } >> >> MI.getOperand(i).ChangeToRegister(BasePtr, false); >> MI.getOperand(i+1).ChangeToImmediate(Offset); >> + return 0; >> } >> >> void >> >> Modified: llvm/trunk/lib/Target/MSP430/MSP430RegisterInfo.h >> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/MSP430/MSP430RegisterInfo.h?rev=83467&r1=83466&r2=83467&view=diff >> >> = >> = >> = >> = >> = >> = >> = >> = >> = >> ===================================================================== >> --- llvm/trunk/lib/Target/MSP430/MSP430RegisterInfo.h (original) >> +++ llvm/trunk/lib/Target/MSP430/MSP430RegisterInfo.h Wed Oct 7 >> 12:12:56 2009 >> @@ -49,8 +49,9 @@ >> MachineBasicBlock &MBB, >> MachineBasicBlock::iterator I) >> const; >> >> - void eliminateFrameIndex(MachineBasicBlock::iterator II, >> - int SPAdj, RegScavenger *RS = NULL) >> const; >> + unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, >> + int SPAdj, int *Value = NULL, >> + RegScavenger *RS = NULL) const; >> >> void emitPrologue(MachineFunction &MF) const; >> void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; >> >> Modified: llvm/trunk/lib/Target/Mips/MipsRegisterInfo.cpp >> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Mips/MipsRegisterInfo.cpp?rev=83467&r1=83466&r2=83467&view=diff >> >> = >> = >> = >> = >> = >> = >> = >> = >> = >> ===================================================================== >> --- llvm/trunk/lib/Target/Mips/MipsRegisterInfo.cpp (original) >> +++ llvm/trunk/lib/Target/Mips/MipsRegisterInfo.cpp Wed Oct 7 >> 12:12:56 2009 >> @@ -348,9 +348,9 @@ >> // FrameIndex represent objects inside a abstract stack. >> // We must replace FrameIndex with an stack/frame pointer >> // direct reference. 
>> -void MipsRegisterInfo:: >> -eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, >> - RegScavenger *RS) const >> +unsigned MipsRegisterInfo:: >> +eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, >> + int *Value, RegScavenger *RS) const >> { >> MachineInstr &MI = *II; >> MachineFunction &MF = *MI.getParent()->getParent(); >> @@ -382,6 +382,7 @@ >> >> MI.getOperand(i-1).ChangeToImmediate(Offset); >> MI.getOperand(i).ChangeToRegister(getFrameRegister(MF), false); >> + return 0; >> } >> >> void MipsRegisterInfo:: >> >> Modified: llvm/trunk/lib/Target/Mips/MipsRegisterInfo.h >> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Mips/MipsRegisterInfo.h?rev=83467&r1=83466&r2=83467&view=diff >> >> = >> = >> = >> = >> = >> = >> = >> = >> = >> ===================================================================== >> --- llvm/trunk/lib/Target/Mips/MipsRegisterInfo.h (original) >> +++ llvm/trunk/lib/Target/Mips/MipsRegisterInfo.h Wed Oct 7 >> 12:12:56 2009 >> @@ -54,8 +54,9 @@ >> MachineBasicBlock::iterator I) >> const; >> >> /// Stack Frame Processing Methods >> - void eliminateFrameIndex(MachineBasicBlock::iterator II, >> - int SPAdj, RegScavenger *RS = NULL) >> const; >> + unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, >> + int SPAdj, int *Value = NULL, >> + RegScavenger *RS = NULL) const; >> >> void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; >> >> >> Modified: llvm/trunk/lib/Target/PIC16/PIC16RegisterInfo.cpp >> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PIC16/PIC16RegisterInfo.cpp?rev=83467&r1=83466&r2=83467&view=diff >> >> = >> = >> = >> = >> = >> = >> = >> = >> = >> ===================================================================== >> --- llvm/trunk/lib/Target/PIC16/PIC16RegisterInfo.cpp (original) >> +++ llvm/trunk/lib/Target/PIC16/PIC16RegisterInfo.cpp Wed Oct 7 >> 12:12:56 2009 >> @@ -51,10 +51,13 @@ >> return false; >> } >> >> -void PIC16RegisterInfo:: >> +unsigned 
PIC16RegisterInfo:: >> eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, >> - RegScavenger *RS) const >> -{ /* NOT YET IMPLEMENTED */ } >> + int *Value, RegScavenger *RS) const >> +{ >> + /* NOT YET IMPLEMENTED */ >> + return 0; >> +} >> >> void PIC16RegisterInfo::emitPrologue(MachineFunction &MF) const >> { /* NOT YET IMPLEMENTED */ } >> >> Modified: llvm/trunk/lib/Target/PIC16/PIC16RegisterInfo.h >> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PIC16/PIC16RegisterInfo.h?rev=83467&r1=83466&r2=83467&view=diff >> >> = >> = >> = >> = >> = >> = >> = >> = >> = >> ===================================================================== >> --- llvm/trunk/lib/Target/PIC16/PIC16RegisterInfo.h (original) >> +++ llvm/trunk/lib/Target/PIC16/PIC16RegisterInfo.h Wed Oct 7 >> 12:12:56 2009 >> @@ -48,8 +48,9 @@ >> virtual BitVector getReservedRegs(const MachineFunction &MF) const; >> virtual bool hasFP(const MachineFunction &MF) const; >> >> - virtual void eliminateFrameIndex(MachineBasicBlock::iterator MI, >> - int SPAdj, RegScavenger *RS=NULL) const; >> + virtual unsigned eliminateFrameIndex(MachineBasicBlock::iterator >> MI, >> + int SPAdj, int *Value = NULL, >> + RegScavenger *RS=NULL) const; >> >> void eliminateCallFramePseudoInstr(MachineFunction &MF, >> MachineBasicBlock &MBB, >> >> Modified: llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.cpp >> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.cpp?rev=83467&r1=83466&r2=83467&view=diff >> >> = >> = >> = >> = >> = >> = >> = >> = >> = >> ===================================================================== >> --- llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.cpp (original) >> +++ llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.cpp Wed Oct 7 >> 12:12:56 2009 >> @@ -699,8 +699,10 @@ >> MBB.erase(II); >> } >> >> -void PPCRegisterInfo::eliminateFrameIndex >> (MachineBasicBlock::iterator II, >> - int SPAdj, RegScavenger >> *RS) const { >> +unsigned >> 
+PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, >> + int SPAdj, int *Value, >> + RegScavenger *RS) const { >> assert(SPAdj == 0 && "Unexpected"); >> >> // Get the instruction. >> @@ -739,14 +741,14 @@ >> if (FPSI && FrameIndex == FPSI && >> (OpC == PPC::DYNALLOC || OpC == PPC::DYNALLOC8)) { >> lowerDynamicAlloc(II, SPAdj, RS); >> - return; >> + return 0; >> } >> >> // Special case for pseudo-op SPILL_CR. >> if (EnableRegisterScavenging) // FIXME (64-bit): Enable by default. >> if (OpC == PPC::SPILL_CR) { >> lowerCRSpilling(II, FrameIndex, SPAdj, RS); >> - return; >> + return 0; >> } >> >> // Replace the FrameIndex with base register with GPR1 (SP) or >> GPR31 (FP). >> @@ -788,7 +790,7 @@ >> if (isIXAddr) >> Offset >>= 2; // The actual encoded value has the low two >> bits zero. >> MI.getOperand(OffsetOperandNo).ChangeToImmediate(Offset); >> - return; >> + return 0; >> } >> >> // The offset doesn't fit into a single register, scavenge one to >> build the >> @@ -828,6 +830,7 @@ >> unsigned StackReg = MI.getOperand(FIOperandNo).getReg(); >> MI.getOperand(OperandBase).ChangeToRegister(StackReg, false); >> MI.getOperand(OperandBase + 1).ChangeToRegister(SReg, false); >> + return 0; >> } >> >> /// VRRegNo - Map from a numbered VR register to its enum value. 
>> >> Modified: llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.h >> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.h?rev=83467&r1=83466&r2=83467&view=diff >> >> = >> = >> = >> = >> = >> = >> = >> = >> = >> ===================================================================== >> --- llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.h (original) >> +++ llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.h Wed Oct 7 >> 12:12:56 2009 >> @@ -66,8 +66,9 @@ >> int SPAdj, RegScavenger *RS) const; >> void lowerCRSpilling(MachineBasicBlock::iterator II, unsigned >> FrameIndex, >> int SPAdj, RegScavenger *RS) const; >> - void eliminateFrameIndex(MachineBasicBlock::iterator II, >> - int SPAdj, RegScavenger *RS = NULL) >> const; >> + unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, >> + int SPAdj, int *Value = NULL, >> + RegScavenger *RS = NULL) const; >> >> /// determineFrameLayout - Determine the size of the frame and >> maximum call >> /// frame size. >> >> Modified: llvm/trunk/lib/Target/Sparc/SparcRegisterInfo.cpp >> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Sparc/SparcRegisterInfo.cpp?rev=83467&r1=83466&r2=83467&view=diff >> >> = >> = >> = >> = >> = >> = >> = >> = >> = >> ===================================================================== >> --- llvm/trunk/lib/Target/Sparc/SparcRegisterInfo.cpp (original) >> +++ llvm/trunk/lib/Target/Sparc/SparcRegisterInfo.cpp Wed Oct 7 >> 12:12:56 2009 >> @@ -76,8 +76,10 @@ >> MBB.erase(I); >> } >> >> -void SparcRegisterInfo::eliminateFrameIndex >> (MachineBasicBlock::iterator II, >> - int SPAdj, >> RegScavenger *RS) const { >> +unsigned >> +SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator >> II, >> + int SPAdj, int *Value, >> + RegScavenger *RS) const { >> assert(SPAdj == 0 && "Unexpected"); >> >> unsigned i = 0; >> @@ -113,6 +115,7 @@ >> MI.getOperand(i).ChangeToRegister(SP::G1, false); >> MI.getOperand(i+1).ChangeToImmediate(Offset & ((1 << 10)-1)); >> 
} >> + return 0; >> } >> >> void SparcRegisterInfo:: >> >> Modified: llvm/trunk/lib/Target/Sparc/SparcRegisterInfo.h >> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Sparc/SparcRegisterInfo.h?rev=83467&r1=83466&r2=83467&view=diff >> >> = >> = >> = >> = >> = >> = >> = >> = >> = >> ===================================================================== >> --- llvm/trunk/lib/Target/Sparc/SparcRegisterInfo.h (original) >> +++ llvm/trunk/lib/Target/Sparc/SparcRegisterInfo.h Wed Oct 7 >> 12:12:56 2009 >> @@ -43,8 +43,9 @@ >> MachineBasicBlock &MBB, >> MachineBasicBlock::iterator I) >> const; >> >> - void eliminateFrameIndex(MachineBasicBlock::iterator II, >> - int SPAdj, RegScavenger *RS = NULL) >> const; >> + unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, >> + int SPAdj, int *Value = NULL, >> + RegScavenger *RS = NULL) const; >> >> void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; >> >> >> Modified: llvm/trunk/lib/Target/SystemZ/SystemZRegisterInfo.cpp >> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SystemZ/SystemZRegisterInfo.cpp?rev=83467&r1=83466&r2=83467&view=diff >> >> = >> = >> = >> = >> = >> = >> = >> = >> = >> ===================================================================== >> --- llvm/trunk/lib/Target/SystemZ/SystemZRegisterInfo.cpp (original) >> +++ llvm/trunk/lib/Target/SystemZ/SystemZRegisterInfo.cpp Wed Oct >> 7 12:12:56 2009 >> @@ -107,8 +107,10 @@ >> return Offset; >> } >> >> -void SystemZRegisterInfo::eliminateFrameIndex >> (MachineBasicBlock::iterator II, >> - int SPAdj, >> RegScavenger *RS) const { >> +unsigned >> +SystemZRegisterInfo::eliminateFrameIndex >> (MachineBasicBlock::iterator II, >> + int SPAdj, int *Value, >> + RegScavenger *RS) const { >> assert(SPAdj == 0 && "Unxpected"); >> >> unsigned i = 0; >> @@ -136,6 +138,7 @@ >> MI.setDesc(TII.getMemoryInstr(MI.getOpcode(), Offset)); >> >> MI.getOperand(i+1).ChangeToImmediate(Offset); >> + return 0; >> } >> >> void >> >> 
Modified: llvm/trunk/lib/Target/SystemZ/SystemZRegisterInfo.h >> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SystemZ/SystemZRegisterInfo.h?rev=83467&r1=83466&r2=83467&view=diff >> >> = >> = >> = >> = >> = >> = >> = >> = >> = >> ===================================================================== >> --- llvm/trunk/lib/Target/SystemZ/SystemZRegisterInfo.h (original) >> +++ llvm/trunk/lib/Target/SystemZ/SystemZRegisterInfo.h Wed Oct 7 >> 12:12:56 2009 >> @@ -55,8 +55,9 @@ >> MachineBasicBlock &MBB, >> MachineBasicBlock::iterator I) >> const; >> >> - void eliminateFrameIndex(MachineBasicBlock::iterator II, >> - int SPAdj, RegScavenger *RS = NULL) >> const; >> + unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, >> + int SPAdj, int *Value = NULL, >> + RegScavenger *RS = NULL) const; >> >> >> void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, >> >> Modified: llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp >> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp?rev=83467&r1=83466&r2=83467&view=diff >> >> = >> = >> = >> = >> = >> = >> = >> = >> = >> ===================================================================== >> --- llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp (original) >> +++ llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp Wed Oct 7 >> 12:12:56 2009 >> @@ -579,8 +579,10 @@ >> MBB.erase(I); >> } >> >> -void X86RegisterInfo::eliminateFrameIndex >> (MachineBasicBlock::iterator II, >> - int SPAdj, RegScavenger >> *RS) const{ >> +unsigned >> +X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, >> + int SPAdj, int *Value, >> + RegScavenger *RS) const{ >> assert(SPAdj == 0 && "Unexpected"); >> >> unsigned i = 0; >> @@ -617,6 +619,7 @@ >> (uint64_t)MI.getOperand(i+3).getOffset(); >> MI.getOperand(i+3).setOffset(Offset); >> } >> + return 0; >> } >> >> void >> >> Modified: llvm/trunk/lib/Target/X86/X86RegisterInfo.h >> URL: 
http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86RegisterInfo.h?rev=83467&r1=83466&r2=83467&view=diff >> >> = >> = >> = >> = >> = >> = >> = >> = >> = >> ===================================================================== >> --- llvm/trunk/lib/Target/X86/X86RegisterInfo.h (original) >> +++ llvm/trunk/lib/Target/X86/X86RegisterInfo.h Wed Oct 7 12:12:56 >> 2009 >> @@ -139,8 +139,9 @@ >> MachineBasicBlock &MBB, >> MachineBasicBlock::iterator MI) >> const; >> >> - void eliminateFrameIndex(MachineBasicBlock::iterator MI, >> - int SPAdj, RegScavenger *RS = NULL) >> const; >> + unsigned eliminateFrameIndex(MachineBasicBlock::iterator MI, >> + int SPAdj, int *Value = NULL, >> + RegScavenger *RS = NULL) const; >> >> void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, >> RegScavenger *RS = NULL) >> const; >> >> Modified: llvm/trunk/lib/Target/XCore/XCoreRegisterInfo.cpp >> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/XCore/XCoreRegisterInfo.cpp?rev=83467&r1=83466&r2=83467&view=diff >> >> = >> = >> = >> = >> = >> = >> = >> = >> = >> ===================================================================== >> --- llvm/trunk/lib/Target/XCore/XCoreRegisterInfo.cpp (original) >> +++ llvm/trunk/lib/Target/XCore/XCoreRegisterInfo.cpp Wed Oct 7 >> 12:12:56 2009 >> @@ -171,8 +171,10 @@ >> MBB.erase(I); >> } >> >> -void XCoreRegisterInfo::eliminateFrameIndex >> (MachineBasicBlock::iterator II, >> - int SPAdj, >> RegScavenger *RS) const { >> +unsigned >> +XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator >> II, >> + int SPAdj, int *Value, >> + RegScavenger *RS) const { >> assert(SPAdj == 0 && "Unexpected"); >> MachineInstr &MI = *II; >> DebugLoc dl = MI.getDebugLoc(); >> @@ -311,6 +313,7 @@ >> } >> // Erase old instruction. 
>> MBB.erase(II); >> + return 0; >> } >> >> void >> >> Modified: llvm/trunk/lib/Target/XCore/XCoreRegisterInfo.h >> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/XCore/XCoreRegisterInfo.h?rev=83467&r1=83466&r2=83467&view=diff >> >> = >> = >> = >> = >> = >> = >> = >> = >> = >> ===================================================================== >> --- llvm/trunk/lib/Target/XCore/XCoreRegisterInfo.h (original) >> +++ llvm/trunk/lib/Target/XCore/XCoreRegisterInfo.h Wed Oct 7 >> 12:12:56 2009 >> @@ -57,8 +57,9 @@ >> MachineBasicBlock &MBB, >> MachineBasicBlock::iterator I) >> const; >> >> - void eliminateFrameIndex(MachineBasicBlock::iterator II, >> - int SPAdj, RegScavenger *RS = NULL) >> const; >> + unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, >> + int SPAdj, int *Value = NULL, >> + RegScavenger *RS = NULL) const; >> >> void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, >> RegScavenger *RS = >> NULL) const; >> >> >> _______________________________________________ >> llvm-commits mailing list >> llvm-commits at cs.uiuc.edu >> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits > From tonic at nondot.org Wed Oct 7 14:42:34 2009 From: tonic at nondot.org (Tanya Lattner) Date: Wed, 7 Oct 2009 14:42:34 -0500 Subject: [llvm-commits] CVS: llvm-www/devmtg/2009-10/index.php Message-ID: <200910071942.n97JgYAY015410@zion.cs.uiuc.edu> Changes in directory llvm-www/devmtg/2009-10: index.php updated: 1.39 -> 1.40 --- Log message: Videos are going to take awhile.. relax. --- Diffs of the changes: (+2 -0) index.php | 2 ++ 1 files changed, 2 insertions(+) Index: llvm-www/devmtg/2009-10/index.php diff -u llvm-www/devmtg/2009-10/index.php:1.39 llvm-www/devmtg/2009-10/index.php:1.40 --- llvm-www/devmtg/2009-10/index.php:1.39 Thu Oct 1 21:32:12 2009 +++ llvm-www/devmtg/2009-10/index.php Wed Oct 7 14:41:35 2009 @@ -85,6 +85,8 @@
Agenda
+

Videos will take 2-4 weeks to be online. Please be patient.

+

2009 LLVM Developers' Meeting Agenda:

From bob.wilson at apple.com Wed Oct 7 15:30:10 2009 From: bob.wilson at apple.com (Bob Wilson) Date: Wed, 07 Oct 2009 20:30:10 -0000 Subject: [llvm-commits] [llvm] r83484 - in /llvm/trunk: lib/Target/ARM/ARMISelDAGToDAG.cpp lib/Target/ARM/ARMInstrNEON.td lib/Target/ARM/NEONPreAllocPass.cpp test/CodeGen/ARM/vst3.ll Message-ID: <200910072030.n97KUB50021635@zion.cs.uiuc.edu> Author: bwilson Date: Wed Oct 7 15:30:08 2009 New Revision: 83484 URL: http://llvm.org/viewvc/llvm-project?rev=83484&view=rev Log: Add codegen support for NEON vst3 intrinsics with 128-bit vectors. Modified: llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp llvm/trunk/lib/Target/ARM/ARMInstrNEON.td llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp llvm/trunk/test/CodeGen/ARM/vst3.ll Modified: llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp?rev=83484&r1=83483&r2=83484&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp (original) +++ llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp Wed Oct 7 15:30:08 2009 @@ -1604,18 +1604,64 @@ SDValue MemAddr, MemUpdate, MemOpc; if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc)) return NULL; - switch (N->getOperand(3).getValueType().getSimpleVT().SimpleTy) { + VT = N->getOperand(3).getValueType(); + if (VT.is64BitVector()) { + switch (VT.getSimpleVT().SimpleTy) { + default: llvm_unreachable("unhandled vst3 type"); + case MVT::v8i8: Opc = ARM::VST3d8; break; + case MVT::v4i16: Opc = ARM::VST3d16; break; + case MVT::v2f32: + case MVT::v2i32: Opc = ARM::VST3d32; break; + } + SDValue Chain = N->getOperand(0); + const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, + N->getOperand(3), N->getOperand(4), + N->getOperand(5), Chain }; + return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 7); + } + // Quad registers are stored with two separate instructions, where one + // stores the even 
registers and the other stores the odd registers. + EVT RegVT; + unsigned Opc2 = 0; + switch (VT.getSimpleVT().SimpleTy) { default: llvm_unreachable("unhandled vst3 type"); - case MVT::v8i8: Opc = ARM::VST3d8; break; - case MVT::v4i16: Opc = ARM::VST3d16; break; - case MVT::v2f32: - case MVT::v2i32: Opc = ARM::VST3d32; break; + case MVT::v16i8: + Opc = ARM::VST3q8a; Opc2 = ARM::VST3q8b; RegVT = MVT::v8i8; break; + case MVT::v8i16: + Opc = ARM::VST3q16a; Opc2 = ARM::VST3q16b; RegVT = MVT::v4i16; break; + case MVT::v4f32: + Opc = ARM::VST3q32a; Opc2 = ARM::VST3q32b; RegVT = MVT::v2f32; break; + case MVT::v4i32: + Opc = ARM::VST3q32a; Opc2 = ARM::VST3q32b; RegVT = MVT::v2i32; break; } SDValue Chain = N->getOperand(0); - const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, - N->getOperand(3), N->getOperand(4), - N->getOperand(5), Chain }; - return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 7); + // Enable writeback to the address register. + MemOpc = CurDAG->getTargetConstant(ARM_AM::getAM6Opc(true), MVT::i32); + + SDValue D0 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT, + N->getOperand(3)); + SDValue D2 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT, + N->getOperand(4)); + SDValue D4 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT, + N->getOperand(5)); + const SDValue OpsA[] = { MemAddr, MemUpdate, MemOpc, D0, D2, D4, Chain }; + SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(), + MVT::Other, OpsA, 7); + Chain = SDValue(VStA, 1); + + SDValue D1 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT, + N->getOperand(3)); + SDValue D3 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT, + N->getOperand(4)); + SDValue D5 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT, + N->getOperand(5)); + MemAddr = SDValue(VStA, 0); + const SDValue OpsB[] = { MemAddr, MemUpdate, MemOpc, D1, D3, D5, Chain }; + SDNode *VStB = CurDAG->getMachineNode(Opc2, dl, MemAddr.getValueType(), + MVT::Other, OpsB, 
7); + Chain = SDValue(VStB, 1); + ReplaceUses(SDValue(N, 0), Chain); + return NULL; } case Intrinsic::arm_neon_vst4: { Modified: llvm/trunk/lib/Target/ARM/ARMInstrNEON.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrNEON.td?rev=83484&r1=83483&r2=83484&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrNEON.td (original) +++ llvm/trunk/lib/Target/ARM/ARMInstrNEON.td Wed Oct 7 15:30:08 2009 @@ -335,11 +335,26 @@ : NLdSt<(outs), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST, !strconcat(OpcodeStr, "\t\\{$src1,$src2,$src3\\}, $addr"), "", []>; +class VST3WB + : NLdSt<(outs GPR:$wb), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, + DPR:$src3), IIC_VST, + !strconcat(OpcodeStr, "\t\\{$src1,$src2,$src3\\}, $addr"), + "$addr.addr = $wb", []>; def VST3d8 : VST3D<"vst3.8">; def VST3d16 : VST3D<"vst3.16">; def VST3d32 : VST3D<"vst3.32">; +// vst3 to double-spaced even registers. +def VST3q8a : VST3WB<"vst3.8">; +def VST3q16a : VST3WB<"vst3.16">; +def VST3q32a : VST3WB<"vst3.32">; + +// vst3 to double-spaced odd registers. 
+def VST3q8b : VST3WB<"vst3.8">; +def VST3q16b : VST3WB<"vst3.16">; +def VST3q32b : VST3WB<"vst3.32">; + // VST4 : Vector Store (multiple 4-element structures) class VST4D : NLdSt<(outs), (ins addrmode6:$addr, Modified: llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp?rev=83484&r1=83483&r2=83484&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp (original) +++ llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp Wed Oct 7 15:30:08 2009 @@ -146,6 +146,24 @@ NumRegs = 3; return true; + case ARM::VST3q8a: + case ARM::VST3q16a: + case ARM::VST3q32a: + FirstOpnd = 4; + NumRegs = 3; + Offset = 0; + Stride = 2; + return true; + + case ARM::VST3q8b: + case ARM::VST3q16b: + case ARM::VST3q32b: + FirstOpnd = 4; + NumRegs = 3; + Offset = 1; + Stride = 2; + return true; + case ARM::VST4d8: case ARM::VST4d16: case ARM::VST4d32: Modified: llvm/trunk/test/CodeGen/ARM/vst3.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vst3.ll?rev=83484&r1=83483&r2=83484&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/ARM/vst3.ll (original) +++ llvm/trunk/test/CodeGen/ARM/vst3.ll Wed Oct 7 15:30:08 2009 @@ -32,7 +32,48 @@ ret void } +define void @vst3Qi8(i8* %A, <16 x i8>* %B) nounwind { +;CHECK: vst3Qi8: +;CHECK: vst3.8 +;CHECK: vst3.8 + %tmp1 = load <16 x i8>* %B + call void @llvm.arm.neon.vst3.v16i8(i8* %A, <16 x i8> %tmp1, <16 x i8> %tmp1, <16 x i8> %tmp1) + ret void +} + +define void @vst3Qi16(i16* %A, <8 x i16>* %B) nounwind { +;CHECK: vst3Qi16: +;CHECK: vst3.16 +;CHECK: vst3.16 + %tmp1 = load <8 x i16>* %B + call void @llvm.arm.neon.vst3.v8i16(i16* %A, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1) + ret void +} + +define void @vst3Qi32(i32* %A, <4 x i32>* %B) nounwind { +;CHECK: vst3Qi32: +;CHECK: vst3.32 +;CHECK: vst3.32 + 
%tmp1 = load <4 x i32>* %B + call void @llvm.arm.neon.vst3.v4i32(i32* %A, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1) + ret void +} + +define void @vst3Qf(float* %A, <4 x float>* %B) nounwind { +;CHECK: vst3Qf: +;CHECK: vst3.32 +;CHECK: vst3.32 + %tmp1 = load <4 x float>* %B + call void @llvm.arm.neon.vst3.v4f32(float* %A, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1) + ret void +} + declare void @llvm.arm.neon.vst3.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>) nounwind declare void @llvm.arm.neon.vst3.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>) nounwind declare void @llvm.arm.neon.vst3.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>) nounwind declare void @llvm.arm.neon.vst3.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>) nounwind + +declare void @llvm.arm.neon.vst3.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>) nounwind +declare void @llvm.arm.neon.vst3.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>) nounwind +declare void @llvm.arm.neon.vst3.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>) nounwind +declare void @llvm.arm.neon.vst3.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>) nounwind From bob.wilson at apple.com Wed Oct 7 15:49:18 2009 From: bob.wilson at apple.com (Bob Wilson) Date: Wed, 07 Oct 2009 20:49:18 -0000 Subject: [llvm-commits] [llvm] r83486 - in /llvm/trunk: lib/Target/ARM/ARMISelDAGToDAG.cpp lib/Target/ARM/ARMInstrNEON.td lib/Target/ARM/NEONPreAllocPass.cpp test/CodeGen/ARM/vst4.ll Message-ID: <200910072049.n97KnI3I024181@zion.cs.uiuc.edu> Author: bwilson Date: Wed Oct 7 15:49:18 2009 New Revision: 83486 URL: http://llvm.org/viewvc/llvm-project?rev=83486&view=rev Log: Add codegen support for NEON vst4 intrinsics with 128-bit vectors. 
Modified: llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp llvm/trunk/lib/Target/ARM/ARMInstrNEON.td llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp llvm/trunk/test/CodeGen/ARM/vst4.ll Modified: llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp?rev=83486&r1=83485&r2=83486&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp (original) +++ llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp Wed Oct 7 15:49:18 2009 @@ -1668,18 +1668,70 @@ SDValue MemAddr, MemUpdate, MemOpc; if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc)) return NULL; - switch (N->getOperand(3).getValueType().getSimpleVT().SimpleTy) { + VT = N->getOperand(3).getValueType(); + if (VT.is64BitVector()) { + switch (VT.getSimpleVT().SimpleTy) { + default: llvm_unreachable("unhandled vst4 type"); + case MVT::v8i8: Opc = ARM::VST4d8; break; + case MVT::v4i16: Opc = ARM::VST4d16; break; + case MVT::v2f32: + case MVT::v2i32: Opc = ARM::VST4d32; break; + } + SDValue Chain = N->getOperand(0); + const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, + N->getOperand(3), N->getOperand(4), + N->getOperand(5), N->getOperand(6), Chain }; + return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 8); + } + // Quad registers are stored with two separate instructions, where one + // stores the even registers and the other stores the odd registers. 
+ EVT RegVT; + unsigned Opc2 = 0; + switch (VT.getSimpleVT().SimpleTy) { default: llvm_unreachable("unhandled vst4 type"); - case MVT::v8i8: Opc = ARM::VST4d8; break; - case MVT::v4i16: Opc = ARM::VST4d16; break; - case MVT::v2f32: - case MVT::v2i32: Opc = ARM::VST4d32; break; + case MVT::v16i8: + Opc = ARM::VST4q8a; Opc2 = ARM::VST4q8b; RegVT = MVT::v8i8; break; + case MVT::v8i16: + Opc = ARM::VST4q16a; Opc2 = ARM::VST4q16b; RegVT = MVT::v4i16; break; + case MVT::v4f32: + Opc = ARM::VST4q32a; Opc2 = ARM::VST4q32b; RegVT = MVT::v2f32; break; + case MVT::v4i32: + Opc = ARM::VST4q32a; Opc2 = ARM::VST4q32b; RegVT = MVT::v2i32; break; } SDValue Chain = N->getOperand(0); - const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, - N->getOperand(3), N->getOperand(4), - N->getOperand(5), N->getOperand(6), Chain }; - return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 8); + // Enable writeback to the address register. + MemOpc = CurDAG->getTargetConstant(ARM_AM::getAM6Opc(true), MVT::i32); + + SDValue D0 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT, + N->getOperand(3)); + SDValue D2 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT, + N->getOperand(4)); + SDValue D4 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT, + N->getOperand(5)); + SDValue D6 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT, + N->getOperand(6)); + const SDValue OpsA[] = { MemAddr, MemUpdate, MemOpc, + D0, D2, D4, D6, Chain }; + SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(), + MVT::Other, OpsA, 8); + Chain = SDValue(VStA, 1); + + SDValue D1 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT, + N->getOperand(3)); + SDValue D3 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT, + N->getOperand(4)); + SDValue D5 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT, + N->getOperand(5)); + SDValue D7 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT, + N->getOperand(6)); + MemAddr = SDValue(VStA, 0); + const 
SDValue OpsB[] = { MemAddr, MemUpdate, MemOpc, + D1, D3, D5, D7, Chain }; + SDNode *VStB = CurDAG->getMachineNode(Opc2, dl, MemAddr.getValueType(), + MVT::Other, OpsB, 8); + Chain = SDValue(VStB, 1); + ReplaceUses(SDValue(N, 0), Chain); + return NULL; } case Intrinsic::arm_neon_vst2lane: { Modified: llvm/trunk/lib/Target/ARM/ARMInstrNEON.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrNEON.td?rev=83486&r1=83485&r2=83486&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrNEON.td (original) +++ llvm/trunk/lib/Target/ARM/ARMInstrNEON.td Wed Oct 7 15:49:18 2009 @@ -361,11 +361,26 @@ DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST, !strconcat(OpcodeStr, "\t\\{$src1,$src2,$src3,$src4\\}, $addr"), "", []>; +class VST4WB + : NLdSt<(outs GPR:$wb), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, + DPR:$src3, DPR:$src4), IIC_VST, + !strconcat(OpcodeStr, "\t\\{$src1,$src2,$src3,$src4\\}, $addr"), + "$addr.addr = $wb", []>; def VST4d8 : VST4D<"vst4.8">; def VST4d16 : VST4D<"vst4.16">; def VST4d32 : VST4D<"vst4.32">; +// vst4 to double-spaced even registers. +def VST4q8a : VST4WB<"vst4.8">; +def VST4q16a : VST4WB<"vst4.16">; +def VST4q32a : VST4WB<"vst4.32">; + +// vst4 to double-spaced odd registers. 
+def VST4q8b : VST4WB<"vst4.8">; +def VST4q16b : VST4WB<"vst4.16">; +def VST4q32b : VST4WB<"vst4.32">; + // VST2LN : Vector Store (single 2-element structure from one lane) class VST2LND : NLdSt<(outs), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane), Modified: llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp?rev=83486&r1=83485&r2=83486&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp (original) +++ llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp Wed Oct 7 15:49:18 2009 @@ -174,6 +174,24 @@ NumRegs = 4; return true; + case ARM::VST4q8a: + case ARM::VST4q16a: + case ARM::VST4q32a: + FirstOpnd = 4; + NumRegs = 4; + Offset = 0; + Stride = 2; + return true; + + case ARM::VST4q8b: + case ARM::VST4q16b: + case ARM::VST4q32b: + FirstOpnd = 4; + NumRegs = 4; + Offset = 1; + Stride = 2; + return true; + case ARM::VTBL2: FirstOpnd = 1; NumRegs = 2; Modified: llvm/trunk/test/CodeGen/ARM/vst4.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vst4.ll?rev=83486&r1=83485&r2=83486&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/ARM/vst4.ll (original) +++ llvm/trunk/test/CodeGen/ARM/vst4.ll Wed Oct 7 15:49:18 2009 @@ -32,7 +32,48 @@ ret void } +define void @vst4Qi8(i8* %A, <16 x i8>* %B) nounwind { +;CHECK: vst4Qi8: +;CHECK: vst4.8 +;CHECK: vst4.8 + %tmp1 = load <16 x i8>* %B + call void @llvm.arm.neon.vst4.v16i8(i8* %A, <16 x i8> %tmp1, <16 x i8> %tmp1, <16 x i8> %tmp1, <16 x i8> %tmp1) + ret void +} + +define void @vst4Qi16(i16* %A, <8 x i16>* %B) nounwind { +;CHECK: vst4Qi16: +;CHECK: vst4.16 +;CHECK: vst4.16 + %tmp1 = load <8 x i16>* %B + call void @llvm.arm.neon.vst4.v8i16(i16* %A, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1) + ret void +} + +define void @vst4Qi32(i32* %A, <4 x 
i32>* %B) nounwind { +;CHECK: vst4Qi32: +;CHECK: vst4.32 +;CHECK: vst4.32 + %tmp1 = load <4 x i32>* %B + call void @llvm.arm.neon.vst4.v4i32(i32* %A, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1) + ret void +} + +define void @vst4Qf(float* %A, <4 x float>* %B) nounwind { +;CHECK: vst4Qf: +;CHECK: vst4.32 +;CHECK: vst4.32 + %tmp1 = load <4 x float>* %B + call void @llvm.arm.neon.vst4.v4f32(float* %A, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1) + ret void +} + declare void @llvm.arm.neon.vst4.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>) nounwind declare void @llvm.arm.neon.vst4.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>) nounwind declare void @llvm.arm.neon.vst4.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>) nounwind declare void @llvm.arm.neon.vst4.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>) nounwind + +declare void @llvm.arm.neon.vst4.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind +declare void @llvm.arm.neon.vst4.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>) nounwind +declare void @llvm.arm.neon.vst4.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>) nounwind +declare void @llvm.arm.neon.vst4.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>) nounwind From bob.wilson at apple.com Wed Oct 7 15:51:42 2009 From: bob.wilson at apple.com (Bob Wilson) Date: Wed, 07 Oct 2009 20:51:42 -0000 Subject: [llvm-commits] [llvm] r83487 - /llvm/trunk/test/CodeGen/ARM/vicmp.ll Message-ID: <200910072051.n97KpgoA024489@zion.cs.uiuc.edu> Author: bwilson Date: Wed Oct 7 15:51:42 2009 New Revision: 83487 URL: http://llvm.org/viewvc/llvm-project?rev=83487&view=rev Log: Convert test to FileCheck. 
Modified: llvm/trunk/test/CodeGen/ARM/vicmp.ll Modified: llvm/trunk/test/CodeGen/ARM/vicmp.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vicmp.ll?rev=83487&r1=83486&r2=83487&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/ARM/vicmp.ll (original) +++ llvm/trunk/test/CodeGen/ARM/vicmp.ll Wed Oct 7 15:51:42 2009 @@ -1,12 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+neon > %t -; RUN: grep {vceq\\.i8} %t | count 2 -; RUN: grep {vceq\\.i16} %t | count 2 -; RUN: grep {vceq\\.i32} %t | count 2 -; RUN: grep vmvn %t | count 6 -; RUN: grep {vcgt\\.s8} %t | count 1 -; RUN: grep {vcge\\.s16} %t | count 1 -; RUN: grep {vcgt\\.u16} %t | count 1 -; RUN: grep {vcge\\.u32} %t | count 1 +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s ; This tests icmp operations that do not map directly to NEON instructions. ; Not-equal (ne) operations are implemented by VCEQ/VMVN. Less-than (lt/ult) @@ -15,6 +7,9 @@ ; the other operations. 
define <8 x i8> @vcnei8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vcnei8: +;CHECK: vceq.i8 +;CHECK-NEXT: vmvn %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B %tmp3 = icmp ne <8 x i8> %tmp1, %tmp2 @@ -23,6 +18,9 @@ } define <4 x i16> @vcnei16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vcnei16: +;CHECK: vceq.i16 +;CHECK-NEXT: vmvn %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B %tmp3 = icmp ne <4 x i16> %tmp1, %tmp2 @@ -31,6 +29,9 @@ } define <2 x i32> @vcnei32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vcnei32: +;CHECK: vceq.i32 +;CHECK-NEXT: vmvn %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B %tmp3 = icmp ne <2 x i32> %tmp1, %tmp2 @@ -39,6 +40,9 @@ } define <16 x i8> @vcneQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vcneQi8: +;CHECK: vceq.i8 +;CHECK-NEXT: vmvn %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B %tmp3 = icmp ne <16 x i8> %tmp1, %tmp2 @@ -47,6 +51,9 @@ } define <8 x i16> @vcneQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vcneQi16: +;CHECK: vceq.i16 +;CHECK-NEXT: vmvn %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B %tmp3 = icmp ne <8 x i16> %tmp1, %tmp2 @@ -55,6 +62,9 @@ } define <4 x i32> @vcneQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vcneQi32: +;CHECK: vceq.i32 +;CHECK-NEXT: vmvn %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B %tmp3 = icmp ne <4 x i32> %tmp1, %tmp2 @@ -63,6 +73,8 @@ } define <16 x i8> @vcltQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vcltQs8: +;CHECK: vcgt.s8 %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B %tmp3 = icmp slt <16 x i8> %tmp1, %tmp2 @@ -71,6 +83,8 @@ } define <4 x i16> @vcles16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vcles16: +;CHECK: vcge.s16 %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B %tmp3 = icmp sle <4 x i16> %tmp1, %tmp2 @@ -79,6 +93,8 @@ } define <4 x i16> @vcltu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vcltu16: +;CHECK: vcgt.u16 %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B %tmp3 
= icmp ult <4 x i16> %tmp1, %tmp2 @@ -87,6 +103,8 @@ } define <4 x i32> @vcleQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vcleQu32: +;CHECK: vcge.u32 %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B %tmp3 = icmp ule <4 x i32> %tmp1, %tmp2 From kennethuil at gmail.com Wed Oct 7 15:56:24 2009 From: kennethuil at gmail.com (Kenneth Uildriks) Date: Wed, 7 Oct 2009 15:56:24 -0500 Subject: [llvm-commits] [PATCH] Additions to C-bindings In-Reply-To: <400d33ea0910070606t4d3fbf61nd07c20263cb80e4c@mail.gmail.com> References: <400d33ea0910061935p5eba5545k9220d95d1419f0cf@mail.gmail.com> <400d33ea0910070452j1fd69311n8caa49e79d3c7967@mail.gmail.com> <400d33ea0910070606t4d3fbf61nd07c20263cb80e4c@mail.gmail.com> Message-ID: <400d33ea0910071356r6f095844y6610ed092039312f@mail.gmail.com> Also, I have seen occasional comments to the effect that the C-bindings have some gaps in them, and I think filling them would be a good thing. We cannot thereby break any existing apps. From enderby at apple.com Wed Oct 7 15:57:21 2009 From: enderby at apple.com (Kevin Enderby) Date: Wed, 07 Oct 2009 20:57:21 -0000 Subject: [llvm-commits] [llvm] r83488 - in /llvm/trunk: lib/MC/MCSectionMachO.cpp test/MC/MachO/sections.s Message-ID: <200910072057.n97KvLLb025255@zion.cs.uiuc.edu> Author: enderby Date: Wed Oct 7 15:57:20 2009 New Revision: 83488 URL: http://llvm.org/viewvc/llvm-project?rev=83488&view=rev Log: Fixed MCSectionMachO::ParseSectionSpecifier to allow an attribute of "none" so that a symbol stub section with no attributes can be parsed as in: .section __TEXT,__picsymbolstub4,symbol_stubs,none,16 Modified: llvm/trunk/lib/MC/MCSectionMachO.cpp llvm/trunk/test/MC/MachO/sections.s Modified: llvm/trunk/lib/MC/MCSectionMachO.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/MC/MCSectionMachO.cpp?rev=83488&r1=83487&r2=83488&view=diff ============================================================================== --- llvm/trunk/lib/MC/MCSectionMachO.cpp (original) +++ 
llvm/trunk/lib/MC/MCSectionMachO.cpp Wed Oct 7 15:57:20 2009 @@ -40,8 +40,8 @@ /// SectionAttrDescriptors - This is an array of descriptors for section /// attributes. Unlike the SectionTypeDescriptors, this is not directly indexed -/// by attribute, instead it is searched. The last entry has a zero AttrFlag -/// value. +/// by attribute, instead it is searched. The last entry has an AttrFlagEnd +/// AttrFlag value. static const struct { unsigned AttrFlag; const char *AssemblerName, *EnumName; @@ -59,7 +59,9 @@ ENTRY(0 /*FIXME*/, S_ATTR_EXT_RELOC) ENTRY(0 /*FIXME*/, S_ATTR_LOC_RELOC) #undef ENTRY - { 0, "none", 0 } + { 0, "none", 0 }, // used if section has no attributes but has a stub size +#define AttrFlagEnd 0xffffffff // non legal value, multiple attribute bits set + { AttrFlagEnd, 0, 0 } }; @@ -228,7 +230,7 @@ // Look up the attribute. for (unsigned i = 0; ; ++i) { - if (SectionAttrDescriptors[i].AttrFlag == 0) + if (SectionAttrDescriptors[i].AttrFlag == AttrFlagEnd) return "mach-o section specifier has invalid attribute"; if (SectionAttrDescriptors[i].AssemblerName && Modified: llvm/trunk/test/MC/MachO/sections.s URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/MachO/sections.s?rev=83488&r1=83487&r2=83488&view=diff ============================================================================== --- llvm/trunk/test/MC/MachO/sections.s (original) +++ llvm/trunk/test/MC/MachO/sections.s Wed Oct 7 15:57:20 2009 @@ -44,6 +44,7 @@ // .objc_meth_var_names .objc_selector_strs + .section __TEXT,__picsymbolstub4,symbol_stubs,none,16 .subsections_via_symbols @@ -51,20 +52,20 @@ // CHECK: ('cpusubtype', 3) // CHECK: ('filetype', 1) // CHECK: ('num_load_commands', 1) -// CHECK: ('load_commands_size', 2436) +// CHECK: ('load_commands_size', 2504) // CHECK: ('flag', 8192) // CHECK: ('load_commands', [ // CHECK: # Load Command 0 // CHECK: (('command', 1) -// CHECK: ('size', 2436) +// CHECK: ('size', 2504) // CHECK: ('segment_name', 
'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') // CHECK: ('vm_addr', 0) // CHECK: ('vm_size', 0) -// CHECK: ('file_offset', 2464) +// CHECK: ('file_offset', 2532) // CHECK: ('file_size', 0) // CHECK: ('maxprot', 7) // CHECK: ('initprot', 7) -// CHECK: ('num_sections', 35) +// CHECK: ('num_sections', 36) // CHECK: ('flags', 0) // CHECK: ('sections', [ // CHECK: # Section 0 @@ -72,7 +73,7 @@ // CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') // CHECK: ('address', 0) // CHECK: ('size', 0) -// CHECK: ('offset', 2464) +// CHECK: ('offset', 2532) // CHECK: ('alignment', 0) // CHECK: ('reloc_offset', 0) // CHECK: ('num_reloc', 0) @@ -85,7 +86,7 @@ // CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') // CHECK: ('address', 0) // CHECK: ('size', 0) -// CHECK: ('offset', 2464) +// CHECK: ('offset', 2532) // CHECK: ('alignment', 0) // CHECK: ('reloc_offset', 0) // CHECK: ('num_reloc', 0) @@ -98,7 +99,7 @@ // CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') // CHECK: ('address', 0) // CHECK: ('size', 0) -// CHECK: ('offset', 2464) +// CHECK: ('offset', 2532) // CHECK: ('alignment', 0) // CHECK: ('reloc_offset', 0) // CHECK: ('num_reloc', 0) @@ -111,7 +112,7 @@ // CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') // CHECK: ('address', 0) // CHECK: ('size', 0) -// CHECK: ('offset', 2464) +// CHECK: ('offset', 2532) // CHECK: ('alignment', 0) // CHECK: ('reloc_offset', 0) // CHECK: ('num_reloc', 0) @@ -124,7 +125,7 @@ // CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') // CHECK: ('address', 0) // CHECK: ('size', 0) -// CHECK: ('offset', 2464) +// CHECK: ('offset', 2532) // CHECK: ('alignment', 2) // CHECK: ('reloc_offset', 0) // CHECK: ('num_reloc', 0) @@ -137,7 +138,7 @@ // CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') // CHECK: ('address', 0) // CHECK: ('size', 0) -// CHECK: ('offset', 2464) +// CHECK: 
('offset', 2532) // CHECK: ('alignment', 3) // CHECK: ('reloc_offset', 0) // CHECK: ('num_reloc', 0) @@ -150,7 +151,7 @@ // CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') // CHECK: ('address', 0) // CHECK: ('size', 0) -// CHECK: ('offset', 2464) +// CHECK: ('offset', 2532) // CHECK: ('alignment', 4) // CHECK: ('reloc_offset', 0) // CHECK: ('num_reloc', 0) @@ -163,7 +164,7 @@ // CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') // CHECK: ('address', 0) // CHECK: ('size', 0) -// CHECK: ('offset', 2464) +// CHECK: ('offset', 2532) // CHECK: ('alignment', 0) // CHECK: ('reloc_offset', 0) // CHECK: ('num_reloc', 0) @@ -176,7 +177,7 @@ // CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') // CHECK: ('address', 0) // CHECK: ('size', 0) -// CHECK: ('offset', 2464) +// CHECK: ('offset', 2532) // CHECK: ('alignment', 0) // CHECK: ('reloc_offset', 0) // CHECK: ('num_reloc', 0) @@ -189,7 +190,7 @@ // CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') // CHECK: ('address', 0) // CHECK: ('size', 0) -// CHECK: ('offset', 2464) +// CHECK: ('offset', 2532) // CHECK: ('alignment', 0) // CHECK: ('reloc_offset', 0) // CHECK: ('num_reloc', 0) @@ -202,7 +203,7 @@ // CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') // CHECK: ('address', 0) // CHECK: ('size', 0) -// CHECK: ('offset', 2464) +// CHECK: ('offset', 2532) // CHECK: ('alignment', 0) // CHECK: ('reloc_offset', 0) // CHECK: ('num_reloc', 0) @@ -215,7 +216,7 @@ // CHECK: ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') // CHECK: ('address', 0) // CHECK: ('size', 0) -// CHECK: ('offset', 2464) +// CHECK: ('offset', 2532) // CHECK: ('alignment', 0) // CHECK: ('reloc_offset', 0) // CHECK: ('num_reloc', 0) @@ -228,7 +229,7 @@ // CHECK: ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') // CHECK: ('address', 0) // CHECK: ('size', 0) -// CHECK: ('offset', 2464) +// CHECK: ('offset', 
2532) // CHECK: ('alignment', 0) // CHECK: ('reloc_offset', 0) // CHECK: ('num_reloc', 0) @@ -241,7 +242,7 @@ // CHECK: ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') // CHECK: ('address', 0) // CHECK: ('size', 0) -// CHECK: ('offset', 2464) +// CHECK: ('offset', 2532) // CHECK: ('alignment', 2) // CHECK: ('reloc_offset', 0) // CHECK: ('num_reloc', 0) @@ -254,7 +255,7 @@ // CHECK: ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') // CHECK: ('address', 0) // CHECK: ('size', 0) -// CHECK: ('offset', 2464) +// CHECK: ('offset', 2532) // CHECK: ('alignment', 2) // CHECK: ('reloc_offset', 0) // CHECK: ('num_reloc', 0) @@ -267,7 +268,7 @@ // CHECK: ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') // CHECK: ('address', 0) // CHECK: ('size', 0) -// CHECK: ('offset', 2464) +// CHECK: ('offset', 2532) // CHECK: ('alignment', 0) // CHECK: ('reloc_offset', 0) // CHECK: ('num_reloc', 0) @@ -280,7 +281,7 @@ // CHECK: ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') // CHECK: ('address', 0) // CHECK: ('size', 0) -// CHECK: ('offset', 2464) +// CHECK: ('offset', 2532) // CHECK: ('alignment', 2) // CHECK: ('reloc_offset', 0) // CHECK: ('num_reloc', 0) @@ -293,7 +294,7 @@ // CHECK: ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') // CHECK: ('address', 0) // CHECK: ('size', 0) -// CHECK: ('offset', 2464) +// CHECK: ('offset', 2532) // CHECK: ('alignment', 2) // CHECK: ('reloc_offset', 0) // CHECK: ('num_reloc', 0) @@ -306,7 +307,7 @@ // CHECK: ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') // CHECK: ('address', 0) // CHECK: ('size', 0) -// CHECK: ('offset', 2464) +// CHECK: ('offset', 2532) // CHECK: ('alignment', 0) // CHECK: ('reloc_offset', 0) // CHECK: ('num_reloc', 0) @@ -319,7 +320,7 @@ // CHECK: ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') // CHECK: ('address', 0) // CHECK: ('size', 0) -// CHECK: ('offset', 2464) +// CHECK: ('offset', 2532) // 
CHECK: ('alignment', 0) // CHECK: ('reloc_offset', 0) // CHECK: ('num_reloc', 0) @@ -332,7 +333,7 @@ // CHECK: ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') // CHECK: ('address', 0) // CHECK: ('size', 0) -// CHECK: ('offset', 2464) +// CHECK: ('offset', 2532) // CHECK: ('alignment', 0) // CHECK: ('reloc_offset', 0) // CHECK: ('num_reloc', 0) @@ -345,7 +346,7 @@ // CHECK: ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') // CHECK: ('address', 0) // CHECK: ('size', 0) -// CHECK: ('offset', 2464) +// CHECK: ('offset', 2532) // CHECK: ('alignment', 0) // CHECK: ('reloc_offset', 0) // CHECK: ('num_reloc', 0) @@ -358,7 +359,7 @@ // CHECK: ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') // CHECK: ('address', 0) // CHECK: ('size', 0) -// CHECK: ('offset', 2464) +// CHECK: ('offset', 2532) // CHECK: ('alignment', 0) // CHECK: ('reloc_offset', 0) // CHECK: ('num_reloc', 0) @@ -371,7 +372,7 @@ // CHECK: ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') // CHECK: ('address', 0) // CHECK: ('size', 0) -// CHECK: ('offset', 2464) +// CHECK: ('offset', 2532) // CHECK: ('alignment', 0) // CHECK: ('reloc_offset', 0) // CHECK: ('num_reloc', 0) @@ -384,7 +385,7 @@ // CHECK: ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') // CHECK: ('address', 0) // CHECK: ('size', 0) -// CHECK: ('offset', 2464) +// CHECK: ('offset', 2532) // CHECK: ('alignment', 0) // CHECK: ('reloc_offset', 0) // CHECK: ('num_reloc', 0) @@ -397,7 +398,7 @@ // CHECK: ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') // CHECK: ('address', 0) // CHECK: ('size', 0) -// CHECK: ('offset', 2464) +// CHECK: ('offset', 2532) // CHECK: ('alignment', 0) // CHECK: ('reloc_offset', 0) // CHECK: ('num_reloc', 0) @@ -410,7 +411,7 @@ // CHECK: ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') // CHECK: ('address', 0) // CHECK: ('size', 0) -// CHECK: ('offset', 2464) +// CHECK: ('offset', 2532) // CHECK: 
('alignment', 0) // CHECK: ('reloc_offset', 0) // CHECK: ('num_reloc', 0) @@ -423,7 +424,7 @@ // CHECK: ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') // CHECK: ('address', 0) // CHECK: ('size', 0) -// CHECK: ('offset', 2464) +// CHECK: ('offset', 2532) // CHECK: ('alignment', 2) // CHECK: ('reloc_offset', 0) // CHECK: ('num_reloc', 0) @@ -436,7 +437,7 @@ // CHECK: ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') // CHECK: ('address', 0) // CHECK: ('size', 0) -// CHECK: ('offset', 2464) +// CHECK: ('offset', 2532) // CHECK: ('alignment', 2) // CHECK: ('reloc_offset', 0) // CHECK: ('num_reloc', 0) @@ -449,7 +450,7 @@ // CHECK: ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') // CHECK: ('address', 0) // CHECK: ('size', 0) -// CHECK: ('offset', 2464) +// CHECK: ('offset', 2532) // CHECK: ('alignment', 0) // CHECK: ('reloc_offset', 0) // CHECK: ('num_reloc', 0) @@ -462,7 +463,7 @@ // CHECK: ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') // CHECK: ('address', 0) // CHECK: ('size', 0) -// CHECK: ('offset', 2464) +// CHECK: ('offset', 2532) // CHECK: ('alignment', 0) // CHECK: ('reloc_offset', 0) // CHECK: ('num_reloc', 0) @@ -475,7 +476,7 @@ // CHECK: ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') // CHECK: ('address', 0) // CHECK: ('size', 0) -// CHECK: ('offset', 2464) +// CHECK: ('offset', 2532) // CHECK: ('alignment', 0) // CHECK: ('reloc_offset', 0) // CHECK: ('num_reloc', 0) @@ -488,7 +489,7 @@ // CHECK: ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') // CHECK: ('address', 0) // CHECK: ('size', 0) -// CHECK: ('offset', 2464) +// CHECK: ('offset', 2532) // CHECK: ('alignment', 0) // CHECK: ('reloc_offset', 0) // CHECK: ('num_reloc', 0) @@ -501,7 +502,7 @@ // CHECK: ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') // CHECK: ('address', 0) // CHECK: ('size', 0) -// CHECK: ('offset', 2464) +// CHECK: ('offset', 2532) // CHECK: 
('alignment', 0) // CHECK: ('reloc_offset', 0) // CHECK: ('num_reloc', 0) @@ -514,13 +515,25 @@ // CHECK: ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') // CHECK: ('address', 0) // CHECK: ('size', 0) -// CHECK: ('offset', 2464) +// CHECK: ('offset', 2532) // CHECK: ('alignment', 0) // CHECK: ('reloc_offset', 0) // CHECK: ('num_reloc', 0) // CHECK: ('flags', 0x2) // CHECK: ('reserved1', 0) // CHECK: ('reserved2', 0) +// CHECK: # Section 35 +// CHECK: (('section_name', '__picsymbolstub4') +// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') +// CHECK: ('address', 0) +// CHECK: ('size', 0) +// CHECK: ('offset', 2532) +// CHECK: ('alignment', 0) +// CHECK: ('reloc_offset', 0) +// CHECK: ('num_reloc', 0) +// CHECK: ('flags', 0x8) +// CHECK: ('reserved1', 0) +// CHECK: ('reserved2', 16) // CHECK: ), // CHECK: ]) // CHECK: ), From echristo at apple.com Wed Oct 7 16:04:31 2009 From: echristo at apple.com (Eric Christopher) Date: Wed, 7 Oct 2009 14:04:31 -0700 Subject: [llvm-commits] [PATCH] Additions to C-bindings In-Reply-To: <400d33ea0910070606t4d3fbf61nd07c20263cb80e4c@mail.gmail.com> References: <400d33ea0910061935p5eba5545k9220d95d1419f0cf@mail.gmail.com> <400d33ea0910070452j1fd69311n8caa49e79d3c7967@mail.gmail.com> <400d33ea0910070606t4d3fbf61nd07c20263cb80e4c@mail.gmail.com> Message-ID: <6B93A3F9-D941-4C1C-921A-D828E972F1A0@apple.com> >>> >> >> I wanted to call them from generated code and from my language. >> > Not only do I want to end up with a self-hosting compiler, but the > design of my compiler requires that much of the code generation code > is actually generated and JITted when the compiler is running. This > code generation code need to call a C interface to do its work. This seems... odd. I'm not quite sure what you're doing, but language bindings for your language would seem to be a better solution. If I'm missing something feel free to correct me. 
-eric From echristo at apple.com Wed Oct 7 16:14:25 2009 From: echristo at apple.com (Eric Christopher) Date: Wed, 07 Oct 2009 21:14:25 -0000 Subject: [llvm-commits] [llvm] r83489 - /llvm/trunk/lib/Transforms/Scalar/SimplifyLibCalls.cpp Message-ID: <200910072114.n97LEPkP027433@zion.cs.uiuc.edu> Author: echristo Date: Wed Oct 7 16:14:25 2009 New Revision: 83489 URL: http://llvm.org/viewvc/llvm-project?rev=83489&view=rev Log: 80-column and whitespace fixes. Modified: llvm/trunk/lib/Transforms/Scalar/SimplifyLibCalls.cpp Modified: llvm/trunk/lib/Transforms/Scalar/SimplifyLibCalls.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/SimplifyLibCalls.cpp?rev=83489&r1=83488&r2=83489&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/Scalar/SimplifyLibCalls.cpp (original) +++ llvm/trunk/lib/Transforms/Scalar/SimplifyLibCalls.cpp Wed Oct 7 16:14:25 2009 @@ -57,9 +57,9 @@ /// performed. If it returns CI, then it transformed the call and CI is to be /// deleted. If it returns something else, replace CI with the new value and /// delete CI. - virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) + virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) =0; - + Value *OptimizeCall(CallInst *CI, const TargetData *TD, IRBuilder<> &B) { Caller = CI->getParent()->getParent(); this->TD = TD; @@ -75,12 +75,12 @@ /// specified pointer. Ptr is required to be some pointer type, and the /// return value has 'intptr_t' type. Value *EmitStrLen(Value *Ptr, IRBuilder<> &B); - + /// EmitMemCpy - Emit a call to the memcpy function to the builder. This /// always expects that the size has type 'intptr_t' and Dst/Src are pointers. - Value *EmitMemCpy(Value *Dst, Value *Src, Value *Len, + Value *EmitMemCpy(Value *Dst, Value *Src, Value *Len, unsigned Align, IRBuilder<> &B); - + /// EmitMemChr - Emit a call to the memchr function. 
This assumes that Ptr is /// a pointer, Val is an i32 value, and Len is an 'intptr_t' value. Value *EmitMemChr(Value *Ptr, Value *Val, Value *Len, IRBuilder<> &B); @@ -97,27 +97,27 @@ /// is added as the suffix of name, if 'Op' is a float, we add a 'f' suffix. Value *EmitUnaryFloatFnCall(Value *Op, const char *Name, IRBuilder<> &B, const AttrListPtr &Attrs); - + /// EmitPutChar - Emit a call to the putchar function. This assumes that Char /// is an integer. void EmitPutChar(Value *Char, IRBuilder<> &B); - + /// EmitPutS - Emit a call to the puts function. This assumes that Str is /// some pointer. void EmitPutS(Value *Str, IRBuilder<> &B); - + /// EmitFPutC - Emit a call to the fputc function. This assumes that Char is /// an i32, and File is a pointer to FILE. void EmitFPutC(Value *Char, Value *File, IRBuilder<> &B); - + /// EmitFPutS - Emit a call to the puts function. Str is required to be a /// pointer and File is a pointer to FILE. void EmitFPutS(Value *Str, Value *File, IRBuilder<> &B); - + /// EmitFWrite - Emit a call to the fwrite function. This assumes that Ptr is /// a pointer, Size is an 'intptr_t', and File is a pointer to FILE. void EmitFWrite(Value *Ptr, Value *Size, Value *File, IRBuilder<> &B); - + }; } // End anonymous namespace. 
@@ -138,7 +138,7 @@ Constant *StrLen =M->getOrInsertFunction("strlen", AttrListPtr::get(AWI, 2), TD->getIntPtrType(*Context), - Type::getInt8PtrTy(*Context), + Type::getInt8PtrTy(*Context), NULL); CallInst *CI = B.CreateCall(StrLen, CastToCStr(Ptr, B), "strlen"); if (const Function *F = dyn_cast(StrLen->stripPointerCasts())) @@ -169,9 +169,10 @@ AWI = AttributeWithIndex::get(~0u, Attribute::ReadOnly | Attribute::NoUnwind); Value *MemChr = M->getOrInsertFunction("memchr", AttrListPtr::get(&AWI, 1), - Type::getInt8PtrTy(*Context), - Type::getInt8PtrTy(*Context), - Type::getInt32Ty(*Context), TD->getIntPtrType(*Context), + Type::getInt8PtrTy(*Context), + Type::getInt8PtrTy(*Context), + Type::getInt32Ty(*Context), + TD->getIntPtrType(*Context), NULL); CallInst *CI = B.CreateCall3(MemChr, CastToCStr(Ptr, B), Val, Len, "memchr"); @@ -256,7 +257,9 @@ Value *PutChar = M->getOrInsertFunction("putchar", Type::getInt32Ty(*Context), Type::getInt32Ty(*Context), NULL); CallInst *CI = B.CreateCall(PutChar, - B.CreateIntCast(Char, Type::getInt32Ty(*Context), "chari"), + B.CreateIntCast(Char, + Type::getInt32Ty(*Context), + "chari"), "putchar"); if (const Function *F = dyn_cast(PutChar->stripPointerCasts())) @@ -290,10 +293,14 @@ AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind); Constant *F; if (isa(File->getType())) - F = M->getOrInsertFunction("fputc", AttrListPtr::get(AWI, 2), Type::getInt32Ty(*Context), - Type::getInt32Ty(*Context), File->getType(), NULL); + F = M->getOrInsertFunction("fputc", AttrListPtr::get(AWI, 2), + Type::getInt32Ty(*Context), + Type::getInt32Ty(*Context), File->getType(), + NULL); else - F = M->getOrInsertFunction("fputc", Type::getInt32Ty(*Context), Type::getInt32Ty(*Context), + F = M->getOrInsertFunction("fputc", + Type::getInt32Ty(*Context), + Type::getInt32Ty(*Context), File->getType(), NULL); Char = B.CreateIntCast(Char, Type::getInt32Ty(*Context), "chari"); CallInst *CI = B.CreateCall2(F, Char, File, "fputc"); @@ -312,7 +319,8 @@ AWI[2] = 
AttributeWithIndex::get(~0u, Attribute::NoUnwind); Constant *F; if (isa(File->getType())) - F = M->getOrInsertFunction("fputs", AttrListPtr::get(AWI, 3), Type::getInt32Ty(*Context), + F = M->getOrInsertFunction("fputs", AttrListPtr::get(AWI, 3), + Type::getInt32Ty(*Context), Type::getInt8PtrTy(*Context), File->getType(), NULL); else @@ -339,12 +347,14 @@ F = M->getOrInsertFunction("fwrite", AttrListPtr::get(AWI, 3), TD->getIntPtrType(*Context), Type::getInt8PtrTy(*Context), - TD->getIntPtrType(*Context), TD->getIntPtrType(*Context), + TD->getIntPtrType(*Context), + TD->getIntPtrType(*Context), File->getType(), NULL); else F = M->getOrInsertFunction("fwrite", TD->getIntPtrType(*Context), Type::getInt8PtrTy(*Context), - TD->getIntPtrType(*Context), TD->getIntPtrType(*Context), + TD->getIntPtrType(*Context), + TD->getIntPtrType(*Context), File->getType(), NULL); CallInst *CI = B.CreateCall4(F, CastToCStr(Ptr, B), Size, ConstantInt::get(TD->getIntPtrType(*Context), 1), File); @@ -363,30 +373,30 @@ // Look through noop bitcast instructions. if (BitCastInst *BCI = dyn_cast(V)) return GetStringLengthH(BCI->getOperand(0), PHIs); - + // If this is a PHI node, there are two cases: either we have already seen it // or we haven't. if (PHINode *PN = dyn_cast(V)) { if (!PHIs.insert(PN)) return ~0ULL; // already in the set. - + // If it was new, see if all the input strings are the same length. uint64_t LenSoFar = ~0ULL; for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { uint64_t Len = GetStringLengthH(PN->getIncomingValue(i), PHIs); if (Len == 0) return 0; // Unknown length -> unknown. - + if (Len == ~0ULL) continue; - + if (Len != LenSoFar && LenSoFar != ~0ULL) return 0; // Disagree -> unknown. LenSoFar = Len; } - + // Success, all agree. 
return LenSoFar; } - + // strlen(select(c,x,y)) -> strlen(x) ^ strlen(y) if (SelectInst *SI = dyn_cast<SelectInst>(V)) { uint64_t Len1 = GetStringLengthH(SI->getTrueValue(), PHIs); @@ -398,7 +408,7 @@ if (Len1 != Len2) return 0; return Len1; } - + // If the value is not a GEP instruction nor a constant expression with a // GEP instruction, then return unknown. User *GEP = 0; @@ -411,11 +421,11 @@ } else { return 0; } - + // Make sure the GEP has exactly three arguments. if (GEP->getNumOperands() != 3) return 0; - + // Check to make sure that the first operand of the GEP is an integer and // has value 0 so that we are sure we're indexing into the initializer. if (ConstantInt *Idx = dyn_cast<ConstantInt>(GEP->getOperand(1))) { @@ -423,7 +433,7 @@ return 0; } else return 0; - + // If the second index isn't a ConstantInt, then this is a variable index // into the array. If this occurs, we can't say anything meaningful about // the string. @@ -432,7 +442,7 @@ StartIdx = CI->getZExtValue(); else return 0; - + // The GEP instruction, constant or instruction, must reference a global // variable that is a constant and is initialized. The referenced constant // initializer is the array that we'll use for optimization. @@ -441,21 +451,21 @@ GV->mayBeOverridden()) return 0; Constant *GlobalInit = GV->getInitializer(); - + // Handle the ConstantAggregateZero case, which is a degenerate case. The // initializer is constant zero so the length of the string must be zero. if (isa<ConstantAggregateZero>(GlobalInit)) return 1; // Len = 0 offset by 1. - + // Must be a Constant Array ConstantArray *Array = dyn_cast<ConstantArray>(GlobalInit); if (!Array || Array->getType()->getElementType() != Type::getInt8Ty(V->getContext())) return false; - + // Get the number of elements in the array uint64_t NumElts = Array->getType()->getNumElements(); - + // Traverse the constant array from StartIdx (derived above) which is // the place the GEP refers to in the array. 
for (unsigned i = StartIdx; i != NumElts; ++i) { @@ -466,7 +476,7 @@ if (CI->isZero()) return i-StartIdx+1; // We found end of string, success! } - + return 0; // The array isn't null terminated, conservatively return 'unknown'. } @@ -474,7 +484,7 @@ /// the specified pointer, return 'len+1'. If we can't, return 0. static uint64_t GetStringLength(Value *V) { if (!isa(V->getType())) return 0; - + SmallPtrSet PHIs; uint64_t Len = GetStringLengthH(V, PHIs); // If Len is ~0ULL, we had an infinite phi cycle: this is dead code, so return @@ -483,7 +493,7 @@ } /// IsOnlyUsedInZeroEqualityComparison - Return true if it only matters that the -/// value is equal or not-equal to zero. +/// value is equal or not-equal to zero. static bool IsOnlyUsedInZeroEqualityComparison(Value *V) { for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI) { @@ -514,16 +524,16 @@ FT->getParamType(0) != FT->getReturnType() || FT->getParamType(1) != FT->getReturnType()) return 0; - + // Extract some information from the instruction Value *Dst = CI->getOperand(1); Value *Src = CI->getOperand(2); - + // See if we can get the length of the input string. uint64_t Len = GetStringLength(Src); if (Len == 0) return 0; --Len; // Unbias length. - + // Handle the simple, do-nothing case: strcat(x, "") -> x if (Len == 0) return Dst; @@ -539,12 +549,12 @@ // We need to find the end of the destination string. That's where the // memory is to be moved to. We just generate a call to strlen. Value *DstLen = EmitStrLen(Dst, B); - + // Now that we have the destination's length, we must index into the // destination's pointer to get the actual memcpy destination (end of // the string .. we're concatenating). Value *CpyDst = B.CreateGEP(Dst, DstLen, "endptr"); - + // We have enough information to now generate the memcpy call to do the // concatenation for us. Make a memcpy to copy the nul byte with align = 1. 
EmitMemCpy(CpyDst, Src, @@ -611,9 +621,9 @@ FT->getReturnType() != Type::getInt8PtrTy(*Context) || FT->getParamType(0) != FT->getReturnType()) return 0; - + Value *SrcStr = CI->getOperand(1); - + // If the second operand is non-constant, see if we can compute the length // of the input string and turn this into memchr. ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getOperand(2)); @@ -625,7 +635,7 @@ if (Len == 0 || FT->getParamType(1) != Type::getInt32Ty(*Context)) // memchr needs i32. return 0; - + return EmitMemChr(SrcStr, CI->getOperand(2), // include nul. ConstantInt::get(TD->getIntPtrType(*Context), Len), B); } @@ -635,11 +645,11 @@ std::string Str; if (!GetConstantStringInfo(SrcStr, Str)) return 0; - + // strchr can find the nul character. Str += '\0'; char CharValue = CharC->getSExtValue(); - + // Compute the offset. uint64_t i = 0; while (1) { @@ -650,7 +660,7 @@ break; ++i; } - + // strchr(s+n,c) -> gep(s+n+i,c) Value *Idx = ConstantInt::get(Type::getInt64Ty(*Context), i); return B.CreateGEP(SrcStr, Idx, "strchr"); @@ -664,28 +674,29 @@ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { // Verify the "strcmp" function prototype. 
const FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 2 || FT->getReturnType() != Type::getInt32Ty(*Context) || + if (FT->getNumParams() != 2 || + FT->getReturnType() != Type::getInt32Ty(*Context) || FT->getParamType(0) != FT->getParamType(1) || FT->getParamType(0) != Type::getInt8PtrTy(*Context)) return 0; - + Value *Str1P = CI->getOperand(1), *Str2P = CI->getOperand(2); if (Str1P == Str2P) // strcmp(x,x) -> 0 return ConstantInt::get(CI->getType(), 0); - + std::string Str1, Str2; bool HasStr1 = GetConstantStringInfo(Str1P, Str1); bool HasStr2 = GetConstantStringInfo(Str2P, Str2); - + if (HasStr1 && Str1.empty()) // strcmp("", x) -> *x return B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"), CI->getType()); - + if (HasStr2 && Str2.empty()) // strcmp(x,"") -> *x return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType()); - + // strcmp(x, y) -> cnst (if both x and y are constant strings) if (HasStr1 && HasStr2) - return ConstantInt::get(CI->getType(), + return ConstantInt::get(CI->getType(), strcmp(Str1.c_str(),Str2.c_str())); // strcmp(P, "x") -> memcmp(P, "x", 2) @@ -711,36 +722,37 @@ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { // Verify the "strncmp" function prototype. const FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 3 || FT->getReturnType() != Type::getInt32Ty(*Context) || + if (FT->getNumParams() != 3 || + FT->getReturnType() != Type::getInt32Ty(*Context) || FT->getParamType(0) != FT->getParamType(1) || FT->getParamType(0) != Type::getInt8PtrTy(*Context) || !isa(FT->getParamType(2))) return 0; - + Value *Str1P = CI->getOperand(1), *Str2P = CI->getOperand(2); if (Str1P == Str2P) // strncmp(x,x,n) -> 0 return ConstantInt::get(CI->getType(), 0); - + // Get the length argument if it is constant. 
uint64_t Length; if (ConstantInt *LengthArg = dyn_cast(CI->getOperand(3))) Length = LengthArg->getZExtValue(); else return 0; - + if (Length == 0) // strncmp(x,y,0) -> 0 return ConstantInt::get(CI->getType(), 0); - + std::string Str1, Str2; bool HasStr1 = GetConstantStringInfo(Str1P, Str1); bool HasStr2 = GetConstantStringInfo(Str2P, Str2); - + if (HasStr1 && Str1.empty()) // strncmp("", x, n) -> *x return B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"), CI->getType()); - + if (HasStr2 && Str2.empty()) // strncmp(x, "", n) -> *x return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType()); - + // strncmp(x, y) -> cnst (if both x and y are constant strings) if (HasStr1 && HasStr2) return ConstantInt::get(CI->getType(), @@ -761,18 +773,18 @@ FT->getParamType(0) != FT->getParamType(1) || FT->getParamType(0) != Type::getInt8PtrTy(*Context)) return 0; - + Value *Dst = CI->getOperand(1), *Src = CI->getOperand(2); if (Dst == Src) // strcpy(x,x) -> x return Src; - + // These optimizations require TargetData. if (!TD) return 0; // See if we can get the length of the input string. uint64_t Len = GetStringLength(Src); if (Len == 0) return 0; - + // We have enough information to now generate the memcpy call to do the // concatenation for us. Make a memcpy to copy the nul byte with align = 1. 
EmitMemCpy(Dst, Src, @@ -804,7 +816,8 @@ if (SrcLen == 0) { // strncpy(x, "", y) -> memset(x, '\0', y, 1) - EmitMemSet(Dst, ConstantInt::get(Type::getInt8Ty(*Context), '\0'), LenOp, B); + EmitMemSet(Dst, ConstantInt::get(Type::getInt8Ty(*Context), '\0'), LenOp, + B); return Dst; } @@ -840,7 +853,7 @@ FT->getParamType(0) != Type::getInt8PtrTy(*Context) || !isa(FT->getReturnType())) return 0; - + Value *Src = CI->getOperand(1); // Constant folding: strlen("xyz") -> 3 @@ -992,7 +1005,8 @@ return 0; // memset(p, v, n) -> llvm.memset(p, v, n, 1) - Value *Val = B.CreateIntCast(CI->getOperand(2), Type::getInt8Ty(*Context), false); + Value *Val = B.CreateIntCast(CI->getOperand(2), Type::getInt8Ty(*Context), + false); EmitMemSet(CI->getOperand(1), Val, CI->getOperand(3), B); return CI->getOperand(1); } @@ -1014,7 +1028,7 @@ FT->getParamType(0) != FT->getParamType(1) || !FT->getParamType(0)->isFloatingPoint()) return 0; - + Value *Op1 = CI->getOperand(1), *Op2 = CI->getOperand(2); if (ConstantFP *Op1C = dyn_cast(Op1)) { if (Op1C->isExactlyValue(1.0)) // pow(1.0, x) -> 1.0 @@ -1022,13 +1036,13 @@ if (Op1C->isExactlyValue(2.0)) // pow(2.0, x) -> exp2(x) return EmitUnaryFloatFnCall(Op2, "exp2", B, Callee->getAttributes()); } - + ConstantFP *Op2C = dyn_cast(Op2); if (Op2C == 0) return 0; - + if (Op2C->getValueAPF().isZero()) // pow(x, 0.0) -> 1.0 return ConstantFP::get(CI->getType(), 1.0); - + if (Op2C->isExactlyValue(0.5)) { // Expand pow(x, 0.5) to (x == -infinity ? +infinity : fabs(sqrt(x))). 
// This is faster than calling pow, and still handles negative zero @@ -1045,7 +1059,7 @@ Value *Sel = B.CreateSelect(FCmp, Inf, FAbs, "tmp"); return Sel; } - + if (Op2C->isExactlyValue(1.0)) // pow(x, 1.0) -> x return Op1; if (Op2C->isExactlyValue(2.0)) // pow(x, 2.0) -> x*x @@ -1068,17 +1082,19 @@ if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) || !FT->getParamType(0)->isFloatingPoint()) return 0; - + Value *Op = CI->getOperand(1); // Turn exp2(sitofp(x)) -> ldexp(1.0, sext(x)) if sizeof(x) <= 32 // Turn exp2(uitofp(x)) -> ldexp(1.0, zext(x)) if sizeof(x) < 32 Value *LdExpArg = 0; if (SIToFPInst *OpC = dyn_cast(Op)) { if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() <= 32) - LdExpArg = B.CreateSExt(OpC->getOperand(0), Type::getInt32Ty(*Context), "tmp"); + LdExpArg = B.CreateSExt(OpC->getOperand(0), + Type::getInt32Ty(*Context), "tmp"); } else if (UIToFPInst *OpC = dyn_cast(Op)) { if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() < 32) - LdExpArg = B.CreateZExt(OpC->getOperand(0), Type::getInt32Ty(*Context), "tmp"); + LdExpArg = B.CreateZExt(OpC->getOperand(0), + Type::getInt32Ty(*Context), "tmp"); } if (LdExpArg) { @@ -1096,7 +1112,8 @@ Module *M = Caller->getParent(); Value *Callee = M->getOrInsertFunction(Name, Op->getType(), - Op->getType(), Type::getInt32Ty(*Context),NULL); + Op->getType(), + Type::getInt32Ty(*Context),NULL); CallInst *CI = B.CreateCall2(Callee, One, LdExpArg); if (const Function *F = dyn_cast(Callee->stripPointerCasts())) CI->setCallingConv(F->getCallingConv()); @@ -1142,12 +1159,13 @@ const FunctionType *FT = Callee->getFunctionType(); // Just make sure this has 2 arguments of the same FP type, which match the // result type. - if (FT->getNumParams() != 1 || FT->getReturnType() != Type::getInt32Ty(*Context) || + if (FT->getNumParams() != 1 || + FT->getReturnType() != Type::getInt32Ty(*Context) || !isa(FT->getParamType(0))) return 0; - + Value *Op = CI->getOperand(1); - + // Constant fold. 
if (ConstantInt *CI = dyn_cast(Op)) { if (CI->getValue() == 0) // ffs(0) -> 0. @@ -1155,7 +1173,7 @@ return ConstantInt::get(Type::getInt32Ty(*Context), // ffs(c) -> cttz(c)+1 CI->getValue().countTrailingZeros()+1); } - + // ffs(x) -> x != 0 ? (i32)llvm.cttz(x)+1 : 0 const Type *ArgType = Op->getType(); Value *F = Intrinsic::getDeclaration(Callee->getParent(), @@ -1163,9 +1181,10 @@ Value *V = B.CreateCall(F, Op, "cttz"); V = B.CreateAdd(V, ConstantInt::get(V->getType(), 1), "tmp"); V = B.CreateIntCast(V, Type::getInt32Ty(*Context), false, "tmp"); - + Value *Cond = B.CreateICmpNE(Op, Constant::getNullValue(ArgType), "tmp"); - return B.CreateSelect(Cond, V, ConstantInt::get(Type::getInt32Ty(*Context), 0)); + return B.CreateSelect(Cond, V, + ConstantInt::get(Type::getInt32Ty(*Context), 0)); } }; @@ -1179,12 +1198,12 @@ if (FT->getNumParams() != 1 || !isa(FT->getReturnType()) || FT->getParamType(0) != Type::getInt32Ty(*Context)) return 0; - + // isdigit(c) -> (c-'0') getOperand(1); - Op = B.CreateSub(Op, ConstantInt::get(Type::getInt32Ty(*Context), '0'), + Op = B.CreateSub(Op, ConstantInt::get(Type::getInt32Ty(*Context), '0'), "isdigittmp"); - Op = B.CreateICmpULT(Op, ConstantInt::get(Type::getInt32Ty(*Context), 10), + Op = B.CreateICmpULT(Op, ConstantInt::get(Type::getInt32Ty(*Context), 10), "isdigit"); return B.CreateZExt(Op, CI->getType()); } @@ -1200,7 +1219,7 @@ if (FT->getNumParams() != 1 || !isa(FT->getReturnType()) || FT->getParamType(0) != Type::getInt32Ty(*Context)) return 0; - + // isascii(c) -> c getOperand(1); Op = B.CreateICmpULT(Op, ConstantInt::get(Type::getInt32Ty(*Context), 128), @@ -1208,7 +1227,7 @@ return B.CreateZExt(Op, CI->getType()); } }; - + //===---------------------------------------===// // 'abs', 'labs', 'llabs' Optimizations @@ -1219,17 +1238,17 @@ if (FT->getNumParams() != 1 || !isa(FT->getReturnType()) || FT->getParamType(0) != FT->getReturnType()) return 0; - + // abs(x) -> x >s -1 ? 
x : -x Value *Op = CI->getOperand(1); - Value *Pos = B.CreateICmpSGT(Op, + Value *Pos = B.CreateICmpSGT(Op, Constant::getAllOnesValue(Op->getType()), "ispos"); Value *Neg = B.CreateNeg(Op, "neg"); return B.CreateSelect(Pos, Op, Neg); } }; - + //===---------------------------------------===// // 'toascii' Optimizations @@ -1241,7 +1260,7 @@ if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) || FT->getParamType(0) != Type::getInt32Ty(*Context)) return 0; - + // isascii(c) -> c & 0x7f return B.CreateAnd(CI->getOperand(1), ConstantInt::get(CI->getType(),0x7F)); @@ -1263,7 +1282,7 @@ !(isa(FT->getReturnType()) || FT->getReturnType()->isVoidTy())) return 0; - + // Check for a fixed format string. std::string FormatStr; if (!GetConstantStringInfo(CI->getOperand(1), FormatStr)) @@ -1271,16 +1290,16 @@ // Empty format string -> noop. if (FormatStr.empty()) // Tolerate printf's declared void. - return CI->use_empty() ? (Value*)CI : + return CI->use_empty() ? (Value*)CI : ConstantInt::get(CI->getType(), 0); - + // printf("x") -> putchar('x'), even for '%'. if (FormatStr.size() == 1) { EmitPutChar(ConstantInt::get(Type::getInt32Ty(*Context), FormatStr[0]), B); - return CI->use_empty() ? (Value*)CI : + return CI->use_empty() ? (Value*)CI : ConstantInt::get(CI->getType(), 1); } - + // printf("foo\n") --> puts("foo") if (FormatStr[FormatStr.size()-1] == '\n' && FormatStr.find('%') == std::string::npos) { // no format characters. @@ -1291,19 +1310,19 @@ C = new GlobalVariable(*Callee->getParent(), C->getType(), true, GlobalVariable::InternalLinkage, C, "str"); EmitPutS(C, B); - return CI->use_empty() ? (Value*)CI : + return CI->use_empty() ? (Value*)CI : ConstantInt::get(CI->getType(), FormatStr.size()+1); } - + // Optimize specific format strings. // printf("%c", chr) --> putchar(*(i8*)dst) if (FormatStr == "%c" && CI->getNumOperands() > 2 && isa(CI->getOperand(2)->getType())) { EmitPutChar(CI->getOperand(2), B); - return CI->use_empty() ? 
(Value*)CI : + return CI->use_empty() ? (Value*)CI : ConstantInt::get(CI->getType(), 1); } - + // printf("%s\n", str) --> puts(str) if (FormatStr == "%s\n" && CI->getNumOperands() > 2 && isa(CI->getOperand(2)->getType()) && @@ -1331,7 +1350,7 @@ std::string FormatStr; if (!GetConstantStringInfo(CI->getOperand(2), FormatStr)) return 0; - + // If we just have a format string (nothing else crazy) transform it. if (CI->getNumOperands() == 3) { // Make sure there's no % in the constant array. We could try to handle @@ -1348,25 +1367,27 @@ ConstantInt::get(TD->getIntPtrType(*Context), FormatStr.size()+1),1,B); return ConstantInt::get(CI->getType(), FormatStr.size()); } - + // The remaining optimizations require the format string to be "%s" or "%c" // and have an extra operand. if (FormatStr.size() != 2 || FormatStr[0] != '%' || CI->getNumOperands() <4) return 0; - + // Decode the second character of the format string. if (FormatStr[1] == 'c') { // sprintf(dst, "%c", chr) --> *(i8*)dst = chr; *((i8*)dst+1) = 0 if (!isa(CI->getOperand(3)->getType())) return 0; - Value *V = B.CreateTrunc(CI->getOperand(3), Type::getInt8Ty(*Context), "char"); + Value *V = B.CreateTrunc(CI->getOperand(3), + Type::getInt8Ty(*Context), "char"); Value *Ptr = CastToCStr(CI->getOperand(1), B); B.CreateStore(V, Ptr); - Ptr = B.CreateGEP(Ptr, ConstantInt::get(Type::getInt32Ty(*Context), 1), "nul"); + Ptr = B.CreateGEP(Ptr, ConstantInt::get(Type::getInt32Ty(*Context), 1), + "nul"); B.CreateStore(Constant::getNullValue(Type::getInt8Ty(*Context)), Ptr); - + return ConstantInt::get(CI->getType(), 1); } - + if (FormatStr[1] == 's') { // These optimizations require TargetData. if (!TD) return 0; @@ -1379,7 +1400,7 @@ ConstantInt::get(Len->getType(), 1), "leninc"); EmitMemCpy(CI->getOperand(1), CI->getOperand(3), IncLen, 1, B); - + // The sprintf result is the unincremented number of bytes in the string. 
return B.CreateIntCast(Len, CI->getType(), false); } @@ -1400,17 +1421,17 @@ !isa(FT->getParamType(3)) || !isa(FT->getReturnType())) return 0; - + // Get the element size and count. ConstantInt *SizeC = dyn_cast(CI->getOperand(2)); ConstantInt *CountC = dyn_cast(CI->getOperand(3)); if (!SizeC || !CountC) return 0; uint64_t Bytes = SizeC->getZExtValue()*CountC->getZExtValue(); - + // If this is writing zero records, remove the call (it's a noop). if (Bytes == 0) return ConstantInt::get(CI->getType(), 0); - + // If this is writing one byte, turn it into fputc. if (Bytes == 1) { // fwrite(S,1,1,F) -> fputc(S[0],F) Value *Char = B.CreateLoad(CastToCStr(CI->getOperand(1), B), "char"); @@ -1436,7 +1457,7 @@ !isa(FT->getParamType(1)) || !CI->use_empty()) return 0; - + // fputs(s,F) --> fwrite(s,1,strlen(s),F) uint64_t Len = GetStringLength(CI->getOperand(1)); if (!Len) return 0; @@ -1458,7 +1479,7 @@ !isa(FT->getParamType(1)) || !isa(FT->getReturnType())) return 0; - + // All the optimizations depend on the format string. std::string FormatStr; if (!GetConstantStringInfo(CI->getOperand(2), FormatStr)) @@ -1478,12 +1499,12 @@ CI->getOperand(1), B); return ConstantInt::get(CI->getType(), FormatStr.size()); } - + // The remaining optimizations require the format string to be "%s" or "%c" // and have an extra operand. if (FormatStr.size() != 2 || FormatStr[0] != '%' || CI->getNumOperands() <4) return 0; - + // Decode the second character of the format string. if (FormatStr[1] == 'c') { // fprintf(F, "%c", chr) --> *(i8*)dst = chr @@ -1491,7 +1512,7 @@ EmitFPutC(CI->getOperand(3), CI->getOperand(1), B); return ConstantInt::get(CI->getType(), 1); } - + if (FormatStr[1] == 's') { // fprintf(F, "%s", str) -> fputs(str, F) if (!isa(CI->getOperand(3)->getType()) || !CI->use_empty()) @@ -1554,7 +1575,7 @@ // Public interface to the Simplify LibCalls pass. 
FunctionPass *llvm::createSimplifyLibCallsPass() { - return new SimplifyLibCalls(); + return new SimplifyLibCalls(); } /// Optimizations - Populate the Optimizations map with all the optimizations @@ -1580,7 +1601,7 @@ Optimizations["memcpy"] = &MemCpy; Optimizations["memmove"] = &MemMove; Optimizations["memset"] = &MemSet; - + // Math Library Optimizations Optimizations["powf"] = &Pow; Optimizations["pow"] = &Pow; @@ -1598,7 +1619,7 @@ Optimizations["llvm.exp2.f80"] = &Exp2; Optimizations["llvm.exp2.f64"] = &Exp2; Optimizations["llvm.exp2.f32"] = &Exp2; - + #ifdef HAVE_FLOORF Optimizations["floor"] = &UnaryDoubleFP; #endif @@ -1614,7 +1635,7 @@ #ifdef HAVE_NEARBYINTF Optimizations["nearbyint"] = &UnaryDoubleFP; #endif - + // Integer Optimizations Optimizations["ffs"] = &FFS; Optimizations["ffsl"] = &FFS; @@ -1625,7 +1646,7 @@ Optimizations["isdigit"] = &IsDigit; Optimizations["isascii"] = &IsAscii; Optimizations["toascii"] = &ToAscii; - + // Formatting and IO Optimizations Optimizations["sprintf"] = &SPrintF; Optimizations["printf"] = &PrintF; @@ -1640,9 +1661,9 @@ bool SimplifyLibCalls::runOnFunction(Function &F) { if (Optimizations.empty()) InitOptimizations(); - + const TargetData *TD = getAnalysisIfAvailable(); - + IRBuilder<> Builder(F.getContext()); bool Changed = false; @@ -1651,35 +1672,35 @@ // Ignore non-calls. CallInst *CI = dyn_cast(I++); if (!CI) continue; - + // Ignore indirect calls and calls to non-external functions. Function *Callee = CI->getCalledFunction(); if (Callee == 0 || !Callee->isDeclaration() || !(Callee->hasExternalLinkage() || Callee->hasDLLImportLinkage())) continue; - + // Ignore unknown calls. LibCallOptimization *LCO = Optimizations.lookup(Callee->getName()); if (!LCO) continue; - + // Set the builder to the instruction after the call. Builder.SetInsertPoint(BB, I); - + // Try to optimize this call. 
Value *Result = LCO->OptimizeCall(CI, TD, Builder); if (Result == 0) continue; DEBUG(errs() << "SimplifyLibCalls simplified: " << *CI; errs() << " into: " << *Result << "\n"); - + // Something changed! Changed = true; ++NumSimplified; - + // Inspect the instruction after the call (which was potentially just // added) next. I = CI; ++I; - + if (CI != Result && !CI->use_empty()) { CI->replaceAllUsesWith(Result); if (!Result->hasName()) From kennethuil at gmail.com Wed Oct 7 16:42:00 2009 From: kennethuil at gmail.com (Kenneth Uildriks) Date: Wed, 7 Oct 2009 16:42:00 -0500 Subject: [llvm-commits] [PATCH] Additions to C-bindings In-Reply-To: <6B93A3F9-D941-4C1C-921A-D828E972F1A0@apple.com> References: <400d33ea0910061935p5eba5545k9220d95d1419f0cf@mail.gmail.com> <400d33ea0910070452j1fd69311n8caa49e79d3c7967@mail.gmail.com> <400d33ea0910070606t4d3fbf61nd07c20263cb80e4c@mail.gmail.com> <6B93A3F9-D941-4C1C-921A-D828E972F1A0@apple.com> Message-ID: <400d33ea0910071442x6a399d96h8e2072e908f4e17c@mail.gmail.com> On Wed, Oct 7, 2009 at 4:04 PM, Eric Christopher wrote: >>>> >>> >>> I wanted to call them from generated code and from my language. >>> >> Not only do I want to end up with a self-hosting compiler, but the >> design of my compiler requires that much of the code generation code >> is actually generated and JITted when the compiler is running. This >> code generation code needs to call a C interface to do its work. > > This seems... odd. I'm not quite sure what you're doing, but language > bindings for your language would seem to be a better solution. By "language bindings" are you suggesting that I create a C++ module exposing my own C bindings and call that module from my language? Because calling the LLVM C++ API directly is problematic. Since the C bindings exist for that same purpose, I figured adding to them would benefit others as well. 
From bob.wilson at apple.com Wed Oct 7 16:53:05 2009 From: bob.wilson at apple.com (Bob Wilson) Date: Wed, 07 Oct 2009 21:53:05 -0000 Subject: [llvm-commits] [llvm] r83490 - in /llvm/trunk/lib/Target/ARM: ARMInstrFormats.td ARMInstrNEON.td Message-ID: <200910072153.n97Lr5CI032474@zion.cs.uiuc.edu> Author: bwilson Date: Wed Oct 7 16:53:04 2009 New Revision: 83490 URL: http://llvm.org/viewvc/llvm-project?rev=83490&view=rev Log: Add some instruction encoding bits for NEON load/store instructions. Modified: llvm/trunk/lib/Target/ARM/ARMInstrFormats.td llvm/trunk/lib/Target/ARM/ARMInstrNEON.td Modified: llvm/trunk/lib/Target/ARM/ARMInstrFormats.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrFormats.td?rev=83490&r1=83489&r2=83490&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrFormats.td (original) +++ llvm/trunk/lib/Target/ARM/ARMInstrFormats.td Wed Oct 7 16:53:04 2009 @@ -1215,7 +1215,8 @@ : NeonI { } -class NLdSt op21_20, bits<4> op11_8, bits<4> op7_4, + dag oops, dag iops, InstrItinClass itin, string asm, string cstr, list pattern> : NeonI { let Inst{31-24} = 0b11110100; Modified: llvm/trunk/lib/Target/ARM/ARMInstrNEON.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrNEON.td?rev=83490&r1=83489&r2=83490&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrNEON.td (original) +++ llvm/trunk/lib/Target/ARM/ARMInstrNEON.td Wed Oct 7 16:53:04 2009 @@ -155,265 +155,289 @@ } // VLD1 : Vector Load (multiple single elements) -class VLD1D - : NLdSt<(outs DPR:$dst), (ins addrmode6:$addr), IIC_VLD1, +class VLD1D op7_4, string OpcodeStr, ValueType Ty, Intrinsic IntOp> + : NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$dst), (ins addrmode6:$addr), IIC_VLD1, !strconcat(OpcodeStr, "\t\\{$dst\\}, $addr"), "", [(set DPR:$dst, (Ty (IntOp addrmode6:$addr)))]>; -class VLD1Q - : 
NLdSt<(outs QPR:$dst), (ins addrmode6:$addr), IIC_VLD1, +class VLD1Q op7_4, string OpcodeStr, ValueType Ty, Intrinsic IntOp> + : NLdSt<0,0b10,0b1010,op7_4, (outs QPR:$dst), (ins addrmode6:$addr), IIC_VLD1, !strconcat(OpcodeStr, "\t${dst:dregpair}, $addr"), "", [(set QPR:$dst, (Ty (IntOp addrmode6:$addr)))]>; -def VLD1d8 : VLD1D<"vld1.8", v8i8, int_arm_neon_vld1>; -def VLD1d16 : VLD1D<"vld1.16", v4i16, int_arm_neon_vld1>; -def VLD1d32 : VLD1D<"vld1.32", v2i32, int_arm_neon_vld1>; -def VLD1df : VLD1D<"vld1.32", v2f32, int_arm_neon_vld1>; -def VLD1d64 : VLD1D<"vld1.64", v1i64, int_arm_neon_vld1>; - -def VLD1q8 : VLD1Q<"vld1.8", v16i8, int_arm_neon_vld1>; -def VLD1q16 : VLD1Q<"vld1.16", v8i16, int_arm_neon_vld1>; -def VLD1q32 : VLD1Q<"vld1.32", v4i32, int_arm_neon_vld1>; -def VLD1qf : VLD1Q<"vld1.32", v4f32, int_arm_neon_vld1>; -def VLD1q64 : VLD1Q<"vld1.64", v2i64, int_arm_neon_vld1>; +def VLD1d8 : VLD1D<0b0000, "vld1.8", v8i8, int_arm_neon_vld1>; +def VLD1d16 : VLD1D<0b0100, "vld1.16", v4i16, int_arm_neon_vld1>; +def VLD1d32 : VLD1D<0b1000, "vld1.32", v2i32, int_arm_neon_vld1>; +def VLD1df : VLD1D<0b1000, "vld1.32", v2f32, int_arm_neon_vld1>; +def VLD1d64 : VLD1D<0b1100, "vld1.64", v1i64, int_arm_neon_vld1>; + +def VLD1q8 : VLD1Q<0b0000, "vld1.8", v16i8, int_arm_neon_vld1>; +def VLD1q16 : VLD1Q<0b0100, "vld1.16", v8i16, int_arm_neon_vld1>; +def VLD1q32 : VLD1Q<0b1000, "vld1.32", v4i32, int_arm_neon_vld1>; +def VLD1qf : VLD1Q<0b1000, "vld1.32", v4f32, int_arm_neon_vld1>; +def VLD1q64 : VLD1Q<0b1100, "vld1.64", v2i64, int_arm_neon_vld1>; let mayLoad = 1, hasExtraDefRegAllocReq = 1 in { // VLD2 : Vector Load (multiple 2-element structures) -class VLD2D - : NLdSt<(outs DPR:$dst1, DPR:$dst2), (ins addrmode6:$addr), IIC_VLD2, +class VLD2D op7_4, string OpcodeStr> + : NLdSt<0,0b10,0b1000,op7_4, (outs DPR:$dst1, DPR:$dst2), + (ins addrmode6:$addr), IIC_VLD2, !strconcat(OpcodeStr, "\t\\{$dst1,$dst2\\}, $addr"), "", []>; -class VLD2Q - : NLdSt<(outs DPR:$dst1, DPR:$dst2, 
DPR:$dst3, DPR:$dst4), +class VLD2Q op7_4, string OpcodeStr> + : NLdSt<0,0b10,0b0011,op7_4, + (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4), (ins addrmode6:$addr), IIC_VLD2, !strconcat(OpcodeStr, "\t\\{$dst1,$dst2,$dst3,$dst4\\}, $addr"), "", []>; -def VLD2d8 : VLD2D<"vld2.8">; -def VLD2d16 : VLD2D<"vld2.16">; -def VLD2d32 : VLD2D<"vld2.32">; - -def VLD2q8 : VLD2Q<"vld2.8">; -def VLD2q16 : VLD2Q<"vld2.16">; -def VLD2q32 : VLD2Q<"vld2.32">; +def VLD2d8 : VLD2D<0b0000, "vld2.8">; +def VLD2d16 : VLD2D<0b0100, "vld2.16">; +def VLD2d32 : VLD2D<0b1000, "vld2.32">; + +def VLD2q8 : VLD2Q<0b0000, "vld2.8">; +def VLD2q16 : VLD2Q<0b0100, "vld2.16">; +def VLD2q32 : VLD2Q<0b1000, "vld2.32">; // VLD3 : Vector Load (multiple 3-element structures) -class VLD3D - : NLdSt<(outs DPR:$dst1, DPR:$dst2, DPR:$dst3), (ins addrmode6:$addr), - IIC_VLD3, +class VLD3D op7_4, string OpcodeStr> + : NLdSt<0,0b10,0b0100,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3), + (ins addrmode6:$addr), IIC_VLD3, !strconcat(OpcodeStr, "\t\\{$dst1,$dst2,$dst3\\}, $addr"), "", []>; -class VLD3WB - : NLdSt<(outs DPR:$dst1, DPR:$dst2, DPR:$dst3, GPR:$wb), +class VLD3WB op7_4, string OpcodeStr> + : NLdSt<0,0b10,0b0101,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, GPR:$wb), (ins addrmode6:$addr), IIC_VLD3, !strconcat(OpcodeStr, "\t\\{$dst1,$dst2,$dst3\\}, $addr"), "$addr.addr = $wb", []>; -def VLD3d8 : VLD3D<"vld3.8">; -def VLD3d16 : VLD3D<"vld3.16">; -def VLD3d32 : VLD3D<"vld3.32">; +def VLD3d8 : VLD3D<0b0000, "vld3.8">; +def VLD3d16 : VLD3D<0b0100, "vld3.16">; +def VLD3d32 : VLD3D<0b1000, "vld3.32">; // vld3 to double-spaced even registers. -def VLD3q8a : VLD3WB<"vld3.8">; -def VLD3q16a : VLD3WB<"vld3.16">; -def VLD3q32a : VLD3WB<"vld3.32">; +def VLD3q8a : VLD3WB<0b0000, "vld3.8">; +def VLD3q16a : VLD3WB<0b0100, "vld3.16">; +def VLD3q32a : VLD3WB<0b1000, "vld3.32">; // vld3 to double-spaced odd registers. 
-def VLD3q8b : VLD3WB<"vld3.8">; -def VLD3q16b : VLD3WB<"vld3.16">; -def VLD3q32b : VLD3WB<"vld3.32">; +def VLD3q8b : VLD3WB<0b0000, "vld3.8">; +def VLD3q16b : VLD3WB<0b0100, "vld3.16">; +def VLD3q32b : VLD3WB<0b1000, "vld3.32">; // VLD4 : Vector Load (multiple 4-element structures) -class VLD4D - : NLdSt<(outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4), +class VLD4D op7_4, string OpcodeStr> + : NLdSt<0,0b10,0b0000,op7_4, + (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4), (ins addrmode6:$addr), IIC_VLD4, !strconcat(OpcodeStr, "\t\\{$dst1,$dst2,$dst3,$dst4\\}, $addr"), "", []>; -class VLD4WB - : NLdSt<(outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), +class VLD4WB op7_4, string OpcodeStr> + : NLdSt<0,0b10,0b0001,op7_4, + (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), (ins addrmode6:$addr), IIC_VLD4, !strconcat(OpcodeStr, "\t\\{$dst1,$dst2,$dst3,$dst4\\}, $addr"), "$addr.addr = $wb", []>; -def VLD4d8 : VLD4D<"vld4.8">; -def VLD4d16 : VLD4D<"vld4.16">; -def VLD4d32 : VLD4D<"vld4.32">; +def VLD4d8 : VLD4D<0b0000, "vld4.8">; +def VLD4d16 : VLD4D<0b0100, "vld4.16">; +def VLD4d32 : VLD4D<0b1000, "vld4.32">; // vld4 to double-spaced even registers. -def VLD4q8a : VLD4WB<"vld4.8">; -def VLD4q16a : VLD4WB<"vld4.16">; -def VLD4q32a : VLD4WB<"vld4.32">; +def VLD4q8a : VLD4WB<0b0000, "vld4.8">; +def VLD4q16a : VLD4WB<0b0100, "vld4.16">; +def VLD4q32a : VLD4WB<0b1000, "vld4.32">; // vld4 to double-spaced odd registers. -def VLD4q8b : VLD4WB<"vld4.8">; -def VLD4q16b : VLD4WB<"vld4.16">; -def VLD4q32b : VLD4WB<"vld4.32">; +def VLD4q8b : VLD4WB<0b0000, "vld4.8">; +def VLD4q16b : VLD4WB<0b0100, "vld4.16">; +def VLD4q32b : VLD4WB<0b1000, "vld4.32">; + +// VLD1LN : Vector Load (single element to one lane) +// FIXME: Not yet implemented. 
// VLD2LN : Vector Load (single 2-element structure to one lane) -class VLD2LND - : NLdSt<(outs DPR:$dst1, DPR:$dst2), +class VLD2LND op11_8, string OpcodeStr> + : NLdSt<1,0b10,op11_8,0b0000, (outs DPR:$dst1, DPR:$dst2), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VLD2, !strconcat(OpcodeStr, "\t\\{$dst1[$lane],$dst2[$lane]\\}, $addr"), "$src1 = $dst1, $src2 = $dst2", []>; -def VLD2LNd8 : VLD2LND<"vld2.8">; -def VLD2LNd16 : VLD2LND<"vld2.16">; -def VLD2LNd32 : VLD2LND<"vld2.32">; +def VLD2LNd8 : VLD2LND<0b0001, "vld2.8">; +def VLD2LNd16 : VLD2LND<0b0101, "vld2.16">; +def VLD2LNd32 : VLD2LND<0b1001, "vld2.32">; // VLD3LN : Vector Load (single 3-element structure to one lane) -class VLD3LND - : NLdSt<(outs DPR:$dst1, DPR:$dst2, DPR:$dst3), +class VLD3LND op11_8, string OpcodeStr> + : NLdSt<1,0b10,op11_8,0b0000, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane), IIC_VLD3, !strconcat(OpcodeStr, "\t\\{$dst1[$lane],$dst2[$lane],$dst3[$lane]\\}, $addr"), "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3", []>; -def VLD3LNd8 : VLD3LND<"vld3.8">; -def VLD3LNd16 : VLD3LND<"vld3.16">; -def VLD3LNd32 : VLD3LND<"vld3.32">; +def VLD3LNd8 : VLD3LND<0b0010, "vld3.8">; +def VLD3LNd16 : VLD3LND<0b0110, "vld3.16">; +def VLD3LNd32 : VLD3LND<0b1010, "vld3.32">; // VLD4LN : Vector Load (single 4-element structure to one lane) -class VLD4LND - : NLdSt<(outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4), +class VLD4LND op11_8, string OpcodeStr> + : NLdSt<1,0b10,op11_8,0b0000, + (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane), IIC_VLD4, !strconcat(OpcodeStr, "\t\\{$dst1[$lane],$dst2[$lane],$dst3[$lane],$dst4[$lane]\\}, $addr"), "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []>; -def VLD4LNd8 : VLD4LND<"vld4.8">; -def VLD4LNd16 : VLD4LND<"vld4.16">; -def VLD4LNd32 : VLD4LND<"vld4.32">; +def VLD4LNd8 : 
VLD4LND<0b0011, "vld4.8">; +def VLD4LNd16 : VLD4LND<0b0111, "vld4.16">; +def VLD4LNd32 : VLD4LND<0b1011, "vld4.32">; + +// VLD1DUP : Vector Load (single element to all lanes) +// VLD2DUP : Vector Load (single 2-element structure to all lanes) +// VLD3DUP : Vector Load (single 3-element structure to all lanes) +// VLD4DUP : Vector Load (single 4-element structure to all lanes) +// FIXME: Not yet implemented. } // mayLoad = 1, hasExtraDefRegAllocReq = 1 // VST1 : Vector Store (multiple single elements) -class VST1D - : NLdSt<(outs), (ins addrmode6:$addr, DPR:$src), IIC_VST, +class VST1D op7_4, string OpcodeStr, ValueType Ty, Intrinsic IntOp> + : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins addrmode6:$addr, DPR:$src), IIC_VST, !strconcat(OpcodeStr, "\t\\{$src\\}, $addr"), "", [(IntOp addrmode6:$addr, (Ty DPR:$src))]>; -class VST1Q - : NLdSt<(outs), (ins addrmode6:$addr, QPR:$src), IIC_VST, +class VST1Q op7_4, string OpcodeStr, ValueType Ty, Intrinsic IntOp> + : NLdSt<0,0b00,0b1010,op7_4, (outs), (ins addrmode6:$addr, QPR:$src), IIC_VST, !strconcat(OpcodeStr, "\t${src:dregpair}, $addr"), "", [(IntOp addrmode6:$addr, (Ty QPR:$src))]>; let hasExtraSrcRegAllocReq = 1 in { -def VST1d8 : VST1D<"vst1.8", v8i8, int_arm_neon_vst1>; -def VST1d16 : VST1D<"vst1.16", v4i16, int_arm_neon_vst1>; -def VST1d32 : VST1D<"vst1.32", v2i32, int_arm_neon_vst1>; -def VST1df : VST1D<"vst1.32", v2f32, int_arm_neon_vst1>; -def VST1d64 : VST1D<"vst1.64", v1i64, int_arm_neon_vst1>; - -def VST1q8 : VST1Q<"vst1.8", v16i8, int_arm_neon_vst1>; -def VST1q16 : VST1Q<"vst1.16", v8i16, int_arm_neon_vst1>; -def VST1q32 : VST1Q<"vst1.32", v4i32, int_arm_neon_vst1>; -def VST1qf : VST1Q<"vst1.32", v4f32, int_arm_neon_vst1>; -def VST1q64 : VST1Q<"vst1.64", v2i64, int_arm_neon_vst1>; +def VST1d8 : VST1D<0b0000, "vst1.8", v8i8, int_arm_neon_vst1>; +def VST1d16 : VST1D<0b0100, "vst1.16", v4i16, int_arm_neon_vst1>; +def VST1d32 : VST1D<0b1000, "vst1.32", v2i32, int_arm_neon_vst1>; +def VST1df : VST1D<0b1000, 
"vst1.32", v2f32, int_arm_neon_vst1>; +def VST1d64 : VST1D<0b1100, "vst1.64", v1i64, int_arm_neon_vst1>; + +def VST1q8 : VST1Q<0b0000, "vst1.8", v16i8, int_arm_neon_vst1>; +def VST1q16 : VST1Q<0b0100, "vst1.16", v8i16, int_arm_neon_vst1>; +def VST1q32 : VST1Q<0b1000, "vst1.32", v4i32, int_arm_neon_vst1>; +def VST1qf : VST1Q<0b1000, "vst1.32", v4f32, int_arm_neon_vst1>; +def VST1q64 : VST1Q<0b1100, "vst1.64", v2i64, int_arm_neon_vst1>; } // hasExtraSrcRegAllocReq let mayStore = 1, hasExtraSrcRegAllocReq = 1 in { // VST2 : Vector Store (multiple 2-element structures) -class VST2D - : NLdSt<(outs), (ins addrmode6:$addr, DPR:$src1, DPR:$src2), IIC_VST, +class VST2D op7_4, string OpcodeStr> + : NLdSt<0,0b00,0b1000,op7_4, (outs), + (ins addrmode6:$addr, DPR:$src1, DPR:$src2), IIC_VST, !strconcat(OpcodeStr, "\t\\{$src1,$src2\\}, $addr"), "", []>; -class VST2Q - : NLdSt<(outs), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, - DPR:$src4), IIC_VST, +class VST2Q op7_4, string OpcodeStr> + : NLdSt<0,0b00,0b0011,op7_4, (outs), + (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), + IIC_VST, !strconcat(OpcodeStr, "\t\\{$src1,$src2,$src3,$src4\\}, $addr"), "", []>; -def VST2d8 : VST2D<"vst2.8">; -def VST2d16 : VST2D<"vst2.16">; -def VST2d32 : VST2D<"vst2.32">; - -def VST2q8 : VST2Q<"vst2.8">; -def VST2q16 : VST2Q<"vst2.16">; -def VST2q32 : VST2Q<"vst2.32">; +def VST2d8 : VST2D<0b0000, "vst2.8">; +def VST2d16 : VST2D<0b0100, "vst2.16">; +def VST2d32 : VST2D<0b1000, "vst2.32">; + +def VST2q8 : VST2Q<0b0000, "vst2.8">; +def VST2q16 : VST2Q<0b0100, "vst2.16">; +def VST2q32 : VST2Q<0b1000, "vst2.32">; // VST3 : Vector Store (multiple 3-element structures) -class VST3D - : NLdSt<(outs), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3), - IIC_VST, +class VST3D op7_4, string OpcodeStr> + : NLdSt<0,0b00,0b0100,op7_4, (outs), + (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST, !strconcat(OpcodeStr, "\t\\{$src1,$src2,$src3\\}, $addr"), "", []>; 
-class VST3WB - : NLdSt<(outs GPR:$wb), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, - DPR:$src3), IIC_VST, +class VST3WB op7_4, string OpcodeStr> + : NLdSt<0,0b00,0b0101,op7_4, (outs GPR:$wb), + (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST, !strconcat(OpcodeStr, "\t\\{$src1,$src2,$src3\\}, $addr"), "$addr.addr = $wb", []>; -def VST3d8 : VST3D<"vst3.8">; -def VST3d16 : VST3D<"vst3.16">; -def VST3d32 : VST3D<"vst3.32">; +def VST3d8 : VST3D<0b0000, "vst3.8">; +def VST3d16 : VST3D<0b0100, "vst3.16">; +def VST3d32 : VST3D<0b1000, "vst3.32">; // vst3 to double-spaced even registers. -def VST3q8a : VST3WB<"vst3.8">; -def VST3q16a : VST3WB<"vst3.16">; -def VST3q32a : VST3WB<"vst3.32">; +def VST3q8a : VST3WB<0b0000, "vst3.8">; +def VST3q16a : VST3WB<0b0100, "vst3.16">; +def VST3q32a : VST3WB<0b1000, "vst3.32">; // vst3 to double-spaced odd registers. -def VST3q8b : VST3WB<"vst3.8">; -def VST3q16b : VST3WB<"vst3.16">; -def VST3q32b : VST3WB<"vst3.32">; +def VST3q8b : VST3WB<0b0000, "vst3.8">; +def VST3q16b : VST3WB<0b0100, "vst3.16">; +def VST3q32b : VST3WB<0b1000, "vst3.32">; // VST4 : Vector Store (multiple 4-element structures) -class VST4D - : NLdSt<(outs), (ins addrmode6:$addr, - DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST, +class VST4D op7_4, string OpcodeStr> + : NLdSt<0,0b00,0b0000,op7_4, (outs), + (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), + IIC_VST, !strconcat(OpcodeStr, "\t\\{$src1,$src2,$src3,$src4\\}, $addr"), "", []>; -class VST4WB - : NLdSt<(outs GPR:$wb), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, - DPR:$src3, DPR:$src4), IIC_VST, +class VST4WB op7_4, string OpcodeStr> + : NLdSt<0,0b00,0b0001,op7_4, (outs GPR:$wb), + (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), + IIC_VST, !strconcat(OpcodeStr, "\t\\{$src1,$src2,$src3,$src4\\}, $addr"), "$addr.addr = $wb", []>; -def VST4d8 : VST4D<"vst4.8">; -def VST4d16 : VST4D<"vst4.16">; -def VST4d32 : VST4D<"vst4.32">; +def VST4d8 : VST4D<0b0000, 
"vst4.8">; +def VST4d16 : VST4D<0b0100, "vst4.16">; +def VST4d32 : VST4D<0b1000, "vst4.32">; // vst4 to double-spaced even registers. -def VST4q8a : VST4WB<"vst4.8">; -def VST4q16a : VST4WB<"vst4.16">; -def VST4q32a : VST4WB<"vst4.32">; +def VST4q8a : VST4WB<0b0000, "vst4.8">; +def VST4q16a : VST4WB<0b0100, "vst4.16">; +def VST4q32a : VST4WB<0b1000, "vst4.32">; // vst4 to double-spaced odd registers. -def VST4q8b : VST4WB<"vst4.8">; -def VST4q16b : VST4WB<"vst4.16">; -def VST4q32b : VST4WB<"vst4.32">; +def VST4q8b : VST4WB<0b0000, "vst4.8">; +def VST4q16b : VST4WB<0b0100, "vst4.16">; +def VST4q32b : VST4WB<0b1000, "vst4.32">; + +// VST1LN : Vector Store (single element from one lane) +// FIXME: Not yet implemented. // VST2LN : Vector Store (single 2-element structure from one lane) -class VST2LND - : NLdSt<(outs), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane), +class VST2LND op11_8, string OpcodeStr> + : NLdSt<1,0b00,op11_8,0b0000, (outs), + (ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VST, !strconcat(OpcodeStr, "\t\\{$src1[$lane],$src2[$lane]\\}, $addr"), "", []>; -def VST2LNd8 : VST2LND<"vst2.8">; -def VST2LNd16 : VST2LND<"vst2.16">; -def VST2LNd32 : VST2LND<"vst2.32">; +def VST2LNd8 : VST2LND<0b0000, "vst2.8">; +def VST2LNd16 : VST2LND<0b0100, "vst2.16">; +def VST2LNd32 : VST2LND<0b1000, "vst2.32">; // VST3LN : Vector Store (single 3-element structure from one lane) -class VST3LND - : NLdSt<(outs), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, - nohash_imm:$lane), IIC_VST, +class VST3LND op11_8, string OpcodeStr> + : NLdSt<1,0b00,op11_8,0b0000, (outs), + (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, + nohash_imm:$lane), IIC_VST, !strconcat(OpcodeStr, "\t\\{$src1[$lane],$src2[$lane],$src3[$lane]\\}, $addr"), "", []>; -def VST3LNd8 : VST3LND<"vst3.8">; -def VST3LNd16 : VST3LND<"vst3.16">; -def VST3LNd32 : VST3LND<"vst3.32">; +def VST3LNd8 : VST3LND<0b0010, "vst3.8">; +def VST3LNd16 : VST3LND<0b0110, 
"vst3.16">; +def VST3LNd32 : VST3LND<0b1010, "vst3.32">; // VST4LN : Vector Store (single 4-element structure from one lane) -class VST4LND - : NLdSt<(outs), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, - DPR:$src4, nohash_imm:$lane), IIC_VST, +class VST4LND op11_8, string OpcodeStr> + : NLdSt<1,0b00,op11_8,0b0000, (outs), + (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, + nohash_imm:$lane), IIC_VST, !strconcat(OpcodeStr, "\t\\{$src1[$lane],$src2[$lane],$src3[$lane],$src4[$lane]\\}, $addr"), "", []>; -def VST4LNd8 : VST4LND<"vst4.8">; -def VST4LNd16 : VST4LND<"vst4.16">; -def VST4LNd32 : VST4LND<"vst4.32">; +def VST4LNd8 : VST4LND<0b0011, "vst4.8">; +def VST4LNd16 : VST4LND<0b0111, "vst4.16">; +def VST4LNd32 : VST4LND<0b1011, "vst4.32">; } // mayStore = 1, hasExtraSrcRegAllocReq = 1 From dpatel at apple.com Wed Oct 7 17:04:08 2009 From: dpatel at apple.com (Devang Patel) Date: Wed, 07 Oct 2009 22:04:08 -0000 Subject: [llvm-commits] [llvm] r83491 - in /llvm/trunk: include/llvm/Analysis/DebugInfo.h lib/Analysis/DebugInfo.cpp Message-ID: <200910072204.n97M48RX001398@zion.cs.uiuc.edu> Author: dpatel Date: Wed Oct 7 17:04:08 2009 New Revision: 83491 URL: http://llvm.org/viewvc/llvm-project?rev=83491&view=rev Log: Extract subprogram and compile unit information from the debug info attached to an instruction. Modified: llvm/trunk/include/llvm/Analysis/DebugInfo.h llvm/trunk/lib/Analysis/DebugInfo.cpp Modified: llvm/trunk/include/llvm/Analysis/DebugInfo.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/DebugInfo.h?rev=83491&r1=83490&r2=83491&view=diff ============================================================================== --- llvm/trunk/include/llvm/Analysis/DebugInfo.h (original) +++ llvm/trunk/include/llvm/Analysis/DebugInfo.h Wed Oct 7 17:04:08 2009 @@ -679,7 +679,10 @@ /// processType - Process DIType. void processType(DIType DT); - /// processSubprogram - Enumberate DISubprogram. 
+ /// processLexicalBlock - Process DILexicalBlock. + void processLexicalBlock(DILexicalBlock LB); + + /// processSubprogram - Process DISubprogram. void processSubprogram(DISubprogram SP); /// processStopPoint - Process DbgStopPointInst. Modified: llvm/trunk/lib/Analysis/DebugInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/DebugInfo.cpp?rev=83491&r1=83490&r2=83491&view=diff ============================================================================== --- llvm/trunk/lib/Analysis/DebugInfo.cpp (original) +++ llvm/trunk/lib/Analysis/DebugInfo.cpp Wed Oct 7 17:04:08 2009 @@ -966,6 +966,12 @@ /// processModule - Process entire module and collect debug info. void DebugInfoFinder::processModule(Module &M) { +#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN + MetadataContext &TheMetadata = M.getContext().getMetadata(); + unsigned MDDbgKind = TheMetadata.getMDKind("dbg"); + if (!MDDbgKind) + return; +#endif for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) for (Function::iterator FI = (*I).begin(), FE = (*I).end(); FI != FE; ++FI) for (BasicBlock::iterator BI = (*FI).begin(), BE = (*FI).end(); BI != BE; @@ -980,6 +986,18 @@ processRegionEnd(DRE); else if (DbgDeclareInst *DDI = dyn_cast(BI)) processDeclare(DDI); +#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN + else if (MDNode *L = TheMetadata.getMD(MDDbgKind, BI)) { + DILocation Loc(L); + DIScope S(Loc.getScope().getNode()); + if (S.isCompileUnit()) + addCompileUnit(DICompileUnit(S.getNode())); + else if (S.isSubprogram()) + processSubprogram(DISubprogram(S.getNode())); + else if (S.isLexicalBlock()) + processLexicalBlock(DILexicalBlock(S.getNode())); + } +#endif } NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.gv"); @@ -1021,6 +1039,17 @@ } } +/// processLexicalBlock +void DebugInfoFinder::processLexicalBlock(DILexicalBlock LB) { + if (LB.isNull()) + return; + DIScope Context = LB.getContext(); + if (Context.isLexicalBlock()) + return processLexicalBlock(DILexicalBlock(Context.getNode())); + else + return 
processSubprogram(DISubprogram(Context.getNode())); +} + /// processSubprogram - Process DISubprogram. void DebugInfoFinder::processSubprogram(DISubprogram SP) { if (SP.isNull()) From dpatel at apple.com Wed Oct 7 17:06:30 2009 From: dpatel at apple.com (Devang Patel) Date: Wed, 07 Oct 2009 22:06:30 -0000 Subject: [llvm-commits] [llvm-gcc-4.2] r83493 - /llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp Message-ID: <200910072206.n97M6UTm001718@zion.cs.uiuc.edu> Author: dpatel Date: Wed Oct 7 17:06:29 2009 New Revision: 83493 URL: http://llvm.org/viewvc/llvm-project?rev=83493&view=rev Log: Attach debug location info to allocas created for temporaries. Modified: llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp Modified: llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp?rev=83493&r1=83492&r2=83493&view=diff ============================================================================== --- llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp (original) +++ llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp Wed Oct 7 17:06:29 2009 @@ -301,6 +301,9 @@ // Create an alloca for the ResultDecl. 
Value *Tmp = TheTreeToLLVM->CreateTemporary(AI->getType()); Builder.CreateStore(AI, Tmp); +#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN + Builder.SetDebugLocation(cast(Tmp)); +#endif SET_DECL_LLVM(ResultDecl, Tmp); if (TheDebugInfo) { @@ -599,6 +602,9 @@ Tmp->setName(std::string(Name)+"_addr"); SET_DECL_LLVM(Args, Tmp); if (TheDebugInfo) { +#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN + Builder.SetDebugLocation(cast(Tmp)); +#endif TheDebugInfo->EmitDeclare(Args, dwarf::DW_TAG_arg_variable, Name, TREE_TYPE(Args), Tmp, Builder.GetInsertBlock()); @@ -694,10 +700,6 @@ } } } - if (TheDebugInfo) { - TheDebugInfo->EmitStopPoint(Fn, Builder.GetInsertBlock(), Builder); - TheDebugInfo->EmitFunctionEnd(Builder.GetInsertBlock(), true); - } if (RetVals.empty()) Builder.CreateRetVoid(); else if (!Fn->getReturnType()->isAggregateType()) { @@ -723,6 +725,11 @@ SI->setSuccessor(0, SI->getSuccessor(1)); } + if (TheDebugInfo) { + TheDebugInfo->EmitStopPoint(Fn, Builder.GetInsertBlock(), Builder); + TheDebugInfo->EmitFunctionEnd(Builder.GetInsertBlock(), true); + } + // Remove any cached LLVM values that are local to this function. Such values // may be deleted when the optimizers run, so would be dangerous to keep. eraseLocalLLVMValues(); @@ -1200,6 +1207,11 @@ /// CreateTempLoc - Like CreateTemporary, but returns a MemRef. MemRef TreeToLLVM::CreateTempLoc(const Type *Ty) { AllocaInst *AI = CreateTemporary(Ty); +#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN + if (TheDebugInfo) + Builder.SetDebugLocation(AI); +#endif + // MemRefs do not allow alignment 0. if (!AI->getAlignment()) AI->setAlignment(TD.getPrefTypeAlignment(Ty)); @@ -1699,6 +1711,12 @@ // Create a temporary for the value to be switched on. 
IndirectGotoValue = CreateTemporary(TD.getIntPtrType(Context)); +#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN + if (TheDebugInfo) { + TheDebugInfo->EmitStopPoint(Fn, Builder.GetInsertBlock(), Builder); + Builder.SetDebugLocation(cast(IndirectGotoValue)); + } +#endif // Create the block, emit a load, and emit the switch in the block. IndirectGotoBlock = BasicBlock::Create(Context, "indirectgoto"); @@ -2491,6 +2509,10 @@ // A value. Store to a temporary, and return the temporary's address. // Any future access to this argument will reuse the same address. Loc = TheTreeToLLVM->CreateTemporary(TheValue->getType()); +#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN + if (TheDebugInfo) + Builder.SetDebugLocation(cast(Loc)); +#endif Builder.CreateStore(TheValue, Loc); } return Loc; @@ -2864,6 +2886,10 @@ // Create a new temporary and set the VAR_DECL to use it as the llvm location. Value *NewTmp = CreateTemporary(FirstVal->getType()); +#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN + if (TheDebugInfo) + Builder.SetDebugLocation(cast(NewTmp)); +#endif SET_DECL_LLVM(Var, NewTmp); // Store the already existing initial value into the alloca. If the value @@ -6165,6 +6191,12 @@ // Emit it as a value, then store it to a temporary slot. Value *V2 = Emit(Arg2T, 0); Arg2 = CreateTemporary(V2->getType()); +#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN + if (TheDebugInfo) { + TheDebugInfo->EmitStopPoint(Fn, Builder.GetInsertBlock(), Builder); + Builder.SetDebugLocation(cast(Arg2)); + } +#endif Builder.CreateStore(V2, Arg2); } else { // If the target has aggregate valists, then the second argument @@ -6890,6 +6922,12 @@ } else { // If the input is a scalar, emit to a temporary. Value *Dest = CreateTemporary(ConvertType(TREE_TYPE(Op))); +#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN + if (TheDebugInfo) { + TheDebugInfo->EmitStopPoint(Fn, Builder.GetInsertBlock(), Builder); + Builder.SetDebugLocation(cast(Dest)); + } +#endif Builder.CreateStore(Emit(Op, 0), Dest); // The type is the type of the expression. 
Dest = BitCastToType(Dest, From grosbach at apple.com Wed Oct 7 17:26:14 2009 From: grosbach at apple.com (Jim Grosbach) Date: Wed, 07 Oct 2009 22:26:14 -0000 Subject: [llvm-commits] [llvm] r83494 - in /llvm/trunk/lib/Target/ARM: Thumb1RegisterInfo.cpp Thumb1RegisterInfo.h Message-ID: <200910072226.n97MQFo6004239@zion.cs.uiuc.edu> Author: grosbach Date: Wed Oct 7 17:26:14 2009 New Revision: 83494 URL: http://llvm.org/viewvc/llvm-project?rev=83494&view=rev Log: Enable thumb1 register scavenging by default. Modified: llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.cpp llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.h Modified: llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.cpp?rev=83494&r1=83493&r2=83494&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.cpp (original) +++ llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.cpp Wed Oct 7 17:26:14 2009 @@ -37,11 +37,6 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; -// FIXME: This cmd line option conditionalizes the new register scavenging -// implemenation in PEI. Remove the option when scavenging works well enough -// to be the default. 
-extern cl::opt FrameIndexVirtualScavenging; - Thumb1RegisterInfo::Thumb1RegisterInfo(const ARMBaseInstrInfo &tii, const ARMSubtarget &sti) : ARMBaseRegisterInfo(tii, sti) { @@ -84,7 +79,13 @@ bool Thumb1RegisterInfo::requiresRegisterScavenging(const MachineFunction &MF) const { - return FrameIndexVirtualScavenging; + return true; +} + +bool +Thumb1RegisterInfo::requiresFrameIndexScavenging(const MachineFunction &MF) + const { + return true; } bool Thumb1RegisterInfo::hasReservedCallFrame(MachineFunction &MF) const { @@ -128,13 +129,7 @@ unsigned LdReg = DestReg; if (DestReg == ARM::SP) { assert(BaseReg == ARM::SP && "Unexpected!"); - if (FrameIndexVirtualScavenging) { - LdReg = MF.getRegInfo().createVirtualRegister(ARM::tGPRRegisterClass); - } else { - LdReg = ARM::R3; - BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::R12) - .addReg(ARM::R3, RegState::Kill); - } + LdReg = MF.getRegInfo().createVirtualRegister(ARM::tGPRRegisterClass); } if (NumBytes <= 255 && NumBytes >= 0) @@ -159,10 +154,6 @@ else MIB.addReg(LdReg).addReg(BaseReg, RegState::Kill); AddDefaultPred(MIB); - - if (!FrameIndexVirtualScavenging && DestReg == ARM::SP) - BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2tgpr), ARM::R3) - .addReg(ARM::R12, RegState::Kill); } /// calcNumMI - Returns the number of instructions required to materialize @@ -635,7 +626,6 @@ else // tLDR has an extra register operand. MI.addOperand(MachineOperand::CreateReg(0, false)); } else if (Desc.mayStore()) { - if (FrameIndexVirtualScavenging) { VReg = MF.getRegInfo().createVirtualRegister(ARM::tGPRRegisterClass); assert (Value && "Frame index virtual allocated, but Value arg is NULL!"); *Value = Offset; @@ -658,52 +648,6 @@ MI.addOperand(MachineOperand::CreateReg(FrameReg, false)); else // tSTR has an extra register operand. MI.addOperand(MachineOperand::CreateReg(0, false)); - } else { - // FIXME! This is horrific!!! We need register scavenging. - // Our temporary workaround has marked r3 unavailable. 
Of course, r3 is - // also a ABI register so it's possible that is is the register that is - // being storing here. If that's the case, we do the following: - // r12 = r2 - // Use r2 to materialize sp + offset - // str r3, r2 - // r2 = r12 - unsigned ValReg = MI.getOperand(0).getReg(); - unsigned TmpReg = ARM::R3; - bool UseRR = false; - if (ValReg == ARM::R3) { - BuildMI(MBB, II, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::R12) - .addReg(ARM::R2, RegState::Kill); - TmpReg = ARM::R2; - } - if (TmpReg == ARM::R3 && AFI->isR3LiveIn()) - BuildMI(MBB, II, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::R12) - .addReg(ARM::R3, RegState::Kill); - if (Opcode == ARM::tSpill) { - if (FrameReg == ARM::SP) - emitThumbRegPlusImmInReg(MBB, II, TmpReg, FrameReg, - Offset, false, TII, *this, dl); - else { - emitLoadConstPool(MBB, II, dl, TmpReg, 0, Offset); - UseRR = true; - } - } else - emitThumbRegPlusImmediate(MBB, II, TmpReg, FrameReg, Offset, TII, - *this, dl); - MI.setDesc(TII.get(ARM::tSTR)); - MI.getOperand(i).ChangeToRegister(TmpReg, false, false, true); - if (UseRR) // Use [reg, reg] addrmode. - MI.addOperand(MachineOperand::CreateReg(FrameReg, false)); - else // tSTR has an extra register operand. 
- MI.addOperand(MachineOperand::CreateReg(0, false)); - - MachineBasicBlock::iterator NII = next(II); - if (ValReg == ARM::R3) - BuildMI(MBB, NII, dl, TII.get(ARM::tMOVgpr2tgpr), ARM::R2) - .addReg(ARM::R12, RegState::Kill); - if (TmpReg == ARM::R3 && AFI->isR3LiveIn()) - BuildMI(MBB, NII, dl, TII.get(ARM::tMOVgpr2tgpr), ARM::R3) - .addReg(ARM::R12, RegState::Kill); - } } else assert(false && "Unexpected opcode!"); Modified: llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.h?rev=83494&r1=83493&r2=83494&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.h (original) +++ llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.h Wed Oct 7 17:26:14 2009 @@ -41,6 +41,7 @@ getPhysicalRegisterRegClass(unsigned Reg, EVT VT = MVT::Other) const; bool requiresRegisterScavenging(const MachineFunction &MF) const; + bool requiresFrameIndexScavenging(const MachineFunction &MF) const; bool hasReservedCallFrame(MachineFunction &MF) const; From grosbach at apple.com Wed Oct 7 17:26:32 2009 From: grosbach at apple.com (Jim Grosbach) Date: Wed, 07 Oct 2009 22:26:32 -0000 Subject: [llvm-commits] [llvm] r83496 - in /llvm/trunk: include/llvm/Target/TargetRegisterInfo.h lib/CodeGen/PrologEpilogInserter.cpp lib/CodeGen/PrologEpilogInserter.h Message-ID: <200910072226.n97MQWs5004301@zion.cs.uiuc.edu> Author: grosbach Date: Wed Oct 7 17:26:31 2009 New Revision: 83496 URL: http://llvm.org/viewvc/llvm-project?rev=83496&view=rev Log: Enable thumb1 register scavenging by default. 
Modified: llvm/trunk/include/llvm/Target/TargetRegisterInfo.h llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp llvm/trunk/lib/CodeGen/PrologEpilogInserter.h Modified: llvm/trunk/include/llvm/Target/TargetRegisterInfo.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetRegisterInfo.h?rev=83496&r1=83495&r2=83496&view=diff ============================================================================== --- llvm/trunk/include/llvm/Target/TargetRegisterInfo.h (original) +++ llvm/trunk/include/llvm/Target/TargetRegisterInfo.h Wed Oct 7 17:26:31 2009 @@ -561,6 +561,12 @@ return false; } + /// requiresFrameIndexScavenging - returns true if the target requires post + /// PEI scavenging of registers for materializing frame index constants. + virtual bool requiresFrameIndexScavenging(const MachineFunction &MF) const { + return false; + } + /// hasFP - Return true if the specified function should have a dedicated /// frame pointer register. For most targets this is true only if the function /// has variable sized allocas or if frame pointer elimination is disabled. Modified: llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp?rev=83496&r1=83495&r2=83496&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp (original) +++ llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp Wed Oct 7 17:26:31 2009 @@ -44,16 +44,6 @@ static RegisterPass X("prologepilog", "Prologue/Epilogue Insertion"); -// FIXME: For now, the frame index scavenging is off by default and only -// used by the Thumb1 target. When it's the default and replaces the current -// on-the-fly PEI scavenging for all targets, requiresRegisterScavenging() -// will replace this. 
-cl::opt -FrameIndexVirtualScavenging("enable-frame-index-scavenging", - cl::Hidden, - cl::desc("Enable frame index elimination with" - "virtual register scavenging")); - /// createPrologEpilogCodeInserter - This function returns a pass that inserts /// prolog and epilog code, and eliminates abstract frame references. /// @@ -66,6 +56,7 @@ const Function* F = Fn.getFunction(); const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo(); RS = TRI->requiresRegisterScavenging(Fn) ? new RegScavenger() : NULL; + FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(Fn); // Get MachineModuleInfo so that we can track the construction of the // frame. Modified: llvm/trunk/lib/CodeGen/PrologEpilogInserter.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/PrologEpilogInserter.h?rev=83496&r1=83495&r2=83496&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/PrologEpilogInserter.h (original) +++ llvm/trunk/lib/CodeGen/PrologEpilogInserter.h Wed Oct 7 17:26:31 2009 @@ -95,6 +95,11 @@ // functions. bool ShrinkWrapThisFunction; + // Flag to control whether to use the register scavenger to resolve + // frame index materialization registers. Set according to + // TRI->requiresFrameIndexScavenging() for the current function. + bool FrameIndexVirtualScavenging; + // When using the scavenger post-pass to resolve frame reference // materialization registers, maintain a map of the registers to // the constant value and SP adjustment associated with it. 
From bob.wilson at apple.com Wed Oct 7 17:30:19 2009 From: bob.wilson at apple.com (Bob Wilson) Date: Wed, 07 Oct 2009 22:30:19 -0000 Subject: [llvm-commits] [llvm] r83497 - in /llvm/trunk/test/CodeGen/ARM: vmax.ll vmin.ll vmla.ll vmlal.ll vmls.ll vmlsl.ll Message-ID: <200910072230.n97MUJtS004888@zion.cs.uiuc.edu> Author: bwilson Date: Wed Oct 7 17:30:19 2009 New Revision: 83497 URL: http://llvm.org/viewvc/llvm-project?rev=83497&view=rev Log: Convert more NEON tests to use FileCheck. Modified: llvm/trunk/test/CodeGen/ARM/vmax.ll llvm/trunk/test/CodeGen/ARM/vmin.ll llvm/trunk/test/CodeGen/ARM/vmla.ll llvm/trunk/test/CodeGen/ARM/vmlal.ll llvm/trunk/test/CodeGen/ARM/vmls.ll llvm/trunk/test/CodeGen/ARM/vmlsl.ll Modified: llvm/trunk/test/CodeGen/ARM/vmax.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vmax.ll?rev=83497&r1=83496&r2=83497&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/ARM/vmax.ll (original) +++ llvm/trunk/test/CodeGen/ARM/vmax.ll Wed Oct 7 17:30:19 2009 @@ -1,13 +1,8 @@ -; RUN: llc < %s -march=arm -mattr=+neon > %t -; RUN: grep {vmax\\.s8} %t | count 2 -; RUN: grep {vmax\\.s16} %t | count 2 -; RUN: grep {vmax\\.s32} %t | count 2 -; RUN: grep {vmax\\.u8} %t | count 2 -; RUN: grep {vmax\\.u16} %t | count 2 -; RUN: grep {vmax\\.u32} %t | count 2 -; RUN: grep {vmax\\.f32} %t | count 2 +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s define <8 x i8> @vmaxs8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vmaxs8: +;CHECK: vmax.s8 %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B %tmp3 = call <8 x i8> @llvm.arm.neon.vmaxs.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) @@ -15,6 +10,8 @@ } define <4 x i16> @vmaxs16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vmaxs16: +;CHECK: vmax.s16 %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B %tmp3 = call <4 x i16> @llvm.arm.neon.vmaxs.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) @@ -22,6 +19,8 @@ } define <2 x i32> 
@vmaxs32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vmaxs32: +;CHECK: vmax.s32 %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B %tmp3 = call <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) @@ -29,6 +28,8 @@ } define <8 x i8> @vmaxu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vmaxu8: +;CHECK: vmax.u8 %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B %tmp3 = call <8 x i8> @llvm.arm.neon.vmaxu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) @@ -36,6 +37,8 @@ } define <4 x i16> @vmaxu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vmaxu16: +;CHECK: vmax.u16 %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B %tmp3 = call <4 x i16> @llvm.arm.neon.vmaxu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) @@ -43,6 +46,8 @@ } define <2 x i32> @vmaxu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vmaxu32: +;CHECK: vmax.u32 %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B %tmp3 = call <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) @@ -50,6 +55,8 @@ } define <2 x float> @vmaxf32(<2 x float>* %A, <2 x float>* %B) nounwind { +;CHECK: vmaxf32: +;CHECK: vmax.f32 %tmp1 = load <2 x float>* %A %tmp2 = load <2 x float>* %B %tmp3 = call <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float> %tmp1, <2 x float> %tmp2) @@ -57,6 +64,8 @@ } define <16 x i8> @vmaxQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vmaxQs8: +;CHECK: vmax.s8 %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B %tmp3 = call <16 x i8> @llvm.arm.neon.vmaxs.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) @@ -64,6 +73,8 @@ } define <8 x i16> @vmaxQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vmaxQs16: +;CHECK: vmax.s16 %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B %tmp3 = call <8 x i16> @llvm.arm.neon.vmaxs.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) @@ -71,6 +82,8 @@ } define <4 x i32> @vmaxQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vmaxQs32: +;CHECK: vmax.s32 %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B %tmp3 = call <4 x i32> 
@llvm.arm.neon.vmaxs.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) @@ -78,6 +91,8 @@ } define <16 x i8> @vmaxQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vmaxQu8: +;CHECK: vmax.u8 %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B %tmp3 = call <16 x i8> @llvm.arm.neon.vmaxu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) @@ -85,6 +100,8 @@ } define <8 x i16> @vmaxQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vmaxQu16: +;CHECK: vmax.u16 %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B %tmp3 = call <8 x i16> @llvm.arm.neon.vmaxu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) @@ -92,6 +109,8 @@ } define <4 x i32> @vmaxQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vmaxQu32: +;CHECK: vmax.u32 %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B %tmp3 = call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) @@ -99,6 +118,8 @@ } define <4 x float> @vmaxQf32(<4 x float>* %A, <4 x float>* %B) nounwind { +;CHECK: vmaxQf32: +;CHECK: vmax.f32 %tmp1 = load <4 x float>* %A %tmp2 = load <4 x float>* %B %tmp3 = call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %tmp1, <4 x float> %tmp2) Modified: llvm/trunk/test/CodeGen/ARM/vmin.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vmin.ll?rev=83497&r1=83496&r2=83497&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/ARM/vmin.ll (original) +++ llvm/trunk/test/CodeGen/ARM/vmin.ll Wed Oct 7 17:30:19 2009 @@ -1,13 +1,8 @@ -; RUN: llc < %s -march=arm -mattr=+neon > %t -; RUN: grep {vmin\\.s8} %t | count 2 -; RUN: grep {vmin\\.s16} %t | count 2 -; RUN: grep {vmin\\.s32} %t | count 2 -; RUN: grep {vmin\\.u8} %t | count 2 -; RUN: grep {vmin\\.u16} %t | count 2 -; RUN: grep {vmin\\.u32} %t | count 2 -; RUN: grep {vmin\\.f32} %t | count 2 +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s define <8 x i8> @vmins8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vmins8: +;CHECK: vmin.s8 %tmp1 = load <8 x i8>* %A 
%tmp2 = load <8 x i8>* %B %tmp3 = call <8 x i8> @llvm.arm.neon.vmins.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) @@ -15,6 +10,8 @@ } define <4 x i16> @vmins16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vmins16: +;CHECK: vmin.s16 %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B %tmp3 = call <4 x i16> @llvm.arm.neon.vmins.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) @@ -22,6 +19,8 @@ } define <2 x i32> @vmins32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vmins32: +;CHECK: vmin.s32 %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B %tmp3 = call <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) @@ -29,6 +28,8 @@ } define <8 x i8> @vminu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vminu8: +;CHECK: vmin.u8 %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B %tmp3 = call <8 x i8> @llvm.arm.neon.vminu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) @@ -36,6 +37,8 @@ } define <4 x i16> @vminu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vminu16: +;CHECK: vmin.u16 %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B %tmp3 = call <4 x i16> @llvm.arm.neon.vminu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) @@ -43,6 +46,8 @@ } define <2 x i32> @vminu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vminu32: +;CHECK: vmin.u32 %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B %tmp3 = call <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) @@ -50,6 +55,8 @@ } define <2 x float> @vminf32(<2 x float>* %A, <2 x float>* %B) nounwind { +;CHECK: vminf32: +;CHECK: vmin.f32 %tmp1 = load <2 x float>* %A %tmp2 = load <2 x float>* %B %tmp3 = call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> %tmp1, <2 x float> %tmp2) @@ -57,6 +64,8 @@ } define <16 x i8> @vminQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vminQs8: +;CHECK: vmin.s8 %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B %tmp3 = call <16 x i8> @llvm.arm.neon.vmins.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) @@ -64,6 +73,8 @@ } define <8 x i16> @vminQs16(<8 x i16>* %A, 
<8 x i16>* %B) nounwind { +;CHECK: vminQs16: +;CHECK: vmin.s16 %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B %tmp3 = call <8 x i16> @llvm.arm.neon.vmins.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) @@ -71,6 +82,8 @@ } define <4 x i32> @vminQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vminQs32: +;CHECK: vmin.s32 %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B %tmp3 = call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) @@ -78,6 +91,8 @@ } define <16 x i8> @vminQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vminQu8: +;CHECK: vmin.u8 %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B %tmp3 = call <16 x i8> @llvm.arm.neon.vminu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) @@ -85,6 +100,8 @@ } define <8 x i16> @vminQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vminQu16: +;CHECK: vmin.u16 %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B %tmp3 = call <8 x i16> @llvm.arm.neon.vminu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) @@ -92,6 +109,8 @@ } define <4 x i32> @vminQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vminQu32: +;CHECK: vmin.u32 %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B %tmp3 = call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) @@ -99,6 +118,8 @@ } define <4 x float> @vminQf32(<4 x float>* %A, <4 x float>* %B) nounwind { +;CHECK: vminQf32: +;CHECK: vmin.f32 %tmp1 = load <4 x float>* %A %tmp2 = load <4 x float>* %B %tmp3 = call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %tmp1, <4 x float> %tmp2) Modified: llvm/trunk/test/CodeGen/ARM/vmla.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vmla.ll?rev=83497&r1=83496&r2=83497&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/ARM/vmla.ll (original) +++ llvm/trunk/test/CodeGen/ARM/vmla.ll Wed Oct 7 17:30:19 2009 @@ -1,10 +1,8 @@ -; RUN: llc < %s -march=arm -mattr=+neon > %t -; RUN: grep {vmla\\.i8} %t | count 2 -; RUN: 
grep {vmla\\.i16} %t | count 2 -; RUN: grep {vmla\\.i32} %t | count 2 -; RUN: grep {vmla\\.f32} %t | count 2 +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s define <8 x i8> @vmlai8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8> * %C) nounwind { +;CHECK: vmlai8: +;CHECK: vmla.i8 %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B %tmp3 = load <8 x i8>* %C @@ -14,6 +12,8 @@ } define <4 x i16> @vmlai16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind { +;CHECK: vmlai16: +;CHECK: vmla.i16 %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B %tmp3 = load <4 x i16>* %C @@ -23,6 +23,8 @@ } define <2 x i32> @vmlai32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind { +;CHECK: vmlai32: +;CHECK: vmla.i32 %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B %tmp3 = load <2 x i32>* %C @@ -32,6 +34,8 @@ } define <2 x float> @vmlaf32(<2 x float>* %A, <2 x float>* %B, <2 x float>* %C) nounwind { +;CHECK: vmlaf32: +;CHECK: vmla.f32 %tmp1 = load <2 x float>* %A %tmp2 = load <2 x float>* %B %tmp3 = load <2 x float>* %C @@ -41,6 +45,8 @@ } define <16 x i8> @vmlaQi8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8> * %C) nounwind { +;CHECK: vmlaQi8: +;CHECK: vmla.i8 %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B %tmp3 = load <16 x i8>* %C @@ -50,6 +56,8 @@ } define <8 x i16> @vmlaQi16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind { +;CHECK: vmlaQi16: +;CHECK: vmla.i16 %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B %tmp3 = load <8 x i16>* %C @@ -59,6 +67,8 @@ } define <4 x i32> @vmlaQi32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind { +;CHECK: vmlaQi32: +;CHECK: vmla.i32 %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B %tmp3 = load <4 x i32>* %C @@ -68,6 +78,8 @@ } define <4 x float> @vmlaQf32(<4 x float>* %A, <4 x float>* %B, <4 x float>* %C) nounwind { +;CHECK: vmlaQf32: +;CHECK: vmla.f32 %tmp1 = load <4 x float>* %A %tmp2 = load <4 x float>* %B %tmp3 = load <4 x float>* %C Modified: llvm/trunk/test/CodeGen/ARM/vmlal.ll URL: 
http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vmlal.ll?rev=83497&r1=83496&r2=83497&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/ARM/vmlal.ll (original) +++ llvm/trunk/test/CodeGen/ARM/vmlal.ll Wed Oct 7 17:30:19 2009 @@ -1,12 +1,8 @@ -; RUN: llc < %s -march=arm -mattr=+neon > %t -; RUN: grep {vmlal\\.s8} %t | count 1 -; RUN: grep {vmlal\\.s16} %t | count 1 -; RUN: grep {vmlal\\.s32} %t | count 1 -; RUN: grep {vmlal\\.u8} %t | count 1 -; RUN: grep {vmlal\\.u16} %t | count 1 -; RUN: grep {vmlal\\.u32} %t | count 1 +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s define <8 x i16> @vmlals8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { +;CHECK: vmlals8: +;CHECK: vmlal.s8 %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i8>* %B %tmp3 = load <8 x i8>* %C @@ -15,6 +11,8 @@ } define <4 x i32> @vmlals16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind { +;CHECK: vmlals16: +;CHECK: vmlal.s16 %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i16>* %B %tmp3 = load <4 x i16>* %C @@ -23,6 +21,8 @@ } define <2 x i64> @vmlals32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind { +;CHECK: vmlals32: +;CHECK: vmlal.s32 %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i32>* %B %tmp3 = load <2 x i32>* %C @@ -31,6 +31,8 @@ } define <8 x i16> @vmlalu8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { +;CHECK: vmlalu8: +;CHECK: vmlal.u8 %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i8>* %B %tmp3 = load <8 x i8>* %C @@ -39,6 +41,8 @@ } define <4 x i32> @vmlalu16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind { +;CHECK: vmlalu16: +;CHECK: vmlal.u16 %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i16>* %B %tmp3 = load <4 x i16>* %C @@ -47,6 +51,8 @@ } define <2 x i64> @vmlalu32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind { +;CHECK: vmlalu32: +;CHECK: vmlal.u32 %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i32>* %B %tmp3 = load <2 x i32>* %C Modified: 
llvm/trunk/test/CodeGen/ARM/vmls.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vmls.ll?rev=83497&r1=83496&r2=83497&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/ARM/vmls.ll (original) +++ llvm/trunk/test/CodeGen/ARM/vmls.ll Wed Oct 7 17:30:19 2009 @@ -1,10 +1,8 @@ -; RUN: llc < %s -march=arm -mattr=+neon > %t -; RUN: grep {vmls\\.i8} %t | count 2 -; RUN: grep {vmls\\.i16} %t | count 2 -; RUN: grep {vmls\\.i32} %t | count 2 -; RUN: grep {vmls\\.f32} %t | count 2 +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s define <8 x i8> @vmlsi8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8> * %C) nounwind { +;CHECK: vmlsi8: +;CHECK: vmls.i8 %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B %tmp3 = load <8 x i8>* %C @@ -14,6 +12,8 @@ } define <4 x i16> @vmlsi16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind { +;CHECK: vmlsi16: +;CHECK: vmls.i16 %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B %tmp3 = load <4 x i16>* %C @@ -23,6 +23,8 @@ } define <2 x i32> @vmlsi32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind { +;CHECK: vmlsi32: +;CHECK: vmls.i32 %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B %tmp3 = load <2 x i32>* %C @@ -32,6 +34,8 @@ } define <2 x float> @vmlsf32(<2 x float>* %A, <2 x float>* %B, <2 x float>* %C) nounwind { +;CHECK: vmlsf32: +;CHECK: vmls.f32 %tmp1 = load <2 x float>* %A %tmp2 = load <2 x float>* %B %tmp3 = load <2 x float>* %C @@ -41,6 +45,8 @@ } define <16 x i8> @vmlsQi8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8> * %C) nounwind { +;CHECK: vmlsQi8: +;CHECK: vmls.i8 %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B %tmp3 = load <16 x i8>* %C @@ -50,6 +56,8 @@ } define <8 x i16> @vmlsQi16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind { +;CHECK: vmlsQi16: +;CHECK: vmls.i16 %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B %tmp3 = load <8 x i16>* %C @@ -59,6 +67,8 @@ } define <4 x i32> @vmlsQi32(<4 x i32>* %A, <4 x i32>* %B, <4 
x i32>* %C) nounwind { +;CHECK: vmlsQi32: +;CHECK: vmls.i32 %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B %tmp3 = load <4 x i32>* %C @@ -68,6 +78,8 @@ } define <4 x float> @vmlsQf32(<4 x float>* %A, <4 x float>* %B, <4 x float>* %C) nounwind { +;CHECK: vmlsQf32: +;CHECK: vmls.f32 %tmp1 = load <4 x float>* %A %tmp2 = load <4 x float>* %B %tmp3 = load <4 x float>* %C Modified: llvm/trunk/test/CodeGen/ARM/vmlsl.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vmlsl.ll?rev=83497&r1=83496&r2=83497&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/ARM/vmlsl.ll (original) +++ llvm/trunk/test/CodeGen/ARM/vmlsl.ll Wed Oct 7 17:30:19 2009 @@ -1,12 +1,8 @@ -; RUN: llc < %s -march=arm -mattr=+neon > %t -; RUN: grep {vmlsl\\.s8} %t | count 1 -; RUN: grep {vmlsl\\.s16} %t | count 1 -; RUN: grep {vmlsl\\.s32} %t | count 1 -; RUN: grep {vmlsl\\.u8} %t | count 1 -; RUN: grep {vmlsl\\.u16} %t | count 1 -; RUN: grep {vmlsl\\.u32} %t | count 1 +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s define <8 x i16> @vmlsls8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { +;CHECK: vmlsls8: +;CHECK: vmlsl.s8 %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i8>* %B %tmp3 = load <8 x i8>* %C @@ -15,6 +11,8 @@ } define <4 x i32> @vmlsls16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind { +;CHECK: vmlsls16: +;CHECK: vmlsl.s16 %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i16>* %B %tmp3 = load <4 x i16>* %C @@ -23,6 +21,8 @@ } define <2 x i64> @vmlsls32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind { +;CHECK: vmlsls32: +;CHECK: vmlsl.s32 %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i32>* %B %tmp3 = load <2 x i32>* %C @@ -31,6 +31,8 @@ } define <8 x i16> @vmlslu8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { +;CHECK: vmlslu8: +;CHECK: vmlsl.u8 %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i8>* %B %tmp3 = load <8 x i8>* %C @@ -39,6 +41,8 @@ } define <4 x i32> @vmlslu16(<4 x 
i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind { +;CHECK: vmlslu16: +;CHECK: vmlsl.u16 %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i16>* %B %tmp3 = load <4 x i16>* %C @@ -47,6 +51,8 @@ } define <2 x i64> @vmlslu32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind { +;CHECK: vmlslu32: +;CHECK: vmlsl.u32 %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i32>* %B %tmp3 = load <2 x i32>* %C From dalej at apple.com Wed Oct 7 17:47:21 2009 From: dalej at apple.com (Dale Johannesen) Date: Wed, 07 Oct 2009 22:47:21 -0000 Subject: [llvm-commits] [llvm] r83499 - in /llvm/trunk: lib/Target/X86/X86ISelLowering.cpp test/CodeGen/X86/inline-asm-R-constraint.ll Message-ID: <200910072247.n97MlLIg007038@zion.cs.uiuc.edu> Author: johannes Date: Wed Oct 7 17:47:20 2009 New Revision: 83499 URL: http://llvm.org/viewvc/llvm-project?rev=83499&view=rev Log: Fix handling of x86 'R' constraint. Added: llvm/trunk/test/CodeGen/X86/inline-asm-R-constraint.ll Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=83499&r1=83498&r2=83499&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original) +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Wed Oct 7 17:47:20 2009 @@ -9397,7 +9397,6 @@ switch (Constraint[0]) { default: break; case 'r': // GENERAL_REGS - case 'R': // LEGACY_REGS case 'l': // INDEX_REGS if (VT == MVT::i8) return std::make_pair(0U, X86::GR8RegisterClass); @@ -9406,6 +9405,14 @@ if (VT == MVT::i32 || !Subtarget->is64Bit()) return std::make_pair(0U, X86::GR32RegisterClass); return std::make_pair(0U, X86::GR64RegisterClass); + case 'R': // LEGACY_REGS + if (VT == MVT::i8) + return std::make_pair(0U, X86::GR8_NOREXRegisterClass); + if (VT == MVT::i16) + return std::make_pair(0U, X86::GR16_NOREXRegisterClass); + if (VT == MVT::i32 || 
!Subtarget->is64Bit()) + return std::make_pair(0U, X86::GR32_NOREXRegisterClass); + return std::make_pair(0U, X86::GR64_NOREXRegisterClass); case 'f': // FP Stack registers. // If SSE is enabled for this VT, use f80 to ensure the isel moves the // value to the correct fpstack register class. Added: llvm/trunk/test/CodeGen/X86/inline-asm-R-constraint.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/inline-asm-R-constraint.ll?rev=83499&view=auto ============================================================================== --- llvm/trunk/test/CodeGen/X86/inline-asm-R-constraint.ll (added) +++ llvm/trunk/test/CodeGen/X86/inline-asm-R-constraint.ll Wed Oct 7 17:47:20 2009 @@ -0,0 +1,18 @@ +; RUN: llc -march=x86-64 < %s | FileCheck %s +; 7282062 +; ModuleID = '' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-apple-darwin10.0" + +define void @udiv8(i8* %quotient, i16 zeroext %a, i8 zeroext %b, i8 zeroext %c, i8* %remainder) nounwind ssp { +entry: +; CHECK: udiv8: +; CHECK-NOT: movb %ah, (%r8) + %a_addr = alloca i16, align 2 ; [#uses=2] + %b_addr = alloca i8, align 1 ; [#uses=2] + store i16 %a, i16* %a_addr + store i8 %b, i8* %b_addr + call void asm "\09\09movw\09$2, %ax\09\09\0A\09\09divb\09$3\09\09\09\0A\09\09movb\09%al, $0\09\0A\09\09movb %ah, ($4)", "=*m,=*m,*m,*m,R,~{dirflag},~{fpsr},~{flags},~{ax}"(i8* %quotient, i8* %remainder, i16* %a_addr, i8* %b_addr, i8* %remainder) nounwind + ret void +; CHECK: ret +} From sabre at nondot.org Wed Oct 7 17:49:30 2009 From: sabre at nondot.org (Chris Lattner) Date: Wed, 07 Oct 2009 22:49:30 -0000 Subject: [llvm-commits] [llvm] r83500 - in /llvm/trunk: include/llvm/CodeGen/LazyLiveness.h lib/CodeGen/CMakeLists.txt Message-ID: <200910072249.n97MnUU3007319@zion.cs.uiuc.edu> Author: lattner Date: Wed Oct 7 17:49:30 2009 New Revision: 83500 URL: 
http://llvm.org/viewvc/llvm-project?rev=83500&view=rev Log: second half of lazy liveness removal. Removed: llvm/trunk/include/llvm/CodeGen/LazyLiveness.h Modified: llvm/trunk/lib/CodeGen/CMakeLists.txt Removed: llvm/trunk/include/llvm/CodeGen/LazyLiveness.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/LazyLiveness.h?rev=83499&view=auto ============================================================================== --- llvm/trunk/include/llvm/CodeGen/LazyLiveness.h (original) +++ llvm/trunk/include/llvm/CodeGen/LazyLiveness.h (removed) @@ -1,64 +0,0 @@ -//===- LazyLiveness.h - Lazy, CFG-invariant liveness information ----------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This pass implements a lazy liveness analysis as per "Fast Liveness Checking -// for SSA-form Programs," by Boissinot, et al. 
-// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CODEGEN_LAZYLIVENESS_H -#define LLVM_CODEGEN_LAZYLIVENESS_H - -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineDominators.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/SparseBitVector.h" -#include - -namespace llvm { - -class MachineRegisterInfo; - -class LazyLiveness : public MachineFunctionPass { -public: - static char ID; // Pass identification, replacement for typeid - LazyLiveness() : MachineFunctionPass(&ID) { } - - void getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesAll(); - AU.addRequired(); - MachineFunctionPass::getAnalysisUsage(AU); - } - - bool runOnMachineFunction(MachineFunction &mf); - - bool vregLiveIntoMBB(unsigned vreg, MachineBasicBlock* MBB); - -private: - void computeBackedgeChain(MachineFunction& mf, MachineBasicBlock* MBB); - - typedef std::pair edge_t; - - MachineRegisterInfo* MRI; - - DenseMap preorder; - std::vector rev_preorder; - DenseMap > rv; - DenseMap > tv; - DenseSet backedges; - SparseBitVector<128> backedge_source; - SparseBitVector<128> backedge_target; - SparseBitVector<128> calculated; -}; - -} - -#endif - Modified: llvm/trunk/lib/CodeGen/CMakeLists.txt URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/CMakeLists.txt?rev=83500&r1=83499&r2=83500&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/CMakeLists.txt (original) +++ llvm/trunk/lib/CodeGen/CMakeLists.txt Wed Oct 7 17:49:30 2009 @@ -13,7 +13,6 @@ IntrinsicLowering.cpp LLVMTargetMachine.cpp LatencyPriorityQueue.cpp - LazyLiveness.cpp LiveInterval.cpp LiveIntervalAnalysis.cpp LiveStackAnalysis.cpp From grosbach at apple.com Wed Oct 7 17:49:41 2009 From: grosbach at apple.com (Jim Grosbach) Date: Wed, 07 Oct 2009 22:49:41 -0000 Subject: [llvm-commits] [llvm] r83501 - in /llvm/trunk: 
include/llvm/Target/TargetRegisterInfo.h lib/CodeGen/PrologEpilogInserter.cpp lib/CodeGen/PrologEpilogInserter.h lib/Target/ARM/Thumb1RegisterInfo.cpp lib/Target/ARM/Thumb1RegisterInfo.h Message-ID: <200910072249.n97Mng8j007359@zion.cs.uiuc.edu> Author: grosbach Date: Wed Oct 7 17:49:41 2009 New Revision: 83501 URL: http://llvm.org/viewvc/llvm-project?rev=83501&view=rev Log: reverting thumb1 scavenging default due to test failure while I figure out what's up. Modified: llvm/trunk/include/llvm/Target/TargetRegisterInfo.h llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp llvm/trunk/lib/CodeGen/PrologEpilogInserter.h llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.cpp llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.h Modified: llvm/trunk/include/llvm/Target/TargetRegisterInfo.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetRegisterInfo.h?rev=83501&r1=83500&r2=83501&view=diff ============================================================================== --- llvm/trunk/include/llvm/Target/TargetRegisterInfo.h (original) +++ llvm/trunk/include/llvm/Target/TargetRegisterInfo.h Wed Oct 7 17:49:41 2009 @@ -561,12 +561,6 @@ return false; } - /// requiresFrameIndexScavenging - returns true if the target requires post - /// PEI scavenging of registers for materializing frame index constants. - virtual bool requiresFrameIndexScavenging(const MachineFunction &MF) const { - return false; - } - /// hasFP - Return true if the specified function should have a dedicated /// frame pointer register. For most targets this is true only if the function /// has variable sized allocas or if frame pointer elimination is disabled. 
Modified: llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp?rev=83501&r1=83500&r2=83501&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp (original) +++ llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp Wed Oct 7 17:49:41 2009 @@ -44,6 +44,16 @@ static RegisterPass X("prologepilog", "Prologue/Epilogue Insertion"); +// FIXME: For now, the frame index scavenging is off by default and only +// used by the Thumb1 target. When it's the default and replaces the current +// on-the-fly PEI scavenging for all targets, requiresRegisterScavenging() +// will replace this. +cl::opt<bool> +FrameIndexVirtualScavenging("enable-frame-index-scavenging", + cl::Hidden, + cl::desc("Enable frame index elimination with" + "virtual register scavenging")); + /// createPrologEpilogCodeInserter - This function returns a pass that inserts /// prolog and epilog code, and eliminates abstract frame references. /// @@ -56,7 +66,6 @@ const Function* F = Fn.getFunction(); const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo(); RS = TRI->requiresRegisterScavenging(Fn) ? new RegScavenger() : NULL; - FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(Fn); // Get MachineModuleInfo so that we can track the construction of the // frame. Modified: llvm/trunk/lib/CodeGen/PrologEpilogInserter.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/PrologEpilogInserter.h?rev=83501&r1=83500&r2=83501&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/PrologEpilogInserter.h (original) +++ llvm/trunk/lib/CodeGen/PrologEpilogInserter.h Wed Oct 7 17:49:41 2009 @@ -95,11 +95,6 @@ // functions. bool ShrinkWrapThisFunction; - // Flag to control whether to use the register scavenger to resolve - // frame index materialization registers.
Set according to - // TRI->requiresFrameIndexScavenging() for the current function. - bool FrameIndexVirtualScavenging; - // When using the scavenger post-pass to resolve frame reference // materialization registers, maintain a map of the registers to // the constant value and SP adjustment associated with it. Modified: llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.cpp?rev=83501&r1=83500&r2=83501&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.cpp (original) +++ llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.cpp Wed Oct 7 17:49:41 2009 @@ -37,6 +37,11 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; +// FIXME: This cmd line option conditionalizes the new register scavenging +// implementation in PEI. Remove the option when scavenging works well enough +// to be the default. +extern cl::opt<bool> FrameIndexVirtualScavenging; + Thumb1RegisterInfo::Thumb1RegisterInfo(const ARMBaseInstrInfo &tii, const ARMSubtarget &sti) : ARMBaseRegisterInfo(tii, sti) { @@ -79,13 +84,7 @@ bool Thumb1RegisterInfo::requiresRegisterScavenging(const MachineFunction &MF) const { - return true; -} - -bool -Thumb1RegisterInfo::requiresFrameIndexScavenging(const MachineFunction &MF) - const { - return true; + return FrameIndexVirtualScavenging; } bool Thumb1RegisterInfo::hasReservedCallFrame(MachineFunction &MF) const { @@ -129,7 +128,13 @@ unsigned LdReg = DestReg; if (DestReg == ARM::SP) { assert(BaseReg == ARM::SP && "Unexpected!"); - LdReg = MF.getRegInfo().createVirtualRegister(ARM::tGPRRegisterClass); + if (FrameIndexVirtualScavenging) { + LdReg = MF.getRegInfo().createVirtualRegister(ARM::tGPRRegisterClass); + } else { + LdReg = ARM::R3; + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::R12) + .addReg(ARM::R3, RegState::Kill); + } } if (NumBytes <= 255 && NumBytes >= 0) @@ -154,6 +159,10 @@ else
MIB.addReg(LdReg).addReg(BaseReg, RegState::Kill); AddDefaultPred(MIB); + + if (!FrameIndexVirtualScavenging && DestReg == ARM::SP) + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2tgpr), ARM::R3) + .addReg(ARM::R12, RegState::Kill); } /// calcNumMI - Returns the number of instructions required to materialize @@ -626,6 +635,7 @@ else // tLDR has an extra register operand. MI.addOperand(MachineOperand::CreateReg(0, false)); } else if (Desc.mayStore()) { + if (FrameIndexVirtualScavenging) { VReg = MF.getRegInfo().createVirtualRegister(ARM::tGPRRegisterClass); assert (Value && "Frame index virtual allocated, but Value arg is NULL!"); *Value = Offset; @@ -648,6 +658,52 @@ MI.addOperand(MachineOperand::CreateReg(FrameReg, false)); else // tSTR has an extra register operand. MI.addOperand(MachineOperand::CreateReg(0, false)); + } else { + // FIXME! This is horrific!!! We need register scavenging. + // Our temporary workaround has marked r3 unavailable. Of course, r3 is + // also an ABI register so it's possible that it is the register that is + // being stored here.
If that's the case, we do the following: + // r12 = r2 + // Use r2 to materialize sp + offset + // str r3, r2 + // r2 = r12 + unsigned ValReg = MI.getOperand(0).getReg(); + unsigned TmpReg = ARM::R3; + bool UseRR = false; + if (ValReg == ARM::R3) { + BuildMI(MBB, II, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::R12) + .addReg(ARM::R2, RegState::Kill); + TmpReg = ARM::R2; + } + if (TmpReg == ARM::R3 && AFI->isR3LiveIn()) + BuildMI(MBB, II, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::R12) + .addReg(ARM::R3, RegState::Kill); + if (Opcode == ARM::tSpill) { + if (FrameReg == ARM::SP) + emitThumbRegPlusImmInReg(MBB, II, TmpReg, FrameReg, + Offset, false, TII, *this, dl); + else { + emitLoadConstPool(MBB, II, dl, TmpReg, 0, Offset); + UseRR = true; + } + } else + emitThumbRegPlusImmediate(MBB, II, TmpReg, FrameReg, Offset, TII, + *this, dl); + MI.setDesc(TII.get(ARM::tSTR)); + MI.getOperand(i).ChangeToRegister(TmpReg, false, false, true); + if (UseRR) // Use [reg, reg] addrmode. + MI.addOperand(MachineOperand::CreateReg(FrameReg, false)); + else // tSTR has an extra register operand. 
+ MI.addOperand(MachineOperand::CreateReg(0, false)); + + MachineBasicBlock::iterator NII = next(II); + if (ValReg == ARM::R3) + BuildMI(MBB, NII, dl, TII.get(ARM::tMOVgpr2tgpr), ARM::R2) + .addReg(ARM::R12, RegState::Kill); + if (TmpReg == ARM::R3 && AFI->isR3LiveIn()) + BuildMI(MBB, NII, dl, TII.get(ARM::tMOVgpr2tgpr), ARM::R3) + .addReg(ARM::R12, RegState::Kill); + } } else assert(false && "Unexpected opcode!"); Modified: llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.h?rev=83501&r1=83500&r2=83501&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.h (original) +++ llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.h Wed Oct 7 17:49:41 2009 @@ -41,7 +41,6 @@ getPhysicalRegisterRegClass(unsigned Reg, EVT VT = MVT::Other) const; bool requiresRegisterScavenging(const MachineFunction &MF) const; - bool requiresFrameIndexScavenging(const MachineFunction &MF) const; bool hasReservedCallFrame(MachineFunction &MF) const; From bob.wilson at apple.com Wed Oct 7 17:57:01 2009 From: bob.wilson at apple.com (Bob Wilson) Date: Wed, 07 Oct 2009 22:57:01 -0000 Subject: [llvm-commits] [llvm] r83502 - in /llvm/trunk: lib/Target/ARM/ARMISelDAGToDAG.cpp lib/Target/ARM/ARMInstrNEON.td lib/Target/ARM/NEONPreAllocPass.cpp test/CodeGen/ARM/vld2.ll Message-ID: <200910072257.n97Mv1qd008315@zion.cs.uiuc.edu> Author: bwilson Date: Wed Oct 7 17:57:01 2009 New Revision: 83502 URL: http://llvm.org/viewvc/llvm-project?rev=83502&view=rev Log: Add codegen support for NEON vld2 intrinsics with <1 x i64> vectors. 
Modified: llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp llvm/trunk/lib/Target/ARM/ARMInstrNEON.td llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp llvm/trunk/test/CodeGen/ARM/vld2.ll Modified: llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp?rev=83502&r1=83501&r2=83502&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp (original) +++ llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp Wed Oct 7 17:57:01 2009 @@ -1357,6 +1357,7 @@ case MVT::v4i16: Opc = ARM::VLD2d16; break; case MVT::v2f32: case MVT::v2i32: Opc = ARM::VLD2d32; break; + case MVT::v1i64: Opc = ARM::VLD2d64; break; } SDValue Chain = N->getOperand(0); const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Chain }; Modified: llvm/trunk/lib/Target/ARM/ARMInstrNEON.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrNEON.td?rev=83502&r1=83501&r2=83502&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrNEON.td (original) +++ llvm/trunk/lib/Target/ARM/ARMInstrNEON.td Wed Oct 7 17:57:01 2009 @@ -193,6 +193,9 @@ def VLD2d8 : VLD2D<0b0000, "vld2.8">; def VLD2d16 : VLD2D<0b0100, "vld2.16">; def VLD2d32 : VLD2D<0b1000, "vld2.32">; +def VLD2d64 : NLdSt<0,0b10,0b1010,0b1100, (outs DPR:$dst1, DPR:$dst2), + (ins addrmode6:$addr), IIC_VLD1, + "vld1.64\t\\{$dst1,$dst2\\}, $addr", "", []>; def VLD2q8 : VLD2Q<0b0000, "vld2.8">; def VLD2q16 : VLD2Q<0b0100, "vld2.16">; Modified: llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp?rev=83502&r1=83501&r2=83502&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp (original) +++ llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp Wed Oct 7 17:57:01 2009 @@ 
-49,6 +49,7 @@ case ARM::VLD2d8: case ARM::VLD2d16: case ARM::VLD2d32: + case ARM::VLD2d64: case ARM::VLD2LNd8: case ARM::VLD2LNd16: case ARM::VLD2LNd32: Modified: llvm/trunk/test/CodeGen/ARM/vld2.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vld2.ll?rev=83502&r1=83501&r2=83502&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/ARM/vld2.ll (original) +++ llvm/trunk/test/CodeGen/ARM/vld2.ll Wed Oct 7 17:57:01 2009 @@ -4,6 +4,7 @@ %struct.__neon_int16x4x2_t = type { <4 x i16>, <4 x i16> } %struct.__neon_int32x2x2_t = type { <2 x i32>, <2 x i32> } %struct.__neon_float32x2x2_t = type { <2 x float>, <2 x float> } +%struct.__neon_int64x1x2_t = type { <1 x i64>, <1 x i64> } %struct.__neon_int8x16x2_t = type { <16 x i8>, <16 x i8> } %struct.__neon_int16x8x2_t = type { <8 x i16>, <8 x i16> } @@ -50,6 +51,16 @@ ret <2 x float> %tmp4 } +define <1 x i64> @vld2i64(i64* %A) nounwind { +;CHECK: vld2i64: +;CHECK: vld1.64 + %tmp1 = call %struct.__neon_int64x1x2_t @llvm.arm.neon.vld2.v1i64(i64* %A) + %tmp2 = extractvalue %struct.__neon_int64x1x2_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int64x1x2_t %tmp1, 1 + %tmp4 = add <1 x i64> %tmp2, %tmp3 + ret <1 x i64> %tmp4 +} + define <16 x i8> @vld2Qi8(i8* %A) nounwind { ;CHECK: vld2Qi8: ;CHECK: vld2.8 @@ -94,6 +105,7 @@ declare %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2.v4i16(i8*) nounwind readonly declare %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2.v2i32(i8*) nounwind readonly declare %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2.v2f32(i8*) nounwind readonly +declare %struct.__neon_int64x1x2_t @llvm.arm.neon.vld2.v1i64(i8*) nounwind readonly declare %struct.__neon_int8x16x2_t @llvm.arm.neon.vld2.v16i8(i8*) nounwind readonly declare %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2.v8i16(i8*) nounwind readonly From jyasskin at google.com Wed Oct 7 18:22:42 2009 From: jyasskin at google.com (Jeffrey Yasskin) Date: Wed, 07 Oct 2009 
23:22:42 -0000 Subject: [llvm-commits] [llvm] r83503 - in /llvm/trunk: autoconf/configure.ac configure Message-ID: <200910072322.n97NMhaa011612@zion.cs.uiuc.edu> Author: jyasskin Date: Wed Oct 7 18:22:42 2009 New Revision: 83503 URL: http://llvm.org/viewvc/llvm-project?rev=83503&view=rev Log: Fix the OProfile part of PR5018. This fixes --without-oprofile, makes it the default, and works around a broken libopagent on some Debian systems. Modified: llvm/trunk/autoconf/configure.ac llvm/trunk/configure Modified: llvm/trunk/autoconf/configure.ac URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/autoconf/configure.ac?rev=83503&r1=83502&r2=83503&view=diff ============================================================================== --- llvm/trunk/autoconf/configure.ac (original) +++ llvm/trunk/autoconf/configure.ac Wed Oct 7 18:22:42 2009 @@ -999,31 +999,30 @@ AC_SUBST(USE_OPROFILE, [1]) case "$withval" in /usr|yes) llvm_cv_oppath=/usr/lib/oprofile ;; + no) llvm_cv_oppath= + AC_SUBST(USE_OPROFILE, [0]) ;; *) llvm_cv_oppath="${withval}/lib/oprofile" CPPFLAGS="-I${withval}/include";; esac - LIBS="$LIBS -L${llvm_cv_oppath} -Wl,-rpath,${llvm_cv_oppath}" - AC_SEARCH_LIBS(op_open_agent, opagent, [], [ - echo "Error! You need to have libopagent around." - exit -1 - ]) - AC_CHECK_HEADER([opagent.h], [], [ - echo "Error! You need to have opagent.h around." - exit -1 - ]) + if test -n "$llvm_cv_oppath" ; then + LIBS="$LIBS -L${llvm_cv_oppath} -Wl,-rpath,${llvm_cv_oppath}" + dnl Work around http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=537744: + dnl libbfd is not included properly in libopagent in some Debian + dnl versions. If libbfd isn't found at all, we assume opagent works + dnl anyway. + AC_SEARCH_LIBS(bfd_init, bfd, [], []) + AC_SEARCH_LIBS(op_open_agent, opagent, [], [ + echo "Error! You need to have libopagent around." + exit -1 + ]) + AC_CHECK_HEADER([opagent.h], [], [ + echo "Error! You need to have opagent.h around." 
+ exit -1 + ]) + fi ], [ - llvm_cv_old_LIBS="$LIBS" - LIBS="$LIBS -L/usr/lib/oprofile -Wl,-rpath,/usr/lib/oprofile" - dnl If either the library or header aren't present, omit oprofile support. - AC_SEARCH_LIBS(op_open_agent, opagent, - [AC_SUBST(USE_OPROFILE, [1])], - [LIBS="$llvm_cv_old_LIBS" - AC_SUBST(USE_OPROFILE, [0])]) - AC_CHECK_HEADER([opagent.h], [], [ - LIBS="$llvm_cv_old_LIBS" - AC_SUBST(USE_OPROFILE, [0]) - ]) + AC_SUBST(USE_OPROFILE, [0]) ]) AC_DEFINE_UNQUOTED([USE_OPROFILE],$USE_OPROFILE, [Define if we have the oprofile JIT-support library]) Modified: llvm/trunk/configure URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/configure?rev=83503&r1=83502&r2=83503&view=diff ============================================================================== --- llvm/trunk/configure (original) +++ llvm/trunk/configure Wed Oct 7 18:22:42 2009 @@ -28539,13 +28539,17 @@ case "$withval" in /usr|yes) llvm_cv_oppath=/usr/lib/oprofile ;; + no) llvm_cv_oppath= + USE_OPROFILE=0 + ;; *) llvm_cv_oppath="${withval}/lib/oprofile" CPPFLAGS="-I${withval}/include";; esac - LIBS="$LIBS -L${llvm_cv_oppath} -Wl,-rpath,${llvm_cv_oppath}" - { echo "$as_me:$LINENO: checking for library containing op_open_agent" >&5 -echo $ECHO_N "checking for library containing op_open_agent... $ECHO_C" >&6; } -if test "${ac_cv_search_op_open_agent+set}" = set; then + if test -n "$llvm_cv_oppath" ; then + LIBS="$LIBS -L${llvm_cv_oppath} -Wl,-rpath,${llvm_cv_oppath}" + { echo "$as_me:$LINENO: checking for library containing bfd_init" >&5 +echo $ECHO_N "checking for library containing bfd_init... 
$ECHO_C" >&6; } +if test "${ac_cv_search_bfd_init+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else ac_func_search_save_LIBS=$LIBS @@ -28562,16 +28566,16 @@ #ifdef __cplusplus extern "C" #endif -char op_open_agent (); +char bfd_init (); int main () { -return op_open_agent (); +return bfd_init (); ; return 0; } _ACEOF -for ac_lib in '' opagent; do +for ac_lib in '' bfd; do if test -z "$ac_lib"; then ac_res="none required" else @@ -28612,7 +28616,7 @@ ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); }; }; then - ac_cv_search_op_open_agent=$ac_res + ac_cv_search_bfd_init=$ac_res else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 @@ -28622,201 +28626,27 @@ rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext - if test "${ac_cv_search_op_open_agent+set}" = set; then + if test "${ac_cv_search_bfd_init+set}" = set; then break fi done -if test "${ac_cv_search_op_open_agent+set}" = set; then +if test "${ac_cv_search_bfd_init+set}" = set; then : else - ac_cv_search_op_open_agent=no + ac_cv_search_bfd_init=no fi rm conftest.$ac_ext LIBS=$ac_func_search_save_LIBS fi -{ echo "$as_me:$LINENO: result: $ac_cv_search_op_open_agent" >&5 -echo "${ECHO_T}$ac_cv_search_op_open_agent" >&6; } -ac_res=$ac_cv_search_op_open_agent +{ echo "$as_me:$LINENO: result: $ac_cv_search_bfd_init" >&5 +echo "${ECHO_T}$ac_cv_search_bfd_init" >&6; } +ac_res=$ac_cv_search_bfd_init if test "$ac_res" != no; then test "$ac_res" = "none required" || LIBS="$ac_res $LIBS" -else - - echo "Error! You need to have libopagent around." - exit -1 - -fi - - if test "${ac_cv_header_opagent_h+set}" = set; then - { echo "$as_me:$LINENO: checking for opagent.h" >&5 -echo $ECHO_N "checking for opagent.h... 
$ECHO_C" >&6; } -if test "${ac_cv_header_opagent_h+set}" = set; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -fi -{ echo "$as_me:$LINENO: result: $ac_cv_header_opagent_h" >&5 -echo "${ECHO_T}$ac_cv_header_opagent_h" >&6; } -else - # Is the header compilable? -{ echo "$as_me:$LINENO: checking opagent.h usability" >&5 -echo $ECHO_N "checking opagent.h usability... $ECHO_C" >&6; } -cat >conftest.$ac_ext <<_ACEOF -/* confdefs.h. */ -_ACEOF -cat confdefs.h >>conftest.$ac_ext -cat >>conftest.$ac_ext <<_ACEOF -/* end confdefs.h. */ -$ac_includes_default -#include <opagent.h> -_ACEOF -rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 - ac_status=$? - grep -v '^ *+' conftest.er1 >conftest.err - rm -f conftest.er1 - cat conftest.err >&5 - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && - { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' - { (case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_try") 2>&5 - ac_status=$? - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); }; } && - { ac_try='test -s conftest.$ac_objext' - { (case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_try") 2>&5 - ac_status=$? - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); }; }; then - ac_header_compiler=yes -else - echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - - ac_header_compiler=no fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -{ echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 -echo "${ECHO_T}$ac_header_compiler" >&6; } - -# Is the header present?
-{ echo "$as_me:$LINENO: checking opagent.h presence" >&5 -echo $ECHO_N "checking opagent.h presence... $ECHO_C" >&6; } -cat >conftest.$ac_ext <<_ACEOF -/* confdefs.h. */ -_ACEOF -cat confdefs.h >>conftest.$ac_ext -cat >>conftest.$ac_ext <<_ACEOF -/* end confdefs.h. */ -#include <opagent.h> -_ACEOF -if { (ac_try="$ac_cpp conftest.$ac_ext" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 - ac_status=$? - grep -v '^ *+' conftest.er1 >conftest.err - rm -f conftest.er1 - cat conftest.err >&5 - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } >/dev/null; then - if test -s conftest.err; then - ac_cpp_err=$ac_c_preproc_warn_flag - ac_cpp_err=$ac_cpp_err$ac_c_werror_flag - else - ac_cpp_err= - fi -else - ac_cpp_err=yes -fi -if test -z "$ac_cpp_err"; then - ac_header_preproc=yes -else - echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - - ac_header_preproc=no -fi - -rm -f conftest.err conftest.$ac_ext -{ echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 -echo "${ECHO_T}$ac_header_preproc" >&6; } - -# So? What about this header? -case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in - yes:no: ) - { echo "$as_me:$LINENO: WARNING: opagent.h: accepted by the compiler, rejected by the preprocessor!" >&5 -echo "$as_me: WARNING: opagent.h: accepted by the compiler, rejected by the preprocessor!" >&2;} - { echo "$as_me:$LINENO: WARNING: opagent.h: proceeding with the compiler's result" >&5 -echo "$as_me: WARNING: opagent.h: proceeding with the compiler's result" >&2;} - ac_header_preproc=yes - ;; - no:yes:* ) - { echo "$as_me:$LINENO: WARNING: opagent.h: present but cannot be compiled" >&5 -echo "$as_me: WARNING: opagent.h: present but cannot be compiled" >&2;} - { echo "$as_me:$LINENO: WARNING: opagent.h: check for missing prerequisite headers?"
>&5 -echo "$as_me: WARNING: opagent.h: check for missing prerequisite headers?" >&2;} - { echo "$as_me:$LINENO: WARNING: opagent.h: see the Autoconf documentation" >&5 -echo "$as_me: WARNING: opagent.h: see the Autoconf documentation" >&2;} - { echo "$as_me:$LINENO: WARNING: opagent.h: section \"Present But Cannot Be Compiled\"" >&5 -echo "$as_me: WARNING: opagent.h: section \"Present But Cannot Be Compiled\"" >&2;} - { echo "$as_me:$LINENO: WARNING: opagent.h: proceeding with the preprocessor's result" >&5 -echo "$as_me: WARNING: opagent.h: proceeding with the preprocessor's result" >&2;} - { echo "$as_me:$LINENO: WARNING: opagent.h: in the future, the compiler will take precedence" >&5 -echo "$as_me: WARNING: opagent.h: in the future, the compiler will take precedence" >&2;} - ( cat <<\_ASBOX -## ----------------------------------- ## -## Report this to llvmbugs at cs.uiuc.edu ## -## ----------------------------------- ## -_ASBOX - ) | sed "s/^/$as_me: WARNING: /" >&2 - ;; -esac -{ echo "$as_me:$LINENO: checking for opagent.h" >&5 -echo $ECHO_N "checking for opagent.h... $ECHO_C" >&6; } -if test "${ac_cv_header_opagent_h+set}" = set; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -else - ac_cv_header_opagent_h=$ac_header_preproc -fi -{ echo "$as_me:$LINENO: result: $ac_cv_header_opagent_h" >&5 -echo "${ECHO_T}$ac_cv_header_opagent_h" >&6; } - -fi -if test $ac_cv_header_opagent_h = yes; then - : -else - - echo "Error! You need to have opagent.h around." - exit -1 - -fi - - - -else - - llvm_cv_old_LIBS="$LIBS" - LIBS="$LIBS -L/usr/lib/oprofile -Wl,-rpath,/usr/lib/oprofile" - { echo "$as_me:$LINENO: checking for library containing op_open_agent" >&5 + { echo "$as_me:$LINENO: checking for library containing op_open_agent" >&5 echo $ECHO_N "checking for library containing op_open_agent... 
$ECHO_C" >&6; } if test "${ac_cv_search_op_open_agent+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 @@ -28912,15 +28742,15 @@ ac_res=$ac_cv_search_op_open_agent if test "$ac_res" != no; then test "$ac_res" = "none required" || LIBS="$ac_res $LIBS" - USE_OPROFILE=1 else - LIBS="$llvm_cv_old_LIBS" - USE_OPROFILE=0 + + echo "Error! You need to have libopagent around." + exit -1 fi - if test "${ac_cv_header_opagent_h+set}" = set; then + if test "${ac_cv_header_opagent_h+set}" = set; then { echo "$as_me:$LINENO: checking for opagent.h" >&5 echo $ECHO_N "checking for opagent.h... $ECHO_C" >&6; } if test "${ac_cv_header_opagent_h+set}" = set; then @@ -29078,13 +28908,18 @@ : else - LIBS="$llvm_cv_old_LIBS" - USE_OPROFILE=0 - + echo "Error! You need to have opagent.h around." + exit -1 fi + fi + +else + + USE_OPROFILE=0 + fi From isanbard at gmail.com Wed Oct 7 18:25:17 2009 From: isanbard at gmail.com (Bill Wendling) Date: Wed, 07 Oct 2009 23:25:17 -0000 Subject: [llvm-commits] [llvm-gcc-4.2] r83504 - /llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp Message-ID: <200910072325.n97NPHM0011954@zion.cs.uiuc.edu> Author: void Date: Wed Oct 7 18:25:17 2009 New Revision: 83504 URL: http://llvm.org/viewvc/llvm-project?rev=83504&view=rev Log: Temporarily revert r83493. It was causing a bootstrapping failure. Modified: llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp Modified: llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp?rev=83504&r1=83503&r2=83504&view=diff ============================================================================== --- llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp (original) +++ llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp Wed Oct 7 18:25:17 2009 @@ -301,9 +301,6 @@ // Create an alloca for the ResultDecl. 
Value *Tmp = TheTreeToLLVM->CreateTemporary(AI->getType()); Builder.CreateStore(AI, Tmp); -#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN - Builder.SetDebugLocation(cast<Instruction>(Tmp)); -#endif SET_DECL_LLVM(ResultDecl, Tmp); if (TheDebugInfo) { @@ -602,9 +599,6 @@ Tmp->setName(std::string(Name)+"_addr"); SET_DECL_LLVM(Args, Tmp); if (TheDebugInfo) { -#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN - Builder.SetDebugLocation(cast<Instruction>(Tmp)); -#endif TheDebugInfo->EmitDeclare(Args, dwarf::DW_TAG_arg_variable, Name, TREE_TYPE(Args), Tmp, Builder.GetInsertBlock()); @@ -700,6 +694,10 @@ } } } + if (TheDebugInfo) { + TheDebugInfo->EmitStopPoint(Fn, Builder.GetInsertBlock(), Builder); + TheDebugInfo->EmitFunctionEnd(Builder.GetInsertBlock(), true); + } if (RetVals.empty()) Builder.CreateRetVoid(); else if (!Fn->getReturnType()->isAggregateType()) { @@ -725,11 +723,6 @@ SI->setSuccessor(0, SI->getSuccessor(1)); } - if (TheDebugInfo) { - TheDebugInfo->EmitStopPoint(Fn, Builder.GetInsertBlock(), Builder); - TheDebugInfo->EmitFunctionEnd(Builder.GetInsertBlock(), true); - } - // Remove any cached LLVM values that are local to this function. Such values // may be deleted when the optimizers run, so would be dangerous to keep. eraseLocalLLVMValues(); @@ -1207,11 +1200,6 @@ /// CreateTempLoc - Like CreateTemporary, but returns a MemRef. MemRef TreeToLLVM::CreateTempLoc(const Type *Ty) { AllocaInst *AI = CreateTemporary(Ty); -#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN - if (TheDebugInfo) - Builder.SetDebugLocation(AI); -#endif - // MemRefs do not allow alignment 0. if (!AI->getAlignment()) AI->setAlignment(TD.getPrefTypeAlignment(Ty)); @@ -1711,12 +1699,6 @@ // Create a temporary for the value to be switched on.
IndirectGotoValue = CreateTemporary(TD.getIntPtrType(Context)); -#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN - if (TheDebugInfo) { - TheDebugInfo->EmitStopPoint(Fn, Builder.GetInsertBlock(), Builder); - Builder.SetDebugLocation(cast<Instruction>(IndirectGotoValue)); - } -#endif // Create the block, emit a load, and emit the switch in the block. IndirectGotoBlock = BasicBlock::Create(Context, "indirectgoto"); @@ -2509,10 +2491,6 @@ // A value. Store to a temporary, and return the temporary's address. // Any future access to this argument will reuse the same address. Loc = TheTreeToLLVM->CreateTemporary(TheValue->getType()); -#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN - if (TheDebugInfo) - Builder.SetDebugLocation(cast<Instruction>(Loc)); -#endif Builder.CreateStore(TheValue, Loc); } return Loc; @@ -2886,10 +2864,6 @@ // Create a new temporary and set the VAR_DECL to use it as the llvm location. Value *NewTmp = CreateTemporary(FirstVal->getType()); -#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN - if (TheDebugInfo) - Builder.SetDebugLocation(cast<Instruction>(NewTmp)); -#endif SET_DECL_LLVM(Var, NewTmp); // Store the already existing initial value into the alloca. If the value @@ -6191,12 +6165,6 @@ // Emit it as a value, then store it to a temporary slot. Value *V2 = Emit(Arg2T, 0); Arg2 = CreateTemporary(V2->getType()); -#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN - if (TheDebugInfo) { - TheDebugInfo->EmitStopPoint(Fn, Builder.GetInsertBlock(), Builder); - Builder.SetDebugLocation(cast<Instruction>(Arg2)); - } -#endif Builder.CreateStore(V2, Arg2); } else { // If the target has aggregate valists, then the second argument @@ -6922,12 +6890,6 @@ } else { // If the input is a scalar, emit to a temporary. Value *Dest = CreateTemporary(ConvertType(TREE_TYPE(Op))); -#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN - if (TheDebugInfo) { - TheDebugInfo->EmitStopPoint(Fn, Builder.GetInsertBlock(), Builder); - Builder.SetDebugLocation(cast<Instruction>(Dest)); - } -#endif Builder.CreateStore(Emit(Op, 0), Dest); // The type is the type of the expression.
Dest = BitCastToType(Dest, From bob.wilson at apple.com Wed Oct 7 18:39:58 2009 From: bob.wilson at apple.com (Bob Wilson) Date: Wed, 07 Oct 2009 23:39:58 -0000 Subject: [llvm-commits] [llvm] r83506 - in /llvm/trunk: lib/Target/ARM/ARMISelDAGToDAG.cpp lib/Target/ARM/ARMInstrNEON.td lib/Target/ARM/NEONPreAllocPass.cpp test/CodeGen/ARM/vld3.ll Message-ID: <200910072339.n97NdwqA022772@zion.cs.uiuc.edu> Author: bwilson Date: Wed Oct 7 18:39:57 2009 New Revision: 83506 URL: http://llvm.org/viewvc/llvm-project?rev=83506&view=rev Log: Add codegen support for NEON vld3 intrinsics with <1 x i64> vectors. Modified: llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp llvm/trunk/lib/Target/ARM/ARMInstrNEON.td llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp llvm/trunk/test/CodeGen/ARM/vld3.ll Modified: llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp?rev=83506&r1=83505&r2=83506&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp (original) +++ llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp Wed Oct 7 18:39:57 2009 @@ -1396,6 +1396,7 @@ case MVT::v4i16: Opc = ARM::VLD3d16; break; case MVT::v2f32: case MVT::v2i32: Opc = ARM::VLD3d32; break; + case MVT::v1i64: Opc = ARM::VLD3d64; break; } SDValue Chain = N->getOperand(0); const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Chain }; Modified: llvm/trunk/lib/Target/ARM/ARMInstrNEON.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrNEON.td?rev=83506&r1=83505&r2=83506&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrNEON.td (original) +++ llvm/trunk/lib/Target/ARM/ARMInstrNEON.td Wed Oct 7 18:39:57 2009 @@ -215,6 +215,10 @@ def VLD3d8 : VLD3D<0b0000, "vld3.8">; def VLD3d16 : VLD3D<0b0100, "vld3.16">; def VLD3d32 : VLD3D<0b1000, "vld3.32">; +def VLD3d64 : 
NLdSt<0,0b10,0b0110,0b1100, + (outs DPR:$dst1, DPR:$dst2, DPR:$dst3), + (ins addrmode6:$addr), IIC_VLD1, + "vld1.64\t\\{$dst1,$dst2,$dst3\\}, $addr", "", []>; // vld3 to double-spaced even registers. def VLD3q8a : VLD3WB<0b0000, "vld3.8">; Modified: llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp?rev=83506&r1=83505&r2=83506&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp (original) +++ llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp Wed Oct 7 18:39:57 2009 @@ -67,6 +67,7 @@ case ARM::VLD3d8: case ARM::VLD3d16: case ARM::VLD3d32: + case ARM::VLD3d64: case ARM::VLD3LNd8: case ARM::VLD3LNd16: case ARM::VLD3LNd32: Modified: llvm/trunk/test/CodeGen/ARM/vld3.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vld3.ll?rev=83506&r1=83505&r2=83506&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/ARM/vld3.ll (original) +++ llvm/trunk/test/CodeGen/ARM/vld3.ll Wed Oct 7 18:39:57 2009 @@ -4,6 +4,7 @@ %struct.__neon_int16x4x3_t = type { <4 x i16>, <4 x i16>, <4 x i16> } %struct.__neon_int32x2x3_t = type { <2 x i32>, <2 x i32>, <2 x i32> } %struct.__neon_float32x2x3_t = type { <2 x float>, <2 x float>, <2 x float> } +%struct.__neon_int64x1x3_t = type { <1 x i64>, <1 x i64>, <1 x i64> } %struct.__neon_int8x16x3_t = type { <16 x i8>, <16 x i8>, <16 x i8> } %struct.__neon_int16x8x3_t = type { <8 x i16>, <8 x i16>, <8 x i16> } @@ -50,6 +51,16 @@ ret <2 x float> %tmp4 } +define <1 x i64> @vld3i64(i64* %A) nounwind { +;CHECK: vld3i64: +;CHECK: vld1.64 + %tmp1 = call %struct.__neon_int64x1x3_t @llvm.arm.neon.vld3.v1i64(i64* %A) + %tmp2 = extractvalue %struct.__neon_int64x1x3_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int64x1x3_t %tmp1, 2 + %tmp4 = add <1 x i64> %tmp2, %tmp3 + ret <1 x i64> %tmp4 +} + define <16 x i8> 
@vld3Qi8(i8* %A) nounwind { ;CHECK: vld3Qi8: ;CHECK: vld3.8 @@ -98,6 +109,7 @@ declare %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3.v4i16(i8*) nounwind readonly declare %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3.v2i32(i8*) nounwind readonly declare %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3.v2f32(i8*) nounwind readonly +declare %struct.__neon_int64x1x3_t @llvm.arm.neon.vld3.v1i64(i8*) nounwind readonly declare %struct.__neon_int8x16x3_t @llvm.arm.neon.vld3.v16i8(i8*) nounwind readonly declare %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3.v8i16(i8*) nounwind readonly From bob.wilson at apple.com Wed Oct 7 18:47:21 2009 From: bob.wilson at apple.com (Bob Wilson) Date: Wed, 07 Oct 2009 23:47:21 -0000 Subject: [llvm-commits] [llvm] r83507 - in /llvm/trunk/test/CodeGen/ARM: vmov.ll vmovl.ll vmovn.ll vmul.ll vmull.ll vmvn.ll Message-ID: <200910072347.n97NlMlu025467@zion.cs.uiuc.edu> Author: bwilson Date: Wed Oct 7 18:47:21 2009 New Revision: 83507 URL: http://llvm.org/viewvc/llvm-project?rev=83507&view=rev Log: Convert more NEON tests to use FileCheck. 
Modified: llvm/trunk/test/CodeGen/ARM/vmov.ll llvm/trunk/test/CodeGen/ARM/vmovl.ll llvm/trunk/test/CodeGen/ARM/vmovn.ll llvm/trunk/test/CodeGen/ARM/vmul.ll llvm/trunk/test/CodeGen/ARM/vmull.ll llvm/trunk/test/CodeGen/ARM/vmvn.ll Modified: llvm/trunk/test/CodeGen/ARM/vmov.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vmov.ll?rev=83507&r1=83506&r2=83507&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/ARM/vmov.ll (original) +++ llvm/trunk/test/CodeGen/ARM/vmov.ll Wed Oct 7 18:47:21 2009 @@ -1,101 +1,135 @@ -; RUN: llc < %s -march=arm -mattr=+neon > %t -; RUN: grep vmov.i8 %t | count 2 -; RUN: grep vmov.i16 %t | count 4 -; RUN: grep vmov.i32 %t | count 12 -; RUN: grep vmov.i64 %t | count 2 -; Note: function names do not include "vmov" to allow simple grep for opcodes +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s define <8 x i8> @v_movi8() nounwind { +;CHECK: v_movi8: +;CHECK: vmov.i8 ret <8 x i8> < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 > } define <4 x i16> @v_movi16a() nounwind { +;CHECK: v_movi16a: +;CHECK: vmov.i16 ret <4 x i16> < i16 16, i16 16, i16 16, i16 16 > } ; 0x1000 = 4096 define <4 x i16> @v_movi16b() nounwind { +;CHECK: v_movi16b: +;CHECK: vmov.i16 ret <4 x i16> < i16 4096, i16 4096, i16 4096, i16 4096 > } define <2 x i32> @v_movi32a() nounwind { +;CHECK: v_movi32a: +;CHECK: vmov.i32 ret <2 x i32> < i32 32, i32 32 > } ; 0x2000 = 8192 define <2 x i32> @v_movi32b() nounwind { +;CHECK: v_movi32b: +;CHECK: vmov.i32 ret <2 x i32> < i32 8192, i32 8192 > } ; 0x200000 = 2097152 define <2 x i32> @v_movi32c() nounwind { +;CHECK: v_movi32c: +;CHECK: vmov.i32 ret <2 x i32> < i32 2097152, i32 2097152 > } ; 0x20000000 = 536870912 define <2 x i32> @v_movi32d() nounwind { +;CHECK: v_movi32d: +;CHECK: vmov.i32 ret <2 x i32> < i32 536870912, i32 536870912 > } ; 0x20ff = 8447 define <2 x i32> @v_movi32e() nounwind { +;CHECK: v_movi32e: +;CHECK: vmov.i32 ret <2 x 
i32> < i32 8447, i32 8447 > } ; 0x20ffff = 2162687 define <2 x i32> @v_movi32f() nounwind { +;CHECK: v_movi32f: +;CHECK: vmov.i32 ret <2 x i32> < i32 2162687, i32 2162687 > } ; 0xff0000ff0000ffff = 18374687574888349695 define <1 x i64> @v_movi64() nounwind { +;CHECK: v_movi64: +;CHECK: vmov.i64 ret <1 x i64> < i64 18374687574888349695 > } define <16 x i8> @v_movQi8() nounwind { +;CHECK: v_movQi8: +;CHECK: vmov.i8 ret <16 x i8> < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 > } define <8 x i16> @v_movQi16a() nounwind { +;CHECK: v_movQi16a: +;CHECK: vmov.i16 ret <8 x i16> < i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16 > } ; 0x1000 = 4096 define <8 x i16> @v_movQi16b() nounwind { +;CHECK: v_movQi16b: +;CHECK: vmov.i16 ret <8 x i16> < i16 4096, i16 4096, i16 4096, i16 4096, i16 4096, i16 4096, i16 4096, i16 4096 > } define <4 x i32> @v_movQi32a() nounwind { +;CHECK: v_movQi32a: +;CHECK: vmov.i32 ret <4 x i32> < i32 32, i32 32, i32 32, i32 32 > } ; 0x2000 = 8192 define <4 x i32> @v_movQi32b() nounwind { +;CHECK: v_movQi32b: +;CHECK: vmov.i32 ret <4 x i32> < i32 8192, i32 8192, i32 8192, i32 8192 > } ; 0x200000 = 2097152 define <4 x i32> @v_movQi32c() nounwind { +;CHECK: v_movQi32c: +;CHECK: vmov.i32 ret <4 x i32> < i32 2097152, i32 2097152, i32 2097152, i32 2097152 > } ; 0x20000000 = 536870912 define <4 x i32> @v_movQi32d() nounwind { +;CHECK: v_movQi32d: +;CHECK: vmov.i32 ret <4 x i32> < i32 536870912, i32 536870912, i32 536870912, i32 536870912 > } ; 0x20ff = 8447 define <4 x i32> @v_movQi32e() nounwind { +;CHECK: v_movQi32e: +;CHECK: vmov.i32 ret <4 x i32> < i32 8447, i32 8447, i32 8447, i32 8447 > } ; 0x20ffff = 2162687 define <4 x i32> @v_movQi32f() nounwind { +;CHECK: v_movQi32f: +;CHECK: vmov.i32 ret <4 x i32> < i32 2162687, i32 2162687, i32 2162687, i32 2162687 > } ; 0xff0000ff0000ffff = 18374687574888349695 define <2 x i64> @v_movQi64() nounwind { +;CHECK: v_movQi64: +;CHECK: vmov.i64 ret <2 x 
i64> < i64 18374687574888349695, i64 18374687574888349695 > } - Modified: llvm/trunk/test/CodeGen/ARM/vmovl.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vmovl.ll?rev=83507&r1=83506&r2=83507&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/ARM/vmovl.ll (original) +++ llvm/trunk/test/CodeGen/ARM/vmovl.ll Wed Oct 7 18:47:21 2009 @@ -1,42 +1,48 @@ -; RUN: llc < %s -march=arm -mattr=+neon > %t -; RUN: grep {vmovl\\.s8} %t | count 1 -; RUN: grep {vmovl\\.s16} %t | count 1 -; RUN: grep {vmovl\\.s32} %t | count 1 -; RUN: grep {vmovl\\.u8} %t | count 1 -; RUN: grep {vmovl\\.u16} %t | count 1 -; RUN: grep {vmovl\\.u32} %t | count 1 +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s define <8 x i16> @vmovls8(<8 x i8>* %A) nounwind { +;CHECK: vmovls8: +;CHECK: vmovl.s8 %tmp1 = load <8 x i8>* %A %tmp2 = call <8 x i16> @llvm.arm.neon.vmovls.v8i16(<8 x i8> %tmp1) ret <8 x i16> %tmp2 } define <4 x i32> @vmovls16(<4 x i16>* %A) nounwind { +;CHECK: vmovls16: +;CHECK: vmovl.s16 %tmp1 = load <4 x i16>* %A %tmp2 = call <4 x i32> @llvm.arm.neon.vmovls.v4i32(<4 x i16> %tmp1) ret <4 x i32> %tmp2 } define <2 x i64> @vmovls32(<2 x i32>* %A) nounwind { +;CHECK: vmovls32: +;CHECK: vmovl.s32 %tmp1 = load <2 x i32>* %A %tmp2 = call <2 x i64> @llvm.arm.neon.vmovls.v2i64(<2 x i32> %tmp1) ret <2 x i64> %tmp2 } define <8 x i16> @vmovlu8(<8 x i8>* %A) nounwind { +;CHECK: vmovlu8: +;CHECK: vmovl.u8 %tmp1 = load <8 x i8>* %A %tmp2 = call <8 x i16> @llvm.arm.neon.vmovlu.v8i16(<8 x i8> %tmp1) ret <8 x i16> %tmp2 } define <4 x i32> @vmovlu16(<4 x i16>* %A) nounwind { +;CHECK: vmovlu16: +;CHECK: vmovl.u16 %tmp1 = load <4 x i16>* %A %tmp2 = call <4 x i32> @llvm.arm.neon.vmovlu.v4i32(<4 x i16> %tmp1) ret <4 x i32> %tmp2 } define <2 x i64> @vmovlu32(<2 x i32>* %A) nounwind { +;CHECK: vmovlu32: +;CHECK: vmovl.u32 %tmp1 = load <2 x i32>* %A %tmp2 = call <2 x i64> @llvm.arm.neon.vmovlu.v2i64(<2 x i32> %tmp1) ret 
<2 x i64> %tmp2 Modified: llvm/trunk/test/CodeGen/ARM/vmovn.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vmovn.ll?rev=83507&r1=83506&r2=83507&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/ARM/vmovn.ll (original) +++ llvm/trunk/test/CodeGen/ARM/vmovn.ll Wed Oct 7 18:47:21 2009 @@ -1,21 +1,24 @@ -; RUN: llc < %s -march=arm -mattr=+neon > %t -; RUN: grep {vmovn\\.i16} %t | count 1 -; RUN: grep {vmovn\\.i32} %t | count 1 -; RUN: grep {vmovn\\.i64} %t | count 1 +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s define <8 x i8> @vmovni16(<8 x i16>* %A) nounwind { +;CHECK: vmovni16: +;CHECK: vmovn.i16 %tmp1 = load <8 x i16>* %A %tmp2 = call <8 x i8> @llvm.arm.neon.vmovn.v8i8(<8 x i16> %tmp1) ret <8 x i8> %tmp2 } define <4 x i16> @vmovni32(<4 x i32>* %A) nounwind { +;CHECK: vmovni32: +;CHECK: vmovn.i32 %tmp1 = load <4 x i32>* %A %tmp2 = call <4 x i16> @llvm.arm.neon.vmovn.v4i16(<4 x i32> %tmp1) ret <4 x i16> %tmp2 } define <2 x i32> @vmovni64(<2 x i64>* %A) nounwind { +;CHECK: vmovni64: +;CHECK: vmovn.i64 %tmp1 = load <2 x i64>* %A %tmp2 = call <2 x i32> @llvm.arm.neon.vmovn.v2i32(<2 x i64> %tmp1) ret <2 x i32> %tmp2 Modified: llvm/trunk/test/CodeGen/ARM/vmul.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vmul.ll?rev=83507&r1=83506&r2=83507&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/ARM/vmul.ll (original) +++ llvm/trunk/test/CodeGen/ARM/vmul.ll Wed Oct 7 18:47:21 2009 @@ -1,11 +1,8 @@ -; RUN: llc < %s -march=arm -mattr=+neon > %t -; RUN: grep {vmul\\.i8} %t | count 2 -; RUN: grep {vmul\\.i16} %t | count 2 -; RUN: grep {vmul\\.i32} %t | count 2 -; RUN: grep {vmul\\.f32} %t | count 2 -; RUN: grep {vmul\\.p8} %t | count 2 +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s define <8 x i8> @vmuli8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vmuli8: +;CHECK: vmul.i8 %tmp1 
= load <8 x i8>* %A %tmp2 = load <8 x i8>* %B %tmp3 = mul <8 x i8> %tmp1, %tmp2 @@ -13,6 +10,8 @@ } define <4 x i16> @vmuli16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vmuli16: +;CHECK: vmul.i16 %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B %tmp3 = mul <4 x i16> %tmp1, %tmp2 @@ -20,6 +19,8 @@ } define <2 x i32> @vmuli32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vmuli32: +;CHECK: vmul.i32 %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B %tmp3 = mul <2 x i32> %tmp1, %tmp2 @@ -27,6 +28,8 @@ } define <2 x float> @vmulf32(<2 x float>* %A, <2 x float>* %B) nounwind { +;CHECK: vmulf32: +;CHECK: vmul.f32 %tmp1 = load <2 x float>* %A %tmp2 = load <2 x float>* %B %tmp3 = mul <2 x float> %tmp1, %tmp2 @@ -34,6 +37,8 @@ } define <8 x i8> @vmulp8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vmulp8: +;CHECK: vmul.p8 %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B %tmp3 = call <8 x i8> @llvm.arm.neon.vmulp.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) @@ -41,6 +46,8 @@ } define <16 x i8> @vmulQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vmulQi8: +;CHECK: vmul.i8 %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B %tmp3 = mul <16 x i8> %tmp1, %tmp2 @@ -48,6 +55,8 @@ } define <8 x i16> @vmulQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vmulQi16: +;CHECK: vmul.i16 %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B %tmp3 = mul <8 x i16> %tmp1, %tmp2 @@ -55,6 +64,8 @@ } define <4 x i32> @vmulQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vmulQi32: +;CHECK: vmul.i32 %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B %tmp3 = mul <4 x i32> %tmp1, %tmp2 @@ -62,6 +73,8 @@ } define <4 x float> @vmulQf32(<4 x float>* %A, <4 x float>* %B) nounwind { +;CHECK: vmulQf32: +;CHECK: vmul.f32 %tmp1 = load <4 x float>* %A %tmp2 = load <4 x float>* %B %tmp3 = mul <4 x float> %tmp1, %tmp2 @@ -69,6 +82,8 @@ } define <16 x i8> @vmulQp8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vmulQp8: +;CHECK: vmul.p8 %tmp1 = load <16 x i8>* %A %tmp2 = 
load <16 x i8>* %B %tmp3 = call <16 x i8> @llvm.arm.neon.vmulp.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) Modified: llvm/trunk/test/CodeGen/ARM/vmull.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vmull.ll?rev=83507&r1=83506&r2=83507&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/ARM/vmull.ll (original) +++ llvm/trunk/test/CodeGen/ARM/vmull.ll Wed Oct 7 18:47:21 2009 @@ -1,13 +1,8 @@ -; RUN: llc < %s -march=arm -mattr=+neon > %t -; RUN: grep {vmull\\.s8} %t | count 1 -; RUN: grep {vmull\\.s16} %t | count 1 -; RUN: grep {vmull\\.s32} %t | count 1 -; RUN: grep {vmull\\.u8} %t | count 1 -; RUN: grep {vmull\\.u16} %t | count 1 -; RUN: grep {vmull\\.u32} %t | count 1 -; RUN: grep {vmull\\.p8} %t | count 1 +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s define <8 x i16> @vmulls8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vmulls8: +;CHECK: vmull.s8 %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B %tmp3 = call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2) @@ -15,6 +10,8 @@ } define <4 x i32> @vmulls16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vmulls16: +;CHECK: vmull.s16 %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B %tmp3 = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) @@ -22,6 +19,8 @@ } define <2 x i64> @vmulls32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vmulls32: +;CHECK: vmull.s32 %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B %tmp3 = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) @@ -29,6 +28,8 @@ } define <8 x i16> @vmullu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vmullu8: +;CHECK: vmull.u8 %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B %tmp3 = call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2) @@ -36,6 +37,8 @@ } define <4 x i32> @vmullu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vmullu16: +;CHECK: 
vmull.u16 %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B %tmp3 = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) @@ -43,6 +46,8 @@ } define <2 x i64> @vmullu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vmullu32: +;CHECK: vmull.u32 %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B %tmp3 = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) @@ -50,6 +55,8 @@ } define <8 x i16> @vmullp8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vmullp8: +;CHECK: vmull.p8 %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B %tmp3 = call <8 x i16> @llvm.arm.neon.vmullp.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2) Modified: llvm/trunk/test/CodeGen/ARM/vmvn.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vmvn.ll?rev=83507&r1=83506&r2=83507&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/ARM/vmvn.ll (original) +++ llvm/trunk/test/CodeGen/ARM/vmvn.ll Wed Oct 7 18:47:21 2009 @@ -1,50 +1,64 @@ -; RUN: llc < %s -march=arm -mattr=+neon > %t -; RUN: grep vmvn %t | count 8 -; Note: function names do not include "vmvn" to allow simple grep for opcodes +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s define <8 x i8> @v_mvni8(<8 x i8>* %A) nounwind { +;CHECK: v_mvni8: +;CHECK: vmvn %tmp1 = load <8 x i8>* %A %tmp2 = xor <8 x i8> %tmp1, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 > ret <8 x i8> %tmp2 } define <4 x i16> @v_mvni16(<4 x i16>* %A) nounwind { +;CHECK: v_mvni16: +;CHECK: vmvn %tmp1 = load <4 x i16>* %A %tmp2 = xor <4 x i16> %tmp1, < i16 -1, i16 -1, i16 -1, i16 -1 > ret <4 x i16> %tmp2 } define <2 x i32> @v_mvni32(<2 x i32>* %A) nounwind { +;CHECK: v_mvni32: +;CHECK: vmvn %tmp1 = load <2 x i32>* %A %tmp2 = xor <2 x i32> %tmp1, < i32 -1, i32 -1 > ret <2 x i32> %tmp2 } define <1 x i64> @v_mvni64(<1 x i64>* %A) nounwind { +;CHECK: v_mvni64: +;CHECK: vmvn %tmp1 = load <1 x i64>* %A %tmp2 = xor <1 x i64> %tmp1, < 
i64 -1 > ret <1 x i64> %tmp2 } define <16 x i8> @v_mvnQi8(<16 x i8>* %A) nounwind { +;CHECK: v_mvnQi8: +;CHECK: vmvn %tmp1 = load <16 x i8>* %A %tmp2 = xor <16 x i8> %tmp1, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 > ret <16 x i8> %tmp2 } define <8 x i16> @v_mvnQi16(<8 x i16>* %A) nounwind { +;CHECK: v_mvnQi16: +;CHECK: vmvn %tmp1 = load <8 x i16>* %A %tmp2 = xor <8 x i16> %tmp1, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 > ret <8 x i16> %tmp2 } define <4 x i32> @v_mvnQi32(<4 x i32>* %A) nounwind { +;CHECK: v_mvnQi32: +;CHECK: vmvn %tmp1 = load <4 x i32>* %A %tmp2 = xor <4 x i32> %tmp1, < i32 -1, i32 -1, i32 -1, i32 -1 > ret <4 x i32> %tmp2 } define <2 x i64> @v_mvnQi64(<2 x i64>* %A) nounwind { +;CHECK: v_mvnQi64: +;CHECK: vmvn %tmp1 = load <2 x i64>* %A %tmp2 = xor <2 x i64> %tmp1, < i64 -1, i64 -1 > ret <2 x i64> %tmp2 From evan.cheng at apple.com Wed Oct 7 18:48:41 2009 From: evan.cheng at apple.com (Evan Cheng) Date: Wed, 7 Oct 2009 16:48:41 -0700 Subject: [llvm-commits] [llvm] r83467 - in /llvm/trunk: include/llvm/CodeGen/ include/llvm/Target/ lib/CodeGen/ lib/Target/ARM/ lib/Target/Alpha/ lib/Target/Blackfin/ lib/Target/CellSPU/ lib/Target/MSP430/ lib/Target/Mips/ lib/Target/PIC16/ lib/Target/PowerPC/ lib/Target/Sparc/ lib/Target/SystemZ/ lib/Target/X86/ lib/Target/XCore/ In-Reply-To: <38EE6921-502A-4F02-BD8E-C36062039025@apple.com> References: <200910071712.n97HCvGJ028253@zion.cs.uiuc.edu> <38EE6921-502A-4F02-BD8E-C36062039025@apple.com> Message-ID: On Oct 7, 2009, at 12:12 PM, Jim Grosbach wrote: > > On Oct 7, 2009, at 11:38 AM, Evan Cheng wrote: > >> >> On Oct 7, 2009, at 10:12 AM, Jim Grosbach wrote: >> >>> Author: grosbach >>> Date: Wed Oct 7 12:12:56 2009 >>> New Revision: 83467 >>> >>> URL: http://llvm.org/viewvc/llvm-project?rev=83467&view=rev >>> Log: >>> Add register-reuse to frame-index register scavenging. 
When a >>> target uses >>> a virtual register to eliminate a frame index, it can return that >>> register >>> and the constant stored there to PEI to track. When scavenging to >>> allocate >>> for those registers, PEI then tracks the last-used register and >>> value, and >>> if it is still available and matches the value for the next index, >>> reuses >>> the existing value instead and removes the re-materialization >>> instructions. >>> Fancier tracking and adjustment of scavenger allocations to keep >>> more >>> values live for longer is possible, but not yet implemented and >>> would likely >>> be better done via a different, less special-purpose, approach to >>> the >>> problem. >> >> Hi Jim, >> >> Is PEI responsible for allocating physical registers to the virtual >> registers created during eliminateFrameIndex()? That's ok for now. >> But the question is when that is moved to a separate pass does this >> reuse scheme still work? > > The allocation is currently done in PEI, yes. There's nothing > preventing moving it out; however, with a bit of analysis data > passed between them. > >> >> More comments below. >> >> Evan >> >>> >>> eliminateFrameIndex() is modified so the target implementations >>> can return >>> the registers they wish to be tracked for reuse. >>> >>> ARM Thumb1 implements and utilizes the new mechanism. All other >>> targets are >>> simply modified to adjust for the changed eliminateFrameIndex() >>> prototype. 
>>> >>> >>> Modified: >>> llvm/trunk/include/llvm/CodeGen/RegisterScavenging.h >>> llvm/trunk/include/llvm/Target/TargetRegisterInfo.h >>> llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp >>> llvm/trunk/lib/CodeGen/PrologEpilogInserter.h >>> llvm/trunk/lib/CodeGen/RegisterScavenging.cpp >>> llvm/trunk/lib/Target/ARM/ARMBaseRegisterInfo.cpp >>> llvm/trunk/lib/Target/ARM/ARMBaseRegisterInfo.h >>> llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.cpp >>> llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.h >>> llvm/trunk/lib/Target/Alpha/AlphaRegisterInfo.cpp >>> llvm/trunk/lib/Target/Alpha/AlphaRegisterInfo.h >>> llvm/trunk/lib/Target/Blackfin/BlackfinRegisterInfo.cpp >>> llvm/trunk/lib/Target/Blackfin/BlackfinRegisterInfo.h >>> llvm/trunk/lib/Target/CellSPU/SPURegisterInfo.cpp >>> llvm/trunk/lib/Target/CellSPU/SPURegisterInfo.h >>> llvm/trunk/lib/Target/MSP430/MSP430RegisterInfo.cpp >>> llvm/trunk/lib/Target/MSP430/MSP430RegisterInfo.h >>> llvm/trunk/lib/Target/Mips/MipsRegisterInfo.cpp >>> llvm/trunk/lib/Target/Mips/MipsRegisterInfo.h >>> llvm/trunk/lib/Target/PIC16/PIC16RegisterInfo.cpp >>> llvm/trunk/lib/Target/PIC16/PIC16RegisterInfo.h >>> llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.cpp >>> llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.h >>> llvm/trunk/lib/Target/Sparc/SparcRegisterInfo.cpp >>> llvm/trunk/lib/Target/Sparc/SparcRegisterInfo.h >>> llvm/trunk/lib/Target/SystemZ/SystemZRegisterInfo.cpp >>> llvm/trunk/lib/Target/SystemZ/SystemZRegisterInfo.h >>> llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp >>> llvm/trunk/lib/Target/X86/X86RegisterInfo.h >>> llvm/trunk/lib/Target/XCore/XCoreRegisterInfo.cpp >>> llvm/trunk/lib/Target/XCore/XCoreRegisterInfo.h >>> >>> Modified: llvm/trunk/include/llvm/CodeGen/RegisterScavenging.h >>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/RegisterScavenging.h?rev=83467&r1=83466&r2=83467&view=diff >>> >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> 
==================================================================== >>> --- llvm/trunk/include/llvm/CodeGen/RegisterScavenging.h (original) >>> +++ llvm/trunk/include/llvm/CodeGen/RegisterScavenging.h Wed Oct >>> 7 12:12:56 2009 >>> @@ -117,6 +117,9 @@ >>> return scavengeRegister(RegClass, MBBI, SPAdj); >>> } >>> >>> + /// setUsed - Tell the scavenger a register is used. >>> + /// >>> + void setUsed(unsigned Reg); >> >> I'd prefer not to do this if it can be helped. RS should be >> entirely responsible for tracking the liveness. Why is this needed? > > When we recognize that we can reuse a register, we reach back to the > previously killing use and clear the kill flag, since the live range > for the register has been extended. The scavenger state needs to be > updated to know that the register still has a live value in it at > this point, so we also need to flag it as used. Otherwise, the > scavenger state isn't reflective of the new code. Ok. How about changing it to extendLiveness() to make it more explicit what's happening? > >> >>> private: >>> /// isReserved - Returns true if a register is reserved. It is >>> never "unused". >>> bool isReserved(unsigned Reg) const { return ReservedRegs.test >>> (Reg); } >>> @@ -131,7 +134,6 @@ >>> >>> /// setUsed / setUnused - Mark the state of one or a number of >>> registers. 
>>> /// >>> - void setUsed(unsigned Reg); >>> void setUsed(BitVector &Regs) { >>> RegsAvailable &= ~Regs; >>> } >>> >>> Modified: llvm/trunk/include/llvm/Target/TargetRegisterInfo.h >>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetRegisterInfo.h?rev=83467&r1=83466&r2=83467&view=diff >>> >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> ==================================================================== >>> --- llvm/trunk/include/llvm/Target/TargetRegisterInfo.h (original) >>> +++ llvm/trunk/include/llvm/Target/TargetRegisterInfo.h Wed Oct 7 >>> 12:12:56 2009 >>> @@ -660,8 +660,13 @@ >>> /// specified instruction, as long as it keeps the iterator >>> pointing the the >>> /// finished product. SPAdj is the SP adjustment due to call frame >>> setup >>> /// instruction. >>> - virtual void eliminateFrameIndex(MachineBasicBlock::iterator MI, >>> - int SPAdj, RegScavenger >>> *RS=NULL) const = 0; >>> + /// >>> + /// When -enable-frame-index-scavenging is enabled, the virtual >>> register >>> + /// allocated for this frame index is returned and its value is >>> stored in >>> + /// *Value. >>> + virtual unsigned eliminateFrameIndex >>> (MachineBasicBlock::iterator MI, >>> + int SPAdj, int *Value = >>> NULL, >>> + RegScavenger *RS=NULL) >>> const = 0; >>> >>> /// emitProlog/emitEpilog - These methods insert prolog and epilog >>> code into >>> /// the function. 
>>> >>> Modified: llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp >>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp?rev=83467&r1=83466&r2=83467&view=diff >>> >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> ==================================================================== >>> --- llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp (original) >>> +++ llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp Wed Oct 7 >>> 12:12:56 2009 >>> @@ -655,6 +655,11 @@ >>> int FrameSetupOpcode = TRI.getCallFrameSetupOpcode(); >>> int FrameDestroyOpcode = TRI.getCallFrameDestroyOpcode(); >>> >>> + // Pre-allocate space for frame index mappings. If more space >>> is needed, >>> + // the map will be grown later. >>> + if (FrameIndexVirtualScavenging) >>> + FrameConstantRegMap.grow(Fn.getRegInfo().getLastVirtReg() + >>> 128); >>> + >>> for (MachineFunction::iterator BB = Fn.begin(), >>> E = Fn.end(); BB != E; ++BB) { >>> int SPAdj = 0; // SP offset due to call frame setup / destroy. >>> @@ -703,9 +708,17 @@ >>> // If this instruction has a FrameIndex operand, we need to >>> // use that target machine register info object to eliminate >>> // it. >>> - >>> - TRI.eliminateFrameIndex(MI, SPAdj, >>> FrameIndexVirtualScavenging ? >>> - NULL : RS); >>> + int Value; >>> + unsigned VReg = >>> + TRI.eliminateFrameIndex(MI, SPAdj, &Value, >>> + >>> FrameIndexVirtualScavenging ? NULL : RS); >>> + if (VReg) { >>> + assert (FrameIndexVirtualScavenging && >>> + "Not scavenging, but virtual returned from " >>> + "eliminateFrameIndex()!"); >>> + FrameConstantRegMap.grow(VReg); >>> + FrameConstantRegMap[VReg] = FrameConstantEntry(Value, >>> SPAdj); >>> + } >>> >>> // Reset the iterator if we were at the beginning of the BB. >>> if (AtBeginning) { >>> @@ -727,6 +740,35 @@ >>> } >>> } >>> >>> +/// findLastUseReg - find the killing use of the specified >>> register within >>> +/// the instruciton range. 
Return the operand number of the kill >>> in Operand. >>> +static MachineBasicBlock::iterator >>> +findLastUseReg(MachineBasicBlock::iterator I, >>> MachineBasicBlock::iterator ME, >>> + unsigned Reg, unsigned *Operand) { >>> + // Scan forward to find the last use of this virtual register >>> + for (++I; I != ME; ++I) { >>> + MachineInstr *MI = I; >>> + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) >>> + if (MI->getOperand(i).isReg()) { >>> + unsigned OpReg = MI->getOperand(i).getReg(); >>> + if (OpReg == 0 || !TargetRegisterInfo::isVirtualRegister >>> (OpReg)) >>> + continue; >>> + assert (OpReg == Reg >>> + && "overlapping use of scavenged index register!"); >>> + // If this is the killing use, we're done >>> + if (MI->getOperand(i).isKill()) { >>> + if (Operand) >>> + *Operand = i; >>> + return I; >>> + } >>> + } >>> + } >>> + // If we hit the end of the basic block, there was no kill of >>> + // the virtual register, which is wrong. >>> + assert (0 && "scavenged index register never killed!"); >>> + return ME; >>> +} >>> + >>> /// scavengeFrameVirtualRegs - Replace all frame index virtual >>> registers >>> /// with physical registers. Use the register scavenger to find an >>> /// appropriate register to use. >>> @@ -738,12 +780,21 @@ >>> >>> unsigned CurrentVirtReg = 0; >>> unsigned CurrentScratchReg = 0; >>> + unsigned PrevScratchReg = 0; >>> + int PrevValue; >>> + MachineInstr *PrevLastUseMI; >>> + unsigned PrevLastUseOp; >>> >>> + // The instruction stream may change in the loop, so check BB- >>> >end() >>> + // directly. >>> for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); >>> ++I) { >>> MachineInstr *MI = I; >>> - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) >>> + // Likewise, call getNumOperands() each iteration, as the >>> MI may change >>> + // inside the loop (with 'i' updated accordingly). >> >> Why not just re-compute the end limit when MI is updated? > > We could. 
To me the code just reads more clearly this way. i.e., > stylistic choice. If you prefer, I'll change it. No big deal to me > either way. I'd prefer not to recompute getNumOperands() every time, thanks. > >> >>> + for (unsigned i = 0; i != MI->getNumOperands(); ++i) >>> if (MI->getOperand(i).isReg()) { >>> - unsigned Reg = MI->getOperand(i).getReg(); >>> + MachineOperand &MO = MI->getOperand(i); >>> + unsigned Reg = MO.getReg(); >>> if (Reg == 0) >>> continue; >>> if (!TargetRegisterInfo::isVirtualRegister(Reg)) { >>> @@ -751,33 +802,81 @@ >>> // seeing any references to it. >>> assert (Reg != CurrentScratchReg >>> && "overlapping use of scavenged frame index >>> register!"); >>> + >>> + // If we have a previous scratch reg, check and see >>> if anything >>> + // here kills whatever value is in there. >>> + if (Reg == PrevScratchReg) { >>> + if (MO.isUse()) { >>> + // Two-address operands implicitly kill >>> + if (MO.isKill() || MI->isRegTiedToDefOperand(i)) >>> + PrevScratchReg = 0; >>> + } else { >>> + assert (MO.isDef()); >>> + PrevScratchReg = 0; >>> + } >>> + } >>> continue; >>> } >>> >>> // If we already have a scratch for this virtual register, >>> use it >>> if (Reg != CurrentVirtReg) { >>> - // When we first encounter a new virtual register, it >>> - // must be a definition. >>> - assert(MI->getOperand(i).isDef() && >>> - "frame index virtual missing def!"); >>> - // We can't have nested virtual register live ranges >>> because >>> - // there's only a guarantee of one scavenged register >>> at a time. >>> - assert (CurrentVirtReg == 0 && >>> - "overlapping frame index virtual registers!"); >>> - CurrentVirtReg = Reg; >>> - const TargetRegisterClass *RC = Fn.getRegInfo >>> ().getRegClass(Reg); >>> - CurrentScratchReg = RS->FindUnusedReg(RC); >>> - if (CurrentScratchReg == 0) >>> - // No register is "free". Scavenge a register. >>> - // FIXME: Track SPAdj. 
Zero won't always be right >>> - CurrentScratchReg = RS->scavengeRegister(RC, I, 0); >>> + int Value = FrameConstantRegMap[Reg].first; >>> + int SPAdj = FrameConstantRegMap[Reg].second; >>> + >>> + // If the scratch register from the last allocation >>> is still >>> + // available, see if the value matches. If it does, >>> just re-use it. >>> + if (PrevScratchReg && Value == PrevValue) { >> >> This means the reuse can only happen when you have consecutive uses >> of the same frame indices. That seems very restrictive. The >> implementation makes it difficult to separate the allocation phase >> from PEI. > > The former is currently true, yes. It seemed a reasonable subset of > the general problem to solve. Doing more would require tracking the > liveness of multiple values, and I'm concerned about worst case > performance if I do too much of that sort of thing when iterating > over the instruction list. > > I'm not sure I follow why it is more difficult to separate into > another pass due to implementation. There are some inherent > complications that make it tricky, however, yes. Specifically, I > think the constant value references need help from the target. I'll > think about this a bit more and see if that's always true, or if > there's something we can do about it. If that can be simplified, it > would be great. If you don't think it's difficult to separate it into a pass, I'm ok with leaving it as it is for now. We can talk about the design offline. Evan > >> >> >>> + // FIXME: This assumes that the instructions in the >>> live range >>> + // for the virtual register are exclusively for the >>> purpose >>> + // of populating the value in the register. That >>> reasonable >> >> That -> That's. > > Woops. Thanks. Fixed. > >> >>> + // for these frame index registers, but it's still >>> a very, very >>> + // strong assumption. 
Perhaps this implies that the >>> frame index >>> + // elimination should be before register >>> allocation, with >>> + // conservative heuristics since we'll know less >>> then, and >>> + // the reuse calculations done directly when doing >>> the code-gen? >> >> This can be solved later. > > Agreed. Just putting it as a fixme to remind us. > >> >>> + >>> + // Find the last use of the new virtual register. >>> Remove all >>> + // instruction between here and there, and update >>> the current >>> + // instruction to reference the last use insn >>> instead. >>> + MachineBasicBlock::iterator LastUseMI = >>> + findLastUseReg(I, BB->end(), Reg, &i); >> >>> + // Remove all instructions up 'til the last use, >>> since they're >>> + // just calculating the value we already have. >>> + BB->erase(I, LastUseMI); >>> + MI = I = LastUseMI; >> >> Rather than doing this, you could simply continue to iterate >> forward until you have reached the kill. > > There's a couple ways I think the code can be adjusted to have a > cleaner flow. This is definitely a good example. > >> >>> + >>> + CurrentScratchReg = PrevScratchReg; >>> + // Extend the live range of the register >>> + PrevLastUseMI->getOperand(PrevLastUseOp).setIsKill >>> (false); >>> + RS->setUsed(CurrentScratchReg); >>> + } else { >>> + // When we first encounter a new virtual register, it >>> + // must be a definition. >>> + assert(MI->getOperand(i).isDef() && >>> + "frame index virtual missing def!"); >>> + // We can't have nested virtual register live >>> ranges because >>> + // there's only a guarantee of one scavenged >>> register at a time. >>> + assert (CurrentVirtReg == 0 && >>> + "overlapping frame index virtual >>> registers!"); >>> + CurrentVirtReg = Reg; >>> + const TargetRegisterClass *RC = Fn.getRegInfo >>> ().getRegClass(Reg); >>> + CurrentScratchReg = RS->FindUnusedReg(RC); >>> + if (CurrentScratchReg == 0) >>> + // No register is "free". Scavenge a register. 
>>> + CurrentScratchReg = RS->scavengeRegister(RC, I, >>> SPAdj); >>> + >>> + PrevValue = Value; >>> + } >>> } >>> assert (CurrentScratchReg && "Missing scratch register!"); >>> MI->getOperand(i).setReg(CurrentScratchReg); >>> >>> // If this is the last use of the register, stop tracking it. >>> - if (MI->getOperand(i).isKill()) >>> + if (MI->getOperand(i).isKill()) { >>> + PrevScratchReg = CurrentScratchReg; >>> + PrevLastUseMI = MI; >>> CurrentScratchReg = CurrentVirtReg = 0; >>> + } >>> } >>> RS->forward(MI); >>> } >>> >>> Modified: llvm/trunk/lib/CodeGen/PrologEpilogInserter.h >>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/PrologEpilogInserter.h?rev=83467&r1=83466&r2=83467&view=diff >>> >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> ==================================================================== >>> --- llvm/trunk/lib/CodeGen/PrologEpilogInserter.h (original) >>> +++ llvm/trunk/lib/CodeGen/PrologEpilogInserter.h Wed Oct 7 >>> 12:12:56 2009 >>> @@ -27,6 +27,8 @@ >>> #include "llvm/CodeGen/MachineLoopInfo.h" >>> #include "llvm/ADT/SparseBitVector.h" >>> #include "llvm/ADT/DenseMap.h" >>> +#include "llvm/ADT/IndexedMap.h" >>> +#include "llvm/Target/TargetRegisterInfo.h" >>> >>> namespace llvm { >>> class RegScavenger; >>> @@ -93,6 +95,12 @@ >>> // functions. >>> bool ShrinkWrapThisFunction; >>> >>> + // When using the scavenger post-pass to resolve frame >>> reference >>> + // materialization registers, maintain a map of the registers >>> to >>> + // the constant value and SP adjustment associated with it. >>> + typedef std::pair FrameConstantEntry; >>> + IndexedMap >>> FrameConstantRegMap; >>> + >>> #ifndef NDEBUG >>> // Machine function handle. 
>>> MachineFunction* MF; >>> >>> Modified: llvm/trunk/lib/CodeGen/RegisterScavenging.cpp >>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/RegisterScavenging.cpp?rev=83467&r1=83466&r2=83467&view=diff >>> >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> ==================================================================== >>> --- llvm/trunk/lib/CodeGen/RegisterScavenging.cpp (original) >>> +++ llvm/trunk/lib/CodeGen/RegisterScavenging.cpp Wed Oct 7 >>> 12:12:56 2009 >>> @@ -306,7 +306,7 @@ >>> "Cannot scavenge register without an emergency spill >>> slot!"); >>> TII->storeRegToStackSlot(*MBB, I, SReg, true, >>> ScavengingFrameIndex, RC); >>> MachineBasicBlock::iterator II = prior(I); >>> - TRI->eliminateFrameIndex(II, SPAdj, this); >>> + TRI->eliminateFrameIndex(II, SPAdj, NULL, this); >>> >>> // Restore the scavenged register before its use (or first >>> terminator). >>> TII->loadRegFromStackSlot(*MBB, UseMI, SReg, >>> ScavengingFrameIndex, RC); >>> >>> Modified: llvm/trunk/lib/Target/ARM/ARMBaseRegisterInfo.cpp >>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMBaseRegisterInfo.cpp?rev=83467&r1=83466&r2=83467&view=diff >>> >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> ==================================================================== >>> --- llvm/trunk/lib/Target/ARM/ARMBaseRegisterInfo.cpp (original) >>> +++ llvm/trunk/lib/Target/ARM/ARMBaseRegisterInfo.cpp Wed Oct 7 >>> 12:12:56 2009 >>> @@ -1023,9 +1023,10 @@ >>> return Reg; >>> } >>> >>> -void >>> +unsigned >>> ARMBaseRegisterInfo::eliminateFrameIndex >>> (MachineBasicBlock::iterator II, >>> - int SPAdj, RegScavenger >>> *RS) const { >>> + int SPAdj, int *Value, >>> + RegScavenger *RS) const { >>> unsigned i = 0; >>> MachineInstr &MI = *II; >>> MachineBasicBlock &MBB = *MI.getParent(); >>> @@ -1067,7 +1068,7 @@ >>> Done = rewriteT2FrameIndex(MI, i, FrameReg, Offset, TII); >>> } >>> if (Done) >>> - return; >>> + return 0; >>> >>> // If 
we get here, the immediate doesn't fit into the >>> instruction. We folded >>> // as much as possible above, handle the rest, providing a >>> register that is >>> @@ -1102,6 +1103,7 @@ >>> } >>> MI.getOperand(i).ChangeToRegister(ScratchReg, false, false, true); >>> } >>> + return 0; >>> } >>> >>> /// Move iterator pass the next bunch of callee save load / store >>> ops for >>> >>> Modified: llvm/trunk/lib/Target/ARM/ARMBaseRegisterInfo.h >>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMBaseRegisterInfo.h?rev=83467&r1=83466&r2=83467&view=diff >>> >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> ==================================================================== >>> --- llvm/trunk/lib/Target/ARM/ARMBaseRegisterInfo.h (original) >>> +++ llvm/trunk/lib/Target/ARM/ARMBaseRegisterInfo.h Wed Oct 7 >>> 12:12:56 2009 >>> @@ -128,8 +128,9 @@ >>> MachineBasicBlock &MBB, >>> >>> MachineBasicBlock::iterator I) const; >>> >>> - virtual void eliminateFrameIndex(MachineBasicBlock::iterator II, >>> - int SPAdj, RegScavenger *RS = >>> NULL) const; >>> + virtual unsigned eliminateFrameIndex >>> (MachineBasicBlock::iterator II, >>> + int SPAdj, int *Value = >>> NULL, >>> + RegScavenger *RS = NULL) >>> const; >>> >>> virtual void emitPrologue(MachineFunction &MF) const; >>> virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock >>> &MBB) const; >>> >>> Modified: llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.cpp >>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.cpp?rev=83467&r1=83466&r2=83467&view=diff >>> >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> ==================================================================== >>> --- llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.cpp (original) >>> +++ llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.cpp Wed Oct 7 >>> 12:12:56 2009 >>> @@ -427,8 +427,11 @@ >>> TII.copyRegToReg(MBB, I, Reg, ARM::R12, RC, ARM::GPRRegisterClass); >>> } >>> >>> 
-void Thumb1RegisterInfo::eliminateFrameIndex >>> (MachineBasicBlock::iterator II, >>> - int SPAdj, >>> RegScavenger *RS) const{ >>> +unsigned >>> +Thumb1RegisterInfo::eliminateFrameIndex >>> (MachineBasicBlock::iterator II, >>> + int SPAdj, int *Value, >>> + RegScavenger *RS) const{ >>> + unsigned VReg = 0; >>> unsigned i = 0; >>> MachineInstr &MI = *II; >>> MachineBasicBlock &MBB = *MI.getParent(); >>> @@ -484,7 +487,7 @@ >>> MI.setDesc(TII.get(ARM::tMOVgpr2tgpr)); >>> MI.getOperand(i).ChangeToRegister(FrameReg, false); >>> MI.RemoveOperand(i+1); >>> - return; >>> + return 0; >>> } >>> >>> // Common case: small offset, fits into instruction. >>> @@ -500,7 +503,7 @@ >>> MI.getOperand(i).ChangeToRegister(FrameReg, false); >>> MI.getOperand(i+1).ChangeToImmediate(Offset / Scale); >>> } >>> - return; >>> + return 0; >>> } >>> >>> unsigned DestReg = MI.getOperand(0).getReg(); >>> @@ -512,7 +515,7 @@ >>> emitThumbRegPlusImmediate(MBB, II, DestReg, FrameReg, Offset, >>> TII, >>> *this, dl); >>> MBB.erase(II); >>> - return; >>> + return 0; >>> } >>> >>> if (Offset > 0) { >>> @@ -545,7 +548,7 @@ >>> AddDefaultPred(MIB); >>> } >>> } >>> - return; >>> + return 0; >>> } else { >>> unsigned ImmIdx = 0; >>> int InstrOffs = 0; >>> @@ -575,7 +578,7 @@ >>> // Replace the FrameIndex with sp >>> MI.getOperand(i).ChangeToRegister(FrameReg, false); >>> ImmOp.ChangeToImmediate(ImmedOffset); >>> - return; >>> + return 0; >>> } >>> >>> bool isThumSpillRestore = Opcode == ARM::tRestore || Opcode == >>> ARM::tSpill; >>> @@ -633,22 +636,24 @@ >>> MI.addOperand(MachineOperand::CreateReg(0, false)); >>> } else if (Desc.mayStore()) { >>> if (FrameIndexVirtualScavenging) { >>> - unsigned TmpReg = >>> - MF.getRegInfo().createVirtualRegister >>> (ARM::tGPRRegisterClass); >>> + VReg = MF.getRegInfo().createVirtualRegister >>> (ARM::tGPRRegisterClass); >>> + assert (Value && "Frame index virtual allocated, but Value >>> arg is NULL!"); >>> + *Value = Offset; >>> bool UseRR = false; >>> + >>> if 
(Opcode == ARM::tSpill) { >>> if (FrameReg == ARM::SP) >>> - emitThumbRegPlusImmInReg(MBB, II, TmpReg, FrameReg, >>> + emitThumbRegPlusImmInReg(MBB, II, VReg, FrameReg, >>> Offset, false, TII, *this, dl); >>> else { >>> - emitLoadConstPool(MBB, II, dl, TmpReg, 0, Offset); >>> + emitLoadConstPool(MBB, II, dl, VReg, 0, Offset); >>> UseRR = true; >>> } >>> } else >>> - emitThumbRegPlusImmediate(MBB, II, TmpReg, FrameReg, >>> Offset, TII, >>> + emitThumbRegPlusImmediate(MBB, II, VReg, FrameReg, >>> Offset, TII, >>> *this, dl); >>> MI.setDesc(TII.get(ARM::tSTR)); >>> - MI.getOperand(i).ChangeToRegister(TmpReg, false, false, >>> true); >>> + MI.getOperand(i).ChangeToRegister(VReg, false, false, true); >>> if (UseRR) // Use [reg, reg] addrmode. >>> MI.addOperand(MachineOperand::CreateReg(FrameReg, false)); >>> else // tSTR has an extra register operand. >>> @@ -707,6 +712,7 @@ >>> MachineInstrBuilder MIB(&MI); >>> AddDefaultPred(MIB); >>> } >>> + return VReg; >>> } >>> >>> void Thumb1RegisterInfo::emitPrologue(MachineFunction &MF) const { >>> >>> Modified: llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.h >>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.h?rev=83467&r1=83466&r2=83467&view=diff >>> >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> ==================================================================== >>> --- llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.h (original) >>> +++ llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.h Wed Oct 7 >>> 12:12:56 2009 >>> @@ -62,8 +62,9 @@ >>> MachineBasicBlock::iterator I, >>> const TargetRegisterClass *RC, >>> unsigned Reg) const; >>> - void eliminateFrameIndex(MachineBasicBlock::iterator II, >>> - int SPAdj, RegScavenger *RS = NULL) >>> const; >>> + unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, >>> + int SPAdj, int *Value = NULL, >>> + RegScavenger *RS = NULL) const; >>> >>> void emitPrologue(MachineFunction &MF) const; >>> void emitEpilogue(MachineFunction &MF, 
MachineBasicBlock &MBB) >>> const; >>> >>> Modified: llvm/trunk/lib/Target/Alpha/AlphaRegisterInfo.cpp >>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Alpha/AlphaRegisterInfo.cpp?rev=83467&r1=83466&r2=83467&view=diff >>> >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> ==================================================================== >>> --- llvm/trunk/lib/Target/Alpha/AlphaRegisterInfo.cpp (original) >>> +++ llvm/trunk/lib/Target/Alpha/AlphaRegisterInfo.cpp Wed Oct 7 >>> 12:12:56 2009 >>> @@ -151,8 +151,10 @@ >>> //variable locals >>> //<- SP >>> >>> -void AlphaRegisterInfo::eliminateFrameIndex >>> (MachineBasicBlock::iterator II, >>> - int SPAdj, >>> RegScavenger *RS) const { >>> +unsigned >>> +AlphaRegisterInfo::eliminateFrameIndex >>> (MachineBasicBlock::iterator II, >>> + int SPAdj, int *Value, >>> + RegScavenger *RS) const { >>> assert(SPAdj == 0 && "Unexpected"); >>> >>> unsigned i = 0; >>> @@ -197,6 +199,7 @@ >>> } else { >>> MI.getOperand(i).ChangeToImmediate(Offset); >>> } >>> + return 0; >>> } >>> >>> >>> >>> Modified: llvm/trunk/lib/Target/Alpha/AlphaRegisterInfo.h >>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Alpha/AlphaRegisterInfo.h?rev=83467&r1=83466&r2=83467&view=diff >>> >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> ==================================================================== >>> --- llvm/trunk/lib/Target/Alpha/AlphaRegisterInfo.h (original) >>> +++ llvm/trunk/lib/Target/Alpha/AlphaRegisterInfo.h Wed Oct 7 >>> 12:12:56 2009 >>> @@ -41,8 +41,9 @@ >>> MachineBasicBlock &MBB, >>> MachineBasicBlock::iterator I) >>> const; >>> >>> - void eliminateFrameIndex(MachineBasicBlock::iterator II, >>> - int SPAdj, RegScavenger *RS = NULL) >>> const; >>> + unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, >>> + int SPAdj, int *Value = NULL, >>> + RegScavenger *RS = NULL) const; >>> >>> //void processFunctionBeforeFrameFinalized(MachineFunction &MF) >>> const; >>> >>> 
>>> Modified: llvm/trunk/lib/Target/Blackfin/BlackfinRegisterInfo.cpp >>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Blackfin/BlackfinRegisterInfo.cpp?rev=83467&r1=83466&r2=83467&view=diff >>> >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> ==================================================================== >>> --- llvm/trunk/lib/Target/Blackfin/BlackfinRegisterInfo.cpp >>> (original) >>> +++ llvm/trunk/lib/Target/Blackfin/BlackfinRegisterInfo.cpp Wed >>> Oct 7 12:12:56 2009 >>> @@ -219,9 +219,10 @@ >>> return Reg; >>> } >>> >>> -void BlackfinRegisterInfo::eliminateFrameIndex >>> (MachineBasicBlock::iterator II, >>> - int SPAdj, >>> - RegScavenger *RS) >>> const { >>> +unsigned >>> +BlackfinRegisterInfo::eliminateFrameIndex >>> (MachineBasicBlock::iterator II, >>> + int SPAdj, int *Value, >>> + RegScavenger *RS) const { >>> MachineInstr &MI = *II; >>> MachineBasicBlock &MBB = *MI.getParent(); >>> MachineFunction &MF = *MBB.getParent(); >>> @@ -258,20 +259,20 @@ >>> MI.setDesc(TII.get(isStore >>> ? BF::STORE32p_uimm6m4 >>> : BF::LOAD32p_uimm6m4)); >>> - return; >>> + return 0; >>> } >>> if (BaseReg == BF::FP && isUint<7>(-Offset)) { >>> MI.setDesc(TII.get(isStore >>> ? BF::STORE32fp_nimm7m4 >>> : BF::LOAD32fp_nimm7m4)); >>> MI.getOperand(FIPos+1).setImm(-Offset); >>> - return; >>> + return 0; >>> } >>> if (isInt<18>(Offset)) { >>> MI.setDesc(TII.get(isStore >>> ? BF::STORE32p_imm18m4 >>> : BF::LOAD32p_imm18m4)); >>> - return; >>> + return 0; >>> } >>> // Use RegScavenger to calculate proper offset... 
>>> MI.dump(); >>> @@ -356,6 +357,7 @@ >>> llvm_unreachable("Cannot eliminate frame index"); >>> break; >>> } >>> + return 0; >>> } >>> >>> void BlackfinRegisterInfo:: >>> >>> Modified: llvm/trunk/lib/Target/Blackfin/BlackfinRegisterInfo.h >>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Blackfin/BlackfinRegisterInfo.h?rev=83467&r1=83466&r2=83467&view=diff >>> >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> ==================================================================== >>> --- llvm/trunk/lib/Target/Blackfin/BlackfinRegisterInfo.h (original) >>> +++ llvm/trunk/lib/Target/Blackfin/BlackfinRegisterInfo.h Wed Oct >>> 7 12:12:56 2009 >>> @@ -64,8 +64,9 @@ >>> MachineBasicBlock &MBB, >>> MachineBasicBlock::iterator I) >>> const; >>> >>> - void eliminateFrameIndex(MachineBasicBlock::iterator II, >>> - int SPAdj, RegScavenger *RS = NULL) >>> const; >>> + unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, >>> + int SPAdj, int *Value = NULL, >>> + RegScavenger *RS = NULL) const; >>> >>> void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, >>> RegScavenger *RS) const; >>> >>> Modified: llvm/trunk/lib/Target/CellSPU/SPURegisterInfo.cpp >>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/CellSPU/SPURegisterInfo.cpp?rev=83467&r1=83466&r2=83467&view=diff >>> >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> ==================================================================== >>> --- llvm/trunk/lib/Target/CellSPU/SPURegisterInfo.cpp (original) >>> +++ llvm/trunk/lib/Target/CellSPU/SPURegisterInfo.cpp Wed Oct 7 >>> 12:12:56 2009 >>> @@ -326,9 +326,9 @@ >>> MBB.erase(I); >>> } >>> >>> -void >>> +unsigned >>> SPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator >>> II, int SPAdj, >>> - RegScavenger *RS) const >>> + int *Value, RegScavenger >>> *RS) const >>> { >>> unsigned i = 0; >>> MachineInstr &MI = *II; >>> @@ -371,6 +371,7 @@ >>> } else { >>> MO.ChangeToImmediate(Offset); >>> } 
>>> + return 0; >>> } >>> >>> /// determineFrameLayout - Determine the size of the frame and >>> maximum call >>> >>> Modified: llvm/trunk/lib/Target/CellSPU/SPURegisterInfo.h >>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/CellSPU/SPURegisterInfo.h?rev=83467&r1=83466&r2=83467&view=diff >>> >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> ==================================================================== >>> --- llvm/trunk/lib/Target/CellSPU/SPURegisterInfo.h (original) >>> +++ llvm/trunk/lib/Target/CellSPU/SPURegisterInfo.h Wed Oct 7 >>> 12:12:56 2009 >>> @@ -63,8 +63,9 @@ >>> MachineBasicBlock &MBB, >>> MachineBasicBlock::iterator I) >>> const; >>> //! Convert frame indicies into machine operands >>> - void eliminateFrameIndex(MachineBasicBlock::iterator II, int, >>> - RegScavenger *RS) const; >>> + unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, >>> int SPAdj, >>> + int *Value = NULL, >>> + RegScavenger *RS = NULL) const; >>> //! Determine the frame's layour >>> void determineFrameLayout(MachineFunction &MF) const; >>> >>> >>> Modified: llvm/trunk/lib/Target/MSP430/MSP430RegisterInfo.cpp >>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/MSP430/MSP430RegisterInfo.cpp?rev=83467&r1=83466&r2=83467&view=diff >>> >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> ==================================================================== >>> --- llvm/trunk/lib/Target/MSP430/MSP430RegisterInfo.cpp (original) >>> +++ llvm/trunk/lib/Target/MSP430/MSP430RegisterInfo.cpp Wed Oct 7 >>> 12:12:56 2009 >>> @@ -147,9 +147,10 @@ >>> MBB.erase(I); >>> } >>> >>> -void >>> +unsigned >>> MSP430RegisterInfo::eliminateFrameIndex >>> (MachineBasicBlock::iterator II, >>> - int SPAdj, RegScavenger >>> *RS) const { >>> + int SPAdj, int *Value, >>> + RegScavenger *RS) const { >>> assert(SPAdj == 0 && "Unexpected"); >>> >>> unsigned i = 0; >>> @@ -187,7 +188,7 @@ >>> MI.getOperand(i).ChangeToRegister(BasePtr, false); 
>>> >>> if (Offset == 0) >>> - return; >>> + return 0; >>> >>> // We need to materialize the offset via add instruction. >>> unsigned DstReg = MI.getOperand(0).getReg(); >>> @@ -198,11 +199,12 @@ >>> BuildMI(MBB, next(II), dl, TII.get(MSP430::ADD16ri), DstReg) >>> .addReg(DstReg).addImm(Offset); >>> >>> - return; >>> + return 0; >>> } >>> >>> MI.getOperand(i).ChangeToRegister(BasePtr, false); >>> MI.getOperand(i+1).ChangeToImmediate(Offset); >>> + return 0; >>> } >>> >>> void >>> >>> Modified: llvm/trunk/lib/Target/MSP430/MSP430RegisterInfo.h >>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/MSP430/MSP430RegisterInfo.h?rev=83467&r1=83466&r2=83467&view=diff >>> >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> ==================================================================== >>> --- llvm/trunk/lib/Target/MSP430/MSP430RegisterInfo.h (original) >>> +++ llvm/trunk/lib/Target/MSP430/MSP430RegisterInfo.h Wed Oct 7 >>> 12:12:56 2009 >>> @@ -49,8 +49,9 @@ >>> MachineBasicBlock &MBB, >>> MachineBasicBlock::iterator I) >>> const; >>> >>> - void eliminateFrameIndex(MachineBasicBlock::iterator II, >>> - int SPAdj, RegScavenger *RS = NULL) >>> const; >>> + unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, >>> + int SPAdj, int *Value = NULL, >>> + RegScavenger *RS = NULL) const; >>> >>> void emitPrologue(MachineFunction &MF) const; >>> void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) >>> const; >>> >>> Modified: llvm/trunk/lib/Target/Mips/MipsRegisterInfo.cpp >>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Mips/MipsRegisterInfo.cpp?rev=83467&r1=83466&r2=83467&view=diff >>> >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> ==================================================================== >>> --- llvm/trunk/lib/Target/Mips/MipsRegisterInfo.cpp (original) >>> +++ llvm/trunk/lib/Target/Mips/MipsRegisterInfo.cpp Wed Oct 7 >>> 12:12:56 2009 >>> @@ -348,9 +348,9 @@ >>> // FrameIndex represent 
objects inside a abstract stack. >>> // We must replace FrameIndex with an stack/frame pointer >>> // direct reference. >>> -void MipsRegisterInfo:: >>> -eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, >>> - RegScavenger *RS) const >>> +unsigned MipsRegisterInfo:: >>> +eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, >>> + int *Value, RegScavenger *RS) const >>> { >>> MachineInstr &MI = *II; >>> MachineFunction &MF = *MI.getParent()->getParent(); >>> @@ -382,6 +382,7 @@ >>> >>> MI.getOperand(i-1).ChangeToImmediate(Offset); >>> MI.getOperand(i).ChangeToRegister(getFrameRegister(MF), false); >>> + return 0; >>> } >>> >>> void MipsRegisterInfo:: >>> >>> Modified: llvm/trunk/lib/Target/Mips/MipsRegisterInfo.h >>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Mips/MipsRegisterInfo.h?rev=83467&r1=83466&r2=83467&view=diff >>> >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> ==================================================================== >>> --- llvm/trunk/lib/Target/Mips/MipsRegisterInfo.h (original) >>> +++ llvm/trunk/lib/Target/Mips/MipsRegisterInfo.h Wed Oct 7 >>> 12:12:56 2009 >>> @@ -54,8 +54,9 @@ >>> MachineBasicBlock::iterator I) >>> const; >>> >>> /// Stack Frame Processing Methods >>> - void eliminateFrameIndex(MachineBasicBlock::iterator II, >>> - int SPAdj, RegScavenger *RS = NULL) >>> const; >>> + unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, >>> + int SPAdj, int *Value = NULL, >>> + RegScavenger *RS = NULL) const; >>> >>> void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; >>> >>> >>> Modified: llvm/trunk/lib/Target/PIC16/PIC16RegisterInfo.cpp >>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PIC16/PIC16RegisterInfo.cpp?rev=83467&r1=83466&r2=83467&view=diff >>> >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> ==================================================================== >>> --- 
llvm/trunk/lib/Target/PIC16/PIC16RegisterInfo.cpp (original) >>> +++ llvm/trunk/lib/Target/PIC16/PIC16RegisterInfo.cpp Wed Oct 7 >>> 12:12:56 2009 >>> @@ -51,10 +51,13 @@ >>> return false; >>> } >>> >>> -void PIC16RegisterInfo:: >>> +unsigned PIC16RegisterInfo:: >>> eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, >>> - RegScavenger *RS) const >>> -{ /* NOT YET IMPLEMENTED */ } >>> + int *Value, RegScavenger *RS) const >>> +{ >>> + /* NOT YET IMPLEMENTED */ >>> + return 0; >>> +} >>> >>> void PIC16RegisterInfo::emitPrologue(MachineFunction &MF) const >>> { /* NOT YET IMPLEMENTED */ } >>> >>> Modified: llvm/trunk/lib/Target/PIC16/PIC16RegisterInfo.h >>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PIC16/PIC16RegisterInfo.h?rev=83467&r1=83466&r2=83467&view=diff >>> >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> ==================================================================== >>> --- llvm/trunk/lib/Target/PIC16/PIC16RegisterInfo.h (original) >>> +++ llvm/trunk/lib/Target/PIC16/PIC16RegisterInfo.h Wed Oct 7 >>> 12:12:56 2009 >>> @@ -48,8 +48,9 @@ >>> virtual BitVector getReservedRegs(const MachineFunction &MF) const; >>> virtual bool hasFP(const MachineFunction &MF) const; >>> >>> - virtual void eliminateFrameIndex(MachineBasicBlock::iterator MI, >>> - int SPAdj, RegScavenger *RS=NULL) const; >>> + virtual unsigned eliminateFrameIndex >>> (MachineBasicBlock::iterator MI, >>> + int SPAdj, int *Value = >>> NULL, >>> + RegScavenger *RS=NULL) >>> const; >>> >>> void eliminateCallFramePseudoInstr(MachineFunction &MF, >>> MachineBasicBlock &MBB, >>> >>> Modified: llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.cpp >>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.cpp?rev=83467&r1=83466&r2=83467&view=diff >>> >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> ==================================================================== >>> --- 
llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.cpp (original) >>> +++ llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.cpp Wed Oct 7 >>> 12:12:56 2009 >>> @@ -699,8 +699,10 @@ >>> MBB.erase(II); >>> } >>> >>> -void PPCRegisterInfo::eliminateFrameIndex >>> (MachineBasicBlock::iterator II, >>> - int SPAdj, RegScavenger >>> *RS) const { >>> +unsigned >>> +PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator >>> II, >>> + int SPAdj, int *Value, >>> + RegScavenger *RS) const { >>> assert(SPAdj == 0 && "Unexpected"); >>> >>> // Get the instruction. >>> @@ -739,14 +741,14 @@ >>> if (FPSI && FrameIndex == FPSI && >>> (OpC == PPC::DYNALLOC || OpC == PPC::DYNALLOC8)) { >>> lowerDynamicAlloc(II, SPAdj, RS); >>> - return; >>> + return 0; >>> } >>> >>> // Special case for pseudo-op SPILL_CR. >>> if (EnableRegisterScavenging) // FIXME (64-bit): Enable by default. >>> if (OpC == PPC::SPILL_CR) { >>> lowerCRSpilling(II, FrameIndex, SPAdj, RS); >>> - return; >>> + return 0; >>> } >>> >>> // Replace the FrameIndex with base register with GPR1 (SP) or >>> GPR31 (FP). >>> @@ -788,7 +790,7 @@ >>> if (isIXAddr) >>> Offset >>= 2; // The actual encoded value has the low two >>> bits zero. >>> MI.getOperand(OffsetOperandNo).ChangeToImmediate(Offset); >>> - return; >>> + return 0; >>> } >>> >>> // The offset doesn't fit into a single register, scavenge one to >>> build the >>> @@ -828,6 +830,7 @@ >>> unsigned StackReg = MI.getOperand(FIOperandNo).getReg(); >>> MI.getOperand(OperandBase).ChangeToRegister(StackReg, false); >>> MI.getOperand(OperandBase + 1).ChangeToRegister(SReg, false); >>> + return 0; >>> } >>> >>> /// VRRegNo - Map from a numbered VR register to its enum value. 
>>> >>> Modified: llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.h >>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.h?rev=83467&r1=83466&r2=83467&view=diff >>> >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> ==================================================================== >>> --- llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.h (original) >>> +++ llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.h Wed Oct 7 >>> 12:12:56 2009 >>> @@ -66,8 +66,9 @@ >>> int SPAdj, RegScavenger *RS) const; >>> void lowerCRSpilling(MachineBasicBlock::iterator II, unsigned >>> FrameIndex, >>> int SPAdj, RegScavenger *RS) const; >>> - void eliminateFrameIndex(MachineBasicBlock::iterator II, >>> - int SPAdj, RegScavenger *RS = NULL) >>> const; >>> + unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, >>> + int SPAdj, int *Value = NULL, >>> + RegScavenger *RS = NULL) const; >>> >>> /// determineFrameLayout - Determine the size of the frame and >>> maximum call >>> /// frame size. 
>>> >>> Modified: llvm/trunk/lib/Target/Sparc/SparcRegisterInfo.cpp >>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Sparc/SparcRegisterInfo.cpp?rev=83467&r1=83466&r2=83467&view=diff >>> >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> ==================================================================== >>> --- llvm/trunk/lib/Target/Sparc/SparcRegisterInfo.cpp (original) >>> +++ llvm/trunk/lib/Target/Sparc/SparcRegisterInfo.cpp Wed Oct 7 >>> 12:12:56 2009 >>> @@ -76,8 +76,10 @@ >>> MBB.erase(I); >>> } >>> >>> -void SparcRegisterInfo::eliminateFrameIndex >>> (MachineBasicBlock::iterator II, >>> - int SPAdj, >>> RegScavenger *RS) const { >>> +unsigned >>> +SparcRegisterInfo::eliminateFrameIndex >>> (MachineBasicBlock::iterator II, >>> + int SPAdj, int *Value, >>> + RegScavenger *RS) const { >>> assert(SPAdj == 0 && "Unexpected"); >>> >>> unsigned i = 0; >>> @@ -113,6 +115,7 @@ >>> MI.getOperand(i).ChangeToRegister(SP::G1, false); >>> MI.getOperand(i+1).ChangeToImmediate(Offset & ((1 << 10)-1)); >>> } >>> + return 0; >>> } >>> >>> void SparcRegisterInfo:: >>> >>> Modified: llvm/trunk/lib/Target/Sparc/SparcRegisterInfo.h >>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Sparc/SparcRegisterInfo.h?rev=83467&r1=83466&r2=83467&view=diff >>> >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> ==================================================================== >>> --- llvm/trunk/lib/Target/Sparc/SparcRegisterInfo.h (original) >>> +++ llvm/trunk/lib/Target/Sparc/SparcRegisterInfo.h Wed Oct 7 >>> 12:12:56 2009 >>> @@ -43,8 +43,9 @@ >>> MachineBasicBlock &MBB, >>> MachineBasicBlock::iterator I) >>> const; >>> >>> - void eliminateFrameIndex(MachineBasicBlock::iterator II, >>> - int SPAdj, RegScavenger *RS = NULL) >>> const; >>> + unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, >>> + int SPAdj, int *Value = NULL, >>> + RegScavenger *RS = NULL) const; >>> >>> void 
processFunctionBeforeFrameFinalized(MachineFunction &MF) const; >>> >>> >>> Modified: llvm/trunk/lib/Target/SystemZ/SystemZRegisterInfo.cpp >>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SystemZ/SystemZRegisterInfo.cpp?rev=83467&r1=83466&r2=83467&view=diff >>> >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> ==================================================================== >>> --- llvm/trunk/lib/Target/SystemZ/SystemZRegisterInfo.cpp (original) >>> +++ llvm/trunk/lib/Target/SystemZ/SystemZRegisterInfo.cpp Wed Oct >>> 7 12:12:56 2009 >>> @@ -107,8 +107,10 @@ >>> return Offset; >>> } >>> >>> -void SystemZRegisterInfo::eliminateFrameIndex >>> (MachineBasicBlock::iterator II, >>> - int SPAdj, >>> RegScavenger *RS) const { >>> +unsigned >>> +SystemZRegisterInfo::eliminateFrameIndex >>> (MachineBasicBlock::iterator II, >>> + int SPAdj, int *Value, >>> + RegScavenger *RS) const { >>> assert(SPAdj == 0 && "Unxpected"); >>> >>> unsigned i = 0; >>> @@ -136,6 +138,7 @@ >>> MI.setDesc(TII.getMemoryInstr(MI.getOpcode(), Offset)); >>> >>> MI.getOperand(i+1).ChangeToImmediate(Offset); >>> + return 0; >>> } >>> >>> void >>> >>> Modified: llvm/trunk/lib/Target/SystemZ/SystemZRegisterInfo.h >>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SystemZ/SystemZRegisterInfo.h?rev=83467&r1=83466&r2=83467&view=diff >>> >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> ==================================================================== >>> --- llvm/trunk/lib/Target/SystemZ/SystemZRegisterInfo.h (original) >>> +++ llvm/trunk/lib/Target/SystemZ/SystemZRegisterInfo.h Wed Oct 7 >>> 12:12:56 2009 >>> @@ -55,8 +55,9 @@ >>> MachineBasicBlock &MBB, >>> MachineBasicBlock::iterator I) >>> const; >>> >>> - void eliminateFrameIndex(MachineBasicBlock::iterator II, >>> - int SPAdj, RegScavenger *RS = NULL) >>> const; >>> + unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, >>> + int SPAdj, int *Value = NULL, >>> + RegScavenger 
*RS = NULL) const; >>> >>> >>> void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, >>> >>> Modified: llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp >>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp?rev=83467&r1=83466&r2=83467&view=diff >>> >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> ==================================================================== >>> --- llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp (original) >>> +++ llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp Wed Oct 7 >>> 12:12:56 2009 >>> @@ -579,8 +579,10 @@ >>> MBB.erase(I); >>> } >>> >>> -void X86RegisterInfo::eliminateFrameIndex >>> (MachineBasicBlock::iterator II, >>> - int SPAdj, RegScavenger >>> *RS) const{ >>> +unsigned >>> +X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator >>> II, >>> + int SPAdj, int *Value, >>> + RegScavenger *RS) const{ >>> assert(SPAdj == 0 && "Unexpected"); >>> >>> unsigned i = 0; >>> @@ -617,6 +619,7 @@ >>> (uint64_t)MI.getOperand(i+3).getOffset(); >>> MI.getOperand(i+3).setOffset(Offset); >>> } >>> + return 0; >>> } >>> >>> void >>> >>> Modified: llvm/trunk/lib/Target/X86/X86RegisterInfo.h >>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86RegisterInfo.h?rev=83467&r1=83466&r2=83467&view=diff >>> >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> ==================================================================== >>> --- llvm/trunk/lib/Target/X86/X86RegisterInfo.h (original) >>> +++ llvm/trunk/lib/Target/X86/X86RegisterInfo.h Wed Oct 7 >>> 12:12:56 2009 >>> @@ -139,8 +139,9 @@ >>> MachineBasicBlock &MBB, >>> MachineBasicBlock::iterator MI) >>> const; >>> >>> - void eliminateFrameIndex(MachineBasicBlock::iterator MI, >>> - int SPAdj, RegScavenger *RS = NULL) >>> const; >>> + unsigned eliminateFrameIndex(MachineBasicBlock::iterator MI, >>> + int SPAdj, int *Value = NULL, >>> + RegScavenger *RS = NULL) const; >>> >>> void 
processFunctionBeforeCalleeSavedScan(MachineFunction &MF, >>> RegScavenger *RS = NULL) >>> const; >>> >>> Modified: llvm/trunk/lib/Target/XCore/XCoreRegisterInfo.cpp >>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/XCore/XCoreRegisterInfo.cpp?rev=83467&r1=83466&r2=83467&view=diff >>> >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> ==================================================================== >>> --- llvm/trunk/lib/Target/XCore/XCoreRegisterInfo.cpp (original) >>> +++ llvm/trunk/lib/Target/XCore/XCoreRegisterInfo.cpp Wed Oct 7 >>> 12:12:56 2009 >>> @@ -171,8 +171,10 @@ >>> MBB.erase(I); >>> } >>> >>> -void XCoreRegisterInfo::eliminateFrameIndex >>> (MachineBasicBlock::iterator II, >>> - int SPAdj, >>> RegScavenger *RS) const { >>> +unsigned >>> +XCoreRegisterInfo::eliminateFrameIndex >>> (MachineBasicBlock::iterator II, >>> + int SPAdj, int *Value, >>> + RegScavenger *RS) const { >>> assert(SPAdj == 0 && "Unexpected"); >>> MachineInstr &MI = *II; >>> DebugLoc dl = MI.getDebugLoc(); >>> @@ -311,6 +313,7 @@ >>> } >>> // Erase old instruction. 
>>> MBB.erase(II); >>> + return 0; >>> } >>> >>> void >>> >>> Modified: llvm/trunk/lib/Target/XCore/XCoreRegisterInfo.h >>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/XCore/XCoreRegisterInfo.h?rev=83467&r1=83466&r2=83467&view=diff >>> >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> ==================================================================== >>> --- llvm/trunk/lib/Target/XCore/XCoreRegisterInfo.h (original) >>> +++ llvm/trunk/lib/Target/XCore/XCoreRegisterInfo.h Wed Oct 7 >>> 12:12:56 2009 >>> @@ -57,8 +57,9 @@ >>> MachineBasicBlock &MBB, >>> MachineBasicBlock::iterator I) >>> const; >>> >>> - void eliminateFrameIndex(MachineBasicBlock::iterator II, >>> - int SPAdj, RegScavenger *RS = NULL) >>> const; >>> + unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, >>> + int SPAdj, int *Value = NULL, >>> + RegScavenger *RS = NULL) const; >>> >>> void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, >>> RegScavenger *RS = >>> NULL) const; >>> >>> >>> _______________________________________________ >>> llvm-commits mailing list >>> llvm-commits at cs.uiuc.edu >>> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits >> > From bob.wilson at apple.com Wed Oct 7 18:54:04 2009 From: bob.wilson at apple.com (Bob Wilson) Date: Wed, 07 Oct 2009 23:54:04 -0000 Subject: [llvm-commits] [llvm] r83508 - in /llvm/trunk: lib/Target/ARM/ARMISelDAGToDAG.cpp lib/Target/ARM/ARMInstrNEON.td lib/Target/ARM/NEONPreAllocPass.cpp test/CodeGen/ARM/vld4.ll Message-ID: <200910072354.n97Ns5Qo027793@zion.cs.uiuc.edu> Author: bwilson Date: Wed Oct 7 18:54:04 2009 New Revision: 83508 URL: http://llvm.org/viewvc/llvm-project?rev=83508&view=rev Log: Add codegen support for NEON vld4 intrinsics with <1 x i64> vectors. 
Modified: llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp llvm/trunk/lib/Target/ARM/ARMInstrNEON.td llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp llvm/trunk/test/CodeGen/ARM/vld4.ll Modified: llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp?rev=83508&r1=83507&r2=83508&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp (original) +++ llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp Wed Oct 7 18:54:04 2009 @@ -1454,6 +1454,7 @@ case MVT::v4i16: Opc = ARM::VLD4d16; break; case MVT::v2f32: case MVT::v2i32: Opc = ARM::VLD4d32; break; + case MVT::v1i64: Opc = ARM::VLD4d64; break; } SDValue Chain = N->getOperand(0); const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Chain }; Modified: llvm/trunk/lib/Target/ARM/ARMInstrNEON.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrNEON.td?rev=83508&r1=83507&r2=83508&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrNEON.td (original) +++ llvm/trunk/lib/Target/ARM/ARMInstrNEON.td Wed Oct 7 18:54:04 2009 @@ -247,6 +247,10 @@ def VLD4d8 : VLD4D<0b0000, "vld4.8">; def VLD4d16 : VLD4D<0b0100, "vld4.16">; def VLD4d32 : VLD4D<0b1000, "vld4.32">; +def VLD4d64 : NLdSt<0,0b10,0b0010,0b1100, + (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4), + (ins addrmode6:$addr), IIC_VLD1, + "vld1.64\t\\{$dst1,$dst2,$dst3,$dst4\\}, $addr", "", []>; // vld4 to double-spaced even registers. 
def VLD4q8a : VLD4WB<0b0000, "vld4.8">; Modified: llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp?rev=83508&r1=83507&r2=83508&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp (original) +++ llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp Wed Oct 7 18:54:04 2009 @@ -96,6 +96,7 @@ case ARM::VLD4d8: case ARM::VLD4d16: case ARM::VLD4d32: + case ARM::VLD4d64: case ARM::VLD4LNd8: case ARM::VLD4LNd16: case ARM::VLD4LNd32: Modified: llvm/trunk/test/CodeGen/ARM/vld4.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vld4.ll?rev=83508&r1=83507&r2=83508&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/ARM/vld4.ll (original) +++ llvm/trunk/test/CodeGen/ARM/vld4.ll Wed Oct 7 18:54:04 2009 @@ -4,6 +4,7 @@ %struct.__neon_int16x4x4_t = type { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %struct.__neon_int32x2x4_t = type { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %struct.__neon_float32x2x4_t = type { <2 x float>, <2 x float>, <2 x float>, <2 x float> } +%struct.__neon_int64x1x4_t = type { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %struct.__neon_int8x16x4_t = type { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %struct.__neon_int16x8x4_t = type { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @@ -50,6 +51,16 @@ ret <2 x float> %tmp4 } +define <1 x i64> @vld4i64(i64* %A) nounwind { +;CHECK: vld4i64: +;CHECK: vld1.64 + %tmp1 = call %struct.__neon_int64x1x4_t @llvm.arm.neon.vld4.v1i64(i64* %A) + %tmp2 = extractvalue %struct.__neon_int64x1x4_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int64x1x4_t %tmp1, 2 + %tmp4 = add <1 x i64> %tmp2, %tmp3 + ret <1 x i64> %tmp4 +} + define <16 x i8> @vld4Qi8(i8* %A) nounwind { ;CHECK: vld4Qi8: ;CHECK: vld4.8 @@ -98,6 +109,7 @@ declare %struct.__neon_int16x4x4_t 
@llvm.arm.neon.vld4.v4i16(i8*) nounwind readonly declare %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4.v2i32(i8*) nounwind readonly declare %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4.v2f32(i8*) nounwind readonly +declare %struct.__neon_int64x1x4_t @llvm.arm.neon.vld4.v1i64(i8*) nounwind readonly declare %struct.__neon_int8x16x4_t @llvm.arm.neon.vld4.v16i8(i8*) nounwind readonly declare %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4.v8i16(i8*) nounwind readonly From jyasskin at google.com Wed Oct 7 19:12:24 2009 From: jyasskin at google.com (Jeffrey Yasskin) Date: Thu, 08 Oct 2009 00:12:24 -0000 Subject: [llvm-commits] [llvm] r83510 - /llvm/trunk/lib/Transforms/Scalar/InstructionCombining.cpp Message-ID: <200910080012.n980CPBe005709@zion.cs.uiuc.edu> Author: jyasskin Date: Wed Oct 7 19:12:24 2009 New Revision: 83510 URL: http://llvm.org/viewvc/llvm-project?rev=83510&view=rev Log: In instcombine's debug output, avoid printing ADD for instructions that are already on the worklist, and print Visited when an instruction is about to be visited. Net, on one input, this reduced the output size by at least 9x. Modified: llvm/trunk/lib/Transforms/Scalar/InstructionCombining.cpp Modified: llvm/trunk/lib/Transforms/Scalar/InstructionCombining.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/InstructionCombining.cpp?rev=83510&r1=83509&r2=83510&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/Scalar/InstructionCombining.cpp (original) +++ llvm/trunk/lib/Transforms/Scalar/InstructionCombining.cpp Wed Oct 7 19:12:24 2009 @@ -90,9 +90,10 @@ /// Add - Add the specified instruction to the worklist if it isn't already /// in it. 
void Add(Instruction *I) { - DEBUG(errs() << "IC: ADD: " << *I << '\n'); - if (WorklistMap.insert(std::make_pair(I, Worklist.size())).second) + if (WorklistMap.insert(std::make_pair(I, Worklist.size())).second) { + DEBUG(errs() << "IC: ADD: " << *I << '\n'); Worklist.push_back(I); + } } void AddValue(Value *V) { @@ -12853,7 +12854,8 @@ std::string OrigI; #endif DEBUG(raw_string_ostream SS(OrigI); I->print(SS); OrigI = SS.str();); - + DEBUG(errs() << "IC: Visiting: " << OrigI << '\n'); + if (Instruction *Result = visit(*I)) { ++NumCombined; // Should we replace the old instruction with a new one? From bob.wilson at apple.com Wed Oct 7 19:21:02 2009 From: bob.wilson at apple.com (Bob Wilson) Date: Thu, 08 Oct 2009 00:21:02 -0000 Subject: [llvm-commits] [llvm] r83513 - in /llvm/trunk: lib/Target/ARM/ARMISelDAGToDAG.cpp lib/Target/ARM/ARMInstrNEON.td lib/Target/ARM/NEONPreAllocPass.cpp test/CodeGen/ARM/vst2.ll Message-ID: <200910080021.n980L2EJ016044@zion.cs.uiuc.edu> Author: bwilson Date: Wed Oct 7 19:21:01 2009 New Revision: 83513 URL: http://llvm.org/viewvc/llvm-project?rev=83513&view=rev Log: Add codegen support for NEON vst2 intrinsics with <1 x i64> vectors. 
Modified: llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp llvm/trunk/lib/Target/ARM/ARMInstrNEON.td llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp llvm/trunk/test/CodeGen/ARM/vst2.ll Modified: llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp?rev=83513&r1=83512&r2=83513&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp (original) +++ llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp Wed Oct 7 19:21:01 2009 @@ -1574,6 +1574,7 @@ case MVT::v4i16: Opc = ARM::VST2d16; break; case MVT::v2f32: case MVT::v2i32: Opc = ARM::VST2d32; break; + case MVT::v1i64: Opc = ARM::VST2d64; break; } SDValue Chain = N->getOperand(0); const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Modified: llvm/trunk/lib/Target/ARM/ARMInstrNEON.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrNEON.td?rev=83513&r1=83512&r2=83513&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrNEON.td (original) +++ llvm/trunk/lib/Target/ARM/ARMInstrNEON.td Wed Oct 7 19:21:01 2009 @@ -352,6 +352,9 @@ def VST2d8 : VST2D<0b0000, "vst2.8">; def VST2d16 : VST2D<0b0100, "vst2.16">; def VST2d32 : VST2D<0b1000, "vst2.32">; +def VST2d64 : NLdSt<0,0b00,0b1010,0b1100, (outs), + (ins addrmode6:$addr, DPR:$src1, DPR:$src2), IIC_VST, + "vst1.64\t\\{$src1,$src2\\}, $addr", "", []>; def VST2q8 : VST2Q<0b0000, "vst2.8">; def VST2q16 : VST2Q<0b0100, "vst2.16">; Modified: llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp?rev=83513&r1=83512&r2=83513&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp (original) +++ llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp Wed Oct 7 19:21:01 2009 @@ -125,6 
+125,7 @@ case ARM::VST2d8: case ARM::VST2d16: case ARM::VST2d32: + case ARM::VST2d64: case ARM::VST2LNd8: case ARM::VST2LNd16: case ARM::VST2LNd32: Modified: llvm/trunk/test/CodeGen/ARM/vst2.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vst2.ll?rev=83513&r1=83512&r2=83513&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/ARM/vst2.ll (original) +++ llvm/trunk/test/CodeGen/ARM/vst2.ll Wed Oct 7 19:21:01 2009 @@ -32,6 +32,14 @@ ret void } +define void @vst2i64(i64* %A, <1 x i64>* %B) nounwind { +;CHECK: vst2i64: +;CHECK: vst1.64 + %tmp1 = load <1 x i64>* %B + call void @llvm.arm.neon.vst2.v1i64(i64* %A, <1 x i64> %tmp1, <1 x i64> %tmp1) + ret void +} + define void @vst2Qi8(i8* %A, <16 x i8>* %B) nounwind { ;CHECK: vst2Qi8: ;CHECK: vst2.8 @@ -68,6 +76,7 @@ declare void @llvm.arm.neon.vst2.v4i16(i8*, <4 x i16>, <4 x i16>) nounwind declare void @llvm.arm.neon.vst2.v2i32(i8*, <2 x i32>, <2 x i32>) nounwind declare void @llvm.arm.neon.vst2.v2f32(i8*, <2 x float>, <2 x float>) nounwind +declare void @llvm.arm.neon.vst2.v1i64(i8*, <1 x i64>, <1 x i64>) nounwind declare void @llvm.arm.neon.vst2.v16i8(i8*, <16 x i8>, <16 x i8>) nounwind declare void @llvm.arm.neon.vst2.v8i16(i8*, <8 x i16>, <8 x i16>) nounwind From bob.wilson at apple.com Wed Oct 7 19:28:29 2009 From: bob.wilson at apple.com (Bob Wilson) Date: Thu, 08 Oct 2009 00:28:29 -0000 Subject: [llvm-commits] [llvm] r83518 - in /llvm/trunk: lib/Target/ARM/ARMISelDAGToDAG.cpp lib/Target/ARM/ARMInstrNEON.td lib/Target/ARM/NEONPreAllocPass.cpp test/CodeGen/ARM/vst3.ll Message-ID: <200910080028.n980STPN009309@zion.cs.uiuc.edu> Author: bwilson Date: Wed Oct 7 19:28:28 2009 New Revision: 83518 URL: http://llvm.org/viewvc/llvm-project?rev=83518&view=rev Log: Add codegen support for NEON vst3 intrinsics with <1 x i64> vectors. 
Modified: llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp llvm/trunk/lib/Target/ARM/ARMInstrNEON.td llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp llvm/trunk/test/CodeGen/ARM/vst3.ll Modified: llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp?rev=83518&r1=83517&r2=83518&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp (original) +++ llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp Wed Oct 7 19:28:28 2009 @@ -1616,6 +1616,7 @@ case MVT::v4i16: Opc = ARM::VST3d16; break; case MVT::v2f32: case MVT::v2i32: Opc = ARM::VST3d32; break; + case MVT::v1i64: Opc = ARM::VST3d64; break; } SDValue Chain = N->getOperand(0); const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Modified: llvm/trunk/lib/Target/ARM/ARMInstrNEON.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrNEON.td?rev=83518&r1=83517&r2=83518&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrNEON.td (original) +++ llvm/trunk/lib/Target/ARM/ARMInstrNEON.td Wed Oct 7 19:28:28 2009 @@ -374,6 +374,10 @@ def VST3d8 : VST3D<0b0000, "vst3.8">; def VST3d16 : VST3D<0b0100, "vst3.16">; def VST3d32 : VST3D<0b1000, "vst3.32">; +def VST3d64 : NLdSt<0,0b00,0b0110,0b1100, (outs), + (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3), + IIC_VST, + "vst1.64\t\\{$src1,$src2,$src3\\}, $addr", "", []>; // vst3 to double-spaced even registers. 
def VST3q8a : VST3WB<0b0000, "vst3.8">; Modified: llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp?rev=83518&r1=83517&r2=83518&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp (original) +++ llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp Wed Oct 7 19:28:28 2009 @@ -143,6 +143,7 @@ case ARM::VST3d8: case ARM::VST3d16: case ARM::VST3d32: + case ARM::VST3d64: case ARM::VST3LNd8: case ARM::VST3LNd16: case ARM::VST3LNd32: Modified: llvm/trunk/test/CodeGen/ARM/vst3.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vst3.ll?rev=83518&r1=83517&r2=83518&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/ARM/vst3.ll (original) +++ llvm/trunk/test/CodeGen/ARM/vst3.ll Wed Oct 7 19:28:28 2009 @@ -32,6 +32,14 @@ ret void } +define void @vst3i64(i64* %A, <1 x i64>* %B) nounwind { +;CHECK: vst3i64: +;CHECK: vst1.64 + %tmp1 = load <1 x i64>* %B + call void @llvm.arm.neon.vst3.v1i64(i64* %A, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1) + ret void +} + define void @vst3Qi8(i8* %A, <16 x i8>* %B) nounwind { ;CHECK: vst3Qi8: ;CHECK: vst3.8 @@ -72,6 +80,7 @@ declare void @llvm.arm.neon.vst3.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>) nounwind declare void @llvm.arm.neon.vst3.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>) nounwind declare void @llvm.arm.neon.vst3.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>) nounwind +declare void @llvm.arm.neon.vst3.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>) nounwind declare void @llvm.arm.neon.vst3.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>) nounwind declare void @llvm.arm.neon.vst3.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>) nounwind From grosbach at apple.com Wed Oct 7 20:09:45 2009 From: grosbach at apple.com (Jim Grosbach) Date: Thu, 08 Oct 2009 01:09:45 -0000 Subject: 
[llvm-commits] [llvm] r83519 - in /llvm/trunk/lib/CodeGen: PrologEpilogInserter.cpp PrologEpilogInserter.h Message-ID: <200910080109.n9819jdm014565@zion.cs.uiuc.edu> Author: grosbach Date: Wed Oct 7 20:09:45 2009 New Revision: 83519 URL: http://llvm.org/viewvc/llvm-project?rev=83519&view=rev Log: bugfix. The target may use virtual registers that aren't tracked for re-use but are allocated by the scavenger. The re-use algorithm needs to watch for that. Modified: llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp llvm/trunk/lib/CodeGen/PrologEpilogInserter.h Modified: llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp?rev=83519&r1=83518&r2=83519&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp (original) +++ llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp Wed Oct 7 20:09:45 2009 @@ -655,11 +655,6 @@ int FrameSetupOpcode = TRI.getCallFrameSetupOpcode(); int FrameDestroyOpcode = TRI.getCallFrameDestroyOpcode(); - // Pre-allocate space for frame index mappings. If more space is needed, - // the map will be grown later. - if (FrameIndexVirtualScavenging) - FrameConstantRegMap.grow(Fn.getRegInfo().getLastVirtReg() + 128); - for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) { int SPAdj = 0; // SP offset due to call frame setup / destroy. 
@@ -716,7 +711,6 @@ assert (FrameIndexVirtualScavenging && "Not scavenging, but virtual returned from " "eliminateFrameIndex()!"); - FrameConstantRegMap.grow(VReg); FrameConstantRegMap[VReg] = FrameConstantEntry(Value, SPAdj); } @@ -780,10 +774,14 @@ unsigned CurrentVirtReg = 0; unsigned CurrentScratchReg = 0; + bool havePrevValue = false; unsigned PrevScratchReg = 0; int PrevValue; MachineInstr *PrevLastUseMI = NULL; unsigned PrevLastUseOp = 0; + bool trackingCurrentValue = false; + int SPAdj = 0; + int Value = 0; // The instruction stream may change in the loop, so check BB->end() // directly. @@ -818,14 +816,31 @@ continue; } - // If we already have a scratch for this virtual register, use it + // Have we already allocated a scratch register for this virtual? if (Reg != CurrentVirtReg) { - int Value = FrameConstantRegMap[Reg].first; - int SPAdj = FrameConstantRegMap[Reg].second; + // When we first encounter a new virtual register, it + // must be a definition. + assert(MI->getOperand(i).isDef() && + "frame index virtual missing def!"); + // We can't have nested virtual register live ranges because + // there's only a guarantee of one scavenged register at a time. + assert (CurrentVirtReg == 0 && + "overlapping frame index virtual registers!"); + + // If the target gave us information about what's in the register, + // we can use that to re-use scratch regs. + DenseMap::iterator Entry = + FrameConstantRegMap.find(Reg); + trackingCurrentValue = Entry != FrameConstantRegMap.end(); + if (trackingCurrentValue) { + SPAdj = (*Entry).second.second; + Value = (*Entry).second.first; + } else + SPAdj = Value = 0; // If the scratch register from the last allocation is still // available, see if the value matches. If it does, just re-use it. 
- if (PrevScratchReg && Value == PrevValue) { + if (trackingCurrentValue && havePrevValue && PrevValue == Value) { // FIXME: This assumes that the instructions in the live range // for the virtual register are exclusively for the purpose // of populating the value in the register. That's reasonable @@ -850,14 +865,6 @@ PrevLastUseMI->getOperand(PrevLastUseOp).setIsKill(false); RS->setUsed(CurrentScratchReg); } else { - // When we first encounter a new virtual register, it - // must be a definition. - assert(MI->getOperand(i).isDef() && - "frame index virtual missing def!"); - // We can't have nested virtual register live ranges because - // there's only a guarantee of one scavenged register at a time. - assert (CurrentVirtReg == 0 && - "overlapping frame index virtual registers!"); CurrentVirtReg = Reg; const TargetRegisterClass *RC = Fn.getRegInfo().getRegClass(Reg); CurrentScratchReg = RS->FindUnusedReg(RC); @@ -877,6 +884,7 @@ PrevLastUseMI = MI; PrevLastUseOp = i; CurrentScratchReg = CurrentVirtReg = 0; + havePrevValue = trackingCurrentValue; } } RS->forward(MI); Modified: llvm/trunk/lib/CodeGen/PrologEpilogInserter.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/PrologEpilogInserter.h?rev=83519&r1=83518&r2=83519&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/PrologEpilogInserter.h (original) +++ llvm/trunk/lib/CodeGen/PrologEpilogInserter.h Wed Oct 7 20:09:45 2009 @@ -27,7 +27,6 @@ #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/ADT/SparseBitVector.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/IndexedMap.h" #include "llvm/Target/TargetRegisterInfo.h" namespace llvm { @@ -99,7 +98,7 @@ // materialization registers, maintain a map of the registers to // the constant value and SP adjustment associated with it. typedef std::pair FrameConstantEntry; - IndexedMap FrameConstantRegMap; + DenseMap FrameConstantRegMap; #ifndef NDEBUG // Machine function handle. 
From grosbach at apple.com Wed Oct 7 20:46:59 2009 From: grosbach at apple.com (Jim Grosbach) Date: Thu, 08 Oct 2009 01:46:59 -0000 Subject: [llvm-commits] [llvm] r83521 - in /llvm/trunk: include/llvm/Target/TargetRegisterInfo.h lib/CodeGen/PrologEpilogInserter.cpp lib/CodeGen/PrologEpilogInserter.h lib/Target/ARM/Thumb1RegisterInfo.cpp lib/Target/ARM/Thumb1RegisterInfo.h Message-ID: <200910080146.n981kxrE019225@zion.cs.uiuc.edu> Author: grosbach Date: Wed Oct 7 20:46:59 2009 New Revision: 83521 URL: http://llvm.org/viewvc/llvm-project?rev=83521&view=rev Log: Re-enable register scavenging in Thumb1 by default. Modified: llvm/trunk/include/llvm/Target/TargetRegisterInfo.h llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp llvm/trunk/lib/CodeGen/PrologEpilogInserter.h llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.cpp llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.h Modified: llvm/trunk/include/llvm/Target/TargetRegisterInfo.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetRegisterInfo.h?rev=83521&r1=83520&r2=83521&view=diff ============================================================================== --- llvm/trunk/include/llvm/Target/TargetRegisterInfo.h (original) +++ llvm/trunk/include/llvm/Target/TargetRegisterInfo.h Wed Oct 7 20:46:59 2009 @@ -561,6 +561,12 @@ return false; } + /// requiresFrameIndexScavenging - returns true if the target requires post + /// PEI scavenging of registers for materializing frame index constants. + virtual bool requiresFrameIndexScavenging(const MachineFunction &MF) const { + return false; + } + /// hasFP - Return true if the specified function should have a dedicated /// frame pointer register. For most targets this is true only if the function /// has variable sized allocas or if frame pointer elimination is disabled. 
Modified: llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp?rev=83521&r1=83520&r2=83521&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp (original) +++ llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp Wed Oct 7 20:46:59 2009 @@ -44,16 +44,6 @@ static RegisterPass X("prologepilog", "Prologue/Epilogue Insertion"); -// FIXME: For now, the frame index scavenging is off by default and only -// used by the Thumb1 target. When it's the default and replaces the current -// on-the-fly PEI scavenging for all targets, requiresRegisterScavenging() -// will replace this. -cl::opt -FrameIndexVirtualScavenging("enable-frame-index-scavenging", - cl::Hidden, - cl::desc("Enable frame index elimination with" - "virtual register scavenging")); - /// createPrologEpilogCodeInserter - This function returns a pass that inserts /// prolog and epilog code, and eliminates abstract frame references. /// @@ -66,6 +56,7 @@ const Function* F = Fn.getFunction(); const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo(); RS = TRI->requiresRegisterScavenging(Fn) ? new RegScavenger() : NULL; + FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(Fn); // Get MachineModuleInfo so that we can track the construction of the // frame. Modified: llvm/trunk/lib/CodeGen/PrologEpilogInserter.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/PrologEpilogInserter.h?rev=83521&r1=83520&r2=83521&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/PrologEpilogInserter.h (original) +++ llvm/trunk/lib/CodeGen/PrologEpilogInserter.h Wed Oct 7 20:46:59 2009 @@ -94,6 +94,11 @@ // functions. bool ShrinkWrapThisFunction; + // Flag to control whether to use the register scavenger to resolve + // frame index materialization registers. 
Set according to + // TRI->requiresFrameIndexScavenging() for the current function. + bool FrameIndexVirtualScavenging; + // When using the scavenger post-pass to resolve frame reference // materialization registers, maintain a map of the registers to // the constant value and SP adjustment associated with it. Modified: llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.cpp?rev=83521&r1=83520&r2=83521&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.cpp (original) +++ llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.cpp Wed Oct 7 20:46:59 2009 @@ -37,11 +37,6 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; -// FIXME: This cmd line option conditionalizes the new register scavenging -// implementation in PEI. Remove the option when scavenging works well enough -// to be the default. -extern cl::opt FrameIndexVirtualScavenging; - Thumb1RegisterInfo::Thumb1RegisterInfo(const ARMBaseInstrInfo &tii, const ARMSubtarget &sti) : ARMBaseRegisterInfo(tii, sti) { @@ -84,9 +79,16 @@ bool Thumb1RegisterInfo::requiresRegisterScavenging(const MachineFunction &MF) const { - return FrameIndexVirtualScavenging; + return true; +} + +bool +Thumb1RegisterInfo::requiresFrameIndexScavenging(const MachineFunction &MF) + const { + return true; } + bool Thumb1RegisterInfo::hasReservedCallFrame(MachineFunction &MF) const { const MachineFrameInfo *FFI = MF.getFrameInfo(); unsigned CFSize = FFI->getMaxCallFrameSize(); @@ -128,13 +130,7 @@ unsigned LdReg = DestReg; if (DestReg == ARM::SP) { assert(BaseReg == ARM::SP && "Unexpected!"); - if (FrameIndexVirtualScavenging) { - LdReg = MF.getRegInfo().createVirtualRegister(ARM::tGPRRegisterClass); - } else { - LdReg = ARM::R3; - BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::R12) - .addReg(ARM::R3, RegState::Kill); - } + LdReg =
MF.getRegInfo().createVirtualRegister(ARM::tGPRRegisterClass); } if (NumBytes <= 255 && NumBytes >= 0) @@ -159,10 +155,6 @@ else MIB.addReg(LdReg).addReg(BaseReg, RegState::Kill); AddDefaultPred(MIB); - - if (!FrameIndexVirtualScavenging && DestReg == ARM::SP) - BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2tgpr), ARM::R3) - .addReg(ARM::R12, RegState::Kill); } /// calcNumMI - Returns the number of instructions required to materialize @@ -635,7 +627,6 @@ else // tLDR has an extra register operand. MI.addOperand(MachineOperand::CreateReg(0, false)); } else if (Desc.mayStore()) { - if (FrameIndexVirtualScavenging) { VReg = MF.getRegInfo().createVirtualRegister(ARM::tGPRRegisterClass); assert (Value && "Frame index virtual allocated, but Value arg is NULL!"); *Value = Offset; @@ -658,52 +649,6 @@ MI.addOperand(MachineOperand::CreateReg(FrameReg, false)); else // tSTR has an extra register operand. MI.addOperand(MachineOperand::CreateReg(0, false)); - } else { - // FIXME! This is horrific!!! We need register scavenging. - // Our temporary workaround has marked r3 unavailable. Of course, r3 is - // also a ABI register so it's possible that is is the register that is - // being storing here. 
If that's the case, we do the following: - // r12 = r2 - // Use r2 to materialize sp + offset - // str r3, r2 - // r2 = r12 - unsigned ValReg = MI.getOperand(0).getReg(); - unsigned TmpReg = ARM::R3; - bool UseRR = false; - if (ValReg == ARM::R3) { - BuildMI(MBB, II, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::R12) - .addReg(ARM::R2, RegState::Kill); - TmpReg = ARM::R2; - } - if (TmpReg == ARM::R3 && AFI->isR3LiveIn()) - BuildMI(MBB, II, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::R12) - .addReg(ARM::R3, RegState::Kill); - if (Opcode == ARM::tSpill) { - if (FrameReg == ARM::SP) - emitThumbRegPlusImmInReg(MBB, II, TmpReg, FrameReg, - Offset, false, TII, *this, dl); - else { - emitLoadConstPool(MBB, II, dl, TmpReg, 0, Offset); - UseRR = true; - } - } else - emitThumbRegPlusImmediate(MBB, II, TmpReg, FrameReg, Offset, TII, - *this, dl); - MI.setDesc(TII.get(ARM::tSTR)); - MI.getOperand(i).ChangeToRegister(TmpReg, false, false, true); - if (UseRR) // Use [reg, reg] addrmode. - MI.addOperand(MachineOperand::CreateReg(FrameReg, false)); - else // tSTR has an extra register operand. 
- MI.addOperand(MachineOperand::CreateReg(0, false)); - - MachineBasicBlock::iterator NII = next(II); - if (ValReg == ARM::R3) - BuildMI(MBB, NII, dl, TII.get(ARM::tMOVgpr2tgpr), ARM::R2) - .addReg(ARM::R12, RegState::Kill); - if (TmpReg == ARM::R3 && AFI->isR3LiveIn()) - BuildMI(MBB, NII, dl, TII.get(ARM::tMOVgpr2tgpr), ARM::R3) - .addReg(ARM::R12, RegState::Kill); - } } else assert(false && "Unexpected opcode!"); Modified: llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.h?rev=83521&r1=83520&r2=83521&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.h (original) +++ llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.h Wed Oct 7 20:46:59 2009 @@ -41,6 +41,7 @@ getPhysicalRegisterRegClass(unsigned Reg, EVT VT = MVT::Other) const; bool requiresRegisterScavenging(const MachineFunction &MF) const; + bool requiresFrameIndexScavenging(const MachineFunction &MF) const; bool hasReservedCallFrame(MachineFunction &MF) const; From grosbach at apple.com Wed Oct 7 20:50:27 2009 From: grosbach at apple.com (Jim Grosbach) Date: Thu, 08 Oct 2009 01:50:27 -0000 Subject: [llvm-commits] [llvm] r83522 - in /llvm/trunk/lib/Target/ARM: ARMMachineFunctionInfo.h Thumb1RegisterInfo.cpp Message-ID: <200910080150.n981oRt4019657@zion.cs.uiuc.edu> Author: grosbach Date: Wed Oct 7 20:50:26 2009 New Revision: 83522 URL: http://llvm.org/viewvc/llvm-project?rev=83522&view=rev Log: Cleanup up unused R3LiveIn tracking. 
Modified: llvm/trunk/lib/Target/ARM/ARMMachineFunctionInfo.h llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.cpp Modified: llvm/trunk/lib/Target/ARM/ARMMachineFunctionInfo.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMMachineFunctionInfo.h?rev=83522&r1=83521&r2=83522&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMMachineFunctionInfo.h (original) +++ llvm/trunk/lib/Target/ARM/ARMMachineFunctionInfo.h Wed Oct 7 20:50:26 2009 @@ -52,10 +52,6 @@ /// enable far jump. bool LRSpilledForFarJump; - /// R3IsLiveIn - True if R3 is live in to this function. - /// FIXME: Remove when register scavenger for Thumb is done. - bool R3IsLiveIn; - /// FramePtrSpillOffset - If HasStackFrame, this records the frame pointer /// spill stack offset. unsigned FramePtrSpillOffset; @@ -100,7 +96,7 @@ hasThumb2(false), Align(2U), VarArgsRegSaveSize(0), HasStackFrame(false), - LRSpilledForFarJump(false), R3IsLiveIn(false), + LRSpilledForFarJump(false), FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0), GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0), GPRCS1Frames(0), GPRCS2Frames(0), DPRCSFrames(0), @@ -111,7 +107,7 @@ hasThumb2(MF.getTarget().getSubtarget().hasThumb2()), Align(isThumb ? 1U : 2U), VarArgsRegSaveSize(0), HasStackFrame(false), - LRSpilledForFarJump(false), R3IsLiveIn(false), + LRSpilledForFarJump(false), FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0), GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0), GPRCS1Frames(32), GPRCS2Frames(32), DPRCSFrames(32), @@ -134,10 +130,6 @@ bool isLRSpilledForFarJump() const { return LRSpilledForFarJump; } void setLRIsSpilledForFarJump(bool s) { LRSpilledForFarJump = s; } - // FIXME: Remove when register scavenger for Thumb is done. 
- bool isR3LiveIn() const { return R3IsLiveIn; } - void setR3IsLiveIn(bool l) { R3IsLiveIn = l; } - unsigned getFramePtrSpillOffset() const { return FramePtrSpillOffset; } void setFramePtrSpillOffset(unsigned o) { FramePtrSpillOffset = o; } Modified: llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.cpp?rev=83522&r1=83521&r2=83522&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.cpp (original) +++ llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.cpp Wed Oct 7 20:50:26 2009 @@ -671,15 +671,6 @@ DebugLoc dl = (MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc::getUnknownLoc()); - // Check if R3 is live in. It might have to be used as a scratch register. - for (MachineRegisterInfo::livein_iterator I =MF.getRegInfo().livein_begin(), - E = MF.getRegInfo().livein_end(); I != E; ++I) { - if (I->first == ARM::R3) { - AFI->setR3IsLiveIn(true); - break; - } - } - // Thumb add/sub sp, imm8 instructions implicitly multiply the offset by 4. 
NumBytes = (NumBytes + 3) & ~3; MFI->setStackSize(NumBytes); From grosbach at apple.com Wed Oct 7 21:13:21 2009 From: grosbach at apple.com (Jim Grosbach) Date: Thu, 08 Oct 2009 02:13:21 -0000 Subject: [llvm-commits] [test-suite] r83523 - /test-suite/trunk/Makefile.programs Message-ID: <200910080213.n982DLnl022530@zion.cs.uiuc.edu> Author: grosbach Date: Wed Oct 7 21:13:20 2009 New Revision: 83523 URL: http://llvm.org/viewvc/llvm-project?rev=83523&view=rev Log: Remove LLCBETA thumb option Modified: test-suite/trunk/Makefile.programs Modified: test-suite/trunk/Makefile.programs URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/Makefile.programs?rev=83523&r1=83522&r2=83523&view=diff ============================================================================== --- test-suite/trunk/Makefile.programs (original) +++ test-suite/trunk/Makefile.programs Wed Oct 7 21:13:20 2009 @@ -249,7 +249,7 @@ #-new-coalescer-heuristic=true endif ifeq ($(ARCH),THUMB) -LLCBETAOPTION := -enable-frame-index-scavenging +LLCBETAOPTION := #-combiner-alias-analysis #--enable-thumb-reg-scavenging endif From foldr at codedgers.com Wed Oct 7 23:40:08 2009 From: foldr at codedgers.com (Mikhail Glushenkov) Date: Thu, 08 Oct 2009 04:40:08 -0000 Subject: [llvm-commits] [llvm] r83524 - in /llvm/trunk: test/LLVMC/ExternOptions.td test/LLVMC/ForwardAs.td test/LLVMC/MultiValuedOption.td test/LLVMC/NoActions.td test/LLVMC/OneOrMore.td utils/TableGen/LLVMCConfigurationEmitter.cpp Message-ID: <200910080440.n984e9RS008559@zion.cs.uiuc.edu> Author: foldr Date: Wed Oct 7 23:40:08 2009 New Revision: 83524 URL: http://llvm.org/viewvc/llvm-project?rev=83524&view=rev Log: Input files should go before all other options. Important, for example, when calling 'gcc a.o b.o c.o -lD -lE -lF'. 
Modified: llvm/trunk/test/LLVMC/ExternOptions.td llvm/trunk/test/LLVMC/ForwardAs.td llvm/trunk/test/LLVMC/MultiValuedOption.td llvm/trunk/test/LLVMC/NoActions.td llvm/trunk/test/LLVMC/OneOrMore.td llvm/trunk/utils/TableGen/LLVMCConfigurationEmitter.cpp Modified: llvm/trunk/test/LLVMC/ExternOptions.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/LLVMC/ExternOptions.td?rev=83524&r1=83523&r2=83524&view=diff ============================================================================== --- llvm/trunk/test/LLVMC/ExternOptions.td (original) +++ llvm/trunk/test/LLVMC/ExternOptions.td Wed Oct 7 23:40:08 2009 @@ -10,7 +10,7 @@ (prefix_list_option "L", (extern))]>; def dummy_tool : Tool<[ -(cmd_line "dummy_cmd"), +(cmd_line "dummy_cmd $INFILE"), (in_language "dummy"), (out_language "dummy"), (actions (case Modified: llvm/trunk/test/LLVMC/ForwardAs.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/LLVMC/ForwardAs.td?rev=83524&r1=83523&r2=83524&view=diff ============================================================================== --- llvm/trunk/test/LLVMC/ForwardAs.td (original) +++ llvm/trunk/test/LLVMC/ForwardAs.td Wed Oct 7 23:40:08 2009 @@ -8,7 +8,7 @@ def OptList : OptionList<[(parameter_option "dummy", (extern))]>; def dummy_tool : Tool<[ -(cmd_line "dummy_cmd"), +(cmd_line "dummy_cmd $INFILE"), (in_language "dummy"), (out_language "dummy"), (actions (case Modified: llvm/trunk/test/LLVMC/MultiValuedOption.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/LLVMC/MultiValuedOption.td?rev=83524&r1=83523&r2=83524&view=diff ============================================================================== --- llvm/trunk/test/LLVMC/MultiValuedOption.td (original) +++ llvm/trunk/test/LLVMC/MultiValuedOption.td Wed Oct 7 23:40:08 2009 @@ -10,7 +10,7 @@ (parameter_list_option "baz", (multi_val 2), (extern))]>; def dummy_tool : Tool<[ -(cmd_line "dummy_cmd"), +(cmd_line "dummy_cmd $INFILE"), (in_language "dummy"), (out_language "dummy"), (actions (case 
Modified: llvm/trunk/test/LLVMC/NoActions.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/LLVMC/NoActions.td?rev=83524&r1=83523&r2=83524&view=diff ============================================================================== --- llvm/trunk/test/LLVMC/NoActions.td (original) +++ llvm/trunk/test/LLVMC/NoActions.td Wed Oct 7 23:40:08 2009 @@ -4,7 +4,7 @@ include "llvm/CompilerDriver/Common.td" def dummy_tool : Tool<[ -(cmd_line "dummy_cmd"), +(cmd_line "dummy_cmd $INFILE"), (in_language "dummy"), (out_language "dummy") ]>; Modified: llvm/trunk/test/LLVMC/OneOrMore.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/LLVMC/OneOrMore.td?rev=83524&r1=83523&r2=83524&view=diff ============================================================================== --- llvm/trunk/test/LLVMC/OneOrMore.td (original) +++ llvm/trunk/test/LLVMC/OneOrMore.td Wed Oct 7 23:40:08 2009 @@ -11,7 +11,7 @@ (parameter_list_option "baz", (zero_or_one))]>; def dummy_tool : Tool<[ -(cmd_line "dummy_cmd"), +(cmd_line "dummy_cmd $INFILE"), (in_language "dummy"), (out_language "dummy"), (actions (case Modified: llvm/trunk/utils/TableGen/LLVMCConfigurationEmitter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/TableGen/LLVMCConfigurationEmitter.cpp?rev=83524&r1=83523&r2=83524&view=diff ============================================================================== --- llvm/trunk/utils/TableGen/LLVMCConfigurationEmitter.cpp (original) +++ llvm/trunk/utils/TableGen/LLVMCConfigurationEmitter.cpp Wed Oct 7 23:40:08 2009 @@ -1352,12 +1352,15 @@ ++I; } + bool hasINFILE = false; + for (; I != E; ++I) { const std::string& cmd = *I; assert(!cmd.empty()); O.indent(IndentLevel); if (cmd.at(0) == '$') { if (cmd == "$INFILE") { + hasINFILE = true; if (IsJoin) { O << "for (PathVector::const_iterator B = inFiles.begin()" << ", E = inFiles.end();\n"; @@ -1369,7 +1372,8 @@ } } else if (cmd == "$OUTFILE") { - O << "vec.push_back(out_file);\n"; + O << "vec.push_back(\"\");\n"; + 
O.indent(IndentLevel) << "out_file_index = vec.size()-1;\n"; } else { O << "vec.push_back("; @@ -1381,8 +1385,10 @@ O << "vec.push_back(\"" << cmd << "\");\n"; } } - O.indent(IndentLevel) << "cmd = "; + if (!hasINFILE) + throw "Tool '" + ToolName + "' doesn't take any input!"; + O.indent(IndentLevel) << "cmd = "; if (StrVec[0][0] == '$') SubstituteSpecialCommands(StrVec.begin(), StrVec.end(), O); else @@ -1566,7 +1572,7 @@ } }; -// EmitGenerateActionMethod - Emit one of two versions of the +// EmitGenerateActionMethod - Emit either a normal or a "join" version of the // Tool::GenerateAction() method. void EmitGenerateActionMethod (const ToolDescription& D, const OptionDescriptions& OptDescs, @@ -1586,17 +1592,7 @@ O.indent(Indent2) << "bool stop_compilation = !HasChildren;\n"; O.indent(Indent2) << "const char* output_suffix = \"" << D.OutputSuffix << "\";\n"; - O.indent(Indent2) << "std::string out_file;\n\n"; - - // For every understood option, emit handling code. - if (D.Actions) - EmitCaseConstructHandler(D.Actions, Indent2, EmitActionHandler(OptDescs), - false, OptDescs, O); - - O << '\n'; - O.indent(Indent2) - << "out_file = OutFilename(" << (IsJoin ? "sys::Path(),\n" : "inFile,\n"); - O.indent(Indent3) << "TempDir, stop_compilation, output_suffix).str();\n\n"; + O.indent(Indent2) << "int out_file_index = -1;\n\n"; // cmd_line is either a string or a 'case' construct. if (!D.CmdLine) @@ -1608,6 +1604,20 @@ EmitCmdLineVecFillCallback(IsJoin, D.Name), true, OptDescs, O); + // For every understood option, emit handling code. + if (D.Actions) + EmitCaseConstructHandler(D.Actions, Indent2, EmitActionHandler(OptDescs), + false, OptDescs, O); + + O << '\n'; + O.indent(Indent2) + << "std::string out_file = OutFilename(" + << (IsJoin ? "sys::Path(),\n" : "inFile,\n"); + O.indent(Indent3) << "TempDir, stop_compilation, output_suffix).str();\n\n"; + // TODO: emit this check only when necessary. 
+ O.indent(Indent2) << "if (out_file_index != -1)\n"; + O.indent(Indent3) << "vec[out_file_index] = out_file;\n"; + // Handle the Sink property. if (D.isSink()) { O.indent(Indent2) << "if (!" << SinkOptionName << ".empty()) {\n"; From foldr at codedgers.com Wed Oct 7 23:40:28 2009 From: foldr at codedgers.com (Mikhail Glushenkov) Date: Thu, 08 Oct 2009 04:40:28 -0000 Subject: [llvm-commits] [llvm] r83525 - /llvm/trunk/tools/llvmc/plugins/Base/Base.td.in Message-ID: <200910080440.n984eSGn008611@zion.cs.uiuc.edu> Author: foldr Date: Wed Oct 7 23:40:28 2009 New Revision: 83525 URL: http://llvm.org/viewvc/llvm-project?rev=83525&view=rev Log: Make the Base plugin understand -MF and -MT. Modified: llvm/trunk/tools/llvmc/plugins/Base/Base.td.in Modified: llvm/trunk/tools/llvmc/plugins/Base/Base.td.in URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvmc/plugins/Base/Base.td.in?rev=83525&r1=83524&r2=83525&view=diff ============================================================================== --- llvm/trunk/tools/llvmc/plugins/Base/Base.td.in (original) +++ llvm/trunk/tools/llvmc/plugins/Base/Base.td.in Wed Oct 7 23:40:28 2009 @@ -1,4 +1,4 @@ -//===- Base.td - LLVMC2 toolchain descriptions -------------*- tablegen -*-===// +//===- Base.td - LLVMC toolchain descriptions --------------*- tablegen -*-===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// This file contains compilation graph description used by llvmc2. +// This file contains compilation graph description used by llvmc. 
// //===----------------------------------------------------------------------===// @@ -32,10 +32,17 @@ (help "Enable threads")), (parameter_option "linker", (help "Choose linker (possible values: gcc, g++)")), + (parameter_option "MF", + (help "Specify a file to write dependencies to"), (hidden)), + (parameter_option "MT", + (help "Change the name of the rule emitted by dependency generation"), + (hidden)), (parameter_list_option "include", (help "Include the named file prior to preprocessing")), (prefix_list_option "I", (help "Add a directory to include path")), + (prefix_list_option "D", + (help "Define a macro")), (prefix_list_option "Wa,", (help "Pass options to assembler")), (prefix_list_option "Wllc,", @@ -78,13 +85,17 @@ (and (switch_on "emit-llvm"), (switch_on "c")), (stop_compilation), (switch_on "fsyntax-only"), (stop_compilation), (not_empty "include"), (forward "include"), - (not_empty "I"), (forward "I"))), + (not_empty "I"), (forward "I")), + (not_empty "D"), (forward "D")), + (not_empty "MF"), (forward "MF")), + (not_empty "MT"), (forward "MT"))), (sink) ]>; def llvm_gcc_c : llvm_gcc_based<"@LLVMGCCCOMMAND@ -x c", "c", "i">; def llvm_gcc_cpp : llvm_gcc_based<"@LLVMGXXCOMMAND@ -x c++", "c++", "i">; -def llvm_gcc_m : llvm_gcc_based<"@LLVMGCCCOMMAND@ -x objective-c", "objective-c", "mi">; +def llvm_gcc_m : llvm_gcc_based<"@LLVMGCCCOMMAND@ -x objective-c", + "objective-c", "mi">; def llvm_gcc_mxx : llvm_gcc_based<"@LLVMGCCCOMMAND@ -x objective-c++", "objective-c++", "mi">; @@ -134,7 +145,7 @@ (switch_on "pthread"), (append_cmd "-lpthread"), (not_empty "L"), (forward "L"), (not_empty "l"), (forward "l"), - (not_empty "Wl,"), (unpack_values "Wl,"))) + (not_empty "Wl,"), (forward "Wl,"))) ]>; // Default linker From bob.wilson at apple.com Thu Oct 8 00:18:18 2009 From: bob.wilson at apple.com (Bob Wilson) Date: Thu, 08 Oct 2009 05:18:18 -0000 Subject: [llvm-commits] [llvm] r83526 - in /llvm/trunk: lib/Target/ARM/ARMISelDAGToDAG.cpp 
lib/Target/ARM/ARMInstrNEON.td lib/Target/ARM/NEONPreAllocPass.cpp test/CodeGen/ARM/vst4.ll Message-ID: <200910080518.n985IIQI013374@zion.cs.uiuc.edu> Author: bwilson Date: Thu Oct 8 00:18:18 2009 New Revision: 83526 URL: http://llvm.org/viewvc/llvm-project?rev=83526&view=rev Log: Add codegen support for NEON vst4 intrinsics with <1 x i64> vectors. Modified: llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp llvm/trunk/lib/Target/ARM/ARMInstrNEON.td llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp llvm/trunk/test/CodeGen/ARM/vst4.ll Modified: llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp?rev=83526&r1=83525&r2=83526&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp (original) +++ llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp Thu Oct 8 00:18:18 2009 @@ -1681,6 +1681,7 @@ case MVT::v4i16: Opc = ARM::VST4d16; break; case MVT::v2f32: case MVT::v2i32: Opc = ARM::VST4d32; break; + case MVT::v1i64: Opc = ARM::VST4d64; break; } SDValue Chain = N->getOperand(0); const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Modified: llvm/trunk/lib/Target/ARM/ARMInstrNEON.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrNEON.td?rev=83526&r1=83525&r2=83526&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrNEON.td (original) +++ llvm/trunk/lib/Target/ARM/ARMInstrNEON.td Thu Oct 8 00:18:18 2009 @@ -406,6 +406,10 @@ def VST4d8 : VST4D<0b0000, "vst4.8">; def VST4d16 : VST4D<0b0100, "vst4.16">; def VST4d32 : VST4D<0b1000, "vst4.32">; +def VST4d64 : NLdSt<0,0b00,0b0010,0b1100, (outs), + (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, + DPR:$src4), IIC_VST, + "vst1.64\t\\{$src1,$src2,$src3,$src4\\}, $addr", "", []>; // vst4 to double-spaced even registers. 
def VST4q8a : VST4WB<0b0000, "vst4.8">; Modified: llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp?rev=83526&r1=83525&r2=83526&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp (original) +++ llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp Thu Oct 8 00:18:18 2009 @@ -172,6 +172,7 @@ case ARM::VST4d8: case ARM::VST4d16: case ARM::VST4d32: + case ARM::VST4d64: case ARM::VST4LNd8: case ARM::VST4LNd16: case ARM::VST4LNd32: Modified: llvm/trunk/test/CodeGen/ARM/vst4.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vst4.ll?rev=83526&r1=83525&r2=83526&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/ARM/vst4.ll (original) +++ llvm/trunk/test/CodeGen/ARM/vst4.ll Thu Oct 8 00:18:18 2009 @@ -32,6 +32,14 @@ ret void } +define void @vst4i64(i64* %A, <1 x i64>* %B) nounwind { +;CHECK: vst4i64: +;CHECK: vst1.64 + %tmp1 = load <1 x i64>* %B + call void @llvm.arm.neon.vst4.v1i64(i64* %A, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1) + ret void +} + define void @vst4Qi8(i8* %A, <16 x i8>* %B) nounwind { ;CHECK: vst4Qi8: ;CHECK: vst4.8 @@ -72,6 +80,7 @@ declare void @llvm.arm.neon.vst4.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>) nounwind declare void @llvm.arm.neon.vst4.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>) nounwind declare void @llvm.arm.neon.vst4.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>) nounwind +declare void @llvm.arm.neon.vst4.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>) nounwind declare void @llvm.arm.neon.vst4.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind declare void @llvm.arm.neon.vst4.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>) nounwind From evan.cheng at apple.com Thu Oct 8 00:38:27 2009 From: 
evan.cheng at apple.com (Evan Cheng) Date: Wed, 7 Oct 2009 22:38:27 -0700 Subject: [llvm-commits] [llvm] r83521 - in /llvm/trunk: include/llvm/Target/TargetRegisterInfo.h lib/CodeGen/PrologEpilogInserter.cpp lib/CodeGen/PrologEpilogInserter.h lib/Target/ARM/Thumb1RegisterInfo.cpp lib/Target/ARM/Thumb1RegisterInfo.h In-Reply-To: <200910080146.n981kxrE019225@zion.cs.uiuc.edu> References: <200910080146.n981kxrE019225@zion.cs.uiuc.edu> Message-ID: <51D3B8B5-79BB-4399-BFB0-9C7D6ABCE484@apple.com> Nice. Are all Thumb1 tests passing? Evan On Oct 7, 2009, at 6:46 PM, Jim Grosbach wrote: > Author: grosbach > Date: Wed Oct 7 20:46:59 2009 > New Revision: 83521 > > URL: http://llvm.org/viewvc/llvm-project?rev=83521&view=rev > Log: > Re-enable register scavenging in Thumb1 by default. > > Modified: > llvm/trunk/include/llvm/Target/TargetRegisterInfo.h > llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp > llvm/trunk/lib/CodeGen/PrologEpilogInserter.h > llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.cpp > llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.h > > Modified: llvm/trunk/include/llvm/Target/TargetRegisterInfo.h > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetRegisterInfo.h?rev=83521&r1=83520&r2=83521&view=diff > > = > = > = > = > = > = > = > = > ====================================================================== > --- llvm/trunk/include/llvm/Target/TargetRegisterInfo.h (original) > +++ llvm/trunk/include/llvm/Target/TargetRegisterInfo.h Wed Oct 7 > 20:46:59 2009 > @@ -561,6 +561,12 @@ > return false; > } > > + /// requiresFrameIndexScavenging - returns true if the target > requires post > + /// PEI scavenging of registers for materializing frame index > constants. > + virtual bool requiresFrameIndexScavenging(const MachineFunction > &MF) const { > + return false; > + } > + > /// hasFP - Return true if the specified function should have a > dedicated > /// frame pointer register. 
For most targets this is true only if > the function > /// has variable sized allocas or if frame pointer elimination is > disabled. > > Modified: llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp?rev=83521&r1=83520&r2=83521&view=diff > > = > = > = > = > = > = > = > = > ====================================================================== > --- llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp (original) > +++ llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp Wed Oct 7 > 20:46:59 2009 > @@ -44,16 +44,6 @@ > static RegisterPass > X("prologepilog", "Prologue/Epilogue Insertion"); > > -// FIXME: For now, the frame index scavenging is off by default and > only > -// used by the Thumb1 target. When it's the default and replaces > the current > -// on-the-fly PEI scavenging for all targets, > requiresRegisterScavenging() > -// will replace this. > -cl::opt > -FrameIndexVirtualScavenging("enable-frame-index-scavenging", > - cl::Hidden, > - cl::desc("Enable frame index > elimination with" > - "virtual register scavenging")); > - > /// createPrologEpilogCodeInserter - This function returns a pass > that inserts > /// prolog and epilog code, and eliminates abstract frame references. > /// > @@ -66,6 +56,7 @@ > const Function* F = Fn.getFunction(); > const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo(); > RS = TRI->requiresRegisterScavenging(Fn) ? new RegScavenger() : > NULL; > + FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging > (Fn); > > // Get MachineModuleInfo so that we can track the construction of > the > // frame. 
> > Modified: llvm/trunk/lib/CodeGen/PrologEpilogInserter.h > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/PrologEpilogInserter.h?rev=83521&r1=83520&r2=83521&view=diff > > = > = > = > = > = > = > = > = > ====================================================================== > --- llvm/trunk/lib/CodeGen/PrologEpilogInserter.h (original) > +++ llvm/trunk/lib/CodeGen/PrologEpilogInserter.h Wed Oct 7 > 20:46:59 2009 > @@ -94,6 +94,11 @@ > // functions. > bool ShrinkWrapThisFunction; > > + // Flag to control whether to use the register scavenger to > resolve > + // frame index materialization registers. Set according to > + // TRI->requiresFrameIndexScavenging() for the curren function. > + bool FrameIndexVirtualScavenging; > + > // When using the scavenger post-pass to resolve frame reference > // materialization registers, maintain a map of the registers to > // the constant value and SP adjustment associated with it. > > Modified: llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.cpp > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.cpp?rev=83521&r1=83520&r2=83521&view=diff > > = > = > = > = > = > = > = > = > ====================================================================== > --- llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.cpp (original) > +++ llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.cpp Wed Oct 7 > 20:46:59 2009 > @@ -37,11 +37,6 @@ > #include "llvm/Support/raw_ostream.h" > using namespace llvm; > > -// FIXME: This cmd line option conditionalizes the new register > scavenging > -// implemenation in PEI. Remove the option when scavenging works > well enough > -// to be the default. 
> -extern cl::opt FrameIndexVirtualScavenging; > - > Thumb1RegisterInfo::Thumb1RegisterInfo(const ARMBaseInstrInfo &tii, > const ARMSubtarget &sti) > : ARMBaseRegisterInfo(tii, sti) { > @@ -84,9 +79,16 @@ > > bool > Thumb1RegisterInfo::requiresRegisterScavenging(const MachineFunction > &MF) const { > - return FrameIndexVirtualScavenging; > + return true; > +} > + > +bool > +Thumb1RegisterInfo::requiresFrameIndexScavenging(const > MachineFunction &MF) > + const { > + return true; > } > > + > bool Thumb1RegisterInfo::hasReservedCallFrame(MachineFunction &MF) > const { > const MachineFrameInfo *FFI = MF.getFrameInfo(); > unsigned CFSize = FFI->getMaxCallFrameSize(); > @@ -128,13 +130,7 @@ > unsigned LdReg = DestReg; > if (DestReg == ARM::SP) { > assert(BaseReg == ARM::SP && "Unexpected!"); > - if (FrameIndexVirtualScavenging) { > - LdReg = MF.getRegInfo().createVirtualRegister > (ARM::tGPRRegisterClass); > - } else { > - LdReg = ARM::R3; > - BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::R12) > - .addReg(ARM::R3, RegState::Kill); > - } > + LdReg = MF.getRegInfo().createVirtualRegister > (ARM::tGPRRegisterClass); > } > > if (NumBytes <= 255 && NumBytes >= 0) > @@ -159,10 +155,6 @@ > else > MIB.addReg(LdReg).addReg(BaseReg, RegState::Kill); > AddDefaultPred(MIB); > - > - if (!FrameIndexVirtualScavenging && DestReg == ARM::SP) > - BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2tgpr), ARM::R3) > - .addReg(ARM::R12, RegState::Kill); > } > > /// calcNumMI - Returns the number of instructions required to > materialize > @@ -635,7 +627,6 @@ > else // tLDR has an extra register operand. 
> MI.addOperand(MachineOperand::CreateReg(0, false)); > } else if (Desc.mayStore()) { > - if (FrameIndexVirtualScavenging) { > VReg = MF.getRegInfo().createVirtualRegister > (ARM::tGPRRegisterClass); > assert (Value && "Frame index virtual allocated, but Value arg > is NULL!"); > *Value = Offset; > @@ -658,52 +649,6 @@ > MI.addOperand(MachineOperand::CreateReg(FrameReg, false)); > else // tSTR has an extra register operand. > MI.addOperand(MachineOperand::CreateReg(0, false)); > - } else { > - // FIXME! This is horrific!!! We need register scavenging. > - // Our temporary workaround has marked r3 unavailable. Of > course, r3 is > - // also a ABI register so it's possible that is is the > register that is > - // being storing here. If that's the case, we do the following: > - // r12 = r2 > - // Use r2 to materialize sp + offset > - // str r3, r2 > - // r2 = r12 > - unsigned ValReg = MI.getOperand(0).getReg(); > - unsigned TmpReg = ARM::R3; > - bool UseRR = false; > - if (ValReg == ARM::R3) { > - BuildMI(MBB, II, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::R12) > - .addReg(ARM::R2, RegState::Kill); > - TmpReg = ARM::R2; > - } > - if (TmpReg == ARM::R3 && AFI->isR3LiveIn()) > - BuildMI(MBB, II, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::R12) > - .addReg(ARM::R3, RegState::Kill); > - if (Opcode == ARM::tSpill) { > - if (FrameReg == ARM::SP) > - emitThumbRegPlusImmInReg(MBB, II, TmpReg, FrameReg, > - Offset, false, TII, *this, dl); > - else { > - emitLoadConstPool(MBB, II, dl, TmpReg, 0, Offset); > - UseRR = true; > - } > - } else > - emitThumbRegPlusImmediate(MBB, II, TmpReg, FrameReg, > Offset, TII, > - *this, dl); > - MI.setDesc(TII.get(ARM::tSTR)); > - MI.getOperand(i).ChangeToRegister(TmpReg, false, false, true); > - if (UseRR) // Use [reg, reg] addrmode. > - MI.addOperand(MachineOperand::CreateReg(FrameReg, false)); > - else // tSTR has an extra register operand. 
> - MI.addOperand(MachineOperand::CreateReg(0, false)); > - > - MachineBasicBlock::iterator NII = next(II); > - if (ValReg == ARM::R3) > - BuildMI(MBB, NII, dl, TII.get(ARM::tMOVgpr2tgpr), ARM::R2) > - .addReg(ARM::R12, RegState::Kill); > - if (TmpReg == ARM::R3 && AFI->isR3LiveIn()) > - BuildMI(MBB, NII, dl, TII.get(ARM::tMOVgpr2tgpr), ARM::R3) > - .addReg(ARM::R12, RegState::Kill); > - } > } else > assert(false && "Unexpected opcode!"); > > > Modified: llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.h > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.h?rev=83521&r1=83520&r2=83521&view=diff > > = > = > = > = > = > = > = > = > ====================================================================== > --- llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.h (original) > +++ llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.h Wed Oct 7 > 20:46:59 2009 > @@ -41,6 +41,7 @@ > getPhysicalRegisterRegClass(unsigned Reg, EVT VT = MVT::Other) > const; > > bool requiresRegisterScavenging(const MachineFunction &MF) const; > + bool requiresFrameIndexScavenging(const MachineFunction &MF) const; > > bool hasReservedCallFrame(MachineFunction &MF) const; > > > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits From evan.cheng at apple.com Thu Oct 8 01:00:58 2009 From: evan.cheng at apple.com (Evan Cheng) Date: Thu, 08 Oct 2009 06:00:58 -0000 Subject: [llvm-commits] [test-suite] r83527 - /test-suite/trunk/Makefile.programs Message-ID: <200910080600.n9860w2Y018669@zion.cs.uiuc.edu> Author: evancheng Date: Thu Oct 8 01:00:58 2009 New Revision: 83527 URL: http://llvm.org/viewvc/llvm-project?rev=83527&view=rev Log: Testing prealloc splitting to make sure it hasn't bit rotted. 
Modified: test-suite/trunk/Makefile.programs Modified: test-suite/trunk/Makefile.programs URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/Makefile.programs?rev=83527&r1=83526&r2=83527&view=diff ============================================================================== --- test-suite/trunk/Makefile.programs (original) +++ test-suite/trunk/Makefile.programs Thu Oct 8 01:00:58 2009 @@ -224,10 +224,12 @@ LLCBETAOPTION := -sched=simple endif ifeq ($(ARCH),x86_64) -LLCBETAOPTION := -combiner-alias-analysis +LLCBETAOPTION := -pre-alloc-split +#-combiner-alias-analysis endif ifeq ($(ARCH),x86) -LLCBETAOPTION := -combiner-alias-analysis +LLCBETAOPTION := -pre-alloc-split +#-combiner-alias-analysis #-combiner-global-alias-analysis #-pre-alloc-split #-fast-isel From bob.wilson at apple.com Thu Oct 8 01:02:10 2009 From: bob.wilson at apple.com (Bob Wilson) Date: Thu, 08 Oct 2009 06:02:10 -0000 Subject: [llvm-commits] [llvm] r83528 - in /llvm/trunk/test/CodeGen/ARM: vneg.ll vorn.ll vorr.ll vpadal.ll vpadd.ll vpaddl.ll vpmax.ll vpmin.ll Message-ID: <200910080602.n9862Aup018839@zion.cs.uiuc.edu> Author: bwilson Date: Thu Oct 8 01:02:10 2009 New Revision: 83528 URL: http://llvm.org/viewvc/llvm-project?rev=83528&view=rev Log: Convert more NEON tests to use FileCheck. 
Modified: llvm/trunk/test/CodeGen/ARM/vneg.ll llvm/trunk/test/CodeGen/ARM/vorn.ll llvm/trunk/test/CodeGen/ARM/vorr.ll llvm/trunk/test/CodeGen/ARM/vpadal.ll llvm/trunk/test/CodeGen/ARM/vpadd.ll llvm/trunk/test/CodeGen/ARM/vpaddl.ll llvm/trunk/test/CodeGen/ARM/vpmax.ll llvm/trunk/test/CodeGen/ARM/vpmin.ll Modified: llvm/trunk/test/CodeGen/ARM/vneg.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vneg.ll?rev=83528&r1=83527&r2=83528&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/ARM/vneg.ll (original) +++ llvm/trunk/test/CodeGen/ARM/vneg.ll Thu Oct 8 01:02:10 2009 @@ -1,52 +1,64 @@ -; RUN: llc < %s -march=arm -mattr=+neon > %t -; RUN: grep {vneg\\.s8} %t | count 2 -; RUN: grep {vneg\\.s16} %t | count 2 -; RUN: grep {vneg\\.s32} %t | count 2 -; RUN: grep {vneg\\.f32} %t | count 2 +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s define <8 x i8> @vnegs8(<8 x i8>* %A) nounwind { +;CHECK: vnegs8: +;CHECK: vneg.s8 %tmp1 = load <8 x i8>* %A %tmp2 = sub <8 x i8> zeroinitializer, %tmp1 ret <8 x i8> %tmp2 } define <4 x i16> @vnegs16(<4 x i16>* %A) nounwind { +;CHECK: vnegs16: +;CHECK: vneg.s16 %tmp1 = load <4 x i16>* %A %tmp2 = sub <4 x i16> zeroinitializer, %tmp1 ret <4 x i16> %tmp2 } define <2 x i32> @vnegs32(<2 x i32>* %A) nounwind { +;CHECK: vnegs32: +;CHECK: vneg.s32 %tmp1 = load <2 x i32>* %A %tmp2 = sub <2 x i32> zeroinitializer, %tmp1 ret <2 x i32> %tmp2 } define <2 x float> @vnegf32(<2 x float>* %A) nounwind { +;CHECK: vnegf32: +;CHECK: vneg.f32 %tmp1 = load <2 x float>* %A %tmp2 = sub <2 x float> < float -0.000000e+00, float -0.000000e+00 >, %tmp1 ret <2 x float> %tmp2 } define <16 x i8> @vnegQs8(<16 x i8>* %A) nounwind { +;CHECK: vnegQs8: +;CHECK: vneg.s8 %tmp1 = load <16 x i8>* %A %tmp2 = sub <16 x i8> zeroinitializer, %tmp1 ret <16 x i8> %tmp2 } define <8 x i16> @vnegQs16(<8 x i16>* %A) nounwind { +;CHECK: vnegQs16: +;CHECK: vneg.s16 %tmp1 = load <8 x i16>* %A %tmp2 = 
sub <8 x i16> zeroinitializer, %tmp1 ret <8 x i16> %tmp2 } define <4 x i32> @vnegQs32(<4 x i32>* %A) nounwind { +;CHECK: vnegQs32: +;CHECK: vneg.s32 %tmp1 = load <4 x i32>* %A %tmp2 = sub <4 x i32> zeroinitializer, %tmp1 ret <4 x i32> %tmp2 } define <4 x float> @vnegQf32(<4 x float>* %A) nounwind { +;CHECK: vnegQf32: +;CHECK: vneg.f32 %tmp1 = load <4 x float>* %A %tmp2 = sub <4 x float> < float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00 >, %tmp1 ret <4 x float> %tmp2 Modified: llvm/trunk/test/CodeGen/ARM/vorn.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vorn.ll?rev=83528&r1=83527&r2=83528&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/ARM/vorn.ll (original) +++ llvm/trunk/test/CodeGen/ARM/vorn.ll Thu Oct 8 01:02:10 2009 @@ -1,8 +1,8 @@ -; RUN: llc < %s -march=arm -mattr=+neon > %t -; RUN: grep vorn %t | count 8 -; Note: function names do not include "vorn" to allow simple grep for opcodes +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s define <8 x i8> @v_orni8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: v_orni8: +;CHECK: vorn %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B %tmp3 = xor <8 x i8> %tmp2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 > @@ -11,6 +11,8 @@ } define <4 x i16> @v_orni16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: v_orni16: +;CHECK: vorn %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B %tmp3 = xor <4 x i16> %tmp2, < i16 -1, i16 -1, i16 -1, i16 -1 > @@ -19,6 +21,8 @@ } define <2 x i32> @v_orni32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: v_orni32: +;CHECK: vorn %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B %tmp3 = xor <2 x i32> %tmp2, < i32 -1, i32 -1 > @@ -27,6 +31,8 @@ } define <1 x i64> @v_orni64(<1 x i64>* %A, <1 x i64>* %B) nounwind { +;CHECK: v_orni64: +;CHECK: vorn %tmp1 = load <1 x i64>* %A %tmp2 = load <1 x i64>* %B %tmp3 = xor <1 x i64> %tmp2, < i64 -1 
> @@ -35,6 +41,8 @@ } define <16 x i8> @v_ornQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: v_ornQi8: +;CHECK: vorn %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B %tmp3 = xor <16 x i8> %tmp2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 > @@ -43,6 +51,8 @@ } define <8 x i16> @v_ornQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: v_ornQi16: +;CHECK: vorn %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B %tmp3 = xor <8 x i16> %tmp2, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 > @@ -51,6 +61,8 @@ } define <4 x i32> @v_ornQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: v_ornQi32: +;CHECK: vorn %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B %tmp3 = xor <4 x i32> %tmp2, < i32 -1, i32 -1, i32 -1, i32 -1 > @@ -59,6 +71,8 @@ } define <2 x i64> @v_ornQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind { +;CHECK: v_ornQi64: +;CHECK: vorn %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i64>* %B %tmp3 = xor <2 x i64> %tmp2, < i64 -1, i64 -1 > Modified: llvm/trunk/test/CodeGen/ARM/vorr.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vorr.ll?rev=83528&r1=83527&r2=83528&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/ARM/vorr.ll (original) +++ llvm/trunk/test/CodeGen/ARM/vorr.ll Thu Oct 8 01:02:10 2009 @@ -1,8 +1,8 @@ -; RUN: llc < %s -march=arm -mattr=+neon > %t -; RUN: grep vorr %t | count 8 -; Note: function names do not include "vorr" to allow simple grep for opcodes +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s define <8 x i8> @v_orri8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: v_orri8: +;CHECK: vorr %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B %tmp3 = or <8 x i8> %tmp1, %tmp2 @@ -10,6 +10,8 @@ } define <4 x i16> @v_orri16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: v_orri16: +;CHECK: vorr %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B 
%tmp3 = or <4 x i16> %tmp1, %tmp2 @@ -17,6 +19,8 @@ } define <2 x i32> @v_orri32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: v_orri32: +;CHECK: vorr %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B %tmp3 = or <2 x i32> %tmp1, %tmp2 @@ -24,6 +28,8 @@ } define <1 x i64> @v_orri64(<1 x i64>* %A, <1 x i64>* %B) nounwind { +;CHECK: v_orri64: +;CHECK: vorr %tmp1 = load <1 x i64>* %A %tmp2 = load <1 x i64>* %B %tmp3 = or <1 x i64> %tmp1, %tmp2 @@ -31,6 +37,8 @@ } define <16 x i8> @v_orrQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: v_orrQi8: +;CHECK: vorr %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B %tmp3 = or <16 x i8> %tmp1, %tmp2 @@ -38,6 +46,8 @@ } define <8 x i16> @v_orrQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: v_orrQi16: +;CHECK: vorr %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B %tmp3 = or <8 x i16> %tmp1, %tmp2 @@ -45,6 +55,8 @@ } define <4 x i32> @v_orrQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: v_orrQi32: +;CHECK: vorr %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B %tmp3 = or <4 x i32> %tmp1, %tmp2 @@ -52,6 +64,8 @@ } define <2 x i64> @v_orrQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind { +;CHECK: v_orrQi64: +;CHECK: vorr %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i64>* %B %tmp3 = or <2 x i64> %tmp1, %tmp2 Modified: llvm/trunk/test/CodeGen/ARM/vpadal.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vpadal.ll?rev=83528&r1=83527&r2=83528&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/ARM/vpadal.ll (original) +++ llvm/trunk/test/CodeGen/ARM/vpadal.ll Thu Oct 8 01:02:10 2009 @@ -1,12 +1,8 @@ -; RUN: llc < %s -march=arm -mattr=+neon > %t -; RUN: grep {vpadal\\.s8} %t | count 2 -; RUN: grep {vpadal\\.s16} %t | count 2 -; RUN: grep {vpadal\\.s32} %t | count 2 -; RUN: grep {vpadal\\.u8} %t | count 2 -; RUN: grep {vpadal\\.u16} %t | count 2 -; RUN: grep {vpadal\\.u32} %t | count 2 +; RUN: llc < %s -march=arm 
-mattr=+neon | FileCheck %s define <4 x i16> @vpadals8(<4 x i16>* %A, <8 x i8>* %B) nounwind { +;CHECK: vpadals8: +;CHECK: vpadal.s8 %tmp1 = load <4 x i16>* %A %tmp2 = load <8 x i8>* %B %tmp3 = call <4 x i16> @llvm.arm.neon.vpadals.v4i16.v8i8(<4 x i16> %tmp1, <8 x i8> %tmp2) @@ -14,6 +10,8 @@ } define <2 x i32> @vpadals16(<2 x i32>* %A, <4 x i16>* %B) nounwind { +;CHECK: vpadals16: +;CHECK: vpadal.s16 %tmp1 = load <2 x i32>* %A %tmp2 = load <4 x i16>* %B %tmp3 = call <2 x i32> @llvm.arm.neon.vpadals.v2i32.v4i16(<2 x i32> %tmp1, <4 x i16> %tmp2) @@ -21,6 +19,8 @@ } define <1 x i64> @vpadals32(<1 x i64>* %A, <2 x i32>* %B) nounwind { +;CHECK: vpadals32: +;CHECK: vpadal.s32 %tmp1 = load <1 x i64>* %A %tmp2 = load <2 x i32>* %B %tmp3 = call <1 x i64> @llvm.arm.neon.vpadals.v1i64.v2i32(<1 x i64> %tmp1, <2 x i32> %tmp2) @@ -28,6 +28,8 @@ } define <4 x i16> @vpadalu8(<4 x i16>* %A, <8 x i8>* %B) nounwind { +;CHECK: vpadalu8: +;CHECK: vpadal.u8 %tmp1 = load <4 x i16>* %A %tmp2 = load <8 x i8>* %B %tmp3 = call <4 x i16> @llvm.arm.neon.vpadalu.v4i16.v8i8(<4 x i16> %tmp1, <8 x i8> %tmp2) @@ -35,6 +37,8 @@ } define <2 x i32> @vpadalu16(<2 x i32>* %A, <4 x i16>* %B) nounwind { +;CHECK: vpadalu16: +;CHECK: vpadal.u16 %tmp1 = load <2 x i32>* %A %tmp2 = load <4 x i16>* %B %tmp3 = call <2 x i32> @llvm.arm.neon.vpadalu.v2i32.v4i16(<2 x i32> %tmp1, <4 x i16> %tmp2) @@ -42,6 +46,8 @@ } define <1 x i64> @vpadalu32(<1 x i64>* %A, <2 x i32>* %B) nounwind { +;CHECK: vpadalu32: +;CHECK: vpadal.u32 %tmp1 = load <1 x i64>* %A %tmp2 = load <2 x i32>* %B %tmp3 = call <1 x i64> @llvm.arm.neon.vpadalu.v1i64.v2i32(<1 x i64> %tmp1, <2 x i32> %tmp2) @@ -49,6 +55,8 @@ } define <8 x i16> @vpadalQs8(<8 x i16>* %A, <16 x i8>* %B) nounwind { +;CHECK: vpadalQs8: +;CHECK: vpadal.s8 %tmp1 = load <8 x i16>* %A %tmp2 = load <16 x i8>* %B %tmp3 = call <8 x i16> @llvm.arm.neon.vpadals.v8i16.v16i8(<8 x i16> %tmp1, <16 x i8> %tmp2) @@ -56,6 +64,8 @@ } define <4 x i32> @vpadalQs16(<4 x i32>* %A, <8 x i16>* %B) 
nounwind { +;CHECK: vpadalQs16: +;CHECK: vpadal.s16 %tmp1 = load <4 x i32>* %A %tmp2 = load <8 x i16>* %B %tmp3 = call <4 x i32> @llvm.arm.neon.vpadals.v4i32.v8i16(<4 x i32> %tmp1, <8 x i16> %tmp2) @@ -63,6 +73,8 @@ } define <2 x i64> @vpadalQs32(<2 x i64>* %A, <4 x i32>* %B) nounwind { +;CHECK: vpadalQs32: +;CHECK: vpadal.s32 %tmp1 = load <2 x i64>* %A %tmp2 = load <4 x i32>* %B %tmp3 = call <2 x i64> @llvm.arm.neon.vpadals.v2i64.v4i32(<2 x i64> %tmp1, <4 x i32> %tmp2) @@ -70,6 +82,8 @@ } define <8 x i16> @vpadalQu8(<8 x i16>* %A, <16 x i8>* %B) nounwind { +;CHECK: vpadalQu8: +;CHECK: vpadal.u8 %tmp1 = load <8 x i16>* %A %tmp2 = load <16 x i8>* %B %tmp3 = call <8 x i16> @llvm.arm.neon.vpadalu.v8i16.v16i8(<8 x i16> %tmp1, <16 x i8> %tmp2) @@ -77,6 +91,8 @@ } define <4 x i32> @vpadalQu16(<4 x i32>* %A, <8 x i16>* %B) nounwind { +;CHECK: vpadalQu16: +;CHECK: vpadal.u16 %tmp1 = load <4 x i32>* %A %tmp2 = load <8 x i16>* %B %tmp3 = call <4 x i32> @llvm.arm.neon.vpadalu.v4i32.v8i16(<4 x i32> %tmp1, <8 x i16> %tmp2) @@ -84,6 +100,8 @@ } define <2 x i64> @vpadalQu32(<2 x i64>* %A, <4 x i32>* %B) nounwind { +;CHECK: vpadalQu32: +;CHECK: vpadal.u32 %tmp1 = load <2 x i64>* %A %tmp2 = load <4 x i32>* %B %tmp3 = call <2 x i64> @llvm.arm.neon.vpadalu.v2i64.v4i32(<2 x i64> %tmp1, <4 x i32> %tmp2) Modified: llvm/trunk/test/CodeGen/ARM/vpadd.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vpadd.ll?rev=83528&r1=83527&r2=83528&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/ARM/vpadd.ll (original) +++ llvm/trunk/test/CodeGen/ARM/vpadd.ll Thu Oct 8 01:02:10 2009 @@ -1,10 +1,8 @@ -; RUN: llc < %s -march=arm -mattr=+neon > %t -; RUN: grep {vpadd\\.i8} %t | count 1 -; RUN: grep {vpadd\\.i16} %t | count 1 -; RUN: grep {vpadd\\.i32} %t | count 1 -; RUN: grep {vpadd\\.f32} %t | count 1 +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s define <8 x i8> @vpaddi8(<8 x i8>* %A, <8 x i8>* %B) 
nounwind { +;CHECK: vpaddi8: +;CHECK: vpadd.i8 %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B %tmp3 = call <8 x i8> @llvm.arm.neon.vpadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) @@ -12,6 +10,8 @@ } define <4 x i16> @vpaddi16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vpaddi16: +;CHECK: vpadd.i16 %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B %tmp3 = call <4 x i16> @llvm.arm.neon.vpadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) @@ -19,6 +19,8 @@ } define <2 x i32> @vpaddi32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vpaddi32: +;CHECK: vpadd.i32 %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B %tmp3 = call <2 x i32> @llvm.arm.neon.vpadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) @@ -26,6 +28,8 @@ } define <2 x float> @vpaddf32(<2 x float>* %A, <2 x float>* %B) nounwind { +;CHECK: vpaddf32: +;CHECK: vpadd.f32 %tmp1 = load <2 x float>* %A %tmp2 = load <2 x float>* %B %tmp3 = call <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float> %tmp1, <2 x float> %tmp2) Modified: llvm/trunk/test/CodeGen/ARM/vpaddl.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vpaddl.ll?rev=83528&r1=83527&r2=83528&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/ARM/vpaddl.ll (original) +++ llvm/trunk/test/CodeGen/ARM/vpaddl.ll Thu Oct 8 01:02:10 2009 @@ -1,78 +1,96 @@ -; RUN: llc < %s -march=arm -mattr=+neon > %t -; RUN: grep {vpaddl\\.s8} %t | count 2 -; RUN: grep {vpaddl\\.s16} %t | count 2 -; RUN: grep {vpaddl\\.s32} %t | count 2 -; RUN: grep {vpaddl\\.u8} %t | count 2 -; RUN: grep {vpaddl\\.u16} %t | count 2 -; RUN: grep {vpaddl\\.u32} %t | count 2 +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s define <4 x i16> @vpaddls8(<8 x i8>* %A) nounwind { +;CHECK: vpaddls8: +;CHECK: vpaddl.s8 %tmp1 = load <8 x i8>* %A %tmp2 = call <4 x i16> @llvm.arm.neon.vpaddls.v4i16.v8i8(<8 x i8> %tmp1) ret <4 x i16> %tmp2 } define <2 x i32> @vpaddls16(<4 x i16>* %A) nounwind { +;CHECK: vpaddls16: 
+;CHECK: vpaddl.s16 %tmp1 = load <4 x i16>* %A %tmp2 = call <2 x i32> @llvm.arm.neon.vpaddls.v2i32.v4i16(<4 x i16> %tmp1) ret <2 x i32> %tmp2 } define <1 x i64> @vpaddls32(<2 x i32>* %A) nounwind { +;CHECK: vpaddls32: +;CHECK: vpaddl.s32 %tmp1 = load <2 x i32>* %A %tmp2 = call <1 x i64> @llvm.arm.neon.vpaddls.v1i64.v2i32(<2 x i32> %tmp1) ret <1 x i64> %tmp2 } define <4 x i16> @vpaddlu8(<8 x i8>* %A) nounwind { +;CHECK: vpaddlu8: +;CHECK: vpaddl.u8 %tmp1 = load <8 x i8>* %A %tmp2 = call <4 x i16> @llvm.arm.neon.vpaddlu.v4i16.v8i8(<8 x i8> %tmp1) ret <4 x i16> %tmp2 } define <2 x i32> @vpaddlu16(<4 x i16>* %A) nounwind { +;CHECK: vpaddlu16: +;CHECK: vpaddl.u16 %tmp1 = load <4 x i16>* %A %tmp2 = call <2 x i32> @llvm.arm.neon.vpaddlu.v2i32.v4i16(<4 x i16> %tmp1) ret <2 x i32> %tmp2 } define <1 x i64> @vpaddlu32(<2 x i32>* %A) nounwind { +;CHECK: vpaddlu32: +;CHECK: vpaddl.u32 %tmp1 = load <2 x i32>* %A %tmp2 = call <1 x i64> @llvm.arm.neon.vpaddlu.v1i64.v2i32(<2 x i32> %tmp1) ret <1 x i64> %tmp2 } define <8 x i16> @vpaddlQs8(<16 x i8>* %A) nounwind { +;CHECK: vpaddlQs8: +;CHECK: vpaddl.s8 %tmp1 = load <16 x i8>* %A %tmp2 = call <8 x i16> @llvm.arm.neon.vpaddls.v8i16.v16i8(<16 x i8> %tmp1) ret <8 x i16> %tmp2 } define <4 x i32> @vpaddlQs16(<8 x i16>* %A) nounwind { +;CHECK: vpaddlQs16: +;CHECK: vpaddl.s16 %tmp1 = load <8 x i16>* %A %tmp2 = call <4 x i32> @llvm.arm.neon.vpaddls.v4i32.v8i16(<8 x i16> %tmp1) ret <4 x i32> %tmp2 } define <2 x i64> @vpaddlQs32(<4 x i32>* %A) nounwind { +;CHECK: vpaddlQs32: +;CHECK: vpaddl.s32 %tmp1 = load <4 x i32>* %A %tmp2 = call <2 x i64> @llvm.arm.neon.vpaddls.v2i64.v4i32(<4 x i32> %tmp1) ret <2 x i64> %tmp2 } define <8 x i16> @vpaddlQu8(<16 x i8>* %A) nounwind { +;CHECK: vpaddlQu8: +;CHECK: vpaddl.u8 %tmp1 = load <16 x i8>* %A %tmp2 = call <8 x i16> @llvm.arm.neon.vpaddlu.v8i16.v16i8(<16 x i8> %tmp1) ret <8 x i16> %tmp2 } define <4 x i32> @vpaddlQu16(<8 x i16>* %A) nounwind { +;CHECK: vpaddlQu16: +;CHECK: vpaddl.u16 %tmp1 = load <8 x 
i16>* %A %tmp2 = call <4 x i32> @llvm.arm.neon.vpaddlu.v4i32.v8i16(<8 x i16> %tmp1) ret <4 x i32> %tmp2 } define <2 x i64> @vpaddlQu32(<4 x i32>* %A) nounwind { +;CHECK: vpaddlQu32: +;CHECK: vpaddl.u32 %tmp1 = load <4 x i32>* %A %tmp2 = call <2 x i64> @llvm.arm.neon.vpaddlu.v2i64.v4i32(<4 x i32> %tmp1) ret <2 x i64> %tmp2 Modified: llvm/trunk/test/CodeGen/ARM/vpmax.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vpmax.ll?rev=83528&r1=83527&r2=83528&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/ARM/vpmax.ll (original) +++ llvm/trunk/test/CodeGen/ARM/vpmax.ll Thu Oct 8 01:02:10 2009 @@ -1,13 +1,8 @@ -; RUN: llc < %s -march=arm -mattr=+neon > %t -; RUN: grep {vpmax\\.s8} %t | count 1 -; RUN: grep {vpmax\\.s16} %t | count 1 -; RUN: grep {vpmax\\.s32} %t | count 1 -; RUN: grep {vpmax\\.u8} %t | count 1 -; RUN: grep {vpmax\\.u16} %t | count 1 -; RUN: grep {vpmax\\.u32} %t | count 1 -; RUN: grep {vpmax\\.f32} %t | count 1 +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s define <8 x i8> @vpmaxs8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vpmaxs8: +;CHECK: vpmax.s8 %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B %tmp3 = call <8 x i8> @llvm.arm.neon.vpmaxs.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) @@ -15,6 +10,8 @@ } define <4 x i16> @vpmaxs16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vpmaxs16: +;CHECK: vpmax.s16 %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B %tmp3 = call <4 x i16> @llvm.arm.neon.vpmaxs.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) @@ -22,6 +19,8 @@ } define <2 x i32> @vpmaxs32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vpmaxs32: +;CHECK: vpmax.s32 %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B %tmp3 = call <2 x i32> @llvm.arm.neon.vpmaxs.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) @@ -29,6 +28,8 @@ } define <8 x i8> @vpmaxu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vpmaxu8: +;CHECK: vpmax.u8 %tmp1 = load <8 x i8>* %A %tmp2 = 
load <8 x i8>* %B %tmp3 = call <8 x i8> @llvm.arm.neon.vpmaxu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) @@ -36,6 +37,8 @@ } define <4 x i16> @vpmaxu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vpmaxu16: +;CHECK: vpmax.u16 %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B %tmp3 = call <4 x i16> @llvm.arm.neon.vpmaxu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) @@ -43,6 +46,8 @@ } define <2 x i32> @vpmaxu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vpmaxu32: +;CHECK: vpmax.u32 %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B %tmp3 = call <2 x i32> @llvm.arm.neon.vpmaxu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) @@ -50,6 +55,8 @@ } define <2 x float> @vpmaxf32(<2 x float>* %A, <2 x float>* %B) nounwind { +;CHECK: vpmaxf32: +;CHECK: vpmax.f32 %tmp1 = load <2 x float>* %A %tmp2 = load <2 x float>* %B %tmp3 = call <2 x float> @llvm.arm.neon.vpmaxs.v2f32(<2 x float> %tmp1, <2 x float> %tmp2) Modified: llvm/trunk/test/CodeGen/ARM/vpmin.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vpmin.ll?rev=83528&r1=83527&r2=83528&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/ARM/vpmin.ll (original) +++ llvm/trunk/test/CodeGen/ARM/vpmin.ll Thu Oct 8 01:02:10 2009 @@ -1,13 +1,8 @@ -; RUN: llc < %s -march=arm -mattr=+neon > %t -; RUN: grep {vpmin\\.s8} %t | count 1 -; RUN: grep {vpmin\\.s16} %t | count 1 -; RUN: grep {vpmin\\.s32} %t | count 1 -; RUN: grep {vpmin\\.u8} %t | count 1 -; RUN: grep {vpmin\\.u16} %t | count 1 -; RUN: grep {vpmin\\.u32} %t | count 1 -; RUN: grep {vpmin\\.f32} %t | count 1 +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s define <8 x i8> @vpmins8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vpmins8: +;CHECK: vpmin.s8 %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B %tmp3 = call <8 x i8> @llvm.arm.neon.vpmins.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) @@ -15,6 +10,8 @@ } define <4 x i16> @vpmins16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: 
vpmins16: +;CHECK: vpmin.s16 %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B %tmp3 = call <4 x i16> @llvm.arm.neon.vpmins.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) @@ -22,6 +19,8 @@ } define <2 x i32> @vpmins32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vpmins32: +;CHECK: vpmin.s32 %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B %tmp3 = call <2 x i32> @llvm.arm.neon.vpmins.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) @@ -29,6 +28,8 @@ } define <8 x i8> @vpminu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vpminu8: +;CHECK: vpmin.u8 %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B %tmp3 = call <8 x i8> @llvm.arm.neon.vpminu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) @@ -36,6 +37,8 @@ } define <4 x i16> @vpminu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vpminu16: +;CHECK: vpmin.u16 %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B %tmp3 = call <4 x i16> @llvm.arm.neon.vpminu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) @@ -43,6 +46,8 @@ } define <2 x i32> @vpminu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vpminu32: +;CHECK: vpmin.u32 %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B %tmp3 = call <2 x i32> @llvm.arm.neon.vpminu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) @@ -50,6 +55,8 @@ } define <2 x float> @vpminf32(<2 x float>* %A, <2 x float>* %B) nounwind { +;CHECK: vpminf32: +;CHECK: vpmin.f32 %tmp1 = load <2 x float>* %A %tmp2 = load <2 x float>* %B %tmp3 = call <2 x float> @llvm.arm.neon.vpmins.v2f32(<2 x float> %tmp1, <2 x float> %tmp2) From foldr at codedgers.com Thu Oct 8 01:03:38 2009 From: foldr at codedgers.com (Mikhail Glushenkov) Date: Thu, 08 Oct 2009 06:03:38 -0000 Subject: [llvm-commits] [llvm] r83529 - /llvm/trunk/tools/llvmc/plugins/Base/Base.td.in Message-ID: <200910080603.n9863cC9019037@zion.cs.uiuc.edu> Author: foldr Date: Thu Oct 8 01:03:38 2009 New Revision: 83529 URL: http://llvm.org/viewvc/llvm-project?rev=83529&view=rev Log: Unbreak the build. Forgot about the need to reconfigure after modifying Base.td.in.... 
Modified: llvm/trunk/tools/llvmc/plugins/Base/Base.td.in Modified: llvm/trunk/tools/llvmc/plugins/Base/Base.td.in URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvmc/plugins/Base/Base.td.in?rev=83529&r1=83528&r2=83529&view=diff ============================================================================== --- llvm/trunk/tools/llvmc/plugins/Base/Base.td.in (original) +++ llvm/trunk/tools/llvmc/plugins/Base/Base.td.in Thu Oct 8 01:03:38 2009 @@ -85,9 +85,9 @@ (and (switch_on "emit-llvm"), (switch_on "c")), (stop_compilation), (switch_on "fsyntax-only"), (stop_compilation), (not_empty "include"), (forward "include"), - (not_empty "I"), (forward "I")), - (not_empty "D"), (forward "D")), - (not_empty "MF"), (forward "MF")), + (not_empty "I"), (forward "I"), + (not_empty "D"), (forward "D"), + (not_empty "MF"), (forward "MF"), (not_empty "MT"), (forward "MT"))), (sink) ]>; From nicholas at mxc.ca Thu Oct 8 01:14:12 2009 From: nicholas at mxc.ca (Nick Lewycky) Date: Wed, 07 Oct 2009 23:14:12 -0700 Subject: [llvm-commits] [llvm] r83503 - in /llvm/trunk: autoconf/configure.ac configure In-Reply-To: <200910072322.n97NMhaa011612@zion.cs.uiuc.edu> References: <200910072322.n97NMhaa011612@zion.cs.uiuc.edu> Message-ID: <4ACD8334.3040309@mxc.ca> Jeffrey Yasskin wrote: > Author: jyasskin > Date: Wed Oct 7 18:22:42 2009 > New Revision: 83503 > > URL: http://llvm.org/viewvc/llvm-project?rev=83503&view=rev > Log: > Fix the OProfile part of PR5018. This fixes --without-oprofile, makes > it the default, and works around a broken libopagent on some Debian > systems. What's the point of making it default? Configure is supposed to figure out whether you've got oprofile or not. 
Nick > > Modified: > llvm/trunk/autoconf/configure.ac > llvm/trunk/configure > > Modified: llvm/trunk/autoconf/configure.ac > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/autoconf/configure.ac?rev=83503&r1=83502&r2=83503&view=diff > > ============================================================================== > --- llvm/trunk/autoconf/configure.ac (original) > +++ llvm/trunk/autoconf/configure.ac Wed Oct 7 18:22:42 2009 > @@ -999,31 +999,30 @@ > AC_SUBST(USE_OPROFILE, [1]) > case "$withval" in > /usr|yes) llvm_cv_oppath=/usr/lib/oprofile ;; > + no) llvm_cv_oppath= > + AC_SUBST(USE_OPROFILE, [0]) ;; > *) llvm_cv_oppath="${withval}/lib/oprofile" > CPPFLAGS="-I${withval}/include";; > esac > - LIBS="$LIBS -L${llvm_cv_oppath} -Wl,-rpath,${llvm_cv_oppath}" > - AC_SEARCH_LIBS(op_open_agent, opagent, [], [ > - echo "Error! You need to have libopagent around." > - exit -1 > - ]) > - AC_CHECK_HEADER([opagent.h], [], [ > - echo "Error! You need to have opagent.h around." > - exit -1 > - ]) > + if test -n "$llvm_cv_oppath" ; then > + LIBS="$LIBS -L${llvm_cv_oppath} -Wl,-rpath,${llvm_cv_oppath}" > + dnl Work around http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=537744: > + dnl libbfd is not included properly in libopagent in some Debian > + dnl versions. If libbfd isn't found at all, we assume opagent works > + dnl anyway. > + AC_SEARCH_LIBS(bfd_init, bfd, [], []) > + AC_SEARCH_LIBS(op_open_agent, opagent, [], [ > + echo "Error! You need to have libopagent around." > + exit -1 > + ]) > + AC_CHECK_HEADER([opagent.h], [], [ > + echo "Error! You need to have opagent.h around." > + exit -1 > + ]) > + fi > ], > [ > - llvm_cv_old_LIBS="$LIBS" > - LIBS="$LIBS -L/usr/lib/oprofile -Wl,-rpath,/usr/lib/oprofile" > - dnl If either the library or header aren't present, omit oprofile support. 
> - AC_SEARCH_LIBS(op_open_agent, opagent, > - [AC_SUBST(USE_OPROFILE, [1])], > - [LIBS="$llvm_cv_old_LIBS" > - AC_SUBST(USE_OPROFILE, [0])]) > - AC_CHECK_HEADER([opagent.h], [], [ > - LIBS="$llvm_cv_old_LIBS" > - AC_SUBST(USE_OPROFILE, [0]) > - ]) > + AC_SUBST(USE_OPROFILE, [0]) > ]) > AC_DEFINE_UNQUOTED([USE_OPROFILE],$USE_OPROFILE, > [Define if we have the oprofile JIT-support library]) > > Modified: llvm/trunk/configure > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/configure?rev=83503&r1=83502&r2=83503&view=diff > > ============================================================================== > --- llvm/trunk/configure (original) > +++ llvm/trunk/configure Wed Oct 7 18:22:42 2009 > @@ -28539,13 +28539,17 @@ > > case "$withval" in > /usr|yes) llvm_cv_oppath=/usr/lib/oprofile ;; > + no) llvm_cv_oppath= > + USE_OPROFILE=0 > + ;; > *) llvm_cv_oppath="${withval}/lib/oprofile" > CPPFLAGS="-I${withval}/include";; > esac > - LIBS="$LIBS -L${llvm_cv_oppath} -Wl,-rpath,${llvm_cv_oppath}" > - { echo "$as_me:$LINENO: checking for library containing op_open_agent" >&5 > -echo $ECHO_N "checking for library containing op_open_agent... $ECHO_C" >&6; } > -if test "${ac_cv_search_op_open_agent+set}" = set; then > + if test -n "$llvm_cv_oppath" ; then > + LIBS="$LIBS -L${llvm_cv_oppath} -Wl,-rpath,${llvm_cv_oppath}" > + { echo "$as_me:$LINENO: checking for library containing bfd_init" >&5 > +echo $ECHO_N "checking for library containing bfd_init... $ECHO_C" >&6; } > +if test "${ac_cv_search_bfd_init+set}" = set; then > echo $ECHO_N "(cached) $ECHO_C" >&6 > else > ac_func_search_save_LIBS=$LIBS > @@ -28562,16 +28566,16 @@ > #ifdef __cplusplus > extern "C" > #endif > -char op_open_agent (); > +char bfd_init (); > int > main () > { > -return op_open_agent (); > +return bfd_init (); > ; > return 0; > } > _ACEOF > -for ac_lib in '' opagent; do > +for ac_lib in '' bfd; do > if test -z "$ac_lib"; then > ac_res="none required" > else > @@ -28612,7 +28616,7 @@ > ac_status=$? 
> echo "$as_me:$LINENO: \$? = $ac_status" >&5 > (exit $ac_status); }; }; then > - ac_cv_search_op_open_agent=$ac_res > + ac_cv_search_bfd_init=$ac_res > else > echo "$as_me: failed program was:" >&5 > sed 's/^/| /' conftest.$ac_ext >&5 > @@ -28622,201 +28626,27 @@ > > rm -f core conftest.err conftest.$ac_objext \ > conftest$ac_exeext > - if test "${ac_cv_search_op_open_agent+set}" = set; then > + if test "${ac_cv_search_bfd_init+set}" = set; then > break > fi > done > -if test "${ac_cv_search_op_open_agent+set}" = set; then > +if test "${ac_cv_search_bfd_init+set}" = set; then > : > else > - ac_cv_search_op_open_agent=no > + ac_cv_search_bfd_init=no > fi > rm conftest.$ac_ext > LIBS=$ac_func_search_save_LIBS > fi > -{ echo "$as_me:$LINENO: result: $ac_cv_search_op_open_agent" >&5 > -echo "${ECHO_T}$ac_cv_search_op_open_agent" >&6; } > -ac_res=$ac_cv_search_op_open_agent > +{ echo "$as_me:$LINENO: result: $ac_cv_search_bfd_init" >&5 > +echo "${ECHO_T}$ac_cv_search_bfd_init" >&6; } > +ac_res=$ac_cv_search_bfd_init > if test "$ac_res" != no; then > test "$ac_res" = "none required" || LIBS="$ac_res $LIBS" > > -else > - > - echo "Error! You need to have libopagent around." > - exit -1 > - > -fi > - > - if test "${ac_cv_header_opagent_h+set}" = set; then > - { echo "$as_me:$LINENO: checking for opagent.h" >&5 > -echo $ECHO_N "checking for opagent.h... $ECHO_C" >&6; } > -if test "${ac_cv_header_opagent_h+set}" = set; then > - echo $ECHO_N "(cached) $ECHO_C" >&6 > -fi > -{ echo "$as_me:$LINENO: result: $ac_cv_header_opagent_h" >&5 > -echo "${ECHO_T}$ac_cv_header_opagent_h" >&6; } > -else > - # Is the header compilable? > -{ echo "$as_me:$LINENO: checking opagent.h usability" >&5 > -echo $ECHO_N "checking opagent.h usability... $ECHO_C" >&6; } > -cat >conftest.$ac_ext <<_ACEOF > -/* confdefs.h. */ > -_ACEOF > -cat confdefs.h >>conftest.$ac_ext > -cat >>conftest.$ac_ext <<_ACEOF > -/* end confdefs.h. 
*/ > -$ac_includes_default > -#include > -_ACEOF > -rm -f conftest.$ac_objext > -if { (ac_try="$ac_compile" > -case "(($ac_try" in > - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; > - *) ac_try_echo=$ac_try;; > -esac > -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 > - (eval "$ac_compile") 2>conftest.er1 > - ac_status=$? > - grep -v '^ *+' conftest.er1 >conftest.err > - rm -f conftest.er1 > - cat conftest.err >&5 > - echo "$as_me:$LINENO: \$? = $ac_status" >&5 > - (exit $ac_status); } && > - { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' > - { (case "(($ac_try" in > - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; > - *) ac_try_echo=$ac_try;; > -esac > -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 > - (eval "$ac_try") 2>&5 > - ac_status=$? > - echo "$as_me:$LINENO: \$? = $ac_status" >&5 > - (exit $ac_status); }; } && > - { ac_try='test -s conftest.$ac_objext' > - { (case "(($ac_try" in > - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; > - *) ac_try_echo=$ac_try;; > -esac > -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 > - (eval "$ac_try") 2>&5 > - ac_status=$? > - echo "$as_me:$LINENO: \$? = $ac_status" >&5 > - (exit $ac_status); }; }; then > - ac_header_compiler=yes > -else > - echo "$as_me: failed program was:" >&5 > -sed 's/^/| /' conftest.$ac_ext >&5 > - > - ac_header_compiler=no > fi > > -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext > -{ echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 > -echo "${ECHO_T}$ac_header_compiler" >&6; } > - > -# Is the header present? > -{ echo "$as_me:$LINENO: checking opagent.h presence" >&5 > -echo $ECHO_N "checking opagent.h presence... $ECHO_C" >&6; } > -cat >conftest.$ac_ext <<_ACEOF > -/* confdefs.h. */ > -_ACEOF > -cat confdefs.h >>conftest.$ac_ext > -cat >>conftest.$ac_ext <<_ACEOF > -/* end confdefs.h. 
*/ > -#include > -_ACEOF > -if { (ac_try="$ac_cpp conftest.$ac_ext" > -case "(($ac_try" in > - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; > - *) ac_try_echo=$ac_try;; > -esac > -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 > - (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 > - ac_status=$? > - grep -v '^ *+' conftest.er1 >conftest.err > - rm -f conftest.er1 > - cat conftest.err >&5 > - echo "$as_me:$LINENO: \$? = $ac_status" >&5 > - (exit $ac_status); } >/dev/null; then > - if test -s conftest.err; then > - ac_cpp_err=$ac_c_preproc_warn_flag > - ac_cpp_err=$ac_cpp_err$ac_c_werror_flag > - else > - ac_cpp_err= > - fi > -else > - ac_cpp_err=yes > -fi > -if test -z "$ac_cpp_err"; then > - ac_header_preproc=yes > -else > - echo "$as_me: failed program was:" >&5 > -sed 's/^/| /' conftest.$ac_ext >&5 > - > - ac_header_preproc=no > -fi > - > -rm -f conftest.err conftest.$ac_ext > -{ echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 > -echo "${ECHO_T}$ac_header_preproc" >&6; } > - > -# So? What about this header? > -case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in > - yes:no: ) > - { echo "$as_me:$LINENO: WARNING: opagent.h: accepted by the compiler, rejected by the preprocessor!" >&5 > -echo "$as_me: WARNING: opagent.h: accepted by the compiler, rejected by the preprocessor!" >&2;} > - { echo "$as_me:$LINENO: WARNING: opagent.h: proceeding with the compiler's result" >&5 > -echo "$as_me: WARNING: opagent.h: proceeding with the compiler's result" >&2;} > - ac_header_preproc=yes > - ;; > - no:yes:* ) > - { echo "$as_me:$LINENO: WARNING: opagent.h: present but cannot be compiled" >&5 > -echo "$as_me: WARNING: opagent.h: present but cannot be compiled" >&2;} > - { echo "$as_me:$LINENO: WARNING: opagent.h: check for missing prerequisite headers?" >&5 > -echo "$as_me: WARNING: opagent.h: check for missing prerequisite headers?" 
>&2;} > - { echo "$as_me:$LINENO: WARNING: opagent.h: see the Autoconf documentation" >&5 > -echo "$as_me: WARNING: opagent.h: see the Autoconf documentation" >&2;} > - { echo "$as_me:$LINENO: WARNING: opagent.h: section \"Present But Cannot Be Compiled\"" >&5 > -echo "$as_me: WARNING: opagent.h: section \"Present But Cannot Be Compiled\"" >&2;} > - { echo "$as_me:$LINENO: WARNING: opagent.h: proceeding with the preprocessor's result" >&5 > -echo "$as_me: WARNING: opagent.h: proceeding with the preprocessor's result" >&2;} > - { echo "$as_me:$LINENO: WARNING: opagent.h: in the future, the compiler will take precedence" >&5 > -echo "$as_me: WARNING: opagent.h: in the future, the compiler will take precedence" >&2;} > - ( cat <<\_ASBOX > -## ----------------------------------- ## > -## Report this to llvmbugs at cs.uiuc.edu ## > -## ----------------------------------- ## > -_ASBOX > - ) | sed "s/^/$as_me: WARNING: /" >&2 > - ;; > -esac > -{ echo "$as_me:$LINENO: checking for opagent.h" >&5 > -echo $ECHO_N "checking for opagent.h... $ECHO_C" >&6; } > -if test "${ac_cv_header_opagent_h+set}" = set; then > - echo $ECHO_N "(cached) $ECHO_C" >&6 > -else > - ac_cv_header_opagent_h=$ac_header_preproc > -fi > -{ echo "$as_me:$LINENO: result: $ac_cv_header_opagent_h" >&5 > -echo "${ECHO_T}$ac_cv_header_opagent_h" >&6; } > - > -fi > -if test $ac_cv_header_opagent_h = yes; then > - : > -else > - > - echo "Error! You need to have opagent.h around." > - exit -1 > - > -fi > - > - > - > -else > - > - llvm_cv_old_LIBS="$LIBS" > - LIBS="$LIBS -L/usr/lib/oprofile -Wl,-rpath,/usr/lib/oprofile" > - { echo "$as_me:$LINENO: checking for library containing op_open_agent" >&5 > + { echo "$as_me:$LINENO: checking for library containing op_open_agent" >&5 > echo $ECHO_N "checking for library containing op_open_agent... 
$ECHO_C" >&6; } > if test "${ac_cv_search_op_open_agent+set}" = set; then > echo $ECHO_N "(cached) $ECHO_C" >&6 > @@ -28912,15 +28742,15 @@ > ac_res=$ac_cv_search_op_open_agent > if test "$ac_res" != no; then > test "$ac_res" = "none required" || LIBS="$ac_res $LIBS" > - USE_OPROFILE=1 > > else > - LIBS="$llvm_cv_old_LIBS" > - USE_OPROFILE=0 > + > + echo "Error! You need to have libopagent around." > + exit -1 > > fi > > - if test "${ac_cv_header_opagent_h+set}" = set; then > + if test "${ac_cv_header_opagent_h+set}" = set; then > { echo "$as_me:$LINENO: checking for opagent.h" >&5 > echo $ECHO_N "checking for opagent.h... $ECHO_C" >&6; } > if test "${ac_cv_header_opagent_h+set}" = set; then > @@ -29078,13 +28908,18 @@ > : > else > > - LIBS="$llvm_cv_old_LIBS" > - USE_OPROFILE=0 > - > + echo "Error! You need to have opagent.h around." > + exit -1 > > fi > > > + fi > + > +else > + > + USE_OPROFILE=0 > + > > fi > > > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits > From jyasskin at google.com Thu Oct 8 01:17:47 2009 From: jyasskin at google.com (Jeffrey Yasskin) Date: Wed, 7 Oct 2009 23:17:47 -0700 Subject: [llvm-commits] [llvm] r83503 - in /llvm/trunk: autoconf/configure.ac configure In-Reply-To: <4ACD8334.3040309@mxc.ca> References: <200910072322.n97NMhaa011612@zion.cs.uiuc.edu> <4ACD8334.3040309@mxc.ca> Message-ID: On Wed, Oct 7, 2009 at 11:14 PM, Nick Lewycky wrote: > Jeffrey Yasskin wrote: >> >> Author: jyasskin >> Date: Wed Oct 7 18:22:42 2009 >> New Revision: 83503 >> >> URL: http://llvm.org/viewvc/llvm-project?rev=83503&view=rev >> Log: >> Fix the OProfile part of PR5018. This fixes --without-oprofile, makes >> it the default, and works around a broken libopagent on some Debian >> systems. > > What's the point of making it default? Configure is supposed to figure out > whether you've got oprofile or not. 
> > Nick Duncan asked for it to be off by default instead of appearing if your system has it. I think that was partly because it was infecting the gcc-plugin due to how llvm-config doesn't support per-component ldflags. From grosbach at apple.com Thu Oct 8 01:20:36 2009 From: grosbach at apple.com (Jim Grosbach) Date: Wed, 7 Oct 2009 23:20:36 -0700 Subject: [llvm-commits] [llvm] r83521 - in /llvm/trunk: include/llvm/Target/TargetRegisterInfo.h lib/CodeGen/PrologEpilogInserter.cpp lib/CodeGen/PrologEpilogInserter.h lib/Target/ARM/Thumb1RegisterInfo.cpp lib/Target/ARM/Thumb1RegisterInfo.h In-Reply-To: <51D3B8B5-79BB-4399-BFB0-9C7D6ABCE484@apple.com> References: <200910080146.n981kxrE019225@zion.cs.uiuc.edu> <51D3B8B5-79BB-4399-BFB0-9C7D6ABCE484@apple.com> Message-ID: Heya, I didn't see any regressions when I ran the singlesource tests by hand, and the nightly run last night looks good with the scavenging in for LLCBETA. If there's a problem on tonight's run, it'll be the register re-use stuff. I wanted to get this in this evening so we'd have one change at a time per nightly run to make things simpler to track down if there's problems. For an additional sanity check, I also built a sample application (unzip) and ran some tests of that and it worked great (and had better codegen). Assuming things look good, I have a patch to enable allocation of R3 ready to go. I held off on that for tonight so as not to change too many things per nightly run. Similarly, I looked more closely at getting the ARM and T2 targets to use the new scheme, and I didn't see anything that will make that non-trivial. Basically, by tomorrow we should have the functionality in place. Still some cleanup to do to address a few of your comments I haven't gotten to yet and a few other things I've been keeping notes on that I want to fix. FWIW, I looked again at the eliminateFrameIndex() hooks and didn't see any way around the changes I made there to track the values and associated virtual registers. 
We can talk over the specifics tomorrow. Maybe you'll have some ideas for ways to avoid that stuff. It'd be very nice to not need to have that bit. Would make it a lot easier to make other ports use the new stuff and such. Regards, Jim On Oct 7, 2009, at 10:38 PM, Evan Cheng wrote: > Nice. Are all Thumb1 tests passing? > > Evan > > On Oct 7, 2009, at 6:46 PM, Jim Grosbach wrote: > >> Author: grosbach >> Date: Wed Oct 7 20:46:59 2009 >> New Revision: 83521 >> >> URL: http://llvm.org/viewvc/llvm-project?rev=83521&view=rev >> Log: >> Re-enable register scavenging in Thumb1 by default. >> >> Modified: >> llvm/trunk/include/llvm/Target/TargetRegisterInfo.h >> llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp >> llvm/trunk/lib/CodeGen/PrologEpilogInserter.h >> llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.cpp >> llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.h >> >> Modified: llvm/trunk/include/llvm/Target/TargetRegisterInfo.h >> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetRegisterInfo.h?rev=83521&r1=83520&r2=83521&view=diff >> >> = >> = >> = >> = >> = >> = >> = >> = >> = >> ===================================================================== >> --- llvm/trunk/include/llvm/Target/TargetRegisterInfo.h (original) >> +++ llvm/trunk/include/llvm/Target/TargetRegisterInfo.h Wed Oct 7 >> 20:46:59 2009 >> @@ -561,6 +561,12 @@ >> return false; >> } >> >> + /// requiresFrameIndexScavenging - returns true if the target >> requires post >> + /// PEI scavenging of registers for materializing frame index >> constants. >> + virtual bool requiresFrameIndexScavenging(const MachineFunction >> &MF) const { >> + return false; >> + } >> + >> /// hasFP - Return true if the specified function should have a >> dedicated >> /// frame pointer register. For most targets this is true only if >> the function >> /// has variable sized allocas or if frame pointer elimination is >> disabled. 
>> >> Modified: llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp >> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp?rev=83521&r1=83520&r2=83521&view=diff >> >> = >> = >> = >> = >> = >> = >> = >> = >> = >> ===================================================================== >> --- llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp (original) >> +++ llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp Wed Oct 7 >> 20:46:59 2009 >> @@ -44,16 +44,6 @@ >> static RegisterPass >> X("prologepilog", "Prologue/Epilogue Insertion"); >> >> -// FIXME: For now, the frame index scavenging is off by default >> and only >> -// used by the Thumb1 target. When it's the default and replaces >> the current >> -// on-the-fly PEI scavenging for all targets, >> requiresRegisterScavenging() >> -// will replace this. >> -cl::opt >> -FrameIndexVirtualScavenging("enable-frame-index-scavenging", >> - cl::Hidden, >> - cl::desc("Enable frame index >> elimination with" >> - "virtual register >> scavenging")); >> - >> /// createPrologEpilogCodeInserter - This function returns a pass >> that inserts >> /// prolog and epilog code, and eliminates abstract frame references. >> /// >> @@ -66,6 +56,7 @@ >> const Function* F = Fn.getFunction(); >> const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo(); >> RS = TRI->requiresRegisterScavenging(Fn) ? new RegScavenger() : >> NULL; >> + FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging >> (Fn); >> >> // Get MachineModuleInfo so that we can track the construction of >> the >> // frame. 
>> >> Modified: llvm/trunk/lib/CodeGen/PrologEpilogInserter.h >> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/PrologEpilogInserter.h?rev=83521&r1=83520&r2=83521&view=diff >> >> = >> = >> = >> = >> = >> = >> = >> = >> = >> ===================================================================== >> --- llvm/trunk/lib/CodeGen/PrologEpilogInserter.h (original) >> +++ llvm/trunk/lib/CodeGen/PrologEpilogInserter.h Wed Oct 7 >> 20:46:59 2009 >> @@ -94,6 +94,11 @@ >> // functions. >> bool ShrinkWrapThisFunction; >> >> + // Flag to control whether to use the register scavenger to >> resolve >> + // frame index materialization registers. Set according to >> + // TRI->requiresFrameIndexScavenging() for the curren function. >> + bool FrameIndexVirtualScavenging; >> + >> // When using the scavenger post-pass to resolve frame reference >> // materialization registers, maintain a map of the registers to >> // the constant value and SP adjustment associated with it. >> >> Modified: llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.cpp >> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.cpp?rev=83521&r1=83520&r2=83521&view=diff >> >> = >> = >> = >> = >> = >> = >> = >> = >> = >> ===================================================================== >> --- llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.cpp (original) >> +++ llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.cpp Wed Oct 7 >> 20:46:59 2009 >> @@ -37,11 +37,6 @@ >> #include "llvm/Support/raw_ostream.h" >> using namespace llvm; >> >> -// FIXME: This cmd line option conditionalizes the new register >> scavenging >> -// implemenation in PEI. Remove the option when scavenging works >> well enough >> -// to be the default. 
>> -extern cl::opt FrameIndexVirtualScavenging; >> - >> Thumb1RegisterInfo::Thumb1RegisterInfo(const ARMBaseInstrInfo &tii, >> const ARMSubtarget &sti) >> : ARMBaseRegisterInfo(tii, sti) { >> @@ -84,9 +79,16 @@ >> >> bool >> Thumb1RegisterInfo::requiresRegisterScavenging(const >> MachineFunction &MF) const { >> - return FrameIndexVirtualScavenging; >> + return true; >> +} >> + >> +bool >> +Thumb1RegisterInfo::requiresFrameIndexScavenging(const >> MachineFunction &MF) >> + const { >> + return true; >> } >> >> + >> bool Thumb1RegisterInfo::hasReservedCallFrame(MachineFunction &MF) >> const { >> const MachineFrameInfo *FFI = MF.getFrameInfo(); >> unsigned CFSize = FFI->getMaxCallFrameSize(); >> @@ -128,13 +130,7 @@ >> unsigned LdReg = DestReg; >> if (DestReg == ARM::SP) { >> assert(BaseReg == ARM::SP && "Unexpected!"); >> - if (FrameIndexVirtualScavenging) { >> - LdReg = MF.getRegInfo().createVirtualRegister >> (ARM::tGPRRegisterClass); >> - } else { >> - LdReg = ARM::R3; >> - BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::R12) >> - .addReg(ARM::R3, RegState::Kill); >> - } >> + LdReg = MF.getRegInfo().createVirtualRegister >> (ARM::tGPRRegisterClass); >> } >> >> if (NumBytes <= 255 && NumBytes >= 0) >> @@ -159,10 +155,6 @@ >> else >> MIB.addReg(LdReg).addReg(BaseReg, RegState::Kill); >> AddDefaultPred(MIB); >> - >> - if (!FrameIndexVirtualScavenging && DestReg == ARM::SP) >> - BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2tgpr), ARM::R3) >> - .addReg(ARM::R12, RegState::Kill); >> } >> >> /// calcNumMI - Returns the number of instructions required to >> materialize >> @@ -635,7 +627,6 @@ >> else // tLDR has an extra register operand. 
>> MI.addOperand(MachineOperand::CreateReg(0, false)); >> } else if (Desc.mayStore()) { >> - if (FrameIndexVirtualScavenging) { >> VReg = MF.getRegInfo().createVirtualRegister >> (ARM::tGPRRegisterClass); >> assert (Value && "Frame index virtual allocated, but Value arg >> is NULL!"); >> *Value = Offset; >> @@ -658,52 +649,6 @@ >> MI.addOperand(MachineOperand::CreateReg(FrameReg, false)); >> else // tSTR has an extra register operand. >> MI.addOperand(MachineOperand::CreateReg(0, false)); >> - } else { >> - // FIXME! This is horrific!!! We need register scavenging. >> - // Our temporary workaround has marked r3 unavailable. Of >> course, r3 is >> - // also a ABI register so it's possible that is is the >> register that is >> - // being storing here. If that's the case, we do the >> following: >> - // r12 = r2 >> - // Use r2 to materialize sp + offset >> - // str r3, r2 >> - // r2 = r12 >> - unsigned ValReg = MI.getOperand(0).getReg(); >> - unsigned TmpReg = ARM::R3; >> - bool UseRR = false; >> - if (ValReg == ARM::R3) { >> - BuildMI(MBB, II, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::R12) >> - .addReg(ARM::R2, RegState::Kill); >> - TmpReg = ARM::R2; >> - } >> - if (TmpReg == ARM::R3 && AFI->isR3LiveIn()) >> - BuildMI(MBB, II, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::R12) >> - .addReg(ARM::R3, RegState::Kill); >> - if (Opcode == ARM::tSpill) { >> - if (FrameReg == ARM::SP) >> - emitThumbRegPlusImmInReg(MBB, II, TmpReg, FrameReg, >> - Offset, false, TII, *this, dl); >> - else { >> - emitLoadConstPool(MBB, II, dl, TmpReg, 0, Offset); >> - UseRR = true; >> - } >> - } else >> - emitThumbRegPlusImmediate(MBB, II, TmpReg, FrameReg, >> Offset, TII, >> - *this, dl); >> - MI.setDesc(TII.get(ARM::tSTR)); >> - MI.getOperand(i).ChangeToRegister(TmpReg, false, false, true); >> - if (UseRR) // Use [reg, reg] addrmode. >> - MI.addOperand(MachineOperand::CreateReg(FrameReg, false)); >> - else // tSTR has an extra register operand. 
>> - MI.addOperand(MachineOperand::CreateReg(0, false)); >> - >> - MachineBasicBlock::iterator NII = next(II); >> - if (ValReg == ARM::R3) >> - BuildMI(MBB, NII, dl, TII.get(ARM::tMOVgpr2tgpr), ARM::R2) >> - .addReg(ARM::R12, RegState::Kill); >> - if (TmpReg == ARM::R3 && AFI->isR3LiveIn()) >> - BuildMI(MBB, NII, dl, TII.get(ARM::tMOVgpr2tgpr), ARM::R3) >> - .addReg(ARM::R12, RegState::Kill); >> - } >> } else >> assert(false && "Unexpected opcode!"); >> >> >> Modified: llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.h >> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.h?rev=83521&r1=83520&r2=83521&view=diff >> >> = >> = >> = >> = >> = >> = >> = >> = >> = >> ===================================================================== >> --- llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.h (original) >> +++ llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.h Wed Oct 7 >> 20:46:59 2009 >> @@ -41,6 +41,7 @@ >> getPhysicalRegisterRegClass(unsigned Reg, EVT VT = MVT::Other) >> const; >> >> bool requiresRegisterScavenging(const MachineFunction &MF) const; >> + bool requiresFrameIndexScavenging(const MachineFunction &MF) >> const; >> >> bool hasReservedCallFrame(MachineFunction &MF) const; >> >> >> >> _______________________________________________ >> llvm-commits mailing list >> llvm-commits at cs.uiuc.edu >> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits > From sabre at nondot.org Thu Oct 8 01:27:53 2009 From: sabre at nondot.org (Chris Lattner) Date: Thu, 08 Oct 2009 06:27:53 -0000 Subject: [llvm-commits] [llvm] r83530 - /llvm/trunk/docs/ReleaseNotes-2.6.html Message-ID: <200910080627.n986RrqW022054@zion.cs.uiuc.edu> Author: lattner Date: Thu Oct 8 01:27:53 2009 New Revision: 83530 URL: http://llvm.org/viewvc/llvm-project?rev=83530&view=rev Log: checkpoint, this is still not comprehendible. 
Modified: llvm/trunk/docs/ReleaseNotes-2.6.html Modified: llvm/trunk/docs/ReleaseNotes-2.6.html URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/docs/ReleaseNotes-2.6.html?rev=83530&r1=83529&r2=83530&view=diff ============================================================================== --- llvm/trunk/docs/ReleaseNotes-2.6.html (original) +++ llvm/trunk/docs/ReleaseNotes-2.6.html Thu Oct 8 01:27:53 2009 @@ -52,26 +52,8 @@ - Many new papers added to /pubs/ - Machine LICM, hoists things like constant pool loads, loads from readonly stubs, vector constant synthesization code, etc. - Machine Sinking - Regalloc improvements for commuting, various spiller peephole optimizations, cross-class coalescing. - Support for debug line numbers when optimization enabled - gold lto plugin - - target-specific intrinsics (r63765) - llc -enable-value-prop, propagation of value info (sign/zero ext info) from one MBB to another - interpreter + libffi - LLVMContext - Preliminary support for addrspace 256 -> GS, 257 -> FS, known problems: CodeGenerator.html#x86_memory - -Add support for the PowerPC 64-bit SVR4 ABI. - - NSW/NUW/exact div - Inbounds for GEP - SRoA improvements for vector unions, memset, arbitrary weird bitfield accesses etc. It now produces "strange" sized integers. - pre-alloc splitter?? - X86: Support for softfloat modes, typically used by OS kernels. + + MC: MCSection, MCAsmInfo MCInstPrinter did it make it in? @@ -83,94 +65,23 @@ ELF Writer? How stable? - LSR promotes int induction variables to 64-bit on 64-bit targets, major perf boost for numerical code. - LSR now analyzes pointer expressions (e.g. getelementptrs), not just integers. - Stack slot coloring for register spills (denser stack frames) - SelectionDAGS: New BuildVectorSDNode (r65296), and ISD::VECTOR_SHUFFLE (r69952 / PR2957) - New PrettyStackTrace, crashes of llvm tools should give some indication of what the compiler was doing at the time of the crash (e.g. 
running a pass), and print out command line arguments. - new linkage types linkonce_odr, weak_odr, linker_private, and available_externally. - Inliner reuse stack space when inlining arrays? - - Regalloc hints for allocation stuff: Evan r73381/r73671. Finished/enabled? - API Cleanup: - no use of hash_set/hash_map, no more llvm::OStream - Use raw_ostream for everything, killed off llvm/Streams.h and DOUT - - Mips now supports O32 Calling Convention - - StringRef class, Twine class. - New BlackFin backend. - +APIs: + Shrink wrapping support in PEI, what is the state of it? - X86-64: better modeling of implicit zero extensions, eliminates a lot of redundant zexts - X86-64 TLS support for local exec and initial exec. - X86 - Better modeling of H registerts as subregs. - Getelementpr instruction now allows any integer type for array/pointer indexes. - - include/llvm/Analysis/LiveValues.h => dead?? - lib/Analysis/LoopVR.cpp ==> dead?? - include/llvm/CodeGen/LazyLiveness.h ==> dead? - lib/Transforms/IPO/MergeFunctions.cpp ==> dead? - llvm/Analysis/PointerTracking.h ==> dead?? - PRedSimplify -> Ask vmkit if it is still useful and for testcases. - - ARM calling convention code is now tblgen generated instead of manual. - ARM: NEON support. neonfp for doing single precision fp with neon instead of VFP. - Tblgen now supports multiclass inheritance. - Unladen swallow as user? - Loop index split disabled by default? - - New WeakVH and AssertingVH and CallbackVH classes. - New llvm/ADT/Triple class. - Removed the IA64 backend. - Profile info improvements by Andreas Neustifter. - PostRA scheduler improvements David Goodwin. + - New MSP430 and SystemZ backends. - llvm-gcc now supports a new TCE target. - klee web page at klee.llvm.org - New llvm/System/Atomic.h, llvm/System/RWMutex.h for portable atomic ops, rw locks. 
- llvm_start_multithreaded: ProgrammersMAnual.html#threading - - Tablegen now supports a number of new string and list operations like - !(subst), !(foreach), !car, !cdr, !null, !if, !cast. - New fadd, fsub, fmul instructions and classes. - New MachineVerifier pass. - Enabled GVN Load PRE. - ARM AAPCS-VFP hard float ABI is supported. - LLVM build now builds all libraries as .a files instead of some libraries as relinked .o files. This requires some APIs like InitializeAllTargets.h. TargetRegistry! - - ARM Thumb2 support: status? - CBE status: not part of the release criteria. - - New SourceMgr, SMLoc classes for simple parsers with caret diagnostics and #include support, (used by - tablegen, llvm-mc, the .ll parser, FileCheck, etc) - FileCheck! + CHECK-NEXT - New compiler-rt project. - New Static Single Information (SSI) construction pass (not used by anything yet, experimental). - llvm_report_error() error handling API (llvm/Support/ErrorHandling.h) - - x86: Vector icmp/fcmp now work with SSE codegen. - X86: all global variable reference logic is now in ClassifyGlobalReference. - JIT support for oprofile (r75279), configure with --with-oprofile. Now we get line # and function info for JIT'd functions. - Mention gcc plugin. - - New EngineBuilder class for creating JITs: r76276 Reid Kleckner - - -asm-verbose now prints location info (with -g) and loop nest info. - JIT now supports generating more than 16M of code. - -removed the BigBlock register allocator, it had bitrotted. -Target intrinsics can now return multiple results. - - SSE 4.2 support. - Ada bindings for LLVM IR. - Many extensions to the C APIs. + @@ -406,6 +321,7 @@ minor improvements. Some of the major improvements and new features are listed in this section.

+ @@ -418,7 +334,15 @@

LLVM 2.6 includes several major new capabilities:

    -
  • Something wonderful!
  • +
  • Support for debug line numbers when optimization enabled
  • +
  • gold lto plugin
  • +
  • New MSP430 and SystemZ backends.
  • +
  • New BlackFin backend.
  • +
  • LLVMContext, llvm_start_multithreaded: ProgrammersManual.html#threading
  • +
  • Unladen swallow as user?
  • +
  • klee web page at klee.llvm.org
  • +
  • FileCheck
  • +
  • New compiler-rt project.
  • LLVM 2.6 includes a brand new experimental LLVM bindings to the Ada2005 programming language.
@@ -453,7 +377,14 @@ can be useful if you are writing a front-end for LLVM:

    -
  • Something wonderful!
  • +
  • Getelementptr instruction now allows any integer type for array/pointer indexes.
  • +
  • Inbounds for GEP
  • +
  • NSW/NUW/exact div
  • +
  • LSR promotes int induction variables to 64-bit on 64-bit targets, major perf boost for numerical code.
  • +
  • LSR now analyzes pointer expressions (e.g. getelementptrs), not just integers.
  • +
  • new linkage types linkonce_odr, weak_odr, linker_private, and available_externally.
  • +
  • New fadd, fsub, fmul instructions and classes.
  • +
  • Target intrinsics can now return multiple results.
@@ -470,7 +401,11 @@
    -
  • Something wonderful!
  • +
  • SRoA improvements for vector unions, memset, arbitrary weird bitfield accesses etc. It now produces "strange" sized integers.
  • +
  • Inliner reuse stack space when inlining arrays?
  • +
  • Enabled GVN Load PRE.
  • +
  • New Static Single Information (SSI) construction pass (not used by anything yet, experimental).
  • +
@@ -489,7 +424,22 @@
    -
  • Something wonderful!
  • +
  • -asm-verbose now prints location info (with -g) and loop nest info.
  • +
  • Tblgen now supports multiclass inheritance and a number of new string and + list operations like !(subst), !(foreach), !car, !cdr, !null, !if, !cast. + These make the .td files more expressive and allow more aggressive factoring + of duplication across instruction patterns.
  • +
  • New MachineVerifier pass.
  • +
  • Machine LICM, hoists things like constant pool loads, loads from readonly stubs, vector constant synthesization code, etc.
  • +
  • Machine Sinking
  • +
  • target-specific intrinsics (r63765)
  • +
  • Regalloc improvements for commuting, various spiller peephole optimizations, cross-class coalescing.
  • +
  • llc -enable-value-prop, propagation of value info (sign/zero ext info) from one MBB to another
  • +
  • Regalloc hints for allocation stuff: Evan r73381/r73671. Finished/enabled?
  • +
  • Stack slot coloring for register spills (denser stack frames)
  • +
  • SelectionDAGS: New BuildVectorSDNode (r65296), and ISD::VECTOR_SHUFFLE (r69952 / PR2957)
  • +
  • PostRA scheduler improvements David Goodwin.
  • +
@@ -504,7 +454,16 @@
    -
  • Something wonderful!
  • +
  • Preliminary support for addrspace 256 -> GS, 257 -> FS, known problems: CodeGenerator.html#x86_memory
  • +
  • Support for softfloat modes, typically used by OS kernels.
  • + +
  • X86-64: better modeling of implicit zero extensions, eliminates a lot of redundant zexts
  • +
  • X86-64 TLS support for local exec and initial exec.
  • +
  • Better modeling of H registers as subregs.
  • +
  • Vector icmp/fcmp now work with SSE codegen.
  • +
  • SSE 4.2 support.
  • +
  • all global variable reference logic is now in ClassifyGlobalReference.
  • +
@@ -519,18 +478,18 @@

    -
  • Something wonderful!
  • +
  • Support for floating-point, indirect function calls, and + passing/returning aggregate types to functions. +
  • The code generator is able to generate debug info into output COFF files. +
  • Support for placing an object into a specific section or at a specific + address in memory.

Things not yet supported:

    -
  • Floating point.
  • -
  • Passing/returning aggregate types to and from functions.
  • Variable arguments.
  • -
  • Indirect function calls.
  • Interrupts/programs.
  • -
  • Debug info.
@@ -554,21 +513,74 @@ and subject to change. The Neon intrinsics, in particular, may change in future releases of LLVM. + + ARM AAPCS-VFP hard float ABI is supported. + ARM calling convention code is now tblgen generated instead of manual. + ARM: NEON support. neonfp for doing single precision fp with neon instead of VFP. + + + + +
+

New features of other targets include: +

+ +
    +
  • Add support for the PowerPC 64-bit SVR4 ABI.
  • +
  • Mips now supports O32 Calling Convention.
  • +
+ +
-

New features include:

    -
  • Something wonderful!
  • +
  • New EngineBuilder class for creating JITs: r76276
  • + New PrettyStackTrace, crashes of llvm tools should give some indication of what the compiler was doing at the time of the crash (e.g. running a pass), and print out command line arguments. + StringRef class, Twine class. + New WeakVH and AssertingVH and CallbackVH classes. + New llvm/ADT/Triple class. + llvm_report_error() error handling API (llvm/Support/ErrorHandling.h) + New llvm/System/Atomic.h, llvm/System/RWMutex.h for portable atomic ops, rw locks. + New SourceMgr, SMLoc classes for simple parsers with caret diagnostics and #include support, (used by + tablegen, llvm-mc, the .ll parser, FileCheck, etc) + +
      + + +
+ + + + +
+

Other miscellaneous features include:

+ +
    +
  • interpreter + libffi
  • +
  • JIT now supports generating more than 16M of code.
  • +
  • Users can now register +a JITEventListener +to receive callbacks when the JIT emits or frees machine code. The +OProfile support uses this mechanism.
  • + JIT support for oprofile (r75279), configure with --with-oprofile. Now we get line # and function info for JIT'd functions. + +
  • Profile info improvements by Andreas Neustifter.
  • +
  • Many extensions to the C APIs.
@@ -587,14 +599,23 @@
    -
  • Something horrible!
  • - +
  • The Itanium (IA64) backend has been removed. It was not supported and + bitrotted.
  • +
  • The BigBlock register allocator has been removed, it also bitrotted.
  • +
  • The C Backend (-march=c) is no longer considered part of the LLVM release +criteria. We still want it to work, but no one is maintaining it and it lacks +support for arbitrary precision integers and other important IR features.

In addition, many APIs have changed in this release. Some of the major LLVM API changes are:

+ + API Cleanup: + no use of hash_set/hash_map, no more llvm::OStream + Use raw_ostream for everything, killed off llvm/Streams.h and DOUT +
  • LLVM's global uniquing tables for Types and Constants have been privatized into members of an LLVMContext. A number of APIs @@ -724,7 +745,7 @@ See: Broken versions of GCC and other tools. However, A Modern GCC Build for x86/x64 has been made available from the third party AuroraUX Project -that has been meticulously tested for bootstrapping LLVM & Clang.
  • +that has been meticulously tested for bootstrapping LLVM & Clang.
@@ -809,9 +830,6 @@ results (PR1388).
  • Compilation for ARM Linux OABI (old ABI) is supported but not fully tested.
  • -
  • There is a bug in QEMU-ARM (<= 0.9.0) which causes it to incorrectly - execute -programs compiled with LLVM. Please use more recent versions of QEMU.
  • From clattner at apple.com Thu Oct 8 01:38:56 2009 From: clattner at apple.com (Chris Lattner) Date: Wed, 7 Oct 2009 23:38:56 -0700 Subject: [llvm-commits] [llvm] r83529 - /llvm/trunk/tools/llvmc/plugins/Base/Base.td.in In-Reply-To: <200910080603.n9863cC9019037@zion.cs.uiuc.edu> References: <200910080603.n9863cC9019037@zion.cs.uiuc.edu> Message-ID: On Oct 7, 2009, at 11:03 PM, Mikhail Glushenkov wrote: > Author: foldr > Date: Thu Oct 8 01:03:38 2009 > New Revision: 83529 > > URL: http://llvm.org/viewvc/llvm-project?rev=83529&view=rev > Log: > Unbreak the build. > > Forgot about the need to reconfigure after modifying Base.td.in.... Hi Mikhail, When I updated, the build did not detect that it needed to reconfigure the tree. Can you see why this isn't happening like it does for the .in files in llvm/include? -Chris > > Modified: > llvm/trunk/tools/llvmc/plugins/Base/Base.td.in > > Modified: llvm/trunk/tools/llvmc/plugins/Base/Base.td.in > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvmc/plugins/Base/Base.td.in?rev=83529&r1=83528&r2=83529&view=diff > > = > = > = > = > = > = > = > = > ====================================================================== > --- llvm/trunk/tools/llvmc/plugins/Base/Base.td.in (original) > +++ llvm/trunk/tools/llvmc/plugins/Base/Base.td.in Thu Oct 8 > 01:03:38 2009 > @@ -85,9 +85,9 @@ > (and (switch_on "emit-llvm"), (switch_on "c")), > (stop_compilation), > (switch_on "fsyntax-only"), (stop_compilation), > (not_empty "include"), (forward "include"), > - (not_empty "I"), (forward "I")), > - (not_empty "D"), (forward "D")), > - (not_empty "MF"), (forward "MF")), > + (not_empty "I"), (forward "I"), > + (not_empty "D"), (forward "D"), > + (not_empty "MF"), (forward "MF"), > (not_empty "MT"), (forward "MT"))), > (sink) > ]>; > > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits From sabre at nondot.org Thu Oct 8 
01:42:44 2009 From: sabre at nondot.org (Chris Lattner) Date: Thu, 08 Oct 2009 06:42:44 -0000 Subject: [llvm-commits] [llvm] r83531 - in /llvm/trunk: include/llvm/Analysis/LoopVR.h include/llvm/LinkAllPasses.h lib/Analysis/LoopVR.cpp Message-ID: <200910080642.n986givv023912@zion.cs.uiuc.edu> Author: lattner Date: Thu Oct 8 01:42:44 2009 New Revision: 83531 URL: http://llvm.org/viewvc/llvm-project?rev=83531&view=rev Log: remove LoopVR pass. According to Nick: "LoopVR's logic was copied into ScalarEvolution::getUnsignedRange and ::getSignedRange. Please delete LoopVR." Removed: llvm/trunk/include/llvm/Analysis/LoopVR.h llvm/trunk/lib/Analysis/LoopVR.cpp Modified: llvm/trunk/include/llvm/LinkAllPasses.h Removed: llvm/trunk/include/llvm/Analysis/LoopVR.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/LoopVR.h?rev=83530&view=auto ============================================================================== --- llvm/trunk/include/llvm/Analysis/LoopVR.h (original) +++ llvm/trunk/include/llvm/Analysis/LoopVR.h (removed) @@ -1,85 +0,0 @@ -//===- LoopVR.cpp - Value Range analysis driven by loop information -------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the interface for the loop-driven value range pass. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_ANALYSIS_LOOPVR_H -#define LLVM_ANALYSIS_LOOPVR_H - -#include "llvm/Pass.h" -#include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/Support/ConstantRange.h" -#include - -namespace llvm { - -/// LoopVR - This class maintains a mapping of Values to ConstantRanges. -/// There are interfaces to look up and update ranges by value, and for -/// accessing all values with range information. 
-/// -class LoopVR : public FunctionPass { -public: - static char ID; // Class identification, replacement for typeinfo - - LoopVR() : FunctionPass(&ID) {} - - bool runOnFunction(Function &F); - virtual void print(raw_ostream &os, const Module *) const; - void releaseMemory(); - - void getAnalysisUsage(AnalysisUsage &AU) const; - - //===--------------------------------------------------------------------- - // Methods that are used to look up and update particular values. - - /// get - return the ConstantRange for a given Value of IntegerType. - ConstantRange get(Value *V); - - /// remove - remove a value from this analysis. - void remove(Value *V); - - /// narrow - improve our unterstanding of a Value by pointing out that it - /// must fall within ConstantRange. To replace a range, remove it first. - void narrow(Value *V, const ConstantRange &CR); - - //===--------------------------------------------------------------------- - // Methods that are used to iterate across all values with information. 
- - /// size - returns the number of Values with information - unsigned size() const { return Map.size(); } - - typedef std::map::iterator iterator; - - /// begin - return an iterator to the first Value, ConstantRange pair - iterator begin() { return Map.begin(); } - - /// end - return an iterator one past the last Value, ConstantRange pair - iterator end() { return Map.end(); } - - /// getValue - return the Value referenced by an iterator - Value *getValue(iterator I) { return I->first; } - - /// getConstantRange - return the ConstantRange referenced by an iterator - ConstantRange getConstantRange(iterator I) { return *I->second; } - -private: - ConstantRange compute(Value *V); - - ConstantRange getRange(const SCEV *S, Loop *L, ScalarEvolution &SE); - - ConstantRange getRange(const SCEV *S, const SCEV *T, ScalarEvolution &SE); - - std::map Map; -}; - -} // end llvm namespace - -#endif Modified: llvm/trunk/include/llvm/LinkAllPasses.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/LinkAllPasses.h?rev=83531&r1=83530&r2=83531&view=diff ============================================================================== --- llvm/trunk/include/llvm/LinkAllPasses.h (original) +++ llvm/trunk/include/llvm/LinkAllPasses.h Thu Oct 8 01:42:44 2009 @@ -18,7 +18,6 @@ #include "llvm/Analysis/AliasSetTracker.h" #include "llvm/Analysis/FindUsedTypes.h" #include "llvm/Analysis/IntervalPartition.h" -#include "llvm/Analysis/LoopVR.h" #include "llvm/Analysis/Passes.h" #include "llvm/Analysis/PointerTracking.h" #include "llvm/Analysis/PostDominators.h" @@ -139,7 +138,6 @@ (void)new llvm::IntervalPartition(); (void)new llvm::FindUsedTypes(); (void)new llvm::ScalarEvolution(); - (void)new llvm::LoopVR(); (void)new llvm::PointerTracking(); ((llvm::Function*)0)->viewCFGOnly(); llvm::AliasSetTracker X(*(llvm::AliasAnalysis*)0); Removed: llvm/trunk/lib/Analysis/LoopVR.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/LoopVR.cpp?rev=83530&view=auto 
============================================================================== --- llvm/trunk/lib/Analysis/LoopVR.cpp (original) +++ llvm/trunk/lib/Analysis/LoopVR.cpp (removed) @@ -1,297 +0,0 @@ -//===- LoopVR.cpp - Value Range analysis driven by loop information -------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// FIXME: What does this do? -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "loopvr" -#include "llvm/Analysis/LoopVR.h" -#include "llvm/Constants.h" -#include "llvm/Instructions.h" -#include "llvm/LLVMContext.h" -#include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/ScalarEvolutionExpressions.h" -#include "llvm/Assembly/Writer.h" -#include "llvm/Support/CFG.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -using namespace llvm; - -char LoopVR::ID = 0; -static RegisterPass X("loopvr", "Loop Value Ranges", false, true); - -/// getRange - determine the range for a particular SCEV within a given Loop -ConstantRange LoopVR::getRange(const SCEV *S, Loop *L, ScalarEvolution &SE) { - const SCEV *T = SE.getBackedgeTakenCount(L); - if (isa(T)) - return ConstantRange(cast(S->getType())->getBitWidth(), true); - - T = SE.getTruncateOrZeroExtend(T, S->getType()); - return getRange(S, T, SE); -} - -/// getRange - determine the range for a particular SCEV with a given trip count -ConstantRange LoopVR::getRange(const SCEV *S, const SCEV *T, ScalarEvolution &SE){ - - if (const SCEVConstant *C = dyn_cast(S)) - return ConstantRange(C->getValue()->getValue()); - - ConstantRange FullSet(cast(S->getType())->getBitWidth(), true); - - // {x,+,y,+,...z}. We detect overflow by checking the size of the set after - // summing the upper and lower. 
- if (const SCEVAddExpr *Add = dyn_cast(S)) { - ConstantRange X = getRange(Add->getOperand(0), T, SE); - if (X.isFullSet()) return FullSet; - for (unsigned i = 1, e = Add->getNumOperands(); i != e; ++i) { - ConstantRange Y = getRange(Add->getOperand(i), T, SE); - if (Y.isFullSet()) return FullSet; - - APInt Spread_X = X.getSetSize(), Spread_Y = Y.getSetSize(); - APInt NewLower = X.getLower() + Y.getLower(); - APInt NewUpper = X.getUpper() + Y.getUpper() - 1; - if (NewLower == NewUpper) - return FullSet; - - X = ConstantRange(NewLower, NewUpper); - if (X.getSetSize().ult(Spread_X) || X.getSetSize().ult(Spread_Y)) - return FullSet; // we've wrapped, therefore, full set. - } - return X; - } - - // {x,*,y,*,...,z}. In order to detect overflow, we use k*bitwidth where - // k is the number of terms being multiplied. - if (const SCEVMulExpr *Mul = dyn_cast(S)) { - ConstantRange X = getRange(Mul->getOperand(0), T, SE); - if (X.isFullSet()) return FullSet; - - const IntegerType *Ty = IntegerType::get(SE.getContext(), X.getBitWidth()); - const IntegerType *ExTy = IntegerType::get(SE.getContext(), - X.getBitWidth() * Mul->getNumOperands()); - ConstantRange XExt = X.zeroExtend(ExTy->getBitWidth()); - - for (unsigned i = 1, e = Mul->getNumOperands(); i != e; ++i) { - ConstantRange Y = getRange(Mul->getOperand(i), T, SE); - if (Y.isFullSet()) return FullSet; - - ConstantRange YExt = Y.zeroExtend(ExTy->getBitWidth()); - XExt = ConstantRange(XExt.getLower() * YExt.getLower(), - ((XExt.getUpper()-1) * (YExt.getUpper()-1)) + 1); - } - return XExt.truncate(Ty->getBitWidth()); - } - - // X smax Y smax ... Z is: range(smax(X_smin, Y_smin, ..., Z_smin), - // smax(X_smax, Y_smax, ..., Z_smax)) - // It doesn't matter if one of the SCEVs has FullSet because we're taking - // a maximum of the minimums across all of them. 
- if (const SCEVSMaxExpr *SMax = dyn_cast(S)) { - ConstantRange X = getRange(SMax->getOperand(0), T, SE); - if (X.isFullSet()) return FullSet; - - APInt smin = X.getSignedMin(), smax = X.getSignedMax(); - for (unsigned i = 1, e = SMax->getNumOperands(); i != e; ++i) { - ConstantRange Y = getRange(SMax->getOperand(i), T, SE); - smin = APIntOps::smax(smin, Y.getSignedMin()); - smax = APIntOps::smax(smax, Y.getSignedMax()); - } - if (smax + 1 == smin) return FullSet; - return ConstantRange(smin, smax + 1); - } - - // X umax Y umax ... Z is: range(umax(X_umin, Y_umin, ..., Z_umin), - // umax(X_umax, Y_umax, ..., Z_umax)) - // It doesn't matter if one of the SCEVs has FullSet because we're taking - // a maximum of the minimums across all of them. - if (const SCEVUMaxExpr *UMax = dyn_cast(S)) { - ConstantRange X = getRange(UMax->getOperand(0), T, SE); - if (X.isFullSet()) return FullSet; - - APInt umin = X.getUnsignedMin(), umax = X.getUnsignedMax(); - for (unsigned i = 1, e = UMax->getNumOperands(); i != e; ++i) { - ConstantRange Y = getRange(UMax->getOperand(i), T, SE); - umin = APIntOps::umax(umin, Y.getUnsignedMin()); - umax = APIntOps::umax(umax, Y.getUnsignedMax()); - } - if (umax + 1 == umin) return FullSet; - return ConstantRange(umin, umax + 1); - } - - // L udiv R. Luckily, there's only ever 2 sides to a udiv. - if (const SCEVUDivExpr *UDiv = dyn_cast(S)) { - ConstantRange L = getRange(UDiv->getLHS(), T, SE); - ConstantRange R = getRange(UDiv->getRHS(), T, SE); - if (L.isFullSet() && R.isFullSet()) return FullSet; - - if (R.getUnsignedMax() == 0) { - // RHS must be single-element zero. Return an empty set. - return ConstantRange(R.getBitWidth(), false); - } - - APInt Lower = L.getUnsignedMin().udiv(R.getUnsignedMax()); - - APInt Upper; - - if (R.getUnsignedMin() == 0) { - // Just because it contains zero, doesn't mean it will also contain one. 
- ConstantRange NotZero(APInt(L.getBitWidth(), 1), - APInt::getNullValue(L.getBitWidth())); - R = R.intersectWith(NotZero); - } - - // But, the intersection might still include zero. If it does, then we know - // it also included one. - if (R.contains(APInt::getNullValue(L.getBitWidth()))) - Upper = L.getUnsignedMax(); - else - Upper = L.getUnsignedMax().udiv(R.getUnsignedMin()); - - return ConstantRange(Lower, Upper); - } - - // ConstantRange already implements the cast operators. - - if (const SCEVZeroExtendExpr *ZExt = dyn_cast(S)) { - T = SE.getTruncateOrZeroExtend(T, ZExt->getOperand()->getType()); - ConstantRange X = getRange(ZExt->getOperand(), T, SE); - return X.zeroExtend(cast(ZExt->getType())->getBitWidth()); - } - - if (const SCEVSignExtendExpr *SExt = dyn_cast(S)) { - T = SE.getTruncateOrZeroExtend(T, SExt->getOperand()->getType()); - ConstantRange X = getRange(SExt->getOperand(), T, SE); - return X.signExtend(cast(SExt->getType())->getBitWidth()); - } - - if (const SCEVTruncateExpr *Trunc = dyn_cast(S)) { - T = SE.getTruncateOrZeroExtend(T, Trunc->getOperand()->getType()); - ConstantRange X = getRange(Trunc->getOperand(), T, SE); - if (X.isFullSet()) return FullSet; - return X.truncate(cast(Trunc->getType())->getBitWidth()); - } - - if (const SCEVAddRecExpr *AddRec = dyn_cast(S)) { - const SCEVConstant *Trip = dyn_cast(T); - if (!Trip) return FullSet; - - if (AddRec->isAffine()) { - const SCEV *StartHandle = AddRec->getStart(); - const SCEV *StepHandle = AddRec->getOperand(1); - - const SCEVConstant *Step = dyn_cast(StepHandle); - if (!Step) return FullSet; - - uint32_t ExWidth = 2 * Trip->getValue()->getBitWidth(); - APInt TripExt = Trip->getValue()->getValue(); TripExt.zext(ExWidth); - APInt StepExt = Step->getValue()->getValue(); StepExt.zext(ExWidth); - if ((TripExt * StepExt).ugt(APInt::getLowBitsSet(ExWidth, ExWidth >> 1))) - return FullSet; - - const SCEV *EndHandle = SE.getAddExpr(StartHandle, - SE.getMulExpr(T, StepHandle)); - const 
SCEVConstant *Start = dyn_cast(StartHandle); - const SCEVConstant *End = dyn_cast(EndHandle); - if (!Start || !End) return FullSet; - - const APInt &StartInt = Start->getValue()->getValue(); - const APInt &EndInt = End->getValue()->getValue(); - const APInt &StepInt = Step->getValue()->getValue(); - - if (StepInt.isNegative()) { - if (EndInt == StartInt + 1) return FullSet; - return ConstantRange(EndInt, StartInt + 1); - } else { - if (StartInt == EndInt + 1) return FullSet; - return ConstantRange(StartInt, EndInt + 1); - } - } - } - - // TODO: non-affine addrec, udiv, SCEVUnknown (narrowed from elsewhere)? - - return FullSet; -} - -void LoopVR::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequiredTransitive(); - AU.addRequiredTransitive(); - AU.setPreservesAll(); -} - -bool LoopVR::runOnFunction(Function &F) { Map.clear(); return false; } - -void LoopVR::print(raw_ostream &OS, const Module *) const { - for (std::map::const_iterator I = Map.begin(), - E = Map.end(); I != E; ++I) { - OS << *I->first << ": " << *I->second << '\n'; - } -} - -void LoopVR::releaseMemory() { - for (std::map::iterator I = Map.begin(), - E = Map.end(); I != E; ++I) { - delete I->second; - } - - Map.clear(); -} - -ConstantRange LoopVR::compute(Value *V) { - if (ConstantInt *CI = dyn_cast(V)) - return ConstantRange(CI->getValue()); - - Instruction *I = dyn_cast(V); - if (!I) - return ConstantRange(cast(V->getType())->getBitWidth(), false); - - LoopInfo &LI = getAnalysis(); - - Loop *L = LI.getLoopFor(I->getParent()); - if (!L || L->isLoopInvariant(I)) - return ConstantRange(cast(V->getType())->getBitWidth(), false); - - ScalarEvolution &SE = getAnalysis(); - - const SCEV *S = SE.getSCEV(I); - if (isa(S) || isa(S)) - return ConstantRange(cast(V->getType())->getBitWidth(), false); - - return ConstantRange(getRange(S, L, SE)); -} - -ConstantRange LoopVR::get(Value *V) { - std::map::iterator I = Map.find(V); - if (I == Map.end()) { - ConstantRange *CR = new ConstantRange(compute(V)); - 
Map[V] = CR; - return *CR; - } - - return *I->second; -} - -void LoopVR::remove(Value *V) { - std::map::iterator I = Map.find(V); - if (I != Map.end()) { - delete I->second; - Map.erase(I); - } -} - -void LoopVR::narrow(Value *V, const ConstantRange &CR) { - if (CR.isFullSet()) return; - - std::map::iterator I = Map.find(V); - if (I == Map.end()) - Map[V] = new ConstantRange(CR); - else - Map[V] = new ConstantRange(Map[V]->intersectWith(CR)); -} From sabre at nondot.org Thu Oct 8 02:01:46 2009 From: sabre at nondot.org (Chris Lattner) Date: Thu, 08 Oct 2009 07:01:46 -0000 Subject: [llvm-commits] [llvm] r83532 - /llvm/trunk/docs/ReleaseNotes-2.6.html Message-ID: <200910080701.n9871kaN026316@zion.cs.uiuc.edu> Author: lattner Date: Thu Oct 8 02:01:46 2009 New Revision: 83532 URL: http://llvm.org/viewvc/llvm-project?rev=83532&view=rev Log: all content split into sections, still much work to be done. Modified: llvm/trunk/docs/ReleaseNotes-2.6.html Modified: llvm/trunk/docs/ReleaseNotes-2.6.html URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/docs/ReleaseNotes-2.6.html?rev=83532&r1=83531&r2=83532&view=diff ============================================================================== --- llvm/trunk/docs/ReleaseNotes-2.6.html (original) +++ llvm/trunk/docs/ReleaseNotes-2.6.html Thu Oct 8 02:01:46 2009 @@ -51,45 +51,26 @@ releases page.

    - - - - MC: - MCSection, MCAsmInfo - MCInstPrinter did it make it in? - MCInst (X86 using it so far) - Rewrite of X86 GV selection logic: TargetOperand flags on ExternalSymbol, GV, etc operands. - Can parse and re-print out an darwin-x86 .s file. - TargetLoweringObjectFile, MCSectionKind - Verrrry early start of a macho writer. - - ELF Writer? How stable? - -APIs: - - Shrink wrapping support in PEI, what is the state of it? + - LLVM build now builds all libraries as .a files instead of some - libraries as relinked .o files. This requires some APIs like - InitializeAllTargets.h. TargetRegistry! - - + + +
    +

    +UPDATE! +blah + + MC: + MCSection, MCAsmInfo + MCInstPrinter did it make it in? + MCInst (X86 using it so far) + Rewrite of X86 GV selection logic: TargetOperand flags on ExternalSymbol, GV, etc operands. + Can parse and re-print out an darwin-x86 .s file. + TargetLoweringObjectFile, MCSectionKind + Verrrry early start of a macho writer. +

    + +
    + + + + + +
    +

    Rubinius is an environment +for running Ruby code which strives to write as much of the core class +implementation in Ruby as possible. Combined with a bytecode interpreting VM, it +uses LLVM to optimize and compile ruby code down to machine code. Techniques +such as type feedback, method inlining, and uncommon traps are all used to +remove dynamism from ruby execution and increase performance.

    +
    @@ -236,6 +254,7 @@

    +UPDATE! Pure is an algebraic/functional programming language based on term rewriting. Programs are collections of equations which are used to evaluate expressions in @@ -262,6 +281,7 @@

    +UPDATE! LDC is an implementation of the D Programming Language using the LLVM optimizer and code generator. The LDC project works great with the LLVM 2.6 release. General improvements in @@ -279,7 +299,9 @@

    -

    Roadsend PHP (rphp) is an open +

    +UPDATE! +Roadsend PHP (rphp) is an open source implementation of the PHP programming language that uses LLVM for its optimizer, JIT, and static compiler. This is a reimplementation of an earlier project that is now based on LLVM.

    @@ -291,22 +313,14 @@
    -

    Unladen Swallow is a +

    +UPDATE! +Unladen Swallow is a branch of Python intended to be fully compatible and significantly faster. It uses LLVM's optimization passes and JIT compiler.

    - - - -
    -

    Rubinius is a new virtual -machine for Ruby. It leverages LLVM to dynamically compile Ruby code down to -machine code using LLVM's JIT.

    -
    @@ -339,7 +353,6 @@
  • New MSP430 and SystemZ backends.
  • New BlackFin backend.
  • LLVMContext, llvm_start_multithreaded: ProgrammersManual.html#threading
  • -
  • Unladen swallow as user?
  • klee web page at klee.llvm.org
  • FileCheck
  • New compiler-rt project.
  • @@ -348,25 +361,6 @@
    - - - - -
    - -

    LLVM fully supports the llvm-gcc 4.2 front-end, which marries the GCC -front-ends and driver with the LLVM optimizer and code generator. It currently -includes support for the C, C++, Objective-C, Ada, and Fortran front-ends.

    - -
      -
    • Something wonderful!
    • -
    - -
    - -
    LLVM IR and Core Improvements @@ -438,8 +432,7 @@
  • Regalloc hints for allocation stuff: Evan r73381/r73671. Finished/enabled?
  • Stack slot coloring for register spills (denser stack frames)
  • SelectionDAGS: New BuildVectorSDNode (r65296), and ISD::VECTOR_SHUFFLE (r69952 / PR2957)
  • -
  • PostRA scheduler improvements David Goodwin.
  • - +
  • Experimental support for shrink wrapping in PEI.
  • @@ -579,8 +572,17 @@ OProfile support uses this mechanism. JIT support for oprofile (r75279), configure with --with-oprofile. Now we get line # and function info for JIT'd functions. -
  • Profile info improvements by Andreas Neustifter.
  • -
  • Many extensions to the C APIs.
  • +
  • Profile info improvements by Andreas Neustifter.
  • +
  • Many extensions to the C APIs.
  • + +
  • LLVMC: + +* Dynamic plugins now work on Windows. +* New option property: init. Makes it possible to provide default values for + options defined in plugins (interface to cl::init). +* New example: Skeleton, shows how to create a standalone LLVMC-based driver. +* New example: mcc16, a driver for the PIC16 toolchain.
  • +
    @@ -607,6 +609,12 @@ support for arbitrary precision integers and other important IR features. + LLVM build now builds all libraries as .a files instead of some + libraries as relinked .o files. This requires some APIs like + InitializeAllTargets.h. TargetRegistry! + + +

    In addition, many APIs have changed in this release. Some of the major LLVM API changes are:

    From baldrick at free.fr Thu Oct 8 04:21:49 2009 From: baldrick at free.fr (Duncan Sands) Date: Thu, 08 Oct 2009 11:21:49 +0200 Subject: [llvm-commits] [llvm-gcc-4.2] r83493 - /llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp In-Reply-To: <200910072206.n97M6UTm001718@zion.cs.uiuc.edu> References: <200910072206.n97M6UTm001718@zion.cs.uiuc.edu> Message-ID: <4ACDAF2D.2020900@free.fr> Hi Devang, > Attach debug location info to allocas created for temporaries. how about doing this inside CreateTemporary itself? Ciao, Duncan. From edwintorok at gmail.com Thu Oct 8 04:23:28 2009 From: edwintorok at gmail.com (=?ISO-8859-1?Q?T=F6r=F6k_Edwin?=) Date: Thu, 08 Oct 2009 12:23:28 +0300 Subject: [llvm-commits] [PATCH] Preserve DebugInfo during LICM Message-ID: <4ACDAF90.1040202@gmail.com> Hi, LICM currently sinks, and deletes debug stoppoints as dead instructions. The attached patch disables sink/hoist of debuginfo (simplifycfg should already knows to ignore debuginfo when checking if it can merge blocks), and also updates the debug info when sinking/hoisting instructions (by copying the stoppoints as needed). Before committing I'll also add a testcase, and run TEST.dbgopt on a part of the test suite. I think that we'd need another TEST.dbgquality that tests whether each basicblock has at least one stoppoint (if original did), and also outputs how many instructions have a stoppoint in same BB, how many have a stoppoint foundable by findStopPointInst, and how many don't have at all. The test would pass if each BB has at least one stoppoint, and output an overall quality for each test, and for the entire testsuite. What do you think? Best regards, --Edwin -------------- next part -------------- A non-text attachment was scrubbed... 
Name: licm-dbginfo.patch Type: text/x-diff Size: 6792 bytes Desc: not available Url : http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20091008/18bc4678/attachment.bin From richard at xmos.com Thu Oct 8 10:38:17 2009 From: richard at xmos.com (Richard Osborne) Date: Thu, 08 Oct 2009 15:38:17 -0000 Subject: [llvm-commits] [llvm] r83549 - in /llvm/trunk: lib/Target/XCore/XCoreInstrInfo.td test/CodeGen/XCore/ashr.ll Message-ID: <200910081538.n98FcHhn023340@zion.cs.uiuc.edu> Author: friedgold Date: Thu Oct 8 10:38:17 2009 New Revision: 83549 URL: http://llvm.org/viewvc/llvm-project?rev=83549&view=rev Log: Add some peepholes for signed comparisons using ashr X, X, 32. Added: llvm/trunk/test/CodeGen/XCore/ashr.ll Modified: llvm/trunk/lib/Target/XCore/XCoreInstrInfo.td Modified: llvm/trunk/lib/Target/XCore/XCoreInstrInfo.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/XCore/XCoreInstrInfo.td?rev=83549&r1=83548&r2=83549&view=diff ============================================================================== --- llvm/trunk/lib/Target/XCore/XCoreInstrInfo.td (original) +++ llvm/trunk/lib/Target/XCore/XCoreInstrInfo.td Thu Oct 8 10:38:17 2009 @@ -975,5 +975,21 @@ def : Pat<(sra GRRegs:$src, 31), (ASHR_l2rus GRRegs:$src, 32)>; +def : Pat<(brcond (setlt GRRegs:$lhs, 0), bb:$dst), + (BRFT_lru6 (ASHR_l2rus GRRegs:$lhs, 32), bb:$dst)>; + +// setge X, 0 is canonicalized to setgt X, -1 +def : Pat<(brcond (setgt GRRegs:$lhs, -1), bb:$dst), + (BRFF_lru6 (ASHR_l2rus GRRegs:$lhs, 32), bb:$dst)>; + +def : Pat<(select (setlt GRRegs:$lhs, 0), GRRegs:$T, GRRegs:$F), + (SELECT_CC (ASHR_l2rus GRRegs:$lhs, 32), GRRegs:$T, GRRegs:$F)>; + +def : Pat<(select (setgt GRRegs:$lhs, -1), GRRegs:$T, GRRegs:$F), + (SELECT_CC (ASHR_l2rus GRRegs:$lhs, 32), GRRegs:$F, GRRegs:$T)>; + +def : Pat<(setgt GRRegs:$lhs, -1), + (EQ_2rus (ASHR_l2rus GRRegs:$lhs, 32), 0)>; + def : Pat<(sra (shl GRRegs:$src, immBpwSubBitp:$imm), immBpwSubBitp:$imm), (SEXT_rus GRRegs:$src, (bpwsub_xform 
immBpwSubBitp:$imm))>; Added: llvm/trunk/test/CodeGen/XCore/ashr.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/XCore/ashr.ll?rev=83549&view=auto ============================================================================== --- llvm/trunk/test/CodeGen/XCore/ashr.ll (added) +++ llvm/trunk/test/CodeGen/XCore/ashr.ll Thu Oct 8 10:38:17 2009 @@ -0,0 +1,76 @@ +; RUN: llc < %s -march=xcore | FileCheck %s +define i32 @ashr(i32 %a, i32 %b) { + %1 = ashr i32 %a, %b + ret i32 %1 +} +; CHECK: ashr: +; CHECK-NEXT: ashr r0, r0, r1 + +define i32 @ashri1(i32 %a) { + %1 = ashr i32 %a, 24 + ret i32 %1 +} +; CHECK: ashri1: +; CHECK-NEXT: ashr r0, r0, 24 + +define i32 @ashri2(i32 %a) { + %1 = ashr i32 %a, 31 + ret i32 %1 +} +; CHECK: ashri2: +; CHECK-NEXT: ashr r0, r0, 32 + +define i32 @f1(i32 %a) { + %1 = icmp slt i32 %a, 0 + br i1 %1, label %less, label %not_less +less: + ret i32 10 +not_less: + ret i32 17 +} +; CHECK: f1: +; CHECK-NEXT: ashr r0, r0, 32 +; CHECK-NEXT: bf r0 + +define i32 @f2(i32 %a) { + %1 = icmp sge i32 %a, 0 + br i1 %1, label %greater, label %not_greater +greater: + ret i32 10 +not_greater: + ret i32 17 +} +; CHECK: f2: +; CHECK-NEXT: ashr r0, r0, 32 +; CHECK-NEXT: bt r0 + +define i32 @f3(i32 %a) { + %1 = icmp slt i32 %a, 0 + %2 = select i1 %1, i32 10, i32 17 + ret i32 %2 +} +; CHECK: f3: +; CHECK-NEXT: ashr r1, r0, 32 +; CHECK-NEXT: ldc r0, 10 +; CHECK-NEXT: bt r1 +; CHECK: ldc r0, 17 + +define i32 @f4(i32 %a) { + %1 = icmp sge i32 %a, 0 + %2 = select i1 %1, i32 10, i32 17 + ret i32 %2 +} +; CHECK: f4: +; CHECK-NEXT: ashr r1, r0, 32 +; CHECK-NEXT: ldc r0, 17 +; CHECK-NEXT: bt r1 +; CHECK: ldc r0, 10 + +define i32 @f5(i32 %a) { + %1 = icmp sge i32 %a, 0 + %2 = zext i1 %1 to i32 + ret i32 %2 +} +; CHECK: f5: +; CHECK-NEXT: ashr r0, r0, 32 +; CHECK-NEXT: eq r0, r0, 0 From sabre at nondot.org Thu Oct 8 11:01:34 2009 From: sabre at nondot.org (Chris Lattner) Date: Thu, 08 Oct 2009 16:01:34 -0000 Subject: [llvm-commits] [llvm] r83551 - 
/llvm/trunk/docs/ReleaseNotes-2.6.html Message-ID: <200910081601.n98G1Yh5026329@zion.cs.uiuc.edu> Author: lattner Date: Thu Oct 8 11:01:33 2009 New Revision: 83551 URL: http://llvm.org/viewvc/llvm-project?rev=83551&view=rev Log: some updates from users of llvm Modified: llvm/trunk/docs/ReleaseNotes-2.6.html Modified: llvm/trunk/docs/ReleaseNotes-2.6.html URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/docs/ReleaseNotes-2.6.html?rev=83551&r1=83550&r2=83551&view=diff ============================================================================== --- llvm/trunk/docs/ReleaseNotes-2.6.html (original) +++ llvm/trunk/docs/ReleaseNotes-2.6.html Thu Oct 8 11:01:33 2009 @@ -223,6 +223,11 @@ uses LLVM to optimize and compile ruby code down to machine code. Techniques such as type feedback, method inlining, and uncommon traps are all used to remove dynamism from ruby execution and increase performance.

    + +

    Since LLVM 2.5, Rubinius has made several major leaps forward, implementing +a counter based JIT, type feedback, and speculative method inlining. +

    + @@ -254,7 +259,6 @@

    -UPDATE! Pure is an algebraic/functional programming language based on term rewriting. Programs are collections of equations which are used to evaluate expressions in @@ -264,12 +268,8 @@ an easy-to-use C interface. The interpreter uses LLVM as a backend to JIT-compile Pure programs to fast native code.

    -

    In addition to the usual algebraic data structures, Pure also has -MATLAB-style matrices in order to support numeric computations and signal -processing in an efficient way. Pure is mainly aimed at mathematical -applications right now, but it has been designed as a general purpose language. -The dynamic interpreter environment and the C interface make it possible to use -it as a kind of functional scripting language for many application areas. +

    Pure versions 0.31 and later have been tested and are known to work with +LLVM 2.6 (and continue to work with older LLVM releases >= 2.3 as well).

    @@ -300,7 +300,6 @@

    -UPDATE! Roadsend PHP (rphp) is an open source implementation of the PHP programming language that uses LLVM for its optimizer, JIT, and static compiler. This is a From dpatel at apple.com Thu Oct 8 11:23:09 2009 From: dpatel at apple.com (Devang Patel) Date: Thu, 8 Oct 2009 09:23:09 -0700 Subject: [llvm-commits] [llvm-gcc-4.2] r83493 - /llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp In-Reply-To: <4ACDAF2D.2020900@free.fr> References: <200910072206.n97M6UTm001718@zion.cs.uiuc.edu> <4ACDAF2D.2020900@free.fr> Message-ID: On Oct 8, 2009, at 2:21 AM, Duncan Sands wrote: > Hi Devang, > >> Attach debug location info to allocas created for temporaries. > > how about doing this inside CreateTemporary itself? The trouble is whether there is a need to have a stoppoint or not. I am investigating a case where gdb trips if there is an extra line table entry! I'll try to simplify this as much as possible. - Devang From jyasskin at google.com Thu Oct 8 11:42:58 2009 From: jyasskin at google.com (Jeffrey Yasskin) Date: Thu, 8 Oct 2009 09:42:58 -0700 Subject: [llvm-commits] [llvm] r83010 - /llvm/trunk/lib/VMCore/AsmWriter.cpp In-Reply-To: References: <200909282056.n8SKu0ca012458@zion.cs.uiuc.edu> Message-ID: On Wed, Oct 7, 2009 at 9:38 AM, Devang Patel wrote: > > On Oct 6, 2009, at 5:44 PM, Jeffrey Yasskin wrote: > > This patch assumes that the Module* parameter to AssemblyWriter() is > non-null, but Value::print() in this file can pass NULL. This crashes if you > run `opt -instcombine -debug-only=instcombine` on any input where > instcombine will replace values. > > Yup. Fixed in r83462. Thanks!
From gohman at apple.com Thu Oct 8 12:00:02 2009 From: gohman at apple.com (Dan Gohman) Date: Thu, 08 Oct 2009 17:00:02 -0000 Subject: [llvm-commits] [llvm] r83555 - in /llvm/trunk: include/llvm/Pass.h include/llvm/PassAnalysisSupport.h lib/CodeGen/MachineFunctionPass.cpp lib/Transforms/Scalar/CodeGenLICM.cpp lib/VMCore/Pass.cpp Message-ID: <200910081700.n98H02pl001774@zion.cs.uiuc.edu> Author: djg Date: Thu Oct 8 12:00:02 2009 New Revision: 83555 URL: http://llvm.org/viewvc/llvm-project?rev=83555&view=rev Log: Add a form of addPreserved which takes a string argument, to allow passes to declare that they preserve other passes without needing to pull in additional header file or library dependencies. Convert MachineFunctionPass and CodeGenLICM to make use of this. Modified: llvm/trunk/include/llvm/Pass.h llvm/trunk/include/llvm/PassAnalysisSupport.h llvm/trunk/lib/CodeGen/MachineFunctionPass.cpp llvm/trunk/lib/Transforms/Scalar/CodeGenLICM.cpp llvm/trunk/lib/VMCore/Pass.cpp Modified: llvm/trunk/include/llvm/Pass.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Pass.h?rev=83555&r1=83554&r2=83555&view=diff ============================================================================== --- llvm/trunk/include/llvm/Pass.h (original) +++ llvm/trunk/include/llvm/Pass.h Thu Oct 8 12:00:02 2009 @@ -46,6 +46,7 @@ class AnalysisResolver; class PMDataManager; class raw_ostream; +class StringRef; // AnalysisID - Use the PassInfo to identify a pass... typedef const PassInfo* AnalysisID; @@ -164,6 +165,10 @@ // or null if it is not known. static const PassInfo *lookupPassInfo(intptr_t TI); + // lookupPassInfo - Return the pass info object for the pass with the given + // argument string, or null if it is not known. + static const PassInfo *lookupPassInfo(const StringRef &Arg); + /// getAnalysisIfAvailable() - Subclasses use this function to /// get analysis information that might be around, for example to update it. 
/// This is different than getAnalysis in that it can fail (if the analysis Modified: llvm/trunk/include/llvm/PassAnalysisSupport.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/PassAnalysisSupport.h?rev=83555&r1=83554&r2=83555&view=diff ============================================================================== --- llvm/trunk/include/llvm/PassAnalysisSupport.h (original) +++ llvm/trunk/include/llvm/PassAnalysisSupport.h Thu Oct 8 12:00:02 2009 @@ -24,6 +24,8 @@ namespace llvm { +class StringRef; + // No need to include Pass.h, we are being included by it! //===----------------------------------------------------------------------===// @@ -79,6 +81,9 @@ return *this; } + // addPreserved - Add the specified Pass class to the set of analyses + // preserved by this pass. + // template AnalysisUsage &addPreserved() { assert(Pass::getClassPassInfo() && "Pass class not registered!"); @@ -86,6 +91,18 @@ return *this; } + // addPreserved - Add the Pass with the specified argument string to the set + // of analyses preserved by this pass. If no such Pass exists, do nothing. + // This can be useful when a pass is trivially preserved, but may not be + // linked in. Be careful about spelling! + // + AnalysisUsage &addPreserved(const StringRef &Arg) { + const PassInfo *PI = Pass::lookupPassInfo(Arg); + // If the pass exists, preserve it. Otherwise silently do nothing. 
+ if (PI) Preserved.push_back(PI); + return *this; + } + // setPreservesAll - Set by analyses that do not transform their input at all void setPreservesAll() { PreservesAll = true; } bool getPreservesAll() const { return PreservesAll; } Modified: llvm/trunk/lib/CodeGen/MachineFunctionPass.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MachineFunctionPass.cpp?rev=83555&r1=83554&r2=83555&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/MachineFunctionPass.cpp (original) +++ llvm/trunk/lib/CodeGen/MachineFunctionPass.cpp Thu Oct 8 12:00:02 2009 @@ -11,11 +11,8 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Function.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/Analysis/IVUsers.h" -#include "llvm/Analysis/LiveValues.h" -#include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/CodeGen/MachineFunctionAnalysis.h" #include "llvm/CodeGen/MachineFunctionPass.h" using namespace llvm; @@ -36,15 +33,18 @@ // MachineFunctionPass preserves all LLVM IR passes, but there's no // high-level way to express this. Instead, just list a bunch of - // passes explicitly. + // passes explicitly. This does not include setPreservesCFG, + // because CodeGen overloads that to mean preserving the MachineBasicBlock + // CFG in addition to the LLVM IR CFG. 
AU.addPreserved(); - AU.addPreserved(); - AU.addPreserved(); - AU.addPreserved(); - AU.addPreserved(); - AU.addPreserved(); - AU.addPreserved(); - AU.addPreserved(); + AU.addPreserved("scalar-evolution"); + AU.addPreserved("iv-users"); + AU.addPreserved("memdep"); + AU.addPreserved("live-values"); + AU.addPreserved("domtree"); + AU.addPreserved("domfrontier"); + AU.addPreserved("loops"); + AU.addPreserved("lda"); FunctionPass::getAnalysisUsage(AU); } Modified: llvm/trunk/lib/Transforms/Scalar/CodeGenLICM.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/CodeGenLICM.cpp?rev=83555&r1=83554&r2=83555&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/Scalar/CodeGenLICM.cpp (original) +++ llvm/trunk/lib/Transforms/Scalar/CodeGenLICM.cpp Thu Oct 8 12:00:02 2009 @@ -22,8 +22,6 @@ #include "llvm/LLVMContext.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/Analysis/IVUsers.h" #include "llvm/ADT/DenseMap.h" using namespace llvm; @@ -104,8 +102,10 @@ AU.addPreservedID(LoopSimplifyID); AU.addPreserved(); AU.addPreserved(); - AU.addPreserved(); - AU.addPreserved(); + AU.addPreserved("scalar-evolution"); + AU.addPreserved("iv-users"); + AU.addPreserved("lda"); + AU.addPreserved("live-values"); // Hoisting requires a loop preheader. AU.addRequiredID(LoopSimplifyID); Modified: llvm/trunk/lib/VMCore/Pass.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/Pass.cpp?rev=83555&r1=83554&r2=83555&view=diff ============================================================================== --- llvm/trunk/lib/VMCore/Pass.cpp (original) +++ llvm/trunk/lib/VMCore/Pass.cpp Thu Oct 8 12:00:02 2009 @@ -129,6 +129,9 @@ /// pass. 
typedef std::map MapType; MapType PassInfoMap; + + typedef StringMap StringMapType; + StringMapType PassInfoStringMap; /// AnalysisGroupInfo - Keep track of information for each analysis group. struct AnalysisGroupInfo { @@ -145,10 +148,16 @@ return I != PassInfoMap.end() ? I->second : 0; } + const PassInfo *GetPassInfo(const StringRef &Arg) const { + StringMapType::const_iterator I = PassInfoStringMap.find(Arg); + return I != PassInfoStringMap.end() ? I->second : 0; + } + void RegisterPass(const PassInfo &PI) { bool Inserted = PassInfoMap.insert(std::make_pair(PI.getTypeInfo(),&PI)).second; assert(Inserted && "Pass registered multiple times!"); Inserted=Inserted; + PassInfoStringMap[PI.getPassArgument()] = &PI; } void UnregisterPass(const PassInfo &PI) { @@ -157,6 +166,7 @@ // Remove pass from the map. PassInfoMap.erase(I); + PassInfoStringMap.erase(PI.getPassArgument()); } void EnumerateWith(PassRegistrationListener *L) { @@ -227,6 +237,10 @@ return getPassRegistrar()->GetPassInfo(TI); } +const PassInfo *Pass::lookupPassInfo(const StringRef &Arg) { + return getPassRegistrar()->GetPassInfo(Arg); +} + void PassInfo::registerPass() { getPassRegistrar()->RegisterPass(*this); From richard at xmos.com Thu Oct 8 12:14:57 2009 From: richard at xmos.com (Richard Osborne) Date: Thu, 08 Oct 2009 17:14:57 -0000 Subject: [llvm-commits] [llvm] r83556 - /llvm/trunk/lib/Target/XCore/XCoreISelLowering.cpp Message-ID: <200910081714.n98HEvVj003705@zion.cs.uiuc.edu> Author: friedgold Date: Thu Oct 8 12:14:57 2009 New Revision: 83556 URL: http://llvm.org/viewvc/llvm-project?rev=83556&view=rev Log: Add missing names for the XCore specific LADD and LSUB nodes. 
Modified: llvm/trunk/lib/Target/XCore/XCoreISelLowering.cpp Modified: llvm/trunk/lib/Target/XCore/XCoreISelLowering.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/XCore/XCoreISelLowering.cpp?rev=83556&r1=83555&r2=83556&view=diff ============================================================================== --- llvm/trunk/lib/Target/XCore/XCoreISelLowering.cpp (original) +++ llvm/trunk/lib/Target/XCore/XCoreISelLowering.cpp Thu Oct 8 12:14:57 2009 @@ -51,6 +51,8 @@ case XCoreISD::CPRelativeWrapper : return "XCoreISD::CPRelativeWrapper"; case XCoreISD::STWSP : return "XCoreISD::STWSP"; case XCoreISD::RETSP : return "XCoreISD::RETSP"; + case XCoreISD::LADD : return "XCoreISD::LADD"; + case XCoreISD::LSUB : return "XCoreISD::LSUB"; default : return NULL; } } From evan.cheng at apple.com Thu Oct 8 12:59:17 2009 From: evan.cheng at apple.com (Evan Cheng) Date: Thu, 8 Oct 2009 10:59:17 -0700 Subject: [llvm-commits] [llvm] r83521 - in /llvm/trunk: include/llvm/Target/TargetRegisterInfo.h lib/CodeGen/PrologEpilogInserter.cpp lib/CodeGen/PrologEpilogInserter.h lib/Target/ARM/Thumb1RegisterInfo.cpp lib/Target/ARM/Thumb1RegisterInfo.h In-Reply-To: References: <200910080146.n981kxrE019225@zion.cs.uiuc.edu> <51D3B8B5-79BB-4399-BFB0-9C7D6ABCE484@apple.com> Message-ID: <5E52095F-2F0E-4437-AC53-0345654EB461@apple.com> On Oct 7, 2009, at 11:20 PM, Jim Grosbach wrote: > Heya, > > I didn't see any regressions when I ran the singlesource tests by > hand, and the nightly run last night looks good with the scavenging > in for LLCBETA. If there's a problem on tonight's run, it'll be the > register re-use stuff. I wanted to get this in this evening so we'd > have one change at a time per nightly run to make things simpler to > track down if there's problems. For an additional sanity check, I > also built a sample application (unzip) and ran some tests of that > and it worked great (and had better codegen). Ok. 
In addition to "no new failures", please make sure all of thumb1 tests are passing. The #1 reason for this work is to fix remaining failures that are caused R3 being clobbered. > > Assuming things look good, I have a patch to enable allocation of R3 > ready to go. I held off on that for tonight so as not to change too > many things per nightly run. Similarly, I looked more closely at > getting the ARM and T2 targets to use the new scheme, and I didn't > see anything that will make that non-trivial. > Ok. > Basically, by tomorrow we should have the functionality in place. > Still some cleanup to do to address a few of your comments I haven't > gotten to yet and a few other things I've been keeping notes on that > I want to fix. Ok. I strongly suggest eliminating the call to findLastUseReg() in the scavengeFrameVirtualRegs loop. The other changes I want are listed below. > > FWIW, I looked again at the eliminateFrameIndex() hooks and didn't > see any way around the changes I made there to track the values and > associated virtual registers. We can talk over the specifics > tomorrow. Maybe you'll have some ideas for ways to avoid that stuff. > It'd be very nice to not need to have that bit. Would make it a lot > easier to make other ports use the new stuff and such. Let's talk offline about this. BTW, please fix this compilation warning: PrologEpilogInserter.cpp: In member function 'void llvm::PEI::scavengeFrameVirtualRegs(llvm::MachineFunction&)': PrologEpilogInserter.cpp:770: warning: 'PrevValue' may be used uninitialized in this function Evan > > Regards, > Jim > > On Oct 7, 2009, at 10:38 PM, Evan Cheng wrote: > >> Nice. Are all Thumb1 tests passing? >> >> Evan >> >> On Oct 7, 2009, at 6:46 PM, Jim Grosbach wrote: >> >>> Author: grosbach >>> Date: Wed Oct 7 20:46:59 2009 >>> New Revision: 83521 >>> >>> URL: http://llvm.org/viewvc/llvm-project?rev=83521&view=rev >>> Log: >>> Re-enable register scavenging in Thumb1 by default. 
>>> >>> Modified: >>> llvm/trunk/include/llvm/Target/TargetRegisterInfo.h >>> llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp >>> llvm/trunk/lib/CodeGen/PrologEpilogInserter.h >>> llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.cpp >>> llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.h >>> >>> Modified: llvm/trunk/include/llvm/Target/TargetRegisterInfo.h >>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetRegisterInfo.h?rev=83521&r1=83520&r2=83521&view=diff >>> >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> ==================================================================== >>> --- llvm/trunk/include/llvm/Target/TargetRegisterInfo.h (original) >>> +++ llvm/trunk/include/llvm/Target/TargetRegisterInfo.h Wed Oct 7 >>> 20:46:59 2009 >>> @@ -561,6 +561,12 @@ >>> return false; >>> } >>> >>> + /// requiresFrameIndexScavenging - returns true if the target >>> requires post >>> + /// PEI scavenging of registers for materializing frame index >>> constants. >>> + virtual bool requiresFrameIndexScavenging(const MachineFunction >>> &MF) const { >>> + return false; >>> + } >>> + >>> /// hasFP - Return true if the specified function should have a >>> dedicated >>> /// frame pointer register. For most targets this is true only if >>> the function >>> /// has variable sized allocas or if frame pointer elimination is >>> disabled. 
>>> >>> Modified: llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp >>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp?rev=83521&r1=83520&r2=83521&view=diff >>> >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> ==================================================================== >>> --- llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp (original) >>> +++ llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp Wed Oct 7 >>> 20:46:59 2009 >>> @@ -44,16 +44,6 @@ >>> static RegisterPass >>> X("prologepilog", "Prologue/Epilogue Insertion"); >>> >>> -// FIXME: For now, the frame index scavenging is off by default >>> and only >>> -// used by the Thumb1 target. When it's the default and replaces >>> the current >>> -// on-the-fly PEI scavenging for all targets, >>> requiresRegisterScavenging() >>> -// will replace this. >>> -cl::opt >>> -FrameIndexVirtualScavenging("enable-frame-index-scavenging", >>> - cl::Hidden, >>> - cl::desc("Enable frame index >>> elimination with" >>> - "virtual register >>> scavenging")); >>> - >>> /// createPrologEpilogCodeInserter - This function returns a pass >>> that inserts >>> /// prolog and epilog code, and eliminates abstract frame >>> references. >>> /// >>> @@ -66,6 +56,7 @@ >>> const Function* F = Fn.getFunction(); >>> const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo(); >>> RS = TRI->requiresRegisterScavenging(Fn) ? new RegScavenger() : >>> NULL; >>> + FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging >>> (Fn); >>> >>> // Get MachineModuleInfo so that we can track the construction of >>> the >>> // frame. 
>>> >>> Modified: llvm/trunk/lib/CodeGen/PrologEpilogInserter.h >>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/PrologEpilogInserter.h?rev=83521&r1=83520&r2=83521&view=diff >>> >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> ==================================================================== >>> --- llvm/trunk/lib/CodeGen/PrologEpilogInserter.h (original) >>> +++ llvm/trunk/lib/CodeGen/PrologEpilogInserter.h Wed Oct 7 >>> 20:46:59 2009 >>> @@ -94,6 +94,11 @@ >>> // functions. >>> bool ShrinkWrapThisFunction; >>> >>> + // Flag to control whether to use the register scavenger to >>> resolve >>> + // frame index materialization registers. Set according to >>> + // TRI->requiresFrameIndexScavenging() for the curren function. >>> + bool FrameIndexVirtualScavenging; >>> + >>> // When using the scavenger post-pass to resolve frame reference >>> // materialization registers, maintain a map of the registers to >>> // the constant value and SP adjustment associated with it. >>> >>> Modified: llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.cpp >>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.cpp?rev=83521&r1=83520&r2=83521&view=diff >>> >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> ==================================================================== >>> --- llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.cpp (original) >>> +++ llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.cpp Wed Oct 7 >>> 20:46:59 2009 >>> @@ -37,11 +37,6 @@ >>> #include "llvm/Support/raw_ostream.h" >>> using namespace llvm; >>> >>> -// FIXME: This cmd line option conditionalizes the new register >>> scavenging >>> -// implemenation in PEI. Remove the option when scavenging works >>> well enough >>> -// to be the default. 
>>> -extern cl::opt FrameIndexVirtualScavenging; >>> - >>> Thumb1RegisterInfo::Thumb1RegisterInfo(const ARMBaseInstrInfo &tii, >>> const ARMSubtarget &sti) >>> : ARMBaseRegisterInfo(tii, sti) { >>> @@ -84,9 +79,16 @@ >>> >>> bool >>> Thumb1RegisterInfo::requiresRegisterScavenging(const >>> MachineFunction &MF) const { >>> - return FrameIndexVirtualScavenging; >>> + return true; >>> +} >>> + >>> +bool >>> +Thumb1RegisterInfo::requiresFrameIndexScavenging(const >>> MachineFunction &MF) >>> + const { >>> + return true; >>> } >>> >>> + >>> bool Thumb1RegisterInfo::hasReservedCallFrame(MachineFunction &MF) >>> const { >>> const MachineFrameInfo *FFI = MF.getFrameInfo(); >>> unsigned CFSize = FFI->getMaxCallFrameSize(); >>> @@ -128,13 +130,7 @@ >>> unsigned LdReg = DestReg; >>> if (DestReg == ARM::SP) { >>> assert(BaseReg == ARM::SP && "Unexpected!"); >>> - if (FrameIndexVirtualScavenging) { >>> - LdReg = MF.getRegInfo().createVirtualRegister >>> (ARM::tGPRRegisterClass); >>> - } else { >>> - LdReg = ARM::R3; >>> - BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVtgpr2gpr), >>> ARM::R12) >>> - .addReg(ARM::R3, RegState::Kill); >>> - } >>> + LdReg = MF.getRegInfo().createVirtualRegister >>> (ARM::tGPRRegisterClass); >>> } >>> >>> if (NumBytes <= 255 && NumBytes >= 0) >>> @@ -159,10 +155,6 @@ >>> else >>> MIB.addReg(LdReg).addReg(BaseReg, RegState::Kill); >>> AddDefaultPred(MIB); >>> - >>> - if (!FrameIndexVirtualScavenging && DestReg == ARM::SP) >>> - BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2tgpr), ARM::R3) >>> - .addReg(ARM::R12, RegState::Kill); >>> } >>> >>> /// calcNumMI - Returns the number of instructions required to >>> materialize >>> @@ -635,7 +627,6 @@ >>> else // tLDR has an extra register operand. 
>>> MI.addOperand(MachineOperand::CreateReg(0, false)); >>> } else if (Desc.mayStore()) { >>> - if (FrameIndexVirtualScavenging) { >>> VReg = MF.getRegInfo().createVirtualRegister >>> (ARM::tGPRRegisterClass); >>> assert (Value && "Frame index virtual allocated, but Value arg >>> is NULL!"); >>> *Value = Offset; >>> @@ -658,52 +649,6 @@ >>> MI.addOperand(MachineOperand::CreateReg(FrameReg, false)); >>> else // tSTR has an extra register operand. >>> MI.addOperand(MachineOperand::CreateReg(0, false)); >>> - } else { >>> - // FIXME! This is horrific!!! We need register scavenging. >>> - // Our temporary workaround has marked r3 unavailable. Of >>> course, r3 is >>> - // also a ABI register so it's possible that is is the >>> register that is >>> - // being storing here. If that's the case, we do the >>> following: >>> - // r12 = r2 >>> - // Use r2 to materialize sp + offset >>> - // str r3, r2 >>> - // r2 = r12 >>> - unsigned ValReg = MI.getOperand(0).getReg(); >>> - unsigned TmpReg = ARM::R3; >>> - bool UseRR = false; >>> - if (ValReg == ARM::R3) { >>> - BuildMI(MBB, II, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::R12) >>> - .addReg(ARM::R2, RegState::Kill); >>> - TmpReg = ARM::R2; >>> - } >>> - if (TmpReg == ARM::R3 && AFI->isR3LiveIn()) >>> - BuildMI(MBB, II, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::R12) >>> - .addReg(ARM::R3, RegState::Kill); >>> - if (Opcode == ARM::tSpill) { >>> - if (FrameReg == ARM::SP) >>> - emitThumbRegPlusImmInReg(MBB, II, TmpReg, FrameReg, >>> - Offset, false, TII, *this, dl); >>> - else { >>> - emitLoadConstPool(MBB, II, dl, TmpReg, 0, Offset); >>> - UseRR = true; >>> - } >>> - } else >>> - emitThumbRegPlusImmediate(MBB, II, TmpReg, FrameReg, >>> Offset, TII, >>> - *this, dl); >>> - MI.setDesc(TII.get(ARM::tSTR)); >>> - MI.getOperand(i).ChangeToRegister(TmpReg, false, false, >>> true); >>> - if (UseRR) // Use [reg, reg] addrmode. >>> - MI.addOperand(MachineOperand::CreateReg(FrameReg, false)); >>> - else // tSTR has an extra register operand. 
>>> - MI.addOperand(MachineOperand::CreateReg(0, false)); >>> - >>> - MachineBasicBlock::iterator NII = next(II); >>> - if (ValReg == ARM::R3) >>> - BuildMI(MBB, NII, dl, TII.get(ARM::tMOVgpr2tgpr), ARM::R2) >>> - .addReg(ARM::R12, RegState::Kill); >>> - if (TmpReg == ARM::R3 && AFI->isR3LiveIn()) >>> - BuildMI(MBB, NII, dl, TII.get(ARM::tMOVgpr2tgpr), ARM::R3) >>> - .addReg(ARM::R12, RegState::Kill); >>> - } >>> } else >>> assert(false && "Unexpected opcode!"); >>> >>> >>> Modified: llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.h >>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.h?rev=83521&r1=83520&r2=83521&view=diff >>> >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> ==================================================================== >>> --- llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.h (original) >>> +++ llvm/trunk/lib/Target/ARM/Thumb1RegisterInfo.h Wed Oct 7 >>> 20:46:59 2009 >>> @@ -41,6 +41,7 @@ >>> getPhysicalRegisterRegClass(unsigned Reg, EVT VT = MVT::Other) >>> const; >>> >>> bool requiresRegisterScavenging(const MachineFunction &MF) const; >>> + bool requiresFrameIndexScavenging(const MachineFunction &MF) >>> const; >>> >>> bool hasReservedCallFrame(MachineFunction &MF) const; >>> >>> >>> >>> _______________________________________________ >>> llvm-commits mailing list >>> llvm-commits at cs.uiuc.edu >>> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits >> > From tonic at nondot.org Thu Oct 8 13:02:01 2009 From: tonic at nondot.org (Tanya M. Lattner) Date: Thu, 8 Oct 2009 11:02:01 -0700 (PDT) Subject: [llvm-commits] [llvm] r83417 - /llvm/trunk/lib/Support/Triple.cpp In-Reply-To: References: <200910062145.n96LjQsf011221@zion.cs.uiuc.edu> Message-ID: Chris can you approve or deny this patch? Also, are both patches required or just the last one? Thanks, Tanya On Tue, 6 Oct 2009, Jeffrey Yasskin wrote: > Here's the real twine fix, in case you want it for the 2.6 branch. 
> > On Tue, Oct 6, 2009 at 2:45 PM, Jeffrey Yasskin wrote: >> Author: jyasskin >> Date: Tue Oct ?6 16:45:26 2009 >> New Revision: 83417 >> >> URL: http://llvm.org/viewvc/llvm-project?rev=83417&view=rev >> Log: >> r83391 was completely broken since Twines keep references to their inputs, and >> some of the inputs were temporaries. ?Here's a real fix for the miscompilation. >> Thanks to sabre for pointing out the problem. >> >> Modified: >> ? ?llvm/trunk/lib/Support/Triple.cpp >> >> Modified: llvm/trunk/lib/Support/Triple.cpp >> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/Triple.cpp?rev=83417&r1=83416&r2=83417&view=diff >> >> ============================================================================== >> --- llvm/trunk/lib/Support/Triple.cpp (original) >> +++ llvm/trunk/lib/Support/Triple.cpp Tue Oct ?6 16:45:26 2009 >> @@ -9,6 +9,7 @@ >> >> ?#include "llvm/ADT/Triple.h" >> >> +#include "llvm/ADT/SmallString.h" >> ?#include "llvm/ADT/Twine.h" >> ?#include >> ?#include >> @@ -390,10 +391,14 @@ >> ?} >> >> ?void Triple::setArchName(const StringRef &Str) { >> - ?// Work around a miscompilation bug in gcc 4.0.3. >> - ?Twine a = getVendorName() + "-" + getOSAndEnvironmentName(); >> - ?Twine b = Str + "-" + a; >> - ?setTriple(b); >> + ?// Work around a miscompilation bug for Twines in gcc 4.0.3. 
>> + ?SmallString<64> Triple; >> + ?Triple += Str; >> + ?Triple += "-"; >> + ?Triple += getVendorName(); >> + ?Triple += "-"; >> + ?Triple += getOSAndEnvironmentName(); >> + ?setTriple(Triple.str()); >> ?} >> >> ?void Triple::setVendorName(const StringRef &Str) { >> >> >> _______________________________________________ >> llvm-commits mailing list >> llvm-commits at cs.uiuc.edu >> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits >> > From clattner at apple.com Thu Oct 8 13:05:48 2009 From: clattner at apple.com (Chris Lattner) Date: Thu, 8 Oct 2009 11:05:48 -0700 Subject: [llvm-commits] [llvm] r83417 - /llvm/trunk/lib/Support/Triple.cpp In-Reply-To: References: <200910062145.n96LjQsf011221@zion.cs.uiuc.edu> Message-ID: <933FB189-6510-469A-9BD9-7151AAD84830@apple.com> On Oct 8, 2009, at 11:02 AM, Tanya M. Lattner wrote: > > Chris can you approve or deny this patch? > > Also, are both patches required or just the last one? yes approved, both patches are required. -Chris > > Thanks, > Tanya > > On Tue, 6 Oct 2009, Jeffrey Yasskin wrote: > >> Here's the real twine fix, in case you want it for the 2.6 branch. >> >> On Tue, Oct 6, 2009 at 2:45 PM, Jeffrey Yasskin >> wrote: >>> Author: jyasskin >>> Date: Tue Oct 6 16:45:26 2009 >>> New Revision: 83417 >>> >>> URL: http://llvm.org/viewvc/llvm-project?rev=83417&view=rev >>> Log: >>> r83391 was completely broken since Twines keep references to their >>> inputs, and >>> some of the inputs were temporaries. Here's a real fix for the >>> miscompilation. >>> Thanks to sabre for pointing out the problem. 
>>> >>> Modified: >>> llvm/trunk/lib/Support/Triple.cpp >>> >>> Modified: llvm/trunk/lib/Support/Triple.cpp >>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/Triple.cpp?rev=83417&r1=83416&r2=83417&view=diff >>> >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> ==================================================================== >>> --- llvm/trunk/lib/Support/Triple.cpp (original) >>> +++ llvm/trunk/lib/Support/Triple.cpp Tue Oct 6 16:45:26 2009 >>> @@ -9,6 +9,7 @@ >>> >>> #include "llvm/ADT/Triple.h" >>> >>> +#include "llvm/ADT/SmallString.h" >>> #include "llvm/ADT/Twine.h" >>> #include >>> #include >>> @@ -390,10 +391,14 @@ >>> } >>> >>> void Triple::setArchName(const StringRef &Str) { >>> - // Work around a miscompilation bug in gcc 4.0.3. >>> - Twine a = getVendorName() + "-" + getOSAndEnvironmentName(); >>> - Twine b = Str + "-" + a; >>> - setTriple(b); >>> + // Work around a miscompilation bug for Twines in gcc 4.0.3. >>> + SmallString<64> Triple; >>> + Triple += Str; >>> + Triple += "-"; >>> + Triple += getVendorName(); >>> + Triple += "-"; >>> + Triple += getOSAndEnvironmentName(); >>> + setTriple(Triple.str()); >>> } >>> >>> void Triple::setVendorName(const StringRef &Str) { >>> >>> >>> _______________________________________________ >>> llvm-commits mailing list >>> llvm-commits at cs.uiuc.edu >>> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits >>> >> > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits From dpatel at apple.com Thu Oct 8 13:48:04 2009 From: dpatel at apple.com (Devang Patel) Date: Thu, 08 Oct 2009 18:48:04 -0000 Subject: [llvm-commits] [llvm] r83563 - in /llvm/trunk/lib/CodeGen/AsmPrinter: DwarfDebug.cpp DwarfDebug.h Message-ID: <200910081848.n98Im4wk016066@zion.cs.uiuc.edu> Author: dpatel Date: Thu Oct 8 13:48:03 2009 New Revision: 83563 URL: 
http://llvm.org/viewvc/llvm-project?rev=83563&view=rev Log: Do not record line number to implicitly mark start of function if function has arguments. Extra line number entries trip gdb in some cases. Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp?rev=83563&r1=83562&r2=83563&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp (original) +++ llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp Thu Oct 8 13:48:03 2009 @@ -1782,17 +1782,23 @@ } /// CollectVariableInfo - Populate DbgScope entries with variables' info. -void DwarfDebug::CollectVariableInfo() { - if (!MMI) return; +bool DwarfDebug::CollectVariableInfo() { + if (!MMI) return false; + bool ArgsCollected = false; MachineModuleInfo::VariableDbgInfoMapTy &VMap = MMI->getVariableDbgInfo(); for (MachineModuleInfo::VariableDbgInfoMapTy::iterator VI = VMap.begin(), VE = VMap.end(); VI != VE; ++VI) { MDNode *Var = VI->first; + DIVariable DV (Var); + if (DV.isNull()) continue; + if (DV.getTag() == dwarf::DW_TAG_arg_variable) + ArgsCollected = true; DILocation VLoc(VI->second.first); unsigned VSlot = VI->second.second; DbgScope *Scope = getDbgScope(VLoc.getScope().getNode(), NULL); - Scope->AddVariable(new DbgVariable(DIVariable(Var), VSlot, false)); + Scope->AddVariable(new DbgVariable(DV, VSlot, false)); } + return ArgsCollected; } /// SetDbgScopeBeginLabels - Update DbgScope begin labels for the scopes that @@ -1903,7 +1909,7 @@ #ifdef ATTACH_DEBUG_INFO_TO_AN_INSN if (!ExtractScopeInformation(MF)) return; - CollectVariableInfo(); + bool ArgsCollected = CollectVariableInfo(); #endif // Begin accumulating function debug information. 
@@ -1914,14 +1920,19 @@ // Emit label for the implicitly defined dbg.stoppoint at the start of the // function. - DebugLoc FDL = MF->getDefaultDebugLoc(); - if (!FDL.isUnknown()) { - DebugLocTuple DLT = MF->getDebugLocTuple(FDL); - unsigned LabelID = RecordSourceLine(DLT.Line, DLT.Col, DLT.CompileUnit); - Asm->printLabel(LabelID); - O << '\n'; +#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN + if (!ArgsCollected) { +#else + if (1) { +#endif + DebugLoc FDL = MF->getDefaultDebugLoc(); + if (!FDL.isUnknown()) { + DebugLocTuple DLT = MF->getDebugLocTuple(FDL); + unsigned LabelID = RecordSourceLine(DLT.Line, DLT.Col, DLT.CompileUnit); + Asm->printLabel(LabelID); + O << '\n'; + } } - if (TimePassesIsEnabled) DebugTimer->stopTimer(); } Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h?rev=83563&r1=83562&r2=83563&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h (original) +++ llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h Thu Oct 8 13:48:03 2009 @@ -556,7 +556,7 @@ bool ExtractScopeInformation(MachineFunction *MF); /// CollectVariableInfo - Populate DbgScope entries with variables' info. - void CollectVariableInfo(); + bool CollectVariableInfo(); /// SetDbgScopeBeginLabels - Update DbgScope begin labels for the scopes that /// start with this machine instruction. 
From bob.wilson at apple.com Thu Oct 8 13:49:46 2009 From: bob.wilson at apple.com (Bob Wilson) Date: Thu, 08 Oct 2009 18:49:46 -0000 Subject: [llvm-commits] [llvm] r83564 - in /llvm/trunk: include/llvm/CodeGen/SelectionDAG.h lib/CodeGen/SelectionDAG/SelectionDAG.cpp Message-ID: <200910081849.n98InkTE016310@zion.cs.uiuc.edu> Author: bwilson Date: Thu Oct 8 13:49:46 2009 New Revision: 83564 URL: http://llvm.org/viewvc/llvm-project?rev=83564&view=rev Log: Add a SelectionDAG getTargetInsertSubreg convenience function, similar to getTargetExtractSubreg. Modified: llvm/trunk/include/llvm/CodeGen/SelectionDAG.h llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp Modified: llvm/trunk/include/llvm/CodeGen/SelectionDAG.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/SelectionDAG.h?rev=83564&r1=83563&r2=83564&view=diff ============================================================================== --- llvm/trunk/include/llvm/CodeGen/SelectionDAG.h (original) +++ llvm/trunk/include/llvm/CodeGen/SelectionDAG.h Thu Oct 8 13:49:46 2009 @@ -708,6 +708,11 @@ SDValue getTargetExtractSubreg(int SRIdx, DebugLoc DL, EVT VT, SDValue Operand); + /// getTargetInsertSubreg - A convenience function for creating + /// TargetInstrInfo::INSERT_SUBREG nodes. + SDValue getTargetInsertSubreg(int SRIdx, DebugLoc DL, EVT VT, + SDValue Operand, SDValue Subreg); + /// getNodeIfExists - Get the specified node if it's already available, or /// else return NULL. 
SDNode *getNodeIfExists(unsigned Opcode, SDVTList VTs, Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp?rev=83564&r1=83563&r2=83564&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (original) +++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp Thu Oct 8 13:49:46 2009 @@ -4769,6 +4769,17 @@ return SDValue(Subreg, 0); } +/// getTargetInsertSubreg - A convenience function for creating +/// TargetInstrInfo::INSERT_SUBREG nodes. +SDValue +SelectionDAG::getTargetInsertSubreg(int SRIdx, DebugLoc DL, EVT VT, + SDValue Operand, SDValue Subreg) { + SDValue SRIdxVal = getTargetConstant(SRIdx, MVT::i32); + SDNode *Result = getMachineNode(TargetInstrInfo::INSERT_SUBREG, DL, + VT, Operand, Subreg, SRIdxVal); + return SDValue(Result, 0); +} + /// getNodeIfExists - Get the specified node if it's already available, or /// else return NULL. SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList, From bob.wilson at apple.com Thu Oct 8 13:51:31 2009 From: bob.wilson at apple.com (Bob Wilson) Date: Thu, 08 Oct 2009 18:51:31 -0000 Subject: [llvm-commits] [llvm] r83565 - /llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp Message-ID: <200910081851.n98IpVVX016535@zion.cs.uiuc.edu> Author: bwilson Date: Thu Oct 8 13:51:31 2009 New Revision: 83565 URL: http://llvm.org/viewvc/llvm-project?rev=83565&view=rev Log: Clean up a comment (indentation was wrong). 
Modified: llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp Modified: llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp?rev=83565&r1=83564&r2=83565&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp (original) +++ llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp Thu Oct 8 13:51:31 2009 @@ -59,7 +59,8 @@ return "ARM Instruction Selection"; } - /// getI32Imm - Return a target constant with the specified value, of type i32. + /// getI32Imm - Return a target constant of type i32 with the specified + /// value. inline SDValue getI32Imm(unsigned Imm) { return CurDAG->getTargetConstant(Imm, MVT::i32); } From bob.wilson at apple.com Thu Oct 8 13:52:56 2009 From: bob.wilson at apple.com (Bob Wilson) Date: Thu, 08 Oct 2009 18:52:56 -0000 Subject: [llvm-commits] [llvm] r83566 - /llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp Message-ID: <200910081852.n98IquTa016718@zion.cs.uiuc.edu> Author: bwilson Date: Thu Oct 8 13:52:56 2009 New Revision: 83566 URL: http://llvm.org/viewvc/llvm-project?rev=83566&view=rev Log: Clean up some unnecessary initializations. Modified: llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp Modified: llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp?rev=83566&r1=83565&r2=83566&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp (original) +++ llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp Thu Oct 8 13:52:56 2009 @@ -1405,7 +1405,7 @@ } // Quad registers are loaded with two separate instructions, where one // loads the even registers and the other loads the odd registers. 
- EVT RegVT = VT; + EVT RegVT; unsigned Opc2 = 0; switch (VT.getSimpleVT().SimpleTy) { default: llvm_unreachable("unhandled vld3 type"); @@ -1465,7 +1465,7 @@ } // Quad registers are loaded with two separate instructions, where one // loads the even registers and the other loads the odd registers. - EVT RegVT = VT; + EVT RegVT; unsigned Opc2 = 0; switch (VT.getSimpleVT().SimpleTy) { default: llvm_unreachable("unhandled vld4 type"); From bob.wilson at apple.com Thu Oct 8 13:56:11 2009 From: bob.wilson at apple.com (Bob Wilson) Date: Thu, 08 Oct 2009 18:56:11 -0000 Subject: [llvm-commits] [llvm] r83568 - in /llvm/trunk: lib/Target/ARM/ARMISelDAGToDAG.cpp lib/Target/ARM/ARMInstrNEON.td lib/Target/ARM/NEONPreAllocPass.cpp test/CodeGen/ARM/vldlane.ll Message-ID: <200910081856.n98IuBQj017138@zion.cs.uiuc.edu> Author: bwilson Date: Thu Oct 8 13:56:10 2009 New Revision: 83568 URL: http://llvm.org/viewvc/llvm-project?rev=83568&view=rev Log: Add codegen support for NEON vld2lane intrinsics with 128-bit vectors. 
Modified: llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp llvm/trunk/lib/Target/ARM/ARMInstrNEON.td llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp llvm/trunk/test/CodeGen/ARM/vldlane.ll Modified: llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp?rev=83568&r1=83567&r2=83568&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp (original) +++ llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp Thu Oct 8 13:56:10 2009 @@ -1510,18 +1510,67 @@ SDValue MemAddr, MemUpdate, MemOpc; if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc)) return NULL; + if (VT.is64BitVector()) { + switch (VT.getSimpleVT().SimpleTy) { + default: llvm_unreachable("unhandled vld2lane type"); + case MVT::v8i8: Opc = ARM::VLD2LNd8; break; + case MVT::v4i16: Opc = ARM::VLD2LNd16; break; + case MVT::v2f32: + case MVT::v2i32: Opc = ARM::VLD2LNd32; break; + } + SDValue Chain = N->getOperand(0); + const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, + N->getOperand(3), N->getOperand(4), + N->getOperand(5), Chain }; + return CurDAG->getMachineNode(Opc, dl, VT, VT, MVT::Other, Ops, 7); + } + // Quad registers are handled by extracting subregs, doing the load, + // and then inserting the results as subregs. 
+ EVT RegVT; + unsigned Opc2 = 0; switch (VT.getSimpleVT().SimpleTy) { default: llvm_unreachable("unhandled vld2lane type"); - case MVT::v8i8: Opc = ARM::VLD2LNd8; break; - case MVT::v4i16: Opc = ARM::VLD2LNd16; break; - case MVT::v2f32: - case MVT::v2i32: Opc = ARM::VLD2LNd32; break; + case MVT::v8i16: + Opc = ARM::VLD2LNq16a; + Opc2 = ARM::VLD2LNq16b; + RegVT = MVT::v4i16; + break; + case MVT::v4f32: + Opc = ARM::VLD2LNq32a; + Opc2 = ARM::VLD2LNq32b; + RegVT = MVT::v2f32; + break; + case MVT::v4i32: + Opc = ARM::VLD2LNq32a; + Opc2 = ARM::VLD2LNq32b; + RegVT = MVT::v2i32; + break; } SDValue Chain = N->getOperand(0); - const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, - N->getOperand(3), N->getOperand(4), - N->getOperand(5), Chain }; - return CurDAG->getMachineNode(Opc, dl, VT, VT, MVT::Other, Ops, 7); + unsigned Lane = cast(N->getOperand(5))->getZExtValue(); + unsigned NumElts = RegVT.getVectorNumElements(); + int SubregIdx = (Lane < NumElts) ? ARM::DSUBREG_0 : ARM::DSUBREG_1; + + SDValue D0 = CurDAG->getTargetExtractSubreg(SubregIdx, dl, RegVT, + N->getOperand(3)); + SDValue D1 = CurDAG->getTargetExtractSubreg(SubregIdx, dl, RegVT, + N->getOperand(4)); + const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, D0, D1, + getI32Imm(Lane % NumElts), Chain }; + SDNode *VLdLn = CurDAG->getMachineNode((Lane < NumElts) ? 
Opc : Opc2, + dl, RegVT, RegVT, MVT::Other, + Ops, 7); + SDValue Q0 = CurDAG->getTargetInsertSubreg(SubregIdx, dl, VT, + N->getOperand(3), + SDValue(VLdLn, 0)); + SDValue Q1 = CurDAG->getTargetInsertSubreg(SubregIdx, dl, VT, + N->getOperand(4), + SDValue(VLdLn, 1)); + Chain = SDValue(VLdLn, 2); + ReplaceUses(SDValue(N, 0), Q0); + ReplaceUses(SDValue(N, 1), Q1); + ReplaceUses(SDValue(N, 2), Chain); + return NULL; } case Intrinsic::arm_neon_vld3lane: { Modified: llvm/trunk/lib/Target/ARM/ARMInstrNEON.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrNEON.td?rev=83568&r1=83567&r2=83568&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrNEON.td (original) +++ llvm/trunk/lib/Target/ARM/ARMInstrNEON.td Thu Oct 8 13:56:10 2009 @@ -266,16 +266,24 @@ // FIXME: Not yet implemented. // VLD2LN : Vector Load (single 2-element structure to one lane) -class VLD2LND op11_8, string OpcodeStr> +class VLD2LN op11_8, string OpcodeStr> : NLdSt<1,0b10,op11_8,0b0000, (outs DPR:$dst1, DPR:$dst2), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VLD2, !strconcat(OpcodeStr, "\t\\{$dst1[$lane],$dst2[$lane]\\}, $addr"), "$src1 = $dst1, $src2 = $dst2", []>; -def VLD2LNd8 : VLD2LND<0b0001, "vld2.8">; -def VLD2LNd16 : VLD2LND<0b0101, "vld2.16">; -def VLD2LNd32 : VLD2LND<0b1001, "vld2.32">; +def VLD2LNd8 : VLD2LN<0b0001, "vld2.8">; +def VLD2LNd16 : VLD2LN<0b0101, "vld2.16">; +def VLD2LNd32 : VLD2LN<0b1001, "vld2.32">; + +// vld2 to double-spaced even registers. +def VLD2LNq16a: VLD2LN<0b0101, "vld2.16">; +def VLD2LNq32a: VLD2LN<0b1001, "vld2.32">; + +// vld2 to double-spaced odd registers. 
+def VLD2LNq16b: VLD2LN<0b0101, "vld2.16">; +def VLD2LNq32b: VLD2LN<0b1001, "vld2.32">; // VLD3LN : Vector Load (single 3-element structure to one lane) class VLD3LND op11_8, string OpcodeStr> Modified: llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp?rev=83568&r1=83567&r2=83568&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp (original) +++ llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp Thu Oct 8 13:56:10 2009 @@ -57,6 +57,22 @@ NumRegs = 2; return true; + case ARM::VLD2LNq16a: + case ARM::VLD2LNq32a: + FirstOpnd = 0; + NumRegs = 2; + Offset = 0; + Stride = 2; + return true; + + case ARM::VLD2LNq16b: + case ARM::VLD2LNq32b: + FirstOpnd = 0; + NumRegs = 2; + Offset = 1; + Stride = 2; + return true; + case ARM::VLD2q8: case ARM::VLD2q16: case ARM::VLD2q32: Modified: llvm/trunk/test/CodeGen/ARM/vldlane.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vldlane.ll?rev=83568&r1=83567&r2=83568&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/ARM/vldlane.ll (original) +++ llvm/trunk/test/CodeGen/ARM/vldlane.ll Thu Oct 8 13:56:10 2009 @@ -5,6 +5,10 @@ %struct.__neon_int32x2x2_t = type { <2 x i32>, <2 x i32> } %struct.__neon_float32x2x2_t = type { <2 x float>, <2 x float> } +%struct.__neon_int16x8x2_t = type { <8 x i16>, <8 x i16> } +%struct.__neon_int32x4x2_t = type { <4 x i32>, <4 x i32> } +%struct.__neon_float32x4x2_t = type { <4 x float>, <4 x float> } + define <8 x i8> @vld2lanei8(i8* %A, <8 x i8>* %B) nounwind { ;CHECK: vld2lanei8: ;CHECK: vld2.8 @@ -49,11 +53,48 @@ ret <2 x float> %tmp5 } +define <8 x i16> @vld2laneQi16(i16* %A, <8 x i16>* %B) nounwind { +;CHECK: vld2laneQi16: +;CHECK: vld2.16 + %tmp1 = load <8 x i16>* %B + %tmp2 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i16* %A, <8 
x i16> %tmp1, <8 x i16> %tmp1, i32 1) + %tmp3 = extractvalue %struct.__neon_int16x8x2_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int16x8x2_t %tmp2, 1 + %tmp5 = add <8 x i16> %tmp3, %tmp4 + ret <8 x i16> %tmp5 +} + +define <4 x i32> @vld2laneQi32(i32* %A, <4 x i32>* %B) nounwind { +;CHECK: vld2laneQi32: +;CHECK: vld2.32 + %tmp1 = load <4 x i32>* %B + %tmp2 = call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2lane.v4i32(i32* %A, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2) + %tmp3 = extractvalue %struct.__neon_int32x4x2_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int32x4x2_t %tmp2, 1 + %tmp5 = add <4 x i32> %tmp3, %tmp4 + ret <4 x i32> %tmp5 +} + +define <4 x float> @vld2laneQf(float* %A, <4 x float>* %B) nounwind { +;CHECK: vld2laneQf: +;CHECK: vld2.32 + %tmp1 = load <4 x float>* %B + %tmp2 = call %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2lane.v4f32(float* %A, <4 x float> %tmp1, <4 x float> %tmp1, i32 1) + %tmp3 = extractvalue %struct.__neon_float32x4x2_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_float32x4x2_t %tmp2, 1 + %tmp5 = add <4 x float> %tmp3, %tmp4 + ret <4 x float> %tmp5 +} + declare %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32) nounwind readonly declare %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2lane.v4i16(i8*, <4 x i16>, <4 x i16>, i32) nounwind readonly declare %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32(i8*, <2 x i32>, <2 x i32>, i32) nounwind readonly declare %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2lane.v2f32(i8*, <2 x float>, <2 x float>, i32) nounwind readonly +declare %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i8*, <8 x i16>, <8 x i16>, i32) nounwind readonly +declare %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2lane.v4i32(i8*, <4 x i32>, <4 x i32>, i32) nounwind readonly +declare %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2lane.v4f32(i8*, <4 x float>, <4 x float>, i32) nounwind readonly + %struct.__neon_int8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x 
i8> } %struct.__neon_int16x4x3_t = type { <4 x i16>, <4 x i16>, <4 x i16> } %struct.__neon_int32x2x3_t = type { <2 x i32>, <2 x i32>, <2 x i32> } From akyrtzi at gmail.com Thu Oct 8 14:20:19 2009 From: akyrtzi at gmail.com (Argyris Kyrtzidis) Date: Thu, 8 Oct 2009 22:20:19 +0300 Subject: [llvm-commits] PATCH: Add CHECK-FIRST/LAST to FIleCheck In-Reply-To: <1E4AC538-18A1-4D81-8BCD-C8F2519F032A@gmail.com> References: <35E6386C-6EF8-415C-A4D1-2C539D2540F7@gmail.com> <1E4AC538-18A1-4D81-8BCD-C8F2519F032A@gmail.com> Message-ID: On Oct 1, 2009, at 1:19 AM, Argyris Kyrtzidis wrote: > On Oct 1, 2009, at 12:53 AM, Chris Lattner wrote: > >> >> On Sep 30, 2009, at 2:51 PM, Argyris Kyrtzidis wrote: >> >>> Actually, using these directives is not flexible enough, e.g. you >>> can't use them to verify that the output is only one line. >>> >>> I attached a new patch that adds 2 command line options instead: >>> >>> -empty-before - Nothing comes before the first check >>> -empty-after - Nothing comes after the last check >>> >>> If you want to make sure that the only output is the check line, >>> you do something like: >>> >>> // RUN: FileCheck -empty-before -empty-after >>> // CHECK: foo >>> >>> Ok to go in ? >> >> can't CHECK-NOT be used for this? If a NOT could be added before >> the first line, then you could do CHECK-NOT: {{.}} or something >> like that. > > "CHECK-NOT: {{.}}" is not good because it will reject inputs that > have characters on the same line as 'foo', e.g. > > "bar foo" > > should be a valid input but the above CHECK-NOT will reject it. > > I couldn't put a CHECK-NOT to work as the -empty-before (and - > after), any suggestions ? > > -Argiris Ping ? Can the command line options go in ? 
-Argiris From isanbard at gmail.com Thu Oct 8 15:12:52 2009 From: isanbard at gmail.com (Bill Wendling) Date: Thu, 08 Oct 2009 20:12:52 -0000 Subject: [llvm-commits] [llvm-gcc-4.2] r83569 - in /llvm-gcc-4.2/trunk/gcc: c-decl.c cp/decl.c testsuite/objc.dg/block-weak.m Message-ID: <200910082012.n98KCqRU027876@zion.cs.uiuc.edu> Author: void Date: Thu Oct 8 15:12:52 2009 New Revision: 83569 URL: http://llvm.org/viewvc/llvm-project?rev=83569&view=rev Log: Radar 7284529 * c-decl.c (init_byref_decl): Ensure copy/dispose helper is unique. * cp/decl.c (init_byref_decl): Likewise. Patch by Mike Stump! Added: llvm-gcc-4.2/trunk/gcc/testsuite/objc.dg/block-weak.m Modified: llvm-gcc-4.2/trunk/gcc/c-decl.c llvm-gcc-4.2/trunk/gcc/cp/decl.c Modified: llvm-gcc-4.2/trunk/gcc/c-decl.c URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/gcc/c-decl.c?rev=83569&r1=83568&r2=83569&view=diff ============================================================================== --- llvm-gcc-4.2/trunk/gcc/c-decl.c (original) +++ llvm-gcc-4.2/trunk/gcc/c-decl.c Thu Oct 8 15:12:52 2009 @@ -3873,7 +3873,7 @@ tree_cons (NULL_TREE, ptr_type_node, tree_cons (NULL_TREE, ptr_type_node, void_list_node))); - strcpy (name, "__Block_byref_id_object_copy"); + sprintf (name, "__Block_byref_id_object_copy%d", kind); block_byref_id_object_copy[kind] = build_helper_func_decl (get_identifier (name), func_type); /* Synthesize function definition. */ @@ -3891,7 +3891,7 @@ tree func_type = build_function_type (void_type_node, tree_cons (NULL_TREE, ptr_type_node, void_list_node)); - strcpy (name, "__Block_byref_id_object_dispose"); + sprintf (name, "__Block_byref_id_object_dispose%d", kind); block_byref_id_object_dispose[kind] = build_helper_func_decl (get_identifier (name), func_type); /* Synthesize function definition. 
*/ Modified: llvm-gcc-4.2/trunk/gcc/cp/decl.c URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/gcc/cp/decl.c?rev=83569&r1=83568&r2=83569&view=diff ============================================================================== --- llvm-gcc-4.2/trunk/gcc/cp/decl.c (original) +++ llvm-gcc-4.2/trunk/gcc/cp/decl.c Thu Oct 8 15:12:52 2009 @@ -5623,7 +5623,7 @@ tree_cons (NULL_TREE, ptr_type_node, tree_cons (NULL_TREE, ptr_type_node, void_list_node))); - strcpy (name, "__Block_byref_id_object_copy"); + sprintf (name, "__Block_byref_id_object_copy%d", kind); block_byref_id_object_copy[kind] = build_helper_func_decl (get_identifier (name), func_type); DECL_CONTEXT (block_byref_id_object_copy[kind]) = current_function_decl; @@ -5645,7 +5645,7 @@ func_type = build_function_type (void_type_node, tree_cons (NULL_TREE, ptr_type_node, void_list_node)); - strcpy (name, "__Block_byref_id_object_dispose"); + sprintf (name, "__Block_byref_id_object_dispose%d", kind); block_byref_id_object_dispose[kind] = build_helper_func_decl (get_identifier (name), func_type); DECL_CONTEXT (block_byref_id_object_dispose[kind]) = current_function_decl; Added: llvm-gcc-4.2/trunk/gcc/testsuite/objc.dg/block-weak.m URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/gcc/testsuite/objc.dg/block-weak.m?rev=83569&view=auto ============================================================================== --- llvm-gcc-4.2/trunk/gcc/testsuite/objc.dg/block-weak.m (added) +++ llvm-gcc-4.2/trunk/gcc/testsuite/objc.dg/block-weak.m Thu Oct 8 15:12:52 2009 @@ -0,0 +1,15 @@ +/* APPLE LOCAL file __block copy/dispose 7284529 */ +/* { dg-do compile { target *-*-darwin* } } */ +/* { dg-options "-fblocks" } */ + + at interface ShortcutsController + at end + + at class barbar; + at implementation ShortcutsController +- (void) _markConflicts +{ + __attribute__((__blocks__(byref))) barbar * matchDict; + __attribute__((__blocks__(byref))) void (^markConflictsBlock)(); +} + at end From dpatel at apple.com 
Thu Oct 8 15:41:18 2009 From: dpatel at apple.com (Devang Patel) Date: Thu, 08 Oct 2009 20:41:18 -0000 Subject: [llvm-commits] [llvm] r83571 - /llvm/trunk/lib/CodeGen/MachineModuleInfo.cpp Message-ID: <200910082041.n98KfIoh031361@zion.cs.uiuc.edu> Author: dpatel Date: Thu Oct 8 15:41:17 2009 New Revision: 83571 URL: http://llvm.org/viewvc/llvm-project?rev=83571&view=rev Log: Clear variable debug info map at the end of the function. Modified: llvm/trunk/lib/CodeGen/MachineModuleInfo.cpp Modified: llvm/trunk/lib/CodeGen/MachineModuleInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MachineModuleInfo.cpp?rev=83571&r1=83570&r2=83571&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/MachineModuleInfo.cpp (original) +++ llvm/trunk/lib/CodeGen/MachineModuleInfo.cpp Thu Oct 8 15:41:17 2009 @@ -76,6 +76,9 @@ FilterEnds.clear(); CallsEHReturn = 0; CallsUnwindInit = 0; +#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN + VariableDbgInfo.clear(); +#endif } /// AnalyzeModule - Scan the module for global debug information. From asl at math.spbu.ru Thu Oct 8 15:43:22 2009 From: asl at math.spbu.ru (Anton Korobeynikov) Date: Thu, 08 Oct 2009 20:43:22 -0000 Subject: [llvm-commits] [llvm] r83572 - in /llvm/trunk: lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp test/CodeGen/ARM/t2-imm.ll Message-ID: <200910082043.n98KhMS1031451@zion.cs.uiuc.edu> Author: asl Date: Thu Oct 8 15:43:22 2009 New Revision: 83572 URL: http://llvm.org/viewvc/llvm-project?rev=83572&view=rev Log: Use lower16 / upper16 imm modifiers to asmprint 32-bit imms split via movt/movw pair. 
Modified: llvm/trunk/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp llvm/trunk/test/CodeGen/ARM/t2-imm.ll Modified: llvm/trunk/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp?rev=83572&r1=83571&r2=83572&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp (original) +++ llvm/trunk/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp Thu Oct 8 15:43:22 2009 @@ -329,13 +329,14 @@ } case MachineOperand::MO_Immediate: { int64_t Imm = MO.getImm(); + O << '#'; if (Modifier) { if (strcmp(Modifier, "lo16") == 0) - Imm = Imm & 0xffffLL; + O << ":lower16:"; else if (strcmp(Modifier, "hi16") == 0) - Imm = (Imm & 0xffff0000LL) >> 16; + O << ":upper16:"; } - O << '#' << Imm; + O << Imm; break; } case MachineOperand::MO_MachineBasicBlock: Modified: llvm/trunk/test/CodeGen/ARM/t2-imm.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/t2-imm.ll?rev=83572&r1=83571&r2=83572&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/ARM/t2-imm.ll (original) +++ llvm/trunk/test/CodeGen/ARM/t2-imm.ll Thu Oct 8 15:43:22 2009 @@ -2,8 +2,8 @@ define i32 @f6(i32 %a) { ; CHECK:f6 -; CHECK: movw r0, #1123 -; CHECK: movt r0, #1000 +; CHECK: movw r0, #:lower16:65537123 +; CHECK: movt r0, #:upper16:65537123 %tmp = add i32 0, 65537123 ret i32 %tmp } From isanbard at gmail.com Thu Oct 8 15:52:51 2009 From: isanbard at gmail.com (Bill Wendling) Date: Thu, 08 Oct 2009 20:52:51 -0000 Subject: [llvm-commits] [llvm] r83573 - in /llvm/trunk: lib/Analysis/DebugInfo.cpp test/DebugInfo/2009-10-08-DebugInfo-NullGlobalVariable.ll Message-ID: <200910082052.n98KqqV6031879@zion.cs.uiuc.edu> Author: void Date: Thu Oct 8 15:52:51 2009 New Revision: 83573 URL: http://llvm.org/viewvc/llvm-project?rev=83573&view=rev Log: It's possible for a global variable to be 
optimized out of a metadata object. So we should allow a "null" with this dyn_cast. Added: llvm/trunk/test/DebugInfo/2009-10-08-DebugInfo-NullGlobalVariable.ll Modified: llvm/trunk/lib/Analysis/DebugInfo.cpp Modified: llvm/trunk/lib/Analysis/DebugInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/DebugInfo.cpp?rev=83573&r1=83572&r2=83573&view=diff ============================================================================== --- llvm/trunk/lib/Analysis/DebugInfo.cpp (original) +++ llvm/trunk/lib/Analysis/DebugInfo.cpp Thu Oct 8 15:52:51 2009 @@ -116,7 +116,7 @@ return 0; if (Elt < DbgNode->getNumElements()) - return dyn_cast(DbgNode->getElement(Elt)); + return dyn_cast_or_null(DbgNode->getElement(Elt)); return 0; } Added: llvm/trunk/test/DebugInfo/2009-10-08-DebugInfo-NullGlobalVariable.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/DebugInfo/2009-10-08-DebugInfo-NullGlobalVariable.ll?rev=83573&view=auto ============================================================================== --- llvm/trunk/test/DebugInfo/2009-10-08-DebugInfo-NullGlobalVariable.ll (added) +++ llvm/trunk/test/DebugInfo/2009-10-08-DebugInfo-NullGlobalVariable.ll Thu Oct 8 15:52:51 2009 @@ -0,0 +1,72 @@ +; RUN: llc < %s + +%struct.TConstantDictionary = type { %struct.__CFDictionary* } +%struct.TSharedGlobalSet_AS = type { [52 x i32], [20 x i32], [22 x i32], [8 x i32], [20 x i32], [146 x i32] } +%struct.__CFDictionary = type opaque + + at llvm.used = appending global [1 x i8*] [i8* bitcast (void ()* @func to i8*)], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0] + +define void @func() ssp { +entry: + tail call void @llvm.dbg.func.start(metadata !13) + tail call void @llvm.dbg.stoppoint(i32 1001, i32 0, metadata !1) + %0 = tail call %struct.TSharedGlobalSet_AS* @g1() nounwind ; <%struct.TSharedGlobalSet_AS*> [#uses=1] + %1 = getelementptr inbounds %struct.TSharedGlobalSet_AS* %0, i32 0, i32 4, i32 4 ; [#uses=1] + %2 = bitcast i32* %1 to 
%struct.TConstantDictionary* ; <%struct.TConstantDictionary*> [#uses=1] + tail call void @g2(%struct.TConstantDictionary* %2) ssp + tail call void @llvm.dbg.stoppoint(i32 1002, i32 0, metadata !1) + %3 = tail call %struct.TSharedGlobalSet_AS* @g1() nounwind ; <%struct.TSharedGlobalSet_AS*> [#uses=1] + %4 = getelementptr inbounds %struct.TSharedGlobalSet_AS* %3, i32 0, i32 4, i32 3 ; [#uses=1] + %5 = bitcast i32* %4 to %struct.TConstantDictionary* ; <%struct.TConstantDictionary*> [#uses=1] + tail call void @g4(%struct.TConstantDictionary* %5) ssp + tail call void @llvm.dbg.stoppoint(i32 1003, i32 0, metadata !1) + %6 = tail call %struct.TSharedGlobalSet_AS* @g1() nounwind ; <%struct.TSharedGlobalSet_AS*> [#uses=1] + %7 = getelementptr inbounds %struct.TSharedGlobalSet_AS* %6, i32 0, i32 4, i32 2 ; [#uses=1] + %8 = bitcast i32* %7 to %struct.TConstantDictionary* ; <%struct.TConstantDictionary*> [#uses=1] + tail call void @g3(%struct.TConstantDictionary* %8) ssp + tail call void @llvm.dbg.stoppoint(i32 1004, i32 0, metadata !1) + %9 = tail call %struct.TSharedGlobalSet_AS* @g1() nounwind ; <%struct.TSharedGlobalSet_AS*> [#uses=1] + %10 = getelementptr inbounds %struct.TSharedGlobalSet_AS* %9, i32 0, i32 4, i32 1 ; [#uses=1] + %11 = bitcast i32* %10 to %struct.TConstantDictionary* ; <%struct.TConstantDictionary*> [#uses=1] + tail call void @g4(%struct.TConstantDictionary* %11) ssp + tail call void @llvm.dbg.stoppoint(i32 1005, i32 0, metadata !1) + tail call void @g5() + tail call void @llvm.dbg.stoppoint(i32 1006, i32 0, metadata !1) + tail call void @llvm.dbg.region.end(metadata !13) + ret void +} + +declare void @llvm.dbg.func.start(metadata) nounwind readnone + +declare void @llvm.dbg.stoppoint(i32, i32, metadata) nounwind readnone + +declare void @llvm.dbg.region.end(metadata) nounwind readnone + +declare %struct.TSharedGlobalSet_AS* @g1() nounwind readonly ssp + +declare void @g2(%struct.TConstantDictionary* nocapture) ssp align 2 + +declare void 
@g3(%struct.TConstantDictionary* nocapture) ssp align 2 + +declare void @g4(%struct.TConstantDictionary* nocapture) ssp align 2 + +declare void @g5() + +!llvm.dbg.gv = !{!0, !9, !10, !11, !12} + +!0 = metadata !{i32 458804, i32 0, metadata !1, metadata !"_ZZ7UASInitmmmmmmmmmE5C.408", metadata !"C.408", metadata !"_ZZ7UASInitmmmmmmmmmE5C.408", metadata !1, i32 874, metadata !2, i1 false, i1 true, null}; [DW_TAG_variable ] +!1 = metadata !{i32 458769, i32 0, i32 4, metadata !"func.cp", metadata !"/tmp/func", metadata !"4.2.1 (Based on Apple Inc. build 5653) (LLVM build 2311)", i1 false, i1 false, metadata !"", i32 0}; [DW_TAG_compile_unit ] +!2 = metadata !{i32 458753, metadata !3, metadata !"", metadata !3, i32 0, i64 16, i64 16, i64 0, i32 0, metadata !4, metadata !7, i32 0}; [DW_TAG_array_type ] +!3 = metadata !{i32 458769, i32 0, i32 4, metadata !"testcase.ii", metadata !"/tmp/", metadata !"4.2.1 (Based on Apple Inc. build 5653) (LLVM build 2311)", i1 true, i1 false, metadata !"", i32 0}; [DW_TAG_compile_unit ] +!4 = metadata !{i32 458774, metadata !3, metadata !"UniChar", metadata !5, i32 417, i64 0, i64 0, i64 0, i32 0, metadata !6}; [DW_TAG_typedef ] +!5 = metadata !{i32 458769, i32 0, i32 4, metadata !"MacTypes.h", metadata !"/System/Library/Frameworks/CoreServices.framework/Headers/../Frameworks/CarbonCore.framework/Headers", metadata !"4.2.1 (Based on Apple Inc. 
build 5653) (LLVM build 2311)", i1 false, i1 true, metadata !"", i32 0}; [DW_TAG_compile_unit ] +!6 = metadata !{i32 458788, metadata !3, metadata !"short unsigned int", metadata !3, i32 0, i64 16, i64 16, i64 0, i32 0, i32 7}; [DW_TAG_base_type ] +!7 = metadata !{metadata !8} +!8 = metadata !{i32 458785, i64 0, i64 0}; [DW_TAG_subrange_type ] +!9 = metadata !{i32 458804, i32 0, metadata !1, metadata !"_ZZ7UASInitmmmmmmmmmE5C.409", metadata !"C.409", metadata !"_ZZ7UASInitmmmmmmmmmE5C.409", metadata !1, i32 877, metadata !2, i1 false, i1 true, null}; [DW_TAG_variable ] +!10 = metadata !{i32 458804, i32 0, metadata !1, metadata !"_ZZ7UASInitmmmmmmmmmE5C.410", metadata !"C.410", metadata !"_ZZ7UASInitmmmmmmmmmE5C.410", metadata !1, i32 880, metadata !2, i1 false, i1 true, null}; [DW_TAG_variable ] +!11 = metadata !{i32 458804, i32 0, metadata !1, metadata !"_ZZ7UASInitmmmmmmmmmE5C.411", metadata !"C.411", metadata !"_ZZ7UASInitmmmmmmmmmE5C.411", metadata !1, i32 924, metadata !2, i1 false, i1 true, null}; [DW_TAG_variable ] +!12 = metadata !{i32 458804, i32 0, metadata !1, metadata !"_ZZ7UASInitmmmmmmmmmE5C.412", metadata !"C.412", metadata !"_ZZ7UASInitmmmmmmmmmE5C.412", metadata !1, i32 928, metadata !2, i1 false, i1 true, null}; [DW_TAG_variable ] +!13 = metadata !{i32 458798, i32 0, metadata !3, metadata !"UASShutdown", metadata !"UASShutdown", metadata !"_Z11UASShutdownv", metadata !1, i32 999, metadata !14, i1 false, i1 true}; [DW_TAG_subprogram ] +!14 = metadata !{i32 458773, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !15, i32 0}; [DW_TAG_subroutine_type ] +!15 = metadata !{null} From dgregor at apple.com Thu Oct 8 16:24:34 2009 From: dgregor at apple.com (Douglas Gregor) Date: Thu, 08 Oct 2009 21:24:34 -0000 Subject: [llvm-commits] [llvm] r83575 - /llvm/trunk/lib/Analysis/CMakeLists.txt Message-ID: <200910082124.n98LOYYs000784@zion.cs.uiuc.edu> Author: dgregor Date: Thu Oct 8 16:24:34 2009 New Revision: 83575 
URL: http://llvm.org/viewvc/llvm-project?rev=83575&view=rev Log: Update CMake build yet again after a source file was removed Modified: llvm/trunk/lib/Analysis/CMakeLists.txt Modified: llvm/trunk/lib/Analysis/CMakeLists.txt URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/CMakeLists.txt?rev=83575&r1=83574&r2=83575&view=diff ============================================================================== --- llvm/trunk/lib/Analysis/CMakeLists.txt (original) +++ llvm/trunk/lib/Analysis/CMakeLists.txt Thu Oct 8 16:24:34 2009 @@ -21,7 +21,6 @@ LoopDependenceAnalysis.cpp LoopInfo.cpp LoopPass.cpp - LoopVR.cpp MallocHelper.cpp MemoryDependenceAnalysis.cpp PointerTracking.cpp From criswell at cs.uiuc.edu Thu Oct 8 16:53:14 2009 From: criswell at cs.uiuc.edu (John Criswell) Date: Thu, 8 Oct 2009 16:53:14 -0500 Subject: [llvm-commits] CVS: llvm-www/Users.html Message-ID: <200910082153.n98LrEZ0017954@maute.cs.uiuc.edu> Changes in directory llvm-www: Users.html updated: 1.66 -> 1.67 --- Log message: Added Adobe's ActionScript 3 AOT compiler for iPhone development. --- Diffs of the changes: (+15 -1) Users.html | 16 +++++++++++++++- 1 files changed, 15 insertions(+), 1 deletion(-) Index: llvm-www/Users.html diff -u llvm-www/Users.html:1.66 llvm-www/Users.html:1.67 --- llvm-www/Users.html:1.66 Sat Aug 29 15:42:59 2009 +++ llvm-www/Users.html Thu Oct 8 16:52:24 2009 @@ -49,6 +49,20 @@ and Scott Petersen's DevMtg talk for more details. + + + Adobe Systems Incorporated + + ActionScript 3 Ahead-of-Time (AOT) Compiler for iPhone software + development. See Aditya Bansod's + blog entry + and + + this video for more details. + + + + Ageia Technologies @@ -499,6 +513,6 @@ src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!">
    LLVM Development List
    - Last modified: $Date: 2009/08/29 20:42:59 $ + Last modified: $Date: 2009/10/08 21:52:24 $ From bob.wilson at apple.com Thu Oct 8 17:27:33 2009 From: bob.wilson at apple.com (Bob Wilson) Date: Thu, 08 Oct 2009 22:27:33 -0000 Subject: [llvm-commits] [llvm] r83585 - in /llvm/trunk: lib/Target/ARM/ARMISelDAGToDAG.cpp lib/Target/ARM/ARMInstrNEON.td lib/Target/ARM/NEONPreAllocPass.cpp test/CodeGen/ARM/vldlane.ll Message-ID: <200910082227.n98MRYYu003380@zion.cs.uiuc.edu> Author: bwilson Date: Thu Oct 8 17:27:33 2009 New Revision: 83585 URL: http://llvm.org/viewvc/llvm-project?rev=83585&view=rev Log: Add codegen support for NEON vld3lane intrinsics with 128-bit vectors. Modified: llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp llvm/trunk/lib/Target/ARM/ARMInstrNEON.td llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp llvm/trunk/test/CodeGen/ARM/vldlane.ll Modified: llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp?rev=83585&r1=83584&r2=83585&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp (original) +++ llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp Thu Oct 8 17:27:33 2009 @@ -1577,18 +1577,73 @@ SDValue MemAddr, MemUpdate, MemOpc; if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc)) return NULL; + if (VT.is64BitVector()) { + switch (VT.getSimpleVT().SimpleTy) { + default: llvm_unreachable("unhandled vld3lane type"); + case MVT::v8i8: Opc = ARM::VLD3LNd8; break; + case MVT::v4i16: Opc = ARM::VLD3LNd16; break; + case MVT::v2f32: + case MVT::v2i32: Opc = ARM::VLD3LNd32; break; + } + SDValue Chain = N->getOperand(0); + const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, + N->getOperand(3), N->getOperand(4), + N->getOperand(5), N->getOperand(6), Chain }; + return CurDAG->getMachineNode(Opc, dl, VT, VT, VT, MVT::Other, Ops, 8); + } + // Quad registers are handled by extracting subregs, 
doing the load, + // and then inserting the results as subregs. + EVT RegVT; + unsigned Opc2 = 0; switch (VT.getSimpleVT().SimpleTy) { - default: llvm_unreachable("unhandled vld3lane type"); - case MVT::v8i8: Opc = ARM::VLD3LNd8; break; - case MVT::v4i16: Opc = ARM::VLD3LNd16; break; - case MVT::v2f32: - case MVT::v2i32: Opc = ARM::VLD3LNd32; break; + default: llvm_unreachable("unhandled vld2lane type"); + case MVT::v8i16: + Opc = ARM::VLD3LNq16a; + Opc2 = ARM::VLD3LNq16b; + RegVT = MVT::v4i16; + break; + case MVT::v4f32: + Opc = ARM::VLD3LNq32a; + Opc2 = ARM::VLD3LNq32b; + RegVT = MVT::v2f32; + break; + case MVT::v4i32: + Opc = ARM::VLD3LNq32a; + Opc2 = ARM::VLD3LNq32b; + RegVT = MVT::v2i32; + break; } SDValue Chain = N->getOperand(0); - const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, - N->getOperand(3), N->getOperand(4), - N->getOperand(5), N->getOperand(6), Chain }; - return CurDAG->getMachineNode(Opc, dl, VT, VT, VT, MVT::Other, Ops, 8); + unsigned Lane = cast(N->getOperand(6))->getZExtValue(); + unsigned NumElts = RegVT.getVectorNumElements(); + int SubregIdx = (Lane < NumElts) ? ARM::DSUBREG_0 : ARM::DSUBREG_1; + + SDValue D0 = CurDAG->getTargetExtractSubreg(SubregIdx, dl, RegVT, + N->getOperand(3)); + SDValue D1 = CurDAG->getTargetExtractSubreg(SubregIdx, dl, RegVT, + N->getOperand(4)); + SDValue D2 = CurDAG->getTargetExtractSubreg(SubregIdx, dl, RegVT, + N->getOperand(5)); + const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, D0, D1, D2, + getI32Imm(Lane % NumElts), Chain }; + SDNode *VLdLn = CurDAG->getMachineNode((Lane < NumElts) ? 
Opc : Opc2, + dl, RegVT, RegVT, RegVT, + MVT::Other, Ops, 8); + SDValue Q0 = CurDAG->getTargetInsertSubreg(SubregIdx, dl, VT, + N->getOperand(3), + SDValue(VLdLn, 0)); + SDValue Q1 = CurDAG->getTargetInsertSubreg(SubregIdx, dl, VT, + N->getOperand(4), + SDValue(VLdLn, 1)); + SDValue Q2 = CurDAG->getTargetInsertSubreg(SubregIdx, dl, VT, + N->getOperand(5), + SDValue(VLdLn, 2)); + Chain = SDValue(VLdLn, 3); + ReplaceUses(SDValue(N, 0), Q0); + ReplaceUses(SDValue(N, 1), Q1); + ReplaceUses(SDValue(N, 2), Q2); + ReplaceUses(SDValue(N, 3), Chain); + return NULL; } case Intrinsic::arm_neon_vld4lane: { Modified: llvm/trunk/lib/Target/ARM/ARMInstrNEON.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrNEON.td?rev=83585&r1=83584&r2=83585&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrNEON.td (original) +++ llvm/trunk/lib/Target/ARM/ARMInstrNEON.td Thu Oct 8 17:27:33 2009 @@ -286,7 +286,7 @@ def VLD2LNq32b: VLD2LN<0b1001, "vld2.32">; // VLD3LN : Vector Load (single 3-element structure to one lane) -class VLD3LND op11_8, string OpcodeStr> +class VLD3LN op11_8, string OpcodeStr> : NLdSt<1,0b10,op11_8,0b0000, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane), IIC_VLD3, @@ -294,9 +294,17 @@ "\t\\{$dst1[$lane],$dst2[$lane],$dst3[$lane]\\}, $addr"), "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3", []>; -def VLD3LNd8 : VLD3LND<0b0010, "vld3.8">; -def VLD3LNd16 : VLD3LND<0b0110, "vld3.16">; -def VLD3LNd32 : VLD3LND<0b1010, "vld3.32">; +def VLD3LNd8 : VLD3LN<0b0010, "vld3.8">; +def VLD3LNd16 : VLD3LN<0b0110, "vld3.16">; +def VLD3LNd32 : VLD3LN<0b1010, "vld3.32">; + +// vld3 to double-spaced even registers. +def VLD3LNq16a: VLD3LN<0b0101, "vld3.16">; +def VLD3LNq32a: VLD3LN<0b1001, "vld3.32">; + +// vld3 to double-spaced odd registers. 
+def VLD3LNq16b: VLD3LN<0b0101, "vld3.16">; +def VLD3LNq32b: VLD3LN<0b1001, "vld3.32">; // VLD4LN : Vector Load (single 4-element structure to one lane) class VLD4LND op11_8, string OpcodeStr> Modified: llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp?rev=83585&r1=83584&r2=83585&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp (original) +++ llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp Thu Oct 8 17:27:33 2009 @@ -57,6 +57,13 @@ NumRegs = 2; return true; + case ARM::VLD2q8: + case ARM::VLD2q16: + case ARM::VLD2q32: + FirstOpnd = 0; + NumRegs = 4; + return true; + case ARM::VLD2LNq16a: case ARM::VLD2LNq32a: FirstOpnd = 0; @@ -73,13 +80,6 @@ Stride = 2; return true; - case ARM::VLD2q8: - case ARM::VLD2q16: - case ARM::VLD2q32: - FirstOpnd = 0; - NumRegs = 4; - return true; - case ARM::VLD3d8: case ARM::VLD3d16: case ARM::VLD3d32: @@ -109,6 +109,22 @@ Stride = 2; return true; + case ARM::VLD3LNq16a: + case ARM::VLD3LNq32a: + FirstOpnd = 0; + NumRegs = 3; + Offset = 0; + Stride = 2; + return true; + + case ARM::VLD3LNq16b: + case ARM::VLD3LNq32b: + FirstOpnd = 0; + NumRegs = 3; + Offset = 1; + Stride = 2; + return true; + case ARM::VLD4d8: case ARM::VLD4d16: case ARM::VLD4d32: Modified: llvm/trunk/test/CodeGen/ARM/vldlane.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vldlane.ll?rev=83585&r1=83584&r2=83585&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/ARM/vldlane.ll (original) +++ llvm/trunk/test/CodeGen/ARM/vldlane.ll Thu Oct 8 17:27:33 2009 @@ -100,6 +100,10 @@ %struct.__neon_int32x2x3_t = type { <2 x i32>, <2 x i32>, <2 x i32> } %struct.__neon_float32x2x3_t = type { <2 x float>, <2 x float>, <2 x float> } +%struct.__neon_int16x8x3_t = type { <8 x i16>, <8 x i16>, <8 x i16> } 
+%struct.__neon_int32x4x3_t = type { <4 x i32>, <4 x i32>, <4 x i32> } +%struct.__neon_float32x4x3_t = type { <4 x float>, <4 x float>, <4 x float> } + define <8 x i8> @vld3lanei8(i8* %A, <8 x i8>* %B) nounwind { ;CHECK: vld3lanei8: ;CHECK: vld3.8 @@ -152,11 +156,54 @@ ret <2 x float> %tmp7 } +define <8 x i16> @vld3laneQi16(i16* %A, <8 x i16>* %B) nounwind { +;CHECK: vld3laneQi16: +;CHECK: vld3.16 + %tmp1 = load <8 x i16>* %B + %tmp2 = call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16(i16* %A, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1) + %tmp3 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 1 + %tmp5 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 2 + %tmp6 = add <8 x i16> %tmp3, %tmp4 + %tmp7 = add <8 x i16> %tmp5, %tmp6 + ret <8 x i16> %tmp7 +} + +define <4 x i32> @vld3laneQi32(i32* %A, <4 x i32>* %B) nounwind { +;CHECK: vld3laneQi32: +;CHECK: vld3.32 + %tmp1 = load <4 x i32>* %B + %tmp2 = call %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3lane.v4i32(i32* %A, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 3) + %tmp3 = extractvalue %struct.__neon_int32x4x3_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int32x4x3_t %tmp2, 1 + %tmp5 = extractvalue %struct.__neon_int32x4x3_t %tmp2, 2 + %tmp6 = add <4 x i32> %tmp3, %tmp4 + %tmp7 = add <4 x i32> %tmp5, %tmp6 + ret <4 x i32> %tmp7 +} + +define <4 x float> @vld3laneQf(float* %A, <4 x float>* %B) nounwind { +;CHECK: vld3laneQf: +;CHECK: vld3.32 + %tmp1 = load <4 x float>* %B + %tmp2 = call %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3lane.v4f32(float* %A, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1) + %tmp3 = extractvalue %struct.__neon_float32x4x3_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_float32x4x3_t %tmp2, 1 + %tmp5 = extractvalue %struct.__neon_float32x4x3_t %tmp2, 2 + %tmp6 = add <4 x float> %tmp3, %tmp4 + %tmp7 = add <4 x float> %tmp5, %tmp6 + ret <4 x float> %tmp7 +} + declare 
%struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32) nounwind readonly declare %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32) nounwind readonly declare %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32) nounwind readonly declare %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32) nounwind readonly +declare %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32) nounwind readonly +declare %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32) nounwind readonly +declare %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32) nounwind readonly + %struct.__neon_int8x8x4_t = type { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %struct.__neon_int16x4x4_t = type { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %struct.__neon_int32x2x4_t = type { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } From evan.cheng at apple.com Thu Oct 8 17:28:29 2009 From: evan.cheng at apple.com (Evan Cheng) Date: Thu, 8 Oct 2009 15:28:29 -0700 Subject: [llvm-commits] [llvm] r83572 - in /llvm/trunk: lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp test/CodeGen/ARM/t2-imm.ll In-Reply-To: <200910082043.n98KhMS1031451@zion.cs.uiuc.edu> References: <200910082043.n98KhMS1031451@zion.cs.uiuc.edu> Message-ID: <6E7DA8A2-5D85-4CEF-8F22-4FA1FB21FC35@apple.com> Should this be controlled by -asm-verbose? Evan On Oct 8, 2009, at 1:43 PM, Anton Korobeynikov wrote: > Author: asl > Date: Thu Oct 8 15:43:22 2009 > New Revision: 83572 > > URL: http://llvm.org/viewvc/llvm-project?rev=83572&view=rev > Log: > Use lower16 / upper16 imm modifiers to asmprint 32-bit imms splitted > via movt/movw pair. 
> > Modified: > llvm/trunk/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp > llvm/trunk/test/CodeGen/ARM/t2-imm.ll > > Modified: llvm/trunk/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp?rev=83572&r1=83571&r2=83572&view=diff > > = > = > = > = > = > = > = > = > ====================================================================== > --- llvm/trunk/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp (original) > +++ llvm/trunk/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp Thu Oct > 8 15:43:22 2009 > @@ -329,13 +329,14 @@ > } > case MachineOperand::MO_Immediate: { > int64_t Imm = MO.getImm(); > + O << '#'; > if (Modifier) { > if (strcmp(Modifier, "lo16") == 0) > - Imm = Imm & 0xffffLL; > + O << ":lower16:"; > else if (strcmp(Modifier, "hi16") == 0) > - Imm = (Imm & 0xffff0000LL) >> 16; > + O << ":upper16:"; > } > - O << '#' << Imm; > + O << Imm; > break; > } > case MachineOperand::MO_MachineBasicBlock: > > Modified: llvm/trunk/test/CodeGen/ARM/t2-imm.ll > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/t2-imm.ll?rev=83572&r1=83571&r2=83572&view=diff > > = > = > = > = > = > = > = > = > ====================================================================== > --- llvm/trunk/test/CodeGen/ARM/t2-imm.ll (original) > +++ llvm/trunk/test/CodeGen/ARM/t2-imm.ll Thu Oct 8 15:43:22 2009 > @@ -2,8 +2,8 @@ > > define i32 @f6(i32 %a) { > ; CHECK:f6 > -; CHECK: movw r0, #1123 > -; CHECK: movt r0, #1000 > +; CHECK: movw r0, #:lower16:65537123 > +; CHECK: movt r0, #:upper16:65537123 > %tmp = add i32 0, 65537123 > ret i32 %tmp > } > > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits From asl at math.spbu.ru Thu Oct 8 17:33:42 2009 From: asl at math.spbu.ru (Anton Korobeynikov) Date: Fri, 9 Oct 2009 02:33:42 +0400 Subject: [llvm-commits] [llvm] r83572 - in /llvm/trunk: 
lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp test/CodeGen/ARM/t2-imm.ll In-Reply-To: <6E7DA8A2-5D85-4CEF-8F22-4FA1FB21FC35@apple.com> References: <200910082043.n98KhMS1031451@zion.cs.uiuc.edu> <6E7DA8A2-5D85-4CEF-8F22-4FA1FB21FC35@apple.com> Message-ID: > Should this be controlled by -asm-verbose? No, for example, there is no other way to split address of global into low and upper 16 bits. -- With best regards, Anton Korobeynikov Faculty of Mathematics and Mechanics, Saint Petersburg State University From bob.wilson at apple.com Thu Oct 8 17:33:53 2009 From: bob.wilson at apple.com (Bob Wilson) Date: Thu, 08 Oct 2009 22:33:53 -0000 Subject: [llvm-commits] [llvm] r83587 - in /llvm/trunk/test/CodeGen/ARM: vqabs.ll vqadd.ll vqdmlal.ll vqdmlsl.ll vqdmulh.ll vqdmull.ll vqmovn.ll vqneg.ll vqrshl.ll vqrshrn.ll vqshl.ll vqshrn.ll vqsub.ll Message-ID: <200910082233.n98MXrZw003654@zion.cs.uiuc.edu> Author: bwilson Date: Thu Oct 8 17:33:53 2009 New Revision: 83587 URL: http://llvm.org/viewvc/llvm-project?rev=83587&view=rev Log: Convert more NEON tests to use FileCheck. 
Modified: llvm/trunk/test/CodeGen/ARM/vqabs.ll llvm/trunk/test/CodeGen/ARM/vqadd.ll llvm/trunk/test/CodeGen/ARM/vqdmlal.ll llvm/trunk/test/CodeGen/ARM/vqdmlsl.ll llvm/trunk/test/CodeGen/ARM/vqdmulh.ll llvm/trunk/test/CodeGen/ARM/vqdmull.ll llvm/trunk/test/CodeGen/ARM/vqmovn.ll llvm/trunk/test/CodeGen/ARM/vqneg.ll llvm/trunk/test/CodeGen/ARM/vqrshl.ll llvm/trunk/test/CodeGen/ARM/vqrshrn.ll llvm/trunk/test/CodeGen/ARM/vqshl.ll llvm/trunk/test/CodeGen/ARM/vqshrn.ll llvm/trunk/test/CodeGen/ARM/vqsub.ll Modified: llvm/trunk/test/CodeGen/ARM/vqabs.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vqabs.ll?rev=83587&r1=83586&r2=83587&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/ARM/vqabs.ll (original) +++ llvm/trunk/test/CodeGen/ARM/vqabs.ll Thu Oct 8 17:33:53 2009 @@ -1,39 +1,48 @@ -; RUN: llc < %s -march=arm -mattr=+neon > %t -; RUN: grep {vqabs\\.s8} %t | count 2 -; RUN: grep {vqabs\\.s16} %t | count 2 -; RUN: grep {vqabs\\.s32} %t | count 2 +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s define <8 x i8> @vqabss8(<8 x i8>* %A) nounwind { +;CHECK: vqabss8: +;CHECK: vqabs.s8 %tmp1 = load <8 x i8>* %A %tmp2 = call <8 x i8> @llvm.arm.neon.vqabs.v8i8(<8 x i8> %tmp1) ret <8 x i8> %tmp2 } define <4 x i16> @vqabss16(<4 x i16>* %A) nounwind { +;CHECK: vqabss16: +;CHECK: vqabs.s16 %tmp1 = load <4 x i16>* %A %tmp2 = call <4 x i16> @llvm.arm.neon.vqabs.v4i16(<4 x i16> %tmp1) ret <4 x i16> %tmp2 } define <2 x i32> @vqabss32(<2 x i32>* %A) nounwind { +;CHECK: vqabss32: +;CHECK: vqabs.s32 %tmp1 = load <2 x i32>* %A %tmp2 = call <2 x i32> @llvm.arm.neon.vqabs.v2i32(<2 x i32> %tmp1) ret <2 x i32> %tmp2 } define <16 x i8> @vqabsQs8(<16 x i8>* %A) nounwind { +;CHECK: vqabsQs8: +;CHECK: vqabs.s8 %tmp1 = load <16 x i8>* %A %tmp2 = call <16 x i8> @llvm.arm.neon.vqabs.v16i8(<16 x i8> %tmp1) ret <16 x i8> %tmp2 } define <8 x i16> @vqabsQs16(<8 x i16>* %A) nounwind { +;CHECK: vqabsQs16: 
+;CHECK: vqabs.s16 %tmp1 = load <8 x i16>* %A %tmp2 = call <8 x i16> @llvm.arm.neon.vqabs.v8i16(<8 x i16> %tmp1) ret <8 x i16> %tmp2 } define <4 x i32> @vqabsQs32(<4 x i32>* %A) nounwind { +;CHECK: vqabsQs32: +;CHECK: vqabs.s32 %tmp1 = load <4 x i32>* %A %tmp2 = call <4 x i32> @llvm.arm.neon.vqabs.v4i32(<4 x i32> %tmp1) ret <4 x i32> %tmp2 Modified: llvm/trunk/test/CodeGen/ARM/vqadd.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vqadd.ll?rev=83587&r1=83586&r2=83587&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/ARM/vqadd.ll (original) +++ llvm/trunk/test/CodeGen/ARM/vqadd.ll Thu Oct 8 17:33:53 2009 @@ -1,14 +1,8 @@ -; RUN: llc < %s -march=arm -mattr=+neon > %t -; RUN: grep {vqadd\\.s8} %t | count 2 -; RUN: grep {vqadd\\.s16} %t | count 2 -; RUN: grep {vqadd\\.s32} %t | count 2 -; RUN: grep {vqadd\\.s64} %t | count 2 -; RUN: grep {vqadd\\.u8} %t | count 2 -; RUN: grep {vqadd\\.u16} %t | count 2 -; RUN: grep {vqadd\\.u32} %t | count 2 -; RUN: grep {vqadd\\.u64} %t | count 2 +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s define <8 x i8> @vqadds8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vqadds8: +;CHECK: vqadd.s8 %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B %tmp3 = call <8 x i8> @llvm.arm.neon.vqadds.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) @@ -16,6 +10,8 @@ } define <4 x i16> @vqadds16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vqadds16: +;CHECK: vqadd.s16 %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B %tmp3 = call <4 x i16> @llvm.arm.neon.vqadds.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) @@ -23,6 +19,8 @@ } define <2 x i32> @vqadds32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vqadds32: +;CHECK: vqadd.s32 %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B %tmp3 = call <2 x i32> @llvm.arm.neon.vqadds.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) @@ -30,6 +28,8 @@ } define <1 x i64> @vqadds64(<1 x i64>* %A, <1 x i64>* %B) nounwind { +;CHECK: 
vqadds64: +;CHECK: vqadd.s64 %tmp1 = load <1 x i64>* %A %tmp2 = load <1 x i64>* %B %tmp3 = call <1 x i64> @llvm.arm.neon.vqadds.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2) @@ -37,6 +37,8 @@ } define <8 x i8> @vqaddu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vqaddu8: +;CHECK: vqadd.u8 %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B %tmp3 = call <8 x i8> @llvm.arm.neon.vqaddu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) @@ -44,6 +46,8 @@ } define <4 x i16> @vqaddu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vqaddu16: +;CHECK: vqadd.u16 %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B %tmp3 = call <4 x i16> @llvm.arm.neon.vqaddu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) @@ -51,6 +55,8 @@ } define <2 x i32> @vqaddu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vqaddu32: +;CHECK: vqadd.u32 %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B %tmp3 = call <2 x i32> @llvm.arm.neon.vqaddu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) @@ -58,6 +64,8 @@ } define <1 x i64> @vqaddu64(<1 x i64>* %A, <1 x i64>* %B) nounwind { +;CHECK: vqaddu64: +;CHECK: vqadd.u64 %tmp1 = load <1 x i64>* %A %tmp2 = load <1 x i64>* %B %tmp3 = call <1 x i64> @llvm.arm.neon.vqaddu.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2) @@ -65,6 +73,8 @@ } define <16 x i8> @vqaddQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vqaddQs8: +;CHECK: vqadd.s8 %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B %tmp3 = call <16 x i8> @llvm.arm.neon.vqadds.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) @@ -72,6 +82,8 @@ } define <8 x i16> @vqaddQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vqaddQs16: +;CHECK: vqadd.s16 %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B %tmp3 = call <8 x i16> @llvm.arm.neon.vqadds.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) @@ -79,6 +91,8 @@ } define <4 x i32> @vqaddQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vqaddQs32: +;CHECK: vqadd.s32 %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B %tmp3 = call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %tmp1, <4 
x i32> %tmp2) @@ -86,6 +100,8 @@ } define <2 x i64> @vqaddQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind { +;CHECK: vqaddQs64: +;CHECK: vqadd.s64 %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i64>* %B %tmp3 = call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) @@ -93,6 +109,8 @@ } define <16 x i8> @vqaddQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vqaddQu8: +;CHECK: vqadd.u8 %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B %tmp3 = call <16 x i8> @llvm.arm.neon.vqaddu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) @@ -100,6 +118,8 @@ } define <8 x i16> @vqaddQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vqaddQu16: +;CHECK: vqadd.u16 %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B %tmp3 = call <8 x i16> @llvm.arm.neon.vqaddu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) @@ -107,6 +127,8 @@ } define <4 x i32> @vqaddQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vqaddQu32: +;CHECK: vqadd.u32 %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B %tmp3 = call <4 x i32> @llvm.arm.neon.vqaddu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) @@ -114,6 +136,8 @@ } define <2 x i64> @vqaddQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind { +;CHECK: vqaddQu64: +;CHECK: vqadd.u64 %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i64>* %B %tmp3 = call <2 x i64> @llvm.arm.neon.vqaddu.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) Modified: llvm/trunk/test/CodeGen/ARM/vqdmlal.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vqdmlal.ll?rev=83587&r1=83586&r2=83587&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/ARM/vqdmlal.ll (original) +++ llvm/trunk/test/CodeGen/ARM/vqdmlal.ll Thu Oct 8 17:33:53 2009 @@ -1,8 +1,8 @@ -; RUN: llc < %s -march=arm -mattr=+neon > %t -; RUN: grep {vqdmlal\\.s16} %t | count 1 -; RUN: grep {vqdmlal\\.s32} %t | count 1 +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s define <4 x i32> @vqdmlals16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) 
nounwind { +;CHECK: vqdmlals16: +;CHECK: vqdmlal.s16 %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i16>* %B %tmp3 = load <4 x i16>* %C @@ -11,6 +11,8 @@ } define <2 x i64> @vqdmlals32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind { +;CHECK: vqdmlals32: +;CHECK: vqdmlal.s32 %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i32>* %B %tmp3 = load <2 x i32>* %C Modified: llvm/trunk/test/CodeGen/ARM/vqdmlsl.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vqdmlsl.ll?rev=83587&r1=83586&r2=83587&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/ARM/vqdmlsl.ll (original) +++ llvm/trunk/test/CodeGen/ARM/vqdmlsl.ll Thu Oct 8 17:33:53 2009 @@ -1,8 +1,8 @@ -; RUN: llc < %s -march=arm -mattr=+neon > %t -; RUN: grep {vqdmlsl\\.s16} %t | count 1 -; RUN: grep {vqdmlsl\\.s32} %t | count 1 +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s define <4 x i32> @vqdmlsls16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind { +;CHECK: vqdmlsls16: +;CHECK: vqdmlsl.s16 %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i16>* %B %tmp3 = load <4 x i16>* %C @@ -11,6 +11,8 @@ } define <2 x i64> @vqdmlsls32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind { +;CHECK: vqdmlsls32: +;CHECK: vqdmlsl.s32 %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i32>* %B %tmp3 = load <2 x i32>* %C Modified: llvm/trunk/test/CodeGen/ARM/vqdmulh.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vqdmulh.ll?rev=83587&r1=83586&r2=83587&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/ARM/vqdmulh.ll (original) +++ llvm/trunk/test/CodeGen/ARM/vqdmulh.ll Thu Oct 8 17:33:53 2009 @@ -1,10 +1,8 @@ -; RUN: llc < %s -march=arm -mattr=+neon > %t -; RUN: grep {vqdmulh\\.s16} %t | count 2 -; RUN: grep {vqdmulh\\.s32} %t | count 2 -; RUN: grep {vqrdmulh\\.s16} %t | count 2 -; RUN: grep {vqrdmulh\\.s32} %t | count 2 +; RUN: llc < %s -march=arm 
-mattr=+neon | FileCheck %s define <4 x i16> @vqdmulhs16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vqdmulhs16: +;CHECK: vqdmulh.s16 %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B %tmp3 = call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) @@ -12,6 +10,8 @@ } define <2 x i32> @vqdmulhs32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vqdmulhs32: +;CHECK: vqdmulh.s32 %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B %tmp3 = call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) @@ -19,6 +19,8 @@ } define <8 x i16> @vqdmulhQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vqdmulhQs16: +;CHECK: vqdmulh.s16 %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B %tmp3 = call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) @@ -26,6 +28,8 @@ } define <4 x i32> @vqdmulhQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vqdmulhQs32: +;CHECK: vqdmulh.s32 %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B %tmp3 = call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) @@ -39,6 +43,8 @@ declare <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32>, <4 x i32>) nounwind readnone define <4 x i16> @vqrdmulhs16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vqrdmulhs16: +;CHECK: vqrdmulh.s16 %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B %tmp3 = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) @@ -46,6 +52,8 @@ } define <2 x i32> @vqrdmulhs32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vqrdmulhs32: +;CHECK: vqrdmulh.s32 %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B %tmp3 = call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) @@ -53,6 +61,8 @@ } define <8 x i16> @vqrdmulhQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vqrdmulhQs16: +;CHECK: vqrdmulh.s16 %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B %tmp3 = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %tmp1, 
<8 x i16> %tmp2) @@ -60,6 +70,8 @@ } define <4 x i32> @vqrdmulhQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vqrdmulhQs32: +;CHECK: vqrdmulh.s32 %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B %tmp3 = call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) Modified: llvm/trunk/test/CodeGen/ARM/vqdmull.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vqdmull.ll?rev=83587&r1=83586&r2=83587&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/ARM/vqdmull.ll (original) +++ llvm/trunk/test/CodeGen/ARM/vqdmull.ll Thu Oct 8 17:33:53 2009 @@ -1,8 +1,8 @@ -; RUN: llc < %s -march=arm -mattr=+neon > %t -; RUN: grep {vqdmull\\.s16} %t | count 1 -; RUN: grep {vqdmull\\.s32} %t | count 1 +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s define <4 x i32> @vqdmulls16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vqdmulls16: +;CHECK: vqdmull.s16 %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B %tmp3 = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) @@ -10,6 +10,8 @@ } define <2 x i64> @vqdmulls32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vqdmulls32: +;CHECK: vqdmull.s32 %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B %tmp3 = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) Modified: llvm/trunk/test/CodeGen/ARM/vqmovn.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vqmovn.ll?rev=83587&r1=83586&r2=83587&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/ARM/vqmovn.ll (original) +++ llvm/trunk/test/CodeGen/ARM/vqmovn.ll Thu Oct 8 17:33:53 2009 @@ -1,63 +1,72 @@ -; RUN: llc < %s -march=arm -mattr=+neon > %t -; RUN: grep {vqmovn\\.s16} %t | count 1 -; RUN: grep {vqmovn\\.s32} %t | count 1 -; RUN: grep {vqmovn\\.s64} %t | count 1 -; RUN: grep {vqmovn\\.u16} %t | count 1 -; RUN: grep 
{vqmovn\\.u32} %t | count 1 -; RUN: grep {vqmovn\\.u64} %t | count 1 -; RUN: grep {vqmovun\\.s16} %t | count 1 -; RUN: grep {vqmovun\\.s32} %t | count 1 -; RUN: grep {vqmovun\\.s64} %t | count 1 +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s define <8 x i8> @vqmovns16(<8 x i16>* %A) nounwind { +;CHECK: vqmovns16: +;CHECK: vqmovn.s16 %tmp1 = load <8 x i16>* %A %tmp2 = call <8 x i8> @llvm.arm.neon.vqmovns.v8i8(<8 x i16> %tmp1) ret <8 x i8> %tmp2 } define <4 x i16> @vqmovns32(<4 x i32>* %A) nounwind { +;CHECK: vqmovns32: +;CHECK: vqmovn.s32 %tmp1 = load <4 x i32>* %A %tmp2 = call <4 x i16> @llvm.arm.neon.vqmovns.v4i16(<4 x i32> %tmp1) ret <4 x i16> %tmp2 } define <2 x i32> @vqmovns64(<2 x i64>* %A) nounwind { +;CHECK: vqmovns64: +;CHECK: vqmovn.s64 %tmp1 = load <2 x i64>* %A %tmp2 = call <2 x i32> @llvm.arm.neon.vqmovns.v2i32(<2 x i64> %tmp1) ret <2 x i32> %tmp2 } define <8 x i8> @vqmovnu16(<8 x i16>* %A) nounwind { +;CHECK: vqmovnu16: +;CHECK: vqmovn.u16 %tmp1 = load <8 x i16>* %A %tmp2 = call <8 x i8> @llvm.arm.neon.vqmovnu.v8i8(<8 x i16> %tmp1) ret <8 x i8> %tmp2 } define <4 x i16> @vqmovnu32(<4 x i32>* %A) nounwind { +;CHECK: vqmovnu32: +;CHECK: vqmovn.u32 %tmp1 = load <4 x i32>* %A %tmp2 = call <4 x i16> @llvm.arm.neon.vqmovnu.v4i16(<4 x i32> %tmp1) ret <4 x i16> %tmp2 } define <2 x i32> @vqmovnu64(<2 x i64>* %A) nounwind { +;CHECK: vqmovnu64: +;CHECK: vqmovn.u64 %tmp1 = load <2 x i64>* %A %tmp2 = call <2 x i32> @llvm.arm.neon.vqmovnu.v2i32(<2 x i64> %tmp1) ret <2 x i32> %tmp2 } define <8 x i8> @vqmovuns16(<8 x i16>* %A) nounwind { +;CHECK: vqmovuns16: +;CHECK: vqmovun.s16 %tmp1 = load <8 x i16>* %A %tmp2 = call <8 x i8> @llvm.arm.neon.vqmovnsu.v8i8(<8 x i16> %tmp1) ret <8 x i8> %tmp2 } define <4 x i16> @vqmovuns32(<4 x i32>* %A) nounwind { +;CHECK: vqmovuns32: +;CHECK: vqmovun.s32 %tmp1 = load <4 x i32>* %A %tmp2 = call <4 x i16> @llvm.arm.neon.vqmovnsu.v4i16(<4 x i32> %tmp1) ret <4 x i16> %tmp2 } define <2 x i32> @vqmovuns64(<2 x i64>* %A) nounwind { 
+;CHECK: vqmovuns64: +;CHECK: vqmovun.s64 %tmp1 = load <2 x i64>* %A %tmp2 = call <2 x i32> @llvm.arm.neon.vqmovnsu.v2i32(<2 x i64> %tmp1) ret <2 x i32> %tmp2 Modified: llvm/trunk/test/CodeGen/ARM/vqneg.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vqneg.ll?rev=83587&r1=83586&r2=83587&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/ARM/vqneg.ll (original) +++ llvm/trunk/test/CodeGen/ARM/vqneg.ll Thu Oct 8 17:33:53 2009 @@ -1,39 +1,48 @@ -; RUN: llc < %s -march=arm -mattr=+neon > %t -; RUN: grep {vqneg\\.s8} %t | count 2 -; RUN: grep {vqneg\\.s16} %t | count 2 -; RUN: grep {vqneg\\.s32} %t | count 2 +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s define <8 x i8> @vqnegs8(<8 x i8>* %A) nounwind { +;CHECK: vqnegs8: +;CHECK: vqneg.s8 %tmp1 = load <8 x i8>* %A %tmp2 = call <8 x i8> @llvm.arm.neon.vqneg.v8i8(<8 x i8> %tmp1) ret <8 x i8> %tmp2 } define <4 x i16> @vqnegs16(<4 x i16>* %A) nounwind { +;CHECK: vqnegs16: +;CHECK: vqneg.s16 %tmp1 = load <4 x i16>* %A %tmp2 = call <4 x i16> @llvm.arm.neon.vqneg.v4i16(<4 x i16> %tmp1) ret <4 x i16> %tmp2 } define <2 x i32> @vqnegs32(<2 x i32>* %A) nounwind { +;CHECK: vqnegs32: +;CHECK: vqneg.s32 %tmp1 = load <2 x i32>* %A %tmp2 = call <2 x i32> @llvm.arm.neon.vqneg.v2i32(<2 x i32> %tmp1) ret <2 x i32> %tmp2 } define <16 x i8> @vqnegQs8(<16 x i8>* %A) nounwind { +;CHECK: vqnegQs8: +;CHECK: vqneg.s8 %tmp1 = load <16 x i8>* %A %tmp2 = call <16 x i8> @llvm.arm.neon.vqneg.v16i8(<16 x i8> %tmp1) ret <16 x i8> %tmp2 } define <8 x i16> @vqnegQs16(<8 x i16>* %A) nounwind { +;CHECK: vqnegQs16: +;CHECK: vqneg.s16 %tmp1 = load <8 x i16>* %A %tmp2 = call <8 x i16> @llvm.arm.neon.vqneg.v8i16(<8 x i16> %tmp1) ret <8 x i16> %tmp2 } define <4 x i32> @vqnegQs32(<4 x i32>* %A) nounwind { +;CHECK: vqnegQs32: +;CHECK: vqneg.s32 %tmp1 = load <4 x i32>* %A %tmp2 = call <4 x i32> @llvm.arm.neon.vqneg.v4i32(<4 x i32> %tmp1) ret <4 x i32> %tmp2 Modified: 
llvm/trunk/test/CodeGen/ARM/vqrshl.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vqrshl.ll?rev=83587&r1=83586&r2=83587&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/ARM/vqrshl.ll (original) +++ llvm/trunk/test/CodeGen/ARM/vqrshl.ll Thu Oct 8 17:33:53 2009 @@ -1,14 +1,8 @@ -; RUN: llc < %s -march=arm -mattr=+neon > %t -; RUN: grep {vqrshl\\.s8} %t | count 2 -; RUN: grep {vqrshl\\.s16} %t | count 2 -; RUN: grep {vqrshl\\.s32} %t | count 2 -; RUN: grep {vqrshl\\.s64} %t | count 2 -; RUN: grep {vqrshl\\.u8} %t | count 2 -; RUN: grep {vqrshl\\.u16} %t | count 2 -; RUN: grep {vqrshl\\.u32} %t | count 2 -; RUN: grep {vqrshl\\.u64} %t | count 2 +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s define <8 x i8> @vqrshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vqrshls8: +;CHECK: vqrshl.s8 %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B %tmp3 = call <8 x i8> @llvm.arm.neon.vqrshifts.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) @@ -16,6 +10,8 @@ } define <4 x i16> @vqrshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vqrshls16: +;CHECK: vqrshl.s16 %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B %tmp3 = call <4 x i16> @llvm.arm.neon.vqrshifts.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) @@ -23,6 +19,8 @@ } define <2 x i32> @vqrshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vqrshls32: +;CHECK: vqrshl.s32 %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B %tmp3 = call <2 x i32> @llvm.arm.neon.vqrshifts.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) @@ -30,6 +28,8 @@ } define <1 x i64> @vqrshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind { +;CHECK: vqrshls64: +;CHECK: vqrshl.s64 %tmp1 = load <1 x i64>* %A %tmp2 = load <1 x i64>* %B %tmp3 = call <1 x i64> @llvm.arm.neon.vqrshifts.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2) @@ -37,6 +37,8 @@ } define <8 x i8> @vqrshlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vqrshlu8: +;CHECK: vqrshl.u8 %tmp1 = load <8 x i8>* %A 
%tmp2 = load <8 x i8>* %B %tmp3 = call <8 x i8> @llvm.arm.neon.vqrshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) @@ -44,6 +46,8 @@ } define <4 x i16> @vqrshlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vqrshlu16: +;CHECK: vqrshl.u16 %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B %tmp3 = call <4 x i16> @llvm.arm.neon.vqrshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) @@ -51,6 +55,8 @@ } define <2 x i32> @vqrshlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vqrshlu32: +;CHECK: vqrshl.u32 %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B %tmp3 = call <2 x i32> @llvm.arm.neon.vqrshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) @@ -58,6 +64,8 @@ } define <1 x i64> @vqrshlu64(<1 x i64>* %A, <1 x i64>* %B) nounwind { +;CHECK: vqrshlu64: +;CHECK: vqrshl.u64 %tmp1 = load <1 x i64>* %A %tmp2 = load <1 x i64>* %B %tmp3 = call <1 x i64> @llvm.arm.neon.vqrshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2) @@ -65,6 +73,8 @@ } define <16 x i8> @vqrshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vqrshlQs8: +;CHECK: vqrshl.s8 %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B %tmp3 = call <16 x i8> @llvm.arm.neon.vqrshifts.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) @@ -72,6 +82,8 @@ } define <8 x i16> @vqrshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vqrshlQs16: +;CHECK: vqrshl.s16 %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B %tmp3 = call <8 x i16> @llvm.arm.neon.vqrshifts.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) @@ -79,6 +91,8 @@ } define <4 x i32> @vqrshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vqrshlQs32: +;CHECK: vqrshl.s32 %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B %tmp3 = call <4 x i32> @llvm.arm.neon.vqrshifts.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) @@ -86,6 +100,8 @@ } define <2 x i64> @vqrshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind { +;CHECK: vqrshlQs64: +;CHECK: vqrshl.s64 %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i64>* %B %tmp3 = call <2 x i64> @llvm.arm.neon.vqrshifts.v2i64(<2 x i64> %tmp1, <2 
x i64> %tmp2) @@ -93,6 +109,8 @@ } define <16 x i8> @vqrshlQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vqrshlQu8: +;CHECK: vqrshl.u8 %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B %tmp3 = call <16 x i8> @llvm.arm.neon.vqrshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) @@ -100,6 +118,8 @@ } define <8 x i16> @vqrshlQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vqrshlQu16: +;CHECK: vqrshl.u16 %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B %tmp3 = call <8 x i16> @llvm.arm.neon.vqrshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) @@ -107,6 +127,8 @@ } define <4 x i32> @vqrshlQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vqrshlQu32: +;CHECK: vqrshl.u32 %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B %tmp3 = call <4 x i32> @llvm.arm.neon.vqrshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) @@ -114,6 +136,8 @@ } define <2 x i64> @vqrshlQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind { +;CHECK: vqrshlQu64: +;CHECK: vqrshl.u64 %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i64>* %B %tmp3 = call <2 x i64> @llvm.arm.neon.vqrshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) Modified: llvm/trunk/test/CodeGen/ARM/vqrshrn.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vqrshrn.ll?rev=83587&r1=83586&r2=83587&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/ARM/vqrshrn.ll (original) +++ llvm/trunk/test/CodeGen/ARM/vqrshrn.ll Thu Oct 8 17:33:53 2009 @@ -1,63 +1,72 @@ -; RUN: llc < %s -march=arm -mattr=+neon > %t -; RUN: grep {vqrshrn\\.s16} %t | count 1 -; RUN: grep {vqrshrn\\.s32} %t | count 1 -; RUN: grep {vqrshrn\\.s64} %t | count 1 -; RUN: grep {vqrshrn\\.u16} %t | count 1 -; RUN: grep {vqrshrn\\.u32} %t | count 1 -; RUN: grep {vqrshrn\\.u64} %t | count 1 -; RUN: grep {vqrshrun\\.s16} %t | count 1 -; RUN: grep {vqrshrun\\.s32} %t | count 1 -; RUN: grep {vqrshrun\\.s64} %t | count 1 +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s define <8 x i8> 
@vqrshrns8(<8 x i16>* %A) nounwind { +;CHECK: vqrshrns8: +;CHECK: vqrshrn.s16 %tmp1 = load <8 x i16>* %A %tmp2 = call <8 x i8> @llvm.arm.neon.vqrshiftns.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >) ret <8 x i8> %tmp2 } define <4 x i16> @vqrshrns16(<4 x i32>* %A) nounwind { +;CHECK: vqrshrns16: +;CHECK: vqrshrn.s32 %tmp1 = load <4 x i32>* %A %tmp2 = call <4 x i16> @llvm.arm.neon.vqrshiftns.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >) ret <4 x i16> %tmp2 } define <2 x i32> @vqrshrns32(<2 x i64>* %A) nounwind { +;CHECK: vqrshrns32: +;CHECK: vqrshrn.s64 %tmp1 = load <2 x i64>* %A %tmp2 = call <2 x i32> @llvm.arm.neon.vqrshiftns.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >) ret <2 x i32> %tmp2 } define <8 x i8> @vqrshrnu8(<8 x i16>* %A) nounwind { +;CHECK: vqrshrnu8: +;CHECK: vqrshrn.u16 %tmp1 = load <8 x i16>* %A %tmp2 = call <8 x i8> @llvm.arm.neon.vqrshiftnu.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >) ret <8 x i8> %tmp2 } define <4 x i16> @vqrshrnu16(<4 x i32>* %A) nounwind { +;CHECK: vqrshrnu16: +;CHECK: vqrshrn.u32 %tmp1 = load <4 x i32>* %A %tmp2 = call <4 x i16> @llvm.arm.neon.vqrshiftnu.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >) ret <4 x i16> %tmp2 } define <2 x i32> @vqrshrnu32(<2 x i64>* %A) nounwind { +;CHECK: vqrshrnu32: +;CHECK: vqrshrn.u64 %tmp1 = load <2 x i64>* %A %tmp2 = call <2 x i32> @llvm.arm.neon.vqrshiftnu.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >) ret <2 x i32> %tmp2 } define <8 x i8> @vqrshruns8(<8 x i16>* %A) nounwind { +;CHECK: vqrshruns8: +;CHECK: vqrshrun.s16 %tmp1 = load <8 x i16>* %A %tmp2 = call <8 x i8> @llvm.arm.neon.vqrshiftnsu.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >) ret <8 x i8> %tmp2 } define <4 x i16> @vqrshruns16(<4 x i32>* %A) nounwind { +;CHECK: vqrshruns16: +;CHECK: vqrshrun.s32 %tmp1 
= load <4 x i32>* %A %tmp2 = call <4 x i16> @llvm.arm.neon.vqrshiftnsu.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >) ret <4 x i16> %tmp2 } define <2 x i32> @vqrshruns32(<2 x i64>* %A) nounwind { +;CHECK: vqrshruns32: +;CHECK: vqrshrun.s64 %tmp1 = load <2 x i64>* %A %tmp2 = call <2 x i32> @llvm.arm.neon.vqrshiftnsu.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >) ret <2 x i32> %tmp2 Modified: llvm/trunk/test/CodeGen/ARM/vqshl.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vqshl.ll?rev=83587&r1=83586&r2=83587&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/ARM/vqshl.ll (original) +++ llvm/trunk/test/CodeGen/ARM/vqshl.ll Thu Oct 8 17:33:53 2009 @@ -1,26 +1,8 @@ -; RUN: llc < %s -march=arm -mattr=+neon > %t -; RUN: grep {vqshl\\.s8} %t | count 4 -; RUN: grep {vqshl\\.s16} %t | count 4 -; RUN: grep {vqshl\\.s32} %t | count 4 -; RUN: grep {vqshl\\.s64} %t | count 4 -; RUN: grep {vqshl\\.u8} %t | count 4 -; RUN: grep {vqshl\\.u16} %t | count 4 -; RUN: grep {vqshl\\.u32} %t | count 4 -; RUN: grep {vqshl\\.u64} %t | count 4 -; RUN: grep {vqshl\\.s8.*#7} %t | count 2 -; RUN: grep {vqshl\\.s16.*#15} %t | count 2 -; RUN: grep {vqshl\\.s32.*#31} %t | count 2 -; RUN: grep {vqshl\\.s64.*#63} %t | count 2 -; RUN: grep {vqshl\\.u8.*#7} %t | count 2 -; RUN: grep {vqshl\\.u16.*#15} %t | count 2 -; RUN: grep {vqshl\\.u32.*#31} %t | count 2 -; RUN: grep {vqshl\\.u64.*#63} %t | count 2 -; RUN: grep {vqshlu\\.s8} %t | count 2 -; RUN: grep {vqshlu\\.s16} %t | count 2 -; RUN: grep {vqshlu\\.s32} %t | count 2 -; RUN: grep {vqshlu\\.s64} %t | count 2 +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s define <8 x i8> @vqshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vqshls8: +;CHECK: vqshl.s8 %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B %tmp3 = call <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) @@ -28,6 +10,8 @@ } define <4 x 
i16> @vqshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vqshls16: +;CHECK: vqshl.s16 %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B %tmp3 = call <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) @@ -35,6 +19,8 @@ } define <2 x i32> @vqshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vqshls32: +;CHECK: vqshl.s32 %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B %tmp3 = call <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) @@ -42,6 +28,8 @@ } define <1 x i64> @vqshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind { +;CHECK: vqshls64: +;CHECK: vqshl.s64 %tmp1 = load <1 x i64>* %A %tmp2 = load <1 x i64>* %B %tmp3 = call <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2) @@ -49,6 +37,8 @@ } define <8 x i8> @vqshlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vqshlu8: +;CHECK: vqshl.u8 %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B %tmp3 = call <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) @@ -56,6 +46,8 @@ } define <4 x i16> @vqshlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vqshlu16: +;CHECK: vqshl.u16 %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B %tmp3 = call <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) @@ -63,6 +55,8 @@ } define <2 x i32> @vqshlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vqshlu32: +;CHECK: vqshl.u32 %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B %tmp3 = call <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) @@ -70,6 +64,8 @@ } define <1 x i64> @vqshlu64(<1 x i64>* %A, <1 x i64>* %B) nounwind { +;CHECK: vqshlu64: +;CHECK: vqshl.u64 %tmp1 = load <1 x i64>* %A %tmp2 = load <1 x i64>* %B %tmp3 = call <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2) @@ -77,6 +73,8 @@ } define <16 x i8> @vqshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vqshlQs8: +;CHECK: vqshl.s8 %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x 
i8>* %B %tmp3 = call <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) @@ -84,6 +82,8 @@ } define <8 x i16> @vqshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vqshlQs16: +;CHECK: vqshl.s16 %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B %tmp3 = call <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) @@ -91,6 +91,8 @@ } define <4 x i32> @vqshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vqshlQs32: +;CHECK: vqshl.s32 %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B %tmp3 = call <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) @@ -98,6 +100,8 @@ } define <2 x i64> @vqshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind { +;CHECK: vqshlQs64: +;CHECK: vqshl.s64 %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i64>* %B %tmp3 = call <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) @@ -105,6 +109,8 @@ } define <16 x i8> @vqshlQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vqshlQu8: +;CHECK: vqshl.u8 %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B %tmp3 = call <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) @@ -112,6 +118,8 @@ } define <8 x i16> @vqshlQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vqshlQu16: +;CHECK: vqshl.u16 %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B %tmp3 = call <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) @@ -119,6 +127,8 @@ } define <4 x i32> @vqshlQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vqshlQu32: +;CHECK: vqshl.u32 %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B %tmp3 = call <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) @@ -126,6 +136,8 @@ } define <2 x i64> @vqshlQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind { +;CHECK: vqshlQu64: +;CHECK: vqshl.u64 %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i64>* %B %tmp3 = call <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) @@ -133,144 
+145,192 @@ } define <8 x i8> @vqshls_n8(<8 x i8>* %A) nounwind { +;CHECK: vqshls_n8: +;CHECK: vqshl.s8{{.*#7}} %tmp1 = load <8 x i8>* %A %tmp2 = call <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >) ret <8 x i8> %tmp2 } define <4 x i16> @vqshls_n16(<4 x i16>* %A) nounwind { +;CHECK: vqshls_n16: +;CHECK: vqshl.s16{{.*#15}} %tmp1 = load <4 x i16>* %A %tmp2 = call <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 15, i16 15, i16 15, i16 15 >) ret <4 x i16> %tmp2 } define <2 x i32> @vqshls_n32(<2 x i32>* %A) nounwind { +;CHECK: vqshls_n32: +;CHECK: vqshl.s32{{.*#31}} %tmp1 = load <2 x i32>* %A %tmp2 = call <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 31, i32 31 >) ret <2 x i32> %tmp2 } define <1 x i64> @vqshls_n64(<1 x i64>* %A) nounwind { +;CHECK: vqshls_n64: +;CHECK: vqshl.s64{{.*#63}} %tmp1 = load <1 x i64>* %A %tmp2 = call <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 63 >) ret <1 x i64> %tmp2 } define <8 x i8> @vqshlu_n8(<8 x i8>* %A) nounwind { +;CHECK: vqshlu_n8: +;CHECK: vqshl.u8{{.*#7}} %tmp1 = load <8 x i8>* %A %tmp2 = call <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >) ret <8 x i8> %tmp2 } define <4 x i16> @vqshlu_n16(<4 x i16>* %A) nounwind { +;CHECK: vqshlu_n16: +;CHECK: vqshl.u16{{.*#15}} %tmp1 = load <4 x i16>* %A %tmp2 = call <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 15, i16 15, i16 15, i16 15 >) ret <4 x i16> %tmp2 } define <2 x i32> @vqshlu_n32(<2 x i32>* %A) nounwind { +;CHECK: vqshlu_n32: +;CHECK: vqshl.u32{{.*#31}} %tmp1 = load <2 x i32>* %A %tmp2 = call <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 31, i32 31 >) ret <2 x i32> %tmp2 } define <1 x i64> @vqshlu_n64(<1 x i64>* %A) nounwind { +;CHECK: vqshlu_n64: +;CHECK: vqshl.u64{{.*#63}} %tmp1 = load <1 x i64>* %A %tmp2 = call <1 x 
i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 63 >) ret <1 x i64> %tmp2 } define <8 x i8> @vqshlsu_n8(<8 x i8>* %A) nounwind { +;CHECK: vqshlsu_n8: +;CHECK: vqshlu.s8 %tmp1 = load <8 x i8>* %A %tmp2 = call <8 x i8> @llvm.arm.neon.vqshiftsu.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >) ret <8 x i8> %tmp2 } define <4 x i16> @vqshlsu_n16(<4 x i16>* %A) nounwind { +;CHECK: vqshlsu_n16: +;CHECK: vqshlu.s16 %tmp1 = load <4 x i16>* %A %tmp2 = call <4 x i16> @llvm.arm.neon.vqshiftsu.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 15, i16 15, i16 15, i16 15 >) ret <4 x i16> %tmp2 } define <2 x i32> @vqshlsu_n32(<2 x i32>* %A) nounwind { +;CHECK: vqshlsu_n32: +;CHECK: vqshlu.s32 %tmp1 = load <2 x i32>* %A %tmp2 = call <2 x i32> @llvm.arm.neon.vqshiftsu.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 31, i32 31 >) ret <2 x i32> %tmp2 } define <1 x i64> @vqshlsu_n64(<1 x i64>* %A) nounwind { +;CHECK: vqshlsu_n64: +;CHECK: vqshlu.s64 %tmp1 = load <1 x i64>* %A %tmp2 = call <1 x i64> @llvm.arm.neon.vqshiftsu.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 63 >) ret <1 x i64> %tmp2 } define <16 x i8> @vqshlQs_n8(<16 x i8>* %A) nounwind { +;CHECK: vqshlQs_n8: +;CHECK: vqshl.s8{{.*#7}} %tmp1 = load <16 x i8>* %A %tmp2 = call <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >) ret <16 x i8> %tmp2 } define <8 x i16> @vqshlQs_n16(<8 x i16>* %A) nounwind { +;CHECK: vqshlQs_n16: +;CHECK: vqshl.s16{{.*#15}} %tmp1 = load <8 x i16>* %A %tmp2 = call <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >) ret <8 x i16> %tmp2 } define <4 x i32> @vqshlQs_n32(<4 x i32>* %A) nounwind { +;CHECK: vqshlQs_n32: +;CHECK: vqshl.s32{{.*#31}} %tmp1 = load <4 x i32>* %A %tmp2 = call <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 31, i32 31, i32 31, i32 31 >) 
ret <4 x i32> %tmp2 } define <2 x i64> @vqshlQs_n64(<2 x i64>* %A) nounwind { +;CHECK: vqshlQs_n64: +;CHECK: vqshl.s64{{.*#63}} %tmp1 = load <2 x i64>* %A %tmp2 = call <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 63, i64 63 >) ret <2 x i64> %tmp2 } define <16 x i8> @vqshlQu_n8(<16 x i8>* %A) nounwind { +;CHECK: vqshlQu_n8: +;CHECK: vqshl.u8{{.*#7}} %tmp1 = load <16 x i8>* %A %tmp2 = call <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >) ret <16 x i8> %tmp2 } define <8 x i16> @vqshlQu_n16(<8 x i16>* %A) nounwind { +;CHECK: vqshlQu_n16: +;CHECK: vqshl.u16{{.*#15}} %tmp1 = load <8 x i16>* %A %tmp2 = call <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >) ret <8 x i16> %tmp2 } define <4 x i32> @vqshlQu_n32(<4 x i32>* %A) nounwind { +;CHECK: vqshlQu_n32: +;CHECK: vqshl.u32{{.*#31}} %tmp1 = load <4 x i32>* %A %tmp2 = call <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 31, i32 31, i32 31, i32 31 >) ret <4 x i32> %tmp2 } define <2 x i64> @vqshlQu_n64(<2 x i64>* %A) nounwind { +;CHECK: vqshlQu_n64: +;CHECK: vqshl.u64{{.*#63}} %tmp1 = load <2 x i64>* %A %tmp2 = call <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 63, i64 63 >) ret <2 x i64> %tmp2 } define <16 x i8> @vqshlQsu_n8(<16 x i8>* %A) nounwind { +;CHECK: vqshlQsu_n8: +;CHECK: vqshlu.s8 %tmp1 = load <16 x i8>* %A %tmp2 = call <16 x i8> @llvm.arm.neon.vqshiftsu.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >) ret <16 x i8> %tmp2 } define <8 x i16> @vqshlQsu_n16(<8 x i16>* %A) nounwind { +;CHECK: vqshlQsu_n16: +;CHECK: vqshlu.s16 %tmp1 = load <8 x i16>* %A %tmp2 = call <8 x i16> @llvm.arm.neon.vqshiftsu.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 15, i16 15, i16 15, 
i16 15, i16 15, i16 15, i16 15, i16 15 >) ret <8 x i16> %tmp2 } define <4 x i32> @vqshlQsu_n32(<4 x i32>* %A) nounwind { +;CHECK: vqshlQsu_n32: +;CHECK: vqshlu.s32 %tmp1 = load <4 x i32>* %A %tmp2 = call <4 x i32> @llvm.arm.neon.vqshiftsu.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 31, i32 31, i32 31, i32 31 >) ret <4 x i32> %tmp2 } define <2 x i64> @vqshlQsu_n64(<2 x i64>* %A) nounwind { +;CHECK: vqshlQsu_n64: +;CHECK: vqshlu.s64 %tmp1 = load <2 x i64>* %A %tmp2 = call <2 x i64> @llvm.arm.neon.vqshiftsu.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 63, i64 63 >) ret <2 x i64> %tmp2 Modified: llvm/trunk/test/CodeGen/ARM/vqshrn.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vqshrn.ll?rev=83587&r1=83586&r2=83587&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/ARM/vqshrn.ll (original) +++ llvm/trunk/test/CodeGen/ARM/vqshrn.ll Thu Oct 8 17:33:53 2009 @@ -1,63 +1,72 @@ -; RUN: llc < %s -march=arm -mattr=+neon > %t -; RUN: grep {vqshrn\\.s16} %t | count 1 -; RUN: grep {vqshrn\\.s32} %t | count 1 -; RUN: grep {vqshrn\\.s64} %t | count 1 -; RUN: grep {vqshrn\\.u16} %t | count 1 -; RUN: grep {vqshrn\\.u32} %t | count 1 -; RUN: grep {vqshrn\\.u64} %t | count 1 -; RUN: grep {vqshrun\\.s16} %t | count 1 -; RUN: grep {vqshrun\\.s32} %t | count 1 -; RUN: grep {vqshrun\\.s64} %t | count 1 +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s define <8 x i8> @vqshrns8(<8 x i16>* %A) nounwind { +;CHECK: vqshrns8: +;CHECK: vqshrn.s16 %tmp1 = load <8 x i16>* %A %tmp2 = call <8 x i8> @llvm.arm.neon.vqshiftns.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >) ret <8 x i8> %tmp2 } define <4 x i16> @vqshrns16(<4 x i32>* %A) nounwind { +;CHECK: vqshrns16: +;CHECK: vqshrn.s32 %tmp1 = load <4 x i32>* %A %tmp2 = call <4 x i16> @llvm.arm.neon.vqshiftns.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >) ret <4 x i16> %tmp2 } define <2 x i32> 
@vqshrns32(<2 x i64>* %A) nounwind { +;CHECK: vqshrns32: +;CHECK: vqshrn.s64 %tmp1 = load <2 x i64>* %A %tmp2 = call <2 x i32> @llvm.arm.neon.vqshiftns.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >) ret <2 x i32> %tmp2 } define <8 x i8> @vqshrnu8(<8 x i16>* %A) nounwind { +;CHECK: vqshrnu8: +;CHECK: vqshrn.u16 %tmp1 = load <8 x i16>* %A %tmp2 = call <8 x i8> @llvm.arm.neon.vqshiftnu.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >) ret <8 x i8> %tmp2 } define <4 x i16> @vqshrnu16(<4 x i32>* %A) nounwind { +;CHECK: vqshrnu16: +;CHECK: vqshrn.u32 %tmp1 = load <4 x i32>* %A %tmp2 = call <4 x i16> @llvm.arm.neon.vqshiftnu.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >) ret <4 x i16> %tmp2 } define <2 x i32> @vqshrnu32(<2 x i64>* %A) nounwind { +;CHECK: vqshrnu32: +;CHECK: vqshrn.u64 %tmp1 = load <2 x i64>* %A %tmp2 = call <2 x i32> @llvm.arm.neon.vqshiftnu.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >) ret <2 x i32> %tmp2 } define <8 x i8> @vqshruns8(<8 x i16>* %A) nounwind { +;CHECK: vqshruns8: +;CHECK: vqshrun.s16 %tmp1 = load <8 x i16>* %A %tmp2 = call <8 x i8> @llvm.arm.neon.vqshiftnsu.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >) ret <8 x i8> %tmp2 } define <4 x i16> @vqshruns16(<4 x i32>* %A) nounwind { +;CHECK: vqshruns16: +;CHECK: vqshrun.s32 %tmp1 = load <4 x i32>* %A %tmp2 = call <4 x i16> @llvm.arm.neon.vqshiftnsu.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >) ret <4 x i16> %tmp2 } define <2 x i32> @vqshruns32(<2 x i64>* %A) nounwind { +;CHECK: vqshruns32: +;CHECK: vqshrun.s64 %tmp1 = load <2 x i64>* %A %tmp2 = call <2 x i32> @llvm.arm.neon.vqshiftnsu.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >) ret <2 x i32> %tmp2 Modified: llvm/trunk/test/CodeGen/ARM/vqsub.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vqsub.ll?rev=83587&r1=83586&r2=83587&view=diff 
============================================================================== --- llvm/trunk/test/CodeGen/ARM/vqsub.ll (original) +++ llvm/trunk/test/CodeGen/ARM/vqsub.ll Thu Oct 8 17:33:53 2009 @@ -1,14 +1,8 @@ -; RUN: llc < %s -march=arm -mattr=+neon > %t -; RUN: grep {vqsub\\.s8} %t | count 2 -; RUN: grep {vqsub\\.s16} %t | count 2 -; RUN: grep {vqsub\\.s32} %t | count 2 -; RUN: grep {vqsub\\.s64} %t | count 2 -; RUN: grep {vqsub\\.u8} %t | count 2 -; RUN: grep {vqsub\\.u16} %t | count 2 -; RUN: grep {vqsub\\.u32} %t | count 2 -; RUN: grep {vqsub\\.u64} %t | count 2 +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s define <8 x i8> @vqsubs8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vqsubs8: +;CHECK: vqsub.s8 %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B %tmp3 = call <8 x i8> @llvm.arm.neon.vqsubs.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) @@ -16,6 +10,8 @@ } define <4 x i16> @vqsubs16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vqsubs16: +;CHECK: vqsub.s16 %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B %tmp3 = call <4 x i16> @llvm.arm.neon.vqsubs.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) @@ -23,6 +19,8 @@ } define <2 x i32> @vqsubs32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vqsubs32: +;CHECK: vqsub.s32 %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B %tmp3 = call <2 x i32> @llvm.arm.neon.vqsubs.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) @@ -30,6 +28,8 @@ } define <1 x i64> @vqsubs64(<1 x i64>* %A, <1 x i64>* %B) nounwind { +;CHECK: vqsubs64: +;CHECK: vqsub.s64 %tmp1 = load <1 x i64>* %A %tmp2 = load <1 x i64>* %B %tmp3 = call <1 x i64> @llvm.arm.neon.vqsubs.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2) @@ -37,6 +37,8 @@ } define <8 x i8> @vqsubu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vqsubu8: +;CHECK: vqsub.u8 %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B %tmp3 = call <8 x i8> @llvm.arm.neon.vqsubu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) @@ -44,6 +46,8 @@ } define <4 x i16> @vqsubu16(<4 x i16>* %A, <4 x i16>* %B) nounwind 
{ +;CHECK: vqsubu16: +;CHECK: vqsub.u16 %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B %tmp3 = call <4 x i16> @llvm.arm.neon.vqsubu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) @@ -51,6 +55,8 @@ } define <2 x i32> @vqsubu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vqsubu32: +;CHECK: vqsub.u32 %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B %tmp3 = call <2 x i32> @llvm.arm.neon.vqsubu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) @@ -58,6 +64,8 @@ } define <1 x i64> @vqsubu64(<1 x i64>* %A, <1 x i64>* %B) nounwind { +;CHECK: vqsubu64: +;CHECK: vqsub.u64 %tmp1 = load <1 x i64>* %A %tmp2 = load <1 x i64>* %B %tmp3 = call <1 x i64> @llvm.arm.neon.vqsubu.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2) @@ -65,6 +73,8 @@ } define <16 x i8> @vqsubQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vqsubQs8: +;CHECK: vqsub.s8 %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B %tmp3 = call <16 x i8> @llvm.arm.neon.vqsubs.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) @@ -72,6 +82,8 @@ } define <8 x i16> @vqsubQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vqsubQs16: +;CHECK: vqsub.s16 %tmp1 = load <8 x i16>* %