From anton at korobeynikov.info Mon Feb 7 01:26:43 2011
From: anton at korobeynikov.info (Anton Korobeynikov)
Date: Mon, 7 Feb 2011 10:26:43 +0300
Subject: [llvm-commits] [llvm] r124995 - in /llvm/trunk:
lib/Target/ARM/ARMAsmPrinter.cpp lib/Target/ARM/ARMBuildAttrs.h
test/CodeGen/ARM/2010-09-29-mc-asm-header-test.ll
test/CodeGen/ARM/2010-10-19-mc-elf-objheader.ll
In-Reply-To: Syntax:
- <result> = shl <ty> <op1>, <op2> ; yields {ty}:result
+ <result> = shl <ty> <op1>, <op2> ; yields {ty}:result
+ <result> = shl nuw <ty> <op1>, <op2> ; yields {ty}:result
+ <result> = shl nsw <ty> <op1>, <op2> ; yields {ty}:result
+ <result> = shl nuw nsw <ty> <op1>, <op2> ; yields {ty}:result
Overview:
@@ -3704,6 +3707,14 @@
vectors, each vector element of op1 is shifted by the corresponding
shift amount in op2.
+If the nuw keyword is present, then the shift produces a
+ trap value if it shifts out any non-zero bits. If
+ the nsw keyword is present, then the shift produces a
+ trap value if it shifts out any bits that disagree
+ with the resultant sign bit. As such, NUW/NSW have the same semantics as
+ they would if the shift were expressed as a mul instruction with the same
+ nsw/nuw bits in (mul %op1, (shl 1, %op2)).
+
<result> = shl i32 4, %var ; yields {i32}: 4 << %var
@@ -3723,7 +3734,8 @@
Syntax:
- <result> = lshr <ty> <op1>, <op2> ; yields {ty}:result
+ <result> = lshr <ty> <op1>, <op2> ; yields {ty}:result
+ <result> = lshr exact <ty> <op1>, <op2> ; yields {ty}:result
Overview:
@@ -3743,6 +3755,11 @@
vectors, each vector element of op1 is shifted by the corresponding
shift amount in op2.
+If the exact keyword is present, the result value of the
+ lshr is a trap value if any of the bits
+ shifted out are non-zero.
+
+
Example:
<result> = lshr i32 4, 1 ; yields {i32}:result = 2
@@ -3762,7 +3779,8 @@
Syntax:
- <result> = ashr <ty> <op1>, <op2> ; yields {ty}:result
+ <result> = ashr <ty> <op1>, <op2> ; yields {ty}:result
+ <result> = ashr exact <ty> <op1>, <op2> ; yields {ty}:result
Overview:
@@ -3783,6 +3801,10 @@
the arguments are vectors, each vector element of op1 is shifted by
the corresponding shift amount in op2.
+If the exact keyword is present, the result value of the
+ ashr is a trap value if any of the bits
+ shifted out are non-zero.
+
Example:
<result> = ashr i32 4, 1 ; yields {i32}:result = 2
Modified: llvm/trunk/include/llvm/Constants.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Constants.h?rev=125006&r1=125005&r2=125006&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Constants.h (original)
+++ llvm/trunk/include/llvm/Constants.h Mon Feb 7 10:40:21 2011
@@ -724,8 +724,12 @@
static Constant *getNUWSub(Constant *C1, Constant *C2);
static Constant *getNSWMul(Constant *C1, Constant *C2);
static Constant *getNUWMul(Constant *C1, Constant *C2);
+ static Constant *getNSWShl(Constant *C1, Constant *C2);
+ static Constant *getNUWShl(Constant *C1, Constant *C2);
static Constant *getExactSDiv(Constant *C1, Constant *C2);
static Constant *getExactUDiv(Constant *C1, Constant *C2);
+ static Constant *getExactAShr(Constant *C1, Constant *C2);
+ static Constant *getExactLShr(Constant *C1, Constant *C2);
/// Transparently provide more efficient getOperand methods.
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Constant);
Modified: llvm/trunk/include/llvm/Operator.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Operator.h?rev=125006&r1=125005&r2=125006&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Operator.h (original)
+++ llvm/trunk/include/llvm/Operator.h Mon Feb 7 10:40:21 2011
@@ -106,66 +106,14 @@
static inline bool classof(const Instruction *I) {
return I->getOpcode() == Instruction::Add ||
I->getOpcode() == Instruction::Sub ||
- I->getOpcode() == Instruction::Mul;
+ I->getOpcode() == Instruction::Mul ||
+ I->getOpcode() == Instruction::Shl;
}
static inline bool classof(const ConstantExpr *CE) {
return CE->getOpcode() == Instruction::Add ||
CE->getOpcode() == Instruction::Sub ||
- CE->getOpcode() == Instruction::Mul;
- }
- static inline bool classof(const Value *V) {
- return (isa(V) && classof(cast(V))) ||
- (isa(V) && classof(cast(V)));
- }
-};
-
-/// AddOperator - Utility class for integer addition operators.
-///
-class AddOperator : public OverflowingBinaryOperator {
- ~AddOperator(); // do not implement
-public:
- static inline bool classof(const AddOperator *) { return true; }
- static inline bool classof(const Instruction *I) {
- return I->getOpcode() == Instruction::Add;
- }
- static inline bool classof(const ConstantExpr *CE) {
- return CE->getOpcode() == Instruction::Add;
- }
- static inline bool classof(const Value *V) {
- return (isa(V) && classof(cast(V))) ||
- (isa(V) && classof(cast(V)));
- }
-};
-
-/// SubOperator - Utility class for integer subtraction operators.
-///
-class SubOperator : public OverflowingBinaryOperator {
- ~SubOperator(); // do not implement
-public:
- static inline bool classof(const SubOperator *) { return true; }
- static inline bool classof(const Instruction *I) {
- return I->getOpcode() == Instruction::Sub;
- }
- static inline bool classof(const ConstantExpr *CE) {
- return CE->getOpcode() == Instruction::Sub;
- }
- static inline bool classof(const Value *V) {
- return (isa(V) && classof(cast(V))) ||
- (isa(V) && classof(cast(V)));
- }
-};
-
-/// MulOperator - Utility class for integer multiplication operators.
-///
-class MulOperator : public OverflowingBinaryOperator {
- ~MulOperator(); // do not implement
-public:
- static inline bool classof(const MulOperator *) { return true; }
- static inline bool classof(const Instruction *I) {
- return I->getOpcode() == Instruction::Mul;
- }
- static inline bool classof(const ConstantExpr *CE) {
- return CE->getOpcode() == Instruction::Mul;
+ CE->getOpcode() == Instruction::Mul ||
+ CE->getOpcode() == Instruction::Shl;
}
static inline bool classof(const Value *V) {
return (isa(V) && classof(cast(V))) ||
@@ -196,63 +144,74 @@
return SubclassOptionalData & IsExact;
}
- static inline bool classof(const ConstantExpr *CE) {
- return CE->getOpcode() == Instruction::SDiv ||
- CE->getOpcode() == Instruction::UDiv;
- }
- static inline bool classof(const Instruction *I) {
- return I->getOpcode() == Instruction::SDiv ||
- I->getOpcode() == Instruction::UDiv;
- }
- static inline bool classof(const Value *V) {
- return (isa(V) && classof(cast(V))) ||
- (isa(V) && classof(cast(V)));
+ static bool isPossiblyExactOpcode(unsigned OpC) {
+ return OpC == Instruction::SDiv ||
+ OpC == Instruction::UDiv ||
+ OpC == Instruction::AShr ||
+ OpC == Instruction::LShr;
}
-};
-
-/// SDivOperator - An Operator with opcode Instruction::SDiv.
-///
-class SDivOperator : public PossiblyExactOperator {
-public:
- // Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const SDivOperator *) { return true; }
static inline bool classof(const ConstantExpr *CE) {
- return CE->getOpcode() == Instruction::SDiv;
+ return isPossiblyExactOpcode(CE->getOpcode());
}
static inline bool classof(const Instruction *I) {
- return I->getOpcode() == Instruction::SDiv;
+ return isPossiblyExactOpcode(I->getOpcode());
}
static inline bool classof(const Value *V) {
return (isa(V) && classof(cast(V))) ||
(isa(V) && classof(cast(V)));
}
};
+
-/// UDivOperator - An Operator with opcode Instruction::UDiv.
-///
-class UDivOperator : public PossiblyExactOperator {
+
+/// ConcreteOperator - A helper template for defining operators for individual
+/// opcodes.
+template<typename SuperClass, unsigned Opc>
+class ConcreteOperator : public SuperClass {
+ ~ConcreteOperator(); // DO NOT IMPLEMENT
public:
- // Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const UDivOperator *) { return true; }
- static inline bool classof(const ConstantExpr *CE) {
- return CE->getOpcode() == Instruction::UDiv;
+ static inline bool classof(const ConcreteOperator *) {
+ return true;
}
static inline bool classof(const Instruction *I) {
- return I->getOpcode() == Instruction::UDiv;
+ return I->getOpcode() == Opc;
+ }
+ static inline bool classof(const ConstantExpr *CE) {
+ return CE->getOpcode() == Opc;
}
static inline bool classof(const Value *V) {
return (isa(V) && classof(cast(V))) ||
- (isa(V) && classof(cast(V)));
+ (isa(V) && classof(cast(V)));
}
};
+
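+class AddOperator
+ : public ConcreteOperator<OverflowingBinaryOperator, Instruction::Add> {};
+class SubOperator
+ : public ConcreteOperator<OverflowingBinaryOperator, Instruction::Sub> {};
+class MulOperator
+ : public ConcreteOperator<OverflowingBinaryOperator, Instruction::Mul> {};
+class ShlOperator
+ : public ConcreteOperator<OverflowingBinaryOperator, Instruction::Shl> {};
+
+
+class SDivOperator
+ : public ConcreteOperator<PossiblyExactOperator, Instruction::SDiv> {};
+class UDivOperator
+ : public ConcreteOperator<PossiblyExactOperator, Instruction::UDiv> {};
+class AShrOperator
+ : public ConcreteOperator<PossiblyExactOperator, Instruction::AShr> {};
+class LShrOperator
+ : public ConcreteOperator<PossiblyExactOperator, Instruction::LShr> {};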
+
-class GEPOperator : public Operator {
+
+class GEPOperator
+ : public ConcreteOperator<Operator, Instruction::GetElementPtr> {
enum {
IsInBounds = (1 << 0)
};
- ~GEPOperator(); // do not implement
-
friend class GetElementPtrInst;
friend class ConstantExpr;
void setIsInBounds(bool B) {
@@ -301,8 +260,8 @@
/// value, just potentially different types.
bool hasAllZeroIndices() const {
for (const_op_iterator I = idx_begin(), E = idx_end(); I != E; ++I) {
- if (Constant *C = dyn_cast(I))
- if (C->isNullValue())
+ if (ConstantInt *C = dyn_cast(I))
+ if (C->isZero())
continue;
return false;
}
@@ -319,21 +278,6 @@
}
return true;
}
-
-
- // Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const GEPOperator *) { return true; }
- static inline bool classof(const GetElementPtrInst *) { return true; }
- static inline bool classof(const ConstantExpr *CE) {
- return CE->getOpcode() == Instruction::GetElementPtr;
- }
- static inline bool classof(const Instruction *I) {
- return I->getOpcode() == Instruction::GetElementPtr;
- }
- static inline bool classof(const Value *V) {
- return (isa(V) && classof(cast(V))) ||
- (isa(V) && classof(cast(V)));
- }
};
} // End llvm namespace
Modified: llvm/trunk/lib/AsmParser/LLParser.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/AsmParser/LLParser.cpp?rev=125006&r1=125005&r2=125006&view=diff
==============================================================================
--- llvm/trunk/lib/AsmParser/LLParser.cpp (original)
+++ llvm/trunk/lib/AsmParser/LLParser.cpp Mon Feb 7 10:40:21 2011
@@ -2286,7 +2286,10 @@
case lltok::kw_fdiv:
case lltok::kw_urem:
case lltok::kw_srem:
- case lltok::kw_frem: {
+ case lltok::kw_frem:
+ case lltok::kw_shl:
+ case lltok::kw_lshr:
+ case lltok::kw_ashr: {
bool NUW = false;
bool NSW = false;
bool Exact = false;
@@ -2294,9 +2297,8 @@
Constant *Val0, *Val1;
Lex.Lex();
LocTy ModifierLoc = Lex.getLoc();
- if (Opc == Instruction::Add ||
- Opc == Instruction::Sub ||
- Opc == Instruction::Mul) {
+ if (Opc == Instruction::Add || Opc == Instruction::Sub ||
+ Opc == Instruction::Mul || Opc == Instruction::Shl) {
if (EatIfPresent(lltok::kw_nuw))
NUW = true;
if (EatIfPresent(lltok::kw_nsw)) {
@@ -2304,7 +2306,8 @@
if (EatIfPresent(lltok::kw_nuw))
NUW = true;
}
- } else if (Opc == Instruction::SDiv || Opc == Instruction::UDiv) {
+ } else if (Opc == Instruction::SDiv || Opc == Instruction::UDiv ||
+ Opc == Instruction::LShr || Opc == Instruction::AShr) {
if (EatIfPresent(lltok::kw_exact))
Exact = true;
}
@@ -2331,6 +2334,9 @@
case Instruction::SDiv:
case Instruction::URem:
case Instruction::SRem:
+ case Instruction::Shl:
+ case Instruction::AShr:
+ case Instruction::LShr:
if (!Val0->getType()->isIntOrIntVectorTy())
return Error(ID.Loc, "constexpr requires integer operands");
break;
@@ -2355,9 +2361,6 @@
}
// Logical Operations
- case lltok::kw_shl:
- case lltok::kw_lshr:
- case lltok::kw_ashr:
case lltok::kw_and:
case lltok::kw_or:
case lltok::kw_xor: {
@@ -3002,55 +3005,38 @@
// Binary Operators.
case lltok::kw_add:
case lltok::kw_sub:
- case lltok::kw_mul: {
- bool NUW = false;
- bool NSW = false;
+ case lltok::kw_mul:
+ case lltok::kw_shl: {
LocTy ModifierLoc = Lex.getLoc();
- if (EatIfPresent(lltok::kw_nuw))
- NUW = true;
- if (EatIfPresent(lltok::kw_nsw)) {
- NSW = true;
- if (EatIfPresent(lltok::kw_nuw))
- NUW = true;
- }
- bool Result = ParseArithmetic(Inst, PFS, KeywordVal, 1);
- if (!Result) {
- if (!Inst->getType()->isIntOrIntVectorTy()) {
- if (NUW)
- return Error(ModifierLoc, "nuw only applies to integer operations");
- if (NSW)
- return Error(ModifierLoc, "nsw only applies to integer operations");
- }
- if (NUW)
- cast(Inst)->setHasNoUnsignedWrap(true);
- if (NSW)
- cast(Inst)->setHasNoSignedWrap(true);
- }
- return Result;
+ bool NUW = EatIfPresent(lltok::kw_nuw);
+ bool NSW = EatIfPresent(lltok::kw_nsw);
+ if (!NUW) NUW = EatIfPresent(lltok::kw_nuw);
+
+ if (ParseArithmetic(Inst, PFS, KeywordVal, 1)) return true;
+
+ if (NUW) cast(Inst)->setHasNoUnsignedWrap(true);
+ if (NSW) cast(Inst)->setHasNoSignedWrap(true);
+ return false;
}
case lltok::kw_fadd:
case lltok::kw_fsub:
case lltok::kw_fmul: return ParseArithmetic(Inst, PFS, KeywordVal, 2);
case lltok::kw_sdiv:
- case lltok::kw_udiv: {
- bool Exact = false;
- if (EatIfPresent(lltok::kw_exact))
- Exact = true;
- bool Result = ParseArithmetic(Inst, PFS, KeywordVal, 1);
- if (!Result)
- if (Exact)
- cast(Inst)->setIsExact(true);
- return Result;
+ case lltok::kw_udiv:
+ case lltok::kw_lshr:
+ case lltok::kw_ashr: {
+ bool Exact = EatIfPresent(lltok::kw_exact);
+
+ if (ParseArithmetic(Inst, PFS, KeywordVal, 1)) return true;
+ if (Exact) cast(Inst)->setIsExact(true);
+ return false;
}
case lltok::kw_urem:
case lltok::kw_srem: return ParseArithmetic(Inst, PFS, KeywordVal, 1);
case lltok::kw_fdiv:
case lltok::kw_frem: return ParseArithmetic(Inst, PFS, KeywordVal, 2);
- case lltok::kw_shl:
- case lltok::kw_lshr:
- case lltok::kw_ashr:
case lltok::kw_and:
case lltok::kw_or:
case lltok::kw_xor: return ParseLogical(Inst, PFS, KeywordVal);
Modified: llvm/trunk/lib/Bitcode/Reader/BitcodeReader.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Bitcode/Reader/BitcodeReader.cpp?rev=125006&r1=125005&r2=125006&view=diff
==============================================================================
--- llvm/trunk/lib/Bitcode/Reader/BitcodeReader.cpp (original)
+++ llvm/trunk/lib/Bitcode/Reader/BitcodeReader.cpp Mon Feb 7 10:40:21 2011
@@ -1085,13 +1085,16 @@
if (Record.size() >= 4) {
if (Opc == Instruction::Add ||
Opc == Instruction::Sub ||
- Opc == Instruction::Mul) {
+ Opc == Instruction::Mul ||
+ Opc == Instruction::Shl) {
if (Record[3] & (1 << bitc::OBO_NO_SIGNED_WRAP))
Flags |= OverflowingBinaryOperator::NoSignedWrap;
if (Record[3] & (1 << bitc::OBO_NO_UNSIGNED_WRAP))
Flags |= OverflowingBinaryOperator::NoUnsignedWrap;
} else if (Opc == Instruction::SDiv ||
- Opc == Instruction::UDiv) {
+ Opc == Instruction::UDiv ||
+ Opc == Instruction::LShr ||
+ Opc == Instruction::AShr) {
if (Record[3] & (1 << bitc::PEO_EXACT))
Flags |= SDivOperator::IsExact;
}
@@ -1901,13 +1904,16 @@
if (OpNum < Record.size()) {
if (Opc == Instruction::Add ||
Opc == Instruction::Sub ||
- Opc == Instruction::Mul) {
+ Opc == Instruction::Mul ||
+ Opc == Instruction::Shl) {
if (Record[OpNum] & (1 << bitc::OBO_NO_SIGNED_WRAP))
cast(I)->setHasNoSignedWrap(true);
if (Record[OpNum] & (1 << bitc::OBO_NO_UNSIGNED_WRAP))
cast(I)->setHasNoUnsignedWrap(true);
} else if (Opc == Instruction::SDiv ||
- Opc == Instruction::UDiv) {
+ Opc == Instruction::UDiv ||
+ Opc == Instruction::LShr ||
+ Opc == Instruction::AShr) {
if (Record[OpNum] & (1 << bitc::PEO_EXACT))
cast(I)->setIsExact(true);
}
Modified: llvm/trunk/lib/VMCore/Constants.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/Constants.cpp?rev=125006&r1=125005&r2=125006&view=diff
==============================================================================
--- llvm/trunk/lib/VMCore/Constants.cpp (original)
+++ llvm/trunk/lib/VMCore/Constants.cpp Mon Feb 7 10:40:21 2011
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements the Constant* classes.
+// This file implements the Constant *classes.
//
//===----------------------------------------------------------------------===//
@@ -72,7 +72,7 @@
}
}
-Constant* Constant::getIntegerValue(const Type *Ty, const APInt &V) {
+Constant *Constant::getIntegerValue(const Type *Ty, const APInt &V) {
const Type *ScalarTy = Ty->getScalarType();
// Create the base integer constant.
@@ -89,7 +89,7 @@
return C;
}
-Constant* Constant::getAllOnesValue(const Type *Ty) {
+Constant *Constant::getAllOnesValue(const Type *Ty) {
if (const IntegerType *ITy = dyn_cast(Ty))
return ConstantInt::get(Ty->getContext(),
APInt::getAllOnesValue(ITy->getBitWidth()));
@@ -296,7 +296,7 @@
return Slot;
}
-Constant* ConstantInt::get(const Type* Ty, uint64_t V, bool isSigned) {
+Constant *ConstantInt::get(const Type* Ty, uint64_t V, bool isSigned) {
Constant *C = get(cast(Ty->getScalarType()),
V, isSigned);
@@ -321,7 +321,7 @@
return get(Ty, V, true);
}
-Constant* ConstantInt::get(const Type* Ty, const APInt& V) {
+Constant *ConstantInt::get(const Type* Ty, const APInt& V) {
ConstantInt *C = get(Ty->getContext(), V);
assert(C->getType() == Ty->getScalarType() &&
"ConstantInt type doesn't match the type implied by its value!");
@@ -360,7 +360,7 @@
/// get() - This returns a constant fp for the specified value in the
/// specified type. This should only be used for simple constant values like
/// 2.0/1.0 etc, that are known-valid both as double and as the target format.
-Constant* ConstantFP::get(const Type* Ty, double V) {
+Constant *ConstantFP::get(const Type* Ty, double V) {
LLVMContext &Context = Ty->getContext();
APFloat FV(V);
@@ -378,7 +378,7 @@
}
-Constant* ConstantFP::get(const Type* Ty, StringRef Str) {
+Constant *ConstantFP::get(const Type* Ty, StringRef Str) {
LLVMContext &Context = Ty->getContext();
APFloat FV(*TypeToFloatSemantics(Ty->getScalarType()), Str);
@@ -401,7 +401,7 @@
}
-Constant* ConstantFP::getZeroValueForNegation(const Type* Ty) {
+Constant *ConstantFP::getZeroValueForNegation(const Type* Ty) {
if (const VectorType *PTy = dyn_cast(Ty))
if (PTy->getElementType()->isFloatingPointTy()) {
std::vector zeros(PTy->getNumElements(),
@@ -509,7 +509,7 @@
}
-Constant* ConstantArray::get(const ArrayType* T, Constant* const* Vals,
+Constant *ConstantArray::get(const ArrayType* T, Constant *const* Vals,
unsigned NumVals) {
// FIXME: make this the primary ctor method.
return get(T, std::vector(Vals, Vals+NumVals));
@@ -521,7 +521,7 @@
/// Otherwise, the length parameter specifies how much of the string to use
/// and it won't be null terminated.
///
-Constant* ConstantArray::get(LLVMContext &Context, StringRef Str,
+Constant *ConstantArray::get(LLVMContext &Context, StringRef Str,
bool AddNull) {
std::vector ElementVals;
ElementVals.reserve(Str.size() + size_t(AddNull));
@@ -557,7 +557,7 @@
}
// ConstantStruct accessors.
-Constant* ConstantStruct::get(const StructType* T,
+Constant *ConstantStruct::get(const StructType* T,
const std::vector& V) {
LLVMContextImpl* pImpl = T->getContext().pImpl;
@@ -569,7 +569,7 @@
return ConstantAggregateZero::get(T);
}
-Constant* ConstantStruct::get(LLVMContext &Context,
+Constant *ConstantStruct::get(LLVMContext &Context,
const std::vector& V, bool packed) {
std::vector StructEls;
StructEls.reserve(V.size());
@@ -578,8 +578,8 @@
return get(StructType::get(Context, StructEls, packed), V);
}
-Constant* ConstantStruct::get(LLVMContext &Context,
- Constant* const *Vals, unsigned NumVals,
+Constant *ConstantStruct::get(LLVMContext &Context,
+ Constant *const *Vals, unsigned NumVals,
bool Packed) {
// FIXME: make this the primary ctor method.
return get(Context, std::vector(Vals, Vals+NumVals), Packed);
@@ -601,7 +601,7 @@
}
// ConstantVector accessors.
-Constant* ConstantVector::get(const VectorType* T,
+Constant *ConstantVector::get(const VectorType* T,
const std::vector& V) {
assert(!V.empty() && "Vectors can't be empty");
LLVMContext &Context = T->getContext();
@@ -629,68 +629,89 @@
return pImpl->VectorConstants.getOrCreate(T, V);
}
-Constant* ConstantVector::get(const std::vector& V) {
+Constant *ConstantVector::get(const std::vector& V) {
assert(!V.empty() && "Cannot infer type if V is empty");
return get(VectorType::get(V.front()->getType(),V.size()), V);
}
-Constant* ConstantVector::get(Constant* const* Vals, unsigned NumVals) {
+Constant *ConstantVector::get(Constant *const* Vals, unsigned NumVals) {
// FIXME: make this the primary ctor method.
return get(std::vector(Vals, Vals+NumVals));
}
-Constant* ConstantExpr::getNSWNeg(Constant* C) {
+Constant *ConstantExpr::getNSWNeg(Constant *C) {
assert(C->getType()->isIntOrIntVectorTy() &&
"Cannot NEG a nonintegral value!");
return getNSWSub(ConstantFP::getZeroValueForNegation(C->getType()), C);
}
-Constant* ConstantExpr::getNUWNeg(Constant* C) {
+Constant *ConstantExpr::getNUWNeg(Constant *C) {
assert(C->getType()->isIntOrIntVectorTy() &&
"Cannot NEG a nonintegral value!");
return getNUWSub(ConstantFP::getZeroValueForNegation(C->getType()), C);
}
-Constant* ConstantExpr::getNSWAdd(Constant* C1, Constant* C2) {
+Constant *ConstantExpr::getNSWAdd(Constant *C1, Constant *C2) {
return getTy(C1->getType(), Instruction::Add, C1, C2,
OverflowingBinaryOperator::NoSignedWrap);
}
-Constant* ConstantExpr::getNUWAdd(Constant* C1, Constant* C2) {
+Constant *ConstantExpr::getNUWAdd(Constant *C1, Constant *C2) {
return getTy(C1->getType(), Instruction::Add, C1, C2,
OverflowingBinaryOperator::NoUnsignedWrap);
}
-Constant* ConstantExpr::getNSWSub(Constant* C1, Constant* C2) {
+Constant *ConstantExpr::getNSWSub(Constant *C1, Constant *C2) {
return getTy(C1->getType(), Instruction::Sub, C1, C2,
OverflowingBinaryOperator::NoSignedWrap);
}
-Constant* ConstantExpr::getNUWSub(Constant* C1, Constant* C2) {
+Constant *ConstantExpr::getNUWSub(Constant *C1, Constant *C2) {
return getTy(C1->getType(), Instruction::Sub, C1, C2,
OverflowingBinaryOperator::NoUnsignedWrap);
}
-Constant* ConstantExpr::getNSWMul(Constant* C1, Constant* C2) {
+Constant *ConstantExpr::getNSWMul(Constant *C1, Constant *C2) {
return getTy(C1->getType(), Instruction::Mul, C1, C2,
OverflowingBinaryOperator::NoSignedWrap);
}
-Constant* ConstantExpr::getNUWMul(Constant* C1, Constant* C2) {
+Constant *ConstantExpr::getNUWMul(Constant *C1, Constant *C2) {
return getTy(C1->getType(), Instruction::Mul, C1, C2,
OverflowingBinaryOperator::NoUnsignedWrap);
}
-Constant* ConstantExpr::getExactSDiv(Constant* C1, Constant* C2) {
+Constant *ConstantExpr::getNSWShl(Constant *C1, Constant *C2) {
+ return getTy(C1->getType(), Instruction::Shl, C1, C2,
+ OverflowingBinaryOperator::NoSignedWrap);
+}
+
+Constant *ConstantExpr::getNUWShl(Constant *C1, Constant *C2) {
+ return getTy(C1->getType(), Instruction::Shl, C1, C2,
+ OverflowingBinaryOperator::NoUnsignedWrap);
+}
+
+Constant *ConstantExpr::getExactSDiv(Constant *C1, Constant *C2) {
return getTy(C1->getType(), Instruction::SDiv, C1, C2,
PossiblyExactOperator::IsExact);
}
-Constant* ConstantExpr::getExactUDiv(Constant* C1, Constant* C2) {
+Constant *ConstantExpr::getExactUDiv(Constant *C1, Constant *C2) {
return getTy(C1->getType(), Instruction::UDiv, C1, C2,
PossiblyExactOperator::IsExact);
}
+Constant *ConstantExpr::getExactAShr(Constant *C1, Constant *C2) {
+ return getTy(C1->getType(), Instruction::AShr, C1, C2,
+ PossiblyExactOperator::IsExact);
+}
+
+Constant *ConstantExpr::getExactLShr(Constant *C1, Constant *C2) {
+ return getTy(C1->getType(), Instruction::LShr, C1, C2,
+ PossiblyExactOperator::IsExact);
+}
+
+
// Utility function for determining if a ConstantExpr is a CastOp or not. This
// can't be inline because we don't want to #include Instruction.h into
// Constant.h
@@ -816,7 +837,7 @@
/// operands replaced with the specified values. The specified operands must
/// match count and type with the existing ones.
Constant *ConstantExpr::
-getWithOperands(Constant* const *Ops, unsigned NumOps) const {
+getWithOperands(Constant *const *Ops, unsigned NumOps) const {
assert(NumOps == getNumOperands() && "Operand count mismatch!");
bool AnyChange = false;
for (unsigned i = 0; i != NumOps; ++i) {
@@ -1486,7 +1507,7 @@
return getTy(C1->getType(), Opcode, C1, C2, Flags);
}
-Constant* ConstantExpr::getSizeOf(const Type* Ty) {
+Constant *ConstantExpr::getSizeOf(const Type* Ty) {
// sizeof is implemented as: (i64) gep (Ty*)null, 1
// Note that a non-inbounds gep is used, as null isn't within any object.
Constant *GEPIdx = ConstantInt::get(Type::getInt32Ty(Ty->getContext()), 1);
@@ -1496,7 +1517,7 @@
Type::getInt64Ty(Ty->getContext()));
}
-Constant* ConstantExpr::getAlignOf(const Type* Ty) {
+Constant *ConstantExpr::getAlignOf(const Type* Ty) {
// alignof is implemented as: (i64) gep ({i1,Ty}*)null, 0, 1
// Note that a non-inbounds gep is used, as null isn't within any object.
const Type *AligningTy = StructType::get(Ty->getContext(),
@@ -1510,12 +1531,12 @@
Type::getInt64Ty(Ty->getContext()));
}
-Constant* ConstantExpr::getOffsetOf(const StructType* STy, unsigned FieldNo) {
+Constant *ConstantExpr::getOffsetOf(const StructType* STy, unsigned FieldNo) {
return getOffsetOf(STy, ConstantInt::get(Type::getInt32Ty(STy->getContext()),
FieldNo));
}
-Constant* ConstantExpr::getOffsetOf(const Type* Ty, Constant *FieldNo) {
+Constant *ConstantExpr::getOffsetOf(const Type* Ty, Constant *FieldNo) {
// offsetof is implemented as: (i64) gep (Ty*)null, 0, FieldNo
// Note that a non-inbounds gep is used, as null isn't within any object.
Constant *GEPIdx[] = {
@@ -1823,7 +1844,7 @@
return getExtractValueTy(ReqTy, Agg, IdxList, NumIdx);
}
-Constant* ConstantExpr::getNeg(Constant* C) {
+Constant *ConstantExpr::getNeg(Constant *C) {
assert(C->getType()->isIntOrIntVectorTy() &&
"Cannot NEG a nonintegral value!");
return get(Instruction::Sub,
@@ -1831,7 +1852,7 @@
C);
}
-Constant* ConstantExpr::getFNeg(Constant* C) {
+Constant *ConstantExpr::getFNeg(Constant *C) {
assert(C->getType()->isFPOrFPVectorTy() &&
"Cannot FNEG a non-floating-point value!");
return get(Instruction::FSub,
@@ -1839,81 +1860,81 @@
C);
}
-Constant* ConstantExpr::getNot(Constant* C) {
+Constant *ConstantExpr::getNot(Constant *C) {
assert(C->getType()->isIntOrIntVectorTy() &&
"Cannot NOT a nonintegral value!");
return get(Instruction::Xor, C, Constant::getAllOnesValue(C->getType()));
}
-Constant* ConstantExpr::getAdd(Constant* C1, Constant* C2) {
+Constant *ConstantExpr::getAdd(Constant *C1, Constant *C2) {
return get(Instruction::Add, C1, C2);
}
-Constant* ConstantExpr::getFAdd(Constant* C1, Constant* C2) {
+Constant *ConstantExpr::getFAdd(Constant *C1, Constant *C2) {
return get(Instruction::FAdd, C1, C2);
}
-Constant* ConstantExpr::getSub(Constant* C1, Constant* C2) {
+Constant *ConstantExpr::getSub(Constant *C1, Constant *C2) {
return get(Instruction::Sub, C1, C2);
}
-Constant* ConstantExpr::getFSub(Constant* C1, Constant* C2) {
+Constant *ConstantExpr::getFSub(Constant *C1, Constant *C2) {
return get(Instruction::FSub, C1, C2);
}
-Constant* ConstantExpr::getMul(Constant* C1, Constant* C2) {
+Constant *ConstantExpr::getMul(Constant *C1, Constant *C2) {
return get(Instruction::Mul, C1, C2);
}
-Constant* ConstantExpr::getFMul(Constant* C1, Constant* C2) {
+Constant *ConstantExpr::getFMul(Constant *C1, Constant *C2) {
return get(Instruction::FMul, C1, C2);
}
-Constant* ConstantExpr::getUDiv(Constant* C1, Constant* C2) {
+Constant *ConstantExpr::getUDiv(Constant *C1, Constant *C2) {
return get(Instruction::UDiv, C1, C2);
}
-Constant* ConstantExpr::getSDiv(Constant* C1, Constant* C2) {
+Constant *ConstantExpr::getSDiv(Constant *C1, Constant *C2) {
return get(Instruction::SDiv, C1, C2);
}
-Constant* ConstantExpr::getFDiv(Constant* C1, Constant* C2) {
+Constant *ConstantExpr::getFDiv(Constant *C1, Constant *C2) {
return get(Instruction::FDiv, C1, C2);
}
-Constant* ConstantExpr::getURem(Constant* C1, Constant* C2) {
+Constant *ConstantExpr::getURem(Constant *C1, Constant *C2) {
return get(Instruction::URem, C1, C2);
}
-Constant* ConstantExpr::getSRem(Constant* C1, Constant* C2) {
+Constant *ConstantExpr::getSRem(Constant *C1, Constant *C2) {
return get(Instruction::SRem, C1, C2);
}
-Constant* ConstantExpr::getFRem(Constant* C1, Constant* C2) {
+Constant *ConstantExpr::getFRem(Constant *C1, Constant *C2) {
return get(Instruction::FRem, C1, C2);
}
-Constant* ConstantExpr::getAnd(Constant* C1, Constant* C2) {
+Constant *ConstantExpr::getAnd(Constant *C1, Constant *C2) {
return get(Instruction::And, C1, C2);
}
-Constant* ConstantExpr::getOr(Constant* C1, Constant* C2) {
+Constant *ConstantExpr::getOr(Constant *C1, Constant *C2) {
return get(Instruction::Or, C1, C2);
}
-Constant* ConstantExpr::getXor(Constant* C1, Constant* C2) {
+Constant *ConstantExpr::getXor(Constant *C1, Constant *C2) {
return get(Instruction::Xor, C1, C2);
}
-Constant* ConstantExpr::getShl(Constant* C1, Constant* C2) {
+Constant *ConstantExpr::getShl(Constant *C1, Constant *C2) {
return get(Instruction::Shl, C1, C2);
}
-Constant* ConstantExpr::getLShr(Constant* C1, Constant* C2) {
+Constant *ConstantExpr::getLShr(Constant *C1, Constant *C2) {
return get(Instruction::LShr, C1, C2);
}
-Constant* ConstantExpr::getAShr(Constant* C1, Constant* C2) {
+Constant *ConstantExpr::getAShr(Constant *C1, Constant *C2) {
return get(Instruction::AShr, C1, C2);
}
Modified: llvm/trunk/test/Assembler/2003-05-21-MalformedShiftCrash.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Assembler/2003-05-21-MalformedShiftCrash.ll?rev=125006&r1=125005&r2=125006&view=diff
==============================================================================
--- llvm/trunk/test/Assembler/2003-05-21-MalformedShiftCrash.ll (original)
+++ llvm/trunk/test/Assembler/2003-05-21-MalformedShiftCrash.ll Mon Feb 7 10:40:21 2011
@@ -1,4 +1,4 @@
; Found by inspection of the code
-; RUN: not llvm-as < %s > /dev/null |& grep {constexpr requires integer or integer vector operands}
+; RUN: not llvm-as < %s > /dev/null |& grep {constexpr requires integer operands}
global i32 ashr (float 1.0, float 2.0)
Modified: llvm/trunk/test/Assembler/flags.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Assembler/flags.ll?rev=125006&r1=125005&r2=125006&view=diff
==============================================================================
--- llvm/trunk/test/Assembler/flags.ll (original)
+++ llvm/trunk/test/Assembler/flags.ll Mon Feb 7 10:40:21 2011
@@ -92,6 +92,12 @@
ret i64 %z
}
+define i64 @shl_both(i64 %x, i64 %y) {
+; CHECK: %z = shl nuw nsw i64 %x, %y
+ %z = shl nuw nsw i64 %x, %y
+ ret i64 %z
+}
+
define i64 @sdiv_exact(i64 %x, i64 %y) {
; CHECK: %z = sdiv exact i64 %x, %y
%z = sdiv exact i64 %x, %y
@@ -116,6 +122,29 @@
ret i64 %z
}
+define i64 @ashr_plain(i64 %x, i64 %y) {
+; CHECK: %z = ashr i64 %x, %y
+ %z = ashr i64 %x, %y
+ ret i64 %z
+}
+
+define i64 @ashr_exact(i64 %x, i64 %y) {
+; CHECK: %z = ashr exact i64 %x, %y
+ %z = ashr exact i64 %x, %y
+ ret i64 %z
+}
+
+define i64 @lshr_plain(i64 %x, i64 %y) {
+; CHECK: %z = lshr i64 %x, %y
+ %z = lshr i64 %x, %y
+ ret i64 %z
+}
+
+define i64 @lshr_exact(i64 %x, i64 %y) {
+; CHECK: %z = lshr exact i64 %x, %y
+ %z = lshr exact i64 %x, %y
+ ret i64 %z
+}
define i64* @gep_nw(i64* %p, i64 %x) {
; CHECK: %z = getelementptr inbounds i64* %p, i64 %x
@@ -154,6 +183,16 @@
ret i64 udiv exact (i64 ptrtoint (i64* @addr to i64), i64 91)
}
+define i64 @ashr_exact_ce() {
+; CHECK: ret i64 ashr exact (i64 ptrtoint (i64* @addr to i64), i64 9)
+ ret i64 ashr exact (i64 ptrtoint (i64* @addr to i64), i64 9)
+}
+
+define i64 @lshr_exact_ce() {
+; CHECK: ret i64 lshr exact (i64 ptrtoint (i64* @addr to i64), i64 9)
+ ret i64 lshr exact (i64 ptrtoint (i64* @addr to i64), i64 9)
+}
+
define i64* @gep_nw_ce() {
; CHECK: ret i64* getelementptr inbounds (i64* @addr, i64 171)
ret i64* getelementptr inbounds (i64* @addr, i64 171)
@@ -214,6 +253,12 @@
ret i64 mul nsw (i64 ptrtoint (i64* @addr to i64), i64 91)
}
+define i64 @shl_signed_ce() {
+; CHECK: ret i64 shl nsw (i64 ptrtoint (i64* @addr to i64), i64 17)
+ ret i64 shl nsw (i64 ptrtoint (i64* @addr to i64), i64 17)
+}
+
+
define i64 @add_unsigned_ce() {
; CHECK: ret i64 add nuw (i64 ptrtoint (i64* @addr to i64), i64 91)
ret i64 add nuw (i64 ptrtoint (i64* @addr to i64), i64 91)
From Edmund.Grimley-Evans at arm.com Mon Feb 7 10:59:50 2011
From: Edmund.Grimley-Evans at arm.com (Edmund Grimley-Evans)
Date: Mon, 7 Feb 2011 16:59:50 -0000
Subject: [llvm-commits] [PATCH] s/CORTEX-A8/cortex-a8/ in
ARMAsmPrinter.cpp
References:
Message-ID:
Jason Kim:
> Gosh darn it, at least one version likes upper case in the
> .ARM.attributes. for ELF.o
> I guess we can put in lower case in .s, and upper case in .o?
That seems to be what the CodeSourcery tools do. I tested a more recent version, too.
--
IMPORTANT NOTICE: The contents of this email and any attachments are confidential and may also be privileged. If you are not the intended recipient, please notify the sender immediately and do not disclose the contents to any other person, use it for any purpose, or store or copy the information in any medium. Thank you.
From fvbommel at gmail.com Mon Feb 7 11:31:05 2011
From: fvbommel at gmail.com (Frits van Bommel)
Date: Mon, 7 Feb 2011 18:31:05 +0100
Subject: [llvm-commits] [llvm] r125006 - in /llvm/trunk:
docs/LangRef.html include/llvm/Constants.h include/llvm/Operator.h
lib/AsmParser/LLParser.cpp lib/Bitcode/Reader/BitcodeReader.cpp
lib/VMCore/Constants.cpp test/Assembler/2003-05-21-MalformedShiftCr
Message-ID:
On Mon, Feb 7, 2011 at 5:40 PM, Chris Lattner wrote:
> --- llvm/trunk/lib/VMCore/Constants.cpp (original)
> +++ llvm/trunk/lib/VMCore/Constants.cpp Mon Feb 7 10:40:21 2011
> @@ -7,7 +7,7 @@
>  //
>  //===----------------------------------------------------------------------===//
>  //
> -// This file implements the Constant* classes.
> +// This file implements the Constant *classes.
I don't think this was intentional :).
From bob.wilson at apple.com Mon Feb 7 11:43:03 2011
From: bob.wilson at apple.com (Bob Wilson)
Date: Mon, 07 Feb 2011 17:43:03 -0000
Subject: [llvm-commits] [llvm] r125009 -
/llvm/trunk/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
Message-ID: <20110207174303.559592A6C12C@llvm.org>
Author: bwilson
Date: Mon Feb 7 11:43:03 2011
New Revision: 125009
URL: http://llvm.org/viewvc/llvm-project?rev=125009&view=rev
Log:
Move code for OffsetCompare struct closer to where it is used.
Modified:
llvm/trunk/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
Modified: llvm/trunk/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMLoadStoreOptimizer.cpp?rev=125009&r1=125008&r2=125009&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMLoadStoreOptimizer.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMLoadStoreOptimizer.cpp Mon Feb 7 11:43:03 2011
@@ -1327,17 +1327,6 @@
return NumMerges > 0;
}
-namespace {
- struct OffsetCompare {
- bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
- int LOffset = getMemoryOpOffset(LHS);
- int ROffset = getMemoryOpOffset(RHS);
- assert(LHS == RHS || LOffset != ROffset);
- return LOffset > ROffset;
- }
- };
-}
-
/// MergeReturnIntoLDM - If this is a exit BB, try merging the return ops
/// ("bx lr" and "mov pc, lr") into the preceeding stack restore so it
/// directly restore the value of LR into pc.
@@ -1578,6 +1567,17 @@
return true;
}
+namespace {
+ struct OffsetCompare {
+ bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
+ int LOffset = getMemoryOpOffset(LHS);
+ int ROffset = getMemoryOpOffset(RHS);
+ assert(LHS == RHS || LOffset != ROffset);
+ return LOffset > ROffset;
+ }
+ };
+}
+
bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
SmallVector &Ops,
unsigned Base, bool isLd,
From bob.wilson at apple.com Mon Feb 7 11:43:06 2011
From: bob.wilson at apple.com (Bob Wilson)
Date: Mon, 07 Feb 2011 17:43:06 -0000
Subject: [llvm-commits] [llvm] r125010 - in /llvm/trunk/lib/Target/ARM:
ARMInstrInfo.td ARMInstrThumb2.td
Message-ID: <20110207174307.054D32A6C12D@llvm.org>
Author: bwilson
Date: Mon Feb 7 11:43:06 2011
New Revision: 125010
URL: http://llvm.org/viewvc/llvm-project?rev=125010&view=rev
Log:
Remove inaccurate comments: so_imm and t2_so_imm operands are not encoded
until the instructions are emitted or printed.
Modified:
llvm/trunk/lib/Target/ARM/ARMInstrInfo.td
llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td
Modified: llvm/trunk/lib/Target/ARM/ARMInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrInfo.td?rev=125010&r1=125009&r2=125010&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMInstrInfo.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMInstrInfo.td Mon Feb 7 11:43:06 2011
@@ -407,10 +407,7 @@
}
// so_imm - Match a 32-bit shifter_operand immediate operand, which is an
-// 8-bit immediate rotated by an arbitrary number of bits. so_imm values are
-// represented in the imm field in the same 12-bit form that they are encoded
-// into so_imm instructions: the 8-bit immediate is the least significant bits
-// [bits 0-7], the 4-bit shift amount is the next 4 bits [bits 8-11].
+// 8-bit immediate rotated by an arbitrary number of bits.
def so_imm : Operand, PatLeaf<(imm), [{ return Pred_so_imm(N); }]> {
let EncoderMethod = "getSOImmOpValue";
let PrintMethod = "printSOImmOperand";
Modified: llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td?rev=125010&r1=125009&r2=125010&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td Mon Feb 7 11:43:06 2011
@@ -43,10 +43,7 @@
// t2_so_imm - Match a 32-bit immediate operand, which is an
// 8-bit immediate rotated by an arbitrary number of bits, or an 8-bit
-// immediate splatted into multiple bytes of the word. t2_so_imm values are
-// represented in the imm field in the same 12-bit form that they are encoded
-// into t2_so_imm instructions: the 8-bit immediate is the least significant
-// bits [bits 0-7], the 4-bit shift/splat amount is the next 4 bits [bits 8-11].
+// immediate splatted into multiple bytes of the word.
def t2_so_imm : Operand, PatLeaf<(imm), [{ return Pred_t2_so_imm(N); }]> {
let EncoderMethod = "getT2SOImmOpValue";
}
From bob.wilson at apple.com Mon Feb 7 11:43:09 2011
From: bob.wilson at apple.com (Bob Wilson)
Date: Mon, 07 Feb 2011 17:43:09 -0000
Subject: [llvm-commits] [llvm] r125011 -
/llvm/trunk/lib/Target/ARM/ARMInstrInfo.td
Message-ID: <20110207174309.D1E7D2A6C12E@llvm.org>
Author: bwilson
Date: Mon Feb 7 11:43:09 2011
New Revision: 125011
URL: http://llvm.org/viewvc/llvm-project?rev=125011&view=rev
Log:
Fix a comment: addrmode6 no longer includes the optional writeback flag.
Modified:
llvm/trunk/lib/Target/ARM/ARMInstrInfo.td
Modified: llvm/trunk/lib/Target/ARM/ARMInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrInfo.td?rev=125011&r1=125010&r2=125011&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMInstrInfo.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMInstrInfo.td Mon Feb 7 11:43:09 2011
@@ -552,7 +552,7 @@
let EncoderMethod = "getAddrMode5OpValue";
}
-// addrmode6 := reg with optional writeback
+// addrmode6 := reg with optional alignment
//
def addrmode6 : Operand,
ComplexPattern{
From bob.wilson at apple.com Mon Feb 7 11:43:12 2011
From: bob.wilson at apple.com (Bob Wilson)
Date: Mon, 07 Feb 2011 17:43:12 -0000
Subject: [llvm-commits] [llvm] r125012 -
/llvm/trunk/lib/Target/ARM/ARMInstrNEON.td
Message-ID: <20110207174312.DBCDF2A6C12C@llvm.org>
Author: bwilson
Date: Mon Feb 7 11:43:12 2011
New Revision: 125012
URL: http://llvm.org/viewvc/llvm-project?rev=125012&view=rev
Log:
Fix some NEON instruction itineraries.
Modified:
llvm/trunk/lib/Target/ARM/ARMInstrNEON.td
Modified: llvm/trunk/lib/Target/ARM/ARMInstrNEON.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrNEON.td?rev=125012&r1=125011&r2=125012&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMInstrNEON.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMInstrNEON.td Mon Feb 7 11:43:12 2011
@@ -284,7 +284,7 @@
class VLD1D4WB<bits<4> op7_4, string Dt>
: NLdSt<0,0b10,0b0010,op7_4,
(outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
- (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD4, "vld1", Dt,
+ (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD1x4u, "vld1", Dt,
"\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm", "$Rn.addr = $wb",
[]> {
let Inst{5-4} = Rn{5-4};
@@ -451,7 +451,7 @@
class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<0, 0b10, op11_8, op7_4,
(outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
- (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD4,
+ (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD4u,
"vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm",
"$Rn.addr = $wb", []> {
let Inst{5-4} = Rn{5-4};
@@ -461,9 +461,9 @@
def VLD4d16_UPD : VLD4DWB<0b0000, {0,1,?,?}, "16">;
def VLD4d32_UPD : VLD4DWB<0b0000, {1,0,?,?}, "32">;
-def VLD4d8Pseudo_UPD : VLDQQWBPseudo;
-def VLD4d16Pseudo_UPD : VLDQQWBPseudo;
-def VLD4d32Pseudo_UPD : VLDQQWBPseudo;
+def VLD4d8Pseudo_UPD : VLDQQWBPseudo;
+def VLD4d16Pseudo_UPD : VLDQQWBPseudo;
+def VLD4d32Pseudo_UPD : VLDQQWBPseudo;
// ...with double-spaced registers (non-updating versions for disassembly only):
def VLD4q8 : VLD4D<0b0001, {0,0,?,?}, "8">;
@@ -473,14 +473,18 @@
def VLD4q16_UPD : VLD4DWB<0b0001, {0,1,?,?}, "16">;
def VLD4q32_UPD : VLD4DWB<0b0001, {1,0,?,?}, "32">;
-def VLD4q8Pseudo_UPD : VLDQQQQWBPseudo;
-def VLD4q16Pseudo_UPD : VLDQQQQWBPseudo;
-def VLD4q32Pseudo_UPD : VLDQQQQWBPseudo;
+def VLD4q8Pseudo_UPD : VLDQQQQWBPseudo;
+def VLD4q16Pseudo_UPD : VLDQQQQWBPseudo;
+def VLD4q32Pseudo_UPD : VLDQQQQWBPseudo;
// ...alternate versions to be allocated odd register numbers:
-def VLD4q8oddPseudo_UPD : VLDQQQQWBPseudo;
-def VLD4q16oddPseudo_UPD : VLDQQQQWBPseudo;
-def VLD4q32oddPseudo_UPD : VLDQQQQWBPseudo;
+def VLD4q8oddPseudo : VLDQQQQPseudo;
+def VLD4q16oddPseudo : VLDQQQQPseudo;
+def VLD4q32oddPseudo : VLDQQQQPseudo;
+
+def VLD4q8oddPseudo_UPD : VLDQQQQWBPseudo;
+def VLD4q16oddPseudo_UPD : VLDQQQQWBPseudo;
+def VLD4q32oddPseudo_UPD : VLDQQQQWBPseudo;
} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1
@@ -764,7 +768,7 @@
(outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
(ins addrmode6:$Rn, am6offset:$Rm,
DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
- IIC_VLD4ln, "vld4", Dt,
+ IIC_VLD4lnu, "vld4", Dt,
"\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn$Rm",
"$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4, $Rn.addr = $wb",
[]> {
From bob.wilson at apple.com Mon Feb 7 11:43:16 2011
From: bob.wilson at apple.com (Bob Wilson)
Date: Mon, 07 Feb 2011 17:43:16 -0000
Subject: [llvm-commits] [llvm] r125013 - in /llvm/trunk/lib/Target/ARM:
ARMExpandPseudoInsts.cpp ARMISelDAGToDAG.cpp ARMInstrNEON.td
Message-ID: <20110207174316.29C9E2A6C12D@llvm.org>
Author: bwilson
Date: Mon Feb 7 11:43:15 2011
New Revision: 125013
URL: http://llvm.org/viewvc/llvm-project?rev=125013&view=rev
Log:
Change VLD3/4 and VST3/4 for quad registers to not update the address register.
These operations are expanded to pairs of loads or stores, and the first one
uses the address register update to produce the address for the second one.
So far, the second load/store has also updated the address register, just
for convenience, since that output has never been used. In anticipation of
actually supporting post-increment updates for these operations, this changes
the non-updating operations to use a non-updating load/store for the second
instruction.
Modified:
llvm/trunk/lib/Target/ARM/ARMExpandPseudoInsts.cpp
llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp
llvm/trunk/lib/Target/ARM/ARMInstrNEON.td
Modified: llvm/trunk/lib/Target/ARM/ARMExpandPseudoInsts.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMExpandPseudoInsts.cpp?rev=125013&r1=125012&r2=125013&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMExpandPseudoInsts.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMExpandPseudoInsts.cpp Mon Feb 7 11:43:15 2011
@@ -203,10 +203,13 @@
{ ARM::VLD3d8Pseudo_UPD, ARM::VLD3d8_UPD, true, true, SingleSpc, 3, 8 },
{ ARM::VLD3q16Pseudo_UPD, ARM::VLD3q16_UPD, true, true, EvenDblSpc, 3, 4 },
+{ ARM::VLD3q16oddPseudo, ARM::VLD3q16, true, false, OddDblSpc, 3, 4 },
{ ARM::VLD3q16oddPseudo_UPD, ARM::VLD3q16_UPD, true, true, OddDblSpc, 3, 4 },
{ ARM::VLD3q32Pseudo_UPD, ARM::VLD3q32_UPD, true, true, EvenDblSpc, 3, 2 },
+{ ARM::VLD3q32oddPseudo, ARM::VLD3q32, true, false, OddDblSpc, 3, 2 },
{ ARM::VLD3q32oddPseudo_UPD, ARM::VLD3q32_UPD, true, true, OddDblSpc, 3, 2 },
{ ARM::VLD3q8Pseudo_UPD, ARM::VLD3q8_UPD, true, true, EvenDblSpc, 3, 8 },
+{ ARM::VLD3q8oddPseudo, ARM::VLD3q8, true, false, OddDblSpc, 3, 8 },
{ ARM::VLD3q8oddPseudo_UPD, ARM::VLD3q8_UPD, true, true, OddDblSpc, 3, 8 },
{ ARM::VLD4DUPd16Pseudo, ARM::VLD4DUPd16, true, false, SingleSpc, 4, 4},
@@ -235,10 +238,13 @@
{ ARM::VLD4d8Pseudo_UPD, ARM::VLD4d8_UPD, true, true, SingleSpc, 4, 8 },
{ ARM::VLD4q16Pseudo_UPD, ARM::VLD4q16_UPD, true, true, EvenDblSpc, 4, 4 },
+{ ARM::VLD4q16oddPseudo, ARM::VLD4q16, true, false, OddDblSpc, 4, 4 },
{ ARM::VLD4q16oddPseudo_UPD, ARM::VLD4q16_UPD, true, true, OddDblSpc, 4, 4 },
{ ARM::VLD4q32Pseudo_UPD, ARM::VLD4q32_UPD, true, true, EvenDblSpc, 4, 2 },
+{ ARM::VLD4q32oddPseudo, ARM::VLD4q32, true, false, OddDblSpc, 4, 2 },
{ ARM::VLD4q32oddPseudo_UPD, ARM::VLD4q32_UPD, true, true, OddDblSpc, 4, 2 },
{ ARM::VLD4q8Pseudo_UPD, ARM::VLD4q8_UPD, true, true, EvenDblSpc, 4, 8 },
+{ ARM::VLD4q8oddPseudo, ARM::VLD4q8, true, false, OddDblSpc, 4, 8 },
{ ARM::VLD4q8oddPseudo_UPD, ARM::VLD4q8_UPD, true, true, OddDblSpc, 4, 8 },
{ ARM::VST1LNq16Pseudo, ARM::VST1LNd16, false, false, EvenDblSpc, 1, 4 },
@@ -306,10 +312,13 @@
{ ARM::VST3d8Pseudo_UPD, ARM::VST3d8_UPD, false, true, SingleSpc, 3, 8 },
{ ARM::VST3q16Pseudo_UPD, ARM::VST3q16_UPD, false, true, EvenDblSpc, 3, 4 },
+{ ARM::VST3q16oddPseudo, ARM::VST3q16, false, false, OddDblSpc, 3, 4 },
{ ARM::VST3q16oddPseudo_UPD, ARM::VST3q16_UPD, false, true, OddDblSpc, 3, 4 },
{ ARM::VST3q32Pseudo_UPD, ARM::VST3q32_UPD, false, true, EvenDblSpc, 3, 2 },
+{ ARM::VST3q32oddPseudo, ARM::VST3q32, false, false, OddDblSpc, 3, 2 },
{ ARM::VST3q32oddPseudo_UPD, ARM::VST3q32_UPD, false, true, OddDblSpc, 3, 2 },
{ ARM::VST3q8Pseudo_UPD, ARM::VST3q8_UPD, false, true, EvenDblSpc, 3, 8 },
+{ ARM::VST3q8oddPseudo, ARM::VST3q8, false, false, OddDblSpc, 3, 8 },
{ ARM::VST3q8oddPseudo_UPD, ARM::VST3q8_UPD, false, true, OddDblSpc, 3, 8 },
{ ARM::VST4LNd16Pseudo, ARM::VST4LNd16, false, false, SingleSpc, 4, 4 },
@@ -331,11 +340,14 @@
{ ARM::VST4d8Pseudo_UPD, ARM::VST4d8_UPD, false, true, SingleSpc, 4, 8 },
{ ARM::VST4q16Pseudo_UPD, ARM::VST4q16_UPD, false, true, EvenDblSpc, 4, 4 },
+{ ARM::VST4q16oddPseudo, ARM::VST4q16, false, false, OddDblSpc, 4, 4 },
{ ARM::VST4q16oddPseudo_UPD, ARM::VST4q16_UPD, false, true, OddDblSpc, 4, 4 },
{ ARM::VST4q32Pseudo_UPD, ARM::VST4q32_UPD, false, true, EvenDblSpc, 4, 2 },
+{ ARM::VST4q32oddPseudo, ARM::VST4q32, false, false, OddDblSpc, 4, 2 },
{ ARM::VST4q32oddPseudo_UPD, ARM::VST4q32_UPD, false, true, OddDblSpc, 4, 2 },
{ ARM::VST4q8Pseudo_UPD, ARM::VST4q8_UPD, false, true, EvenDblSpc, 4, 8 },
-{ ARM::VST4q8oddPseudo_UPD , ARM::VST4q8_UPD, false, true, OddDblSpc, 4, 8 }
+{ ARM::VST4q8oddPseudo, ARM::VST4q8, false, false, OddDblSpc, 4, 8 },
+{ ARM::VST4q8oddPseudo_UPD, ARM::VST4q8_UPD, false, true, OddDblSpc, 4, 8 }
};
/// LookupNEONLdSt - Search the NEONLdStTable for information about a NEON
@@ -1036,6 +1048,9 @@
case ARM::VLD3q8Pseudo_UPD:
case ARM::VLD3q16Pseudo_UPD:
case ARM::VLD3q32Pseudo_UPD:
+ case ARM::VLD3q8oddPseudo:
+ case ARM::VLD3q16oddPseudo:
+ case ARM::VLD3q32oddPseudo:
case ARM::VLD3q8oddPseudo_UPD:
case ARM::VLD3q16oddPseudo_UPD:
case ARM::VLD3q32oddPseudo_UPD:
@@ -1050,6 +1065,9 @@
case ARM::VLD4q8Pseudo_UPD:
case ARM::VLD4q16Pseudo_UPD:
case ARM::VLD4q32Pseudo_UPD:
+ case ARM::VLD4q8oddPseudo:
+ case ARM::VLD4q16oddPseudo:
+ case ARM::VLD4q32oddPseudo:
case ARM::VLD4q8oddPseudo_UPD:
case ARM::VLD4q16oddPseudo_UPD:
case ARM::VLD4q32oddPseudo_UPD:
@@ -1111,6 +1129,9 @@
case ARM::VST3q8Pseudo_UPD:
case ARM::VST3q16Pseudo_UPD:
case ARM::VST3q32Pseudo_UPD:
+ case ARM::VST3q8oddPseudo:
+ case ARM::VST3q16oddPseudo:
+ case ARM::VST3q32oddPseudo:
case ARM::VST3q8oddPseudo_UPD:
case ARM::VST3q16oddPseudo_UPD:
case ARM::VST3q32oddPseudo_UPD:
@@ -1125,6 +1146,9 @@
case ARM::VST4q8Pseudo_UPD:
case ARM::VST4q16Pseudo_UPD:
case ARM::VST4q32Pseudo_UPD:
+ case ARM::VST4q8oddPseudo:
+ case ARM::VST4q16oddPseudo:
+ case ARM::VST4q32oddPseudo:
case ARM::VST4q8oddPseudo_UPD:
case ARM::VST4q16oddPseudo_UPD:
case ARM::VST4q32oddPseudo_UPD:
Modified: llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp?rev=125013&r1=125012&r2=125013&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp Mon Feb 7 11:43:15 2011
@@ -1487,9 +1487,9 @@
SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
SDValue SuperReg;
if (is64BitVector) {
- unsigned Opc = DOpcodes[OpcodeIndex];
const SDValue Ops[] = { MemAddr, Align, Pred, Reg0, Chain };
- SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTy, MVT::Other, Ops, 5);
+ SDNode *VLd = CurDAG->getMachineNode(DOpcodes[OpcodeIndex], dl,
+ ResTy, MVT::Other, Ops, 5);
if (NumVecs == 1)
return VLd;
@@ -1507,9 +1507,9 @@
if (NumVecs <= 2) {
// Quad registers are directly supported for VLD1 and VLD2,
// loading pairs of D regs.
- unsigned Opc = QOpcodes0[OpcodeIndex];
const SDValue Ops[] = { MemAddr, Align, Pred, Reg0, Chain };
- SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTy, MVT::Other, Ops, 5);
+ SDNode *VLd = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
+ ResTy, MVT::Other, Ops, 5);
if (NumVecs == 1)
return VLd;
@@ -1522,22 +1522,20 @@
EVT AddrTy = MemAddr.getValueType();
// Load the even subregs.
- unsigned Opc = QOpcodes0[OpcodeIndex];
SDValue ImplDef =
SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
- SDNode *VLdA =
- CurDAG->getMachineNode(Opc, dl, ResTy, AddrTy, MVT::Other, OpsA, 7);
+ SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
+ ResTy, AddrTy, MVT::Other, OpsA, 7);
Chain = SDValue(VLdA, 2);
// Load the odd subregs.
- Opc = QOpcodes1[OpcodeIndex];
- const SDValue OpsB[] = { SDValue(VLdA, 1), Align, Reg0, SDValue(VLdA, 0),
+ const SDValue OpsB[] = { SDValue(VLdA, 1), Align, SDValue(VLdA, 0),
Pred, Reg0, Chain };
- SDNode *VLdB =
- CurDAG->getMachineNode(Opc, dl, ResTy, AddrTy, MVT::Other, OpsB, 7);
+ SDNode *VLdB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl,
+ ResTy, MVT::Other, OpsB, 6);
SuperReg = SDValue(VLdB, 0);
- Chain = SDValue(VLdB, 2);
+ Chain = SDValue(VLdB, 1);
}
// Extract out the Q registers.
@@ -1588,54 +1586,46 @@
SDValue Pred = getAL(CurDAG);
SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
- SmallVector Ops;
- Ops.push_back(MemAddr);
- Ops.push_back(Align);
-
if (is64BitVector) {
+ SDValue SrcReg;
if (NumVecs == 1) {
- Ops.push_back(N->getOperand(3));
+ SrcReg = N->getOperand(3);
} else {
- SDValue RegSeq;
SDValue V0 = N->getOperand(0+3);
SDValue V1 = N->getOperand(1+3);
// Form a REG_SEQUENCE to force register allocation.
if (NumVecs == 2)
- RegSeq = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0);
+ SrcReg = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0);
else {
SDValue V2 = N->getOperand(2+3);
- // If it's a vld3, form a quad D-register and leave the last part as
+ // If it's a vst3, form a quad D-register and leave the last part as
// an undef.
SDValue V3 = (NumVecs == 3)
? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
: N->getOperand(3+3);
- RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
+ SrcReg = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
}
- Ops.push_back(RegSeq);
}
- Ops.push_back(Pred);
- Ops.push_back(Reg0); // predicate register
- Ops.push_back(Chain);
- unsigned Opc = DOpcodes[OpcodeIndex];
- return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 6);
+ const SDValue Ops[] = { MemAddr, Align, SrcReg, Pred, Reg0, Chain };
+ return CurDAG->getMachineNode(DOpcodes[OpcodeIndex], dl,
+ MVT::Other, Ops, 6);
}
if (NumVecs <= 2) {
// Quad registers are directly supported for VST1 and VST2.
- unsigned Opc = QOpcodes0[OpcodeIndex];
+ SDValue SrcReg;
if (NumVecs == 1) {
- Ops.push_back(N->getOperand(3));
+ SrcReg = N->getOperand(3);
} else {
// Form a QQ register.
SDValue Q0 = N->getOperand(3);
SDValue Q1 = N->getOperand(4);
- Ops.push_back(SDValue(PairQRegs(MVT::v4i64, Q0, Q1), 0));
+ SrcReg = SDValue(PairQRegs(MVT::v4i64, Q0, Q1), 0);
}
- Ops.push_back(Pred);
- Ops.push_back(Reg0); // predicate register
- Ops.push_back(Chain);
- return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 6);
+ const SDValue Ops[] = { MemAddr, Align, SrcReg, Pred, Reg0, Chain };
+ return CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
+ MVT::Other, Ops, 6);
}
// Otherwise, quad registers are stored with two separate instructions,
@@ -1651,23 +1641,17 @@
SDValue RegSeq = SDValue(QuadQRegs(MVT::v8i64, V0, V1, V2, V3), 0);
// Store the even D registers.
- Ops.push_back(Reg0); // post-access address offset
- Ops.push_back(RegSeq);
- Ops.push_back(Pred);
- Ops.push_back(Reg0); // predicate register
- Ops.push_back(Chain);
- unsigned Opc = QOpcodes0[OpcodeIndex];
- SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
- MVT::Other, Ops.data(), 7);
+ const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
+ SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
+ MemAddr.getValueType(),
+ MVT::Other, OpsA, 7);
Chain = SDValue(VStA, 1);
// Store the odd D registers.
- Ops[0] = SDValue(VStA, 0); // MemAddr
- Ops[6] = Chain;
- Opc = QOpcodes1[OpcodeIndex];
- SDNode *VStB = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
- MVT::Other, Ops.data(), 7);
- Chain = SDValue(VStB, 1);
+ const SDValue OpsB[] = { SDValue(VStA, 0), Align, RegSeq, Pred, Reg0, Chain };
+ SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl,
+ MVT::Other, OpsB, 6);
+ Chain = SDValue(VStB, 0);
ReplaceUses(SDValue(N, 0), Chain);
return NULL;
}
@@ -2530,9 +2514,9 @@
unsigned QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
ARM::VLD3q16Pseudo_UPD,
ARM::VLD3q32Pseudo_UPD };
- unsigned QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
- ARM::VLD3q16oddPseudo_UPD,
- ARM::VLD3q32oddPseudo_UPD };
+ unsigned QOpcodes1[] = { ARM::VLD3q8oddPseudo,
+ ARM::VLD3q16oddPseudo,
+ ARM::VLD3q32oddPseudo };
return SelectVLD(N, 3, DOpcodes, QOpcodes0, QOpcodes1);
}
@@ -2542,9 +2526,9 @@
unsigned QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
ARM::VLD4q16Pseudo_UPD,
ARM::VLD4q32Pseudo_UPD };
- unsigned QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD,
- ARM::VLD4q16oddPseudo_UPD,
- ARM::VLD4q32oddPseudo_UPD };
+ unsigned QOpcodes1[] = { ARM::VLD4q8oddPseudo,
+ ARM::VLD4q16oddPseudo,
+ ARM::VLD4q32oddPseudo };
return SelectVLD(N, 4, DOpcodes, QOpcodes0, QOpcodes1);
}
@@ -2591,9 +2575,9 @@
unsigned QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
ARM::VST3q16Pseudo_UPD,
ARM::VST3q32Pseudo_UPD };
- unsigned QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
- ARM::VST3q16oddPseudo_UPD,
- ARM::VST3q32oddPseudo_UPD };
+ unsigned QOpcodes1[] = { ARM::VST3q8oddPseudo,
+ ARM::VST3q16oddPseudo,
+ ARM::VST3q32oddPseudo };
return SelectVST(N, 3, DOpcodes, QOpcodes0, QOpcodes1);
}
@@ -2603,9 +2587,9 @@
unsigned QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
ARM::VST4q16Pseudo_UPD,
ARM::VST4q32Pseudo_UPD };
- unsigned QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD,
- ARM::VST4q16oddPseudo_UPD,
- ARM::VST4q32oddPseudo_UPD };
+ unsigned QOpcodes1[] = { ARM::VST4q8oddPseudo,
+ ARM::VST4q16oddPseudo,
+ ARM::VST4q32oddPseudo };
return SelectVST(N, 4, DOpcodes, QOpcodes0, QOpcodes1);
}
Modified: llvm/trunk/lib/Target/ARM/ARMInstrNEON.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrNEON.td?rev=125013&r1=125012&r2=125013&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMInstrNEON.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMInstrNEON.td Mon Feb 7 11:43:15 2011
@@ -176,6 +176,8 @@
: PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
(ins addrmode6:$addr, am6offset:$offset), itin,
"$addr.addr = $wb">;
+class VLDQQQQPseudo
+ : PseudoNLdSt<(outs QQQQPR:$dst), (ins addrmode6:$addr, QQQQPR:$src), itin,"">;
class VLDQQQQWBPseudo
: PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb),
(ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin,
@@ -412,7 +414,7 @@
def VLD3d16Pseudo_UPD : VLDQQWBPseudo;
def VLD3d32Pseudo_UPD : VLDQQWBPseudo;
-// ...with double-spaced registers (non-updating versions for disassembly only):
+// ...with double-spaced registers:
def VLD3q8 : VLD3D<0b0101, {0,0,0,?}, "8">;
def VLD3q16 : VLD3D<0b0101, {0,1,0,?}, "16">;
def VLD3q32 : VLD3D<0b0101, {1,0,0,?}, "32">;
@@ -425,6 +427,10 @@
def VLD3q32Pseudo_UPD : VLDQQQQWBPseudo;
// ...alternate versions to be allocated odd register numbers:
+def VLD3q8oddPseudo : VLDQQQQPseudo;
+def VLD3q16oddPseudo : VLDQQQQPseudo;
+def VLD3q32oddPseudo : VLDQQQQPseudo;
+
def VLD3q8oddPseudo_UPD : VLDQQQQWBPseudo;
def VLD3q16oddPseudo_UPD : VLDQQQQWBPseudo;
def VLD3q32oddPseudo_UPD : VLDQQQQWBPseudo;
@@ -465,7 +471,7 @@
def VLD4d16Pseudo_UPD : VLDQQWBPseudo;
def VLD4d32Pseudo_UPD : VLDQQWBPseudo;
-// ...with double-spaced registers (non-updating versions for disassembly only):
+// ...with double-spaced registers:
def VLD4q8 : VLD4D<0b0001, {0,0,?,?}, "8">;
def VLD4q16 : VLD4D<0b0001, {0,1,?,?}, "16">;
def VLD4q32 : VLD4D<0b0001, {1,0,?,?}, "32">;
@@ -1017,6 +1023,8 @@
: PseudoNLdSt<(outs GPR:$wb),
(ins addrmode6:$addr, am6offset:$offset, QQPR:$src), itin,
"$addr.addr = $wb">;
+class VSTQQQQPseudo
+ : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src), itin, "">;
class VSTQQQQWBPseudo
: PseudoNLdSt<(outs GPR:$wb),
(ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin,
@@ -1252,7 +1260,7 @@
def VST3d16Pseudo_UPD : VSTQQWBPseudo;
def VST3d32Pseudo_UPD : VSTQQWBPseudo;
-// ...with double-spaced registers (non-updating versions for disassembly only):
+// ...with double-spaced registers:
def VST3q8 : VST3D<0b0101, {0,0,0,?}, "8">;
def VST3q16 : VST3D<0b0101, {0,1,0,?}, "16">;
def VST3q32 : VST3D<0b0101, {1,0,0,?}, "32">;
@@ -1265,6 +1273,10 @@
def VST3q32Pseudo_UPD : VSTQQQQWBPseudo;
// ...alternate versions to be allocated odd register numbers:
+def VST3q8oddPseudo : VSTQQQQPseudo;
+def VST3q16oddPseudo : VSTQQQQPseudo;
+def VST3q32oddPseudo : VSTQQQQPseudo;
+
def VST3q8oddPseudo_UPD : VSTQQQQWBPseudo;
def VST3q16oddPseudo_UPD : VSTQQQQWBPseudo;
def VST3q32oddPseudo_UPD : VSTQQQQWBPseudo;
@@ -1305,7 +1317,7 @@
def VST4d16Pseudo_UPD : VSTQQWBPseudo;
def VST4d32Pseudo_UPD : VSTQQWBPseudo;
-// ...with double-spaced registers (non-updating versions for disassembly only):
+// ...with double-spaced registers:
def VST4q8 : VST4D<0b0001, {0,0,?,?}, "8">;
def VST4q16 : VST4D<0b0001, {0,1,?,?}, "16">;
def VST4q32 : VST4D<0b0001, {1,0,?,?}, "32">;
@@ -1318,6 +1330,10 @@
def VST4q32Pseudo_UPD : VSTQQQQWBPseudo;
// ...alternate versions to be allocated odd register numbers:
+def VST4q8oddPseudo : VSTQQQQPseudo;
+def VST4q16oddPseudo : VSTQQQQPseudo;
+def VST4q32oddPseudo : VSTQQQQPseudo;
+
def VST4q8oddPseudo_UPD : VSTQQQQWBPseudo;
def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo;
def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo;
From bob.wilson at apple.com Mon Feb 7 11:43:21 2011
From: bob.wilson at apple.com (Bob Wilson)
Date: Mon, 07 Feb 2011 17:43:21 -0000
Subject: [llvm-commits] [llvm] r125014 - in /llvm/trunk:
lib/Target/ARM/ARMISelDAGToDAG.cpp lib/Target/ARM/ARMISelLowering.cpp
lib/Target/ARM/ARMISelLowering.h test/CodeGen/ARM/vld1.ll
test/CodeGen/ARM/vld2.ll test/CodeGen/ARM/vld3.ll test/CodeGen/ARM/vld4.ll
test/CodeGen/ARM/vlddup.ll test/CodeGen/ARM/vldlane.ll
test/CodeGen/ARM/vst1.ll test/CodeGen/ARM/vst2.ll test/CodeGen/ARM/vst3.ll
test/CodeGen/ARM/vst4.ll test/CodeGen/ARM/vstlane.ll
Message-ID: <20110207174322.1BE182A6C12C@llvm.org>
Author: bwilson
Date: Mon Feb 7 11:43:21 2011
New Revision: 125014
URL: http://llvm.org/viewvc/llvm-project?rev=125014&view=rev
Log:
Add codegen support for using post-increment NEON load/store instructions.
The vld1-lane, vld1-dup and vst1-lane instructions do not yet support using
post-increment versions, but all the rest of the NEON load/store instructions
should be handled now.
Modified:
llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp
llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
llvm/trunk/lib/Target/ARM/ARMISelLowering.h
llvm/trunk/test/CodeGen/ARM/vld1.ll
llvm/trunk/test/CodeGen/ARM/vld2.ll
llvm/trunk/test/CodeGen/ARM/vld3.ll
llvm/trunk/test/CodeGen/ARM/vld4.ll
llvm/trunk/test/CodeGen/ARM/vlddup.ll
llvm/trunk/test/CodeGen/ARM/vldlane.ll
llvm/trunk/test/CodeGen/ARM/vst1.ll
llvm/trunk/test/CodeGen/ARM/vst2.ll
llvm/trunk/test/CodeGen/ARM/vst3.ll
llvm/trunk/test/CodeGen/ARM/vst4.ll
llvm/trunk/test/CodeGen/ARM/vstlane.ll
Modified: llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp?rev=125014&r1=125013&r2=125014&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp Mon Feb 7 11:43:21 2011
@@ -196,26 +196,30 @@
/// 1, 2, 3 or 4. The opcode arrays specify the instructions used for
/// loads of D registers and even subregs and odd subregs of Q registers.
/// For NumVecs <= 2, QOpcodes1 is not used.
- SDNode *SelectVLD(SDNode *N, unsigned NumVecs, unsigned *DOpcodes,
+ SDNode *SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
+ unsigned *DOpcodes,
unsigned *QOpcodes0, unsigned *QOpcodes1);
/// SelectVST - Select NEON store intrinsics. NumVecs should
/// be 1, 2, 3 or 4. The opcode arrays specify the instructions used for
/// stores of D registers and even subregs and odd subregs of Q registers.
/// For NumVecs <= 2, QOpcodes1 is not used.
- SDNode *SelectVST(SDNode *N, unsigned NumVecs, unsigned *DOpcodes,
+ SDNode *SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
+ unsigned *DOpcodes,
unsigned *QOpcodes0, unsigned *QOpcodes1);
/// SelectVLDSTLane - Select NEON load/store lane intrinsics. NumVecs should
/// be 2, 3 or 4. The opcode arrays specify the instructions used for
/// load/store of D registers and Q registers.
- SDNode *SelectVLDSTLane(SDNode *N, bool IsLoad, unsigned NumVecs,
+ SDNode *SelectVLDSTLane(SDNode *N, bool IsLoad,
+ bool isUpdating, unsigned NumVecs,
unsigned *DOpcodes, unsigned *QOpcodes);
/// SelectVLDDup - Select NEON load-duplicate intrinsics. NumVecs
/// should be 2, 3 or 4. The opcode array specifies the instructions used
/// for loading D registers. (Q registers are not supported.)
- SDNode *SelectVLDDup(SDNode *N, unsigned NumVecs, unsigned *Opcodes);
+ SDNode *SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
+ unsigned *Opcodes);
/// SelectVTBL - Select NEON VTBL and VTBX intrinsics. NumVecs should be 2,
/// 3 or 4. These are custom-selected so that a REG_SEQUENCE can be
@@ -1439,14 +1443,15 @@
return CurDAG->getTargetConstant(Alignment, MVT::i32);
}
-SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
+SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
unsigned *DOpcodes, unsigned *QOpcodes0,
unsigned *QOpcodes1) {
assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
DebugLoc dl = N->getDebugLoc();
SDValue MemAddr, Align;
- if (!SelectAddrMode6(N, N->getOperand(2), MemAddr, Align))
+ unsigned AddrOpIdx = isUpdating ? 1 : 2;
+ if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
return NULL;
SDValue Chain = N->getOperand(0);
@@ -1482,46 +1487,39 @@
ResTyElts *= 2;
ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
}
+ std::vector<EVT> ResTys;
+ ResTys.push_back(ResTy);
+ if (isUpdating)
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::Other);
SDValue Pred = getAL(CurDAG);
SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
- SDValue SuperReg;
- if (is64BitVector) {
- const SDValue Ops[] = { MemAddr, Align, Pred, Reg0, Chain };
- SDNode *VLd = CurDAG->getMachineNode(DOpcodes[OpcodeIndex], dl,
- ResTy, MVT::Other, Ops, 5);
- if (NumVecs == 1)
- return VLd;
-
- SuperReg = SDValue(VLd, 0);
- assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
- SDValue D = CurDAG->getTargetExtractSubreg(ARM::dsub_0+Vec,
- dl, VT, SuperReg);
- ReplaceUses(SDValue(N, Vec), D);
- }
- ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
- return NULL;
- }
-
- if (NumVecs <= 2) {
- // Quad registers are directly supported for VLD1 and VLD2,
- // loading pairs of D regs.
- const SDValue Ops[] = { MemAddr, Align, Pred, Reg0, Chain };
- SDNode *VLd = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
- ResTy, MVT::Other, Ops, 5);
- if (NumVecs == 1)
- return VLd;
+ SDNode *VLd;
+ SmallVector<SDValue, 7> Ops;
- SuperReg = SDValue(VLd, 0);
- Chain = SDValue(VLd, 1);
+ // Double registers and VLD1/VLD2 quad registers are directly supported.
+ if (is64BitVector || NumVecs <= 2) {
+ unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
+ QOpcodes0[OpcodeIndex]);
+ Ops.push_back(MemAddr);
+ Ops.push_back(Align);
+ if (isUpdating) {
+ SDValue Inc = N->getOperand(AddrOpIdx + 1);
+ Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
+ }
+ Ops.push_back(Pred);
+ Ops.push_back(Reg0);
+ Ops.push_back(Chain);
+ VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), Ops.size());
} else {
// Otherwise, quad registers are loaded with two separate instructions,
// where one loads the even registers and the other loads the odd registers.
EVT AddrTy = MemAddr.getValueType();
- // Load the even subregs.
+ // Load the even subregs. This is always an updating load, so that it
+ // provides the address to the second load for the odd subregs.
SDValue ImplDef =
SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
@@ -1530,37 +1528,54 @@
Chain = SDValue(VLdA, 2);
// Load the odd subregs.
- const SDValue OpsB[] = { SDValue(VLdA, 1), Align, SDValue(VLdA, 0),
- Pred, Reg0, Chain };
- SDNode *VLdB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl,
- ResTy, MVT::Other, OpsB, 6);
- SuperReg = SDValue(VLdB, 0);
- Chain = SDValue(VLdB, 1);
- }
-
- // Extract out the Q registers.
- assert(ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
- SDValue Q = CurDAG->getTargetExtractSubreg(ARM::qsub_0+Vec,
- dl, VT, SuperReg);
- ReplaceUses(SDValue(N, Vec), Q);
+ Ops.push_back(SDValue(VLdA, 1));
+ Ops.push_back(Align);
+ if (isUpdating) {
+ SDValue Inc = N->getOperand(AddrOpIdx + 1);
+ assert(isa<ConstantSDNode>(Inc.getNode()) &&
+ "only constant post-increment update allowed for VLD3/4");
+ (void)Inc;
+ Ops.push_back(Reg0);
+ }
+ Ops.push_back(SDValue(VLdA, 0));
+ Ops.push_back(Pred);
+ Ops.push_back(Reg0);
+ Ops.push_back(Chain);
+ VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
+ Ops.data(), Ops.size());
}
- ReplaceUses(SDValue(N, NumVecs), Chain);
+
+ if (NumVecs == 1)
+ return VLd;
+
+ // Extract out the subregisters.
+ SDValue SuperReg = SDValue(VLd, 0);
+ assert(ARM::dsub_7 == ARM::dsub_0+7 &&
+ ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
+ unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
+ ReplaceUses(SDValue(N, Vec),
+ CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
+ ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
+ if (isUpdating)
+ ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
return NULL;
}
-SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
+SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
unsigned *DOpcodes, unsigned *QOpcodes0,
unsigned *QOpcodes1) {
assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
DebugLoc dl = N->getDebugLoc();
SDValue MemAddr, Align;
- if (!SelectAddrMode6(N, N->getOperand(2), MemAddr, Align))
+ unsigned AddrOpIdx = isUpdating ? 1 : 2;
+ unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
+ if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
return NULL;
SDValue Chain = N->getOperand(0);
- EVT VT = N->getOperand(3).getValueType();
+ EVT VT = N->getOperand(Vec0Idx).getValueType();
bool is64BitVector = VT.is64BitVector();
Align = GetVLDSTAlign(Align, NumVecs, is64BitVector);
@@ -1583,64 +1598,71 @@
break;
}
+ std::vector<EVT> ResTys;
+ if (isUpdating)
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::Other);
+
SDValue Pred = getAL(CurDAG);
SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
+ SmallVector<SDValue, 7> Ops;
- if (is64BitVector) {
+ // Double registers and VST1/VST2 quad registers are directly supported.
+ if (is64BitVector || NumVecs <= 2) {
SDValue SrcReg;
if (NumVecs == 1) {
- SrcReg = N->getOperand(3);
- } else {
- SDValue V0 = N->getOperand(0+3);
- SDValue V1 = N->getOperand(1+3);
-
+ SrcReg = N->getOperand(Vec0Idx);
+ } else if (is64BitVector) {
// Form a REG_SEQUENCE to force register allocation.
+ SDValue V0 = N->getOperand(Vec0Idx + 0);
+ SDValue V1 = N->getOperand(Vec0Idx + 1);
if (NumVecs == 2)
SrcReg = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0);
else {
- SDValue V2 = N->getOperand(2+3);
+ SDValue V2 = N->getOperand(Vec0Idx + 2);
// If it's a vst3, form a quad D-register and leave the last part as
// an undef.
SDValue V3 = (NumVecs == 3)
? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
- : N->getOperand(3+3);
+ : N->getOperand(Vec0Idx + 3);
SrcReg = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
}
- }
- const SDValue Ops[] = { MemAddr, Align, SrcReg, Pred, Reg0, Chain };
- return CurDAG->getMachineNode(DOpcodes[OpcodeIndex], dl,
- MVT::Other, Ops, 6);
- }
-
- if (NumVecs <= 2) {
- // Quad registers are directly supported for VST1 and VST2.
- SDValue SrcReg;
- if (NumVecs == 1) {
- SrcReg = N->getOperand(3);
} else {
// Form a QQ register.
- SDValue Q0 = N->getOperand(3);
- SDValue Q1 = N->getOperand(4);
+ SDValue Q0 = N->getOperand(Vec0Idx);
+ SDValue Q1 = N->getOperand(Vec0Idx + 1);
SrcReg = SDValue(PairQRegs(MVT::v4i64, Q0, Q1), 0);
}
- const SDValue Ops[] = { MemAddr, Align, SrcReg, Pred, Reg0, Chain };
- return CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
- MVT::Other, Ops, 6);
+
+ unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
+ QOpcodes0[OpcodeIndex]);
+ Ops.push_back(MemAddr);
+ Ops.push_back(Align);
+ if (isUpdating) {
+ SDValue Inc = N->getOperand(AddrOpIdx + 1);
+ Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
+ }
+ Ops.push_back(SrcReg);
+ Ops.push_back(Pred);
+ Ops.push_back(Reg0);
+ Ops.push_back(Chain);
+ return CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), Ops.size());
}
// Otherwise, quad registers are stored with two separate instructions,
// where one stores the even registers and the other stores the odd registers.
// Form the QQQQ REG_SEQUENCE.
- SDValue V0 = N->getOperand(0+3);
- SDValue V1 = N->getOperand(1+3);
- SDValue V2 = N->getOperand(2+3);
+ SDValue V0 = N->getOperand(Vec0Idx + 0);
+ SDValue V1 = N->getOperand(Vec0Idx + 1);
+ SDValue V2 = N->getOperand(Vec0Idx + 2);
SDValue V3 = (NumVecs == 3)
? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
- : N->getOperand(3+3);
+ : N->getOperand(Vec0Idx + 3);
SDValue RegSeq = SDValue(QuadQRegs(MVT::v8i64, V0, V1, V2, V3), 0);
- // Store the even D registers.
+ // Store the even D registers. This is always an updating store, so that it
+ // provides the address to the second store for the odd subregs.
const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
MemAddr.getValueType(),
@@ -1648,28 +1670,40 @@
Chain = SDValue(VStA, 1);
// Store the odd D registers.
- const SDValue OpsB[] = { SDValue(VStA, 0), Align, RegSeq, Pred, Reg0, Chain };
- SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl,
- MVT::Other, OpsB, 6);
- Chain = SDValue(VStB, 0);
- ReplaceUses(SDValue(N, 0), Chain);
- return NULL;
+ Ops.push_back(SDValue(VStA, 0));
+ Ops.push_back(Align);
+ if (isUpdating) {
+ SDValue Inc = N->getOperand(AddrOpIdx + 1);
+ assert(isa<ConstantSDNode>(Inc.getNode()) &&
+ "only constant post-increment update allowed for VST3/4");
+ (void)Inc;
+ Ops.push_back(Reg0);
+ }
+ Ops.push_back(RegSeq);
+ Ops.push_back(Pred);
+ Ops.push_back(Reg0);
+ Ops.push_back(Chain);
+ return CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
+ Ops.data(), Ops.size());
}
SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
- unsigned NumVecs, unsigned *DOpcodes,
+ bool isUpdating, unsigned NumVecs,
+ unsigned *DOpcodes,
unsigned *QOpcodes) {
assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
DebugLoc dl = N->getDebugLoc();
SDValue MemAddr, Align;
- if (!SelectAddrMode6(N, N->getOperand(2), MemAddr, Align))
+ unsigned AddrOpIdx = isUpdating ? 1 : 2;
+ unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
+ if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
return NULL;
SDValue Chain = N->getOperand(0);
unsigned Lane =
- cast<ConstantSDNode>(N->getOperand(NumVecs+3))->getZExtValue();
- EVT VT = IsLoad ? N->getValueType(0) : N->getOperand(3).getValueType();
+ cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue();
+ EVT VT = N->getOperand(Vec0Idx).getValueType();
bool is64BitVector = VT.is64BitVector();
unsigned Alignment = 0;
@@ -1701,29 +1735,42 @@
case MVT::v4i32: OpcodeIndex = 1; break;
}
+ std::vector<EVT> ResTys;
+ if (IsLoad) {
+ unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
+ if (!is64BitVector)
+ ResTyElts *= 2;
+ ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(),
+ MVT::i64, ResTyElts));
+ }
+ if (isUpdating)
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::Other);
+
SDValue Pred = getAL(CurDAG);
SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
- SmallVector<SDValue, 7> Ops;
+ SmallVector<SDValue, 8> Ops;
Ops.push_back(MemAddr);
Ops.push_back(Align);
-
- unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
- QOpcodes[OpcodeIndex]);
+ if (isUpdating) {
+ SDValue Inc = N->getOperand(AddrOpIdx + 1);
+ Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
+ }
SDValue SuperReg;
- SDValue V0 = N->getOperand(0+3);
- SDValue V1 = N->getOperand(1+3);
+ SDValue V0 = N->getOperand(Vec0Idx + 0);
+ SDValue V1 = N->getOperand(Vec0Idx + 1);
if (NumVecs == 2) {
if (is64BitVector)
SuperReg = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0);
else
SuperReg = SDValue(PairQRegs(MVT::v4i64, V0, V1), 0);
} else {
- SDValue V2 = N->getOperand(2+3);
+ SDValue V2 = N->getOperand(Vec0Idx + 2);
SDValue V3 = (NumVecs == 3)
- ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
- : N->getOperand(3+3);
+ ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
+ : N->getOperand(Vec0Idx + 3);
if (is64BitVector)
SuperReg = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
else
@@ -1735,33 +1782,29 @@
Ops.push_back(Reg0);
Ops.push_back(Chain);
+ unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
+ QOpcodes[OpcodeIndex]);
+ SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys,
+ Ops.data(), Ops.size());
if (!IsLoad)
- return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 7);
-
- EVT ResTy;
- unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
- if (!is64BitVector)
- ResTyElts *= 2;
- ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
-
- SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTy, MVT::Other,
- Ops.data(), 7);
- SuperReg = SDValue(VLdLn, 0);
- Chain = SDValue(VLdLn, 1);
+ return VLdLn;
// Extract the subregisters.
- assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
- assert(ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
- unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
+ SuperReg = SDValue(VLdLn, 0);
+ assert(ARM::dsub_7 == ARM::dsub_0+7 &&
+ ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
+ unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
ReplaceUses(SDValue(N, Vec),
- CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
- ReplaceUses(SDValue(N, NumVecs), Chain);
+ CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
+ ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
+ if (isUpdating)
+ ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
return NULL;
}
-SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, unsigned NumVecs,
- unsigned *Opcodes) {
+SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating,
+ unsigned NumVecs, unsigned *Opcodes) {
assert(NumVecs >=2 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
DebugLoc dl = N->getDebugLoc();
@@ -1800,13 +1843,26 @@
SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
SDValue SuperReg;
unsigned Opc = Opcodes[OpcodeIndex];
- const SDValue Ops[] = { MemAddr, Align, Pred, Reg0, Chain };
+ SmallVector<SDValue, 6> Ops;
+ Ops.push_back(MemAddr);
+ Ops.push_back(Align);
+ if (isUpdating) {
+ SDValue Inc = N->getOperand(2);
+ Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
+ }
+ Ops.push_back(Pred);
+ Ops.push_back(Reg0);
+ Ops.push_back(Chain);
unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
- EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
- SDNode *VLdDup = CurDAG->getMachineNode(Opc, dl, ResTy, MVT::Other, Ops, 5);
+ std::vector<EVT> ResTys;
+ ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts));
+ if (isUpdating)
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::Other);
+ SDNode *VLdDup =
+ CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), Ops.size());
SuperReg = SDValue(VLdDup, 0);
- Chain = SDValue(VLdDup, 1);
// Extract the subregisters.
assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
@@ -1814,7 +1870,9 @@
for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
ReplaceUses(SDValue(N, Vec),
CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
- ReplaceUses(SDValue(N, NumVecs), Chain);
+ ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
+ if (isUpdating)
+ ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
return NULL;
}
@@ -2470,19 +2528,165 @@
case ARMISD::VLD2DUP: {
unsigned Opcodes[] = { ARM::VLD2DUPd8Pseudo, ARM::VLD2DUPd16Pseudo,
ARM::VLD2DUPd32Pseudo };
- return SelectVLDDup(N, 2, Opcodes);
+ return SelectVLDDup(N, false, 2, Opcodes);
}
case ARMISD::VLD3DUP: {
unsigned Opcodes[] = { ARM::VLD3DUPd8Pseudo, ARM::VLD3DUPd16Pseudo,
ARM::VLD3DUPd32Pseudo };
- return SelectVLDDup(N, 3, Opcodes);
+ return SelectVLDDup(N, false, 3, Opcodes);
}
case ARMISD::VLD4DUP: {
unsigned Opcodes[] = { ARM::VLD4DUPd8Pseudo, ARM::VLD4DUPd16Pseudo,
ARM::VLD4DUPd32Pseudo };
- return SelectVLDDup(N, 4, Opcodes);
+ return SelectVLDDup(N, false, 4, Opcodes);
+ }
+
+ case ARMISD::VLD2DUP_UPD: {
+ unsigned Opcodes[] = { ARM::VLD2DUPd8Pseudo_UPD, ARM::VLD2DUPd16Pseudo_UPD,
+ ARM::VLD2DUPd32Pseudo_UPD };
+ return SelectVLDDup(N, true, 2, Opcodes);
+ }
+
+ case ARMISD::VLD3DUP_UPD: {
+ unsigned Opcodes[] = { ARM::VLD3DUPd8Pseudo_UPD, ARM::VLD3DUPd16Pseudo_UPD,
+ ARM::VLD3DUPd32Pseudo_UPD };
+ return SelectVLDDup(N, true, 3, Opcodes);
+ }
+
+ case ARMISD::VLD4DUP_UPD: {
+ unsigned Opcodes[] = { ARM::VLD4DUPd8Pseudo_UPD, ARM::VLD4DUPd16Pseudo_UPD,
+ ARM::VLD4DUPd32Pseudo_UPD };
+ return SelectVLDDup(N, true, 4, Opcodes);
+ }
+
+ case ARMISD::VLD1_UPD: {
+ unsigned DOpcodes[] = { ARM::VLD1d8_UPD, ARM::VLD1d16_UPD,
+ ARM::VLD1d32_UPD, ARM::VLD1d64_UPD };
+ unsigned QOpcodes[] = { ARM::VLD1q8Pseudo_UPD, ARM::VLD1q16Pseudo_UPD,
+ ARM::VLD1q32Pseudo_UPD, ARM::VLD1q64Pseudo_UPD };
+ return SelectVLD(N, true, 1, DOpcodes, QOpcodes, 0);
+ }
+
+ case ARMISD::VLD2_UPD: {
+ unsigned DOpcodes[] = { ARM::VLD2d8Pseudo_UPD, ARM::VLD2d16Pseudo_UPD,
+ ARM::VLD2d32Pseudo_UPD, ARM::VLD1q64Pseudo_UPD };
+ unsigned QOpcodes[] = { ARM::VLD2q8Pseudo_UPD, ARM::VLD2q16Pseudo_UPD,
+ ARM::VLD2q32Pseudo_UPD };
+ return SelectVLD(N, true, 2, DOpcodes, QOpcodes, 0);
+ }
+
+ case ARMISD::VLD3_UPD: {
+ unsigned DOpcodes[] = { ARM::VLD3d8Pseudo_UPD, ARM::VLD3d16Pseudo_UPD,
+ ARM::VLD3d32Pseudo_UPD, ARM::VLD1d64TPseudo_UPD };
+ unsigned QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
+ ARM::VLD3q16Pseudo_UPD,
+ ARM::VLD3q32Pseudo_UPD };
+ unsigned QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
+ ARM::VLD3q16oddPseudo_UPD,
+ ARM::VLD3q32oddPseudo_UPD };
+ return SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
+ }
+
+ case ARMISD::VLD4_UPD: {
+ unsigned DOpcodes[] = { ARM::VLD4d8Pseudo_UPD, ARM::VLD4d16Pseudo_UPD,
+ ARM::VLD4d32Pseudo_UPD, ARM::VLD1d64QPseudo_UPD };
+ unsigned QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
+ ARM::VLD4q16Pseudo_UPD,
+ ARM::VLD4q32Pseudo_UPD };
+ unsigned QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD,
+ ARM::VLD4q16oddPseudo_UPD,
+ ARM::VLD4q32oddPseudo_UPD };
+ return SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
+ }
+
+ case ARMISD::VLD2LN_UPD: {
+ unsigned DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD, ARM::VLD2LNd16Pseudo_UPD,
+ ARM::VLD2LNd32Pseudo_UPD };
+ unsigned QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,
+ ARM::VLD2LNq32Pseudo_UPD };
+ return SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes);
+ }
+
+ case ARMISD::VLD3LN_UPD: {
+ unsigned DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD, ARM::VLD3LNd16Pseudo_UPD,
+ ARM::VLD3LNd32Pseudo_UPD };
+ unsigned QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,
+ ARM::VLD3LNq32Pseudo_UPD };
+ return SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes);
+ }
+
+ case ARMISD::VLD4LN_UPD: {
+ unsigned DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD, ARM::VLD4LNd16Pseudo_UPD,
+ ARM::VLD4LNd32Pseudo_UPD };
+ unsigned QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,
+ ARM::VLD4LNq32Pseudo_UPD };
+ return SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes);
+ }
+
+ case ARMISD::VST1_UPD: {
+ unsigned DOpcodes[] = { ARM::VST1d8_UPD, ARM::VST1d16_UPD,
+ ARM::VST1d32_UPD, ARM::VST1d64_UPD };
+ unsigned QOpcodes[] = { ARM::VST1q8Pseudo_UPD, ARM::VST1q16Pseudo_UPD,
+ ARM::VST1q32Pseudo_UPD, ARM::VST1q64Pseudo_UPD };
+ return SelectVST(N, true, 1, DOpcodes, QOpcodes, 0);
+ }
+
+ case ARMISD::VST2_UPD: {
+ unsigned DOpcodes[] = { ARM::VST2d8Pseudo_UPD, ARM::VST2d16Pseudo_UPD,
+ ARM::VST2d32Pseudo_UPD, ARM::VST1q64Pseudo_UPD };
+ unsigned QOpcodes[] = { ARM::VST2q8Pseudo_UPD, ARM::VST2q16Pseudo_UPD,
+ ARM::VST2q32Pseudo_UPD };
+ return SelectVST(N, true, 2, DOpcodes, QOpcodes, 0);
+ }
+
+ case ARMISD::VST3_UPD: {
+ unsigned DOpcodes[] = { ARM::VST3d8Pseudo_UPD, ARM::VST3d16Pseudo_UPD,
+ ARM::VST3d32Pseudo_UPD, ARM::VST1d64TPseudo_UPD };
+ unsigned QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
+ ARM::VST3q16Pseudo_UPD,
+ ARM::VST3q32Pseudo_UPD };
+ unsigned QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
+ ARM::VST3q16oddPseudo_UPD,
+ ARM::VST3q32oddPseudo_UPD };
+ return SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
+ }
+
+ case ARMISD::VST4_UPD: {
+ unsigned DOpcodes[] = { ARM::VST4d8Pseudo_UPD, ARM::VST4d16Pseudo_UPD,
+ ARM::VST4d32Pseudo_UPD, ARM::VST1d64QPseudo_UPD };
+ unsigned QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
+ ARM::VST4q16Pseudo_UPD,
+ ARM::VST4q32Pseudo_UPD };
+ unsigned QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD,
+ ARM::VST4q16oddPseudo_UPD,
+ ARM::VST4q32oddPseudo_UPD };
+ return SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
+ }
+
+ case ARMISD::VST2LN_UPD: {
+ unsigned DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD, ARM::VST2LNd16Pseudo_UPD,
+ ARM::VST2LNd32Pseudo_UPD };
+ unsigned QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
+ ARM::VST2LNq32Pseudo_UPD };
+ return SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes);
+ }
+
+ case ARMISD::VST3LN_UPD: {
+ unsigned DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD, ARM::VST3LNd16Pseudo_UPD,
+ ARM::VST3LNd32Pseudo_UPD };
+ unsigned QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
+ ARM::VST3LNq32Pseudo_UPD };
+ return SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes);
+ }
+
+ case ARMISD::VST4LN_UPD: {
+ unsigned DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD, ARM::VST4LNd16Pseudo_UPD,
+ ARM::VST4LNd32Pseudo_UPD };
+ unsigned QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
+ ARM::VST4LNq32Pseudo_UPD };
+ return SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes);
}
case ISD::INTRINSIC_VOID:
@@ -2497,7 +2701,7 @@
ARM::VLD1d32, ARM::VLD1d64 };
unsigned QOpcodes[] = { ARM::VLD1q8Pseudo, ARM::VLD1q16Pseudo,
ARM::VLD1q32Pseudo, ARM::VLD1q64Pseudo };
- return SelectVLD(N, 1, DOpcodes, QOpcodes, 0);
+ return SelectVLD(N, false, 1, DOpcodes, QOpcodes, 0);
}
case Intrinsic::arm_neon_vld2: {
@@ -2505,7 +2709,7 @@
ARM::VLD2d32Pseudo, ARM::VLD1q64Pseudo };
unsigned QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
ARM::VLD2q32Pseudo };
- return SelectVLD(N, 2, DOpcodes, QOpcodes, 0);
+ return SelectVLD(N, false, 2, DOpcodes, QOpcodes, 0);
}
case Intrinsic::arm_neon_vld3: {
@@ -2517,7 +2721,7 @@
unsigned QOpcodes1[] = { ARM::VLD3q8oddPseudo,
ARM::VLD3q16oddPseudo,
ARM::VLD3q32oddPseudo };
- return SelectVLD(N, 3, DOpcodes, QOpcodes0, QOpcodes1);
+ return SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
}
case Intrinsic::arm_neon_vld4: {
@@ -2529,28 +2733,28 @@
unsigned QOpcodes1[] = { ARM::VLD4q8oddPseudo,
ARM::VLD4q16oddPseudo,
ARM::VLD4q32oddPseudo };
- return SelectVLD(N, 4, DOpcodes, QOpcodes0, QOpcodes1);
+ return SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
}
case Intrinsic::arm_neon_vld2lane: {
unsigned DOpcodes[] = { ARM::VLD2LNd8Pseudo, ARM::VLD2LNd16Pseudo,
ARM::VLD2LNd32Pseudo };
unsigned QOpcodes[] = { ARM::VLD2LNq16Pseudo, ARM::VLD2LNq32Pseudo };
- return SelectVLDSTLane(N, true, 2, DOpcodes, QOpcodes);
+ return SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes);
}
case Intrinsic::arm_neon_vld3lane: {
unsigned DOpcodes[] = { ARM::VLD3LNd8Pseudo, ARM::VLD3LNd16Pseudo,
ARM::VLD3LNd32Pseudo };
unsigned QOpcodes[] = { ARM::VLD3LNq16Pseudo, ARM::VLD3LNq32Pseudo };
- return SelectVLDSTLane(N, true, 3, DOpcodes, QOpcodes);
+ return SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes);
}
case Intrinsic::arm_neon_vld4lane: {
unsigned DOpcodes[] = { ARM::VLD4LNd8Pseudo, ARM::VLD4LNd16Pseudo,
ARM::VLD4LNd32Pseudo };
unsigned QOpcodes[] = { ARM::VLD4LNq16Pseudo, ARM::VLD4LNq32Pseudo };
- return SelectVLDSTLane(N, true, 4, DOpcodes, QOpcodes);
+ return SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes);
}
case Intrinsic::arm_neon_vst1: {
@@ -2558,7 +2762,7 @@
ARM::VST1d32, ARM::VST1d64 };
unsigned QOpcodes[] = { ARM::VST1q8Pseudo, ARM::VST1q16Pseudo,
ARM::VST1q32Pseudo, ARM::VST1q64Pseudo };
- return SelectVST(N, 1, DOpcodes, QOpcodes, 0);
+ return SelectVST(N, false, 1, DOpcodes, QOpcodes, 0);
}
case Intrinsic::arm_neon_vst2: {
@@ -2566,7 +2770,7 @@
ARM::VST2d32Pseudo, ARM::VST1q64Pseudo };
unsigned QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
ARM::VST2q32Pseudo };
- return SelectVST(N, 2, DOpcodes, QOpcodes, 0);
+ return SelectVST(N, false, 2, DOpcodes, QOpcodes, 0);
}
case Intrinsic::arm_neon_vst3: {
@@ -2578,7 +2782,7 @@
unsigned QOpcodes1[] = { ARM::VST3q8oddPseudo,
ARM::VST3q16oddPseudo,
ARM::VST3q32oddPseudo };
- return SelectVST(N, 3, DOpcodes, QOpcodes0, QOpcodes1);
+ return SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
}
case Intrinsic::arm_neon_vst4: {
@@ -2590,28 +2794,28 @@
unsigned QOpcodes1[] = { ARM::VST4q8oddPseudo,
ARM::VST4q16oddPseudo,
ARM::VST4q32oddPseudo };
- return SelectVST(N, 4, DOpcodes, QOpcodes0, QOpcodes1);
+ return SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
}
case Intrinsic::arm_neon_vst2lane: {
unsigned DOpcodes[] = { ARM::VST2LNd8Pseudo, ARM::VST2LNd16Pseudo,
ARM::VST2LNd32Pseudo };
unsigned QOpcodes[] = { ARM::VST2LNq16Pseudo, ARM::VST2LNq32Pseudo };
- return SelectVLDSTLane(N, false, 2, DOpcodes, QOpcodes);
+ return SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes);
}
case Intrinsic::arm_neon_vst3lane: {
unsigned DOpcodes[] = { ARM::VST3LNd8Pseudo, ARM::VST3LNd16Pseudo,
ARM::VST3LNd32Pseudo };
unsigned QOpcodes[] = { ARM::VST3LNq16Pseudo, ARM::VST3LNq32Pseudo };
- return SelectVLDSTLane(N, false, 3, DOpcodes, QOpcodes);
+ return SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes);
}
case Intrinsic::arm_neon_vst4lane: {
unsigned DOpcodes[] = { ARM::VST4LNd8Pseudo, ARM::VST4LNd16Pseudo,
ARM::VST4LNd32Pseudo };
unsigned QOpcodes[] = { ARM::VST4LNq16Pseudo, ARM::VST4LNq32Pseudo };
- return SelectVLDSTLane(N, false, 4, DOpcodes, QOpcodes);
+ return SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes);
}
}
break;
Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp?rev=125014&r1=125013&r2=125014&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Mon Feb 7 11:43:21 2011
@@ -457,6 +457,8 @@
setOperationAction(ISD::VSETCC, MVT::v1i64, Expand);
setOperationAction(ISD::VSETCC, MVT::v2i64, Expand);
+ setTargetDAGCombine(ISD::INTRINSIC_VOID);
+ setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
setTargetDAGCombine(ISD::SHL);
setTargetDAGCombine(ISD::SRL);
@@ -857,6 +859,23 @@
case ARMISD::VLD2DUP: return "ARMISD::VLD2DUP";
case ARMISD::VLD3DUP: return "ARMISD::VLD3DUP";
case ARMISD::VLD4DUP: return "ARMISD::VLD4DUP";
+ case ARMISD::VLD1_UPD: return "ARMISD::VLD1_UPD";
+ case ARMISD::VLD2_UPD: return "ARMISD::VLD2_UPD";
+ case ARMISD::VLD3_UPD: return "ARMISD::VLD3_UPD";
+ case ARMISD::VLD4_UPD: return "ARMISD::VLD4_UPD";
+ case ARMISD::VLD2LN_UPD: return "ARMISD::VLD2LN_UPD";
+ case ARMISD::VLD3LN_UPD: return "ARMISD::VLD3LN_UPD";
+ case ARMISD::VLD4LN_UPD: return "ARMISD::VLD4LN_UPD";
+ case ARMISD::VLD2DUP_UPD: return "ARMISD::VLD2DUP_UPD";
+ case ARMISD::VLD3DUP_UPD: return "ARMISD::VLD3DUP_UPD";
+ case ARMISD::VLD4DUP_UPD: return "ARMISD::VLD4DUP_UPD";
+ case ARMISD::VST1_UPD: return "ARMISD::VST1_UPD";
+ case ARMISD::VST2_UPD: return "ARMISD::VST2_UPD";
+ case ARMISD::VST3_UPD: return "ARMISD::VST3_UPD";
+ case ARMISD::VST4_UPD: return "ARMISD::VST4_UPD";
+ case ARMISD::VST2LN_UPD: return "ARMISD::VST2LN_UPD";
+ case ARMISD::VST3LN_UPD: return "ARMISD::VST3LN_UPD";
+ case ARMISD::VST4LN_UPD: return "ARMISD::VST4LN_UPD";
}
}
@@ -5210,6 +5229,138 @@
DAG.getUNDEF(VT), NewMask.data());
}
+/// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP and
+/// NEON load/store intrinsics to merge base address updates.
+static SDValue CombineBaseUpdate(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
+ return SDValue();
+
+ SelectionDAG &DAG = DCI.DAG;
+ bool isIntrinsic = (N->getOpcode() == ISD::INTRINSIC_VOID ||
+ N->getOpcode() == ISD::INTRINSIC_W_CHAIN);
+ unsigned AddrOpIdx = (isIntrinsic ? 2 : 1);
+ SDValue Addr = N->getOperand(AddrOpIdx);
+
+ // Search for a use of the address operand that is an increment.
+ for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
+ UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
+ SDNode *User = *UI;
+ if (User->getOpcode() != ISD::ADD ||
+ UI.getUse().getResNo() != Addr.getResNo())
+ continue;
+
+ // Check that the add is independent of the load/store. Otherwise, folding
+ // it would create a cycle.
+ if (User->isPredecessorOf(N) || N->isPredecessorOf(User))
+ continue;
+
+ // Find the new opcode for the updating load/store.
+ bool isLoad = true;
+ bool isLaneOp = false;
+ unsigned NewOpc = 0;
+ unsigned NumVecs = 0;
+ if (isIntrinsic) {
+ unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ switch (IntNo) {
+ default: assert(0 && "unexpected intrinsic for Neon base update");
+ case Intrinsic::arm_neon_vld1: NewOpc = ARMISD::VLD1_UPD;
+ NumVecs = 1; break;
+ case Intrinsic::arm_neon_vld2: NewOpc = ARMISD::VLD2_UPD;
+ NumVecs = 2; break;
+ case Intrinsic::arm_neon_vld3: NewOpc = ARMISD::VLD3_UPD;
+ NumVecs = 3; break;
+ case Intrinsic::arm_neon_vld4: NewOpc = ARMISD::VLD4_UPD;
+ NumVecs = 4; break;
+ case Intrinsic::arm_neon_vld2lane: NewOpc = ARMISD::VLD2LN_UPD;
+ NumVecs = 2; isLaneOp = true; break;
+ case Intrinsic::arm_neon_vld3lane: NewOpc = ARMISD::VLD3LN_UPD;
+ NumVecs = 3; isLaneOp = true; break;
+ case Intrinsic::arm_neon_vld4lane: NewOpc = ARMISD::VLD4LN_UPD;
+ NumVecs = 4; isLaneOp = true; break;
+ case Intrinsic::arm_neon_vst1: NewOpc = ARMISD::VST1_UPD;
+ NumVecs = 1; isLoad = false; break;
+ case Intrinsic::arm_neon_vst2: NewOpc = ARMISD::VST2_UPD;
+ NumVecs = 2; isLoad = false; break;
+ case Intrinsic::arm_neon_vst3: NewOpc = ARMISD::VST3_UPD;
+ NumVecs = 3; isLoad = false; break;
+ case Intrinsic::arm_neon_vst4: NewOpc = ARMISD::VST4_UPD;
+ NumVecs = 4; isLoad = false; break;
+ case Intrinsic::arm_neon_vst2lane: NewOpc = ARMISD::VST2LN_UPD;
+ NumVecs = 2; isLoad = false; isLaneOp = true; break;
+ case Intrinsic::arm_neon_vst3lane: NewOpc = ARMISD::VST3LN_UPD;
+ NumVecs = 3; isLoad = false; isLaneOp = true; break;
+ case Intrinsic::arm_neon_vst4lane: NewOpc = ARMISD::VST4LN_UPD;
+ NumVecs = 4; isLoad = false; isLaneOp = true; break;
+ }
+ } else {
+ isLaneOp = true;
+ switch (N->getOpcode()) {
+ default: assert(0 && "unexpected opcode for Neon base update");
+ case ARMISD::VLD2DUP: NewOpc = ARMISD::VLD2DUP_UPD; NumVecs = 2; break;
+ case ARMISD::VLD3DUP: NewOpc = ARMISD::VLD3DUP_UPD; NumVecs = 3; break;
+ case ARMISD::VLD4DUP: NewOpc = ARMISD::VLD4DUP_UPD; NumVecs = 4; break;
+ }
+ }
+
+ // Find the size of memory referenced by the load/store.
+ EVT VecTy;
+ if (isLoad)
+ VecTy = N->getValueType(0);
+ else
+ VecTy = N->getOperand(AddrOpIdx+1).getValueType();
+ unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
+ if (isLaneOp)
+ NumBytes /= VecTy.getVectorNumElements();
+
+ // If the increment is a constant, it must match the memory ref size.
+ SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
+ if (ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode())) {
+ uint64_t IncVal = CInc->getZExtValue();
+ if (IncVal != NumBytes)
+ continue;
+ } else if (NumBytes >= 3 * 16) {
+ // VLD3/4 and VST3/4 for 128-bit vectors are implemented with two
+ // separate instructions that make it harder to use a non-constant update.
+ continue;
+ }
+
+ // Create the new updating load/store node.
+ EVT Tys[6];
+ unsigned NumResultVecs = (isLoad ? NumVecs : 0);
+ unsigned n;
+ for (n = 0; n < NumResultVecs; ++n)
+ Tys[n] = VecTy;
+ Tys[n++] = MVT::i32;
+ Tys[n] = MVT::Other;
+ SDVTList SDTys = DAG.getVTList(Tys, NumResultVecs+2);
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(N->getOperand(0)); // incoming chain
+ Ops.push_back(N->getOperand(AddrOpIdx));
+ Ops.push_back(Inc);
+ for (unsigned i = AddrOpIdx + 1; i < N->getNumOperands(); ++i) {
+ Ops.push_back(N->getOperand(i));
+ }
+ MemIntrinsicSDNode *MemInt = cast<MemIntrinsicSDNode>(N);
+ SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, N->getDebugLoc(), SDTys,
+ Ops.data(), Ops.size(),
+ MemInt->getMemoryVT(),
+ MemInt->getMemOperand());
+
+ // Update the uses.
+ std::vector<SDValue> NewResults;
+ for (unsigned i = 0; i < NumResultVecs; ++i) {
+ NewResults.push_back(SDValue(UpdN.getNode(), i));
+ }
+ NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs+1)); // chain
+ DCI.CombineTo(N, NewResults);
+ DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs));
+
+ break;
+ }
+ return SDValue();
+}
+
/// CombineVLDDUP - For a VDUPLANE node N, check if its source operand is a
/// vldN-lane (N > 1) intrinsic, and if all the other uses of that intrinsic
/// are also VDUPLANEs. If so, combine them to a vldN-dup operation and
@@ -5720,6 +5871,31 @@
case ISD::ZERO_EXTEND:
case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget);
case ISD::SELECT_CC: return PerformSELECT_CCCombine(N, DCI.DAG, Subtarget);
+ case ARMISD::VLD2DUP:
+ case ARMISD::VLD3DUP:
+ case ARMISD::VLD4DUP:
+ return CombineBaseUpdate(N, DCI);
+ case ISD::INTRINSIC_VOID:
+ case ISD::INTRINSIC_W_CHAIN:
+ switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
+ case Intrinsic::arm_neon_vld1:
+ case Intrinsic::arm_neon_vld2:
+ case Intrinsic::arm_neon_vld3:
+ case Intrinsic::arm_neon_vld4:
+ case Intrinsic::arm_neon_vld2lane:
+ case Intrinsic::arm_neon_vld3lane:
+ case Intrinsic::arm_neon_vld4lane:
+ case Intrinsic::arm_neon_vst1:
+ case Intrinsic::arm_neon_vst2:
+ case Intrinsic::arm_neon_vst3:
+ case Intrinsic::arm_neon_vst4:
+ case Intrinsic::arm_neon_vst2lane:
+ case Intrinsic::arm_neon_vst3lane:
+ case Intrinsic::arm_neon_vst4lane:
+ return CombineBaseUpdate(N, DCI);
+ default: break;
+ }
+ break;
}
return SDValue();
}
Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.h?rev=125014&r1=125013&r2=125014&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelLowering.h (original)
+++ llvm/trunk/lib/Target/ARM/ARMISelLowering.h Mon Feb 7 11:43:21 2011
@@ -181,7 +181,28 @@
// Vector load N-element structure to all lanes:
VLD2DUP = ISD::FIRST_TARGET_MEMORY_OPCODE,
VLD3DUP,
- VLD4DUP
+ VLD4DUP,
+
+ // NEON loads with post-increment base updates:
+ VLD1_UPD,
+ VLD2_UPD,
+ VLD3_UPD,
+ VLD4_UPD,
+ VLD2LN_UPD,
+ VLD3LN_UPD,
+ VLD4LN_UPD,
+ VLD2DUP_UPD,
+ VLD3DUP_UPD,
+ VLD4DUP_UPD,
+
+ // NEON stores with post-increment base updates:
+ VST1_UPD,
+ VST2_UPD,
+ VST3_UPD,
+ VST4_UPD,
+ VST2LN_UPD,
+ VST3LN_UPD,
+ VST4LN_UPD
};
}
Modified: llvm/trunk/test/CodeGen/ARM/vld1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vld1.ll?rev=125014&r1=125013&r2=125014&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vld1.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vld1.ll Mon Feb 7 11:43:21 2011
@@ -16,6 +16,18 @@
ret <4 x i16> %tmp1
}
+;Check for a post-increment updating load.
+define <4 x i16> @vld1i16_update(i16** %ptr) nounwind {
+;CHECK: vld1i16_update:
+;CHECK: vld1.16 {d16}, [r1]!
+ %A = load i16** %ptr
+ %tmp0 = bitcast i16* %A to i8*
+ %tmp1 = call <4 x i16> @llvm.arm.neon.vld1.v4i16(i8* %tmp0, i32 1)
+ %tmp2 = getelementptr i16* %A, i32 4
+ store i16* %tmp2, i16** %ptr
+ ret <4 x i16> %tmp1
+}
+
define <2 x i32> @vld1i32(i32* %A) nounwind {
;CHECK: vld1i32:
;CHECK: vld1.32
@@ -24,6 +36,18 @@
ret <2 x i32> %tmp1
}
+;Check for a post-increment updating load with register increment.
+define <2 x i32> @vld1i32_update(i32** %ptr, i32 %inc) nounwind {
+;CHECK: vld1i32_update:
+;CHECK: vld1.32 {d16}, [r2], r1
+ %A = load i32** %ptr
+ %tmp0 = bitcast i32* %A to i8*
+ %tmp1 = call <2 x i32> @llvm.arm.neon.vld1.v2i32(i8* %tmp0, i32 1)
+ %tmp2 = getelementptr i32* %A, i32 %inc
+ store i32* %tmp2, i32** %ptr
+ ret <2 x i32> %tmp1
+}
+
define <2 x float> @vld1f(float* %A) nounwind {
;CHECK: vld1f:
;CHECK: vld1.32
@@ -48,6 +72,17 @@
ret <16 x i8> %tmp1
}
+;Check for a post-increment updating load.
+define <16 x i8> @vld1Qi8_update(i8** %ptr) nounwind {
+;CHECK: vld1Qi8_update:
+;CHECK: vld1.8 {d16, d17}, [r1, :64]!
+ %A = load i8** %ptr
+ %tmp1 = call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %A, i32 8)
+ %tmp2 = getelementptr i8* %A, i32 16
+ store i8* %tmp2, i8** %ptr
+ ret <16 x i8> %tmp1
+}
+
define <8 x i16> @vld1Qi16(i16* %A) nounwind {
;CHECK: vld1Qi16:
;Check the alignment value. Max for this instruction is 128 bits:
Modified: llvm/trunk/test/CodeGen/ARM/vld2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vld2.ll?rev=125014&r1=125013&r2=125014&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vld2.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vld2.ll Mon Feb 7 11:43:21 2011
@@ -56,6 +56,21 @@
ret <2 x float> %tmp4
}
+;Check for a post-increment updating load.
+define <2 x float> @vld2f_update(float** %ptr) nounwind {
+;CHECK: vld2f_update:
+;CHECK: vld2.32 {d16, d17}, [r1]!
+ %A = load float** %ptr
+ %tmp0 = bitcast float* %A to i8*
+ %tmp1 = call %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2.v2f32(i8* %tmp0, i32 1)
+ %tmp2 = extractvalue %struct.__neon_float32x2x2_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_float32x2x2_t %tmp1, 1
+ %tmp4 = fadd <2 x float> %tmp2, %tmp3
+ %tmp5 = getelementptr float* %A, i32 4
+ store float* %tmp5, float** %ptr
+ ret <2 x float> %tmp4
+}
+
define <1 x i64> @vld2i64(i64* %A) nounwind {
;CHECK: vld2i64:
;Check the alignment value. Max for this instruction is 128 bits:
@@ -79,6 +94,20 @@
ret <16 x i8> %tmp4
}
+;Check for a post-increment updating load with register increment.
+define <16 x i8> @vld2Qi8_update(i8** %ptr, i32 %inc) nounwind {
+;CHECK: vld2Qi8_update:
+;CHECK: vld2.8 {d16, d17, d18, d19}, [r2, :128], r1
+ %A = load i8** %ptr
+ %tmp1 = call %struct.__neon_int8x16x2_t @llvm.arm.neon.vld2.v16i8(i8* %A, i32 16)
+ %tmp2 = extractvalue %struct.__neon_int8x16x2_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int8x16x2_t %tmp1, 1
+ %tmp4 = add <16 x i8> %tmp2, %tmp3
+ %tmp5 = getelementptr i8* %A, i32 %inc
+ store i8* %tmp5, i8** %ptr
+ ret <16 x i8> %tmp4
+}
+
define <8 x i16> @vld2Qi16(i16* %A) nounwind {
;CHECK: vld2Qi16:
;Check the alignment value. Max for this instruction is 256 bits:
Modified: llvm/trunk/test/CodeGen/ARM/vld3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vld3.ll?rev=125014&r1=125013&r2=125014&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vld3.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vld3.ll Mon Feb 7 11:43:21 2011
@@ -33,6 +33,21 @@
ret <4 x i16> %tmp4
}
+;Check for a post-increment updating load with register increment.
+define <4 x i16> @vld3i16_update(i16** %ptr, i32 %inc) nounwind {
+;CHECK: vld3i16_update:
+;CHECK: vld3.16 {d16, d17, d18}, [r2], r1
+ %A = load i16** %ptr
+ %tmp0 = bitcast i16* %A to i8*
+ %tmp1 = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3.v4i16(i8* %tmp0, i32 1)
+ %tmp2 = extractvalue %struct.__neon_int16x4x3_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int16x4x3_t %tmp1, 2
+ %tmp4 = add <4 x i16> %tmp2, %tmp3
+ %tmp5 = getelementptr i16* %A, i32 %inc
+ store i16* %tmp5, i16** %ptr
+ ret <4 x i16> %tmp4
+}
+
define <2 x i32> @vld3i32(i32* %A) nounwind {
;CHECK: vld3i32:
;CHECK: vld3.32
@@ -103,6 +118,22 @@
ret <4 x i32> %tmp4
}
+;Check for a post-increment updating load.
+define <4 x i32> @vld3Qi32_update(i32** %ptr) nounwind {
+;CHECK: vld3Qi32_update:
+;CHECK: vld3.32 {d16, d18, d20}, [r1]!
+;CHECK: vld3.32 {d17, d19, d21}, [r1]!
+ %A = load i32** %ptr
+ %tmp0 = bitcast i32* %A to i8*
+ %tmp1 = call %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3.v4i32(i8* %tmp0, i32 1)
+ %tmp2 = extractvalue %struct.__neon_int32x4x3_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int32x4x3_t %tmp1, 2
+ %tmp4 = add <4 x i32> %tmp2, %tmp3
+ %tmp5 = getelementptr i32* %A, i32 12
+ store i32* %tmp5, i32** %ptr
+ ret <4 x i32> %tmp4
+}
+
define <4 x float> @vld3Qf(float* %A) nounwind {
;CHECK: vld3Qf:
;CHECK: vld3.32
Modified: llvm/trunk/test/CodeGen/ARM/vld4.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vld4.ll?rev=125014&r1=125013&r2=125014&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vld4.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vld4.ll Mon Feb 7 11:43:21 2011
@@ -22,6 +22,20 @@
ret <8 x i8> %tmp4
}
+;Check for a post-increment updating load with register increment.
+define <8 x i8> @vld4i8_update(i8** %ptr, i32 %inc) nounwind {
+;CHECK: vld4i8_update:
+;CHECK: vld4.8 {d16, d17, d18, d19}, [r2, :128], r1
+ %A = load i8** %ptr
+ %tmp1 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4.v8i8(i8* %A, i32 16)
+ %tmp2 = extractvalue %struct.__neon_int8x8x4_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp1, 2
+ %tmp4 = add <8 x i8> %tmp2, %tmp3
+ %tmp5 = getelementptr i8* %A, i32 %inc
+ store i8* %tmp5, i8** %ptr
+ ret <8 x i8> %tmp4
+}
+
define <4 x i16> @vld4i16(i16* %A) nounwind {
;CHECK: vld4i16:
;Check the alignment value. Max for this instruction is 256 bits:
@@ -94,6 +108,22 @@
ret <8 x i16> %tmp4
}
+;Check for a post-increment updating load.
+define <8 x i16> @vld4Qi16_update(i16** %ptr) nounwind {
+;CHECK: vld4Qi16_update:
+;CHECK: vld4.16 {d16, d18, d20, d22}, [r1, :64]!
+;CHECK: vld4.16 {d17, d19, d21, d23}, [r1, :64]!
+ %A = load i16** %ptr
+ %tmp0 = bitcast i16* %A to i8*
+ %tmp1 = call %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4.v8i16(i8* %tmp0, i32 8)
+ %tmp2 = extractvalue %struct.__neon_int16x8x4_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int16x8x4_t %tmp1, 2
+ %tmp4 = add <8 x i16> %tmp2, %tmp3
+ %tmp5 = getelementptr i16* %A, i32 32
+ store i16* %tmp5, i16** %ptr
+ ret <8 x i16> %tmp4
+}
+
define <4 x i32> @vld4Qi32(i32* %A) nounwind {
;CHECK: vld4Qi32:
;CHECK: vld4.32
Modified: llvm/trunk/test/CodeGen/ARM/vlddup.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vlddup.ll?rev=125014&r1=125013&r2=125014&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vlddup.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vlddup.ll Mon Feb 7 11:43:21 2011
@@ -89,6 +89,22 @@
ret <4 x i16> %tmp5
}
+;Check for a post-increment updating load.
+define <4 x i16> @vld2dupi16_update(i16** %ptr) nounwind {
+;CHECK: vld2dupi16_update:
+;CHECK: vld2.16 {d16[], d17[]}, [r1]!
+ %A = load i16** %ptr
+ %tmp0 = tail call %struct.__neon_int4x16x2_t @llvm.arm.neon.vld2lane.v4i16(i16* %A, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2)
+ %tmp1 = extractvalue %struct.__neon_int4x16x2_t %tmp0, 0
+ %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> zeroinitializer
+ %tmp3 = extractvalue %struct.__neon_int4x16x2_t %tmp0, 1
+ %tmp4 = shufflevector <4 x i16> %tmp3, <4 x i16> undef, <4 x i32> zeroinitializer
+ %tmp5 = add <4 x i16> %tmp2, %tmp4
+ %tmp6 = getelementptr i16* %A, i32 2
+ store i16* %tmp6, i16** %ptr
+ ret <4 x i16> %tmp5
+}
+
define <2 x i32> @vld2dupi32(i32* %A) nounwind {
;CHECK: vld2dupi32:
;Check the alignment value. Max for this instruction is 64 bits:
@@ -106,8 +122,28 @@
declare %struct.__neon_int4x16x2_t @llvm.arm.neon.vld2lane.v4i16(i16*, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly
declare %struct.__neon_int2x32x2_t @llvm.arm.neon.vld2lane.v2i32(i32*, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly
+%struct.__neon_int8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> }
%struct.__neon_int16x4x3_t = type { <4 x i16>, <4 x i16>, <4 x i16> }
+;Check for a post-increment updating load with register increment.
+define <8 x i8> @vld3dupi8_update(i8** %ptr, i32 %inc) nounwind {
+;CHECK: vld3dupi8_update:
+;CHECK: vld3.8 {d16[], d17[], d18[]}, [r2], r1
+ %A = load i8** %ptr
+ %tmp0 = tail call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8(i8* %A, <8 x i8> undef, <8 x i8> undef, <8 x i8> undef, i32 0, i32 8)
+ %tmp1 = extractvalue %struct.__neon_int8x8x3_t %tmp0, 0
+ %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> zeroinitializer
+ %tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp0, 1
+ %tmp4 = shufflevector <8 x i8> %tmp3, <8 x i8> undef, <8 x i32> zeroinitializer
+ %tmp5 = extractvalue %struct.__neon_int8x8x3_t %tmp0, 2
+ %tmp6 = shufflevector <8 x i8> %tmp5, <8 x i8> undef, <8 x i32> zeroinitializer
+ %tmp7 = add <8 x i8> %tmp2, %tmp4
+ %tmp8 = add <8 x i8> %tmp7, %tmp6
+ %tmp9 = getelementptr i8* %A, i32 %inc
+ store i8* %tmp9, i8** %ptr
+ ret <8 x i8> %tmp8
+}
+
define <4 x i16> @vld3dupi16(i16* %A) nounwind {
;CHECK: vld3dupi16:
;Check the (default) alignment value. VLD3 does not support alignment.
@@ -124,10 +160,34 @@
ret <4 x i16> %tmp8
}
+declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly
declare %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i16*, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly
+%struct.__neon_int16x4x4_t = type { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }
%struct.__neon_int32x2x4_t = type { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }
+;Check for a post-increment updating load.
+define <4 x i16> @vld4dupi16_update(i16** %ptr) nounwind {
+;CHECK: vld4dupi16_update:
+;CHECK: vld4.16 {d16[], d17[], d18[], d19[]}, [r1]!
+ %A = load i16** %ptr
+ %tmp0 = tail call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i16* %A, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 1)
+ %tmp1 = extractvalue %struct.__neon_int16x4x4_t %tmp0, 0
+ %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> zeroinitializer
+ %tmp3 = extractvalue %struct.__neon_int16x4x4_t %tmp0, 1
+ %tmp4 = shufflevector <4 x i16> %tmp3, <4 x i16> undef, <4 x i32> zeroinitializer
+ %tmp5 = extractvalue %struct.__neon_int16x4x4_t %tmp0, 2
+ %tmp6 = shufflevector <4 x i16> %tmp5, <4 x i16> undef, <4 x i32> zeroinitializer
+ %tmp7 = extractvalue %struct.__neon_int16x4x4_t %tmp0, 3
+ %tmp8 = shufflevector <4 x i16> %tmp7, <4 x i16> undef, <4 x i32> zeroinitializer
+ %tmp9 = add <4 x i16> %tmp2, %tmp4
+ %tmp10 = add <4 x i16> %tmp6, %tmp8
+ %tmp11 = add <4 x i16> %tmp9, %tmp10
+ %tmp12 = getelementptr i16* %A, i32 4
+ store i16* %tmp12, i16** %ptr
+ ret <4 x i16> %tmp11
+}
+
define <2 x i32> @vld4dupi32(i32* %A) nounwind {
;CHECK: vld4dupi32:
;Check the alignment value. An 8-byte alignment is allowed here even though
@@ -148,4 +208,5 @@
ret <2 x i32> %tmp11
}
+declare %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i16*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly
declare %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i32*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly
Modified: llvm/trunk/test/CodeGen/ARM/vldlane.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vldlane.ll?rev=125014&r1=125013&r2=125014&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vldlane.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vldlane.ll Mon Feb 7 11:43:21 2011
@@ -121,6 +121,22 @@
ret <2 x i32> %tmp5
}
+;Check for a post-increment updating load.
+define <2 x i32> @vld2lanei32_update(i32** %ptr, <2 x i32>* %B) nounwind {
+;CHECK: vld2lanei32_update:
+;CHECK: vld2.32 {d16[1], d17[1]}, [r1]!
+ %A = load i32** %ptr
+ %tmp0 = bitcast i32* %A to i8*
+ %tmp1 = load <2 x i32>* %B
+ %tmp2 = call %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1)
+ %tmp3 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 1
+ %tmp5 = add <2 x i32> %tmp3, %tmp4
+ %tmp6 = getelementptr i32* %A, i32 2
+ store i32* %tmp6, i32** %ptr
+ ret <2 x i32> %tmp5
+}
+
define <2 x float> @vld2lanef(float* %A, <2 x float>* %B) nounwind {
;CHECK: vld2lanef:
;CHECK: vld2.32
@@ -260,6 +276,24 @@
ret <8 x i16> %tmp7
}
+;Check for a post-increment updating load with register increment.
+define <8 x i16> @vld3laneQi16_update(i16** %ptr, <8 x i16>* %B, i32 %inc) nounwind {
+;CHECK: vld3laneQi16_update:
+;CHECK: vld3.16 {d16[1], d18[1], d20[1]}, [r2], r1
+ %A = load i16** %ptr
+ %tmp0 = bitcast i16* %A to i8*
+ %tmp1 = load <8 x i16>* %B
+ %tmp2 = call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 8)
+ %tmp3 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 1
+ %tmp5 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 2
+ %tmp6 = add <8 x i16> %tmp3, %tmp4
+ %tmp7 = add <8 x i16> %tmp5, %tmp6
+ %tmp8 = getelementptr i16* %A, i32 %inc
+ store i16* %tmp8, i16** %ptr
+ ret <8 x i16> %tmp7
+}
+
define <4 x i32> @vld3laneQi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vld3laneQi32:
;CHECK: vld3.32
@@ -322,6 +356,25 @@
ret <8 x i8> %tmp9
}
+;Check for a post-increment updating load.
+define <8 x i8> @vld4lanei8_update(i8** %ptr, <8 x i8>* %B) nounwind {
+;CHECK: vld4lanei8_update:
+;CHECK: vld4.8 {d16[1], d17[1], d18[1], d19[1]}, [r1, :32]!
+ %A = load i8** %ptr
+ %tmp1 = load <8 x i8>* %B
+ %tmp2 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 8)
+ %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 1
+ %tmp5 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 2
+ %tmp6 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 3
+ %tmp7 = add <8 x i8> %tmp3, %tmp4
+ %tmp8 = add <8 x i8> %tmp5, %tmp6
+ %tmp9 = add <8 x i8> %tmp7, %tmp8
+ %tmp10 = getelementptr i8* %A, i32 4
+ store i8* %tmp10, i8** %ptr
+ ret <8 x i8> %tmp9
+}
+
define <4 x i16> @vld4lanei16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vld4lanei16:
;Check that a power-of-two alignment smaller than the total size of the memory
Modified: llvm/trunk/test/CodeGen/ARM/vst1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vst1.ll?rev=125014&r1=125013&r2=125014&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vst1.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vst1.ll Mon Feb 7 11:43:21 2011
@@ -36,6 +36,19 @@
ret void
}
+;Check for a post-increment updating store.
+define void @vst1f_update(float** %ptr, <2 x float>* %B) nounwind {
+;CHECK: vst1f_update:
+;CHECK: vst1.32 {d16}, [r1]!
+ %A = load float** %ptr
+ %tmp0 = bitcast float* %A to i8*
+ %tmp1 = load <2 x float>* %B
+ call void @llvm.arm.neon.vst1.v2f32(i8* %tmp0, <2 x float> %tmp1, i32 1)
+ %tmp2 = getelementptr float* %A, i32 2
+ store float* %tmp2, float** %ptr
+ ret void
+}
+
define void @vst1i64(i64* %A, <1 x i64>* %B) nounwind {
;CHECK: vst1i64:
;CHECK: vst1.64
@@ -64,6 +77,19 @@
ret void
}
+;Check for a post-increment updating store with register increment.
+define void @vst1Qi16_update(i16** %ptr, <8 x i16>* %B, i32 %inc) nounwind {
+;CHECK: vst1Qi16_update:
+;CHECK: vst1.16 {d16, d17}, [r1, :64], r2
+ %A = load i16** %ptr
+ %tmp0 = bitcast i16* %A to i8*
+ %tmp1 = load <8 x i16>* %B
+ call void @llvm.arm.neon.vst1.v8i16(i8* %tmp0, <8 x i16> %tmp1, i32 8)
+ %tmp2 = getelementptr i16* %A, i32 %inc
+ store i16* %tmp2, i16** %ptr
+ ret void
+}
+
define void @vst1Qi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vst1Qi32:
;CHECK: vst1.32
Modified: llvm/trunk/test/CodeGen/ARM/vst2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vst2.ll?rev=125014&r1=125013&r2=125014&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vst2.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vst2.ll Mon Feb 7 11:43:21 2011
@@ -9,6 +9,18 @@
ret void
}
+;Check for a post-increment updating store with register increment.
+define void @vst2i8_update(i8** %ptr, <8 x i8>* %B, i32 %inc) nounwind {
+;CHECK: vst2i8_update:
+;CHECK: vst2.8 {d16, d17}, [r1], r2
+ %A = load i8** %ptr
+ %tmp1 = load <8 x i8>* %B
+ call void @llvm.arm.neon.vst2.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 4)
+ %tmp2 = getelementptr i8* %A, i32 %inc
+ store i8* %tmp2, i8** %ptr
+ ret void
+}
+
define void @vst2i16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vst2i16:
;Check the alignment value. Max for this instruction is 128 bits:
@@ -47,6 +59,19 @@
ret void
}
+;Check for a post-increment updating store.
+define void @vst2i64_update(i64** %ptr, <1 x i64>* %B) nounwind {
+;CHECK: vst2i64_update:
+;CHECK: vst1.64 {d16, d17}, [r1, :64]!
+ %A = load i64** %ptr
+ %tmp0 = bitcast i64* %A to i8*
+ %tmp1 = load <1 x i64>* %B
+ call void @llvm.arm.neon.vst2.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 8)
+ %tmp2 = getelementptr i64* %A, i32 2
+ store i64* %tmp2, i64** %ptr
+ ret void
+}
+
define void @vst2Qi8(i8* %A, <16 x i8>* %B) nounwind {
;CHECK: vst2Qi8:
;Check the alignment value. Max for this instruction is 256 bits:
Modified: llvm/trunk/test/CodeGen/ARM/vst3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vst3.ll?rev=125014&r1=125013&r2=125014&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vst3.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vst3.ll Mon Feb 7 11:43:21 2011
@@ -28,6 +28,19 @@
ret void
}
+;Check for a post-increment updating store.
+define void @vst3i32_update(i32** %ptr, <2 x i32>* %B) nounwind {
+;CHECK: vst3i32_update:
+;CHECK: vst3.32 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}]!
+ %A = load i32** %ptr
+ %tmp0 = bitcast i32* %A to i8*
+ %tmp1 = load <2 x i32>* %B
+ call void @llvm.arm.neon.vst3.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1)
+ %tmp2 = getelementptr i32* %A, i32 6
+ store i32* %tmp2, i32** %ptr
+ ret void
+}
+
define void @vst3f(float* %A, <2 x float>* %B) nounwind {
;CHECK: vst3f:
;CHECK: vst3.32
@@ -69,6 +82,20 @@
ret void
}
+;Check for a post-increment updating store.
+define void @vst3Qi16_update(i16** %ptr, <8 x i16>* %B) nounwind {
+;CHECK: vst3Qi16_update:
+;CHECK: vst3.16 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}]!
+;CHECK: vst3.16 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}]!
+ %A = load i16** %ptr
+ %tmp0 = bitcast i16* %A to i8*
+ %tmp1 = load <8 x i16>* %B
+ call void @llvm.arm.neon.vst3.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1)
+ %tmp2 = getelementptr i16* %A, i32 24
+ store i16* %tmp2, i16** %ptr
+ ret void
+}
+
define void @vst3Qi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vst3Qi32:
;CHECK: vst3.32
Modified: llvm/trunk/test/CodeGen/ARM/vst4.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vst4.ll?rev=125014&r1=125013&r2=125014&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vst4.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vst4.ll Mon Feb 7 11:43:21 2011
@@ -9,6 +9,18 @@
ret void
}
+;Check for a post-increment updating store with register increment.
+define void @vst4i8_update(i8** %ptr, <8 x i8>* %B, i32 %inc) nounwind {
+;CHECK: vst4i8_update:
+;CHECK: vst4.8 {d16, d17, d18, d19}, [r1, :128], r2
+ %A = load i8** %ptr
+ %tmp1 = load <8 x i8>* %B
+ call void @llvm.arm.neon.vst4.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 16)
+ %tmp2 = getelementptr i8* %A, i32 %inc
+ store i8* %tmp2, i8** %ptr
+ ret void
+}
+
define void @vst4i16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vst4i16:
;Check the alignment value. Max for this instruction is 256 bits:
@@ -89,6 +101,20 @@
ret void
}
+;Check for a post-increment updating store.
+define void @vst4Qf_update(float** %ptr, <4 x float>* %B) nounwind {
+;CHECK: vst4Qf_update:
+;CHECK: vst4.32 {d16, d18, d20, d22}, [r1]!
+;CHECK: vst4.32 {d17, d19, d21, d23}, [r1]!
+ %A = load float** %ptr
+ %tmp0 = bitcast float* %A to i8*
+ %tmp1 = load <4 x float>* %B
+ call void @llvm.arm.neon.vst4.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1)
+ %tmp2 = getelementptr float* %A, i32 16
+ store float* %tmp2, float** %ptr
+ ret void
+}
+
declare void @llvm.arm.neon.vst4.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32) nounwind
declare void @llvm.arm.neon.vst4.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32) nounwind
declare void @llvm.arm.neon.vst4.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32) nounwind
Modified: llvm/trunk/test/CodeGen/ARM/vstlane.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vstlane.ll?rev=125014&r1=125013&r2=125014&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vstlane.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vstlane.ll Mon Feb 7 11:43:21 2011
@@ -94,6 +94,19 @@
ret void
}
+;Check for a post-increment updating store with register increment.
+define void @vst2lanei16_update(i16** %ptr, <4 x i16>* %B, i32 %inc) nounwind {
+;CHECK: vst2lanei16_update:
+;CHECK: vst2.16 {d16[1], d17[1]}, [r1], r2
+ %A = load i16** %ptr
+ %tmp0 = bitcast i16* %A to i8*
+ %tmp1 = load <4 x i16>* %B
+ call void @llvm.arm.neon.vst2lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 2)
+ %tmp2 = getelementptr i16* %A, i32 %inc
+ store i16* %tmp2, i16** %ptr
+ ret void
+}
+
define void @vst2lanei32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK: vst2lanei32:
;CHECK: vst2.32
@@ -205,6 +218,19 @@
ret void
}
+;Check for a post-increment updating store.
+define void @vst3laneQi32_update(i32** %ptr, <4 x i32>* %B) nounwind {
+;CHECK: vst3laneQi32_update:
+;CHECK: vst3.32 {d16[0], d18[0], d20[0]}, [r1]!
+ %A = load i32** %ptr
+ %tmp0 = bitcast i32* %A to i8*
+ %tmp1 = load <4 x i32>* %B
+ call void @llvm.arm.neon.vst3lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 0, i32 1)
+ %tmp2 = getelementptr i32* %A, i32 3
+ store i32* %tmp2, i32** %ptr
+ ret void
+}
+
define void @vst3laneQf(float* %A, <4 x float>* %B) nounwind {
;CHECK: vst3laneQf:
;CHECK: vst3.32
@@ -233,6 +259,18 @@
ret void
}
+;Check for a post-increment updating store.
+define void @vst4lanei8_update(i8** %ptr, <8 x i8>* %B) nounwind {
+;CHECK: vst4lanei8_update:
+;CHECK: vst4.8 {d16[1], d17[1], d18[1], d19[1]}, [r1, :32]!
+ %A = load i8** %ptr
+ %tmp1 = load <8 x i8>* %B
+ call void @llvm.arm.neon.vst4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 8)
+ %tmp2 = getelementptr i8* %A, i32 4
+ store i8* %tmp2, i8** %ptr
+ ret void
+}
+
define void @vst4lanei16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vst4lanei16:
;CHECK: vst4.16
From dpatel at apple.com Mon Feb 7 12:24:18 2011
From: dpatel at apple.com (Devang Patel)
Date: Mon, 07 Feb 2011 18:24:18 -0000
Subject: [llvm-commits] [llvm] r125019 -
/llvm/trunk/test/CodeGen/X86/dbg-merge-loc-entry.ll
Message-ID: <20110207182418.BD9122A6C12C@llvm.org>
Author: dpatel
Date: Mon Feb 7 12:24:18 2011
New Revision: 125019
URL: http://llvm.org/viewvc/llvm-project?rev=125019&view=rev
Log:
Reduce test case, smaller is better.
Modified:
llvm/trunk/test/CodeGen/X86/dbg-merge-loc-entry.ll
Modified: llvm/trunk/test/CodeGen/X86/dbg-merge-loc-entry.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/dbg-merge-loc-entry.ll?rev=125019&r1=125018&r2=125019&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/dbg-merge-loc-entry.ll (original)
+++ llvm/trunk/test/CodeGen/X86/dbg-merge-loc-entry.ll Mon Feb 7 12:24:18 2011
@@ -2,641 +2,35 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-apple-darwin8"
-;CHECK: Ldebug_loc2:
-;CHECK-NEXT: .quad Ltmp11
-;CHECK-NEXT: .quad Lfunc_end0
-;CHECK-NEXT: .short 1 ## Loc expr size
-;CHECK-NEXT: .byte 85 ## DW_OP_reg5
-;CHECK-NEXT: .quad 0
-;CHECK-NEXT: .quad 0
-
+;CHECK: Ldebug_loc0:
+;CHECK-NEXT: .quad Lfunc_begin0
+;CHECK-NEXT: .quad Lfunc_end0
+;CHECK-NEXT: .short 1 ## Loc expr size
+;CHECK-NEXT: .byte 85 ## DW_OP_reg5
+;CHECK-NEXT: .quad 0
+;CHECK-NEXT: .quad 0
%0 = type { i64, i1 }
- at __clz_tab = external unnamed_addr constant [256 x i8]
+ at __clz_tab = external constant [256 x i8]
define hidden i128 @__divti3(i128 %u, i128 %v) nounwind readnone {
entry:
- tail call void @llvm.dbg.value(metadata !{i128 %u}, i64 0, metadata !103), !dbg !111
- tail call void @llvm.dbg.value(metadata !{i128 %v}, i64 0, metadata !104), !dbg !111
- tail call void @llvm.dbg.declare(metadata !{null}, metadata !108), !dbg !112
- tail call void @llvm.dbg.declare(metadata !{null}, metadata !109), !dbg !113
- tail call void @llvm.dbg.value(metadata !114, i64 0, metadata !105), !dbg !115
- %0 = trunc i128 %u to i64
- %sroa.store.elt15 = lshr i128 %u, 64
- %1 = trunc i128 %sroa.store.elt15 to i64
- %2 = trunc i128 %v to i64
- %sroa.store.elt = lshr i128 %v, 64
- %3 = trunc i128 %sroa.store.elt to i64
- %4 = icmp slt i64 %1, 0, !dbg !116
- br i1 %4, label %bb, label %bb1, !dbg !116
-
-bb: ; preds = %entry
- tail call void @llvm.dbg.value(metadata !117, i64 0, metadata !105), !dbg !118
- %5 = sub nsw i128 0, %u, !dbg !118
- %6 = trunc i128 %5 to i64
- %sroa.store.elt18 = lshr i128 %5, 64
- %7 = trunc i128 %sroa.store.elt18 to i64
- br label %bb1, !dbg !118
-
-bb1: ; preds = %bb, %entry
- %uu.0.1.0 = phi i64 [ %7, %bb ], [ %1, %entry ]
- %uu.0.0.0 = phi i64 [ %6, %bb ], [ %0, %entry ]
- %c.0 = phi i64 [ -1, %bb ], [ 0, %entry ]
- %8 = icmp slt i64 %3, 0, !dbg !119
- br i1 %8, label %bb2, label %bb4, !dbg !119
-
-bb2: ; preds = %bb1
- %not3 = xor i64 %c.0, -1, !dbg !120
- tail call void @llvm.dbg.value(metadata !{i64 %not3}, i64 0, metadata !105), !dbg !120
- %9 = sub nsw i128 0, %v, !dbg !120
- %10 = trunc i128 %9 to i64
- %sroa.store.elt11 = lshr i128 %9, 64
- %11 = trunc i128 %sroa.store.elt11 to i64
- br label %bb4, !dbg !120
-
-bb4: ; preds = %bb2, %bb1
- %vv.0.1.0 = phi i64 [ %11, %bb2 ], [ %3, %bb1 ]
- %vv.0.0.0 = phi i64 [ %10, %bb2 ], [ %2, %bb1 ]
- %c.1 = phi i64 [ %not3, %bb2 ], [ %c.0, %bb1 ]
- tail call void @llvm.dbg.value(metadata !{null}, i64 0, metadata !14) nounwind, !dbg !121
- tail call void @llvm.dbg.value(metadata !{null}, i64 0, metadata !15) nounwind, !dbg !121
- tail call void @llvm.dbg.value(metadata !123, i64 0, metadata !16) nounwind, !dbg !121
- tail call void @llvm.dbg.declare(metadata !{null}, metadata !17) nounwind, !dbg !124
- tail call void @llvm.dbg.declare(metadata !{null}, metadata !30) nounwind, !dbg !125
- tail call void @llvm.dbg.declare(metadata !{null}, metadata !31) nounwind, !dbg !126
- tail call void @llvm.dbg.declare(metadata !{null}, metadata !43) nounwind, !dbg !127
- tail call void @llvm.dbg.value(metadata !{i64 %vv.0.0.0}, i64 0, metadata !32) nounwind, !dbg !128
- tail call void @llvm.dbg.value(metadata !{i64 %vv.0.1.0}, i64 0, metadata !35) nounwind, !dbg !129
- tail call void @llvm.dbg.value(metadata !{i64 %uu.0.0.0}, i64 0, metadata !36) nounwind, !dbg !130
- tail call void @llvm.dbg.value(metadata !{i64 %uu.0.1.0}, i64 0, metadata !37) nounwind, !dbg !131
- %12 = icmp eq i64 %vv.0.1.0, 0, !dbg !132
- br i1 %12, label %bb.i, label %bb73.i, !dbg !132
-
-bb.i: ; preds = %bb4
- %13 = icmp ugt i64 %vv.0.0.0, %uu.0.1.0, !dbg !133
- br i1 %13, label %bb4.i, label %bb21.i, !dbg !133
-
-bb2.i: ; preds = %bb4.i
- %tmp154.i = shl i64 255, %.cast.i
- %14 = and i64 %tmp154.i, %vv.0.0.0
- %15 = icmp eq i64 %14, 0, !dbg !134
- br i1 %15, label %bb3.i, label %bb5.i, !dbg !134
-
-bb3.i: ; preds = %bb2.i
- %indvar.next20.i = add i64 %indvar19.i, 1
- br label %bb4.i, !dbg !134
-
-bb4.i: ; preds = %bb.i, %bb3.i
- %indvar19.i = phi i64 [ %indvar.next20.i, %bb3.i ], [ 0, %bb.i ]
- %tmp24 = mul i64 %indvar19.i, -8
- %.cast.i = add i64 %tmp24, 56
- %16 = icmp eq i64 %.cast.i, 0, !dbg !134
- br i1 %16, label %bb5.i, label %bb2.i, !dbg !134
-
-bb5.i: ; preds = %bb4.i, %bb2.i
- %.cast6.i = and i64 %.cast.i, 4294967288
- %17 = lshr i64 %vv.0.0.0, %.cast6.i, !dbg !134
- %18 = getelementptr inbounds [256 x i8]* @__clz_tab, i64 0, i64 %17, !dbg !134
- %19 = load i8* %18, align 1, !dbg !134
- %20 = zext i8 %19 to i64, !dbg !134
- %21 = add i64 %20, %.cast.i, !dbg !134
- tail call void @llvm.dbg.value(metadata !135, i64 0, metadata !42) nounwind, !dbg !134
- %22 = icmp eq i64 %21, 64
- br i1 %22, label %bb12.i, label %bb7.i, !dbg !136
-
-bb7.i: ; preds = %bb5.i
- %23 = sub i64 64, %21, !dbg !134
- %.cast8.i = and i64 %23, 4294967295
- %24 = shl i64 %vv.0.0.0, %.cast8.i, !dbg !137
- tail call void @llvm.dbg.value(metadata !{i64 %24}, i64 0, metadata !32) nounwind, !dbg !137
- %25 = shl i64 %uu.0.1.0, %.cast8.i, !dbg !138
- %.cast10.i = and i64 %21, 4294967295
- %26 = lshr i64 %uu.0.0.0, %.cast10.i, !dbg !138
- %27 = or i64 %25, %26, !dbg !138
- tail call void @llvm.dbg.value(metadata !{i64 %27}, i64 0, metadata !37) nounwind, !dbg !138
- %28 = shl i64 %uu.0.0.0, %.cast8.i, !dbg !139
- tail call void @llvm.dbg.value(metadata !{i64 %28}, i64 0, metadata !36) nounwind, !dbg !139
- br label %bb12.i, !dbg !139
-
-bb12.i: ; preds = %bb7.i, %bb5.i
- %n1.0.i = phi i64 [ %27, %bb7.i ], [ %uu.0.1.0, %bb5.i ]
- %n0.0.i = phi i64 [ %28, %bb7.i ], [ %uu.0.0.0, %bb5.i ]
- %d0.0.i = phi i64 [ %24, %bb7.i ], [ %vv.0.0.0, %bb5.i ]
- %29 = lshr i64 %d0.0.i, 32, !dbg !140
- tail call void @llvm.dbg.value(metadata !{i64 %29}, i64 0, metadata !47) nounwind, !dbg !140
- %30 = and i64 %d0.0.i, 4294967295, !dbg !140
- tail call void @llvm.dbg.value(metadata !{i64 %30}, i64 0, metadata !49) nounwind, !dbg !140
- %31 = urem i64 %n1.0.i, %29, !dbg !140
- tail call void @llvm.dbg.value(metadata !{i64 %31}, i64 0, metadata !52) nounwind, !dbg !140
- %32 = udiv i64 %n1.0.i, %29, !dbg !140
- tail call void @llvm.dbg.value(metadata !{i64 %32}, i64 0, metadata !50) nounwind, !dbg !140
- %33 = mul i64 %32, %30, !dbg !140
- tail call void @llvm.dbg.value(metadata !{i64 %33}, i64 0, metadata !54) nounwind, !dbg !140
- %34 = shl i64 %31, 32
- %35 = lshr i64 %n0.0.i, 32, !dbg !140
- %36 = or i64 %34, %35, !dbg !140
- tail call void @llvm.dbg.value(metadata !{i64 %36}, i64 0, metadata !52) nounwind, !dbg !140
- %37 = icmp ult i64 %36, %33, !dbg !140
- br i1 %37, label %bb13.i, label %bb16.i, !dbg !140
-
-bb13.i: ; preds = %bb12.i
- %38 = add i64 %32, -1
- tail call void @llvm.dbg.value(metadata !{i64 %38}, i64 0, metadata !50) nounwind, !dbg !140
- %uadd153.i = tail call %0 @llvm.uadd.with.overflow.i64(i64 %36, i64 %d0.0.i) nounwind
- %39 = extractvalue %0 %uadd153.i, 0
- tail call void @llvm.dbg.value(metadata !{i64 %39}, i64 0, metadata !52) nounwind, !dbg !140
- %40 = extractvalue %0 %uadd153.i, 1
- %.not.i = xor i1 %40, true
- %41 = icmp ult i64 %39, %33, !dbg !140
- %or.cond.i = and i1 %41, %.not.i
- br i1 %or.cond.i, label %bb15.i, label %bb16.i, !dbg !140
-
-bb15.i: ; preds = %bb13.i
- %42 = add i64 %32, -2
- tail call void @llvm.dbg.value(metadata !{i64 %42}, i64 0, metadata !50) nounwind, !dbg !140
- %43 = add i64 %39, %d0.0.i, !dbg !140
- tail call void @llvm.dbg.value(metadata !{i64 %43}, i64 0, metadata !52) nounwind, !dbg !140
- br label %bb16.i, !dbg !140
-
-bb16.i: ; preds = %bb15.i, %bb13.i, %bb12.i
- %__r1.0.i = phi i64 [ %43, %bb15.i ], [ %39, %bb13.i ], [ %36, %bb12.i ]
- %__q1.0.i = phi i64 [ %42, %bb15.i ], [ %38, %bb13.i ], [ %32, %bb12.i ]
- %44 = sub i64 %__r1.0.i, %33, !dbg !140
- tail call void @llvm.dbg.value(metadata !{i64 %44}, i64 0, metadata !52) nounwind, !dbg !140
- %45 = urem i64 %44, %29, !dbg !140
- tail call void @llvm.dbg.value(metadata !{i64 %45}, i64 0, metadata !53) nounwind, !dbg !140
- %46 = udiv i64 %44, %29, !dbg !140
- tail call void @llvm.dbg.value(metadata !{i64 %46}, i64 0, metadata !51) nounwind, !dbg !140
- %47 = mul i64 %46, %30, !dbg !140
- tail call void @llvm.dbg.value(metadata !{i64 %47}, i64 0, metadata !54) nounwind, !dbg !140
- %48 = shl i64 %45, 32
- %49 = and i64 %n0.0.i, 4294967295, !dbg !140
- %50 = or i64 %48, %49, !dbg !140
- tail call void @llvm.dbg.value(metadata !{i64 %50}, i64 0, metadata !53) nounwind, !dbg !140
- %51 = icmp ult i64 %50, %47, !dbg !140
- br i1 %51, label %bb17.i, label %bb20.i, !dbg !140
-
-bb17.i: ; preds = %bb16.i
- %52 = add i64 %46, -1
- tail call void @llvm.dbg.value(metadata !{i64 %52}, i64 0, metadata !51) nounwind, !dbg !140
- %uadd152.i = tail call %0 @llvm.uadd.with.overflow.i64(i64 %50, i64 %d0.0.i) nounwind
- tail call void @llvm.dbg.value(metadata !135, i64 0, metadata !53) nounwind, !dbg !140
- %53 = extractvalue %0 %uadd152.i, 1
- br i1 %53, label %bb20.i, label %bb18.i, !dbg !140
-
-bb18.i: ; preds = %bb17.i
- %54 = extractvalue %0 %uadd152.i, 0
- %55 = add i64 %46, -2
- %56 = icmp ult i64 %54, %47, !dbg !140
- %..i = select i1 %56, i64 %55, i64 %52
- br label %bb20.i
-
-bb20.i: ; preds = %bb18.i, %bb17.i, %bb16.i
- %__q0.0.i = phi i64 [ %52, %bb17.i ], [ %46, %bb16.i ], [ %..i, %bb18.i ]
- tail call void @llvm.dbg.value(metadata !{null}, i64 0, metadata !53) nounwind, !dbg !140
- %57 = shl i64 %__q1.0.i, 32
- %58 = or i64 %__q0.0.i, %57, !dbg !140
- tail call void @llvm.dbg.value(metadata !{i64 %58}, i64 0, metadata !39) nounwind, !dbg !140
- tail call void @llvm.dbg.value(metadata !{null}, i64 0, metadata !36) nounwind, !dbg !140
- tail call void @llvm.dbg.value(metadata !114, i64 0, metadata !40) nounwind, !dbg !141
- br label %__udivmodti4.exit, !dbg !141
-
-bb21.i: ; preds = %bb.i
- %59 = icmp eq i64 %vv.0.0.0, 0, !dbg !142
- br i1 %59, label %bb22.i, label %bb23.i, !dbg !142
-
-bb22.i: ; preds = %bb21.i
- %60 = udiv i64 1, %vv.0.0.0, !dbg !143
- tail call void @llvm.dbg.value(metadata !{i64 %60}, i64 0, metadata !32) nounwind, !dbg !143
- br label %bb23.i, !dbg !143
-
-bb23.i: ; preds = %bb22.i, %bb21.i
- %d0.1.i = phi i64 [ %60, %bb22.i ], [ %vv.0.0.0, %bb21.i ]
- tail call void @llvm.dbg.value(metadata !{i64 %d0.1.i}, i64 0, metadata !55) nounwind, !dbg !144
- tail call void @llvm.dbg.value(metadata !145, i64 0, metadata !57) nounwind, !dbg !144
- br label %bb29.i, !dbg !144
-
-bb26.i: ; preds = %bb29.i
- %tmp151.i = shl i64 255, %.cast27.i
- %61 = and i64 %tmp151.i, %d0.1.i
- %62 = icmp eq i64 %61, 0, !dbg !144
- br i1 %62, label %bb28.i, label %bb30.i, !dbg !144
-
-bb28.i: ; preds = %bb26.i
- %indvar.next16.i = add i64 %indvar15.i, 1
- br label %bb29.i, !dbg !144
-
-bb29.i: ; preds = %bb28.i, %bb23.i
- %indvar15.i = phi i64 [ %indvar.next16.i, %bb28.i ], [ 0, %bb23.i ]
- %tmp22 = mul i64 %indvar15.i, -8
- %.cast27.i = add i64 %tmp22, 56
- %63 = icmp eq i64 %.cast27.i, 0, !dbg !144
- br i1 %63, label %bb30.i, label %bb26.i, !dbg !144
-
-bb30.i: ; preds = %bb29.i, %bb26.i
- %.cast31.i = and i64 %.cast27.i, 4294967288
- %64 = lshr i64 %d0.1.i, %.cast31.i, !dbg !144
- %65 = getelementptr inbounds [256 x i8]* @__clz_tab, i64 0, i64 %64, !dbg !144
- %66 = load i8* %65, align 1, !dbg !144
- %67 = zext i8 %66 to i64, !dbg !144
- %68 = add i64 %67, %.cast27.i, !dbg !144
- tail call void @llvm.dbg.value(metadata !135, i64 0, metadata !42) nounwind, !dbg !144
- %69 = icmp eq i64 %68, 64
- br i1 %69, label %bb32.i, label %bb33.i, !dbg !146
-
-bb32.i: ; preds = %bb30.i
- %70 = sub i64 %uu.0.1.0, %d0.1.i, !dbg !147
- tail call void @llvm.dbg.value(metadata !{i64 %70}, i64 0, metadata !37) nounwind, !dbg !147
- tail call void @llvm.dbg.value(metadata !148, i64 0, metadata !40) nounwind, !dbg !149
- br label %bb54.i, !dbg !149
-
-bb33.i: ; preds = %bb30.i
- %71 = sub i64 64, %68, !dbg !144
- tail call void @llvm.dbg.value(metadata !{i64 %68}, i64 0, metadata !41) nounwind, !dbg !150
- %.cast34.i = and i64 %71, 4294967295
- %72 = shl i64 %d0.1.i, %.cast34.i, !dbg !151
- tail call void @llvm.dbg.value(metadata !{i64 %72}, i64 0, metadata !32) nounwind, !dbg !151
- %.cast35.i = and i64 %68, 4294967295
- %73 = lshr i64 %uu.0.1.0, %.cast35.i, !dbg !152
- tail call void @llvm.dbg.value(metadata !{i64 %73}, i64 0, metadata !38) nounwind, !dbg !152
- %74 = shl i64 %uu.0.1.0, %.cast34.i, !dbg !153
- %75 = lshr i64 %uu.0.0.0, %.cast35.i, !dbg !153
- %76 = or i64 %74, %75, !dbg !153
- tail call void @llvm.dbg.value(metadata !{i64 %76}, i64 0, metadata !37) nounwind, !dbg !153
- %77 = shl i64 %uu.0.0.0, %.cast34.i, !dbg !154
- tail call void @llvm.dbg.value(metadata !{i64 %77}, i64 0, metadata !36) nounwind, !dbg !154
- %78 = lshr i64 %72, 32, !dbg !155
- tail call void @llvm.dbg.value(metadata !{i64 %78}, i64 0, metadata !58) nounwind, !dbg !155
- %79 = and i64 %72, 4294967295, !dbg !155
- tail call void @llvm.dbg.value(metadata !{i64 %79}, i64 0, metadata !60) nounwind, !dbg !155
- %80 = urem i64 %73, %78, !dbg !155
- tail call void @llvm.dbg.value(metadata !{i64 %80}, i64 0, metadata !63) nounwind, !dbg !155
- %81 = udiv i64 %73, %78, !dbg !155
- tail call void @llvm.dbg.value(metadata !{i64 %81}, i64 0, metadata !61) nounwind, !dbg !155
- %82 = mul i64 %81, %79, !dbg !155
- tail call void @llvm.dbg.value(metadata !{i64 %82}, i64 0, metadata !65) nounwind, !dbg !155
- %83 = shl i64 %80, 32
- %84 = lshr i64 %76, 32, !dbg !155
- %85 = or i64 %83, %84, !dbg !155
- tail call void @llvm.dbg.value(metadata !{i64 %85}, i64 0, metadata !63) nounwind, !dbg !155
- %86 = icmp ult i64 %85, %82, !dbg !155
- br i1 %86, label %bb46.i, label %bb49.i, !dbg !155
-
-bb46.i: ; preds = %bb33.i
- %87 = add i64 %81, -1
- tail call void @llvm.dbg.value(metadata !{i64 %87}, i64 0, metadata !61) nounwind, !dbg !155
- %uadd150.i = tail call %0 @llvm.uadd.with.overflow.i64(i64 %85, i64 %72) nounwind
- %88 = extractvalue %0 %uadd150.i, 0
- tail call void @llvm.dbg.value(metadata !{i64 %88}, i64 0, metadata !63) nounwind, !dbg !155
- %89 = extractvalue %0 %uadd150.i, 1
- %.not1.i = xor i1 %89, true
- %90 = icmp ult i64 %88, %82, !dbg !155
- %or.cond2.i = and i1 %90, %.not1.i
- br i1 %or.cond2.i, label %bb48.i, label %bb49.i, !dbg !155
-
-bb48.i: ; preds = %bb46.i
- %91 = add i64 %81, -2
- tail call void @llvm.dbg.value(metadata !{i64 %91}, i64 0, metadata !61) nounwind, !dbg !155
- %92 = add i64 %88, %72, !dbg !155
- tail call void @llvm.dbg.value(metadata !{i64 %92}, i64 0, metadata !63) nounwind, !dbg !155
- br label %bb49.i, !dbg !155
-
-bb49.i: ; preds = %bb48.i, %bb46.i, %bb33.i
- %__q141.0.i = phi i64 [ %91, %bb48.i ], [ %87, %bb46.i ], [ %81, %bb33.i ]
- %__r143.0.i = phi i64 [ %92, %bb48.i ], [ %88, %bb46.i ], [ %85, %bb33.i ]
- %93 = sub i64 %__r143.0.i, %82, !dbg !155
- tail call void @llvm.dbg.value(metadata !{i64 %93}, i64 0, metadata !63) nounwind, !dbg !155
- %94 = urem i64 %93, %78, !dbg !155
- tail call void @llvm.dbg.value(metadata !{i64 %94}, i64 0, metadata !64) nounwind, !dbg !155
- %95 = udiv i64 %93, %78, !dbg !155
- tail call void @llvm.dbg.value(metadata !{i64 %95}, i64 0, metadata !62) nounwind, !dbg !155
- %96 = mul i64 %95, %79, !dbg !155
- tail call void @llvm.dbg.value(metadata !{i64 %96}, i64 0, metadata !65) nounwind, !dbg !155
- %97 = shl i64 %94, 32
- %98 = and i64 %76, 4294967295, !dbg !155
- %99 = or i64 %97, %98, !dbg !155
- tail call void @llvm.dbg.value(metadata !{i64 %99}, i64 0, metadata !64) nounwind, !dbg !155
- %100 = icmp ult i64 %99, %96, !dbg !155
- br i1 %100, label %bb50.i, label %bb53.i, !dbg !155
-
-bb50.i: ; preds = %bb49.i
- %101 = add i64 %95, -1
- tail call void @llvm.dbg.value(metadata !{i64 %101}, i64 0, metadata !62) nounwind, !dbg !155
- %uadd149.i = tail call %0 @llvm.uadd.with.overflow.i64(i64 %99, i64 %72) nounwind
- %102 = extractvalue %0 %uadd149.i, 0
- tail call void @llvm.dbg.value(metadata !{i64 %102}, i64 0, metadata !64) nounwind, !dbg !155
- %103 = extractvalue %0 %uadd149.i, 1
- %.not3.i = xor i1 %103, true
- %104 = icmp ult i64 %102, %96, !dbg !155
- %or.cond4.i = and i1 %104, %.not3.i
- br i1 %or.cond4.i, label %bb52.i, label %bb53.i, !dbg !155
-
-bb52.i: ; preds = %bb50.i
- %105 = add i64 %95, -2
- tail call void @llvm.dbg.value(metadata !{i64 %105}, i64 0, metadata !62) nounwind, !dbg !155
- %106 = add i64 %102, %72, !dbg !155
- tail call void @llvm.dbg.value(metadata !{i64 %106}, i64 0, metadata !64) nounwind, !dbg !155
- br label %bb53.i, !dbg !155
-
-bb53.i: ; preds = %bb52.i, %bb50.i, %bb49.i
- %__q042.0.i = phi i64 [ %105, %bb52.i ], [ %101, %bb50.i ], [ %95, %bb49.i ]
- %__r044.0.i = phi i64 [ %106, %bb52.i ], [ %102, %bb50.i ], [ %99, %bb49.i ]
- %107 = sub i64 %__r044.0.i, %96, !dbg !155
- tail call void @llvm.dbg.value(metadata !{i64 %107}, i64 0, metadata !64) nounwind, !dbg !155
- %108 = shl i64 %__q141.0.i, 32
- %109 = or i64 %__q042.0.i, %108, !dbg !155
- tail call void @llvm.dbg.value(metadata !{i64 %109}, i64 0, metadata !40) nounwind, !dbg !155
- tail call void @llvm.dbg.value(metadata !{i64 %107}, i64 0, metadata !37) nounwind, !dbg !155
- br label %bb54.i, !dbg !155
-
-bb54.i: ; preds = %bb53.i, %bb32.i
- %q1.0.i = phi i64 [ 1, %bb32.i ], [ %109, %bb53.i ]
- %n1.1.i = phi i64 [ %70, %bb32.i ], [ %107, %bb53.i ]
- %n0.1.i = phi i64 [ %uu.0.0.0, %bb32.i ], [ %77, %bb53.i ]
- %d0.2.i = phi i64 [ %d0.1.i, %bb32.i ], [ %72, %bb53.i ]
- %110 = lshr i64 %d0.2.i, 32, !dbg !156
- tail call void @llvm.dbg.value(metadata !{i64 %110}, i64 0, metadata !66) nounwind, !dbg !156
- %111 = and i64 %d0.2.i, 4294967295, !dbg !156
- tail call void @llvm.dbg.value(metadata !{i64 %111}, i64 0, metadata !68) nounwind, !dbg !156
- %112 = urem i64 %n1.1.i, %110, !dbg !156
- tail call void @llvm.dbg.value(metadata !{i64 %112}, i64 0, metadata !71) nounwind, !dbg !156
- %113 = udiv i64 %n1.1.i, %110, !dbg !156
- tail call void @llvm.dbg.value(metadata !{i64 %113}, i64 0, metadata !69) nounwind, !dbg !156
- %114 = mul i64 %113, %111, !dbg !156
- tail call void @llvm.dbg.value(metadata !{i64 %114}, i64 0, metadata !73) nounwind, !dbg !156
- %115 = shl i64 %112, 32
- %116 = lshr i64 %n0.1.i, 32, !dbg !156
- %117 = or i64 %115, %116, !dbg !156
- tail call void @llvm.dbg.value(metadata !{i64 %117}, i64 0, metadata !71) nounwind, !dbg !156
- %118 = icmp ult i64 %117, %114, !dbg !156
- br i1 %118, label %bb62.i, label %bb65.i, !dbg !156
-
-bb62.i: ; preds = %bb54.i
- %119 = add i64 %113, -1
- tail call void @llvm.dbg.value(metadata !{i64 %119}, i64 0, metadata !69) nounwind, !dbg !156
- %uadd148.i = tail call %0 @llvm.uadd.with.overflow.i64(i64 %117, i64 %d0.2.i) nounwind
- %120 = extractvalue %0 %uadd148.i, 0
- tail call void @llvm.dbg.value(metadata !{i64 %120}, i64 0, metadata !71) nounwind, !dbg !156
- %121 = extractvalue %0 %uadd148.i, 1
- %.not5.i = xor i1 %121, true
- %122 = icmp ult i64 %120, %114, !dbg !156
- %or.cond6.i = and i1 %122, %.not5.i
- br i1 %or.cond6.i, label %bb64.i, label %bb65.i, !dbg !156
-
-bb64.i: ; preds = %bb62.i
- %123 = add i64 %113, -2
- tail call void @llvm.dbg.value(metadata !{i64 %123}, i64 0, metadata !69) nounwind, !dbg !156
- %124 = add i64 %120, %d0.2.i, !dbg !156
- tail call void @llvm.dbg.value(metadata !{i64 %124}, i64 0, metadata !71) nounwind, !dbg !156
- br label %bb65.i, !dbg !156
-
-bb65.i: ; preds = %bb64.i, %bb62.i, %bb54.i
- %__q157.0.i = phi i64 [ %123, %bb64.i ], [ %119, %bb62.i ], [ %113, %bb54.i ]
- %__r159.0.i = phi i64 [ %124, %bb64.i ], [ %120, %bb62.i ], [ %117, %bb54.i ]
- %125 = sub i64 %__r159.0.i, %114, !dbg !156
- tail call void @llvm.dbg.value(metadata !{i64 %125}, i64 0, metadata !71) nounwind, !dbg !156
- %126 = urem i64 %125, %110, !dbg !156
- tail call void @llvm.dbg.value(metadata !{i64 %126}, i64 0, metadata !72) nounwind, !dbg !156
- %127 = udiv i64 %125, %110, !dbg !156
- tail call void @llvm.dbg.value(metadata !{i64 %127}, i64 0, metadata !70) nounwind, !dbg !156
- %128 = mul i64 %127, %111, !dbg !156
- tail call void @llvm.dbg.value(metadata !{i64 %128}, i64 0, metadata !73) nounwind, !dbg !156
- %129 = shl i64 %126, 32
- %130 = and i64 %n0.1.i, 4294967295, !dbg !156
- %131 = or i64 %129, %130, !dbg !156
- tail call void @llvm.dbg.value(metadata !{i64 %131}, i64 0, metadata !72) nounwind, !dbg !156
- %132 = icmp ult i64 %131, %128, !dbg !156
- br i1 %132, label %bb66.i, label %bb69.i, !dbg !156
-
-bb66.i: ; preds = %bb65.i
- %133 = add i64 %127, -1
- tail call void @llvm.dbg.value(metadata !{i64 %133}, i64 0, metadata !70) nounwind, !dbg !156
- %uadd147.i = tail call %0 @llvm.uadd.with.overflow.i64(i64 %131, i64 %d0.2.i) nounwind
- tail call void @llvm.dbg.value(metadata !135, i64 0, metadata !72) nounwind, !dbg !156
- %134 = extractvalue %0 %uadd147.i, 1
- br i1 %134, label %bb69.i, label %bb67.i, !dbg !156
-
-bb67.i: ; preds = %bb66.i
- %135 = extractvalue %0 %uadd147.i, 0
- %136 = add i64 %127, -2
- %137 = icmp ult i64 %135, %128, !dbg !156
- %.7.i = select i1 %137, i64 %136, i64 %133
- br label %bb69.i
-
-bb69.i: ; preds = %bb67.i, %bb66.i, %bb65.i
- %__q058.0.i = phi i64 [ %133, %bb66.i ], [ %127, %bb65.i ], [ %.7.i, %bb67.i ]
- tail call void @llvm.dbg.value(metadata !{null}, i64 0, metadata !72) nounwind, !dbg !156
- %138 = shl i64 %__q157.0.i, 32
- %139 = or i64 %__q058.0.i, %138, !dbg !156
- tail call void @llvm.dbg.value(metadata !{i64 %139}, i64 0, metadata !39) nounwind, !dbg !156
- tail call void @llvm.dbg.value(metadata !{null}, i64 0, metadata !36) nounwind, !dbg !156
- br label %__udivmodti4.exit, !dbg !156
-
-bb73.i: ; preds = %bb4
- %140 = icmp ugt i64 %vv.0.1.0, %uu.0.1.0, !dbg !157
- br i1 %140, label %__udivmodti4.exit, label %bb82.i, !dbg !157
-
-bb79.i: ; preds = %bb82.i
- %tmp.i = shl i64 255, %.cast80.i
- %141 = and i64 %tmp.i, %vv.0.1.0
- %142 = icmp eq i64 %141, 0, !dbg !158
- br i1 %142, label %bb81.i, label %bb83.i, !dbg !158
-
-bb81.i: ; preds = %bb79.i
- %indvar.next.i = add i64 %indvar.i, 1
- br label %bb82.i, !dbg !158
-
-bb82.i: ; preds = %bb73.i, %bb81.i
- %indvar.i = phi i64 [ %indvar.next.i, %bb81.i ], [ 0, %bb73.i ]
- %tmp = mul i64 %indvar.i, -8
- %.cast80.i = add i64 %tmp, 56
- %143 = icmp eq i64 %.cast80.i, 0, !dbg !158
- br i1 %143, label %bb83.i, label %bb79.i, !dbg !158
-
-bb83.i: ; preds = %bb82.i, %bb79.i
- %.cast84.i = and i64 %.cast80.i, 4294967288
- %144 = lshr i64 %vv.0.1.0, %.cast84.i, !dbg !158
- %145 = getelementptr inbounds [256 x i8]* @__clz_tab, i64 0, i64 %144, !dbg !158
- %146 = load i8* %145, align 1, !dbg !158
- %147 = zext i8 %146 to i64, !dbg !158
- %148 = add i64 %147, %.cast80.i, !dbg !158
- tail call void @llvm.dbg.value(metadata !135, i64 0, metadata !42) nounwind, !dbg !158
- %149 = icmp eq i64 %148, 64
- br i1 %149, label %bb85.i, label %bb92.i, !dbg !159
-
-bb85.i: ; preds = %bb83.i
- %150 = icmp ugt i64 %uu.0.1.0, %vv.0.1.0, !dbg !160
- %151 = icmp uge i64 %uu.0.0.0, %vv.0.0.0, !dbg !160
- %152 = or i1 %150, %151, !dbg !160
- %.8.i = zext i1 %152 to i64
- tail call void @llvm.dbg.value(metadata !114, i64 0, metadata !40) nounwind, !dbg !161
- br label %__udivmodti4.exit
-
-bb92.i: ; preds = %bb83.i
- %153 = sub i64 64, %148, !dbg !158
- tail call void @llvm.dbg.value(metadata !{i64 %148}, i64 0, metadata !41) nounwind, !dbg !162
- %.cast93.i = and i64 %153, 4294967295
- %154 = shl i64 %vv.0.1.0, %.cast93.i, !dbg !163
- %.cast94.i = and i64 %148, 4294967295
- %155 = lshr i64 %vv.0.0.0, %.cast94.i, !dbg !163
- %156 = or i64 %154, %155, !dbg !163
- tail call void @llvm.dbg.value(metadata !{i64 %156}, i64 0, metadata !35) nounwind, !dbg !163
- %157 = shl i64 %vv.0.0.0, %.cast93.i, !dbg !164
- tail call void @llvm.dbg.value(metadata !{i64 %157}, i64 0, metadata !32) nounwind, !dbg !164
- %158 = lshr i64 %uu.0.1.0, %.cast94.i, !dbg !165
- tail call void @llvm.dbg.value(metadata !{i64 %158}, i64 0, metadata !38) nounwind, !dbg !165
- %159 = shl i64 %uu.0.1.0, %.cast93.i, !dbg !166
- %160 = lshr i64 %uu.0.0.0, %.cast94.i, !dbg !166
- %161 = or i64 %159, %160, !dbg !166
- tail call void @llvm.dbg.value(metadata !{i64 %161}, i64 0, metadata !37) nounwind, !dbg !166
- %162 = shl i64 %uu.0.0.0, %.cast93.i, !dbg !167
- tail call void @llvm.dbg.value(metadata !{i64 %162}, i64 0, metadata !36) nounwind, !dbg !167
- %163 = lshr i64 %156, 32, !dbg !168
- tail call void @llvm.dbg.value(metadata !{i64 %163}, i64 0, metadata !82) nounwind, !dbg !168
- %164 = and i64 %156, 4294967295, !dbg !168
- tail call void @llvm.dbg.value(metadata !{i64 %164}, i64 0, metadata !84) nounwind, !dbg !168
- %165 = urem i64 %158, %163, !dbg !168
- tail call void @llvm.dbg.value(metadata !{i64 %165}, i64 0, metadata !87) nounwind, !dbg !168
- %166 = udiv i64 %158, %163, !dbg !168
- tail call void @llvm.dbg.value(metadata !{i64 %166}, i64 0, metadata !85) nounwind, !dbg !168
- %167 = mul i64 %166, %164, !dbg !168
- tail call void @llvm.dbg.value(metadata !{i64 %167}, i64 0, metadata !89) nounwind, !dbg !168
- %168 = shl i64 %165, 32
- %169 = lshr i64 %161, 32, !dbg !168
- %170 = or i64 %168, %169, !dbg !168
- tail call void @llvm.dbg.value(metadata !{i64 %170}, i64 0, metadata !87) nounwind, !dbg !168
- %171 = icmp ult i64 %170, %167, !dbg !168
- br i1 %171, label %bb107.i, label %bb110.i, !dbg !168
-
-bb107.i: ; preds = %bb92.i
- %172 = add i64 %166, -1
- tail call void @llvm.dbg.value(metadata !{i64 %172}, i64 0, metadata !85) nounwind, !dbg !168
- %uadd146.i = tail call %0 @llvm.uadd.with.overflow.i64(i64 %170, i64 %156) nounwind
- %173 = extractvalue %0 %uadd146.i, 0
- tail call void @llvm.dbg.value(metadata !{i64 %173}, i64 0, metadata !87) nounwind, !dbg !168
- %174 = extractvalue %0 %uadd146.i, 1
- %.not9.i = xor i1 %174, true
- %175 = icmp ult i64 %173, %167, !dbg !168
- %or.cond10.i = and i1 %175, %.not9.i
- br i1 %or.cond10.i, label %bb109.i, label %bb110.i, !dbg !168
-
-bb109.i: ; preds = %bb107.i
- %176 = add i64 %166, -2
- tail call void @llvm.dbg.value(metadata !{i64 %176}, i64 0, metadata !85) nounwind, !dbg !168
- %177 = add i64 %173, %156, !dbg !168
- tail call void @llvm.dbg.value(metadata !{i64 %177}, i64 0, metadata !87) nounwind, !dbg !168
- br label %bb110.i, !dbg !168
-
-bb110.i: ; preds = %bb109.i, %bb107.i, %bb92.i
- %__q1102.0.i = phi i64 [ %176, %bb109.i ], [ %172, %bb107.i ], [ %166, %bb92.i ]
- %__r1104.0.i = phi i64 [ %177, %bb109.i ], [ %173, %bb107.i ], [ %170, %bb92.i ]
- %178 = sub i64 %__r1104.0.i, %167, !dbg !168
- tail call void @llvm.dbg.value(metadata !{i64 %178}, i64 0, metadata !87) nounwind, !dbg !168
- %179 = urem i64 %178, %163, !dbg !168
- tail call void @llvm.dbg.value(metadata !{i64 %179}, i64 0, metadata !88) nounwind, !dbg !168
- %180 = udiv i64 %178, %163, !dbg !168
- tail call void @llvm.dbg.value(metadata !{i64 %180}, i64 0, metadata !86) nounwind, !dbg !168
- %181 = mul i64 %180, %164, !dbg !168
- tail call void @llvm.dbg.value(metadata !{i64 %181}, i64 0, metadata !89) nounwind, !dbg !168
- %182 = shl i64 %179, 32
- %183 = and i64 %161, 4294967295, !dbg !168
- %184 = or i64 %182, %183, !dbg !168
- tail call void @llvm.dbg.value(metadata !{i64 %184}, i64 0, metadata !88) nounwind, !dbg !168
- %185 = icmp ult i64 %184, %181, !dbg !168
- br i1 %185, label %bb111.i, label %bb114.i, !dbg !168
-
-bb111.i: ; preds = %bb110.i
- %186 = add i64 %180, -1
- tail call void @llvm.dbg.value(metadata !{i64 %186}, i64 0, metadata !86) nounwind, !dbg !168
- %uadd145.i = tail call %0 @llvm.uadd.with.overflow.i64(i64 %184, i64 %156) nounwind
- %187 = extractvalue %0 %uadd145.i, 0
- tail call void @llvm.dbg.value(metadata !{i64 %187}, i64 0, metadata !88) nounwind, !dbg !168
- %188 = extractvalue %0 %uadd145.i, 1
- %.not11.i = xor i1 %188, true
- %189 = icmp ult i64 %187, %181, !dbg !168
- %or.cond12.i = and i1 %189, %.not11.i
- br i1 %or.cond12.i, label %bb113.i, label %bb114.i, !dbg !168
-
-bb113.i: ; preds = %bb111.i
- %190 = add i64 %180, -2
- tail call void @llvm.dbg.value(metadata !{i64 %190}, i64 0, metadata !86) nounwind, !dbg !168
- %191 = add i64 %187, %156, !dbg !168
- tail call void @llvm.dbg.value(metadata !{i64 %191}, i64 0, metadata !88) nounwind, !dbg !168
- br label %bb114.i, !dbg !168
-
-bb114.i: ; preds = %bb113.i, %bb111.i, %bb110.i
- %__q0103.0.i = phi i64 [ %190, %bb113.i ], [ %186, %bb111.i ], [ %180, %bb110.i ]
- %__r0105.0.i = phi i64 [ %191, %bb113.i ], [ %187, %bb111.i ], [ %184, %bb110.i ]
- %192 = sub i64 %__r0105.0.i, %181, !dbg !168
- tail call void @llvm.dbg.value(metadata !{i64 %192}, i64 0, metadata !88) nounwind, !dbg !168
- %193 = shl i64 %__q1102.0.i, 32
- %194 = or i64 %__q0103.0.i, %193, !dbg !168
- tail call void @llvm.dbg.value(metadata !{i64 %194}, i64 0, metadata !39) nounwind, !dbg !168
- tail call void @llvm.dbg.value(metadata !{i64 %192}, i64 0, metadata !37) nounwind, !dbg !168
- %195 = and i64 %__q0103.0.i, 4294967295, !dbg !169
- tail call void @llvm.dbg.value(metadata !{i64 %195}, i64 0, metadata !95) nounwind, !dbg !169
- %196 = lshr i64 %194, 32, !dbg !169
- tail call void @llvm.dbg.value(metadata !{i64 %196}, i64 0, metadata !97) nounwind, !dbg !169
- %197 = and i64 %157, 4294967295, !dbg !169
- tail call void @llvm.dbg.value(metadata !{i64 %197}, i64 0, metadata !96) nounwind, !dbg !169
- %198 = lshr i64 %157, 32, !dbg !169
- tail call void @llvm.dbg.value(metadata !{i64 %198}, i64 0, metadata !98) nounwind, !dbg !169
- %199 = mul i64 %195, %197, !dbg !169
- tail call void @llvm.dbg.value(metadata !{i64 %199}, i64 0, metadata !90) nounwind, !dbg !169
- %200 = mul i64 %195, %198, !dbg !169
- tail call void @llvm.dbg.value(metadata !{i64 %200}, i64 0, metadata !92) nounwind, !dbg !169
- %201 = mul i64 %196, %197, !dbg !169
- tail call void @llvm.dbg.value(metadata !{i64 %201}, i64 0, metadata !93) nounwind, !dbg !169
- %202 = mul i64 %196, %198, !dbg !169
- tail call void @llvm.dbg.value(metadata !{i64 %202}, i64 0, metadata !94) nounwind, !dbg !169
- %203 = lshr i64 %199, 32, !dbg !169
- %204 = add i64 %203, %200, !dbg !169
- tail call void @llvm.dbg.value(metadata !{i64 %204}, i64 0, metadata !92) nounwind, !dbg !169
- %uadd.i = tail call %0 @llvm.uadd.with.overflow.i64(i64 %204, i64 %201) nounwind
- %205 = extractvalue %0 %uadd.i, 0
- tail call void @llvm.dbg.value(metadata !{i64 %205}, i64 0, metadata !92) nounwind, !dbg !169
- %206 = extractvalue %0 %uadd.i, 1
- %207 = add i64 %202, 4294967296, !dbg !169
- tail call void @llvm.dbg.value(metadata !{i64 %207}, i64 0, metadata !94) nounwind, !dbg !169
- %__x3.0.i = select i1 %206, i64 %207, i64 %202
- %208 = lshr i64 %205, 32, !dbg !169
- %209 = add i64 %__x3.0.i, %208, !dbg !169
- tail call void @llvm.dbg.value(metadata !{i64 %209}, i64 0, metadata !79) nounwind, !dbg !169
- tail call void @llvm.dbg.value(metadata !135, i64 0, metadata !81) nounwind, !dbg !169
- %210 = icmp ugt i64 %209, %192, !dbg !170
- br i1 %210, label %bb121.i, label %bb117.i, !dbg !170
-
-bb117.i: ; preds = %bb114.i
- %211 = and i64 %199, 4294967295, !dbg !169
- %212 = shl i64 %205, 32
- %213 = or i64 %212, %211
- %214 = icmp eq i64 %209, %192, !dbg !170
- %215 = icmp ugt i64 %213, %162, !dbg !170
- %216 = and i1 %214, %215, !dbg !170
- br i1 %216, label %bb121.i, label %__udivmodti4.exit, !dbg !170
-
-bb121.i: ; preds = %bb117.i, %bb114.i
- %217 = add i64 %194, -1
- tail call void @llvm.dbg.value(metadata !{i64 %217}, i64 0, metadata !39) nounwind, !dbg !171
- tail call void @llvm.dbg.value(metadata !{null}, i64 0, metadata !99) nounwind, !dbg !172
- tail call void @llvm.dbg.value(metadata !{null}, i64 0, metadata !79) nounwind, !dbg !172
- tail call void @llvm.dbg.value(metadata !{null}, i64 0, metadata !81) nounwind, !dbg !172
- br label %__udivmodti4.exit, !dbg !172
-
-__udivmodti4.exit: ; preds = %bb20.i, %bb69.i, %bb73.i, %bb85.i, %bb117.i, %bb121.i
- %q1.2.i = phi i64 [ 0, %bb85.i ], [ 0, %bb20.i ], [ %q1.0.i, %bb69.i ], [ 0, %bb73.i ], [ 0, %bb121.i ], [ 0, %bb117.i ]
- %q0.3.i = phi i64 [ %.8.i, %bb85.i ], [ %58, %bb20.i ], [ %139, %bb69.i ], [ 0, %bb73.i ], [ %217, %bb121.i ], [ %194, %bb117.i ]
- %218 = zext i64 %q0.3.i to i128
- %219 = zext i64 %q1.2.i to i128
- %220 = shl i128 %219, 64
- %221 = or i128 %220, %218
- tail call void @llvm.dbg.value(metadata !{i128 %221}, i64 0, metadata !110), !dbg !122
- %222 = icmp eq i64 %c.1, 0, !dbg !173
- %223 = sub nsw i128 0, %221, !dbg !174
- tail call void @llvm.dbg.value(metadata !{i128 %223}, i64 0, metadata !110), !dbg !174
- %w.0 = select i1 %222, i128 %221, i128 %223
- ret i128 %w.0, !dbg !175
+ tail call void @llvm.dbg.value(metadata !{i128 %u}, i64 0, metadata !14), !dbg !15
+ tail call void @llvm.dbg.value(metadata !16, i64 0, metadata !17), !dbg !21
+ br i1 undef, label %bb2, label %bb4, !dbg !22
+
+bb2: ; preds = %entry
+ br label %bb4, !dbg !23
+
+bb4: ; preds = %bb2, %entry
+ br i1 undef, label %__udivmodti4.exit, label %bb82.i, !dbg !24
+
+bb82.i: ; preds = %bb4
+ unreachable
+
+__udivmodti4.exit: ; preds = %bb4
+ ret i128 undef, !dbg !27
}
declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
@@ -646,8 +40,6 @@
declare %0 @llvm.uadd.with.overflow.i64(i64, i64) nounwind readnone
!llvm.dbg.sp = !{!0, !9}
-!llvm.dbg.lv.__udivmodti4 = !{!14, !15, !16, !17, !30, !31, !32, !35, !36, !37, !38, !39, !40, !41, !42, !43, !44, !46, !47, !49, !50, !51, !52, !53, !54, !55, !57, !58, !60, !61, !62, !63, !64, !65, !66, !68, !69, !70, !71, !72, !73, !74, !76, !77, !79, !81, !82, !84, !85, !86, !87, !88, !89, !90, !92, !93, !94, !95, !96, !97, !98, !99, !101}
-!llvm.dbg.lv.__divti3 = !{!103, !104, !105, !108, !109, !110}
!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"__udivmodti4", metadata !"__udivmodti4", metadata !"", metadata !1, i32 879, metadata !3, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null} ; [ DW_TAG_subprogram ]
!1 = metadata !{i32 589865, metadata !"foobar.c", metadata !"/tmp", metadata !2} ; [ DW_TAG_file_type ]
@@ -663,165 +55,17 @@
!11 = metadata !{metadata !12, metadata !12, metadata !12}
!12 = metadata !{i32 589846, metadata !6, metadata !"TItype", metadata !6, i32 160, i64 0, i64 0, i64 0, i32 0, metadata !13} ; [ DW_TAG_typedef ]
!13 = metadata !{i32 589860, metadata !1, metadata !"", metadata !1, i32 0, i64 128, i64 128, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!14 = metadata !{i32 590081, metadata !0, metadata !"n", metadata !1, i32 878, metadata !5, i32 0} ; [ DW_TAG_arg_variable ]
-!15 = metadata !{i32 590081, metadata !0, metadata !"d", metadata !1, i32 878, metadata !5, i32 0} ; [ DW_TAG_arg_variable ]
-!16 = metadata !{i32 590081, metadata !0, metadata !"rp", metadata !1, i32 878, metadata !8, i32 0} ; [ DW_TAG_arg_variable ]
-!17 = metadata !{i32 590080, metadata !18, metadata !"nn", metadata !1, i32 880, metadata !19, i32 0} ; [ DW_TAG_auto_variable ]
-!18 = metadata !{i32 589835, metadata !0, i32 879, i32 0, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
-!19 = metadata !{i32 589846, metadata !1, metadata !"DWunion", metadata !1, i32 879, i64 0, i64 0, i64 0, i32 0, metadata !20} ; [ DW_TAG_typedef ]
-!20 = metadata !{i32 589847, metadata !1, metadata !"", metadata !6, i32 432, i64 128, i64 128, i64 0, i32 0, null, metadata !21, i32 0, null} ; [ DW_TAG_union_type ]
-!21 = metadata !{metadata !22, metadata !29}
-!22 = metadata !{i32 589837, metadata !20, metadata !"s", metadata !6, i32 433, i64 128, i64 64, i64 0, i32 0, metadata !23} ; [ DW_TAG_member ]
-!23 = metadata !{i32 589843, metadata !1, metadata !"DWstruct", metadata !6, i32 424, i64 128, i64 64, i64 0, i32 0, null, metadata !24, i32 0, null} ; [ DW_TAG_structure_type ]
-!24 = metadata !{metadata !25, metadata !28}
-!25 = metadata !{i32 589837, metadata !23, metadata !"low", metadata !6, i32 424, i64 64, i64 64, i64 0, i32 0, metadata !26} ; [ DW_TAG_member ]
-!26 = metadata !{i32 589846, metadata !6, metadata !"DItype", metadata !6, i32 156, i64 0, i64 0, i64 0, i32 0, metadata !27} ; [ DW_TAG_typedef ]
-!27 = metadata !{i32 589860, metadata !1, metadata !"long int", metadata !1, i32 0, i64 64, i64 64, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!28 = metadata !{i32 589837, metadata !23, metadata !"high", metadata !6, i32 424, i64 64, i64 64, i64 64, i32 0, metadata !26} ; [ DW_TAG_member ]
-!29 = metadata !{i32 589837, metadata !20, metadata !"ll", metadata !6, i32 434, i64 128, i64 128, i64 0, i32 0, metadata !12} ; [ DW_TAG_member ]
-!30 = metadata !{i32 590080, metadata !18, metadata !"dd", metadata !1, i32 881, metadata !19, i32 0} ; [ DW_TAG_auto_variable ]
-!31 = metadata !{i32 590080, metadata !18, metadata !"rr", metadata !1, i32 882, metadata !19, i32 0} ; [ DW_TAG_auto_variable ]
-!32 = metadata !{i32 590080, metadata !18, metadata !"d0", metadata !1, i32 883, metadata !33, i32 0} ; [ DW_TAG_auto_variable ]
-!33 = metadata !{i32 589846, metadata !6, metadata !"UDItype", metadata !6, i32 159, i64 0, i64 0, i64 0, i32 0, metadata !34} ; [ DW_TAG_typedef ]
-!34 = metadata !{i32 589860, metadata !1, metadata !"long unsigned int", metadata !1, i32 0, i64 64, i64 64, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
-!35 = metadata !{i32 590080, metadata !18, metadata !"d1", metadata !1, i32 883, metadata !33, i32 0} ; [ DW_TAG_auto_variable ]
-!36 = metadata !{i32 590080, metadata !18, metadata !"n0", metadata !1, i32 883, metadata !33, i32 0} ; [ DW_TAG_auto_variable ]
-!37 = metadata !{i32 590080, metadata !18, metadata !"n1", metadata !1, i32 883, metadata !33, i32 0} ; [ DW_TAG_auto_variable ]
-!38 = metadata !{i32 590080, metadata !18, metadata !"n2", metadata !1, i32 883, metadata !33, i32 0} ; [ DW_TAG_auto_variable ]
-!39 = metadata !{i32 590080, metadata !18, metadata !"q0", metadata !1, i32 884, metadata !33, i32 0} ; [ DW_TAG_auto_variable ]
-!40 = metadata !{i32 590080, metadata !18, metadata !"q1", metadata !1, i32 884, metadata !33, i32 0} ; [ DW_TAG_auto_variable ]
-!41 = metadata !{i32 590080, metadata !18, metadata !"b", metadata !1, i32 885, metadata !33, i32 0} ; [ DW_TAG_auto_variable ]
-!42 = metadata !{i32 590080, metadata !18, metadata !"bm", metadata !1, i32 885, metadata !33, i32 0} ; [ DW_TAG_auto_variable ]
-!43 = metadata !{i32 590080, metadata !18, metadata !"ww", metadata !1, i32 1086, metadata !19, i32 0} ; [ DW_TAG_auto_variable ]
-!44 = metadata !{i32 590080, metadata !45, metadata !"__xr", metadata !1, i32 933, metadata !33, i32 0} ; [ DW_TAG_auto_variable ]
-!45 = metadata !{i32 589835, metadata !18, i32 933, i32 0, metadata !1, i32 1} ; [ DW_TAG_lexical_block ]
-!46 = metadata !{i32 590080, metadata !45, metadata !"__a", metadata !1, i32 933, metadata !33, i32 0} ; [ DW_TAG_auto_variable ]
-!47 = metadata !{i32 590080, metadata !48, metadata !"__d1", metadata !1, i32 945, metadata !33, i32 0} ; [ DW_TAG_auto_variable ]
-!48 = metadata !{i32 589835, metadata !18, i32 945, i32 0, metadata !1, i32 2} ; [ DW_TAG_lexical_block ]
-!49 = metadata !{i32 590080, metadata !48, metadata !"__d0", metadata !1, i32 945, metadata !33, i32 0} ; [ DW_TAG_auto_variable ]
-!50 = metadata !{i32 590080, metadata !48, metadata !"__q1", metadata !1, i32 945, metadata !33, i32 0} ; [ DW_TAG_auto_variable ]
-!51 = metadata !{i32 590080, metadata !48, metadata !"__q0", metadata !1, i32 945, metadata !33, i32 0} ; [ DW_TAG_auto_variable ]
-!52 = metadata !{i32 590080, metadata !48, metadata !"__r1", metadata !1, i32 945, metadata !33, i32 0} ; [ DW_TAG_auto_variable ]
-!53 = metadata !{i32 590080, metadata !48, metadata !"__r0", metadata !1, i32 945, metadata !33, i32 0} ; [ DW_TAG_auto_variable ]
-!54 = metadata !{i32 590080, metadata !48, metadata !"__m", metadata !1, i32 945, metadata !33, i32 0} ; [ DW_TAG_auto_variable ]
-!55 = metadata !{i32 590080, metadata !56, metadata !"__xr", metadata !1, i32 957, metadata !33, i32 0} ; [ DW_TAG_auto_variable ]
-!56 = metadata !{i32 589835, metadata !18, i32 957, i32 0, metadata !1, i32 3} ; [ DW_TAG_lexical_block ]
-!57 = metadata !{i32 590080, metadata !56, metadata !"__a", metadata !1, i32 957, metadata !33, i32 0} ; [ DW_TAG_auto_variable ]
-!58 = metadata !{i32 590080, metadata !59, metadata !"__d1", metadata !1, i32 982, metadata !33, i32 0} ; [ DW_TAG_auto_variable ]
-!59 = metadata !{i32 589835, metadata !18, i32 982, i32 0, metadata !1, i32 4} ; [ DW_TAG_lexical_block ]
-!60 = metadata !{i32 590080, metadata !59, metadata !"__d0", metadata !1, i32 982, metadata !33, i32 0} ; [ DW_TAG_auto_variable ]
-!61 = metadata !{i32 590080, metadata !59, metadata !"__q1", metadata !1, i32 982, metadata !33, i32 0} ; [ DW_TAG_auto_variable ]
-!62 = metadata !{i32 590080, metadata !59, metadata !"__q0", metadata !1, i32 982, metadata !33, i32 0} ; [ DW_TAG_auto_variable ]
-!63 = metadata !{i32 590080, metadata !59, metadata !"__r1", metadata !1, i32 982, metadata !33, i32 0} ; [ DW_TAG_auto_variable ]
-!64 = metadata !{i32 590080, metadata !59, metadata !"__r0", metadata !1, i32 982, metadata !33, i32 0} ; [ DW_TAG_auto_variable ]
-!65 = metadata !{i32 590080, metadata !59, metadata !"__m", metadata !1, i32 982, metadata !33, i32 0} ; [ DW_TAG_auto_variable ]
-!66 = metadata !{i32 590080, metadata !67, metadata !"__d1", metadata !1, i32 987, metadata !33, i32 0} ; [ DW_TAG_auto_variable ]
-!67 = metadata !{i32 589835, metadata !18, i32 987, i32 0, metadata !1, i32 5} ; [ DW_TAG_lexical_block ]
-!68 = metadata !{i32 590080, metadata !67, metadata !"__d0", metadata !1, i32 987, metadata !33, i32 0} ; [ DW_TAG_auto_variable ]
-!69 = metadata !{i32 590080, metadata !67, metadata !"__q1", metadata !1, i32 987, metadata !33, i32 0} ; [ DW_TAG_auto_variable ]
-!70 = metadata !{i32 590080, metadata !67, metadata !"__q0", metadata !1, i32 987, metadata !33, i32 0} ; [ DW_TAG_auto_variable ]
-!71 = metadata !{i32 590080, metadata !67, metadata !"__r1", metadata !1, i32 987, metadata !33, i32 0} ; [ DW_TAG_auto_variable ]
-!72 = metadata !{i32 590080, metadata !67, metadata !"__r0", metadata !1, i32 987, metadata !33, i32 0} ; [ DW_TAG_auto_variable ]
-!73 = metadata !{i32 590080, metadata !67, metadata !"__m", metadata !1, i32 987, metadata !33, i32 0} ; [ DW_TAG_auto_variable ]
-!74 = metadata !{i32 590080, metadata !75, metadata !"__xr", metadata !1, i32 1022, metadata !33, i32 0} ; [ DW_TAG_auto_variable ]
-!75 = metadata !{i32 589835, metadata !18, i32 1022, i32 0, metadata !1, i32 6} ; [ DW_TAG_lexical_block ]
-!76 = metadata !{i32 590080, metadata !75, metadata !"__a", metadata !1, i32 1022, metadata !33, i32 0} ; [ DW_TAG_auto_variable ]
-!77 = metadata !{i32 590080, metadata !78, metadata !"__x", metadata !1, i32 1036, metadata !33, i32 0} ; [ DW_TAG_auto_variable ]
-!78 = metadata !{i32 589835, metadata !18, i32 1036, i32 0, metadata !1, i32 7} ; [ DW_TAG_lexical_block ]
-!79 = metadata !{i32 590080, metadata !80, metadata !"m1", metadata !1, i32 1052, metadata !33, i32 0} ; [ DW_TAG_auto_variable ]
-!80 = metadata !{i32 589835, metadata !18, i32 1055, i32 0, metadata !1, i32 8} ; [ DW_TAG_lexical_block ]
-!81 = metadata !{i32 590080, metadata !80, metadata !"m0", metadata !1, i32 1052, metadata !33, i32 0} ; [ DW_TAG_auto_variable ]
-!82 = metadata !{i32 590080, metadata !83, metadata !"__d1", metadata !1, i32 1063, metadata !33, i32 0} ; [ DW_TAG_auto_variable ]
-!83 = metadata !{i32 589835, metadata !80, i32 1063, i32 0, metadata !1, i32 9} ; [ DW_TAG_lexical_block ]
-!84 = metadata !{i32 590080, metadata !83, metadata !"__d0", metadata !1, i32 1063, metadata !33, i32 0} ; [ DW_TAG_auto_variable ]
-!85 = metadata !{i32 590080, metadata !83, metadata !"__q1", metadata !1, i32 1063, metadata !33, i32 0} ; [ DW_TAG_auto_variable ]
-!86 = metadata !{i32 590080, metadata !83, metadata !"__q0", metadata !1, i32 1063, metadata !33, i32 0} ; [ DW_TAG_auto_variable ]
-!87 = metadata !{i32 590080, metadata !83, metadata !"__r1", metadata !1, i32 1063, metadata !33, i32 0} ; [ DW_TAG_auto_variable ]
-!88 = metadata !{i32 590080, metadata !83, metadata !"__r0", metadata !1, i32 1063, metadata !33, i32 0} ; [ DW_TAG_auto_variable ]
-!89 = metadata !{i32 590080, metadata !83, metadata !"__m", metadata !1, i32 1063, metadata !33, i32 0} ; [ DW_TAG_auto_variable ]
-!90 = metadata !{i32 590080, metadata !91, metadata !"__x0", metadata !1, i32 1064, metadata !33, i32 0} ; [ DW_TAG_auto_variable ]
-!91 = metadata !{i32 589835, metadata !80, i32 1064, i32 0, metadata !1, i32 10} ; [ DW_TAG_lexical_block ]
-!92 = metadata !{i32 590080, metadata !91, metadata !"__x1", metadata !1, i32 1064, metadata !33, i32 0} ; [ DW_TAG_auto_variable ]
-!93 = metadata !{i32 590080, metadata !91, metadata !"__x2", metadata !1, i32 1064, metadata !33, i32 0} ; [ DW_TAG_auto_variable ]
-!94 = metadata !{i32 590080, metadata !91, metadata !"__x3", metadata !1, i32 1064, metadata !33, i32 0} ; [ DW_TAG_auto_variable ]
-!95 = metadata !{i32 590080, metadata !91, metadata !"__ul", metadata !1, i32 1064, metadata !33, i32 0} ; [ DW_TAG_auto_variable ]
-!96 = metadata !{i32 590080, metadata !91, metadata !"__vl", metadata !1, i32 1064, metadata !33, i32 0} ; [ DW_TAG_auto_variable ]
-!97 = metadata !{i32 590080, metadata !91, metadata !"__uh", metadata !1, i32 1064, metadata !33, i32 0} ; [ DW_TAG_auto_variable ]
-!98 = metadata !{i32 590080, metadata !91, metadata !"__vh", metadata !1, i32 1064, metadata !33, i32 0} ; [ DW_TAG_auto_variable ]
-!99 = metadata !{i32 590080, metadata !100, metadata !"__x", metadata !1, i32 1069, metadata !33, i32 0} ; [ DW_TAG_auto_variable ]
-!100 = metadata !{i32 589835, metadata !80, i32 1069, i32 0, metadata !1, i32 11} ; [ DW_TAG_lexical_block ]
-!101 = metadata !{i32 590080, metadata !102, metadata !"__x", metadata !1, i32 1077, metadata !33, i32 0} ; [ DW_TAG_auto_variable ]
-!102 = metadata !{i32 589835, metadata !80, i32 1077, i32 0, metadata !1, i32 12} ; [ DW_TAG_lexical_block ]
-!103 = metadata !{i32 590081, metadata !9, metadata !"u", metadata !1, i32 1093, metadata !12, i32 0} ; [ DW_TAG_arg_variable ]
-!104 = metadata !{i32 590081, metadata !9, metadata !"v", metadata !1, i32 1093, metadata !12, i32 0} ; [ DW_TAG_arg_variable ]
-!105 = metadata !{i32 590080, metadata !106, metadata !"c", metadata !1, i32 1095, metadata !107, i32 0} ; [ DW_TAG_auto_variable ]
-!106 = metadata !{i32 589835, metadata !9, i32 1094, i32 0, metadata !1, i32 13} ; [ DW_TAG_lexical_block ]
-!107 = metadata !{i32 589846, metadata !6, metadata !"word_type", metadata !6, i32 424, i64 0, i64 0, i64 0, i32 0, metadata !27} ; [ DW_TAG_typedef ]
-!108 = metadata !{i32 590080, metadata !106, metadata !"uu", metadata !1, i32 1096, metadata !19, i32 0} ; [ DW_TAG_auto_variable ]
-!109 = metadata !{i32 590080, metadata !106, metadata !"vv", metadata !1, i32 1097, metadata !19, i32 0} ; [ DW_TAG_auto_variable ]
-!110 = metadata !{i32 590080, metadata !106, metadata !"w", metadata !1, i32 1098, metadata !12, i32 0} ; [ DW_TAG_auto_variable ]
-!111 = metadata !{i32 1093, i32 0, metadata !9, null}
-!112 = metadata !{i32 1096, i32 0, metadata !106, null}
-!113 = metadata !{i32 1097, i32 0, metadata !106, null}
-!114 = metadata !{i64 0}
-!115 = metadata !{i32 1095, i32 0, metadata !106, null}
-!116 = metadata !{i32 1100, i32 0, metadata !106, null}
-!117 = metadata !{i64 -1}
-!118 = metadata !{i32 1101, i32 0, metadata !106, null}
-!119 = metadata !{i32 1103, i32 0, metadata !106, null}
-!120 = metadata !{i32 1104, i32 0, metadata !106, null}
-!121 = metadata !{i32 878, i32 0, metadata !0, metadata !122}
-!122 = metadata !{i32 1107, i32 0, metadata !106, null}
-!123 = metadata !{i128* null}
-!124 = metadata !{i32 880, i32 0, metadata !18, metadata !122}
-!125 = metadata !{i32 881, i32 0, metadata !18, metadata !122}
-!126 = metadata !{i32 882, i32 0, metadata !18, metadata !122}
-!127 = metadata !{i32 1086, i32 0, metadata !18, metadata !122}
-!128 = metadata !{i32 887, i32 0, metadata !18, metadata !122}
-!129 = metadata !{i32 888, i32 0, metadata !18, metadata !122}
-!130 = metadata !{i32 889, i32 0, metadata !18, metadata !122}
-!131 = metadata !{i32 890, i32 0, metadata !18, metadata !122}
-!132 = metadata !{i32 927, i32 0, metadata !18, metadata !122}
-!133 = metadata !{i32 929, i32 0, metadata !18, metadata !122}
-!134 = metadata !{i32 933, i32 0, metadata !45, metadata !122}
-!135 = metadata !{null}
-!136 = metadata !{i32 935, i32 0, metadata !18, metadata !122}
-!137 = metadata !{i32 940, i32 0, metadata !18, metadata !122}
-!138 = metadata !{i32 941, i32 0, metadata !18, metadata !122}
-!139 = metadata !{i32 942, i32 0, metadata !18, metadata !122}
-!140 = metadata !{i32 945, i32 0, metadata !48, metadata !122}
-!141 = metadata !{i32 946, i32 0, metadata !18, metadata !122}
-!142 = metadata !{i32 954, i32 0, metadata !18, metadata !122}
-!143 = metadata !{i32 955, i32 0, metadata !18, metadata !122}
-!144 = metadata !{i32 957, i32 0, metadata !56, metadata !122}
-!145 = metadata !{i64 56}
-!146 = metadata !{i32 959, i32 0, metadata !18, metadata !122}
-!147 = metadata !{i32 968, i32 0, metadata !18, metadata !122}
-!148 = metadata !{i64 1}
-!149 = metadata !{i32 969, i32 0, metadata !18, metadata !122}
-!150 = metadata !{i32 975, i32 0, metadata !18, metadata !122}
-!151 = metadata !{i32 977, i32 0, metadata !18, metadata !122}
-!152 = metadata !{i32 978, i32 0, metadata !18, metadata !122}
-!153 = metadata !{i32 979, i32 0, metadata !18, metadata !122}
-!154 = metadata !{i32 980, i32 0, metadata !18, metadata !122}
-!155 = metadata !{i32 982, i32 0, metadata !59, metadata !122}
-!156 = metadata !{i32 987, i32 0, metadata !67, metadata !122}
-!157 = metadata !{i32 1003, i32 0, metadata !18, metadata !122}
-!158 = metadata !{i32 1022, i32 0, metadata !75, metadata !122}
-!159 = metadata !{i32 1023, i32 0, metadata !18, metadata !122}
-!160 = metadata !{i32 1033, i32 0, metadata !18, metadata !122}
-!161 = metadata !{i32 1041, i32 0, metadata !18, metadata !122}
-!162 = metadata !{i32 1055, i32 0, metadata !80, metadata !122}
-!163 = metadata !{i32 1057, i32 0, metadata !80, metadata !122}
-!164 = metadata !{i32 1058, i32 0, metadata !80, metadata !122}
-!165 = metadata !{i32 1059, i32 0, metadata !80, metadata !122}
-!166 = metadata !{i32 1060, i32 0, metadata !80, metadata !122}
-!167 = metadata !{i32 1061, i32 0, metadata !80, metadata !122}
-!168 = metadata !{i32 1063, i32 0, metadata !83, metadata !122}
-!169 = metadata !{i32 1064, i32 0, metadata !91, metadata !122}
-!170 = metadata !{i32 1066, i32 0, metadata !80, metadata !122}
-!171 = metadata !{i32 1068, i32 0, metadata !80, metadata !122}
-!172 = metadata !{i32 1069, i32 0, metadata !100, metadata !122}
-!173 = metadata !{i32 1108, i32 0, metadata !106, null}
-!174 = metadata !{i32 1109, i32 0, metadata !106, null}
-!175 = metadata !{i32 1111, i32 0, metadata !106, null}
+!14 = metadata !{i32 590081, metadata !9, metadata !"u", metadata !1, i32 1093, metadata !12, i32 0} ; [ DW_TAG_arg_variable ]
+!15 = metadata !{i32 1093, i32 0, metadata !9, null}
+!16 = metadata !{i64 0}
+!17 = metadata !{i32 590080, metadata !18, metadata !"c", metadata !1, i32 1095, metadata !19, i32 0} ; [ DW_TAG_auto_variable ]
+!18 = metadata !{i32 589835, metadata !9, i32 1094, i32 0, metadata !1, i32 13} ; [ DW_TAG_lexical_block ]
+!19 = metadata !{i32 589846, metadata !6, metadata !"word_type", metadata !6, i32 424, i64 0, i64 0, i64 0, i32 0, metadata !20} ; [ DW_TAG_typedef ]
+!20 = metadata !{i32 589860, metadata !1, metadata !"long int", metadata !1, i32 0, i64 64, i64 64, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!21 = metadata !{i32 1095, i32 0, metadata !18, null}
+!22 = metadata !{i32 1103, i32 0, metadata !18, null}
+!23 = metadata !{i32 1104, i32 0, metadata !18, null}
+!24 = metadata !{i32 1003, i32 0, metadata !25, metadata !26}
+!25 = metadata !{i32 589835, metadata !0, i32 879, i32 0, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
+!26 = metadata !{i32 1107, i32 0, metadata !18, null}
+!27 = metadata !{i32 1111, i32 0, metadata !18, null}
From evan.cheng at apple.com Mon Feb 7 12:50:47 2011
From: evan.cheng at apple.com (Evan Cheng)
Date: Mon, 07 Feb 2011 18:50:47 -0000
Subject: [llvm-commits] [llvm] r125023 - in /llvm/trunk:
lib/Target/ARM/ARMISelLowering.cpp test/CodeGen/ARM/atomic-cmp.ll
Message-ID: <20110207185048.013BE2A6C12D@llvm.org>
Author: evancheng
Date: Mon Feb 7 12:50:47 2011
New Revision: 125023
URL: http://llvm.org/viewvc/llvm-project?rev=125023&view=rev
Log:
Fix an obvious typo which caused an isel assertion. rdar://8964854.
Added:
llvm/trunk/test/CodeGen/ARM/atomic-cmp.ll
Modified:
llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp?rev=125023&r1=125022&r2=125023&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Mon Feb 7 12:50:47 2011
@@ -4344,7 +4344,7 @@
default: llvm_unreachable("unsupported size for AtomicCmpSwap!");
case 1:
ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
- strOpc = isThumb2 ? ARM::t2LDREXB : ARM::STREXB;
+ strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB;
break;
case 2:
ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
Added: llvm/trunk/test/CodeGen/ARM/atomic-cmp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/atomic-cmp.ll?rev=125023&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/atomic-cmp.ll (added)
+++ llvm/trunk/test/CodeGen/ARM/atomic-cmp.ll Mon Feb 7 12:50:47 2011
@@ -0,0 +1,17 @@
+; RUN: llc < %s -mtriple=armv7-apple-darwin | FileCheck %s -check-prefix=ARM
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s -check-prefix=T2
+; rdar://8964854
+
+define i8 @t(i8* %a, i8 %b, i8 %c) nounwind {
+; ARM: t:
+; ARM: ldrexb
+; ARM: strexb
+
+; T2: t:
+; T2: ldrexb
+; T2: strexb
+ %tmp0 = tail call i8 @llvm.atomic.cmp.swap.i8.p0i8(i8* %a, i8 %b, i8 %c)
+ ret i8 %tmp0
+}
+
+declare i8 @llvm.atomic.cmp.swap.i8.p0i8(i8* nocapture, i8, i8) nounwind
From jason.w.kim.2009 at gmail.com Mon Feb 7 13:07:11 2011
From: jason.w.kim.2009 at gmail.com (Jason W Kim)
Date: Mon, 07 Feb 2011 19:07:11 -0000
Subject: [llvm-commits] [llvm] r125025 -
/llvm/trunk/lib/Target/ARM/ARMAsmPrinter.cpp
Message-ID: <20110207190711.AEFED2A6C12C@llvm.org>
Author: jasonwkim
Date: Mon Feb 7 13:07:11 2011
New Revision: 125025
URL: http://llvm.org/viewvc/llvm-project?rev=125025&view=rev
Log:
ARM/MC/ELF Lowercase .cpu attributes in .s, but make them uppercase in .o
Modified:
llvm/trunk/lib/Target/ARM/ARMAsmPrinter.cpp
Modified: llvm/trunk/lib/Target/ARM/ARMAsmPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMAsmPrinter.cpp?rev=125025&r1=125024&r2=125025&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMAsmPrinter.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMAsmPrinter.cpp Mon Feb 7 13:07:11 2011
@@ -86,7 +86,7 @@
void EmitTextAttribute(unsigned Attribute, StringRef String) {
switch (Attribute) {
case ARMBuildAttrs::CPU_name:
- Streamer.EmitRawText(StringRef("\t.cpu ") + String);
+ Streamer.EmitRawText(StringRef("\t.cpu ") + LowercaseString(String));
break;
default: assert(0 && "Unsupported Text attribute in ASM Mode"); break;
}
@@ -126,7 +126,7 @@
void EmitTextAttribute(unsigned Attribute, StringRef String) {
Contents += Attribute;
- Contents += String;
+ Contents += UppercaseString(String);
Contents += 0;
}
@@ -467,7 +467,7 @@
if (CPUString == "cortex-a8" ||
Subtarget->isCortexA8()) {
- AttrEmitter->EmitTextAttribute(ARMBuildAttrs::CPU_name, "CORTEX-A8");
+ AttrEmitter->EmitTextAttribute(ARMBuildAttrs::CPU_name, "cortex-a8");
AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v7);
AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch_profile,
ARMBuildAttrs::ApplicationProfile);
From greened at obbligato.org Mon Feb 7 13:36:54 2011
From: greened at obbligato.org (David Greene)
Date: Mon, 07 Feb 2011 19:36:54 -0000
Subject: [llvm-commits] [llvm] r125029 -
/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
Message-ID: <20110207193654.94A702A6C12C@llvm.org>
Author: greened
Date: Mon Feb 7 13:36:54 2011
New Revision: 125029
URL: http://llvm.org/viewvc/llvm-project?rev=125029&view=rev
Log:
[AVX] Insert/extract subvector lowering support. This includes a
couple of utility functions that will be used in other places for more
AVX lowering.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=125029&r1=125028&r2=125029&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Mon Feb 7 13:36:54 2011
@@ -60,6 +60,97 @@
static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1,
SDValue V2);
+static SDValue Insert128BitVector(SDValue Result,
+ SDValue Vec,
+ SDValue Idx,
+ SelectionDAG &DAG,
+ DebugLoc dl);
+static SDValue Extract128BitVector(SDValue Vec,
+ SDValue Idx,
+ SelectionDAG &DAG,
+ DebugLoc dl);
+
+/// Generate a DAG to grab 128-bits from a vector > 128 bits. This
+/// sets things up to match to an AVX VEXTRACTF128 instruction or a
+/// simple subregister reference.
+static SDValue Extract128BitVector(SDValue Vec,
+ SDValue Idx,
+ SelectionDAG &DAG,
+ DebugLoc dl) {
+ EVT VT = Vec.getValueType();
+ assert(VT.getSizeInBits() == 256 && "Unexpected vector size!");
+
+ EVT ElVT = VT.getVectorElementType();
+
+ int Factor = VT.getSizeInBits() / 128;
+
+ EVT ResultVT = EVT::getVectorVT(*DAG.getContext(),
+ ElVT,
+ VT.getVectorNumElements() / Factor);
+
+ // Extract from UNDEF is UNDEF.
+ if (Vec.getOpcode() == ISD::UNDEF)
+ return DAG.getNode(ISD::UNDEF, dl, ResultVT);
+
+ if (isa<ConstantSDNode>(Idx)) {
+ unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+
+ // Extract the relevant 128 bits. Generate an EXTRACT_SUBVECTOR
+ // we can match to VEXTRACTF128.
+ unsigned ElemsPerChunk = 128 / ElVT.getSizeInBits();
+
+ // This is the index of the first element of the 128-bit chunk
+ // we want.
+ unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits()) / 128)
+ * ElemsPerChunk);
+
+ SDValue VecIdx = DAG.getConstant(NormalizedIdxVal, MVT::i32);
+
+ SDValue Result = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec,
+ VecIdx);
+
+ return Result;
+ }
+
+ return SDValue();
+}
+
+/// Generate a DAG to put 128-bits into a vector > 128 bits. This
+/// sets things up to match to an AVX VINSERTF128 instruction or a
+/// simple superregister reference.
+static SDValue Insert128BitVector(SDValue Result,
+ SDValue Vec,
+ SDValue Idx,
+ SelectionDAG &DAG,
+ DebugLoc dl) {
+ if (isa<ConstantSDNode>(Idx)) {
+ EVT VT = Vec.getValueType();
+ assert(VT.getSizeInBits() == 128 && "Unexpected vector size!");
+
+ EVT ElVT = VT.getVectorElementType();
+
+ unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+
+ EVT ResultVT = Result.getValueType();
+
+ // Insert the relevant 128 bits.
+ unsigned ElemsPerChunk = 128 / ElVT.getSizeInBits();
+
+ // This is the index of the first element of the 128-bit chunk
+ // we want.
+ unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits()) / 128)
+ * ElemsPerChunk);
+
+ SDValue VecIdx = DAG.getConstant(NormalizedIdxVal, MVT::i32);
+
+ Result = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec,
+ VecIdx);
+ return Result;
+ }
+
+ return SDValue();
+}
+
static TargetLoweringObjectFile *createTLOF(X86TargetMachine &TM) {
const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>();
bool is64Bit = Subtarget->is64Bit();
@@ -4189,6 +4280,7 @@
SDValue
X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
+
// All zero's are handled with pxor in SSE2 and above, xorps in SSE1.
// All one's are handled with pcmpeqd. In AVX, zero's are handled with
// vpxor in 128-bit and xor{pd,ps} in 256-bit, but no 256 version of pcmpeqd
@@ -5918,7 +6010,14 @@
SDValue
X86TargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const {
if (Subtarget->hasAVX()) {
- // TODO
+ DebugLoc dl = Op.getNode()->getDebugLoc();
+ SDValue Vec = Op.getNode()->getOperand(0);
+ SDValue Idx = Op.getNode()->getOperand(1);
+
+ if (Op.getNode()->getValueType(0).getSizeInBits() == 128
+ && Vec.getNode()->getValueType(0).getSizeInBits() == 256) {
+ return Extract128BitVector(Vec, Idx, DAG, dl);
+ }
}
return SDValue();
}
@@ -5936,7 +6035,7 @@
if (Op.getNode()->getValueType(0).getSizeInBits() == 256
&& SubVec.getNode()->getValueType(0).getSizeInBits() == 128) {
- // TODO
+ return Insert128BitVector(Vec, SubVec, Idx, DAG, dl);
}
}
return SDValue();
From bruno.cardoso at gmail.com Mon Feb 7 13:38:32 2011
From: bruno.cardoso at gmail.com (Bruno Cardoso Lopes)
Date: Mon, 07 Feb 2011 19:38:32 -0000
Subject: [llvm-commits] [llvm] r125030 - in /llvm/trunk:
include/llvm/Target/Target.td utils/TableGen/AsmMatcherEmitter.cpp
Message-ID: <20110207193832.B7F862A6C12C@llvm.org>
Author: bruno
Date: Mon Feb 7 13:38:32 2011
New Revision: 125030
URL: http://llvm.org/viewvc/llvm-project?rev=125030&view=rev
Log:
Implement support for custom target specific asm parsing of operands.
Motivation: Improve the parsing of unusual (different from registers or
immediates) operand forms.
This commit implements only the generic support. The ARM specific modifications
will come next.
A table like the one below is autogenerated for every instruction
containing a 'ParserMethod' in its AsmOperandClass
static const OperandMatchEntry OperandMatchTable[20] = {
/* Mnemonic, Operand List Mask, Operand Class, Features */
{ "cdp", 29 /* 0, 2, 3, 4 */, MCK_Coproc, Feature_IsThumb|Feature_HasV6 },
{ "cdp", 58 /* 1, 3, 4, 5 */, MCK_Coproc, Feature_IsARM },
A matcher function very similar to (but a lot more naive than)
MatchInstructionImpl scans the table. After the mnemonic match, the
features are checked and if the "to be parsed" operand index is
present in the mask, there's a real match. Then, a switch like the one
below dispatches the parsing to the custom method provided in
'ParserMethod':
case MCK_Coproc:
return TryParseCoprocessorOperandName(Operands);
Modified:
llvm/trunk/include/llvm/Target/Target.td
llvm/trunk/utils/TableGen/AsmMatcherEmitter.cpp
Modified: llvm/trunk/include/llvm/Target/Target.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/Target.td?rev=125030&r1=125029&r2=125030&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Target/Target.td (original)
+++ llvm/trunk/include/llvm/Target/Target.td Mon Feb 7 13:38:32 2011
@@ -341,6 +341,12 @@
/// signature should be:
/// void addFooOperands(MCInst &Inst, unsigned N) const;
string RenderMethod = ?;
+
+ /// The name of the method on the target specific operand to call to custom
+ /// handle the operand parsing. This is useful when the operands do not relate
+ /// to immediates or registers and are very instruction specific (as flags to
+ /// set in a processor register, coprocessor number, ...).
+ string ParserMethod = ?;
}
def ImmAsmOperand : AsmOperandClass {
Modified: llvm/trunk/utils/TableGen/AsmMatcherEmitter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/TableGen/AsmMatcherEmitter.cpp?rev=125030&r1=125029&r2=125030&view=diff
==============================================================================
--- llvm/trunk/utils/TableGen/AsmMatcherEmitter.cpp (original)
+++ llvm/trunk/utils/TableGen/AsmMatcherEmitter.cpp Mon Feb 7 13:38:32 2011
@@ -8,7 +8,11 @@
//===----------------------------------------------------------------------===//
//
// This tablegen backend emits a target specifier matcher for converting parsed
-// assembly operands in the MCInst structures.
+// assembly operands in the MCInst structures. It also emits a matcher for
+// custom operand parsing.
+//
+// Converting assembly operands into MCInst structures
+// ---------------------------------------------------
//
// The input to the target specific matcher is a list of literal tokens and
// operands. The target specific parser should generally eliminate any syntax
@@ -68,6 +72,28 @@
// instruction (we currently ignore cases where this isn't true, whee!!!),
// which we can emit a simple matcher for.
//
+// Custom Operand Parsing
+// ----------------------
+//
+// Some targets need a custom way to parse operands, some specific instructions
+// can contain arguments that can represent processor flags and other kinds of
+// identifiers that need to be mapped to specific values in the final encoded
+// instructions. The target specific custom operand parsing works in the
+// following way:
+//
+// 1. An operand match table is built, each entry contains a mnemonic, an
+// operand class, a mask for all operand positions for that same
+// class/mnemonic and target features to be checked while trying to match.
+//
+// 2. The operand matcher will try every possible entry with the same
+// mnemonic and will check if the target feature for this mnemonic also
+// matches. After that, if the operand to be matched has its index
+// present in the mask, a successful match occurs. Otherwise, fallback
+// to the regular operand parsing.
+//
+// 3. For a match success, each operand class that has a 'ParserMethod'
+// becomes part of a switch from where the custom method is called.
+//
//===----------------------------------------------------------------------===//
#include "AsmMatcherEmitter.h"
@@ -141,6 +167,10 @@
/// MCInst; this is not valid for Token or register kinds.
std::string RenderMethod;
+ /// ParserMethod - The name of the operand method to do a target specific
+ /// parsing on the operand.
+ std::string ParserMethod;
+
/// For register classes, the records for all the registers in this class.
std::set Registers;
@@ -499,6 +529,22 @@
}
};
+struct OperandMatchEntry {
+ unsigned OperandMask;
+ MatchableInfo* MI;
+ ClassInfo *CI;
+
+ static OperandMatchEntry Create(MatchableInfo* mi, ClassInfo *ci,
+ unsigned opMask) {
+ OperandMatchEntry X;
+ X.OperandMask = opMask;
+ X.CI = ci;
+ X.MI = mi;
+ return X;
+ }
+};
+
+
class AsmMatcherInfo {
public:
/// Tracked Records
@@ -519,6 +565,9 @@
/// The information on the matchables to match.
std::vector Matchables;
+ /// Info for custom matching operands by user defined methods.
+ std::vector OperandMatchInfo;
+
/// Map of Register records to their class information.
std::map RegisterClasses;
@@ -564,6 +613,10 @@
/// BuildInfo - Construct the various tables used during matching.
void BuildInfo();
+ /// BuildOperandMatchInfo - Build the necessary information to handle user
+ /// defined operand parsing methods.
+ void BuildOperandMatchInfo();
+
/// getSubtargetFeature - Lookup or create the subtarget feature info for the
/// given operand.
SubtargetFeatureInfo *getSubtargetFeature(Record *Def) const {
@@ -803,6 +856,7 @@
Entry->ValueName = Token;
Entry->PredicateMethod = "";
Entry->RenderMethod = "";
+ Entry->ParserMethod = "";
Classes.push_back(Entry);
}
@@ -1003,6 +1057,11 @@
CI->RenderMethod = "add" + CI->ClassName + "Operands";
}
+ // Get the parse method name or leave it as empty.
+ Init *PRMName = (*it)->getValueInit("ParserMethod");
+ if (StringInit *SI = dynamic_cast(PRMName))
+ CI->ParserMethod = SI->getValue();
+
AsmOperandClasses[*it] = CI;
Classes.push_back(CI);
}
@@ -1015,6 +1074,40 @@
RegisterPrefix(AsmParser->getValueAsString("RegisterPrefix")) {
}
+/// BuildOperandMatchInfo - Build the necessary information to handle user
+/// defined operand parsing methods.
+void AsmMatcherInfo::BuildOperandMatchInfo() {
+
+ /// Map containing a mask with all operands indicies that can be found for
+ /// that class inside a instruction.
+ std::map OpClassMask;
+
+ for (std::vector::const_iterator it =
+ Matchables.begin(), ie = Matchables.end();
+ it != ie; ++it) {
+ MatchableInfo &II = **it;
+ OpClassMask.clear();
+
+ // Keep track of all operands of this instructions which belong to the
+ // same class.
+ for (unsigned i = 0, e = II.AsmOperands.size(); i != e; ++i) {
+ MatchableInfo::AsmOperand &Op = II.AsmOperands[i];
+ if (Op.Class->ParserMethod.empty())
+ continue;
+ unsigned &OperandMask = OpClassMask[Op.Class];
+ OperandMask |= (1 << i);
+ }
+
+ // Generate operand match info for each mnemonic/operand class pair.
+ for (std::map::iterator iit = OpClassMask.begin(),
+ iie = OpClassMask.end(); iit != iie; ++iit) {
+ unsigned OpMask = iit->second;
+ ClassInfo *CI = iit->first;
+ OperandMatchInfo.push_back(OperandMatchEntry::Create(&II, CI, OpMask));
+ }
+ }
+}
+
void AsmMatcherInfo::BuildInfo() {
// Build information about all of the AssemblerPredicates.
std::vector AllPredicates =
@@ -1859,6 +1952,155 @@
return true;
}
+static void EmitCustomOperandParsing(raw_ostream &OS, CodeGenTarget &Target,
+ const AsmMatcherInfo &Info, StringRef ClassName) {
+ // Emit the static custom operand parsing table;
+ OS << "namespace {\n";
+ OS << " struct OperandMatchEntry {\n";
+ OS << " const char *Mnemonic;\n";
+ OS << " unsigned OperandMask;\n";
+ OS << " MatchClassKind Class;\n";
+ OS << " unsigned RequiredFeatures;\n";
+ OS << " };\n\n";
+
+ OS << " // Predicate for searching for an opcode.\n";
+ OS << " struct LessOpcodeOperand {\n";
+ OS << " bool operator()(const OperandMatchEntry &LHS, StringRef RHS) {\n";
+ OS << " return StringRef(LHS.Mnemonic) < RHS;\n";
+ OS << " }\n";
+ OS << " bool operator()(StringRef LHS, const OperandMatchEntry &RHS) {\n";
+ OS << " return LHS < StringRef(RHS.Mnemonic);\n";
+ OS << " }\n";
+ OS << " bool operator()(const OperandMatchEntry &LHS,";
+ OS << " const OperandMatchEntry &RHS) {\n";
+ OS << " return StringRef(LHS.Mnemonic) < StringRef(RHS.Mnemonic);\n";
+ OS << " }\n";
+ OS << " };\n";
+
+ OS << "} // end anonymous namespace.\n\n";
+
+ OS << "static const OperandMatchEntry OperandMatchTable["
+ << Info.OperandMatchInfo.size() << "] = {\n";
+
+ OS << " /* Mnemonic, Operand List Mask, Operand Class, Features */\n";
+ for (std::vector::const_iterator it =
+ Info.OperandMatchInfo.begin(), ie = Info.OperandMatchInfo.end();
+ it != ie; ++it) {
+ const OperandMatchEntry &OMI = *it;
+ const MatchableInfo &II = *OMI.MI;
+
+ OS << " { \"" << II.Mnemonic << "\""
+ << ", " << OMI.OperandMask;
+
+ OS << " /* ";
+ bool printComma = false;
+ for (int i = 0, e = 31; i !=e; ++i)
+ if (OMI.OperandMask & (1 << i)) {
+ if (printComma)
+ OS << ", ";
+ OS << i;
+ printComma = true;
+ }
+ OS << " */";
+
+ OS << ", " << OMI.CI->Name
+ << ", ";
+
+ // Write the required features mask.
+ if (!II.RequiredFeatures.empty()) {
+ for (unsigned i = 0, e = II.RequiredFeatures.size(); i != e; ++i) {
+ if (i) OS << "|";
+ OS << II.RequiredFeatures[i]->getEnumName();
+ }
+ } else
+ OS << "0";
+ OS << " },\n";
+ }
+ OS << "};\n\n";
+
+ // Emit the operand class switch to call the correct custom parser for
+ // the found operand class.
+ OS << "bool " << Target.getName() << ClassName << "::\n"
+ << "TryCustomParseOperand(SmallVectorImpl"
+ << " &Operands,\n unsigned MCK) {\n\n"
+ << " switch(MCK) {\n";
+
+ for (std::vector::const_iterator it = Info.Classes.begin(),
+ ie = Info.Classes.end(); it != ie; ++it) {
+ ClassInfo *CI = *it;
+ if (CI->ParserMethod.empty())
+ continue;
+ OS << " case " << CI->Name << ":\n"
+ << " return " << CI->ParserMethod << "(Operands);\n";
+ }
+
+ OS << " default:\n";
+ OS << " return true;\n";
+ OS << " }\n";
+ OS << " return true;\n";
+ OS << "}\n\n";
+
+ // Emit the static custom operand parser. This code is very similar with
+ // the other matcher. Also use MatchResultTy here just in case we go for
+ // a better error handling.
+ OS << Target.getName() << ClassName << "::MatchResultTy "
+ << Target.getName() << ClassName << "::\n"
+ << "MatchOperandParserImpl(SmallVectorImpl"
+ << " &Operands,\n StringRef Mnemonic) {\n";
+
+ // Emit code to get the available features.
+ OS << " // Get the current feature set.\n";
+ OS << " unsigned AvailableFeatures = getAvailableFeatures();\n\n";
+
+ OS << " // Get the next operand index.\n";
+ OS << " unsigned NextOpNum = Operands.size()-1;\n";
+
+ OS << " // Some state to try to produce better error messages.\n";
+ OS << " bool HadMatchOtherThanFeatures = false;\n\n";
+
+ // Emit code to search the table.
+ OS << " // Search the table.\n";
+ OS << " std::pair";
+ OS << " MnemonicRange =\n";
+ OS << " std::equal_range(OperandMatchTable, OperandMatchTable+"
+ << Info.OperandMatchInfo.size() << ", Mnemonic,\n"
+ << " LessOpcodeOperand());\n\n";
+
+ OS << " // Return a more specific error code if no mnemonics match.\n";
+ OS << " if (MnemonicRange.first == MnemonicRange.second)\n";
+ OS << " return Match_MnemonicFail;\n\n";
+
+ OS << " for (const OperandMatchEntry *it = MnemonicRange.first,\n"
+ << " *ie = MnemonicRange.second; it != ie; ++it) {\n";
+
+ OS << " // equal_range guarantees that instruction mnemonic matches.\n";
+ OS << " assert(Mnemonic == it->Mnemonic);\n\n";
+
+ // Emit check that the required features are available.
+ OS << " // check if the available features match\n";
+ OS << " if ((AvailableFeatures & it->RequiredFeatures) "
+ << "!= it->RequiredFeatures) {\n";
+ OS << " HadMatchOtherThanFeatures = true;\n";
+ OS << " continue;\n";
+ OS << " }\n\n";
+
+ // Emit check to ensure the operand number matches.
+ OS << " // check if the operand in question has a custom parser.\n";
+ OS << " if (!(it->OperandMask & (1 << NextOpNum)))\n";
+ OS << " continue;\n\n";
+
+ // Emit call to the custom parser method
+ OS << " // call custom parse method to handle the operand\n";
+ OS << " if (!TryCustomParseOperand(Operands, it->Class))\n";
+ OS << " return Match_Success;\n";
+ OS << " }\n\n";
+
+ OS << " // Okay, we had no match. Try to return a useful error code.\n";
+ OS << " if (HadMatchOtherThanFeatures) return Match_MissingFeature;\n";
+ OS << " return Match_InvalidOperand;\n";
+ OS << "}\n\n";
+}
+
void AsmMatcherEmitter::run(raw_ostream &OS) {
CodeGenTarget Target(Records);
Record *AsmParser = Target.getAsmParser();
@@ -1904,6 +2146,9 @@
<< " ambiguous matchables!\n";
});
+ // Compute the information on the custom operand parsing.
+ Info.BuildOperandMatchInfo();
+
// Write the output.
EmitSourceFileHeader("Assembly Matcher Source Fragment", OS);
@@ -1929,7 +2174,18 @@
OS << " bool MnemonicIsValid(StringRef Mnemonic);\n";
OS << " MatchResultTy MatchInstructionImpl(\n";
OS << " const SmallVectorImpl &Operands,\n";
- OS << " MCInst &Inst, unsigned &ErrorInfo);\n\n";
+ OS << " MCInst &Inst, unsigned &ErrorInfo);\n";
+
+ if (Info.OperandMatchInfo.size()) {
+ OS << " MatchResultTy MatchOperandParserImpl(\n";
+ OS << " SmallVectorImpl &Operands,\n";
+ OS << " StringRef Mnemonic);\n";
+
+ OS << " bool TryCustomParseOperand(\n";
+ OS << " SmallVectorImpl &Operands,\n";
+ OS << " unsigned MCK);\n\n";
+ }
+
OS << "#endif // GET_ASSEMBLER_HEADER_INFO\n\n";
OS << "\n#ifdef GET_REGISTER_MATCHER\n";
@@ -1994,7 +2250,7 @@
OS << " unsigned RequiredFeatures;\n";
OS << " };\n\n";
- OS << "// Predicate for searching for an opcode.\n";
+ OS << " // Predicate for searching for an opcode.\n";
OS << " struct LessOpcode {\n";
OS << " bool operator()(const MatchEntry &LHS, StringRef RHS) {\n";
OS << " return StringRef(LHS.Mnemonic) < RHS;\n";
@@ -2164,5 +2420,8 @@
OS << " return Match_InvalidOperand;\n";
OS << "}\n\n";
+ if (Info.OperandMatchInfo.size())
+ EmitCustomOperandParsing(OS, Target, Info, ClassName);
+
OS << "#endif // GET_MATCHER_IMPLEMENTATION\n\n";
}
From sabre at nondot.org Mon Feb 7 14:03:14 2011
From: sabre at nondot.org (Chris Lattner)
Date: Mon, 07 Feb 2011 20:03:14 -0000
Subject: [llvm-commits] [llvm] r125047 - /llvm/trunk/lib/VMCore/Constants.cpp
Message-ID: <20110207200314.8B5802A6C12C@llvm.org>
Author: lattner
Date: Mon Feb 7 14:03:14 2011
New Revision: 125047
URL: http://llvm.org/viewvc/llvm-project?rev=125047&view=rev
Log:
fix comment change.
Modified:
llvm/trunk/lib/VMCore/Constants.cpp
Modified: llvm/trunk/lib/VMCore/Constants.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/Constants.cpp?rev=125047&r1=125046&r2=125047&view=diff
==============================================================================
--- llvm/trunk/lib/VMCore/Constants.cpp (original)
+++ llvm/trunk/lib/VMCore/Constants.cpp Mon Feb 7 14:03:14 2011
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements the Constant *classes.
+// This file implements the Constant* classes.
//
//===----------------------------------------------------------------------===//
From fvbommel at gmail.com Mon Feb 7 14:12:03 2011
From: fvbommel at gmail.com (Frits van Bommel)
Date: Mon, 7 Feb 2011 21:12:03 +0100
Subject: [llvm-commits] [Review request] Have Correlated Value Propagation
handle instruction operands.
Message-ID:
This patch teaches -correlated-propagation to check whether LVI knows
instruction operands to be constant, and if so to replace them. It
also adds a call to SimplifyInstruction afterwards so that the special
handling of some instructions (select, load, store) is no longer
needed.
This fixes the -correlated-propagation problem mentioned in comment 1
of PR 9004[1] and is an alternative implementation of "A small pass to
constant fold branch conditions in destination blocks"[2] which
integrates with the existing pass as Duncan suggested.
Since it leverages LVI, this also handles the 'if (x == 0) use(x)'
case mentioned in that post as well as the 'x > 10' followed by 'x >
5' case I mentioned in a follow-up (and doesn't need a domtree).
It passes 'make check-all'.
[1]: http://llvm.org/bugs/show_bug.cgi?id=9004
[2]: http://lists.cs.uiuc.edu/pipermail/llvmdev/2011-February/037884.html
-------------- next part --------------
A non-text attachment was scrubbed...
Name: correlated-value-prop-operands.patch
Type: text/x-patch
Size: 6396 bytes
Desc: not available
Url : http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20110207/d45a717b/attachment-0001.bin
From jasonwkim at google.com Mon Feb 7 14:39:21 2011
From: jasonwkim at google.com (Jason Kim)
Date: Mon, 7 Feb 2011 12:39:21 -0800
Subject: [llvm-commits] [PATCH] s/CORTEX-A8/cortex-a8/ in
ARMAsmPrinter.cpp
In-Reply-To:
References:
Message-ID:
I just committed a likely fix.
Thank you Edmund.
-jason
On Mon, Feb 7, 2011 at 8:59 AM, Edmund Grimley-Evans
wrote:
> Jason Kim:
>
>> Gosh darn it, at least one version likes upper case in the
>> .ARM.attributes. for ELF.o
>> I guess we can put in lower case in .s, and upper case in .o?
>
> That seems to be what the CodeSourcery tools do. I tested a more recent version, too.
> --
> IMPORTANT NOTICE: The contents of this email and any attachments are confidential and may also be privileged. If you are not the intended recipient, please notify the sender immediately and do not disclose the contents to any other person, use it for any purpose, or store or copy the information in any medium. Thank you.
>
From baldrick at free.fr Mon Feb 7 14:56:27 2011
From: baldrick at free.fr (Duncan Sands)
Date: Mon, 07 Feb 2011 21:56:27 +0100
Subject: [llvm-commits] [llvm] r125006 - in /llvm/trunk:
docs/LangRef.html include/llvm/Constants.h include/llvm/Operator.h
lib/AsmParser/LLParser.cpp lib/Bitcode/Reader/BitcodeReader.cpp
lib/VMCore/Constants.cpp test/Assembler/2003-05-21-MalformedShiftCrash.ll
test/Assembler/flags.ll
In-Reply-To: <20110207164022.3F67C2A6C12D@llvm.org>
References: <20110207164022.3F67C2A6C12D@llvm.org>
Message-ID: <4D505C7B.6090907@free.fr>
Hi Chris,
> @@ -3704,6 +3707,14 @@
> vectors, each vector element ofop1 is shifted by the corresponding
> shift amount inop2.
>
> +If thenuw keyword is present, then the shift produces a
> +trap value if it shifts out any non-zero bits. If
> + thensw keywrod is present, then the shift produces a
keywrod -> keyword
Ciao, Duncan.
From bruno.cardoso at gmail.com Mon Feb 7 15:41:26 2011
From: bruno.cardoso at gmail.com (Bruno Cardoso Lopes)
Date: Mon, 07 Feb 2011 21:41:26 -0000
Subject: [llvm-commits] [llvm] r125052 - in /llvm/trunk/lib/Target/ARM:
ARMInstrInfo.td AsmParser/ARMAsmParser.cpp
Message-ID: <20110207214126.135612A6C12C@llvm.org>
Author: bruno
Date: Mon Feb 7 15:41:25 2011
New Revision: 125052
URL: http://llvm.org/viewvc/llvm-project?rev=125052&view=rev
Log:
Remove the MCR asm parser hack and start using the custom target-specific asm
operand parsing introduced in r125030. As a side note, besides being a more
generic approach, this also gives more descriptive output when debugging
llvm-mc, for example:
mcr p7, #1, r5, c1, c1, #4
note: parsed instruction:
['mcr', ,
,
1,
,
,
,
4]
Modified:
llvm/trunk/lib/Target/ARM/ARMInstrInfo.td
llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
Modified: llvm/trunk/lib/Target/ARM/ARMInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrInfo.td?rev=125052&r1=125051&r2=125052&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMInstrInfo.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMInstrInfo.td Mon Feb 7 15:41:25 2011
@@ -588,12 +588,26 @@
let PrintMethod = "printNoHashImmediate";
}
+def CoprocNumAsmOperand : AsmOperandClass {
+ let Name = "CoprocNum";
+ let SuperClasses = [];
+ let ParserMethod = "ParseCoprocNumOperand";
+}
+
+def CoprocRegAsmOperand : AsmOperandClass {
+ let Name = "CoprocReg";
+ let SuperClasses = [];
+ let ParserMethod = "ParseCoprocRegOperand";
+}
+
def p_imm : Operand {
let PrintMethod = "printPImmediate";
+ let ParserMatchClass = CoprocNumAsmOperand;
}
def c_imm : Operand {
let PrintMethod = "printCImmediate";
+ let ParserMatchClass = CoprocRegAsmOperand;
}
//===----------------------------------------------------------------------===//
Modified: llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp?rev=125052&r1=125051&r2=125052&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp (original)
+++ llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp Mon Feb 7 15:41:25 2011
@@ -54,11 +54,12 @@
int TryParseRegister();
virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
- bool TryParseCoprocessorOperandName(SmallVectorImpl&);
bool TryParseRegisterWithWriteBack(SmallVectorImpl &);
+ bool ParseCoprocNumOperand(SmallVectorImpl&);
+ bool ParseCoprocRegOperand(SmallVectorImpl&);
bool ParseRegisterList(SmallVectorImpl &);
bool ParseMemory(SmallVectorImpl &);
- bool ParseOperand(SmallVectorImpl &, bool hasCoprocOp);
+ bool ParseOperand(SmallVectorImpl &, StringRef Mnemonic);
bool ParsePrefix(ARMMCExpr::VariantKind &RefKind);
const MCExpr *ApplyPrefixToExpr(const MCExpr *E,
MCSymbolRefExpr::VariantKind Variant);
@@ -115,6 +116,8 @@
enum KindTy {
CondCode,
CCOut,
+ CoprocNum,
+ CoprocReg,
Immediate,
Memory,
Register,
@@ -133,6 +136,10 @@
} CC;
struct {
+ unsigned Val;
+ } Cop;
+
+ struct {
const char *Data;
unsigned Length;
} Tok;
@@ -185,6 +192,10 @@
case SPRRegisterList:
Registers = o.Registers;
break;
+ case CoprocNum:
+ case CoprocReg:
+ Cop = o.Cop;
+ break;
case Immediate:
Imm = o.Imm;
break;
@@ -204,6 +215,11 @@
return CC.Val;
}
+ unsigned getCoproc() const {
+ assert((Kind == CoprocNum || Kind == CoprocReg) && "Invalid access!");
+ return Cop.Val;
+ }
+
StringRef getToken() const {
assert(Kind == Token && "Invalid access!");
return StringRef(Tok.Data, Tok.Length);
@@ -259,6 +275,8 @@
/// @}
+ bool isCoprocNum() const { return Kind == CoprocNum; }
+ bool isCoprocReg() const { return Kind == CoprocReg; }
bool isCondCode() const { return Kind == CondCode; }
bool isCCOut() const { return Kind == CCOut; }
bool isImm() const { return Kind == Immediate; }
@@ -314,6 +332,16 @@
Inst.addOperand(MCOperand::CreateReg(RegNum));
}
+ void addCoprocNumOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm(getCoproc()));
+ }
+
+ void addCoprocRegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm(getCoproc()));
+ }
+
void addCCOutOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
Inst.addOperand(MCOperand::CreateReg(getReg()));
@@ -391,6 +419,22 @@
return Op;
}
+ static ARMOperand *CreateCoprocNum(unsigned CopVal, SMLoc S) {
+ ARMOperand *Op = new ARMOperand(CoprocNum);
+ Op->Cop.Val = CopVal;
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+
+ static ARMOperand *CreateCoprocReg(unsigned CopVal, SMLoc S) {
+ ARMOperand *Op = new ARMOperand(CoprocReg);
+ Op->Cop.Val = CopVal;
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+
static ARMOperand *CreateCCOut(unsigned RegNum, SMLoc S) {
ARMOperand *Op = new ARMOperand(CCOut);
Op->Reg.RegNum = RegNum;
@@ -492,6 +536,12 @@
case CCOut:
OS << "";
break;
+ case CoprocNum:
+ OS << "";
+ break;
+ case CoprocReg:
+ OS << "";
+ break;
case Immediate:
getImm()->print(OS);
break;
@@ -609,13 +659,16 @@
return false;
}
-static int MatchCoprocessorOperandName(StringRef Name) {
+/// MatchCoprocessorOperandName - Try to parse an coprocessor related
+/// instruction with a symbolic operand name. Example: "p1", "p7", "c3",
+/// "c5", ...
+static int MatchCoprocessorOperandName(StringRef Name, char CoprocOp) {
// Use the same layout as the tablegen'erated register name matcher. Ugly,
// but efficient.
switch (Name.size()) {
default: break;
case 2:
- if (Name[0] != 'p' && Name[0] != 'c')
+ if (Name[0] != CoprocOp)
return -1;
switch (Name[1]) {
default: return -1;
@@ -632,7 +685,7 @@
}
break;
case 3:
- if ((Name[0] != 'p' && Name[0] != 'c') || Name[1] != '1')
+ if (Name[0] != CoprocOp || Name[1] != '1')
return -1;
switch (Name[2]) {
default: return -1;
@@ -650,24 +703,39 @@
return -1;
}
-/// TryParseCoprocessorOperandName - Try to parse an coprocessor related
-/// instruction with a symbolic operand name. The token must be an Identifier
-/// when called, and if it is a coprocessor related operand name, the token is
-/// eaten and the operand is added to the operand list. Example: operands like
-/// "p1", "p7", "c3", "c5", ...
+/// ParseCoprocNumOperand - Try to parse an coprocessor number operand. The
+/// token must be an Identifier when called, and if it is a coprocessor
+/// number, the token is eaten and the operand is added to the operand list.
bool ARMAsmParser::
-TryParseCoprocessorOperandName(SmallVectorImpl &Operands) {
+ParseCoprocNumOperand(SmallVectorImpl &Operands) {
SMLoc S = Parser.getTok().getLoc();
const AsmToken &Tok = Parser.getTok();
assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
- int Num = MatchCoprocessorOperandName(Tok.getString());
+ int Num = MatchCoprocessorOperandName(Tok.getString(), 'p');
if (Num == -1)
return true;
Parser.Lex(); // Eat identifier token.
- Operands.push_back(ARMOperand::CreateImm(
- MCConstantExpr::Create(Num, getContext()), S, Parser.getTok().getLoc()));
+ Operands.push_back(ARMOperand::CreateCoprocNum(Num, S));
+ return false;
+}
+
+/// ParseCoprocRegOperand - Try to parse an coprocessor register operand. The
+/// token must be an Identifier when called, and if it is a coprocessor
+/// number, the token is eaten and the operand is added to the operand list.
+bool ARMAsmParser::
+ParseCoprocRegOperand(SmallVectorImpl &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+ const AsmToken &Tok = Parser.getTok();
+ assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
+
+ int Reg = MatchCoprocessorOperandName(Tok.getString(), 'c');
+ if (Reg == -1)
+ return true;
+
+ Parser.Lex(); // Eat identifier token.
+ Operands.push_back(ARMOperand::CreateCoprocReg(Reg, S));
return false;
}
@@ -974,8 +1042,15 @@
/// Parse a arm instruction operand. For now this parses the operand regardless
/// of the mnemonic.
bool ARMAsmParser::ParseOperand(SmallVectorImpl &Operands,
- bool hasCoprocOp){
+ StringRef Mnemonic) {
SMLoc S, E;
+
+ // Check if the current operand has a custom associated parser, if so, try to
+ // custom parse the operand, or fallback to the general approach.
+ MatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
+ if (ResTy == Match_Success)
+ return false;
+
switch (getLexer().getKind()) {
default:
Error(Parser.getTok().getLoc(), "unexpected token in operand");
@@ -983,8 +1058,6 @@
case AsmToken::Identifier:
if (!TryParseRegisterWithWriteBack(Operands))
return false;
- if (hasCoprocOp && !TryParseCoprocessorOperandName(Operands))
- return false;
// Fall though for the Identifier case that is not a register or a
// special name.
@@ -1273,22 +1346,10 @@
Operands.push_back(ARMOperand::CreateToken(Head, NameLoc));
}
- // Enable the parsing of instructions containing coprocessor related
- // asm syntax, such as coprocessor names "p7, p15, ..." and coprocessor
- // registers "c1, c3, ..."
- // FIXME: we probably want AsmOperandClass and ParserMatchClass declarations
- // in the .td file rather than hacking the ASMParser for every symbolic
- // operand type.
- bool hasCoprocOp = (Head == "mcr" || Head == "mcr2" ||
- Head == "mcrr" || Head == "mcrr2" ||
- Head == "mrc" || Head == "mrc2" ||
- Head == "mrrc" || Head == "mrrc2" ||
- Head == "cdp" || Head == "cdp2");
-
// Read the remaining operands.
if (getLexer().isNot(AsmToken::EndOfStatement)) {
// Read the first operand.
- if (ParseOperand(Operands, hasCoprocOp)) {
+ if (ParseOperand(Operands, Head)) {
Parser.EatToEndOfStatement();
return true;
}
@@ -1297,7 +1358,7 @@
Parser.Lex(); // Eat the comma.
// Parse and remember the operand.
- if (ParseOperand(Operands, hasCoprocOp)) {
+ if (ParseOperand(Operands, Head)) {
Parser.EatToEndOfStatement();
return true;
}
From dpatel at apple.com Mon Feb 7 15:58:52 2011
From: dpatel at apple.com (Devang Patel)
Date: Mon, 07 Feb 2011 21:58:52 -0000
Subject: [llvm-commits] [llvm] r125054 -
/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
Message-ID: <20110207215852.3EA0E2A6C12C@llvm.org>
Author: dpatel
Date: Mon Feb 7 15:58:52 2011
New Revision: 125054
URL: http://llvm.org/viewvc/llvm-project?rev=125054&view=rev
Log:
Remove comment about an argument that was removed a couple of years ago.
Modified:
llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp?rev=125054&r1=125053&r2=125054&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp Mon Feb 7 15:58:52 2011
@@ -3879,7 +3879,6 @@
}
/// getMergeValues - Create a MERGE_VALUES node from the given operands.
-/// Allowed to return something different (and simpler) if Simplify is true.
SDValue SelectionDAG::getMergeValues(const SDValue *Ops, unsigned NumOps,
DebugLoc dl) {
if (NumOps == 1)
From bruno.cardoso at gmail.com Mon Feb 7 16:09:15 2011
From: bruno.cardoso at gmail.com (Bruno Cardoso Lopes)
Date: Mon, 07 Feb 2011 22:09:15 -0000
Subject: [llvm-commits] [llvm] r125055 - in /llvm/trunk:
lib/Target/ARM/ARMInstrFormats.td lib/Target/ARM/ARMInstrInfo.td
lib/Target/ARM/AsmParser/ARMAsmParser.cpp test/MC/ARM/arm_instructions.s
test/MC/ARM/thumb2.s
Message-ID: <20110207220915.D4BF12A6C12C@llvm.org>
Author: bruno
Date: Mon Feb 7 16:09:15 2011
New Revision: 125055
URL: http://llvm.org/viewvc/llvm-project?rev=125055&view=rev
Log:
Add support for parsing dmb/dsb instructions
Modified:
llvm/trunk/lib/Target/ARM/ARMInstrFormats.td
llvm/trunk/lib/Target/ARM/ARMInstrInfo.td
llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
llvm/trunk/test/MC/ARM/arm_instructions.s
llvm/trunk/test/MC/ARM/thumb2.s
Modified: llvm/trunk/lib/Target/ARM/ARMInstrFormats.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrFormats.td?rev=125055&r1=125054&r2=125055&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMInstrFormats.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMInstrFormats.td Mon Feb 7 16:09:15 2011
@@ -149,6 +149,12 @@
let SuperClasses = [];
}
+def MemBarrierOptOperand : AsmOperandClass {
+ let Name = "MemBarrierOpt";
+ let SuperClasses = [];
+ let ParserMethod = "ParseMemBarrierOptOperand";
+}
+
// ARM Predicate operand. Default to 14 = always (AL). Second part is CC
// register whose default is 0 (no register).
def pred : PredicateOperand {
let PrintMethod = "printMemBOption";
+ let ParserMatchClass = MemBarrierOptOperand;
}
// memory barriers protect the atomic sequences
Modified: llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp?rev=125055&r1=125054&r2=125055&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp (original)
+++ llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp Mon Feb 7 16:09:15 2011
@@ -58,6 +58,7 @@
bool ParseCoprocNumOperand(SmallVectorImpl&);
bool ParseCoprocRegOperand(SmallVectorImpl&);
bool ParseRegisterList(SmallVectorImpl &);
+ bool ParseMemBarrierOptOperand(SmallVectorImpl &);
bool ParseMemory(SmallVectorImpl &);
bool ParseOperand(SmallVectorImpl &, StringRef Mnemonic);
bool ParsePrefix(ARMMCExpr::VariantKind &RefKind);
@@ -119,6 +120,7 @@
CoprocNum,
CoprocReg,
Immediate,
+ MemBarrierOpt,
Memory,
Register,
RegisterList,
@@ -136,6 +138,10 @@
} CC;
struct {
+ ARM_MB::MemBOpt Val;
+ } MBOpt;
+
+ struct {
unsigned Val;
} Cop;
@@ -199,6 +205,9 @@
case Immediate:
Imm = o.Imm;
break;
+ case MemBarrierOpt:
+ MBOpt = o.MBOpt;
+ break;
case Memory:
Mem = o.Mem;
break;
@@ -241,6 +250,11 @@
return Imm.Val;
}
+ ARM_MB::MemBOpt getMemBarrierOpt() const {
+ assert(Kind == MemBarrierOpt && "Invalid access!");
+ return MBOpt.Val;
+ }
+
/// @name Memory Operand Accessors
/// @{
@@ -285,6 +299,7 @@
bool isDPRRegList() const { return Kind == DPRRegisterList; }
bool isSPRRegList() const { return Kind == SPRRegisterList; }
bool isToken() const { return Kind == Token; }
+ bool isMemBarrierOpt() const { return Kind == MemBarrierOpt; }
bool isMemory() const { return Kind == Memory; }
bool isMemMode5() const {
if (!isMemory() || getMemOffsetIsReg() || getMemWriteback() ||
@@ -373,6 +388,11 @@
addExpr(Inst, getImm());
}
+ void addMemBarrierOptOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm(unsigned(getMemBarrierOpt())));
+ }
+
void addMemMode5Operands(MCInst &Inst, unsigned N) const {
assert(N == 2 && isMemMode5() && "Invalid number of operands!");
@@ -524,6 +544,14 @@
Op->EndLoc = E;
return Op;
}
+
+ static ARMOperand *CreateMemBarrierOpt(ARM_MB::MemBOpt Opt, SMLoc S) {
+ ARMOperand *Op = new ARMOperand(MemBarrierOpt);
+ Op->MBOpt.Val = Opt;
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
};
} // end anonymous namespace.
@@ -545,6 +573,9 @@
case Immediate:
getImm()->print(OS);
break;
+ case MemBarrierOpt:
+ OS << "";
+ break;
case Memory:
OS << " &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+ const AsmToken &Tok = Parser.getTok();
+ assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
+ StringRef OptStr = Tok.getString();
+
+ unsigned Opt = StringSwitch(OptStr.slice(0, OptStr.size()))
+ .Case("sy", ARM_MB::SY)
+ .Case("st", ARM_MB::ST)
+ .Case("ish", ARM_MB::ISH)
+ .Case("ishst", ARM_MB::ISHST)
+ .Case("nsh", ARM_MB::NSH)
+ .Case("nshst", ARM_MB::NSHST)
+ .Case("osh", ARM_MB::OSH)
+ .Case("oshst", ARM_MB::OSHST)
+ .Default(~0U);
+
+ if (Opt == ~0U)
+ return true;
+
+ Parser.Lex(); // Eat identifier token.
+ Operands.push_back(ARMOperand::CreateMemBarrierOpt((ARM_MB::MemBOpt)Opt, S));
+ return false;
+}
+
/// Parse an ARM memory expression, return false if successful else return true
/// or an error. The first token must be a '[' when called.
///
Modified: llvm/trunk/test/MC/ARM/arm_instructions.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/ARM/arm_instructions.s?rev=125055&r1=125054&r2=125055&view=diff
==============================================================================
--- llvm/trunk/test/MC/ARM/arm_instructions.s (original)
+++ llvm/trunk/test/MC/ARM/arm_instructions.s Mon Feb 7 16:09:15 2011
@@ -188,3 +188,52 @@
@ CHECK: nop @ encoding: [0x00,0xf0,0x20,0xe3]
nop
+
+@ CHECK: dmb sy @ encoding: [0x5f,0xf0,0x7f,0xf5]
+ dmb sy
+
+@ CHECK: dmb st @ encoding: [0x5e,0xf0,0x7f,0xf5]
+ dmb st
+
+@ CHECK: dmb ish @ encoding: [0x5b,0xf0,0x7f,0xf5]
+ dmb ish
+
+@ CHECK: dmb ishst @ encoding: [0x5a,0xf0,0x7f,0xf5]
+ dmb ishst
+
+@ CHECK: dmb nsh @ encoding: [0x57,0xf0,0x7f,0xf5]
+ dmb nsh
+
+@ CHECK: dmb nshst @ encoding: [0x56,0xf0,0x7f,0xf5]
+ dmb nshst
+
+@ CHECK: dmb osh @ encoding: [0x53,0xf0,0x7f,0xf5]
+ dmb osh
+
+@ CHECK: dmb oshst @ encoding: [0x52,0xf0,0x7f,0xf5]
+ dmb oshst
+
+@ CHECK: dsb sy @ encoding: [0x4f,0xf0,0x7f,0xf5]
+ dsb sy
+
+@ CHECK: dsb st @ encoding: [0x4e,0xf0,0x7f,0xf5]
+ dsb st
+
+@ CHECK: dsb ish @ encoding: [0x4b,0xf0,0x7f,0xf5]
+ dsb ish
+
+@ CHECK: dsb ishst @ encoding: [0x4a,0xf0,0x7f,0xf5]
+ dsb ishst
+
+@ CHECK: dsb nsh @ encoding: [0x47,0xf0,0x7f,0xf5]
+ dsb nsh
+
+@ CHECK: dsb nshst @ encoding: [0x46,0xf0,0x7f,0xf5]
+ dsb nshst
+
+@ CHECK: dsb osh @ encoding: [0x43,0xf0,0x7f,0xf5]
+ dsb osh
+
+@ CHECK: dsb oshst @ encoding: [0x42,0xf0,0x7f,0xf5]
+ dsb oshst
+
Modified: llvm/trunk/test/MC/ARM/thumb2.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/ARM/thumb2.s?rev=125055&r1=125054&r2=125055&view=diff
==============================================================================
--- llvm/trunk/test/MC/ARM/thumb2.s (original)
+++ llvm/trunk/test/MC/ARM/thumb2.s Mon Feb 7 16:09:15 2011
@@ -92,15 +92,6 @@
@ CHECK: pkhtb r0, r0, r1, asr #22 @ encoding: [0xa1,0x50,0xc0,0xea]
pkhtb r0, r0, r1, asr #22
-@ CHECK: dmb st @ encoding: [0x5e,0x8f,0xbf,0xf3]
- dmb st
-@ CHECK: dmb sy @ encoding: [0x5f,0x8f,0xbf,0xf3]
- dmb sy
-@ CHECK: dmb ishst @ encoding: [0x5a,0x8f,0xbf,0xf3]
- dmb ishst
-@ CHECK: dmb ish @ encoding: [0x5b,0x8f,0xbf,0xf3]
- dmb ish
-
@ CHECK: str.w r0, [r1, #4092] @ encoding: [0xfc,0x0f,0xc1,0xf8]
str.w r0, [r1, #4092]
@ CHECK: str r0, [r1, #-128] @ encoding: [0x80,0x0c,0x41,0xf8]
@@ -227,3 +218,37 @@
@ CHECK: wfi.w @ encoding: [0xaf,0xf3,0x03,0x80]
wfi.w
+@ CHECK: dmb sy @ encoding: [0xbf,0xf3,0x5f,0x8f]
+ dmb sy
+@ CHECK: dmb st @ encoding: [0xbf,0xf3,0x5e,0x8f]
+ dmb st
+@ CHECK: dmb ish @ encoding: [0xbf,0xf3,0x5b,0x8f]
+ dmb ish
+@ CHECK: dmb ishst @ encoding: [0xbf,0xf3,0x5a,0x8f]
+ dmb ishst
+@ CHECK: dmb nsh @ encoding: [0xbf,0xf3,0x57,0x8f]
+ dmb nsh
+@ CHECK: dmb nshst @ encoding: [0xbf,0xf3,0x56,0x8f]
+ dmb nshst
+@ CHECK: dmb osh @ encoding: [0xbf,0xf3,0x53,0x8f]
+ dmb osh
+@ CHECK: dmb oshst @ encoding: [0xbf,0xf3,0x52,0x8f]
+ dmb oshst
+
+@ CHECK: dsb sy @ encoding: [0xbf,0xf3,0x4f,0x8f]
+ dsb sy
+@ CHECK: dsb st @ encoding: [0xbf,0xf3,0x4e,0x8f]
+ dsb st
+@ CHECK: dsb ish @ encoding: [0xbf,0xf3,0x4b,0x8f]
+ dsb ish
+@ CHECK: dsb ishst @ encoding: [0xbf,0xf3,0x4a,0x8f]
+ dsb ishst
+@ CHECK: dsb nsh @ encoding: [0xbf,0xf3,0x47,0x8f]
+ dsb nsh
+@ CHECK: dsb nshst @ encoding: [0xbf,0xf3,0x46,0x8f]
+ dsb nshst
+@ CHECK: dsb osh @ encoding: [0xbf,0xf3,0x43,0x8f]
+ dsb osh
+@ CHECK: dsb oshst @ encoding: [0xbf,0xf3,0x42,0x8f]
+ dsb oshst
+
From benny.kra at googlemail.com Mon Feb 7 16:37:28 2011
From: benny.kra at googlemail.com (Benjamin Kramer)
Date: Mon, 07 Feb 2011 22:37:28 -0000
Subject: [llvm-commits] [llvm] r125056 - in /llvm/trunk:
lib/Transforms/Utils/SimplifyCFG.cpp
test/Transforms/SimplifyCFG/switch-to-icmp.ll
test/Transforms/SimplifyCFG/switch_create.ll
test/Transforms/SimplifyCFG/switch_formation.dbg.ll
Message-ID: <20110207223728.75F4E2A6C12C@llvm.org>
Author: d0k
Date: Mon Feb 7 16:37:28 2011
New Revision: 125056
URL: http://llvm.org/viewvc/llvm-project?rev=125056&view=rev
Log:
SimplifyCFG: Track the number of used icmps when turning an icmp chain into a switch. If we used only one icmp, don't turn it into a switch.
Also prevent the switch-to-icmp transform from creating identity adds, noticed by Marius Wachtler.
Modified:
llvm/trunk/lib/Transforms/Utils/SimplifyCFG.cpp
llvm/trunk/test/Transforms/SimplifyCFG/switch-to-icmp.ll
llvm/trunk/test/Transforms/SimplifyCFG/switch_create.ll
llvm/trunk/test/Transforms/SimplifyCFG/switch_formation.dbg.ll
Modified: llvm/trunk/lib/Transforms/Utils/SimplifyCFG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Utils/SimplifyCFG.cpp?rev=125056&r1=125055&r2=125056&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Utils/SimplifyCFG.cpp (original)
+++ llvm/trunk/lib/Transforms/Utils/SimplifyCFG.cpp Mon Feb 7 16:37:28 2011
@@ -305,7 +305,7 @@
/// Values vector.
static Value *
GatherConstantCompares(Value *V, std::vector &Vals, Value *&Extra,
- const TargetData *TD, bool isEQ) {
+ const TargetData *TD, bool isEQ, unsigned &UsedICmps) {
Instruction *I = dyn_cast(V);
if (I == 0) return 0;
@@ -313,6 +313,7 @@
if (ICmpInst *ICI = dyn_cast(I)) {
if (ConstantInt *C = GetConstantInt(I->getOperand(1), TD)) {
if (ICI->getPredicate() == (isEQ ? ICmpInst::ICMP_EQ:ICmpInst::ICMP_NE)) {
+ UsedICmps++;
Vals.push_back(C);
return I->getOperand(0);
}
@@ -335,6 +336,7 @@
for (APInt Tmp = Span.getLower(); Tmp != Span.getUpper(); ++Tmp)
Vals.push_back(ConstantInt::get(V->getContext(), Tmp));
+ UsedICmps++;
return I->getOperand(0);
}
return 0;
@@ -345,14 +347,17 @@
return 0;
unsigned NumValsBeforeLHS = Vals.size();
+ unsigned UsedICmpsBeforeLHS = UsedICmps;
if (Value *LHS = GatherConstantCompares(I->getOperand(0), Vals, Extra, TD,
- isEQ)) {
+ isEQ, UsedICmps)) {
unsigned NumVals = Vals.size();
+ unsigned UsedICmpsBeforeRHS = UsedICmps;
if (Value *RHS = GatherConstantCompares(I->getOperand(1), Vals, Extra, TD,
- isEQ)) {
+ isEQ, UsedICmps)) {
if (LHS == RHS)
return LHS;
Vals.resize(NumVals);
+ UsedICmps = UsedICmpsBeforeRHS;
}
// The RHS of the or/and can't be folded in and we haven't used "Extra" yet,
@@ -363,6 +368,7 @@
}
Vals.resize(NumValsBeforeLHS);
+ UsedICmps = UsedICmpsBeforeLHS;
return 0;
}
@@ -372,7 +378,7 @@
Value *OldExtra = Extra;
Extra = I->getOperand(0);
if (Value *RHS = GatherConstantCompares(I->getOperand(1), Vals, Extra, TD,
- isEQ))
+ isEQ, UsedICmps))
return RHS;
assert(Vals.size() == NumValsBeforeLHS);
Extra = OldExtra;
@@ -1926,17 +1932,24 @@
std::vector Values;
bool TrueWhenEqual = true;
Value *ExtraCase = 0;
+ unsigned UsedICmps = 0;
if (Cond->getOpcode() == Instruction::Or) {
- CompVal = GatherConstantCompares(Cond, Values, ExtraCase, TD, true);
+ CompVal = GatherConstantCompares(Cond, Values, ExtraCase, TD, true,
+ UsedICmps);
} else if (Cond->getOpcode() == Instruction::And) {
- CompVal = GatherConstantCompares(Cond, Values, ExtraCase, TD, false);
+ CompVal = GatherConstantCompares(Cond, Values, ExtraCase, TD, false,
+ UsedICmps);
TrueWhenEqual = false;
}
// If we didn't have a multiply compared value, fail.
if (CompVal == 0) return false;
+ // Avoid turning single icmps into a switch.
+ if (UsedICmps <= 1)
+ return false;
+
// There might be duplicate constants in the list, which the switch
// instruction can't handle, remove them now.
array_pod_sort(Values.begin(), Values.end(), ConstantIntSortPredicate);
@@ -2262,7 +2275,9 @@
Constant *Offset = ConstantExpr::getNeg(Cases.back());
Constant *NumCases = ConstantInt::get(Offset->getType(), SI->getNumCases()-1);
- Value *Sub = BinaryOperator::CreateAdd(SI->getCondition(), Offset, "off", SI);
+ Value *Sub = SI->getCondition();
+ if (!Offset->isNullValue())
+ Sub = BinaryOperator::CreateAdd(Sub, Offset, Sub->getName()+".off", SI);
Value *Cmp = new ICmpInst(SI, ICmpInst::ICMP_ULT, Sub, NumCases, "switch");
BranchInst::Create(SI->getSuccessor(1), SI->getDefaultDest(), Cmp, SI);
Modified: llvm/trunk/test/Transforms/SimplifyCFG/switch-to-icmp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SimplifyCFG/switch-to-icmp.ll?rev=125056&r1=125055&r2=125056&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/SimplifyCFG/switch-to-icmp.ll (original)
+++ llvm/trunk/test/Transforms/SimplifyCFG/switch-to-icmp.ll Mon Feb 7 16:37:28 2011
@@ -16,8 +16,8 @@
ret i1 %0
; CHECK: @test1
-; CHECK: %off = add i32 %x, -1
-; CHECK: %switch = icmp ult i32 %off, 3
+; CHECK: %x.off = add i32 %x, -1
+; CHECK: %switch = icmp ult i32 %x.off, 3
}
define zeroext i1 @test2(i32 %x) nounwind readnone ssp noredzone {
@@ -35,6 +35,5 @@
ret i1 %0
; CHECK: @test2
-; CHECK: %off = add i32 %x, 0
-; CHECK: %switch = icmp ult i32 %off, 2
+; CHECK: %switch = icmp ult i32 %x, 2
}
Modified: llvm/trunk/test/Transforms/SimplifyCFG/switch_create.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SimplifyCFG/switch_create.ll?rev=125056&r1=125055&r2=125056&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/SimplifyCFG/switch_create.ll (original)
+++ llvm/trunk/test/Transforms/SimplifyCFG/switch_create.ll Mon Feb 7 16:37:28 2011
@@ -141,8 +141,8 @@
ret i1 %UnifiedRetVal
; CHECK: @test6
-; CHECK: %off = add i32 %tmp.2.i, -14
-; CHECK: %switch = icmp ult i32 %off, 6
+; CHECK: %tmp.2.i.off = add i32 %tmp.2.i, -14
+; CHECK: %switch = icmp ult i32 %tmp.2.i.off, 6
}
define void @test7(i8 zeroext %c, i32 %x) nounwind ssp noredzone {
@@ -441,8 +441,8 @@
define zeroext i1 @test16(i32 %x) nounwind {
entry:
; CHECK: @test16
-; CHECK: %off = add i32 %x, -1
-; CHECK: %switch = icmp ult i32 %off, 3
+; CHECK: %x.off = add i32 %x, -1
+; CHECK: %switch = icmp ult i32 %x.off, 3
%cmp.i = icmp eq i32 %x, 1
br i1 %cmp.i, label %lor.end, label %lor.lhs.false
@@ -458,3 +458,24 @@
%0 = phi i1 [ true, %lor.lhs.false ], [ true, %entry ], [ %cmp.i1, %lor.rhs ]
ret i1 %0
}
+
+; Check that we don't turn an icmp into a switch where it's not useful.
+define void @test17(i32 %x, i32 %y) {
+ %cmp = icmp ult i32 %x, 3
+ %switch = icmp ult i32 %y, 2
+ %or.cond775 = or i1 %cmp, %switch
+ br i1 %or.cond775, label %lor.lhs.false8, label %return
+
+lor.lhs.false8:
+ tail call void @foo1()
+ ret void
+
+return:
+ ret void
+
+; CHECK: @test17
+; CHECK-NOT: switch.early.test
+; CHECK-NOT: switch i32
+; CHECK: ret void
+}
+
Modified: llvm/trunk/test/Transforms/SimplifyCFG/switch_formation.dbg.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SimplifyCFG/switch_formation.dbg.ll?rev=125056&r1=125055&r2=125056&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/SimplifyCFG/switch_formation.dbg.ll (original)
+++ llvm/trunk/test/Transforms/SimplifyCFG/switch_formation.dbg.ll Mon Feb 7 16:37:28 2011
@@ -14,8 +14,8 @@
define i1 @t({ i32, i32 }* %I) {
; CHECK: @t
-; CHECK: %off = add i32 %tmp.2.i, -14
-; CHECK: %switch = icmp ult i32 %off, 6
+; CHECK: %tmp.2.i.off = add i32 %tmp.2.i, -14
+; CHECK: %switch = icmp ult i32 %tmp.2.i.off, 6
entry:
%tmp.1.i = getelementptr { i32, i32 }* %I, i64 0, i32 1 ; [#uses=1]
%tmp.2.i = load i32* %tmp.1.i ; [#uses=6]
From stoklund at 2pi.dk Mon Feb 7 18:03:05 2011
From: stoklund at 2pi.dk (Jakob Stoklund Olesen)
Date: Tue, 08 Feb 2011 00:03:05 -0000
Subject: [llvm-commits] [llvm] r125058 - in /llvm/trunk:
include/llvm/CodeGen/LiveIntervalAnalysis.h
lib/CodeGen/LiveIntervalAnalysis.cpp
lib/CodeGen/SimpleRegisterCoalescing.cpp
lib/CodeGen/SimpleRegisterCoalescing.h
Message-ID: <20110208000305.B39962A6C12C@llvm.org>
Author: stoklund
Date: Mon Feb 7 18:03:05 2011
New Revision: 125058
URL: http://llvm.org/viewvc/llvm-project?rev=125058&view=rev
Log:
Add LiveIntervals::shrinkToUses().
After uses of a live range are removed, recompute the live range to only cover
the remaining uses. This is necessary after rematerializing the value before
some (but not all) uses.
Modified:
llvm/trunk/include/llvm/CodeGen/LiveIntervalAnalysis.h
llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp
llvm/trunk/lib/CodeGen/SimpleRegisterCoalescing.cpp
llvm/trunk/lib/CodeGen/SimpleRegisterCoalescing.h
Modified: llvm/trunk/include/llvm/CodeGen/LiveIntervalAnalysis.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/LiveIntervalAnalysis.h?rev=125058&r1=125057&r2=125058&view=diff
==============================================================================
--- llvm/trunk/include/llvm/CodeGen/LiveIntervalAnalysis.h (original)
+++ llvm/trunk/include/llvm/CodeGen/LiveIntervalAnalysis.h Mon Feb 7 18:03:05 2011
@@ -163,6 +163,12 @@
LiveRange addLiveRangeToEndOfBlock(unsigned reg,
MachineInstr* startInst);
+ /// shrinkToUses - After removing some uses of a register, shrink its live
+ /// range to just the remaining uses. This method does not compute reaching
+ /// defs for new uses, and it doesn't remove dead defs.
+ /// Dead PHIDef values are marked as unused.
+ void shrinkToUses(LiveInterval *li);
+
// Interval removal
void removeInterval(unsigned Reg) {
Modified: llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp?rev=125058&r1=125057&r2=125058&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp (original)
+++ llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp Mon Feb 7 18:03:05 2011
@@ -742,6 +742,128 @@
return NewLI;
}
+/// shrinkToUses - After removing some uses of a register, shrink its live
+/// range to just the remaining uses. This method does not compute reaching
+/// defs for new uses or remove dead defs. Dead PHIDef values are marked unused.
+void LiveIntervals::shrinkToUses(LiveInterval *li) {
+ DEBUG(dbgs() << "Shrink: " << *li << '\n');
+ assert(TargetRegisterInfo::isVirtualRegister(li->reg)
+ && "Can only shrink virtual registers");
+ // Find all the values used, including PHI kills.
+ SmallVector<std::pair<SlotIndex, VNInfo*>, 16> WorkList;
+
+ // Visit all instructions reading li->reg.
+ for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(li->reg);
+ MachineInstr *UseMI = I.skipInstruction();) {
+ if (UseMI->isDebugValue() || !UseMI->readsVirtualRegister(li->reg))
+ continue;
+ SlotIndex Idx = getInstructionIndex(UseMI).getUseIndex();
+ VNInfo *VNI = li->getVNInfoAt(Idx);
+ assert(VNI && "Live interval not live into reading instruction");
+ if (VNI->def == Idx) {
+ // Special case: An early-clobber tied operand reads and writes the
+ // register one slot early.
+ Idx = Idx.getPrevSlot();
+ VNI = li->getVNInfoAt(Idx);
+ assert(VNI && "Early-clobber tied value not available");
+ }
+ WorkList.push_back(std::make_pair(Idx, VNI));
+ }
+
+ // Create a new live interval with only minimal live segments per def.
+ LiveInterval NewLI(li->reg, 0);
+ for (LiveInterval::vni_iterator I = li->vni_begin(), E = li->vni_end();
+ I != E; ++I) {
+ VNInfo *VNI = *I;
+ if (VNI->isUnused())
+ continue;
+ NewLI.addRange(LiveRange(VNI->def, VNI->def.getNextSlot(), VNI));
+ }
+
+ // Extend intervals to reach all uses in WorkList.
+ while (!WorkList.empty()) {
+ SlotIndex Idx = WorkList.back().first;
+ VNInfo *VNI = WorkList.back().second;
+ WorkList.pop_back();
+
+ // Extend the live range for VNI to be live at Idx.
+ LiveInterval::iterator I = NewLI.find(Idx);
+
+ // Already got it?
+ if (I != NewLI.end() && I->start <= Idx) {
+ assert(I->valno == VNI && "Unexpected existing value number");
+ continue;
+ }
+
+ // Is there already a live range in the block containing Idx?
+ const MachineBasicBlock *MBB = getMBBFromIndex(Idx);
+ SlotIndex BlockStart = getMBBStartIdx(MBB);
+ DEBUG(dbgs() << "Shrink: Use val#" << VNI->id << " at " << Idx
+ << " in BB#" << MBB->getNumber() << '@' << BlockStart);
+ if (I != NewLI.begin() && (--I)->end > BlockStart) {
+ assert(I->valno == VNI && "Wrong reaching def");
+ DEBUG(dbgs() << " extend [" << I->start << ';' << I->end << ")\n");
+ // Is this the first use of a PHIDef in its defining block?
+ if (VNI->isPHIDef() && I->end == VNI->def.getNextSlot()) {
+ // The PHI is live, make sure the predecessors are live-out.
+ for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI) {
+ SlotIndex Stop = getMBBEndIdx(*PI).getPrevSlot();
+ VNInfo *PVNI = li->getVNInfoAt(Stop);
+ // A predecessor is not required to have a live-out value for a PHI.
+ if (PVNI) {
+ assert(PVNI->hasPHIKill() && "Missing hasPHIKill flag");
+ WorkList.push_back(std::make_pair(Stop, PVNI));
+ }
+ }
+ }
+
+ // Extend the live range in the block to include Idx.
+ NewLI.addRange(LiveRange(I->end, Idx.getNextSlot(), VNI));
+ continue;
+ }
+
+ // VNI is live-in to MBB.
+ DEBUG(dbgs() << " live-in at " << BlockStart << '\n');
+ NewLI.addRange(LiveRange(BlockStart, Idx.getNextSlot(), VNI));
+
+ // Make sure VNI is live-out from the predecessors.
+ for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI) {
+ SlotIndex Stop = getMBBEndIdx(*PI).getPrevSlot();
+ assert(li->getVNInfoAt(Stop) == VNI && "Wrong value out of predecessor");
+ WorkList.push_back(std::make_pair(Stop, VNI));
+ }
+ }
+
+ // Handle dead values.
+ for (LiveInterval::vni_iterator I = li->vni_begin(), E = li->vni_end();
+ I != E; ++I) {
+ VNInfo *VNI = *I;
+ if (VNI->isUnused())
+ continue;
+ LiveInterval::iterator LII = NewLI.FindLiveRangeContaining(VNI->def);
+ assert(LII != NewLI.end() && "Missing live range for PHI");
+ if (LII->end != VNI->def.getNextSlot())
+ continue;
+ if (VNI->isPHIDef()) {
+ // This is a dead PHI. Remove it.
+ VNI->setIsUnused(true);
+ NewLI.removeRange(*LII);
+ } else {
+ // This is a dead def. Make sure the instruction knows.
+ MachineInstr *MI = getInstructionFromIndex(VNI->def);
+ assert(MI && "No instruction defining live value");
+ MI->addRegisterDead(li->reg, tri_);
+ }
+ }
+
+ // Move the trimmed ranges back.
+ li->ranges.swap(NewLI.ranges);
+ DEBUG(dbgs() << "Shrink: " << *li << '\n');
+}
+
+
//===----------------------------------------------------------------------===//
// Register allocator hooks.
//
Modified: llvm/trunk/lib/CodeGen/SimpleRegisterCoalescing.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SimpleRegisterCoalescing.cpp?rev=125058&r1=125057&r2=125058&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SimpleRegisterCoalescing.cpp (original)
+++ llvm/trunk/lib/CodeGen/SimpleRegisterCoalescing.cpp Mon Feb 7 18:03:05 2011
@@ -587,6 +587,7 @@
/// ReMaterializeTrivialDef - If the source of a copy is defined by a trivial
/// computation, replace the copy by rematerialize the definition.
bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt,
+ bool preserveSrcInt,
unsigned DstReg,
unsigned DstSubIdx,
MachineInstr *CopyMI) {
@@ -642,30 +643,12 @@
RemoveCopyFlag(DstReg, CopyMI);
- // If copy kills the source register, find the last use and propagate
- // kill.
- bool checkForDeadDef = false;
MachineBasicBlock *MBB = CopyMI->getParent();
- if (SrcLR->end == CopyIdx.getDefIndex())
- if (!TrimLiveIntervalToLastUse(CopyIdx, MBB, SrcInt, SrcLR)) {
- checkForDeadDef = true;
- }
-
MachineBasicBlock::iterator MII =
llvm::next(MachineBasicBlock::iterator(CopyMI));
tii_->reMaterialize(*MBB, MII, DstReg, DstSubIdx, DefMI, *tri_);
MachineInstr *NewMI = prior(MII);
- if (checkForDeadDef) {
- // PR4090 fix: Trim interval failed because there was no use of the
- // source interval in this MBB. If the def is in this MBB too then we
- // should mark it dead:
- if (DefMI->getParent() == MBB) {
- DefMI->addRegisterDead(SrcInt.reg, tri_);
- SrcLR->end = SrcLR->start.getNextSlot();
- }
- }
-
// CopyMI may have implicit operands, transfer them over to the newly
// rematerialized instruction. And update implicit def interval valnos.
for (unsigned i = CopyMI->getDesc().getNumOperands(),
@@ -684,6 +667,11 @@
ReMatDefs.insert(DefMI);
DEBUG(dbgs() << "Remat: " << *NewMI);
++NumReMats;
+
+ // The source interval can become smaller because we removed a use.
+ if (preserveSrcInt)
+ li_->shrinkToUses(&SrcInt);
+
return true;
}
@@ -714,7 +702,7 @@
UseMI->getOperand(0).getReg() != SrcReg &&
UseMI->getOperand(0).getReg() != DstReg &&
!JoinedCopies.count(UseMI) &&
- ReMaterializeTrivialDef(li_->getInterval(SrcReg),
+ ReMaterializeTrivialDef(li_->getInterval(SrcReg), false,
UseMI->getOperand(0).getReg(), 0, UseMI))
continue;
}
@@ -1056,7 +1044,7 @@
// Before giving up coalescing, if definition of source is defined by
// trivial computation, try rematerializing it.
if (!CP.isFlipped() &&
- ReMaterializeTrivialDef(JoinVInt, CP.getDstReg(), 0, CopyMI))
+ ReMaterializeTrivialDef(JoinVInt, true, CP.getDstReg(), 0, CopyMI))
return true;
++numAborts;
@@ -1076,7 +1064,7 @@
// If definition of source is defined by trivial computation, try
// rematerializing it.
if (!CP.isFlipped() &&
- ReMaterializeTrivialDef(li_->getInterval(CP.getSrcReg()),
+ ReMaterializeTrivialDef(li_->getInterval(CP.getSrcReg()), true,
CP.getDstReg(), 0, CopyMI))
return true;
Modified: llvm/trunk/lib/CodeGen/SimpleRegisterCoalescing.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SimpleRegisterCoalescing.h?rev=125058&r1=125057&r2=125058&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SimpleRegisterCoalescing.h (original)
+++ llvm/trunk/lib/CodeGen/SimpleRegisterCoalescing.h Mon Feb 7 18:03:05 2011
@@ -143,8 +143,10 @@
/// ReMaterializeTrivialDef - If the source of a copy is defined by a trivial
/// computation, replace the copy by rematerialize the definition.
- bool ReMaterializeTrivialDef(LiveInterval &SrcInt, unsigned DstReg,
- unsigned DstSubIdx, MachineInstr *CopyMI);
+ /// If PreserveSrcInt is true, make sure SrcInt is valid after the call.
+ bool ReMaterializeTrivialDef(LiveInterval &SrcInt, bool PreserveSrcInt,
+ unsigned DstReg, unsigned DstSubIdx,
+ MachineInstr *CopyMI);
/// isWinToJoinCrossClass - Return true if it's profitable to coalesce
/// two virtual registers from different register classes.
From aggarwa4 at illinois.edu Mon Feb 7 18:21:44 2011
From: aggarwa4 at illinois.edu (Arushi Aggarwal)
Date: Tue, 08 Feb 2011 00:21:44 -0000
Subject: [llvm-commits] [poolalloc] r125060 -
/poolalloc/trunk/lib/DSA/StdLibPass.cpp
Message-ID: <20110208002144.7A0592A6C12C@llvm.org>
Author: aggarwa4
Date: Mon Feb 7 18:21:44 2011
New Revision: 125060
URL: http://llvm.org/viewvc/llvm-project?rev=125060&view=rev
Log:
Add support for a bunch of functions not handled
earlier.
Modified:
poolalloc/trunk/lib/DSA/StdLibPass.cpp
Modified: poolalloc/trunk/lib/DSA/StdLibPass.cpp
URL: http://llvm.org/viewvc/llvm-project/poolalloc/trunk/lib/DSA/StdLibPass.cpp?rev=125060&r1=125059&r2=125060&view=diff
==============================================================================
--- poolalloc/trunk/lib/DSA/StdLibPass.cpp (original)
+++ poolalloc/trunk/lib/DSA/StdLibPass.cpp Mon Feb 7 18:21:44 2011
@@ -85,18 +85,27 @@
{"fstat", {NRET_YNARGS, NRET_NYARGS, NRET_NARGS, NRET_NARGS, false}},
{"lstat", {NRET_YNARGS, NRET_NYARGS, NRET_NARGS, NRET_NARGS, false}},
{"read", {NRET_YARGS, YRET_YARGS, NRET_NARGS, NRET_NARGS, false}},
+ {"time", {NRET_YARGS, YRET_NARGS, NRET_NARGS, NRET_NARGS, false}},
// printf not strictly true, %n could cause a write
- {"printf", {NRET_YARGS, NRET_NARGS, NRET_NARGS, NRET_NARGS, false}},
- {"fprintf", {NRET_YARGS, NRET_YNARGS, NRET_NARGS, NRET_NARGS, false}},
- {"sprintf", {NRET_YARGS, NRET_YNARGS, NRET_NARGS, NRET_NARGS, false}},
- {"snprintf", {NRET_YARGS, NRET_YNARGS, NRET_NARGS, NRET_NARGS, false}},
- {"puts", {NRET_YARGS, NRET_NARGS, NRET_NARGS, NRET_NARGS, false}},
- {"putc", {NRET_NARGS, NRET_NARGS, NRET_NARGS, NRET_NARGS, false}},
- {"putchar", {NRET_NARGS, NRET_NARGS, NRET_NARGS, NRET_NARGS, false}},
- {"fputs", {NRET_YARGS, NRET_NYARGS, NRET_NARGS, NRET_NARGS, false}},
- {"fputc", {NRET_YARGS, NRET_NYARGS, NRET_NARGS, NRET_NARGS, false}},
-
+ {"printf", {NRET_YARGS, NRET_NARGS, NRET_NARGS, NRET_NARGS, false}},
+ {"sscanf", {NRET_YARGS, YRET_NYARGS, NRET_NARGS, NRET_NARGS, false}},
+ {"scanf", {NRET_YARGS, YRET_NYARGS, NRET_NARGS, NRET_NARGS, false}},
+ {"fscanf", {NRET_YARGS, YRET_NYARGS, NRET_NARGS, NRET_NARGS, false}},
+ {"fprintf", {NRET_YARGS, NRET_YNARGS, NRET_NARGS, NRET_NARGS, false}},
+ {"sprintf", {NRET_YARGS, NRET_YNARGS, NRET_NARGS, NRET_NARGS, false}},
+ {"snprintf", {NRET_YARGS, NRET_YNARGS, NRET_NARGS, NRET_NARGS, false}},
+ {"puts", {NRET_YARGS, NRET_NARGS, NRET_NARGS, NRET_NARGS, false}},
+ {"gets", {NRET_NARGS, YRET_YARGS, NRET_NARGS, YRET_YNARGS, false}},
+ {"fgets", {NRET_NYARGS, YRET_YNARGS, NRET_NARGS, YRET_YNARGS, false}},
+ {"getc", {NRET_YNARGS, YRET_YNARGS, NRET_NARGS, NRET_NARGS, false}},
+ {"fgetc", {NRET_YNARGS, YRET_YNARGS, NRET_NARGS, NRET_NARGS, false}},
+ {"_IO_getc", {NRET_NARGS, YRET_YARGS, NRET_NARGS, YRET_YNARGS, false}},
+ {"putc", {NRET_NARGS, NRET_NARGS, NRET_NARGS, NRET_NARGS, false}},
+ {"putchar", {NRET_NARGS, NRET_NARGS, NRET_NARGS, NRET_NARGS, false}},
+ {"fputs", {NRET_YARGS, NRET_NYARGS, NRET_NARGS, NRET_NARGS, false}},
+ {"fputc", {NRET_YARGS, NRET_NYARGS, NRET_NARGS, NRET_NARGS, false}},
+ {"feof", {NRET_YARGS, NRET_NARGS, NRET_NARGS, NRET_NARGS, false}},
{"calloc", {NRET_NARGS, YRET_NARGS, YRET_NARGS, NRET_NARGS, false}},
{"malloc", {NRET_NARGS, YRET_NARGS, YRET_NARGS, NRET_NARGS, false}},
@@ -187,10 +196,8 @@
{"chdir", {false, false, false, true, false, false, false, false, false}},
{"mkdir", {false, false, false, true, false, false, false, false, false}},
{"rmdir", {false, false, false, true, false, false, false, false, false}},
- {"read", {false, false, false, false, true, false, false, false, false}},
{"pipe", {false, false, false, false, true, false, false, false, false}},
{"wait", {false, false, false, false, true, false, false, false, false}},
- {"time", {false, false, false, false, true, false, false, false, false}},
{"getrusage", {false, false, false, false, true, false, false, false, false}},
{"bcopy", {false, false, false, true, true, false, true, false, true}},
{"getcwd", { true, true, true, true, true, true, false, true, true}},
From aggarwa4 at illinois.edu Mon Feb 7 18:30:14 2011
From: aggarwa4 at illinois.edu (Arushi Aggarwal)
Date: Tue, 08 Feb 2011 00:30:14 -0000
Subject: [llvm-commits] [poolalloc] r125063 - in /poolalloc/trunk:
lib/DSA/DSGraph.cpp test/pa/clone/computeNodeMappingFail.ll
Message-ID: <20110208003014.83DF02A6C12C@llvm.org>
Author: aggarwa4
Date: Mon Feb 7 18:30:14 2011
New Revision: 125063
URL: http://llvm.org/viewvc/llvm-project?rev=125063&view=rev
Log:
reduced test case, that was triggering the assert in DSGraph.cpp.
Removed the assert, and added an if condition to handle the
example correctly.
Added:
poolalloc/trunk/test/pa/clone/computeNodeMappingFail.ll
Modified:
poolalloc/trunk/lib/DSA/DSGraph.cpp
Modified: poolalloc/trunk/lib/DSA/DSGraph.cpp
URL: http://llvm.org/viewvc/llvm-project/poolalloc/trunk/lib/DSA/DSGraph.cpp?rev=125063&r1=125062&r2=125063&view=diff
==============================================================================
--- poolalloc/trunk/lib/DSA/DSGraph.cpp (original)
+++ poolalloc/trunk/lib/DSA/DSGraph.cpp Mon Feb 7 18:30:14 2011
@@ -1364,11 +1364,12 @@
// Modify the entry in the node map so that the DSNode from the first
// DSNodeHandle is mapped to the second DSNodeHandle.
//
- assert(((signed int)(NH2.getOffset()-NH1.getOffset())>=0) && " Underflow error ");
+ // FIXME: AA:I am not sure what the right mapping for the
+ // following case is. I believe we do not need to create any
+ // new mapping.
+ //assert(((signed int)(NH2.getOffset()-NH1.getOffset())>=0) && " Underflow error ");
if(NH2.getOffset() >= NH1.getOffset()) {
Entry.setTo(N2, NH2.getOffset()-NH1.getOffset());
- } else {
- Entry.setTo(N2, NH1.getOffset());
}
//
Added: poolalloc/trunk/test/pa/clone/computeNodeMappingFail.ll
URL: http://llvm.org/viewvc/llvm-project/poolalloc/trunk/test/pa/clone/computeNodeMappingFail.ll?rev=125063&view=auto
==============================================================================
--- poolalloc/trunk/test/pa/clone/computeNodeMappingFail.ll (added)
+++ poolalloc/trunk/test/pa/clone/computeNodeMappingFail.ll Mon Feb 7 18:30:14 2011
@@ -0,0 +1,54 @@
+;This hits an assert in computeNodeMapping, when offset of merging node is greater than 1st node.
+;RUN: paopt %s -paheur-AllButUnreachableFromMemory -poolalloc -o %t.bc |& grep "Pool allocating.*nodes!"
+;RUN: llvm-dis %t.bc -o %t.ll
+
+; ModuleID = 'bugpoint-reduced-simplified.bc'
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.RefObj = type { i8*, %struct.TypeToken }
+%struct.TypeToken = type { i32, i16, i16 }
+
+define fastcc void @ImageTokenToRef(%struct.TypeToken* %Token, i32 %AttrNum, i32* %Status, i8** nocapture %RefObject) nounwind {
+entry:
+ %RefTkn = alloca %struct.TypeToken, align 8 ; <%struct.TypeToken*> [#uses=1]
+ br i1 undef, label %bb13, label %bb
+
+bb: ; preds = %entry
+ unreachable
+
+bb13: ; preds = %entry
+ br i1 undef, label %bb14, label %bb33
+
+bb14: ; preds = %bb13
+ br i1 undef, label %bb15, label %bb21
+
+bb15: ; preds = %bb14
+ br i1 undef, label %bb17, label %KernelGetAttr.exit
+
+KernelGetAttr.exit: ; preds = %bb15
+ unreachable
+
+bb17: ; preds = %bb15
+ %tmp62 = call fastcc i32 @ImageGetObject(%struct.TypeToken* %RefTkn, i32* %Status, i8** %RefObject) nounwind ; [#uses=0]
+ unreachable
+
+bb21: ; preds = %bb14
+ br i1 undef, label %bb23, label %KernelGetAttr.exit42
+
+KernelGetAttr.exit42: ; preds = %bb21
+ unreachable
+
+bb23: ; preds = %bb21
+ %tmp72 = getelementptr inbounds %struct.RefObj* undef, i64 0, i32 1 ; <%struct.TypeToken*> [#uses=1]
+ %tmp85 = call fastcc i32 @ImageGetObject(%struct.TypeToken* %tmp72, i32* %Status, i8** %RefObject) nounwind ; [#uses=0]
+ unreachable
+
+bb33: ; preds = %bb13
+ ret void
+}
+
+define fastcc i32 @ImageGetObject(%struct.TypeToken* %Token, i32* %Status, i8** nocapture %This) nounwind {
+entry:
+ unreachable
+}
From geek4civic at gmail.com Mon Feb 7 18:52:24 2011
From: geek4civic at gmail.com (NAKAMURA Takumi)
Date: Tue, 8 Feb 2011 09:52:24 +0900
Subject: [llvm-commits] _WIN32_WINNT as predefined (was Re: [Review
request][Win64] Patches for Mingw-w64(and mingw64-clang))
In-Reply-To:
References:
Message-ID:
Good morning, Anton and Eric.
2011/2/7 Anton Korobeynikov :
>> In the case of mingw-w64, _WIN32_WINNT would be defined (as 0x0502) by
>> the system headers even if we did not provide a predefined _WIN32_WINNT.
>> Conversely, a predefined _WIN32_WINNT might affect the system headers on
>> mingw-w64.
> If the system headers have all the necessary stuff defined, why should we
> bother defining them?
Nope. It would be easier to suppress warnings, simply to undefine and
redefine _WIN32_WINNT in Support/Windows/Windows.h.
My aim is described below;
> Could you please clarify what is the problem you're trying to solve
> here? It might allow everyone better understand the needs, etc.
My aim was to work with AC_CHECK_DECL, (not AC_CHECK_LIB).
1) It needs AC_DEFINE(s) to find some decls. (_WIN32_WINNT,
MINGW_HAS_SECURE_API, &c)
2) AC_DEFINE(s) are written out to config.h.in.
3) I noticed config.h.in could provide those definitions.
4) I could remove those definitions from Windows/Windows.h.
It would be overwork, I thought at last night. I will attempt to
rework for easier tweaks later.
Thank you, ...Takumi
From gohman at apple.com Mon Feb 7 18:55:13 2011
From: gohman at apple.com (Dan Gohman)
Date: Tue, 08 Feb 2011 00:55:13 -0000
Subject: [llvm-commits] [llvm] r125065 -
/llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp
Message-ID: <20110208005513.B0D9F2A6C12C@llvm.org>
Author: djg
Date: Mon Feb 7 18:55:13 2011
New Revision: 125065
URL: http://llvm.org/viewvc/llvm-project?rev=125065&view=rev
Log:
Don't split any loop backedges, including backedges of loops other than
the active loop. This is generally desirable, and it avoids trouble
in situations such as the testcase in PR9123, though the failure
mode depends on use-list order, so it is infeasible to test.
Modified:
llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp
Modified: llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp?rev=125065&r1=125064&r2=125065&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp Mon Feb 7 18:55:13 2011
@@ -3544,21 +3544,23 @@
// is the canonical backedge for this loop, which complicates post-inc
// users.
if (e != 1 && BB->getTerminator()->getNumSuccessors() > 1 &&
- !isa<IndirectBrInst>(BB->getTerminator()) &&
- (PN->getParent() != L->getHeader() || !L->contains(BB))) {
- // Split the critical edge.
- BasicBlock *NewBB = SplitCriticalEdge(BB, PN->getParent(), P);
-
- // If PN is outside of the loop and BB is in the loop, we want to
- // move the block to be immediately before the PHI block, not
- // immediately after BB.
- if (L->contains(BB) && !L->contains(PN))
- NewBB->moveBefore(PN->getParent());
-
- // Splitting the edge can reduce the number of PHI entries we have.
- e = PN->getNumIncomingValues();
- BB = NewBB;
- i = PN->getBasicBlockIndex(BB);
+ !isa<IndirectBrInst>(BB->getTerminator())) {
+ Loop *PNLoop = LI.getLoopFor(PN->getParent());
+ if (!PNLoop || PN->getParent() != PNLoop->getHeader()) {
+ // Split the critical edge.
+ BasicBlock *NewBB = SplitCriticalEdge(BB, PN->getParent(), P);
+
+ // If PN is outside of the loop and BB is in the loop, we want to
+ // move the block to be immediately before the PHI block, not
+ // immediately after BB.
+ if (L->contains(BB) && !L->contains(PN))
+ NewBB->moveBefore(PN->getParent());
+
+ // Splitting the edge can reduce the number of PHI entries we have.
+ e = PN->getNumIncomingValues();
+ BB = NewBB;
+ i = PN->getBasicBlockIndex(BB);
+ }
}
std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> Pair =
From evan.cheng at apple.com Mon Feb 7 21:07:03 2011
From: evan.cheng at apple.com (Evan Cheng)
Date: Tue, 08 Feb 2011 03:07:03 -0000
Subject: [llvm-commits] [llvm] r125074 - in /llvm/trunk/lib/Target/ARM:
ARMConstantIslandPass.cpp Thumb2SizeReduction.cpp
Message-ID: <20110208030703.86AD02A6C12D@llvm.org>
Author: evancheng
Date: Mon Feb 7 21:07:03 2011
New Revision: 125074
URL: http://llvm.org/viewvc/llvm-project?rev=125074&view=rev
Log:
Temporary workaround for a bad bug introduced by r121082 which replaced
t2LDRpci with t2LDRi12.
There are a couple of problems with this.
1. The encoding for the literal and immediate constant are different.
Note bit 7 of the literal case is 'U' so it can be negative.
2. t2LDRi12 is now narrowed to tLDRpci before constant island pass is run.
So we end up never using the Thumb2 instruction, which ends up creating a
lot more constant islands.
Modified:
llvm/trunk/lib/Target/ARM/ARMConstantIslandPass.cpp
llvm/trunk/lib/Target/ARM/Thumb2SizeReduction.cpp
Modified: llvm/trunk/lib/Target/ARM/ARMConstantIslandPass.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMConstantIslandPass.cpp?rev=125074&r1=125073&r2=125074&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMConstantIslandPass.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMConstantIslandPass.cpp Mon Feb 7 21:07:03 2011
@@ -1576,6 +1576,16 @@
Scale = 4;
}
break;
+ case ARM::t2LDRi12:
+ // FIXME: Temporary workaround for a bug introduced by r121082.
+ // We should use t2LDRpci for loads from constantpools.
+ if (isARMLowRegister(U.MI->getOperand(0).getReg()) &&
+ U.MI->getOperand(1).getReg() == ARM::PC) {
+ NewOpc = ARM::tLDRpci;
+ Bits = 8;
+ Scale = 4;
+ }
+ break;
}
if (!NewOpc)
@@ -1586,6 +1596,10 @@
// FIXME: Check if offset is multiple of scale if scale is not 4.
if (CPEIsInRange(U.MI, UserOffset, U.CPEMI, MaxOffs, false, true)) {
U.MI->setDesc(TII->get(NewOpc));
+ if (NewOpc == ARM::tLDRpci)
+ // FIXME: Temporary workaround.
+ // PC is now an implicit operand.
+ U.MI->RemoveOperand(1);
MachineBasicBlock *MBB = U.MI->getParent();
BBSizes[MBB->getNumber()] -= 2;
AdjustBBOffsetsAfter(MBB, -2);
Modified: llvm/trunk/lib/Target/ARM/Thumb2SizeReduction.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/Thumb2SizeReduction.cpp?rev=125074&r1=125073&r2=125074&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/Thumb2SizeReduction.cpp (original)
+++ llvm/trunk/lib/Target/ARM/Thumb2SizeReduction.cpp Mon Feb 7 21:07:03 2011
@@ -294,14 +294,11 @@
HasImmOffset = true;
HasOffReg = false;
} else {
- if (Entry.WideOpc == ARM::t2LDRi12) {
- Opc = ARM::tLDRpci;
- OpNum = 2;
- }
-
- HasImmOffset = false;
- HasBaseReg = false;
- HasOffReg = false;
+ // FIXME: Temporary workaround for a bug introduced by r121082.
+ // We should use t2LDRpci for loads from constantpools.
+ // We don't want to narrow this to tLDRpci until constant island pass
+ // for fear of pessimizing code.
+ return false;
}
break;
case ARM::t2LDRBi12:
From geek4civic at gmail.com Mon Feb 7 22:02:25 2011
From: geek4civic at gmail.com (NAKAMURA Takumi)
Date: Tue, 8 Feb 2011 13:02:25 +0900
Subject: [llvm-commits] [Review request][Win64] Patches for
Mingw-w64(and mingw64-clang)
In-Reply-To:
References:
<87zkqhggv2.fsf@wanadoo.es>
<5D4E2204-78B8-4011-91AF-17E5B2E5C79B@apple.com>
<87aaigh6mn.fsf@wanadoo.es>
<8762t4h5np.fsf@wanadoo.es>
Message-ID:
Hello.
I committed the patches below with Anton's approval, thank you.
> * 0002-lib-Target-X86-X86JITInfo.cpp-Add-Win64-stuff.patch.txt
> * 0009-Autoconf-may-check-symbols-in-libgcc.a-for-JIT-o.patch.txt
> * 0011-Windows-DynamicLibrary.inc-Split-explicit-symbol.patch.txt
I have reworked my remaining patches to be simpler.
I will post them individually (as far as possible) later.
...Takumi
From geek4civic at gmail.com Mon Feb 7 23:27:16 2011
From: geek4civic at gmail.com (NAKAMURA Takumi)
Date: Tue, 8 Feb 2011 14:27:16 +0900
Subject: [llvm-commits] [Review request][Win64] Patches for Mingw-w64(and
mingw64-clang)
Message-ID:
Hello, everyone.
I reworked my patches to be simpler.
Please look into the following patches and give me any comments, thank you.
...Takumi
---Part 1
https://github.com/chapuni/LLVM/commits/mingw64/20110208/
NAKAMURA Takumi (5):
Windows/Program.inc: Eliminate the declaration of
SetInformationJobObject(). It should be provided with
_WIN32_WINNT>=0x0500.
Windows/DynamicLibrary.inc: ELM_Callback fix
autoconf: Seek strerror_s() with AC_CHECK_DECLS.
Regenerate configure. (**** unsent ***)
lib/Support/Errno.cpp: Check strerror_s() with HAVE_DECL_STRERROR_S
int config.h.*.
autoconf/configure.ac | 5 ++-
cmake/config-ix.cmake | 2 +-
configure | 90 +++++++++++++++++++++++++++++++-
include/llvm/Config/config.h.cmake | 7 ++-
include/llvm/Config/config.h.in | 7 ++-
lib/Support/Errno.cpp | 2 +-
lib/Support/Windows/DynamicLibrary.inc | 12 ++++-
lib/Support/Windows/Program.inc | 9 ---
8 files changed, 113 insertions(+), 21 deletions(-)
---Part 2-1 (exclusive against part 2-2)
https://github.com/chapuni/LLVM/commits/mingw64/20110208-1
NAKAMURA Takumi (1):
Windows/Windows.h: Redefine _WIN32_WINNT here. mingw-w64 tends to
define it as 0x0502 in its headers.
lib/Support/Windows/Windows.h | 3 +++
1 files changed, 3 insertions(+), 0 deletions(-)
---Part 2-2 (exclusive against part 2-1)
https://github.com/chapuni/LLVM/commits/mingw64/20110208-2
NAKAMURA Takumi (1):
lib/Support: Always include Windows/Windows.h!
lib/Support/DynamicLibrary.cpp | 4 ++++
lib/Support/Host.cpp | 4 ++++
lib/Support/Memory.cpp | 4 ++++
lib/Support/Mutex.cpp | 4 ++++
lib/Support/Path.cpp | 7 +++++++
lib/Support/PathV2.cpp | 4 ++++
lib/Support/Process.cpp | 4 ++++
lib/Support/Program.cpp | 4 ++++
lib/Support/RWMutex.cpp | 4 ++++
lib/Support/Signals.cpp | 4 ++++
lib/Support/ThreadLocal.cpp | 4 ++++
lib/Support/TimeValue.cpp | 4 ++++
lib/Support/Windows/Windows.h | 5 +++++
13 files changed, 56 insertions(+), 0 deletions(-)
From geek4civic at gmail.com Mon Feb 7 23:27:36 2011
From: geek4civic at gmail.com (NAKAMURA Takumi)
Date: Tue, 8 Feb 2011 14:27:36 +0900
Subject: [llvm-commits] [PATCH 1/5] Windows/Program.inc: Eliminate the
declaration of SetInformationJobObject(). It should be provided with
_WIN32_WINNT>=0x0500.
Message-ID:
---
lib/Support/Windows/Program.inc | 9 ---------
1 files changed, 0 insertions(+), 9 deletions(-)
-------------- next part --------------
A non-text attachment was scrubbed...
Name: 0001-Windows-Program.inc-Eliminate-the-declaration-of.patch.txt
Type: text/x-patch
Size: 771 bytes
Desc: not available
Url : http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20110208/2a868480/attachment.bin
From geek4civic at gmail.com Mon Feb 7 23:27:46 2011
From: geek4civic at gmail.com (NAKAMURA Takumi)
Date: Tue, 8 Feb 2011 14:27:46 +0900
Subject: [llvm-commits] [PATCH 2/5] Windows/DynamicLibrary.inc: ELM_Callback
fix
Message-ID:
---
lib/Support/Windows/DynamicLibrary.inc | 12 +++++++++++-
1 files changed, 11 insertions(+), 1 deletions(-)
It has been unchanged since the last one.
I wonder whether it would be better to detect header compatibility with
autoconf and cmake.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: 0002-Windows-DynamicLibrary.inc-ELM_Callback-fix.patch.txt
Type: text/x-patch
Size: 974 bytes
Desc: not available
Url : http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20110208/34dee85e/attachment-0001.bin
From geek4civic at gmail.com Mon Feb 7 23:27:54 2011
From: geek4civic at gmail.com (NAKAMURA Takumi)
Date: Tue, 8 Feb 2011 14:27:54 +0900
Subject: [llvm-commits] [PATCH 3/5] autoconf: Seek strerror_s() with
AC_CHECK_DECLS.
Message-ID:
AC_CHECK_FUNCS looks for a symbol only in libs. We should check for the declaration in string.h.
With recent Mingw, *_s() stuff would be enabled by MINGW_HAS_SECURE_API.
---
autoconf/configure.ac | 5 ++++-
1 files changed, 4 insertions(+), 1 deletions(-)
-------------- next part --------------
A non-text attachment was scrubbed...
Name: 0003-autoconf-Seek-strerror_s-with-AC_CHECK_DECLS.patch.txt
Type: text/x-patch
Size: 958 bytes
Desc: not available
Url : http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20110208/a2428ec4/attachment.bin
From geek4civic at gmail.com Mon Feb 7 23:28:07 2011
From: geek4civic at gmail.com (NAKAMURA Takumi)
Date: Tue, 8 Feb 2011 14:28:07 +0900
Subject: [llvm-commits] [PATCH 5/5] lib/Support/Errno.cpp: Check
strerror_s() with HAVE_DECL_STRERROR_S int config.h.*.
Message-ID:
FIXME: I have never seen mingw(s) have strerror_s() (not _strerror_s()).
---
cmake/config-ix.cmake | 2 +-
include/llvm/Config/config.h.cmake | 7 ++++---
include/llvm/Config/config.h.in | 7 ++++---
lib/Support/Errno.cpp | 2 +-
4 files changed, 10 insertions(+), 8 deletions(-)
-------------- next part --------------
A non-text attachment was scrubbed...
Name: 0005-lib-Support-Errno.cpp-Check-strerror_s-with-HAVE.patch.txt
Type: text/x-patch
Size: 2892 bytes
Desc: not available
Url : http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20110208/919a0d3a/attachment.bin
From geek4civic at gmail.com Mon Feb 7 23:28:20 2011
From: geek4civic at gmail.com (NAKAMURA Takumi)
Date: Tue, 8 Feb 2011 14:28:20 +0900
Subject: [llvm-commits] [PATCH] Windows/Windows.h: Redefine _WIN32_WINNT
here. mingw-w64 tends to define it as 0x0502 in its headers.
Message-ID:
---
lib/Support/Windows/Windows.h | 3 +++
1 files changed, 3 insertions(+), 0 deletions(-)
It is the simplest one. Ruben suggested it.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: 0001-Windows-Windows.h-Redefine-_WIN32_WINNT-here.-mi.patch.txt
Type: text/x-patch
Size: 556 bytes
Desc: not available
Url : http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20110208/c6760bfe/attachment.bin
From geek4civic at gmail.com Mon Feb 7 23:28:34 2011
From: geek4civic at gmail.com (NAKAMURA Takumi)
Date: Tue, 8 Feb 2011 14:28:34 +0900
Subject: [llvm-commits] [PATCH] lib/Support: Always include
Windows/Windows.h!
Message-ID: