From evan.cheng at apple.com Mon Feb 6 00:02:45 2006 From: evan.cheng at apple.com (Evan Cheng) Date: Mon, 6 Feb 2006 00:02:45 -0600 Subject: [llvm-commits] CVS: llvm/lib/Target/X86/X86ISelDAGToDAG.cpp Message-ID: <200602060602.AAA06807@zion.cs.uiuc.edu> Changes in directory llvm/lib/Target/X86: X86ISelDAGToDAG.cpp updated: 1.42 -> 1.43 --- Log message: - Update load folding checks to match those auto-generated by tblgen. - Manually select SDOperand's returned by TryFoldLoad which make up the load address. --- Diffs of the changes: (+26 -11) X86ISelDAGToDAG.cpp | 37 ++++++++++++++++++++++++++----------- 1 files changed, 26 insertions(+), 11 deletions(-) Index: llvm/lib/Target/X86/X86ISelDAGToDAG.cpp diff -u llvm/lib/Target/X86/X86ISelDAGToDAG.cpp:1.42 llvm/lib/Target/X86/X86ISelDAGToDAG.cpp:1.43 --- llvm/lib/Target/X86/X86ISelDAGToDAG.cpp:1.42 Sun Feb 5 00:46:41 2006 +++ llvm/lib/Target/X86/X86ISelDAGToDAG.cpp Mon Feb 6 00:02:33 2006 @@ -111,7 +111,8 @@ SDOperand &Index, SDOperand &Disp); bool SelectLEAAddr(SDOperand N, SDOperand &Base, SDOperand &Scale, SDOperand &Index, SDOperand &Disp); - bool TryFoldLoad(SDOperand N, SDOperand &Base, SDOperand &Scale, + bool TryFoldLoad(SDOperand P, SDOperand N, + SDOperand &Base, SDOperand &Scale, SDOperand &Index, SDOperand &Disp); inline void getAddressOperands(X86ISelAddressMode &AM, SDOperand &Base, @@ -381,11 +382,13 @@ return true; } -bool X86DAGToDAGISel::TryFoldLoad(SDOperand N, SDOperand &Base, - SDOperand &Scale, SDOperand &Index, - SDOperand &Disp) { - if (N.getOpcode() == ISD::LOAD && N.hasOneUse() && - CodeGenMap.count(N.getValue(1)) == 0) +bool X86DAGToDAGISel::TryFoldLoad(SDOperand P, SDOperand N, + SDOperand &Base, SDOperand &Scale, + SDOperand &Index, SDOperand &Disp) { + if (N.getOpcode() == ISD::LOAD && + N.hasOneUse() && + !CodeGenMap.count(N.getValue(0)) && + (P.getNumOperands() == 1 || !isNonImmUse(P.Val, N.Val))) return SelectAddr(N.getOperand(1), Base, Scale, Index, Disp); return false; } @@ -486,10 
+489,10 @@ bool foldedLoad = false; SDOperand Tmp0, Tmp1, Tmp2, Tmp3; - foldedLoad = TryFoldLoad(N1, Tmp0, Tmp1, Tmp2, Tmp3); + foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3); // MULHU and MULHS are commmutative if (!foldedLoad) { - foldedLoad = TryFoldLoad(N0, Tmp0, Tmp1, Tmp2, Tmp3); + foldedLoad = TryFoldLoad(N, N0, Tmp0, Tmp1, Tmp2, Tmp3); if (foldedLoad) { N0 = Node->getOperand(1); N1 = Node->getOperand(0); @@ -505,6 +508,10 @@ InFlag = Chain.getValue(1); if (foldedLoad) { + Tmp0 = Select(Tmp0); + Tmp1 = Select(Tmp1); + Tmp2 = Select(Tmp2); + Tmp3 = Select(Tmp3); Chain = CurDAG->getTargetNode(MOpc, MVT::Other, MVT::Flag, Tmp0, Tmp1, Tmp2, Tmp3, Chain, InFlag); InFlag = Chain.getValue(1); @@ -514,8 +521,10 @@ SDOperand Result = CurDAG->getCopyFromReg(Chain, HiReg, NVT, InFlag); CodeGenMap[N.getValue(0)] = Result; - if (foldedLoad) + if (foldedLoad) { CodeGenMap[N1.getValue(1)] = Result.getValue(1); + AddHandleReplacement(N1.getValue(1), Result.getValue(1)); + } return Result; } @@ -566,7 +575,7 @@ bool foldedLoad = false; SDOperand Tmp0, Tmp1, Tmp2, Tmp3; - foldedLoad = TryFoldLoad(N1, Tmp0, Tmp1, Tmp2, Tmp3); + foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3); SDOperand Chain = foldedLoad ? Select(N1.getOperand(0)) : CurDAG->getEntryNode(); @@ -589,6 +598,10 @@ } if (foldedLoad) { + Tmp0 = Select(Tmp0); + Tmp1 = Select(Tmp1); + Tmp2 = Select(Tmp2); + Tmp3 = Select(Tmp3); Chain = CurDAG->getTargetNode(MOpc, MVT::Other, MVT::Flag, Tmp0, Tmp1, Tmp2, Tmp3, Chain, InFlag); InFlag = Chain.getValue(1); @@ -599,8 +612,10 @@ SDOperand Result = CurDAG->getCopyFromReg(Chain, isDiv ? 
LoReg : HiReg, NVT, InFlag); CodeGenMap[N.getValue(0)] = Result; - if (foldedLoad) + if (foldedLoad) { CodeGenMap[N1.getValue(1)] = Result.getValue(1); + AddHandleReplacement(N1.getValue(1), Result.getValue(1)); + } return Result; } From evan.cheng at apple.com Mon Feb 6 00:03:47 2006 From: evan.cheng at apple.com (Evan Cheng) Date: Mon, 6 Feb 2006 00:03:47 -0600 Subject: [llvm-commits] CVS: llvm/utils/TableGen/DAGISelEmitter.cpp Message-ID: <200602060603.AAA06827@zion.cs.uiuc.edu> Changes in directory llvm/utils/TableGen: DAGISelEmitter.cpp updated: 1.166 -> 1.167 --- Log message: Name change. --- Diffs of the changes: (+4 -4) DAGISelEmitter.cpp | 8 ++++---- 1 files changed, 4 insertions(+), 4 deletions(-) Index: llvm/utils/TableGen/DAGISelEmitter.cpp diff -u llvm/utils/TableGen/DAGISelEmitter.cpp:1.166 llvm/utils/TableGen/DAGISelEmitter.cpp:1.167 --- llvm/utils/TableGen/DAGISelEmitter.cpp:1.166 Sun Feb 5 02:46:14 2006 +++ llvm/utils/TableGen/DAGISelEmitter.cpp Mon Feb 6 00:03:35 2006 @@ -2398,7 +2398,7 @@ emitCode("CodeGenMap[N.getValue(" + utostr(ValNo) + ")] = " + ChainName + ";"); if (DoReplace) - emitCode("if (N.ResNo == 0) AddReplacement(N.getValue(" + emitCode("if (N.ResNo == 0) AddHandleReplacement(N.getValue(" + utostr(ValNo) + "), " + ChainName + ");"); ValNo++; } @@ -2415,7 +2415,7 @@ std::string Code = FoldedChains[j].first + ".getValue(" + utostr(FoldedChains[j].second) + ")"; - emitCode("AddReplacement(" + Code + ", " + ChainName + ");"); + emitCode("AddHandleReplacement(" + Code + ", " + ChainName + ");"); } } @@ -3099,9 +3099,9 @@ OS << "}\n"; OS << "\n"; - OS << "// AddReplacement - Note the pending replacement node for a\n" + OS << "// AddHandleReplacement - Note the pending replacement node for a\n" << "// holder node in ReplaceMap.\n"; - OS << "void AddReplacement(SDOperand N, SDOperand R) {\n"; + OS << "void AddHandleReplacement(SDOperand N, SDOperand R) {\n"; OS << " std::map::iterator HMI = HolderMap.find(N);\n"; OS << " if (HMI != 
HolderMap.end()) {\n"; OS << " ReplaceMap[HMI->second] = R;\n"; From evan.cheng at apple.com Mon Feb 6 02:13:07 2006 From: evan.cheng at apple.com (Evan Cheng) Date: Mon, 6 Feb 2006 02:13:07 -0600 Subject: [llvm-commits] CVS: llvm/utils/TableGen/DAGISelEmitter.cpp Message-ID: <200602060813.CAA09902@zion.cs.uiuc.edu> Changes in directory llvm/utils/TableGen: DAGISelEmitter.cpp updated: 1.167 -> 1.168 --- Log message: At the end of isel, select a replacement node for each handle that does not have one. This can happen if a load's real uses are dead (i.e. they do not have uses themselves). --- Diffs of the changes: (+24 -11) DAGISelEmitter.cpp | 35 ++++++++++++++++++++++++----------- 1 files changed, 24 insertions(+), 11 deletions(-) Index: llvm/utils/TableGen/DAGISelEmitter.cpp diff -u llvm/utils/TableGen/DAGISelEmitter.cpp:1.167 llvm/utils/TableGen/DAGISelEmitter.cpp:1.168 --- llvm/utils/TableGen/DAGISelEmitter.cpp:1.167 Mon Feb 6 00:03:35 2006 +++ llvm/utils/TableGen/DAGISelEmitter.cpp Mon Feb 6 02:12:55 2006 @@ -2856,7 +2856,7 @@ << "CurDAG->getNode(ISD::HANDLENODE, MVT::Other, N);\n" << " CodeGenMap[N.getValue(" << OpcodeInfo.getNumResults() << ")] = Dummy;\n" - << " HolderMap[N.getValue(" << OpcodeInfo.getNumResults() + << " HandleMap[N.getValue(" << OpcodeInfo.getNumResults() << ")] = Dummy;\n" << " return Dummy;\n" << " }\n"; @@ -3061,9 +3061,9 @@ << "std::map CodeGenMap;\n"; OS << "// Instance var to keep track of mapping of chain generating nodes\n" - << "// and their place holder nodes.\n"; - OS << "std::map HolderMap;\n"; - OS << "// Instance var to keep track of mapping of place holder nodes\n" + << "// and their place handle nodes.\n"; + OS << "std::map HandleMap;\n"; + OS << "// Instance var to keep track of mapping of place handle nodes\n" << "// and their replacement nodes.\n"; OS << "std::map ReplaceMap;\n"; @@ -3100,19 +3100,31 @@ OS << "\n"; OS << "// AddHandleReplacement - Note the pending replacement node for a\n" - << "// holder node in 
ReplaceMap.\n"; + << "// handle node in ReplaceMap.\n"; OS << "void AddHandleReplacement(SDOperand N, SDOperand R) {\n"; - OS << " std::map::iterator HMI = HolderMap.find(N);\n"; - OS << " if (HMI != HolderMap.end()) {\n"; + OS << " std::map::iterator HMI = HandleMap.find(N);\n"; + OS << " if (HMI != HandleMap.end()) {\n"; OS << " ReplaceMap[HMI->second] = R;\n"; - OS << " HolderMap.erase(N);\n"; + OS << " HandleMap.erase(N);\n"; OS << " }\n"; OS << "}\n"; OS << "\n"; - OS << "// ReplaceHolders - Replace all the holders with the real target\n"; + OS << "// SelectDanglingHandles - Select replacements for all `dangling`\n"; + OS << "// handles.Some handles do not yet have replacements because the\n"; + OS << "// nodes they replacements have only dead readers.\n"; + OS << "void SelectDanglingHandles() {\n"; + OS << " for (std::map::iterator I = " + << "HandleMap.begin(),\n" + << " E = HandleMap.end(); I != E; ++I) {\n"; + OS << " SDOperand N = I->first;\n"; + OS << " AddHandleReplacement(N, Select(N.getValue(0)));\n"; + OS << " }\n"; + OS << "}\n"; + OS << "\n"; + OS << "// ReplaceHandles - Replace all the handles with the real target\n"; OS << "// specific nodes.\n"; - OS << "void ReplaceHolders() {\n"; + OS << "void ReplaceHandles() {\n"; OS << " for (std::map::iterator I = " << "ReplaceMap.begin(),\n" << " E = ReplaceMap.end(); I != E; ++I) {\n"; @@ -3139,7 +3151,8 @@ OS << "// SelectRoot - Top level entry to DAG isel.\n"; OS << "SDOperand SelectRoot(SDOperand N) {\n"; OS << " SDOperand RetVal = Select(N);\n"; - OS << " ReplaceHolders();\n"; + OS << " SelectDanglingHandles();\n"; + OS << " ReplaceHandles();\n"; OS << " ReplaceMap.clear();\n"; OS << " return RetVal;\n"; OS << "}\n"; From jlaskey at apple.com Mon Feb 6 08:16:37 2006 From: jlaskey at apple.com (Jim Laskey) Date: Mon, 6 Feb 2006 08:16:37 -0600 Subject: [llvm-commits] CVS: llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp Message-ID: <200602061416.IAA19071@zion.cs.uiuc.edu> Changes in directory 
llvm/lib/Target/PowerPC: PPCAsmPrinter.cpp updated: 1.145 -> 1.146 --- Log message: We seem to have settled on __DWARF for the section name. --- Diffs of the changes: (+11 -11) PPCAsmPrinter.cpp | 22 +++++++++++----------- 1 files changed, 11 insertions(+), 11 deletions(-) Index: llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp diff -u llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp:1.145 llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp:1.146 --- llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp:1.145 Sat Feb 4 19:30:45 2006 +++ llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp Mon Feb 6 08:16:15 2006 @@ -222,18 +222,18 @@ : DwarfWriter(o, ap) { needsSet = true; - DwarfAbbrevSection = ".section __DWARFA,__debug_abbrev"; - DwarfInfoSection = ".section __DWARFA,__debug_info"; - DwarfLineSection = ".section __DWARFA,__debug_line"; + DwarfAbbrevSection = ".section __DWARF,__debug_abbrev"; + DwarfInfoSection = ".section __DWARF,__debug_info"; + DwarfLineSection = ".section __DWARF,__debug_line"; DwarfFrameSection = - ".section __DWARFA,__debug_frame,,coalesced,no_toc+strip_static_syms"; - DwarfPubNamesSection = ".section __DWARFA,__debug_pubnames"; - DwarfPubTypesSection = ".section __DWARFA,__debug_pubtypes"; - DwarfStrSection = ".section __DWARFA,__debug_str"; - DwarfLocSection = ".section __DWARFA,__debug_loc"; - DwarfARangesSection = ".section __DWARFA,__debug_aranges"; - DwarfRangesSection = ".section __DWARFA,__debug_ranges"; - DwarfMacInfoSection = ".section __DWARFA,__debug_macinfo"; + ".section __DWARF,__debug_frame,,coalesced,no_toc+strip_static_syms"; + DwarfPubNamesSection = ".section __DWARF,__debug_pubnames"; + DwarfPubTypesSection = ".section __DWARF,__debug_pubtypes"; + DwarfStrSection = ".section __DWARF,__debug_str"; + DwarfLocSection = ".section __DWARF,__debug_loc"; + DwarfARangesSection = ".section __DWARF,__debug_aranges"; + DwarfRangesSection = ".section __DWARF,__debug_ranges"; + DwarfMacInfoSection = ".section __DWARF,__debug_macinfo"; TextSection = ".text"; DataSection = ".data"; } 
From jlaskey at apple.com Mon Feb 6 09:33:35 2006 From: jlaskey at apple.com (Jim Laskey) Date: Mon, 6 Feb 2006 09:33:35 -0600 Subject: [llvm-commits] CVS: llvm/include/llvm/CodeGen/DwarfWriter.h MachineDebugInfo.h Message-ID: <200602061533.JAA19279@zion.cs.uiuc.edu> Changes in directory llvm/include/llvm/CodeGen: DwarfWriter.h updated: 1.22 -> 1.23 MachineDebugInfo.h updated: 1.10 -> 1.11 --- Log message: Changing model for the construction of debug information. --- Diffs of the changes: (+332 -101) DwarfWriter.h | 4 MachineDebugInfo.h | 429 ++++++++++++++++++++++++++++++++++++++++------------- 2 files changed, 332 insertions(+), 101 deletions(-) Index: llvm/include/llvm/CodeGen/DwarfWriter.h diff -u llvm/include/llvm/CodeGen/DwarfWriter.h:1.22 llvm/include/llvm/CodeGen/DwarfWriter.h:1.23 --- llvm/include/llvm/CodeGen/DwarfWriter.h:1.22 Fri Jan 27 14:31:25 2006 +++ llvm/include/llvm/CodeGen/DwarfWriter.h Mon Feb 6 09:33:21 2006 @@ -33,7 +33,7 @@ // Forward declarations. // class AsmPrinter; - class CompileUnitWrapper; + class CompileUnitDesc; class DIE; class DwarfWriter; class DWContext; @@ -657,7 +657,7 @@ /// NewCompileUnit - Create new compile unit information. /// - DIE *NewCompileUnit(const CompileUnitWrapper &CompileUnit); + DIE *NewCompileUnit(const CompileUnitDesc *CompileUnit); /// EmitInitial - Emit initial Dwarf declarations. /// Index: llvm/include/llvm/CodeGen/MachineDebugInfo.h diff -u llvm/include/llvm/CodeGen/MachineDebugInfo.h:1.10 llvm/include/llvm/CodeGen/MachineDebugInfo.h:1.11 --- llvm/include/llvm/CodeGen/MachineDebugInfo.h:1.10 Sun Jan 29 03:52:11 2006 +++ llvm/include/llvm/CodeGen/MachineDebugInfo.h Mon Feb 6 09:33:21 2006 @@ -17,160 +17,385 @@ // The following information can be retrieved from the MachineDebugInfo. // // -- Source directories - Directories are uniqued based on their canonical -// string and assigned a sequential numeric ID (base 1.) 
A directory ID - 1 -// provides the index of directory information in a queried directory list. +// string and assigned a sequential numeric ID (base 1.) // -- Source files - Files are also uniqued based on their name and directory -// ID. A file ID is sequential number (base 1.) A file ID - 1 provides the -// index of source information in a queried file list. +// ID. A file ID is sequential number (base 1.) // -- Source line coorespondence - A vector of file ID, line#, column# triples. // A DEBUG_LOCATION instruction is generated by the DAG Legalizer // corresponding to each entry in the source line list. This allows a debug -// information emitter to generate labels to map code addressed to debug -// tables. +// emitter to generate labels referenced by degug information tables. // //===----------------------------------------------------------------------===// #ifndef LLVM_CODEGEN_MACHINEDEBUGINFO_H #define LLVM_CODEGEN_MACHINEDEBUGINFO_H -#include "llvm/Pass.h" +#include "llvm/Support/Dwarf.h" #include "llvm/ADT/UniqueVector.h" +#include "llvm/Pass.h" +#include "llvm/User.h" + #include +#include namespace llvm { +//===----------------------------------------------------------------------===// // Forward declarations. -class ConstantStruct; +class DebugInfoDesc; class GlobalVariable; class Module; +class PointerType; +class StructType; + +//===----------------------------------------------------------------------===// +// Debug info constants. +enum { + LLVMDebugVersion = 1, // Current version of debug information. + DIInvalid = ~0U, // Invalid result indicator. + + // DebugInfoDesc type identifying tags. + // FIXME - Change over with gcc4. 
+#if 1 + DI_TAG_compile_unit = DW_TAG_compile_unit, + DI_TAG_global_variable = DW_TAG_variable, + DI_TAG_subprogram = DW_TAG_subprogram +#else + DI_TAG_compile_unit = 1, + DI_TAG_global_variable, + DI_TAG_subprogram +#endif +}; //===----------------------------------------------------------------------===// -/// DebugInfoWrapper - This class is the base class for debug info wrappers. +/// DIApplyManager - Subclasses of this class apply steps to each of the fields +/// in the supplied DebugInfoDesc. +class DIApplyManager { +public: + DIApplyManager() {} + virtual ~DIApplyManager() {} + + + /// ApplyToFields - Target the manager to each field of the debug information + /// descriptor. + void ApplyToFields(DebugInfoDesc *DD); + + /// Apply - Subclasses override each of these methods to perform the + /// appropriate action for the type of field. + virtual void Apply(int &Field) = 0; + virtual void Apply(unsigned &Field) = 0; + virtual void Apply(bool &Field) = 0; + virtual void Apply(std::string &Field) = 0; + virtual void Apply(DebugInfoDesc *&Field) = 0; + virtual void Apply(GlobalVariable *&Field) = 0; +}; + +//===----------------------------------------------------------------------===// +/// DebugInfoDesc - This class is the base class for debug info descriptors. /// -class DebugInfoWrapper { +class DebugInfoDesc { +private: + unsigned Tag; // Content indicator. Dwarf values are + // used but that does not limit use to + // Dwarf writers. + protected: - GlobalVariable *GV; // "llvm.db" global - ConstantStruct *IC; // Initializer constant. + DebugInfoDesc(unsigned T) : Tag(T) {} public: - DebugInfoWrapper(GlobalVariable *G); + virtual ~DebugInfoDesc() {} + + // Accessors + unsigned getTag() const { return Tag; } - /// getGlobal - Return the "llvm.db" global. - /// - GlobalVariable *getGlobal() const { return GV; } + /// TagFromGlobal - Returns the Tag number from a debug info descriptor + /// GlobalVariable. 
+ static unsigned TagFromGlobal(GlobalVariable *GV, bool Checking = false); - /// operator== - Used by Uniquevector to locate entry. + /// DescFactory - Create an instance of debug info descriptor based on Tag. + /// Return NULL if not a recognized Tag. + static DebugInfoDesc *DescFactory(unsigned Tag); + + //===--------------------------------------------------------------------===// + // Subclasses should supply the following static methods. + + // Implement isa/cast/dyncast. + static bool classof(const DebugInfoDesc *) { return true; } + + //===--------------------------------------------------------------------===// + // Subclasses should supply the following virtual methods. + + /// ApplyToFields - Target the apply manager to the fields of the descriptor. /// - bool operator==(const DebugInfoWrapper &DI) const { return IC == DI.IC; } + virtual void ApplyToFields(DIApplyManager *Mgr) = 0; - /// operator< - Used by Uniquevector to locate entry. + /// TypeString - Return a string used to compose globalnames and labels. /// - bool operator<(const DebugInfoWrapper &DI) const { return IC < DI.IC; } + virtual const char *TypeString() const = 0; + +#ifndef NDEBUG + virtual void dump() = 0; +#endif }; //===----------------------------------------------------------------------===// -/// CompileUnitWrapper - This class wraps a "lldb.compile_unit" global to -/// provide easy access to its attributes. -class CompileUnitWrapper : public DebugInfoWrapper { +/// CompileUnitDesc - This class packages debug information associated with a +/// source/header file. +class CompileUnitDesc : public DebugInfoDesc { +private: + unsigned DebugVersion; // LLVM debug version when produced. + unsigned Language; // Language number (ex. DW_LANG_C89.) + std::string FileName; // Source file name. + std::string Directory; // Source file directory. + std::string Producer; // Compiler string. + GlobalVariable *TransUnit; // Translation unit - ignored. 
+ +public: + CompileUnitDesc() + : DebugInfoDesc(DI_TAG_compile_unit) + , DebugVersion(LLVMDebugVersion) + , Language(0) + , FileName("") + , Directory("") + , Producer("") + , TransUnit(NULL) + {} + + // Accessors + unsigned getDebugVersion() const { return DebugVersion; } + unsigned getLanguage() const { return Language; } + const std::string &getFileName() const { return FileName; } + const std::string &getDirectory() const { return Directory; } + const std::string &getProducer() const { return Producer; } + void setLanguage(unsigned L) { Language = L; } + void setFileName(const std::string &FN) { FileName = FN; } + void setDirectory(const std::string &D) { Directory = D; } + void setProducer(const std::string &P) { Producer = P; } + // FIXME - Need translation unit getter/setter. + + // Implement isa/cast/dyncast. + static bool classof(const CompileUnitDesc *) { return true; } + static bool classof(const DebugInfoDesc *D) { + return D->getTag() == DI_TAG_compile_unit; + } + + /// DebugVersionFromGlobal - Returns the version number from a compile unit + /// GlobalVariable. + static unsigned DebugVersionFromGlobal(GlobalVariable *GV, + bool Checking = false); + + /// ApplyToFields - Target the apply manager to the fields of the + /// CompileUnitDesc. + virtual void ApplyToFields(DIApplyManager *Mgr); + + /// TypeString - Return a string used to compose globalnames and labels. + /// + virtual const char *TypeString() const; + +#ifndef NDEBUG + virtual void dump(); +#endif +}; + +//===----------------------------------------------------------------------===// +/// GlobalVariableDesc - This class packages debug information associated with a +/// GlobalVariable. +class GlobalVariableDesc : public DebugInfoDesc { private: - // Operand indices. - enum { - Tag_op, - Version_op, - Language_op, - FileName_op, - Directory_op, - Producer_op, - Anchor_op, // ignored - N_op - }; + DebugInfoDesc *Context; // Context debug descriptor. + std::string Name; // Global name. 
+ GlobalVariable *TransUnit; // Translation unit - ignored. + // FIXME - Use a descriptor. + GlobalVariable *TyDesc; // Type debug descriptor. + bool IsStatic; // Is the global a static. + bool IsDefinition; // Is the global defined in context. + GlobalVariable *Global; // llvm global. public: - CompileUnitWrapper(GlobalVariable *G); + GlobalVariableDesc() + : DebugInfoDesc(DI_TAG_global_variable) + , Context(0) + , Name("") + , TransUnit(NULL) + , TyDesc(NULL) + , IsStatic(false) + , IsDefinition(false) + , Global(NULL) + {} + + // Accessors + DebugInfoDesc *getContext() const { return Context; } + const std::string &getName() const { return Name; } + bool isStatic() const { return IsStatic; } + bool isDefinition() const { return IsDefinition; } + GlobalVariable *getGlobalVariable() const { return Global; } + void setName(const std::string &N) { Name = N; } + void setIsStatic(bool IS) { IsStatic = IS; } + void setIsDefinition(bool ID) { IsDefinition = ID; } + void setGlobalVariable(GlobalVariable *GV) { Global = GV; } + // FIXME - Other getters/setters. + + // Implement isa/cast/dyncast. + static bool classof(const GlobalVariableDesc *) { return true; } + static bool classof(const DebugInfoDesc *D) { + return D->getTag() == DI_TAG_global_variable; + } - /// getGlobal - Return the "lldb.compile_unit" global. + /// ApplyToFields - Target the apply manager to the fields of the + /// GlobalVariableDesc. + virtual void ApplyToFields(DIApplyManager *Mgr); + + /// TypeString - Return a string used to compose globalnames and labels. /// - GlobalVariable *getGlobal() const { return GV; } + virtual const char *TypeString() const; - /// getTag - Return the compile unit's tag number. Currently DW_TAG_variable, - /// DW_TAG_subprogram or DW_TAG_compile_unit. - unsigned getTag() const; +#ifndef NDEBUG + virtual void dump(); +#endif +}; - /// isCorrectDebugVersion - Return true if is the correct llvm debug version. - /// Currently the value is 0 (zero.) 
If the value is is not correct then - /// ignore all debug information. - bool isCorrectDebugVersion() const; - - /// getLanguage - Return the compile unit's language number (ex. DW_LANG_C89.) - /// - unsigned getLanguage() const; +//===----------------------------------------------------------------------===// +/// SubprogramDesc - This class packages debug information associated with a +/// subprogram/function. +class SubprogramDesc : public DebugInfoDesc { +private: + DebugInfoDesc *Context; // Context debug descriptor. + std::string Name; // Subprogram name. + GlobalVariable *TransUnit; // Translation unit - ignored. + // FIXME - Use a descriptor. + GlobalVariable *TyDesc; // Type debug descriptor. + bool IsStatic; // Is the subprogram a static. + bool IsDefinition; // Is the subprogram defined in context. - /// getFileName - Return the compile unit's file name. - /// - const std::string getFileName() const; +public: + SubprogramDesc() + : DebugInfoDesc(DI_TAG_subprogram) + , Context(0) + , Name("") + , TransUnit(NULL) + , TyDesc(NULL) + , IsStatic(false) + , IsDefinition(false) + {} - /// getDirectory - Return the compile unit's file directory. - /// - const std::string getDirectory() const; + // Accessors + DebugInfoDesc *getContext() const { return Context; } + const std::string &getName() const { return Name; } + bool isStatic() const { return IsStatic; } + bool isDefinition() const { return IsDefinition; } + void setName(const std::string &N) { Name = N; } + void setIsStatic(bool IS) { IsStatic = IS; } + void setIsDefinition(bool ID) { IsDefinition = ID; } + // FIXME - Other getters/setters. + + // Implement isa/cast/dyncast. + static bool classof(const SubprogramDesc *) { return true; } + static bool classof(const DebugInfoDesc *D) { + return D->getTag() == DI_TAG_subprogram; + } - /// getProducer - Return the compile unit's generator name. + /// ApplyToFields - Target the apply manager to the fields of the + /// SubprogramDesc. 
+ virtual void ApplyToFields(DIApplyManager *Mgr); + + /// TypeString - Return a string used to compose globalnames and labels. /// - const std::string getProducer() const; + virtual const char *TypeString() const; + +#ifndef NDEBUG + virtual void dump(); +#endif }; //===----------------------------------------------------------------------===// -/// GlobalWrapper - This class wraps a "lldb.global" global to provide easy -/// access to its attributes. -class GlobalWrapper : public DebugInfoWrapper { +/// DIDeserializer - This class is responsible for casting GlobalVariables +/// into DebugInfoDesc objects. +class DIDeserializer { private: - // Operand indices. - enum { - Tag_op, - Context_op, - Name_op, - Anchor_op, // ignored - Type_op, - Static_op, - Definition_op, - GlobalVariable_op, - N_op - }; + Module *M; // Definition space module. + unsigned DebugVersion; // Version of debug information in use. + std::map GlobalDescs; + // Previously defined gloabls. public: - GlobalWrapper(GlobalVariable *G); + DIDeserializer() : M(NULL), DebugVersion(LLVMDebugVersion) {} + ~DIDeserializer() {} - /// getGlobal - Return the "lldb.global" global. - /// - GlobalVariable *getGlobal() const { return GV; } - - /// getContext - Return the "lldb.compile_unit" context global. - /// - GlobalVariable *getContext() const; + // Accessors + Module *getModule() const { return M; }; + void setModule(Module *module) { M = module; } + unsigned getDebugVersion() const { return DebugVersion; } + + /// Deserialize - Reconstitute a GlobalVariable into it's component + /// DebugInfoDesc objects. + DebugInfoDesc *Deserialize(Value *V); + DebugInfoDesc *Deserialize(GlobalVariable *GV); +}; - /// getTag - Return the global's tag number. Currently should be - /// DW_TAG_variable or DW_TAG_subprogram. 
- unsigned getTag() const; +//===----------------------------------------------------------------------===// +/// DISerializer - This class is responsible for casting DebugInfoDesc objects +/// into GlobalVariables. +class DISerializer { +private: + Module *M; // Definition space module. + PointerType *StrPtrTy; // A "sbyte *" type. Created lazily. + PointerType *EmptyStructPtrTy; // A "{ }*" type. Created lazily. + std::map TagTypes; + // Types per Tag. Created lazily. + std::map DescGlobals; + // Previously defined descriptors. + std::map StringCache; + // Previously defined strings. +public: + DISerializer() : M(NULL) {} + ~DISerializer() {} - /// getName - Return the name of the global. + // Accessors + Module *getModule() const { return M; }; + void setModule(Module *module) { M = module; } + + /// getStrPtrType - Return a "sbyte *" type. /// - const std::string getName() const; + const PointerType *getStrPtrType(); - /// getType - Return the type of the global. + /// getEmptyStructPtrType - Return a "{ }*" type. /// - const GlobalVariable *getType() const; - - /// isStatic - Return true if the global is static. + const PointerType *getEmptyStructPtrType(); + + /// getTagType - Return the type describing the specified descriptor (via + /// tag.) + const StructType *getTagType(DebugInfoDesc *DD); + + /// getString - Construct the string as constant string global. /// - bool isStatic() const; + GlobalVariable *getString(const std::string &String); + + /// Serialize - Recursively cast the specified descriptor into a + /// GlobalVariable so that it can be serialized to a .bc or .ll file. + GlobalVariable *Serialize(DebugInfoDesc *DD); +}; - /// isDefinition - Return true if the global is a definition. - /// - bool isDefinition() const; +//===----------------------------------------------------------------------===// +/// DIVerifier - This class is responsible for verifying the given network of +/// GlobalVariables are valid as DebugInfoDesc objects. 
+class DIVerifier { +private: + unsigned DebugVersion; // Version of debug information in use. + std::set Visited; // Tracks visits during recursion. + std::map Counts; // Count of fields per Tag type. + + /// markVisited - Return true if the GlobalVariable hase been "seen" before. + /// Mark markVisited otherwise. + bool markVisited(GlobalVariable *GV); - /// getGlobalVariable - Return the global variable (tag == DW_TAG_variable.) - /// - GlobalVariable *getGlobalVariable() const; +public: + DIVerifier() : DebugVersion(LLVMDebugVersion) {} + ~DIVerifier() {} + + /// Verify - Return true if the GlobalVariable appears to be a valid + /// serialization of a DebugInfoDesc. + bool Verify(GlobalVariable *GV); }; //===----------------------------------------------------------------------===// @@ -228,8 +453,14 @@ /// class MachineDebugInfo : public ImmutablePass { private: + // Debug indforma + // Use the same serializer/deserializer/verifier for the module. + DISerializer SR; + DIDeserializer DR; + DIVerifier VR; + // CompileUnits - Uniquing vector for compile units. - UniqueVector CompileUnits; + UniqueVector CompileUnits; // Directories - Uniquing vector for directories. UniqueVector Directories; @@ -300,11 +531,11 @@ /// getCompileUnits - Return a vector of debug compile units. /// - const UniqueVector getCompileUnits() const; + const UniqueVector getCompileUnits() const; - /// getGlobalVariables - Return a vector of debug global variables. + /// getGlobalVariables - Return a vector of debug GlobalVariables. 
/// - static std::vector getGlobalVariables(Module &M); + std::vector getGlobalVariables(Module &M); }; // End class MachineDebugInfo From jlaskey at apple.com Mon Feb 6 09:33:35 2006 From: jlaskey at apple.com (Jim Laskey) Date: Mon, 6 Feb 2006 09:33:35 -0600 Subject: [llvm-commits] CVS: llvm/lib/CodeGen/DwarfWriter.cpp MachineDebugInfo.cpp Message-ID: <200602061533.JAA19280@zion.cs.uiuc.edu> Changes in directory llvm/lib/CodeGen: DwarfWriter.cpp updated: 1.24 -> 1.25 MachineDebugInfo.cpp updated: 1.9 -> 1.10 --- Log message: Changing model for the construction of debug information. --- Diffs of the changes: (+626 -106) DwarfWriter.cpp | 22 - MachineDebugInfo.cpp | 710 ++++++++++++++++++++++++++++++++++++++++++++------- 2 files changed, 626 insertions(+), 106 deletions(-) Index: llvm/lib/CodeGen/DwarfWriter.cpp diff -u llvm/lib/CodeGen/DwarfWriter.cpp:1.24 llvm/lib/CodeGen/DwarfWriter.cpp:1.25 --- llvm/lib/CodeGen/DwarfWriter.cpp:1.24 Fri Jan 27 14:31:25 2006 +++ llvm/lib/CodeGen/DwarfWriter.cpp Mon Feb 6 09:33:21 2006 @@ -1269,16 +1269,16 @@ /// NewCompileUnit - Create new compile unit information. /// -DIE *DwarfWriter::NewCompileUnit(const CompileUnitWrapper &CompileUnit) { +DIE *DwarfWriter::NewCompileUnit(const CompileUnitDesc *CompileUnit) { DIE *Unit = new DIE(DW_TAG_compile_unit, DW_CHILDREN_yes); // FIXME - use the correct line set. 
Unit->AddLabel (DW_AT_stmt_list, DW_FORM_data4, DWLabel("line", 0)); Unit->AddLabel (DW_AT_high_pc, DW_FORM_addr, DWLabel("text_end", 0)); Unit->AddLabel (DW_AT_low_pc, DW_FORM_addr, DWLabel("text_begin", 0)); - Unit->AddString(DW_AT_producer, DW_FORM_string, CompileUnit.getProducer()); - Unit->AddUInt (DW_AT_language, DW_FORM_data1, CompileUnit.getLanguage()); - Unit->AddString(DW_AT_name, DW_FORM_string, CompileUnit.getFileName()); - Unit->AddString(DW_AT_comp_dir, DW_FORM_string, CompileUnit.getDirectory()); + Unit->AddString(DW_AT_producer, DW_FORM_string, CompileUnit->getProducer()); + Unit->AddUInt (DW_AT_language, DW_FORM_data1, CompileUnit->getLanguage()); + Unit->AddString(DW_AT_name, DW_FORM_string, CompileUnit->getFileName()); + Unit->AddString(DW_AT_comp_dir, DW_FORM_string, CompileUnit->getDirectory()); Unit->Complete(*this); return Unit; @@ -1723,11 +1723,10 @@ /// ConstructCompileUnitDIEs - Create a compile unit DIE for each source and /// header file. void DwarfWriter::ConstructCompileUnitDIEs() { - const UniqueVector CUW = DebugInfo->getCompileUnits(); + const UniqueVector CUW = DebugInfo->getCompileUnits(); for (unsigned i = 1, N = CUW.size(); i <= N; ++i) { - const CompileUnitWrapper &CompileUnit = CUW[i]; - DIE *Unit = NewCompileUnit(CompileUnit); + DIE *Unit = NewCompileUnit(CUW[i]); DWContext *Context = new DWContext(*this, NULL, Unit); CompileUnits.push_back(Unit); } @@ -1738,11 +1737,12 @@ void DwarfWriter::ConstructGlobalDIEs(Module &M) { const TargetData &TD = Asm->TM.getTargetData(); - std::vector GlobalVariables = DebugInfo->getGlobalVariables(M); + std::vector GlobalVariables = + DebugInfo->getGlobalVariables(M); for (unsigned i = 0, N = GlobalVariables.size(); i < N; ++i) { - GlobalWrapper &GW = GlobalVariables[i]; - GlobalVariable *GV = GW.getGlobalVariable(); + GlobalVariableDesc *GVD = GlobalVariables[i]; + GlobalVariable *GV = GVD->getGlobalVariable(); if (!GV->hasInitializer()) continue; // External global require no code Index: 
llvm/lib/CodeGen/MachineDebugInfo.cpp diff -u llvm/lib/CodeGen/MachineDebugInfo.cpp:1.9 llvm/lib/CodeGen/MachineDebugInfo.cpp:1.10 --- llvm/lib/CodeGen/MachineDebugInfo.cpp:1.9 Fri Jan 27 11:31:30 2006 +++ llvm/lib/CodeGen/MachineDebugInfo.cpp Mon Feb 6 09:33:21 2006 @@ -11,11 +11,14 @@ #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" +#include "llvm/GlobalVariable.h" #include "llvm/Intrinsics.h" #include "llvm/Instructions.h" #include "llvm/Module.h" #include "llvm/Support/Dwarf.h" +#include + using namespace llvm; // Handle the Pass registration stuff necessary to use TargetData's. @@ -25,14 +28,14 @@ //===----------------------------------------------------------------------===// -/// getGlobalVariablesUsing - Return all of the global variables which have the +/// getGlobalVariablesUsing - Return all of the GlobalVariables which have the /// specified value in their initializer somewhere. static void getGlobalVariablesUsing(Value *V, std::vector &Result) { // Scan though value users. for (Value::use_iterator I = V->use_begin(), E = V->use_end(); I != E; ++I) { if (GlobalVariable *GV = dyn_cast(*I)) { - // If the user is a global variable then add to result. + // If the user is a GlobalVariable then add to result. Result.push_back(GV); } else if (Constant *C = dyn_cast(*I)) { // If the user is a constant variable then scan its users @@ -41,13 +44,13 @@ } } -/// getGlobalVariablesUsing - Return all of the global variables that use the -/// named global variable. +/// getGlobalVariablesUsing - Return all of the GlobalVariables that use the +/// named GlobalVariable. static std::vector getGlobalVariablesUsing(Module &M, const std::string &RootName) { - std::vector Result; // Global variables matching criteria. + std::vector Result; // GlobalVariables matching criteria. - // Get the global variable root. + // Get the GlobalVariable root. 
GlobalVariable *UseRoot = M.getGlobalVariable(RootName, StructType::get(std::vector())); @@ -98,131 +101,646 @@ return ""; } +/// isStringValue - Return true if the given value can be coerced to a string. +/// +static bool isStringValue(Value *V) { + if (GlobalVariable *GV = dyn_cast(V)) { + if (GV->hasInitializer() && isa(GV->getInitializer())) { + ConstantArray *Init = cast(GV->getInitializer()); + return Init->isString(); + } + } else if (Constant *C = dyn_cast(V)) { + if (GlobalValue *GV = dyn_cast(C)) + return isStringValue(GV); + else if (ConstantExpr *CE = dyn_cast(C)) { + if (CE->getOpcode() == Instruction::GetElementPtr) { + if (CE->getNumOperands() == 3 && + cast(CE->getOperand(1))->isNullValue() && + isa(CE->getOperand(2))) { + return isStringValue(CE->getOperand(0)); + } + } + } + } + return false; +} + /// getGlobalValue - Return either a direct or cast Global value. /// static GlobalVariable *getGlobalValue(Value *V) { if (GlobalVariable *GV = dyn_cast(V)) { return GV; } else if (ConstantExpr *CE = dyn_cast(V)) { - return CE->getOpcode() == Instruction::Cast ? dyn_cast(V) - : NULL; + if (CE->getOpcode() == Instruction::Cast) { + return dyn_cast(CE->getOperand(0)); + } } return NULL; } +/// isGlobalValue - Return true if the given value can be coerced to a +/// GlobalVariable. +static bool isGlobalValue(Value *V) { + if (isa(V) || isa(V)) { + return true; + } else if (ConstantExpr *CE = dyn_cast(V)) { + if (CE->getOpcode() == Instruction::Cast) { + return isa(CE->getOperand(0)); + } + } + return false; +} + +/// isUIntOperand - Return true if the ith operand is an unsigned integer. +/// +static bool isUIntOperand(GlobalVariable *GV, unsigned i) { + // Make sure the GlobalVariable has an initializer. + if (!GV->hasInitializer()) return false; -//===----------------------------------------------------------------------===// + // Get the initializer constant. 
+ ConstantStruct *CI = dyn_cast(GV->getInitializer()); + if (!CI) return false; + + // Check if there is at least i + 1 operands. + unsigned N = CI->getNumOperands(); + if (i >= N) return false; -DebugInfoWrapper::DebugInfoWrapper(GlobalVariable *G) -: GV(G) -, IC(dyn_cast(GV->getInitializer())) { - assert(IC && "llvm.db.global is missing structured constant"); + // Check constant. + return isa(CI->getOperand(i)); } - + //===----------------------------------------------------------------------===// -CompileUnitWrapper::CompileUnitWrapper(GlobalVariable *G) -: DebugInfoWrapper(G) -{ - // FIXME - should probably ease up on the number of operands (version.) - assert(IC->getNumOperands() == N_op && - "Compile unit does not have correct number of operands"); +/// TagFromGlobal - Returns the Tag number from a debug info descriptor +/// GlobalVariable. +unsigned DebugInfoDesc::TagFromGlobal(GlobalVariable *GV, bool Checking) { + if (Checking && !isUIntOperand(GV, 0)) return DIInvalid; + ConstantStruct *CI = cast(GV->getInitializer()); + Constant *C = CI->getOperand(0); + return cast(C)->getValue(); +} + +/// DescFactory - Create an instance of debug info descriptor based on Tag. +/// Return NULL if not a recognized Tag. +DebugInfoDesc *DebugInfoDesc::DescFactory(unsigned Tag) { + switch (Tag) { + case DI_TAG_compile_unit: return new CompileUnitDesc(); + case DI_TAG_global_variable: return new GlobalVariableDesc(); + case DI_TAG_subprogram: return new SubprogramDesc(); + default: break; + } + return NULL; } -/// getTag - Return the compile unit's tag number. Currently should be -/// DW_TAG_variable. -unsigned CompileUnitWrapper::getTag() const { - return cast(IC->getOperand(Tag_op))->getValue(); +//===----------------------------------------------------------------------===// + +/// ApplyToFields - Target the manager to each field of the debug information +/// descriptor. 
+void DIApplyManager::ApplyToFields(DebugInfoDesc *DD) { + DD->ApplyToFields(this); } -/// isCorrectDebugVersion - Return true if is the correct llvm debug version. -/// Currently the value is 0 (zero.) If the value is is not correct then -/// ignore all debug information. -bool CompileUnitWrapper::isCorrectDebugVersion() const { - return cast(IC->getOperand(Version_op))->getValue() == 0; +//===----------------------------------------------------------------------===// +/// DICountAppMgr - This DIApplyManager counts all the fields in the supplied +/// debug the supplied DebugInfoDesc. +class DICountAppMgr : public DIApplyManager { +private: + unsigned Count; // Running count of fields. + +public: + DICountAppMgr() : DIApplyManager(), Count(1) {} + + // Accessors. + unsigned getCount() const { return Count; } + + /// Apply - Count each of the fields. + /// + virtual void Apply(int &Field) { ++Count; } + virtual void Apply(unsigned &Field) { ++Count; } + virtual void Apply(bool &Field) { ++Count; } + virtual void Apply(std::string &Field) { ++Count; } + virtual void Apply(DebugInfoDesc *&Field) { ++Count; } + virtual void Apply(GlobalVariable *&Field) { ++Count; } +}; + +//===----------------------------------------------------------------------===// +/// DIDeserializeAppMgr - This DIApplyManager deserializes all the fields in +/// the supplied DebugInfoDesc. +class DIDeserializeAppMgr : public DIApplyManager { +private: + DIDeserializer &DR; // Active deserializer. + unsigned I; // Current operand index. + ConstantStruct *CI; // GlobalVariable constant initializer. + +public: + DIDeserializeAppMgr(DIDeserializer &D, GlobalVariable *GV) + : DIApplyManager() + , DR(D) + , I(1) + , CI(cast(GV->getInitializer())) + {} + + /// Apply - Set the value of each of the fields. 
+ /// + virtual void Apply(int &Field) { + Constant *C = CI->getOperand(I++); + Field = cast(C)->getValue(); + } + virtual void Apply(unsigned &Field) { + Constant *C = CI->getOperand(I++); + Field = cast(C)->getValue(); + } + virtual void Apply(bool &Field) { + Constant *C = CI->getOperand(I++); + Field = cast(C)->getValue(); + } + virtual void Apply(std::string &Field) { + Constant *C = CI->getOperand(I++); + Field = getStringValue(C); + } + virtual void Apply(DebugInfoDesc *&Field) { + Constant *C = CI->getOperand(I++); + Field = DR.Deserialize(C); + } + virtual void Apply(GlobalVariable *&Field) { + Constant *C = CI->getOperand(I++); + Field = getGlobalValue(C); + } +}; + +//===----------------------------------------------------------------------===// +/// DISerializeAppMgr - This DIApplyManager serializes all the fields in +/// the supplied DebugInfoDesc. +class DISerializeAppMgr : public DIApplyManager { +private: + DISerializer &SR; // Active serializer. + std::vector &Elements; // Element accumulator. + +public: + DISerializeAppMgr(DISerializer &S, std::vector &E) + : DIApplyManager() + , SR(S) + , Elements(E) + {} + + /// Apply - Set the value of each of the fields. + /// + virtual void Apply(int &Field) { + Elements.push_back(ConstantUInt::get(Type::IntTy, Field)); + } + virtual void Apply(unsigned &Field) { + Elements.push_back(ConstantUInt::get(Type::UIntTy, Field)); + } + virtual void Apply(bool &Field) { + Elements.push_back(ConstantBool::get(Field)); + } + virtual void Apply(std::string &Field) { + Elements.push_back(SR.getString(Field)); + } + virtual void Apply(DebugInfoDesc *&Field) { + GlobalVariable *GV = NULL; + + // If non-NULL the convert to global. + if (Field) GV = SR.Serialize(Field); + + // FIXME - At some point should use specific type. + const PointerType *EmptyTy = SR.getEmptyStructPtrType(); + + if (GV) { + // Set to pointer to global. + Elements.push_back(ConstantExpr::getCast(GV, EmptyTy)); + } else { + // Use NULL. 
+ Elements.push_back(ConstantPointerNull::get(EmptyTy)); + } + } + virtual void Apply(GlobalVariable *&Field) { + const PointerType *EmptyTy = SR.getEmptyStructPtrType(); + Elements.push_back(ConstantExpr::getCast(Field, EmptyTy)); + } +}; + +//===----------------------------------------------------------------------===// +/// DIGetTypesAppMgr - This DIApplyManager gathers all the field types in +/// the supplied DebugInfoDesc. +class DIGetTypesAppMgr : public DIApplyManager { +private: + DISerializer &SR; // Active serializer. + std::vector &Fields; // Type accumulator. + +public: + DIGetTypesAppMgr(DISerializer &S, std::vector &F) + : DIApplyManager() + , SR(S) + , Fields(F) + {} + + /// Apply - Set the value of each of the fields. + /// + virtual void Apply(int &Field) { + Fields.push_back(Type::IntTy); + } + virtual void Apply(unsigned &Field) { + Fields.push_back(Type::UIntTy); + } + virtual void Apply(bool &Field) { + Fields.push_back(Type::BoolTy); + } + virtual void Apply(std::string &Field) { + Fields.push_back(SR.getStrPtrType()); + } + virtual void Apply(DebugInfoDesc *&Field) { + // FIXME - At some point should use specific type. + const PointerType *EmptyTy = SR.getEmptyStructPtrType(); + Fields.push_back(EmptyTy); + } + virtual void Apply(GlobalVariable *&Field) { + const PointerType *EmptyTy = SR.getEmptyStructPtrType(); + Fields.push_back(EmptyTy); + } +}; + +//===----------------------------------------------------------------------===// +/// DIVerifyAppMgr - This DIApplyManager verifies all the field types against +/// a constant initializer. +class DIVerifyAppMgr : public DIApplyManager { +private: + DIVerifier &VR; // Active verifier. + bool IsValid; // Validity status. + unsigned I; // Current operand index. + ConstantStruct *CI; // GlobalVariable constant initializer. 
+ +public: + DIVerifyAppMgr(DIVerifier &V, GlobalVariable *GV) + : DIApplyManager() + , VR(V) + , IsValid(true) + , I(1) + , CI(cast(GV->getInitializer())) + { + } + + // Accessors. + bool isValid() const { return IsValid; } + + /// Apply - Set the value of each of the fields. + /// + virtual void Apply(int &Field) { + Constant *C = CI->getOperand(I++); + IsValid = IsValid && isa(C); + } + virtual void Apply(unsigned &Field) { + Constant *C = CI->getOperand(I++); + IsValid = IsValid && isa(C); + } + virtual void Apply(bool &Field) { + Constant *C = CI->getOperand(I++); + IsValid = IsValid && isa(C); + } + virtual void Apply(std::string &Field) { + Constant *C = CI->getOperand(I++); + IsValid = IsValid && isStringValue(C); + } + virtual void Apply(DebugInfoDesc *&Field) { + // FIXME - Prepare the correct descriptor. + Constant *C = CI->getOperand(I++); + IsValid = IsValid && isGlobalValue(C); + } + virtual void Apply(GlobalVariable *&Field) { + Constant *C = CI->getOperand(I++); + IsValid = IsValid && isGlobalValue(C); + } +}; + +//===----------------------------------------------------------------------===// + +/// DebugVersionFromGlobal - Returns the version number from a compile unit +/// GlobalVariable. +unsigned CompileUnitDesc::DebugVersionFromGlobal(GlobalVariable *GV, + bool Checking) { + if (Checking && !isUIntOperand(GV, 1)) return DIInvalid; + ConstantStruct *CI = cast(GV->getInitializer()); + Constant *C = CI->getOperand(1); + return cast(C)->getValue(); +} + +/// ApplyToFields - Target the apply manager to the fields of the +/// CompileUnitDesc. +void CompileUnitDesc::ApplyToFields(DIApplyManager *Mgr) { + Mgr->Apply(DebugVersion); + Mgr->Apply(Language); + Mgr->Apply(FileName); + Mgr->Apply(Directory); + Mgr->Apply(Producer); + Mgr->Apply(TransUnit); +} + +/// TypeString - Return a string used to compose globalnames and labels. 
+/// +const char *CompileUnitDesc::TypeString() const { + return "compile_unit"; +} + +#ifndef NDEBUG +void CompileUnitDesc::dump() { + std::cerr << TypeString() << " " + << "Tag(" << getTag() << "), " + << "Language(" << Language << "), " + << "FileName(\"" << FileName << "\"), " + << "Directory(\"" << Directory << "\"), " + << "Producer(\"" << Producer << "\")\n"; } +#endif -/// getLanguage - Return the compile unit's language number (ex. DW_LANG_C89.) -/// -unsigned CompileUnitWrapper::getLanguage() const { - return cast(IC->getOperand(Language_op))->getValue(); +//===----------------------------------------------------------------------===// + +/// ApplyToFields - Target the apply manager to the fields of the +/// GlobalVariableDesc. +void GlobalVariableDesc::ApplyToFields(DIApplyManager *Mgr) { + Mgr->Apply(Context); + Mgr->Apply(Name); + Mgr->Apply(TransUnit); + Mgr->Apply(TyDesc); + Mgr->Apply(IsStatic); + Mgr->Apply(IsDefinition); + Mgr->Apply(Global); +} + +/// TypeString - Return a string used to compose globalnames and labels. +/// +const char *GlobalVariableDesc::TypeString() const { + return "global_variable"; +} + +#ifndef NDEBUG +void GlobalVariableDesc::dump() { + std::cerr << TypeString() << " " + << "Tag(" << getTag() << "), " + << "Name(\"" << Name << "\"), " + << "Type(" << TyDesc << "), " + << "IsStatic(" << (IsStatic ? "true" : "false") << "), " + << "IsDefinition(" << (IsDefinition ? "true" : "false") << "), " + << "Global(" << Global << ")\n"; } +#endif -/// getFileName - Return the compile unit's file name. -/// -const std::string CompileUnitWrapper::getFileName() const { - return getStringValue(IC->getOperand(FileName_op)); +//===----------------------------------------------------------------------===// + +/// ApplyToFields - Target the apply manager to the fields of the +/// SubprogramDesc. 
+void SubprogramDesc::ApplyToFields(DIApplyManager *Mgr) { + Mgr->Apply(Context); + Mgr->Apply(Name); + Mgr->Apply(TransUnit); + Mgr->Apply(TyDesc); + Mgr->Apply(IsStatic); + Mgr->Apply(IsDefinition); + + // FIXME - Temp variable until restructured. + GlobalVariable *Tmp; + Mgr->Apply(Tmp); } -/// getDirectory - Return the compile unit's file directory. +/// TypeString - Return a string used to compose globalnames and labels. /// -const std::string CompileUnitWrapper::getDirectory() const { - return getStringValue(IC->getOperand(Directory_op)); +const char *SubprogramDesc::TypeString() const { + return "subprogram"; } - -/// getProducer - Return the compile unit's generator name. -/// -const std::string CompileUnitWrapper::getProducer() const { - return getStringValue(IC->getOperand(Producer_op)); + +#ifndef NDEBUG +void SubprogramDesc::dump() { + std::cerr << TypeString() << " " + << "Tag(" << getTag() << "), " + << "Name(\"" << Name << "\"), " + << "Type(" << TyDesc << "), " + << "IsStatic(" << (IsStatic ? "true" : "false") << "), " + << "IsDefinition(" << (IsDefinition ? "true" : "false") << ")\n"; } +#endif //===----------------------------------------------------------------------===// -GlobalWrapper::GlobalWrapper(GlobalVariable *G) -: DebugInfoWrapper(G) -{ - // FIXME - should probably ease up on the number of operands (version.) - assert(IC->getNumOperands() == N_op && - "Global does not have correct number of operands"); +DebugInfoDesc *DIDeserializer::Deserialize(Value *V) { + return Deserialize(cast(V)); } +DebugInfoDesc *DIDeserializer::Deserialize(GlobalVariable *GV) { + // Check to see if it has been already deserialized. + DebugInfoDesc *&Slot = GlobalDescs[GV]; + if (Slot) return Slot; -/// getTag - Return the global's tag number. Currently should be -/// DW_TAG_variable or DW_TAG_subprogram. -unsigned GlobalWrapper::getTag() const { - return cast(IC->getOperand(Tag_op))->getValue(); + // Get the Tag from the global. 
+ unsigned Tag = DebugInfoDesc::TagFromGlobal(GV); + + // Get the debug version if a compile unit. + if (Tag == DI_TAG_compile_unit) { + DebugVersion = CompileUnitDesc::DebugVersionFromGlobal(GV); + } + + // Create an empty instance of the correct sort. + Slot = DebugInfoDesc::DescFactory(Tag); + assert(Slot && "Unknown Tag"); + + // Deserialize the fields. + DIDeserializeAppMgr DRAM(*this, GV); + DRAM.ApplyToFields(Slot); + + return Slot; } -/// getContext - Return the "lldb.compile_unit" context global. +//===----------------------------------------------------------------------===// + +/// getStrPtrType - Return a "sbyte *" type. /// -GlobalVariable *GlobalWrapper::getContext() const { - return getGlobalValue(IC->getOperand(Context_op)); +const PointerType *DISerializer::getStrPtrType() { + // If not already defined. + if (!StrPtrTy) { + // Construct the pointer to signed bytes. + StrPtrTy = PointerType::get(Type::SByteTy); + } + + return StrPtrTy; } -/// getName - Return the name of the global. +/// getEmptyStructPtrType - Return a "{ }*" type. /// -const std::string GlobalWrapper::getName() const { - return getStringValue(IC->getOperand(Name_op)); +const PointerType *DISerializer::getEmptyStructPtrType() { + // If not already defined. + if (!EmptyStructPtrTy) { + // Construct the empty structure type. + const StructType *EmptyStructTy = + StructType::get(std::vector()); + // Construct the pointer to empty structure type. + EmptyStructPtrTy = PointerType::get(EmptyStructTy); + } + + return EmptyStructPtrTy; } -/// getType - Return the type of the global. +/// getTagType - Return the type describing the specified descriptor (via tag.) /// -const GlobalVariable *GlobalWrapper::getType() const { - return getGlobalValue(IC->getOperand(Type_op)); +const StructType *DISerializer::getTagType(DebugInfoDesc *DD) { + // Attempt to get the previously defined type. + StructType *&Ty = TagTypes[DD->getTag()]; + + // If not already defined. 
+ if (!Ty) { + // Get descriptor type name. + const char *TS = DD->TypeString(); + + // Set up fields vector. + std::vector Fields; + // Add tag field. + Fields.push_back(Type::UIntTy); + // Get types of remaining fields. + DIGetTypesAppMgr GTAM(*this, Fields); + GTAM.ApplyToFields(DD); + + // Construct structured type. + Ty = StructType::get(Fields); + + // Construct a name for the type. + const std::string Name = std::string("lldb.") + DD->TypeString() + ".type"; + + // Register type name with module. + M->addTypeName(Name, Ty); + } + + return Ty; } -/// isStatic - Return true if the global is static. +/// getString - Construct the string as constant string global. /// -bool GlobalWrapper::isStatic() const { - return cast(IC->getOperand(Static_op))->getValue(); +GlobalVariable *DISerializer::getString(const std::string &String) { + // Check string cache for previous edition. + GlobalVariable *&Slot = StringCache[String]; + // return GlobalVariable if previously defined. + if (Slot) return Slot; + // Construct strings as an llvm constant. + Constant *ConstStr = ConstantArray::get(String); + // Otherwise create and return a new string global. + return Slot = new GlobalVariable(ConstStr->getType(), true, + GlobalVariable::InternalLinkage, + ConstStr, "str", M); +} + +/// Serialize - Recursively cast the specified descriptor into a GlobalVariable +/// so that it can be serialized to a .bc or .ll file. +GlobalVariable *DISerializer::Serialize(DebugInfoDesc *DD) { + // Check if the DebugInfoDesc is already in the map. + GlobalVariable *&Slot = DescGlobals[DD]; + + // See if DebugInfoDesc exists, if so return prior GlobalVariable. + if (Slot) return Slot; + + // Get DebugInfoDesc type Tag. + unsigned Tag = DD->getTag(); + + // Construct name. + const std::string Name = std::string("lldb.") + + DD->TypeString(); + + // Get the type associated with the Tag. + const StructType *Ty = getTagType(DD); + + // Create the GlobalVariable early to prevent infinite recursion. 
+ GlobalVariable *GV = new GlobalVariable(Ty, true, + GlobalValue::InternalLinkage, + NULL, Name, M); + + // Insert new GlobalVariable in DescGlobals map. + Slot = GV; + + // Set up elements vector + std::vector Elements; + // Add Tag value. + Elements.push_back(ConstantUInt::get(Type::UIntTy, Tag)); + // Add remaining fields. + DISerializeAppMgr SRAM(*this, Elements); + SRAM.ApplyToFields(DD); + + // Set the globals initializer. + GV->setInitializer(ConstantStruct::get(Ty, Elements)); + + return GV; } -/// isDefinition - Return true if the global is a definition. -/// -bool GlobalWrapper::isDefinition() const { - return dyn_cast(IC->getOperand(Definition_op))->getValue(); +//===----------------------------------------------------------------------===// + +/// markVisited - Return true if the GlobalVariable hase been "seen" before. +/// Mark visited otherwise. +bool DIVerifier::markVisited(GlobalVariable *GV) { + // Check if the GlobalVariable is already in the Visited set. + std::set::iterator VI = Visited.lower_bound(GV); + + // See if GlobalVariable exists. + bool Exists = VI != Visited.end() && *VI == GV; + + // Insert in set. + if (!Exists) Visited.insert(VI, GV); + + return Exists; } -/// getGlobalVariable - Return the global variable (tag == DW_TAG_variable.) -/// -GlobalVariable *GlobalWrapper::getGlobalVariable() const { - return getGlobalValue(IC->getOperand(GlobalVariable_op)); +/// Verify - Return true if the GlobalVariable appears to be a valid +/// serialization of a DebugInfoDesc. +bool DIVerifier::Verify(GlobalVariable *GV) { + // Check if seen before. + if (markVisited(GV)) return true; + + // Get the Tag + unsigned Tag = DebugInfoDesc::TagFromGlobal(GV, true); + if (Tag == DIInvalid) return false; + + // If a compile unit we need the debug version. + if (Tag == DI_TAG_compile_unit) { + DebugVersion = CompileUnitDesc::DebugVersionFromGlobal(GV, true); + if (DebugVersion == DIInvalid) return false; + } + + // Construct an empty DebugInfoDesc. 
+ DebugInfoDesc *DD = DebugInfoDesc::DescFactory(Tag); + if (!DD) return false; + + // Get the initializer constant. + ConstantStruct *CI = cast(GV->getInitializer()); + + // Get the operand count. + unsigned N = CI->getNumOperands(); + + // Get the field count. + unsigned &Slot = Counts[Tag]; + if (!Slot) { + // Check the operand count to the field count + DICountAppMgr CTAM; + CTAM.ApplyToFields(DD); + Slot = CTAM.getCount(); + } + + // Field count must equal operand count. + if (Slot != N) { + delete DD; + return false; + } + + // Check each field for valid type. + DIVerifyAppMgr VRAM(*this, GV); + VRAM.ApplyToFields(DD); + + // Release empty DebugInfoDesc. + delete DD; + + // Return result of field tests. + return VRAM.isValid(); } //===----------------------------------------------------------------------===// MachineDebugInfo::MachineDebugInfo() -: CompileUnits() +: SR() +, DR() +, VR() +, CompileUnits() , Directories() , SourceFiles() , Lines() @@ -248,54 +766,56 @@ /// AnalyzeModule - Scan the module for global debug information. /// void MachineDebugInfo::AnalyzeModule(Module &M) { + SR.setModule(&M); + DR.setModule(&M); SetupCompileUnits(M); } /// SetupCompileUnits - Set up the unique vector of compile units. /// void MachineDebugInfo::SetupCompileUnits(Module &M) { + SR.setModule(&M); + DR.setModule(&M); // Get vector of all debug compile units. std::vector Globals = getGlobalVariablesUsing(M, "llvm.dbg.translation_units"); // Scan all compile unit globals. for (unsigned i = 0, N = Globals.size(); i < N; ++i) { - // Create wrapper for compile unit. - CompileUnitWrapper CUI(Globals[i]); - // Add to result. - if (CUI.isCorrectDebugVersion()) CompileUnits.insert(CUI); + // Add compile unit to result. 
+ CompileUnits.insert( + static_cast(DR.Deserialize(Globals[i]))); } - - // If there any bad compile units then suppress debug information - if (CompileUnits.size() != Globals.size()) CompileUnits.reset(); } /// getCompileUnits - Return a vector of debug compile units. /// -const UniqueVector MachineDebugInfo::getCompileUnits()const{ +const UniqueVector MachineDebugInfo::getCompileUnits()const{ return CompileUnits; } -/// getGlobalVariables - Return a vector of debug global variables. +/// getGlobalVariables - Return a vector of debug GlobalVariables. /// -std::vector MachineDebugInfo::getGlobalVariables(Module &M) { +std::vector +MachineDebugInfo::getGlobalVariables(Module &M) { + SR.setModule(&M); + DR.setModule(&M); // Get vector of all debug global objects. std::vector Globals = getGlobalVariablesUsing(M, "llvm.dbg.globals"); - // Accumulation of global variables. - std::vector GlobalVariables; + // Accumulation of GlobalVariables. + std::vector GlobalVariables; -// FIXME - skip until globals have new format -#if 0 // Scan all globals. for (unsigned i = 0, N = Globals.size(); i < N; ++i) { - // Create wrapper for global. - GlobalWrapper GW(Globals[i]); - // If the global is a variable then add to result. 
- if (GW.getTag() == DW_TAG_variable) GlobalVariables.push_back(GW); + GlobalVariable *GV = Globals[i]; + if (DebugInfoDesc::TagFromGlobal(GV, true) == DI_TAG_global_variable) { + GlobalVariableDesc *GVD = + static_cast(DR.Deserialize(GV)); + GlobalVariables.push_back(GVD); + } } -#endif return GlobalVariables; } From alenhar2 at cs.uiuc.edu Mon Feb 6 11:15:29 2006 From: alenhar2 at cs.uiuc.edu (Andrew Lenharth) Date: Mon, 6 Feb 2006 11:15:29 -0600 Subject: [llvm-commits] CVS: llvm/lib/Target/Alpha/AlphaAsmPrinter.cpp Message-ID: <200602061715.LAA19779@zion.cs.uiuc.edu> Changes in directory llvm/lib/Target/Alpha: AlphaAsmPrinter.cpp updated: 1.30 -> 1.31 --- Log message: see what this allignment thing will do --- Diffs of the changes: (+2 -1) AlphaAsmPrinter.cpp | 3 ++- 1 files changed, 2 insertions(+), 1 deletion(-) Index: llvm/lib/Target/Alpha/AlphaAsmPrinter.cpp diff -u llvm/lib/Target/Alpha/AlphaAsmPrinter.cpp:1.30 llvm/lib/Target/Alpha/AlphaAsmPrinter.cpp:1.31 --- llvm/lib/Target/Alpha/AlphaAsmPrinter.cpp:1.30 Sat Feb 4 13:13:09 2006 +++ llvm/lib/Target/Alpha/AlphaAsmPrinter.cpp Mon Feb 6 11:15:17 2006 @@ -235,7 +235,8 @@ std::string name = Mang->getValueName(I); Constant *C = I->getInitializer(); unsigned Size = TD.getTypeSize(C->getType()); - unsigned Align = TD.getTypeAlignmentShift(C->getType()); + // unsigned Align = TD.getTypeAlignmentShift(C->getType()); + unsigned Align = getPreferredAlignmentLog(I); if (C->isNullValue() && (I->hasLinkOnceLinkage() || I->hasInternalLinkage() || From jlaskey at apple.com Mon Feb 6 13:12:14 2006 From: jlaskey at apple.com (Jim Laskey) Date: Mon, 6 Feb 2006 13:12:14 -0600 Subject: [llvm-commits] CVS: llvm/include/llvm/CodeGen/MachineDebugInfo.h Message-ID: <200602061912.NAA20311@zion.cs.uiuc.edu> Changes in directory llvm/include/llvm/CodeGen: MachineDebugInfo.h updated: 1.11 -> 1.12 --- Log message: Edit requests from Sabre. 
--- Diffs of the changes: (+27 -26) MachineDebugInfo.h | 53 +++++++++++++++++++++++++++-------------------------- 1 files changed, 27 insertions(+), 26 deletions(-) Index: llvm/include/llvm/CodeGen/MachineDebugInfo.h diff -u llvm/include/llvm/CodeGen/MachineDebugInfo.h:1.11 llvm/include/llvm/CodeGen/MachineDebugInfo.h:1.12 --- llvm/include/llvm/CodeGen/MachineDebugInfo.h:1.11 Mon Feb 6 09:33:21 2006 +++ llvm/include/llvm/CodeGen/MachineDebugInfo.h Mon Feb 6 13:12:02 2006 @@ -11,7 +11,7 @@ // neutral form that can be used by different debugging schemes. // // The organization of information is primarily clustered around the source -// compile units. The main exception is source line coorespondence where +// compile units. The main exception is source line correspondence where // inlining may interleave code from various compile units. // // The following information can be retrieved from the MachineDebugInfo. @@ -23,7 +23,7 @@ // -- Source line coorespondence - A vector of file ID, line#, column# triples. // A DEBUG_LOCATION instruction is generated by the DAG Legalizer // corresponding to each entry in the source line list. This allows a debug -// emitter to generate labels referenced by degug information tables. +// emitter to generate labels referenced by debug information tables. // //===----------------------------------------------------------------------===// @@ -50,9 +50,12 @@ //===----------------------------------------------------------------------===// // Debug info constants. + +// Invalid result indicator. +#define DIINVALID (~0U) + enum { LLVMDebugVersion = 1, // Current version of debug information. - DIInvalid = ~0U, // Invalid result indicator. // DebugInfoDesc type identifying tags. // FIXME - Change over with gcc4. @@ -68,15 +71,15 @@ }; //===----------------------------------------------------------------------===// -/// DIApplyManager - Subclasses of this class apply steps to each of the fields -/// in the supplied DebugInfoDesc. 
-class DIApplyManager { +/// DIVisitor - Subclasses of this class apply steps to each of the fields in +/// the supplied DebugInfoDesc. +class DIVisitor { public: - DIApplyManager() {} - virtual ~DIApplyManager() {} + DIVisitor() {} + virtual ~DIVisitor() {} - /// ApplyToFields - Target the manager to each field of the debug information + /// ApplyToFields - Target the visitor to each field of the debug information /// descriptor. void ApplyToFields(DebugInfoDesc *DD); @@ -109,8 +112,8 @@ unsigned getTag() const { return Tag; } /// TagFromGlobal - Returns the Tag number from a debug info descriptor - /// GlobalVariable. - static unsigned TagFromGlobal(GlobalVariable *GV, bool Checking = false); + /// GlobalVariable. Return DIIValid if operand is not an unsigned int. + static unsigned TagFromGlobal(GlobalVariable *GV); /// DescFactory - Create an instance of debug info descriptor based on Tag. /// Return NULL if not a recognized Tag. @@ -125,9 +128,9 @@ //===--------------------------------------------------------------------===// // Subclasses should supply the following virtual methods. - /// ApplyToFields - Target the apply manager to the fields of the descriptor. + /// ApplyToFields - Target the vistor to the fields of the descriptor. /// - virtual void ApplyToFields(DIApplyManager *Mgr) = 0; + virtual void ApplyToFields(DIVisitor *Visitor) = 0; /// TypeString - Return a string used to compose globalnames and labels. /// @@ -181,13 +184,12 @@ } /// DebugVersionFromGlobal - Returns the version number from a compile unit - /// GlobalVariable. - static unsigned DebugVersionFromGlobal(GlobalVariable *GV, - bool Checking = false); - - /// ApplyToFields - Target the apply manager to the fields of the - /// CompileUnitDesc. - virtual void ApplyToFields(DIApplyManager *Mgr); + /// GlobalVariable. Return DIIValid if operand is not an unsigned int. 
+ static unsigned DebugVersionFromGlobal(GlobalVariable *GV); + + /// ApplyToFields - Target the visitor to the fields of the CompileUnitDesc. + /// + virtual void ApplyToFields(DIVisitor *Visitor); /// TypeString - Return a string used to compose globalnames and labels. /// @@ -242,9 +244,9 @@ return D->getTag() == DI_TAG_global_variable; } - /// ApplyToFields - Target the apply manager to the fields of the + /// ApplyToFields - Target the visitor to the fields of the /// GlobalVariableDesc. - virtual void ApplyToFields(DIApplyManager *Mgr); + virtual void ApplyToFields(DIVisitor *Visitor); /// TypeString - Return a string used to compose globalnames and labels. /// @@ -295,9 +297,9 @@ return D->getTag() == DI_TAG_subprogram; } - /// ApplyToFields - Target the apply manager to the fields of the - /// SubprogramDesc. - virtual void ApplyToFields(DIApplyManager *Mgr); + /// ApplyToFields - Target the visitor to the fields of the SubprogramDesc. + /// + virtual void ApplyToFields(DIVisitor *Visitor); /// TypeString - Return a string used to compose globalnames and labels. /// @@ -453,7 +455,6 @@ /// class MachineDebugInfo : public ImmutablePass { private: - // Debug indforma // Use the same serializer/deserializer/verifier for the module. DISerializer SR; DIDeserializer DR; From jlaskey at apple.com Mon Feb 6 13:12:15 2006 From: jlaskey at apple.com (Jim Laskey) Date: Mon, 6 Feb 2006 13:12:15 -0600 Subject: [llvm-commits] CVS: llvm/lib/CodeGen/MachineDebugInfo.cpp Message-ID: <200602061912.NAA20315@zion.cs.uiuc.edu> Changes in directory llvm/lib/CodeGen: MachineDebugInfo.cpp updated: 1.10 -> 1.11 --- Log message: Edit requests from Sabre. 
--- Diffs of the changes: (+80 -85) MachineDebugInfo.cpp | 165 ++++++++++++++++++++++++--------------------------- 1 files changed, 80 insertions(+), 85 deletions(-) Index: llvm/lib/CodeGen/MachineDebugInfo.cpp diff -u llvm/lib/CodeGen/MachineDebugInfo.cpp:1.10 llvm/lib/CodeGen/MachineDebugInfo.cpp:1.11 --- llvm/lib/CodeGen/MachineDebugInfo.cpp:1.10 Mon Feb 6 09:33:21 2006 +++ llvm/lib/CodeGen/MachineDebugInfo.cpp Mon Feb 6 13:12:02 2006 @@ -125,9 +125,9 @@ return false; } -/// getGlobalValue - Return either a direct or cast Global value. +/// getGlobalVariable - Return either a direct or cast Global value. /// -static GlobalVariable *getGlobalValue(Value *V) { +static GlobalVariable *getGlobalVariable(Value *V) { if (GlobalVariable *GV = dyn_cast(V)) { return GV; } else if (ConstantExpr *CE = dyn_cast(V)) { @@ -138,9 +138,9 @@ return NULL; } -/// isGlobalValue - Return true if the given value can be coerced to a +/// isGlobalVariable - Return true if the given value can be coerced to a /// GlobalVariable. -static bool isGlobalValue(Value *V) { +static bool isGlobalVariable(Value *V) { if (isa(V) || isa(V)) { return true; } else if (ConstantExpr *CE = dyn_cast(V)) { @@ -151,33 +151,31 @@ return false; } -/// isUIntOperand - Return true if the ith operand is an unsigned integer. +/// getUIntOperand - Return ith operand if it is an unsigned integer. /// -static bool isUIntOperand(GlobalVariable *GV, unsigned i) { +static ConstantUInt *getUIntOperand(GlobalVariable *GV, unsigned i) { // Make sure the GlobalVariable has an initializer. - if (!GV->hasInitializer()) return false; + if (!GV->hasInitializer()) return NULL; // Get the initializer constant. ConstantStruct *CI = dyn_cast(GV->getInitializer()); - if (!CI) return false; + if (!CI) return NULL; // Check if there is at least i + 1 operands. unsigned N = CI->getNumOperands(); - if (i >= N) return false; + if (i >= N) return NULL; // Check constant. 
- return isa<ConstantUInt>(CI->getOperand(i)); + return dyn_cast<ConstantUInt>(CI->getOperand(i)); } //===----------------------------------------------------------------------===// /// TagFromGlobal - Returns the Tag number from a debug info descriptor /// GlobalVariable. -unsigned DebugInfoDesc::TagFromGlobal(GlobalVariable *GV, bool Checking) { - if (Checking && !isUIntOperand(GV, 0)) return DIInvalid; - ConstantStruct *CI = cast<ConstantStruct>(GV->getInitializer()); - Constant *C = CI->getOperand(0); - return cast<ConstantUInt>(C)->getValue(); +unsigned DebugInfoDesc::TagFromGlobal(GlobalVariable *GV) { + ConstantUInt *C = getUIntOperand(GV, 0); + return C ? C->getValue() : DIINVALID; } /// DescFactory - Create an instance of debug info descriptor based on Tag. @@ -194,21 +192,21 @@ //===----------------------------------------------------------------------===// -/// ApplyToFields - Target the manager to each field of the debug information +/// ApplyToFields - Target the visitor to each field of the debug information /// descriptor. -void DIApplyManager::ApplyToFields(DebugInfoDesc *DD) { +void DIVisitor::ApplyToFields(DebugInfoDesc *DD) { DD->ApplyToFields(this); } //===----------------------------------------------------------------------===// -/// DICountAppMgr - This DIApplyManager counts all the fields in the supplied -/// debug the supplied DebugInfoDesc. -class DICountAppMgr : public DIApplyManager { +/// DICountVisitor - This DIVisitor counts all the fields in the supplied debug +/// the supplied DebugInfoDesc. +class DICountVisitor : public DIVisitor { private: unsigned Count; // Running count of fields. public: - DICountAppMgr() : DIApplyManager(), Count(1) {} + DICountVisitor() : DIVisitor(), Count(1) {} // Accessors. unsigned getCount() const { return Count; } @@ -224,17 +222,17 @@ }; //===----------------------------------------------------------------------===// -/// DIDeserializeAppMgr - This DIApplyManager deserializes all the fields in -/// the supplied DebugInfoDesc. 
-class DIDeserializeAppMgr : public DIApplyManager { +/// DIDeserializeVisitor - This DIVisitor deserializes all the fields in the +/// supplied DebugInfoDesc. +class DIDeserializeVisitor : public DIVisitor { private: DIDeserializer &DR; // Active deserializer. unsigned I; // Current operand index. ConstantStruct *CI; // GlobalVariable constant initializer. public: - DIDeserializeAppMgr(DIDeserializer &D, GlobalVariable *GV) - : DIApplyManager() + DIDeserializeVisitor(DIDeserializer &D, GlobalVariable *GV) + : DIVisitor() , DR(D) , I(1) , CI(cast(GV->getInitializer())) @@ -264,21 +262,21 @@ } virtual void Apply(GlobalVariable *&Field) { Constant *C = CI->getOperand(I++); - Field = getGlobalValue(C); + Field = getGlobalVariable(C); } }; //===----------------------------------------------------------------------===// -/// DISerializeAppMgr - This DIApplyManager serializes all the fields in +/// DISerializeVisitor - This DIVisitor serializes all the fields in /// the supplied DebugInfoDesc. -class DISerializeAppMgr : public DIApplyManager { +class DISerializeVisitor : public DIVisitor { private: DISerializer &SR; // Active serializer. std::vector &Elements; // Element accumulator. public: - DISerializeAppMgr(DISerializer &S, std::vector &E) - : DIApplyManager() + DISerializeVisitor(DISerializer &S, std::vector &E) + : DIVisitor() , SR(S) , Elements(E) {} @@ -321,16 +319,16 @@ }; //===----------------------------------------------------------------------===// -/// DIGetTypesAppMgr - This DIApplyManager gathers all the field types in +/// DIGetTypesVisitor - This DIVisitor gathers all the field types in /// the supplied DebugInfoDesc. -class DIGetTypesAppMgr : public DIApplyManager { +class DIGetTypesVisitor : public DIVisitor { private: DISerializer &SR; // Active serializer. std::vector &Fields; // Type accumulator. 
public: - DIGetTypesAppMgr(DISerializer &S, std::vector &F) - : DIApplyManager() + DIGetTypesVisitor(DISerializer &S, std::vector &F) + : DIVisitor() , SR(S) , Fields(F) {} @@ -361,9 +359,9 @@ }; //===----------------------------------------------------------------------===// -/// DIVerifyAppMgr - This DIApplyManager verifies all the field types against +/// DIVerifyVisitor - This DIVisitor verifies all the field types against /// a constant initializer. -class DIVerifyAppMgr : public DIApplyManager { +class DIVerifyVisitor : public DIVisitor { private: DIVerifier &VR; // Active verifier. bool IsValid; // Validity status. @@ -371,8 +369,8 @@ ConstantStruct *CI; // GlobalVariable constant initializer. public: - DIVerifyAppMgr(DIVerifier &V, GlobalVariable *GV) - : DIApplyManager() + DIVerifyVisitor(DIVerifier &V, GlobalVariable *GV) + : DIVisitor() , VR(V) , IsValid(true) , I(1) @@ -404,11 +402,11 @@ virtual void Apply(DebugInfoDesc *&Field) { // FIXME - Prepare the correct descriptor. Constant *C = CI->getOperand(I++); - IsValid = IsValid && isGlobalValue(C); + IsValid = IsValid && isGlobalVariable(C); } virtual void Apply(GlobalVariable *&Field) { Constant *C = CI->getOperand(I++); - IsValid = IsValid && isGlobalValue(C); + IsValid = IsValid && isGlobalVariable(C); } }; @@ -416,23 +414,20 @@ /// DebugVersionFromGlobal - Returns the version number from a compile unit /// GlobalVariable. -unsigned CompileUnitDesc::DebugVersionFromGlobal(GlobalVariable *GV, - bool Checking) { - if (Checking && !isUIntOperand(GV, 1)) return DIInvalid; - ConstantStruct *CI = cast(GV->getInitializer()); - Constant *C = CI->getOperand(1); - return cast(C)->getValue(); +unsigned CompileUnitDesc::DebugVersionFromGlobal(GlobalVariable *GV) { + ConstantUInt *C = getUIntOperand(GV, 1); + return C ? C->getValue() : DIINVALID; } -/// ApplyToFields - Target the apply manager to the fields of the -/// CompileUnitDesc. 
-void CompileUnitDesc::ApplyToFields(DIApplyManager *Mgr) { - Mgr->Apply(DebugVersion); - Mgr->Apply(Language); - Mgr->Apply(FileName); - Mgr->Apply(Directory); - Mgr->Apply(Producer); - Mgr->Apply(TransUnit); +/// ApplyToFields - Target the visitor to the fields of the CompileUnitDesc. +/// +void CompileUnitDesc::ApplyToFields(DIVisitor *Visitor) { + Visitor->Apply(DebugVersion); + Visitor->Apply(Language); + Visitor->Apply(FileName); + Visitor->Apply(Directory); + Visitor->Apply(Producer); + Visitor->Apply(TransUnit); } /// TypeString - Return a string used to compose globalnames and labels. @@ -454,16 +449,16 @@ //===----------------------------------------------------------------------===// -/// ApplyToFields - Target the apply manager to the fields of the -/// GlobalVariableDesc. -void GlobalVariableDesc::ApplyToFields(DIApplyManager *Mgr) { - Mgr->Apply(Context); - Mgr->Apply(Name); - Mgr->Apply(TransUnit); - Mgr->Apply(TyDesc); - Mgr->Apply(IsStatic); - Mgr->Apply(IsDefinition); - Mgr->Apply(Global); +/// ApplyToFields - Target the visitor to the fields of the GlobalVariableDesc. +/// +void GlobalVariableDesc::ApplyToFields(DIVisitor *Visitor) { + Visitor->Apply(Context); + Visitor->Apply(Name); + Visitor->Apply(TransUnit); + Visitor->Apply(TyDesc); + Visitor->Apply(IsStatic); + Visitor->Apply(IsDefinition); + Visitor->Apply(Global); } /// TypeString - Return a string used to compose globalnames and labels. @@ -486,19 +481,19 @@ //===----------------------------------------------------------------------===// -/// ApplyToFields - Target the apply manager to the fields of the +/// ApplyToFields - Target the visitor to the fields of the /// SubprogramDesc. 
-void SubprogramDesc::ApplyToFields(DIApplyManager *Mgr) { - Mgr->Apply(Context); - Mgr->Apply(Name); - Mgr->Apply(TransUnit); - Mgr->Apply(TyDesc); - Mgr->Apply(IsStatic); - Mgr->Apply(IsDefinition); +void SubprogramDesc::ApplyToFields(DIVisitor *Visitor) { + Visitor->Apply(Context); + Visitor->Apply(Name); + Visitor->Apply(TransUnit); + Visitor->Apply(TyDesc); + Visitor->Apply(IsStatic); + Visitor->Apply(IsDefinition); // FIXME - Temp variable until restructured. GlobalVariable *Tmp; - Mgr->Apply(Tmp); + Visitor->Apply(Tmp); } /// TypeString - Return a string used to compose globalnames and labels. @@ -541,7 +536,7 @@ assert(Slot && "Unknown Tag"); // Deserialize the fields. - DIDeserializeAppMgr DRAM(*this, GV); + DIDeserializeVisitor DRAM(*this, GV); DRAM.ApplyToFields(Slot); return Slot; @@ -592,7 +587,7 @@ // Add tag field. Fields.push_back(Type::UIntTy); // Get types of remaining fields. - DIGetTypesAppMgr GTAM(*this, Fields); + DIGetTypesVisitor GTAM(*this, Fields); GTAM.ApplyToFields(DD); // Construct structured type. @@ -655,7 +650,7 @@ // Add Tag value. Elements.push_back(ConstantUInt::get(Type::UIntTy, Tag)); // Add remaining fields. - DISerializeAppMgr SRAM(*this, Elements); + DISerializeVisitor SRAM(*this, Elements); SRAM.ApplyToFields(DD); // Set the globals initializer. @@ -688,13 +683,13 @@ if (markVisited(GV)) return true; // Get the Tag - unsigned Tag = DebugInfoDesc::TagFromGlobal(GV, true); - if (Tag == DIInvalid) return false; + unsigned Tag = DebugInfoDesc::TagFromGlobal(GV); + if (Tag == DIINVALID) return false; // If a compile unit we need the debug version. if (Tag == DI_TAG_compile_unit) { - DebugVersion = CompileUnitDesc::DebugVersionFromGlobal(GV, true); - if (DebugVersion == DIInvalid) return false; + DebugVersion = CompileUnitDesc::DebugVersionFromGlobal(GV); + if (DebugVersion == DIINVALID) return false; } // Construct an empty DebugInfoDesc. 
@@ -711,7 +706,7 @@ unsigned &Slot = Counts[Tag]; if (!Slot) { // Check the operand count to the field count - DICountAppMgr CTAM; + DICountVisitor CTAM; CTAM.ApplyToFields(DD); Slot = CTAM.getCount(); } @@ -723,7 +718,7 @@ } // Check each field for valid type. - DIVerifyAppMgr VRAM(*this, GV); + DIVerifyVisitor VRAM(*this, GV); VRAM.ApplyToFields(DD); // Release empty DebugInfoDesc. @@ -810,7 +805,7 @@ // Scan all globals. for (unsigned i = 0, N = Globals.size(); i < N; ++i) { GlobalVariable *GV = Globals[i]; - if (DebugInfoDesc::TagFromGlobal(GV, true) == DI_TAG_global_variable) { + if (DebugInfoDesc::TagFromGlobal(GV) == DI_TAG_global_variable) { GlobalVariableDesc *GVD = static_cast<GlobalVariableDesc *>(DR.Deserialize(GV)); GlobalVariables.push_back(GVD); From jlaskey at apple.com Mon Feb 6 15:54:18 2006 From: jlaskey at apple.com (Jim Laskey) Date: Mon, 6 Feb 2006 15:54:18 -0600 Subject: [llvm-commits] CVS: llvm/lib/CodeGen/MachineDebugInfo.cpp Message-ID: <200602062154.PAA21522@zion.cs.uiuc.edu> Changes in directory llvm/lib/CodeGen: MachineDebugInfo.cpp updated: 1.11 -> 1.12 --- Log message: Goodbye nasty macro. --- Diffs of the changes: (+4 -4) MachineDebugInfo.cpp | 8 ++++---- 1 files changed, 4 insertions(+), 4 deletions(-) Index: llvm/lib/CodeGen/MachineDebugInfo.cpp diff -u llvm/lib/CodeGen/MachineDebugInfo.cpp:1.11 llvm/lib/CodeGen/MachineDebugInfo.cpp:1.12 --- llvm/lib/CodeGen/MachineDebugInfo.cpp:1.11 Mon Feb 6 13:12:02 2006 +++ llvm/lib/CodeGen/MachineDebugInfo.cpp Mon Feb 6 15:54:05 2006 @@ -175,7 +175,7 @@ /// GlobalVariable. unsigned DebugInfoDesc::TagFromGlobal(GlobalVariable *GV) { ConstantUInt *C = getUIntOperand(GV, 0); - return C ? C->getValue() : DIINVALID; + return C ? (unsigned)C->getValue() : (unsigned)DIInvalid; } /// DescFactory - Create an instance of debug info descriptor based on Tag. @@ -416,7 +416,7 @@ /// GlobalVariable. unsigned CompileUnitDesc::DebugVersionFromGlobal(GlobalVariable *GV) { ConstantUInt *C = getUIntOperand(GV, 1); - return C ? 
C->getValue() : DIINVALID; + return C ? (unsigned)C->getValue() : (unsigned)DIInvalid; } /// ApplyToFields - Target the visitor to the fields of the CompileUnitDesc. @@ -684,12 +684,12 @@ // Get the Tag unsigned Tag = DebugInfoDesc::TagFromGlobal(GV); - if (Tag == DIINVALID) return false; + if (Tag == DIInvalid) return false; // If a compile unit we need the debug version. if (Tag == DI_TAG_compile_unit) { DebugVersion = CompileUnitDesc::DebugVersionFromGlobal(GV); - if (DebugVersion == DIINVALID) return false; + if (DebugVersion == DIInvalid) return false; } // Construct an empty DebugInfoDesc. From jlaskey at apple.com Mon Feb 6 15:54:18 2006 From: jlaskey at apple.com (Jim Laskey) Date: Mon, 6 Feb 2006 15:54:18 -0600 Subject: [llvm-commits] CVS: llvm/include/llvm/CodeGen/MachineDebugInfo.h Message-ID: <200602062154.PAA21520@zion.cs.uiuc.edu> Changes in directory llvm/include/llvm/CodeGen: MachineDebugInfo.h updated: 1.12 -> 1.13 --- Log message: Goodbye nasty macro. --- Diffs of the changes: (+1 -3) MachineDebugInfo.h | 4 +--- 1 files changed, 1 insertion(+), 3 deletions(-) Index: llvm/include/llvm/CodeGen/MachineDebugInfo.h diff -u llvm/include/llvm/CodeGen/MachineDebugInfo.h:1.12 llvm/include/llvm/CodeGen/MachineDebugInfo.h:1.13 --- llvm/include/llvm/CodeGen/MachineDebugInfo.h:1.12 Mon Feb 6 13:12:02 2006 +++ llvm/include/llvm/CodeGen/MachineDebugInfo.h Mon Feb 6 15:54:05 2006 @@ -51,11 +51,9 @@ //===----------------------------------------------------------------------===// // Debug info constants. -// Invalid result indicator. -#define DIINVALID (~0U) - enum { LLVMDebugVersion = 1, // Current version of debug information. + DIInvalid = ~0U, // Invalid result indicator. // DebugInfoDesc type identifying tags. // FIXME - Change over with gcc4. 
From lattner at cs.uiuc.edu Mon Feb 6 16:16:53 2006 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Mon, 6 Feb 2006 16:16:53 -0600 Subject: [llvm-commits] CVS: llvm/include/llvm/CodeGen/AsmPrinter.h Message-ID: <200602062216.QAA21795@zion.cs.uiuc.edu> Changes in directory llvm/include/llvm/CodeGen: AsmPrinter.h updated: 1.28 -> 1.29 --- Log message: Change the prototype of PrintAsmOperand --- Diffs of the changes: (+1 -1) AsmPrinter.h | 2 +- 1 files changed, 1 insertion(+), 1 deletion(-) Index: llvm/include/llvm/CodeGen/AsmPrinter.h diff -u llvm/include/llvm/CodeGen/AsmPrinter.h:1.28 llvm/include/llvm/CodeGen/AsmPrinter.h:1.29 --- llvm/include/llvm/CodeGen/AsmPrinter.h:1.28 Sat Feb 4 19:24:06 2006 +++ llvm/include/llvm/CodeGen/AsmPrinter.h Mon Feb 6 16:16:41 2006 @@ -196,7 +196,7 @@ /// overried this to format as appropriate. This method can return true if /// the operand is erroneous. virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, - unsigned AsmVariant); + unsigned AsmVariant, const char *ExtraCode); /// SetupMachineFunction - This should be called when a new MachineFunction /// is being processed from runOnMachineFunction. 
From lattner at cs.uiuc.edu Mon Feb 6 16:17:35 2006 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Mon, 6 Feb 2006 16:17:35 -0600 Subject: [llvm-commits] CVS: llvm/lib/CodeGen/AsmPrinter.cpp Message-ID: <200602062217.QAA21858@zion.cs.uiuc.edu> Changes in directory llvm/lib/CodeGen: AsmPrinter.cpp updated: 1.47 -> 1.48 --- Log message: Add support for modifier characters to operand printers --- Diffs of the changes: (+21 -2) AsmPrinter.cpp | 23 +++++++++++++++++++++-- 1 files changed, 21 insertions(+), 2 deletions(-) Index: llvm/lib/CodeGen/AsmPrinter.cpp diff -u llvm/lib/CodeGen/AsmPrinter.cpp:1.47 llvm/lib/CodeGen/AsmPrinter.cpp:1.48 --- llvm/lib/CodeGen/AsmPrinter.cpp:1.47 Sat Feb 4 19:46:49 2006 +++ llvm/lib/CodeGen/AsmPrinter.cpp Mon Feb 6 16:17:23 2006 @@ -538,7 +538,23 @@ } LastEmitted = IDEnd; + char Modifier[2] = { 0, 0 }; + if (HasCurlyBraces) { + // If we have curly braces, check for a modifier character. This + // supports syntax like ${0:u}, which correspond to "%u0" in GCC asm. + if (*LastEmitted == ':') { + ++LastEmitted; // Consume ':' character. + if (*LastEmitted == 0) { + std::cerr << "Bad ${:} expression in inline asm string: '" + << AsmStr << "'\n"; + exit(1); + } + + Modifier[0] = *LastEmitted; + ++LastEmitted; // Consume modifier character. + } + if (*LastEmitted != '}') { std::cerr << "Bad ${} expression in inline asm string: '" << AsmStr << "'\n"; @@ -553,11 +569,14 @@ exit(1); } + char ExtraCode = 0; // FIXME: + // Okay, we finally have an operand number. Ask the target to print this // operand! if (CurVariant == -1 || CurVariant == AsmPrinterVariant) if (const_cast(this)-> - PrintAsmOperand(MI, Val+1, AsmPrinterVariant)) { + PrintAsmOperand(MI, Val+1, AsmPrinterVariant, + Modifier[0] ? Modifier : 0)) { std::cerr << "Invalid operand found in inline asm: '" << AsmStr << "'\n"; MI->dump(); @@ -601,7 +620,7 @@ /// instruction, using the specified assembler variant. Targets should /// overried this to format as appropriate. 
bool AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, - unsigned AsmVariant) { + unsigned AsmVariant, const char *ExtraCode) { // Target doesn't support this yet! return true; } From lattner at cs.uiuc.edu Mon Feb 6 16:18:31 2006 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Mon, 6 Feb 2006 16:18:31 -0600 Subject: [llvm-commits] CVS: llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp Message-ID: <200602062218.QAA21945@zion.cs.uiuc.edu> Changes in directory llvm/lib/Target/PowerPC: PPCAsmPrinter.cpp updated: 1.146 -> 1.147 --- Log message: Change prototype --- Diffs of the changes: (+1 -1) PPCAsmPrinter.cpp | 2 +- 1 files changed, 1 insertion(+), 1 deletion(-) Index: llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp diff -u llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp:1.146 llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp:1.147 --- llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp:1.146 Mon Feb 6 08:16:15 2006 +++ llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp Mon Feb 6 16:18:19 2006 @@ -96,7 +96,7 @@ } bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, - unsigned AsmVariant) { + unsigned AsmVariant, const char *ExtraCode) { printOperand(MI, OpNo); return false; } From lattner at cs.uiuc.edu Mon Feb 6 16:43:40 2006 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Mon, 6 Feb 2006 16:43:40 -0600 Subject: [llvm-commits] CVS: llvm/utils/TableGen/AsmWriterEmitter.cpp Message-ID: <200602062243.QAA22186@zion.cs.uiuc.edu> Changes in directory llvm/utils/TableGen: AsmWriterEmitter.cpp updated: 1.25 -> 1.26 --- Log message: Simplify the variant handling code, no functionality change. 
--- Diffs of the changes: (+17 -34) AsmWriterEmitter.cpp | 51 +++++++++++++++++---------------------------------- 1 files changed, 17 insertions(+), 34 deletions(-) Index: llvm/utils/TableGen/AsmWriterEmitter.cpp diff -u llvm/utils/TableGen/AsmWriterEmitter.cpp:1.25 llvm/utils/TableGen/AsmWriterEmitter.cpp:1.26 --- llvm/utils/TableGen/AsmWriterEmitter.cpp:1.25 Wed Feb 1 13:12:23 2006 +++ llvm/utils/TableGen/AsmWriterEmitter.cpp Mon Feb 6 16:43:28 2006 @@ -94,7 +94,7 @@ /// AsmWriterInst::AsmWriterInst(const CodeGenInstruction &CGI, unsigned Variant) { this->CGI = &CGI; - bool inVariant = false; // True if we are inside a {.|.|.} region. + unsigned CurVariant = ~0U; // ~0 if we are outside a {.|.|.} region, other #. // NOTE: Any extensions to this code need to be mirrored in the // AsmPrinter::printInlineAsm code that executes as compile time (assuming @@ -109,50 +109,32 @@ // Emit a constant string fragment. if (DollarPos != LastEmitted) { // TODO: this should eventually handle escaping. - AddLiteralString(std::string(AsmString.begin()+LastEmitted, - AsmString.begin()+DollarPos)); + if (CurVariant == Variant || CurVariant == ~0U) + AddLiteralString(std::string(AsmString.begin()+LastEmitted, + AsmString.begin()+DollarPos)); LastEmitted = DollarPos; } else if (AsmString[DollarPos] == '{') { - if (inVariant) + if (CurVariant != ~0U) throw "Nested variants found for instruction '" + CGI.TheDef->getName() + "'!"; LastEmitted = DollarPos+1; - inVariant = true; // We are now inside of the variant! - for (unsigned i = 0; i != Variant; ++i) { - // Skip over all of the text for an irrelevant variant here. The - // next variant starts at |, or there may not be text for this - // variant if we see a }. 
- std::string::size_type NP = - AsmString.find_first_of("|}", LastEmitted); - if (NP == std::string::npos) - throw "Incomplete variant for instruction '" + - CGI.TheDef->getName() + "'!"; - LastEmitted = NP+1; - if (AsmString[NP] == '}') { - inVariant = false; // No text for this variant. - break; - } - } + CurVariant = 0; // We are now inside of the variant! } else if (AsmString[DollarPos] == '|') { - if (!inVariant) + if (CurVariant == ~0U) throw "'|' character found outside of a variant in instruction '" + CGI.TheDef->getName() + "'!"; - // Move to the end of variant list. - std::string::size_type NP = AsmString.find('}', LastEmitted); - if (NP == std::string::npos) - throw "Incomplete variant for instruction '" + - CGI.TheDef->getName() + "'!"; - LastEmitted = NP+1; - inVariant = false; + ++CurVariant; + ++LastEmitted; } else if (AsmString[DollarPos] == '}') { - if (!inVariant) + if (CurVariant == ~0U) throw "'}' character found outside of a variant in instruction '" + CGI.TheDef->getName() + "'!"; - LastEmitted = DollarPos+1; - inVariant = false; + ++LastEmitted; + CurVariant = ~0U; } else if (DollarPos+1 != AsmString.size() && AsmString[DollarPos+1] == '$') { - AddLiteralString("$"); // "$$" -> $ + if (CurVariant == Variant || CurVariant == ~0U) + AddLiteralString("$"); // "$$" -> $ LastEmitted = DollarPos+2; } else { // Get the name of the variable. 
@@ -181,7 +163,7 @@ throw "Reached end of string before terminating curly brace in '" + CGI.TheDef->getName() + "'"; if (AsmString[VarEnd] != '}') - throw "Variant name beginning with '{' did not end with '}' in '" + throw "Variable name beginning with '{' did not end with '}' in '" + CGI.TheDef->getName() + "'"; ++VarEnd; } @@ -202,7 +184,8 @@ --MIOp; } - Operands.push_back(AsmWriterOperand(OpInfo.PrinterMethodName, MIOp)); + if (CurVariant == Variant || CurVariant == ~0U) + Operands.push_back(AsmWriterOperand(OpInfo.PrinterMethodName, MIOp)); LastEmitted = VarEnd; } } From lattner at cs.uiuc.edu Mon Feb 6 17:41:00 2006 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Mon, 6 Feb 2006 17:41:00 -0600 Subject: [llvm-commits] CVS: llvm/utils/TableGen/AsmWriterEmitter.cpp Message-ID: <200602062341.RAA22636@zion.cs.uiuc.edu> Changes in directory llvm/utils/TableGen: AsmWriterEmitter.cpp updated: 1.26 -> 1.27 --- Log message: Add support for modifier strings in machine instr descriptions. This allows us to avoid creating lots of "Operand" types with different printers, instead we can fold several together and use modifiers. For example, we can now use: ${target:call} to say that the operand should be printed like a 'call' operand. --- Diffs of the changes: (+38 -6) AsmWriterEmitter.cpp | 44 ++++++++++++++++++++++++++++++++++++++------ 1 files changed, 38 insertions(+), 6 deletions(-) Index: llvm/utils/TableGen/AsmWriterEmitter.cpp diff -u llvm/utils/TableGen/AsmWriterEmitter.cpp:1.26 llvm/utils/TableGen/AsmWriterEmitter.cpp:1.27 --- llvm/utils/TableGen/AsmWriterEmitter.cpp:1.26 Mon Feb 6 16:43:28 2006 +++ llvm/utils/TableGen/AsmWriterEmitter.cpp Mon Feb 6 17:40:48 2006 @@ -37,17 +37,23 @@ /// MiOpNo - For isMachineInstrOperand, this is the operand number of the /// machine instruction. unsigned MIOpNo; + + /// MiModifier - For isMachineInstrOperand, this is the modifier string for + /// an operand, specified with syntax like ${opname:modifier}. 
+ std::string MiModifier; AsmWriterOperand(const std::string &LitStr) : OperandType(isLiteralTextOperand), Str(LitStr) {} - AsmWriterOperand(const std::string &Printer, unsigned OpNo) - : OperandType(isMachineInstrOperand), Str(Printer), MIOpNo(OpNo) {} + AsmWriterOperand(const std::string &Printer, unsigned OpNo, + const std::string &Modifier) + : OperandType(isMachineInstrOperand), Str(Printer), MIOpNo(OpNo), + MiModifier(Modifier) {} bool operator!=(const AsmWriterOperand &Other) const { if (OperandType != Other.OperandType || Str != Other.Str) return true; if (OperandType == isMachineInstrOperand) - return MIOpNo != Other.MIOpNo; + return MIOpNo != Other.MIOpNo || MiModifier != Other.MiModifier; return false; } bool operator==(const AsmWriterOperand &Other) const { @@ -84,8 +90,12 @@ void AsmWriterOperand::EmitCode(std::ostream &OS) const { if (OperandType == isLiteralTextOperand) OS << "O << \"" << Str << "\"; "; - else - OS << Str << "(MI, " << MIOpNo << "); "; + else { + OS << Str << "(MI, " << MIOpNo; + if (!MiModifier.empty()) + OS << ", \"" << MiModifier << '"'; + OS << "); "; + } } @@ -155,6 +165,10 @@ std::string VarName(AsmString.begin()+DollarPos+1, AsmString.begin()+VarEnd); + // Modifier - Support ${foo:modifier} syntax, where "modifier" is passed + // into printOperand. + std::string Modifier; + // In order to avoid starting the next string at the terminating curly // brace, advance the end position past it if we found an opening curly // brace. @@ -162,6 +176,23 @@ if (VarEnd >= AsmString.size()) throw "Reached end of string before terminating curly brace in '" + CGI.TheDef->getName() + "'"; + + // Look for a modifier string. 
+ if (AsmString[VarEnd] == ':') { + ++VarEnd; + if (VarEnd >= AsmString.size()) + throw "Reached end of string before terminating curly brace in '" + + CGI.TheDef->getName() + "'"; + + unsigned ModifierStart = VarEnd; + while (VarEnd < AsmString.size() && isIdentChar(AsmString[VarEnd])) + ++VarEnd; + Modifier = std::string(AsmString.begin()+ModifierStart, + AsmString.begin()+VarEnd); + if (Modifier.empty()) + throw "Bad operand modifier name in '"+ CGI.TheDef->getName() + "'"; + } + if (AsmString[VarEnd] != '}') throw "Variable name beginning with '{' did not end with '}' in '" + CGI.TheDef->getName() + "'"; @@ -185,7 +216,8 @@ } if (CurVariant == Variant || CurVariant == ~0U) - Operands.push_back(AsmWriterOperand(OpInfo.PrinterMethodName, MIOp)); + Operands.push_back(AsmWriterOperand(OpInfo.PrinterMethodName, MIOp, + Modifier)); LastEmitted = VarEnd; } } From lattner at cs.uiuc.edu Mon Feb 6 17:41:31 2006 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Mon, 6 Feb 2006 17:41:31 -0600 Subject: [llvm-commits] CVS: llvm/lib/Target/X86/X86ATTAsmPrinter.cpp X86ATTAsmPrinter.h X86InstrInfo.td X86IntelAsmPrinter.cpp X86IntelAsmPrinter.h Message-ID: <200602062341.RAA22679@zion.cs.uiuc.edu> Changes in directory llvm/lib/Target/X86: X86ATTAsmPrinter.cpp updated: 1.20 -> 1.21 X86ATTAsmPrinter.h updated: 1.5 -> 1.6 X86InstrInfo.td updated: 1.230 -> 1.231 X86IntelAsmPrinter.cpp updated: 1.14 -> 1.15 X86IntelAsmPrinter.h updated: 1.6 -> 1.7 --- Log message: Eliminate the printCallOperand method, using a 'call' modifier on printOperand instead. 
--- Diffs of the changes: (+25 -32) X86ATTAsmPrinter.cpp | 17 +++++++++++------ X86ATTAsmPrinter.h | 11 +++-------- X86InstrInfo.td | 8 ++------ X86IntelAsmPrinter.cpp | 10 +++++----- X86IntelAsmPrinter.h | 11 ++++------- 5 files changed, 25 insertions(+), 32 deletions(-) Index: llvm/lib/Target/X86/X86ATTAsmPrinter.cpp diff -u llvm/lib/Target/X86/X86ATTAsmPrinter.cpp:1.20 llvm/lib/Target/X86/X86ATTAsmPrinter.cpp:1.21 --- llvm/lib/Target/X86/X86ATTAsmPrinter.cpp:1.20 Wed Jan 25 20:27:43 2006 +++ llvm/lib/Target/X86/X86ATTAsmPrinter.cpp Mon Feb 6 17:41:19 2006 @@ -63,7 +63,9 @@ return false; } -void X86ATTAsmPrinter::printOp(const MachineOperand &MO, bool isCallOp) { +void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, + const char *Modifier) { + const MachineOperand &MO = MI->getOperand(OpNo); const MRegisterInfo &RI = *TM.getRegisterInfo(); switch (MO.getType()) { case MachineOperand::MO_VirtualRegister: @@ -92,6 +94,7 @@ abort (); return; case MachineOperand::MO_GlobalAddress: { + bool isCallOp = Modifier && !strcmp(Modifier, "call"); // Darwin block shameless ripped from PowerPCAsmPrinter.cpp if (forDarwin) { if (!isCallOp) O << '$'; @@ -132,7 +135,8 @@ O << Offset; return; } - case MachineOperand::MO_ExternalSymbol: + case MachineOperand::MO_ExternalSymbol: { + bool isCallOp = Modifier && !strcmp(Modifier, "call"); if (isCallOp && forDarwin) { std::string Name(GlobalPrefix); Name += MO.getSymbolName(); FnStubs.insert(Name); @@ -142,6 +146,7 @@ if (!isCallOp) O << '$'; O << GlobalPrefix << MO.getSymbolName(); return; + } default: O << ""; return; } @@ -183,7 +188,7 @@ O << "+" << DispSpec.getImmedValue(); if (IndexReg.getReg()) { O << "(,"; - printOp(IndexReg); + printOperand(MI, Op+2); if (ScaleVal != 1) O << "," << ScaleVal; O << ")"; @@ -192,7 +197,7 @@ } if (DispSpec.isGlobalAddress()) { - printOp(DispSpec, true); + printOperand(MI, Op+3, "call"); } else { int DispVal = DispSpec.getImmedValue(); if (DispVal || (!IndexReg.getReg() && 
!BaseReg.getReg())) @@ -202,11 +207,11 @@ if (IndexReg.getReg() || BaseReg.getReg()) { O << "("; if (BaseReg.getReg()) - printOp(BaseReg); + printOperand(MI, Op); if (IndexReg.getReg()) { O << ","; - printOp(IndexReg); + printOperand(MI, Op+2); if (ScaleVal != 1) O << "," << ScaleVal; } Index: llvm/lib/Target/X86/X86ATTAsmPrinter.h diff -u llvm/lib/Target/X86/X86ATTAsmPrinter.h:1.5 llvm/lib/Target/X86/X86ATTAsmPrinter.h:1.6 --- llvm/lib/Target/X86/X86ATTAsmPrinter.h:1.5 Tue Jan 31 16:28:30 2006 +++ llvm/lib/Target/X86/X86ATTAsmPrinter.h Mon Feb 6 17:41:19 2006 @@ -34,13 +34,9 @@ /// returns false. bool printInstruction(const MachineInstr *MI); - // This method is used by the tablegen'erated instruction printer. - void printOperand(const MachineInstr *MI, unsigned OpNo){ - printOp(MI->getOperand(OpNo)); - } - void printCallOperand(const MachineInstr *MI, unsigned OpNo) { - printOp(MI->getOperand(OpNo), true); // Don't print '$' prefix. - } + // These methods are used by the tablegen'erated instruction printer. + void printOperand(const MachineInstr *MI, unsigned OpNo, + const char *Modifier = 0); void printi8mem(const MachineInstr *MI, unsigned OpNo) { printMemReference(MI, OpNo); } @@ -64,7 +60,6 @@ } void printMachineInstruction(const MachineInstr *MI); - void printOp(const MachineOperand &MO, bool isCallOperand = false); void printSSECC(const MachineInstr *MI, unsigned Op); void printMemReference(const MachineInstr *MI, unsigned Op); bool runOnMachineFunction(MachineFunction &F); Index: llvm/lib/Target/X86/X86InstrInfo.td diff -u llvm/lib/Target/X86/X86InstrInfo.td:1.230 llvm/lib/Target/X86/X86InstrInfo.td:1.231 --- llvm/lib/Target/X86/X86InstrInfo.td:1.230 Fri Feb 3 20:23:01 2006 +++ llvm/lib/Target/X86/X86InstrInfo.td Mon Feb 6 17:41:19 2006 @@ -159,10 +159,6 @@ // 32-bits but only 8 bits are significant. def i32i8imm : Operand; -// PCRelative calls need special operand formatting. 
-let PrintMethod = "printCallOperand" in - def calltarget : Operand; - // Branch targets have OtherVT type. def brtarget : Operand; @@ -516,7 +512,7 @@ // All calls clobber the non-callee saved registers... let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7] in { - def CALLpcrel32 : I<0xE8, RawFrm, (ops calltarget:$dst), "call $dst", + def CALLpcrel32 : I<0xE8, RawFrm, (ops i32imm:$dst), "call ${dst:call}", []>; def CALL32r : I<0xFF, MRM2r, (ops R32:$dst), "call {*}$dst", [(X86call R32:$dst)]>; @@ -526,7 +522,7 @@ // Tail call stuff. let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, noResults = 1 in - def TAILJMPd : IBr<0xE9, (ops calltarget:$dst), "jmp $dst # TAIL CALL", []>; + def TAILJMPd : IBr<0xE9, (ops i32imm:$dst), "jmp ${dst:call} # TAIL CALL", []>; let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, noResults = 1 in def TAILJMPr : I<0xFF, MRM4r, (ops R32:$dst), "jmp {*}$dst # TAIL CALL", []>; let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, noResults = 1 in Index: llvm/lib/Target/X86/X86IntelAsmPrinter.cpp diff -u llvm/lib/Target/X86/X86IntelAsmPrinter.cpp:1.14 llvm/lib/Target/X86/X86IntelAsmPrinter.cpp:1.15 --- llvm/lib/Target/X86/X86IntelAsmPrinter.cpp:1.14 Sun Jan 22 17:37:17 2006 +++ llvm/lib/Target/X86/X86IntelAsmPrinter.cpp Mon Feb 6 17:41:19 2006 @@ -74,8 +74,8 @@ } } -void X86IntelAsmPrinter::printOp(const MachineOperand &MO, - bool elideOffsetKeyword /* = false */) { +void X86IntelAsmPrinter::printOp(const MachineOperand &MO, + const char *Modifier) { const MRegisterInfo &RI = *TM.getRegisterInfo(); switch (MO.getType()) { case MachineOperand::MO_VirtualRegister: @@ -109,7 +109,7 @@ abort (); return; case MachineOperand::MO_GlobalAddress: { - if (!elideOffsetKeyword) + if (!Modifier || strcmp(Modifier, "call")) O << "OFFSET "; O << Mang->getValueName(MO.getGlobal()); int Offset = MO.getOffset(); @@ -161,7 +161,7 @@ O << "["; bool NeedPlus = false; 
if (BaseReg.getReg()) { - printOp(BaseReg, true); + printOp(BaseReg, "call"); NeedPlus = true; } @@ -176,7 +176,7 @@ if (DispSpec.isGlobalAddress()) { if (NeedPlus) O << " + "; - printOp(DispSpec, true); + printOp(DispSpec, "call"); } else { int DispVal = DispSpec.getImmedValue(); if (DispVal || (!BaseReg.getReg() && !IndexReg.getReg())) { Index: llvm/lib/Target/X86/X86IntelAsmPrinter.h diff -u llvm/lib/Target/X86/X86IntelAsmPrinter.h:1.6 llvm/lib/Target/X86/X86IntelAsmPrinter.h:1.7 --- llvm/lib/Target/X86/X86IntelAsmPrinter.h:1.6 Tue Jan 31 16:28:30 2006 +++ llvm/lib/Target/X86/X86IntelAsmPrinter.h Mon Feb 6 17:41:19 2006 @@ -37,21 +37,18 @@ bool printInstruction(const MachineInstr *MI); // This method is used by the tablegen'erated instruction printer. - void printOperand(const MachineInstr *MI, unsigned OpNo){ + void printOperand(const MachineInstr *MI, unsigned OpNo, + const char *Modifier = 0) { const MachineOperand &MO = MI->getOperand(OpNo); if (MO.getType() == MachineOperand::MO_MachineRegister) { assert(MRegisterInfo::isPhysicalRegister(MO.getReg())&&"Not physref??"); // Bug Workaround: See note in Printer::doInitialization about %. O << "%" << TM.getRegisterInfo()->get(MO.getReg()).Name; } else { - printOp(MO); + printOp(MO, Modifier); } } - void printCallOperand(const MachineInstr *MI, unsigned OpNo) { - printOp(MI->getOperand(OpNo), true); // Don't print "OFFSET". 
- } - void printi8mem(const MachineInstr *MI, unsigned OpNo) { O << "BYTE PTR "; printMemReference(MI, OpNo); @@ -82,7 +79,7 @@ } void printMachineInstruction(const MachineInstr *MI); - void printOp(const MachineOperand &MO, bool elideOffsetKeyword = false); + void printOp(const MachineOperand &MO, const char *Modifier = 0); void printSSECC(const MachineInstr *MI, unsigned Op); void printMemReference(const MachineInstr *MI, unsigned Op); bool runOnMachineFunction(MachineFunction &F); From evan.cheng at apple.com Mon Feb 6 18:37:52 2006 From: evan.cheng at apple.com (Evan Cheng) Date: Mon, 6 Feb 2006 18:37:52 -0600 Subject: [llvm-commits] CVS: llvm/utils/TableGen/DAGISelEmitter.cpp DAGISelEmitter.h Message-ID: <200602070037.SAA22878@zion.cs.uiuc.edu> Changes in directory llvm/utils/TableGen: DAGISelEmitter.cpp updated: 1.168 -> 1.169 DAGISelEmitter.h updated: 1.55 -> 1.56 --- Log message: Hoist all SDOperand declarations within a Select_{opcode}() to the top level to reduce stack memory usage. This is intended to work around the gcc bug. --- Diffs of the changes: (+67 -34) DAGISelEmitter.cpp | 95 +++++++++++++++++++++++++++++++++++------------------ DAGISelEmitter.h | 6 +-- 2 files changed, 67 insertions(+), 34 deletions(-) Index: llvm/utils/TableGen/DAGISelEmitter.cpp diff -u llvm/utils/TableGen/DAGISelEmitter.cpp:1.168 llvm/utils/TableGen/DAGISelEmitter.cpp:1.169 --- llvm/utils/TableGen/DAGISelEmitter.cpp:1.168 Mon Feb 6 02:12:55 2006 +++ llvm/utils/TableGen/DAGISelEmitter.cpp Mon Feb 6 18:37:41 2006 @@ -1854,6 +1854,9 @@ /// tested, and if true, the match fails) [when true] or normal code to emit /// [when false]. std::vector > &GeneratedCode; + /// GeneratedDecl - This is the set of all SDOperand declarations needed for + /// the set of patterns for each top-level opcode. 
+ std::set &GeneratedDecl; std::string ChainName; bool DoReplace; @@ -1867,12 +1870,18 @@ if (!S.empty()) GeneratedCode.push_back(std::make_pair(false, S)); } + void emitDecl(const std::string &S) { + assert(!S.empty() && "Invalid declaration"); + GeneratedDecl.insert(S); + } public: PatternCodeEmitter(DAGISelEmitter &ise, ListInit *preds, TreePatternNode *pattern, TreePatternNode *instr, - std::vector > &gc, bool dorep) + std::vector > &gc, + std::set &gd, + bool dorep) : ISE(ise), Predicates(preds), Pattern(pattern), Instruction(instr), - GeneratedCode(gc), DoReplace(dorep), TmpNo(0) {} + GeneratedCode(gc), GeneratedDecl(gd), DoReplace(dorep), TmpNo(0) {} /// EmitMatchCode - Emit a matcher for N, going to the label for PatternNo /// if the match fails. At this point, we already know that the opcode for N @@ -2002,7 +2011,8 @@ else FoundChain = true; ChainName = "Chain" + ChainSuffix; - emitCode("SDOperand " + ChainName + " = " + RootName + + emitDecl(ChainName); + emitCode(ChainName + " = " + RootName + ".getOperand(0);"); } } @@ -2030,7 +2040,8 @@ } for (unsigned i = 0, e = N->getNumChildren(); i != e; ++i, ++OpNo) { - emitCode("SDOperand " + RootName + utostr(OpNo) + " = " + + emitDecl(RootName + utostr(OpNo)); + emitCode(RootName + utostr(OpNo) + " = " + RootName + ".getOperand(" +utostr(OpNo) + ");"); TreePatternNode *Child = N->getChild(i); @@ -2137,44 +2148,49 @@ } emitCode(CastType + " Tmp" + utostr(ResNo) + "C = (" + CastType + ")cast(" + Val + ")->getValue();"); - emitCode("SDOperand Tmp" + utostr(ResNo) + + emitDecl("Tmp" + utostr(ResNo)); + emitCode("Tmp" + utostr(ResNo) + " = CurDAG->getTargetConstant(Tmp" + utostr(ResNo) + "C, MVT::" + getEnumName(N->getTypeNum(0)) + ");"); } else if (!N->isLeaf() && N->getOperator()->getName() == "texternalsym"){ Record *Op = OperatorMap[N->getName()]; // Transform ExternalSymbol to TargetExternalSymbol if (Op && Op->getName() == "externalsym") { - emitCode("SDOperand Tmp" + utostr(ResNo) + " = CurDAG->getTarget" + 
emitDecl("Tmp" + utostr(ResNo)); + emitCode("Tmp" + utostr(ResNo) + " = CurDAG->getTarget" "ExternalSymbol(cast(" + Val + ")->getSymbol(), MVT::" + getEnumName(N->getTypeNum(0)) + ");"); } else { - emitCode("SDOperand Tmp" + utostr(ResNo) + " = " + Val + ";"); + emitDecl("Tmp" + utostr(ResNo)); + emitCode("Tmp" + utostr(ResNo) + " = " + Val + ";"); } } else if (!N->isLeaf() && N->getOperator()->getName() == "tglobaladdr") { Record *Op = OperatorMap[N->getName()]; // Transform GlobalAddress to TargetGlobalAddress if (Op && Op->getName() == "globaladdr") { - emitCode("SDOperand Tmp" + utostr(ResNo) + " = CurDAG->getTarget" + emitDecl("Tmp" + utostr(ResNo)); + emitCode("Tmp" + utostr(ResNo) + " = CurDAG->getTarget" "GlobalAddress(cast(" + Val + ")->getGlobal(), MVT::" + getEnumName(N->getTypeNum(0)) + ");"); } else { - emitCode("SDOperand Tmp" + utostr(ResNo) + " = " + Val + ";"); + emitDecl("Tmp" + utostr(ResNo)); + emitCode("Tmp" + utostr(ResNo) + " = " + Val + ";"); } } else if (!N->isLeaf() && N->getOperator()->getName() == "texternalsym"){ - emitCode("SDOperand Tmp" + utostr(ResNo) + " = " + Val + ";"); + emitDecl("Tmp" + utostr(ResNo)); + emitCode("Tmp" + utostr(ResNo) + " = " + Val + ";"); } else if (!N->isLeaf() && N->getOperator()->getName() == "tconstpool") { - emitCode("SDOperand Tmp" + utostr(ResNo) + " = " + Val + ";"); + emitDecl("Tmp" + utostr(ResNo)); + emitCode("Tmp" + utostr(ResNo) + " = " + Val + ";"); } else if (N->isLeaf() && (CP = NodeGetComplexPattern(N, ISE))) { std::string Fn = CP->getSelectFunc(); NumRes = CP->getNumOperands(); - std::string Code = "SDOperand "; - for (unsigned i = 0; i < NumRes - 1; ++i) - Code += "Tmp" + utostr(i+ResNo) + ", "; - emitCode(Code + "Tmp" + utostr(NumRes - 1 + ResNo) + ";"); + for (unsigned i = 0; i < NumRes; ++i) + emitDecl("Tmp" + utostr(i+ResNo)); - Code = Fn + "(" + Val; + std::string Code = Fn + "(" + Val; for (unsigned i = 0; i < NumRes; i++) Code += ", Tmp" + utostr(i + ResNo); emitCheck(Code + ")"); @@ 
-2185,7 +2201,8 @@ TmpNo = ResNo + NumRes; } else { - emitCode("SDOperand Tmp" + utostr(ResNo) + " = Select(" + Val + ");"); + emitDecl("Tmp" + utostr(ResNo)); + emitCode("Tmp" + utostr(ResNo) + " = Select(" + Val + ");"); } // Add Tmp to VariableMap, so that we don't multiply select this // value if used multiple times by this pattern result. @@ -2198,7 +2215,8 @@ if (DefInit *DI = dynamic_cast(N->getLeafValue())) { unsigned ResNo = TmpNo++; if (DI->getDef()->isSubClassOf("Register")) { - emitCode("SDOperand Tmp" + utostr(ResNo) + " = CurDAG->getRegister(" + + emitDecl("Tmp" + utostr(ResNo)); + emitCode("Tmp" + utostr(ResNo) + " = CurDAG->getRegister(" + ISE.getQualifiedName(DI->getDef()) + ", MVT::" + getEnumName(N->getTypeNum(0)) + ");"); return std::make_pair(1, ResNo); @@ -2206,7 +2224,8 @@ } else if (IntInit *II = dynamic_cast(N->getLeafValue())) { unsigned ResNo = TmpNo++; assert(N->getExtTypes().size() == 1 && "Multiple types not handled!"); - emitCode("SDOperand Tmp" + utostr(ResNo) + + emitDecl("Tmp" + utostr(ResNo)); + emitCode("Tmp" + utostr(ResNo) + " = CurDAG->getTargetConstant(" + itostr(II->getValue()) + ", MVT::" + getEnumName(N->getTypeNum(0)) + ");"); return std::make_pair(1, ResNo); @@ -2236,7 +2255,7 @@ (isRoot && PatternHasProperty(Pattern, SDNodeInfo::SDNPHasChain, ISE)); if (HasInFlag || NodeHasOutFlag || HasOptInFlag || HasImpInputs) - emitCode("SDOperand InFlag = SDOperand(0, 0);"); + emitDecl("InFlag"); if (HasOptInFlag) emitCode("bool HasOptInFlag = false;"); @@ -2289,8 +2308,9 @@ unsigned NumResults = Inst.getNumResults(); unsigned ResNo = TmpNo++; if (!isRoot) { + emitDecl("Tmp" + utostr(ResNo)); std::string Code = - "SDOperand Tmp" + utostr(ResNo) + " = CurDAG->getTargetNode(" + + "Tmp" + utostr(ResNo) + " = CurDAG->getTargetNode(" + II.Namespace + "::" + II.TheDef->getName(); if (N->getTypeNum(0) != MVT::isVoid) Code += ", MVT::" + getEnumName(N->getTypeNum(0)); @@ -2309,8 +2329,9 @@ utostr(NumResults) + ");"); } } else if (HasChain 
|| NodeHasOutFlag) { + emitDecl("Result"); if (HasOptInFlag) { - emitCode("SDOperand Result = SDOperand(0, 0);"); + emitCode("Result = SDOperand(0, 0);"); unsigned FlagNo = (unsigned) NodeHasChain + Pattern->getNumChildren(); emitCode("if (HasOptInFlag)"); std::string Code = " Result = CurDAG->getTargetNode(" + @@ -2352,7 +2373,7 @@ if (HasChain) Code += ", " + ChainName + ");"; emitCode(Code); } else { - std::string Code = "SDOperand Result = CurDAG->getTargetNode(" + + std::string Code = "Result = CurDAG->getTargetNode(" + II.Namespace + "::" + II.TheDef->getName(); // Output order: results, chain, flags @@ -2469,7 +2490,8 @@ assert(N->getNumChildren() == 1 && "node xform should have one child!"); unsigned OpVal = EmitResultCode(N->getChild(0)).second; unsigned ResNo = TmpNo++; - emitCode("SDOperand Tmp" + utostr(ResNo) + " = Transform_" + Op->getName() + emitDecl("Tmp" + utostr(ResNo)); + emitCode("Tmp" + utostr(ResNo) + " = Transform_" + Op->getName() + "(Tmp" + utostr(OpVal) + ".Val);"); if (isRoot) { emitCode("CodeGenMap[N] = Tmp" +utostr(ResNo) + ";"); @@ -2537,11 +2559,12 @@ emitCode("InFlag = Select(" + RootName + utostr(OpNo) + ");"); } else { if (!ChainEmitted) { - emitCode("SDOperand Chain = CurDAG->getEntryNode();"); + emitDecl("Chain"); + emitCode("Chain = CurDAG->getEntryNode();"); ChainName = "Chain"; ChainEmitted = true; } - emitCode("SDOperand " + RootName + "CR" + utostr(i) + ";"); + emitDecl(RootName + "CR" + utostr(i)); emitCode(RootName + "CR" + utostr(i) + " = CurDAG->getCopyToReg(" + ChainName + ", CurDAG->getRegister(" + ISE.getQualifiedName(RR) + @@ -2590,7 +2613,8 @@ MVT::ValueType RVT = getRegisterValueType(RR, CGT); if (RVT != MVT::Flag) { if (!ChainEmitted) { - emitCode("SDOperand Chain = CurDAG->getEntryNode();"); + emitDecl("Chain"); + emitCode("Chain = CurDAG->getEntryNode();"); ChainEmitted = true; ChainName = "Chain"; } @@ -2613,10 +2637,11 @@ /// succeeds. Returns true if the pattern is not guaranteed to match. 
void DAGISelEmitter::GenerateCodeForPattern(PatternToMatch &Pattern, std::vector > &GeneratedCode, + std::set &GeneratedDecl, bool DoReplace) { PatternCodeEmitter Emitter(*this, Pattern.getPredicates(), Pattern.getSrcPattern(), Pattern.getDstPattern(), - GeneratedCode, DoReplace); + GeneratedCode, GeneratedDecl, DoReplace); // Emit the matcher, capturing named arguments in VariableMap. bool FoundChain = false; @@ -2870,13 +2895,16 @@ // cost one is at the start. std::stable_sort(Patterns.begin(), Patterns.end(), PatternSortingPredicate(*this)); - + typedef std::vector > CodeList; + typedef std::set DeclSet; std::vector > CodeForPatterns; + std::set GeneratedDecl; for (unsigned i = 0, e = Patterns.size(); i != e; ++i) { CodeList GeneratedCode; - GenerateCodeForPattern(*Patterns[i], GeneratedCode, OptSlctOrder); + GenerateCodeForPattern(*Patterns[i], GeneratedCode, GeneratedDecl, + OptSlctOrder); CodeForPatterns.push_back(std::make_pair(Patterns[i], GeneratedCode)); } @@ -2903,7 +2931,12 @@ exit(1); } } - + + // Print all declarations. + for (std::set::iterator I = GeneratedDecl.begin(), + E = GeneratedDecl.end(); I != E; ++I) + OS << " SDOperand " << *I << ";\n"; + // Loop through and reverse all of the CodeList vectors, as we will be // accessing them from their logical front, but accessing the end of a // vector is more efficient. Index: llvm/utils/TableGen/DAGISelEmitter.h diff -u llvm/utils/TableGen/DAGISelEmitter.h:1.55 llvm/utils/TableGen/DAGISelEmitter.h:1.56 --- llvm/utils/TableGen/DAGISelEmitter.h:1.55 Sun Feb 5 00:43:12 2006 +++ llvm/utils/TableGen/DAGISelEmitter.h Mon Feb 6 18:37:41 2006 @@ -16,6 +16,7 @@ #include "TableGenBackend.h" #include "CodeGenTarget.h" +#include namespace llvm { class Record; @@ -405,12 +406,10 @@ /// and emission of the instruction selector. 
/// class DAGISelEmitter : public TableGenBackend { -public: - //typedef std::pair PatternToMatch; private: RecordKeeper &Records; CodeGenTarget Target; - + std::map SDNodes; std::map > SDNodeXForms; std::map ComplexPatterns; @@ -472,6 +471,7 @@ std::vector &InstImpResults); void GenerateCodeForPattern(PatternToMatch &Pattern, std::vector > &GeneratedCode, + std::set &GeneratedDecl, bool UseGoto); void EmitPatterns(std::vector > > > &Patterns, From lattner at cs.uiuc.edu Mon Feb 6 18:47:25 2006 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Mon, 6 Feb 2006 18:47:25 -0600 Subject: [llvm-commits] CVS: llvm/lib/Target/PowerPC/PPCISelLowering.cpp PPCISelLowering.h Message-ID: <200602070047.SAA23036@zion.cs.uiuc.edu> Changes in directory llvm/lib/Target/PowerPC: PPCISelLowering.cpp updated: 1.84 -> 1.85 PPCISelLowering.h updated: 1.19 -> 1.20 --- Log message: Add the simple PPC integer constraints --- Diffs of the changes: (+42 -1) PPCISelLowering.cpp | 41 +++++++++++++++++++++++++++++++++++++++++ PPCISelLowering.h | 2 +- 2 files changed, 42 insertions(+), 1 deletion(-) Index: llvm/lib/Target/PowerPC/PPCISelLowering.cpp diff -u llvm/lib/Target/PowerPC/PPCISelLowering.cpp:1.84 llvm/lib/Target/PowerPC/PPCISelLowering.cpp:1.85 --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp:1.84 Wed Feb 1 01:19:44 2006 +++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp Mon Feb 6 18:47:13 2006 @@ -1035,3 +1035,44 @@ // Handle explicit register names. return TargetLowering::getRegForInlineAsmConstraint(Constraint); } + +// isOperandValidForConstraint +bool PPCTargetLowering:: +isOperandValidForConstraint(SDOperand Op, char Letter) { + switch (Letter) { + default: break; + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': { + if (!isa(Op)) return false; // Must be an immediate. + unsigned Value = cast(Op)->getValue(); + switch (Letter) { + default: assert(0 && "Unknown constraint letter!"); + case 'I': // "I" is a signed 16-bit constant. 
+ return (short)Value == (int)Value; + case 'J': // "J" is a constant with only the high-order 16 bits nonzero. + case 'L': // "L" is a signed 16-bit constant shifted left 16 bits. + return (short)Value == 0; + case 'K': // "K" is a constant with only the low-order 16 bits nonzero. + return (Value >> 16) == 0; + case 'M': // "M" is a constant that is greater than 31. + return Value > 31; + case 'N': // "N" is a positive constant that is an exact power of two. + return (int)Value > 0 && isPowerOf2_32(Value); + case 'O': // "O" is the constant zero. + return Value == 0; + case 'P': // "P" is a constant whose negation is a signed 16-bit constant. + return (short)-Value == (int)-Value; + } + break; + } + } + + // Handle standard constraint letters. + return TargetLowering::isOperandValidForConstraint(Op, Letter); +} Index: llvm/lib/Target/PowerPC/PPCISelLowering.h diff -u llvm/lib/Target/PowerPC/PPCISelLowering.h:1.19 llvm/lib/Target/PowerPC/PPCISelLowering.h:1.20 --- llvm/lib/Target/PowerPC/PPCISelLowering.h:1.19 Tue Jan 31 13:20:21 2006 +++ llvm/lib/Target/PowerPC/PPCISelLowering.h Mon Feb 6 18:47:13 2006 @@ -99,7 +99,7 @@ std::vector getRegForInlineAsmConstraint(const std::string &Constraint) const; - + bool isOperandValidForConstraint(SDOperand Op, char ConstraintLetter); }; } From lattner at cs.uiuc.edu Mon Feb 6 19:13:01 2006 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Mon, 6 Feb 2006 19:13:01 -0600 Subject: [llvm-commits] CVS: llvm/include/llvm/InlineAsm.h Message-ID: <200602070113.TAA23177@zion.cs.uiuc.edu> Changes in directory llvm/include/llvm: InlineAsm.h updated: 1.8 -> 1.9 --- Log message: fix an error compiling with -pedantic --- Diffs of the changes: (+1 -1) InlineAsm.h | 2 +- 1 files changed, 1 insertion(+), 1 deletion(-) Index: llvm/include/llvm/InlineAsm.h diff -u llvm/include/llvm/InlineAsm.h:1.8 llvm/include/llvm/InlineAsm.h:1.9 --- llvm/include/llvm/InlineAsm.h:1.8 Wed Feb 1 18:23:12 2006 +++ llvm/include/llvm/InlineAsm.h Mon Feb 6 19:12:49 
2006 @@ -70,7 +70,7 @@ enum ConstraintPrefix { isInput, // 'x' isOutput, // '=x' - isClobber, // '~x' + isClobber // '~x' }; struct ConstraintInfo { From jeffc at jolt-lang.org Mon Feb 6 21:34:47 2006 From: jeffc at jolt-lang.org (Jeff Cohen) Date: Mon, 6 Feb 2006 21:34:47 -0600 Subject: [llvm-commits] CVS: llvm/include/llvm/CodeGen/ELFWriter.h Message-ID: <200602070334.VAA23688@zion.cs.uiuc.edu> Changes in directory llvm/include/llvm/CodeGen: ELFWriter.h updated: 1.10 -> 1.11 --- Log message: Fix some truncation warnings. --- Diffs of the changes: (+16 -16) ELFWriter.h | 32 ++++++++++++++++---------------- 1 files changed, 16 insertions(+), 16 deletions(-) Index: llvm/include/llvm/CodeGen/ELFWriter.h diff -u llvm/include/llvm/CodeGen/ELFWriter.h:1.10 llvm/include/llvm/CodeGen/ELFWriter.h:1.11 --- llvm/include/llvm/CodeGen/ELFWriter.h:1.10 Sat Jul 16 12:40:34 2005 +++ llvm/include/llvm/CodeGen/ELFWriter.h Mon Feb 6 21:34:35 2006 @@ -256,23 +256,23 @@ } void outxword(DataBuffer &Output, uint64_t X) { if (isLittleEndian) { - Output.push_back((X >> 0) & 255); - Output.push_back((X >> 8) & 255); - Output.push_back((X >> 16) & 255); - Output.push_back((X >> 24) & 255); - Output.push_back((X >> 32) & 255); - Output.push_back((X >> 40) & 255); - Output.push_back((X >> 48) & 255); - Output.push_back((X >> 56) & 255); + Output.push_back(unsigned(X >> 0) & 255); + Output.push_back(unsigned(X >> 8) & 255); + Output.push_back(unsigned(X >> 16) & 255); + Output.push_back(unsigned(X >> 24) & 255); + Output.push_back(unsigned(X >> 32) & 255); + Output.push_back(unsigned(X >> 40) & 255); + Output.push_back(unsigned(X >> 48) & 255); + Output.push_back(unsigned(X >> 56) & 255); } else { - Output.push_back((X >> 56) & 255); - Output.push_back((X >> 48) & 255); - Output.push_back((X >> 40) & 255); - Output.push_back((X >> 32) & 255); - Output.push_back((X >> 24) & 255); - Output.push_back((X >> 16) & 255); - Output.push_back((X >> 8) & 255); - Output.push_back((X >> 0) & 255); + 
Output.push_back(unsigned(X >> 56) & 255); + Output.push_back(unsigned(X >> 48) & 255); + Output.push_back(unsigned(X >> 40) & 255); + Output.push_back(unsigned(X >> 32) & 255); + Output.push_back(unsigned(X >> 24) & 255); + Output.push_back(unsigned(X >> 16) & 255); + Output.push_back(unsigned(X >> 8) & 255); + Output.push_back(unsigned(X >> 0) & 255); } } void outaddr32(DataBuffer &Output, unsigned X) { From jeffc at jolt-lang.org Mon Feb 6 23:12:08 2006 From: jeffc at jolt-lang.org (Jeff Cohen) Date: Mon, 6 Feb 2006 23:12:08 -0600 Subject: [llvm-commits] CVS: llvm/lib/ExecutionEngine/ExecutionEngine.cpp Message-ID: <200602070512.XAA24177@zion.cs.uiuc.edu> Changes in directory llvm/lib/ExecutionEngine: ExecutionEngine.cpp updated: 1.73 -> 1.74 --- Log message: Teach the interpreter to handle global variables that are added to a module after interpretation has begun. The JIT already handles this situation correctly, and the interpreter can already handle new functions being added. --- Diffs of the changes: (+10 -1) ExecutionEngine.cpp | 11 ++++++++++- 1 files changed, 10 insertions(+), 1 deletion(-) Index: llvm/lib/ExecutionEngine/ExecutionEngine.cpp diff -u llvm/lib/ExecutionEngine/ExecutionEngine.cpp:1.73 llvm/lib/ExecutionEngine/ExecutionEngine.cpp:1.74 --- llvm/lib/ExecutionEngine/ExecutionEngine.cpp:1.73 Fri Jan 20 12:18:40 2006 +++ llvm/lib/ExecutionEngine/ExecutionEngine.cpp Mon Feb 6 23:11:57 2006 @@ -171,7 +171,16 @@ return getPointerToFunction(F); MutexGuard locked(lock); - assert(state.getGlobalAddressMap(locked)[GV] && "Global hasn't had an address allocated yet?"); + void *p = state.getGlobalAddressMap(locked)[GV]; + if (p) + return p; + + // Global variable might have been added since interpreter started. 
+ if (GlobalVariable *GVar = + const_cast<GlobalVariable *>(dyn_cast<GlobalVariable>(GV))) + EmitGlobalVariable(GVar); + else + assert("Global hasn't had an address allocated yet!"); return state.getGlobalAddressMap(locked)[GV]; } From jeffc at jolt-lang.org Mon Feb 6 23:29:57 2006 From: jeffc at jolt-lang.org (Jeff Cohen) Date: Mon, 6 Feb 2006 23:29:57 -0600 Subject: [llvm-commits] CVS: llvm/lib/ExecutionEngine/Interpreter/Execution.cpp Interpreter.cpp Interpreter.h Message-ID: <200602070529.XAA24235@zion.cs.uiuc.edu> Changes in directory llvm/lib/ExecutionEngine/Interpreter: Execution.cpp updated: 1.138 -> 1.139 Interpreter.cpp updated: 1.25 -> 1.26 Interpreter.h updated: 1.71 -> 1.72 --- Log message: The interpreter assumes that the caller of runFunction() must be lli, and therefore the function being called must be a main() returning an int. The consequences when these assumptions are false are not good, so don't assume them. --- Diffs of the changes: (+7 -8) Execution.cpp | 6 +++--- Interpreter.cpp | 7 +++---- Interpreter.h | 2 +- 3 files changed, 7 insertions(+), 8 deletions(-) Index: llvm/lib/ExecutionEngine/Interpreter/Execution.cpp diff -u llvm/lib/ExecutionEngine/Interpreter/Execution.cpp:1.138 llvm/lib/ExecutionEngine/Interpreter/Execution.cpp:1.139 --- llvm/lib/ExecutionEngine/Interpreter/Execution.cpp:1.138 Sat Jun 18 13:34:52 2005 +++ llvm/lib/ExecutionEngine/Interpreter/Execution.cpp Mon Feb 6 23:29:44 2006 @@ -553,7 +553,7 @@ /// Pop the last stack frame off of ECStack and then copy the result /// back into the result variable if we are not returning void. The -/// result variable may be the ExitCode, or the Value of the calling +/// result variable may be the ExitValue, or the Value of the calling /// CallInst if there was a previous stack frame. This method may /// invalidate any ECStack iterators you have. This method also takes /// care of switching to the normal destination BB, if we are returning @@ -566,9 +566,9 @@ if (ECStack.empty()) { // Finished main.
Put result into exit code... if (RetTy && RetTy->isIntegral()) { // Nonvoid return type? - ExitCode = Result.IntVal; // Capture the exit code of the program + ExitValue = Result; // Capture the exit value of the program } else { - ExitCode = 0; + memset(&ExitValue, 0, sizeof(ExitValue)); } } else { // If we have a previous stack frame, and we have a previous call, Index: llvm/lib/ExecutionEngine/Interpreter/Interpreter.cpp diff -u llvm/lib/ExecutionEngine/Interpreter/Interpreter.cpp:1.25 llvm/lib/ExecutionEngine/Interpreter/Interpreter.cpp:1.26 --- llvm/lib/ExecutionEngine/Interpreter/Interpreter.cpp:1.25 Wed Jul 27 01:12:33 2005 +++ llvm/lib/ExecutionEngine/Interpreter/Interpreter.cpp Mon Feb 6 23:29:44 2006 @@ -50,10 +50,11 @@ // Interpreter::Interpreter(Module *M, bool isLittleEndian, bool isLongPointer, IntrinsicLowering *il) - : ExecutionEngine(M), ExitCode(0), + : ExecutionEngine(M), TD("lli", isLittleEndian, isLongPointer ? 8 : 4, isLongPointer ? 8 : 4, isLongPointer ? 8 : 4), IL(il) { + memset(&ExitValue, 0, sizeof(ExitValue)); setTargetData(TD); // Initialize the "backend" initializeExecutionEngine(); @@ -100,8 +101,6 @@ // Start executing the function. run(); - GenericValue rv; - rv.IntVal = ExitCode; - return rv; + return ExitValue; } Index: llvm/lib/ExecutionEngine/Interpreter/Interpreter.h diff -u llvm/lib/ExecutionEngine/Interpreter/Interpreter.h:1.71 llvm/lib/ExecutionEngine/Interpreter/Interpreter.h:1.72 --- llvm/lib/ExecutionEngine/Interpreter/Interpreter.h:1.71 Sat Jun 18 13:34:52 2005 +++ llvm/lib/ExecutionEngine/Interpreter/Interpreter.h Mon Feb 6 23:29:44 2006 @@ -80,7 +80,7 @@ // Interpreter - This class represents the entirety of the interpreter. 
// class Interpreter : public ExecutionEngine, public InstVisitor { - int ExitCode; // The exit code to be returned by the lli util + GenericValue ExitValue; // The return value of the called function TargetData TD; IntrinsicLowering *IL; From lattner at cs.uiuc.edu Tue Feb 7 00:17:21 2006 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Tue, 7 Feb 2006 00:17:21 -0600 Subject: [llvm-commits] CVS: llvm/include/llvm/Type.h DerivedTypes.h Constants.h Message-ID: <200602070617.AAA24755@zion.cs.uiuc.edu> Changes in directory llvm/include/llvm: Type.h updated: 1.82 -> 1.83 DerivedTypes.h updated: 1.70 -> 1.71 Constants.h updated: 1.77 -> 1.78 --- Log message: add a new Type::getIntegralTypeMask() method, which is useful for clients that want to do bitwise inspection of integer types. --- Diffs of the changes: (+9 -2) Constants.h | 1 - DerivedTypes.h | 1 - Type.h | 9 +++++++++ 3 files changed, 9 insertions(+), 2 deletions(-) Index: llvm/include/llvm/Type.h diff -u llvm/include/llvm/Type.h:1.82 llvm/include/llvm/Type.h:1.83 --- llvm/include/llvm/Type.h:1.82 Mon Dec 26 02:36:53 2005 +++ llvm/include/llvm/Type.h Tue Feb 7 00:17:10 2006 @@ -36,6 +36,7 @@ #include "AbstractTypeUser.h" #include "llvm/Support/Casting.h" +#include "llvm/Support/DataTypes.h" #include "llvm/ADT/GraphTraits.h" #include "llvm/ADT/iterator" #include @@ -232,6 +233,14 @@ /// getSignedVersion - If this is an integer type, return the signed variant /// of this type. For example uint -> int. const Type *getSignedVersion() const; + + /// getIntegralTypeMask - Return a bitmask with ones set for all of the bits + /// that can be set by an unsigned version of this type. This is 0xFF for + /// sbyte/ubyte, 0xFFFF for shorts, etc. + uint64_t getIntegralTypeMask() const { + assert(isIntegral() && "This only works for integral types!"); + return ~0ULL >> (64-getPrimitiveSizeInBits()); + } /// getForwaredType - Return the type that this type has been resolved to if /// it has been resolved to anything. 
This is used to implement the Index: llvm/include/llvm/DerivedTypes.h diff -u llvm/include/llvm/DerivedTypes.h:1.70 llvm/include/llvm/DerivedTypes.h:1.71 --- llvm/include/llvm/DerivedTypes.h:1.70 Sat Nov 12 21:26:12 2005 +++ llvm/include/llvm/DerivedTypes.h Tue Feb 7 00:17:10 2006 @@ -19,7 +19,6 @@ #define LLVM_DERIVED_TYPES_H #include "llvm/Type.h" -#include "llvm/Support/DataTypes.h" namespace llvm { Index: llvm/include/llvm/Constants.h diff -u llvm/include/llvm/Constants.h:1.77 llvm/include/llvm/Constants.h:1.78 --- llvm/include/llvm/Constants.h:1.77 Tue Jan 17 14:05:59 2006 +++ llvm/include/llvm/Constants.h Tue Feb 7 00:17:10 2006 @@ -22,7 +22,6 @@ #include "llvm/Constant.h" #include "llvm/Type.h" -#include "llvm/Support/DataTypes.h" namespace llvm { From lattner at cs.uiuc.edu Tue Feb 7 00:56:46 2006 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Tue, 7 Feb 2006 00:56:46 -0600 Subject: [llvm-commits] CVS: llvm/lib/Transforms/Scalar/InstructionCombining.cpp Message-ID: <200602070656.AAA25037@zion.cs.uiuc.edu> Changes in directory llvm/lib/Transforms/Scalar: InstructionCombining.cpp updated: 1.418 -> 1.419 --- Log message: Implement the beginnings of a facility for simplifying expressions based on 'demanded bits', inspired by Nate's work in the dag combiner. This isn't complete, but needs two unrelated instcombiner changes to continue. --- Diffs of the changes: (+145 -3) InstructionCombining.cpp | 148 ++++++++++++++++++++++++++++++++++++++++++++++- 1 files changed, 145 insertions(+), 3 deletions(-) Index: llvm/lib/Transforms/Scalar/InstructionCombining.cpp diff -u llvm/lib/Transforms/Scalar/InstructionCombining.cpp:1.418 llvm/lib/Transforms/Scalar/InstructionCombining.cpp:1.419 --- llvm/lib/Transforms/Scalar/InstructionCombining.cpp:1.418 Sun Feb 5 01:54:04 2006 +++ llvm/lib/Transforms/Scalar/InstructionCombining.cpp Tue Feb 7 00:56:34 2006 @@ -72,7 +72,7 @@ /// the instruction to the work lists because they might get more simplified /// now.
/// - void AddUsersToWorkList(Instruction &I) { + void AddUsersToWorkList(Value &I) { for (Value::use_iterator UI = I.use_begin(), UE = I.use_end(); UI != UE; ++UI) WorkList.push_back(cast(*UI)); @@ -188,6 +188,21 @@ } } + // UpdateValueUsesWith - This method is to be used when an value is + // found to be replacable with another preexisting expression or was + // updated. Here we add all uses of I to the worklist, replace all uses of + // I with the new value (unless the instruction was just updated), then + // return true, so that the inst combiner will know that I was modified. + // + bool UpdateValueUsesWith(Value *Old, Value *New) { + AddUsersToWorkList(*Old); // Add all modified instrs to worklist + if (Old != New) + Old->replaceAllUsesWith(New); + if (Instruction *I = dyn_cast(Old)) + WorkList.push_back(I); + return true; + } + // EraseInstFromFunction - When dealing with an instruction that has side // effects or produces a void value, we can't rely on DCE to delete the // instruction. Instead, visit methods should return the value returned by @@ -200,7 +215,6 @@ return 0; // Don't do anything with FI } - private: /// InsertOperandCastBefore - This inserts a cast of V to DestTy before the /// InsertBefore instruction. This is specialized a bit to avoid inserting @@ -213,6 +227,7 @@ // operators. bool SimplifyCommutative(BinaryOperator &I); + bool SimplifyDemandedBits(Value *V, uint64_t Mask, unsigned Depth = 0); // FoldOpIntoPhi - Given a binary operator or cast instruction which has a // PHI node as operand #0, see if we can fold the instruction into the PHI @@ -476,6 +491,122 @@ return false; } +/// SimplifyDemandedBits - Look at V. At this point, we know that only the Mask +/// bits of the result of V are ever used downstream. If we can use this +/// information to simplify V, return V and set NewVal to the new value we +/// should use in V's place. 
+bool InstCombiner::SimplifyDemandedBits(Value *V, uint64_t Mask, + unsigned Depth) { + if (!V->hasOneUse()) { // Other users may use these bits. + if (Depth != 0) // Not at the root. + return false; + // If this is the root being simplified, allow it to have multiple uses, + // just set the Mask to all bits. + Mask = V->getType()->getIntegralTypeMask(); + } else if (Mask == 0) { // Not demanding any bits from V. + return UpdateValueUsesWith(V, UndefValue::get(V->getType())); + } else if (Depth == 6) { // Limit search depth. + return false; + } + + Instruction *I = dyn_cast(V); + if (!I) return false; // Only analyze instructions. + + switch (I->getOpcode()) { + default: break; + case Instruction::And: + if (ConstantInt *RHS = dyn_cast(I->getOperand(1))) { + // Only demanding an intersection of the bits. + if (SimplifyDemandedBits(I->getOperand(0), RHS->getRawValue() & Mask, + Depth+1)) + return true; + if (~Mask & RHS->getRawValue()) { + // If this is producing any bits that are not needed, simplify the RHS. + if (I->getType()->isSigned()) { + int64_t Val = Mask & cast(RHS)->getValue(); + I->setOperand(1, ConstantSInt::get(I->getType(), Val)); + } else { + uint64_t Val = Mask & cast(RHS)->getValue(); + I->setOperand(1, ConstantUInt::get(I->getType(), Val)); + } + return UpdateValueUsesWith(I, I); + } + } + // Walk the LHS and the RHS. + return SimplifyDemandedBits(I->getOperand(0), Mask, Depth+1) || + SimplifyDemandedBits(I->getOperand(1), Mask, Depth+1); + case Instruction::Or: + case Instruction::Xor: + if (ConstantInt *RHS = dyn_cast(I->getOperand(1))) { + // If none of the [x]or'd in bits are demanded, don't both with the [x]or. + if ((Mask & RHS->getRawValue()) == 0) + return UpdateValueUsesWith(I, I->getOperand(0)); + + // Otherwise, for an OR, we only demand those bits not set by the OR. + if (I->getOpcode() == Instruction::Or) + Mask &= ~RHS->getRawValue(); + return SimplifyDemandedBits(I->getOperand(0), Mask, Depth+1); + } + // Walk the LHS and the RHS. 
+ return SimplifyDemandedBits(I->getOperand(0), Mask, Depth+1) || + SimplifyDemandedBits(I->getOperand(1), Mask, Depth+1); + case Instruction::Cast: { + const Type *SrcTy = I->getOperand(0)->getType(); + if (SrcTy == Type::BoolTy) + return SimplifyDemandedBits(I->getOperand(0), Mask&1, Depth+1); + + if (!SrcTy->isInteger()) return false; + + unsigned SrcBits = SrcTy->getPrimitiveSizeInBits(); + // If this is a sign-extend, treat specially. + if (SrcTy->isSigned() && + SrcBits < I->getType()->getPrimitiveSizeInBits()) { + // If none of the top bits are demanded, convert this into an unsigned + // extend instead of a sign extend. + if ((Mask & ((1ULL << SrcBits)-1)) == 0) { + // Convert to unsigned first. + Value *NewVal; + NewVal = new CastInst(I->getOperand(0), SrcTy->getUnsignedVersion(), + I->getOperand(0)->getName(), I); + NewVal = new CastInst(I->getOperand(0), I->getType(), I->getName()); + return UpdateValueUsesWith(I, NewVal); + } + + // Otherwise, the high-bits *are* demanded. This means that the code + // implicitly demands computation of the sign bit of the input, make sure + // we explicitly include it in Mask. + Mask |= 1ULL << (SrcBits-1); + } + + // If this is an extension, the top bits are ignored. + Mask &= SrcTy->getIntegralTypeMask(); + return SimplifyDemandedBits(I->getOperand(0), Mask, Depth+1); + } + case Instruction::Select: + // Simplify the T and F values if they are not demanded. + return SimplifyDemandedBits(I->getOperand(2), Mask, Depth+1) || + SimplifyDemandedBits(I->getOperand(1), Mask, Depth+1); + case Instruction::Shl: + // We only demand the low bits of the input. + if (ConstantUInt *SA = dyn_cast(I->getOperand(1))) + return SimplifyDemandedBits(I->getOperand(0), Mask >> SA->getValue(), + Depth+1); + break; + case Instruction::Shr: + // We only demand the high bits of the input. 
+ if (I->getType()->isUnsigned()) + if (ConstantUInt *SA = dyn_cast(I->getOperand(1))) { + Mask <<= SA->getValue(); + Mask &= I->getType()->getIntegralTypeMask(); + return SimplifyDemandedBits(I->getOperand(0), Mask, Depth+1); + } + // FIXME: handle signed shr, demanding the appropriate bits. If the top + // bits aren't demanded, strength reduce to a logical SHR instead. + break; + } + return false; +} + // isTrueWhenEqual - Return true if the specified setcondinst instruction is // true when both operands are equal... // @@ -1824,6 +1955,11 @@ if (MaskedValueIsZero(Op0, NotAndRHS)) return ReplaceInstUsesWith(I, Op0); + // See if we can simplify any instructions used by the LHS whose sole + // purpose is to compute bits we don't care about. + if (SimplifyDemandedBits(Op0, AndRHS->getRawValue())) + return &I; + // Optimize a variety of ((val OP C1) & C2) combinations... if (isa(Op0) || isa(Op0)) { Instruction *Op0I = cast(Op0); @@ -4122,12 +4258,18 @@ return And; } } - + // If this is a cast to bool, turn it into the appropriate setne instruction. if (CI.getType() == Type::BoolTy) return BinaryOperator::createSetNE(CI.getOperand(0), Constant::getNullValue(CI.getOperand(0)->getType())); + // See if we can simplify any instructions used by the LHS whose sole + // purpose is to compute bits we don't care about. + if (CI.getType()->isInteger() && CI.getOperand(0)->getType()->isIntegral() && + SimplifyDemandedBits(&CI, CI.getType()->getIntegralTypeMask())) + return &CI; + // If casting the result of a getelementptr instruction with no offset, turn // this into a cast of the original pointer! 
// From lattner at cs.uiuc.edu Tue Feb 7 01:00:53 2006 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Tue, 7 Feb 2006 01:00:53 -0600 Subject: [llvm-commits] CVS: llvm/lib/Transforms/Scalar/InstructionCombining.cpp Message-ID: <200602070700.BAA25135@zion.cs.uiuc.edu> Changes in directory llvm/lib/Transforms/Scalar: InstructionCombining.cpp updated: 1.419 -> 1.420 --- Log message: Use Type::getIntegralTypeMask() to simplify some code --- Diffs of the changes: (+8 -15) InstructionCombining.cpp | 23 ++++++++--------------- 1 files changed, 8 insertions(+), 15 deletions(-) Index: llvm/lib/Transforms/Scalar/InstructionCombining.cpp diff -u llvm/lib/Transforms/Scalar/InstructionCombining.cpp:1.419 llvm/lib/Transforms/Scalar/InstructionCombining.cpp:1.420 --- llvm/lib/Transforms/Scalar/InstructionCombining.cpp:1.419 Tue Feb 7 00:56:34 2006 +++ llvm/lib/Transforms/Scalar/InstructionCombining.cpp Tue Feb 7 01:00:41 2006 @@ -844,9 +844,8 @@ // X + (signbit) --> X ^ signbit if (ConstantInt *CI = dyn_cast(RHSC)) { - unsigned NumBits = CI->getType()->getPrimitiveSizeInBits(); - uint64_t Val = CI->getRawValue() & (~0ULL >> (64- NumBits)); - if (Val == (1ULL << (NumBits-1))) + uint64_t Val = CI->getRawValue() & CI->getType()->getIntegralTypeMask(); + if (Val == (1ULL << (CI->getType()->getPrimitiveSizeInBits()-1))) return BinaryOperator::createXor(LHS, RHS); } @@ -970,7 +969,7 @@ // Form a mask of all bits from the lowest bit added through the top. uint64_t AddRHSHighBits = ~((AddRHSV & -AddRHSV)-1); - AddRHSHighBits &= ~0ULL >> (64-C2->getType()->getPrimitiveSizeInBits()); + AddRHSHighBits &= C2->getType()->getIntegralTypeMask(); // See if the and mask includes all of these bits. uint64_t AddRHSHighBitsAnd = AddRHSHighBits & C2->getRawValue(); @@ -1523,13 +1522,8 @@ // isMaxValueMinusOne - return true if this is Max-1 static bool isMaxValueMinusOne(const ConstantInt *C) { - if (const ConstantUInt *CU = dyn_cast(C)) { - // Calculate -1 casted to the right type... 
- unsigned TypeBits = C->getType()->getPrimitiveSizeInBits(); - uint64_t Val = ~0ULL; // All ones - Val >>= 64-TypeBits; // Shift out unwanted 1 bits... - return CU->getValue() == Val-1; - } + if (const ConstantUInt *CU = dyn_cast(C)) + return CU->getValue() == C->getType()->getIntegralTypeMask()-1; const ConstantSInt *CS = cast(C); @@ -1709,7 +1703,7 @@ uint64_t AndRHSV = cast(AndRHS)->getRawValue(); // Clear bits that are not part of the constant. - AndRHSV &= ~0ULL >> (64-AndRHS->getType()->getPrimitiveSizeInBits()); + AndRHSV &= AndRHS->getType()->getIntegralTypeMask(); // If there is only one bit set... if (isOneBitSet(cast(AndRHS))) { @@ -2644,8 +2638,7 @@ Value *Result = Constant::getNullValue(SIntPtrTy); // Build a mask for high order bits. - uint64_t PtrSizeMask = ~0ULL; - PtrSizeMask >>= 64-(TD.getPointerSize()*8); + uint64_t PtrSizeMask = ~0ULL >> (64-TD.getPointerSize()*8); for (unsigned i = 1, e = GEP->getNumOperands(); i != e; ++i, ++GTI) { Value *Op = GEP->getOperand(i); @@ -4245,7 +4238,7 @@ CI.getType()->getPrimitiveSizeInBits()) { assert(CSrc->getType() != Type::ULongTy && "Cannot have type bigger than ulong!"); - uint64_t AndValue = ~0ULL>>(64-CSrc->getType()->getPrimitiveSizeInBits()); + uint64_t AndValue = CSrc->getType()->getIntegralTypeMask(); Constant *AndOp = ConstantUInt::get(A->getType()->getUnsignedVersion(), AndValue); AndOp = ConstantExpr::getCast(AndOp, A->getType()); From lattner at cs.uiuc.edu Tue Feb 7 01:28:04 2006 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Tue, 7 Feb 2006 01:28:04 -0600 Subject: [llvm-commits] CVS: llvm/lib/Transforms/Scalar/InstructionCombining.cpp Message-ID: <200602070728.BAA25287@zion.cs.uiuc.edu> Changes in directory llvm/lib/Transforms/Scalar: InstructionCombining.cpp updated: 1.420 -> 1.421 --- Log message: Make MaskedValueIsZero take a uint64_t instead of a ConstantIntegral as a mask. This allows the code to be simpler and more efficient. 
Also, generalize some of the cases in MVIZ a bit, making it slightly more aggressive. --- Diffs of the changes: (+58 -69) InstructionCombining.cpp | 127 +++++++++++++++++++++-------------------------- 1 files changed, 58 insertions(+), 69 deletions(-) Index: llvm/lib/Transforms/Scalar/InstructionCombining.cpp diff -u llvm/lib/Transforms/Scalar/InstructionCombining.cpp:1.420 llvm/lib/Transforms/Scalar/InstructionCombining.cpp:1.421 --- llvm/lib/Transforms/Scalar/InstructionCombining.cpp:1.420 Tue Feb 7 01:00:41 2006 +++ llvm/lib/Transforms/Scalar/InstructionCombining.cpp Tue Feb 7 01:27:52 2006 @@ -406,20 +406,19 @@ } /// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero. We use -/// this predicate to simplify operations downstream. V and Mask are known to -/// be the same type. -static bool MaskedValueIsZero(Value *V, ConstantIntegral *Mask, - unsigned Depth = 0) { +/// this predicate to simplify operations downstream. Mask is known to be zero +/// for bits that V cannot have. +static bool MaskedValueIsZero(Value *V, uint64_t Mask, unsigned Depth = 0) { // Note, we cannot consider 'undef' to be "IsZero" here. The problem is that // we cannot optimize based on the assumption that it is zero without changing - // to to an explicit zero. If we don't change it to zero, other code could + // it to be an explicit zero. If we don't change it to zero, other code could // optimized based on the contradictory assumption that it is non-zero. // Because instcombine aggressively folds operations with undef args anyway, // this won't lose us code quality. - if (Mask->isNullValue()) + if (Mask == 0) return true; if (ConstantIntegral *CI = dyn_cast(V)) - return ConstantExpr::getAnd(CI, Mask)->isNullValue(); + return (CI->getRawValue() & Mask) == 0; if (Depth == 6) return false; // Limit search depth. @@ -427,12 +426,9 @@ switch (I->getOpcode()) { case Instruction::And: // (X & C1) & C2 == 0 iff C1 & C2 == 0. 
- if (ConstantIntegral *CI = dyn_cast(I->getOperand(1))) { - ConstantIntegral *C1C2 = - cast(ConstantExpr::getAnd(CI, Mask)); - if (MaskedValueIsZero(I->getOperand(0), C1C2, Depth+1)) - return true; - } + if (ConstantIntegral *CI = dyn_cast(I->getOperand(1))) + return MaskedValueIsZero(I->getOperand(0), CI->getRawValue() & Mask, + Depth+1); // If either the LHS or the RHS are MaskedValueIsZero, the result is zero. return MaskedValueIsZero(I->getOperand(1), Mask, Depth+1) || MaskedValueIsZero(I->getOperand(0), Mask, Depth+1); @@ -448,41 +444,34 @@ case Instruction::Cast: { const Type *SrcTy = I->getOperand(0)->getType(); if (SrcTy == Type::BoolTy) - return (Mask->getRawValue() & 1) == 0; + return (Mask & 1) == 0; + if (!SrcTy->isInteger()) return false; - if (SrcTy->isInteger()) { - // (cast X to int) & C2 == 0 iff could not have contained C2. - if (SrcTy->isUnsigned() && // Only handle zero ext. - ConstantExpr::getCast(Mask, SrcTy)->isNullValue()) - return true; - - // If this is a noop cast, recurse. - if ((SrcTy->isSigned() && SrcTy->getUnsignedVersion() == I->getType())|| - SrcTy->getSignedVersion() == I->getType()) { - Constant *NewMask = - ConstantExpr::getCast(Mask, I->getOperand(0)->getType()); - return MaskedValueIsZero(I->getOperand(0), - cast(NewMask), Depth+1); - } - } + // (cast X to int) & C2 == 0 iff could not have contained C2. + if (SrcTy->isUnsigned()) // Only handle zero ext. + return MaskedValueIsZero(I->getOperand(0), + Mask & SrcTy->getIntegralTypeMask(), Depth+1); + + // If this is a noop or trunc cast, recurse. 
+ if (SrcTy->getPrimitiveSizeInBits() >= + I->getType()->getPrimitiveSizeInBits()) + return MaskedValueIsZero(I->getOperand(0), + Mask & SrcTy->getIntegralTypeMask(), Depth+1); break; } case Instruction::Shl: // (shl X, C1) & C2 == 0 iff (X & C2 >>u C1) == 0 if (ConstantUInt *SA = dyn_cast(I->getOperand(1))) - return MaskedValueIsZero(I->getOperand(0), - cast(ConstantExpr::getUShr(Mask, SA)), + return MaskedValueIsZero(I->getOperand(0), Mask >> SA->getValue(), Depth+1); break; case Instruction::Shr: // (ushr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0 if (ConstantUInt *SA = dyn_cast(I->getOperand(1))) if (I->getType()->isUnsigned()) { - Constant *C1 = ConstantIntegral::getAllOnesValue(I->getType()); - C1 = ConstantExpr::getShr(C1, SA); - C1 = ConstantExpr::getAnd(C1, Mask); - if (C1->isNullValue()) - return true; + Mask <<= SA->getValue(); + Mask &= I->getType()->getIntegralTypeMask(); + return MaskedValueIsZero(I->getOperand(0), Mask, Depth+1); } break; } @@ -877,10 +866,9 @@ } if (Found) { // This is a sign extend if the top bits are known zero. - Constant *Mask = ConstantInt::getAllOnesValue(XorLHS->getType()); - Mask = ConstantExpr::getShl(Mask, - ConstantInt::get(Type::UByteTy, 64-(TySizeBits-Size))); - if (!MaskedValueIsZero(XorLHS, cast(Mask))) + uint64_t Mask = XorLHS->getType()->getIntegralTypeMask(); + Mask <<= 64-(TySizeBits-Size); + if (!MaskedValueIsZero(XorLHS, Mask)) Size = 0; // Not a sign ext, but can't be any others either. goto FoundSExt; } @@ -1375,10 +1363,10 @@ return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); if (I.getType()->isSigned()) { - // If the top bits of both operands are zero (i.e. we can prove they are + // If the sign bits of both operands are zero (i.e. we can prove they are // unsigned inputs), turn this into a udiv. 
- ConstantIntegral *MaskV = ConstantSInt::getMinValue(I.getType()); - if (MaskedValueIsZero(Op1, MaskV) && MaskedValueIsZero(Op0, MaskV)) { + uint64_t Mask = 1ULL << (I.getType()->getPrimitiveSizeInBits()-1); + if (MaskedValueIsZero(Op1, Mask) && MaskedValueIsZero(Op0, Mask)) { const Type *NTy = Op0->getType()->getUnsignedVersion(); Instruction *LHS = new CastInst(Op0, NTy, Op0->getName()); InsertNewInstBefore(LHS, I); @@ -1430,8 +1418,8 @@ // If the top bits of both operands are zero (i.e. we can prove they are // unsigned inputs), turn this into a urem. - ConstantIntegral *MaskV = ConstantSInt::getMinValue(I.getType()); - if (MaskedValueIsZero(Op1, MaskV) && MaskedValueIsZero(Op0, MaskV)) { + uint64_t Mask = 1ULL << (I.getType()->getPrimitiveSizeInBits()-1); + if (MaskedValueIsZero(Op1, Mask) && MaskedValueIsZero(Op0, Mask)) { const Type *NTy = Op0->getType()->getUnsignedVersion(); Instruction *LHS = new CastInst(Op0, NTy, Op0->getName()); InsertNewInstBefore(LHS, I); @@ -1888,11 +1876,9 @@ // is all N is, ignore it. unsigned MB, ME; if (isRunOfOnes(Mask, MB, ME)) { // begin/end bit of run, inclusive - Constant *Mask = ConstantInt::getAllOnesValue(RHS->getType()); - Mask = ConstantExpr::getUShr(Mask, - ConstantInt::get(Type::UByteTy, - (64-MB+1))); - if (MaskedValueIsZero(RHS, cast(Mask))) + uint64_t Mask = RHS->getType()->getIntegralTypeMask(); + Mask >>= 64-MB+1; + if (MaskedValueIsZero(RHS, Mask)) break; } } @@ -1939,13 +1925,13 @@ return BinaryOperator::createAnd(X, ConstantExpr::getAnd(C1, AndRHS)); } - if (MaskedValueIsZero(Op0, AndRHS)) // LHS & RHS == 0 + if (MaskedValueIsZero(Op0, AndRHS->getZExtValue())) // LHS & RHS == 0 return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); // If the mask is not masking out any bits, there is no reason to do the // and in the first place. 
- ConstantIntegral *NotAndRHS = - cast(ConstantExpr::getNot(AndRHS)); + uint64_t NotAndRHS = // ~ANDRHS + AndRHS->getZExtValue()^Op0->getType()->getIntegralTypeMask(); if (MaskedValueIsZero(Op0, NotAndRHS)) return ReplaceInstUsesWith(I, Op0); @@ -1964,9 +1950,9 @@ case Instruction::Or: // (X ^ V) & C2 --> (X & C2) iff (V & C2) == 0 // (X | V) & C2 --> (X & C2) iff (V & C2) == 0 - if (MaskedValueIsZero(Op0LHS, AndRHS)) + if (MaskedValueIsZero(Op0LHS, AndRHS->getZExtValue())) return BinaryOperator::createAnd(Op0RHS, AndRHS); - if (MaskedValueIsZero(Op0RHS, AndRHS)) + if (MaskedValueIsZero(Op0RHS, AndRHS->getZExtValue())) return BinaryOperator::createAnd(Op0LHS, AndRHS); // If the mask is only needed on one incoming arm, push it up. @@ -1979,7 +1965,7 @@ return BinaryOperator::create( cast(Op0I)->getOpcode(), Op0LHS, NewRHS); } - if (!isa(NotAndRHS) && + if (!isa(Op0RHS) && MaskedValueIsZero(Op0RHS, NotAndRHS)) { // Not masking anything out for the RHS, move to LHS. Instruction *NewLHS = BinaryOperator::createAnd(Op0LHS, AndRHS, @@ -1993,8 +1979,8 @@ break; case Instruction::And: // (X & V) & C2 --> 0 iff (V & C2) == 0 - if (MaskedValueIsZero(Op0LHS, AndRHS) || - MaskedValueIsZero(Op0RHS, AndRHS)) + if (MaskedValueIsZero(Op0LHS, AndRHS->getZExtValue()) || + MaskedValueIsZero(Op0RHS, AndRHS->getZExtValue())) return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); break; case Instruction::Add: @@ -2239,8 +2225,8 @@ if (ConstantIntegral *RHS = dyn_cast(Op1)) { // If X is known to only contain bits that already exist in RHS, just // replace this instruction with RHS directly. 
- if (MaskedValueIsZero(Op0, - cast(ConstantExpr::getNot(RHS)))) + if (MaskedValueIsZero(Op0, + RHS->getZExtValue()^RHS->getType()->getIntegralTypeMask())) return ReplaceInstUsesWith(I, RHS); ConstantInt *C1 = 0; Value *X = 0; @@ -2282,7 +2268,7 @@ // (X^C)|Y -> (X|Y)^C iff Y&C == 0 if (Op0->hasOneUse() && match(Op0, m_Xor(m_Value(A), m_ConstantInt(C1))) && - MaskedValueIsZero(Op1, C1)) { + MaskedValueIsZero(Op1, C1->getZExtValue())) { Instruction *NOr = BinaryOperator::createOr(A, Op1, Op0->getName()); Op0->setName(""); return BinaryOperator::createXor(InsertNewInstBefore(NOr, I), C1); @@ -2290,7 +2276,7 @@ // Y|(X^C) -> (X|Y)^C iff Y&C == 0 if (Op1->hasOneUse() && match(Op1, m_Xor(m_Value(A), m_ConstantInt(C1))) && - MaskedValueIsZero(Op0, C1)) { + MaskedValueIsZero(Op0, C1->getZExtValue())) { Instruction *NOr = BinaryOperator::createOr(A, Op0, Op1->getName()); Op0->setName(""); return BinaryOperator::createXor(InsertNewInstBefore(NOr, I), C1); @@ -2312,18 +2298,18 @@ if ((C2->getRawValue() & (C2->getRawValue()+1)) == 0 && // C2 == 0+1+ match(A, m_Add(m_Value(V1), m_Value(V2)))) { // Add commutes, try both ways. - if (V1 == B && MaskedValueIsZero(V2, C2)) + if (V1 == B && MaskedValueIsZero(V2, C2->getZExtValue())) return ReplaceInstUsesWith(I, A); - if (V2 == B && MaskedValueIsZero(V1, C2)) + if (V2 == B && MaskedValueIsZero(V1, C2->getZExtValue())) return ReplaceInstUsesWith(I, A); } // Or commutes, try both ways. if ((C1->getRawValue() & (C1->getRawValue()+1)) == 0 && match(B, m_Add(m_Value(V1), m_Value(V2)))) { // Add commutes, try both ways. - if (V1 == A && MaskedValueIsZero(V2, C1)) + if (V1 == A && MaskedValueIsZero(V2, C1->getZExtValue())) return ReplaceInstUsesWith(I, B); - if (V2 == A && MaskedValueIsZero(V1, C1)) + if (V2 == A && MaskedValueIsZero(V1, C1->getZExtValue())) return ReplaceInstUsesWith(I, B); } } @@ -3599,7 +3585,8 @@ // See if we can turn a signed shr into an unsigned shr. 
if (!isLeftShift && I.getType()->isSigned()) { - if (MaskedValueIsZero(Op0, ConstantInt::getMinValue(I.getType()))) { + if (MaskedValueIsZero(Op0, + 1ULL << (I.getType()->getPrimitiveSizeInBits()-1))) { Value *V = InsertCastBefore(Op0, I.getType()->getUnsignedVersion(), I); V = InsertNewInstBefore(new ShiftInst(Instruction::Shr, V, Op1, I.getName()), I); @@ -4373,7 +4360,8 @@ Constant *Not1 = ConstantExpr::getNot(ConstantInt::get(Op0->getType(), 1)); // cast (X != 0) to int --> X if X&~1 == 0 - if (MaskedValueIsZero(Op0, cast(Not1))) { + if (MaskedValueIsZero(Op0, + cast(Not1)->getZExtValue())) { if (CI.getType() == Op0->getType()) return ReplaceInstUsesWith(CI, Op0); else @@ -4415,7 +4403,8 @@ if (Op1C->getRawValue() == 1) { Constant *Not1 = ConstantExpr::getNot(ConstantInt::get(Op0->getType(), 1)); - if (MaskedValueIsZero(Op0, cast(Not1))) { + if (MaskedValueIsZero(Op0, + cast(Not1)->getZExtValue())) { if (CI.getType() == Op0->getType()) return ReplaceInstUsesWith(CI, Op0); else From lattner at cs.uiuc.edu Tue Feb 7 02:05:34 2006 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Tue, 7 Feb 2006 02:05:34 -0600 Subject: [llvm-commits] CVS: llvm/lib/Transforms/Scalar/InstructionCombining.cpp Message-ID: <200602070805.CAA25675@zion.cs.uiuc.edu> Changes in directory llvm/lib/Transforms/Scalar: InstructionCombining.cpp updated: 1.421 -> 1.422 --- Log message: Generalize MaskedValueIsZero into a ComputeMaskedNonZeroBits function, which is just as efficient as MVIZ and is also more general. 
Fix a few minor bugs introduced in recent patches --- Diffs of the changes: (+53 -44) InstructionCombining.cpp | 97 +++++++++++++++++++++++++---------------------- 1 files changed, 53 insertions(+), 44 deletions(-) Index: llvm/lib/Transforms/Scalar/InstructionCombining.cpp diff -u llvm/lib/Transforms/Scalar/InstructionCombining.cpp:1.421 llvm/lib/Transforms/Scalar/InstructionCombining.cpp:1.422 --- llvm/lib/Transforms/Scalar/InstructionCombining.cpp:1.421 Tue Feb 7 01:27:52 2006 +++ llvm/lib/Transforms/Scalar/InstructionCombining.cpp Tue Feb 7 02:05:22 2006 @@ -405,65 +405,66 @@ ConstantInt::get(C->getType(), 1))); } -/// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero. We use -/// this predicate to simplify operations downstream. Mask is known to be zero -/// for bits that V cannot have. -static bool MaskedValueIsZero(Value *V, uint64_t Mask, unsigned Depth = 0) { +/// ComputeMaskedNonZeroBits - Determine which of the bits specified in Mask are +/// not known to be zero and return them as a bitmask. The bits that we can +/// guarantee to be zero are returned as zero bits in the result. +static uint64_t ComputeMaskedNonZeroBits(Value *V, uint64_t Mask, + unsigned Depth = 0) { // Note, we cannot consider 'undef' to be "IsZero" here. The problem is that // we cannot optimize based on the assumption that it is zero without changing // it to be an explicit zero. If we don't change it to zero, other code could // optimized based on the contradictory assumption that it is non-zero. // Because instcombine aggressively folds operations with undef args anyway, // this won't lose us code quality. - if (Mask == 0) - return true; if (ConstantIntegral *CI = dyn_cast(V)) - return (CI->getRawValue() & Mask) == 0; - - if (Depth == 6) return false; // Limit search depth. + return CI->getRawValue() & Mask; + if (Depth == 6 || Mask == 0) + return Mask; // Limit search depth. 
if (Instruction *I = dyn_cast(V)) { switch (I->getOpcode()) { case Instruction::And: // (X & C1) & C2 == 0 iff C1 & C2 == 0. if (ConstantIntegral *CI = dyn_cast(I->getOperand(1))) - return MaskedValueIsZero(I->getOperand(0), CI->getRawValue() & Mask, - Depth+1); + return ComputeMaskedNonZeroBits(I->getOperand(0), + CI->getRawValue() & Mask, Depth+1); // If either the LHS or the RHS are MaskedValueIsZero, the result is zero. - return MaskedValueIsZero(I->getOperand(1), Mask, Depth+1) || - MaskedValueIsZero(I->getOperand(0), Mask, Depth+1); + Mask = ComputeMaskedNonZeroBits(I->getOperand(1), Mask, Depth+1); + Mask = ComputeMaskedNonZeroBits(I->getOperand(0), Mask, Depth+1); + return Mask; case Instruction::Or: case Instruction::Xor: - // If the LHS and the RHS are MaskedValueIsZero, the result is also zero. - return MaskedValueIsZero(I->getOperand(1), Mask, Depth+1) && - MaskedValueIsZero(I->getOperand(0), Mask, Depth+1); + // Any non-zero bits in the LHS or RHS are potentially non-zero in the + // result. + return ComputeMaskedNonZeroBits(I->getOperand(1), Mask, Depth+1) | + ComputeMaskedNonZeroBits(I->getOperand(0), Mask, Depth+1); case Instruction::Select: - // If the T and F values are MaskedValueIsZero, the result is also zero. - return MaskedValueIsZero(I->getOperand(2), Mask, Depth+1) && - MaskedValueIsZero(I->getOperand(1), Mask, Depth+1); + // Any non-zero bits in the T or F values are potentially non-zero in the + // result. + return ComputeMaskedNonZeroBits(I->getOperand(2), Mask, Depth+1) | + ComputeMaskedNonZeroBits(I->getOperand(1), Mask, Depth+1); case Instruction::Cast: { const Type *SrcTy = I->getOperand(0)->getType(); if (SrcTy == Type::BoolTy) - return (Mask & 1) == 0; - if (!SrcTy->isInteger()) return false; + return ComputeMaskedNonZeroBits(I->getOperand(0), Mask & 1, Depth+1); + if (!SrcTy->isInteger()) return Mask; // (cast X to int) & C2 == 0 iff could not have contained C2. - if (SrcTy->isUnsigned()) // Only handle zero ext. 
- return MaskedValueIsZero(I->getOperand(0), - Mask & SrcTy->getIntegralTypeMask(), Depth+1); - - // If this is a noop or trunc cast, recurse. - if (SrcTy->getPrimitiveSizeInBits() >= - I->getType()->getPrimitiveSizeInBits()) - return MaskedValueIsZero(I->getOperand(0), - Mask & SrcTy->getIntegralTypeMask(), Depth+1); + if (SrcTy->isUnsigned() || // Only handle zero ext/trunc/noop + SrcTy->getPrimitiveSizeInBits() >= + I->getType()->getPrimitiveSizeInBits()) { + Mask &= SrcTy->getIntegralTypeMask(); + return ComputeMaskedNonZeroBits(I->getOperand(0), Mask, Depth+1); + } + + // FIXME: handle sext casts. break; } case Instruction::Shl: // (shl X, C1) & C2 == 0 iff (X & C2 >>u C1) == 0 if (ConstantUInt *SA = dyn_cast(I->getOperand(1))) - return MaskedValueIsZero(I->getOperand(0), Mask >> SA->getValue(), - Depth+1); + return ComputeMaskedNonZeroBits(I->getOperand(0),Mask >> SA->getValue(), + Depth+1); break; case Instruction::Shr: // (ushr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0 @@ -471,13 +472,20 @@ if (I->getType()->isUnsigned()) { Mask <<= SA->getValue(); Mask &= I->getType()->getIntegralTypeMask(); - return MaskedValueIsZero(I->getOperand(0), Mask, Depth+1); + return ComputeMaskedNonZeroBits(I->getOperand(0), Mask, Depth+1); } break; } } - return false; + return Mask; +} + +/// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero. We use +/// this predicate to simplify operations downstream. Mask is known to be zero +/// for bits that V cannot have. +static bool MaskedValueIsZero(Value *V, uint64_t Mask, unsigned Depth = 0) { + return ComputeMaskedNonZeroBits(V, Mask, Depth) == 0; } /// SimplifyDemandedBits - Look at V. At this point, we know that only the Mask @@ -493,7 +501,9 @@ // just set the Mask to all bits. Mask = V->getType()->getIntegralTypeMask(); } else if (Mask == 0) { // Not demanding any bits from V. 
- return UpdateValueUsesWith(V, UndefValue::get(V->getType())); + if (V != UndefValue::get(V->getType())) + return UpdateValueUsesWith(V, UndefValue::get(V->getType())); + return false; } else if (Depth == 6) { // Limit search depth. return false; } @@ -509,15 +519,14 @@ if (SimplifyDemandedBits(I->getOperand(0), RHS->getRawValue() & Mask, Depth+1)) return true; - if (~Mask & RHS->getRawValue()) { + if (~Mask & RHS->getZExtValue()) { // If this is producing any bits that are not needed, simplify the RHS. - if (I->getType()->isSigned()) { - int64_t Val = Mask & cast(RHS)->getValue(); - I->setOperand(1, ConstantSInt::get(I->getType(), Val)); - } else { - uint64_t Val = Mask & cast(RHS)->getValue(); - I->setOperand(1, ConstantUInt::get(I->getType(), Val)); - } + uint64_t Val = Mask & RHS->getZExtValue(); + Constant *RHS = + ConstantUInt::get(I->getType()->getUnsignedVersion(), Val); + if (I->getType()->isSigned()) + RHS = ConstantExpr::getCast(RHS, I->getType()); + I->setOperand(1, RHS); return UpdateValueUsesWith(I, I); } } @@ -833,7 +842,7 @@ // X + (signbit) --> X ^ signbit if (ConstantInt *CI = dyn_cast(RHSC)) { - uint64_t Val = CI->getRawValue() & CI->getType()->getIntegralTypeMask(); + uint64_t Val = CI->getZExtValue(); if (Val == (1ULL << (CI->getType()->getPrimitiveSizeInBits()-1))) return BinaryOperator::createXor(LHS, RHS); } From evan.cheng at apple.com Tue Feb 7 02:35:56 2006 From: evan.cheng at apple.com (Evan Cheng) Date: Tue, 7 Feb 2006 02:35:56 -0600 Subject: [llvm-commits] CVS: llvm/lib/Target/X86/README.txt Message-ID: <200602070835.CAA26045@zion.cs.uiuc.edu> Changes in directory llvm/lib/Target/X86: README.txt updated: 1.46 -> 1.47 --- Log message: Remind myself to add PIC and static asm printer support. 
--- Diffs of the changes: (+4 -0) README.txt | 4 ++++ 1 files changed, 4 insertions(+) Index: llvm/lib/Target/X86/README.txt diff -u llvm/lib/Target/X86/README.txt:1.46 llvm/lib/Target/X86/README.txt:1.47 --- llvm/lib/Target/X86/README.txt:1.46 Fri Feb 3 00:22:11 2006 +++ llvm/lib/Target/X86/README.txt Tue Feb 7 02:35:44 2006 @@ -382,3 +382,7 @@ addl $12, %esp ret +//===---------------------------------------------------------------------===// + +The x86 backend currently supports dynamic-no-pic. Need to add asm +printer support for static and PIC. From evan.cheng at apple.com Tue Feb 7 02:38:49 2006 From: evan.cheng at apple.com (Evan Cheng) Date: Tue, 7 Feb 2006 02:38:49 -0600 Subject: [llvm-commits] CVS: llvm/lib/Target/X86/X86ATTAsmPrinter.cpp X86AsmPrinter.cpp X86ISelLowering.cpp X86IntelAsmPrinter.cpp Message-ID: <200602070838.CAA26109@zion.cs.uiuc.edu> Changes in directory llvm/lib/Target/X86: X86ATTAsmPrinter.cpp updated: 1.21 -> 1.22 X86AsmPrinter.cpp updated: 1.160 -> 1.161 X86ISelLowering.cpp updated: 1.78 -> 1.79 X86IntelAsmPrinter.cpp updated: 1.15 -> 1.16 --- Log message: Darwin ABI issues: weak, linkonce, etc. dynamic-no-pic support is complete. Also fixed a function stub bug. Added weak and linkonce support for x86 Linux. --- Diffs of the changes: (+109 -109) X86ATTAsmPrinter.cpp | 70 ++++++++++++++++---------- X86AsmPrinter.cpp | 130 ++++++++++++++++++++----------------------------- X86ISelLowering.cpp | 12 ++-- X86IntelAsmPrinter.cpp | 6 +- 4 files changed, 109 insertions(+), 109 deletions(-) Index: llvm/lib/Target/X86/X86ATTAsmPrinter.cpp diff -u llvm/lib/Target/X86/X86ATTAsmPrinter.cpp:1.21 llvm/lib/Target/X86/X86ATTAsmPrinter.cpp:1.22 --- llvm/lib/Target/X86/X86ATTAsmPrinter.cpp:1.21 Mon Feb 6 17:41:19 2006 +++ llvm/lib/Target/X86/X86ATTAsmPrinter.cpp Tue Feb 7 02:38:37 2006 @@ -33,12 +33,32 @@ EmitConstantPool(MF.getConstantPool()); // Print out labels for the function. 
- SwitchSection("\t.text\n", MF.getFunction()); - EmitAlignment(4); // FIXME: This should be parameterized somewhere. - if (!MF.getFunction()->hasInternalLinkage()) + const Function *F = MF.getFunction(); + switch (F->getLinkage()) { + default: assert(0 && "Unknown linkage type!"); + case Function::InternalLinkage: // Symbols default to internal. + SwitchSection(".text", F); + EmitAlignment(4, F); // FIXME: This should be parameterized somewhere. + break; + case Function::ExternalLinkage: + SwitchSection(".text", F); + EmitAlignment(4, F); // FIXME: This should be parameterized somewhere. O << "\t.globl\t" << CurrentFnName << "\n"; - if (HasDotTypeDotSizeDirective) - O << "\t.type\t" << CurrentFnName << ", @function\n"; + break; + case Function::WeakLinkage: + case Function::LinkOnceLinkage: + if (forDarwin) { + SwitchSection(".section __TEXT,__textcoal_nt,coalesced,pure_instructions", + F); + O << "\t.weak_definition\t" << CurrentFnName << "\n"; + } else { + EmitAlignment(4, F); // FIXME: This should be parameterized somewhere. + O << "\t.section\t.llvm.linkonce.t." << CurrentFnName + << ",\"ax\", at progbits\n"; + O << "\t.weak " << CurrentFnName << "\n"; + } + break; + } O << CurrentFnName << ":\n"; // Print out code for the function. @@ -95,27 +115,24 @@ return; case MachineOperand::MO_GlobalAddress: { bool isCallOp = Modifier && !strcmp(Modifier, "call"); - // Darwin block shameless ripped from PowerPCAsmPrinter.cpp + bool isMemOp = Modifier && !strcmp(Modifier, "mem"); + // Darwin block shameless ripped from PPCAsmPrinter.cpp if (forDarwin) { - if (!isCallOp) O << '$'; + if (!isMemOp && !isCallOp) O << '$'; GlobalValue *GV = MO.getGlobal(); std::string Name = Mang->getValueName(GV); - - // Dynamically-resolved functions need a stub for the function. Be - // wary however not to output $stub for external functions whose addresses - // are taken. Those should be emitted as $non_lazy_ptr below. 
- Function *F = dyn_cast(GV); - if (F && isCallOp && F->isExternal()) { - FnStubs.insert(Name); - O << "L" << Name << "$stub"; - } else if (GV->hasLinkOnceLinkage()) { - // Link-once, External, or Weakly-linked global variables need - // non-lazily-resolved stubs - LinkOnceStubs.insert(Name); - O << "L" << Name << "$non_lazy_ptr"; - } else if (GV->isExternal() || GV->hasWeakLinkage()) { - GVStubs.insert(Name); - O << "L" << Name << "$non_lazy_ptr"; + // Link-once, External, or Weakly-linked global variables need + // non-lazily-resolved stubs + if (GV->isExternal() || GV->hasWeakLinkage() || + GV->hasLinkOnceLinkage()) { + // Dynamically-resolved functions need a stub for the function. + if (isCallOp && isa(GV) && cast(GV)->isExternal()) { + FnStubs.insert(Name); + O << "L" << Name << "$stub"; + } else { + GVStubs.insert(Name); + O << "L" << Name << "$non_lazy_ptr"; + } } else { O << Mang->getValueName(GV); } @@ -126,7 +143,7 @@ O << Offset; return; } - if (!isCallOp) O << '$'; + if (!isMemOp && !isCallOp) O << '$'; O << Mang->getValueName(MO.getGlobal()); int Offset = MO.getOffset(); if (Offset > 0) @@ -137,13 +154,14 @@ } case MachineOperand::MO_ExternalSymbol: { bool isCallOp = Modifier && !strcmp(Modifier, "call"); + bool isMemOp = Modifier && !strcmp(Modifier, "mem"); if (isCallOp && forDarwin) { std::string Name(GlobalPrefix); Name += MO.getSymbolName(); FnStubs.insert(Name); O << "L" << Name << "$stub"; return; } - if (!isCallOp) O << '$'; + if (!isMemOp && !isCallOp) O << '$'; O << GlobalPrefix << MO.getSymbolName(); return; } @@ -197,7 +215,7 @@ } if (DispSpec.isGlobalAddress()) { - printOperand(MI, Op+3, "call"); + printOperand(MI, Op+3, "mem"); } else { int DispVal = DispSpec.getImmedValue(); if (DispVal || (!IndexReg.getReg() && !BaseReg.getReg())) Index: llvm/lib/Target/X86/X86AsmPrinter.cpp diff -u llvm/lib/Target/X86/X86AsmPrinter.cpp:1.160 llvm/lib/Target/X86/X86AsmPrinter.cpp:1.161 --- llvm/lib/Target/X86/X86AsmPrinter.cpp:1.160 Sat Feb 4 19:45:04 
2006 +++ llvm/lib/Target/X86/X86AsmPrinter.cpp Tue Feb 7 02:38:37 2006 @@ -80,113 +80,93 @@ const TargetData &TD = TM.getTargetData(); // Print out module-level global variables here. - for (Module::const_global_iterator I = M.global_begin(), - E = M.global_end(); I != E; ++I) { + for (Module::const_global_iterator I = M.global_begin(), E = M.global_end(); + I != E; ++I) { if (!I->hasInitializer()) continue; // External global require no code // Check to see if this is a special global used by LLVM, if so, emit it. if (I->hasAppendingLinkage() && EmitSpecialLLVMGlobal(I)) continue; - O << "\n\n"; std::string name = Mang->getValueName(I); Constant *C = I->getInitializer(); unsigned Size = TD.getTypeSize(C->getType()); unsigned Align = getPreferredAlignmentLog(I); - switch (I->getLinkage()) { - default: assert(0 && "Unknown linkage type!"); - case GlobalValue::LinkOnceLinkage: - case GlobalValue::WeakLinkage: // FIXME: Verify correct for weak. - if (C->isNullValue()) { - O << COMMDirective << name << "," << Size; - if (COMMDirectiveTakesAlignment) - O << "," << (1 << Align); - O << "\t\t" << CommentString << " " << I->getName() << "\n"; - continue; - } - - // Nonnull linkonce -> weak - O << "\t.weak " << name << "\n"; - O << "\t.section\t.llvm.linkonce.d." << name << ",\"aw\", at progbits\n"; - SwitchSection("", I); - break; - case GlobalValue::InternalLinkage: - if (C->isNullValue()) { - if (LCOMMDirective) { + if (C->isNullValue() && /* FIXME: Verify correct */ + (I->hasInternalLinkage() || I->hasWeakLinkage() || + I->hasLinkOnceLinkage())) { + if (Size == 0) Size = 1; // .comm Foo, 0 is undefined, avoid it. 
+ if (forDarwin) { + SwitchSection(".data", I); + if (I->hasInternalLinkage()) O << LCOMMDirective << name << "," << Size << "," << Align; - continue; + else + O << COMMDirective << name << "," << Size; + } else { + SwitchSection(".local", I); + O << COMMDirective << name << "," << Size << "," << Align; + } + O << "\t\t" << CommentString << " '" << I->getName() << "'\n"; + } else { + switch (I->getLinkage()) { + case GlobalValue::LinkOnceLinkage: + case GlobalValue::WeakLinkage: + if (forDarwin) { + O << "\t.globl " << name << '\n' + << "\t.weak_definition " << name << '\n'; + SwitchSection(".section __DATA,__datacoal_nt,coalesced", I); } else { - SwitchSection(".bss", I); - O << "\t.local " << name << "\n"; - O << COMMDirective << name << "," << Size; - if (COMMDirectiveTakesAlignment) - O << "," << (1 << Align); - O << "\t\t# "; - WriteAsOperand(O, I, true, true, &M); - O << "\n"; - continue; + O << "\t.section\t.llvm.linkonce.d." << name << ",\"aw\", at progbits\n"; + O << "\t.weak " << name << "\n"; } + break; + case GlobalValue::AppendingLinkage: + // FIXME: appending linkage variables should go into a section of + // their name or something. For now, just emit them as external. + case GlobalValue::ExternalLinkage: + // If external or appending, declare as a global symbol + O << "\t.globl " << name << "\n"; + // FALL THROUGH + case GlobalValue::InternalLinkage: + SwitchSection(".data", I); + break; + default: + assert(0 && "Unknown linkage type!"); } - SwitchSection(".data", I); - break; - case GlobalValue::AppendingLinkage: - // FIXME: appending linkage variables should go into a section of - // their name or something. For now, just emit them as external. - case GlobalValue::ExternalLinkage: - SwitchSection(C->isNullValue() ? 
".bss" : ".data", I); - // If external or appending, declare as a global symbol - O << "\t.globl " << name << "\n"; - break; - } - EmitAlignment(Align); - if (HasDotTypeDotSizeDirective) { - O << "\t.type " << name << ", at object\n"; - O << "\t.size " << name << "," << Size << "\n"; + EmitAlignment(Align, I); + O << name << ":\t\t\t\t" << CommentString << " '" << I->getName() + << "'\n"; + EmitGlobalConstant(C); + O << '\n'; } - O << name << ":\t\t\t" << CommentString << ' ' << I->getName() << '\n'; - EmitGlobalConstant(C); } if (forDarwin) { SwitchSection("", 0); - // Output stubs for external global variables - if (GVStubs.begin() != GVStubs.end()) - O << "\t.non_lazy_symbol_pointer\n"; - for (std::set::iterator i = GVStubs.begin(), e = GVStubs.end(); - i != e; ++i) { - O << "L" << *i << "$non_lazy_ptr:\n"; - O << "\t.indirect_symbol " << *i << "\n"; - O << "\t.long\t0\n"; - } // Output stubs for dynamically-linked functions unsigned j = 1; for (std::set::iterator i = FnStubs.begin(), e = FnStubs.end(); i != e; ++i, ++j) { - O << "\t.symbol_stub\n"; + SwitchSection(".section __IMPORT,__jump_table,symbol_stubs," + "self_modifying_code+pure_instructions,5", 0); O << "L" << *i << "$stub:\n"; O << "\t.indirect_symbol " << *i << "\n"; - O << "\tjmp\t*L" << j << "$lz\n"; - O << "L" << *i << "$stub_binder:\n"; - O << "\tpushl\t$L" << j << "$lz\n"; - O << "\tjmp\tdyld_stub_binding_helper\n"; - O << "\t.section __DATA, __la_sym_ptr3,lazy_symbol_pointers\n"; - O << "L" << j << "$lz:\n"; - O << "\t.indirect_symbol " << *i << "\n"; - O << "\t.long\tL" << *i << "$stub_binder\n"; + O << "\thlt ; hlt ; hlt ; hlt ; hlt\n"; } O << "\n"; - // Output stubs for link-once variables - if (LinkOnceStubs.begin() != LinkOnceStubs.end()) - O << ".data\n.align 2\n"; - for (std::set::iterator i = LinkOnceStubs.begin(), - e = LinkOnceStubs.end(); i != e; ++i) { - O << "L" << *i << "$non_lazy_ptr:\n" - << "\t.long\t" << *i << '\n'; + // Output stubs for external and common global variables. 
+ if (GVStubs.begin() != GVStubs.end()) + SwitchSection(".section __IMPORT,__pointers,non_lazy_symbol_pointers", 0); + for (std::set::iterator i = GVStubs.begin(), e = GVStubs.end(); + i != e; ++i) { + O << "L" << *i << "$non_lazy_ptr:\n"; + O << "\t.indirect_symbol " << *i << "\n"; + O << "\t.long\t0\n"; } } Index: llvm/lib/Target/X86/X86ISelLowering.cpp diff -u llvm/lib/Target/X86/X86ISelLowering.cpp:1.78 llvm/lib/Target/X86/X86ISelLowering.cpp:1.79 --- llvm/lib/Target/X86/X86ISelLowering.cpp:1.78 Fri Feb 3 20:20:30 2006 +++ llvm/lib/Target/X86/X86ISelLowering.cpp Tue Feb 7 02:38:37 2006 @@ -1876,11 +1876,13 @@ // the GlobalAddress must be in the base or index register of the address, // not the GV offset field. if (getTargetMachine(). - getSubtarget().getIndirectExternAndWeakGlobals() && - (GV->hasWeakLinkage() || GV->isExternal())) - Result = DAG.getLoad(MVT::i32, DAG.getEntryNode(), - DAG.getTargetGlobalAddress(GV, getPointerTy()), - DAG.getSrcValue(NULL)); + getSubtarget().getIndirectExternAndWeakGlobals()) { + if (GV->hasWeakLinkage() || GV->hasLinkOnceLinkage() || + (GV->isExternal() && !GV->hasNotBeenReadFromBytecode())) + Result = DAG.getLoad(MVT::i32, DAG.getEntryNode(), + DAG.getTargetGlobalAddress(GV, getPointerTy()), + DAG.getSrcValue(NULL)); + } return Result; } case ISD::VASTART: { Index: llvm/lib/Target/X86/X86IntelAsmPrinter.cpp diff -u llvm/lib/Target/X86/X86IntelAsmPrinter.cpp:1.15 llvm/lib/Target/X86/X86IntelAsmPrinter.cpp:1.16 --- llvm/lib/Target/X86/X86IntelAsmPrinter.cpp:1.15 Mon Feb 6 17:41:19 2006 +++ llvm/lib/Target/X86/X86IntelAsmPrinter.cpp Tue Feb 7 02:38:37 2006 @@ -109,7 +109,7 @@ abort (); return; case MachineOperand::MO_GlobalAddress: { - if (!Modifier || strcmp(Modifier, "call")) + if (!Modifier || strcmp(Modifier, "call") || strcmp(Modifier, "mem")) O << "OFFSET "; O << Mang->getValueName(MO.getGlobal()); int Offset = MO.getOffset(); @@ -161,7 +161,7 @@ O << "["; bool NeedPlus = false; if (BaseReg.getReg()) { - printOp(BaseReg, 
"call"); + printOp(BaseReg, "mem"); NeedPlus = true; } @@ -176,7 +176,7 @@ if (DispSpec.isGlobalAddress()) { if (NeedPlus) O << " + "; - printOp(DispSpec, "call"); + printOp(DispSpec, "mem"); } else { int DispVal = DispSpec.getImmedValue(); if (DispVal || (!BaseReg.getReg() && !IndexReg.getReg())) { From lattner at cs.uiuc.edu Tue Feb 7 13:07:37 2006 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Tue, 7 Feb 2006 13:07:37 -0600 Subject: [llvm-commits] CVS: llvm/test/Regression/Transforms/InstCombine/2006-02-07-SextZextCrash.ll Message-ID: <200602071907.NAA15276@zion.cs.uiuc.edu> Changes in directory llvm/test/Regression/Transforms/InstCombine: 2006-02-07-SextZextCrash.ll added (r1.1) --- Log message: new testcase that caused instcombine to crash on 176.gcc last night. --- Diffs of the changes: (+22 -0) 2006-02-07-SextZextCrash.ll | 22 ++++++++++++++++++++++ 1 files changed, 22 insertions(+) Index: llvm/test/Regression/Transforms/InstCombine/2006-02-07-SextZextCrash.ll diff -c /dev/null llvm/test/Regression/Transforms/InstCombine/2006-02-07-SextZextCrash.ll:1.1 *** /dev/null Tue Feb 7 13:07:36 2006 --- llvm/test/Regression/Transforms/InstCombine/2006-02-07-SextZextCrash.ll Tue Feb 7 13:07:25 2006 *************** *** 0 **** --- 1,22 ---- + ; RUN: llvm-as < %s | opt -instcombine -disable-output + + %struct.rtx_const = type { uint, { %union.real_extract } } + %struct.rtx_def = type { int, [1 x %union.rtunion_def] } + %union.real_extract = type { double } + %union.rtunion_def = type { uint } + + implementation ; Functions: + + fastcc void %decode_rtx_const(%struct.rtx_def* %x, %struct.rtx_const* %value) { + %tmp.54 = getelementptr %struct.rtx_const* %value, int 0, uint 0 ; [#uses=1] + %tmp.56 = getelementptr %struct.rtx_def* %x, int 0, uint 0 ; [#uses=1] + %tmp.57 = load int* %tmp.56 ; [#uses=1] + %tmp.58 = shl int %tmp.57, ubyte 8 ; [#uses=1] + %tmp.59 = shr int %tmp.58, ubyte 24 ; [#uses=1] + %tmp.60 = cast int %tmp.59 to ushort ; [#uses=1] + %tmp.61 = cast ushort 
%tmp.60 to uint ; [#uses=1] + %tmp.62 = shl uint %tmp.61, ubyte 16 ; [#uses=1] + %tmp.65 = or uint 0, %tmp.62 ; [#uses=1] + store uint %tmp.65, uint* %tmp.54 + ret void + } From lattner at cs.uiuc.edu Tue Feb 7 13:07:52 2006 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Tue, 7 Feb 2006 13:07:52 -0600 Subject: [llvm-commits] CVS: llvm/lib/Transforms/Scalar/InstructionCombining.cpp Message-ID: <200602071907.NAA15289@zion.cs.uiuc.edu> Changes in directory llvm/lib/Transforms/Scalar: InstructionCombining.cpp updated: 1.422 -> 1.423 --- Log message: Fix Transforms/InstCombine/2006-02-07-SextZextCrash.ll --- Diffs of the changes: (+5 -3) InstructionCombining.cpp | 8 +++++--- 1 files changed, 5 insertions(+), 3 deletions(-) Index: llvm/lib/Transforms/Scalar/InstructionCombining.cpp diff -u llvm/lib/Transforms/Scalar/InstructionCombining.cpp:1.422 llvm/lib/Transforms/Scalar/InstructionCombining.cpp:1.423 --- llvm/lib/Transforms/Scalar/InstructionCombining.cpp:1.422 Tue Feb 7 02:05:22 2006 +++ llvm/lib/Transforms/Scalar/InstructionCombining.cpp Tue Feb 7 13:07:40 2006 @@ -563,10 +563,12 @@ // extend instead of a sign extend. if ((Mask & ((1ULL << SrcBits)-1)) == 0) { // Convert to unsigned first. 
- Value *NewVal; + Instruction *NewVal; NewVal = new CastInst(I->getOperand(0), SrcTy->getUnsignedVersion(), - I->getOperand(0)->getName(), I); - NewVal = new CastInst(I->getOperand(0), I->getType(), I->getName()); + I->getOperand(0)->getName()); + InsertNewInstBefore(NewVal, *I); + NewVal = new CastInst(NewVal, I->getType(), I->getName()); + InsertNewInstBefore(NewVal, *I); return UpdateValueUsesWith(I, NewVal); } From lattner at cs.uiuc.edu Tue Feb 7 14:13:57 2006 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Tue, 7 Feb 2006 14:13:57 -0600 Subject: [llvm-commits] CVS: llvm/include/llvm/Target/TargetLowering.h Message-ID: <200602072013.OAA18950@zion.cs.uiuc.edu> Changes in directory llvm/include/llvm/Target: TargetLowering.h updated: 1.47 -> 1.48 --- Log message: getConstraintType should be virtual. --- Diffs of the changes: (+3 -1) TargetLowering.h | 4 +++- 1 files changed, 3 insertions(+), 1 deletion(-) Index: llvm/include/llvm/Target/TargetLowering.h diff -u llvm/include/llvm/Target/TargetLowering.h:1.47 llvm/include/llvm/Target/TargetLowering.h:1.48 --- llvm/include/llvm/Target/TargetLowering.h:1.47 Fri Feb 3 20:12:09 2006 +++ llvm/include/llvm/Target/TargetLowering.h Tue Feb 7 14:13:44 2006 @@ -451,7 +451,7 @@ /// getConstraintType - Given a constraint letter, return the type of /// constraint it is for this target. - ConstraintType getConstraintType(char ConstraintLetter) const; + virtual ConstraintType getConstraintType(char ConstraintLetter) const; /// getRegForInlineAsmConstraint - Given a constraint letter or register /// name (e.g. "r" or "edx"), return a list of registers that can be used to @@ -460,6 +460,8 @@ virtual std::vector getRegForInlineAsmConstraint(const std::string &Constraint) const; + /// isOperandValidForConstraint - Return true if the specified SDOperand is + /// valid for the specified target constraint letter. 
virtual bool isOperandValidForConstraint(SDOperand Op, char ConstraintLetter); //===--------------------------------------------------------------------===// From lattner at cs.uiuc.edu Tue Feb 7 14:16:42 2006 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Tue, 7 Feb 2006 14:16:42 -0600 Subject: [llvm-commits] CVS: llvm/lib/Target/PowerPC/PPCISelLowering.cpp PPCISelLowering.h Message-ID: <200602072016.OAA19036@zion.cs.uiuc.edu> Changes in directory llvm/lib/Target/PowerPC: PPCISelLowering.cpp updated: 1.85 -> 1.86 PPCISelLowering.h updated: 1.20 -> 1.21 --- Log message: Implement getConstraintType for PPC. --- Diffs of the changes: (+18 -0) PPCISelLowering.cpp | 17 +++++++++++++++++ PPCISelLowering.h | 1 + 2 files changed, 18 insertions(+) Index: llvm/lib/Target/PowerPC/PPCISelLowering.cpp diff -u llvm/lib/Target/PowerPC/PPCISelLowering.cpp:1.85 llvm/lib/Target/PowerPC/PPCISelLowering.cpp:1.86 --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp:1.85 Mon Feb 6 18:47:13 2006 +++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp Tue Feb 7 14:16:30 2006 @@ -980,6 +980,23 @@ return BB; } +/// getConstraintType - Given a constraint letter, return the type of +/// constraint it is for this target. 
+PPCTargetLowering::ConstraintType +PPCTargetLowering::getConstraintType(char ConstraintLetter) const { + switch (ConstraintLetter) { + default: break; + case 'b': + case 'r': + case 'f': + case 'v': + case 'y': + return C_RegisterClass; + } + return TargetLowering::getConstraintType(ConstraintLetter); +} + + std::vector PPCTargetLowering:: getRegForInlineAsmConstraint(const std::string &Constraint) const { if (Constraint.size() == 1) { Index: llvm/lib/Target/PowerPC/PPCISelLowering.h diff -u llvm/lib/Target/PowerPC/PPCISelLowering.h:1.20 llvm/lib/Target/PowerPC/PPCISelLowering.h:1.21 --- llvm/lib/Target/PowerPC/PPCISelLowering.h:1.20 Mon Feb 6 18:47:13 2006 +++ llvm/lib/Target/PowerPC/PPCISelLowering.h Tue Feb 7 14:16:30 2006 @@ -97,6 +97,7 @@ virtual MachineBasicBlock *InsertAtEndOfBasicBlock(MachineInstr *MI, MachineBasicBlock *MBB); + ConstraintType getConstraintType(char ConstraintLetter) const; std::vector getRegForInlineAsmConstraint(const std::string &Constraint) const; bool isOperandValidForConstraint(SDOperand Op, char ConstraintLetter); From evan.cheng at apple.com Tue Feb 7 15:54:21 2006 From: evan.cheng at apple.com (Evan Cheng) Date: Tue, 7 Feb 2006 15:54:21 -0600 Subject: [llvm-commits] CVS: llvm/lib/Target/X86/X86AsmPrinter.cpp Message-ID: <200602072154.PAA20019@zion.cs.uiuc.edu> Changes in directory llvm/lib/Target/X86: X86AsmPrinter.cpp updated: 1.161 -> 1.162 --- Log message: For ELF, .comm takes alignment value as the optional 3rd argument. It must be specified in bytes. 
--- Diffs of the changes: (+3 -1) X86AsmPrinter.cpp | 4 +++- 1 files changed, 3 insertions(+), 1 deletion(-) Index: llvm/lib/Target/X86/X86AsmPrinter.cpp diff -u llvm/lib/Target/X86/X86AsmPrinter.cpp:1.161 llvm/lib/Target/X86/X86AsmPrinter.cpp:1.162 --- llvm/lib/Target/X86/X86AsmPrinter.cpp:1.161 Tue Feb 7 02:38:37 2006 +++ llvm/lib/Target/X86/X86AsmPrinter.cpp Tue Feb 7 15:54:08 2006 @@ -105,7 +105,9 @@ O << COMMDirective << name << "," << Size; } else { SwitchSection(".local", I); - O << COMMDirective << name << "," << Size << "," << Align; + O << COMMDirective << name << "," << Size; + if (COMMDirectiveTakesAlignment) + O << "," << (AlignmentIsInBytes ? (1 << Align) : Align); } O << "\t\t" << CommentString << " '" << I->getName() << "'\n"; } else { From evan.cheng at apple.com Tue Feb 7 17:33:10 2006 From: evan.cheng at apple.com (Evan Cheng) Date: Tue, 7 Feb 2006 17:33:10 -0600 Subject: [llvm-commits] CVS: llvm/lib/Target/X86/X86AsmPrinter.cpp Message-ID: <200602072333.RAA20938@zion.cs.uiuc.edu> Changes in directory llvm/lib/Target/X86: X86AsmPrinter.cpp updated: 1.162 -> 1.163 --- Log message: Fixed a local common symbol bug. --- Diffs of the changes: (+17 -11) X86AsmPrinter.cpp | 28 +++++++++++++++++----------- 1 files changed, 17 insertions(+), 11 deletions(-) Index: llvm/lib/Target/X86/X86AsmPrinter.cpp diff -u llvm/lib/Target/X86/X86AsmPrinter.cpp:1.162 llvm/lib/Target/X86/X86AsmPrinter.cpp:1.163 --- llvm/lib/Target/X86/X86AsmPrinter.cpp:1.162 Tue Feb 7 15:54:08 2006 +++ llvm/lib/Target/X86/X86AsmPrinter.cpp Tue Feb 7 17:32:58 2006 @@ -97,26 +97,29 @@ (I->hasInternalLinkage() || I->hasWeakLinkage() || I->hasLinkOnceLinkage())) { if (Size == 0) Size = 1; // .comm Foo, 0 is undefined, avoid it. 
- if (forDarwin) { - SwitchSection(".data", I); - if (I->hasInternalLinkage()) - O << LCOMMDirective << name << "," << Size << "," << Align; - else + SwitchSection(".data", I); + if (LCOMMDirective != NULL) { + if (I->hasInternalLinkage()) { + O << LCOMMDirective << name << "," << Size; + if (forDarwin) + O << "," << (AlignmentIsInBytes ? (1 << Align) : Align); + } else O << COMMDirective << name << "," << Size; } else { - SwitchSection(".local", I); + if (I->hasInternalLinkage()) + O <<"\t.local\t" << name << "\n"; O << COMMDirective << name << "," << Size; if (COMMDirectiveTakesAlignment) O << "," << (AlignmentIsInBytes ? (1 << Align) : Align); } - O << "\t\t" << CommentString << " '" << I->getName() << "'\n"; + O << "\t\t" << CommentString << " " << I->getName() << "\n"; } else { switch (I->getLinkage()) { case GlobalValue::LinkOnceLinkage: case GlobalValue::WeakLinkage: if (forDarwin) { - O << "\t.globl " << name << '\n' - << "\t.weak_definition " << name << '\n'; + O << "\t.globl " << name << "\n" + << "\t.weak_definition " << name << "\n"; SwitchSection(".section __DATA,__datacoal_nt,coalesced", I); } else { O << "\t.section\t.llvm.linkonce.d." 
<< name << ",\"aw\",@progbits\n"; @@ -138,8 +141,11 @@ } EmitAlignment(Align, I); - O << name << ":\t\t\t\t" << CommentString << " '" << I->getName() - << "'\n"; + O << name << ":\t\t\t\t" << CommentString << " " << I->getName() + << "\n"; + if (HasDotTypeDotSizeDirective) + O << "\t.size " << name << ", " << Size << "\n"; + EmitGlobalConstant(C); O << '\n'; } From lattner at cs.uiuc.edu Tue Feb 7 19:20:35 2006 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Tue, 7 Feb 2006 19:20:35 -0600 Subject: [llvm-commits] CVS: llvm/lib/Transforms/Scalar/InstructionCombining.cpp Message-ID: <200602080120.TAA21670@zion.cs.uiuc.edu> Changes in directory llvm/lib/Transforms/Scalar: InstructionCombining.cpp updated: 1.423 -> 1.424 --- Log message: Fix a problem in my patch yesterday, causing a miscompilation of 176.gcc --- Diffs of the changes: (+3 -2) InstructionCombining.cpp | 5 +++-- 1 files changed, 3 insertions(+), 2 deletions(-) Index: llvm/lib/Transforms/Scalar/InstructionCombining.cpp diff -u llvm/lib/Transforms/Scalar/InstructionCombining.cpp:1.423 llvm/lib/Transforms/Scalar/InstructionCombining.cpp:1.424 --- llvm/lib/Transforms/Scalar/InstructionCombining.cpp:1.423 Tue Feb 7 13:07:40 2006 +++ llvm/lib/Transforms/Scalar/InstructionCombining.cpp Tue Feb 7 19:20:23 2006 @@ -464,7 +464,7 @@ // (shl X, C1) & C2 == 0 iff (X & C2 >>u C1) == 0 if (ConstantUInt *SA = dyn_cast(I->getOperand(1))) return ComputeMaskedNonZeroBits(I->getOperand(0),Mask >> SA->getValue(), - Depth+1); + Depth+1) << SA->getValue(); break; case Instruction::Shr: // (ushr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0 @@ -472,7 +472,8 @@ if (I->getType()->isUnsigned()) { Mask <<= SA->getValue(); Mask &= I->getType()->getIntegralTypeMask(); - return ComputeMaskedNonZeroBits(I->getOperand(0), Mask, Depth+1); + return ComputeMaskedNonZeroBits(I->getOperand(0), Mask, Depth+1) + >> SA->getValue(); } break; } From lattner at cs.uiuc.edu Tue Feb 7 20:05:58 2006 From: lattner at cs.uiuc.edu (Chris Lattner) Date:
Tue, 7 Feb 2006 20:05:58 -0600 Subject: [llvm-commits] CVS: llvm/include/llvm/CodeGen/ValueTypes.h Message-ID: <200602080205.UAA22790@zion.cs.uiuc.edu> Changes in directory llvm/include/llvm/CodeGen: ValueTypes.h updated: 1.16 -> 1.17 --- Log message: Add some happy helper methods. --- Diffs of the changes: (+15 -1) ValueTypes.h | 16 +++++++++++++++- 1 files changed, 15 insertions(+), 1 deletion(-) Index: llvm/include/llvm/CodeGen/ValueTypes.h diff -u llvm/include/llvm/CodeGen/ValueTypes.h:1.16 llvm/include/llvm/CodeGen/ValueTypes.h:1.17 --- llvm/include/llvm/CodeGen/ValueTypes.h:1.16 Thu Dec 29 18:09:56 2005 +++ llvm/include/llvm/CodeGen/ValueTypes.h Tue Feb 7 20:05:45 2006 @@ -17,6 +17,7 @@ #define LLVM_CODEGEN_VALUETYPES_H #include +#include "llvm/Support/DataTypes.h" namespace llvm { class Type; @@ -67,7 +68,7 @@ static inline bool isVector(ValueType VT) { return (VT >= v16i8 && VT <= v2f64); } - + /// getVectorType - Returns the ValueType that represents a vector NumElements /// in length, where each element is of type VT. If there is no ValueType /// that represents this vector, a ValueType of Other is returned. @@ -107,6 +108,19 @@ case MVT::v2f64: return 128; } } + + /// getIntVTBitMask - Return an integer with 1's every place there are bits + /// in the specified integer value type. + static inline uint64_t getIntVTBitMask(ValueType VT) { + assert(isInteger(VT) && !isVector(VT) && "Only applies to int scalars!"); + return ~0ULL >> (64-getSizeInBits(VT)); + } + /// getIntVTSignBit - Return an integer with a 1 in the position of the sign + /// bit for the specified integer value type. + static inline uint64_t getIntVTSignBit(ValueType VT) { + assert(isInteger(VT) && !isVector(VT) && "Only applies to int scalars!"); + return 1ULL << (getSizeInBits(VT)-1); + } /// MVT::getValueTypeString - This function returns value type as a string, /// e.g. "i32". 
From lattner at cs.uiuc.edu Tue Feb 7 20:13:27 2006 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Tue, 7 Feb 2006 20:13:27 -0600 Subject: [llvm-commits] CVS: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Message-ID: <200602080213.UAA22948@zion.cs.uiuc.edu> Changes in directory llvm/lib/CodeGen/SelectionDAG: DAGCombiner.cpp updated: 1.99 -> 1.100 --- Log message: Compile this: xori r6, r2, 1 rlwinm r6, r6, 0, 31, 31 cmpwi cr0, r6, 0 bne cr0, LBB1_3 ; endif to this: rlwinm r6, r2, 0, 31, 31 cmpwi cr0, r6, 0 beq cr0, LBB1_3 ; endif --- Diffs of the changes: (+26 -0) DAGCombiner.cpp | 26 ++++++++++++++++++++++++++ 1 files changed, 26 insertions(+) Index: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp diff -u llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp:1.99 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp:1.100 --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp:1.99 Sun Feb 5 02:23:00 2006 +++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Tue Feb 7 20:13:15 2006 @@ -2544,6 +2544,32 @@ DAG.getConstant(C1 & (~0ULL>>(64-ExtSrcTyBits)), ExtDstTy), Cond); + } else if ((N1C->getValue() == 0 || N1C->getValue() == 1) && + (Cond == ISD::SETEQ || Cond == ISD::SETNE) && + (N0.getOpcode() == ISD::XOR || + (N0.getOpcode() == ISD::AND && + N0.getOperand(0).getOpcode() == ISD::XOR && + N0.getOperand(1) == N0.getOperand(0).getOperand(1))) && + isa(N0.getOperand(1)) && + cast(N0.getOperand(1))->getValue() == 1) { + // If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We can + // only do this if the top bits are known zero. + if (TLI.MaskedValueIsZero(N1, + MVT::getIntVTBitMask(N0.getValueType())-1)) { + // Okay, get the un-inverted input value. 
+ SDOperand Val; + if (N0.getOpcode() == ISD::XOR) + Val = N0.getOperand(0); + else { + assert(N0.getOpcode() == ISD::AND && + N0.getOperand(0).getOpcode() == ISD::XOR); + // ((X^1)&1)^1 -> X & 1 + Val = DAG.getNode(ISD::AND, N0.getValueType(), + N0.getOperand(0).getOperand(0), N0.getOperand(1)); + } + return DAG.getSetCC(VT, Val, N1, + Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ); + } } uint64_t MinVal, MaxVal; From lattner at cs.uiuc.edu Tue Feb 7 20:37:53 2006 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Tue, 7 Feb 2006 20:37:53 -0600 Subject: [llvm-commits] CVS: llvm/test/Regression/Transforms/SCCP/select.ll Message-ID: <200602080237.UAA23515@zion.cs.uiuc.edu> Changes in directory llvm/test/Regression/Transforms/SCCP: select.ll added (r1.1) --- Log message: new testcase for more interesting select sccp cases --- Diffs of the changes: (+12 -0) select.ll | 12 ++++++++++++ 1 files changed, 12 insertions(+) Index: llvm/test/Regression/Transforms/SCCP/select.ll diff -c /dev/null llvm/test/Regression/Transforms/SCCP/select.ll:1.1 *** /dev/null Tue Feb 7 20:37:50 2006 --- llvm/test/Regression/Transforms/SCCP/select.ll Tue Feb 7 20:37:40 2006 *************** *** 0 **** --- 1,12 ---- + ; RUN: llvm-as < %s | opt -sccp -disable-output && + ; RUN: llvm-as < %s | opt -sccp | llvm-dis | not grep select + + int %test1(bool %C) { + %X = select bool %C, int 0, int 0 + ret int %X + } + + int %test2(bool %C) { + %X = select bool %C, int 0, int undef + ret int %X + } From lattner at cs.uiuc.edu Tue Feb 7 20:38:22 2006 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Tue, 7 Feb 2006 20:38:22 -0600 Subject: [llvm-commits] CVS: llvm/lib/Transforms/Scalar/SCCP.cpp Message-ID: <200602080238.UAA23550@zion.cs.uiuc.edu> Changes in directory llvm/lib/Transforms/Scalar: SCCP.cpp updated: 1.128 -> 1.129 --- Log message: Implement some more interesting select sccp cases. 
This implements: test/Regression/Transforms/SCCP/select.ll --- Diffs of the changes: (+35 -15) SCCP.cpp | 50 +++++++++++++++++++++++++++++++++++--------------- 1 files changed, 35 insertions(+), 15 deletions(-) Index: llvm/lib/Transforms/Scalar/SCCP.cpp diff -u llvm/lib/Transforms/Scalar/SCCP.cpp:1.128 llvm/lib/Transforms/Scalar/SCCP.cpp:1.129 --- llvm/lib/Transforms/Scalar/SCCP.cpp:1.128 Sun Jan 22 17:32:06 2006 +++ llvm/lib/Transforms/Scalar/SCCP.cpp Tue Feb 7 20:38:11 2006 @@ -241,6 +241,11 @@ else if (IV.getConstant() != MergeWithV.getConstant()) markOverdefined(IV, V); } + + inline void mergeInValue(Value *V, LatticeVal &MergeWithV) { + return mergeInValue(ValueState[V], V, MergeWithV); + } + // getValueState - Return the LatticeVal object that corresponds to the value. // This function is necessary because not all values should start out in the @@ -589,23 +594,38 @@ void SCCPSolver::visitSelectInst(SelectInst &I) { LatticeVal &CondValue = getValueState(I.getCondition()); - if (CondValue.isOverdefined()) - markOverdefined(&I); - else if (CondValue.isConstant()) { + if (CondValue.isUndefined()) + return; + if (CondValue.isConstant()) { + Value *InVal = 0; if (CondValue.getConstant() == ConstantBool::True) { - LatticeVal &Val = getValueState(I.getTrueValue()); - if (Val.isOverdefined()) - markOverdefined(&I); - else if (Val.isConstant()) - markConstant(&I, Val.getConstant()); + mergeInValue(&I, getValueState(I.getTrueValue())); + return; } else if (CondValue.getConstant() == ConstantBool::False) { - LatticeVal &Val = getValueState(I.getFalseValue()); - if (Val.isOverdefined()) - markOverdefined(&I); - else if (Val.isConstant()) - markConstant(&I, Val.getConstant()); - } else - markOverdefined(&I); + mergeInValue(&I, getValueState(I.getFalseValue())); + return; + } + } + + // Otherwise, the condition is overdefined or a constant we can't evaluate. + // See if we can produce something better than overdefined based on the T/F + // value. 
+ LatticeVal &TVal = getValueState(I.getTrueValue()); + LatticeVal &FVal = getValueState(I.getFalseValue()); + + // select ?, C, C -> C. + if (TVal.isConstant() && FVal.isConstant() && + TVal.getConstant() == FVal.getConstant()) { + markConstant(&I, FVal.getConstant()); + return; + } + + if (TVal.isUndefined()) { // select ?, undef, X -> X. + mergeInValue(&I, FVal); + } else if (FVal.isUndefined()) { // select ?, X, undef -> X. + mergeInValue(&I, TVal); + } else { + markOverdefined(&I); } } From lattner at cs.uiuc.edu Tue Feb 7 21:25:44 2006 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Tue, 7 Feb 2006 21:25:44 -0600 Subject: [llvm-commits] CVS: llvm/lib/Transforms/Scalar/InstructionCombining.cpp Message-ID: <200602080325.VAA24139@zion.cs.uiuc.edu> Changes in directory llvm/lib/Transforms/Scalar: InstructionCombining.cpp updated: 1.424 -> 1.425 --- Log message: Use EraseInstFromFunction in a few cases to put the uses of the removed instruction onto the worklist (in case they are now dead). Add a really trivial local DSE implementation to help out bitfield code. We now fold this: struct S { unsigned char a : 1, b : 1, c : 1, d : 2, e : 3; S(); }; S::S() : a(0), b(0), c(1), d(0), e(6) {} to this: void %_ZN1SC1Ev(%struct.S* %this) { entry: %tmp.1 = getelementptr %struct.S* %this, int 0, uint 0 store ubyte 38, ubyte* %tmp.1 ret void } much earlier (in gccas instead of only in gccld after DSE runs). 
--- Diffs of the changes: (+37 -15) InstructionCombining.cpp | 52 +++++++++++++++++++++++++++++++++-------------- 1 files changed, 37 insertions(+), 15 deletions(-) Index: llvm/lib/Transforms/Scalar/InstructionCombining.cpp diff -u llvm/lib/Transforms/Scalar/InstructionCombining.cpp:1.424 llvm/lib/Transforms/Scalar/InstructionCombining.cpp:1.425 --- llvm/lib/Transforms/Scalar/InstructionCombining.cpp:1.424 Tue Feb 7 19:20:23 2006 +++ llvm/lib/Transforms/Scalar/InstructionCombining.cpp Tue Feb 7 21:25:32 2006 @@ -60,6 +60,7 @@ Statistic<> NumCombined ("instcombine", "Number of insts combined"); Statistic<> NumConstProp("instcombine", "Number of constant folds"); Statistic<> NumDeadInst ("instcombine", "Number of dead inst eliminated"); + Statistic<> NumDeadStore("instcombine", "Number of dead stores eliminated"); Statistic<> NumSunkInst ("instcombine", "Number of instructions sunk"); class InstCombiner : public FunctionPass, @@ -1923,6 +1924,11 @@ if (Op0 == Op1) return ReplaceInstUsesWith(I, Op1); + // See if we can simplify any instructions used by the LHS whose sole + // purpose is to compute bits we don't care about. + if (SimplifyDemandedBits(&I, I.getType()->getIntegralTypeMask())) + return &I; + if (ConstantIntegral *AndRHS = dyn_cast(Op1)) { // and X, -1 == X if (AndRHS->isAllOnesValue()) @@ -1947,11 +1953,6 @@ if (MaskedValueIsZero(Op0, NotAndRHS)) return ReplaceInstUsesWith(I, Op0); - // See if we can simplify any instructions used by the LHS whose sole - // purpose is to compute bits we don't care about. - if (SimplifyDemandedBits(Op0, AndRHS->getRawValue())) - return &I; - // Optimize a variety of ((val OP C1) & C2) combinations... 
if (isa(Op0) || isa(Op0)) { Instruction *Op0I = cast(Op0); @@ -5898,13 +5899,37 @@ Value *Ptr = SI.getOperand(1); if (isa(Ptr)) { // store X, undef -> noop (even if volatile) - removeFromWorkList(&SI); - SI.eraseFromParent(); + EraseInstFromFunction(SI); ++NumCombined; return 0; } - if (SI.isVolatile()) return 0; // Don't hack volatile loads. + // Do really simple DSE, to catch cases where there are several consequtive + // stores to the same location, separated by a few arithmetic operations. This + // situation often occurs with bitfield accesses. + BasicBlock::iterator BBI = &SI; + for (unsigned ScanInsts = 6; BBI != SI.getParent()->begin() && ScanInsts; + --ScanInsts) { + --BBI; + + if (StoreInst *PrevSI = dyn_cast(BBI)) { + // Prev store isn't volatile, and stores to the same location? + if (!PrevSI->isVolatile() && PrevSI->getOperand(1) == SI.getOperand(1)) { + ++NumDeadStore; + ++BBI; + EraseInstFromFunction(*PrevSI); + continue; + } + break; + } + + // Don't skip over loads or things that can modify memory. + if (BBI->mayWriteToMemory() || isa(BBI)) + break; + } + + + if (SI.isVolatile()) return 0; // Don't hack volatile stores. // store X, null -> turns into 'unreachable' in SimplifyCFG if (isa(Ptr)) { @@ -5919,8 +5944,7 @@ // store undef, Ptr -> noop if (isa(Val)) { - removeFromWorkList(&SI); - SI.eraseFromParent(); + EraseInstFromFunction(SI); ++NumCombined; return 0; } @@ -5938,7 +5962,7 @@ // If this store is the last instruction in the basic block, and if the block // ends with an unconditional branch, try to move it to the successor block. - BasicBlock::iterator BBI = &SI; ++BBI; + BBI = &SI; ++BBI; if (BranchInst *BI = dyn_cast(BBI)) if (BI->isUnconditional()) { // Check to see if the successor block has exactly two incoming edges. If @@ -5990,10 +6014,8 @@ OtherStore->isVolatile()), *BBI); // Nuke the old stores. 
- removeFromWorkList(&SI); - removeFromWorkList(OtherStore); - SI.eraseFromParent(); - OtherStore->eraseFromParent(); + EraseInstFromFunction(SI); + EraseInstFromFunction(*OtherStore); ++NumCombined; return 0; } From lattner at cs.uiuc.edu Wed Feb 8 00:44:03 2006 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Wed, 8 Feb 2006 00:44:03 -0600 Subject: [llvm-commits] CVS: llvm/lib/Target/PowerPC/README.txt Message-ID: <200602080644.AAA24924@zion.cs.uiuc.edu> Changes in directory llvm/lib/Target/PowerPC: README.txt updated: 1.62 -> 1.63 --- Log message: Move emails from nate into public places --- Diffs of the changes: (+17 -0) README.txt | 17 +++++++++++++++++ 1 files changed, 17 insertions(+) Index: llvm/lib/Target/PowerPC/README.txt diff -u llvm/lib/Target/PowerPC/README.txt:1.62 llvm/lib/Target/PowerPC/README.txt:1.63 --- llvm/lib/Target/PowerPC/README.txt:1.62 Sat Feb 4 23:27:35 2006 +++ llvm/lib/Target/PowerPC/README.txt Wed Feb 8 00:43:51 2006 @@ -279,6 +279,23 @@ cmpwi cr0, r7, 0 bne cr0, LBB_compare_4 ; loopexit +FreeBench/mason has a basic block that looks like this: + + %tmp.130 = seteq int %p.0__, 5 ; [#uses=1] + %tmp.134 = seteq int %p.1__, 6 ; [#uses=1] + %tmp.139 = seteq int %p.2__, 12 ; [#uses=1] + %tmp.144 = seteq int %p.3__, 13 ; [#uses=1] + %tmp.149 = seteq int %p.4__, 14 ; [#uses=1] + %tmp.154 = seteq int %p.5__, 15 ; [#uses=1] + %bothcond = and bool %tmp.134, %tmp.130 ; [#uses=1] + %bothcond123 = and bool %bothcond, %tmp.139 ; + %bothcond124 = and bool %bothcond123, %tmp.144 ; + %bothcond125 = and bool %bothcond124, %tmp.149 ; + %bothcond126 = and bool %bothcond125, %tmp.154 ; + br bool %bothcond126, label %shortcirc_next.5, label %else.0 + +This is a particularly important case where handling CRs better will help. 
+ ===-------------------------------------------------------------------------=== Simple IPO for argument passing, change: From lattner at cs.uiuc.edu Wed Feb 8 00:52:18 2006 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Wed, 8 Feb 2006 00:52:18 -0600 Subject: [llvm-commits] CVS: llvm/lib/Target/X86/README.txt Message-ID: <200602080652.AAA25054@zion.cs.uiuc.edu> Changes in directory llvm/lib/Target/X86: README.txt updated: 1.47 -> 1.48 --- Log message: Add some random notes, not high-prio --- Diffs of the changes: (+21 -0) README.txt | 21 +++++++++++++++++++++ 1 files changed, 21 insertions(+) Index: llvm/lib/Target/X86/README.txt diff -u llvm/lib/Target/X86/README.txt:1.47 llvm/lib/Target/X86/README.txt:1.48 --- llvm/lib/Target/X86/README.txt:1.47 Tue Feb 7 02:35:44 2006 +++ llvm/lib/Target/X86/README.txt Wed Feb 8 00:52:06 2006 @@ -161,6 +161,14 @@ Combine: a = sin(x), b = cos(x) into a,b = sincos(x). +Expand these to calls of sin/cos and stores: + double sincos(double x, double *sin, double *cos); + float sincosf(float x, float *sin, float *cos); + long double sincosl(long double x, long double *sin, long double *cos); + +Doing so could allow SROA of the destination pointers. See also: +http://gcc.gnu.org/bugzilla/show_bug.cgi?id=17687 + //===---------------------------------------------------------------------===// The instruction selector sometimes misses folding a load into a compare. The @@ -386,3 +394,16 @@ The x86 backend currently supports dynamic-no-pic. Need to add asm printer support for static and PIC. + +//===---------------------------------------------------------------------===// + +We should generate bts/btr/etc instructions on targets where they are cheap or +when codesize is important. 
e.g., for: + +void setbit(int *target, int bit) { + *target |= (1 << bit); +} +void clearbit(int *target, int bit) { + *target &= ~(1 << bit); +} + From lattner at cs.uiuc.edu Wed Feb 8 00:56:52 2006 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Wed, 8 Feb 2006 00:56:52 -0600 Subject: [llvm-commits] CVS: llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp Message-ID: <200602080656.AAA25095@zion.cs.uiuc.edu> Changes in directory llvm/lib/Target/PowerPC: PPCAsmPrinter.cpp updated: 1.147 -> 1.148 --- Log message: Emit the 'mr' pseudoop for easier reading. --- Diffs of the changes: (+9 -0) PPCAsmPrinter.cpp | 9 +++++++++ 1 files changed, 9 insertions(+) Index: llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp diff -u llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp:1.147 llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp:1.148 --- llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp:1.147 Mon Feb 6 16:18:19 2006 +++ llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp Wed Feb 8 00:56:40 2006 @@ -422,6 +422,15 @@ O << ", " << (unsigned int)SH << "\n"; return; } + } else if (MI->getOpcode() == PPC::OR4 || MI->getOpcode() == PPC::OR8) { + if (MI->getOperand(1).getReg() == MI->getOperand(2).getReg()) { + O << "mr "; + printOperand(MI, 0); + O << ", "; + printOperand(MI, 1); + O << "\n"; + return; + } } if (printInstruction(MI)) From lattner at cs.uiuc.edu Wed Feb 8 01:12:19 2006 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Wed, 8 Feb 2006 01:12:19 -0600 Subject: [llvm-commits] CVS: llvm/lib/Target/X86/README.txt Message-ID: <200602080712.BAA25206@zion.cs.uiuc.edu> Changes in directory llvm/lib/Target/X86: README.txt updated: 1.48 -> 1.49 --- Log message: more email -> README moving --- Diffs of the changes: (+4 -0) README.txt | 4 ++++ 1 files changed, 4 insertions(+) Index: llvm/lib/Target/X86/README.txt diff -u llvm/lib/Target/X86/README.txt:1.48 llvm/lib/Target/X86/README.txt:1.49 --- llvm/lib/Target/X86/README.txt:1.48 Wed Feb 8 00:52:06 2006 +++ llvm/lib/Target/X86/README.txt Wed Feb 8 01:12:07 2006 @@ -93,6 
+93,10 @@ 2. Code duplication (addressing mode) during isel. 3. Other ideas from "Register-Sensitive Selection, Duplication, and Sequencing of Instructions". +4. Scheduling for reduced register pressure. E.g. "Minimum Register + Instruction Sequence Problem: Revisiting Optimal Code Generation for DAGs" + and other related papers. + http://citeseer.ist.psu.edu/govindarajan01minimum.html //===---------------------------------------------------------------------===// From evan.cheng at apple.com Wed Feb 8 01:28:34 2006 From: evan.cheng at apple.com (Evan Cheng) Date: Wed, 8 Feb 2006 01:28:34 -0600 Subject: [llvm-commits] CVS: llvm/utils/NightlyTest.pl Message-ID: <200602080728.BAA25378@zion.cs.uiuc.edu> Changes in directory llvm/utils: NightlyTest.pl updated: 1.102 -> 1.103 --- Log message: Remove -pedantic. It no longer works. --- Diffs of the changes: (+0 -5) NightlyTest.pl | 5 ----- 1 files changed, 5 deletions(-) Index: llvm/utils/NightlyTest.pl diff -u llvm/utils/NightlyTest.pl:1.102 llvm/utils/NightlyTest.pl:1.103 --- llvm/utils/NightlyTest.pl:1.102 Tue Jan 31 10:10:53 2006 +++ llvm/utils/NightlyTest.pl Wed Feb 8 01:28:22 2006 @@ -22,7 +22,6 @@ # -nodejagnu Do not run feature or regression tests # -parallel Run two parallel jobs with GNU Make. # -release Build an LLVM Release version -# -pedantic Enable additional GCC warnings to detect possible errors. # -enable-llcbeta Enable testing of beta features in llc. # -disable-llc Disable LLC tests in the nightly tester. # -disable-jit Disable JIT tests in the nightly tester. 
@@ -280,10 +279,6 @@ if (/^-norunningtests$/) { $NORUNNINGTESTS = 1; next; } if (/^-parallel$/) { $MAKEOPTS = "$MAKEOPTS -j2 -l3.0"; next; } if (/^-release$/) { $MAKEOPTS = "$MAKEOPTS ENABLE_OPTIMIZED=1"; next; } - if (/^-pedantic$/) { - $MAKEOPTS = "$MAKEOPTS CompileOptimizeOpts='-O3 -DNDEBUG -finline-functions -Wpointer-arith -Wcast-align -Wno-deprecated -Wold-style-cast -Wabi -Woverloaded-virtual -ffor-scope'"; - next; - } if (/^-enable-llcbeta$/) { $PROGTESTOPTS .= " ENABLE_LLCBETA=1"; next; } if (/^-disable-llc$/) { $PROGTESTOPTS .= " DISABLE_LLC=1"; $CONFIGUREARGS .= " --disable-llc_diffs"; next; } From lattner at cs.uiuc.edu Wed Feb 8 01:35:02 2006 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Wed, 8 Feb 2006 01:35:02 -0600 Subject: [llvm-commits] CVS: llvm/lib/Transforms/Scalar/InstructionCombining.cpp Message-ID: <200602080735.BAA25484@zion.cs.uiuc.edu> Changes in directory llvm/lib/Transforms/Scalar: InstructionCombining.cpp updated: 1.425 -> 1.426 --- Log message: Simplify some code, reducing calls to MaskedValueIsZero. Implement a minor optimization where we reduce the number of bits in AND masks when possible. 
--- Diffs of the changes: (+33 -17) InstructionCombining.cpp | 50 +++++++++++++++++++++++++++++++---------------- 1 files changed, 33 insertions(+), 17 deletions(-) Index: llvm/lib/Transforms/Scalar/InstructionCombining.cpp diff -u llvm/lib/Transforms/Scalar/InstructionCombining.cpp:1.425 llvm/lib/Transforms/Scalar/InstructionCombining.cpp:1.426 --- llvm/lib/Transforms/Scalar/InstructionCombining.cpp:1.425 Tue Feb 7 21:25:32 2006 +++ llvm/lib/Transforms/Scalar/InstructionCombining.cpp Wed Feb 8 01:34:50 2006 @@ -1930,28 +1930,44 @@ return &I; if (ConstantIntegral *AndRHS = dyn_cast(Op1)) { - // and X, -1 == X - if (AndRHS->isAllOnesValue()) + uint64_t AndRHSMask = AndRHS->getZExtValue(); + uint64_t TypeMask = Op0->getType()->getIntegralTypeMask(); + + if (AndRHSMask == TypeMask) // and X, -1 == X return ReplaceInstUsesWith(I, Op0); + else if (AndRHSMask == 0) // and X, 0 == 0 + return ReplaceInstUsesWith(I, AndRHS); // and (and X, c1), c2 -> and (x, c1&c2). Handle this case here, before - // calling MaskedValueIsZero, to avoid inefficient cases where we traipse - // through many levels of ands. + // calling ComputeMaskedNonZeroBits, to avoid inefficient cases where we + // traipse through many levels of ands. { Value *X = 0; ConstantInt *C1 = 0; if (match(Op0, m_And(m_Value(X), m_ConstantInt(C1)))) return BinaryOperator::createAnd(X, ConstantExpr::getAnd(C1, AndRHS)); } - if (MaskedValueIsZero(Op0, AndRHS->getZExtValue())) // LHS & RHS == 0 - return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); - - // If the mask is not masking out any bits, there is no reason to do the - // and in the first place. - uint64_t NotAndRHS = // ~ANDRHS - AndRHS->getZExtValue()^Op0->getType()->getIntegralTypeMask(); - if (MaskedValueIsZero(Op0, NotAndRHS)) + // Figure out which of the input bits are not known to be zero, and which + // bits are known to be zero. 
+ uint64_t NonZeroBits = ComputeMaskedNonZeroBits(Op0, TypeMask); + uint64_t ZeroBits = NonZeroBits^TypeMask; + + // If the mask is not masking out any bits (i.e. all of the zeros in the + // mask are already known to be zero), there is no reason to do the and in + // the first place. + uint64_t NotAndRHS = AndRHSMask^TypeMask; + if ((NotAndRHS & ZeroBits) == NotAndRHS) return ReplaceInstUsesWith(I, Op0); + + // If the AND mask contains bits that are known zero, remove them. A + // special case is when there are no bits in common, in which case we + // implicitly turn this into an AND X, 0, which is later simplified into 0. + if ((AndRHSMask & NonZeroBits) != AndRHSMask) { + Constant *NewRHS = + ConstantUInt::get(Type::ULongTy, AndRHSMask & NonZeroBits); + I.setOperand(1, ConstantExpr::getCast(NewRHS, I.getType())); + return &I; + } // Optimize a variety of ((val OP C1) & C2) combinations... if (isa(Op0) || isa(Op0)) { @@ -1963,9 +1979,9 @@ case Instruction::Or: // (X ^ V) & C2 --> (X & C2) iff (V & C2) == 0 // (X | V) & C2 --> (X & C2) iff (V & C2) == 0 - if (MaskedValueIsZero(Op0LHS, AndRHS->getZExtValue())) + if (MaskedValueIsZero(Op0LHS, AndRHSMask)) return BinaryOperator::createAnd(Op0RHS, AndRHS); - if (MaskedValueIsZero(Op0RHS, AndRHS->getZExtValue())) + if (MaskedValueIsZero(Op0RHS, AndRHSMask)) return BinaryOperator::createAnd(Op0LHS, AndRHS); // If the mask is only needed on one incoming arm, push it up. 
@@ -1992,8 +2008,8 @@ break; case Instruction::And: // (X & V) & C2 --> 0 iff (V & C2) == 0 - if (MaskedValueIsZero(Op0LHS, AndRHS->getZExtValue()) || - MaskedValueIsZero(Op0RHS, AndRHS->getZExtValue())) + if (MaskedValueIsZero(Op0LHS, AndRHSMask) || + MaskedValueIsZero(Op0RHS, AndRHSMask)) return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); break; case Instruction::Add: @@ -2028,7 +2044,7 @@ if (SrcTy->getPrimitiveSizeInBits() >= I.getType()->getPrimitiveSizeInBits() && CastOp->getNumOperands() == 2) - if (ConstantInt *AndCI =dyn_cast(CastOp->getOperand(1))) + if (ConstantInt *AndCI = dyn_cast(CastOp->getOperand(1))) if (CastOp->getOpcode() == Instruction::And) { // Change: and (cast (and X, C1) to T), C2 // into : and (cast X to T), trunc(C1)&C2 From evan.cheng at apple.com Wed Feb 8 03:08:24 2006 From: evan.cheng at apple.com (Evan Cheng) Date: Wed, 8 Feb 2006 03:08:24 -0600 Subject: [llvm-commits] CVS: llvm/utils/NightlyTest.pl Message-ID: <200602080908.DAA23270@zion.cs.uiuc.edu> Changes in directory llvm/utils: NightlyTest.pl updated: 1.103 -> 1.104 --- Log message: Added options -cflags, -cxxflags, and -ldflags to override the default C compilation, C++ compilation, and linker options. e.g.
These are the options I use for testing on my x86 iMac: nice ./NightlyTest.pl -release -cflags "-Os -DNDEBUG -fomit-frame-pointer" -cxxflags "-Os -DNDEBUG -finline-functions -felide-constructors -fomit-frame-pointer" --- Diffs of the changes: (+15 -0) NightlyTest.pl | 15 +++++++++++++++ 1 files changed, 15 insertions(+) Index: llvm/utils/NightlyTest.pl diff -u llvm/utils/NightlyTest.pl:1.103 llvm/utils/NightlyTest.pl:1.104 --- llvm/utils/NightlyTest.pl:1.103 Wed Feb 8 01:28:22 2006 +++ llvm/utils/NightlyTest.pl Wed Feb 8 03:08:06 2006 @@ -36,6 +36,12 @@ # -cvstag Check out a specific CVS tag to build LLVM (useful for # testing release branches) # -target Specify the target triplet +# -cflags Next argument specifies that C compilation options that +# override the default. +# -cxxflags Next argument specifies that C++ compilation options that +# override the default. +# -ldflags Next argument specifies that linker options that override +# the default. # # ---------------- Options to configure llvm-test ---------------------------- # -spec2000path Path to the benchspec directory in the SPEC 2000 distro @@ -299,6 +305,15 @@ if (/^-target/) { $CONFIGUREARGS .= " --target=$ARGV[0]"; shift; next; } + if (/^-cflags/) { + $MAKEOPTS = "$MAKEOPTS C.Flags=\'$ARGV[0]\'"; shift; next; + } + if (/^-cxxflags/) { + $MAKEOPTS = "$MAKEOPTS CXX.Flags=\'$ARGV[0]\'"; shift; next; + } + if (/^-ldflags/) { + $MAKEOPTS = "$MAKEOPTS LD.Flags=\'$ARGV[0]\'"; shift; next; + } if (/^-noexternals$/) { $NOEXTERNALS = 1; next; } if (/^-nodejagnu$/) { $NODEJAGNU = 1; next; } if (/^-spec2000path$/) { From alkis at cs.uiuc.edu Wed Feb 8 04:43:00 2006 From: alkis at cs.uiuc.edu (Alkis Evlogimenos) Date: Wed, 8 Feb 2006 04:43:00 -0600 Subject: [llvm-commits] CVS: llvm-java/lib/Compiler/VMClass.cpp Resolver.cpp Compiler.cpp Message-ID: <200602081043.EAA05457@zion.cs.uiuc.edu> Changes in directory llvm-java/lib/Compiler: VMClass.cpp updated: 1.47 -> 1.48 Resolver.cpp updated: 1.22 -> 1.23 Compiler.cpp
updated: 1.283 -> 1.284 --- Log message: Make this compile again after the removal of the iostream include from llvm/Support/Debug.h --- Diffs of the changes: (+4 -1) Compiler.cpp | 3 ++- Resolver.cpp | 1 + VMClass.cpp | 1 + 3 files changed, 4 insertions(+), 1 deletion(-) Index: llvm-java/lib/Compiler/VMClass.cpp diff -u llvm-java/lib/Compiler/VMClass.cpp:1.47 llvm-java/lib/Compiler/VMClass.cpp:1.48 --- llvm-java/lib/Compiler/VMClass.cpp:1.47 Tue May 10 21:28:59 2005 +++ llvm-java/lib/Compiler/VMClass.cpp Wed Feb 8 04:42:42 2006 @@ -23,6 +23,7 @@ #include #include #include +#include using namespace llvm; using namespace llvm::Java; Index: llvm-java/lib/Compiler/Resolver.cpp diff -u llvm-java/lib/Compiler/Resolver.cpp:1.22 llvm-java/lib/Compiler/Resolver.cpp:1.23 --- llvm-java/lib/Compiler/Resolver.cpp:1.22 Fri Apr 22 19:12:14 2005 +++ llvm-java/lib/Compiler/Resolver.cpp Wed Feb 8 04:42:43 2006 @@ -18,6 +18,7 @@ #include #include #include +#include using namespace llvm; using namespace llvm::Java; Index: llvm-java/lib/Compiler/Compiler.cpp diff -u llvm-java/lib/Compiler/Compiler.cpp:1.283 llvm-java/lib/Compiler/Compiler.cpp:1.284 --- llvm-java/lib/Compiler/Compiler.cpp:1.283 Sat May 7 11:37:37 2005 +++ llvm-java/lib/Compiler/Compiler.cpp Wed Feb 8 04:42:43 2006 @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -793,7 +794,7 @@ r = new SelectInst(c, MINUS_ONE, r, TMP, currentBB_); c = new CallInst(module_->getOrInsertFunction ("llvm.isunordered", - Type::BoolTy, v1->getType(), v2->getType(), 0), + Type::BoolTy, v1->getType(), v2->getType(), NULL), v1, v2, TMP, currentBB_); r = new SelectInst(c, pushOne ? 
ONE : MINUS_ONE, r, TMP, currentBB_); push(r); From alkis at cs.uiuc.edu Wed Feb 8 04:49:47 2006 From: alkis at cs.uiuc.edu (Alkis Evlogimenos) Date: Wed, 8 Feb 2006 04:49:47 -0600 Subject: [llvm-commits] CVS: llvm-java/lib/Transforms/AddStubs.cpp Message-ID: <200602081049.EAA06652@zion.cs.uiuc.edu> Changes in directory llvm-java/lib/Transforms: AddStubs.cpp updated: 1.6 -> 1.7 --- Log message: Make this compile again after the removal of the iostream include from llvm/Support/Debug.h --- Diffs of the changes: (+1 -0) AddStubs.cpp | 1 + 1 files changed, 1 insertion(+) Index: llvm-java/lib/Transforms/AddStubs.cpp diff -u llvm-java/lib/Transforms/AddStubs.cpp:1.6 llvm-java/lib/Transforms/AddStubs.cpp:1.7 --- llvm-java/lib/Transforms/AddStubs.cpp:1.6 Mon Mar 21 13:48:59 2005 +++ llvm-java/lib/Transforms/AddStubs.cpp Wed Feb 8 04:49:29 2006 @@ -22,6 +22,7 @@ #include #include #include +#include using namespace llvm; From lattner at cs.uiuc.edu Wed Feb 8 11:00:12 2006 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Wed, 8 Feb 2006 11:00:12 -0600 Subject: [llvm-commits] CVS: llvm/docs/Lexicon.html Message-ID: <200602081700.LAA05798@zion.cs.uiuc.edu> Changes in directory llvm/docs: Lexicon.html updated: 1.12 -> 1.13 --- Log message: Add SRoA to the lexicon. Patch by Marco Matthies! --- Diffs of the changes: (+4 -1) Lexicon.html | 5 ++++- 1 files changed, 4 insertions(+), 1 deletion(-) Index: llvm/docs/Lexicon.html diff -u llvm/docs/Lexicon.html:1.12 llvm/docs/Lexicon.html:1.13 --- llvm/docs/Lexicon.html:1.12 Mon Nov 28 19:34:50 2005 +++ llvm/docs/Lexicon.html Wed Feb 8 10:59:49 2006 @@ -55,6 +55,7 @@ SCC SCCP + SRoA SSA @@ -155,6 +156,8 @@
Strongly Connected Component
SCCP
Sparse Conditional Constant Propagation
+
SRoA
+
Scalar Replacement of Aggregates
SSA
Static Single Assignment
@@ -167,7 +170,7 @@ src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!">The LLVM Team
The LLVM Compiler Infrastructure
-Last modified: $Date: 2005/11/29 01:34:50 $ +Last modified: $Date: 2006/02/08 16:59:49 $ From lattner at cs.uiuc.edu Wed Feb 8 11:01:49 2006 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Wed, 8 Feb 2006 11:01:49 -0600 Subject: [llvm-commits] CVS: llvm/docs/HowToSubmitABug.html Message-ID: <200602081701.LAA05864@zion.cs.uiuc.edu> Changes in directory llvm/docs: HowToSubmitABug.html updated: 1.24 -> 1.25 --- Log message: Mention that delta can be used to reduce some Front-end problems. Patch by Marco Matthies, thanks! --- Diffs of the changes: (+5 -2) HowToSubmitABug.html | 7 +++++-- 1 files changed, 5 insertions(+), 2 deletions(-) Index: llvm/docs/HowToSubmitABug.html diff -u llvm/docs/HowToSubmitABug.html:1.24 llvm/docs/HowToSubmitABug.html:1.25 --- llvm/docs/HowToSubmitABug.html:1.24 Mon Nov 1 02:24:13 2004 +++ llvm/docs/HowToSubmitABug.html Wed Feb 8 11:01:37 2006 @@ -128,7 +128,10 @@ behind a foo.i file (containing preprocessed C source code) and possibly foo.s (containing LLVM assembly code), for each compiled foo.c file. Send us the foo.i file, -along with a brief description of the error it caused.

+along with a brief description of the error it caused. A tool that might help +you reduce a front-end testcase to a more manageable size is +delta. +

@@ -349,7 +352,7 @@ Chris Lattner
The LLVM Compiler Infrastructure
- Last modified: $Date: 2004/11/01 08:24:13 $ + Last modified: $Date: 2006/02/08 17:01:37 $ From lattner at cs.uiuc.edu Wed Feb 8 11:47:34 2006 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Wed, 8 Feb 2006 11:47:34 -0600 Subject: [llvm-commits] CVS: llvm/lib/Target/X86/README.txt Message-ID: <200602081747.LAA06246@zion.cs.uiuc.edu> Changes in directory llvm/lib/Target/X86: README.txt updated: 1.49 -> 1.50 --- Log message: add a simple optimization --- Diffs of the changes: (+16 -0) README.txt | 16 ++++++++++++++++ 1 files changed, 16 insertions(+) Index: llvm/lib/Target/X86/README.txt diff -u llvm/lib/Target/X86/README.txt:1.49 llvm/lib/Target/X86/README.txt:1.50 --- llvm/lib/Target/X86/README.txt:1.49 Wed Feb 8 01:12:07 2006 +++ llvm/lib/Target/X86/README.txt Wed Feb 8 11:47:22 2006 @@ -411,3 +411,19 @@ *target &= ~(1 << bit); } +//===---------------------------------------------------------------------===// + +Easy: Global addresses are not always allowed as immediates. For this: + +int dst = 0; int *ptr = 0; +void foo() { ptr = &dst; } + +we get this: + +_foo: + movl $_dst, %eax + movl %eax, _ptr + ret + +When: "movl $_dst, _ptr" is sufficient. + From jlaskey at apple.com Wed Feb 8 12:17:18 2006 From: jlaskey at apple.com (Jim Laskey) Date: Wed, 8 Feb 2006 12:17:18 -0600 Subject: [llvm-commits] CVS: llvm/test/Regression/CodeGen/Generic/DebugStuff.ll Message-ID: <200602081817.MAA06425@zion.cs.uiuc.edu> Changes in directory llvm/test/Regression/CodeGen/Generic: DebugStuff.ll updated: 1.1 -> 1.2 --- Log message: Disable this test for the time being as debug is brought up to speed. 
--- Diffs of the changes: (+1 -0) DebugStuff.ll | 1 + 1 files changed, 1 insertion(+) Index: llvm/test/Regression/CodeGen/Generic/DebugStuff.ll diff -u llvm/test/Regression/CodeGen/Generic/DebugStuff.ll:1.1 llvm/test/Regression/CodeGen/Generic/DebugStuff.ll:1.2 --- llvm/test/Regression/CodeGen/Generic/DebugStuff.ll:1.1 Tue Dec 20 23:09:57 2005 +++ llvm/test/Regression/CodeGen/Generic/DebugStuff.ll Wed Feb 8 12:17:06 2006 @@ -1,5 +1,6 @@ ; Verify debugger intrinsics are ignored or accepted. ; RUN: llvm-as < %s | llc +; XFAIL: * ;; Debugger type declarations %lldb.compile_unit = type { uint, ushort, ushort, sbyte*, sbyte*, sbyte*, {}* } From lattner at cs.uiuc.edu Wed Feb 8 13:33:38 2006 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Wed, 8 Feb 2006 13:33:38 -0600 Subject: [llvm-commits] CVS: llvm/lib/Target/PowerPC/PPCBranchSelector.cpp Message-ID: <200602081933.NAA06941@zion.cs.uiuc.edu> Changes in directory llvm/lib/Target/PowerPC: PPCBranchSelector.cpp updated: 1.20 -> 1.21 --- Log message: Rename BSel -> PPCBSel for the benefit of doxygen users. Move the methods out of line. Remove unused Debug.h stuff. Teach getNumBytesForInstruction to know the size of an inline asm. 
--- Diffs of the changes: (+115 -105) PPCBranchSelector.cpp | 220 ++++++++++++++++++++++++++------------------------ 1 files changed, 115 insertions(+), 105 deletions(-) Index: llvm/lib/Target/PowerPC/PPCBranchSelector.cpp diff -u llvm/lib/Target/PowerPC/PPCBranchSelector.cpp:1.20 llvm/lib/Target/PowerPC/PPCBranchSelector.cpp:1.21 --- llvm/lib/Target/PowerPC/PPCBranchSelector.cpp:1.20 Sun Oct 16 00:39:50 2005 +++ llvm/lib/Target/PowerPC/PPCBranchSelector.cpp Wed Feb 8 13:33:26 2006 @@ -15,121 +15,19 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "bsel" #include "PPC.h" #include "PPCInstrBuilder.h" #include "PPCInstrInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/Support/Debug.h" #include using namespace llvm; namespace { - struct BSel : public MachineFunctionPass { + struct PPCBSel : public MachineFunctionPass { // OffsetMap - Mapping between BB and byte offset from start of function std::map OffsetMap; - /// bytesForOpcode - A convenience function for totalling up the number of - /// bytes in a basic block. - /// - static unsigned bytesForOpcode(unsigned opcode) { - switch (opcode) { - case PPC::COND_BRANCH: - // while this will be 4 most of the time, if we emit 12 it is just a - // minor pessimization that saves us from having to worry about - // keeping the offsets up to date later when we emit long branch glue. 
- return 12; - case PPC::IMPLICIT_DEF_GPR: // no asm emitted - case PPC::IMPLICIT_DEF_F4: // no asm emitted - case PPC::IMPLICIT_DEF_F8: // no asm emitted - return 0; - default: - break; - } - return 4; // PowerPC instructions are all 4 bytes - } - - virtual bool runOnMachineFunction(MachineFunction &Fn) { - // Running total of instructions encountered since beginning of function - unsigned ByteCount = 0; - - // For each MBB, add its offset to the offset map, and count up its - // instructions - for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; - ++MFI) { - MachineBasicBlock *MBB = MFI; - OffsetMap[MBB] = ByteCount; - - for (MachineBasicBlock::iterator MBBI = MBB->begin(), EE = MBB->end(); - MBBI != EE; ++MBBI) - ByteCount += bytesForOpcode(MBBI->getOpcode()); - } - - // We're about to run over the MBB's again, so reset the ByteCount - ByteCount = 0; - - // For each MBB, find the conditional branch pseudo instructions, and - // calculate the difference between the target MBB and the current ICount - // to decide whether or not to emit a short or long branch. - // - // short branch: - // bCC .L_TARGET_MBB - // - // long branch: - // bInverseCC $PC+8 - // b .L_TARGET_MBB - // b .L_FALLTHROUGH_MBB - - for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; - ++MFI) { - MachineBasicBlock *MBB = MFI; - - for (MachineBasicBlock::iterator MBBI = MBB->begin(), EE = MBB->end(); - MBBI != EE; ++MBBI) { - // We may end up deleting the MachineInstr that MBBI points to, so - // remember its opcode now so we can refer to it after calling erase() - unsigned OpcodeToReplace = MBBI->getOpcode(); - - if (OpcodeToReplace == PPC::COND_BRANCH) { - MachineBasicBlock::iterator MBBJ = MBBI; - ++MBBJ; - - // condbranch operands: - // 0. CR0 register - // 1. bc opcode - // 2. target MBB - // 3. 
fallthrough MBB - MachineBasicBlock *trueMBB = - MBBI->getOperand(2).getMachineBasicBlock(); - MachineBasicBlock *falseMBB = - MBBI->getOperand(3).getMachineBasicBlock(); - - int Displacement = OffsetMap[trueMBB] - ByteCount; - unsigned Opcode = MBBI->getOperand(1).getImmedValue(); - unsigned CRReg = MBBI->getOperand(0).getReg(); - unsigned Inverted = PPCInstrInfo::invertPPCBranchOpcode(Opcode); - - if (Displacement >= -32768 && Displacement <= 32767) { - BuildMI(*MBB, MBBJ, Opcode, 2).addReg(CRReg).addMBB(trueMBB); - } else { - BuildMI(*MBB, MBBJ, Inverted, 2).addReg(CRReg).addSImm(8); - BuildMI(*MBB, MBBJ, PPC::B, 1).addMBB(trueMBB); - BuildMI(*MBB, MBBJ, PPC::B, 1).addMBB(falseMBB); - } - - // Erase the psuedo COND_BRANCH instruction, and then back up the - // iterator so that when the for loop increments it, we end up in - // the correct place rather than iterating off the end. - MBB->erase(MBBI); - MBBI = --MBBJ; - } - ByteCount += bytesForOpcode(OpcodeToReplace); - } - } - - OffsetMap.clear(); - return true; - } + virtual bool runOnMachineFunction(MachineFunction &Fn); virtual const char *getPassName() const { return "PowerPC Branch Selection"; @@ -141,5 +39,117 @@ /// Pass /// FunctionPass *llvm::createPPCBranchSelectionPass() { - return new BSel(); + return new PPCBSel(); } + +/// getNumBytesForInstruction - Return the number of bytes of code the specified +/// instruction may be. This returns the maximum number of bytes. +/// +static unsigned getNumBytesForInstruction(MachineInstr *MI) { + switch (MI->getOpcode()) { + case PPC::COND_BRANCH: + // while this will be 4 most of the time, if we emit 12 it is just a + // minor pessimization that saves us from having to worry about + // keeping the offsets up to date later when we emit long branch glue. 
+ return 12; + case PPC::IMPLICIT_DEF_GPR: // no asm emitted + case PPC::IMPLICIT_DEF_F4: // no asm emitted + case PPC::IMPLICIT_DEF_F8: // no asm emitted + return 0; + case PPC::INLINEASM: // Inline Asm: Variable size. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) + if (MI->getOperand(i).isExternalSymbol()) { + const char *AsmStr = MI->getOperand(i).getSymbolName(); + // Count the number of newline's in the asm string. + unsigned NumInstrs = 0; + for (; *AsmStr; ++AsmStr) + NumInstrs += *AsmStr == '\n'; + return NumInstrs*4; + } + assert(0 && "INLINEASM didn't have format string??"); + default: + return 4; // PowerPC instructions are all 4 bytes + } +} + + +bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) { + // Running total of instructions encountered since beginning of function + unsigned ByteCount = 0; + + // For each MBB, add its offset to the offset map, and count up its + // instructions + for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; + ++MFI) { + MachineBasicBlock *MBB = MFI; + OffsetMap[MBB] = ByteCount; + + for (MachineBasicBlock::iterator MBBI = MBB->begin(), EE = MBB->end(); + MBBI != EE; ++MBBI) + ByteCount += getNumBytesForInstruction(MBBI); + } + + // We're about to run over the MBB's again, so reset the ByteCount + ByteCount = 0; + + // For each MBB, find the conditional branch pseudo instructions, and + // calculate the difference between the target MBB and the current ICount + // to decide whether or not to emit a short or long branch. 
+ // + // short branch: + // bCC .L_TARGET_MBB + // + // long branch: + // bInverseCC $PC+8 + // b .L_TARGET_MBB + // b .L_FALLTHROUGH_MBB + for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; + ++MFI) { + MachineBasicBlock *MBB = MFI; + + for (MachineBasicBlock::iterator MBBI = MBB->begin(), EE = MBB->end(); + MBBI != EE; ++MBBI) { + // We may end up deleting the MachineInstr that MBBI points to, so + // remember its opcode now so we can refer to it after calling erase() + unsigned ByteSize = getNumBytesForInstruction(MBBI); + if (MBBI->getOpcode() == PPC::COND_BRANCH) { + MachineBasicBlock::iterator MBBJ = MBBI; + ++MBBJ; + + // condbranch operands: + // 0. CR0 register + // 1. bc opcode + // 2. target MBB + // 3. fallthrough MBB + MachineBasicBlock *trueMBB = + MBBI->getOperand(2).getMachineBasicBlock(); + MachineBasicBlock *falseMBB = + MBBI->getOperand(3).getMachineBasicBlock(); + + int Displacement = OffsetMap[trueMBB] - ByteCount; + unsigned Opcode = MBBI->getOperand(1).getImmedValue(); + unsigned CRReg = MBBI->getOperand(0).getReg(); + unsigned Inverted = PPCInstrInfo::invertPPCBranchOpcode(Opcode); + + if (Displacement >= -32768 && Displacement <= 32767) { + BuildMI(*MBB, MBBJ, Opcode, 2).addReg(CRReg).addMBB(trueMBB); + } else { + BuildMI(*MBB, MBBJ, Inverted, 2).addReg(CRReg).addSImm(8); + BuildMI(*MBB, MBBJ, PPC::B, 1).addMBB(trueMBB); + BuildMI(*MBB, MBBJ, PPC::B, 1).addMBB(falseMBB); + } + + // Erase the psuedo COND_BRANCH instruction, and then back up the + // iterator so that when the for loop increments it, we end up in + // the correct place rather than iterating off the end. 
+ MBB->erase(MBBI); + MBBI = --MBBJ; + } + ByteCount += ByteSize; + } + } + + OffsetMap.clear(); + return true; +} + From lattner at cs.uiuc.edu Wed Feb 8 17:41:46 2006 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Wed, 8 Feb 2006 17:41:46 -0600 Subject: [llvm-commits] CVS: llvm/include/llvm/CodeGen/AsmPrinter.h Message-ID: <200602082341.RAA08747@zion.cs.uiuc.edu> Changes in directory llvm/include/llvm/CodeGen: AsmPrinter.h updated: 1.29 -> 1.30 --- Log message: Add support for assembler directives that wrap inline asm --- Diffs of the changes: (+5 -0) AsmPrinter.h | 5 +++++ 1 files changed, 5 insertions(+) Index: llvm/include/llvm/CodeGen/AsmPrinter.h diff -u llvm/include/llvm/CodeGen/AsmPrinter.h:1.29 llvm/include/llvm/CodeGen/AsmPrinter.h:1.30 --- llvm/include/llvm/CodeGen/AsmPrinter.h:1.29 Mon Feb 6 16:16:41 2006 +++ llvm/include/llvm/CodeGen/AsmPrinter.h Wed Feb 8 17:41:34 2006 @@ -86,6 +86,11 @@ const char *FunctionAddrPrefix; // Defaults to "" const char *FunctionAddrSuffix; // Defaults to "" + /// InlineAsmStart/End - If these are nonempty, they contain a directive to + /// emit before and after an inline assmebly statement. 
+ const char *InlineAsmStart; // Defaults to "#APP\n" + const char *InlineAsmEnd; // Defaults to "#NO_APP\n" + //===--- Data Emission Directives -------------------------------------===// /// ZeroDirective - this should be set to the directive used to get some From lattner at cs.uiuc.edu Wed Feb 8 17:42:08 2006 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Wed, 8 Feb 2006 17:42:08 -0600 Subject: [llvm-commits] CVS: llvm/lib/CodeGen/AsmPrinter.cpp Message-ID: <200602082342.RAA08809@zion.cs.uiuc.edu> Changes in directory llvm/lib/CodeGen: AsmPrinter.cpp updated: 1.48 -> 1.49 --- Log message: Add support for assembler directives that wrap inline asm --- Diffs of the changes: (+4 -1) AsmPrinter.cpp | 5 ++++- 1 files changed, 4 insertions(+), 1 deletion(-) Index: llvm/lib/CodeGen/AsmPrinter.cpp diff -u llvm/lib/CodeGen/AsmPrinter.cpp:1.48 llvm/lib/CodeGen/AsmPrinter.cpp:1.49 --- llvm/lib/CodeGen/AsmPrinter.cpp:1.48 Mon Feb 6 16:17:23 2006 +++ llvm/lib/CodeGen/AsmPrinter.cpp Wed Feb 8 17:41:56 2006 @@ -32,6 +32,8 @@ GlobalVarAddrSuffix(""), FunctionAddrPrefix(""), FunctionAddrSuffix(""), + InlineAsmStart("#APP\n"), + InlineAsmEnd("#NO_APP\n"), ZeroDirective("\t.zero\t"), AsciiDirective("\t.ascii\t"), AscizDirective("\t.asciz\t"), @@ -482,6 +484,7 @@ /// printInlineAsm - This method formats and prints the specified machine /// instruction that is an inline asm. void AsmPrinter::printInlineAsm(const MachineInstr *MI) const { + O << InlineAsmStart; unsigned NumOperands = MI->getNumOperands(); // Count the number of register definitions. 
@@ -613,7 +616,7 @@ break; } } - O << "\n"; + O << "\n" << InlineAsmEnd; } /// PrintAsmOperand - Print the specified operand of MI, an INLINEASM From lattner at cs.uiuc.edu Wed Feb 8 17:42:36 2006 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Wed, 8 Feb 2006 17:42:36 -0600 Subject: [llvm-commits] CVS: llvm/lib/Target/X86/X86AsmPrinter.cpp Message-ID: <200602082342.RAA08849@zion.cs.uiuc.edu> Changes in directory llvm/lib/Target/X86: X86AsmPrinter.cpp updated: 1.163 -> 1.164 --- Log message: Darwin doesn't support #APP/#NO_APP --- Diffs of the changes: (+1 -0) X86AsmPrinter.cpp | 1 + 1 files changed, 1 insertion(+) Index: llvm/lib/Target/X86/X86AsmPrinter.cpp diff -u llvm/lib/Target/X86/X86AsmPrinter.cpp:1.163 llvm/lib/Target/X86/X86AsmPrinter.cpp:1.164 --- llvm/lib/Target/X86/X86AsmPrinter.cpp:1.163 Tue Feb 7 17:32:58 2006 +++ llvm/lib/Target/X86/X86AsmPrinter.cpp Wed Feb 8 17:42:22 2006 @@ -60,6 +60,7 @@ forDarwin = true; StaticCtorsSection = ".mod_init_func"; StaticDtorsSection = ".mod_term_func"; + InlineAsmStart = InlineAsmEnd = ""; // Don't use #APP/#NO_APP break; case X86Subtarget::isCygwin: GlobalPrefix = "_"; From lattner at cs.uiuc.edu Wed Feb 8 17:42:36 2006 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Wed, 8 Feb 2006 17:42:36 -0600 Subject: [llvm-commits] CVS: llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp Message-ID: <200602082342.RAA08851@zion.cs.uiuc.edu> Changes in directory llvm/lib/Target/PowerPC: PPCAsmPrinter.cpp updated: 1.148 -> 1.149 --- Log message: Darwin doesn't support #APP/#NO_APP --- Diffs of the changes: (+2 -2) PPCAsmPrinter.cpp | 4 ++-- 1 files changed, 2 insertions(+), 2 deletions(-) Index: llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp diff -u llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp:1.148 llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp:1.149 --- llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp:1.148 Wed Feb 8 00:56:40 2006 +++ llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp Wed Feb 8 17:42:22 2006 @@ -246,8 +246,7 @@ DarwinDwarfWriter DW; 
DarwinAsmPrinter(std::ostream &O, TargetMachine &TM) - : PPCAsmPrinter(O, TM), DW(O, this) - { + : PPCAsmPrinter(O, TM), DW(O, this) { CommentString = ";"; GlobalPrefix = "_"; PrivateGlobalPrefix = "L"; // Marker for constant pool idxs @@ -258,6 +257,7 @@ LCOMMDirective = "\t.lcomm\t"; StaticCtorsSection = ".mod_init_func"; StaticDtorsSection = ".mod_term_func"; + InlineAsmStart = InlineAsmEnd = ""; // Don't use #APP/#NO_APP } virtual const char *getPassName() const { From evan.cheng at apple.com Wed Feb 8 18:38:13 2006 From: evan.cheng at apple.com (Evan Cheng) Date: Wed, 8 Feb 2006 18:38:13 -0600 Subject: [llvm-commits] CVS: llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp Message-ID: <200602090038.SAA09212@zion.cs.uiuc.edu> Changes in directory llvm/lib/Target/PowerPC: PPCISelDAGToDAG.cpp updated: 1.155 -> 1.156 --- Log message: Change Select() from SDOperand Select(SDOperand N); to void Select(SDOperand &Result, SDOperand N); --- Diffs of the changes: (+133 -80) PPCISelDAGToDAG.cpp | 213 ++++++++++++++++++++++++++++++++-------------------- 1 files changed, 133 insertions(+), 80 deletions(-) Index: llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp diff -u llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp:1.155 llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp:1.156 --- llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp:1.155 Sun Feb 5 02:45:01 2006 +++ llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp Wed Feb 8 18:37:58 2006 @@ -62,7 +62,7 @@ // Select - Convert the specified operand from a target-independent to a // target-specific node if it hasn't already been changed. - SDOperand Select(SDOperand Op); + void Select(SDOperand &Result, SDOperand Op); SDNode *SelectBitfieldInsert(SDNode *N); @@ -149,7 +149,8 @@ } // Finally, legalize this node. - Select(Node); + SDOperand Dummy; + Select(Dummy, Node); } // Select target instructions for the DAG. @@ -365,15 +366,16 @@ // where both bitfield halves are sourced from the same value. 
if (IsRotate && fullMask && N->getOperand(0).getOperand(0) == N->getOperand(1).getOperand(0)) { - Op0 = CurDAG->getTargetNode(PPC::RLWINM, MVT::i32, - Select(N->getOperand(0).getOperand(0)), + SDOperand Tmp; + Select(Tmp, N->getOperand(0).getOperand(0)); + Op0 = CurDAG->getTargetNode(PPC::RLWINM, MVT::i32, Tmp, getI32Imm(SH), getI32Imm(0), getI32Imm(31)); return Op0.Val; } - SDOperand Tmp1 = (Op0IsAND && fullMask) ? Select(Op0.getOperand(0)) - : Select(Op0); - SDOperand Tmp2 = IsAndWithShiftOp ? Select(Op1.getOperand(0).getOperand(0)) - : Select(Op1.getOperand(0)); + SDOperand Tmp1, Tmp2; + Select(Tmp1, ((Op0IsAND && fullMask) ? Op0.getOperand(0) : Op0)); + Select(Tmp2, (IsAndWithShiftOp ? Op1.getOperand(0).getOperand(0) + : Op1.getOperand(0))); Op0 = CurDAG->getTargetNode(PPC::RLWIMI, MVT::i32, Tmp1, Tmp2, getI32Imm(SH), getI32Imm(MB), getI32Imm(ME)); return Op0.Val; @@ -457,7 +459,7 @@ SDOperand PPCDAGToDAGISel::SelectCC(SDOperand LHS, SDOperand RHS, ISD::CondCode CC) { // Always select the LHS. - LHS = Select(LHS); + Select(LHS, LHS); // Use U to determine whether the SETCC immediate range is signed or not. if (MVT::isInteger(LHS.getValueType())) { @@ -467,12 +469,15 @@ ((U && isUInt16(Imm)) || (!U && isInt16(Imm)))) return CurDAG->getTargetNode(U ? PPC::CMPLWI : PPC::CMPWI, MVT::i32, LHS, getI32Imm(Imm & 0xFFFF)); + Select(RHS, RHS); return CurDAG->getTargetNode(U ? 
PPC::CMPLW : PPC::CMPW, MVT::i32, - LHS, Select(RHS)); + LHS, RHS); } else if (LHS.getValueType() == MVT::f32) { - return CurDAG->getTargetNode(PPC::FCMPUS, MVT::i32, LHS, Select(RHS)); + Select(RHS, RHS); + return CurDAG->getTargetNode(PPC::FCMPUS, MVT::i32, LHS, RHS); } else { - return CurDAG->getTargetNode(PPC::FCMPUD, MVT::i32, LHS, Select(RHS)); + Select(RHS, RHS); + return CurDAG->getTargetNode(PPC::FCMPUD, MVT::i32, LHS, RHS); } } @@ -535,8 +540,9 @@ SDOperand PPCDAGToDAGISel::SelectADD_PARTS(SDOperand Op) { SDNode *N = Op.Val; - SDOperand LHSL = Select(N->getOperand(0)); - SDOperand LHSH = Select(N->getOperand(1)); + SDOperand LHSL, LHSH; + Select(LHSL, N->getOperand(0)); + Select(LHSH, N->getOperand(1)); unsigned Imm; bool ME = false, ZE = false; @@ -546,7 +552,7 @@ } std::vector Result; - SDOperand CarryFromLo; + SDOperand CarryFromLo, Tmp; if (isIntImmediate(N->getOperand(2), Imm) && ((signed)Imm >= -32768 || (signed)Imm < 32768)) { // Codegen the low 32 bits of the add. Interestingly, there is no @@ -554,8 +560,9 @@ CarryFromLo = CurDAG->getTargetNode(PPC::ADDIC, MVT::i32, MVT::Flag, LHSL, getI32Imm(Imm)); } else { + Select(Tmp, N->getOperand(2)); CarryFromLo = CurDAG->getTargetNode(PPC::ADDC, MVT::i32, MVT::Flag, - LHSL, Select(N->getOperand(2))); + LHSL, Tmp); } CarryFromLo = CarryFromLo.getValue(1); @@ -566,9 +573,11 @@ ResultHi = CurDAG->getTargetNode(PPC::ADDZE, MVT::i32, LHSH, CarryFromLo); else if (ME) ResultHi = CurDAG->getTargetNode(PPC::ADDME, MVT::i32, LHSH, CarryFromLo); - else + else { + Select(Tmp, N->getOperand(3)); ResultHi = CurDAG->getTargetNode(PPC::ADDE, MVT::i32, LHSH, - Select(N->getOperand(3)), CarryFromLo); + Tmp, CarryFromLo); + } Result.push_back(CarryFromLo.getValue(0)); Result.push_back(ResultHi); @@ -578,10 +587,11 @@ } SDOperand PPCDAGToDAGISel::SelectSUB_PARTS(SDOperand Op) { SDNode *N = Op.Val; - SDOperand LHSL = Select(N->getOperand(0)); - SDOperand LHSH = Select(N->getOperand(1)); - SDOperand RHSL = 
Select(N->getOperand(2)); - SDOperand RHSH = Select(N->getOperand(3)); + SDOperand LHSL, LHSH, RHSL, RHSH; + Select(LHSL, N->getOperand(0)); + Select(LHSH, N->getOperand(1)); + Select(RHSL, N->getOperand(2)); + Select(RHSH, N->getOperand(3)); std::vector Result; Result.push_back(CurDAG->getTargetNode(PPC::SUBFC, MVT::i32, MVT::Flag, @@ -602,7 +612,8 @@ // Check for those cases here. // setcc op, 0 if (Imm == 0) { - SDOperand Op = Select(N->getOperand(0)); + SDOperand Op; + Select(Op, N->getOperand(0)); switch (CC) { default: break; case ISD::SETEQ: @@ -626,7 +637,8 @@ } } } else if (Imm == ~0U) { // setcc op, -1 - SDOperand Op = Select(N->getOperand(0)); + SDOperand Op; + Select(Op, N->getOperand(0)); switch (CC) { default: break; case ISD::SETEQ: @@ -698,7 +710,8 @@ SDOperand PPCDAGToDAGISel::SelectCALL(SDOperand Op) { SDNode *N = Op.Val; - SDOperand Chain = Select(N->getOperand(0)); + SDOperand Chain; + Select(Chain, N->getOperand(0)); unsigned CallOpcode; std::vector CallOperands; @@ -718,7 +731,8 @@ CallOperands.push_back(getI32Imm((int)C->getValue() >> 2)); } else { // Copy the callee address into the CTR register. - SDOperand Callee = Select(N->getOperand(1)); + SDOperand Callee; + Select(Callee, N->getOperand(1)); Chain = CurDAG->getTargetNode(PPC::MTCTR, MVT::Other, Callee, Chain); // Copy the callee address into R12 on darwin. @@ -755,7 +769,8 @@ } if (N->getOperand(i).getOpcode() != ISD::UNDEF) { - SDOperand Val = Select(N->getOperand(i)); + SDOperand Val; + Select(Val, N->getOperand(i)); Chain = CurDAG->getCopyToReg(Chain, DestReg, Val, InFlag); InFlag = Chain.getValue(1); CallOperands.push_back(CurDAG->getRegister(DestReg, RegTy)); @@ -807,34 +822,52 @@ // Select - Convert the specified operand from a target-independent to a // target-specific node if it hasn't already been changed. 
-SDOperand PPCDAGToDAGISel::Select(SDOperand Op) { +void PPCDAGToDAGISel::Select(SDOperand &Result, SDOperand Op) { SDNode *N = Op.Val; if (N->getOpcode() >= ISD::BUILTIN_OP_END && - N->getOpcode() < PPCISD::FIRST_NUMBER) - return Op; // Already selected. + N->getOpcode() < PPCISD::FIRST_NUMBER) { + Result = Op; + return; // Already selected. + } // If this has already been converted, use it. std::map::iterator CGMI = CodeGenMap.find(Op); - if (CGMI != CodeGenMap.end()) return CGMI->second; + if (CGMI != CodeGenMap.end()) { + Result = CGMI->second; + return; + } switch (N->getOpcode()) { default: break; - case ISD::ADD_PARTS: return SelectADD_PARTS(Op); - case ISD::SUB_PARTS: return SelectSUB_PARTS(Op); - case ISD::SETCC: return SelectSETCC(Op); - case PPCISD::CALL: return SelectCALL(Op); - case PPCISD::GlobalBaseReg: return getGlobalBaseReg(); + case ISD::ADD_PARTS: + Result = SelectADD_PARTS(Op); + return; + case ISD::SUB_PARTS: + Result = SelectSUB_PARTS(Op); + return; + case ISD::SETCC: + Result = SelectSETCC(Op); + return; + case PPCISD::CALL: + Result = SelectCALL(Op); + return; + case PPCISD::GlobalBaseReg: + Result = getGlobalBaseReg(); + return; case ISD::FrameIndex: { int FI = cast(N)->getIndex(); - if (N->hasOneUse()) - return CurDAG->SelectNodeTo(N, PPC::ADDI, MVT::i32, - CurDAG->getTargetFrameIndex(FI, MVT::i32), - getI32Imm(0)); - return CodeGenMap[Op] = + if (N->hasOneUse()) { + Result = CurDAG->SelectNodeTo(N, PPC::ADDI, MVT::i32, + CurDAG->getTargetFrameIndex(FI, MVT::i32), + getI32Imm(0)); + return; + } + Result = CodeGenMap[Op] = CurDAG->getTargetNode(PPC::ADDI, MVT::i32, CurDAG->getTargetFrameIndex(FI, MVT::i32), getI32Imm(0)); + return; } case ISD::SDIV: { // FIXME: since this depends on the setting of the carry flag from the srawi @@ -844,23 +877,24 @@ // sra/addze rather than having to handle sdiv ourselves. oh well. 
unsigned Imm; if (isIntImmediate(N->getOperand(1), Imm)) { + SDOperand N0; + Select(N0, N->getOperand(0)); if ((signed)Imm > 0 && isPowerOf2_32(Imm)) { SDOperand Op = CurDAG->getTargetNode(PPC::SRAWI, MVT::i32, MVT::Flag, - Select(N->getOperand(0)), - getI32Imm(Log2_32(Imm))); - return CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32, - Op.getValue(0), Op.getValue(1)); + N0, getI32Imm(Log2_32(Imm))); + Result = CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32, + Op.getValue(0), Op.getValue(1)); } else if ((signed)Imm < 0 && isPowerOf2_32(-Imm)) { SDOperand Op = CurDAG->getTargetNode(PPC::SRAWI, MVT::i32, MVT::Flag, - Select(N->getOperand(0)), - getI32Imm(Log2_32(-Imm))); + N0, getI32Imm(Log2_32(-Imm))); SDOperand PT = CurDAG->getTargetNode(PPC::ADDZE, MVT::i32, Op.getValue(0), Op.getValue(1)); - return CurDAG->SelectNodeTo(N, PPC::NEG, MVT::i32, PT); + Result = CurDAG->SelectNodeTo(N, PPC::NEG, MVT::i32, PT); } + return; } // Other cases are autogenerated. @@ -875,17 +909,20 @@ SDOperand Val; unsigned SH, MB, ME; if (isRotateAndMask(N->getOperand(0).Val, Imm, false, SH, MB, ME)) { - Val = Select(N->getOperand(0).getOperand(0)); + Select(Val, N->getOperand(0).getOperand(0)); } else if (Imm == 0) { // AND X, 0 -> 0, not "rlwinm 32". - return Select(N->getOperand(1)); + Select(Result, N->getOperand(1)); + return ; } else { - Val = Select(N->getOperand(0)); + Select(Val, N->getOperand(0)); isRunOfOnes(Imm, MB, ME); SH = 0; } - return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Val, getI32Imm(SH), - getI32Imm(MB), getI32Imm(ME)); + Result = CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Val, + getI32Imm(SH), getI32Imm(MB), + getI32Imm(ME)); + return; } // ISD::OR doesn't get all the bitfield insertion fun. 
// (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) is a bitfield insert @@ -895,10 +932,13 @@ unsigned MB, ME; Imm = ~(Imm^Imm2); if (isRunOfOnes(Imm, MB, ME)) { - SDOperand Tmp1 = Select(N->getOperand(0).getOperand(0)); - SDOperand Tmp2 = Select(N->getOperand(0).getOperand(1)); - return CurDAG->getTargetNode(PPC::RLWIMI, MVT::i32, Tmp1, Tmp2, - getI32Imm(0), getI32Imm(MB), getI32Imm(ME)); + SDOperand Tmp1, Tmp2; + Select(Tmp1, N->getOperand(0).getOperand(0)); + Select(Tmp2, N->getOperand(0).getOperand(1)); + Result = CurDAG->getTargetNode(PPC::RLWIMI, MVT::i32, Tmp1, Tmp2, + getI32Imm(0), getI32Imm(MB), + getI32Imm(ME)); + return; } } @@ -906,8 +946,10 @@ break; } case ISD::OR: - if (SDNode *I = SelectBitfieldInsert(N)) - return CodeGenMap[Op] = SDOperand(I, 0); + if (SDNode *I = SelectBitfieldInsert(N)) { + Result = CodeGenMap[Op] = SDOperand(I, 0); + return; + } // Other cases are autogenerated. break; @@ -915,9 +957,12 @@ unsigned Imm, SH, MB, ME; if (isOpcWithIntImmediate(N->getOperand(0).Val, ISD::AND, Imm) && isRotateAndMask(N, Imm, true, SH, MB, ME)) { - return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, - Select(N->getOperand(0).getOperand(0)), - getI32Imm(SH), getI32Imm(MB), getI32Imm(ME)); + SDOperand Val; + Select(Val, N->getOperand(0).getOperand(0)); + Result = CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, + Val, getI32Imm(SH), getI32Imm(MB), + getI32Imm(ME)); + return; } // Other cases are autogenerated. @@ -927,10 +972,12 @@ unsigned Imm, SH, MB, ME; if (isOpcWithIntImmediate(N->getOperand(0).Val, ISD::AND, Imm) && isRotateAndMask(N, Imm, true, SH, MB, ME)) { - return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, - Select(N->getOperand(0).getOperand(0)), - getI32Imm(SH & 0x1F), getI32Imm(MB), - getI32Imm(ME)); + SDOperand Val; + Select(Val, N->getOperand(0).getOperand(0)); + Result = CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, + Val, getI32Imm(SH & 0x1F), getI32Imm(MB), + getI32Imm(ME)); + return; } // Other cases are autogenerated. 
@@ -945,12 +992,14 @@ if (ConstantSDNode *N3C = dyn_cast(N->getOperand(3))) if (N1C->isNullValue() && N3C->isNullValue() && N2C->getValue() == 1ULL && CC == ISD::SETNE) { - SDOperand LHS = Select(N->getOperand(0)); + SDOperand LHS; + Select(LHS, N->getOperand(0)); SDOperand Tmp = CurDAG->getTargetNode(PPC::ADDIC, MVT::i32, MVT::Flag, LHS, getI32Imm(~0U)); - return CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, Tmp, LHS, - Tmp.getValue(1)); + Result = CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, Tmp, LHS, + Tmp.getValue(1)); + return; } SDOperand CCReg = SelectCC(N->getOperand(0), N->getOperand(1), CC); @@ -964,14 +1013,17 @@ SelectCCOp = PPC::SELECT_CC_F4; else SelectCCOp = PPC::SELECT_CC_F8; - return CurDAG->SelectNodeTo(N, SelectCCOp, N->getValueType(0), CCReg, - Select(N->getOperand(2)), - Select(N->getOperand(3)), - getI32Imm(BROpc)); + SDOperand N2, N3; + Select(N2, N->getOperand(2)); + Select(N3, N->getOperand(3)); + Result = CurDAG->SelectNodeTo(N, SelectCCOp, N->getValueType(0), CCReg, + N2, N3, getI32Imm(BROpc)); + return; } case ISD::BR_CC: case ISD::BRTWOWAY_CC: { - SDOperand Chain = Select(N->getOperand(0)); + SDOperand Chain; + Select(Chain, N->getOperand(0)); MachineBasicBlock *Dest = cast(N->getOperand(4))->getBasicBlock(); ISD::CondCode CC = cast(N->getOperand(1))->get(); @@ -1000,7 +1052,7 @@ CondCode, getI32Imm(Opc), CondTrueBlock, CondFalseBlock, Chain); - return CurDAG->SelectNodeTo(N, PPC::B, MVT::Other, CondFalseBlock, CB); + Result = CurDAG->SelectNodeTo(N, PPC::B, MVT::Other, CondFalseBlock, CB); } else { // Iterate to the next basic block ilist::iterator It = BB; @@ -1011,15 +1063,16 @@ // we have nothing better to set it to, and leaving it alone will cause // the PowerPC Branch Selection pass to crash. 
if (It == BB->getParent()->end()) It = Dest; - return CurDAG->SelectNodeTo(N, PPC::COND_BRANCH, MVT::Other, CondCode, - getI32Imm(getBCCForSetCC(CC)), - N->getOperand(4), CurDAG->getBasicBlock(It), - Chain); + Result = CurDAG->SelectNodeTo(N, PPC::COND_BRANCH, MVT::Other, CondCode, + getI32Imm(getBCCForSetCC(CC)), + N->getOperand(4), CurDAG->getBasicBlock(It), + Chain); } + return; } } - return SelectCode(Op); + SelectCode(Result, Op); } From evan.cheng at apple.com Wed Feb 8 18:38:13 2006 From: evan.cheng at apple.com (Evan Cheng) Date: Wed, 8 Feb 2006 18:38:13 -0600 Subject: [llvm-commits] CVS: llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp Message-ID: <200602090038.SAA09216@zion.cs.uiuc.edu> Changes in directory llvm/lib/Target/Sparc: SparcISelDAGToDAG.cpp updated: 1.79 -> 1.80 --- Log message: Change Select() from SDOperand Select(SDOperand N); to void Select(SDOperand &Result, SDOperand N); --- Diffs of the changes: (+49 -32) SparcISelDAGToDAG.cpp | 81 ++++++++++++++++++++++++++++++-------------------- 1 files changed, 49 insertions(+), 32 deletions(-) Index: llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp diff -u llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp:1.79 llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp:1.80 --- llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp:1.79 Sun Feb 5 02:35:50 2006 +++ llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp Wed Feb 8 18:37:58 2006 @@ -934,7 +934,7 @@ Subtarget(TM.getSubtarget()) { } - SDOperand Select(SDOperand Op); + void Select(SDOperand &Result, SDOperand Op); // Complex Pattern Selectors. bool SelectADDRrr(SDOperand N, SDOperand &R1, SDOperand &R2); @@ -1025,33 +1025,44 @@ return true; } -SDOperand SparcDAGToDAGISel::Select(SDOperand Op) { +void SparcDAGToDAGISel::Select(SDOperand &Result, SDOperand Op) { SDNode *N = Op.Val; if (N->getOpcode() >= ISD::BUILTIN_OP_END && - N->getOpcode() < SPISD::FIRST_NUMBER) - return Op; // Already selected. + N->getOpcode() < SPISD::FIRST_NUMBER) { + Result = Op; + return; // Already selected. 
+ } + // If this has already been converted, use it. std::map::iterator CGMI = CodeGenMap.find(Op); - if (CGMI != CodeGenMap.end()) return CGMI->second; + if (CGMI != CodeGenMap.end()) { + Result = CGMI->second; + return; + } switch (N->getOpcode()) { default: break; case ISD::FrameIndex: { int FI = cast(N)->getIndex(); - if (N->hasOneUse()) - return CurDAG->SelectNodeTo(N, SP::ADDri, MVT::i32, - CurDAG->getTargetFrameIndex(FI, MVT::i32), - CurDAG->getTargetConstant(0, MVT::i32)); - return CodeGenMap[Op] = + if (N->hasOneUse()) { + Result = CurDAG->SelectNodeTo(N, SP::ADDri, MVT::i32, + CurDAG->getTargetFrameIndex(FI, MVT::i32), + CurDAG->getTargetConstant(0, MVT::i32)); + return; + } + + Result = CodeGenMap[Op] = CurDAG->getTargetNode(SP::ADDri, MVT::i32, CurDAG->getTargetFrameIndex(FI, MVT::i32), CurDAG->getTargetConstant(0, MVT::i32)); + return; } case ISD::ADD_PARTS: { - SDOperand LHSL = Select(N->getOperand(0)); - SDOperand LHSH = Select(N->getOperand(1)); - SDOperand RHSL = Select(N->getOperand(2)); - SDOperand RHSH = Select(N->getOperand(3)); + SDOperand LHSL, LHSH, RHSL, RHSH; + Select(LHSL, N->getOperand(0)); + Select(LHSH, N->getOperand(1)); + Select(RHSL, N->getOperand(2)); + Select(RHSH, N->getOperand(3)); // FIXME, handle immediate RHS. SDOperand Low = CurDAG->getTargetNode(SP::ADDCCrr, MVT::i32, MVT::Flag, LHSL, RHSL); @@ -1059,27 +1070,30 @@ Low.getValue(1)); CodeGenMap[SDOperand(N, 0)] = Low; CodeGenMap[SDOperand(N, 1)] = Hi; - return Op.ResNo ? Hi : Low; + Result = Op.ResNo ? Hi : Low; + return; } case ISD::SUB_PARTS: { - SDOperand LHSL = Select(N->getOperand(0)); - SDOperand LHSH = Select(N->getOperand(1)); - SDOperand RHSL = Select(N->getOperand(2)); - SDOperand RHSH = Select(N->getOperand(3)); - // FIXME, handle immediate RHS. 
+ SDOperand LHSL, LHSH, RHSL, RHSH; + Select(LHSL, N->getOperand(0)); + Select(LHSH, N->getOperand(1)); + Select(RHSL, N->getOperand(2)); + Select(RHSH, N->getOperand(3)); SDOperand Low = CurDAG->getTargetNode(SP::SUBCCrr, MVT::i32, MVT::Flag, LHSL, RHSL); SDOperand Hi = CurDAG->getTargetNode(SP::SUBXrr, MVT::i32, LHSH, RHSH, Low.getValue(1)); CodeGenMap[SDOperand(N, 0)] = Low; CodeGenMap[SDOperand(N, 1)] = Hi; - return Op.ResNo ? Hi : Low; + Result = Op.ResNo ? Hi : Low; + return; } case ISD::SDIV: case ISD::UDIV: { // FIXME: should use a custom expander to expose the SRA to the dag. - SDOperand DivLHS = Select(N->getOperand(0)); - SDOperand DivRHS = Select(N->getOperand(1)); + SDOperand DivLHS, DivRHS; + Select(DivLHS, N->getOperand(0)); + Select(DivRHS, N->getOperand(1)); // Set the Y register to the high-part. SDOperand TopPart; @@ -1094,18 +1108,21 @@ // FIXME: Handle div by immediate. unsigned Opcode = N->getOpcode() == ISD::SDIV ? SP::SDIVrr : SP::UDIVrr; - return CurDAG->SelectNodeTo(N, Opcode, MVT::i32, DivLHS, DivRHS, TopPart); + Result = CurDAG->SelectNodeTo(N, Opcode, MVT::i32, DivLHS, DivRHS, TopPart); + return; } case ISD::MULHU: case ISD::MULHS: { // FIXME: Handle mul by immediate. - SDOperand MulLHS = Select(N->getOperand(0)); - SDOperand MulRHS = Select(N->getOperand(1)); + SDOperand MulLHS, MulRHS; + Select(MulLHS, N->getOperand(0)); + Select(MulRHS, N->getOperand(1)); unsigned Opcode = N->getOpcode() == ISD::MULHU ? SP::UMULrr : SP::SMULrr; SDOperand Mul = CurDAG->getTargetNode(Opcode, MVT::i32, MVT::Flag, MulLHS, MulRHS); // The high part is in the Y register. - return CurDAG->SelectNodeTo(N, SP::RDY, MVT::i32, Mul.getValue(1)); + Result = CurDAG->SelectNodeTo(N, SP::RDY, MVT::i32, Mul.getValue(1)); + return; } case SPISD::CALL: // FIXME: This is a workaround for a bug in tblgen. 
@@ -1118,10 +1135,9 @@ SDOperand InFlag = SDOperand(0, 0); SDOperand Chain = N->getOperand(0); SDOperand Tmp0 = N1; - Chain = Select(Chain); - SDOperand Result; + Select(Chain, Chain); if (N->getNumOperands() == 3) { - InFlag = Select(N->getOperand(2)); + Select(InFlag, N->getOperand(2)); Result = CurDAG->getTargetNode(SP::CALL, MVT::Other, MVT::Flag, Tmp0, Chain, InFlag); } else { @@ -1130,13 +1146,14 @@ } Chain = CodeGenMap[SDOperand(N, 0)] = Result.getValue(0); CodeGenMap[SDOperand(N, 1)] = Result.getValue(1); - return Result.getValue(Op.ResNo); + Result = Result.getValue(Op.ResNo); + return; } P47Fail:; } - return SelectCode(Op); + SelectCode(Result, Op); } From evan.cheng at apple.com Wed Feb 8 18:38:14 2006 From: evan.cheng at apple.com (Evan Cheng) Date: Wed, 8 Feb 2006 18:38:14 -0600 Subject: [llvm-commits] CVS: llvm/lib/Target/X86/X86ISelDAGToDAG.cpp Message-ID: <200602090038.SAA09224@zion.cs.uiuc.edu> Changes in directory llvm/lib/Target/X86: X86ISelDAGToDAG.cpp updated: 1.43 -> 1.44 --- Log message: Change Select() from SDOperand Select(SDOperand N); to void Select(SDOperand &Result, SDOperand N); --- Diffs of the changes: (+53 -38) X86ISelDAGToDAG.cpp | 91 ++++++++++++++++++++++++++++++---------------------- 1 files changed, 53 insertions(+), 38 deletions(-) Index: llvm/lib/Target/X86/X86ISelDAGToDAG.cpp diff -u llvm/lib/Target/X86/X86ISelDAGToDAG.cpp:1.43 llvm/lib/Target/X86/X86ISelDAGToDAG.cpp:1.44 --- llvm/lib/Target/X86/X86ISelDAGToDAG.cpp:1.43 Mon Feb 6 00:02:33 2006 +++ llvm/lib/Target/X86/X86ISelDAGToDAG.cpp Wed Feb 8 18:37:58 2006 @@ -104,7 +104,7 @@ #include "X86GenDAGISel.inc" private: - SDOperand Select(SDOperand N); + void Select(SDOperand &Result, SDOperand N); bool MatchAddress(SDOperand N, X86ISelAddressMode &AM); bool SelectAddr(SDOperand N, SDOperand &Base, SDOperand &Scale, @@ -445,17 +445,22 @@ return false; } -SDOperand X86DAGToDAGISel::Select(SDOperand N) { +void X86DAGToDAGISel::Select(SDOperand &Result, SDOperand N) { SDNode *Node 
= N.Val; MVT::ValueType NVT = Node->getValueType(0); unsigned Opc, MOpc; unsigned Opcode = Node->getOpcode(); - if (Opcode >= ISD::BUILTIN_OP_END && Opcode < X86ISD::FIRST_NUMBER) - return N; // Already selected. + if (Opcode >= ISD::BUILTIN_OP_END && Opcode < X86ISD::FIRST_NUMBER) { + Result = N; + return; // Already selected. + } std::map::iterator CGMI = CodeGenMap.find(N); - if (CGMI != CodeGenMap.end()) return CGMI->second; + if (CGMI != CodeGenMap.end()) { + Result = CGMI->second; + return; + } switch (Opcode) { default: break; @@ -499,33 +504,39 @@ } } - SDOperand Chain = foldedLoad ? Select(N1.getOperand(0)) - : CurDAG->getEntryNode(); + SDOperand Chain; + if (foldedLoad) + Select(Chain, N1.getOperand(0)); + else + Chain = CurDAG->getEntryNode(); - SDOperand InFlag; + SDOperand InFlag(0, 0); + Select(N0, N0); Chain = CurDAG->getCopyToReg(Chain, CurDAG->getRegister(LoReg, NVT), - Select(N0), InFlag); + N0, InFlag); InFlag = Chain.getValue(1); if (foldedLoad) { - Tmp0 = Select(Tmp0); - Tmp1 = Select(Tmp1); - Tmp2 = Select(Tmp2); - Tmp3 = Select(Tmp3); + Select(Tmp0, Tmp0); + Select(Tmp1, Tmp1); + Select(Tmp2, Tmp2); + Select(Tmp3, Tmp3); Chain = CurDAG->getTargetNode(MOpc, MVT::Other, MVT::Flag, Tmp0, Tmp1, Tmp2, Tmp3, Chain, InFlag); InFlag = Chain.getValue(1); } else { - InFlag = CurDAG->getTargetNode(Opc, MVT::Flag, Select(N1), InFlag); + Select(N1, N1); + InFlag = CurDAG->getTargetNode(Opc, MVT::Flag, N1, InFlag); } - SDOperand Result = CurDAG->getCopyFromReg(Chain, HiReg, NVT, InFlag); + Result = CurDAG->getCopyFromReg(Chain, HiReg, NVT, InFlag); CodeGenMap[N.getValue(0)] = Result; if (foldedLoad) { CodeGenMap[N1.getValue(1)] = Result.getValue(1); AddHandleReplacement(N1.getValue(1), Result.getValue(1)); } - return Result; + + return; } case ISD::SDIV: @@ -576,12 +587,16 @@ bool foldedLoad = false; SDOperand Tmp0, Tmp1, Tmp2, Tmp3; foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3); - SDOperand Chain = foldedLoad ? 
Select(N1.getOperand(0)) - : CurDAG->getEntryNode(); + SDOperand Chain; + if (foldedLoad) + Select(Chain, N1.getOperand(0)); + else + Chain = CurDAG->getEntryNode(); - SDOperand InFlag; + SDOperand InFlag(0, 0); + Select(N0, N0); Chain = CurDAG->getCopyToReg(Chain, CurDAG->getRegister(LoReg, NVT), - Select(N0), InFlag); + N0, InFlag); InFlag = Chain.getValue(1); if (isSigned) { @@ -598,25 +613,26 @@ } if (foldedLoad) { - Tmp0 = Select(Tmp0); - Tmp1 = Select(Tmp1); - Tmp2 = Select(Tmp2); - Tmp3 = Select(Tmp3); + Select(Tmp0, Tmp0); + Select(Tmp1, Tmp1); + Select(Tmp2, Tmp2); + Select(Tmp3, Tmp3); Chain = CurDAG->getTargetNode(MOpc, MVT::Other, MVT::Flag, Tmp0, Tmp1, Tmp2, Tmp3, Chain, InFlag); InFlag = Chain.getValue(1); } else { - InFlag = CurDAG->getTargetNode(Opc, MVT::Flag, Select(N1), InFlag); + Select(N1, N1); + InFlag = CurDAG->getTargetNode(Opc, MVT::Flag, N1, InFlag); } - SDOperand Result = CurDAG->getCopyFromReg(Chain, isDiv ? LoReg : HiReg, - NVT, InFlag); + Result = CurDAG->getCopyFromReg(Chain, isDiv ? 
LoReg : HiReg, + NVT, InFlag); CodeGenMap[N.getValue(0)] = Result; if (foldedLoad) { CodeGenMap[N1.getValue(1)] = Result.getValue(1); AddHandleReplacement(N1.getValue(1), Result.getValue(1)); } - return Result; + return; } case ISD::TRUNCATE: { @@ -627,11 +643,11 @@ case MVT::i16: Reg = X86::AX; Opc = X86::MOV16rr; VT = MVT::i16; break; case MVT::i32: Reg = X86::EAX; Opc = X86::MOV32rr; VT = MVT::i32; break; } - SDOperand Tmp0 = Select(Node->getOperand(0)); - SDOperand Tmp1 = CurDAG->getTargetNode(Opc, VT, Tmp0); + SDOperand Tmp0, Tmp1; + Select(Tmp0, Node->getOperand(0)); + Select(Tmp1, CurDAG->getTargetNode(Opc, VT, Tmp0)); SDOperand InFlag = SDOperand(0,0); - SDOperand Result = CurDAG->getCopyToReg(CurDAG->getEntryNode(), - Reg, Tmp1, InFlag); + Result = CurDAG->getCopyToReg(CurDAG->getEntryNode(), Reg, Tmp1, InFlag); SDOperand Chain = Result.getValue(0); InFlag = Result.getValue(1); @@ -641,17 +657,16 @@ case MVT::i16: Reg = X86::AX; Opc = X86::MOV16rr; VT = MVT::i16; break; } - Result = CurDAG->getCopyFromReg(Chain, - Reg, VT, InFlag); + Result = CurDAG->getCopyFromReg(Chain, Reg, VT, InFlag); if (N.Val->hasOneUse()) - return CurDAG->SelectNodeTo(N.Val, Opc, VT, Result); + Result =CurDAG->SelectNodeTo(N.Val, Opc, VT, Result); else - return CodeGenMap[N] = CurDAG->getTargetNode(Opc, VT, Result); - break; + Result = CodeGenMap[N] = CurDAG->getTargetNode(Opc, VT, Result); + return; } } - return SelectCode(N); + SelectCode(Result, N); } /// createX86ISelDag - This pass converts a legalized DAG into a From evan.cheng at apple.com Wed Feb 8 18:38:14 2006 From: evan.cheng at apple.com (Evan Cheng) Date: Wed, 8 Feb 2006 18:38:14 -0600 Subject: [llvm-commits] CVS: llvm/lib/Target/IA64/IA64ISelDAGToDAG.cpp Message-ID: <200602090038.SAA09232@zion.cs.uiuc.edu> Changes in directory llvm/lib/Target/IA64: IA64ISelDAGToDAG.cpp updated: 1.34 -> 1.35 --- Log message: Change Select() from SDOperand Select(SDOperand N); to void Select(SDOperand &Result, SDOperand N); --- Diffs of 
the changes: (+82 -48) IA64ISelDAGToDAG.cpp | 130 ++++++++++++++++++++++++++++++++------------------- 1 files changed, 82 insertions(+), 48 deletions(-) Index: llvm/lib/Target/IA64/IA64ISelDAGToDAG.cpp diff -u llvm/lib/Target/IA64/IA64ISelDAGToDAG.cpp:1.34 llvm/lib/Target/IA64/IA64ISelDAGToDAG.cpp:1.35 --- llvm/lib/Target/IA64/IA64ISelDAGToDAG.cpp:1.34 Sun Feb 5 00:46:41 2006 +++ llvm/lib/Target/IA64/IA64ISelDAGToDAG.cpp Wed Feb 8 18:37:58 2006 @@ -63,7 +63,7 @@ // Select - Convert the specified operand from a target-independent to a // target-specific node if it hasn't already been changed. - SDOperand Select(SDOperand Op); + void Select(SDOperand &Result, SDOperand N); SDNode *SelectIntImmediateExpr(SDOperand LHS, SDOperand RHS, unsigned OCHi, unsigned OCLo, @@ -143,7 +143,8 @@ } // Finally, legalize this node. - Select(Node); + SDOperand Dummy; + Select(Dummy, Node); } // Select target instructions for the DAG. @@ -157,10 +158,11 @@ SDOperand IA64DAGToDAGISel::SelectDIV(SDOperand Op) { SDNode *N = Op.Val; - SDOperand Chain = Select(N->getOperand(0)); + SDOperand Chain, Tmp1, Tmp2; + Select(Chain, N->getOperand(0)); - SDOperand Tmp1 = Select(N->getOperand(0)); - SDOperand Tmp2 = Select(N->getOperand(1)); + Select(Tmp1, N->getOperand(0)); + Select(Tmp2, N->getOperand(1)); bool isFP=false; @@ -328,25 +330,31 @@ // Select - Convert the specified operand from a target-independent to a // target-specific node if it hasn't already been changed. -SDOperand IA64DAGToDAGISel::Select(SDOperand Op) { +void IA64DAGToDAGISel::Select(SDOperand &Result, SDOperand Op) { SDNode *N = Op.Val; if (N->getOpcode() >= ISD::BUILTIN_OP_END && - N->getOpcode() < IA64ISD::FIRST_NUMBER) - return Op; // Already selected. + N->getOpcode() < IA64ISD::FIRST_NUMBER) { + Result = Op; + return; // Already selected. + } // If this has already been converted, use it. 
std::map::iterator CGMI = CodeGenMap.find(Op); - if (CGMI != CodeGenMap.end()) return CGMI->second; + if (CGMI != CodeGenMap.end()) { + Result = CGMI->second; + return; + } switch (N->getOpcode()) { default: break; case IA64ISD::BRCALL: { // XXX: this is also a hack! - SDOperand Chain = Select(N->getOperand(0)); + SDOperand Chain; SDOperand InFlag; // Null incoming flag value. + Select(Chain, N->getOperand(0)); if(N->getNumOperands()==3) // we have an incoming chain, callee and flag - InFlag = Select(N->getOperand(2)); + Select(InFlag, N->getOperand(2)); unsigned CallOpcode; SDOperand CallOperand; @@ -367,7 +375,8 @@ // otherwise we need to load the function descriptor, // load the branch target (function)'s entry point and GP, // branch (call) then restore the GP - SDOperand FnDescriptor = Select(N->getOperand(1)); + SDOperand FnDescriptor; + Select(FnDescriptor, N->getOperand(1)); // load the branch target's entry point [mem] and // GP value [mem+8] @@ -404,41 +413,47 @@ for (unsigned i = 0, e = CallResults.size(); i != e; ++i) CodeGenMap[Op.getValue(i)] = CallResults[i]; - return CallResults[Op.ResNo]; + Result = CallResults[Op.ResNo]; + return; } case IA64ISD::GETFD: { - SDOperand Input = Select(N->getOperand(0)); - SDOperand Result = CurDAG->getTargetNode(IA64::GETFD, MVT::i64, Input); + SDOperand Input; + Select(Input, N->getOperand(0)); + Result = CurDAG->getTargetNode(IA64::GETFD, MVT::i64, Input); CodeGenMap[Op] = Result; - return Result; + return; } case ISD::FDIV: case ISD::SDIV: case ISD::UDIV: case ISD::SREM: - case ISD::UREM: return SelectDIV(Op); + case ISD::UREM: + Result = SelectDIV(Op); + return; case ISD::TargetConstantFP: { SDOperand Chain = CurDAG->getEntryNode(); // this is a constant, so.. 
- if (cast(N)->isExactlyValue(+0.0)) - return CurDAG->getCopyFromReg(Chain, IA64::F0, MVT::f64); - else if (cast(N)->isExactlyValue(+1.0)) - return CurDAG->getCopyFromReg(Chain, IA64::F1, MVT::f64); - else + if (cast(N)->isExactlyValue(+0.0)) { + Result = CurDAG->getCopyFromReg(Chain, IA64::F0, MVT::f64); + } else if (cast(N)->isExactlyValue(+1.0)) { + Result = CurDAG->getCopyFromReg(Chain, IA64::F1, MVT::f64); + } else assert(0 && "Unexpected FP constant!"); + return; } case ISD::FrameIndex: { // TODO: reduce creepyness int FI = cast(N)->getIndex(); if (N->hasOneUse()) - return CurDAG->SelectNodeTo(N, IA64::MOV, MVT::i64, + Result = CurDAG->SelectNodeTo(N, IA64::MOV, MVT::i64, CurDAG->getTargetFrameIndex(FI, MVT::i64)); else - return CodeGenMap[Op] = CurDAG->getTargetNode(IA64::MOV, MVT::i64, + Result = CodeGenMap[Op] = CurDAG->getTargetNode(IA64::MOV, MVT::i64, CurDAG->getTargetFrameIndex(FI, MVT::i64)); + return; } case ISD::ConstantPool: { // TODO: nuke the constant pool @@ -447,8 +462,9 @@ Constant *C = CP->get(); SDOperand CPI = CurDAG->getTargetConstantPool(C, MVT::i64, CP->getAlignment()); - return CurDAG->getTargetNode(IA64::ADDL_GA, MVT::i64, // ? + Result = CurDAG->getTargetNode(IA64::ADDL_GA, MVT::i64, // ? CurDAG->getRegister(IA64::r1, MVT::i64), CPI); + return; } case ISD::GlobalAddress: { @@ -456,7 +472,8 @@ SDOperand GA = CurDAG->getTargetGlobalAddress(GV, MVT::i64); SDOperand Tmp = CurDAG->getTargetNode(IA64::ADDL_GA, MVT::i64, CurDAG->getRegister(IA64::r1, MVT::i64), GA); - return CurDAG->getTargetNode(IA64::LD8, MVT::i64, Tmp); + Result = CurDAG->getTargetNode(IA64::LD8, MVT::i64, Tmp); + return; } /* XXX case ISD::ExternalSymbol: { @@ -471,8 +488,9 @@ case ISD::LOAD: case ISD::EXTLOAD: // FIXME: load -1, not 1, for bools? 
case ISD::ZEXTLOAD: { - SDOperand Chain = Select(N->getOperand(0)); - SDOperand Address = Select(N->getOperand(1)); + SDOperand Chain, Address; + Select(Chain, N->getOperand(0)); + Select(Address, N->getOperand(1)); MVT::ValueType TypeBeingLoaded = (N->getOpcode() == ISD::LOAD) ? N->getValueType(0) : cast(N->getOperand(3))->getVT(); @@ -481,11 +499,13 @@ default: N->dump(); assert(0 && "Cannot load this type!"); case MVT::i1: { // this is a bool Opc = IA64::LD1; // first we load a byte, then compare for != 0 - if(N->getValueType(0) == MVT::i1) // XXX: early exit! - return CurDAG->SelectNodeTo(N, IA64::CMPNE, MVT::i1, MVT::Other, + if(N->getValueType(0) == MVT::i1) { // XXX: early exit! + Result = CurDAG->SelectNodeTo(N, IA64::CMPNE, MVT::i1, MVT::Other, CurDAG->getTargetNode(Opc, MVT::i64, Address), CurDAG->getRegister(IA64::r0, MVT::i64), Chain).getValue(Op.ResNo); + return; + } /* otherwise, we want to load a bool into something bigger: LD1 will do that for us, so we just fall through */ } @@ -499,14 +519,16 @@ } // TODO: comment this - return CurDAG->SelectNodeTo(N, Opc, N->getValueType(0), MVT::Other, + Result = CurDAG->SelectNodeTo(N, Opc, N->getValueType(0), MVT::Other, Address, Chain).getValue(Op.ResNo); + return; } case ISD::TRUNCSTORE: case ISD::STORE: { - SDOperand Address = Select(N->getOperand(2)); - SDOperand Chain = Select(N->getOperand(0)); + SDOperand Address, Chain; + Select(Address, N->getOperand(2)); + Select(Chain, N->getOperand(0)); unsigned Opc; if (N->getOpcode() == ISD::STORE) { @@ -518,11 +540,13 @@ SDOperand Initial = CurDAG->getCopyFromReg(Chain, IA64::r0, MVT::i64); Chain = Initial.getValue(1); // then load 1 into the same reg iff the predicate to store is 1 - SDOperand Tmp = - CurDAG->getTargetNode(IA64::TPCADDS, MVT::i64, Initial, - CurDAG->getConstant(1, MVT::i64), - Select(N->getOperand(1))); - return CurDAG->SelectNodeTo(N, Opc, MVT::Other, Address, Tmp, Chain); + SDOperand Tmp; + Select(Tmp, N->getOperand(1)); + 
CurDAG->getTargetNode(IA64::TPCADDS, MVT::i64, Initial, + CurDAG->getConstant(1, MVT::i64), + Tmp); + Result = CurDAG->SelectNodeTo(N, Opc, MVT::Other, Address, Tmp, Chain); + return; } case MVT::i64: Opc = IA64::ST8; break; case MVT::f64: Opc = IA64::STF8; break; @@ -537,18 +561,23 @@ } } - return CurDAG->SelectNodeTo(N, Opc, MVT::Other, Select(N->getOperand(2)), - Select(N->getOperand(1)), Chain); + SDOperand N1, N2; + Select(N1, N->getOperand(1)); + Select(N2, N->getOperand(2)); + Result = CurDAG->SelectNodeTo(N, Opc, MVT::Other, N2, N1, Chain); + return; } case ISD::BRCOND: { - SDOperand Chain = Select(N->getOperand(0)); - SDOperand CC = Select(N->getOperand(1)); + SDOperand Chain, CC; + Select(Chain, N->getOperand(0)); + Select(CC, N->getOperand(1)); MachineBasicBlock *Dest = cast(N->getOperand(2))->getBasicBlock(); //FIXME - we do NOT need long branches all the time - return CurDAG->SelectNodeTo(N, IA64::BRLCOND_NOTCALL, MVT::Other, CC, + Result = CurDAG->SelectNodeTo(N, IA64::BRLCOND_NOTCALL, MVT::Other, CC, CurDAG->getBasicBlock(Dest), Chain); + return; } case ISD::CALLSEQ_START: @@ -556,17 +585,22 @@ int64_t Amt = cast(N->getOperand(1))->getValue(); unsigned Opc = N->getOpcode() == ISD::CALLSEQ_START ? IA64::ADJUSTCALLSTACKDOWN : IA64::ADJUSTCALLSTACKUP; - return CurDAG->SelectNodeTo(N, Opc, MVT::Other, - getI64Imm(Amt), Select(N->getOperand(0))); + SDOperand N0; + Select(N0, N->getOperand(0)); + Result = CurDAG->SelectNodeTo(N, Opc, MVT::Other, getI64Imm(Amt), N0); + return; } case ISD::BR: // FIXME: we don't need long branches all the time! 
- return CurDAG->SelectNodeTo(N, IA64::BRL_NOTCALL, MVT::Other, - N->getOperand(1), Select(N->getOperand(0))); + SDOperand N0; + Select(N0, N->getOperand(0)); + Result = CurDAG->SelectNodeTo(N, IA64::BRL_NOTCALL, MVT::Other, + N->getOperand(1), N0); + return; } - return SelectCode(Op); + SelectCode(Result, Op); } From evan.cheng at apple.com Wed Feb 8 18:38:14 2006 From: evan.cheng at apple.com (Evan Cheng) Date: Wed, 8 Feb 2006 18:38:14 -0600 Subject: [llvm-commits] CVS: llvm/lib/Target/Alpha/AlphaISelDAGToDAG.cpp Message-ID: <200602090038.SAA09220@zion.cs.uiuc.edu> Changes in directory llvm/lib/Target/Alpha: AlphaISelDAGToDAG.cpp updated: 1.32 -> 1.33 --- Log message: Change Select() from SDOperand Select(SDOperand N); to void Select(SDOperand &Result, SDOperand N); --- Diffs of the changes: (+74 -42) AlphaISelDAGToDAG.cpp | 116 +++++++++++++++++++++++++++++++------------------- 1 files changed, 74 insertions(+), 42 deletions(-) Index: llvm/lib/Target/Alpha/AlphaISelDAGToDAG.cpp diff -u llvm/lib/Target/Alpha/AlphaISelDAGToDAG.cpp:1.32 llvm/lib/Target/Alpha/AlphaISelDAGToDAG.cpp:1.33 --- llvm/lib/Target/Alpha/AlphaISelDAGToDAG.cpp:1.32 Sun Feb 5 00:46:41 2006 +++ llvm/lib/Target/Alpha/AlphaISelDAGToDAG.cpp Wed Feb 8 18:37:58 2006 @@ -96,7 +96,7 @@ // Select - Convert the specified operand from a target-independent to a // target-specific node if it hasn't already been changed. - SDOperand Select(SDOperand Op); + void Select(SDOperand &Result, SDOperand Op); /// InstructionSelectBasicBlock - This callback is invoked by /// SelectionDAGISel when it has created a SelectionDAG for us to codegen. @@ -150,55 +150,73 @@ // Select - Convert the specified operand from a target-independent to a // target-specific node if it hasn't already been changed. 
-SDOperand AlphaDAGToDAGISel::Select(SDOperand Op) { +void AlphaDAGToDAGISel::Select(SDOperand &Result, SDOperand Op) { SDNode *N = Op.Val; if (N->getOpcode() >= ISD::BUILTIN_OP_END && - N->getOpcode() < AlphaISD::FIRST_NUMBER) - return Op; // Already selected. + N->getOpcode() < AlphaISD::FIRST_NUMBER) { + Result = Op; + return; // Already selected. + } // If this has already been converted, use it. std::map::iterator CGMI = CodeGenMap.find(Op); - if (CGMI != CodeGenMap.end()) return CGMI->second; - + if (CGMI != CodeGenMap.end()) { + Result = CGMI->second; + return; + } + switch (N->getOpcode()) { default: break; - case AlphaISD::CALL: return SelectCALL(Op); + case AlphaISD::CALL: + Result = SelectCALL(Op); + return; case ISD::FrameIndex: { int FI = cast(N)->getIndex(); - return CurDAG->SelectNodeTo(N, Alpha::LDA, MVT::i64, - CurDAG->getTargetFrameIndex(FI, MVT::i32), - getI64Imm(0)); + Result = CurDAG->SelectNodeTo(N, Alpha::LDA, MVT::i64, + CurDAG->getTargetFrameIndex(FI, MVT::i32), + getI64Imm(0)); + return; } case AlphaISD::GlobalBaseReg: - return getGlobalBaseReg(); + Result = getGlobalBaseReg(); + return; case AlphaISD::DivCall: { SDOperand Chain = CurDAG->getEntryNode(); - Chain = CurDAG->getCopyToReg(Chain, Alpha::R24, Select(Op.getOperand(1)), + SDOperand N0, N1, N2; + Select(N0, Op.getOperand(0)); + Select(N1, Op.getOperand(1)); + Select(N2, Op.getOperand(2)); + Chain = CurDAG->getCopyToReg(Chain, Alpha::R24, N1, SDOperand(0,0)); - Chain = CurDAG->getCopyToReg(Chain, Alpha::R25, Select(Op.getOperand(2)), + Chain = CurDAG->getCopyToReg(Chain, Alpha::R25, N2, Chain.getValue(1)); - Chain = CurDAG->getCopyToReg(Chain, Alpha::R27, Select(Op.getOperand(0)), + Chain = CurDAG->getCopyToReg(Chain, Alpha::R27, N0, Chain.getValue(1)); Chain = CurDAG->getTargetNode(Alpha::JSRs, MVT::Other, MVT::Flag, Chain, Chain.getValue(1)); Chain = CurDAG->getCopyFromReg(Chain, Alpha::R27, MVT::i64, Chain.getValue(1)); - return CurDAG->SelectNodeTo(N, Alpha::BIS, MVT::i64, 
Chain, Chain); + Result = CurDAG->SelectNodeTo(N, Alpha::BIS, MVT::i64, Chain, Chain); + return; } case ISD::READCYCLECOUNTER: { - SDOperand Chain = Select(N->getOperand(0)); //Select chain - return CurDAG->SelectNodeTo(N, Alpha::RPCC, MVT::i64, Chain); + SDOperand Chain; + Select(Chain, N->getOperand(0)); //Select chain + Result = CurDAG->SelectNodeTo(N, Alpha::RPCC, MVT::i64, Chain); + return; } case ISD::RET: { - SDOperand Chain = Select(N->getOperand(0)); // Token chain. + SDOperand Chain; + Select(Chain, N->getOperand(0)); // Token chain. SDOperand InFlag; if (N->getNumOperands() == 2) { - SDOperand Val = Select(N->getOperand(1)); + SDOperand Val; + Select(Val, N->getOperand(1)); if (N->getOperand(1).getValueType() == MVT::i64) { Chain = CurDAG->getCopyToReg(Chain, Alpha::R0, Val, InFlag); InFlag = Chain.getValue(1); @@ -212,13 +230,17 @@ InFlag = Chain.getValue(1); // Finally, select this to a ret instruction. - return CurDAG->SelectNodeTo(N, Alpha::RETDAG, MVT::Other, Chain, InFlag); + Result = CurDAG->SelectNodeTo(N, Alpha::RETDAG, MVT::Other, Chain, InFlag); + return; } case ISD::Constant: { uint64_t uval = cast(N)->getValue(); - if (uval == 0) - return CurDAG->getCopyFromReg(CurDAG->getEntryNode(), Alpha::R31, MVT::i64); + if (uval == 0) { + Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), Alpha::R31, + MVT::i64); + return; + } int64_t val = (int64_t)uval; int32_t val32 = (int32_t)val; @@ -235,21 +257,24 @@ ConstantUInt::get(Type::getPrimitiveType(Type::ULongTyID) , uval); SDOperand Tmp, CPI = CurDAG->getTargetConstantPool(C, MVT::i64); Tmp = CurDAG->getTargetNode(Alpha::LDAHr, MVT::i64, CPI, getGlobalBaseReg()); - return CurDAG->SelectNodeTo(N, Alpha::LDQr, MVT::i64, MVT::Other, - CPI, Tmp, CurDAG->getEntryNode()); + Result = CurDAG->SelectNodeTo(N, Alpha::LDQr, MVT::i64, MVT::Other, + CPI, Tmp, CurDAG->getEntryNode()); + return; } case ISD::TargetConstantFP: { ConstantFPSDNode *CN = cast(N); bool isDouble = N->getValueType(0) == MVT::f64; 
MVT::ValueType T = isDouble ? MVT::f64 : MVT::f32; if (CN->isExactlyValue(+0.0)) { - return CurDAG->SelectNodeTo(N, isDouble ? Alpha::CPYST : Alpha::CPYSS, - T, CurDAG->getRegister(Alpha::F31, T), - CurDAG->getRegister(Alpha::F31, T)); + Result = CurDAG->SelectNodeTo(N, isDouble ? Alpha::CPYST : Alpha::CPYSS, + T, CurDAG->getRegister(Alpha::F31, T), + CurDAG->getRegister(Alpha::F31, T)); + return; } else if ( CN->isExactlyValue(-0.0)) { - return CurDAG->SelectNodeTo(N, isDouble ? Alpha::CPYSNT : Alpha::CPYSNS, - T, CurDAG->getRegister(Alpha::F31, T), - CurDAG->getRegister(Alpha::F31, T)); + Result = CurDAG->SelectNodeTo(N, isDouble ? Alpha::CPYSNT : Alpha::CPYSNS, + T, CurDAG->getRegister(Alpha::F31, T), + CurDAG->getRegister(Alpha::F31, T)); + return; } else { abort(); } @@ -271,8 +296,9 @@ case ISD::SETGE: Opc = Alpha::CMPTLE; rev = true; break; case ISD::SETNE: Opc = Alpha::CMPTEQ; isNE = true; break; }; - SDOperand tmp1 = Select(N->getOperand(0)), - tmp2 = Select(N->getOperand(1)); + SDOperand tmp1, tmp2; + Select(tmp1, N->getOperand(0)); + Select(tmp2, N->getOperand(1)); SDOperand cmp = CurDAG->getTargetNode(Opc, MVT::f64, rev?tmp2:tmp1, rev?tmp1:tmp2); @@ -296,7 +322,8 @@ SDOperand FP = CurDAG->getTargetNode(Alpha::CMPULT, MVT::i64, CurDAG->getRegister(Alpha::R31, MVT::i64), LD); - return FP; + Result = FP; + return; } break; @@ -309,10 +336,10 @@ // so that things like this can be caught in fall though code //move int to fp bool isDouble = N->getValueType(0) == MVT::f64; - SDOperand LD, - cond = Select(N->getOperand(0)), - TV = Select(N->getOperand(1)), - FV = Select(N->getOperand(2)); + SDOperand LD, cond, TV, FV; + Select(cond, N->getOperand(0)); + Select(TV, N->getOperand(1)); + Select(FV, N->getOperand(2)); if (AlphaLowering.hasITOF()) { LD = CurDAG->getNode(AlphaISD::ITOFT_, MVT::f64, cond); @@ -328,30 +355,34 @@ } SDOperand FP = CurDAG->getTargetNode(isDouble?Alpha::FCMOVNET:Alpha::FCMOVNES, MVT::f64, FV, TV, LD); - return FP; + Result = FP; + return; 
} break; } - return SelectCode(Op); + SelectCode(Result, Op); } SDOperand AlphaDAGToDAGISel::SelectCALL(SDOperand Op) { //TODO: add flag stuff to prevent nondeturministic breakage! SDNode *N = Op.Val; - SDOperand Chain = Select(N->getOperand(0)); + SDOperand Chain; SDOperand Addr = N->getOperand(1); SDOperand InFlag; // Null incoming flag value. + Select(Chain, N->getOperand(0)); std::vector CallOperands; std::vector TypeOperands; //grab the arguments for(int i = 2, e = N->getNumOperands(); i < e; ++i) { + SDOperand Tmp; TypeOperands.push_back(N->getOperand(i).getValueType()); - CallOperands.push_back(Select(N->getOperand(i))); + Select(Tmp, N->getOperand(i)); + CallOperands.push_back(Tmp); } int count = N->getNumOperands() - 2; @@ -396,7 +427,8 @@ Chain = CurDAG->getTargetNode(Alpha::BSR, MVT::Other, MVT::Flag, Addr.getOperand(0), Chain, InFlag); } else { - Chain = CurDAG->getCopyToReg(Chain, Alpha::R27, Select(Addr), InFlag); + Select(Addr, Addr); + Chain = CurDAG->getCopyToReg(Chain, Alpha::R27, Addr, InFlag); InFlag = Chain.getValue(1); Chain = CurDAG->getTargetNode(Alpha::JSR, MVT::Other, MVT::Flag, Chain, InFlag ); From evan.cheng at apple.com Wed Feb 8 18:38:14 2006 From: evan.cheng at apple.com (Evan Cheng) Date: Wed, 8 Feb 2006 18:38:14 -0600 Subject: [llvm-commits] CVS: llvm/utils/TableGen/DAGISelEmitter.cpp Message-ID: <200602090038.SAA09226@zion.cs.uiuc.edu> Changes in directory llvm/utils/TableGen: DAGISelEmitter.cpp updated: 1.169 -> 1.170 --- Log message: Change Select() from SDOperand Select(SDOperand N); to void Select(SDOperand &Result, SDOperand N); --- Diffs of the changes: (+98 -69) DAGISelEmitter.cpp | 167 +++++++++++++++++++++++++++++++---------------------- 1 files changed, 98 insertions(+), 69 deletions(-) Index: llvm/utils/TableGen/DAGISelEmitter.cpp diff -u llvm/utils/TableGen/DAGISelEmitter.cpp:1.169 llvm/utils/TableGen/DAGISelEmitter.cpp:1.170 --- llvm/utils/TableGen/DAGISelEmitter.cpp:1.169 Mon Feb 6 18:37:41 2006 +++ 
llvm/utils/TableGen/DAGISelEmitter.cpp Wed Feb 8 18:37:58 2006 @@ -2196,13 +2196,13 @@ emitCheck(Code + ")"); for (unsigned i = 0; i < NumRes; ++i) - emitCode("Tmp" + utostr(i+ResNo) + " = Select(Tmp" + + emitCode("Select(Tmp" + utostr(i+ResNo) + ", Tmp" + utostr(i+ResNo) + ");"); TmpNo = ResNo + NumRes; } else { emitDecl("Tmp" + utostr(ResNo)); - emitCode("Tmp" + utostr(ResNo) + " = Select(" + Val + ");"); + emitCode("Select(Tmp" + utostr(ResNo) + ", " + Val + ");"); } // Add Tmp to VariableMap, so that we don't multiply select this // value if used multiple times by this pattern result. @@ -2301,7 +2301,7 @@ // Emit all the chain and CopyToReg stuff. bool ChainEmitted = HasChain; if (HasChain) - emitCode(ChainName + " = Select(" + ChainName + ");"); + emitCode("Select(" + ChainName + ", " + ChainName + ");"); if (HasInFlag || HasOptInFlag || HasImpInputs) EmitInFlagSelectCode(Pattern, "N", ChainEmitted, true); @@ -2329,9 +2329,7 @@ utostr(NumResults) + ");"); } } else if (HasChain || NodeHasOutFlag) { - emitDecl("Result"); if (HasOptInFlag) { - emitCode("Result = SDOperand(0, 0);"); unsigned FlagNo = (unsigned) NodeHasChain + Pattern->getNumChildren(); emitCode("if (HasOptInFlag)"); std::string Code = " Result = CurDAG->getTargetNode(" + @@ -2445,21 +2443,21 @@ if (AddedChain && NodeHasOutFlag) { if (NumExpectedResults == 0) { - emitCode("return Result.getValue(N.ResNo+1);"); + emitCode("Result = Result.getValue(N.ResNo+1);"); } else { emitCode("if (N.ResNo < " + utostr(NumExpectedResults) + ")"); - emitCode(" return Result.getValue(N.ResNo);"); + emitCode(" Result = Result.getValue(N.ResNo);"); emitCode("else"); - emitCode(" return Result.getValue(N.ResNo+1);"); + emitCode(" Result = Result.getValue(N.ResNo+1);"); } } else { - emitCode("return Result.getValue(N.ResNo);"); + emitCode("Result = Result.getValue(N.ResNo);"); } } else { // If this instruction is the root, and if there is only one use of it, // use SelectNodeTo instead of getTargetNode to avoid an 
allocation. emitCode("if (N.Val->hasOneUse()) {"); - std::string Code = " return CurDAG->SelectNodeTo(N.Val, " + + std::string Code = " Result = CurDAG->SelectNodeTo(N.Val, " + II.Namespace + "::" + II.TheDef->getName(); if (N->getTypeNum(0) != MVT::isVoid) Code += ", MVT::" + getEnumName(N->getTypeNum(0)); @@ -2471,7 +2469,7 @@ Code += ", InFlag"; emitCode(Code + ");"); emitCode("} else {"); - Code = " return CodeGenMap[N] = CurDAG->getTargetNode(" + + Code = " Result = CodeGenMap[N] = CurDAG->getTargetNode(" + II.Namespace + "::" + II.TheDef->getName(); if (N->getTypeNum(0) != MVT::isVoid) Code += ", MVT::" + getEnumName(N->getTypeNum(0)); @@ -2485,6 +2483,8 @@ emitCode("}"); } + if (isRoot) + emitCode("return;"); return std::make_pair(1, ResNo); } else if (Op->isSubClassOf("SDNodeXForm")) { assert(N->getNumChildren() == 1 && "node xform should have one child!"); @@ -2495,7 +2495,8 @@ + "(Tmp" + utostr(OpVal) + ".Val);"); if (isRoot) { emitCode("CodeGenMap[N] = Tmp" +utostr(ResNo) + ";"); - emitCode("return Tmp" + utostr(ResNo) + ";"); + emitCode("Result = Tmp" + utostr(ResNo) + ";"); + emitCode("return;"); } return std::make_pair(1, ResNo); } else { @@ -2556,7 +2557,7 @@ if (RR->isSubClassOf("Register")) { MVT::ValueType RVT = getRegisterValueType(RR, T); if (RVT == MVT::Flag) { - emitCode("InFlag = Select(" + RootName + utostr(OpNo) + ");"); + emitCode("Select(InFlag, " + RootName + utostr(OpNo) + ");"); } else { if (!ChainEmitted) { emitDecl("Chain"); @@ -2564,16 +2565,15 @@ ChainName = "Chain"; ChainEmitted = true; } - emitDecl(RootName + "CR" + utostr(i)); - emitCode(RootName + "CR" + utostr(i) + - " = CurDAG->getCopyToReg(" + ChainName + + emitCode("Select(" + RootName + utostr(OpNo) + ", " + + RootName + utostr(OpNo) + ");"); + emitDecl("Copy"); + emitCode("Copy = CurDAG->getCopyToReg(" + ChainName + ", CurDAG->getRegister(" + ISE.getQualifiedName(RR) + - ", MVT::" + getEnumName(RVT) + "), Select(" + RootName + - utostr(OpNo) + "), InFlag);"); - 
emitCode(ChainName + " = " + RootName + "CR" + utostr(i) + - ".getValue(0);"); - emitCode("InFlag = " + RootName + "CR" + utostr(i) + - ".getValue(1);"); + ", MVT::" + getEnumName(RVT) + "), " + + RootName + utostr(OpNo) + ", InFlag);"); + emitCode(ChainName + " = Copy.getValue(0);"); + emitCode("InFlag = Copy.getValue(1);"); } } } @@ -2587,8 +2587,8 @@ ") {"); Code = " "; } - emitCode(Code + "InFlag = Select(" + RootName + ".getOperand(" + - utostr(OpNo) + "));"); + emitCode(Code + "Select(InFlag, " + RootName + + ".getOperand(" + utostr(OpNo) + "));"); if (HasOptInFlag) { emitCode(" HasOptInFlag = true;"); emitCode("}"); @@ -2866,7 +2866,7 @@ CompareByRecordName>::iterator PBOI = PatternsByOpcode.begin(), E = PatternsByOpcode.end(); PBOI != E; ++PBOI) { const std::string &OpName = PBOI->first->getName(); - OS << "SDOperand Select_" << OpName << "(SDOperand N) {\n"; + OS << "void Select_" << OpName << "(SDOperand &Result, SDOperand N) {\n"; const SDNodeInfo &OpcodeInfo = getSDNodeInfo(PBOI->first); bool OptSlctOrder = @@ -2874,7 +2874,6 @@ OpcodeInfo.getNumResults() > 0); if (OptSlctOrder) { - OS << " SDOperand RetVal;\n"; OS << " if (N.ResNo == " << OpcodeInfo.getNumResults() << " && N.getValue(0).hasOneUse()) {\n" << " SDOperand Dummy = " @@ -2883,7 +2882,8 @@ << ")] = Dummy;\n" << " HandleMap[N.getValue(" << OpcodeInfo.getNumResults() << ")] = Dummy;\n" - << " return Dummy;\n" + << " Result = Dummy;\n" + << " return;\n" << " }\n"; } @@ -2935,7 +2935,7 @@ // Print all declarations. for (std::set::iterator I = GeneratedDecl.begin(), E = GeneratedDecl.end(); I != E; ++I) - OS << " SDOperand " << *I << ";\n"; + OS << " SDOperand " << *I << "(0, 0);\n"; // Loop through and reverse all of the CodeList vectors, as we will be // accessing them from their logical front, but accessing the end of a @@ -2963,29 +2963,35 @@ } // Emit boilerplate. 
- OS << "SDOperand Select_INLINEASM(SDOperand N) {\n" + OS << "void Select_INLINEASM(SDOperand& Result, SDOperand N) {\n" << " std::vector Ops(N.Val->op_begin(), N.Val->op_end());\n" - << " Ops[0] = Select(N.getOperand(0)); // Select the chain.\n\n" + << " Select(Ops[0], N.getOperand(0)); // Select the chain.\n\n" << " // Select the flag operand.\n" << " if (Ops.back().getValueType() == MVT::Flag)\n" - << " Ops.back() = Select(Ops.back());\n" + << " Select(Ops.back(), Ops.back());\n" << " std::vector VTs;\n" << " VTs.push_back(MVT::Other);\n" << " VTs.push_back(MVT::Flag);\n" << " SDOperand New = CurDAG->getNode(ISD::INLINEASM, VTs, Ops);\n" << " CodeGenMap[N.getValue(0)] = New;\n" << " CodeGenMap[N.getValue(1)] = New.getValue(1);\n" - << " return New.getValue(N.ResNo);\n" + << " Result = New.getValue(N.ResNo);\n" + << " return;\n" << "}\n\n"; OS << "// The main instruction selector code.\n" - << "SDOperand SelectCode(SDOperand N) {\n" + << "void SelectCode(SDOperand &Result, SDOperand N) {\n" << " if (N.getOpcode() >= ISD::BUILTIN_OP_END &&\n" << " N.getOpcode() < (ISD::BUILTIN_OP_END+" << InstNS - << "INSTRUCTION_LIST_END))\n" - << " return N; // Already selected.\n\n" + << "INSTRUCTION_LIST_END)) {\n" + << " Result = N;\n" + << " return; // Already selected.\n" + << " }\n\n" << " std::map::iterator CGMI = CodeGenMap.find(N);\n" - << " if (CGMI != CodeGenMap.end()) return CGMI->second;\n" + << " if (CGMI != CodeGenMap.end()) {\n" + << " Result = CGMI->second;\n" + << " return;\n" + << " }\n\n" << " switch (N.getOpcode()) {\n" << " default: break;\n" << " case ISD::EntryToken: // These leaves remain the same.\n" @@ -2995,71 +3001,91 @@ << " case ISD::TargetConstant:\n" << " case ISD::TargetConstantPool:\n" << " case ISD::TargetFrameIndex:\n" - << " case ISD::TargetGlobalAddress:\n" - << " return N;\n" + << " case ISD::TargetGlobalAddress: {\n" + << " Result = N;\n" + << " return;\n" + << " }\n" << " case ISD::AssertSext:\n" << " case ISD::AssertZext: {\n" - << " 
SDOperand Tmp0 = Select(N.getOperand(0));\n" + << " SDOperand Tmp0;\n" + << " Select(Tmp0, N.getOperand(0));\n" << " if (!N.Val->hasOneUse()) CodeGenMap[N] = Tmp0;\n" - << " return Tmp0;\n" + << " Result = Tmp0;\n" + << " return;\n" << " }\n" << " case ISD::TokenFactor:\n" << " if (N.getNumOperands() == 2) {\n" - << " SDOperand Op0 = Select(N.getOperand(0));\n" - << " SDOperand Op1 = Select(N.getOperand(1));\n" - << " return CodeGenMap[N] =\n" + << " SDOperand Op0, Op1;\n" + << " Select(Op0, N.getOperand(0));\n" + << " Select(Op1, N.getOperand(1));\n" + << " Result = CodeGenMap[N] =\n" << " CurDAG->getNode(ISD::TokenFactor, MVT::Other, Op0, Op1);\n" << " } else {\n" << " std::vector Ops;\n" - << " for (unsigned i = 0, e = N.getNumOperands(); i != e; ++i)\n" - << " Ops.push_back(Select(N.getOperand(i)));\n" - << " return CodeGenMap[N] = \n" + << " for (unsigned i = 0, e = N.getNumOperands(); i != e; ++i) {\n" + << " SDOperand Val;\n" + << " Select(Val, N.getOperand(i));\n" + << " Ops.push_back(Val);\n" + << " }\n" + << " Result = CodeGenMap[N] = \n" << " CurDAG->getNode(ISD::TokenFactor, MVT::Other, Ops);\n" << " }\n" + << " return;\n" << " case ISD::CopyFromReg: {\n" - << " SDOperand Chain = Select(N.getOperand(0));\n" + << " SDOperand Chain;\n" + << " Select(Chain, N.getOperand(0));\n" << " unsigned Reg = cast(N.getOperand(1))->getReg();\n" << " MVT::ValueType VT = N.Val->getValueType(0);\n" << " if (N.Val->getNumValues() == 2) {\n" - << " if (Chain == N.getOperand(0)) return N; // No change\n" + << " if (Chain == N.getOperand(0)) {\n" + << " Result = N; // No change\n" + << " return;\n" + << " }\n" << " SDOperand New = CurDAG->getCopyFromReg(Chain, Reg, VT);\n" << " CodeGenMap[N.getValue(0)] = New;\n" << " CodeGenMap[N.getValue(1)] = New.getValue(1);\n" - << " return New.getValue(N.ResNo);\n" + << " Result = New.getValue(N.ResNo);\n" + << " return;\n" << " } else {\n" - << " SDOperand Flag(0, 0);\n" - << " if (N.getNumOperands() == 3) Flag = 
Select(N.getOperand(2));\n" + << " SDOperand Flag;\n" + << " if (N.getNumOperands() == 3) Select(Flag, N.getOperand(2));\n" << " if (Chain == N.getOperand(0) &&\n" - << " (N.getNumOperands() == 2 || Flag == N.getOperand(2)))\n" - << " return N; // No change\n" + << " (N.getNumOperands() == 2 || Flag == N.getOperand(2))) {\n" + << " Result = N; // No change\n" + << " return;\n" + << " }\n" << " SDOperand New = CurDAG->getCopyFromReg(Chain, Reg, VT, Flag);\n" << " CodeGenMap[N.getValue(0)] = New;\n" << " CodeGenMap[N.getValue(1)] = New.getValue(1);\n" << " CodeGenMap[N.getValue(2)] = New.getValue(2);\n" - << " return New.getValue(N.ResNo);\n" + << " Result = New.getValue(N.ResNo);\n" + << " return;\n" << " }\n" << " }\n" << " case ISD::CopyToReg: {\n" - << " SDOperand Chain = Select(N.getOperand(0));\n" + << " SDOperand Chain;\n" + << " Select(Chain, N.getOperand(0));\n" << " unsigned Reg = cast(N.getOperand(1))->getReg();\n" - << " SDOperand Val = Select(N.getOperand(2));\n" - << " SDOperand Result = N;\n" + << " SDOperand Val;\n" + << " Select(Val, N.getOperand(2));\n" + << " SDOperand ResNode = N;\n" << " if (N.Val->getNumValues() == 1) {\n" << " if (Chain != N.getOperand(0) || Val != N.getOperand(2))\n" - << " Result = CurDAG->getCopyToReg(Chain, Reg, Val);\n" - << " return CodeGenMap[N] = Result;\n" + << " ResNode = CurDAG->getCopyToReg(Chain, Reg, Val);\n" + << " Result = CodeGenMap[N] = ResNode;\n" << " } else {\n" << " SDOperand Flag(0, 0);\n" - << " if (N.getNumOperands() == 4) Flag = Select(N.getOperand(3));\n" + << " if (N.getNumOperands() == 4) Select(Flag, N.getOperand(3));\n" << " if (Chain != N.getOperand(0) || Val != N.getOperand(2) ||\n" << " (N.getNumOperands() == 4 && Flag != N.getOperand(3)))\n" - << " Result = CurDAG->getCopyToReg(Chain, Reg, Val, Flag);\n" - << " CodeGenMap[N.getValue(0)] = Result;\n" - << " CodeGenMap[N.getValue(1)] = Result.getValue(1);\n" - << " return Result.getValue(N.ResNo);\n" + << " ResNode = CurDAG->getCopyToReg(Chain, 
Reg, Val, Flag);\n" + << " CodeGenMap[N.getValue(0)] = ResNode;\n" + << " CodeGenMap[N.getValue(1)] = ResNode.getValue(1);\n" + << " Result = ResNode.getValue(N.ResNo);\n" << " }\n" + << " return;\n" << " }\n" - << " case ISD::INLINEASM: return Select_INLINEASM(N);\n"; + << " case ISD::INLINEASM: Select_INLINEASM(Result, N); return;\n"; // Loop over all of the case statements, emiting a call to each method we @@ -3070,7 +3096,7 @@ const SDNodeInfo &OpcodeInfo = getSDNodeInfo(PBOI->first); OS << " case " << OpcodeInfo.getEnumName() << ": " << std::string(std::max(0, int(24-OpcodeInfo.getEnumName().size())), ' ') - << "return Select_" << PBOI->first->getName() << "(N);\n"; + << "Select_" << PBOI->first->getName() << "(Result, N); return;\n"; } OS << " } // end of big switch.\n\n" @@ -3151,7 +3177,9 @@ << "HandleMap.begin(),\n" << " E = HandleMap.end(); I != E; ++I) {\n"; OS << " SDOperand N = I->first;\n"; - OS << " AddHandleReplacement(N, Select(N.getValue(0)));\n"; + OS << " SDOperand R;\n"; + OS << " Select(R, N.getValue(0));\n"; + OS << " AddHandleReplacement(N, R);\n"; OS << " }\n"; OS << "}\n"; OS << "\n"; @@ -3183,11 +3211,12 @@ OS << "\n"; OS << "// SelectRoot - Top level entry to DAG isel.\n"; OS << "SDOperand SelectRoot(SDOperand N) {\n"; - OS << " SDOperand RetVal = Select(N);\n"; + OS << " SDOperand ResNode;\n"; + OS << " Select(ResNode, N);\n"; OS << " SelectDanglingHandles();\n"; OS << " ReplaceHandles();\n"; OS << " ReplaceMap.clear();\n"; - OS << " return RetVal;\n"; + OS << " return ResNode;\n"; OS << "}\n"; ParseNodeInfo(); From lattner at cs.uiuc.edu Wed Feb 8 20:10:27 2006 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Wed, 8 Feb 2006 20:10:27 -0600 Subject: [llvm-commits] CVS: llvm/include/llvm/CodeGen/SelectionDAGNodes.h Message-ID: <200602090210.UAA09738@zion.cs.uiuc.edu> Changes in directory llvm/include/llvm/CodeGen: SelectionDAGNodes.h updated: 1.100 -> 1.101 --- Log message: Add a comment: value is log2 --- Diffs of the changes: (+3 -0) 
SelectionDAGNodes.h | 3 +++ 1 files changed, 3 insertions(+) Index: llvm/include/llvm/CodeGen/SelectionDAGNodes.h diff -u llvm/include/llvm/CodeGen/SelectionDAGNodes.h:1.100 llvm/include/llvm/CodeGen/SelectionDAGNodes.h:1.101 --- llvm/include/llvm/CodeGen/SelectionDAGNodes.h:1.100 Sun Feb 5 00:29:23 2006 +++ llvm/include/llvm/CodeGen/SelectionDAGNodes.h Wed Feb 8 20:10:15 2006 @@ -1074,6 +1074,9 @@ public: Constant *get() const { return C; } + + // Return the alignment of this constant pool object, which is either 0 (for + // default alignment) or log2 of the desired value. unsigned getAlignment() const { return Alignment; } static bool classof(const ConstantPoolSDNode *) { return true; } From lattner at cs.uiuc.edu Wed Feb 8 20:15:42 2006 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Wed, 8 Feb 2006 20:15:42 -0600 Subject: [llvm-commits] CVS: llvm/lib/Target/X86/X86ISelPattern.cpp Message-ID: <200602090215.UAA09822@zion.cs.uiuc.edu> Changes in directory llvm/lib/Target/X86: X86ISelPattern.cpp updated: 1.203 -> 1.204 --- Log message: provide an explicit alignment for cp entries --- Diffs of the changes: (+22 -6) X86ISelPattern.cpp | 28 ++++++++++++++++++++++------ 1 files changed, 22 insertions(+), 6 deletions(-) Index: llvm/lib/Target/X86/X86ISelPattern.cpp diff -u llvm/lib/Target/X86/X86ISelPattern.cpp:1.203 llvm/lib/Target/X86/X86ISelPattern.cpp:1.204 --- llvm/lib/Target/X86/X86ISelPattern.cpp:1.203 Sun Jan 29 00:45:31 2006 +++ llvm/lib/Target/X86/X86ISelPattern.cpp Wed Feb 8 20:15:30 2006 @@ -101,11 +101,14 @@ /// Subtarget - Keep a pointer to the X86Subtarget around so that we can /// make the right decision when generating code for different targets. const X86Subtarget *Subtarget; + + const TargetData &TD; /// X86ScalarSSE - Select between SSE2 or x87 floating point ops. 
bool X86ScalarSSE; public: - ISel(TargetMachine &TM) : SelectionDAGISel(X86Lowering), X86Lowering(TM) { + ISel(TargetMachine &TM) : SelectionDAGISel(X86Lowering), + X86Lowering(TM), TD(TM.getTargetData()) { Subtarget = &TM.getSubtarget(); X86ScalarSSE = Subtarget->hasSSE2(); } @@ -1312,11 +1315,17 @@ Tmp1 = cast(N)->getIndex(); addFrameReference(BuildMI(BB, X86::LEA32r, 4, Result), (int)Tmp1); return Result; - case ISD::ConstantPool: - Tmp1 = BB->getParent()->getConstantPool()-> - getConstantPoolIndex(cast(N)->get()); + case ISD::ConstantPool: { + Constant *C = cast(N)->get(); + unsigned Align = cast(N)->getAlignment(); + if (Align == 0) { + Align = C->getType() == Type::DoubleTy ? 3 : + TD.getTypeAlignmentShift(C->getType()); + } + Tmp1 = BB->getParent()->getConstantPool()->getConstantPoolIndex(C, Align); addConstantPoolReference(BuildMI(BB, X86::LEA32r, 4, Result), Tmp1); return Result; + } case ISD::ConstantFP: if (X86ScalarSSE) { assert(cast(N)->isExactlyValue(+0.0) && @@ -2240,8 +2249,15 @@ } if (ConstantPoolSDNode *CP = dyn_cast(N.getOperand(1))){ + Constant *C = CP->get(); + unsigned Align = CP->getAlignment(); + if (Align == 0) { + Align = C->getType() == Type::DoubleTy ? 
3 : + TD.getTypeAlignmentShift(C->getType()); + } + unsigned CPIdx = BB->getParent()->getConstantPool()-> - getConstantPoolIndex(CP->get()); + getConstantPoolIndex(C, Align); Select(N.getOperand(0)); addConstantPoolReference(BuildMI(BB, Opc, 4, Result), CPIdx); } else { @@ -2296,7 +2312,7 @@ assert(cast(Node->getOperand(3))->getVT() == MVT::f32 && "Bad EXTLOAD!"); unsigned CPIdx = BB->getParent()->getConstantPool()-> - getConstantPoolIndex(CP->get()); + getConstantPoolIndex(CP->get(), 2); addConstantPoolReference(BuildMI(BB, X86::FpLD32m, 4, Result), CPIdx); return Result; From lattner at cs.uiuc.edu Wed Feb 8 20:19:28 2006 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Wed, 8 Feb 2006 20:19:28 -0600 Subject: [llvm-commits] CVS: llvm/lib/Target/SparcV9/SparcV9BurgISel.cpp Message-ID: <200602090219.UAA09866@zion.cs.uiuc.edu> Changes in directory llvm/lib/Target/SparcV9: SparcV9BurgISel.cpp updated: 1.25 -> 1.26 --- Log message: Always pass in an alignment. --- Diffs of the changes: (+2 -1) SparcV9BurgISel.cpp | 3 ++- 1 files changed, 2 insertions(+), 1 deletion(-) Index: llvm/lib/Target/SparcV9/SparcV9BurgISel.cpp diff -u llvm/lib/Target/SparcV9/SparcV9BurgISel.cpp:1.25 llvm/lib/Target/SparcV9/SparcV9BurgISel.cpp:1.26 --- llvm/lib/Target/SparcV9/SparcV9BurgISel.cpp:1.25 Tue Aug 2 14:25:03 2005 +++ llvm/lib/Target/SparcV9/SparcV9BurgISel.cpp Wed Feb 8 20:19:16 2006 @@ -1139,7 +1139,8 @@ // Get the constant pool index for this constant MachineConstantPool *CP = MachineFunction::get(F).getConstantPool(); Constant *C = cast(val); - unsigned CPI = CP->getConstantPoolIndex(C); + unsigned Align = target.getTargetData().getTypeAlignmentShift(C->getType()); + unsigned CPI = CP->getConstantPoolIndex(C, Align); // Put the address of the constant into a register MachineInstr* MI; From lattner at cs.uiuc.edu Wed Feb 8 20:23:25 2006 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Wed, 8 Feb 2006 20:23:25 -0600 Subject: [llvm-commits] CVS: 
llvm/lib/CodeGen/SelectionDAG/ScheduleDAG.cpp Message-ID: <200602090223.UAA09938@zion.cs.uiuc.edu> Changes in directory llvm/lib/CodeGen/SelectionDAG: ScheduleDAG.cpp updated: 1.65 -> 1.66 --- Log message: Make MachineConstantPool entries alignments explicit --- Diffs of the changes: (+12 -2) ScheduleDAG.cpp | 14 ++++++++++++-- 1 files changed, 12 insertions(+), 2 deletions(-) Index: llvm/lib/CodeGen/SelectionDAG/ScheduleDAG.cpp diff -u llvm/lib/CodeGen/SelectionDAG/ScheduleDAG.cpp:1.65 llvm/lib/CodeGen/SelectionDAG/ScheduleDAG.cpp:1.66 --- llvm/lib/CodeGen/SelectionDAG/ScheduleDAG.cpp:1.65 Sat Feb 4 10:20:31 2006 +++ llvm/lib/CodeGen/SelectionDAG/ScheduleDAG.cpp Wed Feb 8 20:23:13 2006 @@ -23,6 +23,7 @@ #include "llvm/Target/TargetInstrItineraries.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Support/Debug.h" +#include "llvm/Constant.h" #include using namespace llvm; @@ -194,8 +195,17 @@ MI->addFrameIndexOperand(FI->getIndex()); } else if (ConstantPoolSDNode *CP = dyn_cast(Node->getOperand(i))) { - unsigned Idx = ConstPool->getConstantPoolIndex(CP->get(), - CP->getAlignment()); + unsigned Align = CP->getAlignment(); + // MachineConstantPool wants an explicit alignment. + if (Align == 0) { + if (CP->get()->getType() == Type::DoubleTy) + Align = 3; // always 8-byte align doubles. + else + Align = TM.getTargetData() + .getTypeAlignmentShift(CP->get()->getType()); + } + + unsigned Idx = ConstPool->getConstantPoolIndex(CP->get(), Align); MI->addConstantPoolIndexOperand(Idx); } else if (ExternalSymbolSDNode *ES = dyn_cast(Node->getOperand(i))) { From lattner at cs.uiuc.edu Wed Feb 8 20:24:36 2006 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Wed, 8 Feb 2006 20:24:36 -0600 Subject: [llvm-commits] CVS: llvm/include/llvm/CodeGen/MachineConstantPool.h Message-ID: <200602090224.UAA10001@zion.cs.uiuc.edu> Changes in directory llvm/include/llvm/CodeGen: MachineConstantPool.h updated: 1.10 -> 1.11 --- Log message: Require an alignment. 
--- Diffs of the changes: (+3 -6) MachineConstantPool.h | 9 +++------ 1 files changed, 3 insertions(+), 6 deletions(-) Index: llvm/include/llvm/CodeGen/MachineConstantPool.h diff -u llvm/include/llvm/CodeGen/MachineConstantPool.h:1.10 llvm/include/llvm/CodeGen/MachineConstantPool.h:1.11 --- llvm/include/llvm/CodeGen/MachineConstantPool.h:1.10 Tue Jan 31 16:21:33 2006 +++ llvm/include/llvm/CodeGen/MachineConstantPool.h Wed Feb 8 20:24:25 2006 @@ -34,18 +34,15 @@ public: /// getConstantPoolIndex - Create a new entry in the constant pool or return - /// an existing one. User may specify an alignment that is greater than the - /// default alignment. If one is not specified, it will be 0. + /// an existing one. User must specify an alignment in bytes for the object. /// - unsigned getConstantPoolIndex(Constant *C, unsigned Alignment = 0) { + unsigned getConstantPoolIndex(Constant *C, unsigned Alignment) { // Check to see if we already have this constant. // // FIXME, this could be made much more efficient for large constant pools. 
for (unsigned i = 0, e = Constants.size(); i != e; ++i) - if (Constants[i].first == C) { - Constants[i].second = std::max(Constants[i].second, Alignment); + if (Constants[i].first == C && Constants[i].second >= Alignment) return i; - } Constants.push_back(std::make_pair(C, Alignment)); return Constants.size()-1; } From lattner at cs.uiuc.edu Wed Feb 8 20:25:55 2006 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Wed, 8 Feb 2006 20:25:55 -0600 Subject: [llvm-commits] CVS: llvm/include/llvm/CodeGen/MachineConstantPool.h Message-ID: <200602090225.UAA10075@zion.cs.uiuc.edu> Changes in directory llvm/include/llvm/CodeGen: MachineConstantPool.h updated: 1.11 -> 1.12 --- Log message: Assert invariants --- Diffs of the changes: (+3 -0) MachineConstantPool.h | 3 +++ 1 files changed, 3 insertions(+) Index: llvm/include/llvm/CodeGen/MachineConstantPool.h diff -u llvm/include/llvm/CodeGen/MachineConstantPool.h:1.11 llvm/include/llvm/CodeGen/MachineConstantPool.h:1.12 --- llvm/include/llvm/CodeGen/MachineConstantPool.h:1.11 Wed Feb 8 20:24:25 2006 +++ llvm/include/llvm/CodeGen/MachineConstantPool.h Wed Feb 8 20:25:42 2006 @@ -24,6 +24,7 @@ #include #include +#include namespace llvm { @@ -37,6 +38,8 @@ /// an existing one. User must specify an alignment in bytes for the object. /// unsigned getConstantPoolIndex(Constant *C, unsigned Alignment) { + assert(Alignment && "Alignment must be specified!"); + // Check to see if we already have this constant. // // FIXME, this could be made much more efficient for large constant pools. From lattner at cs.uiuc.edu Wed Feb 8 20:26:15 2006 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Wed, 8 Feb 2006 20:26:15 -0600 Subject: [llvm-commits] CVS: llvm/lib/CodeGen/AsmPrinter.cpp Message-ID: <200602090226.UAA10108@zion.cs.uiuc.edu> Changes in directory llvm/lib/CodeGen: AsmPrinter.cpp updated: 1.49 -> 1.50 --- Log message: Simplify code, alignment must be specified now. 
--- Diffs of the changes: (+1 -10) AsmPrinter.cpp | 11 +---------- 1 files changed, 1 insertion(+), 10 deletions(-) Index: llvm/lib/CodeGen/AsmPrinter.cpp diff -u llvm/lib/CodeGen/AsmPrinter.cpp:1.49 llvm/lib/CodeGen/AsmPrinter.cpp:1.50 --- llvm/lib/CodeGen/AsmPrinter.cpp:1.49 Wed Feb 8 17:41:56 2006 +++ llvm/lib/CodeGen/AsmPrinter.cpp Wed Feb 8 20:26:04 2006 @@ -112,16 +112,7 @@ SwitchSection(ConstantPoolSection, 0); for (unsigned i = 0, e = CP.size(); i != e; ++i) { - // FIXME: force doubles to be naturally aligned. We should handle this - // more correctly in the future. - unsigned Alignment = CP[i].second; - if (Alignment == 0) { - Alignment = TD.getTypeAlignmentShift(CP[i].first->getType()); - if (CP[i].first->getType() == Type::DoubleTy && Alignment < 3) - Alignment = 3; - } - - EmitAlignment(Alignment); + EmitAlignment(CP[i].second); O << PrivateGlobalPrefix << "CPI" << getFunctionNumber() << '_' << i << ":\t\t\t\t\t" << CommentString << *CP[i].first << '\n'; EmitGlobalConstant(CP[i].first); From lattner at cs.uiuc.edu Wed Feb 8 22:22:01 2006 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Wed, 8 Feb 2006 22:22:01 -0600 Subject: [llvm-commits] CVS: llvm/include/llvm/CodeGen/MachineConstantPool.h Message-ID: <200602090422.WAA10511@zion.cs.uiuc.edu> Changes in directory llvm/include/llvm/CodeGen: MachineConstantPool.h updated: 1.12 -> 1.13 --- Log message: Use a MachineConstantPoolEntry struct instead of a pair to hold constant pool entries. 
--- Diffs of the changes: (+15 -5) MachineConstantPool.h | 20 +++++++++++++++----- 1 files changed, 15 insertions(+), 5 deletions(-) Index: llvm/include/llvm/CodeGen/MachineConstantPool.h diff -u llvm/include/llvm/CodeGen/MachineConstantPool.h:1.12 llvm/include/llvm/CodeGen/MachineConstantPool.h:1.13 --- llvm/include/llvm/CodeGen/MachineConstantPool.h:1.12 Wed Feb 8 20:25:42 2006 +++ llvm/include/llvm/CodeGen/MachineConstantPool.h Wed Feb 8 22:21:49 2006 @@ -30,10 +30,20 @@ class Constant; +/// MachineConstantPoolEntry - One entry in the constant pool. +/// +struct MachineConstantPoolEntry { + /// Val - The constant itself. + Constant *Val; + /// Alignment - The alignment of the constant. + unsigned Alignment; + + MachineConstantPoolEntry(Constant *V, unsigned A) : Val(V), Alignment(A) {} +}; + class MachineConstantPool { - std::vector<std::pair<Constant*, unsigned> > Constants; + std::vector<MachineConstantPoolEntry> Constants; public: - /// getConstantPoolIndex - Create a new entry in the constant pool or return /// an existing one. User must specify an alignment in bytes for the object. /// @@ -44,9 +54,9 @@ // // FIXME, this could be made much more efficient for large constant pools. 
for (unsigned i = 0, e = Constants.size(); i != e; ++i) - if (Constants[i].first == C && Constants[i].second >= Alignment) + if (Constants[i].Val == C && Constants[i].Alignment >= Alignment) return i; - Constants.push_back(std::make_pair(C, Alignment)); + Constants.push_back(MachineConstantPoolEntry(C, Alignment)); return Constants.size()-1; } @@ -54,7 +64,7 @@ /// bool isEmpty() const { return Constants.empty(); } - const std::vector<std::pair<Constant*, unsigned> > &getConstants() const { + const std::vector<MachineConstantPoolEntry> &getConstants() const { return Constants; } From lattner at cs.uiuc.edu Wed Feb 8 22:23:05 2006 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Wed, 8 Feb 2006 22:23:05 -0600 Subject: [llvm-commits] CVS: llvm/lib/Target/SparcV9/SparcV9AsmPrinter.cpp Message-ID: <200602090423.WAA10602@zion.cs.uiuc.edu> Changes in directory llvm/lib/Target/SparcV9: SparcV9AsmPrinter.cpp updated: 1.128 -> 1.129 --- Log message: rename fields of constant pool entries --- Diffs of the changes: (+2 -2) SparcV9AsmPrinter.cpp | 4 ++-- 1 files changed, 2 insertions(+), 2 deletions(-) Index: llvm/lib/Target/SparcV9/SparcV9AsmPrinter.cpp diff -u llvm/lib/Target/SparcV9/SparcV9AsmPrinter.cpp:1.128 llvm/lib/Target/SparcV9/SparcV9AsmPrinter.cpp:1.129 --- llvm/lib/Target/SparcV9/SparcV9AsmPrinter.cpp:1.128 Tue Jan 31 16:23:14 2006 +++ llvm/lib/Target/SparcV9/SparcV9AsmPrinter.cpp Wed Feb 8 22:22:52 2006 @@ -724,12 +724,12 @@ // Emit constant pool for this function const MachineConstantPool *MCP = MF.getConstantPool(); - const std::vector<std::pair<Constant*, unsigned> > &CP = MCP->getConstants(); + const std::vector<MachineConstantPoolEntry> &CP = MCP->getConstants(); enterSection(ReadOnlyData); for (unsigned i = 0, e = CP.size(); i != e; ++i) { std::string cpiName = ".CPI_" + CurrentFnName + "_" + utostr(i); - printConstant(CP[i].first, CP[i].second, cpiName); + printConstant(CP[i].Val, CP[i].Alignment, cpiName); } enterSection(Text); From lattner at cs.uiuc.edu Wed Feb 8 22:23:06 2006 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Wed, 8 Feb 2006 22:23:06 -0600 
Subject: [llvm-commits] CVS: llvm/lib/ExecutionEngine/JIT/JITEmitter.cpp Message-ID: <200602090423.WAA10606@zion.cs.uiuc.edu> Changes in directory llvm/lib/ExecutionEngine/JIT: JITEmitter.cpp updated: 1.79 -> 1.80 --- Log message: rename fields of constant pool entries --- Diffs of the changes: (+4 -6) JITEmitter.cpp | 10 ++++------ 1 files changed, 4 insertions(+), 6 deletions(-) Index: llvm/lib/ExecutionEngine/JIT/JITEmitter.cpp diff -u llvm/lib/ExecutionEngine/JIT/JITEmitter.cpp:1.79 llvm/lib/ExecutionEngine/JIT/JITEmitter.cpp:1.80 --- llvm/lib/ExecutionEngine/JIT/JITEmitter.cpp:1.79 Tue Jan 31 16:21:33 2006 +++ llvm/lib/ExecutionEngine/JIT/JITEmitter.cpp Wed Feb 8 22:22:52 2006 @@ -566,18 +566,16 @@ } void JITEmitter::emitConstantPool(MachineConstantPool *MCP) { - const std::vector<std::pair<Constant*, unsigned> > &Constants = MCP->getConstants(); + const std::vector<MachineConstantPoolEntry> &Constants = MCP->getConstants(); if (Constants.empty()) return; for (unsigned i = 0, e = Constants.size(); i != e; ++i) { - const Type *Ty = Constants[i].first->getType(); + const Type *Ty = Constants[i].Val->getType(); unsigned Size = (unsigned)TheJIT->getTargetData().getTypeSize(Ty); - unsigned Alignment = (Constants[i].second == 0) - ? 
TheJIT->getTargetData().getTypeAlignment(Ty) - : Constants[i].second; + unsigned Alignment = Constants[i].Alignment; void *Addr = MemMgr.allocateConstant(Size, Alignment); - TheJIT->InitializeMemory(Constants[i].first, Addr); + TheJIT->InitializeMemory(Constants[i].Val, Addr); ConstantPoolAddresses.push_back(Addr); } } From lattner at cs.uiuc.edu Wed Feb 8 22:23:06 2006 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Wed, 8 Feb 2006 22:23:06 -0600 Subject: [llvm-commits] CVS: llvm/lib/CodeGen/AsmPrinter.cpp MachineFunction.cpp Message-ID: <200602090423.WAA10612@zion.cs.uiuc.edu> Changes in directory llvm/lib/CodeGen: AsmPrinter.cpp updated: 1.50 -> 1.51 MachineFunction.cpp updated: 1.84 -> 1.85 --- Log message: rename fields of constant pool entries --- Diffs of the changes: (+6 -6) AsmPrinter.cpp | 8 ++++---- MachineFunction.cpp | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) Index: llvm/lib/CodeGen/AsmPrinter.cpp diff -u llvm/lib/CodeGen/AsmPrinter.cpp:1.50 llvm/lib/CodeGen/AsmPrinter.cpp:1.51 --- llvm/lib/CodeGen/AsmPrinter.cpp:1.50 Wed Feb 8 20:26:04 2006 +++ llvm/lib/CodeGen/AsmPrinter.cpp Wed Feb 8 22:22:52 2006 @@ -106,16 +106,16 @@ /// the code generator. 
/// void AsmPrinter::EmitConstantPool(MachineConstantPool *MCP) { - const std::vector<std::pair<Constant*, unsigned> > &CP = MCP->getConstants(); + const std::vector<MachineConstantPoolEntry> &CP = MCP->getConstants(); if (CP.empty()) return; const TargetData &TD = TM.getTargetData(); SwitchSection(ConstantPoolSection, 0); for (unsigned i = 0, e = CP.size(); i != e; ++i) { - EmitAlignment(CP[i].second); + EmitAlignment(CP[i].Alignment); O << PrivateGlobalPrefix << "CPI" << getFunctionNumber() << '_' << i - << ":\t\t\t\t\t" << CommentString << *CP[i].first << '\n'; - EmitGlobalConstant(CP[i].first); + << ":\t\t\t\t\t" << CommentString << *CP[i].Val << '\n'; + EmitGlobalConstant(CP[i].Val); } } Index: llvm/lib/CodeGen/MachineFunction.cpp diff -u llvm/lib/CodeGen/MachineFunction.cpp:1.84 llvm/lib/CodeGen/MachineFunction.cpp:1.85 --- llvm/lib/CodeGen/MachineFunction.cpp:1.84 Tue Jan 31 16:21:33 2006 +++ llvm/lib/CodeGen/MachineFunction.cpp Wed Feb 8 22:22:52 2006 @@ -347,8 +347,8 @@ void MachineConstantPool::print(std::ostream &OS) const { for (unsigned i = 0, e = Constants.size(); i != e; ++i) { - OS << " is" << *(Value*)Constants[i].first; - if (Constants[i].second != 0) OS << " , align=" << Constants[i].second; + OS << " is" << *(Value*)Constants[i].Val; + OS << " , align=" << Constants[i].Alignment; OS << "\n"; } } From lattner at cs.uiuc.edu Wed Feb 8 22:44:44 2006 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Wed, 8 Feb 2006 22:44:44 -0600 Subject: [llvm-commits] CVS: llvm/include/llvm/CodeGen/MachineConstantPool.h Message-ID: <200602090444.WAA10793@zion.cs.uiuc.edu> Changes in directory llvm/include/llvm/CodeGen: MachineConstantPool.h updated: 1.13 -> 1.14 --- Log message: instead of keeping track of Constant/alignment pairs, actually compute the offset of each entry from the start of the constant pool. 
--- Diffs of the changes: (+15 -17) MachineConstantPool.h | 32 +++++++++++++++----------------- 1 files changed, 15 insertions(+), 17 deletions(-) Index: llvm/include/llvm/CodeGen/MachineConstantPool.h diff -u llvm/include/llvm/CodeGen/MachineConstantPool.h:1.13 llvm/include/llvm/CodeGen/MachineConstantPool.h:1.14 --- llvm/include/llvm/CodeGen/MachineConstantPool.h:1.13 Wed Feb 8 22:21:49 2006 +++ llvm/include/llvm/CodeGen/MachineConstantPool.h Wed Feb 8 22:44:32 2006 @@ -24,42 +24,39 @@ #include #include -#include namespace llvm { class Constant; +class TargetData; /// MachineConstantPoolEntry - One entry in the constant pool. /// struct MachineConstantPoolEntry { /// Val - The constant itself. Constant *Val; - /// Alignment - The alignment of the constant. - unsigned Alignment; + /// Offset - The offset of the constant from the start of the constant pool. + unsigned Offset; - MachineConstantPoolEntry(Constant *V, unsigned A) : Val(V), Alignment(A) {} + MachineConstantPoolEntry(Constant *V, unsigned O) : Val(V), Offset(O) {} }; class MachineConstantPool { + const TargetData &TD; + unsigned PoolAlignment; std::vector Constants; public: + MachineConstantPool(const TargetData &td) : TD(td), PoolAlignment(1) {} + + /// getConstantPoolAlignment - Return the log2 of the alignment required by + /// the whole constant pool, of which the first element must be aligned. + unsigned getConstantPoolAlignment() const { return PoolAlignment; } + /// getConstantPoolIndex - Create a new entry in the constant pool or return /// an existing one. User must specify an alignment in bytes for the object. /// - unsigned getConstantPoolIndex(Constant *C, unsigned Alignment) { - assert(Alignment && "Alignment must be specified!"); - - // Check to see if we already have this constant. - // - // FIXME, this could be made much more efficient for large constant pools. 
- for (unsigned i = 0, e = Constants.size(); i != e; ++i) - if (Constants[i].Val == C && Constants[i].Alignment >= Alignment) - return i; - Constants.push_back(MachineConstantPoolEntry(C, Alignment)); - return Constants.size()-1; - } - + unsigned getConstantPoolIndex(Constant *C, unsigned Alignment); + /// isEmpty - Return true if this constant pool contains no constants. /// bool isEmpty() const { return Constants.empty(); } @@ -74,6 +71,7 @@ void print(std::ostream &OS) const; /// dump - Call print(std::cerr) to be called from the debugger. + /// void dump() const; }; From lattner at cs.uiuc.edu Wed Feb 8 22:46:17 2006 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Wed, 8 Feb 2006 22:46:17 -0600 Subject: [llvm-commits] CVS: llvm/lib/ExecutionEngine/JIT/JITEmitter.cpp Message-ID: <200602090446.WAA10851@zion.cs.uiuc.edu> Changes in directory llvm/lib/ExecutionEngine/JIT: JITEmitter.cpp updated: 1.80 -> 1.81 --- Log message: Adjust to MachineConstantPool interface change: instead of keeping a value/alignment pair for each constant, keep a value/offset pair. --- Diffs of the changes: (+10 -7) JITEmitter.cpp | 17 ++++++++++------- 1 files changed, 10 insertions(+), 7 deletions(-) Index: llvm/lib/ExecutionEngine/JIT/JITEmitter.cpp diff -u llvm/lib/ExecutionEngine/JIT/JITEmitter.cpp:1.80 llvm/lib/ExecutionEngine/JIT/JITEmitter.cpp:1.81 --- llvm/lib/ExecutionEngine/JIT/JITEmitter.cpp:1.80 Wed Feb 8 22:22:52 2006 +++ llvm/lib/ExecutionEngine/JIT/JITEmitter.cpp Wed Feb 8 22:46:04 2006 @@ -569,14 +569,17 @@ const std::vector &Constants = MCP->getConstants(); if (Constants.empty()) return; + unsigned Size = Constants.back().Offset; + Size += TheJIT->getTargetData().getTypeSize(Constants.back().Val->getType()); + + void *Addr = MemMgr.allocateConstant(Size, + 1 << MCP->getConstantPoolAlignment()); + + // FIXME: Can eliminate ConstantPoolAddresses! 
for (unsigned i = 0, e = Constants.size(); i != e; ++i) { - const Type *Ty = Constants[i].Val->getType(); - unsigned Size = (unsigned)TheJIT->getTargetData().getTypeSize(Ty); - unsigned Alignment = Constants[i].Alignment; - - void *Addr = MemMgr.allocateConstant(Size, Alignment); - TheJIT->InitializeMemory(Constants[i].Val, Addr); - ConstantPoolAddresses.push_back(Addr); + void *CAddr = (char*)Addr+Constants[i].Offset; + TheJIT->InitializeMemory(Constants[i].Val, CAddr); + ConstantPoolAddresses.push_back(CAddr); } } From lattner at cs.uiuc.edu Wed Feb 8 22:46:17 2006 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Wed, 8 Feb 2006 22:46:17 -0600 Subject: [llvm-commits] CVS: llvm/lib/Target/SparcV9/SparcV9AsmPrinter.cpp Message-ID: <200602090446.WAA10853@zion.cs.uiuc.edu> Changes in directory llvm/lib/Target/SparcV9: SparcV9AsmPrinter.cpp updated: 1.129 -> 1.130 --- Log message: Adjust to MachineConstantPool interface change: instead of keeping a value/alignment pair for each constant, keep a value/offset pair. --- Diffs of the changes: (+12 -2) SparcV9AsmPrinter.cpp | 14 ++++++++++++-- 1 files changed, 12 insertions(+), 2 deletions(-) Index: llvm/lib/Target/SparcV9/SparcV9AsmPrinter.cpp diff -u llvm/lib/Target/SparcV9/SparcV9AsmPrinter.cpp:1.129 llvm/lib/Target/SparcV9/SparcV9AsmPrinter.cpp:1.130 --- llvm/lib/Target/SparcV9/SparcV9AsmPrinter.cpp:1.129 Wed Feb 8 22:22:52 2006 +++ llvm/lib/Target/SparcV9/SparcV9AsmPrinter.cpp Wed Feb 8 22:46:04 2006 @@ -216,7 +216,8 @@ if (Alignment == 0) Alignment = ConstantToAlignment(CV, TM); - O << "\t.align\t" << Alignment << "\n"; + if (Alignment != 1) + O << "\t.align\t" << Alignment << "\n"; // Print .size and .type only if it is not a string. 
if (const ConstantArray *CVA = dyn_cast(CV)) @@ -727,9 +728,18 @@ const std::vector &CP = MCP->getConstants(); enterSection(ReadOnlyData); + O << "\t.align\t" << (1 << MCP->getConstantPoolAlignment()) << "\n"; for (unsigned i = 0, e = CP.size(); i != e; ++i) { std::string cpiName = ".CPI_" + CurrentFnName + "_" + utostr(i); - printConstant(CP[i].Val, CP[i].Alignment, cpiName); + printConstant(CP[i].Val, 1, cpiName); + + if (i != e-1) { + unsigned EntSize = TM.getTargetData().getTypeSize(CP[i].Val->getType()); + unsigned ValEnd = CP[i].Offset + EntSize; + // Emit inter-object padding for alignment. + for (unsigned NumZeros = CP[i+1].Offset-ValEnd; NumZeros; --NumZeros) + O << "\t.byte 0\n"; + } } enterSection(Text); From lattner at cs.uiuc.edu Wed Feb 8 22:46:18 2006 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Wed, 8 Feb 2006 22:46:18 -0600 Subject: [llvm-commits] CVS: llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp Message-ID: <200602090446.WAA10859@zion.cs.uiuc.edu> Changes in directory llvm/lib/Target/Sparc: SparcISelDAGToDAG.cpp updated: 1.80 -> 1.81 --- Log message: Adjust to MachineConstantPool interface change: instead of keeping a value/alignment pair for each constant, keep a value/offset pair. 
--- Diffs of the changes: (+1 -1) SparcISelDAGToDAG.cpp | 2 +- 1 files changed, 1 insertion(+), 1 deletion(-) Index: llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp diff -u llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp:1.80 llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp:1.81 --- llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp:1.80 Wed Feb 8 18:37:58 2006 +++ llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp Wed Feb 8 22:46:04 2006 @@ -968,7 +968,7 @@ } bool SparcDAGToDAGISel::SelectADDRri(SDOperand Addr, SDOperand &Base, - SDOperand &Offset) { + SDOperand &Offset) { if (FrameIndexSDNode *FIN = dyn_cast(Addr)) { Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32); Offset = CurDAG->getTargetConstant(0, MVT::i32); From lattner at cs.uiuc.edu Wed Feb 8 22:46:18 2006 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Wed, 8 Feb 2006 22:46:18 -0600 Subject: [llvm-commits] CVS: llvm/lib/CodeGen/AsmPrinter.cpp MachineFunction.cpp Message-ID: <200602090446.WAA10865@zion.cs.uiuc.edu> Changes in directory llvm/lib/CodeGen: AsmPrinter.cpp updated: 1.51 -> 1.52 MachineFunction.cpp updated: 1.85 -> 1.86 --- Log message: Adjust to MachineConstantPool interface change: instead of keeping a value/alignment pair for each constant, keep a value/offset pair. 
--- Diffs of the changes: (+37 -3) AsmPrinter.cpp | 8 +++++++- MachineFunction.cpp | 32 ++++++++++++++++++++++++++++++-- 2 files changed, 37 insertions(+), 3 deletions(-) Index: llvm/lib/CodeGen/AsmPrinter.cpp diff -u llvm/lib/CodeGen/AsmPrinter.cpp:1.51 llvm/lib/CodeGen/AsmPrinter.cpp:1.52 --- llvm/lib/CodeGen/AsmPrinter.cpp:1.51 Wed Feb 8 22:22:52 2006 +++ llvm/lib/CodeGen/AsmPrinter.cpp Wed Feb 8 22:46:04 2006 @@ -111,11 +111,17 @@ const TargetData &TD = TM.getTargetData(); SwitchSection(ConstantPoolSection, 0); + EmitAlignment(MCP->getConstantPoolAlignment()); for (unsigned i = 0, e = CP.size(); i != e; ++i) { - EmitAlignment(CP[i].Alignment); O << PrivateGlobalPrefix << "CPI" << getFunctionNumber() << '_' << i << ":\t\t\t\t\t" << CommentString << *CP[i].Val << '\n'; EmitGlobalConstant(CP[i].Val); + if (i != e-1) { + unsigned EntSize = TM.getTargetData().getTypeSize(CP[i].Val->getType()); + unsigned ValEnd = CP[i].Offset + EntSize; + // Emit inter-object padding for alignment. + EmitZeros(CP[i+1].Offset-ValEnd); + } } } Index: llvm/lib/CodeGen/MachineFunction.cpp diff -u llvm/lib/CodeGen/MachineFunction.cpp:1.85 llvm/lib/CodeGen/MachineFunction.cpp:1.86 --- llvm/lib/CodeGen/MachineFunction.cpp:1.85 Wed Feb 8 22:22:52 2006 +++ llvm/lib/CodeGen/MachineFunction.cpp Wed Feb 8 22:46:04 2006 @@ -112,7 +112,7 @@ SSARegMapping = new SSARegMap(); MFInfo = 0; FrameInfo = new MachineFrameInfo(); - ConstantPool = new MachineConstantPool(); + ConstantPool = new MachineConstantPool(TM.getTargetData()); BasicBlocks.Parent = this; } @@ -345,10 +345,38 @@ // MachineConstantPool implementation //===----------------------------------------------------------------------===// +/// getConstantPoolIndex - Create a new entry in the constant pool or return +/// an existing one. User must specify an alignment in bytes for the object. 
+/// +unsigned MachineConstantPool::getConstantPoolIndex(Constant *C, + unsigned Alignment) { + assert(Alignment && "Alignment must be specified!"); + if (Alignment > PoolAlignment) PoolAlignment = Alignment; + + // Check to see if we already have this constant. + // + // FIXME, this could be made much more efficient for large constant pools. + unsigned AlignMask = (1 << Alignment)-1; + for (unsigned i = 0, e = Constants.size(); i != e; ++i) + if (Constants[i].Val == C && (Constants[i].Offset & AlignMask) == 0) + return i; + + unsigned Offset = 0; + if (!Constants.empty()) { + Offset = Constants.back().Offset; + Offset += TD.getTypeSize(Constants.back().Val->getType()); + Offset = (Offset+AlignMask)&~AlignMask; + } + + Constants.push_back(MachineConstantPoolEntry(C, Offset)); + return Constants.size()-1; +} + + void MachineConstantPool::print(std::ostream &OS) const { for (unsigned i = 0, e = Constants.size(); i != e; ++i) { OS << " is" << *(Value*)Constants[i].Val; - OS << " , align=" << Constants[i].Alignment; + OS << " , offset=" << Constants[i].Offset; OS << "\n"; } } From lattner at cs.uiuc.edu Wed Feb 8 22:50:11 2006 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Wed, 8 Feb 2006 22:50:11 -0600 Subject: [llvm-commits] CVS: llvm/lib/ExecutionEngine/JIT/JITEmitter.cpp Message-ID: <200602090450.WAA10932@zion.cs.uiuc.edu> Changes in directory llvm/lib/ExecutionEngine/JIT: JITEmitter.cpp updated: 1.81 -> 1.82 --- Log message: simplify this code now that each constant pool entry is not separately allocated --- Diffs of the changes: (+16 -15) JITEmitter.cpp | 31 ++++++++++++++++--------------- 1 files changed, 16 insertions(+), 15 deletions(-) Index: llvm/lib/ExecutionEngine/JIT/JITEmitter.cpp diff -u llvm/lib/ExecutionEngine/JIT/JITEmitter.cpp:1.81 llvm/lib/ExecutionEngine/JIT/JITEmitter.cpp:1.82 --- llvm/lib/ExecutionEngine/JIT/JITEmitter.cpp:1.81 Wed Feb 8 22:46:04 2006 +++ llvm/lib/ExecutionEngine/JIT/JITEmitter.cpp Wed Feb 8 22:49:59 2006 @@ -419,18 +419,19 
@@ // save CurBlock and CurByte here. unsigned char *SavedCurBlock, *SavedCurByte; - // ConstantPoolAddresses - Contains the location for each entry in the - // constant pool. - std::vector ConstantPoolAddresses; - /// Relocations - These are the relocations that the function needs, as /// emitted. std::vector Relocations; + /// ConstantPool - The constant pool for the current function. + /// + MachineConstantPool *ConstantPool; + + /// ConstantPoolBase - A pointer to the first entry in the constant pool. + /// + void *ConstantPoolBase; public: - JITEmitter(JIT &jit) - :MemMgr(jit.getJITInfo().needsGOT()) - { + JITEmitter(JIT &jit) : MemMgr(jit.getJITInfo().needsGOT()) { TheJIT = &jit; DEBUG(std::cerr << (MemMgr.isManagingGOT() ? "JIT is managing GOT\n" @@ -562,7 +563,6 @@ << ": " << CurByte-CurBlock << " bytes of text, " << Relocations.size() << " relocations\n"); Relocations.clear(); - ConstantPoolAddresses.clear(); } void JITEmitter::emitConstantPool(MachineConstantPool *MCP) { @@ -572,14 +572,14 @@ unsigned Size = Constants.back().Offset; Size += TheJIT->getTargetData().getTypeSize(Constants.back().Val->getType()); - void *Addr = MemMgr.allocateConstant(Size, + ConstantPoolBase = MemMgr.allocateConstant(Size, 1 << MCP->getConstantPoolAlignment()); - - // FIXME: Can eliminate ConstantPoolAddresses! + ConstantPool = MCP; + + // Initialize the memory for all of the constant pool entries. for (unsigned i = 0, e = Constants.size(); i != e; ++i) { - void *CAddr = (char*)Addr+Constants[i].Offset; + void *CAddr = (char*)ConstantPoolBase+Constants[i].Offset; TheJIT->InitializeMemory(Constants[i].Val, CAddr); - ConstantPoolAddresses.push_back(CAddr); } } @@ -615,9 +615,10 @@ // method. 
// uint64_t JITEmitter::getConstantPoolEntryAddress(unsigned ConstantNum) { - assert(ConstantNum < ConstantPoolAddresses.size() && + assert(ConstantNum < ConstantPool->getConstants().size() && "Invalid ConstantPoolIndex!"); - return (intptr_t)ConstantPoolAddresses[ConstantNum]; + return (intptr_t)ConstantPoolBase + + ConstantPool->getConstants()[ConstantNum].Offset; } unsigned char* JITEmitter::allocateGlobal(unsigned size, unsigned alignment) From lattner at cs.uiuc.edu Wed Feb 8 23:06:48 2006 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Wed, 8 Feb 2006 23:06:48 -0600 Subject: [llvm-commits] CVS: llvm/lib/Target/Sparc/SparcInstrInfo.td SparcTargetMachine.cpp Message-ID: <200602090506.XAA11093@zion.cs.uiuc.edu> Changes in directory llvm/lib/Target/Sparc: SparcInstrInfo.td updated: 1.118 -> 1.119 SparcTargetMachine.cpp updated: 1.40 -> 1.41 --- Log message: add an option to turn on LSR. --- Diffs of the changes: (+62 -1) SparcInstrInfo.td | 56 +++++++++++++++++++++++++++++++++++++++++++++++++ SparcTargetMachine.cpp | 7 +++++- 2 files changed, 62 insertions(+), 1 deletion(-) Index: llvm/lib/Target/Sparc/SparcInstrInfo.td diff -u llvm/lib/Target/Sparc/SparcInstrInfo.td:1.118 llvm/lib/Target/Sparc/SparcInstrInfo.td:1.119 --- llvm/lib/Target/Sparc/SparcInstrInfo.td:1.118 Sat Feb 4 23:50:24 2006 +++ llvm/lib/Target/Sparc/SparcInstrInfo.td Wed Feb 8 23:06:36 2006 @@ -493,6 +493,7 @@ def UMULri : F3_2<2, 0b001010, (ops IntRegs:$dst, IntRegs:$b, i32imm:$c), "umul $b, $c, $dst", []>; + def SMULrr : F3_1<2, 0b001011, (ops IntRegs:$dst, IntRegs:$b, IntRegs:$c), "smul $b, $c, $dst", @@ -502,6 +503,61 @@ "smul $b, $c, $dst", [(set IntRegs:$dst, (mul IntRegs:$b, simm13:$c))]>; +/* +//===------------------------- +// Sparc Example +defm intinst { + def OPC1 : F3_1<2, Opc, asmstr, (ops IntRegs:$dst, IntRegs:$b, IntRegs:$c), + [(set IntRegs:$dst, (code IntRegs:$b, IntRegs:$c))]>; + def OPC2 : F3_2<2, Opc, asmstr, (ops IntRegs:$dst, IntRegs:$b, i32imm:$c), + [(set IntRegs:$dst, 
(code IntRegs:$b, simm13:$c))]>; +} +defm intinst_np { + def OPC1 : F3_1<2, Opc, asmstr, (ops IntRegs:$dst, IntRegs:$b, IntRegs:$c), + []>; + def OPC2 : F3_2<2, Opc, asmstr, (ops IntRegs:$dst, IntRegs:$b, i32imm:$c), + []>; +} + +def intinstnp< ADDXrr, ADDXri, 0b001000, "addx $b, $c, $dst">; +def intinst < SUBrr, SUBri, 0b000100, "sub $b, $c, $dst", sub>; +def intinstnp< SUBXrr, SUBXri, 0b001100, "subx $b, $c, $dst">; +def intinst ; +def intinst < SMULrr, SMULri, 0b001011, "smul $b, $c, $dst", mul>; + +//===------------------------- +// X86 Example +defm cmov32 { + def OPC1 : I, TB; + def OPC2 : I, TB; +} + +def cmov; +def cmov; + +//===------------------------- +// PPC Example + +def fpunop { + def OPC1 : FORM; + def OPC2 : FORM; +} + +def fpunop< FABSS, FABSD, XForm_26, 63, 63, 264, "fabs", fabs>; +def fpunop; +def fpunop< FNEGS, FNEGD, XForm_26, 63, 63, 40, "fneg", fneg>; +*/ + // Section B.19 - Divide Instructions, p. 115 def UDIVrr : F3_1<2, 0b001110, (ops IntRegs:$dst, IntRegs:$b, IntRegs:$c), Index: llvm/lib/Target/Sparc/SparcTargetMachine.cpp diff -u llvm/lib/Target/Sparc/SparcTargetMachine.cpp:1.40 llvm/lib/Target/Sparc/SparcTargetMachine.cpp:1.41 --- llvm/lib/Target/Sparc/SparcTargetMachine.cpp:1.40 Sat Feb 4 23:50:24 2006 +++ llvm/lib/Target/Sparc/SparcTargetMachine.cpp Wed Feb 8 23:06:36 2006 @@ -27,6 +27,8 @@ namespace { // Register the target. RegisterTarget X("sparc", " SPARC"); + + cl::opt EnableLSR("enable-sparc-lsr", cl::Hidden); } /// SparcTargetMachine ctor - Create an ILP32 architecture model @@ -65,6 +67,9 @@ bool Fast) { if (FileType != TargetMachine::AssemblyFile) return true; + // Run loop strength reduction before anything else. + if (EnableLSR && !Fast) PM.add(createLoopStrengthReducePass()); + // FIXME: Implement efficient support for garbage collection intrinsics. PM.add(createLowerGCPass()); @@ -73,7 +78,7 @@ // FIXME: implement the switch instruction in the instruction selector. 
PM.add(createLowerSwitchPass()); - + // Print LLVM code input to instruction selector: if (PrintMachineCode) PM.add(new PrintFunctionPass()); From lattner at cs.uiuc.edu Wed Feb 8 23:08:09 2006 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Wed, 8 Feb 2006 23:08:09 -0600 Subject: [llvm-commits] CVS: llvm-test/Makefile.programs Message-ID: <200602090508.XAA11179@zion.cs.uiuc.edu> Changes in directory llvm-test: Makefile.programs updated: 1.189 -> 1.190 --- Log message: turn on lsr for Sparc llc-beta --- Diffs of the changes: (+1 -1) Makefile.programs | 2 +- 1 files changed, 1 insertion(+), 1 deletion(-) Index: llvm-test/Makefile.programs diff -u llvm-test/Makefile.programs:1.189 llvm-test/Makefile.programs:1.190 --- llvm-test/Makefile.programs:1.189 Thu Feb 2 01:53:24 2006 +++ llvm-test/Makefile.programs Wed Feb 8 23:07:57 2006 @@ -200,7 +200,7 @@ LLCBETAOPTION := -enable-x86-sse endif ifeq ($(ARCH),Sparc) -LLCBETAOPTION := -enable-sparc-v9-insts +LLCBETAOPTION := -enable-sparc-lsr endif # Given a version of the entire program linked together into a single unit of From lattner at cs.uiuc.edu Thu Feb 9 01:12:25 2006 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Thu, 9 Feb 2006 01:12:25 -0600 Subject: [llvm-commits] CVS: llvm/test/Regression/CodeGen/Generic/DebugStuff.ll Message-ID: <200602090712.BAA11575@zion.cs.uiuc.edu> Changes in directory llvm/test/Regression/CodeGen/Generic: DebugStuff.ll updated: 1.2 -> 1.3 --- Log message: this apparently passes on linux --- Diffs of the changes: (+1 -1) DebugStuff.ll | 2 +- 1 files changed, 1 insertion(+), 1 deletion(-) Index: llvm/test/Regression/CodeGen/Generic/DebugStuff.ll diff -u llvm/test/Regression/CodeGen/Generic/DebugStuff.ll:1.2 llvm/test/Regression/CodeGen/Generic/DebugStuff.ll:1.3 --- llvm/test/Regression/CodeGen/Generic/DebugStuff.ll:1.2 Wed Feb 8 12:17:06 2006 +++ llvm/test/Regression/CodeGen/Generic/DebugStuff.ll Thu Feb 9 01:12:13 2006 @@ -1,6 +1,6 @@ ; Verify debugger intrinsics are ignored or 
accepted. ; RUN: llvm-as < %s | llc -; XFAIL: * +; XFAIL: powerpc ;; Debugger type declarations %lldb.compile_unit = type { uint, ushort, ushort, sbyte*, sbyte*, sbyte*, {}* } From evan.cheng at apple.com Thu Feb 9 01:15:35 2006 From: evan.cheng at apple.com (Evan Cheng) Date: Thu, 9 Feb 2006 01:15:35 -0600 Subject: [llvm-commits] CVS: llvm/include/llvm/CodeGen/SelectionDAG.h Message-ID: <200602090715.BAA11619@zion.cs.uiuc.edu> Changes in directory llvm/include/llvm/CodeGen: SelectionDAG.h updated: 1.95 -> 1.96 --- Log message: More changes to reduce frame size. Move all getTargetNode() out of SelectionDAG.h into SelectionDAG.cpp. This prevents them from being inlined. Change getTargetNode() so they return SDNode * instead of SDOperand to prevent copying. It should also help compilation speed. --- Diffs of the changes: (+72 -236) SelectionDAG.h | 308 +++++++++++++-------------------------------------------- 1 files changed, 72 insertions(+), 236 deletions(-) Index: llvm/include/llvm/CodeGen/SelectionDAG.h diff -u llvm/include/llvm/CodeGen/SelectionDAG.h:1.95 llvm/include/llvm/CodeGen/SelectionDAG.h:1.96 --- llvm/include/llvm/CodeGen/SelectionDAG.h:1.95 Tue Jan 31 16:21:33 2006 +++ llvm/include/llvm/CodeGen/SelectionDAG.h Thu Feb 9 01:15:23 2006 @@ -307,242 +307,78 @@ MVT::ValueType VT2, SDOperand Op1, SDOperand Op2, SDOperand Op3, SDOperand Op4, SDOperand Op5); - SDOperand getTargetNode(unsigned Opcode, MVT::ValueType VT) { - return getNode(ISD::BUILTIN_OP_END+Opcode, VT); - } - SDOperand getTargetNode(unsigned Opcode, MVT::ValueType VT, - SDOperand Op1) { - return getNode(ISD::BUILTIN_OP_END+Opcode, VT, Op1); - } - SDOperand getTargetNode(unsigned Opcode, MVT::ValueType VT, - SDOperand Op1, SDOperand Op2) { - return getNode(ISD::BUILTIN_OP_END+Opcode, VT, Op1, Op2); - } - SDOperand getTargetNode(unsigned Opcode, MVT::ValueType VT, - SDOperand Op1, SDOperand Op2, SDOperand Op3) { - return getNode(ISD::BUILTIN_OP_END+Opcode, VT, Op1, Op2, Op3); - } - SDOperand 
getTargetNode(unsigned Opcode, MVT::ValueType VT, - SDOperand Op1, SDOperand Op2, SDOperand Op3, - SDOperand Op4) { - return getNode(ISD::BUILTIN_OP_END+Opcode, VT, Op1, Op2, Op3, Op4); - } - SDOperand getTargetNode(unsigned Opcode, MVT::ValueType VT, - SDOperand Op1, SDOperand Op2, SDOperand Op3, - SDOperand Op4, SDOperand Op5) { - return getNode(ISD::BUILTIN_OP_END+Opcode, VT, Op1, Op2, Op3, Op4, Op5); - } - SDOperand getTargetNode(unsigned Opcode, MVT::ValueType VT, - SDOperand Op1, SDOperand Op2, SDOperand Op3, - SDOperand Op4, SDOperand Op5, SDOperand Op6) { - std::vector Ops; - Ops.reserve(6); - Ops.push_back(Op1); - Ops.push_back(Op2); - Ops.push_back(Op3); - Ops.push_back(Op4); - Ops.push_back(Op5); - Ops.push_back(Op6); - return getNode(ISD::BUILTIN_OP_END+Opcode, VT, Ops); - } - SDOperand getTargetNode(unsigned Opcode, MVT::ValueType VT, - SDOperand Op1, SDOperand Op2, SDOperand Op3, - SDOperand Op4, SDOperand Op5, SDOperand Op6, - SDOperand Op7) { - std::vector Ops; - Ops.reserve(7); - Ops.push_back(Op1); - Ops.push_back(Op2); - Ops.push_back(Op3); - Ops.push_back(Op4); - Ops.push_back(Op5); - Ops.push_back(Op6); - Ops.push_back(Op7); - return getNode(ISD::BUILTIN_OP_END+Opcode, VT, Ops); - } - SDOperand getTargetNode(unsigned Opcode, MVT::ValueType VT, - SDOperand Op1, SDOperand Op2, SDOperand Op3, - SDOperand Op4, SDOperand Op5, SDOperand Op6, - SDOperand Op7, SDOperand Op8) { - std::vector Ops; - Ops.reserve(8); - Ops.push_back(Op1); - Ops.push_back(Op2); - Ops.push_back(Op3); - Ops.push_back(Op4); - Ops.push_back(Op5); - Ops.push_back(Op6); - Ops.push_back(Op7); - Ops.push_back(Op8); - return getNode(ISD::BUILTIN_OP_END+Opcode, VT, Ops); - } - SDOperand getTargetNode(unsigned Opcode, MVT::ValueType VT, - std::vector &Ops) { - return getNode(ISD::BUILTIN_OP_END+Opcode, VT, Ops); - } - SDOperand getTargetNode(unsigned Opcode, MVT::ValueType VT1, - MVT::ValueType VT2, SDOperand Op1) { - std::vector ResultTys; - ResultTys.push_back(VT1); - 
ResultTys.push_back(VT2); - std::vector Ops; - Ops.push_back(Op1); - return getNode(ISD::BUILTIN_OP_END+Opcode, ResultTys, Ops); - } - SDOperand getTargetNode(unsigned Opcode, MVT::ValueType VT1, - MVT::ValueType VT2, SDOperand Op1, SDOperand Op2) { - std::vector ResultTys; - ResultTys.push_back(VT1); - ResultTys.push_back(VT2); - std::vector Ops; - Ops.push_back(Op1); - Ops.push_back(Op2); - return getNode(ISD::BUILTIN_OP_END+Opcode, ResultTys, Ops); - } - SDOperand getTargetNode(unsigned Opcode, MVT::ValueType VT1, - MVT::ValueType VT2, SDOperand Op1, SDOperand Op2, - SDOperand Op3) { - std::vector ResultTys; - ResultTys.push_back(VT1); - ResultTys.push_back(VT2); - std::vector Ops; - Ops.push_back(Op1); - Ops.push_back(Op2); - Ops.push_back(Op3); - return getNode(ISD::BUILTIN_OP_END+Opcode, ResultTys, Ops); - } - SDOperand getTargetNode(unsigned Opcode, MVT::ValueType VT1, - MVT::ValueType VT2, SDOperand Op1, SDOperand Op2, - SDOperand Op3, SDOperand Op4) { - std::vector ResultTys; - ResultTys.push_back(VT1); - ResultTys.push_back(VT2); - std::vector Ops; - Ops.push_back(Op1); - Ops.push_back(Op2); - Ops.push_back(Op3); - Ops.push_back(Op4); - return getNode(ISD::BUILTIN_OP_END+Opcode, ResultTys, Ops); - } - SDOperand getTargetNode(unsigned Opcode, MVT::ValueType VT1, - MVT::ValueType VT2, SDOperand Op1, SDOperand Op2, - SDOperand Op3, SDOperand Op4, SDOperand Op5) { - std::vector ResultTys; - ResultTys.push_back(VT1); - ResultTys.push_back(VT2); - std::vector Ops; - Ops.push_back(Op1); - Ops.push_back(Op2); - Ops.push_back(Op3); - Ops.push_back(Op4); - Ops.push_back(Op5); - return getNode(ISD::BUILTIN_OP_END+Opcode, ResultTys, Ops); - } - SDOperand getTargetNode(unsigned Opcode, MVT::ValueType VT1, - MVT::ValueType VT2, SDOperand Op1, SDOperand Op2, - SDOperand Op3, SDOperand Op4, SDOperand Op5, - SDOperand Op6) { - std::vector ResultTys; - ResultTys.push_back(VT1); - ResultTys.push_back(VT2); - std::vector Ops; - Ops.push_back(Op1); - Ops.push_back(Op2); - 
Ops.push_back(Op3); - Ops.push_back(Op4); - Ops.push_back(Op5); - Ops.push_back(Op6); - return getNode(ISD::BUILTIN_OP_END+Opcode, ResultTys, Ops); - } - SDOperand getTargetNode(unsigned Opcode, MVT::ValueType VT1, - MVT::ValueType VT2, SDOperand Op1, SDOperand Op2, - SDOperand Op3, SDOperand Op4, SDOperand Op5, - SDOperand Op6, SDOperand Op7) { - std::vector ResultTys; - ResultTys.push_back(VT1); - ResultTys.push_back(VT2); - std::vector Ops; - Ops.push_back(Op1); - Ops.push_back(Op2); - Ops.push_back(Op3); - Ops.push_back(Op4); - Ops.push_back(Op5); - Ops.push_back(Op6); - Ops.push_back(Op7); - return getNode(ISD::BUILTIN_OP_END+Opcode, ResultTys, Ops); - } - SDOperand getTargetNode(unsigned Opcode, MVT::ValueType VT1, - MVT::ValueType VT2, MVT::ValueType VT3, - SDOperand Op1, SDOperand Op2) { - std::vector ResultTys; - ResultTys.push_back(VT1); - ResultTys.push_back(VT2); - ResultTys.push_back(VT3); - std::vector Ops; - Ops.push_back(Op1); - Ops.push_back(Op2); - return getNode(ISD::BUILTIN_OP_END+Opcode, ResultTys, Ops); - } - SDOperand getTargetNode(unsigned Opcode, MVT::ValueType VT1, - MVT::ValueType VT2, MVT::ValueType VT3, - SDOperand Op1, SDOperand Op2, - SDOperand Op3, SDOperand Op4, SDOperand Op5) { - std::vector ResultTys; - ResultTys.push_back(VT1); - ResultTys.push_back(VT2); - ResultTys.push_back(VT3); - std::vector Ops; - Ops.push_back(Op1); - Ops.push_back(Op2); - Ops.push_back(Op3); - Ops.push_back(Op4); - Ops.push_back(Op5); - return getNode(ISD::BUILTIN_OP_END+Opcode, ResultTys, Ops); - } - SDOperand getTargetNode(unsigned Opcode, MVT::ValueType VT1, - MVT::ValueType VT2, MVT::ValueType VT3, - SDOperand Op1, SDOperand Op2, - SDOperand Op3, SDOperand Op4, SDOperand Op5, - SDOperand Op6) { - std::vector ResultTys; - ResultTys.push_back(VT1); - ResultTys.push_back(VT2); - ResultTys.push_back(VT3); - std::vector Ops; - Ops.push_back(Op1); - Ops.push_back(Op2); - Ops.push_back(Op3); - Ops.push_back(Op4); - Ops.push_back(Op5); - Ops.push_back(Op6); - 
return getNode(ISD::BUILTIN_OP_END+Opcode, ResultTys, Ops); - } - SDOperand getTargetNode(unsigned Opcode, MVT::ValueType VT1, - MVT::ValueType VT2, MVT::ValueType VT3, - SDOperand Op1, SDOperand Op2, - SDOperand Op3, SDOperand Op4, SDOperand Op5, - SDOperand Op6, SDOperand Op7) { - std::vector ResultTys; - ResultTys.push_back(VT1); - ResultTys.push_back(VT2); - ResultTys.push_back(VT3); - std::vector Ops; - Ops.push_back(Op1); - Ops.push_back(Op2); - Ops.push_back(Op3); - Ops.push_back(Op4); - Ops.push_back(Op5); - Ops.push_back(Op6); - Ops.push_back(Op7); - return getNode(ISD::BUILTIN_OP_END+Opcode, ResultTys, Ops); - } - SDOperand getTargetNode(unsigned Opcode, MVT::ValueType VT1, - MVT::ValueType VT2, std::vector &Ops) { - std::vector ResultTys; - ResultTys.push_back(VT1); - ResultTys.push_back(VT2); - return getNode(ISD::BUILTIN_OP_END+Opcode, ResultTys, Ops); - } + /// getTargetNode - These are used for target selectors to create a new node + /// with specified return type(s), target opcode, and operands. + /// + /// Note that getTargetNode returns the resultant node. If there is already a + /// node of the specified opcode and operands, it returns that node instead of + /// the current one. 
+ SDNode *getTargetNode(unsigned Opcode, MVT::ValueType VT); + SDNode *getTargetNode(unsigned Opcode, MVT::ValueType VT, + SDOperand Op1); + SDNode *getTargetNode(unsigned Opcode, MVT::ValueType VT, + SDOperand Op1, SDOperand Op2); + SDNode *getTargetNode(unsigned Opcode, MVT::ValueType VT, + SDOperand Op1, SDOperand Op2, SDOperand Op3); + SDNode *getTargetNode(unsigned Opcode, MVT::ValueType VT, + SDOperand Op1, SDOperand Op2, SDOperand Op3, + SDOperand Op4); + SDNode *getTargetNode(unsigned Opcode, MVT::ValueType VT, + SDOperand Op1, SDOperand Op2, SDOperand Op3, + SDOperand Op4, SDOperand Op5); + SDNode *getTargetNode(unsigned Opcode, MVT::ValueType VT, + SDOperand Op1, SDOperand Op2, SDOperand Op3, + SDOperand Op4, SDOperand Op5, SDOperand Op6); + SDNode *getTargetNode(unsigned Opcode, MVT::ValueType VT, + SDOperand Op1, SDOperand Op2, SDOperand Op3, + SDOperand Op4, SDOperand Op5, SDOperand Op6, + SDOperand Op7); + SDNode *getTargetNode(unsigned Opcode, MVT::ValueType VT, + SDOperand Op1, SDOperand Op2, SDOperand Op3, + SDOperand Op4, SDOperand Op5, SDOperand Op6, + SDOperand Op7, SDOperand Op8); + SDNode *getTargetNode(unsigned Opcode, MVT::ValueType VT, + std::vector &Ops); + SDNode *getTargetNode(unsigned Opcode, MVT::ValueType VT1, + MVT::ValueType VT2, SDOperand Op1); + SDNode *getTargetNode(unsigned Opcode, MVT::ValueType VT1, + MVT::ValueType VT2, SDOperand Op1, SDOperand Op2); + SDNode *getTargetNode(unsigned Opcode, MVT::ValueType VT1, + MVT::ValueType VT2, SDOperand Op1, SDOperand Op2, + SDOperand Op3); + SDNode *getTargetNode(unsigned Opcode, MVT::ValueType VT1, + MVT::ValueType VT2, SDOperand Op1, SDOperand Op2, + SDOperand Op3, SDOperand Op4); + SDNode *getTargetNode(unsigned Opcode, MVT::ValueType VT1, + MVT::ValueType VT2, SDOperand Op1, SDOperand Op2, + SDOperand Op3, SDOperand Op4, SDOperand Op5); + SDNode *getTargetNode(unsigned Opcode, MVT::ValueType VT1, + MVT::ValueType VT2, SDOperand Op1, SDOperand Op2, + SDOperand Op3, SDOperand Op4, 
SDOperand Op5, + SDOperand Op6); + SDNode *getTargetNode(unsigned Opcode, MVT::ValueType VT1, + MVT::ValueType VT2, SDOperand Op1, SDOperand Op2, + SDOperand Op3, SDOperand Op4, SDOperand Op5, + SDOperand Op6, SDOperand Op7); + SDNode *getTargetNode(unsigned Opcode, MVT::ValueType VT1, + MVT::ValueType VT2, MVT::ValueType VT3, + SDOperand Op1, SDOperand Op2); + SDNode *getTargetNode(unsigned Opcode, MVT::ValueType VT1, + MVT::ValueType VT2, MVT::ValueType VT3, + SDOperand Op1, SDOperand Op2, + SDOperand Op3, SDOperand Op4, SDOperand Op5); + SDNode *getTargetNode(unsigned Opcode, MVT::ValueType VT1, + MVT::ValueType VT2, MVT::ValueType VT3, + SDOperand Op1, SDOperand Op2, + SDOperand Op3, SDOperand Op4, SDOperand Op5, + SDOperand Op6); + SDNode *getTargetNode(unsigned Opcode, MVT::ValueType VT1, + MVT::ValueType VT2, MVT::ValueType VT3, + SDOperand Op1, SDOperand Op2, + SDOperand Op3, SDOperand Op4, SDOperand Op5, + SDOperand Op6, SDOperand Op7); + SDNode *getTargetNode(unsigned Opcode, MVT::ValueType VT1, + MVT::ValueType VT2, std::vector &Ops); /// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead. /// This can cause recursive merging of nodes in the DAG. Use the first From evan.cheng at apple.com Thu Feb 9 01:15:35 2006 From: evan.cheng at apple.com (Evan Cheng) Date: Thu, 9 Feb 2006 01:15:35 -0600 Subject: [llvm-commits] CVS: llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp Message-ID: <200602090715.BAA11623@zion.cs.uiuc.edu> Changes in directory llvm/lib/CodeGen/SelectionDAG: SelectionDAG.cpp updated: 1.254 -> 1.255 --- Log message: More changes to reduce frame size. Move all getTargetNode() out of SelectionDAG.h into SelectionDAG.cpp. This prevents them from being inlined. Change getTargetNode() so they return SDNode * instead of SDOperand to prevent copying. It should also help compilation speed. 
--- Diffs of the changes: (+243 -0) SelectionDAG.cpp | 243 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 files changed, 243 insertions(+) Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp diff -u llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp:1.254 llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp:1.255 --- llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp:1.254 Sun Feb 5 00:29:23 2006 +++ llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp Thu Feb 9 01:15:23 2006 @@ -2037,6 +2037,249 @@ return SDOperand(N, 0); } +/// getTargetNode - These are used for target selectors to create a new node +/// with specified return type(s), target opcode, and operands. +/// +/// Note that getTargetNode returns the resultant node. If there is already a +/// node of the specified opcode and operands, it returns that node instead of +/// the current one. +SDNode *SelectionDAG::getTargetNode(unsigned Opcode, MVT::ValueType VT) { + return getNode(ISD::BUILTIN_OP_END+Opcode, VT).Val; +} +SDNode *SelectionDAG::getTargetNode(unsigned Opcode, MVT::ValueType VT, + SDOperand Op1) { + return getNode(ISD::BUILTIN_OP_END+Opcode, VT, Op1).Val; +} +SDNode *SelectionDAG::getTargetNode(unsigned Opcode, MVT::ValueType VT, + SDOperand Op1, SDOperand Op2) { + return getNode(ISD::BUILTIN_OP_END+Opcode, VT, Op1, Op2).Val; +} +SDNode *SelectionDAG::getTargetNode(unsigned Opcode, MVT::ValueType VT, + SDOperand Op1, SDOperand Op2, SDOperand Op3) { + return getNode(ISD::BUILTIN_OP_END+Opcode, VT, Op1, Op2, Op3).Val; +} +SDNode *SelectionDAG::getTargetNode(unsigned Opcode, MVT::ValueType VT, + SDOperand Op1, SDOperand Op2, SDOperand Op3, + SDOperand Op4) { + return getNode(ISD::BUILTIN_OP_END+Opcode, VT, Op1, Op2, Op3, Op4).Val; +} +SDNode *SelectionDAG::getTargetNode(unsigned Opcode, MVT::ValueType VT, + SDOperand Op1, SDOperand Op2, SDOperand Op3, + SDOperand Op4, SDOperand Op5) { + return getNode(ISD::BUILTIN_OP_END+Opcode, VT, Op1, Op2, Op3, Op4, Op5).Val; +} +SDNode 
*SelectionDAG::getTargetNode(unsigned Opcode, MVT::ValueType VT, + SDOperand Op1, SDOperand Op2, SDOperand Op3, + SDOperand Op4, SDOperand Op5, SDOperand Op6) { + std::vector Ops; + Ops.reserve(6); + Ops.push_back(Op1); + Ops.push_back(Op2); + Ops.push_back(Op3); + Ops.push_back(Op4); + Ops.push_back(Op5); + Ops.push_back(Op6); + return getNode(ISD::BUILTIN_OP_END+Opcode, VT, Ops).Val; +} +SDNode *SelectionDAG::getTargetNode(unsigned Opcode, MVT::ValueType VT, + SDOperand Op1, SDOperand Op2, SDOperand Op3, + SDOperand Op4, SDOperand Op5, SDOperand Op6, + SDOperand Op7) { + std::vector Ops; + Ops.reserve(7); + Ops.push_back(Op1); + Ops.push_back(Op2); + Ops.push_back(Op3); + Ops.push_back(Op4); + Ops.push_back(Op5); + Ops.push_back(Op6); + Ops.push_back(Op7); + return getNode(ISD::BUILTIN_OP_END+Opcode, VT, Ops).Val; +} +SDNode *SelectionDAG::getTargetNode(unsigned Opcode, MVT::ValueType VT, + SDOperand Op1, SDOperand Op2, SDOperand Op3, + SDOperand Op4, SDOperand Op5, SDOperand Op6, + SDOperand Op7, SDOperand Op8) { + std::vector Ops; + Ops.reserve(8); + Ops.push_back(Op1); + Ops.push_back(Op2); + Ops.push_back(Op3); + Ops.push_back(Op4); + Ops.push_back(Op5); + Ops.push_back(Op6); + Ops.push_back(Op7); + Ops.push_back(Op8); + return getNode(ISD::BUILTIN_OP_END+Opcode, VT, Ops).Val; +} +SDNode *SelectionDAG::getTargetNode(unsigned Opcode, MVT::ValueType VT, + std::vector &Ops) { + return getNode(ISD::BUILTIN_OP_END+Opcode, VT, Ops).Val; +} +SDNode *SelectionDAG::getTargetNode(unsigned Opcode, MVT::ValueType VT1, + MVT::ValueType VT2, SDOperand Op1) { + std::vector ResultTys; + ResultTys.push_back(VT1); + ResultTys.push_back(VT2); + std::vector Ops; + Ops.push_back(Op1); + return getNode(ISD::BUILTIN_OP_END+Opcode, ResultTys, Ops).Val; +} +SDNode *SelectionDAG::getTargetNode(unsigned Opcode, MVT::ValueType VT1, + MVT::ValueType VT2, SDOperand Op1, SDOperand Op2) { + std::vector ResultTys; + ResultTys.push_back(VT1); + ResultTys.push_back(VT2); + std::vector Ops; + 
Ops.push_back(Op1); + Ops.push_back(Op2); + return getNode(ISD::BUILTIN_OP_END+Opcode, ResultTys, Ops).Val; +} +SDNode *SelectionDAG::getTargetNode(unsigned Opcode, MVT::ValueType VT1, + MVT::ValueType VT2, SDOperand Op1, SDOperand Op2, + SDOperand Op3) { + std::vector ResultTys; + ResultTys.push_back(VT1); + ResultTys.push_back(VT2); + std::vector Ops; + Ops.push_back(Op1); + Ops.push_back(Op2); + Ops.push_back(Op3); + return getNode(ISD::BUILTIN_OP_END+Opcode, ResultTys, Ops).Val; +} +SDNode *SelectionDAG::getTargetNode(unsigned Opcode, MVT::ValueType VT1, + MVT::ValueType VT2, SDOperand Op1, SDOperand Op2, + SDOperand Op3, SDOperand Op4) { + std::vector ResultTys; + ResultTys.push_back(VT1); + ResultTys.push_back(VT2); + std::vector Ops; + Ops.push_back(Op1); + Ops.push_back(Op2); + Ops.push_back(Op3); + Ops.push_back(Op4); + return getNode(ISD::BUILTIN_OP_END+Opcode, ResultTys, Ops).Val; +} +SDNode *SelectionDAG::getTargetNode(unsigned Opcode, MVT::ValueType VT1, + MVT::ValueType VT2, SDOperand Op1, SDOperand Op2, + SDOperand Op3, SDOperand Op4, SDOperand Op5) { + std::vector ResultTys; + ResultTys.push_back(VT1); + ResultTys.push_back(VT2); + std::vector Ops; + Ops.push_back(Op1); + Ops.push_back(Op2); + Ops.push_back(Op3); + Ops.push_back(Op4); + Ops.push_back(Op5); + return getNode(ISD::BUILTIN_OP_END+Opcode, ResultTys, Ops).Val; +} +SDNode *SelectionDAG::getTargetNode(unsigned Opcode, MVT::ValueType VT1, + MVT::ValueType VT2, SDOperand Op1, SDOperand Op2, + SDOperand Op3, SDOperand Op4, SDOperand Op5, + SDOperand Op6) { + std::vector ResultTys; + ResultTys.push_back(VT1); + ResultTys.push_back(VT2); + std::vector Ops; + Ops.push_back(Op1); + Ops.push_back(Op2); + Ops.push_back(Op3); + Ops.push_back(Op4); + Ops.push_back(Op5); + Ops.push_back(Op6); + return getNode(ISD::BUILTIN_OP_END+Opcode, ResultTys, Ops).Val; +} +SDNode *SelectionDAG::getTargetNode(unsigned Opcode, MVT::ValueType VT1, + MVT::ValueType VT2, SDOperand Op1, SDOperand Op2, + SDOperand Op3, 
SDOperand Op4, SDOperand Op5, + SDOperand Op6, SDOperand Op7) { + std::vector ResultTys; + ResultTys.push_back(VT1); + ResultTys.push_back(VT2); + std::vector Ops; + Ops.push_back(Op1); + Ops.push_back(Op2); + Ops.push_back(Op3); + Ops.push_back(Op4); + Ops.push_back(Op5); + Ops.push_back(Op6); + Ops.push_back(Op7); + return getNode(ISD::BUILTIN_OP_END+Opcode, ResultTys, Ops).Val; +} +SDNode *SelectionDAG::getTargetNode(unsigned Opcode, MVT::ValueType VT1, + MVT::ValueType VT2, MVT::ValueType VT3, + SDOperand Op1, SDOperand Op2) { + std::vector ResultTys; + ResultTys.push_back(VT1); + ResultTys.push_back(VT2); + ResultTys.push_back(VT3); + std::vector Ops; + Ops.push_back(Op1); + Ops.push_back(Op2); + return getNode(ISD::BUILTIN_OP_END+Opcode, ResultTys, Ops).Val; +} +SDNode *SelectionDAG::getTargetNode(unsigned Opcode, MVT::ValueType VT1, + MVT::ValueType VT2, MVT::ValueType VT3, + SDOperand Op1, SDOperand Op2, + SDOperand Op3, SDOperand Op4, SDOperand Op5) { + std::vector ResultTys; + ResultTys.push_back(VT1); + ResultTys.push_back(VT2); + ResultTys.push_back(VT3); + std::vector Ops; + Ops.push_back(Op1); + Ops.push_back(Op2); + Ops.push_back(Op3); + Ops.push_back(Op4); + Ops.push_back(Op5); + return getNode(ISD::BUILTIN_OP_END+Opcode, ResultTys, Ops).Val; +} +SDNode *SelectionDAG::getTargetNode(unsigned Opcode, MVT::ValueType VT1, + MVT::ValueType VT2, MVT::ValueType VT3, + SDOperand Op1, SDOperand Op2, + SDOperand Op3, SDOperand Op4, SDOperand Op5, + SDOperand Op6) { + std::vector ResultTys; + ResultTys.push_back(VT1); + ResultTys.push_back(VT2); + ResultTys.push_back(VT3); + std::vector Ops; + Ops.push_back(Op1); + Ops.push_back(Op2); + Ops.push_back(Op3); + Ops.push_back(Op4); + Ops.push_back(Op5); + Ops.push_back(Op6); + return getNode(ISD::BUILTIN_OP_END+Opcode, ResultTys, Ops).Val; +} +SDNode *SelectionDAG::getTargetNode(unsigned Opcode, MVT::ValueType VT1, + MVT::ValueType VT2, MVT::ValueType VT3, + SDOperand Op1, SDOperand Op2, + SDOperand Op3, SDOperand 
Op4, SDOperand Op5, + SDOperand Op6, SDOperand Op7) { + std::vector ResultTys; + ResultTys.push_back(VT1); + ResultTys.push_back(VT2); + ResultTys.push_back(VT3); + std::vector Ops; + Ops.push_back(Op1); + Ops.push_back(Op2); + Ops.push_back(Op3); + Ops.push_back(Op4); + Ops.push_back(Op5); + Ops.push_back(Op6); + Ops.push_back(Op7); + return getNode(ISD::BUILTIN_OP_END+Opcode, ResultTys, Ops).Val; +} +SDNode *SelectionDAG::getTargetNode(unsigned Opcode, MVT::ValueType VT1, + MVT::ValueType VT2, std::vector &Ops) { + std::vector ResultTys; + ResultTys.push_back(VT1); + ResultTys.push_back(VT2); + return getNode(ISD::BUILTIN_OP_END+Opcode, ResultTys, Ops).Val; +} + // ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead. /// This can cause recursive merging of nodes in the DAG. /// From evan.cheng at apple.com Thu Feb 9 01:16:21 2006 From: evan.cheng at apple.com (Evan Cheng) Date: Thu, 9 Feb 2006 01:16:21 -0600 Subject: [llvm-commits] CVS: llvm/utils/TableGen/DAGISelEmitter.cpp DAGISelEmitter.h Message-ID: <200602090716.BAA11638@zion.cs.uiuc.edu> Changes in directory llvm/utils/TableGen: DAGISelEmitter.cpp updated: 1.170 -> 1.171 DAGISelEmitter.h updated: 1.56 -> 1.57 --- Log message: Match getTargetNode() changes (now returns SDNode* instead of SDOperand). --- Diffs of the changes: (+47 -40) DAGISelEmitter.cpp | 85 ++++++++++++++++++++++++++++------------------------- DAGISelEmitter.h | 2 - 2 files changed, 47 insertions(+), 40 deletions(-) Index: llvm/utils/TableGen/DAGISelEmitter.cpp diff -u llvm/utils/TableGen/DAGISelEmitter.cpp:1.170 llvm/utils/TableGen/DAGISelEmitter.cpp:1.171 --- llvm/utils/TableGen/DAGISelEmitter.cpp:1.170 Wed Feb 8 18:37:58 2006 +++ llvm/utils/TableGen/DAGISelEmitter.cpp Thu Feb 9 01:16:09 2006 @@ -1856,7 +1856,7 @@ std::vector > &GeneratedCode; /// GeneratedDecl - This is the set of all SDOperand declarations needed for /// the set of patterns for each top-level opcode. 
- std::set<std::string> &GeneratedDecl; + std::set<std::pair<bool, std::string> > &GeneratedDecl; std::string ChainName; bool DoReplace; @@ -1870,15 +1870,15 @@ if (!S.empty()) GeneratedCode.push_back(std::make_pair(false, S)); } - void emitDecl(const std::string &S) { + void emitDecl(const std::string &S, bool isSDNode=false) { assert(!S.empty() && "Invalid declaration"); - GeneratedDecl.insert(S); + GeneratedDecl.insert(std::make_pair(isSDNode, S)); } public: PatternCodeEmitter(DAGISelEmitter &ise, ListInit *preds, TreePatternNode *pattern, TreePatternNode *instr, std::vector<std::pair<bool, std::string> > &gc, - std::set<std::string> &gd, + std::set<std::pair<bool, std::string> > &gd, bool dorep) : ISE(ise), Predicates(preds), Pattern(pattern), Instruction(instr), GeneratedCode(gc), GeneratedDecl(gd), DoReplace(dorep), TmpNo(0) {} @@ -2310,7 +2310,7 @@ if (!isRoot) { emitDecl("Tmp" + utostr(ResNo)); std::string Code = - "Tmp" + utostr(ResNo) + " = CurDAG->getTargetNode(" + + "Tmp" + utostr(ResNo) + " = SDOperand(CurDAG->getTargetNode(" + II.Namespace + "::" + II.TheDef->getName(); if (N->getTypeNum(0) != MVT::isVoid) Code += ", MVT::" + getEnumName(N->getTypeNum(0)); @@ -2322,7 +2322,7 @@ LastOp = Ops[i]; Code += ", Tmp" + utostr(LastOp); } - emitCode(Code + ");"); + emitCode(Code + "), 0);"); if (HasChain) { // Must have at least one result emitCode(ChainName + " = Tmp" + utostr(LastOp) + ".getValue(" + @@ -2331,8 +2331,9 @@ } else if (HasChain || NodeHasOutFlag) { if (HasOptInFlag) { unsigned FlagNo = (unsigned) NodeHasChain + Pattern->getNumChildren(); + emitDecl("ResNode", true); emitCode("if (HasOptInFlag)"); - std::string Code = " Result = CurDAG->getTargetNode(" + + std::string Code = " ResNode = CurDAG->getTargetNode(" + II.Namespace + "::" + II.TheDef->getName(); // Output order: results, chain, flags @@ -2353,7 +2354,7 @@ emitCode(Code + ", InFlag);"); emitCode("else"); - Code = " Result = CurDAG->getTargetNode(" + II.Namespace + "::" + + Code = " ResNode = CurDAG->getTargetNode(" + II.Namespace + "::" + II.TheDef->getName(); // Output order: results, chain, 
flags @@ -2371,7 +2372,8 @@ if (HasChain) Code += ", " + ChainName + ");"; emitCode(Code); } else { - std::string Code = "Result = CurDAG->getTargetNode(" + + emitDecl("ResNode", true); + std::string Code = "ResNode = CurDAG->getTargetNode(" + II.Namespace + "::" + II.TheDef->getName(); // Output order: results, chain, flags @@ -2393,25 +2395,25 @@ unsigned ValNo = 0; for (unsigned i = 0; i < NumResults; i++) { - emitCode("CodeGenMap[N.getValue(" + utostr(ValNo) + ")] = Result" - ".getValue(" + utostr(ValNo) + ");"); + emitCode("CodeGenMap[N.getValue(" + utostr(ValNo) + + ")] = SDOperand(ResNode, " + utostr(ValNo) + ");"); ValNo++; } if (HasChain) - emitCode(ChainName + " = Result.getValue(" + utostr(ValNo) + ");"); + emitCode(ChainName + " = SDOperand(ResNode, " + utostr(ValNo) + ");"); if (NodeHasOutFlag) - emitCode("InFlag = Result.getValue(" + + emitCode("InFlag = SDOperand(ResNode, " + utostr(ValNo + (unsigned)HasChain) + ");"); if (HasImpResults && EmitCopyFromRegs(N, ChainEmitted)) { emitCode("CodeGenMap[N.getValue(" + utostr(ValNo) + ")] = " - "Result.getValue(" + utostr(ValNo) + ");"); + "SDOperand(ResNode, " + utostr(ValNo) + ");"); ValNo++; } - // User does not expect that the instruction produces a chain! + // User does not expect the instruction would produce a chain! 
bool AddedChain = HasChain && !NodeHasChain; if (NodeHasChain) { emitCode("CodeGenMap[N.getValue(" + utostr(ValNo) + ")] = " + @@ -2443,15 +2445,15 @@ if (AddedChain && NodeHasOutFlag) { if (NumExpectedResults == 0) { - emitCode("Result = Result.getValue(N.ResNo+1);"); + emitCode("Result = SDOperand(ResNode, N.ResNo+1);"); } else { emitCode("if (N.ResNo < " + utostr(NumExpectedResults) + ")"); - emitCode(" Result = Result.getValue(N.ResNo);"); + emitCode(" Result = SDOperand(ResNode, N.ResNo);"); emitCode("else"); - emitCode(" Result = Result.getValue(N.ResNo+1);"); + emitCode(" Result = SDOperand(ResNode, N.ResNo+1);"); } } else { - emitCode("Result = Result.getValue(N.ResNo);"); + emitCode("Result = SDOperand(ResNode, N.ResNo);"); } } else { // If this instruction is the root, and if there is only one use of it, @@ -2469,7 +2471,8 @@ Code += ", InFlag"; emitCode(Code + ");"); emitCode("} else {"); - Code = " Result = CodeGenMap[N] = CurDAG->getTargetNode(" + + emitDecl("ResNode", true); + Code = " ResNode = CurDAG->getTargetNode(" + II.Namespace + "::" + II.TheDef->getName(); if (N->getTypeNum(0) != MVT::isVoid) Code += ", MVT::" + getEnumName(N->getTypeNum(0)); @@ -2480,6 +2483,7 @@ if (HasInFlag || HasImpInputs) Code += ", InFlag"; emitCode(Code + ");"); + emitCode(" Result = CodeGenMap[N] = SDOperand(ResNode, 0);"); emitCode("}"); } @@ -2567,13 +2571,13 @@ } emitCode("Select(" + RootName + utostr(OpNo) + ", " + RootName + utostr(OpNo) + ");"); - emitDecl("Copy"); + emitDecl("Copy", true); emitCode("Copy = CurDAG->getCopyToReg(" + ChainName + ", CurDAG->getRegister(" + ISE.getQualifiedName(RR) + ", MVT::" + getEnumName(RVT) + "), " + - RootName + utostr(OpNo) + ", InFlag);"); - emitCode(ChainName + " = Copy.getValue(0);"); - emitCode("InFlag = Copy.getValue(1);"); + RootName + utostr(OpNo) + ", InFlag).Val;"); + emitCode(ChainName + " = SDOperand(Copy, 0);"); + emitCode("InFlag = SDOperand(Copy, 1);"); } } } @@ -2618,11 +2622,11 @@ ChainEmitted = true; 
ChainName = "Chain"; } - emitCode("Result = CurDAG->getCopyFromReg(" + ChainName + ", " + + emitCode("ResNode = CurDAG->getCopyFromReg(" + ChainName + ", " + ISE.getQualifiedName(RR) + ", MVT::" + getEnumName(RVT) + - ", InFlag);"); - emitCode(ChainName + " = Result.getValue(1);"); - emitCode("InFlag = Result.getValue(2);"); + ", InFlag).Val;"); + emitCode(ChainName + " = SDOperand(ResNode, 1);"); + emitCode("InFlag = SDOperand(ResNode, 2);"); RetVal = true; } } @@ -2637,7 +2641,7 @@ /// succeeds. Returns true if the pattern is not guaranteed to match. void DAGISelEmitter::GenerateCodeForPattern(PatternToMatch &Pattern, std::vector > &GeneratedCode, - std::set &GeneratedDecl, + std::set > &GeneratedDecl, bool DoReplace) { PatternCodeEmitter Emitter(*this, Pattern.getPredicates(), Pattern.getSrcPattern(), Pattern.getDstPattern(), @@ -2900,7 +2904,7 @@ typedef std::set DeclSet; std::vector > CodeForPatterns; - std::set GeneratedDecl; + std::set > GeneratedDecl; for (unsigned i = 0, e = Patterns.size(); i != e; ++i) { CodeList GeneratedCode; GenerateCodeForPattern(*Patterns[i], GeneratedCode, GeneratedDecl, @@ -2933,9 +2937,12 @@ } // Print all declarations. 
- for (std::set<std::string>::iterator I = GeneratedDecl.begin(), + for (std::set<std::pair<bool, std::string> >::iterator I = GeneratedDecl.begin(), E = GeneratedDecl.end(); I != E; ++I) - OS << " SDOperand " << *I << "(0, 0);\n"; + if (I->first) + OS << " SDNode *" << I->second << ";\n"; + else + OS << " SDOperand " << I->second << "(0, 0);\n"; // Loop through and reverse all of the CodeList vectors, as we will be // accessing them from their logical front, but accessing the end of a @@ -3068,20 +3075,20 @@ << " unsigned Reg = cast<RegisterSDNode>(N.getOperand(1))->getReg();\n" << " SDOperand Val;\n" << " Select(Val, N.getOperand(2));\n" - << " SDOperand ResNode = N;\n" + << " Result = N;\n" << " if (N.Val->getNumValues() == 1) {\n" << " if (Chain != N.getOperand(0) || Val != N.getOperand(2))\n" - << " ResNode = CurDAG->getCopyToReg(Chain, Reg, Val);\n" - << " Result = CodeGenMap[N] = ResNode;\n" + << " Result = CurDAG->getCopyToReg(Chain, Reg, Val);\n" + << " CodeGenMap[N] = Result;\n" << " } else {\n" << " SDOperand Flag(0, 0);\n" << " if (N.getNumOperands() == 4) Select(Flag, N.getOperand(3));\n" << " if (Chain != N.getOperand(0) || Val != N.getOperand(2) ||\n" << " (N.getNumOperands() == 4 && Flag != N.getOperand(3)))\n" - << " ResNode = CurDAG->getCopyToReg(Chain, Reg, Val, Flag);\n" - << " CodeGenMap[N.getValue(0)] = ResNode;\n" - << " CodeGenMap[N.getValue(1)] = ResNode.getValue(1);\n" - << " Result = ResNode.getValue(N.ResNo);\n" + << " Result = CurDAG->getCopyToReg(Chain, Reg, Val, Flag);\n" + << " CodeGenMap[N.getValue(0)] = Result;\n" + << " CodeGenMap[N.getValue(1)] = Result.getValue(1);\n" + << " Result = Result.getValue(N.ResNo);\n" << " }\n" << " return;\n" << " }\n" Index: llvm/utils/TableGen/DAGISelEmitter.h diff -u llvm/utils/TableGen/DAGISelEmitter.h:1.56 llvm/utils/TableGen/DAGISelEmitter.h:1.57 --- llvm/utils/TableGen/DAGISelEmitter.h:1.56 Mon Feb 6 18:37:41 2006 +++ llvm/utils/TableGen/DAGISelEmitter.h Thu Feb 9 01:16:09 2006 @@ -471,7 +471,7 @@ std::vector &InstImpResults); void 
GenerateCodeForPattern(PatternToMatch &Pattern, std::vector > &GeneratedCode, - std::set &GeneratedDecl, + std::set > &GeneratedDecl, bool UseGoto); void EmitPatterns(std::vector > > > &Patterns, From evan.cheng at apple.com Thu Feb 9 01:18:02 2006 From: evan.cheng at apple.com (Evan Cheng) Date: Thu, 9 Feb 2006 01:18:02 -0600 Subject: [llvm-commits] CVS: llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp Message-ID: <200602090718.BAA11684@zion.cs.uiuc.edu> Changes in directory llvm/lib/Target/Sparc: SparcISelDAGToDAG.cpp updated: 1.81 -> 1.82 --- Log message: Match getTargetNode() changes (now return SDNode* instead of SDOperand). --- Diffs of the changes: (+29 -24) SparcISelDAGToDAG.cpp | 53 +++++++++++++++++++++++++++----------------------- 1 files changed, 29 insertions(+), 24 deletions(-) Index: llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp diff -u llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp:1.81 llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp:1.82 --- llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp:1.81 Wed Feb 8 22:46:04 2006 +++ llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp Thu Feb 9 01:17:49 2006 @@ -1052,9 +1052,9 @@ } Result = CodeGenMap[Op] = - CurDAG->getTargetNode(SP::ADDri, MVT::i32, - CurDAG->getTargetFrameIndex(FI, MVT::i32), - CurDAG->getTargetConstant(0, MVT::i32)); + SDOperand(CurDAG->getTargetNode(SP::ADDri, MVT::i32, + CurDAG->getTargetFrameIndex(FI, MVT::i32), + CurDAG->getTargetConstant(0, MVT::i32)), 0); return; } case ISD::ADD_PARTS: { @@ -1064,10 +1064,12 @@ Select(RHSL, N->getOperand(2)); Select(RHSH, N->getOperand(3)); // FIXME, handle immediate RHS. 
- SDOperand Low = CurDAG->getTargetNode(SP::ADDCCrr, MVT::i32, MVT::Flag, - LHSL, RHSL); - SDOperand Hi = CurDAG->getTargetNode(SP::ADDXrr, MVT::i32, LHSH, RHSH, - Low.getValue(1)); + SDOperand Low = + SDOperand(CurDAG->getTargetNode(SP::ADDCCrr, MVT::i32, MVT::Flag, + LHSL, RHSL), 0); + SDOperand Hi = + SDOperand(CurDAG->getTargetNode(SP::ADDXrr, MVT::i32, LHSH, RHSH, + Low.getValue(1)), 0); CodeGenMap[SDOperand(N, 0)] = Low; CodeGenMap[SDOperand(N, 1)] = Hi; Result = Op.ResNo ? Hi : Low; @@ -1079,10 +1081,12 @@ Select(LHSH, N->getOperand(1)); Select(RHSL, N->getOperand(2)); Select(RHSH, N->getOperand(3)); - SDOperand Low = CurDAG->getTargetNode(SP::SUBCCrr, MVT::i32, MVT::Flag, - LHSL, RHSL); - SDOperand Hi = CurDAG->getTargetNode(SP::SUBXrr, MVT::i32, LHSH, RHSH, - Low.getValue(1)); + SDOperand Low = + SDOperand(CurDAG->getTargetNode(SP::SUBCCrr, MVT::i32, MVT::Flag, + LHSL, RHSL), 0); + SDOperand Hi = + SDOperand(CurDAG->getTargetNode(SP::SUBXrr, MVT::i32, LHSH, RHSH, + Low.getValue(1)), 0); CodeGenMap[SDOperand(N, 0)] = Low; CodeGenMap[SDOperand(N, 1)] = Hi; Result = Op.ResNo ? Hi : Low; @@ -1098,13 +1102,13 @@ // Set the Y register to the high-part. SDOperand TopPart; if (N->getOpcode() == ISD::SDIV) { - TopPart = CurDAG->getTargetNode(SP::SRAri, MVT::i32, DivLHS, - CurDAG->getTargetConstant(31, MVT::i32)); + TopPart = SDOperand(CurDAG->getTargetNode(SP::SRAri, MVT::i32, DivLHS, + CurDAG->getTargetConstant(31, MVT::i32)), 0); } else { TopPart = CurDAG->getRegister(SP::G0, MVT::i32); } - TopPart = CurDAG->getTargetNode(SP::WRYrr, MVT::Flag, TopPart, - CurDAG->getRegister(SP::G0, MVT::i32)); + TopPart = SDOperand(CurDAG->getTargetNode(SP::WRYrr, MVT::Flag, TopPart, + CurDAG->getRegister(SP::G0, MVT::i32)), 0); // FIXME: Handle div by immediate. unsigned Opcode = N->getOpcode() == ISD::SDIV ? SP::SDIVrr : SP::UDIVrr; @@ -1118,10 +1122,10 @@ Select(MulLHS, N->getOperand(0)); Select(MulRHS, N->getOperand(1)); unsigned Opcode = N->getOpcode() == ISD::MULHU ? 
SP::UMULrr : SP::SMULrr; - SDOperand Mul = CurDAG->getTargetNode(Opcode, MVT::i32, MVT::Flag, + SDNode *Mul = CurDAG->getTargetNode(Opcode, MVT::i32, MVT::Flag, MulLHS, MulRHS); // The high part is in the Y register. - Result = CurDAG->SelectNodeTo(N, SP::RDY, MVT::i32, Mul.getValue(1)); + Result = CurDAG->SelectNodeTo(N, SP::RDY, MVT::i32, SDOperand(Mul, 1)); return; } case SPISD::CALL: @@ -1136,17 +1140,18 @@ SDOperand Chain = N->getOperand(0); SDOperand Tmp0 = N1; Select(Chain, Chain); + SDNode *ResNode; if (N->getNumOperands() == 3) { Select(InFlag, N->getOperand(2)); - Result = CurDAG->getTargetNode(SP::CALL, MVT::Other, MVT::Flag, Tmp0, - Chain, InFlag); + ResNode = CurDAG->getTargetNode(SP::CALL, MVT::Other, MVT::Flag, Tmp0, + Chain, InFlag); } else { - Result = CurDAG->getTargetNode(SP::CALL, MVT::Other, MVT::Flag, Tmp0, - Chain); + ResNode = CurDAG->getTargetNode(SP::CALL, MVT::Other, MVT::Flag, Tmp0, + Chain); } - Chain = CodeGenMap[SDOperand(N, 0)] = Result.getValue(0); - CodeGenMap[SDOperand(N, 1)] = Result.getValue(1); - Result = Result.getValue(Op.ResNo); + Chain = CodeGenMap[SDOperand(N, 0)] = SDOperand(ResNode, 0); + CodeGenMap[SDOperand(N, 1)] = SDOperand(ResNode, 1); + Result = SDOperand(ResNode, Op.ResNo); return; } P47Fail:; From evan.cheng at apple.com Thu Feb 9 01:18:02 2006 From: evan.cheng at apple.com (Evan Cheng) Date: Thu, 9 Feb 2006 01:18:02 -0600 Subject: [llvm-commits] CVS: llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp Message-ID: <200602090718.BAA11688@zion.cs.uiuc.edu> Changes in directory llvm/lib/Target/PowerPC: PPCISelDAGToDAG.cpp updated: 1.156 -> 1.157 --- Log message: Match getTargetNode() changes (now return SDNode* instead of SDOperand). 
--- Diffs of the changes: (+79 -68) PPCISelDAGToDAG.cpp | 147 +++++++++++++++++++++++++++------------------------- 1 files changed, 79 insertions(+), 68 deletions(-) Index: llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp diff -u llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp:1.156 llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp:1.157 --- llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp:1.156 Wed Feb 8 18:37:58 2006 +++ llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp Thu Feb 9 01:17:49 2006 @@ -368,17 +368,15 @@ N->getOperand(0).getOperand(0) == N->getOperand(1).getOperand(0)) { SDOperand Tmp; Select(Tmp, N->getOperand(0).getOperand(0)); - Op0 = CurDAG->getTargetNode(PPC::RLWINM, MVT::i32, Tmp, - getI32Imm(SH), getI32Imm(0), getI32Imm(31)); - return Op0.Val; + return CurDAG->getTargetNode(PPC::RLWINM, MVT::i32, Tmp, + getI32Imm(SH), getI32Imm(0), getI32Imm(31)); } SDOperand Tmp1, Tmp2; Select(Tmp1, ((Op0IsAND && fullMask) ? Op0.getOperand(0) : Op0)); Select(Tmp2, (IsAndWithShiftOp ? Op1.getOperand(0).getOperand(0) : Op1.getOperand(0))); - Op0 = CurDAG->getTargetNode(PPC::RLWIMI, MVT::i32, Tmp1, Tmp2, - getI32Imm(SH), getI32Imm(MB), getI32Imm(ME)); - return Op0.Val; + return CurDAG->getTargetNode(PPC::RLWIMI, MVT::i32, Tmp1, Tmp2, + getI32Imm(SH), getI32Imm(MB), getI32Imm(ME)); } return 0; } @@ -467,17 +465,17 @@ unsigned Imm; if (isIntImmediate(RHS, Imm) && ((U && isUInt16(Imm)) || (!U && isInt16(Imm)))) - return CurDAG->getTargetNode(U ? PPC::CMPLWI : PPC::CMPWI, MVT::i32, - LHS, getI32Imm(Imm & 0xFFFF)); + return SDOperand(CurDAG->getTargetNode(U ? PPC::CMPLWI : PPC::CMPWI, + MVT::i32, LHS, getI32Imm(Imm & 0xFFFF)), 0); Select(RHS, RHS); - return CurDAG->getTargetNode(U ? PPC::CMPLW : PPC::CMPW, MVT::i32, - LHS, RHS); + return SDOperand(CurDAG->getTargetNode(U ? 
PPC::CMPLW : PPC::CMPW, MVT::i32, + LHS, RHS), 0); } else if (LHS.getValueType() == MVT::f32) { Select(RHS, RHS); - return CurDAG->getTargetNode(PPC::FCMPUS, MVT::i32, LHS, RHS); + return SDOperand(CurDAG->getTargetNode(PPC::FCMPUS, MVT::i32, LHS, RHS), 0); } else { Select(RHS, RHS); - return CurDAG->getTargetNode(PPC::FCMPUD, MVT::i32, LHS, RHS); + return SDOperand(CurDAG->getTargetNode(PPC::FCMPUD, MVT::i32, LHS, RHS), 0); } } @@ -552,7 +550,8 @@ } std::vector Result; - SDOperand CarryFromLo, Tmp; + SDOperand Tmp; + SDNode *CarryFromLo; if (isIntImmediate(N->getOperand(2), Imm) && ((signed)Imm >= -32768 || (signed)Imm < 32768)) { // Codegen the low 32 bits of the add. Interestingly, there is no @@ -564,21 +563,22 @@ CarryFromLo = CurDAG->getTargetNode(PPC::ADDC, MVT::i32, MVT::Flag, LHSL, Tmp); } - CarryFromLo = CarryFromLo.getValue(1); // Codegen the high 32 bits, adding zero, minus one, or the full value // along with the carry flag produced by addc/addic. SDOperand ResultHi; if (ZE) - ResultHi = CurDAG->getTargetNode(PPC::ADDZE, MVT::i32, LHSH, CarryFromLo); + ResultHi = SDOperand(CurDAG->getTargetNode(PPC::ADDZE, MVT::i32, LHSH, + SDOperand(CarryFromLo, 1)), 0); else if (ME) - ResultHi = CurDAG->getTargetNode(PPC::ADDME, MVT::i32, LHSH, CarryFromLo); + ResultHi = SDOperand(CurDAG->getTargetNode(PPC::ADDME, MVT::i32, LHSH, + SDOperand(CarryFromLo, 1)), 0); else { Select(Tmp, N->getOperand(3)); - ResultHi = CurDAG->getTargetNode(PPC::ADDE, MVT::i32, LHSH, - Tmp, CarryFromLo); + ResultHi = SDOperand(CurDAG->getTargetNode(PPC::ADDE, MVT::i32, LHSH, + Tmp, SDOperand(CarryFromLo, 1)), 0); } - Result.push_back(CarryFromLo.getValue(0)); + Result.push_back(SDOperand(CarryFromLo, 0)); Result.push_back(ResultHi); CodeGenMap[Op.getValue(0)] = Result[0]; @@ -594,10 +594,11 @@ Select(RHSH, N->getOperand(3)); std::vector Result; - Result.push_back(CurDAG->getTargetNode(PPC::SUBFC, MVT::i32, MVT::Flag, - RHSL, LHSL)); - Result.push_back(CurDAG->getTargetNode(PPC::SUBFE, 
MVT::i32, RHSH, LHSH, - Result[0].getValue(1))); + Result.push_back(SDOperand(CurDAG->getTargetNode(PPC::SUBFC, MVT::i32, + MVT::Flag, RHSL, LHSL), 0)); + Result.push_back(SDOperand(CurDAG->getTargetNode(PPC::SUBFE, MVT::i32, + RHSH, LHSH, + Result[0].getValue(1)), 0)); CodeGenMap[Op.getValue(0)] = Result[0]; CodeGenMap[Op.getValue(1)] = Result[1]; return Result[Op.ResNo]; @@ -617,12 +618,13 @@ switch (CC) { default: break; case ISD::SETEQ: - Op = CurDAG->getTargetNode(PPC::CNTLZW, MVT::i32, Op); + Op = SDOperand(CurDAG->getTargetNode(PPC::CNTLZW, MVT::i32, Op), 0); return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Op, getI32Imm(27), getI32Imm(5), getI32Imm(31)); case ISD::SETNE: { - SDOperand AD = CurDAG->getTargetNode(PPC::ADDIC, MVT::i32, MVT::Flag, - Op, getI32Imm(~0U)); + SDOperand AD = + SDOperand(CurDAG->getTargetNode(PPC::ADDIC, MVT::i32, MVT::Flag, + Op, getI32Imm(~0U)), 0); return CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, AD, Op, AD.getValue(1)); } @@ -630,8 +632,9 @@ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Op, getI32Imm(1), getI32Imm(31), getI32Imm(31)); case ISD::SETGT: { - SDOperand T = CurDAG->getTargetNode(PPC::NEG, MVT::i32, Op); - T = CurDAG->getTargetNode(PPC::ANDC, MVT::i32, T, Op);; + SDOperand T = + SDOperand(CurDAG->getTargetNode(PPC::NEG, MVT::i32, Op), 0); + T = SDOperand(CurDAG->getTargetNode(PPC::ANDC, MVT::i32, T, Op), 0); return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, T, getI32Imm(1), getI32Imm(31), getI32Imm(31)); } @@ -642,29 +645,31 @@ switch (CC) { default: break; case ISD::SETEQ: - Op = CurDAG->getTargetNode(PPC::ADDIC, MVT::i32, MVT::Flag, - Op, getI32Imm(1)); + Op = SDOperand(CurDAG->getTargetNode(PPC::ADDIC, MVT::i32, MVT::Flag, + Op, getI32Imm(1)), 0); return CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32, - CurDAG->getTargetNode(PPC::LI, MVT::i32, - getI32Imm(0)), + SDOperand(CurDAG->getTargetNode(PPC::LI, MVT::i32, + getI32Imm(0)), 0), Op.getValue(1)); case ISD::SETNE: { - Op = 
CurDAG->getTargetNode(PPC::NOR, MVT::i32, Op, Op); - SDOperand AD = CurDAG->getTargetNode(PPC::ADDIC, MVT::i32, MVT::Flag, - Op, getI32Imm(~0U)); - return CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, AD, Op, - AD.getValue(1)); + Op = SDOperand(CurDAG->getTargetNode(PPC::NOR, MVT::i32, Op, Op), 0); + SDNode *AD = CurDAG->getTargetNode(PPC::ADDIC, MVT::i32, MVT::Flag, + Op, getI32Imm(~0U)); + return CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDOperand(AD, 0), Op, + SDOperand(AD, 1)); } case ISD::SETLT: { - SDOperand AD = CurDAG->getTargetNode(PPC::ADDI, MVT::i32, Op, - getI32Imm(1)); - SDOperand AN = CurDAG->getTargetNode(PPC::AND, MVT::i32, AD, Op); + SDOperand AD = SDOperand(CurDAG->getTargetNode(PPC::ADDI, MVT::i32, Op, + getI32Imm(1)), 0); + SDOperand AN = SDOperand(CurDAG->getTargetNode(PPC::AND, MVT::i32, AD, + Op), 0); return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, AN, getI32Imm(1), getI32Imm(31), getI32Imm(31)); } case ISD::SETGT: - Op = CurDAG->getTargetNode(PPC::RLWINM, MVT::i32, Op, getI32Imm(1), - getI32Imm(31), getI32Imm(31)); + Op = SDOperand(CurDAG->getTargetNode(PPC::RLWINM, MVT::i32, Op, + getI32Imm(1), getI32Imm(31), + getI32Imm(31)), 0); return CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Op, getI32Imm(1)); } } @@ -683,9 +688,10 @@ InFlag).getValue(1); if (TLI.getTargetMachine().getSubtarget().isGigaProcessor()) - IntCR = CurDAG->getTargetNode(PPC::MFOCRF, MVT::i32, CR7Reg, CCReg); + IntCR = SDOperand(CurDAG->getTargetNode(PPC::MFOCRF, MVT::i32, CR7Reg, + CCReg), 0); else - IntCR = CurDAG->getTargetNode(PPC::MFCR, MVT::i32, CCReg); + IntCR = SDOperand(CurDAG->getTargetNode(PPC::MFCR, MVT::i32, CCReg), 0); if (!Inv) { return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, IntCR, @@ -693,9 +699,9 @@ getI32Imm(31), getI32Imm(31)); } else { SDOperand Tmp = - CurDAG->getTargetNode(PPC::RLWINM, MVT::i32, IntCR, - getI32Imm((32-(3-Idx)) & 31), - getI32Imm(31),getI32Imm(31)); + SDOperand(CurDAG->getTargetNode(PPC::RLWINM, MVT::i32, IntCR, + 
getI32Imm((32-(3-Idx)) & 31), + getI32Imm(31),getI32Imm(31)), 0); return CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Tmp, getI32Imm(1)); } } @@ -733,7 +739,8 @@ // Copy the callee address into the CTR register. SDOperand Callee; Select(Callee, N->getOperand(1)); - Chain = CurDAG->getTargetNode(PPC::MTCTR, MVT::Other, Callee, Chain); + Chain = SDOperand(CurDAG->getTargetNode(PPC::MTCTR, MVT::Other, Callee, + Chain), 0); // Copy the callee address into R12 on darwin. SDOperand R12 = CurDAG->getRegister(PPC::R12, MVT::i32); @@ -783,8 +790,8 @@ CallOperands.push_back(InFlag); // Strong dep on register copies. else CallOperands.push_back(Chain); // Weak dep on whatever occurs before - Chain = CurDAG->getTargetNode(CallOpcode, MVT::Other, MVT::Flag, - CallOperands); + Chain = SDOperand(CurDAG->getTargetNode(CallOpcode, MVT::Other, MVT::Flag, + CallOperands), 0); std::vector CallResults; @@ -864,9 +871,9 @@ return; } Result = CodeGenMap[Op] = - CurDAG->getTargetNode(PPC::ADDI, MVT::i32, - CurDAG->getTargetFrameIndex(FI, MVT::i32), - getI32Imm(0)); + SDOperand(CurDAG->getTargetNode(PPC::ADDI, MVT::i32, + CurDAG->getTargetFrameIndex(FI, MVT::i32), + getI32Imm(0)), 0); return; } case ISD::SDIV: { @@ -880,18 +887,19 @@ SDOperand N0; Select(N0, N->getOperand(0)); if ((signed)Imm > 0 && isPowerOf2_32(Imm)) { - SDOperand Op = + SDNode *Op = CurDAG->getTargetNode(PPC::SRAWI, MVT::i32, MVT::Flag, N0, getI32Imm(Log2_32(Imm))); Result = CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32, - Op.getValue(0), Op.getValue(1)); + SDOperand(Op, 0), SDOperand(Op, 1)); } else if ((signed)Imm < 0 && isPowerOf2_32(-Imm)) { - SDOperand Op = + SDNode *Op = CurDAG->getTargetNode(PPC::SRAWI, MVT::i32, MVT::Flag, N0, getI32Imm(Log2_32(-Imm))); SDOperand PT = - CurDAG->getTargetNode(PPC::ADDZE, MVT::i32, Op.getValue(0), - Op.getValue(1)); + SDOperand(CurDAG->getTargetNode(PPC::ADDZE, MVT::i32, + SDOperand(Op, 0), SDOperand(Op, 1)), + 0); Result = CurDAG->SelectNodeTo(N, PPC::NEG, MVT::i32, PT); } return; 
@@ -935,9 +943,10 @@ SDOperand Tmp1, Tmp2; Select(Tmp1, N->getOperand(0).getOperand(0)); Select(Tmp2, N->getOperand(0).getOperand(1)); - Result = CurDAG->getTargetNode(PPC::RLWIMI, MVT::i32, Tmp1, Tmp2, - getI32Imm(0), getI32Imm(MB), - getI32Imm(ME)); + Result = SDOperand(CurDAG->getTargetNode(PPC::RLWIMI, MVT::i32, + Tmp1, Tmp2, + getI32Imm(0), getI32Imm(MB), + getI32Imm(ME)), 0); return; } } @@ -994,11 +1003,12 @@ N2C->getValue() == 1ULL && CC == ISD::SETNE) { SDOperand LHS; Select(LHS, N->getOperand(0)); - SDOperand Tmp = + SDNode *Tmp = CurDAG->getTargetNode(PPC::ADDIC, MVT::i32, MVT::Flag, LHS, getI32Imm(~0U)); - Result = CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, Tmp, LHS, - Tmp.getValue(1)); + Result = CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, + SDOperand(Tmp, 0), LHS, + SDOperand(Tmp, 1)); return; } @@ -1048,10 +1058,11 @@ } unsigned Opc = getBCCForSetCC(CC); - SDOperand CB = CurDAG->getTargetNode(PPC::COND_BRANCH, MVT::Other, - CondCode, getI32Imm(Opc), - CondTrueBlock, CondFalseBlock, - Chain); + SDOperand CB = + SDOperand(CurDAG->getTargetNode(PPC::COND_BRANCH, MVT::Other, + CondCode, getI32Imm(Opc), + CondTrueBlock, CondFalseBlock, + Chain), 0); Result = CurDAG->SelectNodeTo(N, PPC::B, MVT::Other, CondFalseBlock, CB); } else { // Iterate to the next basic block From evan.cheng at apple.com Thu Feb 9 01:18:03 2006 From: evan.cheng at apple.com (Evan Cheng) Date: Thu, 9 Feb 2006 01:18:03 -0600 Subject: [llvm-commits] CVS: llvm/lib/Target/Alpha/AlphaISelDAGToDAG.cpp Message-ID: <200602090718.BAA11692@zion.cs.uiuc.edu> Changes in directory llvm/lib/Target/Alpha: AlphaISelDAGToDAG.cpp updated: 1.33 -> 1.34 --- Log message: Match getTargetNode() changes (now return SDNode* instead of SDOperand). 
--- Diffs of the changes: (+39 -36) AlphaISelDAGToDAG.cpp | 75 ++++++++++++++++++++++++++------------------------ 1 files changed, 39 insertions(+), 36 deletions(-) Index: llvm/lib/Target/Alpha/AlphaISelDAGToDAG.cpp diff -u llvm/lib/Target/Alpha/AlphaISelDAGToDAG.cpp:1.33 llvm/lib/Target/Alpha/AlphaISelDAGToDAG.cpp:1.34 --- llvm/lib/Target/Alpha/AlphaISelDAGToDAG.cpp:1.33 Wed Feb 8 18:37:58 2006 +++ llvm/lib/Target/Alpha/AlphaISelDAGToDAG.cpp Thu Feb 9 01:17:49 2006 @@ -194,10 +194,11 @@ Chain.getValue(1)); Chain = CurDAG->getCopyToReg(Chain, Alpha::R27, N0, Chain.getValue(1)); - Chain = CurDAG->getTargetNode(Alpha::JSRs, MVT::Other, MVT::Flag, - Chain, Chain.getValue(1)); + SDNode *CNode = + CurDAG->getTargetNode(Alpha::JSRs, MVT::Other, MVT::Flag, + Chain, Chain.getValue(1)); Chain = CurDAG->getCopyFromReg(Chain, Alpha::R27, MVT::i64, - Chain.getValue(1)); + SDOperand(CNode, 1)); Result = CurDAG->SelectNodeTo(N, Alpha::BIS, MVT::i64, Chain, Chain); return; } @@ -255,10 +256,11 @@ MachineConstantPool *CP = BB->getParent()->getConstantPool(); ConstantUInt *C = ConstantUInt::get(Type::getPrimitiveType(Type::ULongTyID) , uval); - SDOperand Tmp, CPI = CurDAG->getTargetConstantPool(C, MVT::i64); - Tmp = CurDAG->getTargetNode(Alpha::LDAHr, MVT::i64, CPI, getGlobalBaseReg()); + SDOperand CPI = CurDAG->getTargetConstantPool(C, MVT::i64); + SDNode *Tmp = CurDAG->getTargetNode(Alpha::LDAHr, MVT::i64, CPI, + getGlobalBaseReg()); Result = CurDAG->SelectNodeTo(N, Alpha::LDQr, MVT::i64, MVT::Other, - CPI, Tmp, CurDAG->getEntryNode()); + CPI, SDOperand(Tmp, 0), CurDAG->getEntryNode()); return; } case ISD::TargetConstantFP: { @@ -299,30 +301,31 @@ SDOperand tmp1, tmp2; Select(tmp1, N->getOperand(0)); Select(tmp2, N->getOperand(1)); - SDOperand cmp = CurDAG->getTargetNode(Opc, MVT::f64, - rev?tmp2:tmp1, - rev?tmp1:tmp2); + SDNode *cmp = CurDAG->getTargetNode(Opc, MVT::f64, + rev?tmp2:tmp1, + rev?tmp1:tmp2); if (isNE) - cmp = CurDAG->getTargetNode(Alpha::CMPTEQ, MVT::f64, cmp, + 
cmp = CurDAG->getTargetNode(Alpha::CMPTEQ, MVT::f64, SDOperand(cmp, 0), CurDAG->getRegister(Alpha::F31, MVT::f64)); SDOperand LD; if (AlphaLowering.hasITOF()) { - LD = CurDAG->getNode(AlphaISD::FTOIT_, MVT::i64, cmp); + LD = CurDAG->getNode(AlphaISD::FTOIT_, MVT::i64, SDOperand(cmp, 0)); } else { int FrameIdx = CurDAG->getMachineFunction().getFrameInfo()->CreateStackObject(8, 8); SDOperand FI = CurDAG->getFrameIndex(FrameIdx, MVT::i64); - SDOperand ST = CurDAG->getTargetNode(Alpha::STT, MVT::Other, - cmp, FI, CurDAG->getRegister(Alpha::R31, MVT::i64)); - LD = CurDAG->getTargetNode(Alpha::LDQ, MVT::i64, FI, - CurDAG->getRegister(Alpha::R31, MVT::i64), - ST); + SDOperand ST = + SDOperand(CurDAG->getTargetNode(Alpha::STT, MVT::Other, + SDOperand(cmp, 0), FI, + CurDAG->getRegister(Alpha::R31, MVT::i64)), 0); + LD = SDOperand(CurDAG->getTargetNode(Alpha::LDQ, MVT::i64, FI, + CurDAG->getRegister(Alpha::R31, MVT::i64), + ST), 0); } - SDOperand FP = CurDAG->getTargetNode(Alpha::CMPULT, MVT::i64, - CurDAG->getRegister(Alpha::R31, MVT::i64), - LD); - Result = FP; + Result = SDOperand(CurDAG->getTargetNode(Alpha::CMPULT, MVT::i64, + CurDAG->getRegister(Alpha::R31, MVT::i64), + LD), 0); return; } break; @@ -347,15 +350,15 @@ int FrameIdx = CurDAG->getMachineFunction().getFrameInfo()->CreateStackObject(8, 8); SDOperand FI = CurDAG->getFrameIndex(FrameIdx, MVT::i64); - SDOperand ST = CurDAG->getTargetNode(Alpha::STQ, MVT::Other, - cond, FI, CurDAG->getRegister(Alpha::R31, MVT::i64)); - LD = CurDAG->getTargetNode(Alpha::LDT, MVT::f64, FI, - CurDAG->getRegister(Alpha::R31, MVT::i64), - ST); + SDOperand ST = + SDOperand(CurDAG->getTargetNode(Alpha::STQ, MVT::Other, + cond, FI, CurDAG->getRegister(Alpha::R31, MVT::i64)), 0); + LD = SDOperand(CurDAG->getTargetNode(Alpha::LDT, MVT::f64, FI, + CurDAG->getRegister(Alpha::R31, MVT::i64), + ST), 0); } - SDOperand FP = CurDAG->getTargetNode(isDouble?Alpha::FCMOVNET:Alpha::FCMOVNES, - MVT::f64, FV, TV, LD); - Result = FP; + Result = 
SDOperand(CurDAG->getTargetNode(isDouble?Alpha::FCMOVNET:Alpha::FCMOVNES, + MVT::f64, FV, TV, LD), 0); return; } break; @@ -401,10 +404,10 @@ Opc = Alpha::STT; } else assert(0 && "Unknown operand"); - Chain = CurDAG->getTargetNode(Opc, MVT::Other, CallOperands[i], - getI64Imm((i - 6) * 8), - CurDAG->getCopyFromReg(Chain, Alpha::R30, MVT::i64), - Chain); + Chain = SDOperand(CurDAG->getTargetNode(Opc, MVT::Other, CallOperands[i], + getI64Imm((i - 6) * 8), + CurDAG->getCopyFromReg(Chain, Alpha::R30, MVT::i64), + Chain), 0); } for (int i = 0; i < std::min(6, count); ++i) { if (MVT::isInteger(TypeOperands[i])) { @@ -424,14 +427,14 @@ SDOperand GOT = getGlobalBaseReg(); Chain = CurDAG->getCopyToReg(Chain, Alpha::R29, GOT, InFlag); InFlag = Chain.getValue(1); - Chain = CurDAG->getTargetNode(Alpha::BSR, MVT::Other, MVT::Flag, - Addr.getOperand(0), Chain, InFlag); + Chain = SDOperand(CurDAG->getTargetNode(Alpha::BSR, MVT::Other, MVT::Flag, + Addr.getOperand(0), Chain, InFlag), 0); } else { Select(Addr, Addr); Chain = CurDAG->getCopyToReg(Chain, Alpha::R27, Addr, InFlag); InFlag = Chain.getValue(1); - Chain = CurDAG->getTargetNode(Alpha::JSR, MVT::Other, MVT::Flag, - Chain, InFlag ); + Chain = SDOperand(CurDAG->getTargetNode(Alpha::JSR, MVT::Other, MVT::Flag, + Chain, InFlag), 0); } InFlag = Chain.getValue(1); From evan.cheng at apple.com Thu Feb 9 01:18:03 2006 From: evan.cheng at apple.com (Evan Cheng) Date: Thu, 9 Feb 2006 01:18:03 -0600 Subject: [llvm-commits] CVS: llvm/lib/Target/IA64/IA64ISelDAGToDAG.cpp Message-ID: <200602090718.BAA11696@zion.cs.uiuc.edu> Changes in directory llvm/lib/Target/IA64: IA64ISelDAGToDAG.cpp updated: 1.35 -> 1.36 --- Log message: Match getTargetNode() changes (now return SDNode* instead of SDOperand). 
--- Diffs of the changes: (+78 -64) IA64ISelDAGToDAG.cpp | 142 ++++++++++++++++++++++++++++----------------------- 1 files changed, 78 insertions(+), 64 deletions(-) Index: llvm/lib/Target/IA64/IA64ISelDAGToDAG.cpp diff -u llvm/lib/Target/IA64/IA64ISelDAGToDAG.cpp:1.35 llvm/lib/Target/IA64/IA64ISelDAGToDAG.cpp:1.36 --- llvm/lib/Target/IA64/IA64ISelDAGToDAG.cpp:1.35 Wed Feb 8 18:37:58 2006 +++ llvm/lib/Target/IA64/IA64ISelDAGToDAG.cpp Thu Feb 9 01:17:49 2006 @@ -186,7 +186,7 @@ SDOperand TmpPR, TmpPR2; SDOperand TmpF1, TmpF2, TmpF3, TmpF4, TmpF5, TmpF6, TmpF7, TmpF8; SDOperand TmpF9, TmpF10,TmpF11,TmpF12,TmpF13,TmpF14,TmpF15; - SDOperand Result; + SDNode *Result; // we'll need copies of F0 and F1 SDOperand F0 = CurDAG->getRegister(IA64::F0, MVT::f64); @@ -196,21 +196,27 @@ if(!isFP) { // first, load the inputs into FP regs. - TmpF1 = CurDAG->getTargetNode(IA64::SETFSIG, MVT::f64, Tmp1); + TmpF1 = + SDOperand(CurDAG->getTargetNode(IA64::SETFSIG, MVT::f64, Tmp1), 0); Chain = TmpF1.getValue(1); - TmpF2 = CurDAG->getTargetNode(IA64::SETFSIG, MVT::f64, Tmp2); + TmpF2 = + SDOperand(CurDAG->getTargetNode(IA64::SETFSIG, MVT::f64, Tmp2), 0); Chain = TmpF2.getValue(1); // next, convert the inputs to FP if(isSigned) { - TmpF3 = CurDAG->getTargetNode(IA64::FCVTXF, MVT::f64, TmpF1); + TmpF3 = + SDOperand(CurDAG->getTargetNode(IA64::FCVTXF, MVT::f64, TmpF1), 0); Chain = TmpF3.getValue(1); - TmpF4 = CurDAG->getTargetNode(IA64::FCVTXF, MVT::f64, TmpF2); + TmpF4 = + SDOperand(CurDAG->getTargetNode(IA64::FCVTXF, MVT::f64, TmpF2), 0); Chain = TmpF4.getValue(1); } else { // is unsigned - TmpF3 = CurDAG->getTargetNode(IA64::FCVTXUFS1, MVT::f64, TmpF1); + TmpF3 = + SDOperand(CurDAG->getTargetNode(IA64::FCVTXUFS1, MVT::f64, TmpF1), 0); Chain = TmpF3.getValue(1); - TmpF4 = CurDAG->getTargetNode(IA64::FCVTXUFS1, MVT::f64, TmpF2); + TmpF4 = + SDOperand(CurDAG->getTargetNode(IA64::FCVTXUFS1, MVT::f64, TmpF2), 0); Chain = TmpF4.getValue(1); } @@ -223,11 +229,11 @@ // we start by computing an 
approximate reciprocal (good to 9 bits?) // note, this instruction writes _both_ TmpF5 (answer) and TmpPR (predicate) if(isFP) - TmpF5 = CurDAG->getTargetNode(IA64::FRCPAS0, MVT::f64, MVT::i1, - TmpF3, TmpF4); + TmpF5 = SDOperand(CurDAG->getTargetNode(IA64::FRCPAS0, MVT::f64, MVT::i1, + TmpF3, TmpF4), 0); else - TmpF5 = CurDAG->getTargetNode(IA64::FRCPAS1, MVT::f64, MVT::i1, - TmpF3, TmpF4); + TmpF5 = SDOperand(CurDAG->getTargetNode(IA64::FRCPAS1, MVT::f64, MVT::i1, + TmpF3, TmpF4), 0); TmpPR = TmpF5.getValue(1); Chain = TmpF5.getValue(2); @@ -235,24 +241,24 @@ SDOperand minusB; if(isModulus) { // for remainders, it'll be handy to have // copies of -input_b - minusB = CurDAG->getTargetNode(IA64::SUB, MVT::i64, - CurDAG->getRegister(IA64::r0, MVT::i64), Tmp2); + minusB = SDOperand(CurDAG->getTargetNode(IA64::SUB, MVT::i64, + CurDAG->getRegister(IA64::r0, MVT::i64), Tmp2), 0); Chain = minusB.getValue(1); } SDOperand TmpE0, TmpY1, TmpE1, TmpY2; - TmpE0 = CurDAG->getTargetNode(IA64::CFNMAS1, MVT::f64, - TmpF4, TmpF5, F1, TmpPR); + TmpE0 = SDOperand(CurDAG->getTargetNode(IA64::CFNMAS1, MVT::f64, + TmpF4, TmpF5, F1, TmpPR), 0); Chain = TmpE0.getValue(1); - TmpY1 = CurDAG->getTargetNode(IA64::CFMAS1, MVT::f64, - TmpF5, TmpE0, TmpF5, TmpPR); + TmpY1 = SDOperand(CurDAG->getTargetNode(IA64::CFMAS1, MVT::f64, + TmpF5, TmpE0, TmpF5, TmpPR), 0); Chain = TmpY1.getValue(1); - TmpE1 = CurDAG->getTargetNode(IA64::CFMAS1, MVT::f64, - TmpE0, TmpE0, F0, TmpPR); + TmpE1 = SDOperand(CurDAG->getTargetNode(IA64::CFMAS1, MVT::f64, + TmpE0, TmpE0, F0, TmpPR), 0); Chain = TmpE1.getValue(1); - TmpY2 = CurDAG->getTargetNode(IA64::CFMAS1, MVT::f64, - TmpY1, TmpE1, TmpY1, TmpPR); + TmpY2 = SDOperand(CurDAG->getTargetNode(IA64::CFMAS1, MVT::f64, + TmpY1, TmpE1, TmpY1, TmpPR), 0); Chain = TmpY2.getValue(1); if(isFP) { // if this is an FP divide, we finish up here and exit early @@ -261,42 +267,44 @@ SDOperand TmpE2, TmpY3, TmpQ0, TmpR0; - TmpE2 = CurDAG->getTargetNode(IA64::CFMAS1, MVT::f64, - 
TmpE1, TmpE1, F0, TmpPR); + TmpE2 = SDOperand(CurDAG->getTargetNode(IA64::CFMAS1, MVT::f64, + TmpE1, TmpE1, F0, TmpPR), 0); Chain = TmpE2.getValue(1); - TmpY3 = CurDAG->getTargetNode(IA64::CFMAS1, MVT::f64, - TmpY2, TmpE2, TmpY2, TmpPR); + TmpY3 = SDOperand(CurDAG->getTargetNode(IA64::CFMAS1, MVT::f64, + TmpY2, TmpE2, TmpY2, TmpPR), 0); Chain = TmpY3.getValue(1); - TmpQ0 = CurDAG->getTargetNode(IA64::CFMADS1, MVT::f64, // double prec! - Tmp1, TmpY3, F0, TmpPR); + TmpQ0 = + SDOperand(CurDAG->getTargetNode(IA64::CFMADS1, MVT::f64, // double prec! + Tmp1, TmpY3, F0, TmpPR), 0); Chain = TmpQ0.getValue(1); - TmpR0 = CurDAG->getTargetNode(IA64::CFNMADS1, MVT::f64, // double prec! - Tmp2, TmpQ0, Tmp1, TmpPR); + TmpR0 = + SDOperand(CurDAG->getTargetNode(IA64::CFNMADS1, MVT::f64, // double prec! + Tmp2, TmpQ0, Tmp1, TmpPR), 0); Chain = TmpR0.getValue(1); // we want Result to have the same target register as the frcpa, so // we two-address hack it. See the comment "for this to work..." on // page 48 of Intel application note #245415 Result = CurDAG->getTargetNode(IA64::TCFMADS0, MVT::f64, // d.p. s0 rndg! - TmpF5, TmpY3, TmpR0, TmpQ0, TmpPR); - Chain = Result.getValue(1); - return Result; // XXX: early exit! + TmpF5, TmpY3, TmpR0, TmpQ0, TmpPR); + Chain = SDOperand(Result, 1); + return SDOperand(Result, 0); // XXX: early exit! } else { // this is *not* an FP divide, so there's a bit left to do: SDOperand TmpQ2, TmpR2, TmpQ3, TmpQ; - TmpQ2 = CurDAG->getTargetNode(IA64::CFMAS1, MVT::f64, - TmpF3, TmpY2, F0, TmpPR); + TmpQ2 = SDOperand(CurDAG->getTargetNode(IA64::CFMAS1, MVT::f64, + TmpF3, TmpY2, F0, TmpPR), 0); Chain = TmpQ2.getValue(1); - TmpR2 = CurDAG->getTargetNode(IA64::CFNMAS1, MVT::f64, - TmpF4, TmpQ2, TmpF3, TmpPR); + TmpR2 = SDOperand(CurDAG->getTargetNode(IA64::CFNMAS1, MVT::f64, + TmpF4, TmpQ2, TmpF3, TmpPR), 0); Chain = TmpR2.getValue(1); // we want TmpQ3 to have the same target register as the frcpa? maybe we // should two-address hack it. 
See the comment "for this to work..." on page // 48 of Intel application note #245415 - TmpQ3 = CurDAG->getTargetNode(IA64::TCFMAS1, MVT::f64, - TmpF5, TmpR2, TmpY2, TmpQ2, TmpPR); + TmpQ3 = SDOperand(CurDAG->getTargetNode(IA64::TCFMAS1, MVT::f64, + TmpF5, TmpR2, TmpY2, TmpQ2, TmpPR), 0); Chain = TmpQ3.getValue(1); // STORY: without these two-address instructions (TCFMAS1 and TCFMADS0) @@ -304,27 +312,30 @@ // arguments. Other fun bugs may also appear, e.g. 0/x = x, not 0. if(isSigned) - TmpQ = CurDAG->getTargetNode(IA64::FCVTFXTRUNCS1, MVT::f64, TmpQ3); + TmpQ = SDOperand(CurDAG->getTargetNode(IA64::FCVTFXTRUNCS1, + MVT::f64, TmpQ3), 0); else - TmpQ = CurDAG->getTargetNode(IA64::FCVTFXUTRUNCS1, MVT::f64, TmpQ3); + TmpQ = SDOperand(CurDAG->getTargetNode(IA64::FCVTFXUTRUNCS1, + MVT::f64, TmpQ3), 0); Chain = TmpQ.getValue(1); if(isModulus) { - SDOperand FPminusB = CurDAG->getTargetNode(IA64::SETFSIG, MVT::f64, - minusB); + SDOperand FPminusB = + SDOperand(CurDAG->getTargetNode(IA64::SETFSIG, MVT::f64, minusB), 0); Chain = FPminusB.getValue(1); - SDOperand Remainder = CurDAG->getTargetNode(IA64::XMAL, MVT::f64, - TmpQ, FPminusB, TmpF1); + SDOperand Remainder = + SDOperand(CurDAG->getTargetNode(IA64::XMAL, MVT::f64, + TmpQ, FPminusB, TmpF1), 0); Chain = Remainder.getValue(1); Result = CurDAG->getTargetNode(IA64::GETFSIG, MVT::i64, Remainder); - Chain = Result.getValue(1); + Chain = SDOperand(Result, 1); } else { // just an integer divide Result = CurDAG->getTargetNode(IA64::GETFSIG, MVT::i64, TmpQ); - Chain = Result.getValue(1); + Chain = SDOperand(Result, 1); } - return Result; + return SDOperand(Result, 0); } // wasn't an FP divide } @@ -380,14 +391,15 @@ // load the branch target's entry point [mem] and // GP value [mem+8] - SDOperand targetEntryPoint=CurDAG->getTargetNode(IA64::LD8, MVT::i64, - FnDescriptor); + SDOperand targetEntryPoint= + SDOperand(CurDAG->getTargetNode(IA64::LD8, MVT::i64, FnDescriptor), 0); Chain = targetEntryPoint.getValue(1); - SDOperand 
targetGPAddr=CurDAG->getTargetNode(IA64::ADDS, MVT::i64, - FnDescriptor, CurDAG->getConstant(8, MVT::i64)); + SDOperand targetGPAddr= + SDOperand(CurDAG->getTargetNode(IA64::ADDS, MVT::i64, + FnDescriptor, CurDAG->getConstant(8, MVT::i64)), 0); Chain = targetGPAddr.getValue(1); - SDOperand targetGP=CurDAG->getTargetNode(IA64::LD8, MVT::i64, - targetGPAddr); + SDOperand targetGP= + SDOperand(CurDAG->getTargetNode(IA64::LD8, MVT::i64, targetGPAddr), 0); Chain = targetGP.getValue(1); Chain = CurDAG->getCopyToReg(Chain, IA64::r1, targetGP, InFlag); @@ -401,9 +413,11 @@ // Finally, once everything is setup, emit the call itself if(InFlag.Val) - Chain = CurDAG->getTargetNode(CallOpcode, MVT::Other, MVT::Flag, CallOperand, InFlag); + Chain = SDOperand(CurDAG->getTargetNode(CallOpcode, MVT::Other, MVT::Flag, + CallOperand, InFlag), 0); else // there might be no arguments - Chain = CurDAG->getTargetNode(CallOpcode, MVT::Other, MVT::Flag, CallOperand, Chain); + Chain = SDOperand(CurDAG->getTargetNode(CallOpcode, MVT::Other, MVT::Flag, + CallOperand, Chain), 0); InFlag = Chain.getValue(1); std::vector CallResults; @@ -420,7 +434,7 @@ case IA64ISD::GETFD: { SDOperand Input; Select(Input, N->getOperand(0)); - Result = CurDAG->getTargetNode(IA64::GETFD, MVT::i64, Input); + Result = SDOperand(CurDAG->getTargetNode(IA64::GETFD, MVT::i64, Input), 0); CodeGenMap[Op] = Result; return; } @@ -451,8 +465,8 @@ Result = CurDAG->SelectNodeTo(N, IA64::MOV, MVT::i64, CurDAG->getTargetFrameIndex(FI, MVT::i64)); else - Result = CodeGenMap[Op] = CurDAG->getTargetNode(IA64::MOV, MVT::i64, - CurDAG->getTargetFrameIndex(FI, MVT::i64)); + Result = CodeGenMap[Op] = SDOperand(CurDAG->getTargetNode(IA64::MOV, MVT::i64, + CurDAG->getTargetFrameIndex(FI, MVT::i64)), 0); return; } @@ -462,17 +476,17 @@ Constant *C = CP->get(); SDOperand CPI = CurDAG->getTargetConstantPool(C, MVT::i64, CP->getAlignment()); - Result = CurDAG->getTargetNode(IA64::ADDL_GA, MVT::i64, // ? 
- CurDAG->getRegister(IA64::r1, MVT::i64), CPI); + Result = SDOperand(CurDAG->getTargetNode(IA64::ADDL_GA, MVT::i64, // ? + CurDAG->getRegister(IA64::r1, MVT::i64), CPI), 0); return; } case ISD::GlobalAddress: { GlobalValue *GV = cast(N)->getGlobal(); SDOperand GA = CurDAG->getTargetGlobalAddress(GV, MVT::i64); - SDOperand Tmp = CurDAG->getTargetNode(IA64::ADDL_GA, MVT::i64, - CurDAG->getRegister(IA64::r1, MVT::i64), GA); - Result = CurDAG->getTargetNode(IA64::LD8, MVT::i64, Tmp); + SDOperand Tmp = SDOperand(CurDAG->getTargetNode(IA64::ADDL_GA, MVT::i64, + CurDAG->getRegister(IA64::r1, MVT::i64), GA), 0); + Result = SDOperand(CurDAG->getTargetNode(IA64::LD8, MVT::i64, Tmp), 0); return; } @@ -501,7 +515,7 @@ Opc = IA64::LD1; // first we load a byte, then compare for != 0 if(N->getValueType(0) == MVT::i1) { // XXX: early exit! Result = CurDAG->SelectNodeTo(N, IA64::CMPNE, MVT::i1, MVT::Other, - CurDAG->getTargetNode(Opc, MVT::i64, Address), + SDOperand(CurDAG->getTargetNode(Opc, MVT::i64, Address), 0), CurDAG->getRegister(IA64::r0, MVT::i64), Chain).getValue(Op.ResNo); return; From evan.cheng at apple.com Thu Feb 9 01:18:03 2006 From: evan.cheng at apple.com (Evan Cheng) Date: Thu, 9 Feb 2006 01:18:03 -0600 Subject: [llvm-commits] CVS: llvm/lib/Target/X86/X86ISelDAGToDAG.cpp Message-ID: <200602090718.BAA11700@zion.cs.uiuc.edu> Changes in directory llvm/lib/Target/X86: X86ISelDAGToDAG.cpp updated: 1.44 -> 1.45 --- Log message: Match getTargetNode() changes (now return SDNode* instead of SDOperand). 
--- Diffs of the changes: (+22 -14) X86ISelDAGToDAG.cpp | 36 ++++++++++++++++++++++-------------- 1 files changed, 22 insertions(+), 14 deletions(-) Index: llvm/lib/Target/X86/X86ISelDAGToDAG.cpp diff -u llvm/lib/Target/X86/X86ISelDAGToDAG.cpp:1.44 llvm/lib/Target/X86/X86ISelDAGToDAG.cpp:1.45 --- llvm/lib/Target/X86/X86ISelDAGToDAG.cpp:1.44 Wed Feb 8 18:37:58 2006 +++ llvm/lib/Target/X86/X86ISelDAGToDAG.cpp Thu Feb 9 01:17:49 2006 @@ -521,12 +521,15 @@ Select(Tmp1, Tmp1); Select(Tmp2, Tmp2); Select(Tmp3, Tmp3); - Chain = CurDAG->getTargetNode(MOpc, MVT::Other, MVT::Flag, Tmp0, Tmp1, - Tmp2, Tmp3, Chain, InFlag); - InFlag = Chain.getValue(1); + SDNode *CNode = + CurDAG->getTargetNode(MOpc, MVT::Other, MVT::Flag, Tmp0, Tmp1, + Tmp2, Tmp3, Chain, InFlag); + Chain = SDOperand(CNode, 0); + InFlag = SDOperand(CNode, 1); } else { Select(N1, N1); - InFlag = CurDAG->getTargetNode(Opc, MVT::Flag, N1, InFlag); + InFlag = + SDOperand(CurDAG->getTargetNode(Opc, MVT::Flag, N1, InFlag), 0); } Result = CurDAG->getCopyFromReg(Chain, HiReg, NVT, InFlag); @@ -601,12 +604,13 @@ if (isSigned) { // Sign extend the low part into the high part. - InFlag = CurDAG->getTargetNode(SExtOpcode, MVT::Flag, InFlag); + InFlag = + SDOperand(CurDAG->getTargetNode(SExtOpcode, MVT::Flag, InFlag), 0); } else { // Zero out the high part, effectively zero extending the input. 
SDOperand ClrNode = - CurDAG->getTargetNode(ClrOpcode, NVT, - CurDAG->getTargetConstant(0, NVT)); + SDOperand(CurDAG->getTargetNode(ClrOpcode, NVT, + CurDAG->getTargetConstant(0, NVT)), 0); Chain = CurDAG->getCopyToReg(Chain, CurDAG->getRegister(HiReg, NVT), ClrNode, InFlag); InFlag = Chain.getValue(1); @@ -617,12 +621,15 @@ Select(Tmp1, Tmp1); Select(Tmp2, Tmp2); Select(Tmp3, Tmp3); - Chain = CurDAG->getTargetNode(MOpc, MVT::Other, MVT::Flag, Tmp0, Tmp1, - Tmp2, Tmp3, Chain, InFlag); - InFlag = Chain.getValue(1); + SDNode *CNode = + CurDAG->getTargetNode(MOpc, MVT::Other, MVT::Flag, Tmp0, Tmp1, + Tmp2, Tmp3, Chain, InFlag); + Chain = SDOperand(CNode, 0); + InFlag = SDOperand(CNode, 1); } else { Select(N1, N1); - InFlag = CurDAG->getTargetNode(Opc, MVT::Flag, N1, InFlag); + InFlag = + SDOperand(CurDAG->getTargetNode(Opc, MVT::Flag, N1, InFlag), 0); } Result = CurDAG->getCopyFromReg(Chain, isDiv ? LoReg : HiReg, @@ -645,7 +652,7 @@ } SDOperand Tmp0, Tmp1; Select(Tmp0, Node->getOperand(0)); - Select(Tmp1, CurDAG->getTargetNode(Opc, VT, Tmp0)); + Select(Tmp1, SDOperand(CurDAG->getTargetNode(Opc, VT, Tmp0), 0)); SDOperand InFlag = SDOperand(0,0); Result = CurDAG->getCopyToReg(CurDAG->getEntryNode(), Reg, Tmp1, InFlag); SDOperand Chain = Result.getValue(0); @@ -659,9 +666,10 @@ Result = CurDAG->getCopyFromReg(Chain, Reg, VT, InFlag); if (N.Val->hasOneUse()) - Result =CurDAG->SelectNodeTo(N.Val, Opc, VT, Result); + Result = CurDAG->SelectNodeTo(N.Val, Opc, VT, Result); else - Result = CodeGenMap[N] = CurDAG->getTargetNode(Opc, VT, Result); + Result = CodeGenMap[N] = + SDOperand(CurDAG->getTargetNode(Opc, VT, Result), 0); return; } } From lattner at cs.uiuc.edu Thu Feb 9 01:38:42 2006 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Thu, 9 Feb 2006 01:38:42 -0600 Subject: [llvm-commits] CVS: llvm/test/Regression/Transforms/InstCombine/bittest.ll Message-ID: <200602090738.BAA11873@zion.cs.uiuc.edu> Changes in directory llvm/test/Regression/Transforms/InstCombine: 
bittest.ll added (r1.1) --- Log message: new testcase --- Diffs of the changes: (+29 -0) bittest.ll | 29 +++++++++++++++++++++++++++++ 1 files changed, 29 insertions(+) Index: llvm/test/Regression/Transforms/InstCombine/bittest.ll diff -c /dev/null llvm/test/Regression/Transforms/InstCombine/bittest.ll:1.1 *** /dev/null Thu Feb 9 01:38:40 2006 --- llvm/test/Regression/Transforms/InstCombine/bittest.ll Thu Feb 9 01:38:30 2006 *************** *** 0 **** --- 1,29 ---- + ; RUN: llvm-as < %s | opt -instcombine -simplifycfg -disable-output && + ; RUN: llvm-as < %s | opt -instcombine -simplifycfg | llvm-dis | not grep 'call void %abort' + + %b_rec.0 = external global int + + void %_Z12h000007_testv(uint *%P) { + entry: + %tmp.2 = load int* %b_rec.0 ; [#uses=1] + %tmp.9 = or int %tmp.2, -989855744 ; [#uses=2] + %tmp.16 = and int %tmp.9, -805306369 ; [#uses=2] + %tmp.17 = and int %tmp.9, -973078529 ; [#uses=1] + store int %tmp.17, int* %b_rec.0 + %tmp.17.shrunk = cast int %tmp.16 to uint ; [#uses=1] + %tmp.22 = and uint %tmp.17.shrunk, 3221225472 ; [#uses=1] + %tmp.23 = seteq uint %tmp.22, 3221225472 ; [#uses=1] + br bool %tmp.23, label %endif.0, label %then.0 + + then.0: ; preds = %entry + tail call void %abort( ) + unreachable + + endif.0: ; preds = %entry + %tmp.17.shrunk2 = cast int %tmp.16 to uint ; [#uses=1] + %tmp.27.mask = and uint %tmp.17.shrunk2, 100663295 ; [#uses=1] + store uint %tmp.27.mask, uint* %P + ret void + } + + declare void %abort() From lattner at cs.uiuc.edu Thu Feb 9 01:39:10 2006 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Thu, 9 Feb 2006 01:39:10 -0600 Subject: [llvm-commits] CVS: llvm/lib/Transforms/Scalar/InstructionCombining.cpp Message-ID: <200602090739.BAA11886@zion.cs.uiuc.edu> Changes in directory llvm/lib/Transforms/Scalar: InstructionCombining.cpp updated: 1.426 -> 1.427 --- Log message: Enhance MVIZ in three ways: 1. Teach it new tricks: in particular how to propagate through signed shr and sexts. 2. 
Teach it to return a bitset of known-1 and known-0 bits, instead of just zero. 3. Teach instcombine (AND X, C) to fold when we know all C bits of X. This implements Regression/Transforms/InstCombine/bittest.ll, and allows future things to be simplified. --- Diffs of the changes: (+157 -55) InstructionCombining.cpp | 212 ++++++++++++++++++++++++++++++++++------------- 1 files changed, 157 insertions(+), 55 deletions(-) Index: llvm/lib/Transforms/Scalar/InstructionCombining.cpp diff -u llvm/lib/Transforms/Scalar/InstructionCombining.cpp:1.426 llvm/lib/Transforms/Scalar/InstructionCombining.cpp:1.427 --- llvm/lib/Transforms/Scalar/InstructionCombining.cpp:1.426 Wed Feb 8 01:34:50 2006 +++ llvm/lib/Transforms/Scalar/InstructionCombining.cpp Thu Feb 9 01:38:58 2006 @@ -406,88 +406,182 @@ ConstantInt::get(C->getType(), 1))); } -/// ComputeMaskedNonZeroBits - Determine which of the bits specified in Mask are -/// not known to be zero and return them as a bitmask. The bits that we can -/// guarantee to be zero are returned as zero bits in the result. -static uint64_t ComputeMaskedNonZeroBits(Value *V, uint64_t Mask, - unsigned Depth = 0) { +/// ComputeMaskedBits - Determine which of the bits specified in Mask are +/// known to be either zero or one and return them in the KnownZero/KnownOne +/// bitsets. This code only analyzes bits in Mask, in order to short-circuit +/// processing. +static void ComputeMaskedBits(Value *V, uint64_t Mask, uint64_t &KnownZero, + uint64_t &KnownOne, unsigned Depth = 0) { // Note, we cannot consider 'undef' to be "IsZero" here. The problem is that // we cannot optimize based on the assumption that it is zero without changing // it to be an explicit zero. If we don't change it to zero, other code could // optimized based on the contradictory assumption that it is non-zero. // Because instcombine aggressively folds operations with undef args anyway, // this won't lose us code quality. 
- if (ConstantIntegral *CI = dyn_cast<ConstantIntegral>(V)) - return CI->getRawValue() & Mask; + if (ConstantIntegral *CI = dyn_cast<ConstantIntegral>(V)) { + // We know all of the bits for a constant! + KnownOne = CI->getZExtValue(); + KnownZero = ~KnownOne & Mask; + return; + } + + KnownZero = KnownOne = 0; // Don't know anything. if (Depth == 6 || Mask == 0) - return Mask; // Limit search depth. - + return; // Limit search depth. + + uint64_t KnownZero2, KnownOne2; if (Instruction *I = dyn_cast<Instruction>(V)) { switch (I->getOpcode()) { case Instruction::And: - // (X & C1) & C2 == 0 iff C1 & C2 == 0. - if (ConstantIntegral *CI = dyn_cast<ConstantIntegral>(I->getOperand(1))) - return ComputeMaskedNonZeroBits(I->getOperand(0), - CI->getRawValue() & Mask, Depth+1); - // If either the LHS or the RHS are MaskedValueIsZero, the result is zero. - Mask = ComputeMaskedNonZeroBits(I->getOperand(1), Mask, Depth+1); - Mask = ComputeMaskedNonZeroBits(I->getOperand(0), Mask, Depth+1); - return Mask; + // If either the LHS or the RHS are Zero, the result is zero. + ComputeMaskedBits(I->getOperand(1), Mask, KnownZero, KnownOne, Depth+1); + Mask &= ~KnownZero; + ComputeMaskedBits(I->getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // Output known-1 bits are only known if set in both the LHS & RHS. + KnownOne &= KnownOne2; + // Output known-0 are known to be clear if zero in either the LHS | RHS. + KnownZero |= KnownZero2; + return; case Instruction::Or: - case Instruction::Xor: - // Any non-zero bits in the LHS or RHS are potentially non-zero in the - // result.
- return ComputeMaskedNonZeroBits(I->getOperand(1), Mask, Depth+1) | - ComputeMaskedNonZeroBits(I->getOperand(0), Mask, Depth+1); + ComputeMaskedBits(I->getOperand(1), Mask, KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(I->getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // Output known-0 bits are only known if clear in both the LHS & RHS. + KnownZero &= KnownZero2; + // Output known-1 are known to be set if set in either the LHS | RHS. + KnownOne |= KnownOne2; + return; + case Instruction::Xor: { + ComputeMaskedBits(I->getOperand(1), Mask, KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(I->getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // Output known-0 bits are known if clear or set in both the LHS & RHS. + uint64_t KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2); + // Output known-1 are known to be set if set in only one of the LHS, RHS. + KnownOne = (KnownZero & KnownOne2) | (KnownOne & KnownZero2); + KnownZero = KnownZeroOut; + return; + } case Instruction::Select: - // Any non-zero bits in the T or F values are potentially non-zero in the - // result. - return ComputeMaskedNonZeroBits(I->getOperand(2), Mask, Depth+1) | - ComputeMaskedNonZeroBits(I->getOperand(1), Mask, Depth+1); + ComputeMaskedBits(I->getOperand(2), Mask, KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(I->getOperand(1), Mask, KnownZero2, KnownOne2, Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // Only known if known in both the LHS and RHS. 
+ KnownOne &= KnownOne2; + KnownZero &= KnownZero2; + return; case Instruction::Cast: { const Type *SrcTy = I->getOperand(0)->getType(); - if (SrcTy == Type::BoolTy) - return ComputeMaskedNonZeroBits(I->getOperand(0), Mask & 1, Depth+1); - if (!SrcTy->isInteger()) return Mask; + if (!SrcTy->isIntegral()) return; - // (cast X to int) & C2 == 0 iff could not have contained C2. - if (SrcTy->isUnsigned() || // Only handle zero ext/trunc/noop - SrcTy->getPrimitiveSizeInBits() >= - I->getType()->getPrimitiveSizeInBits()) { - Mask &= SrcTy->getIntegralTypeMask(); - return ComputeMaskedNonZeroBits(I->getOperand(0), Mask, Depth+1); + // If this is an integer truncate or noop, just look in the input. + if (SrcTy->getPrimitiveSizeInBits() >= + I->getType()->getPrimitiveSizeInBits()) { + ComputeMaskedBits(I->getOperand(0), Mask, KnownZero, KnownOne, Depth+1); + return; } - // FIXME: handle sext casts. - break; + // Sign or Zero extension. Compute the bits in the result that are not + // present in the input. + uint64_t NotIn = ~SrcTy->getIntegralTypeMask(); + uint64_t NewBits = I->getType()->getIntegralTypeMask() & NotIn; + + // Handle zero extension. + if (!SrcTy->isSigned()) { + Mask &= SrcTy->getIntegralTypeMask(); + ComputeMaskedBits(I->getOperand(0), Mask, KnownZero, KnownOne, Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + // The top bits are known to be zero. + KnownZero |= NewBits; + } else { + // Sign extension. + Mask &= SrcTy->getIntegralTypeMask(); + ComputeMaskedBits(I->getOperand(0), Mask, KnownZero, KnownOne, Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + + // If the sign bit of the input is known set or clear, then we know the + // top bits of the result. 
+ uint64_t InSignBit = 1ULL << (SrcTy->getPrimitiveSizeInBits()-1); + if (KnownZero & InSignBit) { // Input sign bit known zero + KnownZero |= NewBits; + KnownOne &= ~NewBits; + } else if (KnownOne & InSignBit) { // Input sign bit known set + KnownOne |= NewBits; + KnownZero &= ~NewBits; + } else { // Input sign bit unknown + KnownZero &= ~NewBits; + KnownOne &= ~NewBits; + } + } + return; } case Instruction::Shl: // (shl X, C1) & C2 == 0 iff (X & C2 >>u C1) == 0 - if (ConstantUInt *SA = dyn_cast<ConstantUInt>(I->getOperand(1))) - return ComputeMaskedNonZeroBits(I->getOperand(0),Mask >> SA->getValue(), - Depth+1) << SA->getValue(); + if (ConstantUInt *SA = dyn_cast<ConstantUInt>(I->getOperand(1))) { + Mask >> SA->getValue(); + ComputeMaskedBits(I->getOperand(0), Mask, KnownZero, KnownOne, Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + KnownZero <<= SA->getValue(); + KnownOne <<= SA->getValue(); + KnownZero |= (1ULL << SA->getValue())-1; // low bits known zero. + return; + } break; case Instruction::Shr: // (ushr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0 - if (ConstantUInt *SA = dyn_cast<ConstantUInt>(I->getOperand(1))) - if (I->getType()->isUnsigned()) { - Mask <<= SA->getValue(); - Mask &= I->getType()->getIntegralTypeMask(); - return ComputeMaskedNonZeroBits(I->getOperand(0), Mask, Depth+1) - >> SA->getValue(); + if (ConstantUInt *SA = dyn_cast<ConstantUInt>(I->getOperand(1))) { + // Compute the new bits that are at the top now. + uint64_t HighBits = (1ULL << SA->getValue())-1; + HighBits <<= I->getType()->getPrimitiveSizeInBits()-SA->getValue(); + + if (I->getType()->isUnsigned()) { // Unsigned shift right. + Mask << SA->getValue(); + ComputeMaskedBits(I->getOperand(0), Mask, KnownZero,KnownOne,Depth+1); + assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?"); + KnownZero >>= SA->getValue(); + KnownOne >>= SA->getValue(); + KnownZero |= HighBits; // high bits known zero.
+ } else { + Mask << SA->getValue(); + ComputeMaskedBits(I->getOperand(0), Mask, KnownZero,KnownOne,Depth+1); + assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?"); + KnownZero >>= SA->getValue(); + KnownOne >>= SA->getValue(); + + // Handle the sign bits. + uint64_t SignBit = 1ULL << (I->getType()->getPrimitiveSizeInBits()-1); + SignBit >>= SA->getValue(); // Adjust to where it is now in the mask. + + if (KnownZero & SignBit) { // New bits are known zero. + KnownZero |= HighBits; + } else if (KnownOne & SignBit) { // New bits are known one. + KnownOne |= HighBits; + } } + return; + } break; } } - - return Mask; } /// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero. We use /// this predicate to simplify operations downstream. Mask is known to be zero /// for bits that V cannot have. static bool MaskedValueIsZero(Value *V, uint64_t Mask, unsigned Depth = 0) { - return ComputeMaskedNonZeroBits(V, Mask, Depth) == 0; + uint64_t KnownZero, KnownOne; + ComputeMaskedBits(V, Mask, KnownZero, KnownOne, Depth); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + return (KnownZero & Mask) == Mask; } /// SimplifyDemandedBits - Look at V. At this point, we know that only the Mask @@ -879,8 +973,9 @@ } if (Found) { // This is a sign extend if the top bits are known zero. - uint64_t Mask = XorLHS->getType()->getIntegralTypeMask(); + uint64_t Mask = ~0ULL; Mask <<= 64-(TySizeBits-Size); + Mask &= XorLHS->getType()->getIntegralTypeMask(); if (!MaskedValueIsZero(XorLHS, Mask)) Size = 0; // Not a sign ext, but can't be any others either. goto FoundSExt; @@ -1949,22 +2044,29 @@ // Figure out which of the input bits are not known to be zero, and which // bits are known to be zero. 
- uint64_t NonZeroBits = ComputeMaskedNonZeroBits(Op0, TypeMask); - uint64_t ZeroBits = NonZeroBits^TypeMask; + uint64_t KnownZeroBits, KnownOneBits; + ComputeMaskedBits(Op0, TypeMask, KnownZeroBits, KnownOneBits); // If the mask is not masking out any bits (i.e. all of the zeros in the // mask are already known to be zero), there is no reason to do the and in // the first place. uint64_t NotAndRHS = AndRHSMask^TypeMask; - if ((NotAndRHS & ZeroBits) == NotAndRHS) + if ((NotAndRHS & KnownZeroBits) == NotAndRHS) return ReplaceInstUsesWith(I, Op0); + // If the AND'd bits are all known, turn this AND into a constant. + if ((AndRHSMask & (KnownOneBits|KnownZeroBits)) == AndRHSMask) { + Constant *NewRHS = ConstantUInt::get(Type::ULongTy, + AndRHSMask & KnownOneBits); + return ReplaceInstUsesWith(I, ConstantExpr::getCast(NewRHS, I.getType())); + } + // If the AND mask contains bits that are known zero, remove them. A // special case is when there are no bits in common, in which case we // implicitly turn this into an AND X, 0, which is later simplified into 0. 
- if ((AndRHSMask & NonZeroBits) != AndRHSMask) { + if ((AndRHSMask & ~KnownZeroBits) != AndRHSMask) { Constant *NewRHS = - ConstantUInt::get(Type::ULongTy, AndRHSMask & NonZeroBits); + ConstantUInt::get(Type::ULongTy, AndRHSMask & ~KnownZeroBits); I.setOperand(1, ConstantExpr::getCast(NewRHS, I.getType())); return &I; } From lattner at cs.uiuc.edu Thu Feb 9 01:41:25 2006 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Thu, 9 Feb 2006 01:41:25 -0600 Subject: [llvm-commits] CVS: llvm/lib/Transforms/Scalar/InstructionCombining.cpp Message-ID: <200602090741.BAA11929@zion.cs.uiuc.edu> Changes in directory llvm/lib/Transforms/Scalar: InstructionCombining.cpp updated: 1.427 -> 1.428 --- Log message: Fix 80-column violations --- Diffs of the changes: (+9 -8) InstructionCombining.cpp | 17 +++++++++-------- 1 files changed, 9 insertions(+), 8 deletions(-) Index: llvm/lib/Transforms/Scalar/InstructionCombining.cpp diff -u llvm/lib/Transforms/Scalar/InstructionCombining.cpp:1.427 llvm/lib/Transforms/Scalar/InstructionCombining.cpp:1.428 --- llvm/lib/Transforms/Scalar/InstructionCombining.cpp:1.427 Thu Feb 9 01:38:58 2006 +++ llvm/lib/Transforms/Scalar/InstructionCombining.cpp Thu Feb 9 01:41:14 2006 @@ -3819,9 +3819,9 @@ Op0BO->getOperand(0), Op1, Op0BO->getName()); InsertNewInstBefore(YS, I); // (Y << C) - Instruction *X = BinaryOperator::create(Op0BO->getOpcode(), YS, - V1, - Op0BO->getOperand(1)->getName()); + Instruction *X = + BinaryOperator::create(Op0BO->getOpcode(), YS, V1, + Op0BO->getOperand(1)->getName()); InsertNewInstBefore(X, I); // (X + (Y << C)) Constant *C2 = ConstantInt::getAllOnesValue(X->getType()); C2 = ConstantExpr::getShl(C2, Op1); @@ -3833,7 +3833,7 @@ match(Op0BO->getOperand(1), m_And(m_Shr(m_Value(V1), m_Value(V2)), m_ConstantInt(CC))) && V2 == Op1 && - cast(Op0BO->getOperand(1))->getOperand(0)->hasOneUse()) { + cast(Op0BO->getOperand(1))->getOperand(0)->hasOneUse()) { Instruction *YS = new ShiftInst(Instruction::Shl, Op0BO->getOperand(0), Op1, 
Op0BO->getName()); @@ -3856,9 +3856,9 @@ Op0BO->getOperand(1), Op1, Op0BO->getName()); InsertNewInstBefore(YS, I); // (Y << C) - Instruction *X = BinaryOperator::create(Op0BO->getOpcode(), YS, - V1, - Op0BO->getOperand(0)->getName()); + Instruction *X = + BinaryOperator::create(Op0BO->getOpcode(), YS, V1, + Op0BO->getOperand(0)->getName()); InsertNewInstBefore(X, I); // (X + (Y << C)) Constant *C2 = ConstantInt::getAllOnesValue(X->getType()); C2 = ConstantExpr::getShl(C2, Op1); @@ -3869,7 +3869,8 @@ match(Op0BO->getOperand(0), m_And(m_Shr(m_Value(V1), m_Value(V2)), m_ConstantInt(CC))) && V2 == Op1 && - cast(Op0BO->getOperand(0))->getOperand(0)->hasOneUse()) { + cast(Op0BO->getOperand(0)) + ->getOperand(0)->hasOneUse()) { Instruction *YS = new ShiftInst(Instruction::Shl, Op0BO->getOperand(1), Op1, Op0BO->getName()); From lattner at cs.uiuc.edu Thu Feb 9 13:15:04 2006 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Thu, 9 Feb 2006 13:15:04 -0600 Subject: [llvm-commits] CVS: llvm/lib/Transforms/Scalar/LoopUnswitch.cpp Message-ID: <200602091915.NAA25431@zion.cs.uiuc.edu> Changes in directory llvm/lib/Transforms/Scalar: LoopUnswitch.cpp updated: 1.6 -> 1.7 --- Log message: Simplify the loop-unswitch pass, by not even trying to unswitch loops with uses of loop values outside the loop. We need loop-closed SSA form to do this right, or to use SSA rewriting if we really care. 
--- Diffs of the changes: (+74 -86) LoopUnswitch.cpp | 160 +++++++++++++++++++++++++------------------------------ 1 files changed, 74 insertions(+), 86 deletions(-) Index: llvm/lib/Transforms/Scalar/LoopUnswitch.cpp diff -u llvm/lib/Transforms/Scalar/LoopUnswitch.cpp:1.6 llvm/lib/Transforms/Scalar/LoopUnswitch.cpp:1.7 --- llvm/lib/Transforms/Scalar/LoopUnswitch.cpp:1.6 Sun Jan 22 17:32:06 2006 +++ llvm/lib/Transforms/Scalar/LoopUnswitch.cpp Thu Feb 9 13:14:52 2006 @@ -31,7 +31,6 @@ #include "llvm/Constants.h" #include "llvm/Function.h" #include "llvm/Instructions.h" -#include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" @@ -39,6 +38,7 @@ #include "llvm/ADT/Statistic.h" #include #include +#include using namespace llvm; namespace { @@ -46,7 +46,6 @@ class LoopUnswitch : public FunctionPass { LoopInfo *LI; // Loop information - DominatorSet *DS; public: virtual bool runOnFunction(Function &F); bool visitLoop(Loop *L); @@ -56,7 +55,6 @@ /// virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequiredID(LoopSimplifyID); - //AU.addRequired(); AU.addRequired(); AU.addPreserved(); } @@ -74,7 +72,6 @@ bool LoopUnswitch::runOnFunction(Function &F) { bool Changed = false; LI = &getAnalysis(); - DS = 0; //&getAnalysis(); // Transform all the top-level loops. Copy the loop list so that the child // can update the loop tree if it needs to delete the loop. @@ -85,6 +82,36 @@ return Changed; } + +/// InsertPHINodesForUsesOutsideLoop - If this instruction is used outside of +/// the specified loop, insert a PHI node in the appropriate exit block to merge +/// the values in the two different loop versions. +/// +/// Most values are not used outside of the loop they are defined in, so be +/// efficient for this case. +/// +static bool LoopValuesUsedOutsideLoop(Loop *L) { + // We will be doing lots of "loop contains block" queries. 
Loop::contains is + // linear time, use a set to speed this up. + std::set LoopBlocks; + + for (Loop::block_iterator BB = L->block_begin(), E = L->block_end(); + BB != E; ++BB) + LoopBlocks.insert(*BB); + + for (Loop::block_iterator BB = L->block_begin(), E = L->block_end(); + BB != E; ++BB) { + for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end(); I != E; ++I) + for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E; + ++UI) { + BasicBlock *UserBB = cast(*UI)->getParent(); + if (!LoopBlocks.count(UserBB)) + return true; + } + } + return false; +} + bool LoopUnswitch::visitLoop(Loop *L) { bool Changed = false; @@ -103,27 +130,50 @@ TerminatorInst *TI = (*I)->getTerminator(); if (SwitchInst *SI = dyn_cast(TI)) { if (!isa(SI) && L->isLoopInvariant(SI->getCondition())) - DEBUG(std::cerr << "Can't unswitching 'switch' loop %" + DEBUG(std::cerr << "TODO: Implement unswitching 'switch' loop %" << L->getHeader()->getName() << ", cost = " << L->getBlocks().size() << "\n" << **I); - } else if (BranchInst *BI = dyn_cast(TI)) - if (BI->isConditional() && !isa(BI->getCondition()) && - L->isLoopInvariant(BI->getCondition())) { - // Check to see if it would be profitable to unswitch this loop. - if (L->getBlocks().size() > 10) { - DEBUG(std::cerr << "NOT unswitching loop %" - << L->getHeader()->getName() << ", cost too high: " - << L->getBlocks().size() << "\n"); - } else { - // FIXME: check for profitability. - //std::cerr << "BEFORE:\n"; LI->dump(); - - VersionLoop(BI->getCondition(), L); - - //std::cerr << "AFTER:\n"; LI->dump(); - return true; - } - } + continue; + } + + BranchInst *BI = dyn_cast(TI); + if (!BI) continue; + + // If this isn't branching on an invariant condition, we can't unswitch it. + if (!BI->isConditional() || isa(BI->getCondition()) || + !L->isLoopInvariant(BI->getCondition())) + continue; + + // Check to see if it would be profitable to unswitch this loop. 
+ if (L->getBlocks().size() > 10) { + // FIXME: this should estimate growth by the amount of code shared by the + // resultant unswitched loops. This should have no code growth: + // for () { if (iv) {...} } + // as one copy of the loop will be empty. + // + DEBUG(std::cerr << "NOT unswitching loop %" + << L->getHeader()->getName() << ", cost too high: " + << L->getBlocks().size() << "\n"); + continue; + } + + // If this loop has live-out values, we can't unswitch it. We need something + // like loop-closed SSA form in order to know how to insert PHI nodes for + // these values. + if (LoopValuesUsedOutsideLoop(L)) { + DEBUG(std::cerr << "NOT unswitching loop %" + << L->getHeader()->getName() + << ", a loop value is used outside loop!\n"); + continue; + } + + //std::cerr << "BEFORE:\n"; LI->dump(); + VersionLoop(BI->getCondition(), L); + //std::cerr << "AFTER:\n"; LI->dump(); + + // FIXME: Why return here? What if we have: + // "for () { if (iv1) { if (iv2) { } } }" ? + return true; } return Changed; @@ -191,52 +241,6 @@ } -/// InsertPHINodesForUsesOutsideLoop - If this instruction is used outside of -/// the specified loop, insert a PHI node in the appropriate exit block to merge -/// the values in the two different loop versions. -/// -/// Most values are not used outside of the loop they are defined in, so be -/// efficient for this case. -/// -static AllocaInst * -InsertPHINodesForUsesOutsideLoop(Instruction *OI, Instruction *NI, - DominatorSet &DS, Loop *OL, Loop *NL, - std::vector &OldExitBlocks, - std::map &ValueMap) { - assert(OI->getType() == NI->getType() && OI->getOpcode() == NI->getOpcode() && - "Hrm, should be mapping between identical instructions!"); - for (Value::use_iterator UI = OI->use_begin(), E = OI->use_end(); UI != E; - ++UI) - if (!OL->contains(cast(*UI)->getParent()) && - !NL->contains(cast(*UI)->getParent())) - goto UsedOutsideOfLoop; - return 0; - -UsedOutsideOfLoop: - // Okay, this instruction is used outside of the current loop. 
Insert a PHI - // nodes for the instruction merging the values together. - - // FIXME: For now we just spill the object to the stack, assuming that a - // subsequent mem2reg pass will clean up after us. This should be improved in - // two ways: - // 1. If there is only one exit block, trivially insert the PHI nodes - // 2. Once we update domfrontier, we should do the promotion after everything - // is stable again. - AllocaInst *Result = DemoteRegToStack(*OI); - - // Store to the stack location right after the new instruction. - BasicBlock::iterator InsertPoint = NI; - if (InvokeInst *II = dyn_cast(NI)) - InsertPoint = II->getNormalDest()->begin(); - else - ++InsertPoint; - while (isa(InsertPoint)) ++InsertPoint; - new StoreInst(NI, Result, InsertPoint); - return Result; -} - - - /// VersionLoop - We determined that the loop is profitable to unswitch and /// contains a branch on a loop invariant condition. Split it into loop /// versions and test the condition outside of either loop. @@ -298,23 +302,7 @@ for (BasicBlock::iterator I = NewBlocks[i]->begin(), E = NewBlocks[i]->end(); I != E; ++I) RemapInstruction(I, ValueMap); - - // If the instructions are used outside of the loop, insert a PHI node in any - // exit blocks dominated by the instruction. - for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i) - for (BasicBlock::iterator OI = LoopBlocks[i]->begin(), - E = LoopBlocks[i]->end(); OI != E; ++OI) - if (!OI->use_empty()) { - std::map::iterator OII = ValueMap.find(OI); - // The PHINode rewriting stuff can insert stores that are not in the - // mapping. Don't mess around with them. - if (OII != ValueMap.end()) { - Instruction *NI = cast(OII->second); - InsertPHINodesForUsesOutsideLoop(OI, NI, *DS, L, NewLoop, - ExitBlocks, ValueMap); - } - } - + // Rewrite the original preheader to select between versions of the loop. 
assert(isa(OrigPreheader->getTerminator()) && cast(OrigPreheader->getTerminator())->isUnconditional() && From lattner at cs.uiuc.edu Thu Feb 9 14:00:07 2006 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Thu, 9 Feb 2006 14:00:07 -0600 Subject: [llvm-commits] CVS: llvm/lib/Target/Sparc/SparcTargetMachine.cpp Message-ID: <200602092000.OAA25642@zion.cs.uiuc.edu> Changes in directory llvm/lib/Target/Sparc: SparcTargetMachine.cpp updated: 1.41 -> 1.42 --- Log message: Enable LSR by default for SPARC: it is a clear win. --- Diffs of the changes: (+1 -4) SparcTargetMachine.cpp | 5 +---- 1 files changed, 1 insertion(+), 4 deletions(-) Index: llvm/lib/Target/Sparc/SparcTargetMachine.cpp diff -u llvm/lib/Target/Sparc/SparcTargetMachine.cpp:1.41 llvm/lib/Target/Sparc/SparcTargetMachine.cpp:1.42 --- llvm/lib/Target/Sparc/SparcTargetMachine.cpp:1.41 Wed Feb 8 23:06:36 2006 +++ llvm/lib/Target/Sparc/SparcTargetMachine.cpp Thu Feb 9 13:59:55 2006 @@ -20,15 +20,12 @@ #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetMachineRegistry.h" #include "llvm/Transforms/Scalar.h" -#include "llvm/Support/CommandLine.h" #include using namespace llvm; namespace { // Register the target. RegisterTarget X("sparc", " SPARC"); - - cl::opt EnableLSR("enable-sparc-lsr", cl::Hidden); } /// SparcTargetMachine ctor - Create an ILP32 architecture model @@ -68,7 +65,7 @@ if (FileType != TargetMachine::AssemblyFile) return true; // Run loop strength reduction before anything else. - if (EnableLSR && !Fast) PM.add(createLoopStrengthReducePass()); + if (!Fast) PM.add(createLoopStrengthReducePass()); // FIXME: Implement efficient support for garbage collection intrinsics. 
PM.add(createLowerGCPass()); From lattner at cs.uiuc.edu Thu Feb 9 14:00:31 2006 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Thu, 9 Feb 2006 14:00:31 -0600 Subject: [llvm-commits] CVS: llvm/lib/Target/Sparc/README.txt Message-ID: <200602092000.OAA25677@zion.cs.uiuc.edu> Changes in directory llvm/lib/Target/Sparc: README.txt updated: 1.50 -> 1.51 --- Log message: Done --- Diffs of the changes: (+0 -1) README.txt | 1 - 1 files changed, 1 deletion(-) Index: llvm/lib/Target/Sparc/README.txt diff -u llvm/lib/Target/Sparc/README.txt:1.50 llvm/lib/Target/Sparc/README.txt:1.51 --- llvm/lib/Target/Sparc/README.txt:1.50 Sun Feb 5 00:32:59 2006 +++ llvm/lib/Target/Sparc/README.txt Thu Feb 9 14:00:19 2006 @@ -2,7 +2,6 @@ To-do ----- -* Enable LSR for SPARC. * Keep the address of the constant pool in a register instead of forming its address all of the time. * We can fold small constant offsets into the %hi/%lo references to constant From lattner at cs.uiuc.edu Thu Feb 9 14:01:31 2006 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Thu, 9 Feb 2006 14:01:31 -0600 Subject: [llvm-commits] CVS: llvm-test/Makefile.programs Message-ID: <200602092001.OAA25728@zion.cs.uiuc.edu> Changes in directory llvm-test: Makefile.programs updated: 1.190 -> 1.191 --- Log message: change the beta option back --- Diffs of the changes: (+1 -1) Makefile.programs | 2 +- 1 files changed, 1 insertion(+), 1 deletion(-) Index: llvm-test/Makefile.programs diff -u llvm-test/Makefile.programs:1.190 llvm-test/Makefile.programs:1.191 --- llvm-test/Makefile.programs:1.190 Wed Feb 8 23:07:57 2006 +++ llvm-test/Makefile.programs Thu Feb 9 14:01:19 2006 @@ -200,7 +200,7 @@ LLCBETAOPTION := -enable-x86-sse endif ifeq ($(ARCH),Sparc) -LLCBETAOPTION := -enable-sparc-lsr +LLCBETAOPTION := -enable-sparc-v9-insts endif # Given a version of the entire program linked together into a single unit of From lattner at cs.uiuc.edu Thu Feb 9 14:16:00 2006 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Thu, 9 Feb 2006 
14:16:00 -0600 Subject: [llvm-commits] CVS: llvm/lib/Transforms/Scalar/LoopUnswitch.cpp Message-ID: <200602092016.OAA25837@zion.cs.uiuc.edu> Changes in directory llvm/lib/Transforms/Scalar: LoopUnswitch.cpp updated: 1.7 -> 1.8 --- Log message: Make the threshold a parameter --- Diffs of the changes: (+7 -3) LoopUnswitch.cpp | 10 +++++++--- 1 files changed, 7 insertions(+), 3 deletions(-) Index: llvm/lib/Transforms/Scalar/LoopUnswitch.cpp diff -u llvm/lib/Transforms/Scalar/LoopUnswitch.cpp:1.7 llvm/lib/Transforms/Scalar/LoopUnswitch.cpp:1.8 --- llvm/lib/Transforms/Scalar/LoopUnswitch.cpp:1.7 Thu Feb 9 13:14:52 2006 +++ llvm/lib/Transforms/Scalar/LoopUnswitch.cpp Thu Feb 9 14:15:48 2006 @@ -34,8 +34,9 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" -#include "llvm/Support/Debug.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/CommandLine.h" #include #include #include @@ -43,7 +44,10 @@ namespace { Statistic<> NumUnswitched("loop-unswitch", "Number of loops unswitched"); - + cl::opt + Threshold("loop-unswitch-threshold", cl::desc("Max loop size to unswitch"), + cl::init(10), cl::Hidden); + class LoopUnswitch : public FunctionPass { LoopInfo *LI; // Loop information public: @@ -145,7 +149,7 @@ continue; // Check to see if it would be profitable to unswitch this loop. - if (L->getBlocks().size() > 10) { + if (L->getBlocks().size() > Threshold) { // FIXME: this should estimate growth by the amount of code shared by the // resultant unswitched loops. 
This should have no code growth: // for () { if (iv) {...} } From evan.cheng at apple.com Thu Feb 9 16:11:15 2006 From: evan.cheng at apple.com (Evan Cheng) Date: Thu, 9 Feb 2006 16:11:15 -0600 Subject: [llvm-commits] CVS: llvm/include/llvm/CodeGen/SelectionDAG.h Message-ID: <200602092211.QAA26363@zion.cs.uiuc.edu> Changes in directory llvm/include/llvm/CodeGen: SelectionDAG.h updated: 1.96 -> 1.97 --- Log message: Added SelectionDAG::InsertISelMapEntry(). This is used to workaround the gcc problem where it inline the map insertion call too aggressively. Before this change it was producing a frame size of 24k for Select_store(), now it's down to 10k (by calling this method rather than calling the map insertion operator). --- Diffs of the changes: (+8 -2) SelectionDAG.h | 10 ++++++++-- 1 files changed, 8 insertions(+), 2 deletions(-) Index: llvm/include/llvm/CodeGen/SelectionDAG.h diff -u llvm/include/llvm/CodeGen/SelectionDAG.h:1.96 llvm/include/llvm/CodeGen/SelectionDAG.h:1.97 --- llvm/include/llvm/CodeGen/SelectionDAG.h:1.96 Thu Feb 9 01:15:23 2006 +++ llvm/include/llvm/CodeGen/SelectionDAG.h Thu Feb 9 16:11:03 2006 @@ -395,14 +395,20 @@ std::vector *Deleted = 0); void ReplaceAllUsesWith(SDNode *From, const std::vector &To, std::vector *Deleted = 0); - - + /// DeleteNode - Remove the specified node from the system. This node must /// have no referrers. void DeleteNode(SDNode *N); void dump() const; + /// InsertISelMapEntry - A helper function to insert a key / element pair + /// into a SDOperand to SDOperand map. This is added to avoid the map + /// insertion operator from being inlined. 
+ static void InsertISelMapEntry(std::map &Map, + SDNode *Key, unsigned KeyResNo, + SDNode *Element, unsigned ElementResNo); + private: void RemoveNodeFromCSEMaps(SDNode *N); SDNode *AddNonLeafNodeToCSEMaps(SDNode *N); From evan.cheng at apple.com Thu Feb 9 16:11:15 2006 From: evan.cheng at apple.com (Evan Cheng) Date: Thu, 9 Feb 2006 16:11:15 -0600 Subject: [llvm-commits] CVS: llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp Message-ID: <200602092211.QAA26367@zion.cs.uiuc.edu> Changes in directory llvm/lib/CodeGen/SelectionDAG: SelectionDAG.cpp updated: 1.255 -> 1.256 --- Log message: Added SelectionDAG::InsertISelMapEntry(). This is used to workaround the gcc problem where it inline the map insertion call too aggressively. Before this change it was producing a frame size of 24k for Select_store(), now it's down to 10k (by calling this method rather than calling the map insertion operator). --- Diffs of the changes: (+9 -0) SelectionDAG.cpp | 9 +++++++++ 1 files changed, 9 insertions(+) Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp diff -u llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp:1.255 llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp:1.256 --- llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp:1.255 Thu Feb 9 01:15:23 2006 +++ llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp Thu Feb 9 16:11:03 2006 @@ -2748,3 +2748,12 @@ std::cerr << "\n\n"; } +/// InsertISelMapEntry - A helper function to insert a key / element pair +/// into a SDOperand to SDOperand map. This is added to avoid the map +/// insertion operator from being inlined. 
+void SelectionDAG::InsertISelMapEntry(std::map &Map, + SDNode *Key, unsigned KeyResNo, + SDNode *Element, unsigned ElementResNo) { + Map.insert(std::make_pair(SDOperand(Key, KeyResNo), + SDOperand(Element, ElementResNo))); +} From evan.cheng at apple.com Thu Feb 9 16:12:39 2006 From: evan.cheng at apple.com (Evan Cheng) Date: Thu, 9 Feb 2006 16:12:39 -0600 Subject: [llvm-commits] CVS: llvm/utils/TableGen/DAGISelEmitter.cpp Message-ID: <200602092212.QAA26382@zion.cs.uiuc.edu> Changes in directory llvm/utils/TableGen: DAGISelEmitter.cpp updated: 1.171 -> 1.172 --- Log message: Call InsertISelMapEntry rather than map insertion operator to prevent overly aggressive inlining. This reduces Select_store frame size from 24k to 10k. --- Diffs of the changes: (+64 -45) DAGISelEmitter.cpp | 109 +++++++++++++++++++++++++++++++---------------------- 1 files changed, 64 insertions(+), 45 deletions(-) Index: llvm/utils/TableGen/DAGISelEmitter.cpp diff -u llvm/utils/TableGen/DAGISelEmitter.cpp:1.171 llvm/utils/TableGen/DAGISelEmitter.cpp:1.172 --- llvm/utils/TableGen/DAGISelEmitter.cpp:1.171 Thu Feb 9 01:16:09 2006 +++ llvm/utils/TableGen/DAGISelEmitter.cpp Thu Feb 9 16:12:27 2006 @@ -2395,32 +2395,29 @@ unsigned ValNo = 0; for (unsigned i = 0; i < NumResults; i++) { - emitCode("CodeGenMap[N.getValue(" + utostr(ValNo) + - ")] = SDOperand(ResNode, " + utostr(ValNo) + ");"); + emitCode("SelectionDAG::InsertISelMapEntry(CodeGenMap, N.Val, " + + utostr(ValNo) + ", ResNode, " + utostr(ValNo) + ");"); ValNo++; } - if (HasChain) - emitCode(ChainName + " = SDOperand(ResNode, " + utostr(ValNo) + ");"); - if (NodeHasOutFlag) emitCode("InFlag = SDOperand(ResNode, " + utostr(ValNo + (unsigned)HasChain) + ");"); if (HasImpResults && EmitCopyFromRegs(N, ChainEmitted)) { - emitCode("CodeGenMap[N.getValue(" + utostr(ValNo) + ")] = " - "SDOperand(ResNode, " + utostr(ValNo) + ");"); + emitCode("SelectionDAG::InsertISelMapEntry(CodeGenMap, N.Val, " + + utostr(ValNo) + ", ResNode, " + utostr(ValNo) + 
");"); ValNo++; } // User does not expect the instruction would produce a chain! bool AddedChain = HasChain && !NodeHasChain; if (NodeHasChain) { - emitCode("CodeGenMap[N.getValue(" + utostr(ValNo) + ")] = " + - ChainName + ";"); + emitCode("SelectionDAG::InsertISelMapEntry(CodeGenMap, N.Val, " + + utostr(ValNo) + ", ResNode, " + utostr(ValNo) + ");"); if (DoReplace) - emitCode("if (N.ResNo == 0) AddHandleReplacement(N.getValue(" - + utostr(ValNo) + "), " + ChainName + ");"); + emitCode("if (N.ResNo == 0) AddHandleReplacement(N.Val, " + + utostr(ValNo) + ", " + "ResNode, " + utostr(ValNo) + ");"); ValNo++; } @@ -2428,20 +2425,23 @@ if (FoldedChains.size() > 0) { std::string Code; for (unsigned j = 0, e = FoldedChains.size(); j < e; j++) - Code += "CodeGenMap[" + FoldedChains[j].first + ".getValue(" + - utostr(FoldedChains[j].second) + ")] = "; - emitCode(Code + ChainName + ";"); + emitCode("SelectionDAG::InsertISelMapEntry(CodeGenMap, " + + FoldedChains[j].first + ".Val, " + + utostr(FoldedChains[j].second) + ", ResNode, " + + utostr(ValNo) + ");"); for (unsigned j = 0, e = FoldedChains.size(); j < e; j++) { std::string Code = - FoldedChains[j].first + ".getValue(" + - utostr(FoldedChains[j].second) + ")"; - emitCode("AddHandleReplacement(" + Code + ", " + ChainName + ");"); + FoldedChains[j].first + ".Val, " + + utostr(FoldedChains[j].second) + ", "; + emitCode("AddHandleReplacement(" + Code + "ResNode, " + + utostr(ValNo) + ");"); } } if (NodeHasOutFlag) - emitCode("CodeGenMap[N.getValue(" + utostr(ValNo) + ")] = InFlag;"); + emitCode("SelectionDAG::InsertISelMapEntry(CodeGenMap, N.Val, " + + utostr(ValNo) + ", InFlag.Val, InFlag.ResNo);"); if (AddedChain && NodeHasOutFlag) { if (NumExpectedResults == 0) { @@ -2483,7 +2483,9 @@ if (HasInFlag || HasImpInputs) Code += ", InFlag"; emitCode(Code + ");"); - emitCode(" Result = CodeGenMap[N] = SDOperand(ResNode, 0);"); + emitCode(" SelectionDAG::InsertISelMapEntry(CodeGenMap, N.Val, N.ResNo, " + "ResNode, 0);"); + 
emitCode(" Result = SDOperand(ResNode, 0);"); emitCode("}"); } @@ -2498,7 +2500,9 @@ emitCode("Tmp" + utostr(ResNo) + " = Transform_" + Op->getName() + "(Tmp" + utostr(OpVal) + ".Val);"); if (isRoot) { - emitCode("CodeGenMap[N] = Tmp" +utostr(ResNo) + ";"); + emitCode("SelectionDAG::InsertISelMapEntry(CodeGenMap, N.Val," + "N.ResNo, Tmp" + utostr(ResNo) + ".Val, Tmp" + + utostr(ResNo) + ".ResNo);"); emitCode("Result = Tmp" + utostr(ResNo) + ";"); emitCode("return;"); } @@ -2571,13 +2575,12 @@ } emitCode("Select(" + RootName + utostr(OpNo) + ", " + RootName + utostr(OpNo) + ");"); - emitDecl("Copy", true); - emitCode("Copy = CurDAG->getCopyToReg(" + ChainName + + emitCode("ResNode = CurDAG->getCopyToReg(" + ChainName + ", CurDAG->getRegister(" + ISE.getQualifiedName(RR) + ", MVT::" + getEnumName(RVT) + "), " + RootName + utostr(OpNo) + ", InFlag).Val;"); - emitCode(ChainName + " = SDOperand(Copy, 0);"); - emitCode("InFlag = SDOperand(Copy, 1);"); + emitCode(ChainName + " = SDOperand(ResNode, 0);"); + emitCode("InFlag = SDOperand(ResNode, 1);"); } } } @@ -2882,10 +2885,10 @@ << " && N.getValue(0).hasOneUse()) {\n" << " SDOperand Dummy = " << "CurDAG->getNode(ISD::HANDLENODE, MVT::Other, N);\n" - << " CodeGenMap[N.getValue(" << OpcodeInfo.getNumResults() - << ")] = Dummy;\n" - << " HandleMap[N.getValue(" << OpcodeInfo.getNumResults() - << ")] = Dummy;\n" + << " SelectionDAG::InsertISelMapEntry(CodeGenMap, N.Val, " + << OpcodeInfo.getNumResults() << ", Dummy.Val, 0);\n" + << " SelectionDAG::InsertISelMapEntry(HandleMap, N.Val, " + << OpcodeInfo.getNumResults() << ", Dummy.Val, 0);\n" << " Result = Dummy;\n" << " return;\n" << " }\n"; @@ -2980,8 +2983,8 @@ << " VTs.push_back(MVT::Other);\n" << " VTs.push_back(MVT::Flag);\n" << " SDOperand New = CurDAG->getNode(ISD::INLINEASM, VTs, Ops);\n" - << " CodeGenMap[N.getValue(0)] = New;\n" - << " CodeGenMap[N.getValue(1)] = New.getValue(1);\n" + << " SelectionDAG::InsertISelMapEntry(CodeGenMap, N.Val, 0, New.Val, 0);\n" + << " 
SelectionDAG::InsertISelMapEntry(CodeGenMap, N.Val, 1, New.Val, 1);\n" << " Result = New.getValue(N.ResNo);\n" << " return;\n" << "}\n\n"; @@ -3016,7 +3019,9 @@ << " case ISD::AssertZext: {\n" << " SDOperand Tmp0;\n" << " Select(Tmp0, N.getOperand(0));\n" - << " if (!N.Val->hasOneUse()) CodeGenMap[N] = Tmp0;\n" + << " if (!N.Val->hasOneUse())\n" + << " SelectionDAG::InsertISelMapEntry(CodeGenMap, N.Val, N.ResNo, " + << "Tmp0.Val, Tmp0.ResNo);\n" << " Result = Tmp0;\n" << " return;\n" << " }\n" @@ -3025,8 +3030,10 @@ << " SDOperand Op0, Op1;\n" << " Select(Op0, N.getOperand(0));\n" << " Select(Op1, N.getOperand(1));\n" - << " Result = CodeGenMap[N] =\n" + << " Result = \n" << " CurDAG->getNode(ISD::TokenFactor, MVT::Other, Op0, Op1);\n" + << " SelectionDAG::InsertISelMapEntry(CodeGenMap, N.Val, N.ResNo, " + << "Result.Val, Result.ResNo);\n" << " } else {\n" << " std::vector Ops;\n" << " for (unsigned i = 0, e = N.getNumOperands(); i != e; ++i) {\n" @@ -3034,8 +3041,10 @@ << " Select(Val, N.getOperand(i));\n" << " Ops.push_back(Val);\n" << " }\n" - << " Result = CodeGenMap[N] = \n" - << " CurDAG->getNode(ISD::TokenFactor, MVT::Other, Ops);\n" + << " Result = \n" + << " CurDAG->getNode(ISD::TokenFactor, MVT::Other, Ops);\n" + << " SelectionDAG::InsertISelMapEntry(CodeGenMap, N.Val, N.ResNo, " + << "Result.Val, Result.ResNo);\n" << " }\n" << " return;\n" << " case ISD::CopyFromReg: {\n" @@ -3049,8 +3058,10 @@ << " return;\n" << " }\n" << " SDOperand New = CurDAG->getCopyFromReg(Chain, Reg, VT);\n" - << " CodeGenMap[N.getValue(0)] = New;\n" - << " CodeGenMap[N.getValue(1)] = New.getValue(1);\n" + << " SelectionDAG::InsertISelMapEntry(CodeGenMap, N.Val, 0, " + << "New.Val, 0);\n" + << " SelectionDAG::InsertISelMapEntry(CodeGenMap, N.Val, 1, " + << "New.Val, 1);\n" << " Result = New.getValue(N.ResNo);\n" << " return;\n" << " } else {\n" @@ -3062,9 +3073,12 @@ << " return;\n" << " }\n" << " SDOperand New = CurDAG->getCopyFromReg(Chain, Reg, VT, Flag);\n" - << " 
CodeGenMap[N.getValue(0)] = New;\n" - << " CodeGenMap[N.getValue(1)] = New.getValue(1);\n" - << " CodeGenMap[N.getValue(2)] = New.getValue(2);\n" + << " SelectionDAG::InsertISelMapEntry(CodeGenMap, N.Val, 0, " + << "New.Val, 0);\n" + << " SelectionDAG::InsertISelMapEntry(CodeGenMap, N.Val, 1, " + << "New.Val, 1);\n" + << " SelectionDAG::InsertISelMapEntry(CodeGenMap, N.Val, 2, " + << "New.Val, 2);\n" << " Result = New.getValue(N.ResNo);\n" << " return;\n" << " }\n" @@ -3079,15 +3093,18 @@ << " if (N.Val->getNumValues() == 1) {\n" << " if (Chain != N.getOperand(0) || Val != N.getOperand(2))\n" << " Result = CurDAG->getCopyToReg(Chain, Reg, Val);\n" - << " CodeGenMap[N] = Result;\n" + << " SelectionDAG::InsertISelMapEntry(CodeGenMap, N.Val, 0, " + << "Result.Val, 0);\n" << " } else {\n" << " SDOperand Flag(0, 0);\n" << " if (N.getNumOperands() == 4) Select(Flag, N.getOperand(3));\n" << " if (Chain != N.getOperand(0) || Val != N.getOperand(2) ||\n" << " (N.getNumOperands() == 4 && Flag != N.getOperand(3)))\n" << " Result = CurDAG->getCopyToReg(Chain, Reg, Val, Flag);\n" - << " CodeGenMap[N.getValue(0)] = Result;\n" - << " CodeGenMap[N.getValue(1)] = Result.getValue(1);\n" + << " SelectionDAG::InsertISelMapEntry(CodeGenMap, N.Val, 0, " + << "Result.Val, 0);\n" + << " SelectionDAG::InsertISelMapEntry(CodeGenMap, N.Val, 1, " + << "Result.Val, 1);\n" << " Result = Result.getValue(N.ResNo);\n" << " }\n" << " return;\n" @@ -3167,10 +3184,12 @@ OS << "\n"; OS << "// AddHandleReplacement - Note the pending replacement node for a\n" << "// handle node in ReplaceMap.\n"; - OS << "void AddHandleReplacement(SDOperand N, SDOperand R) {\n"; + OS << "void AddHandleReplacement(SDNode *H, unsigned HNum, SDNode *R, " + << "unsigned RNum) {\n"; + OS << " SDOperand N(H, HNum);\n"; OS << " std::map::iterator HMI = HandleMap.find(N);\n"; OS << " if (HMI != HandleMap.end()) {\n"; - OS << " ReplaceMap[HMI->second] = R;\n"; + OS << " ReplaceMap[HMI->second] = SDOperand(R, RNum);\n"; OS << " 
HandleMap.erase(N);\n"; OS << " }\n"; OS << "}\n"; @@ -3186,7 +3205,7 @@ OS << " SDOperand N = I->first;\n"; OS << " SDOperand R;\n"; OS << " Select(R, N.getValue(0));\n"; - OS << " AddHandleReplacement(N, R);\n"; + OS << " AddHandleReplacement(N.Val, N.ResNo, R.Val, R.ResNo);\n"; OS << " }\n"; OS << "}\n"; OS << "\n"; From evan.cheng at apple.com Thu Feb 9 16:13:04 2006 From: evan.cheng at apple.com (Evan Cheng) Date: Thu, 9 Feb 2006 16:13:04 -0600 Subject: [llvm-commits] CVS: llvm/lib/Target/X86/X86ISelDAGToDAG.cpp Message-ID: <200602092213.QAA26397@zion.cs.uiuc.edu> Changes in directory llvm/lib/Target/X86: X86ISelDAGToDAG.cpp updated: 1.45 -> 1.46 --- Log message: Match tblgen change. --- Diffs of the changes: (+2 -2) X86ISelDAGToDAG.cpp | 4 ++-- 1 files changed, 2 insertions(+), 2 deletions(-) Index: llvm/lib/Target/X86/X86ISelDAGToDAG.cpp diff -u llvm/lib/Target/X86/X86ISelDAGToDAG.cpp:1.45 llvm/lib/Target/X86/X86ISelDAGToDAG.cpp:1.46 --- llvm/lib/Target/X86/X86ISelDAGToDAG.cpp:1.45 Thu Feb 9 01:17:49 2006 +++ llvm/lib/Target/X86/X86ISelDAGToDAG.cpp Thu Feb 9 16:12:53 2006 @@ -536,7 +536,7 @@ CodeGenMap[N.getValue(0)] = Result; if (foldedLoad) { CodeGenMap[N1.getValue(1)] = Result.getValue(1); - AddHandleReplacement(N1.getValue(1), Result.getValue(1)); + AddHandleReplacement(N1.Val, 1, Result.Val, 1); } return; @@ -637,7 +637,7 @@ CodeGenMap[N.getValue(0)] = Result; if (foldedLoad) { CodeGenMap[N1.getValue(1)] = Result.getValue(1); - AddHandleReplacement(N1.getValue(1), Result.getValue(1)); + AddHandleReplacement(N1.Val, 1, Result.Val, 1); } return; } From evan.cheng at apple.com Thu Feb 9 16:15:42 2006 From: evan.cheng at apple.com (Evan Cheng) Date: Thu, 9 Feb 2006 16:15:42 -0600 Subject: [llvm-commits] CVS: llvm-test/SingleSource/UnitTests/2005-07-17-INT-To-FP.c Message-ID: <200602092215.QAA26478@zion.cs.uiuc.edu> Changes in directory llvm-test/SingleSource/UnitTests: 2005-07-17-INT-To-FP.c updated: 1.6 -> 1.7 --- Log message: Fixing the test case: it was 
testing boundary cases where the result of conversion is different depending on whether x86 SSE or 387 instructions are used. --- Diffs of the changes: (+5 -5) 2005-07-17-INT-To-FP.c | 10 +++++----- 1 files changed, 5 insertions(+), 5 deletions(-) Index: llvm-test/SingleSource/UnitTests/2005-07-17-INT-To-FP.c diff -u llvm-test/SingleSource/UnitTests/2005-07-17-INT-To-FP.c:1.6 llvm-test/SingleSource/UnitTests/2005-07-17-INT-To-FP.c:1.7 --- llvm-test/SingleSource/UnitTests/2005-07-17-INT-To-FP.c:1.6 Fri Jan 27 20:41:13 2006 +++ llvm-test/SingleSource/UnitTests/2005-07-17-INT-To-FP.c Thu Feb 9 16:15:29 2006 @@ -3,7 +3,7 @@ int tests[] = { 0x80000000, - -123456789, + -123456792, -10, -2, -1, @@ -11,8 +11,8 @@ 1, 2, 10, - 123456789, - 0x7FFFFFFF + 123456792, + 0x7FFFFF80 }; int main() { @@ -34,10 +34,10 @@ } // edge case tests for (i = 0; i < (sizeof(tests) / sizeof(int)); i++) { - printf("%d %f %f %f %f\n", i, + printf("%d %f %f %f\n", i, (double)(unsigned)tests[i], (double)( signed)tests[i], - (float) (unsigned)tests[i], + (float) tests[i], (float) ( signed)tests[i]); } } From lattner at cs.uiuc.edu Thu Feb 9 16:15:54 2006 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Thu, 9 Feb 2006 16:15:54 -0600 Subject: [llvm-commits] CVS: llvm/lib/Transforms/Scalar/LoopUnswitch.cpp Message-ID: <200602092215.QAA26485@zion.cs.uiuc.edu> Changes in directory llvm/lib/Transforms/Scalar: LoopUnswitch.cpp updated: 1.8 -> 1.9 --- Log message: Simplify control flow a bit, note that unswitch preserves canonical loop form --- Diffs of the changes: (+12 -10) LoopUnswitch.cpp | 22 ++++++++++++---------- 1 files changed, 12 insertions(+), 10 deletions(-) Index: llvm/lib/Transforms/Scalar/LoopUnswitch.cpp diff -u llvm/lib/Transforms/Scalar/LoopUnswitch.cpp:1.8 llvm/lib/Transforms/Scalar/LoopUnswitch.cpp:1.9 --- llvm/lib/Transforms/Scalar/LoopUnswitch.cpp:1.8 Thu Feb 9 14:15:48 2006 +++ llvm/lib/Transforms/Scalar/LoopUnswitch.cpp Thu Feb 9 16:15:42 2006 @@ -59,12 +59,13 @@ /// virtual void 
getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequiredID(LoopSimplifyID); + AU.addPreservedID(LoopSimplifyID); AU.addRequired(); AU.addPreserved(); } private: - void VersionLoop(Value *LIC, Loop *L); + void VersionLoop(Value *LIC, Loop *L, Loop *&Out1, Loop *&Out2); BasicBlock *SplitBlock(BasicBlock *BB, bool SplitAtTop); void RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC, bool Val); }; @@ -172,11 +173,13 @@ } //std::cerr << "BEFORE:\n"; LI->dump(); - VersionLoop(BI->getCondition(), L); + Loop *First = 0, *Second = 0; + VersionLoop(BI->getCondition(), L, First, Second); //std::cerr << "AFTER:\n"; LI->dump(); - // FIXME: Why return here? What if we have: - // "for () { if (iv1) { if (iv2) { } } }" ? + // Try to unswitch each of our new loops now! + if (First) visitLoop(First); + if (Second) visitLoop(Second); return true; } @@ -247,8 +250,9 @@ /// VersionLoop - We determined that the loop is profitable to unswitch and /// contains a branch on a loop invariant condition. Split it into loop -/// versions and test the condition outside of either loop. -void LoopUnswitch::VersionLoop(Value *LIC, Loop *L) { +/// versions and test the condition outside of either loop. Return the loops +/// created as Out1/Out2. +void LoopUnswitch::VersionLoop(Value *LIC, Loop *L, Loop *&Out1, Loop *&Out2) { Function *F = L->getHeader()->getParent(); DEBUG(std::cerr << "loop-unswitch: Unswitching loop %" @@ -324,10 +328,8 @@ RewriteLoopBodyWithConditionConstant(L, LIC, true); RewriteLoopBodyWithConditionConstant(NewLoop, LIC, false); ++NumUnswitched; - - // Try to unswitch each of our new loops now! 
- visitLoop(L); - visitLoop(NewLoop); + Out1 = L; + Out2 = NewLoop; } // RewriteLoopBodyWithConditionConstant - We know that the boolean value LIC has From llvm at cs.uiuc.edu Thu Feb 9 16:48:31 2006 From: llvm at cs.uiuc.edu (LLVM) Date: Thu, 9 Feb 2006 16:48:31 -0600 Subject: [llvm-commits] CVS: llvm-test/MultiSource/Benchmarks/Prolangs-C/dixie/Makefile add.c bind.c compare.c dixie.h error.c globals.c init.c list.c modify.c modrdn.c parse.c read.c remove.c search.c send.c test.c util.c version.c Message-ID: <200602092248.QAA26660@zion.cs.uiuc.edu> Changes in directory llvm-test/MultiSource/Benchmarks/Prolangs-C/dixie: Makefile (r1.2) removed add.c (r1.1) removed bind.c (r1.1) removed compare.c (r1.1) removed dixie.h (r1.1) removed error.c (r1.1) removed globals.c (r1.1) removed init.c (r1.1) removed list.c (r1.1) removed modify.c (r1.1) removed modrdn.c (r1.1) removed parse.c (r1.1) removed read.c (r1.1) removed remove.c (r1.1) removed search.c (r1.1) removed send.c (r1.2) removed test.c (r1.1) removed util.c (r1.1) removed version.c (r1.1) removed --- Log message: Remove dixie. It doesn't work on many platforms (and not all that interesting.) 
--- Diffs of the changes: (+0 -0) 0 files changed From evan.cheng at apple.com Thu Feb 9 17:02:27 2006 From: evan.cheng at apple.com (Evan Cheng) Date: Thu, 9 Feb 2006 17:02:27 -0600 Subject: [llvm-commits] CVS: llvm-test/MultiSource/Benchmarks/Prolangs-C/Makefile Message-ID: <200602092302.RAA26745@zion.cs.uiuc.edu> Changes in directory llvm-test/MultiSource/Benchmarks/Prolangs-C: Makefile updated: 1.5 -> 1.6 --- Log message: Removed dixie --- Diffs of the changes: (+1 -1) Makefile | 2 +- 1 files changed, 1 insertion(+), 1 deletion(-) Index: llvm-test/MultiSource/Benchmarks/Prolangs-C/Makefile diff -u llvm-test/MultiSource/Benchmarks/Prolangs-C/Makefile:1.5 llvm-test/MultiSource/Benchmarks/Prolangs-C/Makefile:1.6 --- llvm-test/MultiSource/Benchmarks/Prolangs-C/Makefile:1.5 Mon Jan 9 15:24:45 2006 +++ llvm-test/MultiSource/Benchmarks/Prolangs-C/Makefile Thu Feb 9 17:02:14 2006 @@ -1,6 +1,6 @@ LEVEL = ../../.. PARALLEL_DIRS := TimberWolfMC agrep allroots archie-client assembler bison \ - cdecl compiler dixie fixoutput football gnugo loader \ + cdecl compiler fixoutput football gnugo loader \ plot2fig simulator unix-smail unix-tbl # Get the $(ARCH) setting From lattner at cs.uiuc.edu Thu Feb 9 19:24:21 2006 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Thu, 9 Feb 2006 19:24:21 -0600 Subject: [llvm-commits] CVS: llvm/lib/Transforms/Scalar/LoopUnswitch.cpp Message-ID: <200602100124.TAA27418@zion.cs.uiuc.edu> Changes in directory llvm/lib/Transforms/Scalar: LoopUnswitch.cpp updated: 1.9 -> 1.10 --- Log message: Implement unconditional unswitching of 'trivial' loops, those loops that contain branches in their entry block that control whether or not the loop is a noop or not. 
--- Diffs of the changes: (+149 -18) LoopUnswitch.cpp | 167 +++++++++++++++++++++++++++++++++++++++++++++++++------ 1 files changed, 149 insertions(+), 18 deletions(-) Index: llvm/lib/Transforms/Scalar/LoopUnswitch.cpp diff -u llvm/lib/Transforms/Scalar/LoopUnswitch.cpp:1.9 llvm/lib/Transforms/Scalar/LoopUnswitch.cpp:1.10 --- llvm/lib/Transforms/Scalar/LoopUnswitch.cpp:1.9 Thu Feb 9 16:15:42 2006 +++ llvm/lib/Transforms/Scalar/LoopUnswitch.cpp Thu Feb 9 19:24:09 2006 @@ -65,9 +65,11 @@ } private: + unsigned getLoopUnswitchCost(Loop *L, Value *LIC); void VersionLoop(Value *LIC, Loop *L, Loop *&Out1, Loop *&Out2); BasicBlock *SplitBlock(BasicBlock *BB, bool SplitAtTop); void RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC, bool Val); + void UnswitchTrivialCondition(Loop *L, Value *Cond, ConstantBool *LoopCond); }; RegisterOpt X("loop-unswitch", "Unswitch loops"); } @@ -88,13 +90,8 @@ } -/// InsertPHINodesForUsesOutsideLoop - If this instruction is used outside of -/// the specified loop, insert a PHI node in the appropriate exit block to merge -/// the values in the two different loop versions. -/// -/// Most values are not used outside of the loop they are defined in, so be -/// efficient for this case. -/// +/// LoopValuesUsedOutsideLoop - Return true if there are any values defined in +/// the loop that are used by instructions outside of it. static bool LoopValuesUsedOutsideLoop(Loop *L) { // We will be doing lots of "loop contains block" queries. Loop::contains is // linear time, use a set to speed this up. @@ -117,6 +114,89 @@ return false; } +/// IsTrivialUnswitchCondition - Check to see if this unswitch condition is +/// trivial: that is, that the condition controls whether or not the loop does +/// anything at all. If this is a trivial condition, unswitching produces no +/// code duplications (equivalently, it produces a simpler loop and a new empty +/// loop, which gets deleted). 
+/// +/// If this is a trivial condition, return ConstantBool::True if the loop body +/// runs when the condition is true, False if the loop body executes when the +/// condition is false. Otherwise, return null to indicate a complex condition. +static ConstantBool *IsTrivialUnswitchCondition(Loop *L, Value *Cond) { + BasicBlock *Header = L->getHeader(); + BranchInst *HeaderTerm = dyn_cast(Header->getTerminator()); + ConstantBool *RetVal = 0; + + // If the header block doesn't end with a conditional branch on Cond, we can't + // handle it. + if (!HeaderTerm || !HeaderTerm->isConditional() || + HeaderTerm->getCondition() != Cond) + return 0; + + // Check to see if the conditional branch goes to the latch block. If not, + // it's not trivial. This also determines the value of Cond that will execute + // the loop. + BasicBlock *Latch = L->getLoopLatch(); + if (HeaderTerm->getSuccessor(1) == Latch) + RetVal = ConstantBool::True; + else if (HeaderTerm->getSuccessor(0) == Latch) + RetVal = ConstantBool::False; + else + return 0; // Doesn't branch to latch block. + + // The latch block must end with a conditional branch where one edge goes to + // the header (this much we know) and one edge goes OUT of the loop. + BranchInst *LatchBranch = dyn_cast(Latch->getTerminator()); + if (!LatchBranch || !LatchBranch->isConditional()) return 0; + + if (LatchBranch->getSuccessor(0) == Header) { + if (L->contains(LatchBranch->getSuccessor(1))) return 0; + } else { + assert(LatchBranch->getSuccessor(1) == Header); + if (L->contains(LatchBranch->getSuccessor(0))) return 0; + } + + // We already know that nothing uses any scalar values defined inside of this + // loop. As such, we just have to check to see if this loop will execute any + // side-effecting instructions (e.g. stores, calls, volatile loads) in the + // part of the loop that the code *would* execute. 
+ for (BasicBlock::iterator I = Header->begin(), E = Header->end(); I != E; ++I) + if (I->mayWriteToMemory()) + return 0; + for (BasicBlock::iterator I = Latch->begin(), E = Latch->end(); I != E; ++I) + if (I->mayWriteToMemory()) + return 0; + return RetVal; +} + +/// getLoopUnswitchCost - Return the cost (code size growth) that will happen if +/// we choose to unswitch the specified loop on the specified value. +/// +unsigned LoopUnswitch::getLoopUnswitchCost(Loop *L, Value *LIC) { + // If the condition is trivial, always unswitch. There is no code growth for + // this case. + if (IsTrivialUnswitchCondition(L, LIC)) + return 0; + + unsigned Cost = 0; + // FIXME: this is brain dead. It should take into consideration code + // shrinkage. + for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); + I != E; ++I) { + BasicBlock *BB = *I; + // Do not include empty blocks in the cost calculation. This happen due to + // loop canonicalization and will be removed. + if (BB->begin() == BasicBlock::iterator(BB->getTerminator())) + continue; + + // Count basic blocks. + ++Cost; + } + + return Cost; +} + bool LoopUnswitch::visitLoop(Loop *L) { bool Changed = false; @@ -150,7 +230,7 @@ continue; // Check to see if it would be profitable to unswitch this loop. - if (L->getBlocks().size() > Threshold) { + if (getLoopUnswitchCost(L, BI->getCondition()) > Threshold) { // FIXME: this should estimate growth by the amount of code shared by the // resultant unswitched loops. This should have no code growth: // for () { if (iv) {...} } @@ -173,13 +253,22 @@ } //std::cerr << "BEFORE:\n"; LI->dump(); - Loop *First = 0, *Second = 0; - VersionLoop(BI->getCondition(), L, First, Second); + Loop *NewLoop1 = 0, *NewLoop2 = 0; + + // If this is a trivial condition to unswitch (which results in no code + // duplication), do it now. 
+ if (ConstantBool *V = IsTrivialUnswitchCondition(L, BI->getCondition())) { + UnswitchTrivialCondition(L, BI->getCondition(), V); + NewLoop1 = L; + } else { + VersionLoop(BI->getCondition(), L, NewLoop1, NewLoop2); + } + //std::cerr << "AFTER:\n"; LI->dump(); // Try to unswitch each of our new loops now! - if (First) visitLoop(First); - if (Second) visitLoop(Second); + if (NewLoop1) visitLoop(NewLoop1); + if (NewLoop2) visitLoop(NewLoop2); return true; } @@ -193,6 +282,9 @@ /// /// This method updates the LoopInfo for this function to correctly reflect the /// CFG changes made. +/// +/// This routine returns the new basic block that was inserted, which is always +/// the later part of the block. BasicBlock *LoopUnswitch::SplitBlock(BasicBlock *BB, bool SplitAtTop) { BasicBlock::iterator SplitPoint; if (!SplitAtTop) @@ -201,12 +293,12 @@ SplitPoint = BB->begin(); while (isa(SplitPoint)) ++SplitPoint; } - + BasicBlock *New = BB->splitBasicBlock(SplitPoint, BB->getName()+".tail"); // New now lives in whichever loop that BB used to. if (Loop *L = LI->getLoopFor(BB)) L->addBasicBlockToLoop(New, *LI); - return SplitAtTop ? BB : New; + return New; } @@ -247,6 +339,42 @@ return New; } +/// UnswitchTrivialCondition - Given a loop that has a trivial unswitchable +/// condition in it (a cond branch from its header block to its latch block, +/// where the path through the loop that doesn't execute its body has no +/// side-effects), unswitch it. This doesn't involve any code duplication, just +/// moving the conditional branch outside of the loop and updating loop info. +void LoopUnswitch::UnswitchTrivialCondition(Loop *L, Value *Cond, + ConstantBool *LoopCond) { + // First step, split the preahder, so that we know that there is a safe place + // to insert the conditional branch. We will change 'OrigPH' to have a + // conditional branch on Cond. 
+ BasicBlock *OrigPH = L->getLoopPreheader(); + BasicBlock *NewPH = SplitBlock(OrigPH, false); + + // Now that we have a place to insert the conditional branch, create a place + // to branch to: this is the non-header successor of the latch block. + BranchInst *LatchBranch =cast<BranchInst>(L->getLoopLatch()->getTerminator()); + BasicBlock *ExitBlock = + LatchBranch->getSuccessor(LatchBranch->getSuccessor(0) == L->getHeader()); + assert(!L->contains(ExitBlock) && "Exit block is in the loop?"); + + // Split this block now, so that the loop maintains its exit block. + BasicBlock *NewExit = SplitBlock(ExitBlock, true); + + // Okay, now we have a position to branch from and a position to branch to, + // insert the new conditional branch. + bool EnterOnTrue = LoopCond->getValue(); + new BranchInst(EnterOnTrue ? NewPH : NewExit, EnterOnTrue ? NewExit : NewPH, + Cond, OrigPH->getTerminator()); + OrigPH->getTerminator()->eraseFromParent(); + + // Now that we know that the loop is never entered when this condition is a + // particular value, rewrite the loop with this info. We know that this will + // at least eliminate the old branch. + RewriteLoopBodyWithConditionConstant(L, Cond, EnterOnTrue); +} + /// VersionLoop - We determined that the loop is profitable to unswitch and /// contains a branch on a loop invariant condition. Split it into loop @@ -254,7 +382,7 @@ /// created as Out1/Out2. 
void LoopUnswitch::VersionLoop(Value *LIC, Loop *L, Loop *&Out1, Loop *&Out2) { Function *F = L->getHeader()->getParent(); - + DEBUG(std::cerr << "loop-unswitch: Unswitching loop %" << L->getHeader()->getName() << " [" << L->getBlocks().size() << " blocks] in Function " << F->getName() @@ -275,8 +403,10 @@ std::sort(ExitBlocks.begin(), ExitBlocks.end()); ExitBlocks.erase(std::unique(ExitBlocks.begin(), ExitBlocks.end()), ExitBlocks.end()); - for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) - LoopBlocks.push_back(ExitBlocks[i] = SplitBlock(ExitBlocks[i], true)); + for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { + SplitBlock(ExitBlocks[i], true); + LoopBlocks.push_back(ExitBlocks[i]); + } // Next step, clone all of the basic blocks that make up the loop (including // the loop preheader and exit blocks), keeping track of the mapping between @@ -337,6 +467,7 @@ // of properties correlated to it. void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC, bool Val) { + assert(!isa(LIC) && "Why are we unswitching on a constant?"); // FIXME: Support correlated properties, like: // for (...) // if (li1 < li2) @@ -347,7 +478,7 @@ std::vector Users(LIC->use_begin(), LIC->use_end()); for (unsigned i = 0, e = Users.size(); i != e; ++i) - if (Instruction *U = dyn_cast(Users[i])) + if (Instruction *U = cast(Users[i])) if (L->contains(U->getParent())) U->replaceUsesOfWith(LIC, BoolVal); } From lattner at cs.uiuc.edu Thu Feb 9 19:36:47 2006 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Thu, 9 Feb 2006 19:36:47 -0600 Subject: [llvm-commits] CVS: llvm/lib/Transforms/Scalar/LoopUnswitch.cpp Message-ID: <200602100136.TAA27515@zion.cs.uiuc.edu> Changes in directory llvm/lib/Transforms/Scalar: LoopUnswitch.cpp updated: 1.10 -> 1.11 --- Log message: When unswitching a trivial loop, do admit we are doing it! 
:) --- Diffs of the changes: (+7 -0) LoopUnswitch.cpp | 7 +++++++ 1 files changed, 7 insertions(+) Index: llvm/lib/Transforms/Scalar/LoopUnswitch.cpp diff -u llvm/lib/Transforms/Scalar/LoopUnswitch.cpp:1.10 llvm/lib/Transforms/Scalar/LoopUnswitch.cpp:1.11 --- llvm/lib/Transforms/Scalar/LoopUnswitch.cpp:1.10 Thu Feb 9 19:24:09 2006 +++ llvm/lib/Transforms/Scalar/LoopUnswitch.cpp Thu Feb 9 19:36:35 2006 @@ -346,6 +346,11 @@ /// moving the conditional branch outside of the loop and updating loop info. void LoopUnswitch::UnswitchTrivialCondition(Loop *L, Value *Cond, ConstantBool *LoopCond) { + DEBUG(std::cerr << "loop-unswitch: Trivial-Unswitch loop %" + << L->getHeader()->getName() << " [" << L->getBlocks().size() + << " blocks] in Function " << L->getHeader()->getParent()->getName() + << " on cond:" << *Cond << "\n"); + // First step, split the preahder, so that we know that there is a safe place // to insert the conditional branch. We will change 'OrigPH' to have a // conditional branch on Cond. @@ -373,6 +378,8 @@ // particular value, rewrite the loop with this info. We know that this will // at least eliminate the old branch. RewriteLoopBodyWithConditionConstant(L, Cond, EnterOnTrue); + + ++NumUnswitched; } From lattner at cs.uiuc.edu Thu Feb 9 20:01:35 2006 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Thu, 9 Feb 2006 20:01:35 -0600 Subject: [llvm-commits] CVS: llvm/lib/Transforms/Scalar/LoopUnswitch.cpp Message-ID: <200602100201.UAA27693@zion.cs.uiuc.edu> Changes in directory llvm/lib/Transforms/Scalar: LoopUnswitch.cpp updated: 1.11 -> 1.12 --- Log message: Move code around to be more logical, no functionality change. 
--- Diffs of the changes: (+32 -26) LoopUnswitch.cpp | 58 ++++++++++++++++++++++++++++++------------------------- 1 files changed, 32 insertions(+), 26 deletions(-) Index: llvm/lib/Transforms/Scalar/LoopUnswitch.cpp diff -u llvm/lib/Transforms/Scalar/LoopUnswitch.cpp:1.11 llvm/lib/Transforms/Scalar/LoopUnswitch.cpp:1.12 --- llvm/lib/Transforms/Scalar/LoopUnswitch.cpp:1.11 Thu Feb 9 19:36:35 2006 +++ llvm/lib/Transforms/Scalar/LoopUnswitch.cpp Thu Feb 9 20:01:22 2006 @@ -69,7 +69,8 @@ void VersionLoop(Value *LIC, Loop *L, Loop *&Out1, Loop *&Out2); BasicBlock *SplitBlock(BasicBlock *BB, bool SplitAtTop); void RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC, bool Val); - void UnswitchTrivialCondition(Loop *L, Value *Cond, ConstantBool *LoopCond); + void UnswitchTrivialCondition(Loop *L, Value *Cond, bool EntersLoopOnCond, + BasicBlock *ExitBlock); }; RegisterOpt X("loop-unswitch", "Unswitch loops"); } @@ -123,38 +124,41 @@ /// If this is a trivial condition, return ConstantBool::True if the loop body /// runs when the condition is true, False if the loop body executes when the /// condition is false. Otherwise, return null to indicate a complex condition. -static ConstantBool *IsTrivialUnswitchCondition(Loop *L, Value *Cond) { +static bool IsTrivialUnswitchCondition(Loop *L, Value *Cond, + bool *CondEntersLoop = 0, + BasicBlock **LoopExit = 0) { BasicBlock *Header = L->getHeader(); BranchInst *HeaderTerm = dyn_cast(Header->getTerminator()); - ConstantBool *RetVal = 0; // If the header block doesn't end with a conditional branch on Cond, we can't // handle it. if (!HeaderTerm || !HeaderTerm->isConditional() || HeaderTerm->getCondition() != Cond) - return 0; + return false; // Check to see if the conditional branch goes to the latch block. If not, // it's not trivial. This also determines the value of Cond that will execute // the loop. 
BasicBlock *Latch = L->getLoopLatch(); - if (HeaderTerm->getSuccessor(1) == Latch) - RetVal = ConstantBool::True; - else if (HeaderTerm->getSuccessor(0) == Latch) - RetVal = ConstantBool::False; + if (HeaderTerm->getSuccessor(1) == Latch) { + if (CondEntersLoop) *CondEntersLoop = true; + } else if (HeaderTerm->getSuccessor(0) == Latch) + if (CondEntersLoop) *CondEntersLoop = false; else - return 0; // Doesn't branch to latch block. + return false; // Doesn't branch to latch block. // The latch block must end with a conditional branch where one edge goes to // the header (this much we know) and one edge goes OUT of the loop. BranchInst *LatchBranch = dyn_cast<BranchInst>(Latch->getTerminator()); - if (!LatchBranch || !LatchBranch->isConditional()) return 0; + if (!LatchBranch || !LatchBranch->isConditional()) return false; if (LatchBranch->getSuccessor(0) == Header) { - if (L->contains(LatchBranch->getSuccessor(1))) return 0; + if (L->contains(LatchBranch->getSuccessor(1))) return false; + if (LoopExit) *LoopExit = LatchBranch->getSuccessor(1); } else { assert(LatchBranch->getSuccessor(1) == Header); - if (L->contains(LatchBranch->getSuccessor(0))) return 0; + if (L->contains(LatchBranch->getSuccessor(0))) return false; + if (LoopExit) *LoopExit = LatchBranch->getSuccessor(0); } // We already know that nothing uses any scalar values defined inside of this @@ -163,11 +167,11 @@ // part of the loop that the code *would* execute. for (BasicBlock::iterator I = Header->begin(), E = Header->end(); I != E; ++I) if (I->mayWriteToMemory()) - return 0; + return false; for (BasicBlock::iterator I = Latch->begin(), E = Latch->end(); I != E; ++I) if (I->mayWriteToMemory()) - return 0; - return RetVal; + return false; + return true; } /// getLoopUnswitchCost - Return the cost (code size growth) that will happen if @@ -257,8 +261,12 @@ // If this is a trivial condition to unswitch (which results in no code // duplication), do it now. 
- if (ConstantBool *V = IsTrivialUnswitchCondition(L, BI->getCondition())) { - UnswitchTrivialCondition(L, BI->getCondition(), V); + bool EntersLoopOnCond; + BasicBlock *ExitBlock; + if (IsTrivialUnswitchCondition(L, BI->getCondition(), &EntersLoopOnCond, + &ExitBlock)) { + UnswitchTrivialCondition(L, BI->getCondition(), + EntersLoopOnCond, ExitBlock); NewLoop1 = L; } else { VersionLoop(BI->getCondition(), L, NewLoop1, NewLoop2); @@ -345,7 +353,8 @@ /// side-effects), unswitch it. This doesn't involve any code duplication, just /// moving the conditional branch outside of the loop and updating loop info. void LoopUnswitch::UnswitchTrivialCondition(Loop *L, Value *Cond, - ConstantBool *LoopCond) { + bool EnterOnCond, + BasicBlock *ExitBlock) { DEBUG(std::cerr << "loop-unswitch: Trivial-Unswitch loop %" << L->getHeader()->getName() << " [" << L->getBlocks().size() << " blocks] in Function " << L->getHeader()->getParent()->getName() @@ -358,26 +367,23 @@ BasicBlock *NewPH = SplitBlock(OrigPH, false); // Now that we have a place to insert the conditional branch, create a place - // to branch to: this is the non-header successor of the latch block. - BranchInst *LatchBranch =cast(L->getLoopLatch()->getTerminator()); - BasicBlock *ExitBlock = - LatchBranch->getSuccessor(LatchBranch->getSuccessor(0) == L->getHeader()); - assert(!L->contains(ExitBlock) && "Exit block is in the loop?"); + // to branch to: this is the exit block out of the loop that we should + // short-circuit to. // Split this block now, so that the loop maintains its exit block. + assert(!L->contains(ExitBlock) && "Exit block is in the loop?"); BasicBlock *NewExit = SplitBlock(ExitBlock, true); // Okay, now we have a position to branch from and a position to branch to, // insert the new conditional branch. - bool EnterOnTrue = LoopCond->getValue(); - new BranchInst(EnterOnTrue ? NewPH : NewExit, EnterOnTrue ? NewExit : NewPH, + new BranchInst(EnterOnCond ? NewPH : NewExit, EnterOnCond ? 
NewExit : NewPH, Cond, OrigPH->getTerminator()); OrigPH->getTerminator()->eraseFromParent(); // Now that we know that the loop is never entered when this condition is a // particular value, rewrite the loop with this info. We know that this will // at least eliminate the old branch. - RewriteLoopBodyWithConditionConstant(L, Cond, EnterOnTrue); + RewriteLoopBodyWithConditionConstant(L, Cond, EnterOnCond); ++NumUnswitched; } From lattner at cs.uiuc.edu Thu Feb 9 20:30:48 2006 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Thu, 9 Feb 2006 20:30:48 -0600 Subject: [llvm-commits] CVS: llvm/lib/Transforms/Scalar/LoopUnswitch.cpp Message-ID: <200602100230.UAA27942@zion.cs.uiuc.edu> Changes in directory llvm/lib/Transforms/Scalar: LoopUnswitch.cpp updated: 1.12 -> 1.13 --- Log message: add some notes, move some code around. Implement unswitching of loops with branches on partially invariant computations. --- Diffs of the changes: (+64 -19) LoopUnswitch.cpp | 83 ++++++++++++++++++++++++++++++++++++++++++------------- 1 files changed, 64 insertions(+), 19 deletions(-) Index: llvm/lib/Transforms/Scalar/LoopUnswitch.cpp diff -u llvm/lib/Transforms/Scalar/LoopUnswitch.cpp:1.12 llvm/lib/Transforms/Scalar/LoopUnswitch.cpp:1.13 --- llvm/lib/Transforms/Scalar/LoopUnswitch.cpp:1.12 Thu Feb 9 20:01:22 2006 +++ llvm/lib/Transforms/Scalar/LoopUnswitch.cpp Thu Feb 9 20:30:37 2006 @@ -115,6 +115,27 @@ return false; } +/// FindTrivialLoopExitBlock - We know that we have a branch from the loop +/// header to the specified latch block. See if one of the successors of the +/// latch block is an exit, and if so what block it is. +static BasicBlock *FindTrivialLoopExitBlock(Loop *L, BasicBlock *Latch) { + BasicBlock *Header = L->getHeader(); + BranchInst *LatchBranch = dyn_cast(Latch->getTerminator()); + if (!LatchBranch || !LatchBranch->isConditional()) return 0; + + // Simple case, the latch block is a conditional branch. 
The target that + // doesn't go to the loop header is our block if it is not in the loop. + if (LatchBranch->getSuccessor(0) == Header) { + if (L->contains(LatchBranch->getSuccessor(1))) return false; + return LatchBranch->getSuccessor(1); + } else { + assert(LatchBranch->getSuccessor(1) == Header); + if (L->contains(LatchBranch->getSuccessor(0))) return false; + return LatchBranch->getSuccessor(0); + } +} + + /// IsTrivialUnswitchCondition - Check to see if this unswitch condition is /// trivial: that is, that the condition controls whether or not the loop does /// anything at all. If this is a trivial condition, unswitching produces no @@ -149,17 +170,9 @@ // The latch block must end with a conditional branch where one edge goes to // the header (this much we know) and one edge goes OUT of the loop. - BranchInst *LatchBranch = dyn_cast(Latch->getTerminator()); - if (!LatchBranch || !LatchBranch->isConditional()) return false; - - if (LatchBranch->getSuccessor(0) == Header) { - if (L->contains(LatchBranch->getSuccessor(1))) return false; - if (LoopExit) *LoopExit = LatchBranch->getSuccessor(1); - } else { - assert(LatchBranch->getSuccessor(1) == Header); - if (L->contains(LatchBranch->getSuccessor(0))) return false; - if (LoopExit) *LoopExit = LatchBranch->getSuccessor(0); - } + BasicBlock *LoopExitBlock = FindTrivialLoopExitBlock(L, Latch); + if (!LoopExitBlock) return 0; + if (LoopExit) *LoopExit = LoopExitBlock; // We already know that nothing uses any scalar values defined inside of this // loop. As such, we just have to check to see if this loop will execute any @@ -201,6 +214,32 @@ return Cost; } +/// FindLIVLoopCondition - Cond is a condition that occurs in L. If it is +/// invariant in the loop, or has an invariant piece, return the invariant. +/// Otherwise, return null. +static Value *FindLIVLoopCondition(Value *Cond, Loop *L, bool &Changed) { + // Constants should be folded, not unswitched on! 
+ if (isa(Cond)) return false; + + // TODO: Handle: br (VARIANT|INVARIANT). + // TODO: Hoist simple expressions out of loops. + if (L->isLoopInvariant(Cond)) return Cond; + + if (BinaryOperator *BO = dyn_cast(Cond)) + if (BO->getOpcode() == Instruction::And || + BO->getOpcode() == Instruction::Or) { + // If either the left or right side is invariant, we can unswitch on this, + // which will cause the branch to go away in one loop and the condition to + // simplify in the other one. + if (Value *LHS = FindLIVLoopCondition(BO->getOperand(0), L, Changed)) + return LHS; + if (Value *RHS = FindLIVLoopCondition(BO->getOperand(1), L, Changed)) + return RHS; + } + + return 0; +} + bool LoopUnswitch::visitLoop(Loop *L) { bool Changed = false; @@ -217,6 +256,8 @@ for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E; ++I) { TerminatorInst *TI = (*I)->getTerminator(); + // FIXME: Handle invariant select instructions. + if (SwitchInst *SI = dyn_cast(TI)) { if (!isa(SI) && L->isLoopInvariant(SI->getCondition())) DEBUG(std::cerr << "TODO: Implement unswitching 'switch' loop %" @@ -229,12 +270,16 @@ if (!BI) continue; // If this isn't branching on an invariant condition, we can't unswitch it. - if (!BI->isConditional() || isa(BI->getCondition()) || - !L->isLoopInvariant(BI->getCondition())) + if (!BI->isConditional()) continue; + // See if this, or some part of it, is loop invariant. If so, we can + // unswitch on it if we desire. + Value *LoopCond = FindLIVLoopCondition(BI->getCondition(), L, Changed); + if (LoopCond == 0) continue; + // Check to see if it would be profitable to unswitch this loop. - if (getLoopUnswitchCost(L, BI->getCondition()) > Threshold) { + if (getLoopUnswitchCost(L, LoopCond) > Threshold) { // FIXME: this should estimate growth by the amount of code shared by the // resultant unswitched loops. This should have no code growth: // for () { if (iv) {...} } @@ -263,13 +308,11 @@ // duplication), do it now. 
bool EntersLoopOnCond; BasicBlock *ExitBlock; - if (IsTrivialUnswitchCondition(L, BI->getCondition(), &EntersLoopOnCond, - &ExitBlock)) { - UnswitchTrivialCondition(L, BI->getCondition(), - EntersLoopOnCond, ExitBlock); + if (IsTrivialUnswitchCondition(L, LoopCond, &EntersLoopOnCond, &ExitBlock)){ + UnswitchTrivialCondition(L, LoopCond, EntersLoopOnCond, ExitBlock); NewLoop1 = L; } else { - VersionLoop(BI->getCondition(), L, NewLoop1, NewLoop2); + VersionLoop(LoopCond, L, NewLoop1, NewLoop2); } //std::cerr << "AFTER:\n"; LI->dump(); @@ -489,6 +532,8 @@ // ... ConstantBool *BoolVal = ConstantBool::get(Val); + // FOLD boolean conditions (X|LIC), (X&LIC). Fold conditional branches, + // selects, switches. std::vector Users(LIC->use_begin(), LIC->use_end()); for (unsigned i = 0, e = Users.size(); i != e; ++i) if (Instruction *U = cast(Users[i])) From evan.cheng at apple.com Thu Feb 9 23:49:07 2006 From: evan.cheng at apple.com (Evan Cheng) Date: Thu, 9 Feb 2006 23:49:07 -0600 Subject: [llvm-commits] CVS: llvm/lib/Target/X86/README.txt Message-ID: <200602100549.XAA29406@zion.cs.uiuc.edu> Changes in directory llvm/lib/Target/X86: README.txt updated: 1.50 -> 1.51 --- Log message: Remove a completed entry; add a new entry about fisttp op --- Diffs of the changes: (+3 -4) README.txt | 7 +++---- 1 files changed, 3 insertions(+), 4 deletions(-) Index: llvm/lib/Target/X86/README.txt diff -u llvm/lib/Target/X86/README.txt:1.50 llvm/lib/Target/X86/README.txt:1.51 --- llvm/lib/Target/X86/README.txt:1.50 Wed Feb 8 11:47:22 2006 +++ llvm/lib/Target/X86/README.txt Thu Feb 9 23:48:15 2006 @@ -114,10 +114,6 @@ //===---------------------------------------------------------------------===// -Check if load folding would add a cycle in the dag. - -//===---------------------------------------------------------------------===// - Model X86 EFLAGS as a real register to avoid redudant cmp / test. e.g. cmpl $1, %eax @@ -427,3 +423,6 @@ When: "movl $_dst, _ptr" is sufficient. 
+//===---------------------------------------------------------------------===// + +Use fisttp to do FP to integer conversion whenever it is available. From lattner at cs.uiuc.edu Fri Feb 10 00:59:16 2006 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Fri, 10 Feb 2006 00:59:16 -0600 Subject: [llvm-commits] CVS: llvm/lib/Target/Sparc/SparcInstrInfo.td SparcRegisterInfo.td Message-ID: <200602100659.AAA29626@zion.cs.uiuc.edu> Changes in directory llvm/lib/Target/Sparc: SparcInstrInfo.td updated: 1.119 -> 1.120 SparcRegisterInfo.td updated: 1.28 -> 1.29 --- Log message: Update to new-style flags usage, simplifying the .td file --- Diffs of the changes: (+25 -36) SparcInstrInfo.td | 52 ++++++++++++++++++++++++--------------------------- SparcRegisterInfo.td | 9 -------- 2 files changed, 25 insertions(+), 36 deletions(-) Index: llvm/lib/Target/Sparc/SparcInstrInfo.td diff -u llvm/lib/Target/Sparc/SparcInstrInfo.td:1.119 llvm/lib/Target/Sparc/SparcInstrInfo.td:1.120 --- llvm/lib/Target/Sparc/SparcInstrInfo.td:1.119 Wed Feb 8 23:06:36 2006 +++ llvm/lib/Target/Sparc/SparcInstrInfo.td Fri Feb 10 00:58:25 2006 @@ -91,13 +91,11 @@ def CCOp : Operand; def SDTSPcmpfcc : -SDTypeProfile<1, 2, [SDTCisVT<0, FlagVT>, SDTCisFP<1>, SDTCisSameAs<1, 2>]>; +SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisSameAs<0, 1>]>; def SDTSPbrcc : -SDTypeProfile<0, 3, [SDTCisVT<0, OtherVT>, SDTCisVT<1, i32>, - SDTCisVT<2, FlagVT>]>; +SDTypeProfile<0, 2, [SDTCisVT<0, OtherVT>, SDTCisVT<1, i32>]>; def SDTSPselectcc : -SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, - SDTCisVT<3, i32>, SDTCisVT<4, FlagVT>]>; +SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisVT<3, i32>]>; def SDTSPFTOI : SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisFP<1>]>; def SDTSPITOF : @@ -105,8 +103,8 @@ def SPcmpicc : SDNode<"SPISD::CMPICC", SDTIntBinOp, [SDNPOutFlag]>; def SPcmpfcc : SDNode<"SPISD::CMPFCC", SDTSPcmpfcc, [SDNPOutFlag]>; -def SPbricc : SDNode<"SPISD::BRICC", SDTSPbrcc, [SDNPHasChain]>; 
-def SPbrfcc : SDNode<"SPISD::BRFCC", SDTSPbrcc, [SDNPHasChain]>; +def SPbricc : SDNode<"SPISD::BRICC", SDTSPbrcc, [SDNPHasChain, SDNPInFlag]>; +def SPbrfcc : SDNode<"SPISD::BRFCC", SDTSPbrcc, [SDNPHasChain, SDNPInFlag]>; def SPhi : SDNode<"SPISD::Hi", SDTIntUnaryOp>; def SPlo : SDNode<"SPISD::Lo", SDTIntUnaryOp>; @@ -114,8 +112,8 @@ def SPftoi : SDNode<"SPISD::FTOI", SDTSPFTOI>; def SPitof : SDNode<"SPISD::ITOF", SDTSPITOF>; -def SPselecticc : SDNode<"SPISD::SELECT_ICC", SDTSPselectcc>; -def SPselectfcc : SDNode<"SPISD::SELECT_FCC", SDTSPselectcc>; +def SPselecticc : SDNode<"SPISD::SELECT_ICC", SDTSPselectcc, [SDNPInFlag]>; +def SPselectfcc : SDNode<"SPISD::SELECT_FCC", SDTSPselectcc, [SDNPInFlag]>; // These are target-independent nodes, but have target-specific formats. def SDT_SPCallSeq : SDTypeProfile<0, 1, [ SDTCisVT<0, i32> ]>; @@ -213,32 +211,32 @@ : Pseudo<(ops IntRegs:$dst, IntRegs:$T, IntRegs:$F, i32imm:$Cond), "; SELECT_CC_Int_ICC PSEUDO!", [(set IntRegs:$dst, (SPselecticc IntRegs:$T, IntRegs:$F, - imm:$Cond, ICC))]>; + imm:$Cond))]>; def SELECT_CC_Int_FCC : Pseudo<(ops IntRegs:$dst, IntRegs:$T, IntRegs:$F, i32imm:$Cond), "; SELECT_CC_Int_FCC PSEUDO!", [(set IntRegs:$dst, (SPselectfcc IntRegs:$T, IntRegs:$F, - imm:$Cond, FCC))]>; + imm:$Cond))]>; def SELECT_CC_FP_ICC : Pseudo<(ops FPRegs:$dst, FPRegs:$T, FPRegs:$F, i32imm:$Cond), "; SELECT_CC_FP_ICC PSEUDO!", [(set FPRegs:$dst, (SPselecticc FPRegs:$T, FPRegs:$F, - imm:$Cond, ICC))]>; + imm:$Cond))]>; def SELECT_CC_FP_FCC : Pseudo<(ops FPRegs:$dst, FPRegs:$T, FPRegs:$F, i32imm:$Cond), "; SELECT_CC_FP_FCC PSEUDO!", [(set FPRegs:$dst, (SPselectfcc FPRegs:$T, FPRegs:$F, - imm:$Cond, FCC))]>; + imm:$Cond))]>; def SELECT_CC_DFP_ICC : Pseudo<(ops DFPRegs:$dst, DFPRegs:$T, DFPRegs:$F, i32imm:$Cond), "; SELECT_CC_DFP_ICC PSEUDO!", [(set DFPRegs:$dst, (SPselecticc DFPRegs:$T, DFPRegs:$F, - imm:$Cond, ICC))]>; + imm:$Cond))]>; def SELECT_CC_DFP_FCC : Pseudo<(ops DFPRegs:$dst, DFPRegs:$T, DFPRegs:$F, i32imm:$Cond), 
"; SELECT_CC_DFP_FCC PSEUDO!", [(set DFPRegs:$dst, (SPselectfcc DFPRegs:$T, DFPRegs:$F, - imm:$Cond, FCC))]>; + imm:$Cond))]>; } @@ -605,7 +603,7 @@ // FIXME: the encoding for the JIT should look at the condition field. def BCOND : BranchSP<0, (ops brtarget:$dst, CCOp:$cc), "b$cc $dst", - [(SPbricc bb:$dst, imm:$cc, ICC)]>; + [(SPbricc bb:$dst, imm:$cc)]>; // Section B.22 - Branch on Floating-point Condition Codes Instructions, p. 121 @@ -622,7 +620,7 @@ // FIXME: the encoding for the JIT should look at the condition field. def FBCOND : FPBranchSP<0, (ops brtarget:$dst, CCOp:$cc), "fb$cc $dst", - [(SPbrfcc bb:$dst, imm:$cc, FCC)]>; + [(SPbrfcc bb:$dst, imm:$cc)]>; // Section B.24 - Call and Link Instruction, p. 125 @@ -767,11 +765,11 @@ def FCMPS : F3_3<2, 0b110101, 0b001010001, (ops FPRegs:$src1, FPRegs:$src2), "fcmps $src1, $src2\n\tnop", - [(set FCC, (SPcmpfcc FPRegs:$src1, FPRegs:$src2))]>; + [(SPcmpfcc FPRegs:$src1, FPRegs:$src2)]>; def FCMPD : F3_3<2, 0b110101, 0b001010010, (ops DFPRegs:$src1, DFPRegs:$src2), "fcmpd $src1, $src2\n\tnop", - [(set FCC, (SPcmpfcc DFPRegs:$src1, DFPRegs:$src2))]>; + [(SPcmpfcc DFPRegs:$src1, DFPRegs:$src2)]>; //===----------------------------------------------------------------------===// @@ -786,44 +784,44 @@ : Pseudo<(ops IntRegs:$dst, IntRegs:$T, IntRegs:$F, CCOp:$cc), "mov$cc %icc, $F, $dst", [(set IntRegs:$dst, - (SPselecticc IntRegs:$F, IntRegs:$T, imm:$cc, ICC))]>; + (SPselecticc IntRegs:$F, IntRegs:$T, imm:$cc))]>; def MOVICCri : Pseudo<(ops IntRegs:$dst, IntRegs:$T, i32imm:$F, CCOp:$cc), "mov$cc %icc, $F, $dst", [(set IntRegs:$dst, - (SPselecticc simm11:$F, IntRegs:$T, imm:$cc, ICC))]>; + (SPselecticc simm11:$F, IntRegs:$T, imm:$cc))]>; def MOVFCCrr : Pseudo<(ops IntRegs:$dst, IntRegs:$T, IntRegs:$F, CCOp:$cc), "mov$cc %fcc0, $F, $dst", [(set IntRegs:$dst, - (SPselectfcc IntRegs:$F, IntRegs:$T, imm:$cc, FCC))]>; + (SPselectfcc IntRegs:$F, IntRegs:$T, imm:$cc))]>; def MOVFCCri : Pseudo<(ops IntRegs:$dst, IntRegs:$T, 
i32imm:$F, CCOp:$cc), "mov$cc %fcc0, $F, $dst", [(set IntRegs:$dst, - (SPselectfcc simm11:$F, IntRegs:$T, imm:$cc, FCC))]>; + (SPselectfcc simm11:$F, IntRegs:$T, imm:$cc))]>; def FMOVS_ICC : Pseudo<(ops FPRegs:$dst, FPRegs:$T, FPRegs:$F, CCOp:$cc), "fmovs$cc %icc, $F, $dst", [(set FPRegs:$dst, - (SPselecticc FPRegs:$F, FPRegs:$T, imm:$cc, ICC))]>; + (SPselecticc FPRegs:$F, FPRegs:$T, imm:$cc))]>; def FMOVD_ICC : Pseudo<(ops DFPRegs:$dst, DFPRegs:$T, DFPRegs:$F, CCOp:$cc), "fmovd$cc %icc, $F, $dst", [(set DFPRegs:$dst, - (SPselecticc DFPRegs:$F, DFPRegs:$T, imm:$cc, ICC))]>; + (SPselecticc DFPRegs:$F, DFPRegs:$T, imm:$cc))]>; def FMOVS_FCC : Pseudo<(ops FPRegs:$dst, FPRegs:$T, FPRegs:$F, CCOp:$cc), "fmovs$cc %fcc0, $F, $dst", [(set FPRegs:$dst, - (SPselectfcc FPRegs:$F, FPRegs:$T, imm:$cc, FCC))]>; + (SPselectfcc FPRegs:$F, FPRegs:$T, imm:$cc))]>; def FMOVD_FCC : Pseudo<(ops DFPRegs:$dst, DFPRegs:$T, DFPRegs:$F, CCOp:$cc), "fmovd$cc %fcc0, $F, $dst", [(set DFPRegs:$dst, - (SPselectfcc DFPRegs:$F, DFPRegs:$T, imm:$cc, FCC))]>; + (SPselectfcc DFPRegs:$F, DFPRegs:$T, imm:$cc))]>; } Index: llvm/lib/Target/Sparc/SparcRegisterInfo.td diff -u llvm/lib/Target/Sparc/SparcRegisterInfo.td:1.28 llvm/lib/Target/Sparc/SparcRegisterInfo.td:1.29 --- llvm/lib/Target/Sparc/SparcRegisterInfo.td:1.28 Sat Feb 4 23:50:24 2006 +++ llvm/lib/Target/Sparc/SparcRegisterInfo.td Fri Feb 10 00:58:25 2006 @@ -68,15 +68,6 @@ def D12 : Rd<24, "F24", [F24, F25]>; def D13 : Rd<26, "F26", [F26, F27]>; def D14 : Rd<28, "F28", [F28, F29]>; def D15 : Rd<30, "F30", [F30, F31]>; -/// Integer and FP Condition codes. -let Namespace = "SP" in { - def ICC : Register<"ICC">; - def FCC : Register<"FCC">; -} -def FLAGS_REGS : RegisterClass<"SP", [FlagVT], 32, [ICC, FCC]> { - let Size = 32; -} - // Register classes. 
// // FIXME: the register order should be defined in terms of the preferred From lattner at cs.uiuc.edu Fri Feb 10 01:36:34 2006 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Fri, 10 Feb 2006 01:36:34 -0600 Subject: [llvm-commits] CVS: llvm/lib/Target/Sparc/SparcAsmPrinter.cpp SparcISelDAGToDAG.cpp SparcInstrInfo.td Message-ID: <200602100736.BAA29923@zion.cs.uiuc.edu> Changes in directory llvm/lib/Target/Sparc: SparcAsmPrinter.cpp updated: 1.53 -> 1.54 SparcISelDAGToDAG.cpp updated: 1.82 -> 1.83 SparcInstrInfo.td updated: 1.120 -> 1.121 --- Log message: Use the auto-generated call matcher. Remove a broken impl of the frameaddr/returnaddr intrinsics. Autogen frameindex matcher --- Diffs of the changes: (+34 -61) SparcAsmPrinter.cpp | 14 ++++++++-- SparcISelDAGToDAG.cpp | 67 ++++++++------------------------------------------ SparcInstrInfo.td | 14 +++++++--- 3 files changed, 34 insertions(+), 61 deletions(-) Index: llvm/lib/Target/Sparc/SparcAsmPrinter.cpp diff -u llvm/lib/Target/Sparc/SparcAsmPrinter.cpp:1.53 llvm/lib/Target/Sparc/SparcAsmPrinter.cpp:1.54 --- llvm/lib/Target/Sparc/SparcAsmPrinter.cpp:1.53 Sat Feb 4 23:50:24 2006 +++ llvm/lib/Target/Sparc/SparcAsmPrinter.cpp Fri Feb 10 01:35:42 2006 @@ -57,7 +57,8 @@ } void printOperand(const MachineInstr *MI, int opNum); - void printMemOperand(const MachineInstr *MI, int opNum); + void printMemOperand(const MachineInstr *MI, int opNum, + const char *Modifier = 0); void printCCOperand(const MachineInstr *MI, int opNum); bool printInstruction(const MachineInstr *MI); // autogenerated. @@ -189,8 +190,17 @@ if (CloseParen) O << ")"; } -void SparcAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum) { +void SparcAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum, + const char *Modifier) { printOperand(MI, opNum); + + // If this is an ADD operand, emit it like normal operands. 
+ if (Modifier && !strcmp(Modifier, "arith")) { + O << ", "; + printOperand(MI, opNum+1); + return; + } + MachineOperand::MachineOperandType OpTy = MI->getOperand(opNum+1).getType(); if ((OpTy == MachineOperand::MO_VirtualRegister || Index: llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp diff -u llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp:1.82 llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp:1.83 --- llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp:1.82 Thu Feb 9 01:17:49 2006 +++ llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp Fri Feb 10 01:35:42 2006 @@ -111,9 +111,6 @@ unsigned CC, bool isTailCall, SDOperand Callee, ArgListTy &Args, SelectionDAG &DAG); - virtual std::pair - LowerFrameReturnAddress(bool isFrameAddr, SDOperand Chain, unsigned Depth, - SelectionDAG &DAG); virtual MachineBasicBlock *InsertAtEndOfBasicBlock(MachineInstr *MI, MachineBasicBlock *MBB); @@ -595,8 +592,11 @@ // If the callee is a GlobalAddress node (quite common, every direct call is) // turn it into a TargetGlobalAddress node so that legalize doesn't hack it. + // Likewise ExternalSymbol -> TargetExternalSymbol. if (GlobalAddressSDNode *G = dyn_cast(Callee)) Callee = DAG.getTargetGlobalAddress(G->getGlobal(), MVT::i32); + else if (ExternalSymbolSDNode *E = dyn_cast(Callee)) + Callee = DAG.getTargetExternalSymbol(E->getSymbol(), MVT::i32); std::vector NodeTys; NodeTys.push_back(MVT::Other); // Returns a chain @@ -653,13 +653,6 @@ return std::make_pair(RetVal, Chain); } -std::pair SparcTargetLowering:: -LowerFrameReturnAddress(bool isFrameAddr, SDOperand Chain, unsigned Depth, - SelectionDAG &DAG) { - assert(0 && "Unimp"); - abort(); -} - // Look at LHS/RHS/CC and see if they are a lowered setcc instruction. If so // set LHS/RHS and SPCC to the LHS/RHS of the setcc and SPCC to the condition. 
static void LookThroughSetCC(SDOperand &LHS, SDOperand &RHS, @@ -974,6 +967,9 @@ Offset = CurDAG->getTargetConstant(0, MVT::i32); return true; } + if (Addr.getOpcode() == ISD::TargetExternalSymbol || + Addr.getOpcode() == ISD::TargetGlobalAddress) + return false; // direct calls. if (Addr.getOpcode() == ISD::ADD) { if (ConstantSDNode *CN = dyn_cast(Addr.getOperand(1))) { @@ -1007,7 +1003,11 @@ bool SparcDAGToDAGISel::SelectADDRrr(SDOperand Addr, SDOperand &R1, SDOperand &R2) { - if (Addr.getOpcode() == ISD::FrameIndex) return false; + if (Addr.getOpcode() == ISD::FrameIndex) return false; + if (Addr.getOpcode() == ISD::TargetExternalSymbol || + Addr.getOpcode() == ISD::TargetGlobalAddress) + return false; // direct calls. + if (Addr.getOpcode() == ISD::ADD) { if (isa(Addr.getOperand(1)) && Predicate_simm13(Addr.getOperand(1).Val)) @@ -1042,21 +1042,6 @@ switch (N->getOpcode()) { default: break; - case ISD::FrameIndex: { - int FI = cast(N)->getIndex(); - if (N->hasOneUse()) { - Result = CurDAG->SelectNodeTo(N, SP::ADDri, MVT::i32, - CurDAG->getTargetFrameIndex(FI, MVT::i32), - CurDAG->getTargetConstant(0, MVT::i32)); - return; - } - - Result = CodeGenMap[Op] = - SDOperand(CurDAG->getTargetNode(SP::ADDri, MVT::i32, - CurDAG->getTargetFrameIndex(FI, MVT::i32), - CurDAG->getTargetConstant(0, MVT::i32)), 0); - return; - } case ISD::ADD_PARTS: { SDOperand LHSL, LHSH, RHSL, RHSH; Select(LHSL, N->getOperand(0)); @@ -1123,39 +1108,11 @@ Select(MulRHS, N->getOperand(1)); unsigned Opcode = N->getOpcode() == ISD::MULHU ? SP::UMULrr : SP::SMULrr; SDNode *Mul = CurDAG->getTargetNode(Opcode, MVT::i32, MVT::Flag, - MulLHS, MulRHS); + MulLHS, MulRHS); // The high part is in the Y register. Result = CurDAG->SelectNodeTo(N, SP::RDY, MVT::i32, SDOperand(Mul, 1)); return; } - case SPISD::CALL: - // FIXME: This is a workaround for a bug in tblgen. 
- { // Pattern #47: (call:Flag (tglobaladdr:i32):$dst, ICC:Flag) - // Emits: (CALL:void (tglobaladdr:i32):$dst) - // Pattern complexity = 2 cost = 1 - SDOperand N1 = N->getOperand(1); - if (N1.getOpcode() != ISD::TargetGlobalAddress && - N1.getOpcode() != ISD::ExternalSymbol) goto P47Fail; - SDOperand InFlag = SDOperand(0, 0); - SDOperand Chain = N->getOperand(0); - SDOperand Tmp0 = N1; - Select(Chain, Chain); - SDNode *ResNode; - if (N->getNumOperands() == 3) { - Select(InFlag, N->getOperand(2)); - ResNode = CurDAG->getTargetNode(SP::CALL, MVT::Other, MVT::Flag, Tmp0, - Chain, InFlag); - } else { - ResNode = CurDAG->getTargetNode(SP::CALL, MVT::Other, MVT::Flag, Tmp0, - Chain); - } - Chain = CodeGenMap[SDOperand(N, 0)] = SDOperand(ResNode, 0); - CodeGenMap[SDOperand(N, 1)] = SDOperand(ResNode, 1); - Result = SDOperand(ResNode, Op.ResNo); - return; - } - P47Fail:; - } SelectCode(Result, Op); Index: llvm/lib/Target/Sparc/SparcInstrInfo.td diff -u llvm/lib/Target/Sparc/SparcInstrInfo.td:1.120 llvm/lib/Target/Sparc/SparcInstrInfo.td:1.121 --- llvm/lib/Target/Sparc/SparcInstrInfo.td:1.120 Fri Feb 10 00:58:25 2006 +++ llvm/lib/Target/Sparc/SparcInstrInfo.td Fri Feb 10 01:35:42 2006 @@ -68,7 +68,7 @@ // Addressing modes. 
def ADDRrr : ComplexPattern; -def ADDRri : ComplexPattern; +def ADDRri : ComplexPattern; // Address operands def MEMrr : Operand { @@ -444,6 +444,13 @@ (ops IntRegs:$dst, IntRegs:$b, i32imm:$c), "add $b, $c, $dst", [(set IntRegs:$dst, (add IntRegs:$b, simm13:$c))]>; + +// "LEA" forms of add (patterns to make tblgen happy) +def LEA_ADDri : F3_2<2, 0b000000, + (ops IntRegs:$dst, MEMri:$addr), + "add ${addr:arith}, $dst", + [(set IntRegs:$dst, ADDRri:$addr)]>; + def ADDCCrr : F3_1<2, 0b010000, (ops IntRegs:$dst, IntRegs:$b, IntRegs:$c), "addcc $b, $c, $dst", []>; @@ -871,12 +878,11 @@ def : Pat<(add IntRegs:$r, (SPlo tconstpool:$in)), (ADDri IntRegs:$r, tconstpool:$in)>; - // Calls: def : Pat<(call tglobaladdr:$dst), (CALL tglobaladdr:$dst)>; -def : Pat<(call externalsym:$dst), - (CALL externalsym:$dst)>; +def : Pat<(call texternalsym:$dst), + (CALL texternalsym:$dst)>; def : Pat<(ret), (RETL)>; From lattner at cs.uiuc.edu Fri Feb 10 13:08:27 2006 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Fri, 10 Feb 2006 13:08:27 -0600 Subject: [llvm-commits] CVS: llvm/lib/Transforms/Scalar/LoopUnswitch.cpp Message-ID: <200602101908.NAA10439@zion.cs.uiuc.edu> Changes in directory llvm/lib/Transforms/Scalar: LoopUnswitch.cpp updated: 1.13 -> 1.14 --- Log message: Fix a case where UnswitchTrivialCondition broke critical edges with phi's in the successors --- Diffs of the changes: (+24 -1) LoopUnswitch.cpp | 25 ++++++++++++++++++++++++- 1 files changed, 24 insertions(+), 1 deletion(-) Index: llvm/lib/Transforms/Scalar/LoopUnswitch.cpp diff -u llvm/lib/Transforms/Scalar/LoopUnswitch.cpp:1.13 llvm/lib/Transforms/Scalar/LoopUnswitch.cpp:1.14 --- llvm/lib/Transforms/Scalar/LoopUnswitch.cpp:1.13 Thu Feb 9 20:30:37 2006 +++ llvm/lib/Transforms/Scalar/LoopUnswitch.cpp Fri Feb 10 13:08:15 2006 @@ -34,6 +34,7 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" 
#include "llvm/ADT/Statistic.h" #include "llvm/Support/Debug.h" #include "llvm/Support/CommandLine.h" @@ -255,6 +256,8 @@ // loop. for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E; ++I) { + for (BasicBlock::iterator BBI = (*I)->begin(), E = (*I)->end(); + BBI != E; ++BBI) TerminatorInst *TI = (*I)->getTerminator(); // FIXME: Handle invariant select instructions. @@ -415,7 +418,27 @@ // Split this block now, so that the loop maintains its exit block. assert(!L->contains(ExitBlock) && "Exit block is in the loop?"); - BasicBlock *NewExit = SplitBlock(ExitBlock, true); + BasicBlock *NewExit; + if (BasicBlock *SinglePred = ExitBlock->getSinglePredecessor()) { + assert(SinglePred == L->getLoopLatch() && "Unexpected case"); + NewExit = SplitBlock(ExitBlock, true); + } else { + // Otherwise, this is a critical edge. Split block would split the wrong + // edge here, so we use SplitCriticalEdge, which allows us to specify the + // edge to split, not just the block. + TerminatorInst *LatchTerm = L->getLoopLatch()->getTerminator(); + unsigned SuccNum = 0; + for (unsigned i = 0, e = LatchTerm->getNumSuccessors(); ; ++i) { + assert(i != e && "Didn't find edge?"); + if (LatchTerm->getSuccessor(i) == ExitBlock) { + SuccNum = i; + break; + } + } + SplitCriticalEdge(LatchTerm, SuccNum, this); + NewExit = LatchTerm->getSuccessor(SuccNum); + assert(NewExit != ExitBlock && "Edge not split!"); + } // Okay, now we have a position to branch from and a position to branch to, // insert the new conditional branch. From lattner at cs.uiuc.edu Fri Feb 10 15:32:23 2006 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Fri, 10 Feb 2006 15:32:23 -0600 Subject: [llvm-commits] CVS: llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h Message-ID: <200602102132.PAA11554@zion.cs.uiuc.edu> Changes in directory llvm/include/llvm/Transforms/Utils: BasicBlockUtils.h updated: 1.10 -> 1.11 --- Log message: Remove a level of indirection. 
--- Diffs of the changes: (+8 -9) BasicBlockUtils.h | 17 ++++++++--------- 1 files changed, 8 insertions(+), 9 deletions(-) Index: llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h diff -u llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h:1.10 llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h:1.11 --- llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h:1.10 Wed Aug 17 01:34:37 2005 +++ llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h Fri Feb 10 15:32:11 2006 @@ -76,20 +76,19 @@ /// This updates all of the same analyses as the other SplitCriticalEdge /// function. inline bool SplitCriticalEdge(BasicBlock *Succ, pred_iterator PI, Pass *P = 0) { - BasicBlock *Pred = *PI; bool MadeChange = false; - for (succ_iterator SI = succ_begin(Pred), E = succ_end(Pred); SI != E; ++SI) - if (*SI == Succ) - MadeChange |= SplitCriticalEdge(Pred, SI, P); + TerminatorInst *TI = (*PI)->getTerminator(); + for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) + if (TI->getSuccessor(i) == Succ) + MadeChange |= SplitCriticalEdge(TI, i, P); return MadeChange; } inline bool SplitCriticalEdge(BasicBlock *Src, BasicBlock *Dst, Pass *P = 0) { - for (succ_iterator SI = succ_begin(Src); ; ++SI) { - assert(SI != succ_end(Src) && "Edge doesn't exist"); - if (*SI == Dst) - return SplitCriticalEdge(Src, SI, P); - } + TerminatorInst *TI = Src->getTerminator(); + for (unsigned i = 0, e = TI->getNumSuccessors(); ; ++i) + if (TI->getSuccessor(i) == Dst) + return SplitCriticalEdge(TI, i, P); } } // End llvm namespace From evan.cheng at apple.com Fri Feb 10 16:24:44 2006 From: evan.cheng at apple.com (Evan Cheng) Date: Fri, 10 Feb 2006 16:24:44 -0600 Subject: [llvm-commits] CVS: llvm/lib/Target/X86/X86ISelDAGToDAG.cpp Message-ID: <200602102224.QAA11858@zion.cs.uiuc.edu> Changes in directory llvm/lib/Target/X86: X86ISelDAGToDAG.cpp updated: 1.46 -> 1.47 --- Log message: Added X86 isel debugging stuff. 
--- Diffs of the changes: (+63 -0) X86ISelDAGToDAG.cpp | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 files changed, 63 insertions(+) Index: llvm/lib/Target/X86/X86ISelDAGToDAG.cpp diff -u llvm/lib/Target/X86/X86ISelDAGToDAG.cpp:1.46 llvm/lib/Target/X86/X86ISelDAGToDAG.cpp:1.47 --- llvm/lib/Target/X86/X86ISelDAGToDAG.cpp:1.46 Thu Feb 9 16:12:53 2006 +++ llvm/lib/Target/X86/X86ISelDAGToDAG.cpp Fri Feb 10 16:24:32 2006 @@ -12,6 +12,7 @@ // //===----------------------------------------------------------------------===// +#define DEBUG_TYPE "isel" #include "X86.h" #include "X86InstrBuilder.h" #include "X86RegisterInfo.h" @@ -143,6 +144,8 @@ inline SDOperand getI32Imm(unsigned Imm) { return CurDAG->getTargetConstant(Imm, MVT::i32); } + + std::string Indent; }; } @@ -153,7 +156,14 @@ MachineFunction::iterator FirstMBB = BB; // Codegen the basic block. +#ifndef NDEBUG + DEBUG(std::cerr << "===== Instruction selection begins:\n"); + Indent = ""; +#endif DAG.setRoot(SelectRoot(DAG.getRoot())); +#ifndef NDEBUG + DEBUG(std::cerr << "===== Instruction selection ends:\n"); +#endif CodeGenMap.clear(); DAG.RemoveDeadNodes(); @@ -451,14 +461,37 @@ unsigned Opc, MOpc; unsigned Opcode = Node->getOpcode(); +#ifndef NDEBUG + std::string IndentSave = Indent; + DEBUG(std::cerr << Indent); + DEBUG(std::cerr << "Selecting: "); + DEBUG(Node->dump(CurDAG)); + DEBUG(std::cerr << "\n"); + Indent += " "; +#endif + if (Opcode >= ISD::BUILTIN_OP_END && Opcode < X86ISD::FIRST_NUMBER) { Result = N; +#ifndef NDEBUG + DEBUG(std::cerr << Indent); + DEBUG(std::cerr << "== "); + DEBUG(Node->dump(CurDAG)); + DEBUG(std::cerr << "\n"); + Indent = IndentSave; +#endif return; // Already selected. 
} std::map::iterator CGMI = CodeGenMap.find(N); if (CGMI != CodeGenMap.end()) { Result = CGMI->second; +#ifndef NDEBUG + DEBUG(std::cerr << Indent); + DEBUG(std::cerr << "== "); + DEBUG(Result.Val->dump(CurDAG)); + DEBUG(std::cerr << "\n"); + Indent = IndentSave; +#endif return; } @@ -539,6 +572,13 @@ AddHandleReplacement(N1.Val, 1, Result.Val, 1); } +#ifndef NDEBUG + DEBUG(std::cerr << Indent); + DEBUG(std::cerr << "== "); + DEBUG(Result.Val->dump(CurDAG)); + DEBUG(std::cerr << "\n"); + Indent = IndentSave; +#endif return; } @@ -639,6 +679,14 @@ CodeGenMap[N1.getValue(1)] = Result.getValue(1); AddHandleReplacement(N1.Val, 1, Result.Val, 1); } + +#ifndef NDEBUG + DEBUG(std::cerr << Indent); + DEBUG(std::cerr << "== "); + DEBUG(Result.Val->dump(CurDAG)); + DEBUG(std::cerr << "\n"); + Indent = IndentSave; +#endif return; } @@ -670,11 +718,26 @@ else Result = CodeGenMap[N] = SDOperand(CurDAG->getTargetNode(Opc, VT, Result), 0); + +#ifndef NDEBUG + DEBUG(std::cerr << Indent); + DEBUG(std::cerr << "== "); + DEBUG(Result.Val->dump(CurDAG)); + DEBUG(std::cerr << "\n"); + Indent = IndentSave; +#endif return; } } SelectCode(Result, N); +#ifndef NDEBUG + DEBUG(std::cerr << Indent); + DEBUG(std::cerr << "=> "); + DEBUG(Result.Val->dump(CurDAG)); + DEBUG(std::cerr << "\n"); + Indent = IndentSave; +#endif } /// createX86ISelDag - This pass converts a legalized DAG into a From evan.cheng at apple.com Fri Feb 10 16:46:38 2006 From: evan.cheng at apple.com (Evan Cheng) Date: Fri, 10 Feb 2006 16:46:38 -0600 Subject: [llvm-commits] CVS: llvm/lib/Target/X86/X86ISelDAGToDAG.cpp Message-ID: <200602102246.QAA11945@zion.cs.uiuc.edu> Changes in directory llvm/lib/Target/X86: X86ISelDAGToDAG.cpp updated: 1.47 -> 1.48 --- Log message: Nicer code. 
:-) --- Diffs of the changes: (+18 -17) X86ISelDAGToDAG.cpp | 35 ++++++++++++++++++----------------- 1 files changed, 18 insertions(+), 17 deletions(-) Index: llvm/lib/Target/X86/X86ISelDAGToDAG.cpp diff -u llvm/lib/Target/X86/X86ISelDAGToDAG.cpp:1.47 llvm/lib/Target/X86/X86ISelDAGToDAG.cpp:1.48 --- llvm/lib/Target/X86/X86ISelDAGToDAG.cpp:1.47 Fri Feb 10 16:24:32 2006 +++ llvm/lib/Target/X86/X86ISelDAGToDAG.cpp Fri Feb 10 16:46:26 2006 @@ -145,7 +145,9 @@ return CurDAG->getTargetConstant(Imm, MVT::i32); } - std::string Indent; +#ifndef NDEBUG + unsigned Indent; +#endif }; } @@ -158,7 +160,7 @@ // Codegen the basic block. #ifndef NDEBUG DEBUG(std::cerr << "===== Instruction selection begins:\n"); - Indent = ""; + Indent = 0; #endif DAG.setRoot(SelectRoot(DAG.getRoot())); #ifndef NDEBUG @@ -462,22 +464,21 @@ unsigned Opcode = Node->getOpcode(); #ifndef NDEBUG - std::string IndentSave = Indent; - DEBUG(std::cerr << Indent); + DEBUG(std::cerr << std::string(Indent, ' ')); DEBUG(std::cerr << "Selecting: "); DEBUG(Node->dump(CurDAG)); DEBUG(std::cerr << "\n"); - Indent += " "; + Indent += 2; #endif if (Opcode >= ISD::BUILTIN_OP_END && Opcode < X86ISD::FIRST_NUMBER) { Result = N; #ifndef NDEBUG - DEBUG(std::cerr << Indent); + DEBUG(std::cerr << std::string(Indent, ' ')); DEBUG(std::cerr << "== "); DEBUG(Node->dump(CurDAG)); DEBUG(std::cerr << "\n"); - Indent = IndentSave; + Indent -= 2; #endif return; // Already selected. 
} @@ -486,11 +487,11 @@ if (CGMI != CodeGenMap.end()) { Result = CGMI->second; #ifndef NDEBUG - DEBUG(std::cerr << Indent); + DEBUG(std::cerr << std::string(Indent, ' ')); DEBUG(std::cerr << "== "); DEBUG(Result.Val->dump(CurDAG)); DEBUG(std::cerr << "\n"); - Indent = IndentSave; + Indent -= 2; #endif return; } @@ -573,11 +574,11 @@ } #ifndef NDEBUG - DEBUG(std::cerr << Indent); + DEBUG(std::cerr << std::string(Indent, ' ')); DEBUG(std::cerr << "== "); DEBUG(Result.Val->dump(CurDAG)); DEBUG(std::cerr << "\n"); - Indent = IndentSave; + Indent -= 2; #endif return; } @@ -681,11 +682,11 @@ } #ifndef NDEBUG - DEBUG(std::cerr << Indent); + DEBUG(std::cerr << std::string(Indent, ' ')); DEBUG(std::cerr << "== "); DEBUG(Result.Val->dump(CurDAG)); DEBUG(std::cerr << "\n"); - Indent = IndentSave; + Indent -= 2; #endif return; } @@ -720,11 +721,11 @@ SDOperand(CurDAG->getTargetNode(Opc, VT, Result), 0); #ifndef NDEBUG - DEBUG(std::cerr << Indent); + DEBUG(std::cerr << std::string(Indent, ' ')); DEBUG(std::cerr << "== "); DEBUG(Result.Val->dump(CurDAG)); DEBUG(std::cerr << "\n"); - Indent = IndentSave; + Indent -= 2; #endif return; } @@ -732,11 +733,11 @@ SelectCode(Result, N); #ifndef NDEBUG - DEBUG(std::cerr << Indent); + DEBUG(std::cerr << std::string(Indent, ' ')); DEBUG(std::cerr << "=> "); DEBUG(Result.Val->dump(CurDAG)); DEBUG(std::cerr << "\n"); - Indent = IndentSave; + Indent -= 2; #endif } From lattner at cs.uiuc.edu Fri Feb 10 17:16:51 2006 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Fri, 10 Feb 2006 17:16:51 -0600 Subject: [llvm-commits] CVS: llvm/lib/Transforms/Scalar/LoopUnswitch.cpp Message-ID: <200602102316.RAA12097@zion.cs.uiuc.edu> Changes in directory llvm/lib/Transforms/Scalar: LoopUnswitch.cpp updated: 1.14 -> 1.15 --- Log message: Reform the unswitching code in terms of edge splitting, not block splitting. 
--- Diffs of the changes: (+67 -49) LoopUnswitch.cpp | 116 +++++++++++++++++++++++++++++++------------------------ 1 files changed, 67 insertions(+), 49 deletions(-) Index: llvm/lib/Transforms/Scalar/LoopUnswitch.cpp diff -u llvm/lib/Transforms/Scalar/LoopUnswitch.cpp:1.14 llvm/lib/Transforms/Scalar/LoopUnswitch.cpp:1.15 --- llvm/lib/Transforms/Scalar/LoopUnswitch.cpp:1.14 Fri Feb 10 13:08:15 2006 +++ llvm/lib/Transforms/Scalar/LoopUnswitch.cpp Fri Feb 10 17:16:39 2006 @@ -68,7 +68,7 @@ private: unsigned getLoopUnswitchCost(Loop *L, Value *LIC); void VersionLoop(Value *LIC, Loop *L, Loop *&Out1, Loop *&Out2); - BasicBlock *SplitBlock(BasicBlock *BB, bool SplitAtTop); + BasicBlock *SplitEdge(BasicBlock *From, BasicBlock *To); void RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC, bool Val); void UnswitchTrivialCondition(Loop *L, Value *Cond, bool EntersLoopOnCond, BasicBlock *ExitBlock); @@ -256,8 +256,6 @@ // loop. for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E; ++I) { - for (BasicBlock::iterator BBI = (*I)->begin(), E = (*I)->end(); - BBI != E; ++BBI) TerminatorInst *TI = (*I)->getTerminator(); // FIXME: Handle invariant select instructions. @@ -329,31 +327,49 @@ return Changed; } -/// SplitBlock - Split the specified basic block into two pieces. If SplitAtTop -/// is false, this splits the block so the second half only has an unconditional -/// branch. If SplitAtTop is true, it makes it so the first half of the block -/// only has an unconditional branch in it. -/// -/// This method updates the LoopInfo for this function to correctly reflect the -/// CFG changes made. -/// -/// This routine returns the new basic block that was inserted, which is always -/// the later part of the block. 
-BasicBlock *LoopUnswitch::SplitBlock(BasicBlock *BB, bool SplitAtTop) { +BasicBlock *LoopUnswitch::SplitEdge(BasicBlock *BB, BasicBlock *Succ) { + TerminatorInst *LatchTerm = BB->getTerminator(); + unsigned SuccNum = 0; + for (unsigned i = 0, e = LatchTerm->getNumSuccessors(); ; ++i) { + assert(i != e && "Didn't find edge?"); + if (LatchTerm->getSuccessor(i) == Succ) { + SuccNum = i; + break; + } + } + + // If this is a critical edge, let SplitCriticalEdge do it. + if (SplitCriticalEdge(BB->getTerminator(), SuccNum, this)) + return LatchTerm->getSuccessor(SuccNum); + + // If the edge isn't critical, then BB has a single successor or Succ has a + // single pred. Split the block. + BasicBlock *BlockToSplit; BasicBlock::iterator SplitPoint; - if (!SplitAtTop) + if (BasicBlock *SP = Succ->getSinglePredecessor()) { + // If the successor only has a single pred, split the top of the successor + // block. + assert(SP == BB && "CFG broken"); + BlockToSplit = Succ; + SplitPoint = Succ->begin(); + } else { + // Otherwise, if BB has a single successor, split it at the bottom of the + // block. + assert(BB->getTerminator()->getNumSuccessors() == 1 && + "Should have a single succ!"); + BlockToSplit = BB; SplitPoint = BB->getTerminator(); - else { - SplitPoint = BB->begin(); - while (isa(SplitPoint)) ++SplitPoint; } - - BasicBlock *New = BB->splitBasicBlock(SplitPoint, BB->getName()+".tail"); + + BasicBlock *New = + BlockToSplit->splitBasicBlock(SplitPoint, + BlockToSplit->getName()+".tail"); // New now lives in whichever loop that BB used to. 
- if (Loop *L = LI->getLoopFor(BB)) + if (Loop *L = LI->getLoopFor(BlockToSplit)) L->addBasicBlockToLoop(New, *LI); return New; } + // RemapInstruction - Convert the instruction operands from referencing the @@ -406,40 +422,21 @@ << " blocks] in Function " << L->getHeader()->getParent()->getName() << " on cond:" << *Cond << "\n"); - // First step, split the preahder, so that we know that there is a safe place + // First step, split the preheader, so that we know that there is a safe place // to insert the conditional branch. We will change 'OrigPH' to have a // conditional branch on Cond. BasicBlock *OrigPH = L->getLoopPreheader(); - BasicBlock *NewPH = SplitBlock(OrigPH, false); + BasicBlock *NewPH = SplitEdge(OrigPH, L->getHeader()); // Now that we have a place to insert the conditional branch, create a place // to branch to: this is the exit block out of the loop that we should // short-circuit to. - // Split this block now, so that the loop maintains its exit block. + // Split this edge now, so that the loop maintains its exit block. assert(!L->contains(ExitBlock) && "Exit block is in the loop?"); - BasicBlock *NewExit; - if (BasicBlock *SinglePred = ExitBlock->getSinglePredecessor()) { - assert(SinglePred == L->getLoopLatch() && "Unexpected case"); - NewExit = SplitBlock(ExitBlock, true); - } else { - // Otherwise, this is a critical edge. Split block would split the wrong - // edge here, so we use SplitCriticalEdge, which allows us to specify the - // edge to split, not just the block. 
- TerminatorInst *LatchTerm = L->getLoopLatch()->getTerminator(); - unsigned SuccNum = 0; - for (unsigned i = 0, e = LatchTerm->getNumSuccessors(); ; ++i) { - assert(i != e && "Didn't find edge?"); - if (LatchTerm->getSuccessor(i) == ExitBlock) { - SuccNum = i; - break; - } - } - SplitCriticalEdge(LatchTerm, SuccNum, this); - NewExit = LatchTerm->getSuccessor(SuccNum); - assert(NewExit != ExitBlock && "Edge not split!"); - } - + BasicBlock *NewExit = SplitEdge(L->getLoopLatch(), ExitBlock); + assert(NewExit != ExitBlock && "Edge not split!"); + // Okay, now we have a position to branch from and a position to branch to, // insert the new conditional branch. new BranchInst(EnterOnCond ? NewPH : NewExit, EnterOnCond ? NewExit : NewPH, @@ -467,12 +464,15 @@ << " blocks] in Function " << F->getName() << " on cond:" << *LIC << "\n"); + // LoopBlocks contains all of the basic blocks of the loop, including the + // preheader of the loop, the body of the loop, and the exit blocks of the + // loop, in that order. std::vector LoopBlocks; // First step, split the preheader and exit blocks, and add these blocks to // the LoopBlocks list. BasicBlock *OrigPreheader = L->getLoopPreheader(); - LoopBlocks.push_back(SplitBlock(OrigPreheader, false)); + LoopBlocks.push_back(SplitEdge(OrigPreheader, L->getHeader())); // We want the loop to come after the preheader, but before the exit blocks. LoopBlocks.insert(LoopBlocks.end(), L->block_begin(), L->block_end()); @@ -482,10 +482,28 @@ std::sort(ExitBlocks.begin(), ExitBlocks.end()); ExitBlocks.erase(std::unique(ExitBlocks.begin(), ExitBlocks.end()), ExitBlocks.end()); + // Split all of the edges from inside the loop to their exit blocks. This + // unswitching trivial: no phi nodes to update. 
for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { - SplitBlock(ExitBlocks[i], true); - LoopBlocks.push_back(ExitBlocks[i]); + BasicBlock *ExitBlock = ExitBlocks[i]; + std::vector Preds(pred_begin(ExitBlock), pred_end(ExitBlock)); + + for (unsigned j = 0, e = Preds.size(); j != e; ++j) { + assert(L->contains(Preds[j]) && + "All preds of loop exit blocks must be the same loop!"); + SplitEdge(Preds[j], ExitBlock); + } } + + // The exit blocks may have been changed due to edge splitting, recompute. + ExitBlocks.clear(); + L->getExitBlocks(ExitBlocks); + std::sort(ExitBlocks.begin(), ExitBlocks.end()); + ExitBlocks.erase(std::unique(ExitBlocks.begin(), ExitBlocks.end()), + ExitBlocks.end()); + + // Add exit blocks to the loop blocks. + LoopBlocks.insert(LoopBlocks.end(), ExitBlocks.begin(), ExitBlocks.end()); // Next step, clone all of the basic blocks that make up the loop (including // the loop preheader and exit blocks), keeping track of the mapping between From lattner at cs.uiuc.edu Fri Feb 10 17:26:25 2006 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Fri, 10 Feb 2006 17:26:25 -0600 Subject: [llvm-commits] CVS: llvm/lib/Transforms/Scalar/LoopUnswitch.cpp Message-ID: <200602102326.RAA12184@zion.cs.uiuc.edu> Changes in directory llvm/lib/Transforms/Scalar: LoopUnswitch.cpp updated: 1.15 -> 1.16 --- Log message: Update PHI nodes in successors of exit blocks. --- Diffs of the changes: (+34 -5) LoopUnswitch.cpp | 39 ++++++++++++++++++++++++++++++++++----- 1 files changed, 34 insertions(+), 5 deletions(-) Index: llvm/lib/Transforms/Scalar/LoopUnswitch.cpp diff -u llvm/lib/Transforms/Scalar/LoopUnswitch.cpp:1.15 llvm/lib/Transforms/Scalar/LoopUnswitch.cpp:1.16 --- llvm/lib/Transforms/Scalar/LoopUnswitch.cpp:1.15 Fri Feb 10 17:16:39 2006 +++ llvm/lib/Transforms/Scalar/LoopUnswitch.cpp Fri Feb 10 17:26:14 2006 @@ -256,6 +256,16 @@ // loop. 
for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E; ++I) { + for (BasicBlock::iterator BBI = (*I)->begin(), E = (*I)->end(); + BBI != E; ++BBI) + if (SelectInst *SI = dyn_cast(BBI)) { + Value *LoopCond = FindLIVLoopCondition(SI->getCondition(), L, Changed); + if (LoopCond == 0) continue; + + //if (UnswitchIfProfitable(LoopCond, + std::cerr << "LOOP INVARIANT SELECT: " << *SI; + } + TerminatorInst *TI = (*I)->getTerminator(); // FIXME: Handle invariant select instructions. @@ -523,13 +533,32 @@ // Now we create the new Loop object for the versioned loop. Loop *NewLoop = CloneLoop(L, L->getParentLoop(), ValueMap, LI); - if (Loop *Parent = L->getParentLoop()) { + Loop *ParentLoop = L->getParentLoop(); + if (ParentLoop) { // Make sure to add the cloned preheader and exit blocks to the parent loop // as well. - Parent->addBasicBlockToLoop(NewBlocks[0], *LI); - for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) - Parent->addBasicBlockToLoop(cast(ValueMap[ExitBlocks[i]]), - *LI); + ParentLoop->addBasicBlockToLoop(NewBlocks[0], *LI); + } + + for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { + BasicBlock *NewExit = cast(ValueMap[ExitBlocks[i]]); + if (ParentLoop) + ParentLoop->addBasicBlockToLoop(cast(NewExit), *LI); + + assert(NewExit->getTerminator()->getNumSuccessors() == 1 && + "Exit block should have been split to have one successor!"); + BasicBlock *ExitSucc = NewExit->getTerminator()->getSuccessor(0); + + // If the successor of the exit block had PHI nodes, add an entry for + // NewExit. + PHINode *PN; + for (BasicBlock::iterator I = ExitSucc->begin(); + (PN = dyn_cast(I)); ++I) { + Value *V = PN->getIncomingValueForBlock(ExitBlocks[i]); + std::map::iterator It = ValueMap.find(V); + if (It != ValueMap.end()) V = It->second; + PN->addIncoming(V, NewExit); + } } // Rewrite the code to refer to itself. 
From lattner at cs.uiuc.edu Fri Feb 10 18:43:49 2006 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Fri, 10 Feb 2006 18:43:49 -0600 Subject: [llvm-commits] CVS: llvm/lib/Transforms/Scalar/LoopUnswitch.cpp Message-ID: <200602110043.SAA12655@zion.cs.uiuc.edu> Changes in directory llvm/lib/Transforms/Scalar: LoopUnswitch.cpp updated: 1.16 -> 1.17 --- Log message: implement unswitching of loops with switch stmts and selects in them --- Diffs of the changes: (+135 -94) LoopUnswitch.cpp | 229 ++++++++++++++++++++++++++++++++----------------------- 1 files changed, 135 insertions(+), 94 deletions(-) Index: llvm/lib/Transforms/Scalar/LoopUnswitch.cpp diff -u llvm/lib/Transforms/Scalar/LoopUnswitch.cpp:1.16 llvm/lib/Transforms/Scalar/LoopUnswitch.cpp:1.17 --- llvm/lib/Transforms/Scalar/LoopUnswitch.cpp:1.16 Fri Feb 10 17:26:14 2006 +++ llvm/lib/Transforms/Scalar/LoopUnswitch.cpp Fri Feb 10 18:43:37 2006 @@ -66,10 +66,13 @@ } private: + bool UnswitchIfProfitable(Value *LoopCond, Constant *Val,Loop *L); unsigned getLoopUnswitchCost(Loop *L, Value *LIC); - void VersionLoop(Value *LIC, Loop *L, Loop *&Out1, Loop *&Out2); + void VersionLoop(Value *LIC, Constant *OnVal, + Loop *L, Loop *&Out1, Loop *&Out2); BasicBlock *SplitEdge(BasicBlock *From, BasicBlock *To); - void RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC, bool Val); + void RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,Constant *Val, + bool isEqual); void UnswitchTrivialCondition(Loop *L, Value *Cond, bool EntersLoopOnCond, BasicBlock *ExitBlock); }; @@ -256,85 +259,86 @@ // loop. for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E; ++I) { + TerminatorInst *TI = (*I)->getTerminator(); + if (BranchInst *BI = dyn_cast(TI)) { + // If this isn't branching on an invariant condition, we can't unswitch + // it. + if (BI->isConditional()) { + // See if this, or some part of it, is loop invariant. If so, we can + // unswitch on it if we desire. 
+ Value *LoopCond = FindLIVLoopCondition(BI->getCondition(), L, Changed); + if (LoopCond && UnswitchIfProfitable(LoopCond, ConstantBool::True, L)) + return true; + } + } else if (SwitchInst *SI = dyn_cast(TI)) { + Value *LoopCond = FindLIVLoopCondition(SI->getCondition(), L, Changed); + if (LoopCond && SI->getNumCases() > 1) { + // Find a value to unswitch on: + // FIXME: this should chose the most expensive case! + Constant *UnswitchVal = SI->getCaseValue(1); + if (UnswitchIfProfitable(LoopCond, UnswitchVal, L)) + return true; + } + } + + // Scan the instructions to check for unswitchable values. for (BasicBlock::iterator BBI = (*I)->begin(), E = (*I)->end(); BBI != E; ++BBI) if (SelectInst *SI = dyn_cast(BBI)) { Value *LoopCond = FindLIVLoopCondition(SI->getCondition(), L, Changed); - if (LoopCond == 0) continue; - - //if (UnswitchIfProfitable(LoopCond, - std::cerr << "LOOP INVARIANT SELECT: " << *SI; + if (LoopCond && UnswitchIfProfitable(LoopCond, ConstantBool::True, L)) + return true; } - - TerminatorInst *TI = (*I)->getTerminator(); - // FIXME: Handle invariant select instructions. - - if (SwitchInst *SI = dyn_cast(TI)) { - if (!isa(SI) && L->isLoopInvariant(SI->getCondition())) - DEBUG(std::cerr << "TODO: Implement unswitching 'switch' loop %" - << L->getHeader()->getName() << ", cost = " - << L->getBlocks().size() << "\n" << **I); - continue; - } - - BranchInst *BI = dyn_cast(TI); - if (!BI) continue; - - // If this isn't branching on an invariant condition, we can't unswitch it. - if (!BI->isConditional()) - continue; - - // See if this, or some part of it, is loop invariant. If so, we can - // unswitch on it if we desire. - Value *LoopCond = FindLIVLoopCondition(BI->getCondition(), L, Changed); - if (LoopCond == 0) continue; + } - // Check to see if it would be profitable to unswitch this loop. - if (getLoopUnswitchCost(L, LoopCond) > Threshold) { - // FIXME: this should estimate growth by the amount of code shared by the - // resultant unswitched loops. 
This should have no code growth: - // for () { if (iv) {...} } - // as one copy of the loop will be empty. - // - DEBUG(std::cerr << "NOT unswitching loop %" - << L->getHeader()->getName() << ", cost too high: " - << L->getBlocks().size() << "\n"); - continue; - } + return Changed; +} + +/// UnswitchIfProfitable - We have found that we can unswitch L when +/// LoopCond == Val to simplify the loop. If we decide that this is profitable, +/// unswitch the loop, reprocess the pieces, then return true. +bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val,Loop *L){ + // Check to see if it would be profitable to unswitch this loop. + if (getLoopUnswitchCost(L, LoopCond) > Threshold) { + // FIXME: this should estimate growth by the amount of code shared by the + // resultant unswitched loops. + // + DEBUG(std::cerr << "NOT unswitching loop %" + << L->getHeader()->getName() << ", cost too high: " + << L->getBlocks().size() << "\n"); + return false; + } - // If this loop has live-out values, we can't unswitch it. We need something - // like loop-closed SSA form in order to know how to insert PHI nodes for - // these values. - if (LoopValuesUsedOutsideLoop(L)) { - DEBUG(std::cerr << "NOT unswitching loop %" - << L->getHeader()->getName() - << ", a loop value is used outside loop!\n"); - continue; - } + // If this loop has live-out values, we can't unswitch it. We need something + // like loop-closed SSA form in order to know how to insert PHI nodes for + // these values. + if (LoopValuesUsedOutsideLoop(L)) { + DEBUG(std::cerr << "NOT unswitching loop %" << L->getHeader()->getName() + << ", a loop value is used outside loop!\n"); + return false; + } - //std::cerr << "BEFORE:\n"; LI->dump(); - Loop *NewLoop1 = 0, *NewLoop2 = 0; + //std::cerr << "BEFORE:\n"; LI->dump(); + Loop *NewLoop1 = 0, *NewLoop2 = 0; - // If this is a trivial condition to unswitch (which results in no code - // duplication), do it now. 
- bool EntersLoopOnCond; - BasicBlock *ExitBlock; - if (IsTrivialUnswitchCondition(L, LoopCond, &EntersLoopOnCond, &ExitBlock)){ - UnswitchTrivialCondition(L, LoopCond, EntersLoopOnCond, ExitBlock); - NewLoop1 = L; - } else { - VersionLoop(LoopCond, L, NewLoop1, NewLoop2); - } - - //std::cerr << "AFTER:\n"; LI->dump(); - - // Try to unswitch each of our new loops now! - if (NewLoop1) visitLoop(NewLoop1); - if (NewLoop2) visitLoop(NewLoop2); - return true; + // If this is a trivial condition to unswitch (which results in no code + // duplication), do it now. + bool EntersLoopOnCond; + BasicBlock *ExitBlock; + if (IsTrivialUnswitchCondition(L, LoopCond, &EntersLoopOnCond, &ExitBlock)){ + UnswitchTrivialCondition(L, LoopCond, EntersLoopOnCond, ExitBlock); + NewLoop1 = L; + } else { + VersionLoop(LoopCond, Val, L, NewLoop1, NewLoop2); } - - return Changed; + ++NumUnswitched; + + //std::cerr << "AFTER:\n"; LI->dump(); + + // Try to unswitch each of our new loops now! + if (NewLoop1) visitLoop(NewLoop1); + if (NewLoop2) visitLoop(NewLoop2); + return true; } BasicBlock *LoopUnswitch::SplitEdge(BasicBlock *BB, BasicBlock *Succ) { @@ -456,23 +460,22 @@ // Now that we know that the loop is never entered when this condition is a // particular value, rewrite the loop with this info. We know that this will // at least eliminate the old branch. - RewriteLoopBodyWithConditionConstant(L, Cond, EnterOnCond); - - ++NumUnswitched; + RewriteLoopBodyWithConditionConstant(L, Cond, ConstantBool::get(EnterOnCond), + true); } -/// VersionLoop - We determined that the loop is profitable to unswitch and -/// contains a branch on a loop invariant condition. Split it into loop -/// versions and test the condition outside of either loop. Return the loops -/// created as Out1/Out2. -void LoopUnswitch::VersionLoop(Value *LIC, Loop *L, Loop *&Out1, Loop *&Out2) { +/// VersionLoop - We determined that the loop is profitable to unswitch when LIC +/// equal Val. 
Split it into loop versions and test the condition outside of +/// either loop. Return the loops created as Out1/Out2. +void LoopUnswitch::VersionLoop(Value *LIC, Constant *Val, Loop *L, + Loop *&Out1, Loop *&Out2) { Function *F = L->getHeader()->getParent(); DEBUG(std::cerr << "loop-unswitch: Unswitching loop %" - << L->getHeader()->getName() << " [" << L->getBlocks().size() - << " blocks] in Function " << F->getName() - << " on cond:" << *LIC << "\n"); + << L->getHeader()->getName() << " [" << L->getBlocks().size() + << " blocks] in Function " << F->getName() + << " when '" << *Val << "' == " << *LIC << "\n"); // LoopBlocks contains all of the basic blocks of the loop, including the // preheader of the loop, the body of the loop, and the exit blocks of the @@ -572,41 +575,79 @@ cast(OrigPreheader->getTerminator())->isUnconditional() && OrigPreheader->getTerminator()->getSuccessor(0) == LoopBlocks[0] && "Preheader splitting did not work correctly!"); - // Remove the unconditional branch to LoopBlocks[0]. - OrigPreheader->getInstList().pop_back(); // Insert a conditional branch on LIC to the two preheaders. The original // code is the true version and the new code is the false version. - new BranchInst(LoopBlocks[0], NewBlocks[0], LIC, OrigPreheader); + Value *BranchVal = LIC; + if (!isa(BranchVal)) { + BranchVal = BinaryOperator::createSetEQ(LIC, Val, "tmp", + OrigPreheader->getTerminator()); + } else if (Val != ConstantBool::True) { + // We want to enter the new loop when the condition is true. + BranchVal = BinaryOperator::createNot(BranchVal, "tmp", + OrigPreheader->getTerminator()); + } + + // Remove the unconditional branch to LoopBlocks[0] and insert the new branch. + OrigPreheader->getInstList().pop_back(); + new BranchInst(NewBlocks[0], LoopBlocks[0], BranchVal, OrigPreheader); // Now we rewrite the original code to know that the condition is true and the // new code to know that the condition is false. 
- RewriteLoopBodyWithConditionConstant(L, LIC, true); - RewriteLoopBodyWithConditionConstant(NewLoop, LIC, false); - ++NumUnswitched; + RewriteLoopBodyWithConditionConstant(L, LIC, Val, false); + RewriteLoopBodyWithConditionConstant(NewLoop, LIC, Val, true); Out1 = L; Out2 = NewLoop; } -// RewriteLoopBodyWithConditionConstant - We know that the boolean value LIC has -// the value specified by Val in the specified loop. Rewrite any uses of LIC or -// of properties correlated to it. +// RewriteLoopBodyWithConditionConstant - We know either that the value LIC has +// the value specified by Val in the specified loop, or we know it does NOT have +// that value. Rewrite any uses of LIC or of properties correlated to it. void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC, - bool Val) { + Constant *Val, + bool IsEqual) { assert(!isa(LIC) && "Why are we unswitching on a constant?"); + // FIXME: Support correlated properties, like: // for (...) // if (li1 < li2) // ... // if (li1 > li2) // ... - ConstantBool *BoolVal = ConstantBool::get(Val); + // NotVal - If Val is a bool, this contains its inverse. + Constant *NotVal = 0; + if (ConstantBool *CB = dyn_cast(Val)) + NotVal = ConstantBool::get(!CB->getValue()); + // FOLD boolean conditions (X|LIC), (X&LIC). Fold conditional branches, // selects, switches. std::vector Users(LIC->use_begin(), LIC->use_end()); + + // Haha, this loop could be unswitched. Get it? The unswitch pass could + // unswitch itself. Amazing. for (unsigned i = 0, e = Users.size(); i != e; ++i) if (Instruction *U = cast(Users[i])) if (L->contains(U->getParent())) - U->replaceUsesOfWith(LIC, BoolVal); + if (IsEqual) { + U->replaceUsesOfWith(LIC, Val); + } else if (NotVal) { + U->replaceUsesOfWith(LIC, NotVal); + } else { + // If we know that LIC is not Val, use this info to simplify code. 
+ if (SwitchInst *SI = dyn_cast(U)) { + for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i) { + if (SI->getCaseValue(i) == Val) { + // Found a dead case value. Don't remove PHI nodes in the + // successor if they become single-entry, those PHI nodes may + // be in the Users list. + SI->getSuccessor(i)->removePredecessor(SI->getParent(), true); + SI->removeCase(i); + break; + } + } + } + + // TODO: We could simplify stuff like X == C. + } } From jlaskey at apple.com Fri Feb 10 19:01:43 2006 From: jlaskey at apple.com (Jim Laskey) Date: Fri, 10 Feb 2006 19:01:43 -0600 Subject: [llvm-commits] CVS: llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp Message-ID: <200602110101.TAA12759@zion.cs.uiuc.edu> Changes in directory llvm/lib/CodeGen/SelectionDAG: SelectionDAGISel.cpp updated: 1.150 -> 1.151 --- Log message: Reorg for integration with gcc4. Old style debug info will not be passed through to SelIDAG. --- Diffs of the changes: (+21 -24) SelectionDAGISel.cpp | 45 +++++++++++++++++++++------------------------ 1 files changed, 21 insertions(+), 24 deletions(-) Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp diff -u llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp:1.150 llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp:1.151 --- llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp:1.150 Sat Feb 4 00:49:00 2006 +++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp Fri Feb 10 19:01:30 2006 @@ -941,33 +941,30 @@ if (TLI.getTargetMachine().getIntrinsicLowering().EmitDebugFunctions()) return "llvm_debugger_stop"; - std::string fname = ""; - std::vector Ops; + MachineDebugInfo *DebugInfo = DAG.getMachineDebugInfo(); + if (DebugInfo && DebugInfo->Verify(I.getOperand(4))) { + std::vector Ops; - // Input Chain - Ops.push_back(getRoot()); - - // line number - Ops.push_back(getValue(I.getOperand(2))); - - // column - Ops.push_back(getValue(I.getOperand(3))); - - // filename/working dir - // Pull the filename out of the the compilation unit.
- const GlobalVariable *cunit = dyn_cast(I.getOperand(4)); - if (cunit && cunit->hasInitializer()) { - if (ConstantStruct *CS = - dyn_cast(cunit->getInitializer())) { - if (CS->getNumOperands() > 0) { - Ops.push_back(DAG.getString(getStringValue(CS->getOperand(3)))); - Ops.push_back(DAG.getString(getStringValue(CS->getOperand(4)))); - } - } + // Input Chain + Ops.push_back(getRoot()); + + // line number + Ops.push_back(getValue(I.getOperand(2))); + + // column + Ops.push_back(getValue(I.getOperand(3))); + + DebugInfoDesc *DD = DebugInfo->Deserialize(I.getOperand(4)); + assert(DD && "Not a debug information descriptor"); + CompileUnitDesc *CompileUnit = dyn_cast(DD); + assert(CompileUnit && "Not a compile unit"); + Ops.push_back(DAG.getString(CompileUnit->getFileName())); + Ops.push_back(DAG.getString(CompileUnit->getDirectory())); + + if (Ops.size() == 5) // Found filename/workingdir. + DAG.setRoot(DAG.getNode(ISD::LOCATION, MVT::Other, Ops)); } - if (Ops.size() == 5) // Found filename/workingdir. - DAG.setRoot(DAG.getNode(ISD::LOCATION, MVT::Other, Ops)); setValue(&I, DAG.getNode(ISD::UNDEF, TLI.getValueType(I.getType()))); return 0; } From jlaskey at apple.com Fri Feb 10 19:01:44 2006 From: jlaskey at apple.com (Jim Laskey) Date: Fri, 10 Feb 2006 19:01:44 -0600 Subject: [llvm-commits] CVS: llvm/include/llvm/CodeGen/MachineDebugInfo.h Message-ID: <200602110101.TAA12763@zion.cs.uiuc.edu> Changes in directory llvm/include/llvm/CodeGen: MachineDebugInfo.h updated: 1.13 -> 1.14 --- Log message: Reorg for integration with gcc4. Old style debug info will not be passed through to SelIDAG.
--- Diffs of the changes: (+169 -59) MachineDebugInfo.h | 228 +++++++++++++++++++++++++++++++++++++++-------------- 1 files changed, 169 insertions(+), 59 deletions(-) Index: llvm/include/llvm/CodeGen/MachineDebugInfo.h diff -u llvm/include/llvm/CodeGen/MachineDebugInfo.h:1.13 llvm/include/llvm/CodeGen/MachineDebugInfo.h:1.14 --- llvm/include/llvm/CodeGen/MachineDebugInfo.h:1.13 Mon Feb 6 15:54:05 2006 +++ llvm/include/llvm/CodeGen/MachineDebugInfo.h Fri Feb 10 19:01:30 2006 @@ -32,6 +32,7 @@ #include "llvm/Support/Dwarf.h" #include "llvm/ADT/UniqueVector.h" +#include "llvm/GlobalValue.h" #include "llvm/Pass.h" #include "llvm/User.h" @@ -42,6 +43,7 @@ //===----------------------------------------------------------------------===// // Forward declarations. +class Constant; class DebugInfoDesc; class GlobalVariable; class Module; @@ -57,12 +59,13 @@ // DebugInfoDesc type identifying tags. // FIXME - Change over with gcc4. + DI_TAG_anchor = 0, #if 1 DI_TAG_compile_unit = DW_TAG_compile_unit, DI_TAG_global_variable = DW_TAG_variable, DI_TAG_subprogram = DW_TAG_subprogram #else - DI_TAG_compile_unit = 1, + DI_TAG_compile_unit, DI_TAG_global_variable, DI_TAG_subprogram #endif @@ -117,6 +120,10 @@ /// Return NULL if not a recognized Tag. static DebugInfoDesc *DescFactory(unsigned Tag); + /// getLinkage - get linkage appropriate for this type of descriptor. + /// + virtual GlobalValue::LinkageTypes getLinkage() const; + //===--------------------------------------------------------------------===// // Subclasses should supply the following static methods. @@ -128,11 +135,15 @@ /// ApplyToFields - Target the vistor to the fields of the descriptor. /// - virtual void ApplyToFields(DIVisitor *Visitor) = 0; + virtual void ApplyToFields(DIVisitor *Visitor); - /// TypeString - Return a string used to compose globalnames and labels. + /// getDescString - Return a string used to compose global names and labels. 
/// - virtual const char *TypeString() const = 0; + virtual const char *getDescString() const = 0; + + /// getTypeString - Return a string used to label this descriptor's type. + /// + virtual const char *getTypeString() const = 0; #ifndef NDEBUG virtual void dump() = 0; @@ -141,27 +152,90 @@ //===----------------------------------------------------------------------===// +/// AnchorDesc - Descriptors of this class act as markers for identifying +/// descriptors of certain groups. +class AnchorDesc : public DebugInfoDesc { +private: + std::string Name; // Anchor type string. + +public: + AnchorDesc() + : DebugInfoDesc(DI_TAG_anchor) + , Name("") + {} + AnchorDesc(const std::string &N) + : DebugInfoDesc(DI_TAG_anchor) + , Name(N) + {} + + // Accessors + const std::string &getName() const { return Name; } + + // Implement isa/cast/dyncast. + static bool classof(const AnchorDesc *) { return true; } + static bool classof(const DebugInfoDesc *D) { + return D->getTag() == DI_TAG_anchor; + } + + /// getLinkage - get linkage appropriate for this type of descriptor. + /// + virtual GlobalValue::LinkageTypes getLinkage() const; + + /// ApplyToFields - Target the visitor to the fields of the AnchorDesc. + /// + virtual void ApplyToFields(DIVisitor *Visitor); + + /// getDescString - Return a string used to compose global names and labels. + /// + virtual const char *getDescString() const; + + /// getTypeString - Return a string used to label this descriptor's type. + /// + virtual const char *getTypeString() const; + +#ifndef NDEBUG + virtual void dump(); +#endif +}; + +//===----------------------------------------------------------------------===// +/// AnchoredDesc - This class manages anchors for a variety of top level +/// descriptors. +class AnchoredDesc : public DebugInfoDesc { +private: + AnchorDesc *Anchor; // Anchor for all descriptors of the + // same type. + +protected: + + AnchoredDesc(unsigned T); + +public: + // Accessors. 
+ AnchorDesc *getAnchor() const { return Anchor; } + void setAnchor(AnchorDesc *A) { Anchor = A; } + + //===--------------------------------------------------------------------===// + // Subclasses should supply the following virtual methods. + + /// ApplyToFields - Target the visitor to the fields of the AnchoredDesc. + /// + virtual void ApplyToFields(DIVisitor *Visitor); +}; + +//===----------------------------------------------------------------------===// /// CompileUnitDesc - This class packages debug information associated with a /// source/header file. -class CompileUnitDesc : public DebugInfoDesc { +class CompileUnitDesc : public AnchoredDesc { private: unsigned DebugVersion; // LLVM debug version when produced. unsigned Language; // Language number (ex. DW_LANG_C89.) std::string FileName; // Source file name. std::string Directory; // Source file directory. std::string Producer; // Compiler string. - GlobalVariable *TransUnit; // Translation unit - ignored. public: - CompileUnitDesc() - : DebugInfoDesc(DI_TAG_compile_unit) - , DebugVersion(LLVMDebugVersion) - , Language(0) - , FileName("") - , Directory("") - , Producer("") - , TransUnit(NULL) - {} + CompileUnitDesc(); // Accessors unsigned getDebugVersion() const { return DebugVersion; } @@ -173,6 +247,7 @@ void setFileName(const std::string &FN) { FileName = FN; } void setDirectory(const std::string &D) { Directory = D; } void setProducer(const std::string &P) { Producer = P; } + // FIXME - Need translation unit getter/setter. // Implement isa/cast/dyncast. @@ -189,9 +264,17 @@ /// virtual void ApplyToFields(DIVisitor *Visitor); - /// TypeString - Return a string used to compose globalnames and labels. + /// getDescString - Return a string used to compose global names and labels. + /// + virtual const char *getDescString() const; + + /// getTypeString - Return a string used to label this descriptor's type. 
+ /// + virtual const char *getTypeString() const; + + /// getAnchorString - Return a string used to label this descriptor's anchor. /// - virtual const char *TypeString() const; + virtual const char *getAnchorString() const; #ifndef NDEBUG virtual void dump(); @@ -199,42 +282,49 @@ }; //===----------------------------------------------------------------------===// -/// GlobalVariableDesc - This class packages debug information associated with a -/// GlobalVariable. -class GlobalVariableDesc : public DebugInfoDesc { +/// GlobalDesc - This class is the base descriptor for global functions and +/// variables. +class GlobalDesc : public AnchoredDesc { private: DebugInfoDesc *Context; // Context debug descriptor. std::string Name; // Global name. - GlobalVariable *TransUnit; // Translation unit - ignored. // FIXME - Use a descriptor. GlobalVariable *TyDesc; // Type debug descriptor. bool IsStatic; // Is the global a static. bool IsDefinition; // Is the global defined in context. - GlobalVariable *Global; // llvm global. +protected: + GlobalDesc(unsigned T); + public: - GlobalVariableDesc() - : DebugInfoDesc(DI_TAG_global_variable) - , Context(0) - , Name("") - , TransUnit(NULL) - , TyDesc(NULL) - , IsStatic(false) - , IsDefinition(false) - , Global(NULL) - {} - // Accessors DebugInfoDesc *getContext() const { return Context; } const std::string &getName() const { return Name; } bool isStatic() const { return IsStatic; } bool isDefinition() const { return IsDefinition; } - GlobalVariable *getGlobalVariable() const { return Global; } + void setContext(DebugInfoDesc *C) { Context = C; } void setName(const std::string &N) { Name = N; } void setIsStatic(bool IS) { IsStatic = IS; } void setIsDefinition(bool ID) { IsDefinition = ID; } + + /// ApplyToFields - Target the visitor to the fields of the GlobalDesc. 
+ /// + virtual void ApplyToFields(DIVisitor *Visitor); +}; + +//===----------------------------------------------------------------------===// +/// GlobalVariableDesc - This class packages debug information associated with a +/// GlobalVariable. +class GlobalVariableDesc : public GlobalDesc { +private: + GlobalVariable *Global; // llvm global. + +public: + GlobalVariableDesc(); + + // Accessors. + GlobalVariable *getGlobalVariable() const { return Global; } void setGlobalVariable(GlobalVariable *GV) { Global = GV; } - // FIXME - Other getters/setters. // Implement isa/cast/dyncast. static bool classof(const GlobalVariableDesc *) { return true; } @@ -246,10 +336,18 @@ /// GlobalVariableDesc. virtual void ApplyToFields(DIVisitor *Visitor); - /// TypeString - Return a string used to compose globalnames and labels. + /// getDescString - Return a string used to compose global names and labels. /// - virtual const char *TypeString() const; + virtual const char *getDescString() const; + /// getTypeString - Return a string used to label this descriptor's type. + /// + virtual const char *getTypeString() const; + + /// getAnchorString - Return a string used to label this descriptor's anchor. + /// + virtual const char *getAnchorString() const; + #ifndef NDEBUG virtual void dump(); #endif @@ -258,32 +356,24 @@ //===----------------------------------------------------------------------===// /// SubprogramDesc - This class packages debug information associated with a /// subprogram/function. -class SubprogramDesc : public DebugInfoDesc { +class SubprogramDesc : public GlobalDesc { private: DebugInfoDesc *Context; // Context debug descriptor. std::string Name; // Subprogram name. - GlobalVariable *TransUnit; // Translation unit - ignored. // FIXME - Use a descriptor. GlobalVariable *TyDesc; // Type debug descriptor. bool IsStatic; // Is the subprogram a static. bool IsDefinition; // Is the subprogram defined in context. 
public: - SubprogramDesc() - : DebugInfoDesc(DI_TAG_subprogram) - , Context(0) - , Name("") - , TransUnit(NULL) - , TyDesc(NULL) - , IsStatic(false) - , IsDefinition(false) - {} + SubprogramDesc(); // Accessors DebugInfoDesc *getContext() const { return Context; } const std::string &getName() const { return Name; } bool isStatic() const { return IsStatic; } bool isDefinition() const { return IsDefinition; } + void setContext(DebugInfoDesc *C) { Context = C; } void setName(const std::string &N) { Name = N; } void setIsStatic(bool IS) { IsStatic = IS; } void setIsDefinition(bool ID) { IsDefinition = ID; } @@ -299,10 +389,18 @@ /// virtual void ApplyToFields(DIVisitor *Visitor); - /// TypeString - Return a string used to compose globalnames and labels. + /// getDescString - Return a string used to compose global names and labels. /// - virtual const char *TypeString() const; + virtual const char *getDescString() const; + /// getTypeString - Return a string used to label this descriptor's type. + /// + virtual const char *getTypeString() const; + + /// getAnchorString - Return a string used to label this descriptor's anchor. + /// + virtual const char *getAnchorString() const; + #ifndef NDEBUG virtual void dump(); #endif @@ -313,18 +411,15 @@ /// into DebugInfoDesc objects. class DIDeserializer { private: - Module *M; // Definition space module. unsigned DebugVersion; // Version of debug information in use. std::map GlobalDescs; // Previously defined gloabls. public: - DIDeserializer() : M(NULL), DebugVersion(LLVMDebugVersion) {} + DIDeserializer() : DebugVersion(LLVMDebugVersion) {} ~DIDeserializer() {} // Accessors - Module *getModule() const { return M; }; - void setModule(Module *module) { M = module; } unsigned getDebugVersion() const { return DebugVersion; } /// Deserialize - Reconstitute a GlobalVariable into it's component @@ -345,10 +440,18 @@ // Types per Tag. Created lazily. std::map DescGlobals; // Previously defined descriptors. 
- std::map StringCache; + std::map StringCache; // Previously defined strings. + public: - DISerializer() : M(NULL) {} + DISerializer() + : M(NULL) + , StrPtrTy(NULL) + , EmptyStructPtrTy(NULL) + , TagTypes() + , DescGlobals() + , StringCache() + {} ~DISerializer() {} // Accessors @@ -369,7 +472,7 @@ /// getString - Construct the string as constant string global. /// - GlobalVariable *getString(const std::string &String); + Constant *getString(const std::string &String); /// Serialize - Recursively cast the specified descriptor into a /// GlobalVariable so that it can be serialized to a .bc or .ll file. @@ -395,6 +498,7 @@ /// Verify - Return true if the GlobalVariable appears to be a valid /// serialization of a DebugInfoDesc. + bool Verify(Value *V); bool Verify(GlobalVariable *GV); }; @@ -454,9 +558,7 @@ class MachineDebugInfo : public ImmutablePass { private: // Use the same serializer/deserializer/verifier for the module. - DISerializer SR; DIDeserializer DR; - DIVerifier VR; // CompileUnits - Uniquing vector for compile units. UniqueVector CompileUnits; @@ -482,6 +584,14 @@ /// bool doFinalization(); + /// Deserialize - Convert a Value to a debug information descriptor. + /// + DebugInfoDesc *Deserialize(Value *V); + + /// Verify - Verify that a Value is debug information descriptor. + /// + bool Verify(Value *V); + /// AnalyzeModule - Scan the module for global debug information. /// void AnalyzeModule(Module &M); From jlaskey at apple.com Fri Feb 10 19:01:44 2006 From: jlaskey at apple.com (Jim Laskey) Date: Fri, 10 Feb 2006 19:01:44 -0600 Subject: [llvm-commits] CVS: llvm/lib/CodeGen/MachineDebugInfo.cpp Message-ID: <200602110101.TAA12767@zion.cs.uiuc.edu> Changes in directory llvm/lib/CodeGen: MachineDebugInfo.cpp updated: 1.12 -> 1.13 --- Log message: Reorg for integration with gcc4. Old style debug info will not be passed through to SelIDAG.
--- Diffs of the changes: (+246 -110) MachineDebugInfo.cpp | 356 +++++++++++++++++++++++++++++++++++---------------- 1 files changed, 246 insertions(+), 110 deletions(-) Index: llvm/lib/CodeGen/MachineDebugInfo.cpp diff -u llvm/lib/CodeGen/MachineDebugInfo.cpp:1.12 llvm/lib/CodeGen/MachineDebugInfo.cpp:1.13 --- llvm/lib/CodeGen/MachineDebugInfo.cpp:1.12 Mon Feb 6 15:54:05 2006 +++ llvm/lib/CodeGen/MachineDebugInfo.cpp Fri Feb 10 19:01:30 2006 @@ -49,10 +49,14 @@ static std::vector getGlobalVariablesUsing(Module &M, const std::string &RootName) { std::vector Result; // GlobalVariables matching criteria. + + std::vector FieldTypes; + FieldTypes.push_back(Type::UIntTy); + FieldTypes.push_back(PointerType::get(Type::SByteTy)); // Get the GlobalVariable root. GlobalVariable *UseRoot = M.getGlobalVariable(RootName, - StructType::get(std::vector())); + StructType::get(FieldTypes)); // If present and linkonce then scan for users. if (UseRoot && UseRoot->hasLinkOnceLinkage()) { @@ -168,28 +172,6 @@ // Check constant. return dyn_cast(CI->getOperand(i)); } - -//===----------------------------------------------------------------------===// - -/// TagFromGlobal - Returns the Tag number from a debug info descriptor -/// GlobalVariable. -unsigned DebugInfoDesc::TagFromGlobal(GlobalVariable *GV) { - ConstantUInt *C = getUIntOperand(GV, 0); - return C ? (unsigned)C->getValue() : (unsigned)DIInvalid; -} - -/// DescFactory - Create an instance of debug info descriptor based on Tag. -/// Return NULL if not a recognized Tag. 
-DebugInfoDesc *DebugInfoDesc::DescFactory(unsigned Tag) { - switch (Tag) { - case DI_TAG_compile_unit: return new CompileUnitDesc(); - case DI_TAG_global_variable: return new GlobalVariableDesc(); - case DI_TAG_subprogram: return new SubprogramDesc(); - default: break; - } - return NULL; -} - //===----------------------------------------------------------------------===// /// ApplyToFields - Target the visitor to each field of the debug information @@ -206,7 +188,7 @@ unsigned Count; // Running count of fields. public: - DICountVisitor() : DIVisitor(), Count(1) {} + DICountVisitor() : DIVisitor(), Count(0) {} // Accessors. unsigned getCount() const { return Count; } @@ -234,7 +216,7 @@ DIDeserializeVisitor(DIDeserializer &D, GlobalVariable *GV) : DIVisitor() , DR(D) - , I(1) + , I(0) , CI(cast(GV->getInitializer())) {} @@ -284,7 +266,7 @@ /// Apply - Set the value of each of the fields. /// virtual void Apply(int &Field) { - Elements.push_back(ConstantUInt::get(Type::IntTy, Field)); + Elements.push_back(ConstantSInt::get(Type::IntTy, Field)); } virtual void Apply(unsigned &Field) { Elements.push_back(ConstantUInt::get(Type::UIntTy, Field)); @@ -314,7 +296,11 @@ } virtual void Apply(GlobalVariable *&Field) { const PointerType *EmptyTy = SR.getEmptyStructPtrType(); - Elements.push_back(ConstantExpr::getCast(Field, EmptyTy)); + if (Field) { + Elements.push_back(ConstantExpr::getCast(Field, EmptyTy)); + } else { + Elements.push_back(ConstantPointerNull::get(EmptyTy)); + } } }; @@ -373,7 +359,7 @@ : DIVisitor() , VR(V) , IsValid(true) - , I(1) + , I(0) , CI(cast(GV->getInitializer())) { } @@ -410,36 +396,146 @@ } }; + +//===----------------------------------------------------------------------===// + +/// TagFromGlobal - Returns the Tag number from a debug info descriptor +/// GlobalVariable. +unsigned DebugInfoDesc::TagFromGlobal(GlobalVariable *GV) { + ConstantUInt *C = getUIntOperand(GV, 0); + return C ? 
(unsigned)C->getValue() : (unsigned)DIInvalid; +} + +/// DescFactory - Create an instance of debug info descriptor based on Tag. +/// Return NULL if not a recognized Tag. +DebugInfoDesc *DebugInfoDesc::DescFactory(unsigned Tag) { + switch (Tag) { + case DI_TAG_anchor: return new AnchorDesc(); + case DI_TAG_compile_unit: return new CompileUnitDesc(); + case DI_TAG_global_variable: return new GlobalVariableDesc(); + case DI_TAG_subprogram: return new SubprogramDesc(); + default: break; + } + return NULL; +} + +/// getLinkage - get linkage appropriate for this type of descriptor. +/// +GlobalValue::LinkageTypes DebugInfoDesc::getLinkage() const { + return GlobalValue::InternalLinkage; +} + +/// ApplyToFields - Target the vistor to the fields of the descriptor. +/// +void DebugInfoDesc::ApplyToFields(DIVisitor *Visitor) { + Visitor->Apply(Tag); +} + +//===----------------------------------------------------------------------===// + +/// getLinkage - get linkage appropriate for this type of descriptor. +/// +GlobalValue::LinkageTypes AnchorDesc::getLinkage() const { + return GlobalValue::LinkOnceLinkage; +} + +/// ApplyToFields - Target the visitor to the fields of the TransUnitDesc. +/// +void AnchorDesc::ApplyToFields(DIVisitor *Visitor) { + DebugInfoDesc::ApplyToFields(Visitor); + + Visitor->Apply(Name); +} + +/// getDescString - Return a string used to compose global names and labels. +/// +const char *AnchorDesc::getDescString() const { + return Name.c_str(); +} + +/// getTypeString - Return a string used to label this descriptors type. 
+/// +const char *AnchorDesc::getTypeString() const { + return "llvm.dbg.anchor.type"; +} + +#ifndef NDEBUG +void AnchorDesc::dump() { + std::cerr << getDescString() << " " + << "Tag(" << getTag() << "), " + << "Name(" << Name << ")\n"; +} +#endif + +//===----------------------------------------------------------------------===// + +AnchoredDesc::AnchoredDesc(unsigned T) +: DebugInfoDesc(T) +, Anchor(NULL) +{} + +/// ApplyToFields - Target the visitor to the fields of the AnchoredDesc. +/// +void AnchoredDesc::ApplyToFields(DIVisitor *Visitor) { + DebugInfoDesc::ApplyToFields(Visitor); + + Visitor->Apply((DebugInfoDesc *&)Anchor); +} + //===----------------------------------------------------------------------===// +CompileUnitDesc::CompileUnitDesc() +: AnchoredDesc(DI_TAG_compile_unit) +, DebugVersion(LLVMDebugVersion) +, Language(0) +, FileName("") +, Directory("") +, Producer("") +{} + /// DebugVersionFromGlobal - Returns the version number from a compile unit /// GlobalVariable. unsigned CompileUnitDesc::DebugVersionFromGlobal(GlobalVariable *GV) { - ConstantUInt *C = getUIntOperand(GV, 1); + ConstantUInt *C = getUIntOperand(GV, 2); return C ? (unsigned)C->getValue() : (unsigned)DIInvalid; } /// ApplyToFields - Target the visitor to the fields of the CompileUnitDesc. /// void CompileUnitDesc::ApplyToFields(DIVisitor *Visitor) { + AnchoredDesc::ApplyToFields(Visitor); + Visitor->Apply(DebugVersion); Visitor->Apply(Language); Visitor->Apply(FileName); Visitor->Apply(Directory); Visitor->Apply(Producer); - Visitor->Apply(TransUnit); } -/// TypeString - Return a string used to compose globalnames and labels. +/// getDescString - Return a string used to compose global names and labels. +/// +const char *CompileUnitDesc::getDescString() const { + return "llvm.dbg.compile_unit"; +} + +/// getTypeString - Return a string used to label this descriptors type. 
/// -const char *CompileUnitDesc::TypeString() const { - return "compile_unit"; +const char *CompileUnitDesc::getTypeString() const { + return "llvm.dbg.compile_unit.type"; +} + +/// getAnchorString - Return a string used to label this descriptor's anchor. +/// +const char *CompileUnitDesc::getAnchorString() const { + return "llvm.dbg.compile_units"; } #ifndef NDEBUG void CompileUnitDesc::dump() { - std::cerr << TypeString() << " " + std::cerr << getDescString() << " " << "Tag(" << getTag() << "), " + << "Anchor(" << getAnchor() << "), " + << "DebugVersion(" << DebugVersion << "), " << "Language(" << Language << "), " << "FileName(\"" << FileName << "\"), " << "Directory(\"" << Directory << "\"), " @@ -449,76 +545,122 @@ //===----------------------------------------------------------------------===// -/// ApplyToFields - Target the visitor to the fields of the GlobalVariableDesc. +GlobalDesc::GlobalDesc(unsigned T) +: AnchoredDesc(T) +, Context(0) +, Name("") +, TyDesc(NULL) +, IsStatic(false) +, IsDefinition(false) +{} + +/// ApplyToFields - Target the visitor to the fields of the global. /// -void GlobalVariableDesc::ApplyToFields(DIVisitor *Visitor) { +void GlobalDesc::ApplyToFields(DIVisitor *Visitor) { + AnchoredDesc::ApplyToFields(Visitor); + Visitor->Apply(Context); Visitor->Apply(Name); - Visitor->Apply(TransUnit); Visitor->Apply(TyDesc); Visitor->Apply(IsStatic); Visitor->Apply(IsDefinition); +} + +//===----------------------------------------------------------------------===// + +GlobalVariableDesc::GlobalVariableDesc() +: GlobalDesc(DI_TAG_global_variable) +, Global(NULL) +{} + +/// ApplyToFields - Target the visitor to the fields of the GlobalVariableDesc. +/// +void GlobalVariableDesc::ApplyToFields(DIVisitor *Visitor) { + GlobalDesc::ApplyToFields(Visitor); + Visitor->Apply(Global); } -/// TypeString - Return a string used to compose globalnames and labels. +/// getDescString - Return a string used to compose global names and labels. 
/// -const char *GlobalVariableDesc::TypeString() const { - return "global_variable"; +const char *GlobalVariableDesc::getDescString() const { + return "llvm.dbg.global_variable"; +} + +/// getTypeString - Return a string used to label this descriptors type. +/// +const char *GlobalVariableDesc::getTypeString() const { + return "llvm.dbg.global_variable.type"; +} + +/// getAnchorString - Return a string used to label this descriptor's anchor. +/// +const char *GlobalVariableDesc::getAnchorString() const { + return "llvm.dbg.global_variables"; } #ifndef NDEBUG void GlobalVariableDesc::dump() { - std::cerr << TypeString() << " " + std::cerr << getDescString() << " " << "Tag(" << getTag() << "), " - << "Name(\"" << Name << "\"), " - << "Type(" << TyDesc << "), " - << "IsStatic(" << (IsStatic ? "true" : "false") << "), " - << "IsDefinition(" << (IsDefinition ? "true" : "false") << "), " + << "Anchor(" << getAnchor() << "), " + << "Name(\"" << getName() << "\"), " + << "IsStatic(" << (isStatic() ? "true" : "false") << "), " + << "IsDefinition(" << (isDefinition() ? "true" : "false") << "), " << "Global(" << Global << ")\n"; } #endif //===----------------------------------------------------------------------===// +SubprogramDesc::SubprogramDesc() +: GlobalDesc(DI_TAG_subprogram) +{} + /// ApplyToFields - Target the visitor to the fields of the /// SubprogramDesc. void SubprogramDesc::ApplyToFields(DIVisitor *Visitor) { - Visitor->Apply(Context); - Visitor->Apply(Name); - Visitor->Apply(TransUnit); - Visitor->Apply(TyDesc); - Visitor->Apply(IsStatic); - Visitor->Apply(IsDefinition); - - // FIXME - Temp variable until restructured. - GlobalVariable *Tmp; - Visitor->Apply(Tmp); + GlobalDesc::ApplyToFields(Visitor); +} + +/// getDescString - Return a string used to compose global names and labels. +/// +const char *SubprogramDesc::getDescString() const { + return "llvm.dbg.subprogram"; +} + +/// getTypeString - Return a string used to label this descriptors type. 
+/// +const char *SubprogramDesc::getTypeString() const { + return "llvm.dbg.subprogram.type"; } -/// TypeString - Return a string used to compose globalnames and labels. +/// getAnchorString - Return a string used to label this descriptor's anchor. /// -const char *SubprogramDesc::TypeString() const { - return "subprogram"; +const char *SubprogramDesc::getAnchorString() const { + return "llvm.dbg.subprograms"; } #ifndef NDEBUG void SubprogramDesc::dump() { - std::cerr << TypeString() << " " + std::cerr << getDescString() << " " << "Tag(" << getTag() << "), " - << "Name(\"" << Name << "\"), " - << "Type(" << TyDesc << "), " - << "IsStatic(" << (IsStatic ? "true" : "false") << "), " - << "IsDefinition(" << (IsDefinition ? "true" : "false") << ")\n"; + << "Anchor(" << getAnchor() << "), " + << "Name(\"" << getName() << "\"), " + << "IsStatic(" << (isStatic() ? "true" : "false") << "), " + << "IsDefinition(" << (isDefinition() ? "true" : "false") << ")\n"; } #endif //===----------------------------------------------------------------------===// DebugInfoDesc *DIDeserializer::Deserialize(Value *V) { - return Deserialize(cast(V)); + return Deserialize(getGlobalVariable(V)); } DebugInfoDesc *DIDeserializer::Deserialize(GlobalVariable *GV) { + // Handle NULL. + if (!GV) return NULL; + // Check to see if it has been already deserialized. DebugInfoDesc *&Slot = GlobalDescs[GV]; if (Slot) return Slot; @@ -579,25 +721,17 @@ // If not already defined. if (!Ty) { - // Get descriptor type name. - const char *TS = DD->TypeString(); - // Set up fields vector. std::vector Fields; - // Add tag field. - Fields.push_back(Type::UIntTy); - // Get types of remaining fields. + // Get types of fields. DIGetTypesVisitor GTAM(*this, Fields); GTAM.ApplyToFields(DD); // Construct structured type. Ty = StructType::get(Fields); - // Construct a name for the type. - const std::string Name = std::string("lldb.") + DD->TypeString() + ".type"; - // Register type name with module. 
- M->addTypeName(Name, Ty); + M->addTypeName(DD->getTypeString(), Ty); } return Ty; @@ -605,17 +739,21 @@ /// getString - Construct the string as constant string global. /// -GlobalVariable *DISerializer::getString(const std::string &String) { +Constant *DISerializer::getString(const std::string &String) { // Check string cache for previous edition. - GlobalVariable *&Slot = StringCache[String]; - // return GlobalVariable if previously defined. + Constant *&Slot = StringCache[String]; + // return Constant if previously defined. if (Slot) return Slot; - // Construct strings as an llvm constant. + // Construct string as an llvm constant. Constant *ConstStr = ConstantArray::get(String); // Otherwise create and return a new string global. - return Slot = new GlobalVariable(ConstStr->getType(), true, - GlobalVariable::InternalLinkage, - ConstStr, "str", M); + GlobalVariable *StrGV = new GlobalVariable(ConstStr->getType(), true, + GlobalVariable::InternalLinkage, + ConstStr, "str", M); + // Convert to generic string pointer. + Slot = ConstantExpr::getCast(StrGV, getStrPtrType()); + return Slot; + } /// Serialize - Recursively cast the specified descriptor into a GlobalVariable @@ -627,29 +765,19 @@ // See if DebugInfoDesc exists, if so return prior GlobalVariable. if (Slot) return Slot; - // Get DebugInfoDesc type Tag. - unsigned Tag = DD->getTag(); - - // Construct name. - const std::string Name = std::string("lldb.") + - DD->TypeString(); - // Get the type associated with the Tag. const StructType *Ty = getTagType(DD); // Create the GlobalVariable early to prevent infinite recursion. - GlobalVariable *GV = new GlobalVariable(Ty, true, - GlobalValue::InternalLinkage, - NULL, Name, M); + GlobalVariable *GV = new GlobalVariable(Ty, true, DD->getLinkage(), + NULL, DD->getDescString(), M); // Insert new GlobalVariable in DescGlobals map. Slot = GV; // Set up elements vector std::vector Elements; - // Add Tag value. 
- Elements.push_back(ConstantUInt::get(Type::UIntTy, Tag)); - // Add remaining fields. + // Add fields. DISerializeVisitor SRAM(*this, Elements); SRAM.ApplyToFields(DD); @@ -678,6 +806,9 @@ /// Verify - Return true if the GlobalVariable appears to be a valid /// serialization of a DebugInfoDesc. +bool DIVerifier::Verify(Value *V) { + return Verify(getGlobalVariable(V)); +} bool DIVerifier::Verify(GlobalVariable *GV) { // Check if seen before. if (markVisited(GV)) return true; @@ -732,9 +863,7 @@ MachineDebugInfo::MachineDebugInfo() -: SR() -, DR() -, VR() +: DR() , CompileUnits() , Directories() , SourceFiles() @@ -758,22 +887,32 @@ return false; } +/// Deserialize - Convert a Value to a debug information descriptor. +/// +DebugInfoDesc *MachineDebugInfo::Deserialize(Value *V) { + return DR.Deserialize(V); +} + +/// Verify - Verify that a Value is debug information descriptor. +/// +bool MachineDebugInfo::Verify(Value *V) { + DIVerifier VR; + return VR.Verify(V); +} + /// AnalyzeModule - Scan the module for global debug information. /// void MachineDebugInfo::AnalyzeModule(Module &M) { - SR.setModule(&M); - DR.setModule(&M); SetupCompileUnits(M); } /// SetupCompileUnits - Set up the unique vector of compile units. /// void MachineDebugInfo::SetupCompileUnits(Module &M) { - SR.setModule(&M); - DR.setModule(&M); // Get vector of all debug compile units. + CompileUnitDesc CompileUnit; std::vector Globals = - getGlobalVariablesUsing(M, "llvm.dbg.translation_units"); + getGlobalVariablesUsing(M, CompileUnit.getAnchorString()); // Scan all compile unit globals. for (unsigned i = 0, N = Globals.size(); i < N; ++i) { @@ -793,11 +932,10 @@ /// std::vector MachineDebugInfo::getGlobalVariables(Module &M) { - SR.setModule(&M); - DR.setModule(&M); // Get vector of all debug global objects. 
+ GlobalVariableDesc Global; std::vector Globals = - getGlobalVariablesUsing(M, "llvm.dbg.globals"); + getGlobalVariablesUsing(M, Global.getAnchorString()); // Accumulation of GlobalVariables. std::vector GlobalVariables; @@ -805,11 +943,9 @@ // Scan all globals. for (unsigned i = 0, N = Globals.size(); i < N; ++i) { GlobalVariable *GV = Globals[i]; - if (DebugInfoDesc::TagFromGlobal(GV) == DI_TAG_global_variable) { - GlobalVariableDesc *GVD = - static_cast(DR.Deserialize(GV)); - GlobalVariables.push_back(GVD); - } + GlobalVariableDesc *GVD = + static_cast(DR.Deserialize(GV)); + GlobalVariables.push_back(GVD); } return GlobalVariables; From lattner at cs.uiuc.edu Fri Feb 10 19:43:49 2006 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Fri, 10 Feb 2006 19:43:49 -0600 Subject: [llvm-commits] CVS: llvm/lib/Transforms/Utils/LoopSimplify.cpp Message-ID: <200602110143.TAA12915@zion.cs.uiuc.edu> Changes in directory llvm/lib/Transforms/Utils: LoopSimplify.cpp updated: 1.62 -> 1.63 --- Log message: remove dead expr --- Diffs of the changes: (+0 -1) LoopSimplify.cpp | 1 - 1 files changed, 1 deletion(-) Index: llvm/lib/Transforms/Utils/LoopSimplify.cpp diff -u llvm/lib/Transforms/Utils/LoopSimplify.cpp:1.62 llvm/lib/Transforms/Utils/LoopSimplify.cpp:1.63 --- llvm/lib/Transforms/Utils/LoopSimplify.cpp:1.62 Mon Jan 9 02:03:08 2006 +++ llvm/lib/Transforms/Utils/LoopSimplify.cpp Fri Feb 10 19:43:37 2006 @@ -195,7 +195,6 @@ // incoming values (the loop is canonicalized), we may have simplified the PHI // down to 'X = phi [X, Y]', which should be replaced with 'Y'. 
PHINode *PN; - DominatorSet &DS = getAnalysis(); for (BasicBlock::iterator I = L->getHeader()->begin(); (PN = dyn_cast(I++)); ) if (Value *V = PN->hasConstantValue()) { From evan.cheng at apple.com Fri Feb 10 20:05:48 2006 From: evan.cheng at apple.com (Evan Cheng) Date: Fri, 10 Feb 2006 20:05:48 -0600 Subject: [llvm-commits] CVS: llvm/lib/Target/X86/X86ISelDAGToDAG.cpp Message-ID: <200602110205.UAA13006@zion.cs.uiuc.edu> Changes in directory llvm/lib/Target/X86: X86ISelDAGToDAG.cpp updated: 1.48 -> 1.49 --- Log message: Prevent certain nodes that have already been selected from being folded into X86 addressing mode. Currently we do not allow any node whose target node produces a chain as well as any node that is at the root of the addressing mode expression tree. --- Diffs of the changes: (+40 -19) X86ISelDAGToDAG.cpp | 59 +++++++++++++++++++++++++++++++++++----------------- 1 files changed, 40 insertions(+), 19 deletions(-) Index: llvm/lib/Target/X86/X86ISelDAGToDAG.cpp diff -u llvm/lib/Target/X86/X86ISelDAGToDAG.cpp:1.48 llvm/lib/Target/X86/X86ISelDAGToDAG.cpp:1.49 --- llvm/lib/Target/X86/X86ISelDAGToDAG.cpp:1.48 Fri Feb 10 16:46:26 2006 +++ llvm/lib/Target/X86/X86ISelDAGToDAG.cpp Fri Feb 10 20:05:36 2006 @@ -107,7 +107,7 @@ private: void Select(SDOperand &Result, SDOperand N); - bool MatchAddress(SDOperand N, X86ISelAddressMode &AM); + bool MatchAddress(SDOperand N, X86ISelAddressMode &AM, bool isRoot = true); bool SelectAddr(SDOperand N, SDOperand &Base, SDOperand &Scale, SDOperand &Index, SDOperand &Disp); bool SelectLEAAddr(SDOperand N, SDOperand &Base, SDOperand &Scale, @@ -252,7 +252,26 @@ /// MatchAddress - Add the specified node to the specified addressing mode, /// returning true if it cannot be done. 
This just pattern matches for the /// addressing mode -bool X86DAGToDAGISel::MatchAddress(SDOperand N, X86ISelAddressMode &AM) { +bool X86DAGToDAGISel::MatchAddress(SDOperand N, X86ISelAddressMode &AM, + bool isRoot) { + bool StopHere = false; + // If N has already been selected, we may or may not want to fold its + // operands into the addressing mode. It will result in code duplication! + // FIXME: Right now we do. That is, as long as the selected target node + // does not produce a chain. This may require a more sophisticated heuristics. + std::map::iterator CGMI= CodeGenMap.find(N.getValue(0)); + if (CGMI != CodeGenMap.end()) { + if (isRoot) + // Stop here if it is a root. It's probably not profitable to go deeper. + StopHere = true; + else { + for (unsigned i = 0, e = CGMI->second.Val->getNumValues(); i != e; ++i) { + if (CGMI->second.Val->getValueType(i) == MVT::Other) + StopHere = true; + } + } + } + switch (N.getOpcode()) { default: break; case ISD::FrameIndex: @@ -287,7 +306,7 @@ return false; case ISD::SHL: - if (AM.IndexReg.Val == 0 && AM.Scale == 1) + if (!StopHere && AM.IndexReg.Val == 0 && AM.Scale == 1) if (ConstantSDNode *CN = dyn_cast(N.Val->getOperand(1))) { unsigned Val = CN->getValue(); if (Val == 1 || Val == 2 || Val == 3) { @@ -313,7 +332,7 @@ case ISD::MUL: // X*[3,5,9] -> X+X*[2,4,8] - if (AM.IndexReg.Val == 0 && AM.BaseType == X86ISelAddressMode::RegBase && + if (!StopHere && AM.IndexReg.Val == 0 && AM.BaseType == X86ISelAddressMode::RegBase && AM.Base.Reg.Val == 0) if (ConstantSDNode *CN = dyn_cast(N.Val->getOperand(1))) if (CN->getValue() == 3 || CN->getValue() == 5 || CN->getValue() == 9) { @@ -341,15 +360,17 @@ break; case ISD::ADD: { - X86ISelAddressMode Backup = AM; - if (!MatchAddress(N.Val->getOperand(0), AM) && - !MatchAddress(N.Val->getOperand(1), AM)) - return false; - AM = Backup; - if (!MatchAddress(N.Val->getOperand(1), AM) && - !MatchAddress(N.Val->getOperand(0), AM)) - return false; - AM = Backup; + if (!StopHere) { + 
X86ISelAddressMode Backup = AM; + if (!MatchAddress(N.Val->getOperand(0), AM, false) && + !MatchAddress(N.Val->getOperand(1), AM, false)) + return false; + AM = Backup; + if (!MatchAddress(N.Val->getOperand(1), AM, false) && + !MatchAddress(N.Val->getOperand(0), AM, false)) + return false; + AM = Backup; + } break; } } @@ -474,7 +495,7 @@ if (Opcode >= ISD::BUILTIN_OP_END && Opcode < X86ISD::FIRST_NUMBER) { Result = N; #ifndef NDEBUG - DEBUG(std::cerr << std::string(Indent, ' ')); + DEBUG(std::cerr << std::string(Indent-2, ' ')); DEBUG(std::cerr << "== "); DEBUG(Node->dump(CurDAG)); DEBUG(std::cerr << "\n"); @@ -487,7 +508,7 @@ if (CGMI != CodeGenMap.end()) { Result = CGMI->second; #ifndef NDEBUG - DEBUG(std::cerr << std::string(Indent, ' ')); + DEBUG(std::cerr << std::string(Indent-2, ' ')); DEBUG(std::cerr << "== "); DEBUG(Result.Val->dump(CurDAG)); DEBUG(std::cerr << "\n"); @@ -574,7 +595,7 @@ } #ifndef NDEBUG - DEBUG(std::cerr << std::string(Indent, ' ')); + DEBUG(std::cerr << std::string(Indent-2, ' ')); DEBUG(std::cerr << "== "); DEBUG(Result.Val->dump(CurDAG)); DEBUG(std::cerr << "\n"); @@ -682,7 +703,7 @@ } #ifndef NDEBUG - DEBUG(std::cerr << std::string(Indent, ' ')); + DEBUG(std::cerr << std::string(Indent-2, ' ')); DEBUG(std::cerr << "== "); DEBUG(Result.Val->dump(CurDAG)); DEBUG(std::cerr << "\n"); @@ -721,7 +742,7 @@ SDOperand(CurDAG->getTargetNode(Opc, VT, Result), 0); #ifndef NDEBUG - DEBUG(std::cerr << std::string(Indent, ' ')); + DEBUG(std::cerr << std::string(Indent-2, ' ')); DEBUG(std::cerr << "== "); DEBUG(Result.Val->dump(CurDAG)); DEBUG(std::cerr << "\n"); @@ -733,7 +754,7 @@ SelectCode(Result, N); #ifndef NDEBUG - DEBUG(std::cerr << std::string(Indent, ' ')); + DEBUG(std::cerr << std::string(Indent-2, ' ')); DEBUG(std::cerr << "=> "); DEBUG(Result.Val->dump(CurDAG)); DEBUG(std::cerr << "\n"); From lattner at cs.uiuc.edu Fri Feb 10 20:13:28 2006 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Fri, 10 Feb 2006 20:13:28 -0600 Subject: 
[llvm-commits] CVS: llvm/lib/Transforms/Utils/LoopSimplify.cpp Message-ID: <200602110213.UAA13076@zion.cs.uiuc.edu> Changes in directory llvm/lib/Transforms/Utils: LoopSimplify.cpp updated: 1.63 -> 1.64 --- Log message: Make this check stricter. Disallow loop exit blocks from being shared by loops and their subloops. --- Diffs of the changes: (+7 -4) LoopSimplify.cpp | 11 +++++++---- 1 files changed, 7 insertions(+), 4 deletions(-) Index: llvm/lib/Transforms/Utils/LoopSimplify.cpp diff -u llvm/lib/Transforms/Utils/LoopSimplify.cpp:1.63 llvm/lib/Transforms/Utils/LoopSimplify.cpp:1.64 --- llvm/lib/Transforms/Utils/LoopSimplify.cpp:1.63 Fri Feb 10 19:43:37 2006 +++ llvm/lib/Transforms/Utils/LoopSimplify.cpp Fri Feb 10 20:13:17 2006 @@ -109,7 +109,6 @@ return Changed; } - /// ProcessLoop - Walk the loop structure in depth first order, ensuring that /// all loops have preheaders. /// @@ -162,12 +161,15 @@ L->getExitBlocks(ExitBlocks); SetVector ExitBlockSet(ExitBlocks.begin(), ExitBlocks.end()); + LoopInfo &LI = getAnalysis(); for (SetVector::iterator I = ExitBlockSet.begin(), E = ExitBlockSet.end(); I != E; ++I) { BasicBlock *ExitBlock = *I; for (pred_iterator PI = pred_begin(ExitBlock), PE = pred_end(ExitBlock); PI != PE; ++PI) - if (!L->contains(*PI)) { + // Must be exactly this loop: no subloops, parent loops, or non-loop preds + // allowed. + if (LI.getLoopFor(*PI) != L) { RewriteLoopExitBlock(L, ExitBlock); NumInserted++; Changed = true; @@ -178,6 +180,7 @@ // If the header has more than two predecessors at this point (from the // preheader and from multiple backedges), we must adjust the loop. if (L->getNumBackEdges() != 1) { + // If this is really a nested loop, rip it out into a child loop. if (Loop *NL = SeparateNestedLoop(L)) { ++NumNested; @@ -310,8 +313,8 @@ std::vector OutsideBlocks; for (pred_iterator PI = pred_begin(Header), PE = pred_end(Header); PI != PE; ++PI) - if (!L->contains(*PI)) // Coming in from outside the loop? 
- OutsideBlocks.push_back(*PI); // Keep track of it... + if (!L->contains(*PI)) // Coming in from outside the loop? + OutsideBlocks.push_back(*PI); // Keep track of it... // Split out the loop pre-header BasicBlock *NewBB = From duraid at octopus.com.au Sat Feb 11 01:32:27 2006 From: duraid at octopus.com.au (Duraid Madina) Date: Sat, 11 Feb 2006 01:32:27 -0600 Subject: [llvm-commits] CVS: llvm/lib/Target/IA64/IA64InstrInfo.td Message-ID: <200602110732.BAA14102@zion.cs.uiuc.edu> Changes in directory llvm/lib/Target/IA64: IA64InstrInfo.td updated: 1.45 -> 1.46 --- Log message: now short immediates will get matched (previously constants were all triggering movl 64bit imm fat instructions) --- Diffs of the changes: (+3 -5) IA64InstrInfo.td | 8 +++----- 1 files changed, 3 insertions(+), 5 deletions(-) Index: llvm/lib/Target/IA64/IA64InstrInfo.td diff -u llvm/lib/Target/IA64/IA64InstrInfo.td:1.45 llvm/lib/Target/IA64/IA64InstrInfo.td:1.46 --- llvm/lib/Target/IA64/IA64InstrInfo.td:1.45 Thu Jan 26 19:46:15 2006 +++ llvm/lib/Target/IA64/IA64InstrInfo.td Sat Feb 11 01:32:15 2006 @@ -99,11 +99,9 @@ return (v <= 8191 && v >= -8192); }]>; -def imm64 : PatLeaf<(i64 imm), [{ - // imm64 predicate - True if the immediate fits in a 64-bit - // field - i.e., true. used to keep movl happy - return true; -}]>; +// imm64 predicate - True if the immediate fits in a 64-bit +// field - i.e., true. 
used to keep movl happy +def imm64 : PatLeaf<(i64 imm)>; def ADD : AForm_DAG<0x03, 0x0b, (ops GR:$dst, GR:$src1, GR:$src2), "add $dst = $src1, $src2", From duraid at octopus.com.au Sat Feb 11 01:33:29 2006 From: duraid at octopus.com.au (Duraid Madina) Date: Sat, 11 Feb 2006 01:33:29 -0600 Subject: [llvm-commits] CVS: llvm/lib/Target/IA64/IA64ISelDAGToDAG.cpp Message-ID: <200602110733.BAA14114@zion.cs.uiuc.edu> Changes in directory llvm/lib/Target/IA64: IA64ISelDAGToDAG.cpp updated: 1.36 -> 1.37 --- Log message: fix storing booleans (grawp missed this one) --- Diffs of the changes: (+3 -3) IA64ISelDAGToDAG.cpp | 6 +++--- 1 files changed, 3 insertions(+), 3 deletions(-) Index: llvm/lib/Target/IA64/IA64ISelDAGToDAG.cpp diff -u llvm/lib/Target/IA64/IA64ISelDAGToDAG.cpp:1.36 llvm/lib/Target/IA64/IA64ISelDAGToDAG.cpp:1.37 --- llvm/lib/Target/IA64/IA64ISelDAGToDAG.cpp:1.36 Thu Feb 9 01:17:49 2006 +++ llvm/lib/Target/IA64/IA64ISelDAGToDAG.cpp Sat Feb 11 01:33:17 2006 @@ -556,9 +556,9 @@ // then load 1 into the same reg iff the predicate to store is 1 SDOperand Tmp; Select(Tmp, N->getOperand(1)); - CurDAG->getTargetNode(IA64::TPCADDS, MVT::i64, Initial, - CurDAG->getConstant(1, MVT::i64), - Tmp); + Tmp = SDOperand(CurDAG->getTargetNode(IA64::TPCADDS, MVT::i64, Initial, + CurDAG->getConstant(1, MVT::i64), + Tmp), 0); Result = CurDAG->SelectNodeTo(N, Opc, MVT::Other, Address, Tmp, Chain); return; } From lattner at cs.uiuc.edu Sat Feb 11 02:47:59 2006 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Sat, 11 Feb 2006 02:47:59 -0600 Subject: [llvm-commits] CVS: llvm/lib/Transforms/Utils/LoopSimplify.cpp Message-ID: <200602110847.CAA24443@zion.cs.uiuc.edu> Changes in directory llvm/lib/Transforms/Utils: LoopSimplify.cpp updated: 1.64 -> 1.65 --- Log message: revert my previous change, it exposed other problems. 
--- Diffs of the changes: (+1 -1) LoopSimplify.cpp | 2 +- 1 files changed, 1 insertion(+), 1 deletion(-) Index: llvm/lib/Transforms/Utils/LoopSimplify.cpp diff -u llvm/lib/Transforms/Utils/LoopSimplify.cpp:1.64 llvm/lib/Transforms/Utils/LoopSimplify.cpp:1.65 --- llvm/lib/Transforms/Utils/LoopSimplify.cpp:1.64 Fri Feb 10 20:13:17 2006 +++ llvm/lib/Transforms/Utils/LoopSimplify.cpp Sat Feb 11 02:47:47 2006 @@ -169,7 +169,7 @@ PI != PE; ++PI) // Must be exactly this loop: no subloops, parent loops, or non-loop preds // allowed. - if (LI.getLoopFor(*PI) != L) { + if (!L->contains(*PI)) { RewriteLoopExitBlock(L, ExitBlock); NumInserted++; Changed = true; From lattner at cs.uiuc.edu Sat Feb 11 03:32:00 2006 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Sat, 11 Feb 2006 03:32:00 -0600 Subject: [llvm-commits] CVS: llvm/lib/Transforms/Scalar/InstructionCombining.cpp Message-ID: <200602110932.DAA14372@zion.cs.uiuc.edu> Changes in directory llvm/lib/Transforms/Scalar: InstructionCombining.cpp updated: 1.428 -> 1.429 --- Log message: Port the recent innovations in ComputeMaskedBits to SimplifyDemandedBits. This allows us to simplify on conditions where bits are not known, but they are not demanded either! This also fixes a couple of bugs in ComputeMaskedBits that were exposed during this work. In the future, swaths of instcombine should be removed, as this code subsumes a bunch of ad-hockery. --- Diffs of the changes: (+425 -211) InstructionCombining.cpp | 636 +++++++++++++++++++++++++++++++---------------- 1 files changed, 425 insertions(+), 211 deletions(-) Index: llvm/lib/Transforms/Scalar/InstructionCombining.cpp diff -u llvm/lib/Transforms/Scalar/InstructionCombining.cpp:1.428 llvm/lib/Transforms/Scalar/InstructionCombining.cpp:1.429 --- llvm/lib/Transforms/Scalar/InstructionCombining.cpp:1.428 Thu Feb 9 01:41:14 2006 +++ llvm/lib/Transforms/Scalar/InstructionCombining.cpp Sat Feb 11 03:31:47 2006 @@ -228,7 +228,9 @@ // operators. 
bool SimplifyCommutative(BinaryOperator &I); - bool SimplifyDemandedBits(Value *V, uint64_t Mask, unsigned Depth = 0); + bool SimplifyDemandedBits(Value *V, uint64_t Mask, + uint64_t &KnownZero, uint64_t &KnownOne, + unsigned Depth = 0); // FoldOpIntoPhi - Given a binary operator or cast instruction which has a // PHI node as operand #0, see if we can fold the instruction into the PHI @@ -406,6 +408,18 @@ ConstantInt::get(C->getType(), 1))); } +/// GetConstantInType - Return a ConstantInt with the specified type and value. +/// +static ConstantInt *GetConstantInType(const Type *Ty, uint64_t Val) { + if (Ty->isUnsigned()) + return ConstantUInt::get(Ty, Val); + int64_t SVal = Val; + SVal <<= 64-Ty->getPrimitiveSizeInBits(); + SVal >>= 64-Ty->getPrimitiveSizeInBits(); + return ConstantSInt::get(Ty, SVal); +} + + /// ComputeMaskedBits - Determine which of the bits specified in Mask are /// known to be either zero or one and return them in the KnownZero/KnownOne /// bitsets. This code only analyzes bits in Mask, in order to short-circuit @@ -420,7 +434,7 @@ // this won't lose us code quality. if (ConstantIntegral *CI = dyn_cast(V)) { // We know all of the bits for a constant! - KnownOne = CI->getZExtValue(); + KnownOne = CI->getZExtValue() & Mask; KnownZero = ~KnownOne & Mask; return; } @@ -430,147 +444,149 @@ return; // Limit search depth. uint64_t KnownZero2, KnownOne2; - if (Instruction *I = dyn_cast(V)) { - switch (I->getOpcode()) { - case Instruction::And: - // If either the LHS or the RHS are Zero, the result is zero. - ComputeMaskedBits(I->getOperand(1), Mask, KnownZero, KnownOne, Depth+1); - Mask &= ~KnownZero; - ComputeMaskedBits(I->getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); - - // Output known-1 bits are only known if set in both the LHS & RHS. 
- KnownOne &= KnownOne2; - // Output known-0 are known to be clear if zero in either the LHS | RHS. - KnownZero |= KnownZero2; - return; - case Instruction::Or: - ComputeMaskedBits(I->getOperand(1), Mask, KnownZero, KnownOne, Depth+1); - ComputeMaskedBits(I->getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); - - // Output known-0 bits are only known if clear in both the LHS & RHS. - KnownZero &= KnownZero2; - // Output known-1 are known to be set if set in either the LHS | RHS. - KnownOne |= KnownOne2; - return; - case Instruction::Xor: { - ComputeMaskedBits(I->getOperand(1), Mask, KnownZero, KnownOne, Depth+1); - ComputeMaskedBits(I->getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); - - // Output known-0 bits are known if clear or set in both the LHS & RHS. - uint64_t KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2); - // Output known-1 are known to be set if set in only one of the LHS, RHS. - KnownOne = (KnownZero & KnownOne2) | (KnownOne & KnownZero2); - KnownZero = KnownZeroOut; + Instruction *I = dyn_cast(V); + if (!I) return; + + switch (I->getOpcode()) { + case Instruction::And: + // If either the LHS or the RHS are Zero, the result is zero. + ComputeMaskedBits(I->getOperand(1), Mask, KnownZero, KnownOne, Depth+1); + Mask &= ~KnownZero; + ComputeMaskedBits(I->getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // Output known-1 bits are only known if set in both the LHS & RHS. + KnownOne &= KnownOne2; + // Output known-0 are known to be clear if zero in either the LHS | RHS. 
+ KnownZero |= KnownZero2; + return; + case Instruction::Or: + ComputeMaskedBits(I->getOperand(1), Mask, KnownZero, KnownOne, Depth+1); + Mask &= ~KnownOne; + ComputeMaskedBits(I->getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // Output known-0 bits are only known if clear in both the LHS & RHS. + KnownZero &= KnownZero2; + // Output known-1 are known to be set if set in either the LHS | RHS. + KnownOne |= KnownOne2; + return; + case Instruction::Xor: { + ComputeMaskedBits(I->getOperand(1), Mask, KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(I->getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // Output known-0 bits are known if clear or set in both the LHS & RHS. + uint64_t KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2); + // Output known-1 are known to be set if set in only one of the LHS, RHS. + KnownOne = (KnownZero & KnownOne2) | (KnownOne & KnownZero2); + KnownZero = KnownZeroOut; + return; + } + case Instruction::Select: + ComputeMaskedBits(I->getOperand(2), Mask, KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(I->getOperand(1), Mask, KnownZero2, KnownOne2, Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // Only known if known in both the LHS and RHS. + KnownOne &= KnownOne2; + KnownZero &= KnownZero2; + return; + case Instruction::Cast: { + const Type *SrcTy = I->getOperand(0)->getType(); + if (!SrcTy->isIntegral()) return; + + // If this is an integer truncate or noop, just look in the input. 
+ if (SrcTy->getPrimitiveSizeInBits() >= + I->getType()->getPrimitiveSizeInBits()) { + ComputeMaskedBits(I->getOperand(0), Mask, KnownZero, KnownOne, Depth+1); return; } - case Instruction::Select: - ComputeMaskedBits(I->getOperand(2), Mask, KnownZero, KnownOne, Depth+1); - ComputeMaskedBits(I->getOperand(1), Mask, KnownZero2, KnownOne2, Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); - // Only known if known in both the LHS and RHS. - KnownOne &= KnownOne2; - KnownZero &= KnownZero2; - return; - case Instruction::Cast: { - const Type *SrcTy = I->getOperand(0)->getType(); - if (!SrcTy->isIntegral()) return; + // Sign or Zero extension. Compute the bits in the result that are not + // present in the input. + uint64_t NotIn = ~SrcTy->getIntegralTypeMask(); + uint64_t NewBits = I->getType()->getIntegralTypeMask() & NotIn; - // If this is an integer truncate or noop, just look in the input. - if (SrcTy->getPrimitiveSizeInBits() >= - I->getType()->getPrimitiveSizeInBits()) { - ComputeMaskedBits(I->getOperand(0), Mask, KnownZero, KnownOne, Depth+1); - return; - } + // Handle zero extension. + if (!SrcTy->isSigned()) { + Mask &= SrcTy->getIntegralTypeMask(); + ComputeMaskedBits(I->getOperand(0), Mask, KnownZero, KnownOne, Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + // The top bits are known to be zero. + KnownZero |= NewBits; + } else { + // Sign extension. + Mask &= SrcTy->getIntegralTypeMask(); + ComputeMaskedBits(I->getOperand(0), Mask, KnownZero, KnownOne, Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - // Sign or Zero extension. Compute the bits in the result that are not - // present in the input. - uint64_t NotIn = ~SrcTy->getIntegralTypeMask(); - uint64_t NewBits = I->getType()->getIntegralTypeMask() & NotIn; - - // Handle zero extension. 
- if (!SrcTy->isSigned()) { - Mask &= SrcTy->getIntegralTypeMask(); - ComputeMaskedBits(I->getOperand(0), Mask, KnownZero, KnownOne, Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - // The top bits are known to be zero. + // If the sign bit of the input is known set or clear, then we know the + // top bits of the result. + uint64_t InSignBit = 1ULL << (SrcTy->getPrimitiveSizeInBits()-1); + if (KnownZero & InSignBit) { // Input sign bit known zero KnownZero |= NewBits; - } else { - // Sign extension. - Mask &= SrcTy->getIntegralTypeMask(); - ComputeMaskedBits(I->getOperand(0), Mask, KnownZero, KnownOne, Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - - // If the sign bit of the input is known set or clear, then we know the - // top bits of the result. - uint64_t InSignBit = 1ULL << (SrcTy->getPrimitiveSizeInBits()-1); - if (KnownZero & InSignBit) { // Input sign bit known zero - KnownZero |= NewBits; - KnownOne &= ~NewBits; - } else if (KnownOne & InSignBit) { // Input sign bit known set - KnownOne |= NewBits; - KnownZero &= ~NewBits; - } else { // Input sign bit unknown - KnownZero &= ~NewBits; - KnownOne &= ~NewBits; - } + KnownOne &= ~NewBits; + } else if (KnownOne & InSignBit) { // Input sign bit known set + KnownOne |= NewBits; + KnownZero &= ~NewBits; + } else { // Input sign bit unknown + KnownZero &= ~NewBits; + KnownOne &= ~NewBits; } + } + return; + } + case Instruction::Shl: + // (shl X, C1) & C2 == 0 iff (X & C2 >>u C1) == 0 + if (ConstantUInt *SA = dyn_cast(I->getOperand(1))) { + Mask >>= SA->getValue(); + ComputeMaskedBits(I->getOperand(0), Mask, KnownZero, KnownOne, Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + KnownZero <<= SA->getValue(); + KnownOne <<= SA->getValue(); + KnownZero |= (1ULL << SA->getValue())-1; // low bits known zero. 
return; } - case Instruction::Shl: - // (shl X, C1) & C2 == 0 iff (X & C2 >>u C1) == 0 - if (ConstantUInt *SA = dyn_cast(I->getOperand(1))) { - Mask >> SA->getValue(); - ComputeMaskedBits(I->getOperand(0), Mask, KnownZero, KnownOne, Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - KnownZero <<= SA->getValue(); - KnownOne <<= SA->getValue(); - KnownZero |= (1ULL << SA->getValue())-1; // low bits known zero. - return; - } - break; - case Instruction::Shr: - // (ushr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0 - if (ConstantUInt *SA = dyn_cast(I->getOperand(1))) { - // Compute the new bits that are at the top now. - uint64_t HighBits = (1ULL << SA->getValue())-1; - HighBits <<= I->getType()->getPrimitiveSizeInBits()-SA->getValue(); + break; + case Instruction::Shr: + // (ushr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0 + if (ConstantUInt *SA = dyn_cast(I->getOperand(1))) { + // Compute the new bits that are at the top now. + uint64_t HighBits = (1ULL << SA->getValue())-1; + HighBits <<= I->getType()->getPrimitiveSizeInBits()-SA->getValue(); + + if (I->getType()->isUnsigned()) { // Unsigned shift right. + Mask <<= SA->getValue(); + ComputeMaskedBits(I->getOperand(0), Mask, KnownZero,KnownOne,Depth+1); + assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?"); + KnownZero >>= SA->getValue(); + KnownOne >>= SA->getValue(); + KnownZero |= HighBits; // high bits known zero. + } else { + Mask <<= SA->getValue(); + ComputeMaskedBits(I->getOperand(0), Mask, KnownZero,KnownOne,Depth+1); + assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?"); + KnownZero >>= SA->getValue(); + KnownOne >>= SA->getValue(); - if (I->getType()->isUnsigned()) { // Unsigned shift right. 
- Mask << SA->getValue(); - ComputeMaskedBits(I->getOperand(0), Mask, KnownZero,KnownOne,Depth+1); - assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?"); - KnownZero >>= SA->getValue(); - KnownOne >>= SA->getValue(); - KnownZero |= HighBits; // high bits known zero. - } else { - Mask << SA->getValue(); - ComputeMaskedBits(I->getOperand(0), Mask, KnownZero,KnownOne,Depth+1); - assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?"); - KnownZero >>= SA->getValue(); - KnownOne >>= SA->getValue(); - - // Handle the sign bits. - uint64_t SignBit = 1ULL << (I->getType()->getPrimitiveSizeInBits()-1); - SignBit >>= SA->getValue(); // Adjust to where it is now in the mask. - - if (KnownZero & SignBit) { // New bits are known zero. - KnownZero |= HighBits; - } else if (KnownOne & SignBit) { // New bits are known one. - KnownOne |= HighBits; - } + // Handle the sign bits. + uint64_t SignBit = 1ULL << (I->getType()->getPrimitiveSizeInBits()-1); + SignBit >>= SA->getValue(); // Adjust to where it is now in the mask. + + if (KnownZero & SignBit) { // New bits are known zero. + KnownZero |= HighBits; + } else if (KnownOne & SignBit) { // New bits are known one. + KnownOne |= HighBits; } - return; } - break; + return; } + break; } } @@ -584,19 +600,54 @@ return (KnownZero & Mask) == Mask; } -/// SimplifyDemandedBits - Look at V. At this point, we know that only the Mask -/// bits of the result of V are ever used downstream. If we can use this -/// information to simplify V, return V and set NewVal to the new value we -/// should use in V's place. -bool InstCombiner::SimplifyDemandedBits(Value *V, uint64_t Mask, +/// ShrinkDemandedConstant - Check to see if the specified operand of the +/// specified instruction is a constant integer. If so, check to see if there +/// are any bits set in the constant that are not demanded. If so, shrink the +/// constant and return true. 
+static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo, + uint64_t Demanded) { + ConstantInt *OpC = dyn_cast<ConstantInt>(I->getOperand(OpNo)); + if (!OpC) return false; + + // If there are no bits set that aren't demanded, nothing to do. + if ((~Demanded & OpC->getZExtValue()) == 0) + return false; + + // This is producing any bits that are not needed, shrink the RHS. + uint64_t Val = Demanded & OpC->getZExtValue(); + I->setOperand(OpNo, GetConstantInType(OpC->getType(), Val)); + return true; +} + + + +/// SimplifyDemandedBits - Look at V. At this point, we know that only the +/// DemandedMask bits of the result of V are ever used downstream. If we can +/// use this information to simplify V, do so and return true. Otherwise, +/// analyze the expression and return a mask of KnownOne and KnownZero bits for +/// the expression (used to simplify the caller). The KnownZero/One bits may +/// only be accurate for those bits in the DemandedMask. +bool InstCombiner::SimplifyDemandedBits(Value *V, uint64_t DemandedMask, + uint64_t &KnownZero, uint64_t &KnownOne, unsigned Depth) { + if (ConstantIntegral *CI = dyn_cast<ConstantIntegral>(V)) { + // We know all of the bits for a constant! + KnownOne = CI->getZExtValue() & DemandedMask; + KnownZero = ~KnownOne & DemandedMask; + return false; + } + + KnownZero = KnownOne = 0; if (!V->hasOneUse()) { // Other users may use these bits. - if (Depth != 0) // Not at the root. + if (Depth != 0) { // Not at the root. + // Just compute the KnownZero/KnownOne bits to simplify things downstream. + ComputeMaskedBits(V, DemandedMask, KnownZero, KnownOne, Depth); return false; + } // If this is the root being simplified, allow it to have multiple uses, - // just set the Mask to all bits. - Mask = V->getType()->getIntegralTypeMask(); - } else if (Mask == 0) { // Not demanding any bits from V. + // just set the DemandedMask to all bits. + DemandedMask = V->getType()->getIntegralTypeMask(); + } else if (DemandedMask == 0) { // Not demanding any bits from V.
if (V != UndefValue::get(V->getType())) return UpdateValueUsesWith(V, UndefValue::get(V->getType())); return false; @@ -607,99 +658,257 @@ Instruction *I = dyn_cast<Instruction>(V); if (!I) return false; // Only analyze instructions. + uint64_t KnownZero2, KnownOne2; switch (I->getOpcode()) { default: break; case Instruction::And: - if (ConstantInt *RHS = dyn_cast<ConstantInt>(I->getOperand(1))) { - // Only demanding an intersection of the bits. - if (SimplifyDemandedBits(I->getOperand(0), RHS->getRawValue() & Mask, - Depth+1)) - return true; - if (~Mask & RHS->getZExtValue()) { - // If this is producing any bits that are not needed, simplify the RHS. - uint64_t Val = Mask & RHS->getZExtValue(); - Constant *RHS = - ConstantUInt::get(I->getType()->getUnsignedVersion(), Val); - if (I->getType()->isSigned()) - RHS = ConstantExpr::getCast(RHS, I->getType()); - I->setOperand(1, RHS); - return UpdateValueUsesWith(I, I); + // If either the LHS or the RHS are Zero, the result is zero. + if (SimplifyDemandedBits(I->getOperand(1), DemandedMask, + KnownZero, KnownOne, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + + // If something is known zero on the RHS, the bits aren't demanded on the + // LHS. + if (SimplifyDemandedBits(I->getOperand(0), DemandedMask & ~KnownZero, + KnownZero2, KnownOne2, Depth+1)) + return true; + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // If all of the demanded bits are known one on one side, return the other. + // These bits cannot contribute to the result of the 'and'. + if ((DemandedMask & ~KnownZero2 & KnownOne) == (DemandedMask & ~KnownZero2)) + return UpdateValueUsesWith(I, I->getOperand(0)); + if ((DemandedMask & ~KnownZero & KnownOne2) == (DemandedMask & ~KnownZero)) + return UpdateValueUsesWith(I, I->getOperand(1)); + + // If the RHS is a constant, see if we can simplify it.
+ if (ShrinkDemandedConstant(I, 1, DemandedMask)) + return UpdateValueUsesWith(I, I); + + // Output known-1 bits are only known if set in both the LHS & RHS. + KnownOne &= KnownOne2; + // Output known-0 are known to be clear if zero in either the LHS | RHS. + KnownZero |= KnownZero2; + break; + case Instruction::Or: + if (SimplifyDemandedBits(I->getOperand(1), DemandedMask, + KnownZero, KnownOne, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + if (SimplifyDemandedBits(I->getOperand(0), DemandedMask & ~KnownOne, + KnownZero2, KnownOne2, Depth+1)) + return true; + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // If all of the demanded bits are known zero on one side, return the other. + // These bits cannot contribute to the result of the 'or'. + if ((DemandedMask & ~KnownOne2 & KnownZero) == DemandedMask & ~KnownOne2) + return UpdateValueUsesWith(I, I->getOperand(0)); + if ((DemandedMask & ~KnownOne & KnownZero2) == DemandedMask & ~KnownOne) + return UpdateValueUsesWith(I, I->getOperand(1)); + + // If the RHS is a constant, see if we can simplify it. + if (ShrinkDemandedConstant(I, 1, DemandedMask)) + return UpdateValueUsesWith(I, I); + + // Output known-0 bits are only known if clear in both the LHS & RHS. + KnownZero &= KnownZero2; + // Output known-1 are known to be set if set in either the LHS | RHS. + KnownOne |= KnownOne2; + break; + case Instruction::Xor: { + if (SimplifyDemandedBits(I->getOperand(1), DemandedMask, + KnownZero, KnownOne, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + if (SimplifyDemandedBits(I->getOperand(0), DemandedMask, + KnownZero2, KnownOne2, Depth+1)) + return true; + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // If all of the demanded bits are known zero on one side, return the other. + // These bits cannot contribute to the result of the 'xor'. 
+ if ((DemandedMask & KnownZero) == DemandedMask) + return UpdateValueUsesWith(I, I->getOperand(0)); + if ((DemandedMask & KnownZero2) == DemandedMask) + return UpdateValueUsesWith(I, I->getOperand(1)); + + // Output known-0 bits are known if clear or set in both the LHS & RHS. + uint64_t KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2); + // Output known-1 are known to be set if set in only one of the LHS, RHS. + uint64_t KnownOneOut = (KnownZero & KnownOne2) | (KnownOne & KnownZero2); + + // If all of the unknown bits are known to be zero on one side or the other + // (but not both) turn this into an *inclusive* or. + if (uint64_t UnknownBits = DemandedMask & ~(KnownZeroOut|KnownOneOut)) { + if ((UnknownBits & (KnownZero|KnownZero2)) == UnknownBits) { + Instruction *Or = + BinaryOperator::createOr(I->getOperand(0), I->getOperand(1), + I->getName()); + InsertNewInstBefore(Or, *I); + return UpdateValueUsesWith(I, Or); } } - // Walk the LHS and the RHS. - return SimplifyDemandedBits(I->getOperand(0), Mask, Depth+1) || - SimplifyDemandedBits(I->getOperand(1), Mask, Depth+1); - case Instruction::Or: - case Instruction::Xor: - if (ConstantInt *RHS = dyn_cast<ConstantInt>(I->getOperand(1))) { - // If none of the [x]or'd in bits are demanded, don't both with the [x]or. - if ((Mask & RHS->getRawValue()) == 0) - return UpdateValueUsesWith(I, I->getOperand(0)); - - // Otherwise, for an OR, we only demand those bits not set by the OR. - if (I->getOpcode() == Instruction::Or) - Mask &= ~RHS->getRawValue(); - return SimplifyDemandedBits(I->getOperand(0), Mask, Depth+1); - } - // Walk the LHS and the RHS. - return SimplifyDemandedBits(I->getOperand(0), Mask, Depth+1) || - SimplifyDemandedBits(I->getOperand(1), Mask, Depth+1); + + // If the RHS is a constant, see if we can simplify it. + // FIXME: for XOR, we prefer to force bits to 1 if they will make a -1.
+ if (ShrinkDemandedConstant(I, 1, DemandedMask)) + return UpdateValueUsesWith(I, I); + + KnownZero = KnownZeroOut; + KnownOne = KnownOneOut; + break; + } + case Instruction::Select: + if (SimplifyDemandedBits(I->getOperand(2), DemandedMask, + KnownZero, KnownOne, Depth+1)) + return true; + if (SimplifyDemandedBits(I->getOperand(1), DemandedMask, + KnownZero2, KnownOne2, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // If the operands are constants, see if we can simplify them. + if (ShrinkDemandedConstant(I, 1, DemandedMask)) + return UpdateValueUsesWith(I, I); + if (ShrinkDemandedConstant(I, 2, DemandedMask)) + return UpdateValueUsesWith(I, I); + + // Only known if known in both the LHS and RHS. + KnownOne &= KnownOne2; + KnownZero &= KnownZero2; + break; case Instruction::Cast: { const Type *SrcTy = I->getOperand(0)->getType(); - if (SrcTy == Type::BoolTy) - return SimplifyDemandedBits(I->getOperand(0), Mask&1, Depth+1); + if (!SrcTy->isIntegral()) return false; - if (!SrcTy->isInteger()) return false; - - unsigned SrcBits = SrcTy->getPrimitiveSizeInBits(); - // If this is a sign-extend, treat specially. - if (SrcTy->isSigned() && - SrcBits < I->getType()->getPrimitiveSizeInBits()) { - // If none of the top bits are demanded, convert this into an unsigned - // extend instead of a sign extend. - if ((Mask & ((1ULL << SrcBits)-1)) == 0) { + // If this is an integer truncate or noop, just look in the input. + if (SrcTy->getPrimitiveSizeInBits() >= + I->getType()->getPrimitiveSizeInBits()) { + if (SimplifyDemandedBits(I->getOperand(0), DemandedMask, + KnownZero, KnownOne, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + break; + } + + // Sign or Zero extension. Compute the bits in the result that are not + // present in the input. 
+ uint64_t NotIn = ~SrcTy->getIntegralTypeMask(); + uint64_t NewBits = I->getType()->getIntegralTypeMask() & NotIn; + + // Handle zero extension. + if (!SrcTy->isSigned()) { + DemandedMask &= SrcTy->getIntegralTypeMask(); + if (SimplifyDemandedBits(I->getOperand(0), DemandedMask, + KnownZero, KnownOne, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + // The top bits are known to be zero. + KnownZero |= NewBits; + } else { + // Sign extension. + if (SimplifyDemandedBits(I->getOperand(0), + DemandedMask & SrcTy->getIntegralTypeMask(), + KnownZero, KnownOne, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + + // If the sign bit of the input is known set or clear, then we know the + // top bits of the result. + uint64_t InSignBit = 1ULL << (SrcTy->getPrimitiveSizeInBits()-1); + + // If the input sign bit is known zero, or if the NewBits are not demanded + // convert this into a zero extension. + if ((KnownZero & InSignBit) || (NewBits & ~DemandedMask) == NewBits) { // Convert to unsigned first. Instruction *NewVal; NewVal = new CastInst(I->getOperand(0), SrcTy->getUnsignedVersion(), I->getOperand(0)->getName()); InsertNewInstBefore(NewVal, *I); + // Then cast that to the destination type. NewVal = new CastInst(NewVal, I->getType(), I->getName()); InsertNewInstBefore(NewVal, *I); return UpdateValueUsesWith(I, NewVal); + } else if (KnownOne & InSignBit) { // Input sign bit known set + KnownOne |= NewBits; + KnownZero &= ~NewBits; + } else { // Input sign bit unknown + KnownZero &= ~NewBits; + KnownOne &= ~NewBits; } - - // Otherwise, the high-bits *are* demanded. This means that the code - // implicitly demands computation of the sign bit of the input, make sure - // we explicitly include it in Mask. - Mask |= 1ULL << (SrcBits-1); } - - // If this is an extension, the top bits are ignored. 
- Mask &= SrcTy->getIntegralTypeMask(); - return SimplifyDemandedBits(I->getOperand(0), Mask, Depth+1); + break; } - case Instruction::Select: - // Simplify the T and F values if they are not demanded. - return SimplifyDemandedBits(I->getOperand(2), Mask, Depth+1) || - SimplifyDemandedBits(I->getOperand(1), Mask, Depth+1); case Instruction::Shl: - // We only demand the low bits of the input. - if (ConstantUInt *SA = dyn_cast<ConstantUInt>(I->getOperand(1))) - return SimplifyDemandedBits(I->getOperand(0), Mask >> SA->getValue(), - Depth+1); + if (ConstantUInt *SA = dyn_cast<ConstantUInt>(I->getOperand(1))) { + if (SimplifyDemandedBits(I->getOperand(0), DemandedMask >> SA->getValue(), + KnownZero, KnownOne, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + KnownZero <<= SA->getValue(); + KnownOne <<= SA->getValue(); + KnownZero |= (1ULL << SA->getValue())-1; // low bits known zero. + } break; case Instruction::Shr: - // We only demand the high bits of the input. - if (I->getType()->isUnsigned()) - if (ConstantUInt *SA = dyn_cast<ConstantUInt>(I->getOperand(1))) { - Mask <<= SA->getValue(); - Mask &= I->getType()->getIntegralTypeMask(); - return SimplifyDemandedBits(I->getOperand(0), Mask, Depth+1); + if (ConstantUInt *SA = dyn_cast<ConstantUInt>(I->getOperand(1))) { + unsigned ShAmt = SA->getValue(); + + // Compute the new bits that are at the top now. + uint64_t HighBits = (1ULL << ShAmt)-1; + HighBits <<= I->getType()->getPrimitiveSizeInBits() - ShAmt; + + if (I->getType()->isUnsigned()) { // Unsigned shift right. + if (SimplifyDemandedBits(I->getOperand(0), DemandedMask << ShAmt, + KnownZero, KnownOne, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + KnownZero >>= ShAmt; + KnownOne >>= ShAmt; + KnownZero |= HighBits; // high bits known zero. + } else { // Signed shift right.
+ if (SimplifyDemandedBits(I->getOperand(0), DemandedMask << ShAmt, + KnownZero, KnownOne, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + KnownZero >>= SA->getValue(); + KnownOne >>= SA->getValue(); + + // Handle the sign bits. + uint64_t SignBit = 1ULL << (I->getType()->getPrimitiveSizeInBits()-1); + SignBit >>= SA->getValue(); // Adjust to where it is now in the mask. + + // If the input sign bit is known to be zero, or if none of the top bits + // are demanded, turn this into an unsigned shift right. + if ((KnownZero & SignBit) || (HighBits & ~DemandedMask) == HighBits) { + // Convert the input to unsigned. + Instruction *NewVal; + NewVal = new CastInst(I->getOperand(0), + I->getType()->getUnsignedVersion(), + I->getOperand(0)->getName()); + InsertNewInstBefore(NewVal, *I); + // Perform the unsigned shift right. + NewVal = new ShiftInst(Instruction::Shr, NewVal, SA, I->getName()); + InsertNewInstBefore(NewVal, *I); + // Then cast that to the destination type. + NewVal = new CastInst(NewVal, I->getType(), I->getName()); + InsertNewInstBefore(NewVal, *I); + return UpdateValueUsesWith(I, NewVal); + } else if (KnownOne & SignBit) { // New bits are known one. + KnownOne |= HighBits; + } } - // FIXME: handle signed shr, demanding the appropriate bits. If the top - // bits aren't demanded, strength reduce to a logical SHR instead. + } break; } + + // If the client is only demanding bits that we know, return the known + // constant. + if ((DemandedMask & (KnownZero|KnownOne)) == DemandedMask) + return UpdateValueUsesWith(I, GetConstantInType(I->getType(), KnownOne)); return false; } @@ -2021,7 +2230,9 @@ // See if we can simplify any instructions used by the LHS whose sole // purpose is to compute bits we don't care about. 
- if (SimplifyDemandedBits(&I, I.getType()->getIntegralTypeMask())) + uint64_t KnownZero, KnownOne; + if (SimplifyDemandedBits(&I, I.getType()->getIntegralTypeMask(), + KnownZero, KnownOne)) return &I; if (ConstantIntegral *AndRHS = dyn_cast<ConstantIntegral>(Op1)) { @@ -4378,9 +4589,12 @@ // See if we can simplify any instructions used by the LHS whose sole // purpose is to compute bits we don't care about. - if (CI.getType()->isInteger() && CI.getOperand(0)->getType()->isIntegral() && - SimplifyDemandedBits(&CI, CI.getType()->getIntegralTypeMask())) - return &CI; + if (CI.getType()->isInteger() && CI.getOperand(0)->getType()->isIntegral()) { + uint64_t KnownZero, KnownOne; + if (SimplifyDemandedBits(&CI, CI.getType()->getIntegralTypeMask(), + KnownZero, KnownOne)) + return &CI; + } // If casting the result of a getelementptr instruction with no offset, turn // this into a cast of the original pointer! From lattner at cs.uiuc.edu Sat Feb 11 03:33:40 2006 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Sat, 11 Feb 2006 03:33:40 -0600 Subject: [llvm-commits] CVS: llvm/test/Regression/Transforms/InstCombine/pow.ll Message-ID: <200602110933.DAA14682@zion.cs.uiuc.edu> Changes in directory llvm/test/Regression/Transforms/InstCombine: pow.ll (r1.7) removed --- Log message: This is implemented by the simplify-libcalls pass, not instcombine --- Diffs of the changes: (+0 -0) 0 files changed From lattner at cs.uiuc.edu Sat Feb 11 03:37:20 2006 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Sat, 11 Feb 2006 03:37:20 -0600 Subject: [llvm-commits] CVS: llvm/test/Regression/Transforms/SimplifyLibCalls/Pow.ll Message-ID: <200602110937.DAA15140@zion.cs.uiuc.edu> Changes in directory llvm/test/Regression/Transforms/SimplifyLibCalls: Pow.ll updated: 1.3 -> 1.4 --- Log message: Update comments to be actually accurate --- Diffs of the changes: (+19 -13) Pow.ll | 32 +++++++++++++++++++------------- 1 files changed, 19 insertions(+), 13 deletions(-) Index:
llvm/test/Regression/Transforms/SimplifyLibCalls/Pow.ll diff -u llvm/test/Regression/Transforms/SimplifyLibCalls/Pow.ll:1.3 llvm/test/Regression/Transforms/SimplifyLibCalls/Pow.ll:1.4 --- llvm/test/Regression/Transforms/SimplifyLibCalls/Pow.ll:1.3 Wed Aug 24 12:20:30 2005 +++ llvm/test/Regression/Transforms/SimplifyLibCalls/Pow.ll Sat Feb 11 03:37:07 2006 @@ -1,17 +1,23 @@ -; RUN: llvm-as < %s | opt -simplify-libcalls | llvm-dis | not grep 'call.*pow' +; Testcase for calls to the standard C "pow" function +; +; Equivalent to: http://gcc.gnu.org/ml/gcc-patches/2003-02/msg01786.html +; RUN: llvm-as < %s | opt -simplify-libcalls -disable-output && +; RUN: llvm-as < %s | opt -simplify-libcalls | llvm-dis | not grep 'call double .pow' -declare double %pow(double,double) -%fpstorage = global double 5.0 +declare double %pow(double, double) -implementation ; Functions: +double %test1(double %X) { + %Y = call double %pow(double %X, double 0.0) + ret double %Y ; x^0.0 always equals 1.0 +} + +double %test2(double %X) { + %Y = call double %pow(double %X, double -0.0) + ret double %Y ; x^-0.0 always equals 1.0 +} -int %main () { - %fpnum = load double* %fpstorage; - %one = call double %pow(double 1.0, double %fpnum) - %two = call double %pow(double %one, double 0.5) - %three = call double %pow(double %two, double 1.0) - %four = call double %pow(double %three, double -1.0) - %five = call double %pow(double %four, double 0.0) - %result = cast double %five to int - ret int %result +double %test3(double %X) { + %Y = call double %pow(double 1.0, double %X) + ret double %Y ; 1.0^x always equals 1.0 } + From llvm at cs.uiuc.edu Sat Feb 11 04:24:36 2006 From: llvm at cs.uiuc.edu (LLVM) Date: Sat, 11 Feb 2006 04:24:36 -0600 Subject: [llvm-commits] CVS: llvm-test/MultiSource/Applications/JM/ Message-ID: <200602111024.EAA23555@zion.cs.uiuc.edu> Changes in directory llvm-test/MultiSource/Applications/JM: --- Log message: Directory /var/cvs/llvm/llvm-test/MultiSource/Applications/JM added 
to the repository --- Diffs of the changes: (+0 -0) 0 files changed From llvm at cs.uiuc.edu Sat Feb 11 04:25:26 2006 From: llvm at cs.uiuc.edu (LLVM) Date: Sat, 11 Feb 2006 04:25:26 -0600 Subject: [llvm-commits] CVS: llvm-test/MultiSource/Applications/JM/ldecod/ Message-ID: <200602111025.EAA23611@zion.cs.uiuc.edu> Changes in directory llvm-test/MultiSource/Applications/JM/ldecod: --- Log message: Directory /var/cvs/llvm/llvm-test/MultiSource/Applications/JM/ldecod added to the repository --- Diffs of the changes: (+0 -0) 0 files changed From llvm at cs.uiuc.edu Sat Feb 11 04:25:26 2006 From: llvm at cs.uiuc.edu (LLVM) Date: Sat, 11 Feb 2006 04:25:26 -0600 Subject: [llvm-commits] CVS: llvm-test/MultiSource/Applications/JM/lencod/ Message-ID: <200602111025.EAA23615@zion.cs.uiuc.edu> Changes in directory llvm-test/MultiSource/Applications/JM/lencod: --- Log message: Directory /var/cvs/llvm/llvm-test/MultiSource/Applications/JM/lencod added to the repository --- Diffs of the changes: (+0 -0) 0 files changed From llvm at cs.uiuc.edu Sat Feb 11 04:25:59 2006 From: llvm at cs.uiuc.edu (LLVM) Date: Sat, 11 Feb 2006 04:25:59 -0600 Subject: [llvm-commits] CVS: llvm-test/MultiSource/Applications/JM/ldecod/data/ Message-ID: <200602111025.EAA23659@zion.cs.uiuc.edu> Changes in directory llvm-test/MultiSource/Applications/JM/ldecod/data: --- Log message: Directory /var/cvs/llvm/llvm-test/MultiSource/Applications/JM/ldecod/data added to the repository --- Diffs of the changes: (+0 -0) 0 files changed From llvm at cs.uiuc.edu Sat Feb 11 04:27:02 2006 From: llvm at cs.uiuc.edu (LLVM) Date: Sat, 11 Feb 2006 04:27:02 -0600 Subject: [llvm-commits] CVS: llvm-test/MultiSource/Applications/JM/lencod/data/ Message-ID: <200602111027.EAA23957@zion.cs.uiuc.edu> Changes in directory llvm-test/MultiSource/Applications/JM/lencod/data: --- Log message: Directory /var/cvs/llvm/llvm-test/MultiSource/Applications/JM/lencod/data added to the repository --- Diffs of the changes: (+0 -0) 0 files 
changed From evan.cheng at apple.com Sat Feb 11 04:33:42 2006 From: evan.cheng at apple.com (Evan Cheng) Date: Sat, 11 Feb 2006 04:33:42 -0600 Subject: [llvm-commits] CVS: llvm-test/MultiSource/Applications/JM/ldecod/Makefile annexb.c annexb.h biaridecod.c biaridecod.h block.c block.h cabac.c cabac.h context_ini.c context_ini.h contributors.h ctx_tables.h defines.h elements.h erc_api.c erc_api.h erc_do.h erc_do_i.c erc_do_p.c erc_globals.h errorconcealment.c errorconcealment.h filehandle.c fmo.c fmo.h global.h header.c header.h image.c image.h ldecod.c leaky_bucket.c leaky_bucket.h loopFilter.c loopfilter.h macroblock.c macroblock.h mb_access.c mb_access.h mbuffer.c mbuffer.h memalloc.c memalloc.h nal.c nal_part.c nalu.c nalu.h nalucommon.c nalucommon.h output.c output.h parset.c parset.h parsetcommon.c parsetcommon.h rtp.c rtp.h sei.c sei.h transform8x8.c transform8x8.h vlc.c vlc.h Message-ID: <200602111033.EAA24539@zion.cs.uiuc.edu> Changes in directory llvm-test/MultiSource/Applications/JM/ldecod: Makefile added (r1.1) annexb.c added (r1.1) annexb.h added (r1.1) biaridecod.c added (r1.1) biaridecod.h added (r1.1) block.c added (r1.1) block.h added (r1.1) cabac.c added (r1.1) cabac.h added (r1.1) context_ini.c added (r1.1) context_ini.h added (r1.1) contributors.h added (r1.1) ctx_tables.h added (r1.1) defines.h added (r1.1) elements.h added (r1.1) erc_api.c added (r1.1) erc_api.h added (r1.1) erc_do.h added (r1.1) erc_do_i.c added (r1.1) erc_do_p.c added (r1.1) erc_globals.h added (r1.1) errorconcealment.c added (r1.1) errorconcealment.h added (r1.1) filehandle.c added (r1.1) fmo.c added (r1.1) fmo.h added (r1.1) global.h added (r1.1) header.c added (r1.1) header.h added (r1.1) image.c added (r1.1) image.h added (r1.1) ldecod.c added (r1.1) leaky_bucket.c added (r1.1) leaky_bucket.h added (r1.1) loopFilter.c added (r1.1) loopfilter.h added (r1.1) macroblock.c added (r1.1) macroblock.h added (r1.1) mb_access.c added (r1.1) mb_access.h added (r1.1) mbuffer.c added 
(r1.1) mbuffer.h added (r1.1) memalloc.c added (r1.1) memalloc.h added (r1.1) nal.c added (r1.1) nal_part.c added (r1.1) nalu.c added (r1.1) nalu.h added (r1.1) nalucommon.c added (r1.1) nalucommon.h added (r1.1) output.c added (r1.1) output.h added (r1.1) parset.c added (r1.1) parset.h added (r1.1) parsetcommon.c added (r1.1) parsetcommon.h added (r1.1) rtp.c added (r1.1) rtp.h added (r1.1) sei.c added (r1.1) sei.h added (r1.1) transform8x8.c added (r1.1) transform8x8.h added (r1.1) vlc.c added (r1.1) vlc.h added (r1.1) --- Log message: Added H.264 reference encoder / decoder from ITU to LLVM test suite. --- Diffs of the changes: (+32096 -0) Makefile | 8 annexb.c | 313 +++ annexb.h | 28 biaridecod.c | 386 +++ biaridecod.h | 39 block.c | 1408 ++++++++++++++ block.h | 32 cabac.c | 1802 ++++++++++++++++++ cabac.h | 54 context_ini.c | 74 context_ini.h | 23 contributors.h | 202 ++ ctx_tables.h | 729 +++++++ defines.h | 165 + elements.h | 107 + erc_api.c | 382 +++ erc_api.h | 169 + erc_do.h | 44 erc_do_i.c | 541 +++++ erc_do_p.c | 1817 ++++++++++++++++++ erc_globals.h | 52 errorconcealment.c | 242 ++ errorconcealment.h | 21 filehandle.c | 166 + fmo.c | 551 +++++ fmo.h | 30 global.h | 815 ++++++++ header.c | 791 ++++++++ header.h | 23 image.c | 1834 ++++++++++++++++++ image.h | 24 ldecod.c | 940 +++++++++ leaky_bucket.c | 138 + leaky_bucket.h | 26 loopFilter.c | 478 ++++ loopfilter.h | 18 macroblock.c | 5118 +++++++++++++++++++++++++++++++++++++++++++++++++++++ macroblock.h | 338 +++ mb_access.c | 677 +++++++ mb_access.h | 30 mbuffer.c | 3830 +++++++++++++++++++++++++++++++++++++++ mbuffer.h | 218 ++ memalloc.c | 591 ++++++ memalloc.h | 55 nal.c | 118 + nal_part.c | 44 nalu.c | 44 nalu.h | 28 nalucommon.c | 75 nalucommon.h | 55 output.c | 673 ++++++ output.h | 27 parset.c | 684 +++++++ parset.h | 42 parsetcommon.c | 218 ++ parsetcommon.h | 214 ++ rtp.c | 364 +++ rtp.h | 48 sei.c | 1571 ++++++++++++++++ sei.h | 65 transform8x8.c | 1035 ++++++++++ transform8x8.h | 38 vlc.c 
| 1368 ++++++++++++++ vlc.h | 56 64 files changed, 32096 insertions(+) Index: llvm-test/MultiSource/Applications/JM/ldecod/Makefile diff -c /dev/null llvm-test/MultiSource/Applications/JM/ldecod/Makefile:1.1 *** /dev/null Sat Feb 11 04:33:32 2006 --- llvm-test/MultiSource/Applications/JM/ldecod/Makefile Sat Feb 11 04:33:22 2006 *************** *** 0 **** --- 1,8 ---- + LEVEL = ../../../.. + PROG = ldecod + CPPFLAGS = -D __USE_LARGEFILE64 -D _FILE_OFFSET_BITS=64 + LDFLAGS = -lm $(TOOLLINKOPTS) + + RUN_OPTIONS = -i data/test.264 -o data/test_dec.yuv -r data/test_rec.yuv + + include ../../../Makefile.multisrc Index: llvm-test/MultiSource/Applications/JM/ldecod/annexb.c diff -c /dev/null llvm-test/MultiSource/Applications/JM/ldecod/annexb.c:1.1 *** /dev/null Sat Feb 11 04:33:41 2006 --- llvm-test/MultiSource/Applications/JM/ldecod/annexb.c Sat Feb 11 04:33:22 2006 *************** *** 0 **** --- 1,313 ---- + + /*! + ************************************************************************************* + * \file annexb.c + * + * \brief + * Annex B Byte Stream format + * + * \author + * Main contributors (see contributors.h for copyright, address and affiliation details) + * - Stephan Wenger + ************************************************************************************* + */ + + #include + #include + + #include "global.h" + #include "annexb.h" + #include "memalloc.h" + + + FILE *bits = NULL; //!< the bit stream file + static int FindStartCode (unsigned char *Buf, int zeros_in_startcode); + + int IsFirstByteStreamNALU=1; + int LastAccessUnitExists=0; + int NALUCount=0; + + + /*! + ************************************************************************ + * \brief + * Returns the size of the NALU (bits between start codes in case of + * Annex B. nalu->buf and nalu->len are filled. Other field in + * nalu-> remain uninitialized (will be taken care of by NALUtoRBSP. 
+ * + * \return + * 0 if there is nothing any more to read (EOF) + * -1 in case of any error + * + * \note Side-effect: Returns length of start-code in bytes. + * + * \note + * GetAnnexbNALU expects start codes at byte aligned positions in the file + * + ************************************************************************ + */ + + int GetAnnexbNALU (NALU_t *nalu) + { + int info2, info3, pos = 0; + int StartCodeFound, rewind; + unsigned char *Buf; + int LeadingZero8BitsCount=0, TrailingZero8Bits=0; + + if ((Buf = (unsigned char*)calloc (nalu->max_size , sizeof(char))) == NULL) no_mem_exit("GetAnnexbNALU: Buf"); + + while(!feof(bits) && (Buf[pos++]=fgetc(bits))==0); + + if(feof(bits)) + { + if(pos==0) + return 0; + else + { + printf( "GetAnnexbNALU can't read start code\n"); + free(Buf); + return -1; + } + } + + if(Buf[pos-1]!=1) + { + printf ("GetAnnexbNALU: no Start Code at the begin of the NALU, return -1\n"); + free(Buf); + return -1; + } + + if(pos<3) + { + printf ("GetAnnexbNALU: no Start Code at the begin of the NALU, return -1\n"); + free(Buf); + return -1; + } + else if(pos==3) + { + nalu->startcodeprefix_len = 3; + LeadingZero8BitsCount = 0; + } + else + { + LeadingZero8BitsCount = pos-4; + nalu->startcodeprefix_len = 4; + } + + //the 1st byte stream NAL unit can has leading_zero_8bits, but subsequent ones are not + //allowed to contain it since these zeros(if any) are considered trailing_zero_8bits + //of the previous byte stream NAL unit. 
+ if(!IsFirstByteStreamNALU && LeadingZero8BitsCount>0) + { + printf ("GetAnnexbNALU: The leading_zero_8bits syntax can only be present in the first byte stream NAL unit, return -1\n"); + free(Buf); + return -1; + } + IsFirstByteStreamNALU=0; + + StartCodeFound = 0; + info2 = 0; + info3 = 0; + + while (!StartCodeFound) + { + if (feof (bits)) + { + //Count the trailing_zero_8bits + while(Buf[pos-2-TrailingZero8Bits]==0) + TrailingZero8Bits++; + nalu->len = (pos-1)-nalu->startcodeprefix_len-LeadingZero8BitsCount-TrailingZero8Bits; + memcpy (nalu->buf, &Buf[LeadingZero8BitsCount+nalu->startcodeprefix_len], nalu->len); + nalu->forbidden_bit = (nalu->buf[0]>>7) & 1; + nalu->nal_reference_idc = (nalu->buf[0]>>5) & 3; + nalu->nal_unit_type = (nalu->buf[0]) & 0x1f; + + // printf ("GetAnnexbNALU, eof case: pos %d nalu->len %d, nalu->reference_idc %d, nal_unit_type %d \n", pos, nalu->len, nalu->nal_reference_idc, nalu->nal_unit_type); + + #if TRACE + fprintf (p_trace, "\n\nLast NALU in File\n\n"); + fprintf (p_trace, "Annex B NALU w/ %s startcode, len %d, forbidden_bit %d, nal_reference_idc %d, nal_unit_type %d\n\n", + nalu->startcodeprefix_len == 4?"long":"short", nalu->len, nalu->forbidden_bit, nalu->nal_reference_idc, nalu->nal_unit_type); + fflush (p_trace); + #endif + free(Buf); + return pos-1; + } + Buf[pos++] = fgetc (bits); + info3 = FindStartCode(&Buf[pos-4], 3); + if(info3 != 1) + info2 = FindStartCode(&Buf[pos-3], 2); + StartCodeFound = (info2 == 1 || info3 == 1); + } + + //Count the trailing_zero_8bits + if(info3==1) //if the detected start code is 00 00 01, trailing_zero_8bits is sure not to be present + { + while(Buf[pos-5-TrailingZero8Bits]==0) + TrailingZero8Bits++; + } + // Here, we have found another start code (and read length of startcode bytes more than we should + // have. 
Hence, go back in the file + rewind = 0; + if(info3 == 1) + rewind = -4; + else if (info2 == 1) + rewind = -3; + else + printf(" Panic: Error in next start code search \n"); + + if (0 != fseek (bits, rewind, SEEK_CUR)) + { + snprintf (errortext, ET_SIZE, "GetAnnexbNALU: Cannot fseek %d in the bit stream file", rewind); + free(Buf); + error(errortext, 600); + } + + // Here the leading zeros(if any), Start code, the complete NALU, trailing zeros(if any) + // and the next start code is in the Buf. + // The size of Buf is pos, pos+rewind are the number of bytes excluding the next + // start code, and (pos+rewind)-startcodeprefix_len-LeadingZero8BitsCount-TrailingZero8Bits + // is the size of the NALU. + + nalu->len = (pos+rewind)-nalu->startcodeprefix_len-LeadingZero8BitsCount-TrailingZero8Bits; + memcpy (nalu->buf, &Buf[LeadingZero8BitsCount+nalu->startcodeprefix_len], nalu->len); + nalu->forbidden_bit = (nalu->buf[0]>>7) & 1; + nalu->nal_reference_idc = (nalu->buf[0]>>5) & 3; + nalu->nal_unit_type = (nalu->buf[0]) & 0x1f; + + + //printf ("GetAnnexbNALU, regular case: pos %d nalu->len %d, nalu->reference_idc %d, nal_unit_type %d \n", pos, nalu->len, nalu->nal_reference_idc, nalu->nal_unit_type); + #if TRACE + fprintf (p_trace, "\n\nAnnex B NALU w/ %s startcode, len %d, forbidden_bit %d, nal_reference_idc %d, nal_unit_type %d\n\n", + nalu->startcodeprefix_len == 4?"long":"short", nalu->len, nalu->forbidden_bit, nalu->nal_reference_idc, nalu->nal_unit_type); + fflush (p_trace); + #endif + + free(Buf); + + return (pos+rewind); + } + + + + + /*! 
+ ************************************************************************ + * \brief + * Opens the bit stream file named fn + * \return + * none + ************************************************************************ + */ + void OpenBitstreamFile (char *fn) + { + if (NULL == (bits=fopen(fn, "rb"))) + { + snprintf (errortext, ET_SIZE, "Cannot open Annex B ByteStream file '%s'", input->infile); + error(errortext,500); + } + } + + + /*! + ************************************************************************ + * \brief + * Closes the bit stream file + ************************************************************************ + */ + void CloseBitstreamFile() + { + fclose (bits); + } + + + /*! + ************************************************************************ + * \brief + * returns if new start code is found at byte aligned position buf. + * new-startcode is of form N 0x00 bytes, followed by a 0x01 byte. + * + * \return + * 1 if start-code is found or \n + * 0, indicating that there is no start code + * + * \param Buf + * pointer to byte-stream + * \param zeros_in_startcode + * indicates number of 0x00 bytes in start-code. 
+ ************************************************************************ + */ + static int FindStartCode (unsigned char *Buf, int zeros_in_startcode) + { + int info; + int i; + + info = 1; + for (i = 0; i < zeros_in_startcode; i++) + if(Buf[i] != 0) + info = 0; + + if(Buf[i] != 1) + info = 0; + return info; + } + + void CheckZeroByteNonVCL(NALU_t *nalu, int * ret) + { + int CheckZeroByte=0; + + //This function deals only with non-VCL NAL units + if(nalu->nal_unit_type>=1&&nalu->nal_unit_type<=5) + return; + + //for SPS and PPS, zero_byte shall exist + if(nalu->nal_unit_type==NALU_TYPE_SPS || nalu->nal_unit_type==NALU_TYPE_PPS) + CheckZeroByte=1; + //check the possibility of the current NALU to be the start of a new access unit, according to 7.4.1.2.3 + if(nalu->nal_unit_type==NALU_TYPE_AUD || nalu->nal_unit_type==NALU_TYPE_SPS || + nalu->nal_unit_type==NALU_TYPE_PPS || nalu->nal_unit_type==NALU_TYPE_SEI || + (nalu->nal_unit_type>=13 && nalu->nal_unit_type<=18)) + { + if(LastAccessUnitExists) + { + LastAccessUnitExists=0; //deliver the last access unit to decoder + NALUCount=0; + } + } + NALUCount++; + //for the first NAL unit in an access unit, zero_byte shall exists + if(NALUCount==1) + CheckZeroByte=1; + if(CheckZeroByte && nalu->startcodeprefix_len==3) + { + printf("warning: zero_byte shall exist\n"); + //because it is not a very serious problem, we may not indicate an error by setting ret to -1 + //*ret=-1; + } + } + + void CheckZeroByteVCL(NALU_t *nalu, int * ret) + { + int CheckZeroByte=0; + + //This function deals only with VCL NAL units + if(!(nalu->nal_unit_type>=1&&nalu->nal_unit_type<=5)) + return; + + if(LastAccessUnitExists) + { + NALUCount=0; + } + NALUCount++; + //the first VCL NAL unit that is the first NAL unit after last VCL NAL unit indicates + //the start of a new access unit and hence the first NAL unit of the new access unit. 
(sounds like a tongue twister :-) + if(NALUCount==1) + CheckZeroByte=1; + LastAccessUnitExists=1; + if(CheckZeroByte && nalu->startcodeprefix_len==3) + { + printf("warning: zero_byte shall exist\n"); + //because it is not a very serious problem, we may not indicate an error by setting ret to -1 + //*ret=-1; + } + } Index: llvm-test/MultiSource/Applications/JM/ldecod/annexb.h diff -c /dev/null llvm-test/MultiSource/Applications/JM/ldecod/annexb.h:1.1 *** /dev/null Sat Feb 11 04:33:41 2006 --- llvm-test/MultiSource/Applications/JM/ldecod/annexb.h Sat Feb 11 04:33:22 2006 *************** *** 0 **** --- 1,28 ---- + + /*! + ************************************************************************************* + * \file annexb.h + * + * \brief + * Annex B byte stream buffer handling. + * + ************************************************************************************* + */ + + #ifndef _ANNEXB_H_ + #define _ANNEXB_H_ + + #include "nalucommon.h" + + extern int IsFirstByteStreamNALU; + extern int LastAccessUnitExists; + extern int NALUCount; + + int GetAnnexbNALU (NALU_t *nalu); + void OpenBitstreamFile (char *fn); + void CloseBitstreamFile(); + void CheckZeroByteNonVCL(NALU_t *nalu, int * ret); + void CheckZeroByteVCL(NALU_t *nalu, int * ret); + + #endif + Index: llvm-test/MultiSource/Applications/JM/ldecod/biaridecod.c diff -c /dev/null llvm-test/MultiSource/Applications/JM/ldecod/biaridecod.c:1.1 *** /dev/null Sat Feb 11 04:33:41 2006 --- llvm-test/MultiSource/Applications/JM/ldecod/biaridecod.c Sat Feb 11 04:33:22 2006 *************** *** 0 **** --- 1,386 ---- + + /*! + ************************************************************************************* + * \file biaridecod.c + * + * \brief + * binary arithmetic decoder routines + * \date + * 21. 
Oct 2000 + * \author + * Main contributors (see contributors.h for copyright, address and affiliation details) + * - Detlev Marpe + * - Gabi Blaettermann + ************************************************************************************* + */ + + #include + + #include "global.h" + #include "memalloc.h" + + extern int symbolCount; + + int binCount = 0; + + #define Dbuffer (dep->Dbuffer) + #define Dbits_to_go (dep->Dbits_to_go) + #define Dcodestrm (dep->Dcodestrm) + #define Dcodestrm_len (dep->Dcodestrm_len) + + #define B_BITS 10 // Number of bits to represent the whole coding interval + #define HALF (1 << (B_BITS-1)) + #define QUARTER (1 << (B_BITS-2)) + + /* Range table for LPS */ + const byte rLPS_table_64x4[64][4]= + { + { 128, 176, 208, 240}, + { 128, 167, 197, 227}, + { 128, 158, 187, 216}, + { 123, 150, 178, 205}, + { 116, 142, 169, 195}, + { 111, 135, 160, 185}, + { 105, 128, 152, 175}, + { 100, 122, 144, 166}, + { 95, 116, 137, 158}, + { 90, 110, 130, 150}, + { 85, 104, 123, 142}, + { 81, 99, 117, 135}, + { 77, 94, 111, 128}, + { 73, 89, 105, 122}, + { 69, 85, 100, 116}, + { 66, 80, 95, 110}, + { 62, 76, 90, 104}, + { 59, 72, 86, 99}, + { 56, 69, 81, 94}, + { 53, 65, 77, 89}, + { 51, 62, 73, 85}, + { 48, 59, 69, 80}, + { 46, 56, 66, 76}, + { 43, 53, 63, 72}, + { 41, 50, 59, 69}, + { 39, 48, 56, 65}, + { 37, 45, 54, 62}, + { 35, 43, 51, 59}, + { 33, 41, 48, 56}, + { 32, 39, 46, 53}, + { 30, 37, 43, 50}, + { 29, 35, 41, 48}, + { 27, 33, 39, 45}, + { 26, 31, 37, 43}, + { 24, 30, 35, 41}, + { 23, 28, 33, 39}, + { 22, 27, 32, 37}, + { 21, 26, 30, 35}, + { 20, 24, 29, 33}, + { 19, 23, 27, 31}, + { 18, 22, 26, 30}, + { 17, 21, 25, 28}, + { 16, 20, 23, 27}, + { 15, 19, 22, 25}, + { 14, 18, 21, 24}, + { 14, 17, 20, 23}, + { 13, 16, 19, 22}, + { 12, 15, 18, 21}, + { 12, 14, 17, 20}, + { 11, 14, 16, 19}, + { 11, 13, 15, 18}, + { 10, 12, 15, 17}, + { 10, 12, 14, 16}, + { 9, 11, 13, 15}, + { 9, 11, 12, 14}, + { 8, 10, 12, 14}, + { 8, 9, 11, 13}, + { 7, 9, 11, 12}, + 
{ 7, 9, 10, 12}, + { 7, 8, 10, 11}, + { 6, 8, 9, 11}, + { 6, 7, 9, 10}, + { 6, 7, 8, 9}, + { 2, 2, 2, 2} + }; + + + + const unsigned short AC_next_state_MPS_64[64] = + { + 1,2,3,4,5,6,7,8,9,10, + 11,12,13,14,15,16,17,18,19,20, + 21,22,23,24,25,26,27,28,29,30, + 31,32,33,34,35,36,37,38,39,40, + 41,42,43,44,45,46,47,48,49,50, + 51,52,53,54,55,56,57,58,59,60, + 61,62,62,63 + }; + + + const unsigned short AC_next_state_LPS_64[64] = + { + 0, 0, 1, 2, 2, 4, 4, 5, 6, 7, + 8, 9, 9,11,11,12,13,13,15,15, + 16,16,18,18,19,19,21,21,22,22, + 23,24,24,25,26,26,27,27,28,29, + 29,30,30,30,31,32,32,33,33,33, + 34,34,35,35,35,36,36,36,37,37, + 37,38,38,63 + }; + + /************************************************************************ + * M a c r o s + ************************************************************************ + */ + + #define get_byte(){ \ + Dbuffer = Dcodestrm[(*Dcodestrm_len)++];\ + Dbits_to_go = 7; \ + } + + + /************************************************************************ + ************************************************************************ + init / exit decoder + ************************************************************************ + ************************************************************************/ + + + /*! + ************************************************************************ + * \brief + * Allocates memory for the DecodingEnvironment struct + * \return DecodingContextPtr + * allocates memory + ************************************************************************ + */ + DecodingEnvironmentPtr arideco_create_decoding_environment() + { + DecodingEnvironmentPtr dep; + + if ((dep = calloc(1,sizeof(DecodingEnvironment))) == NULL) + no_mem_exit("arideco_create_decoding_environment: dep"); + return dep; + } + + + /*! 
+ *********************************************************************** + * \brief + * Frees memory of the DecodingEnvironment struct + *********************************************************************** + */ + void arideco_delete_decoding_environment(DecodingEnvironmentPtr dep) + { + if (dep == NULL) + { + snprintf(errortext, ET_SIZE, "Error freeing dep (NULL pointer)"); + error (errortext, 200); + } + else + free(dep); + } + + + /*! + ************************************************************************ + * \brief + * Initializes the DecodingEnvironment for the arithmetic coder + ************************************************************************ + */ + void arideco_start_decoding(DecodingEnvironmentPtr dep, unsigned char *cpixcode, + int firstbyte, int *cpixcode_len, int slice_type ) + { + + int value = 0; + + Dcodestrm = cpixcode; + Dcodestrm_len = cpixcode_len; + *Dcodestrm_len = firstbyte; + + { + int i; + Dbits_to_go = 0; + for (i = 0; i < B_BITS -1 ; i++) // insertion of redundant bit + { + if (--Dbits_to_go < 0) + get_byte(); + value = (value<<1) | ((Dbuffer >> Dbits_to_go) & 0x01); + } + } + dep->Drange = HALF-2; + dep->Dvalue = value; + } + + + /*! + ************************************************************************ + * \brief + * arideco_bits_read + ************************************************************************ + */ + int arideco_bits_read(DecodingEnvironmentPtr dep) + { + return 8 * ((*Dcodestrm_len)-1) + (8 - Dbits_to_go) - 16; + } + + + /*! + ************************************************************************ + * \brief + * arideco_done_decoding(): + ************************************************************************ + */ + void arideco_done_decoding(DecodingEnvironmentPtr dep) + { + (*Dcodestrm_len)++; + } + + + + /*! 
 ************************************************************************
 * \brief
 *    biari_decode_symbol():
 *    Decodes one binary symbol with the adaptive context bi_ct.
 *    The context (state and MPS) and the decoder state (Drange, Dvalue,
 *    and through get_byte() the stream position) are updated.
 * \param dep
 *    decoding environment holding range, value and the code stream
 * \param bi_ct
 *    context model used for and adapted by this symbol
 * \return
 *    the decoded symbol
 ************************************************************************
 */
unsigned int biari_decode_symbol(DecodingEnvironmentPtr dep, BiContextTypePtr bi_ct )
{
  // Start from the most probable symbol; flipped below on the LPS path
  register unsigned int bit = bi_ct->MPS;
  register unsigned int value = dep->Dvalue;
  register unsigned int range = dep->Drange;
  // LPS sub-range: looked up by context state and by bits 6..7 of the range
  register unsigned int rLPS = (unsigned int) rLPS_table_64x4[bi_ct->state][(range>>6) & 0x03];

#if (2==TRACE)
  fprintf(p_trace, "%d 0x%04x %d %d\n", binCount++, dep->Drange, bi_ct->state, bi_ct->MPS );
#endif

  // What remains after removing the LPS part is the MPS sub-range
  range -= rLPS;

  if (value < range) /* MPS */
    bi_ct->state = AC_next_state_MPS_64[bi_ct->state]; // next state
  else /* LPS */
  {
    value -= range;
    range = rLPS;
    bit = !bit;
    if (!bi_ct->state) // switch meaning of MPS if necessary
      bi_ct->MPS ^= 0x01;
    bi_ct->state = AC_next_state_LPS_64[bi_ct->state]; // next state
  }

  // Bring the range back to at least QUARTER, reading one stream bit per doubling
  while (range < QUARTER)
  {
    /* Double range */
    range <<= 1;
    // Byte buffer exhausted: fetch the next byte of the code stream
    if (--Dbits_to_go < 0)
      get_byte();
    /* Shift in next bit and add to value */
    value = (value << 1) | ((Dbuffer >> Dbits_to_go) & 0x01);

  }

  // Write the working copies back to the decoding environment
  dep->Drange = range;
  dep->Dvalue = value;

  return(bit);
}



/*!
 ************************************************************************
 * \brief
 *    biari_decode_symbol_eq_prob():
 *    Decodes one binary symbol without a context model. Drange is left
 *    unchanged; only Dvalue (and the stream position) is updated.
 * \param dep
 *    decoding environment holding range, value and the code stream
 * \return
 *    the decoded symbol
 ************************************************************************
 */
unsigned int biari_decode_symbol_eq_prob(DecodingEnvironmentPtr dep)
{
  register unsigned int bit = 0;
  // Make room for the next stream bit in the least significant position
  register unsigned int value  = (dep->Dvalue<<1);

#if TRACE
  //  fprintf(p_trace, "%d 0x%04x\n", binCount++, dep->Drange );
#endif

  // Byte buffer exhausted: fetch the next byte of the code stream
  if (--Dbits_to_go < 0)
    get_byte();
  /* Shift in next bit and add to value */
  value |= (Dbuffer >> Dbits_to_go) &  0x01;
  // The symbol is 1 when the value reaches the range; the range is then removed from the value
  if (value >= dep->Drange)
  {
    bit = 1;
    value -= dep->Drange;
  }

  dep->Dvalue = value;

  return(bit);
}

/*!
 ************************************************************************
 * \brief
 *    biari_decode_final():
 *    Decodes one symbol against a range reduced by 2.
 *    When 1 is returned the decoding environment is left untouched;
 *    otherwise the reduced range is doubled until it reaches QUARTER and
 *    Drange/Dvalue are updated.
 * \param dep
 *    decoding environment holding range, value and the code stream
 * \return
 *    the decoded symbol
 ************************************************************************
 */
unsigned int biari_decode_final(DecodingEnvironmentPtr dep)
{
  register unsigned int value  = dep->Dvalue;
  // The top 2 units of the range are reserved for the symbol value 1
  register unsigned int range  = dep->Drange - 2;

#if (2==TRACE)
  fprintf(p_trace, "%d 0x%04x\n", binCount++, dep->Drange );
#endif

  if (value >= range)
  {
    // Neither Dvalue nor Drange is written back on this path
    return 1;
  }
  else
  {
    // Bring the range back to at least QUARTER, reading one stream bit per doubling
    while (range < QUARTER)
    {
      /* Double range */
      range <<= 1;
      if (--Dbits_to_go < 0)
        get_byte();
      /* Shift in next bit and add to value */
      value = (value << 1) | ((Dbuffer >> Dbits_to_go) & 0x01);
    }
    dep->Dvalue = value;
    dep->Drange = range;
    return 0;
  }
}



/*!
+ ************************************************************************ + * \brief + * Initializes a given context with some pre-defined probability state + ************************************************************************ + */ + void biari_init_context (struct img_par* img, BiContextTypePtr ctx, const int* ini) + { + int pstate; + + pstate = ((ini[0]* max(0,img->qp) )>>4) + ini[1]; + pstate = min (max ( 1, pstate), 126); + + if ( pstate >= 64 ) + { + ctx->state = pstate - 64; + ctx->MPS = 1; + } + else + { + ctx->state = 63 - pstate; + ctx->MPS = 0; + } + } + Index: llvm-test/MultiSource/Applications/JM/ldecod/biaridecod.h diff -c /dev/null llvm-test/MultiSource/Applications/JM/ldecod/biaridecod.h:1.1 *** /dev/null Sat Feb 11 04:33:42 2006 --- llvm-test/MultiSource/Applications/JM/ldecod/biaridecod.h Sat Feb 11 04:33:22 2006 *************** *** 0 **** --- 1,39 ---- + + /*! + *************************************************************************** + * \file + * biaridecod.h + * + * \brief + * Headerfile for binary arithmetic decoder routines + * + * \author + * Detlev Marpe, + * Gabi Bl?ttermann + * Copyright (C) 2000 HEINRICH HERTZ INSTITUTE All Rights Reserved. + * + * \date + * 21. 
Oct 2000 + ************************************************************************** + */ + + #ifndef _BIARIDECOD_H_ + #define _BIARIDECOD_H_ + + + /************************************************************************ + * D e f i n i t i o n s + *********************************************************************** + */ + + void arideco_start_decoding(DecodingEnvironmentPtr eep, unsigned char *code_buffer, int firstbyte, int *code_len, int slice_type); + int arideco_bits_read(DecodingEnvironmentPtr dep); + void arideco_done_decoding(DecodingEnvironmentPtr dep); + void biari_init_context (struct img_par *img, BiContextTypePtr ctx, const int* ini); + void rescale_cum_freq(BiContextTypePtr bi_ct); + unsigned int biari_decode_symbol(DecodingEnvironmentPtr dep, BiContextTypePtr bi_ct ); + unsigned int biari_decode_symbol_eq_prob(DecodingEnvironmentPtr dep); + unsigned int biari_decode_final(DecodingEnvironmentPtr dep); + + #endif // BIARIDECOD_H_ + Index: llvm-test/MultiSource/Applications/JM/ldecod/block.c diff -c /dev/null llvm-test/MultiSource/Applications/JM/ldecod/block.c:1.1 *** /dev/null Sat Feb 11 04:33:42 2006 --- llvm-test/MultiSource/Applications/JM/ldecod/block.c Sat Feb 11 04:33:22 2006 *************** *** 0 **** --- 1,1408 ---- + + /*! 
+ *********************************************************************** + * \file + * block.c + * + * \brief + * Block functions + * + * \author + * Main contributors (see contributors.h for copyright, address and affiliation details) + * - Inge Lille-Langoy + * - Rickard Sjoberg + *********************************************************************** + */ + + #include "contributors.h" + + #include + #include + + #include "global.h" + #include "block.h" + #include "image.h" + #include "mb_access.h" + + + #define Q_BITS 15 + + static const int quant_coef[6][4][4] = { + {{13107, 8066,13107, 8066},{ 8066, 5243, 8066, 5243},{13107, 8066,13107, 8066},{ 8066, 5243, 8066, 5243}}, + {{11916, 7490,11916, 7490},{ 7490, 4660, 7490, 4660},{11916, 7490,11916, 7490},{ 7490, 4660, 7490, 4660}}, + {{10082, 6554,10082, 6554},{ 6554, 4194, 6554, 4194},{10082, 6554,10082, 6554},{ 6554, 4194, 6554, 4194}}, + {{ 9362, 5825, 9362, 5825},{ 5825, 3647, 5825, 3647},{ 9362, 5825, 9362, 5825},{ 5825, 3647, 5825, 3647}}, + {{ 8192, 5243, 8192, 5243},{ 5243, 3355, 5243, 3355},{ 8192, 5243, 8192, 5243},{ 5243, 3355, 5243, 3355}}, + {{ 7282, 4559, 7282, 4559},{ 4559, 2893, 4559, 2893},{ 7282, 4559, 7282, 4559},{ 4559, 2893, 4559, 2893}} + }; + static const int A[4][4] = { + { 16, 20, 16, 20}, + { 20, 25, 20, 25}, + { 16, 20, 16, 20}, + { 20, 25, 20, 25} + }; + + int quant_intra_default[16] = { + 6,13,20,28, + 13,20,28,32, + 20,28,32,37, + 28,32,37,42 + }; + + int quant_inter_default[16] = { + 10,14,20,24, + 14,20,24,27, + 20,24,27,30, + 24,27,30,34 + }; + + int quant8_intra_default[64] = { + 6,10,13,16,18,23,25,27, + 10,11,16,18,23,25,27,29, + 13,16,18,23,25,27,29,31, + 16,18,23,25,27,29,31,33, + 18,23,25,27,29,31,33,36, + 23,25,27,29,31,33,36,38, + 25,27,29,31,33,36,38,40, + 27,29,31,33,36,38,40,42 + }; + + int quant8_inter_default[64] = { + 9,13,15,17,19,21,22,24, + 13,13,17,19,21,22,24,25, + 15,17,19,21,22,24,25,27, + 17,19,21,22,24,25,27,28, + 19,21,22,24,25,27,28,30, + 
21,22,24,25,27,28,30,32, + 22,24,25,27,28,30,32,33, + 24,25,27,28,30,32,33,35 + }; + + int quant_org[16] = { //to be use if no q matrix is chosen + 16,16,16,16, + 16,16,16,16, + 16,16,16,16, + 16,16,16,16 + }; + + int quant8_org[64] = { //to be use if no q matrix is chosen + 16,16,16,16,16,16,16,16, + 16,16,16,16,16,16,16,16, + 16,16,16,16,16,16,16,16, + 16,16,16,16,16,16,16,16, + 16,16,16,16,16,16,16,16, + 16,16,16,16,16,16,16,16, + 16,16,16,16,16,16,16,16, + 16,16,16,16,16,16,16,16 + }; + + // Notation for comments regarding prediction and predictors. + // The pels of the 4x4 block are labelled a..p. The predictor pels above + // are labelled A..H, from the left I..L, and from above left X, as follows: + // + // X A B C D E F G H + // I a b c d + // J e f g h + // K i j k l + // L m n o p + // + + // Predictor array index definitions + #define P_X (PredPel[0]) + #define P_A (PredPel[1]) + #define P_B (PredPel[2]) + #define P_C (PredPel[3]) + #define P_D (PredPel[4]) + #define P_E (PredPel[5]) + #define P_F (PredPel[6]) + #define P_G (PredPel[7]) + #define P_H (PredPel[8]) + #define P_I (PredPel[9]) + #define P_J (PredPel[10]) + #define P_K (PredPel[11]) + #define P_L (PredPel[12]) + + /*! 
+ *********************************************************************** + * \brief + * makes and returns 4x4 blocks with all 5 intra prediction modes + * + * \return + * DECODING_OK decoding of intraprediction mode was sucessfull \n + * SEARCH_SYNC search next sync element as errors while decoding occured + *********************************************************************** + */ + + int intrapred( + struct img_par *img, //!< image parameters + int ioff, //!< pixel offset X within MB + int joff, //!< pixel offset Y within MB + int img_block_x, //!< location of block X, multiples of 4 + int img_block_y) //!< location of block Y, multiples of 4 + { + int i,j; + int s0; + int img_y,img_x; + int PredPel[13]; // array of predictor pels + + imgpel **imgY = dec_picture->imgY; + + PixelPos pix_a[4]; + PixelPos pix_b, pix_c, pix_d; + + int block_available_up; + int block_available_left; + int block_available_up_left; + int block_available_up_right; + + int mb_nr=img->current_mb_nr; + + byte predmode = img->ipredmode[img_block_x][img_block_y]; + + img_x=img_block_x*4; + img_y=img_block_y*4; + + for (i=0;i<4;i++) + { + getNeighbour(mb_nr, ioff -1 , joff +i , 1, &pix_a[i]); + } + + getNeighbour(mb_nr, ioff , joff -1 , 1, &pix_b); + getNeighbour(mb_nr, ioff +4 , joff -1 , 1, &pix_c); + getNeighbour(mb_nr, ioff -1 , joff -1 , 1, &pix_d); + + pix_c.available = pix_c.available && !(((ioff==4)||(ioff==12)) && ((joff==4)||(joff==12))); + + if (active_pps->constrained_intra_pred_flag) + { + for (i=0, block_available_left=1; i<4;i++) + block_available_left &= pix_a[i].available ? img->intra_block[pix_a[i].mb_addr]: 0; + block_available_up = pix_b.available ? img->intra_block [pix_b.mb_addr] : 0; + block_available_up_right = pix_c.available ? img->intra_block [pix_c.mb_addr] : 0; + block_available_up_left = pix_d.available ? 
img->intra_block [pix_d.mb_addr] : 0; + } + else + { + block_available_left = pix_a[0].available; + block_available_up = pix_b.available; + block_available_up_right = pix_c.available; + block_available_up_left = pix_d.available; + } + + // form predictor pels + if (block_available_up) + { + P_A = imgY[pix_b.pos_y][pix_b.pos_x+0]; + P_B = imgY[pix_b.pos_y][pix_b.pos_x+1]; + P_C = imgY[pix_b.pos_y][pix_b.pos_x+2]; + P_D = imgY[pix_b.pos_y][pix_b.pos_x+3]; + + } + else + { + P_A = P_B = P_C = P_D = img->dc_pred_value; + } + + if (block_available_up_right) + { + P_E = imgY[pix_c.pos_y][pix_c.pos_x+0]; + P_F = imgY[pix_c.pos_y][pix_c.pos_x+1]; + P_G = imgY[pix_c.pos_y][pix_c.pos_x+2]; + P_H = imgY[pix_c.pos_y][pix_c.pos_x+3]; + } + else + { + P_E = P_F = P_G = P_H = P_D; + } + + if (block_available_left) + { + P_I = imgY[pix_a[0].pos_y][pix_a[0].pos_x]; + P_J = imgY[pix_a[1].pos_y][pix_a[1].pos_x]; + P_K = imgY[pix_a[2].pos_y][pix_a[2].pos_x]; + P_L = imgY[pix_a[3].pos_y][pix_a[3].pos_x]; + } + else + { + P_I = P_J = P_K = P_L = img->dc_pred_value; + } + + if (block_available_up_left) + { + P_X = imgY[pix_d.pos_y][pix_d.pos_x]; + } + else + { + P_X = img->dc_pred_value; + } + + + switch (predmode) + { + case DC_PRED: /* DC prediction */ + + s0 = 0; + if (block_available_up && block_available_left) + { + // no edge + s0 = (P_A + P_B + P_C + P_D + P_I + P_J + P_K + P_L + 4)/(2*BLOCK_SIZE); + } + else if (!block_available_up && block_available_left) + { + // upper edge + s0 = (P_I + P_J + P_K + P_L + 2)/BLOCK_SIZE; + } + else if (block_available_up && !block_available_left) + { + // left edge + s0 = (P_A + P_B + P_C + P_D + 2)/BLOCK_SIZE; + } + else //if (!block_available_up && !block_available_left) + { + // top left corner, nothing to predict from + s0 = img->dc_pred_value; + } + + for (j=0; j < BLOCK_SIZE; j++) + { + for (i=0; i < BLOCK_SIZE; i++) + { + // store DC prediction + img->mpr[i+ioff][j+joff] = s0; + } + } + break; + + case VERT_PRED: /* vertical prediction 
from block above */ + if (!block_available_up) + printf ("warning: Intra_4x4_Vertical prediction mode not allowed at mb %d\n",img->current_mb_nr); + + for(j=0;jmpr[i+ioff][j+joff]=imgY[pix_b.pos_y][pix_b.pos_x+i];/* store predicted 4x4 block */ + break; + + case HOR_PRED: /* horizontal prediction from left block */ + if (!block_available_left) + printf ("warning: Intra_4x4_Horizontal prediction mode not allowed at mb %d\n",img->current_mb_nr); + + for(j=0;jmpr[i+ioff][j+joff]=imgY[pix_a[j].pos_y][pix_a[j].pos_x]; /* store predicted 4x4 block */ + break; + + case DIAG_DOWN_RIGHT_PRED: + if ((!block_available_up)||(!block_available_left)||(!block_available_up_left)) + printf ("warning: Intra_4x4_Diagonal_Down_Right prediction mode not allowed at mb %d\n",img->current_mb_nr); + + img->mpr[0+ioff][3+joff] = (P_L + 2*P_K + P_J + 2) / 4; + img->mpr[0+ioff][2+joff] = + img->mpr[1+ioff][3+joff] = (P_K + 2*P_J + P_I + 2) / 4; + img->mpr[0+ioff][1+joff] = + img->mpr[1+ioff][2+joff] = + img->mpr[2+ioff][3+joff] = (P_J + 2*P_I + P_X + 2) / 4; + img->mpr[0+ioff][0+joff] = + img->mpr[1+ioff][1+joff] = + img->mpr[2+ioff][2+joff] = + img->mpr[3+ioff][3+joff] = (P_I + 2*P_X + P_A + 2) / 4; + img->mpr[1+ioff][0+joff] = + img->mpr[2+ioff][1+joff] = + img->mpr[3+ioff][2+joff] = (P_X + 2*P_A + P_B + 2) / 4; + img->mpr[2+ioff][0+joff] = + img->mpr[3+ioff][1+joff] = (P_A + 2*P_B + P_C + 2) / 4; + img->mpr[3+ioff][0+joff] = (P_B + 2*P_C + P_D + 2) / 4; + break; + + case DIAG_DOWN_LEFT_PRED: + if (!block_available_up) + printf ("warning: Intra_4x4_Diagonal_Down_Left prediction mode not allowed at mb %d\n",img->current_mb_nr); + + img->mpr[0+ioff][0+joff] = (P_A + P_C + 2*(P_B) + 2) / 4; + img->mpr[1+ioff][0+joff] = + img->mpr[0+ioff][1+joff] = (P_B + P_D + 2*(P_C) + 2) / 4; + img->mpr[2+ioff][0+joff] = + img->mpr[1+ioff][1+joff] = + img->mpr[0+ioff][2+joff] = (P_C + P_E + 2*(P_D) + 2) / 4; + img->mpr[3+ioff][0+joff] = + img->mpr[2+ioff][1+joff] = + img->mpr[1+ioff][2+joff] = + 
img->mpr[0+ioff][3+joff] = (P_D + P_F + 2*(P_E) + 2) / 4; + img->mpr[3+ioff][1+joff] = + img->mpr[2+ioff][2+joff] = + img->mpr[1+ioff][3+joff] = (P_E + P_G + 2*(P_F) + 2) / 4; + img->mpr[3+ioff][2+joff] = + img->mpr[2+ioff][3+joff] = (P_F + P_H + 2*(P_G) + 2) / 4; + img->mpr[3+ioff][3+joff] = (P_G + 3*(P_H) + 2) / 4; + break; + + case VERT_RIGHT_PRED:/* diagonal prediction -22.5 deg to horizontal plane */ + if ((!block_available_up)||(!block_available_left)||(!block_available_up_left)) + printf ("warning: Intra_4x4_Vertical_Right prediction mode not allowed at mb %d\n",img->current_mb_nr); + + img->mpr[0+ioff][0+joff] = + img->mpr[1+ioff][2+joff] = (P_X + P_A + 1) / 2; + img->mpr[1+ioff][0+joff] = + img->mpr[2+ioff][2+joff] = (P_A + P_B + 1) / 2; + img->mpr[2+ioff][0+joff] = + img->mpr[3+ioff][2+joff] = (P_B + P_C + 1) / 2; + img->mpr[3+ioff][0+joff] = (P_C + P_D + 1) / 2; + img->mpr[0+ioff][1+joff] = + img->mpr[1+ioff][3+joff] = (P_I + 2*P_X + P_A + 2) / 4; + img->mpr[1+ioff][1+joff] = + img->mpr[2+ioff][3+joff] = (P_X + 2*P_A + P_B + 2) / 4; + img->mpr[2+ioff][1+joff] = + img->mpr[3+ioff][3+joff] = (P_A + 2*P_B + P_C + 2) / 4; + img->mpr[3+ioff][1+joff] = (P_B + 2*P_C + P_D + 2) / 4; + img->mpr[0+ioff][2+joff] = (P_X + 2*P_I + P_J + 2) / 4; + img->mpr[0+ioff][3+joff] = (P_I + 2*P_J + P_K + 2) / 4; + break; + + case VERT_LEFT_PRED:/* diagonal prediction -22.5 deg to horizontal plane */ + if (!block_available_up) + printf ("warning: Intra_4x4_Vertical_Left prediction mode not allowed at mb %d\n",img->current_mb_nr); + + img->mpr[0+ioff][0+joff] = (P_A + P_B + 1) / 2; + img->mpr[1+ioff][0+joff] = + img->mpr[0+ioff][2+joff] = (P_B + P_C + 1) / 2; + img->mpr[2+ioff][0+joff] = + img->mpr[1+ioff][2+joff] = (P_C + P_D + 1) / 2; + img->mpr[3+ioff][0+joff] = + img->mpr[2+ioff][2+joff] = (P_D + P_E + 1) / 2; + img->mpr[3+ioff][2+joff] = (P_E + P_F + 1) / 2; + img->mpr[0+ioff][1+joff] = (P_A + 2*P_B + P_C + 2) / 4; + img->mpr[1+ioff][1+joff] = + img->mpr[0+ioff][3+joff] = 
(P_B + 2*P_C + P_D + 2) / 4; + img->mpr[2+ioff][1+joff] = + img->mpr[1+ioff][3+joff] = (P_C + 2*P_D + P_E + 2) / 4; + img->mpr[3+ioff][1+joff] = + img->mpr[2+ioff][3+joff] = (P_D + 2*P_E + P_F + 2) / 4; + img->mpr[3+ioff][3+joff] = (P_E + 2*P_F + P_G + 2) / 4; + break; + + case HOR_UP_PRED:/* diagonal prediction -22.5 deg to horizontal plane */ + if (!block_available_left) + printf ("warning: Intra_4x4_Horizontal_Up prediction mode not allowed at mb %d\n",img->current_mb_nr); + + img->mpr[0+ioff][0+joff] = (P_I + P_J + 1) / 2; + img->mpr[1+ioff][0+joff] = (P_I + 2*P_J + P_K + 2) / 4; + img->mpr[2+ioff][0+joff] = + img->mpr[0+ioff][1+joff] = (P_J + P_K + 1) / 2; + img->mpr[3+ioff][0+joff] = + img->mpr[1+ioff][1+joff] = (P_J + 2*P_K + P_L + 2) / 4; + img->mpr[2+ioff][1+joff] = + img->mpr[0+ioff][2+joff] = (P_K + P_L + 1) / 2; + img->mpr[3+ioff][1+joff] = + img->mpr[1+ioff][2+joff] = (P_K + 2*P_L + P_L + 2) / 4; + img->mpr[3+ioff][2+joff] = + img->mpr[1+ioff][3+joff] = + img->mpr[0+ioff][3+joff] = + img->mpr[2+ioff][2+joff] = + img->mpr[2+ioff][3+joff] = + img->mpr[3+ioff][3+joff] = P_L; + break; + + case HOR_DOWN_PRED:/* diagonal prediction -22.5 deg to horizontal plane */ + if ((!block_available_up)||(!block_available_left)||(!block_available_up_left)) + printf ("warning: Intra_4x4_Horizontal_Down prediction mode not allowed at mb %d\n",img->current_mb_nr); + + img->mpr[0+ioff][0+joff] = + img->mpr[2+ioff][1+joff] = (P_X + P_I + 1) / 2; + img->mpr[1+ioff][0+joff] = + img->mpr[3+ioff][1+joff] = (P_I + 2*P_X + P_A + 2) / 4; + img->mpr[2+ioff][0+joff] = (P_X + 2*P_A + P_B + 2) / 4; + img->mpr[3+ioff][0+joff] = (P_A + 2*P_B + P_C + 2) / 4; + img->mpr[0+ioff][1+joff] = + img->mpr[2+ioff][2+joff] = (P_I + P_J + 1) / 2; + img->mpr[1+ioff][1+joff] = + img->mpr[3+ioff][2+joff] = (P_X + 2*P_I + P_J + 2) / 4; + img->mpr[0+ioff][2+joff] = + img->mpr[2+ioff][3+joff] = (P_J + P_K + 1) / 2; + img->mpr[1+ioff][2+joff] = + img->mpr[3+ioff][3+joff] = (P_I + 2*P_J + P_K + 2) / 4; + 
img->mpr[0+ioff][3+joff] = (P_K + P_L + 1) / 2; + img->mpr[1+ioff][3+joff] = (P_J + 2*P_K + P_L + 2) / 4; + break; + + default: + printf("Error: illegal intra_4x4 prediction mode: %d\n",predmode); + return SEARCH_SYNC; + break; + } + + return DECODING_OK; + } + + + /*! + *********************************************************************** + * \return + * best SAD + *********************************************************************** + */ + int intrapred_luma_16x16(struct img_par *img, //!< image parameters + int predmode) //!< prediction mode + { + int s0=0,s1,s2; + + int i,j; + + int ih,iv; + int ib,ic,iaa; + + imgpel **imgY=dec_picture->imgY; + + int mb_nr=img->current_mb_nr; + + PixelPos up; //!< pixel position p(0,-1) + PixelPos left[17]; //!< pixel positions p(-1, -1..15) + + int up_avail, left_avail, left_up_avail; + + s1=s2=0; + + for (i=0;i<17;i++) + { + getNeighbour(mb_nr, -1 , i-1 , 1, &left[i]); + } + + getNeighbour(mb_nr, 0 , -1 , 1, &up); + + if (!active_pps->constrained_intra_pred_flag) + { + up_avail = up.available; + left_avail = left[1].available; + left_up_avail = left[0].available; + } + else + { + up_avail = up.available ? img->intra_block[up.mb_addr] : 0; + for (i=1, left_avail=1; i<17;i++) + left_avail &= left[i].available ? img->intra_block[left[i].mb_addr]: 0; + left_up_avail = left[0].available ? 
img->intra_block[left[0].mb_addr]: 0; + } + + switch (predmode) + { + case VERT_PRED_16: // vertical prediction from block above + if (!up_avail) + error ("invalid 16x16 intra pred Mode VERT_PRED_16",500); + for(j=0;jmpr[i][j]=imgY[up.pos_y][up.pos_x+i];// store predicted 16x16 block + break; + + case HOR_PRED_16: // horisontal prediction from left block + if (!left_avail) + error ("invalid 16x16 intra pred Mode VERT_PRED_16",500); + for(j=0;jmpr[i][j]=imgY[left[j+1].pos_y][left[j+1].pos_x]; // store predicted 16x16 block + break; + + case DC_PRED_16: // DC prediction + s1=s2=0; + for (i=0; i < MB_BLOCK_SIZE; i++) + { + if (up_avail) + s1 += imgY[up.pos_y][up.pos_x+i]; // sum hor pix + if (left_avail) + s2 += imgY[left[i+1].pos_y][left[i+1].pos_x]; // sum vert pix + } + if (up_avail && left_avail) + s0=(s1+s2+16)>>5; // no edge + if (!up_avail && left_avail) + s0=(s2+8)>>4; // upper edge + if (up_avail && !left_avail) + s0=(s1+8)>>4; // left edge + if (!up_avail && !left_avail) + s0=img->dc_pred_value; // top left corner, nothing to predict from + for(i=0;impr[i][j]=s0; + } + break; + case PLANE_16:// 16 bit integer plan pred + if (!up_avail || !left_up_avail || !left_avail) + error ("invalid 16x16 intra pred Mode PLANE_16",500); + + ih=0; + iv=0; + for (i=1;i<9;i++) + { + if (i<8) + ih += i*(imgY[up.pos_y][up.pos_x+7+i] - imgY[up.pos_y][up.pos_x+7-i]); + else + ih += i*(imgY[up.pos_y][up.pos_x+7+i] - imgY[left[0].pos_y][left[0].pos_x]); + + iv += i*(imgY[left[8+i].pos_y][left[8+i].pos_x] - imgY[left[8-i].pos_y][left[8-i].pos_x]); + } + ib=(5*ih+32)>>6; + ic=(5*iv+32)>>6; + + iaa=16*(imgY[up.pos_y][up.pos_x+15]+imgY[left[16].pos_y][left[16].pos_x]); + for (j=0;j< MB_BLOCK_SIZE;j++) + { + for (i=0;i< MB_BLOCK_SIZE;i++) + { + img->mpr[i][j]=max(0,min((iaa+(i-7)*ib +(j-7)*ic + 16)>>5, img->max_imgpel_value)); + } + }// store plane prediction + break; + + default: + { // indication of fault in bitstream,exit + printf("illegal 16x16 intra prediction mode input: 
%d\n",predmode); + return SEARCH_SYNC; + } + } + + return DECODING_OK; + } + + + void intrapred_chroma(struct img_par *img, int uv) + { + int i,j, ii, jj, ioff, joff; + + imgpel ***imgUV = dec_picture->imgUV; + + int js[4][4]; + + int pred; + int ih, iv, ib, ic, iaa; + + int b8, b4; + int yuv = dec_picture->chroma_format_idc - 1; + int blk_x, blk_y; + int block_pos[3][4][4]= //[yuv][b8][b4] + { + { {0, 1, 2, 3},{0, 0, 0, 0},{0, 0, 0, 0},{0, 0, 0, 0}}, + { {0, 1, 2, 3},{2, 3, 2, 3},{0, 0, 0, 0},{0, 0, 0, 0}}, + { {0, 1, 2, 3},{1, 1, 3, 3},{2, 3, 2, 3},{3, 3, 3, 3}} + }; + int s0, s1, s2, s3; + + int mb_nr=img->current_mb_nr; + Macroblock *currMB = &img->mb_data[img->current_mb_nr]; + + PixelPos up; //!< pixel position p(0,-1) + PixelPos left[17]; //!< pixel positions p(-1, -1..16) + + int up_avail, left_avail[2], left_up_avail; + + int cr_MB_x = img->mb_cr_size_x; + int cr_MB_y = img->mb_cr_size_y; + + for (i=0;iconstrained_intra_pred_flag) + { + up_avail = up.available; + left_avail[0] = left_avail[1] = left[1].available; + left_up_avail = left[0].available; + } + else + { + up_avail = up.available ? img->intra_block[up.mb_addr] : 0; + for (i=0, left_avail[0]=1; iintra_block[left[i+1].mb_addr]: 0; + for (i=cr_MB_y/2, left_avail[1]=1; iintra_block[left[i+1].mb_addr]: 0; + left_up_avail = left[0].available ? 
img->intra_block[left[0].mb_addr]: 0; + } + + + if (currMB->c_ipred_mode == DC_PRED_8) + { + // DC prediction + for(b8=0; b8num_blk8x8_uv/2;b8++) + { + for (b4=0; b4<4; b4++) + { + blk_y = subblk_offset_y[yuv][b8][b4] + 1; + blk_x = subblk_offset_x[yuv][b8][b4]; + + s0=s1=s2=s3=0; + js[b8][b4]=img->dc_pred_value; + + //===== get prediction value ===== + switch (block_pos[yuv][b8][b4]) + { + case 0: //===== TOP LEFT ===== + if (up_avail) for (i=blk_x;i<(blk_x+4);i++) s0 += imgUV[uv][up.pos_y][up.pos_x + i]; + if (left_avail[0]) for (i=blk_y;i<(blk_y+4);i++) s2 += imgUV[uv][left[i].pos_y][left[i].pos_x]; + if (up_avail && left_avail[0]) js[b8][b4] = (s0+s2+4) >> 3; + else if (up_avail) js[b8][b4] = (s0 +2) >> 2; + else if (left_avail[0]) js[b8][b4] = (s2 +2) >> 2; + break; + case 1: //===== TOP RIGHT ===== + if (up_avail) for (i=blk_x;i<(blk_x+4);i++) s1 += imgUV[uv][up.pos_y][up.pos_x + i]; + else if (left_avail[0]) for (i=blk_y;i<(blk_y+4);i++) s2 += imgUV[uv][left[i].pos_y][left[i].pos_x]; + if (up_avail) js[b8][b4] = (s1 +2) >> 2; + else if (left_avail[0]) js[b8][b4] = (s2 +2) >> 2; + break; + case 2: //===== BOTTOM LEFT ===== + if (left_avail[1]) for (i=blk_y;i<(blk_y+4);i++) s3 += imgUV[uv][left[i].pos_y][left[i].pos_x]; + else if (up_avail) for (i=blk_x;i<(blk_x+4);i++) s0 += imgUV[uv][up.pos_y][up.pos_x + i]; + if (left_avail[1]) js[b8][b4] = (s3 +2) >> 2; + else if (up_avail) js[b8][b4] = (s0 +2) >> 2; + break; + case 3: //===== BOTTOM RIGHT ===== + if (up_avail) for (i=blk_x;i<(blk_x+4);i++) s1 += imgUV[uv][up.pos_y][up.pos_x + i]; + if (left_avail[1]) for (i=blk_y;i<(blk_y+4);i++) s3 += imgUV[uv][left[i].pos_y][left[i].pos_x]; + if (up_avail && left_avail[1]) js[b8][b4] = (s1+s3+4) >> 3; + else if (up_avail) js[b8][b4] = (s1 +2) >> 2; + else if (left_avail[1]) js[b8][b4] = (s3 +2) >> 2; + break; + } + } + } + } + if (PLANE_8 == currMB->c_ipred_mode) + { + // plane prediction + if (!left_up_avail || !left_avail[0] || !left_avail[1] || !up_avail) + 
error("unexpected PLANE_8 chroma intra prediction mode",-1); + + ih = cr_MB_x/2*(imgUV[uv][up.pos_y][up.pos_x+cr_MB_x-1] - imgUV[uv][left[0].pos_y][left[0].pos_x]); + for (i=0;i>(cr_MB_x == 8?5:6); + ic= ((cr_MB_y == 8?17:5)*iv+2*cr_MB_y)>>(cr_MB_y == 8?5:6); + + iaa=16*(imgUV[uv][left[cr_MB_y].pos_y][left[cr_MB_y].pos_x] + + imgUV[uv][up.pos_y][up.pos_x+cr_MB_x-1]); + + for (j=0; jmpr[i][j]=max(0,min(img->max_imgpel_value_uv,(iaa+(i-cr_MB_x/2+1)*ib+(j-cr_MB_y/2+1)*ic+16)>>5)); + } + else + { + switch (currMB->c_ipred_mode) + { + case DC_PRED_8: + for (b8=0;b8num_blk8x8_uv/2;b8++) + { + for (b4=0;b4<4;b4++) + { + joff = subblk_offset_y[yuv][b8][b4]; + ioff = subblk_offset_x[yuv][b8][b4]; + for (ii=0; iimpr[ii+ioff][jj+joff]=js[b8][b4]; + } + } + } + break; + case HOR_PRED_8: + if (!left_avail[0] || !left_avail[1]) + error("unexpected HOR_PRED_8 chroma intra prediction mode",-1); + + for (j=0;j<2;j++) + { + joff=j*cr_MB_y/2; + for(i=0;i<2;i++) + { + ioff=i*cr_MB_x/2; + for (jj=0; jjmpr[ii+ioff][jj+joff]=pred; + } + } + } + break; + case VERT_PRED_8: + if (!up_avail) + error("unexpected VERT_PRED_8 chroma intra prediction mode",-1); + + for (j=0;j<2;j++) + { + joff=j*cr_MB_y/2; + for(i=0;i<2;i++) + { + ioff=i*cr_MB_x/2; + for (ii=0; iimpr[ii+ioff][jj+joff]=pred; + } + } + } + break; + default: + error("illegal chroma intra prediction mode", 600); + break; + } + } + } + + /*! 
+ *********************************************************************** + * \brief + * Inverse 4x4 transformation, transforms cof to m7 + *********************************************************************** + */ + void itrans(struct img_par *img, //!< image parameters + int ioff, //!< index to 4x4 block + int joff, //!< + int i0, //!< + int j0, + int chroma) + { + int i,j,i1,j1; + int m5[4]; + int m6[4]; + + Boolean lossless_qpprime = ((img->qp + img->bitdepth_luma_qp_scale)==0 && img->lossless_qpprime_flag==1); + + // Residue Color Transform + int residue_transform_flag = img->residue_transform_flag; + + // horizontal + for (j=0;jcof[i0][j0][i][j]; + } + m6[0]=(m5[0]+m5[2]); + m6[1]=(m5[0]-m5[2]); + m6[2]=(m5[1]>>1)-m5[3]; + m6[3]=m5[1]+(m5[3]>>1); + + for (i=0;i<2;i++) + { + i1=3-i; + img->m7[i][j]=m6[i]+m6[i1]; + img->m7[i1][j]=m6[i]-m6[i1]; + } + } + // vertical + for (i=0;im7[i][j]; + + m6[0]=(m5[0]+m5[2]); + m6[1]=(m5[0]-m5[2]); + m6[2]=(m5[1]>>1)-m5[3]; + m6[3]=m5[1]+(m5[3]>>1); + + for (j=0;j<2;j++) + { + j1=3-j; + // Residue Color Transform + if(!residue_transform_flag) + { + if(!chroma) + { + img->m7[i][j] =max(0,min(img->max_imgpel_value,(m6[j]+m6[j1]+((long)img->mpr[i+ioff][j+joff] <>DQ_BITS)); + img->m7[i][j1]=max(0,min(img->max_imgpel_value,(m6[j]-m6[j1]+((long)img->mpr[i+ioff][j1+joff]<>DQ_BITS)); + } else + { + img->m7[i][j] =max(0,min(img->max_imgpel_value_uv,(m6[j]+m6[j1]+((long)img->mpr[i+ioff][j+joff] <>DQ_BITS)); + img->m7[i][j1]=max(0,min(img->max_imgpel_value_uv,(m6[j]-m6[j1]+((long)img->mpr[i+ioff][j1+joff]<>DQ_BITS)); + } + } + else{ + img->m7[i][j] =(m6[j]+m6[j1]+DQ_ROUND)>>DQ_BITS; + img->m7[i][j1]=(m6[j]-m6[j1]+DQ_ROUND)>>DQ_BITS; + } + } + } + + // Residue Color Transform + if(!residue_transform_flag) + { + for (i=0;im7[i][j] = max(0,min(img->max_imgpel_value,img->cof[i0][j0][i][j]+(long)img->mpr[i+ioff][j+joff])); + else + img->m7[i][j] = max(0,min(img->max_imgpel_value_uv,img->cof[i0][j0][i][j]+(long)img->mpr[i+ioff][j+joff])); + 
} + else + { + for (i=0;im7[i][j] = img->cof[i0][j0][i][j]; + } + } + + /*! + ************************************************************************ + * \brief + * For mapping the q-matrix to the active id and calculate quantisation values + * + * \param pps + * Picture parameter set + * \param sps + * Sequence parameter set + * + ************************************************************************ + */ + void AssignQuantParam(pic_parameter_set_rbsp_t* pps, seq_parameter_set_rbsp_t* sps) + { + int i; + + if(!pps->pic_scaling_matrix_present_flag && !sps->seq_scaling_matrix_present_flag) + { + for(i=0; i<8; i++) + qmatrix[i] = (i<6) ? quant_org:quant8_org; + } + else + { + if(sps->seq_scaling_matrix_present_flag) // check sps first + { + for(i=0; i<8; i++) + { + if(i<6) + { + if(!sps->seq_scaling_list_present_flag[i]) // fall-back rule A + { + if((i==0) || (i==3)) + qmatrix[i] = (i==0) ? quant_intra_default:quant_inter_default; + else + qmatrix[i] = qmatrix[i-1]; + } + else + { + if(sps->UseDefaultScalingMatrix4x4Flag[i]) + qmatrix[i] = (i<3) ? quant_intra_default:quant_inter_default; + else + qmatrix[i] = sps->ScalingList4x4[i]; + } + } + else + { + if(!sps->seq_scaling_list_present_flag[i] || sps->UseDefaultScalingMatrix8x8Flag[i-6]) // fall-back rule A + qmatrix[i] = (i==6) ? quant8_intra_default:quant8_inter_default; + else + qmatrix[i] = sps->ScalingList8x8[i-6]; + } + } + } + + if(pps->pic_scaling_matrix_present_flag) // then check pps + { + for(i=0; i<8; i++) + { + if(i<6) + { + if(!pps->pic_scaling_list_present_flag[i]) // fall-back rule B + { + if((i==0) || (i==3)) + { + if(!sps->seq_scaling_matrix_present_flag) + qmatrix[i] = (i==0) ? quant_intra_default:quant_inter_default; + } + else + qmatrix[i] = qmatrix[i-1]; + } + else + { + if(pps->UseDefaultScalingMatrix4x4Flag[i]) + qmatrix[i] = (i<3) ? 
quant_intra_default:quant_inter_default; + else + qmatrix[i] = pps->ScalingList4x4[i]; + } + } + else + { + if(!pps->pic_scaling_list_present_flag[i]) // fall-back rule B + { + if(!sps->seq_scaling_matrix_present_flag) + qmatrix[i] = (i==6) ? quant8_intra_default:quant8_inter_default; + } + else if(pps->UseDefaultScalingMatrix8x8Flag[i-6]) + qmatrix[i] = (i==6) ? quant8_intra_default:quant8_inter_default; + else + qmatrix[i] = pps->ScalingList8x8[i-6]; + } + } + } + } + + CalculateQuantParam(); + if(pps->transform_8x8_mode_flag) + CalculateQuant8Param(); + } + + /*! + ************************************************************************ + * \brief + * For calculating the quantisation values at frame level + * + ************************************************************************ + */ + void CalculateQuantParam() + { + int i, j, k, temp; + + for(k=0; k<6; k++) + for(j=0; j<4; j++) + for(i=0; i<4; i++) + { + temp = (i<<2)+j; + InvLevelScale4x4Luma_Intra[k][j][i] = dequant_coef[k][j][i]*qmatrix[0][temp]; + InvLevelScale4x4Chroma_Intra[0][k][j][i] = dequant_coef[k][j][i]*qmatrix[1][temp]; + InvLevelScale4x4Chroma_Intra[1][k][j][i] = dequant_coef[k][j][i]*qmatrix[2][temp]; + + InvLevelScale4x4Luma_Inter[k][j][i] = dequant_coef[k][j][i]*qmatrix[3][temp]; + InvLevelScale4x4Chroma_Inter[0][k][j][i] = dequant_coef[k][j][i]*qmatrix[4][temp]; + InvLevelScale4x4Chroma_Inter[1][k][j][i] = dequant_coef[k][j][i]*qmatrix[5][temp]; + } + } + + /*! 
+ *********************************************************************** + * \brief + * Luma DC inverse transform + *********************************************************************** + */ + void itrans_2( + struct img_par *img) //!< image parameters + { + int i,j,i1,j1; + int M5[4]; + int M6[4]; + + int qp_per = (img->qp + img->bitdepth_luma_qp_scale - MIN_QP)/6; + int qp_rem = (img->qp + img->bitdepth_luma_qp_scale - MIN_QP)%6; + + int qp_const = 1<<(5-qp_per); + + // horizontal + for (j=0;j<4;j++) + { + for (i=0;i<4;i++) + M5[i]=img->cof[i][j][0][0]; + + M6[0]=M5[0]+M5[2]; + M6[1]=M5[0]-M5[2]; + M6[2]=M5[1]-M5[3]; + M6[3]=M5[1]+M5[3]; + + for (i=0;i<2;i++) + { + i1=3-i; + img->cof[i ][j][0][0]= M6[i]+M6[i1]; + img->cof[i1][j][0][0]=M6[i]-M6[i1]; + } + } + + // vertical + for (i=0;i<4;i++) + { + for (j=0;j<4;j++) + M5[j]=img->cof[i][j][0][0]; + + M6[0]=M5[0]+M5[2]; + M6[1]=M5[0]-M5[2]; + M6[2]=M5[1]-M5[3]; + M6[3]=M5[1]+M5[3]; + + for (j=0;j<2;j++) + { + j1=3-j; + + if(qp_per<6) + { + img->cof[i][j][0][0] =((M6[j]+M6[j1])*InvLevelScale4x4Luma_Intra[qp_rem][0][0]+qp_const)>>(6-qp_per); + img->cof[i][j1][0][0]=((M6[j]-M6[j1])*InvLevelScale4x4Luma_Intra[qp_rem][0][0]+qp_const)>>(6-qp_per); + } + else + { + img->cof[i][j][0][0] =((M6[j]+M6[j1])*InvLevelScale4x4Luma_Intra[qp_rem][0][0])<<(qp_per-6); + img->cof[i][j1][0][0]=((M6[j]-M6[j1])*InvLevelScale4x4Luma_Intra[qp_rem][0][0])<<(qp_per-6); + } + } + } + } + + + void itrans_sp(struct img_par *img, //!< image parameters + int ioff, //!< index to 4x4 block + int joff, //!< + int i0, //!< + int j0) //!< + { + int i,j,i1,j1; + int m5[4]; + int m6[4]; + int predicted_block[BLOCK_SIZE][BLOCK_SIZE],ilev; + + int qp_per = (img->qp-MIN_QP)/6; + int qp_rem = (img->qp-MIN_QP)%6; + int q_bits = Q_BITS+qp_per; + + int qp_per_sp = (img->qpsp-MIN_QP)/6; + int qp_rem_sp = (img->qpsp-MIN_QP)%6; + int q_bits_sp = Q_BITS+qp_per_sp; + int qp_const2=(1<type == SI_SLICE) //ES modified + { + qp_per = (img->qpsp-MIN_QP)/6; + qp_rem = 
(img->qpsp-MIN_QP)%6; + q_bits = Q_BITS+qp_per; + } + + for (j=0; j< BLOCK_SIZE; j++) + for (i=0; i< BLOCK_SIZE; i++) + predicted_block[i][j]=img->mpr[i+ioff][j+joff]; + for (j=0; j < BLOCK_SIZE; j++) + { + for (i=0; i < 2; i++) + { + i1=3-i; + m5[i]=predicted_block[i][j]+predicted_block[i1][j]; + m5[i1]=predicted_block[i][j]-predicted_block[i1][j]; + } + predicted_block[0][j]=(m5[0]+m5[1]); + predicted_block[2][j]=(m5[0]-m5[1]); + predicted_block[1][j]=m5[3]*2+m5[2]; + predicted_block[3][j]=m5[3]-m5[2]*2; + } + + // Vertival transform + + for (i=0; i < BLOCK_SIZE; i++) + { + for (j=0; j < 2; j++) + { + j1=3-j; + m5[j]=predicted_block[i][j]+predicted_block[i][j1]; + m5[j1]=predicted_block[i][j]-predicted_block[i][j1]; + } + predicted_block[i][0]=(m5[0]+m5[1]); + predicted_block[i][2]=(m5[0]-m5[1]); + predicted_block[i][1]=m5[3]*2+m5[2]; + predicted_block[i][3]=m5[3]-m5[2]*2; + } + + for (j=0;jcof[i0][j0][i][j]=(img->cof[i0][j0][i][j] >> qp_per) / dequant_coef[qp_rem][i][j]; + if(img->sp_switch || img->type==SI_SLICE) //M.W. 
patched for SI + { + ilev=(abs(predicted_block[i][j]) * quant_coef[qp_rem_sp][i][j] + qp_const2) >> q_bits_sp; //ES added + ilev= sign(ilev,predicted_block[i][j])+ img->cof[i0][j0][i][j]; //ES added + img->cof[i0][j0][i][j] = sign(abs(ilev) * dequant_coef[qp_rem_sp][i][j] << qp_per_sp ,ilev) ; //ES added + } //ES added + else + { //ES added + ilev=((img->cof[i0][j0][i][j]*dequant_coef[qp_rem][i][j]*A[i][j]<< qp_per) >>6)+predicted_block[i][j] ; + img->cof[i0][j0][i][j]=sign((abs(ilev) * quant_coef[qp_rem_sp][i][j] + qp_const2) >> q_bits_sp, ilev) * dequant_coef[qp_rem_sp][i][j] << qp_per_sp; + } + } + // horizontal + for (j=0;jcof[i0][j0][i][j]; + } + m6[0]=(m5[0]+m5[2]); + m6[1]=(m5[0]-m5[2]); + m6[2]=(m5[1]>>1)-m5[3]; + m6[3]=m5[1]+(m5[3]>>1); + + for (i=0;i<2;i++) + { + i1=3-i; + img->m7[i][j]=m6[i]+m6[i1]; + img->m7[i1][j]=m6[i]-m6[i1]; + } + } + // vertical + for (i=0;im7[i][j]; + + m6[0]=(m5[0]+m5[2]); + m6[1]=(m5[0]-m5[2]); + m6[2]=(m5[1]>>1)-m5[3]; + m6[3]=m5[1]+(m5[3]>>1); + + for (j=0;j<2;j++) + { + j1=3-j; + img->m7[i][j] =max(0,min(img->max_imgpel_value,(m6[j]+m6[j1]+DQ_ROUND)>>DQ_BITS)); + img->m7[i][j1]=max(0,min(img->max_imgpel_value,(m6[j]-m6[j1]+DQ_ROUND)>>DQ_BITS)); + } + } + } + + /*! + *********************************************************************** + * \brief + * The routine performs transform,quantization,inverse transform, adds the diff. + * to the prediction and writes the result to the decoded luma frame. Includes the + * RD constrained quantization also. + * + * \par Input: + * block_x,block_y: Block position inside a macro block (0,4,8,12). + * + * \par Output: + * nonzero: 0 if no levels are nonzero. 1 if there are nonzero levels. \n + * coeff_cost: Counter for nonzero coefficients, used to discard expencive levels. 
+ ************************************************************************ + */ + void copyblock_sp(struct img_par *img,int block_x,int block_y) + { + int sign(int a,int b); + + int i,j,i1,j1,m5[4],m6[4]; + + int predicted_block[BLOCK_SIZE][BLOCK_SIZE]; + int qp_per = (img->qpsp-MIN_QP)/6; + int qp_rem = (img->qpsp-MIN_QP)%6; + int q_bits = Q_BITS+qp_per; + int qp_const2=(1<mpr[i+block_x][j+block_y]; + + for (j=0; j < BLOCK_SIZE; j++) + { + for (i=0; i < 2; i++) + { + i1=3-i; + m5[i]=predicted_block[i][j]+predicted_block[i1][j]; + m5[i1]=predicted_block[i][j]-predicted_block[i1][j]; + } + predicted_block[0][j]=(m5[0]+m5[1]); + predicted_block[2][j]=(m5[0]-m5[1]); + predicted_block[1][j]=m5[3]*2+m5[2]; + predicted_block[3][j]=m5[3]-m5[2]*2; + } + + // Vertival transform + + for (i=0; i < BLOCK_SIZE; i++) + { + for (j=0; j < 2; j++) + { + j1=3-j; + m5[j]=predicted_block[i][j]+predicted_block[i][j1]; + m5[j1]=predicted_block[i][j]-predicted_block[i][j1]; + } + predicted_block[i][0]=(m5[0]+m5[1]); + predicted_block[i][2]=(m5[0]-m5[1]); + predicted_block[i][1]=m5[3]*2+m5[2]; + predicted_block[i][3]=m5[3]-m5[2]*2; + } + + // Quant + for (j=0;j < BLOCK_SIZE; j++) + for (i=0; i < BLOCK_SIZE; i++) + img->m7[i][j]=sign((abs(predicted_block[i][j])* quant_coef[qp_rem][i][j]+qp_const2)>> q_bits,predicted_block[i][j])*dequant_coef[qp_rem][i][j]<m7[i][j]; + } + m6[0]=(m5[0]+m5[2]); + m6[1]=(m5[0]-m5[2]); + m6[2]=(m5[1]>>1)-m5[3]; + m6[3]=m5[1]+(m5[3]>>1); + + for (i=0;i<2;i++) + { + i1=3-i; + img->m7[i][j]=m6[i]+m6[i1]; + img->m7[i1][j]=m6[i]-m6[i1]; + } + } + // vertical + for (i=0;im7[i][j]; + + m6[0]=(m5[0]+m5[2]); + m6[1]=(m5[0]-m5[2]); + m6[2]=(m5[1]>>1)-m5[3]; + m6[3]=m5[1]+(m5[3]>>1); + + for (j=0;j<2;j++) + { + j1=3-j; + img->m7[i][j] =max(0,min(img->max_imgpel_value,(m6[j]+m6[j1]+DQ_ROUND)>>DQ_BITS)); + img->m7[i][j1]=max(0,min(img->max_imgpel_value,(m6[j]-m6[j1]+DQ_ROUND)>>DQ_BITS)); + } + } + + // Decoded block moved to frame memory + + for (j=0; j < BLOCK_SIZE; j++) + 
for (i=0; i < BLOCK_SIZE; i++) + dec_picture->imgY[img->pix_y+block_y+j][img->pix_x+block_x+i]=img->m7[i][j]; + + } + + void itrans_sp_chroma(struct img_par *img,int ll) + { + int i,j,i1,j2,ilev,n2,n1,j1,mb_y; + int m5[BLOCK_SIZE]; + int predicted_chroma_block[MB_BLOCK_SIZE/2][MB_BLOCK_SIZE/2],mp1[BLOCK_SIZE]; + int qp_per,qp_rem,q_bits; + int qp_per_sp,qp_rem_sp,q_bits_sp,qp_const2; + + qp_per = ((img->qp<0?img->qp:QP_SCALE_CR[img->qp])-MIN_QP)/6; + qp_rem = ((img->qp<0?img->qp:QP_SCALE_CR[img->qp])-MIN_QP)%6; + q_bits = Q_BITS+qp_per; + + qp_per_sp = ((img->qpsp<0?img->qpsp:QP_SCALE_CR[img->qpsp])-MIN_QP)/6; + qp_rem_sp = ((img->qpsp<0?img->qpsp:QP_SCALE_CR[img->qpsp])-MIN_QP)%6; + q_bits_sp = Q_BITS+qp_per_sp; + qp_const2=(1<type == SI_SLICE) + { + qp_per = ((img->qpsp < 0 ? img->qpsp : QP_SCALE_CR[img->qpsp]) - MIN_QP) / 6; + qp_rem = ((img->qpsp < 0 ? img->qpsp : QP_SCALE_CR[img->qpsp]) - MIN_QP) % 6; + q_bits = Q_BITS + qp_per; + } + + for (j=0; j < MB_BLOCK_SIZE/2; j++) + for (i=0; i < MB_BLOCK_SIZE/2; i++) + { + predicted_chroma_block[i][j]=img->mpr[i][j]; + img->mpr[i][j]=0; + } + for (n2=0; n2 <= BLOCK_SIZE; n2 += BLOCK_SIZE) + { + for (n1=0; n1 <= BLOCK_SIZE; n1 += BLOCK_SIZE) + { + // Horizontal transform. + for (j=0; j < BLOCK_SIZE; j++) + { + mb_y=n2+j; + for (i=0; i < 2; i++) + { + i1=3-i; + m5[i]=predicted_chroma_block[i+n1][mb_y]+predicted_chroma_block[i1+n1][mb_y]; + m5[i1]=predicted_chroma_block[i+n1][mb_y]-predicted_chroma_block[i1+n1][mb_y]; + } + predicted_chroma_block[n1][mb_y] =(m5[0]+m5[1]); + predicted_chroma_block[n1+2][mb_y]=(m5[0]-m5[1]); + predicted_chroma_block[n1+1][mb_y]=m5[3]*2+m5[2]; + predicted_chroma_block[n1+3][mb_y]=m5[3]-m5[2]*2; + } + + // Vertical transform. 
+ + for (i=0; i < BLOCK_SIZE; i++) + { + j1=n1+i; + for (j=0; j < 2; j++) + { + j2=3-j; + m5[j]=predicted_chroma_block[j1][n2+j]+predicted_chroma_block[j1][n2+j2]; + m5[j2]=predicted_chroma_block[j1][n2+j]-predicted_chroma_block[j1][n2+j2]; + } + predicted_chroma_block[j1][n2+0]=(m5[0]+m5[1]); + predicted_chroma_block[j1][n2+2]=(m5[0]-m5[1]); + predicted_chroma_block[j1][n2+1]=m5[3]*2+m5[2]; + predicted_chroma_block[j1][n2+3]=m5[3]-m5[2]*2; + } + } + } + + // 2X2 transform of DC coeffs. + mp1[0]=(predicted_chroma_block[0][0]+predicted_chroma_block[4][0]+predicted_chroma_block[0][4]+predicted_chroma_block[4][4]); + mp1[1]=(predicted_chroma_block[0][0]-predicted_chroma_block[4][0]+predicted_chroma_block[0][4]-predicted_chroma_block[4][4]); + mp1[2]=(predicted_chroma_block[0][0]+predicted_chroma_block[4][0]-predicted_chroma_block[0][4]-predicted_chroma_block[4][4]); + mp1[3]=(predicted_chroma_block[0][0]-predicted_chroma_block[4][0]-predicted_chroma_block[0][4]+predicted_chroma_block[4][4]); + + for (n1=0; n1 < 2; n1 ++) + for (n2=0; n2 < 2; n2 ++) + { + if (img->sp_switch || img->type==SI_SLICE) //M.W. patched for SI + { + //quantization fo predicted block + ilev=(abs (mp1[n1+n2*2]) * quant_coef[qp_rem_sp][0][0] + 2 * qp_const2) >> (q_bits_sp + 1); + //addition + ilev=img->cof[n1+ll][4+n2][0][0]+sign(ilev,mp1[n1+n2*2]); + //dequantization + mp1[n1+n2*2] =ilev*dequant_coef[qp_rem_sp][0][0]<cof[n1+ll][4+n2][0][0]*dequant_coef[qp_rem][0][0]*A[0][0]<< qp_per) >>5)+mp1[n1+n2*2] ; + mp1[n1+n2*2]=sign((abs(ilev)* quant_coef[qp_rem_sp][0][0]+ 2 * qp_const2)>> (q_bits_sp+1),ilev)*dequant_coef[qp_rem_sp][0][0]<cof[n1+ll][4+n2][i][j] = (img->cof[n1+ll][4+n2][i][j] >> qp_per) / dequant_coef[qp_rem][i][j]; + + if (img->sp_switch || img->type==SI_SLICE) //M.W. 
patched for SI + { + //quantization of the predicted block + ilev = (abs(predicted_chroma_block[n1*BLOCK_SIZE+i][n2*BLOCK_SIZE+j]) * quant_coef[qp_rem_sp][i][j] + qp_const2) >> q_bits_sp; + //addition of the residual + ilev = sign(ilev,predicted_chroma_block[n1*BLOCK_SIZE+i][n2*BLOCK_SIZE+j]) + img->cof[n1+ll][4+n2][i][j]; + // Inverse quantization + img->cof[n1+ll][4+n2][i][j] = ilev * dequant_coef[qp_rem_sp][i][j] << qp_per_sp ; + } + else + { + //dequantization and addition of the predicted block + ilev=((img->cof[n1+ll][4+n2][i][j]*dequant_coef[qp_rem][i][j]*A[i][j]<< qp_per) >>6)+predicted_chroma_block[n1*BLOCK_SIZE+i][n2*BLOCK_SIZE+j] ; + //quantization and dequantization + img->cof[n1+ll][4+n2][i][j] = sign((abs(ilev) * quant_coef[qp_rem_sp][i][j] + qp_const2)>> q_bits_sp,ilev)*dequant_coef[qp_rem_sp][i][j]<cof[0+ll][4][0][0]=(mp1[0]+mp1[1]+mp1[2]+mp1[3])>>1; + img->cof[1+ll][4][0][0]=(mp1[0]-mp1[1]+mp1[2]-mp1[3])>>1; + img->cof[0+ll][5][0][0]=(mp1[0]+mp1[1]-mp1[2]-mp1[3])>>1; + img->cof[1+ll][5][0][0]=(mp1[0]-mp1[1]-mp1[2]+mp1[3])>>1; + } + + int sign(int a , int b) + { + int x; + + x=abs(a); + if (b>0) + return(x); + else return(-x); + } Index: llvm-test/MultiSource/Applications/JM/ldecod/block.h diff -c /dev/null llvm-test/MultiSource/Applications/JM/ldecod/block.h:1.1 *** /dev/null Sat Feb 11 04:33:42 2006 --- llvm-test/MultiSource/Applications/JM/ldecod/block.h Sat Feb 11 04:33:22 2006 *************** *** 0 **** --- 1,32 ---- + + /*! 
+ ************************************************************************ + * \file block.h + * + * \brief + * definitions for block decoding functions + * + * \author + * Inge Lille-Langoy \n + * Telenor Satellite Services \n + * P.O.Box 6914 St.Olavs plass \n + * N-0130 Oslo, Norway + * + ************************************************************************ + */ + + #ifndef _BLOCK_H_ + #define _BLOCK_H_ + + #include "global.h" + + #define DQ_BITS 6 + #define DQ_ROUND (1<<(DQ_BITS-1)) + + extern const byte QP_SCALE_CR[52] ; + extern const int dequant_coef[6][4][4]; + extern const unsigned char subblk_offset_x[3][8][4]; + extern const unsigned char subblk_offset_y[3][8][4]; + + #endif + Index: llvm-test/MultiSource/Applications/JM/ldecod/cabac.c diff -c /dev/null llvm-test/MultiSource/Applications/JM/ldecod/cabac.c:1.1 *** /dev/null Sat Feb 11 04:33:42 2006 --- llvm-test/MultiSource/Applications/JM/ldecod/cabac.c Sat Feb 11 04:33:22 2006 *************** *** 0 **** --- 1,1802 ---- + + /*! 
+ ************************************************************************************* + * \file cabac.c + * + * \brief + * CABAC entropy coding routines + * + * \author + * Main contributors (see contributors.h for copyright, address and affiliation details) + * - Detlev Marpe + ************************************************************************************** + */ + + #include + #include + + #include "global.h" + #include "cabac.h" + #include "memalloc.h" + #include "elements.h" + #include "image.h" + #include "biaridecod.h" + #include "mb_access.h" + + int symbolCount = 0; + int last_dquant = 0; + + + /*********************************************************************** + * L O C A L L Y D E F I N E D F U N C T I O N P R O T O T Y P E S + *********************************************************************** + */ + unsigned int unary_bin_decode(DecodingEnvironmentPtr dep_dp, + BiContextTypePtr ctx, + int ctx_offset); + + + unsigned int unary_bin_max_decode(DecodingEnvironmentPtr dep_dp, + BiContextTypePtr ctx, + int ctx_offset, + unsigned int max_symbol); + + unsigned int unary_exp_golomb_level_decode( DecodingEnvironmentPtr dep_dp, + BiContextTypePtr ctx); + + unsigned int unary_exp_golomb_mv_decode(DecodingEnvironmentPtr dep_dp, + BiContextTypePtr ctx, + unsigned int max_bin); + + + void CheckAvailabilityOfNeighborsCABAC() + { + Macroblock *currMB = &img->mb_data[img->current_mb_nr]; + PixelPos up, left; + + getNeighbour(img->current_mb_nr, -1, 0, 1, &left); + getNeighbour(img->current_mb_nr, 0, -1, 1, &up); + + if (up.available) + currMB->mb_available_up = &img->mb_data[up.mb_addr]; + else + currMB->mb_available_up = NULL; + + if (left.available) + currMB->mb_available_left = &img->mb_data[left.mb_addr]; + else + currMB->mb_available_left = NULL; + } + + void cabac_new_slice() + { + last_dquant=0; + } + + /*! 
+ ************************************************************************ + * \brief + * Allocation of contexts models for the motion info + * used for arithmetic decoding + * + ************************************************************************ + */ + MotionInfoContexts* create_contexts_MotionInfo(void) + { + MotionInfoContexts *deco_ctx; + + deco_ctx = (MotionInfoContexts*) calloc(1, sizeof(MotionInfoContexts) ); + if( deco_ctx == NULL ) + no_mem_exit("create_contexts_MotionInfo: deco_ctx"); + + return deco_ctx; + } + + + /*! + ************************************************************************ + * \brief + * Allocates of contexts models for the texture info + * used for arithmetic decoding + ************************************************************************ + */ + TextureInfoContexts* create_contexts_TextureInfo(void) + { + TextureInfoContexts *deco_ctx; + + deco_ctx = (TextureInfoContexts*) calloc(1, sizeof(TextureInfoContexts) ); + if( deco_ctx == NULL ) + no_mem_exit("create_contexts_TextureInfo: deco_ctx"); + + return deco_ctx; + } + + + + + /*! + ************************************************************************ + * \brief + * Frees the memory of the contexts models + * used for arithmetic decoding of the motion info. + ************************************************************************ + */ + void delete_contexts_MotionInfo(MotionInfoContexts *deco_ctx) + { + if( deco_ctx == NULL ) + return; + + free( deco_ctx ); + + return; + } + + + /*! + ************************************************************************ + * \brief + * Frees the memory of the contexts models + * used for arithmetic decoding of the texture info. 
+ ************************************************************************ + */ + void delete_contexts_TextureInfo(TextureInfoContexts *deco_ctx) + { + if( deco_ctx == NULL ) + return; + + free( deco_ctx ); + + return; + } + + void readFieldModeInfo_CABAC( SyntaxElement *se, + struct inp_par *inp, + struct img_par *img, + DecodingEnvironmentPtr dep_dp) + { + int a,b,act_ctx; + MotionInfoContexts *ctx = (img->currentSlice)->mot_ctx; + Macroblock *currMB = &img->mb_data[img->current_mb_nr]; + + if (currMB->mbAvailA) + a = img->mb_data[currMB->mbAddrA].mb_field; + else + a = 0; + if (currMB->mbAvailB) + b = img->mb_data[currMB->mbAddrB].mb_field; + else + b=0; + + act_ctx = a + b; + + se->value1 = biari_decode_symbol (dep_dp, &ctx->mb_aff_contexts[act_ctx]); + + #if TRACE + fprintf(p_trace, "@%d %s\t\t%d\n",symbolCount++, se->tracestring, se->value1); + fflush(p_trace); + #endif + } + + + int check_next_mb_and_get_field_mode_CABAC( SyntaxElement *se, + struct img_par *img, + struct inp_par *inp, + DataPartition *act_dp) + { + BiContextTypePtr mb_type_ctx_copy[4]; + BiContextTypePtr mb_aff_ctx_copy; + DecodingEnvironmentPtr dep_dp_copy; + + int length; + DecodingEnvironmentPtr dep_dp = &(act_dp->de_cabac); + + int bframe = (img->type==B_SLICE); + int skip = 0; + int field = 0; + int i; + + Macroblock *currMB; + + //get next MB + img->current_mb_nr++; + + currMB = &img->mb_data[img->current_mb_nr]; + currMB->slice_nr = img->current_slice_nr; + currMB->mb_field = img->mb_data[img->current_mb_nr-1].mb_field; + + CheckAvailabilityOfNeighbors(); + CheckAvailabilityOfNeighborsCABAC(); + + //create + dep_dp_copy = (DecodingEnvironmentPtr) calloc(1, sizeof(DecodingEnvironment) ); + for (i=0;i<4;i++) + mb_type_ctx_copy[i] = (BiContextTypePtr) calloc(NUM_MB_TYPE_CTX, sizeof(BiContextType) ); + mb_aff_ctx_copy = (BiContextTypePtr) calloc(NUM_MB_AFF_CTX, sizeof(BiContextType) ); + + //copy + memcpy(dep_dp_copy,dep_dp,sizeof(DecodingEnvironment)); + length = 
*(dep_dp_copy->Dcodestrm_len) = *(dep_dp->Dcodestrm_len); + for (i=0;i<4;i++) + memcpy(mb_type_ctx_copy[i], img->currentSlice->mot_ctx->mb_type_contexts[i],NUM_MB_TYPE_CTX*sizeof(BiContextType) ); + memcpy(mb_aff_ctx_copy, img->currentSlice->mot_ctx->mb_aff_contexts,NUM_MB_AFF_CTX*sizeof(BiContextType) ); + + + //check_next_mb + #if TRACE + strncpy(se->tracestring, "mb_skip_flag (of following bottom MB)", TRACESTRING_SIZE); + #endif + last_dquant = 0; + readMB_skip_flagInfo_CABAC(se,inp,img,dep_dp); + + skip = (bframe)? (se->value1==0 && se->value2==0) : (se->value1==0); + if (!skip) + { + #if TRACE + strncpy(se->tracestring, "mb_field_decoding_flag (of following bottom MB)", TRACESTRING_SIZE); + #endif + readFieldModeInfo_CABAC( se,inp,img,dep_dp); + field = se->value1; + img->mb_data[img->current_mb_nr-1].mb_field = field; + } + + //reset + img->current_mb_nr--; + + memcpy(dep_dp,dep_dp_copy,sizeof(DecodingEnvironment)); + *(dep_dp->Dcodestrm_len) = length; + for (i=0;i<4;i++) + memcpy(img->currentSlice->mot_ctx->mb_type_contexts[i],mb_type_ctx_copy[i], NUM_MB_TYPE_CTX*sizeof(BiContextType) ); + memcpy( img->currentSlice->mot_ctx->mb_aff_contexts,mb_aff_ctx_copy,NUM_MB_AFF_CTX*sizeof(BiContextType) ); + + CheckAvailabilityOfNeighborsCABAC(); + + //delete + free(dep_dp_copy); + for (i=0;i<4;i++) + free(mb_type_ctx_copy[i]); + free(mb_aff_ctx_copy); + + return skip; + } + + + + + /*! + ************************************************************************ + * \brief + * This function is used to arithmetically decode the motion + * vector data of a B-frame MB. 
+ ************************************************************************ + */ + void readMVD_CABAC( SyntaxElement *se, + struct inp_par *inp, + struct img_par *img, + DecodingEnvironmentPtr dep_dp) + { + int i = img->subblock_x; + int j = img->subblock_y; + int a, b; + int act_ctx; + int act_sym; + int mv_local_err; + int mv_sign; + int list_idx = se->value2 & 0x01; + int k = (se->value2>>1); // MVD component + + PixelPos block_a, block_b; + + MotionInfoContexts *ctx = img->currentSlice->mot_ctx; + Macroblock *currMB = &img->mb_data[img->current_mb_nr]; + + getLuma4x4Neighbour(img->current_mb_nr, i, j, -1, 0, &block_a); + getLuma4x4Neighbour(img->current_mb_nr, i, j, 0, -1, &block_b); + + if (block_b.available) + { + b = absm(img->mb_data[block_b.mb_addr].mvd[list_idx][block_b.y][block_b.x][k]); + if (img->MbaffFrameFlag && (k==1)) + { + if ((currMB->mb_field==0) && (img->mb_data[block_b.mb_addr].mb_field==1)) + b *= 2; + else if ((currMB->mb_field==1) && (img->mb_data[block_b.mb_addr].mb_field==0)) + b /= 2; + } + } + else + b=0; + + if (block_a.available) + { + a = absm(img->mb_data[block_a.mb_addr].mvd[list_idx][block_a.y][block_a.x][k]); + if (img->MbaffFrameFlag && (k==1)) + { + if ((currMB->mb_field==0) && (img->mb_data[block_a.mb_addr].mb_field==1)) + a *= 2; + else if ((currMB->mb_field==1) && (img->mb_data[block_a.mb_addr].mb_field==0)) + a /= 2; + } + } + else + a = 0; + + if ((mv_local_err=a+b)<3) + act_ctx = 5*k; + else + { + if (mv_local_err>32) + act_ctx=5*k+3; + else + act_ctx=5*k+2; + } + se->context = act_ctx; + + act_sym = biari_decode_symbol(dep_dp,&ctx->mv_res_contexts[0][act_ctx] ); + + if (act_sym != 0) + { + act_ctx=5*k; + act_sym = unary_exp_golomb_mv_decode(dep_dp,ctx->mv_res_contexts[1]+act_ctx,3); + act_sym++; + mv_sign = biari_decode_symbol_eq_prob(dep_dp); + + if(mv_sign) + act_sym = -act_sym; + } + se->value1 = act_sym; + + #if TRACE + fprintf(p_trace, "@%d %s\t\t\t%d \n",symbolCount++, se->tracestring, se->value1); + 
fflush(p_trace); + #endif + } + + + /*! + ************************************************************************ + * \brief + * This function is used to arithmetically decode the 8x8 block type. + ************************************************************************ + */ + void readB8_typeInfo_CABAC (SyntaxElement *se, + struct inp_par *inp, + struct img_par *img, + DecodingEnvironmentPtr dep_dp) + { + int act_sym = 0; + int bframe = (img->type==B_SLICE); + + MotionInfoContexts *ctx = (img->currentSlice)->mot_ctx; + + + if (!bframe) + { + if (biari_decode_symbol (dep_dp, &ctx->b8_type_contexts[0][1])) + { + act_sym = 0; + } + else + { + if (biari_decode_symbol (dep_dp, &ctx->b8_type_contexts[0][3])) + { + if (biari_decode_symbol (dep_dp, &ctx->b8_type_contexts[0][4])) act_sym = 2; + else act_sym = 3; + } + else + { + act_sym = 1; + } + } + } + else + { + if (biari_decode_symbol (dep_dp, &ctx->b8_type_contexts[1][0])) + { + if (biari_decode_symbol (dep_dp, &ctx->b8_type_contexts[1][1])) + { + if (biari_decode_symbol (dep_dp, &ctx->b8_type_contexts[1][2])) + { + if (biari_decode_symbol (dep_dp, &ctx->b8_type_contexts[1][3])) + { + act_sym = 10; + if (biari_decode_symbol (dep_dp, &ctx->b8_type_contexts[1][3])) act_sym++; + } + else + { + act_sym = 6; + if (biari_decode_symbol (dep_dp, &ctx->b8_type_contexts[1][3])) act_sym+=2; + if (biari_decode_symbol (dep_dp, &ctx->b8_type_contexts[1][3])) act_sym++; + } + } + else + { + act_sym=2; + if (biari_decode_symbol (dep_dp, &ctx->b8_type_contexts[1][3])) act_sym+=2; + if (biari_decode_symbol (dep_dp, &ctx->b8_type_contexts[1][3])) act_sym+=1; + } + } + else + { + if (biari_decode_symbol (dep_dp, &ctx->b8_type_contexts[1][3])) act_sym = 1; + else act_sym = 0; + } + act_sym++; + } + else + { + act_sym= 0; + } + } + se->value1 = act_sym; + + #if TRACE + fprintf(p_trace, "@%d %s\t\t%d\n",symbolCount++, se->tracestring, se->value1); + fflush(p_trace); + #endif + } + + /*! 
+ ************************************************************************ + * \brief + * This function is used to arithmetically decode the macroblock + * type info of a given MB. + ************************************************************************ + */ + void readMB_skip_flagInfo_CABAC( SyntaxElement *se, + struct inp_par *inp, + struct img_par *img, + DecodingEnvironmentPtr dep_dp) + { + int a, b; + int act_ctx; + int bframe=(img->type==B_SLICE); + MotionInfoContexts *ctx = (img->currentSlice)->mot_ctx; + Macroblock *currMB = &img->mb_data[img->current_mb_nr]; + + + if (bframe) + { + if (currMB->mb_available_up == NULL) + b = 0; + else + b = (currMB->mb_available_up->skip_flag==0 ? 1 : 0); + if (currMB->mb_available_left == NULL) + a = 0; + else + a = (currMB->mb_available_left->skip_flag==0 ? 1 : 0); + + act_ctx = 7 + a + b; + + if (biari_decode_symbol (dep_dp, &ctx->mb_type_contexts[2][act_ctx]) == 1) + se->value1 = se->value2 = 0; + else + se->value1 = se->value2 = 1; + } + else + { + if (currMB->mb_available_up == NULL) + b = 0; + else + b = (( (currMB->mb_available_up)->skip_flag == 0) ? 1 : 0 ); + if (currMB->mb_available_left == NULL) + a = 0; + else + a = (( (currMB->mb_available_left)->skip_flag == 0) ? 1 : 0 ); + + act_ctx = a + b; + + if (biari_decode_symbol(dep_dp, &ctx->mb_type_contexts[1][act_ctx]) == 1) + se->value1 = 0; + else + se->value1 = 1; + } + + + #if TRACE + fprintf(p_trace, "@%d %s\t\t%d\t%d %d\n",symbolCount++, se->tracestring, se->value1,a,b); + fflush(p_trace); + #endif + if (!se->value1) + { + last_dquant=0; + } + return; + } + + /*! + *************************************************************************** + * \brief + * This function is used to arithmetically decode the macroblock + * intra_pred_size flag info of a given MB. 
+ *************************************************************************** + */ + + void readMB_transform_size_flag_CABAC( SyntaxElement *se, + struct inp_par *inp, + struct img_par *img, + DecodingEnvironmentPtr dep_dp) + { + int a, b; + int act_ctx = 0; + int act_sym; + + MotionInfoContexts *ctx = (img->currentSlice)->mot_ctx; + Macroblock *currMB = &img->mb_data[img->current_mb_nr]; + + if (currMB->mb_available_up == NULL) + b = 0; + else + b = currMB->mb_available_up->luma_transform_size_8x8_flag; + + if (currMB->mb_available_left == NULL) + a = 0; + else + a = currMB->mb_available_left->luma_transform_size_8x8_flag; + + act_ctx = a + b; + + + act_sym = biari_decode_symbol(dep_dp, ctx->transform_size_contexts + act_ctx ); + se->value1 = act_sym; + + #if TRACE + fprintf(p_trace, "@%d %s\t\t%d\t%d %d\n",symbolCount++, se->tracestring, se->value1,a,b); + fflush(p_trace); + #endif + + } + + /*! + ************************************************************************ + * \brief + * This function is used to arithmetically decode the macroblock + * type info of a given MB. + ************************************************************************ + */ + void readMB_typeInfo_CABAC( SyntaxElement *se, + struct inp_par *inp, + struct img_par *img, + DecodingEnvironmentPtr dep_dp) + { + int a, b; + int act_ctx; + int act_sym; + int bframe=(img->type==B_SLICE); + int mode_sym; + int ct = 0; + int curr_mb_type; + + + MotionInfoContexts *ctx = (img->currentSlice)->mot_ctx; + Macroblock *currMB = &img->mb_data[img->current_mb_nr]; + + if(img->type == I_SLICE) // INTRA-frame + { + if (currMB->mb_available_up == NULL) + b = 0; + else + b = (((currMB->mb_available_up)->mb_type != I4MB && currMB->mb_available_up->mb_type != I8MB) ? 1 : 0 ); + + if (currMB->mb_available_left == NULL) + a = 0; + else + a = (((currMB->mb_available_left)->mb_type != I4MB && currMB->mb_available_left->mb_type != I8MB) ? 
1 : 0 ); + + act_ctx = a + b; + act_sym = biari_decode_symbol(dep_dp, ctx->mb_type_contexts[0] + act_ctx); + se->context = act_ctx; // store context + + if (act_sym==0) // 4x4 Intra + { + curr_mb_type = act_sym; + } + else // 16x16 Intra + { + mode_sym = biari_decode_final(dep_dp); + if(mode_sym == 1) + { + curr_mb_type = 25; + } + else + { + act_sym = 1; + act_ctx = 4; + mode_sym = biari_decode_symbol(dep_dp, ctx->mb_type_contexts[0] + act_ctx ); // decoding of AC/no AC + act_sym += mode_sym*12; + act_ctx = 5; + // decoding of cbp: 0,1,2 + mode_sym = biari_decode_symbol(dep_dp, ctx->mb_type_contexts[0] + act_ctx ); + if (mode_sym!=0) + { + act_ctx=6; + mode_sym = biari_decode_symbol(dep_dp, ctx->mb_type_contexts[0] + act_ctx ); + act_sym+=4; + if (mode_sym!=0) + act_sym+=4; + } + // decoding of I pred-mode: 0,1,2,3 + act_ctx = 7; + mode_sym = biari_decode_symbol(dep_dp, ctx->mb_type_contexts[0] + act_ctx ); + act_sym += mode_sym*2; + act_ctx = 8; + mode_sym = biari_decode_symbol(dep_dp, ctx->mb_type_contexts[0] + act_ctx ); + act_sym += mode_sym; + curr_mb_type = act_sym; + } + } + } + else if(img->type == SI_SLICE) // SI-frame + { + // special ctx's for SI4MB + if (currMB->mb_available_up == NULL) + b = 0; + else + b = (( (currMB->mb_available_up)->mb_type != SI4MB) ? 1 : 0 ); + if (currMB->mb_available_left == NULL) + a = 0; + else + a = (( (currMB->mb_available_left)->mb_type != SI4MB) ? 1 : 0 ); + + act_ctx = a + b; + act_sym = biari_decode_symbol(dep_dp, ctx->mb_type_contexts[1] + act_ctx); + se->context = act_ctx; // store context + + if (act_sym==0) // SI 4x4 Intra + { + curr_mb_type = 0; + } + else // analog INTRA_IMG + { + if (currMB->mb_available_up == NULL) + b = 0; + else + b = (( (currMB->mb_available_up)->mb_type != I4MB) ? 1 : 0 ); + if (currMB->mb_available_left == NULL) + a = 0; + else + a = (( (currMB->mb_available_left)->mb_type != I4MB) ? 
1 : 0 ); + + act_ctx = a + b; + act_sym = biari_decode_symbol(dep_dp, ctx->mb_type_contexts[0] + act_ctx); + se->context = act_ctx; // store context + + + if (act_sym==0) // 4x4 Intra + { + curr_mb_type = 1; + } + else // 16x16 Intra + { + mode_sym = biari_decode_final(dep_dp); + if( mode_sym==1 ) + { + curr_mb_type = 26; + } + else + { + act_sym = 2; + act_ctx = 4; + mode_sym = biari_decode_symbol(dep_dp, ctx->mb_type_contexts[0] + act_ctx ); // decoding of AC/no AC + act_sym += mode_sym*12; + act_ctx = 5; + // decoding of cbp: 0,1,2 + mode_sym = biari_decode_symbol(dep_dp, ctx->mb_type_contexts[0] + act_ctx ); + if (mode_sym!=0) + { + act_ctx=6; + mode_sym = biari_decode_symbol(dep_dp, ctx->mb_type_contexts[0] + act_ctx ); + act_sym+=4; + if (mode_sym!=0) + act_sym+=4; + } + // decoding of I pred-mode: 0,1,2,3 + act_ctx = 7; + mode_sym = biari_decode_symbol(dep_dp, ctx->mb_type_contexts[0] + act_ctx ); + act_sym += mode_sym*2; + act_ctx = 8; + mode_sym = biari_decode_symbol(dep_dp, ctx->mb_type_contexts[0] + act_ctx ); + act_sym += mode_sym; + curr_mb_type = act_sym; + } + } + } + } + else + { + if (bframe) + { + ct = 1; + if (currMB->mb_available_up == NULL) + b = 0; + else + b = (( (currMB->mb_available_up)->mb_type != 0) ? 1 : 0 ); + if (currMB->mb_available_left == NULL) + a = 0; + else + a = (( (currMB->mb_available_left)->mb_type != 0) ? 
1 : 0 ); + + act_ctx = a + b; + + if (biari_decode_symbol (dep_dp, &ctx->mb_type_contexts[2][act_ctx])) + { + if (biari_decode_symbol (dep_dp, &ctx->mb_type_contexts[2][4])) + { + if (biari_decode_symbol (dep_dp, &ctx->mb_type_contexts[2][5])) + { + act_sym=12; + if (biari_decode_symbol (dep_dp, &ctx->mb_type_contexts[2][6])) act_sym+=8; + if (biari_decode_symbol (dep_dp, &ctx->mb_type_contexts[2][6])) act_sym+=4; + if (biari_decode_symbol (dep_dp, &ctx->mb_type_contexts[2][6])) act_sym+=2; + + if (act_sym==24) act_sym=11; + else if (act_sym==26) act_sym=22; + else + { + if (act_sym==22) act_sym=23; + if (biari_decode_symbol (dep_dp, &ctx->mb_type_contexts[2][6])) act_sym+=1; + } + } + else + { + act_sym=3; + if (biari_decode_symbol (dep_dp, &ctx->mb_type_contexts[2][6])) act_sym+=4; + if (biari_decode_symbol (dep_dp, &ctx->mb_type_contexts[2][6])) act_sym+=2; + if (biari_decode_symbol (dep_dp, &ctx->mb_type_contexts[2][6])) act_sym+=1; + } + } + else + { + if (biari_decode_symbol (dep_dp, &ctx->mb_type_contexts[2][6])) act_sym=2; + else act_sym=1; + } + } + else + { + act_sym = 0; + } + } + else // P-frame + { + { + if (biari_decode_symbol(dep_dp, &ctx->mb_type_contexts[1][4] )) + { + if (biari_decode_symbol(dep_dp, &ctx->mb_type_contexts[1][7] )) act_sym = 7; + else act_sym = 6; + } + else + { + if (biari_decode_symbol(dep_dp, &ctx->mb_type_contexts[1][5] )) + { + if (biari_decode_symbol(dep_dp, &ctx->mb_type_contexts[1][7] )) act_sym = 2; + else act_sym = 3; + } + else + { + if (biari_decode_symbol(dep_dp, &ctx->mb_type_contexts[1][6] )) act_sym = 4; + else act_sym = 1; + } + } + } + } + + if (act_sym<=6 || (((img->type == B_SLICE)?1:0) && act_sym<=23)) + { + curr_mb_type = act_sym; + } + else // additional info for 16x16 Intra-mode + { + mode_sym = biari_decode_final(dep_dp); + if( mode_sym==1 ) + { + if(bframe) // B frame + curr_mb_type = 48; + else // P frame + curr_mb_type = 31; + } + else + { + act_ctx = 8; + mode_sym = biari_decode_symbol(dep_dp, 
ctx->mb_type_contexts[1] + act_ctx ); // decoding of AC/no AC + act_sym += mode_sym*12; + + // decoding of cbp: 0,1,2 + act_ctx = 9; + mode_sym = biari_decode_symbol(dep_dp, ctx->mb_type_contexts[1] + act_ctx ); + if (mode_sym != 0) + { + act_sym+=4; + mode_sym = biari_decode_symbol(dep_dp, ctx->mb_type_contexts[1] + act_ctx ); + if (mode_sym != 0) + act_sym+=4; + } + + // decoding of I pred-mode: 0,1,2,3 + act_ctx = 10; + mode_sym = biari_decode_symbol(dep_dp, ctx->mb_type_contexts[1] + act_ctx ); + act_sym += mode_sym*2; + mode_sym = biari_decode_symbol(dep_dp, ctx->mb_type_contexts[1] + act_ctx ); + act_sym += mode_sym; + curr_mb_type = act_sym; + } + } + } + se->value1 = curr_mb_type; + + // if (curr_mb_type >= 23) printf(" stopx"); + #if TRACE + fprintf(p_trace, "@%d %s\t\t\t%d\n",symbolCount++, se->tracestring, se->value1); + fflush(p_trace); + #endif + } + + /*! + ************************************************************************ + * \brief + * This function is used to arithmetically decode a pair of + * intra prediction modes of a given MB. + ************************************************************************ + */ + void readIntraPredMode_CABAC( SyntaxElement *se, + struct inp_par *inp, + struct img_par *img, + DecodingEnvironmentPtr dep_dp) + { + TextureInfoContexts *ctx = img->currentSlice->tex_ctx; + int act_sym; + + // use_most_probable_mode + act_sym = biari_decode_symbol(dep_dp, ctx->ipr_contexts); + + // remaining_mode_selector + if (act_sym == 1) + se->value1 = -1; + else + { + se->value1 = 0; + se->value1 |= (biari_decode_symbol(dep_dp, ctx->ipr_contexts+1) ); + se->value1 |= (biari_decode_symbol(dep_dp, ctx->ipr_contexts+1) << 1); + se->value1 |= (biari_decode_symbol(dep_dp, ctx->ipr_contexts+1) << 2); + } + + #if TRACE + fprintf(p_trace, "@%d %s\t\t\t%d\n",symbolCount++, se->tracestring, se->value1); + fflush(p_trace); + #endif + } + /*! 
+ ************************************************************************ + * \brief + * This function is used to arithmetically decode the reference + * parameter of a given MB. + ************************************************************************ + */ + void readRefFrame_CABAC( SyntaxElement *se, + struct inp_par *inp, + struct img_par *img, + DecodingEnvironmentPtr dep_dp) + { + MotionInfoContexts *ctx = img->currentSlice->mot_ctx; + Macroblock *currMB = &img->mb_data[img->current_mb_nr]; + + int addctx = 0; + int a, b; + int act_ctx; + int act_sym; + char** refframe_array = dec_picture->ref_idx[se->value2]; + int b8a, b8b; + + PixelPos block_a, block_b; + + getLuma4x4Neighbour(img->current_mb_nr, img->subblock_x, img->subblock_y, -1, 0, &block_a); + getLuma4x4Neighbour(img->current_mb_nr, img->subblock_x, img->subblock_y, 0, -1, &block_b); + + b8a=((block_a.x/2)%2)+2*((block_a.y/2)%2); + b8b=((block_b.x/2)%2)+2*((block_b.y/2)%2); + + if (!block_b.available) + b=0; + else if ( (img->mb_data[block_b.mb_addr].mb_type==IPCM) || IS_DIRECT(&img->mb_data[block_b.mb_addr]) || (img->mb_data[block_b.mb_addr].b8mode[b8b]==0 && img->mb_data[block_b.mb_addr].b8pdir[b8b]==2)) + b=0; + else + { + if (img->MbaffFrameFlag && (currMB->mb_field == 0) && (img->mb_data[block_b.mb_addr].mb_field == 1)) + b = (refframe_array[block_b.pos_y][block_b.pos_x] > 1 ? 1 : 0); + else + b = (refframe_array[block_b.pos_y][block_b.pos_x] > 0 ? 1 : 0); + } + + if (!block_a.available) + a=0; + else if ((img->mb_data[block_a.mb_addr].mb_type==IPCM) || IS_DIRECT(&img->mb_data[block_a.mb_addr]) || (img->mb_data[block_a.mb_addr].b8mode[b8a]==0 && img->mb_data[block_a.mb_addr].b8pdir[b8a]==2)) + a=0; + else + { + if (img->MbaffFrameFlag && (currMB->mb_field == 0) && (img->mb_data[block_a.mb_addr].mb_field == 1)) + a = (refframe_array[block_a.pos_y][block_a.pos_x] > 1 ? 1 : 0); + else + a = (refframe_array[block_a.pos_y][block_a.pos_x] > 0 ? 
1 : 0); + } + + act_ctx = a + 2*b; + se->context = act_ctx; // store context + + act_sym = biari_decode_symbol(dep_dp,ctx->ref_no_contexts[addctx] + act_ctx ); + + if (act_sym != 0) + { + act_ctx = 4; + act_sym = unary_bin_decode(dep_dp,ctx->ref_no_contexts[addctx]+act_ctx,1); + act_sym++; + } + se->value1 = act_sym; + + #if TRACE + fprintf(p_trace, "@%d %s\t\t\t%d \n",symbolCount++, se->tracestring, se->value1); + // fprintf(p_trace," c: %d :%d \n",ctx->ref_no_contexts[addctx][act_ctx].cum_freq[0],ctx->ref_no_contexts[addctx][act_ctx].cum_freq[1]); + fflush(p_trace); + #endif + } + + + /*! + ************************************************************************ + * \brief + * This function is used to arithmetically decode the delta qp + * of a given MB. + ************************************************************************ + */ + void readDquant_CABAC( SyntaxElement *se, + struct inp_par *inp, + struct img_par *img, + DecodingEnvironmentPtr dep_dp) + { + MotionInfoContexts *ctx = img->currentSlice->mot_ctx; + + int act_ctx; + int act_sym; + int dquant; + + act_ctx = ( (last_dquant != 0) ? 1 : 0); + + act_sym = biari_decode_symbol(dep_dp,ctx->delta_qp_contexts + act_ctx ); + if (act_sym != 0) + { + act_ctx = 2; + act_sym = unary_bin_decode(dep_dp,ctx->delta_qp_contexts+act_ctx,1); + act_sym++; + } + + dquant = (act_sym+1)/2; + if((act_sym & 0x01)==0) // lsb is signed bit + dquant = -dquant; + se->value1 = dquant; + + last_dquant = dquant; + + #if TRACE + fprintf(p_trace, "@%d %s\t\t\t%d\n",symbolCount++, se->tracestring, se->value1); + fflush(p_trace); + #endif + } + /*! + ************************************************************************ + * \brief + * This function is used to arithmetically decode the coded + * block pattern of a given MB. 
+ ************************************************************************ + */ + void readCBP_CABAC(SyntaxElement *se, + struct inp_par *inp, + struct img_par *img, + DecodingEnvironmentPtr dep_dp) + { + TextureInfoContexts *ctx = img->currentSlice->tex_ctx; + Macroblock *currMB = &img->mb_data[img->current_mb_nr]; + + int mb_x, mb_y; + int a, b; + int curr_cbp_ctx, curr_cbp_idx; + int cbp = 0; + int cbp_bit; + int mask; + PixelPos block_a; + + // coding of luma part (bit by bit) + for (mb_y=0; mb_y < 4; mb_y += 2) + { + for (mb_x=0; mb_x < 4; mb_x += 2) + { + if (currMB->b8mode[mb_y+(mb_x/2)]==IBLOCK) + curr_cbp_idx = 0; + else + curr_cbp_idx = 1; + + if (mb_y == 0) + { + if (currMB->mb_available_up == NULL) + b = 0; + else + { + if((currMB->mb_available_up)->mb_type==IPCM) + b=0; + else + b = (( ((currMB->mb_available_up)->cbp & (1<<(2+mb_x/2))) == 0) ? 1 : 0); + } + + } + else + b = ( ((cbp & (1<<(mb_x/2))) == 0) ? 1: 0); + + if (mb_x == 0) + { + getLuma4x4Neighbour(img->current_mb_nr, mb_x, mb_y, -1, 0, &block_a); + if (block_a.available) + { + { + if(img->mb_data[block_a.mb_addr].mb_type==IPCM) + a=0; + else + a = (( (img->mb_data[block_a.mb_addr].cbp & (1<<(2*(block_a.y/2)+1))) == 0) ? 1 : 0); + } + + } + else + a=0; + } + else + a = ( ((cbp & (1<cbp_contexts[0] + curr_cbp_ctx ); + if (cbp_bit) cbp += mask; + } + } + + + if (dec_picture->chroma_format_idc != YUV400) + { + // coding of chroma part + // CABAC decoding for BinIdx 0 + b = 0; + if (currMB->mb_available_up != NULL) + { + if((currMB->mb_available_up)->mb_type==IPCM) + b=1; + else + b = ((currMB->mb_available_up)->cbp > 15) ? 1 : 0; + } + + + a = 0; + if (currMB->mb_available_left != NULL) + { + if((currMB->mb_available_left)->mb_type==IPCM) + a=1; + else + a = ((currMB->mb_available_left)->cbp > 15) ? 
1 : 0; + } + + + curr_cbp_ctx = a+2*b; + cbp_bit = biari_decode_symbol(dep_dp, ctx->cbp_contexts[1] + curr_cbp_ctx ); + + // CABAC decoding for BinIdx 1 + if (cbp_bit) // set the chroma bits + { + b = 0; + if (currMB->mb_available_up != NULL) + { + if((currMB->mb_available_up)->mb_type==IPCM) + b=1; + else + if ((currMB->mb_available_up)->cbp > 15) + b = (( ((currMB->mb_available_up)->cbp >> 4) == 2) ? 1 : 0); + } + + + a = 0; + if (currMB->mb_available_left != NULL) + { + if((currMB->mb_available_left)->mb_type==IPCM) + a=1; + else + if ((currMB->mb_available_left)->cbp > 15) + a = (( ((currMB->mb_available_left)->cbp >> 4) == 2) ? 1 : 0); + } + + + curr_cbp_ctx = a+2*b; + cbp_bit = biari_decode_symbol(dep_dp, ctx->cbp_contexts[2] + curr_cbp_ctx ); + cbp += (cbp_bit == 1) ? 32 : 16; + } + } + + se->value1 = cbp; + + if (!cbp) + { + last_dquant=0; + } + + #if TRACE + fprintf(p_trace, "@%d %s\t\t\t%d\n",symbolCount++, se->tracestring, se->value1); + fflush(p_trace); + #endif + } + + /*! + ************************************************************************ + * \brief + * This function is used to arithmetically decode the chroma + * intra prediction mode of a given MB. + ************************************************************************ + */ //GB + void readCIPredMode_CABAC(SyntaxElement *se, + struct inp_par *inp, + struct img_par *img, + DecodingEnvironmentPtr dep_dp) + { + + TextureInfoContexts *ctx = img->currentSlice->tex_ctx; + Macroblock *currMB = &img->mb_data[img->current_mb_nr]; + int act_ctx,a,b; + int act_sym = se->value1; + + if (currMB->mb_available_up == NULL) b = 0; + else + { + if( (currMB->mb_available_up)->mb_type==IPCM) + b=0; + else + b = ( ((currMB->mb_available_up)->c_ipred_mode != 0) ? 1 : 0); + } + + + if (currMB->mb_available_left == NULL) a = 0; + else + { + if( (currMB->mb_available_left)->mb_type==IPCM) + a=0; + else + a = ( ((currMB->mb_available_left)->c_ipred_mode != 0) ? 
1 : 0); + } + + + act_ctx = a+b; + + act_sym = biari_decode_symbol(dep_dp, ctx->cipr_contexts + act_ctx ); + + if (act_sym!=0) + act_sym = unary_bin_max_decode(dep_dp,ctx->cipr_contexts+3,0,2)+1; + + + se->value1 = act_sym; + + + #if TRACE + fprintf(p_trace, "@%d %s\t\t\t%d\n",symbolCount++, se->tracestring, se->value1); + fflush(p_trace); + #endif + + } + + static const int maxpos [] = {16, 15, 64, 32, 32, 16, 4, 15, 8, 16}; + static const int c1isdc [] = { 1, 0, 1, 1, 1, 1, 1, 0, 1, 1}; + + static const int type2ctx_bcbp[] = { 0, 1, 2, 2, 3, 4, 5, 6, 5, 5}; // 7 + static const int type2ctx_map [] = { 0, 1, 2, 3, 4, 5, 6, 7, 6, 6}; // 8 + static const int type2ctx_last[] = { 0, 1, 2, 3, 4, 5, 6, 7, 6, 6}; // 8 + static const int type2ctx_one [] = { 0, 1, 2, 3, 3, 4, 5, 6, 5, 5}; // 7 + static const int type2ctx_abs [] = { 0, 1, 2, 3, 3, 4, 5, 6, 5, 5}; // 7 + static const int max_c2 [] = { 4, 4, 4, 4, 4, 4, 3, 4, 3, 3}; // 9 + + /*! + ************************************************************************ + * \brief + * Read CBP4-BIT + ************************************************************************ + */ + int read_and_store_CBP_block_bit (Macroblock *currMB, + DecodingEnvironmentPtr dep_dp, + struct img_par *img, + int type) + { + #define BIT_SET(x,n) ((int)(((x)&((int64)1<<(n)))>>(n))) + + int y_ac = (type==LUMA_16AC || type==LUMA_8x8 || type==LUMA_8x4 || type==LUMA_4x8 || type==LUMA_4x4); + int y_dc = (type==LUMA_16DC); + int u_ac = (type==CHROMA_AC && !img->is_v_block); + int v_ac = (type==CHROMA_AC && img->is_v_block); + int chroma_dc = (type==CHROMA_DC || type==CHROMA_DC_2x4 || type==CHROMA_DC_4x4); + int u_dc = (chroma_dc && !img->is_v_block); + int v_dc = (chroma_dc && img->is_v_block); + int j = (y_ac || u_ac || v_ac ? img->subblock_y : 0); + int i = (y_ac || u_ac || v_ac ? img->subblock_x : 0); + int bit = (y_dc ? 0 : y_ac ? 1 : u_dc ? 17 : v_dc ? 18 : u_ac ? 19 : 35); + int default_bit = (img->is_intra_block ? 
1 : 0); + int upper_bit = default_bit; + int left_bit = default_bit; + int cbp_bit = 1; // always one for 8x8 mode + int ctx; + int bit_pos_a = 0; + int bit_pos_b = 0; + + PixelPos block_a, block_b; + if (y_ac || y_dc) + { + getLuma4x4Neighbour(img->current_mb_nr, i, j, -1, 0, &block_a); + getLuma4x4Neighbour(img->current_mb_nr, i, j, 0, -1, &block_b); + if (y_ac) + { + if (block_a.available) + bit_pos_a = 4*block_a.y + block_a.x; + if (block_b.available) + bit_pos_b = 4*block_b.y + block_b.x; + } + } + else + { + getChroma4x4Neighbour(img->current_mb_nr, i, j, -1, 0, &block_a); + getChroma4x4Neighbour(img->current_mb_nr, i, j, 0, -1, &block_b); + if (u_ac||v_ac) + { + if (block_a.available) + bit_pos_a = 4*block_a.y + block_a.x; + if (block_b.available) + bit_pos_b = 4*block_b.y + block_b.x; + } + } + + if (type!=LUMA_8x8) + { + //--- get bits from neighbouring blocks --- + if (block_b.available) + { + if(img->mb_data[block_b.mb_addr].mb_type==IPCM) + upper_bit=1; + else + upper_bit = BIT_SET(img->mb_data[block_b.mb_addr].cbp_bits,bit+bit_pos_b); + } + + + if (block_a.available) + { + if(img->mb_data[block_a.mb_addr].mb_type==IPCM) + left_bit=1; + else + left_bit = BIT_SET(img->mb_data[block_a.mb_addr].cbp_bits,bit+bit_pos_a); + } + + + ctx = 2*upper_bit+left_bit; + + + //===== encode symbol ===== + cbp_bit = biari_decode_symbol (dep_dp, img->currentSlice->tex_ctx->bcbp_contexts[type2ctx_bcbp[type]] + ctx); + } + + //--- set bits for current block --- + bit = (y_dc ? 0 : y_ac ? 1+4*j+i : u_dc ? 17 : v_dc ? 18 : u_ac ? 
19+4*j+i : 35+4*j+i); + + if (cbp_bit) + { + if (type==LUMA_8x8) + { + currMB->cbp_bits |= (1<< bit ); + currMB->cbp_bits |= (1<<(bit+1)); + currMB->cbp_bits |= (1<<(bit+4)); + currMB->cbp_bits |= (1<<(bit+5)); + } + else if (type==LUMA_8x4) + { + currMB->cbp_bits |= (1<< bit ); + currMB->cbp_bits |= (1<<(bit+1)); + } + else if (type==LUMA_4x8) + { + currMB->cbp_bits |= (1<< bit ); + currMB->cbp_bits |= (1<<(bit+4)); + } + else + { + currMB->cbp_bits |= ((int64)1< ctx for MAP ===== + //--- zig-zag scan ---- + static const int pos2ctx_map8x8 [] = { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5, + 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9, 10, 9, 8, 7, + 7, 6, 11, 12, 13, 11, 6, 7, 8, 9, 14, 10, 9, 8, 6, 11, + 12, 13, 11, 6, 9, 14, 10, 9, 11, 12, 13, 11 ,14, 10, 12, 14}; // 15 CTX + static const int pos2ctx_map8x4 [] = { 0, 1, 2, 3, 4, 5, 7, 8, 9, 10, 11, 9, 8, 6, 7, 8, + 9, 10, 11, 9, 8, 6, 12, 8, 9, 10, 11, 9, 13, 13, 14, 14}; // 15 CTX + static const int pos2ctx_map4x4 [] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 14}; // 15 CTX + static const int pos2ctx_map2x4c[] = { 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}; // 15 CTX + static const int pos2ctx_map4x4c[] = { 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2}; // 15 CTX + static const int* pos2ctx_map [] = {pos2ctx_map4x4, pos2ctx_map4x4, pos2ctx_map8x8, pos2ctx_map8x4, + pos2ctx_map8x4, pos2ctx_map4x4, pos2ctx_map4x4, pos2ctx_map4x4, + pos2ctx_map2x4c, pos2ctx_map4x4c}; + //--- interlace scan ---- + //taken from ABT + static const int pos2ctx_map8x8i[] = { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5, + 6, 9, 10, 10, 8, 11, 12, 11, 9, 9, 10, 10, 8, 11, 12, 11, + 9, 9, 10, 10, 8, 11, 12, 11, 9, 9, 10, 10, 8, 13, 13, 9, + 9, 10, 10, 8, 13, 13, 9, 9, 10, 10, 14, 14, 14, 14, 14, 14}; // 15 CTX + static const int pos2ctx_map8x4i[] = { 0, 1, 2, 3, 4, 5, 6, 3, 4, 5, 6, 3, 4, 7, 6, 8, + 9, 7, 6, 8, 9, 10, 11, 12, 12, 10, 11, 13, 13, 14, 14, 14}; // 15 CTX + static const int pos2ctx_map4x8i[] = { 0, 1, 1, 1, 
2, 3, 3, 4, 4, 4, 5, 6, 2, 7, 7, 8, + 8, 8, 5, 6, 9, 10, 10, 11, 11, 11, 12, 13, 13, 14, 14, 14}; // 15 CTX + static const int* pos2ctx_map_int[] = {pos2ctx_map4x4, pos2ctx_map4x4, pos2ctx_map8x8i,pos2ctx_map8x4i, + pos2ctx_map4x8i,pos2ctx_map4x4, pos2ctx_map4x4, pos2ctx_map4x4, + pos2ctx_map2x4c, pos2ctx_map4x4c}; + + + //===== position -> ctx for LAST ===== + static const int pos2ctx_last8x8 [] = { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, + 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8}; // 9 CTX + static const int pos2ctx_last8x4 [] = { 0, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, + 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8}; // 9 CTX + + static const int pos2ctx_last4x4 [] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; // 15 CTX + static const int pos2ctx_last2x4c[] = { 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}; // 15 CTX + static const int pos2ctx_last4x4c[] = { 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2}; // 15 CTX + static const int* pos2ctx_last [] = {pos2ctx_last4x4, pos2ctx_last4x4, pos2ctx_last8x8, pos2ctx_last8x4, + pos2ctx_last8x4, pos2ctx_last4x4, pos2ctx_last4x4, pos2ctx_last4x4, + pos2ctx_last2x4c, pos2ctx_last4x4c}; + + + + + + /*! + ************************************************************************ + * \brief + * Read Significance MAP + ************************************************************************ + */ + int read_significance_map (Macroblock *currMB, + DecodingEnvironmentPtr dep_dp, + struct img_par *img, + int type, + int coeff[]) + { + int i, sig; + int coeff_ctr = 0; + int i0 = 0; + int i1 = maxpos[type]-1; + + int fld = ( img->structure!=FRAME || currMB->mb_field ); + BiContextTypePtr map_ctx = ( fld ? img->currentSlice->tex_ctx-> fld_map_contexts[type2ctx_map [type]] + : img->currentSlice->tex_ctx-> map_contexts[type2ctx_map [type]] ); + BiContextTypePtr last_ctx = ( fld ? 
img->currentSlice->tex_ctx->fld_last_contexts[type2ctx_last[type]] + : img->currentSlice->tex_ctx-> last_contexts[type2ctx_last[type]] ); + + if (!c1isdc[type]) + { + i0++; i1++; coeff--; + } + + for (i=i0; istructure!=FRAME || currMB->mb_field) + sig = biari_decode_symbol (dep_dp, map_ctx + pos2ctx_map_int [type][i]); + else + sig = biari_decode_symbol (dep_dp, map_ctx + pos2ctx_map [type][i]); + if (sig) + { + coeff[i] = 1; + coeff_ctr++; + //--- read last coefficient symbol --- + if (biari_decode_symbol (dep_dp, last_ctx + pos2ctx_last[type][i])) + { + for (i++; i=0; i--) + { + if (coeff[i]!=0) + { + ctx = min (c1,4); + coeff[i] += biari_decode_symbol (dep_dp, img->currentSlice->tex_ctx->one_contexts[type2ctx_one[type]] + ctx); + if (coeff[i]==2) + { + ctx = min (c2, max_c2[type]); + coeff[i] += unary_exp_golomb_level_decode (dep_dp, img->currentSlice->tex_ctx->abs_contexts[type2ctx_abs[type]]+ctx); + c1=0; + c2++; + } + else if (c1) + { + c1++; + } + if (biari_decode_symbol_eq_prob(dep_dp)) + { + coeff[i] *= -1; + } + } + } + } + + + /*! 
+ ************************************************************************ + * \brief + * Read Block-Transform Coefficients + ************************************************************************ + */ + void readRunLevel_CABAC (SyntaxElement *se, + struct inp_par *inp, + struct img_par *img, + DecodingEnvironmentPtr dep_dp) + { + static int coeff[64]; // one more for EOB + static int coeff_ctr = -1; + static int pos = 0; + + Macroblock *currMB = &img->mb_data[img->current_mb_nr]; + + //--- read coefficients for whole block --- + if (coeff_ctr < 0) + { + //===== decode CBP-BIT ===== + if ((coeff_ctr = read_and_store_CBP_block_bit (currMB, dep_dp, img, se->context))) + { + //===== decode significance map ===== + coeff_ctr = read_significance_map (currMB, dep_dp, img, se->context, coeff); + + //===== decode significant coefficients ===== + read_significant_coefficients (currMB, dep_dp, img, se->context, coeff); + } + } + + //--- set run and level --- + if (coeff_ctr) + { + //--- set run and level (coefficient) --- + for (se->value2=0; coeff[pos]==0; pos++, se->value2++); + se->value1=coeff[pos++]; + } + else + { + //--- set run and level (EOB) --- + se->value1 = se->value2 = 0; + } + //--- decrement coefficient counter and re-set position --- + if (coeff_ctr-- == 0) pos=0; + + #if TRACE + fprintf(p_trace, "@%d %s\t\t\t%d\t%d\n",symbolCount++, se->tracestring, se->value1,se->value2); + fflush(p_trace); + #endif + } + + + + /*! 
+ ************************************************************************ + * \brief + * arithmetic decoding + ************************************************************************ + */ + int readSyntaxElement_CABAC(SyntaxElement *se, struct img_par *img, struct inp_par *inp, DataPartition *this_dataPart) + { + int curr_len; + DecodingEnvironmentPtr dep_dp = &(this_dataPart->de_cabac); + + curr_len = arideco_bits_read(dep_dp); + + // perform the actual decoding by calling the appropriate method + se->reading(se, inp, img, dep_dp); + + return (se->len = (arideco_bits_read(dep_dp) - curr_len)); + } + + + /*! + ************************************************************************ + * \brief + * decoding of unary binarization using one or 2 distinct + * models for the first and all remaining bins; no terminating + * "0" for max_symbol + *********************************************************************** + */ + unsigned int unary_bin_max_decode(DecodingEnvironmentPtr dep_dp, + BiContextTypePtr ctx, + int ctx_offset, + unsigned int max_symbol) + { + unsigned int l; + unsigned int symbol; + BiContextTypePtr ictx; + + symbol = biari_decode_symbol(dep_dp, ctx ); + + if (symbol==0) + return 0; + else + { + if (max_symbol == 1) + return symbol; + symbol=0; + ictx=ctx+ctx_offset; + do + { + l=biari_decode_symbol(dep_dp, ictx); + symbol++; + } + while( (l!=0) && (symbolcurrentSlice; + int *partMap = assignSE2partition[currSlice->dp_mode]; + DataPartition *dP; + unsigned int bit; + DecodingEnvironmentPtr dep_dp; + + dP = &(currSlice->partArr[partMap[SE_MBTYPE]]); + dep_dp = &(dP->de_cabac); + + if( eos_bit ) + { + bit = biari_decode_final (dep_dp); //GB + + #if TRACE + // strncpy(se->tracestring, "Decode Sliceterm", TRACESTRING_SIZE); + fprintf(p_trace, "@%d %s\t\t%d\n",symbolCount++, "Decode Sliceterm", bit); + fflush(p_trace); + #endif + } + else + { + bit = 0; + } + + return (bit==1?1:0); + } + + + + + + /*! 
+ ************************************************************************ + * \brief + * Exp Golomb binarization and decoding of a symbol + * with prob. of 0.5 + ************************************************************************ + */ + unsigned int exp_golomb_decode_eq_prob( DecodingEnvironmentPtr dep_dp, + int k) + { + unsigned int l; + int symbol = 0; + int binary_symbol = 0; + + do + { + l=biari_decode_symbol_eq_prob(dep_dp); + if (l==1) + { + symbol += (1<read_len will be modified + * (for IPCM CABAC 28/11/2003) + * + * \author + * Dong Wang + ************************************************************************ + */ + void readIPCMBytes_CABAC(SyntaxElement *sym, Bitstream *currStream) + { + int read_len = currStream->read_len; + int code_len = currStream->code_len; + byte *buf = currStream->streamBuffer; + + sym->len=8; + + if(read_leninf=buf[read_len++]; + + sym->value1=sym->inf; + + currStream->read_len=read_len; + + #if TRACE + tracebits2(sym->tracestring, sym->len, sym->inf); + #endif + + } + Index: llvm-test/MultiSource/Applications/JM/ldecod/cabac.h diff -c /dev/null llvm-test/MultiSource/Applications/JM/ldecod/cabac.h:1.1 *** /dev/null Sat Feb 11 04:33:42 2006 --- llvm-test/MultiSource/Applications/JM/ldecod/cabac.h Sat Feb 11 04:33:22 2006 *************** *** 0 **** --- 1,54 ---- + + /*! + *************************************************************************** + * \file + * cabac.h + * + * \brief + * Headerfile for entropy coding routines + * + * \author + * Detlev Marpe \n + * Copyright (C) 2000 HEINRICH HERTZ INSTITUTE All Rights Reserved. + * + * \date + * 21. 
Oct 2000 (Changes by Tobias Oelbaum 28.08.2001) + *************************************************************************** + */ + + #ifndef _CABAC_H_ + #define _CABAC_H_ + + #include "global.h" + + MotionInfoContexts* create_contexts_MotionInfo(void); + TextureInfoContexts* create_contexts_TextureInfo(void); + void init_contexts_MotionInfo(struct img_par *img, MotionInfoContexts *enco_ctx); + void init_contexts_TextureInfo(struct img_par *img, TextureInfoContexts *enco_ctx); + void delete_contexts_MotionInfo(MotionInfoContexts *enco_ctx); + void delete_contexts_TextureInfo(TextureInfoContexts *enco_ctx); + + void cabac_new_slice(); + + void readMB_typeInfo_CABAC(SyntaxElement *se, struct inp_par *inp, struct img_par *img, DecodingEnvironmentPtr dep_dp); + void readB8_typeInfo_CABAC(SyntaxElement *se, struct inp_par *inp, struct img_par *img, DecodingEnvironmentPtr dep_dp); + void readIntraPredMode_CABAC(SyntaxElement *se, struct inp_par *inp,struct img_par *img, DecodingEnvironmentPtr dep_dp); + void readRefFrame_CABAC(SyntaxElement *se, struct inp_par *inp, struct img_par *img, DecodingEnvironmentPtr dep_dp); + void readMVD_CABAC(SyntaxElement *se, struct inp_par *inp, struct img_par *img, DecodingEnvironmentPtr dep_dp); + void readCBP_CABAC(SyntaxElement *se, struct inp_par *inp, struct img_par *img, DecodingEnvironmentPtr dep_dp); + void readRunLevel_CABAC(SyntaxElement *se, struct inp_par *inp, struct img_par *img, DecodingEnvironmentPtr dep_dp); + void readDquant_CABAC(SyntaxElement *se,struct inp_par *inp,struct img_par *img,DecodingEnvironmentPtr dep_dp); + void readCIPredMode_CABAC(SyntaxElement *se,struct inp_par *inp,struct img_par *img,DecodingEnvironmentPtr dep_dp); + void readMB_skip_flagInfo_CABAC( SyntaxElement *se, struct inp_par *inp, struct img_par *img, DecodingEnvironmentPtr dep_dp); + void readFieldModeInfo_CABAC(SyntaxElement *se,struct inp_par *inp,struct img_par *img,DecodingEnvironmentPtr dep_dp); + + void 
readMB_transform_size_flag_CABAC( SyntaxElement *se, struct inp_par *inp, struct img_par *img, DecodingEnvironmentPtr dep_dp); + + int readSyntaxElement_CABAC(SyntaxElement *se, struct img_par *img, struct inp_par *inp, DataPartition *this_dataPart); + + int check_next_mb_and_get_field_mode_CABAC(SyntaxElement *se,struct img_par *img,struct inp_par *inp,DataPartition *act_dp); + void CheckAvailabilityOfNeighborsCABAC(); + + + #endif // _CABAC_H_ + Index: llvm-test/MultiSource/Applications/JM/ldecod/context_ini.c diff -c /dev/null llvm-test/MultiSource/Applications/JM/ldecod/context_ini.c:1.1 *** /dev/null Sat Feb 11 04:33:42 2006 --- llvm-test/MultiSource/Applications/JM/ldecod/context_ini.c Sat Feb 11 04:33:22 2006 *************** *** 0 **** --- 1,74 ---- + + /*! + ************************************************************************************* + * \file context_ini.c + * + * \brief + * CABAC context initializations + * + * \author + * Main contributors (see contributors.h for copyright, address and affiliation details) + * - Detlev Marpe + * - Heiko Schwarz + ************************************************************************************** + */ + + #define CONTEXT_INI_C + + #include "defines.h" + #include "global.h" + #include "biaridecod.h" + #include "ctx_tables.h" + + + #define BIARI_CTX_INIT2(ii,jj,ctx,tab,num) \ + { \ + for (i=0; itype==I_SLICE)||(img->type==SI_SLICE)) biari_init_context (img, &(ctx[i][j]), &(tab ## _I[num][i][j][0])); \ + else biari_init_context (img, &(ctx[i][j]), &(tab ## _P[num][i][j][0])); \ + } \ + } + #define BIARI_CTX_INIT1(jj,ctx,tab,num) \ + { \ + for (j=0; jtype==I_SLICE)||(img->type==SI_SLICE)) biari_init_context (img, &(ctx[j]), &(tab ## _I[num][0][j][0])); \ + else biari_init_context (img, &(ctx[j]), &(tab ## _P[num][0][j][0])); \ + } \ + } + + + void + init_contexts (struct img_par* img) + { + MotionInfoContexts* mc = img->currentSlice->mot_ctx; + TextureInfoContexts* tc = img->currentSlice->tex_ctx; + int i, j; + + 
//printf("%d -", img->model_number); + + //--- motion coding contexts --- + BIARI_CTX_INIT2 (3, NUM_MB_TYPE_CTX, mc->mb_type_contexts, INIT_MB_TYPE, img->model_number); + BIARI_CTX_INIT2 (2, NUM_B8_TYPE_CTX, mc->b8_type_contexts, INIT_B8_TYPE, img->model_number); + BIARI_CTX_INIT2 (2, NUM_MV_RES_CTX, mc->mv_res_contexts, INIT_MV_RES, img->model_number); + BIARI_CTX_INIT2 (2, NUM_REF_NO_CTX, mc->ref_no_contexts, INIT_REF_NO, img->model_number); + BIARI_CTX_INIT1 ( NUM_DELTA_QP_CTX, mc->delta_qp_contexts, INIT_DELTA_QP, img->model_number); + BIARI_CTX_INIT1 ( NUM_MB_AFF_CTX, mc->mb_aff_contexts, INIT_MB_AFF, img->model_number); + BIARI_CTX_INIT1 ( NUM_TRANSFORM_SIZE_CTX, mc->transform_size_contexts, INIT_TRANSFORM_SIZE, img->model_number); + + + //--- texture coding contexts --- + BIARI_CTX_INIT1 ( NUM_IPR_CTX, tc->ipr_contexts, INIT_IPR, img->model_number); + BIARI_CTX_INIT1 ( NUM_CIPR_CTX, tc->cipr_contexts, INIT_CIPR, img->model_number); + BIARI_CTX_INIT2 (3, NUM_CBP_CTX, tc->cbp_contexts, INIT_CBP, img->model_number); + BIARI_CTX_INIT2 (NUM_BLOCK_TYPES, NUM_BCBP_CTX, tc->bcbp_contexts, INIT_BCBP, img->model_number); + BIARI_CTX_INIT2 (NUM_BLOCK_TYPES, NUM_MAP_CTX, tc->map_contexts, INIT_MAP, img->model_number); + BIARI_CTX_INIT2 (NUM_BLOCK_TYPES, NUM_LAST_CTX, tc->last_contexts, INIT_LAST, img->model_number); + BIARI_CTX_INIT2 (NUM_BLOCK_TYPES, NUM_ONE_CTX, tc->one_contexts, INIT_ONE, img->model_number); + BIARI_CTX_INIT2 (NUM_BLOCK_TYPES, NUM_ABS_CTX, tc->abs_contexts, INIT_ABS, img->model_number); + BIARI_CTX_INIT2 (NUM_BLOCK_TYPES, NUM_MAP_CTX, tc->fld_map_contexts, INIT_FLD_MAP, img->model_number); + BIARI_CTX_INIT2 (NUM_BLOCK_TYPES, NUM_LAST_CTX, tc->fld_last_contexts,INIT_FLD_LAST, img->model_number); + } + Index: llvm-test/MultiSource/Applications/JM/ldecod/context_ini.h diff -c /dev/null llvm-test/MultiSource/Applications/JM/ldecod/context_ini.h:1.1 *** /dev/null Sat Feb 11 04:33:42 2006 --- llvm-test/MultiSource/Applications/JM/ldecod/context_ini.h Sat 
Feb 11 04:33:22 2006 *************** *** 0 **** --- 1,23 ---- + + /*! + ************************************************************************************* + * \file context_ini.h + * + * \brief + * CABAC context initializations + * + * \author + * Main contributors (see contributors.h for copyright, address and affiliation details) + * - Detlev Marpe + * - Heiko Schwarz + ************************************************************************************** + */ + + + #ifndef _CONTEXT_INI_ + #define _CONTEXT_INI_ + + void init_contexts (struct img_par* img); + + #endif + Index: llvm-test/MultiSource/Applications/JM/ldecod/contributors.h diff -c /dev/null llvm-test/MultiSource/Applications/JM/ldecod/contributors.h:1.1 *** /dev/null Sat Feb 11 04:33:42 2006 --- llvm-test/MultiSource/Applications/JM/ldecod/contributors.h Sat Feb 11 04:33:22 2006 *************** *** 0 **** --- 1,202 ---- + + /*! \file + * contributors.h + * \brief + * List of contributors and copyright information. + * + * \par Copyright statements + \verbatim + H.264 JM coder/decoder + + Copyright (C) 2000 by + Telenor Satellite Services, Norway + Ericsson Radio Systems, Sweden + TELES AG, Germany + Nokia Inc., USA + Nokia Corporation, Finland + Siemens AG, Germany + Heinrich-Hertz-Institute for Communication Technology GmbH, Germany + University of Hannover, Institut of Communication Theory and Signal Processing,Germany + TICSP, Tampere University of Technology, Finland + Munich University of Technology, Institute for Communications Engineering, Germany + Videolocus, Canada + Motorola Inc., USA + Microsoft Corp., USA + Apple Computer, Inc. + RealNetworks, Inc., USA + Thomson, Inc., USA + \endverbatim + \par Full Contact Information + \verbatim + + Lowell Winger + Guy Côté + Michael Gallant + VideoLocus Inc. + 97 Randall Dr. 
+ Waterloo, ON, Canada N2V1C5 + + Inge Lille-Langøy + Telenor Satellite Services + P.O.Box 6914 St.Olavs plass + N-0130 Oslo, Norway + + Rickard Sjoberg + Ericsson Radio Systems + KI/ERA/T/VV + 164 80 Stockholm, Sweden + + Stephan Wenger + TU Berlin / TELES AG + Sekr. FR 6-3 + Franklinstr. 28-29 + D-10587 Berlin, Germany + + Jani Lainema + Nokia Inc. / Nokia Research Center + 6000 Connection Drive + Irving, TX 75039, USA + + Sebastian Purreiter + Siemens AG + ICM MD MP RD MCH 83 + P.O.Box 80 17 07 + D-81617 Munich, Germany + + Thomas Wedi + University of Hannover + Institut of Communication Theory and Signal Processing + Appelstr. 9a + 30167 Hannover, Germany + + Guido Heising + Heinrich-Hertz-Institute + Einsteinufer 37 + 10587 Berlin + Germany + + Gabi Blaettermann + Heinrich-Hertz-Institute + Einsteinufer 37 + 10587 Berlin + Germany + + Detlev Marpe + Heinrich-Hertz-Institute + Einsteinufer 37 + 10587 Berlin + Germany + + Ragip Kurceren + Nokia Inc. / Nokia Research Center + 6000 Connection Drive + Irving, TX 75039, USA + + Viktor Varsa + Nokia Inc. / Nokia Research Center + 6000 Connection Drive + Irving, TX 75039, USA + + Ye-Kui Wang + Tampere University of Technology + Tampere International Center for Signal Processing + 33720 Tampere, Finland + + Ari Hourunranta + Nokia Corporation / Nokia Mobile Phones + P.O. Box 88 + 33721 Tampere, Finland + + Yann Le Maguet + Philips Research France + + Dong Tian + Tampere University of Technology + Tampere International Center for Signal Processing + 33720 Tampere, Finland + + Miska M. Hannuksela + Nokia Corporation / Nokia Mobile Phones + P.O. 
Box 88 + 33721 Tampere, Finland + + Karsten Suehring + Heinrich-Hertz-Institute + Einsteinufer 37 + 10587 Berlin + Germany + + Heiko Schwarz + Heinrich-Hertz-Institute + Einsteinufer 37 + 10587 Berlin + Germany + + Tobias Oelbaum + Institute for Communications Engineering + Munich University of Technology + Germany + + Limin Wang + Krit Panusopone + Rajeev Gandhi + Yue Yu + Motorola Inc. + 6450 Sequence Drive + San Diego, CA 92121 USA + + Feng Wu + Xiaoyan Sun + Microsoft Research Asia + 3/F, Beijing Sigma Center + No.49, Zhichun Road, Hai Dian District, + Beijing China 100080 + + Yoshihiro Kikuchi + Takeshi Chujoh + Toshiba Corporation + Research and Development Center + Kawasaki 212-8582, Japan + + Shinya Kadono + Matsushita Electric Industrial Co., Ltd. + 1006 Kadoma, Kadoma + Osaka 663-8113, Japan + + Dzung Hoang + Eric Viscito + Conexant Systems. Inc. + MPEG Compression Group + 20450 Stevens Creek Blvd. + Cupertino, CA 95014 + + Barry Haskell + Apple Computer, Inc. + 2 Infinite Loop + Cupertino, California 95014 + + Greg Conklin + RealNetworks, Inc. + 2601 Elliott Ave + Seattle, WA 98101 + + Jill Boyce + Cristina Gomila + Thomson + 2 Independence Way + Princeton, NJ 08540 + + Alexis Michael Tourapis + + Saurav K Bandyopadhyay + Purvin Pandit + Zhenyu Wu + Thomson Inc. + 2 Independence Way + Princeton, NJ 08540 + + + + \endverbatim + */ + Index: llvm-test/MultiSource/Applications/JM/ldecod/ctx_tables.h diff -c /dev/null llvm-test/MultiSource/Applications/JM/ldecod/ctx_tables.h:1.1 *** /dev/null Sat Feb 11 04:33:42 2006 --- llvm-test/MultiSource/Applications/JM/ldecod/ctx_tables.h Sat Feb 11 04:33:22 2006 *************** *** 0 **** --- 1,729 ---- + + /*! 
+ ************************************************************************************* + * \file ctx_tables.h + * + * \brief + * CABAC context initialization tables + * + * \author + * Main contributors (see contributors.h for copyright, address and affiliation details) + * - Detlev Marpe + * - Heiko Schwarz + ************************************************************************************** + */ + + #define CTX_UNUSED {0,64} + #define CTX_UNDEF {0,63} + + #ifdef CONTEXT_INI_C + + + #define NUM_CTX_MODELS_I 1 + #define NUM_CTX_MODELS_P 3 + + + static const int INIT_MB_TYPE_I[1][3][11][2] = + { + //----- model 0 ----- + { + { { 20, -15} , { 2, 54} , { 3, 74} , CTX_UNUSED , { -28, 127} , { -23, 104} , { -6, 53} , { -1, 54} , { 7, 51} , CTX_UNUSED , CTX_UNUSED }, + { { 20, -15} , { 2, 54} , { 3, 74} , { 20, -15} , { 2, 54} , { 3, 74} , { -28, 127} , { -23, 104} , { -6, 53} , { -1, 54} , { 7, 51} }, // SI (unused at the moment) + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED } + } + }; + static const int INIT_MB_TYPE_P[3][3][11][2] = + { + //----- model 0 ----- + { + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { 23, 33} , { 23, 2} , { 21, 0} , CTX_UNUSED , { 1, 9} , { 0, 49} , { -37, 118} , { 5, 57} , { -13, 78} , { -11, 65} , { 1, 62} }, + { { 26, 67} , { 16, 90} , { 9, 104} , CTX_UNUSED , { -46, 127} , { -20, 104} , { 1, 67} , { 18, 64} , { 9, 43} , { 29, 0} , CTX_UNUSED } + }, + //----- model 1 ----- + { + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { 22, 25} , { 34, 0} , { 16, 0} , CTX_UNUSED , { -2, 9} , { 4, 41} , { -29, 118} , { 2, 65} , { -6, 71} , { -13, 79} , { 5, 52} }, + { { 57, 2} , { 41, 36} , { 26, 69} , CTX_UNUSED , { -45, 127} , { -15, 101} , { 
-4, 76} , { 26, 34} , { 19, 22} , { 40, 0} , CTX_UNUSED } + }, + //----- model 2 ----- + { + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { 29, 16} , { 25, 0} , { 14, 0} , CTX_UNUSED , { -10, 51} , { -3, 62} , { -27, 99} , { 26, 16} , { -4, 85} , { -24, 102} , { 5, 57} }, + { { 54, 0} , { 37, 42} , { 12, 97} , CTX_UNUSED , { -32, 127} , { -22, 117} , { -2, 74} , { 20, 40} , { 20, 10} , { 29, 0} , CTX_UNUSED } + } + }; + + + + + + static const int INIT_B8_TYPE_I[1][2][9][2] = + { + //----- model 0 ----- + { + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED } + } + }; + static const int INIT_B8_TYPE_P[3][2][9][2] = + { + //----- model 0 ----- + { + { CTX_UNUSED , { 12, 49} , CTX_UNUSED , { -4, 73} , { 17, 50} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { -6, 86} , { -17, 95} , { -6, 61} , { 9, 45} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED } + }, + //----- model 1 ----- + { + { CTX_UNUSED , { 9, 50} , CTX_UNUSED , { -3, 70} , { 10, 54} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { 6, 69} , { -13, 90} , { 0, 52} , { 8, 43} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED } + }, + //----- model 2 ----- + { + { CTX_UNUSED , { 6, 57} , CTX_UNUSED , { -17, 73} , { 14, 57} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { -6, 93} , { -14, 88} , { -6, 44} , { 4, 55} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED } + } + }; + + + + + + static const int INIT_MV_RES_I[1][2][10][2] = + { + //----- model 0 ----- + { + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , 
CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED } + } + }; + static const int INIT_MV_RES_P[3][2][10][2] = + { + //----- model 0 ----- + { + { { -3, 69} , CTX_UNUSED , { -6, 81} , { -11, 96} , CTX_UNUSED , { 0, 58} , CTX_UNUSED , { -3, 76} , { -10, 94} , CTX_UNUSED }, + { { 6, 55} , { 7, 67} , { -5, 86} , { 2, 88} , CTX_UNUSED , { 5, 54} , { 4, 69} , { -3, 81} , { 0, 88} , CTX_UNUSED } + }, + //----- model 1 ----- + { + { { -2, 69} , CTX_UNUSED , { -5, 82} , { -10, 96} , CTX_UNUSED , { 1, 56} , CTX_UNUSED , { -3, 74} , { -6, 85} , CTX_UNUSED }, + { { 2, 59} , { 2, 75} , { -3, 87} , { -3, 100} , CTX_UNUSED , { 0, 59} , { -3, 81} , { -7, 86} , { -5, 95} , CTX_UNUSED } + }, + //----- model 2 ----- + { + { { -11, 89} , CTX_UNUSED , { -15, 103} , { -21, 116} , CTX_UNUSED , { 1, 63} , CTX_UNUSED , { -5, 85} , { -13, 106} , CTX_UNUSED }, + { { 19, 57} , { 20, 58} , { 4, 84} , { 6, 96} , CTX_UNUSED , { 5, 63} , { 6, 75} , { -3, 90} , { -1, 101} , CTX_UNUSED } + } + }; + + + + + + static const int INIT_REF_NO_I[1][2][6][2] = + { + //----- model 0 ----- + { + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED } + } + }; + static const int INIT_REF_NO_P[3][2][6][2] = + { + //----- model 0 ----- + { + { { -7, 67} , { -5, 74} , { -4, 74} , { -5, 80} , { -7, 72} , { 1, 58} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED } + }, + //----- model 1 ----- + { + { { -1, 66} , { -1, 77} , { 1, 70} , { -2, 86} , { -5, 72} , { 0, 61} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED } + }, + //----- model 2 ----- + { + { { 3, 55} , { -4, 79} , { -2, 75} , { -12, 97} , { -7, 50} , { 1, 60} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED } + } + }; + + + static const int INIT_TRANSFORM_SIZE_I[1][1][3][2]= + { + //----- model 0 ----- + { + { 
{ 31, 21} , { 31, 31} , { 25, 50} }, + // { { 0, 41} , { 0, 63} , { 0, 63} }, + } + }; + + static const int INIT_TRANSFORM_SIZE_P[3][1][3][2]= + { + //----- model 0 ----- + { + { { 12, 40} , { 11, 51} , { 14, 59} }, + // { { 0, 41} , { 0, 63} , { 0, 63} }, + }, + //----- model 1 ----- + { + { { 25, 32} , { 21, 49} , { 21, 54} }, + // { { 0, 41} , { 0, 63} , { 0, 63} }, + }, + //----- model 2 ----- + { + { { 21, 33} , { 19, 50} , { 17, 61} }, + // { { 0, 41} , { 0, 63} , { 0, 63} }, + } + }; + + static const int INIT_DELTA_QP_I[1][1][4][2]= + { + //----- model 0 ----- + { + { { 0, 41} , { 0, 63} , { 0, 63} , { 0, 63} }, + } + }; + static const int INIT_DELTA_QP_P[3][1][4][2]= + { + //----- model 0 ----- + { + { { 0, 41} , { 0, 63} , { 0, 63} , { 0, 63} }, + }, + //----- model 1 ----- + { + { { 0, 41} , { 0, 63} , { 0, 63} , { 0, 63} }, + }, + //----- model 2 ----- + { + { { 0, 41} , { 0, 63} , { 0, 63} , { 0, 63} }, + } + }; + + + + + + static const int INIT_MB_AFF_I[1][1][4][2] = + { + //----- model 0 ----- + { + { { 0, 11} , { 1, 55} , { 0, 69} , CTX_UNUSED } + } + }; + static const int INIT_MB_AFF_P[3][1][4][2] = + { + //----- model 0 ----- + { + { { 0, 45} , { -4, 78} , { -3, 96} , CTX_UNUSED } + }, + //----- model 1 ----- + { + { { 13, 15} , { 7, 51} , { 2, 80} , CTX_UNUSED } + }, + //----- model 2 ----- + { + { { 7, 34} , { -9, 88} , { -20, 127} , CTX_UNUSED } + } + }; + + + + + + static const int INIT_IPR_I[1][1][2][2] = + { + //----- model 0 ----- + { + { { 13, 41} , { 3, 62} } + } + }; + static const int INIT_IPR_P[3][1][2][2] = + { + //----- model 0 ----- + { + { { 13, 41} , { 3, 62} } + }, + //----- model 1 ----- + { + { { 13, 41} , { 3, 62} } + }, + //----- model 2 ----- + { + { { 13, 41} , { 3, 62} } + } + }; + + + + + + static const int INIT_CIPR_I[1][1][4][2] = + { + //----- model 0 ----- + { + { { -9, 83} , { 4, 86} , { 0, 97} , { -7, 72} } + } + }; + static const int INIT_CIPR_P[3][1][4][2] = + { + //----- model 0 ----- + { + { { -9, 83} , { 4, 86} 
, { 0, 97} , { -7, 72} } + }, + //----- model 1 ----- + { + { { -9, 83} , { 4, 86} , { 0, 97} , { -7, 72} } + }, + //----- model 2 ----- + { + { { -9, 83} , { 4, 86} , { 0, 97} , { -7, 72} } + } + }; + + + + + + + static const int INIT_CBP_I[1][3][4][2] = + { + //----- model 0 ----- + { + { { -17, 127} , { -13, 102} , { 0, 82} , { -7, 74} }, + { { -21, 107} , { -27, 127} , { -31, 127} , { -24, 127} }, + { { -18, 95} , { -27, 127} , { -21, 114} , { -30, 127} } + } + }; + static const int INIT_CBP_P[3][3][4][2] = + { + //----- model 0 ----- + { + { { -27, 126} , { -28, 98} , { -25, 101} , { -23, 67} }, + { { -28, 82} , { -20, 94} , { -16, 83} , { -22, 110} }, + { { -21, 91} , { -18, 102} , { -13, 93} , { -29, 127} } + }, + //----- model 1 ----- + { + { { -39, 127} , { -18, 91} , { -17, 96} , { -26, 81} }, + { { -35, 98} , { -24, 102} , { -23, 97} , { -27, 119} }, + { { -24, 99} , { -21, 110} , { -18, 102} , { -36, 127} } + }, + //----- model 2 ----- + { + { { -36, 127} , { -17, 91} , { -14, 95} , { -25, 84} }, + { { -25, 86} , { -12, 89} , { -17, 91} , { -31, 127} }, + { { -14, 76} , { -18, 103} , { -13, 90} , { -37, 127} } + } + }; + + + + + + static const int INIT_BCBP_I[1][8][4][2] = + { + //----- model 0 ----- + { + { { -17, 123} , { -12, 115} , { -16, 122} , { -11, 115} }, + { { -12, 63} , { -2, 68} , { -15, 84} , { -13, 104} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { -3, 70} , { -8, 93} , { -10, 90} , { -30, 127} }, + { { -1, 74} , { -6, 97} , { -7, 91} , { -20, 127} }, + { { -4, 56} , { -5, 82} , { -7, 76} , { -22, 125} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED } + } + }; + static const int INIT_BCBP_P[3][8][4][2] = + { + //----- model 0 ----- + { + { { -7, 92} , { -5, 89} , { -7, 96} , { -13, 108} }, + { { -3, 46} , { -1, 65} , { -1, 57} , { -9, 93} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + 
{ { -3, 74} , { -9, 92} , { -8, 87} , { -23, 126} }, + { { 5, 54} , { 6, 60} , { 6, 59} , { 6, 69} }, + { { -1, 48} , { 0, 68} , { -4, 69} , { -8, 88} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED } + }, + //----- model 1 ----- + { + { { 0, 80} , { -5, 89} , { -7, 94} , { -4, 92} }, + { { 0, 39} , { 0, 65} , { -15, 84} , { -35, 127} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { -2, 73} , { -12, 104} , { -9, 91} , { -31, 127} }, + { { 3, 55} , { 7, 56} , { 7, 55} , { 8, 61} }, + { { -3, 53} , { 0, 68} , { -7, 74} , { -9, 88} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED } + }, + //----- model 2 ----- + { + { { 11, 80} , { 5, 76} , { 2, 84} , { 5, 78} }, + { { -6, 55} , { 4, 61} , { -14, 83} , { -37, 127} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { -5, 79} , { -11, 104} , { -11, 91} , { -30, 127} }, + { { 0, 65} , { -2, 79} , { 0, 72} , { -4, 92} }, + { { -6, 56} , { 3, 68} , { -8, 71} , { -13, 98} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED } + } + }; + + + + + + static const int INIT_MAP_I[1][8][15][2] = + { + //----- model 0 ----- + { + { { -7, 93} , { -11, 87} , { -3, 77} , { -5, 71} , { -4, 63} , { -4, 68} , { -12, 84} , { -7, 62} , { -7, 65} , { 8, 61} , { 5, 56} , { -2, 66} , { 1, 64} , { 0, 61} , { -2, 78} }, + { CTX_UNUSED , { 1, 50} , { 7, 52} , { 10, 35} , { 0, 44} , { 11, 38} , { 1, 45} , { 0, 46} , { 5, 44} , { 31, 17} , { 1, 51} , { 7, 50} , { 28, 19} , { 16, 33} , { 14, 62} }, + { { -17, 120} , { -20, 112} , { -18, 114} , { -11, 85} , { -15, 92} , { -14, 89} , { -26, 71} , { -15, 81} , { -14, 80} , { 0, 68} , { -14, 70} , { -24, 56} , { -23, 68} , { -24, 50} , { -11, 74} }, + // { { -1, 73} , { -7, 73} , { -6, 76} , { -7, 71} , { -9, 72} , { -5, 65} , { -14, 83} , { -8, 72} , { -10, 75} , { -5, 64} , { -4, 59} , { -13, 79} , { -9, 69} , { -8, 66} , { 3, 55} }, + { 
CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { -13, 108} , { -15, 100} , { -13, 101} , { -13, 91} , { -12, 94} , { -10, 88} , { -16, 84} , { -10, 86} , { -7, 83} , { -13, 87} , { -19, 94} , { 1, 70} , { 0, 72} , { -5, 74} , { 18, 59} }, + { { -8, 102} , { -15, 100} , { 0, 95} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , { -4, 75} , { 2, 72} , { -11, 75} , { -3, 71} , { 15, 46} , { -13, 69} , { 0, 62} , { 0, 65} , { 21, 37} , { -15, 72} , { 9, 57} , { 16, 54} , { 0, 62} , { 12, 72} } + } + }; + static const int INIT_MAP_P[3][8][15][2] = + { + //----- model 0 ----- + { + { { -2, 85} , { -6, 78} , { -1, 75} , { -7, 77} , { 2, 54} , { 5, 50} , { -3, 68} , { 1, 50} , { 6, 42} , { -4, 81} , { 1, 63} , { -4, 70} , { 0, 67} , { 2, 57} , { -2, 76} }, + { CTX_UNUSED , { 11, 35} , { 4, 64} , { 1, 61} , { 11, 35} , { 18, 25} , { 12, 24} , { 13, 29} , { 13, 36} , { -10, 93} , { -7, 73} , { -2, 73} , { 13, 46} , { 9, 49} , { -7, 100} }, + { { -4, 79} , { -7, 71} , { -5, 69} , { -9, 70} , { -8, 66} , { -10, 68} , { -19, 73} , { -12, 69} , { -16, 70} , { -15, 67} , { -20, 62} , { -19, 70} , { -16, 66} , { -22, 65} , { -20, 63} }, + // { { -4, 60} , { -3, 49} , { -2, 50} , { -4, 49} , { -5, 48} , { -2, 46} , { -7, 54} , { -1, 45} , { -4, 49} , { 4, 39} , { 0, 42} , { 2, 43} , { 0, 44} , { 5, 32} , { 15, 30} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED 
}, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { 9, 53} , { 2, 53} , { 5, 53} , { -2, 61} , { 0, 56} , { 0, 56} , { -13, 63} , { -5, 60} , { -1, 62} , { 4, 57} , { -6, 69} , { 4, 57} , { 14, 39} , { 4, 51} , { 13, 68} }, + { { 3, 64} , { 1, 61} , { 9, 63} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , { 7, 50} , { 16, 39} , { 5, 44} , { 4, 52} , { 11, 48} , { -5, 60} , { -1, 59} , { 0, 59} , { 22, 33} , { 5, 44} , { 14, 43} , { -1, 78} , { 0, 60} , { 9, 69} } + }, + //----- model 1 ----- + { + { { -13, 103} , { -13, 91} , { -9, 89} , { -14, 92} , { -8, 76} , { -12, 87} , { -23, 110} , { -24, 105} , { -10, 78} , { -20, 112} , { -17, 99} , { -78, 127} , { -70, 127} , { -50, 127} , { -46, 127} }, + { CTX_UNUSED , { -4, 66} , { -5, 78} , { -4, 71} , { -8, 72} , { 2, 59} , { -1, 55} , { -7, 70} , { -6, 75} , { -8, 89} , { -34, 119} , { -3, 75} , { 32, 20} , { 30, 22} , { -44, 127} }, + { { -5, 85} , { -6, 81} , { -10, 77} , { -7, 81} , { -17, 80} , { -18, 73} , { -4, 74} , { -10, 83} , { -9, 71} , { -9, 67} , { -1, 61} , { -8, 66} , { -14, 66} , { 0, 59} , { 2, 59} }, + // { { -4, 60} , { -3, 49} , { -2, 50} , { -4, 49} , { -5, 48} , { -2, 46} , { -7, 54} , { -1, 45} , { -4, 49} , { 4, 39} , { 0, 42} , { 2, 43} , { 0, 44} , { 5, 32} , { 15, 30} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { 0, 54} , { -5, 61} , { 0, 58} , { -1, 60} 
, { -3, 61} , { -8, 67} , { -25, 84} , { -14, 74} , { -5, 65} , { 5, 52} , { 2, 57} , { 0, 61} , { -9, 69} , { -11, 70} , { 18, 55} }, + { { -4, 71} , { 0, 58} , { 7, 61} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , { 9, 41} , { 18, 25} , { 9, 32} , { 5, 43} , { 9, 47} , { 0, 44} , { 0, 51} , { 2, 46} , { 19, 38} , { -4, 66} , { 15, 38} , { 12, 42} , { 9, 34} , { 0, 89} } + }, + //----- model 2 ----- + { + { { -4, 86} , { -12, 88} , { -5, 82} , { -3, 72} , { -4, 67} , { -8, 72} , { -16, 89} , { -9, 69} , { -1, 59} , { 5, 66} , { 4, 57} , { -4, 71} , { -2, 71} , { 2, 58} , { -1, 74} }, + { CTX_UNUSED , { -4, 44} , { -1, 69} , { 0, 62} , { -7, 51} , { -4, 47} , { -6, 42} , { -3, 41} , { -6, 53} , { 8, 76} , { -9, 78} , { -11, 83} , { 9, 52} , { 0, 67} , { -5, 90} }, + { { -3, 78} , { -8, 74} , { -9, 72} , { -10, 72} , { -18, 75} , { -12, 71} , { -11, 63} , { -5, 70} , { -17, 75} , { -14, 72} , { -16, 67} , { -8, 53} , { -14, 59} , { -9, 52} , { -11, 68} }, + // { { -4, 60} , { -3, 49} , { -2, 50} , { -4, 49} , { -5, 48} , { -2, 46} , { -7, 54} , { -1, 45} , { -4, 49} , { 4, 39} , { 0, 42} , { 2, 43} , { 0, 44} , { 5, 32} , { 15, 30} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { 1, 67} , { -15, 72} , { -5, 75} , { -8, 80} , { -21, 83} , { -21, 64} , { -13, 31} , { -25, 64} , { -29, 94} , { 9, 75} , { 17, 63} , { -8, 74} , { -5, 35} , { -2, 27} , { 13, 91} }, + { { 3, 65} , { -7, 69} , { 8, 77} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED 
, CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , { -10, 66} , { 3, 62} , { -3, 68} , { -20, 81} , { 0, 30} , { 1, 7} , { -3, 23} , { -21, 74} , { 16, 66} , { -23, 124} , { 17, 37} , { 44, -18} , { 50, -34} , { -22, 127} } + } + }; + + + + + static const int INIT_LAST_I[1][8][15][2] = + { + //----- model 0 ----- + { + { { 24, 0} , { 15, 9} , { 8, 25} , { 13, 18} , { 15, 9} , { 13, 19} , { 10, 37} , { 12, 18} , { 6, 29} , { 20, 33} , { 15, 30} , { 4, 45} , { 1, 58} , { 0, 62} , { 7, 61} }, + { CTX_UNUSED , { 12, 38} , { 11, 45} , { 15, 39} , { 11, 42} , { 13, 44} , { 16, 45} , { 12, 41} , { 10, 49} , { 30, 34} , { 18, 42} , { 10, 55} , { 17, 51} , { 17, 46} , { 0, 89} }, + { { 23, -13} , { 26, -13} , { 40, -15} , { 49, -14} , { 44, 3} , { 45, 6} , { 44, 34} , { 33, 54} , { 19, 82} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + // { { 12, 33} , { 5, 38} , { 9, 34} , { 18, 22} , { 19, 22} , { 23, 19} , { 26, 16} , { 14, 44} , { 40, 14} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { 26, -19} , { 22, -17} , { 26, -17} , { 30, -25} , { 28, -20} , { 33, -23} , { 37, -27} , { 33, -23} , { 40, -28} , { 38, -17} , { 33, -11} , { 40, -15} , { 41, -6} , { 38, 1} , { 41, 17} }, + { { 30, -6} , { 27, 3} , { 26, 22} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , { 37, -16} , { 35, -4} , { 38, -8} , { 38, -3} , { 37, 3} , { 38, 5} , { 42, 0} , { 35, 16} 
, { 39, 22} , { 14, 48} , { 27, 37} , { 21, 60} , { 12, 68} , { 2, 97} } + } + }; + static const int INIT_LAST_P[3][8][15][2] = + { + //----- model 0 ----- + { + { { 11, 28} , { 2, 40} , { 3, 44} , { 0, 49} , { 0, 46} , { 2, 44} , { 2, 51} , { 0, 47} , { 4, 39} , { 2, 62} , { 6, 46} , { 0, 54} , { 3, 54} , { 2, 58} , { 4, 63} }, + { CTX_UNUSED , { 6, 51} , { 6, 57} , { 7, 53} , { 6, 52} , { 6, 55} , { 11, 45} , { 14, 36} , { 8, 53} , { -1, 82} , { 7, 55} , { -3, 78} , { 15, 46} , { 22, 31} , { -1, 84} }, + { { 9, -2} , { 26, -9} , { 33, -9} , { 39, -7} , { 41, -2} , { 45, 3} , { 49, 9} , { 45, 27} , { 36, 59} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + // { { 17, 27} , { 23, 13} , { 24, 16} , { 22, 25} , { 23, 27} , { 23, 32} , { 17, 43} , { 17, 49} , { 2, 70} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { 25, 7} , { 30, -7} , { 28, 3} , { 28, 4} , { 32, 0} , { 34, -1} , { 30, 6} , { 30, 6} , { 32, 9} , { 31, 19} , { 26, 27} , { 26, 30} , { 37, 20} , { 28, 34} , { 17, 70} }, + { { 1, 67} , { 5, 59} , { 9, 67} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , { 16, 30} , { 18, 32} , { 18, 35} , { 22, 29} , { 24, 31} , { 23, 38} , { 18, 43} , { 20, 41} , { 11, 63} , { 9, 59} , { 9, 64} , { -1, 94} , { -2, 89} , { -9, 108} } + }, + //----- model 1 ----- + { + { { 4, 45} , { 10, 28} , { 10, 31} , { 33, -11} , { 52, -43} , { 18, 15} , { 28, 0} , { 35, -22} , { 38, -25} , { 34, 
0} , { 39, -18} , { 32, -12} , { 102, -94} , { 0, 0} , { 56, -15} }, + { CTX_UNUSED , { 33, -4} , { 29, 10} , { 37, -5} , { 51, -29} , { 39, -9} , { 52, -34} , { 69, -58} , { 67, -63} , { 44, -5} , { 32, 7} , { 55, -29} , { 32, 1} , { 0, 0} , { 27, 36} }, + { { 17, -10} , { 32, -13} , { 42, -9} , { 49, -5} , { 53, 0} , { 64, 3} , { 68, 10} , { 66, 27} , { 47, 57} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + // { { 17, 27} , { 23, 13} , { 24, 16} , { 22, 25} , { 23, 27} , { 23, 32} , { 17, 43} , { 17, 49} , { 2, 70} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { 33, -25} , { 34, -30} , { 36, -28} , { 38, -28} , { 38, -27} , { 34, -18} , { 35, -16} , { 34, -14} , { 32, -8} , { 37, -6} , { 35, 0} , { 30, 10} , { 28, 18} , { 26, 25} , { 29, 41} }, + { { 0, 75} , { 2, 72} , { 8, 77} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , { 14, 35} , { 18, 31} , { 17, 35} , { 21, 30} , { 17, 45} , { 20, 42} , { 18, 45} , { 27, 26} , { 16, 54} , { 7, 66} , { 16, 56} , { 11, 73} , { 10, 67} , { -10, 116} } + }, + //----- model 2 ----- + { + { { 4, 39} , { 0, 42} , { 7, 34} , { 11, 29} , { 8, 31} , { 6, 37} , { 7, 42} , { 3, 40} , { 8, 33} , { 13, 43} , { 13, 36} , { 4, 47} , { 3, 55} , { 2, 58} , { 6, 60} }, + { CTX_UNUSED , { 8, 44} , { 11, 44} , { 14, 42} , { 7, 48} , { 4, 56} , { 4, 52} , { 13, 37} , { 9, 49} , { 19, 58} , { 10, 48} , { 12, 45} , { 0, 69} , { 20, 33} , { 8, 63} }, + 
{ { 9, -2} , { 30, -10} , { 31, -4} , { 33, -1} , { 33, 7} , { 31, 12} , { 37, 23} , { 31, 38} , { 20, 64} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + // { { 17, 27} , { 23, 13} , { 24, 16} , { 22, 25} , { 23, 27} , { 23, 32} , { 17, 43} , { 17, 49} , { 2, 70} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { 35, -18} , { 33, -25} , { 28, -3} , { 24, 10} , { 27, 0} , { 34, -14} , { 52, -44} , { 39, -24} , { 19, 17} , { 31, 25} , { 36, 29} , { 24, 33} , { 34, 15} , { 30, 20} , { 22, 73} }, + { { 20, 34} , { 19, 31} , { 27, 44} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , { 19, 16} , { 15, 36} , { 15, 36} , { 21, 28} , { 25, 21} , { 30, 20} , { 31, 12} , { 27, 16} , { 24, 42} , { 0, 93} , { 14, 56} , { 15, 57} , { 26, 38} , { -24, 127} } + } + }; + + + + + + static const int INIT_ONE_I[1][8][5][2] = + { + //----- model 0 ----- + { + { { -3, 71} , { -6, 42} , { -5, 50} , { -3, 54} , { -2, 62} }, + { { -5, 67} , { -5, 27} , { -3, 39} , { -2, 44} , { 0, 46} }, + { { -3, 75} , { -1, 23} , { 1, 34} , { 1, 43} , { 0, 54} }, + // { { -9, 75} , { -1, 44} , { -2, 49} , { -2, 51} , { -1, 51} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { -12, 92} , { -15, 55} , { -10, 60} , { -6, 62} , { -4, 65} }, + { { -11, 97} , { -20, 84} , { -11, 79} , { -6, 73} , { -4, 74} }, + { { -8, 78} , { -5, 33} , { -4, 48} , { -2, 53} , { -3, 62} }, + { CTX_UNUSED , 
CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED } + } + }; + static const int INIT_ONE_P[3][8][5][2] = + { + //----- model 0 ----- + { + { { -6, 76} , { -2, 44} , { 0, 45} , { 0, 52} , { -3, 64} }, + { { -9, 77} , { 3, 24} , { 0, 42} , { 0, 48} , { 0, 55} }, + { { -6, 66} , { -7, 35} , { -7, 42} , { -8, 45} , { -5, 48} }, + // { { -3, 58} , { -1, 28} , { 0, 29} , { 2, 30} , { 1, 35} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { 1, 58} , { -3, 29} , { -1, 36} , { 1, 38} , { 2, 43} }, + { { 0, 70} , { -4, 29} , { 5, 31} , { 7, 42} , { 1, 59} }, + { { 0, 58} , { 8, 5} , { 10, 14} , { 14, 18} , { 13, 27} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED } + }, + //----- model 1 ----- + { + { { -23, 112} , { -15, 71} , { -7, 61} , { 0, 53} , { -5, 66} }, + { { -21, 101} , { -3, 39} , { -5, 53} , { -7, 61} , { -11, 75} }, + { { -5, 71} , { 0, 24} , { -1, 36} , { -2, 42} , { -2, 52} }, + // { { -3, 58} , { -1, 28} , { 0, 29} , { 2, 30} , { 1, 35} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { -11, 76} , { -10, 44} , { -10, 52} , { -10, 57} , { -9, 58} }, + { { 2, 66} , { -9, 34} , { 1, 32} , { 11, 31} , { 5, 52} }, + { { 3, 52} , { 7, 4} , { 10, 8} , { 17, 8} , { 16, 19} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED } + }, + //----- model 2 ----- + { + { { -24, 115} , { -22, 82} , { -9, 62} , { 0, 53} , { 0, 59} }, + { { -21, 100} , { -14, 57} , { -12, 67} , { -11, 71} , { -10, 77} }, + { { -9, 71} , { -7, 37} , { -8, 44} , { -11, 49} , { -10, 56} }, + // { { -3, 58} , { -1, 28} , { 0, 29} , { 2, 30} , { 1, 35} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { -10, 82} , { -8, 48} , { -8, 61} , { -8, 66} , { -7, 70} }, + { { -4, 79} , { -22, 69} , { -16, 75} , { -2, 58} , { 1, 58} }, + { { -13, 81} , { -6, 38} , { -13, 62} , { -6, 58} , { -2, 59} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED } + } + }; + + + + 
+ + static const int INIT_ABS_I[1][8][5][2] = + { + //----- model 0 ----- + { + { { 0, 58} , { 1, 63} , { -2, 72} , { -1, 74} , { -9, 91} }, + { { -16, 64} , { -8, 68} , { -10, 78} , { -6, 77} , { -10, 86} }, + { { -2, 55} , { 0, 61} , { 1, 64} , { 0, 68} , { -9, 92} }, + // { { -4, 56} , { -1, 59} , { -6, 71} , { -8, 74} , { -11, 85} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { -12, 73} , { -8, 76} , { -7, 80} , { -9, 88} , { -17, 110} }, + { { -13, 86} , { -13, 96} , { -11, 97} , { -19, 117} , CTX_UNUSED }, + { { -13, 71} , { -10, 79} , { -12, 86} , { -13, 90} , { -14, 97} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED } + } + }; + static const int INIT_ABS_P[3][8][5][2] = + { + //----- model 0 ----- + { + { { -2, 59} , { -4, 70} , { -4, 75} , { -8, 82} , { -17, 102} }, + { { -6, 59} , { -7, 71} , { -12, 83} , { -11, 87} , { -30, 119} }, + { { -12, 56} , { -6, 60} , { -5, 62} , { -8, 66} , { -8, 76} }, + // { { -7, 54} , { -2, 58} , { -4, 63} , { -5, 66} , { 1, 64} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { -6, 55} , { 0, 58} , { 0, 64} , { -3, 74} , { -10, 90} }, + { { -2, 58} , { -3, 72} , { -3, 81} , { -11, 97} , CTX_UNUSED }, + { { 2, 40} , { 0, 58} , { -3, 70} , { -6, 79} , { -8, 85} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED } + }, + //----- model 1 ----- + { + { { -11, 77} , { -9, 80} , { -9, 84} , { -10, 87} , { -34, 127} }, + { { -15, 77} , { -17, 91} , { -25, 107} , { -25, 111} , { -28, 122} }, + { { -9, 57} , { -6, 63} , { -4, 65} , { -4, 67} , { -7, 82} }, + // { { -7, 54} , { -2, 58} , { -4, 63} , { -5, 66} , { 1, 64} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { -16, 72} , { -7, 69} , { -4, 69} , { -5, 74} , { -9, 86} }, + { { -2, 55} , { -2, 67} , { 0, 73} , { -8, 89} , CTX_UNUSED }, + { { 3, 37} , { -1, 61} , { -5, 73} , { -1, 70} , { -4, 78} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , 
CTX_UNUSED , CTX_UNUSED } + }, + //----- model 2 ----- + { + { { -14, 85} , { -13, 89} , { -13, 94} , { -11, 92} , { -29, 127} }, + { { -21, 85} , { -16, 88} , { -23, 104} , { -15, 98} , { -37, 127} }, + { { -12, 59} , { -8, 63} , { -9, 67} , { -6, 68} , { -10, 79} }, + // { { -7, 54} , { -2, 58} , { -4, 63} , { -5, 66} , { 1, 64} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { -14, 75} , { -10, 79} , { -9, 83} , { -12, 92} , { -18, 108} }, + { { -13, 78} , { -9, 83} , { -4, 81} , { -13, 99} , CTX_UNUSED }, + { { -16, 73} , { -10, 76} , { -13, 86} , { -9, 83} , { -10, 87} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED } + } + }; + + + + + + static const int INIT_FLD_MAP_I[1][8][15][2] = + { + //----- model 0 ----- + { + { { -6, 93} , { -6, 84} , { -8, 79} , { 0, 66} , { -1, 71} , { 0, 62} , { -2, 60} , { -2, 59} , { -5, 75} , { -3, 62} , { -4, 58} , { -9, 66} , { -1, 79} , { 0, 71} , { 3, 68} }, + { CTX_UNUSED , { 10, 44} , { -7, 62} , { 15, 36} , { 14, 40} , { 16, 27} , { 12, 29} , { 1, 44} , { 20, 36} , { 18, 32} , { 5, 42} , { 1, 48} , { 10, 62} , { 17, 46} , { 9, 64} }, + { { -14, 106} , { -13, 97} , { -15, 90} , { -12, 90} , { -18, 88} , { -10, 73} , { -9, 79} , { -14, 86} , { -10, 73} , { -10, 70} , { -10, 69} , { -5, 66} , { -9, 64} , { -5, 58} , { 2, 59} }, + // { { -1, 73} , { -7, 73} , { -6, 76} , { -7, 71} , { -9, 72} , { -5, 65} , { -14, 83} , { -8, 72} , { -10, 75} , { -5, 64} , { -4, 59} , { -13, 79} , { -9, 69} , { -8, 66} , { 3, 55} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { -12, 104} , { -11, 97} , { -16, 96} , { -7, 88} , { -8, 85} , { 
-7, 85} , { -9, 85} , { -13, 88} , { 4, 66} , { -3, 77} , { -3, 76} , { -6, 76} , { 10, 58} , { -1, 76} , { -1, 83} }, + { { -7, 99} , { -14, 95} , { 2, 95} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , { 0, 76} , { -5, 74} , { 0, 70} , { -11, 75} , { 1, 68} , { 0, 65} , { -14, 73} , { 3, 62} , { 4, 62} , { -1, 68} , { -13, 75} , { 11, 55} , { 5, 64} , { 12, 70} } + } + }; + static const int INIT_FLD_MAP_P[3][8][15][2] = + { + //----- model 0 ----- + { + { { -13, 106} , { -16, 106} , { -10, 87} , { -21, 114} , { -18, 110} , { -14, 98} , { -22, 110} , { -21, 106} , { -18, 103} , { -21, 107} , { -23, 108} , { -26, 112} , { -10, 96} , { -12, 95} , { -5, 91} }, + { CTX_UNUSED , { -9, 93} , { -22, 94} , { -5, 86} , { 9, 67} , { -4, 80} , { -10, 85} , { -1, 70} , { 7, 60} , { 9, 58} , { 5, 61} , { 12, 50} , { 15, 50} , { 18, 49} , { 17, 54} }, + { { -5, 85} , { -6, 81} , { -10, 77} , { -7, 81} , { -17, 80} , { -18, 73} , { -4, 74} , { -10, 83} , { -9, 71} , { -9, 67} , { -1, 61} , { -8, 66} , { -14, 66} , { 0, 59} , { 2, 59} }, + // { { -4, 60} , { -3, 49} , { -2, 50} , { -4, 49} , { -5, 48} , { -2, 46} , { -7, 54} , { -1, 45} , { -4, 49} , { 4, 39} , { 0, 42} , { 2, 43} , { 0, 44} , { 5, 32} , { 15, 30} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { 10, 41} , { 7, 46} , { -1, 51} , { 7, 49} , { 8, 52} , { 9, 41} , { 6, 47} , { 2, 55} , { 13, 41} , { 10, 44} , { 6, 50} , { 5, 53} , { 13, 49} , { 4, 63} , { 6, 64} }, + { { -2, 69} , { -2, 59} , { 6, 70} , CTX_UNUSED , CTX_UNUSED , 
CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , { 10, 44} , { 9, 31} , { 12, 43} , { 3, 53} , { 14, 34} , { 10, 38} , { -3, 52} , { 13, 40} , { 17, 32} , { 7, 44} , { 7, 38} , { 13, 50} , { 10, 57} , { 26, 43} } + }, + //----- model 1 ----- + { + { { -21, 126} , { -23, 124} , { -20, 110} , { -26, 126} , { -25, 124} , { -17, 105} , { -27, 121} , { -27, 117} , { -17, 102} , { -26, 117} , { -27, 116} , { -33, 122} , { -10, 95} , { -14, 100} , { -8, 95} }, + { CTX_UNUSED , { -17, 111} , { -28, 114} , { -6, 89} , { -2, 80} , { -4, 82} , { -9, 85} , { -8, 81} , { -1, 72} , { 5, 64} , { 1, 67} , { 9, 56} , { 0, 69} , { 1, 69} , { 7, 69} }, + { { -3, 81} , { -3, 76} , { -7, 72} , { -6, 78} , { -12, 72} , { -14, 68} , { -3, 70} , { -6, 76} , { -5, 66} , { -5, 62} , { 0, 57} , { -4, 61} , { -9, 60} , { 1, 54} , { 2, 58} }, + // { { -4, 60} , { -3, 49} , { -2, 50} , { -4, 49} , { -5, 48} , { -2, 46} , { -7, 54} , { -1, 45} , { -4, 49} , { 4, 39} , { 0, 42} , { 2, 43} , { 0, 44} , { 5, 32} , { 15, 30} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { -7, 69} , { -6, 67} , { -16, 77} , { -2, 64} , { 2, 61} , { -6, 67} , { -3, 64} , { 2, 57} , { -3, 65} , { -3, 66} , { 0, 62} , { 9, 51} , { -1, 66} , { -2, 71} , { -2, 75} }, + { { -1, 70} , { -9, 72} , { 14, 60} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , { 16, 37} , { 0, 47} , { 18, 35} , { 11, 37} , { 12, 41} , { 10, 41} , { 2, 48} , { 12, 41} , { 
13, 41} , { 0, 59} , { 3, 50} , { 19, 40} , { 3, 66} , { 18, 50} } + }, + //----- model 2 ----- + { + { { -22, 127} , { -25, 127} , { -25, 120} , { -27, 127} , { -19, 114} , { -23, 117} , { -25, 118} , { -26, 117} , { -24, 113} , { -28, 118} , { -31, 120} , { -37, 124} , { -10, 94} , { -15, 102} , { -10, 99} }, + { CTX_UNUSED , { -13, 106} , { -50, 127} , { -5, 92} , { 17, 57} , { -5, 86} , { -13, 94} , { -12, 91} , { -2, 77} , { 0, 71} , { -1, 73} , { 4, 64} , { -7, 81} , { 5, 64} , { 15, 57} }, + { { -3, 78} , { -8, 74} , { -9, 72} , { -10, 72} , { -18, 75} , { -12, 71} , { -11, 63} , { -5, 70} , { -17, 75} , { -14, 72} , { -16, 67} , { -8, 53} , { -14, 59} , { -9, 52} , { -11, 68} }, + // { { -4, 60} , { -3, 49} , { -2, 50} , { -4, 49} , { -5, 48} , { -2, 46} , { -7, 54} , { -1, 45} , { -4, 49} , { 4, 39} , { 0, 42} , { 2, 43} , { 0, 44} , { 5, 32} , { 15, 30} }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { 1, 67} , { 0, 68} , { -10, 67} , { 1, 68} , { 0, 77} , { 2, 64} , { 0, 68} , { -5, 78} , { 7, 55} , { 5, 59} , { 2, 65} , { 14, 54} , { 15, 44} , { 5, 60} , { 2, 70} }, + { { -2, 76} , { -18, 86} , { 12, 70} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , { 5, 64} , { -12, 70} , { 11, 55} , { 5, 56} , { 0, 69} , { 2, 65} , { -6, 74} , { 5, 54} , { 7, 54} , { -6, 76} , { -11, 82} , { -2, 77} , { -2, 77} , { 25, 42} } + } + }; + + + + + + static const int INIT_FLD_LAST_I[1][8][15][2] = + { + //----- model 0 ----- + { + { { 15, 6} , { 6, 19} , { 7, 16} , { 12, 14} , { 18, 13} , { 
13, 11} , { 13, 15} , { 15, 16} , { 12, 23} , { 13, 23} , { 15, 20} , { 14, 26} , { 14, 44} , { 17, 40} , { 17, 47} }, + { CTX_UNUSED , { 24, 17} , { 21, 21} , { 25, 22} , { 31, 27} , { 22, 29} , { 19, 35} , { 14, 50} , { 10, 57} , { 7, 63} , { -2, 77} , { -4, 82} , { -3, 94} , { 9, 69} , { -12, 109} }, + { { 21, -10} , { 24, -11} , { 28, -8} , { 28, -1} , { 29, 3} , { 29, 9} , { 35, 20} , { 29, 36} , { 14, 67} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + // { { 12, 33} , { 5, 38} , { 9, 34} , { 18, 22} , { 19, 22} , { 23, 19} , { 26, 16} , { 14, 44} , { 40, 14} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { 36, -35} , { 36, -34} , { 32, -26} , { 37, -30} , { 44, -32} , { 34, -18} , { 34, -15} , { 40, -15} , { 33, -7} , { 35, -5} , { 33, 0} , { 38, 2} , { 33, 13} , { 23, 35} , { 13, 58} }, + { { 29, -3} , { 26, 0} , { 22, 30} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , { 31, -7} , { 35, -15} , { 34, -3} , { 34, 3} , { 36, -1} , { 34, 5} , { 32, 11} , { 35, 5} , { 34, 12} , { 39, 11} , { 30, 29} , { 34, 26} , { 29, 39} , { 19, 66} } + } + }; + static const int INIT_FLD_LAST_P[3][8][15][2] = + { + //----- model 0 ----- + { + { { 14, 11} , { 11, 14} , { 9, 11} , { 18, 11} , { 21, 9} , { 23, -2} , { 32, -15} , { 32, -15} , { 34, -21} , { 39, -23} , { 42, -33} , { 41, -31} , { 46, -28} , { 38, -12} , { 21, 29} }, + { CTX_UNUSED , { 45, -24} , { 53, -45} , { 48, -26} , { 
65, -43} , { 43, -19} , { 39, -10} , { 30, 9} , { 18, 26} , { 20, 27} , { 0, 57} , { -14, 82} , { -5, 75} , { -19, 97} , { -35, 125} }, + { { 21, -13} , { 33, -14} , { 39, -7} , { 46, -2} , { 51, 2} , { 60, 6} , { 61, 17} , { 55, 34} , { 42, 62} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + // { { 17, 27} , { 23, 13} , { 24, 16} , { 22, 25} , { 23, 27} , { 23, 32} , { 17, 43} , { 17, 49} , { 2, 70} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { 27, 0} , { 28, 0} , { 31, -4} , { 27, 6} , { 34, 8} , { 30, 10} , { 24, 22} , { 33, 19} , { 22, 32} , { 26, 31} , { 21, 41} , { 26, 44} , { 23, 47} , { 16, 65} , { 14, 71} }, + { { 8, 60} , { 6, 63} , { 17, 65} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , { 21, 24} , { 23, 20} , { 26, 23} , { 27, 32} , { 28, 23} , { 28, 24} , { 23, 40} , { 24, 32} , { 28, 29} , { 23, 42} , { 19, 57} , { 22, 53} , { 22, 61} , { 11, 86} } + }, + //----- model 1 ----- + { + { { 19, -6} , { 18, -6} , { 14, 0} , { 26, -12} , { 31, -16} , { 33, -25} , { 33, -22} , { 37, -28} , { 39, -30} , { 42, -30} , { 47, -42} , { 45, -36} , { 49, -34} , { 41, -17} , { 32, 9} }, + { CTX_UNUSED , { 69, -71} , { 63, -63} , { 66, -64} , { 77, -74} , { 54, -39} , { 52, -35} , { 41, -10} , { 36, 0} , { 40, -1} , { 30, 14} , { 28, 26} , { 23, 37} , { 12, 55} , { 11, 65} }, + { { 17, -10} , { 32, -13} , { 42, -9} , { 49, -5} , { 53, 0} , { 64, 3} , { 68, 10} , { 66, 27} , 
{ 47, 57} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + // { { 17, 27} , { 23, 13} , { 24, 16} , { 22, 25} , { 23, 27} , { 23, 32} , { 17, 43} , { 17, 49} , { 2, 70} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { 37, -33} , { 39, -36} , { 40, -37} , { 38, -30} , { 46, -33} , { 42, -30} , { 40, -24} , { 49, -29} , { 38, -12} , { 40, -10} , { 38, -3} , { 46, -5} , { 31, 20} , { 29, 30} , { 25, 44} }, + { { 12, 48} , { 11, 49} , { 26, 45} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , { 22, 22} , { 23, 22} , { 27, 21} , { 33, 20} , { 26, 28} , { 30, 24} , { 27, 34} , { 18, 42} , { 25, 39} , { 18, 50} , { 12, 70} , { 21, 54} , { 14, 71} , { 11, 83} } + }, + //----- model 2 ----- + { + { { 17, -13} , { 16, -9} , { 17, -12} , { 27, -21} , { 37, -30} , { 41, -40} , { 42, -41} , { 48, -47} , { 39, -32} , { 46, -40} , { 52, -51} , { 46, -41} , { 52, -39} , { 43, -19} , { 32, 11} }, + { CTX_UNUSED , { 61, -55} , { 56, -46} , { 62, -50} , { 81, -67} , { 45, -20} , { 35, -2} , { 28, 15} , { 34, 1} , { 39, 1} , { 30, 17} , { 20, 38} , { 18, 45} , { 15, 54} , { 0, 79} }, + { { 9, -2} , { 30, -10} , { 31, -4} , { 33, -1} , { 33, 7} , { 31, 12} , { 37, 23} , { 31, 38} , { 20, 64} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + // { { 17, 27} , { 23, 13} , { 24, 16} , { 22, 25} , { 23, 27} , { 23, 32} , { 17, 43} , { 17, 49} , { 2, 70} , CTX_UNUSED , 
CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { { 36, -16} , { 37, -14} , { 37, -17} , { 32, 1} , { 34, 15} , { 29, 15} , { 24, 25} , { 34, 22} , { 31, 16} , { 35, 18} , { 31, 28} , { 33, 41} , { 36, 28} , { 27, 47} , { 21, 62} }, + { { 18, 31} , { 19, 26} , { 36, 24} , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED , CTX_UNUSED }, + { CTX_UNUSED , { 24, 23} , { 27, 16} , { 24, 30} , { 31, 29} , { 22, 41} , { 22, 42} , { 16, 60} , { 15, 52} , { 14, 60} , { 3, 78} , { -16, 123} , { 21, 53} , { 22, 56} , { 25, 61} } + } + }; + + + #endif + Index: llvm-test/MultiSource/Applications/JM/ldecod/defines.h diff -c /dev/null llvm-test/MultiSource/Applications/JM/ldecod/defines.h:1.1 *** /dev/null Sat Feb 11 04:33:42 2006 --- llvm-test/MultiSource/Applications/JM/ldecod/defines.h Sat Feb 11 04:33:22 2006 *************** *** 0 **** --- 1,165 ---- + + /*! + ************************************************************************** + * \file defines.h + * + * \brief + * Headerfile containing some useful global definitions + * + * \author + * Detlev Marpe + * Copyright (C) 2000 HEINRICH HERTZ INSTITUTE All Rights Reserved. + * + * \date + * 21. 
March 2001 + ************************************************************************** + */ + + #ifndef _DEFINES_H_ + #define _DEFINES_H_ + + #if defined _DEBUG + #define TRACE 0 //!< 0:Trace off 1:Trace on 2:detailed CABAC context information + #else + #define TRACE 0 //!< 0:Trace off 1:Trace on 2:detailed CABAC context information + #endif + + //#define PAIR_FIELDS_IN_OUTPUT + + //#define MAX_NUM_SLICES 150 + #define MAX_NUM_SLICES 50 + + //FREXT Profile IDC definitions + #define FREXT_HP 100 //!< YUV 4:2:0/8 "High" + #define FREXT_Hi10P 110 //!< YUV 4:2:0/10 "High 10" + #define FREXT_Hi422 122 //!< YUV 4:2:2/10 "High 4:2:2" + #define FREXT_Hi444 144 //!< YUV 4:4:4/12 "High 4:4:4" + + #define YUV400 0 + #define YUV420 1 + #define YUV422 2 + #define YUV444 3 + + + #define ZEROSNR 0 + + // CAVLC + #define LUMA 0 + #define LUMA_INTRA16x16DC 1 + #define LUMA_INTRA16x16AC 2 + + #define TOTRUN_NUM 15 + #define RUNBEFORE_NUM 7 + + + //--- block types for CABAC ---- + #define LUMA_16DC 0 + #define LUMA_16AC 1 + #define LUMA_8x8 2 + #define LUMA_8x4 3 + #define LUMA_4x8 4 + #define LUMA_4x4 5 + #define CHROMA_DC 6 + #define CHROMA_AC 7 + #define CHROMA_DC_2x4 8 + #define CHROMA_DC_4x4 9 + #define NUM_BLOCK_TYPES 10 + + + #define MAX_CODED_FRAME_SIZE 8000000 //!< bytes for one frame + + //#define _LEAKYBUCKET_ + + #define absm(A) ((A)<(0) ? 
(-(A)):(A)) //!< abs macro, faster than procedure + + #define Clip1(a) ((a)>img->max_imgpel_value?img->max_imgpel_value:((a)<0?0:(a))) + #define Clip1_Chr(a) ((a)>img->max_imgpel_value_uv?img->max_imgpel_value_uv:((a)<0?0:(a))) + #define Clip3(min,max,val) (((val)<(min))?(min):(((val)>(max))?(max):(val))) + + #define P8x8 8 + #define I4MB 9 + #define I16MB 10 + #define IBLOCK 11 + #define SI4MB 12 + #define I8MB 13 + #define IPCM 14 + #define MAXMODE 15 + + #define IS_INTRA(MB) ((MB)->mb_type==I4MB || (MB)->mb_type==I16MB ||(MB)->mb_type==IPCM || (MB)->mb_type==I8MB || (MB)->mb_type==SI4MB) + #define IS_NEWINTRA(MB) ((MB)->mb_type==I16MB || (MB)->mb_type==IPCM) + #define IS_OLDINTRA(MB) ((MB)->mb_type==I4MB) + + #define IS_INTER(MB) ((MB)->mb_type!=I4MB && (MB)->mb_type!=I16MB && (MB)->mb_type!=I8MB && (MB)->mb_type!=IPCM) + #define IS_INTERMV(MB) ((MB)->mb_type!=I4MB && (MB)->mb_type!=I16MB && (MB)->mb_type!=I8MB && (MB)->mb_type!=0 && (MB)->mb_type!=IPCM) + #define IS_DIRECT(MB) ((MB)->mb_type==0 && (img->type==B_SLICE )) + #define IS_COPY(MB) ((MB)->mb_type==0 && (img->type==P_SLICE || img->type==SP_SLICE)) + #define IS_P8x8(MB) ((MB)->mb_type==P8x8) + + + // Quantization parameter range + + #define MIN_QP 0 + #define MAX_QP 51 + + #define BLOCK_SIZE 4 + #define MB_BLOCK_SIZE 16 + + + #define NO_INTRA_PMODE 9 //!< #intra prediction modes + /* 4x4 intra prediction modes */ + #define VERT_PRED 0 + #define HOR_PRED 1 + #define DC_PRED 2 + #define DIAG_DOWN_LEFT_PRED 3 + #define DIAG_DOWN_RIGHT_PRED 4 + #define VERT_RIGHT_PRED 5 + #define HOR_DOWN_PRED 6 + #define VERT_LEFT_PRED 7 + #define HOR_UP_PRED 8 + + // 16x16 intra prediction modes + #define VERT_PRED_16 0 + #define HOR_PRED_16 1 + #define DC_PRED_16 2 + #define PLANE_16 3 + + // 8x8 chroma intra prediction modes + #define DC_PRED_8 0 + #define HOR_PRED_8 1 + #define VERT_PRED_8 2 + #define PLANE_8 3 + + #define EOS 1 //!< End Of Sequence + #define SOP 2 //!< Start Of Picture + #define SOS 3 //!< Start Of 
Slice + + #define DECODING_OK 0 + #define SEARCH_SYNC 1 + #define PICTURE_DECODED 2 + + #define MAX_REFERENCE_PICTURES 32 //!< H264 allows 32 fields + + #define INVALIDINDEX (-135792468) + + #if !defined(WIN32) || defined(__GNUC__) + #define max(a, b) ((a) > (b) ? (a) : (b)) //!< Macro returning max value + #define min(a, b) ((a) < (b) ? (a) : (b)) //!< Macro returning min value + #endif + + + #define MVPRED_MEDIAN 0 + #define MVPRED_L 1 + #define MVPRED_U 2 + #define MVPRED_UR 3 + + #define DECODE_COPY_MB 0 + #define DECODE_MB 1 + //#define DECODE_MB_BFRAME 2 + + #define BLOCK_MULTIPLE (MB_BLOCK_SIZE/BLOCK_SIZE) + + //Start code and Emulation Prevention need this to be defined in identical manner at encoder and decoder + #define ZEROBYTES_SHORTSTARTCODE 2 //indicates the number of zero bytes in the short start-code prefix + + #endif + Index: llvm-test/MultiSource/Applications/JM/ldecod/elements.h diff -c /dev/null llvm-test/MultiSource/Applications/JM/ldecod/elements.h:1.1 *** /dev/null Sat Feb 11 04:33:42 2006 --- llvm-test/MultiSource/Applications/JM/ldecod/elements.h Sat Feb 11 04:33:22 2006 *************** *** 0 **** --- 1,107 ---- + + /*! + ************************************************************************************* + * \file elements.h + * + * \brief + * Header file for elements in H.264 streams + * + * \date + * 6.10.2000 + * + * \version + * 1.0 + * + * \author + * Sebastian Purreiter \n + * Siemens AG, Information and Communication Mobile \n + * P.O.Box 80 17 07 \n + * D-81617 Munich, Germany \n + ************************************************************************************* + */ + + #ifndef _ELEMENTS_H_ + #define _ELEMENTS_H_ + + /*! + * definition of H.264 syntax elements + * order of elements follow dependencies for picture reconstruction + */ + /*! 
+ * \brief Assignment of old TYPE partition elements to new + * elements + * + * old element | new elements + * ----------------+------------------------------------------------------------------- + * TYPE_HEADER | SE_HEADER, SE_PTYPE + * TYPE_MBHEADER | SE_MBTYPE, SE_REFFRAME, SE_INTRAPREDMODE + * TYPE_MVD | SE_MVD + * TYPE_CBP | SE_CBP_INTRA, SE_CBP_INTER + * SE_DELTA_QUANT_INTER + * SE_DELTA_QUANT_INTRA + * TYPE_COEFF_Y | SE_LUM_DC_INTRA, SE_LUM_AC_INTRA, SE_LUM_DC_INTER, SE_LUM_AC_INTER + * TYPE_2x2DC | SE_CHR_DC_INTRA, SE_CHR_DC_INTER + * TYPE_COEFF_C | SE_CHR_AC_INTRA, SE_CHR_AC_INTER + * TYPE_EOS | SE_EOS + */ + + #define SE_HEADER 0 + #define SE_PTYPE 1 + #define SE_MBTYPE 2 + #define SE_REFFRAME 3 + #define SE_INTRAPREDMODE 4 + #define SE_MVD 5 + #define SE_CBP_INTRA 6 + #define SE_LUM_DC_INTRA 7 + #define SE_CHR_DC_INTRA 8 + #define SE_LUM_AC_INTRA 9 + #define SE_CHR_AC_INTRA 10 + #define SE_CBP_INTER 11 + #define SE_LUM_DC_INTER 12 + #define SE_CHR_DC_INTER 13 + #define SE_LUM_AC_INTER 14 + #define SE_CHR_AC_INTER 15 + #define SE_DELTA_QUANT_INTER 16 + #define SE_DELTA_QUANT_INTRA 17 + #define SE_BFRAME 18 + #define SE_EOS 19 + #define SE_MAX_ELEMENTS 20 + + + #define NO_EC 0 //!< no error concealment necessary + #define EC_REQ 1 //!< error concealment required + #define EC_SYNC 2 //!< search and sync on next header element + + #define MAXPARTITIONMODES 2 //!< maximum possible partition modes as defined in assignSE2partition[][] + + /*! + * \brief lookup-table to assign different elements to partition + * + * \note here we defined up to 6 different partitions similar to + * document Q15-k-18 described in the PROGFRAMEMODE. + * The Sliceheader contains the PSYNC information. \par + * + * Elements inside a partition are not ordered. They are + * ordered by occurrence in the stream. + * Assumption: Only partition losses are considered.
\par + * + * The texture elements luminance and chrominance are + * not ordered in the progressive form + * This may be changed in image.c \par + * + * We also defined the proposed internet partition mode + * of Stephan Wenger here. To select the desired mode + * uncomment one of the two following lines. \par + * + * -IMPORTANT: + * Picture- or Sliceheaders must be assigned to partition 0. \par + * Furthermore partitions must follow syntax dependencies as + * outlined in document Q15-J-23. + */ + + + extern int assignSE2partition[][SE_MAX_ELEMENTS]; + extern int PartitionMode; + + #endif + Index: llvm-test/MultiSource/Applications/JM/ldecod/erc_api.c diff -c /dev/null llvm-test/MultiSource/Applications/JM/ldecod/erc_api.c:1.1 *** /dev/null Sat Feb 11 04:33:42 2006 --- llvm-test/MultiSource/Applications/JM/ldecod/erc_api.c Sat Feb 11 04:33:22 2006 *************** *** 0 **** --- 1,382 ---- + + /*! + ************************************************************************************* + * \file erc_api.c + * + * \brief + * External (still inside video decoder) interface for error concealment module + * + * \author + * - Ari Hourunranta + * - Viktor Varsa + * - Ye-Kui Wang + * + ************************************************************************************* + */ + + + #include + + #include "global.h" + #include "memalloc.h" + #include "erc_api.h" + + objectBuffer_t *erc_object_list = NULL; + ercVariables_t *erc_errorVar = NULL; + frame erc_recfr; + int erc_mvperMB; + + /*! 
+ ************************************************************************ + * \brief + * Initialize the error concealment module. + * Releases any previous instance through ercClose(), allocates the + * zero-filled erc_object_list with (pic_sizex * pic_sizey) >> 6 entries, + * opens a new instance with ercOpen() and applies flag through + * ercSetErrorConcealment(). + * \param pic_sizex + * Picture width (presumably in pixels -- confirm against the caller) + * \param pic_sizey + * Picture height (presumably in pixels -- confirm against the caller) + * \param flag + * Value handed on to ercSetErrorConcealment() as the concealment setting + ************************************************************************ + */ + void ercInit(int pic_sizex, int pic_sizey, int flag) + { + ercClose(erc_errorVar); + erc_object_list = (objectBuffer_t *) calloc((pic_sizex * pic_sizey) >> 6, sizeof(objectBuffer_t)); + if (erc_object_list == NULL) no_mem_exit("ercInit: erc_object_list"); + + /* the error concealment instance is allocated */ + erc_errorVar = ercOpen(); + + /* set error concealment according to flag */ + ercSetErrorConcealment(erc_errorVar, flag); + } + + /*! + ************************************************************************ + * \brief + * Allocates data structures used in error concealment. + * Only the ercVariables_t itself is allocated here: nOfMBs and + * currSegment start at 0, the segment list and all condition tables + * start as NULL, and concealment is switched on (1). + * nOfSegments is not assigned in this function. + *\return + * The allocated ercVariables_t is returned. + ************************************************************************ + */ + ercVariables_t *ercOpen( void ) + { + ercVariables_t *errorVar = NULL; + + errorVar = (ercVariables_t *)malloc( sizeof(ercVariables_t)); + if ( errorVar == NULL ) no_mem_exit("ercOpen: errorVar"); + + errorVar->nOfMBs = 0; + errorVar->segments = NULL; + errorVar->currSegment = 0; + errorVar->yCondition = NULL; + errorVar->uCondition = NULL; + errorVar->vCondition = NULL; + errorVar->prevFrameYCondition = NULL; + + errorVar->concealment = 1; + + return errorVar; + } + + /*! + ************************************************************************ + * \brief + * Resets the variables used in error detection. + * Should be called always when starting to decode a new frame. + * \param errorVar + * Variables for error concealment + * \param nOfMBs + * Number of macroblocks in a frame + * \param numOfSegments + * Estimated number of segments (memory reserved) + * \param picSizeX + * Width of the frame in pixels.
+ ************************************************************************ + */ + void ercReset( ercVariables_t *errorVar, int nOfMBs, int numOfSegments, int32 picSizeX ) + { + int *tmp = NULL; + int i = 0; + + if ( errorVar && errorVar->concealment ) + { + /* If frame size has been changed, drop every per-frame buffer so it is reallocated below */ + if ( nOfMBs != errorVar->nOfMBs && errorVar->yCondition != NULL ) + { + free( errorVar->yCondition ); + errorVar->yCondition = NULL; + free( errorVar->prevFrameYCondition ); + errorVar->prevFrameYCondition = NULL; + free( errorVar->uCondition ); + errorVar->uCondition = NULL; + free( errorVar->vCondition ); + errorVar->vCondition = NULL; + free( errorVar->segments ); + errorVar->segments = NULL; + } + + /* If the structures are uninitialized (first frame, or frame size is changed) */ + if ( errorVar->yCondition == NULL ) + { + errorVar->segments = (ercSegment_t *)malloc( numOfSegments*sizeof(ercSegment_t) ); + if ( errorVar->segments == NULL ) no_mem_exit("ercReset: errorVar->segments"); + memset( errorVar->segments, 0, numOfSegments*sizeof(ercSegment_t)); + errorVar->nOfSegments = numOfSegments; + + errorVar->yCondition = (int *)malloc( 4*nOfMBs*sizeof(int) ); + if ( errorVar->yCondition == NULL ) no_mem_exit("ercReset: errorVar->yCondition"); + errorVar->prevFrameYCondition = (int *)malloc( 4*nOfMBs*sizeof(int) ); + if ( errorVar->prevFrameYCondition == NULL ) no_mem_exit("ercReset: errorVar->prevFrameYCondition"); + errorVar->uCondition = (int *)malloc( nOfMBs*sizeof(int) ); + if ( errorVar->uCondition == NULL ) no_mem_exit("ercReset: errorVar->uCondition"); + errorVar->vCondition = (int *)malloc( nOfMBs*sizeof(int) ); + if ( errorVar->vCondition == NULL ) no_mem_exit("ercReset: errorVar->vCondition"); + errorVar->nOfMBs = nOfMBs; + } + else + { + /* Store the yCondition struct of the previous frame by swapping the two buffers */ + tmp = errorVar->prevFrameYCondition; + errorVar->prevFrameYCondition = errorVar->yCondition; + errorVar->yCondition = tmp; + } + + /* Reset tables and parameters */ + 
memset( errorVar->yCondition, 0, 4*nOfMBs*sizeof(*errorVar->yCondition)); + memset( errorVar->uCondition, 0, nOfMBs*sizeof(*errorVar->uCondition)); + memset( errorVar->vCondition, 0, nOfMBs*sizeof(*errorVar->vCondition)); + + if (errorVar->nOfSegments != numOfSegments) + { + free( errorVar->segments ); + errorVar->segments = NULL; + errorVar->segments = (ercSegment_t *)malloc( numOfSegments*sizeof(ercSegment_t) ); + if ( errorVar->segments == NULL ) no_mem_exit("ercReset: errorVar->segments"); + errorVar->nOfSegments = numOfSegments; + } + + memset( errorVar->segments, 0, errorVar->nOfSegments*sizeof(ercSegment_t)); + + for ( i = 0; i < errorVar->nOfSegments; i++ ) + { + errorVar->segments[i].fCorrupted = 1; //! mark segments as corrupted + errorVar->segments[i].startMBPos = 0; + errorVar->segments[i].endMBPos = nOfMBs - 1; + } + + errorVar->currSegment = 0; + errorVar->nOfCorruptedSegments = 0; + } + } + + /*! + ************************************************************************ + * \brief + * Releases the error concealment instance and the global object list. + * Frees the segment list and the condition tables (only when yCondition + * has been allocated), then the instance itself, and finally + * erc_object_list, which is reset to NULL. + * \note + * Only the local copy of the pointer is set to NULL; the pointer held + * by the caller (e.g. erc_errorVar) still contains the freed address. + * \param errorVar + * Variables for error concealment (may be NULL) + ************************************************************************ + */ + void ercClose( ercVariables_t *errorVar ) + { + if ( errorVar != NULL ) + { + if (errorVar->yCondition != NULL) + { + free( errorVar->segments ); + free( errorVar->yCondition ); + free( errorVar->uCondition ); + free( errorVar->vCondition ); + free( errorVar->prevFrameYCondition ); + } + free( errorVar ); + errorVar = NULL; + } + + if (erc_object_list) + { + free(erc_object_list); + erc_object_list=NULL; + } + } + + /*! + ************************************************************************ + * \brief + * Sets error concealment ON/OFF.
Can be invoked only between frames, not during a frame + * \param errorVar + * Variables for error concealment + * \param value + * New value + ************************************************************************ + */ + void ercSetErrorConcealment( ercVariables_t *errorVar, int value ) + { + if ( errorVar != NULL ) + errorVar->concealment = value; + } + + /*! + ************************************************************************ + * \brief + * Creates a new segment in the segment-list, and marks the start MB and bit position. + * If the end of the previous segment was not explicitly marked by "ercStopSegment", + * also marks the end of the previous segment. + * If needed, it reallocates the segment-list for a larger storage place. + * \param currMBNum + * The MB number where the new slice/segment starts + * \param segment + * Segment/Slice No. counted by the caller + * \param bitPos + * Bitstream pointer: number of bits read from the buffer. + * \param errorVar + * Variables for error detector + ************************************************************************ + */ + void ercStartSegment( int currMBNum, int segment, u_int32 bitPos, ercVariables_t *errorVar ) + { + if ( errorVar && errorVar->concealment ) + { + errorVar->currSegmentCorrupted = 0; + + errorVar->segments[ segment ].fCorrupted = 0; + errorVar->segments[ segment ].startMBPos = currMBNum; + + } + } + + /*! + ************************************************************************ + * \brief + * Marks the end position of a segment. + * \param currMBNum + * The last MB number of the previous segment + * \param segment + * Segment/Slice No. counted by the caller + * If (segment<0) the internal segment counter is used. + * \param bitPos + * Bitstream pointer: number of bits read from the buffer. 
+ * \param errorVar + * Variables for error detector + ************************************************************************ + */ + void ercStopSegment( int currMBNum, int segment, u_int32 bitPos, ercVariables_t *errorVar ) + { + if ( errorVar && errorVar->concealment ) + { + errorVar->segments[ segment ].endMBPos = currMBNum; //! Changed TO 12.11.2001 + errorVar->currSegment++; + } + } + + /*! + ************************************************************************ + * \brief + * Marks the current segment (the one which has the "currMBNum" MB in it) + * as lost: all the blocks of the MBs in the segment as corrupted. + * \param picSizeX + * Width of the frame in pixels. + * \param errorVar + * Variables for error detector + ************************************************************************ + */ + void ercMarkCurrSegmentLost(int32 picSizeX, ercVariables_t *errorVar ) + { + int j = 0; + int current_segment; + + current_segment = errorVar->currSegment-1; + if ( errorVar && errorVar->concealment ) + { + if (errorVar->currSegmentCorrupted == 0) + { + errorVar->nOfCorruptedSegments++; + errorVar->currSegmentCorrupted = 1; + } + + for ( j = errorVar->segments[current_segment].startMBPos; j <= errorVar->segments[current_segment].endMBPos; j++ ) + { + errorVar->yCondition[MBNum2YBlock (j, 0, picSizeX)] = ERC_BLOCK_CORRUPTED; + errorVar->yCondition[MBNum2YBlock (j, 1, picSizeX)] = ERC_BLOCK_CORRUPTED; + errorVar->yCondition[MBNum2YBlock (j, 2, picSizeX)] = ERC_BLOCK_CORRUPTED; + errorVar->yCondition[MBNum2YBlock (j, 3, picSizeX)] = ERC_BLOCK_CORRUPTED; + errorVar->uCondition[j] = ERC_BLOCK_CORRUPTED; + errorVar->vCondition[j] = ERC_BLOCK_CORRUPTED; + } + errorVar->segments[current_segment].fCorrupted = 1; + } + } + + /*! + ************************************************************************ + * \brief + * Marks the current segment (the one which has the "currMBNum" MB in it) + * as OK: all the blocks of the MBs in the segment as OK. 
+ * \param picSizeX + * Width of the frame in pixels. + * \param errorVar + * Variables for error detector + ************************************************************************ + */ + void ercMarkCurrSegmentOK(int32 picSizeX, ercVariables_t *errorVar ) + { + int j = 0; + int current_segment; + + current_segment = errorVar->currSegment-1; + if ( errorVar && errorVar->concealment ) + { + // mark all the Blocks belonging to the segment as OK */ + for ( j = errorVar->segments[current_segment].startMBPos; j <= errorVar->segments[current_segment].endMBPos; j++ ) + { + errorVar->yCondition[MBNum2YBlock (j, 0, picSizeX)] = ERC_BLOCK_OK; + errorVar->yCondition[MBNum2YBlock (j, 1, picSizeX)] = ERC_BLOCK_OK; + errorVar->yCondition[MBNum2YBlock (j, 2, picSizeX)] = ERC_BLOCK_OK; + errorVar->yCondition[MBNum2YBlock (j, 3, picSizeX)] = ERC_BLOCK_OK; + errorVar->uCondition[j] = ERC_BLOCK_OK; + errorVar->vCondition[j] = ERC_BLOCK_OK; + } + errorVar->segments[current_segment].fCorrupted = 0; + } + } + + /*! + ************************************************************************ + * \brief + * Marks the Blocks of the given component (YUV) of the current MB as concealed. + * \param currMBNum + * Selects the segment where this MB number is in. + * \param comp + * Component to mark (0:Y, 1:U, 2:V, <0:All) + * \param picSizeX + * Width of the frame in pixels. 
+ * \param errorVar + * Variables for error detector + ************************************************************************ + */ + void ercMarkCurrMBConcealed( int currMBNum, int comp, int32 picSizeX, ercVariables_t *errorVar ) + { + int setAll = 0; + + if ( errorVar && errorVar->concealment ) + { + if (comp < 0) + { + setAll = 1; + comp = 0; + } + + switch (comp) + { + case 0: + errorVar->yCondition[MBNum2YBlock (currMBNum, 0, picSizeX)] = ERC_BLOCK_CONCEALED; + errorVar->yCondition[MBNum2YBlock (currMBNum, 1, picSizeX)] = ERC_BLOCK_CONCEALED; + errorVar->yCondition[MBNum2YBlock (currMBNum, 2, picSizeX)] = ERC_BLOCK_CONCEALED; + errorVar->yCondition[MBNum2YBlock (currMBNum, 3, picSizeX)] = ERC_BLOCK_CONCEALED; + if (!setAll) + break; + case 1: + errorVar->uCondition[currMBNum] = ERC_BLOCK_CONCEALED; + if (!setAll) + break; + case 2: + errorVar->vCondition[currMBNum] = ERC_BLOCK_CONCEALED; + } + } + } Index: llvm-test/MultiSource/Applications/JM/ldecod/erc_api.h diff -c /dev/null llvm-test/MultiSource/Applications/JM/ldecod/erc_api.h:1.1 *** /dev/null Sat Feb 11 04:33:42 2006 --- llvm-test/MultiSource/Applications/JM/ldecod/erc_api.h Sat Feb 11 04:33:22 2006 *************** *** 0 **** --- 1,169 ---- + + /*! + ************************************************************************ + * \file erc_api.h + * + * \brief + * External (still inside video decoder) interface for error concealment module + * + * \author + * - Ari Hourunranta + * - Ye-Kui Wang + * - Jill Boyce + * - Saurav K Bandyopadhyay + * - Zhenyu Wu + * + * ************************************************************************ + */ + + + #ifndef _ERC_API_H_ + #define _ERC_API_H_ + + #include "erc_globals.h" + + /* + * Defines + */ + + /* If the average motion vector of the correctly received macroblocks is less than the + threshold, concealByCopy is used, otherwise concealByTrial is used. 
*/ + #define MVPERMB_THR 8 + + /* used to determine the size of the allocated memory for a temporal Region (MB) */ + #define DEF_REGION_SIZE 384 /* 8*8*6 */ + + #define ERC_BLOCK_OK 3 + #define ERC_BLOCK_CONCEALED 2 + #define ERC_BLOCK_CORRUPTED 1 + #define ERC_BLOCK_EMPTY 0 + + #define mabs(a) ( (a) < 0 ? -(a) : (a) ) + #define mmax(a,b) ((a) > (b) ? (a) : (b)) + #define mmin(a,b) ((a) < (b) ? (a) : (b)) + + /* + * Functions to convert MBNum representation to blockNum + */ + + #define xPosYBlock(currYBlockNum,picSizeX) \ + ((currYBlockNum)%((picSizeX)>>3)) + + #define yPosYBlock(currYBlockNum,picSizeX) \ + ((currYBlockNum)/((picSizeX)>>3)) + + #define xPosMB(currMBNum,picSizeX) \ + ((currMBNum)%((picSizeX)>>4)) + + #define yPosMB(currMBNum,picSizeX) \ + ((currMBNum)/((picSizeX)>>4)) + + #define MBxy2YBlock(currXPos,currYPos,comp,picSizeX) \ + ((((currYPos)<<1)+((comp)>>1))*((picSizeX)>>3)+((currXPos)<<1)+((comp)&1)) + + #define MBNum2YBlock(currMBNum,comp,picSizeX) \ + MBxy2YBlock(xPosMB((currMBNum),(picSizeX)),yPosMB((currMBNum),(picSizeX)),(comp),(picSizeX)) + + + /* + * typedefs + */ + + /* segment data structure */ + typedef struct ercSegment_s + { + int startMBPos; + int endMBPos; + int fCorrupted; + } ercSegment_t; + + /* Error detector & concealment instance data structure */ + typedef struct ercVariables_s + { + /* Number of macroblocks (size or size/4 of the arrays) */ + int nOfMBs; + /* Number of segments (slices) in frame */ + int nOfSegments; + + /* Array for conditions of Y blocks */ + int *yCondition; + /* Array for conditions of U blocks */ + int *uCondition; + /* Array for conditions of V blocks */ + int *vCondition; + + /* Array for Slice level information */ + ercSegment_t *segments; + int currSegment; + + /* Conditions of the MBs of the previous frame */ + int *prevFrameYCondition; + + /* Flag telling if the current segment was found to be corrupted */ + int currSegmentCorrupted; + /* Counter for corrupted segments per picture */ + int 
nOfCorruptedSegments; + + /* State variables for error detector and concealer */ + int concealment; + + } ercVariables_t; + + /* + * External function interface + */ + + void ercInit(int pic_sizex, int pic_sizey, int flag); + ercVariables_t *ercOpen( void ); + void ercReset( ercVariables_t *errorVar, int nOfMBs, int numOfSegments, int32 picSizeX ); + void ercClose( ercVariables_t *errorVar ); + void ercSetErrorConcealment( ercVariables_t *errorVar, int value ); + + void ercStartSegment( int currMBNum, int segment, u_int32 bitPos, ercVariables_t *errorVar ); + void ercStopSegment( int currMBNum, int segment, u_int32 bitPos, ercVariables_t *errorVar ); + void ercMarkCurrSegmentLost(int32 picSizeX, ercVariables_t *errorVar ); + void ercMarkCurrSegmentOK(int32 picSizeX, ercVariables_t *errorVar ); + void ercMarkCurrMBConcealed( int currMBNum, int comp, int32 picSizeX, ercVariables_t *errorVar ); + + int ercConcealIntraFrame( frame *recfr, int32 picSizeX, int32 picSizeY, ercVariables_t *errorVar ); + int ercConcealInterFrame( frame *recfr, objectBuffer_t *object_list, + int32 picSizeX, int32 picSizeY, ercVariables_t *errorVar, int chroma_format_idc ); + + + /* Thomson APIs for concealing entire frame loss */ + // This code reflects JVT-P072 + + #include "mbuffer.h" + #include "output.h" + + struct concealment_node { + StorablePicture* picture; + int missingpocs; + struct concealment_node *next; + }; + + struct concealment_node * init_node(StorablePicture* , int ); + void print_node( struct concealment_node * ); + void print_list( struct concealment_node * ); + void add_node( struct concealment_node * ); + void delete_node( struct concealment_node * ); + void init_lists_for_non_reference_loss(int , PictureStructure ); + + void conceal_non_ref_pics(int diff); + void conceal_lost_frames(ImageParameters *img); + + void sliding_window_poc_management(StorablePicture *p); + + void write_lost_non_ref_pic(int poc, int p_out); + void write_lost_ref_after_idr(int pos); + + 
FrameStore *last_out_fs; + int pocs_in_dpb[100]; + int comp(const void *, const void *); + // JVT-P072 ends + + + + #endif + Index: llvm-test/MultiSource/Applications/JM/ldecod/erc_do.h diff -c /dev/null llvm-test/MultiSource/Applications/JM/ldecod/erc_do.h:1.1 *** /dev/null Sat Feb 11 04:33:42 2006 --- llvm-test/MultiSource/Applications/JM/ldecod/erc_do.h Sat Feb 11 04:33:22 2006 *************** *** 0 **** --- 1,44 ---- + + /*! + ************************************************************************ + * \file erc_do.h + * + * \brief + * Header for the I & P frame error concealment common functions + * + * \author + * - Viktor Varsa + * - Ye-Kui Wang + * + ************************************************************************ + */ + + #ifndef _ERC_DO_H_ + #define _ERC_DO_H_ + + + #include "erc_api.h" + + void ercPixConcealIMB(imgpel *currFrame, int row, int column, int predBlocks[], int frameWidth, int mbWidthInBlocks); + + int ercCollect8PredBlocks( int predBlocks[], int currRow, int currColumn, int *condition, + int maxRow, int maxColumn, int step, byte fNoCornerNeigh ); + int ercCollectColumnBlocks( int predBlocks[], int currRow, int currColumn, int *condition, int maxRow, int maxColumn, int step ); + + #define isSplitted(object_list,currMBNum) \ + ((object_list+((currMBNum)<<2))->regionMode >= REGMODE_SPLITTED) + + /* this can be used as isBlock(...,INTRA) or isBlock(...,INTER_COPY) */ + #define isBlock(object_list,currMBNum,comp,regMode) \ + (isSplitted(object_list,currMBNum) ? \ + ((object_list+((currMBNum)<<2)+(comp))->regionMode == REGMODE_##regMode##_8x8) : \ + ((object_list+((currMBNum)<<2))->regionMode == REGMODE_##regMode)) + + /* this can be used as getParam(...,mv) or getParam(...,xMin) or getParam(...,yMin) */ + #define getParam(object_list,currMBNum,comp,param) \ + (isSplitted(object_list,currMBNum) ? 
\ + ((object_list+((currMBNum)<<2)+(comp))->param) : \ + ((object_list+((currMBNum)<<2))->param)) + + #endif + Index: llvm-test/MultiSource/Applications/JM/ldecod/erc_do_i.c diff -c /dev/null llvm-test/MultiSource/Applications/JM/ldecod/erc_do_i.c:1.1 *** /dev/null Sat Feb 11 04:33:42 2006 --- llvm-test/MultiSource/Applications/JM/ldecod/erc_do_i.c Sat Feb 11 04:33:22 2006 *************** *** 0 **** --- 1,541 ---- + + /*! + ************************************************************************************* + * \file + * erc_do_i.c + * + * \brief + * Intra (I) frame error concealment algorithms for decoder + * + * \author + * - Ari Hourunranta + * - Viktor Varsa + * - Ye-Kui Wang + * + ************************************************************************************* + */ + + #include + #include "global.h" + #include "erc_do.h" + + static void concealBlocks( int lastColumn, int lastRow, int comp, frame *recfr, int32 picSizeX, int *condition ); + static void pixMeanInterpolateBlock( imgpel *src[], imgpel *block, int blockSize, int frameWidth ); + + /*! + ************************************************************************ + * \brief + * The main function for Intra frame concealment. 
+ * Calls "concealBlocks" for each color component (Y,U,V) seperately + * \return + * 0, if the concealment was not successful and simple concealment should be used + * 1, otherwise (even if none of the blocks were concealed) + * \param recfr + * Reconstructed frame buffer + * \param picSizeX + * Width of the frame in pixels + * \param picSizeY + * Height of the frame in pixels + * \param errorVar + * Variables for error concealment + ************************************************************************ + */ + int ercConcealIntraFrame( frame *recfr, int32 picSizeX, int32 picSizeY, ercVariables_t *errorVar ) + { + int lastColumn = 0, lastRow = 0; + + /* if concealment is on */ + if ( errorVar && errorVar->concealment ) + { + /* if there are segments to be concealed */ + if ( errorVar->nOfCorruptedSegments ) + { + /* Y */ + lastRow = (int) (picSizeY>>3); + lastColumn = (int) (picSizeX>>3); + concealBlocks( lastColumn, lastRow, 0, recfr, picSizeX, errorVar->yCondition ); + + /* U (dimensions halved compared to Y) */ + lastRow = (int) (picSizeY>>4); + lastColumn = (int) (picSizeX>>4); + concealBlocks( lastColumn, lastRow, 1, recfr, picSizeX, errorVar->uCondition ); + + /* V ( dimensions equal to U ) */ + concealBlocks( lastColumn, lastRow, 2, recfr, picSizeX, errorVar->vCondition ); + } + return 1; + } + else + return 0; + } + + /*! 
+ ************************************************************************ + * \brief + * Conceals the MB at position (row, column) using pixels from predBlocks[] + * using pixMeanInterpolateBlock() + * \param currFrame + * current frame + * \param row + * y coordinate in blocks + * \param column + * x coordinate in blocks + * \param predBlocks[] + * list of neighboring source blocks (numbering 0 to 7, 1 means: use the neighbor) + * \param frameWidth + * width of frame in pixels + * \param mbWidthInBlocks + * 2 for Y, 1 for U/V components + ************************************************************************ + */ + void ercPixConcealIMB(imgpel *currFrame, int row, int column, int predBlocks[], int frameWidth, int mbWidthInBlocks) + { + imgpel *src[8]={NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL}; + imgpel *currBlock = NULL; + + /* collect the reliable neighboring blocks */ + if (predBlocks[0]) + src[0] = currFrame + (row-mbWidthInBlocks)*frameWidth*8 + (column+mbWidthInBlocks)*8; + if (predBlocks[1]) + src[1] = currFrame + (row-mbWidthInBlocks)*frameWidth*8 + (column-mbWidthInBlocks)*8; + if (predBlocks[2]) + src[2] = currFrame + (row+mbWidthInBlocks)*frameWidth*8 + (column-mbWidthInBlocks)*8; + if (predBlocks[3]) + src[3] = currFrame + (row+mbWidthInBlocks)*frameWidth*8 + (column+mbWidthInBlocks)*8; + if (predBlocks[4]) + src[4] = currFrame + (row-mbWidthInBlocks)*frameWidth*8 + column*8; + if (predBlocks[5]) + src[5] = currFrame + row*frameWidth*8 + (column-mbWidthInBlocks)*8; + if (predBlocks[6]) + src[6] = currFrame + (row+mbWidthInBlocks)*frameWidth*8 + column*8; + if (predBlocks[7]) + src[7] = currFrame + row*frameWidth*8 + (column+mbWidthInBlocks)*8; + + currBlock = currFrame + row*frameWidth*8 + column*8; + pixMeanInterpolateBlock( src, currBlock, mbWidthInBlocks*8, frameWidth ); + + } + + /*! 
+ ************************************************************************ + * \brief + * This function checks the neighbours of a Macroblock for usability in + * concealment. First the OK Macroblocks are marked, and if there is not + * enough of them, then the CONCEALED ones as well. + * A "1" in the the output array means reliable, a "0" non reliable MB. + * The block order in "predBlocks": + * 1 4 0 + * 5 x 7 + * 2 6 3 + * i.e., corners first. + * \return + * Number of useable neighbour Macroblocks for concealment. + * \param predBlocks[] + * Array for indicating the valid neighbor blocks + * \param currRow + * Current block row in the frame + * \param currColumn + * Current block column in the frame + * \param condition + * The block condition (ok, lost) table + * \param maxRow + * Number of block rows in the frame + * \param maxColumn + * Number of block columns in the frame + * \param step + * Number of blocks belonging to a MB, when counting + * in vertical/horizontal direction. (Y:2 U,V:1) + * \param fNoCornerNeigh + * No corner neighbours are considered + ************************************************************************ + */ + int ercCollect8PredBlocks( int predBlocks[], int currRow, int currColumn, int *condition, + int maxRow, int maxColumn, int step, byte fNoCornerNeigh ) + { + int srcCounter = 0, srcCountMin = (fNoCornerNeigh ? 
2 : 4), + threshold = ERC_BLOCK_CONCEALED; + + memset( predBlocks, 0, 8*sizeof(int) ); + + /* collect the reliable neighboring blocks */ + do + { + srcCounter = 0; + /* Top */ + if (currRow > 0 && condition[ (currRow-1)*maxColumn + currColumn ] >= threshold ) + { //ERC_BLOCK_OK (3) or ERC_BLOCK_CONCEALED (2) + predBlocks[4] = condition[ (currRow-1)*maxColumn + currColumn ]; + srcCounter++; + } + /* Bottom */ + if ( currRow < (maxRow-step) && condition[ (currRow+step)*maxColumn + currColumn ] >= threshold ) + { + predBlocks[6] = condition[ (currRow+step)*maxColumn + currColumn ]; + srcCounter++; + } + + if ( currColumn > 0 ) + { + /* Left */ + if ( condition[ currRow*maxColumn + currColumn - 1 ] >= threshold ) + { + predBlocks[5] = condition[ currRow*maxColumn + currColumn - 1 ]; + srcCounter++; + } + + if ( !fNoCornerNeigh ) + { + /* Top-Left */ + if ( currRow > 0 && condition[ (currRow-1)*maxColumn + currColumn - 1 ] >= threshold ) + { + predBlocks[1] = condition[ (currRow-1)*maxColumn + currColumn - 1 ]; + srcCounter++; + } + /* Bottom-Left */ + if ( currRow < (maxRow-step) && condition[ (currRow+step)*maxColumn + currColumn - 1 ] >= threshold ) + { + predBlocks[2] = condition[ (currRow+step)*maxColumn + currColumn - 1 ]; + srcCounter++; + } + } + } + + if ( currColumn < (maxColumn-step) ) + { + /* Right */ + if ( condition[ currRow*maxColumn+currColumn + step ] >= threshold ) + { + predBlocks[7] = condition[ currRow*maxColumn+currColumn + step ]; + srcCounter++; + } + + if ( !fNoCornerNeigh ) + { + /* Top-Right */ + if ( currRow > 0 && condition[ (currRow-1)*maxColumn + currColumn + step ] >= threshold ) + { + predBlocks[0] = condition[ (currRow-1)*maxColumn + currColumn + step ]; + srcCounter++; + } + /* Bottom-Right */ + if ( currRow < (maxRow-step) && condition[ (currRow+step)*maxColumn + currColumn + step ] >= threshold ) + { + predBlocks[3] = condition[ (currRow+step)*maxColumn + currColumn + step ]; + srcCounter++; + } + } + } + /* prepare for the next 
round */ + threshold--; + if (threshold < ERC_BLOCK_CONCEALED) + break; + } while ( srcCounter < srcCountMin); + + return srcCounter; + } + + /*! + ************************************************************************ + * \brief + * collects prediction blocks only from the current column + * \return + * Number of useable neighbour Macroblocks for concealment. + * \param predBlocks[] + * Array for indicating the valid neighbor blocks + * \param currRow + * Current block row in the frame + * \param currColumn + * Current block column in the frame + * \param condition + * The block condition (ok, lost) table + * \param maxRow + * Number of block rows in the frame + * \param maxColumn + * Number of block columns in the frame + * \param step + * Number of blocks belonging to a MB, when counting + * in vertical/horizontal direction. (Y:2 U,V:1) + ************************************************************************ + */ + int ercCollectColumnBlocks( int predBlocks[], int currRow, int currColumn, int *condition, int maxRow, int maxColumn, int step ) + { + int srcCounter = 0, threshold = ERC_BLOCK_CORRUPTED; + + memset( predBlocks, 0, 8*sizeof(int) ); + + /* in this case, row > 0 and row < 17 */ + if ( condition[ (currRow-1)*maxColumn + currColumn ] > threshold ) + { + predBlocks[4] = 1; + srcCounter++; + } + if ( condition[ (currRow+step)*maxColumn + currColumn ] > threshold ) + { + predBlocks[6] = 1; + srcCounter++; + } + + return srcCounter; + } + + /*! + ************************************************************************ + * \brief + * Core for the Intra blocks concealment. + * It is called for each color component (Y,U,V) seperately + * Finds the corrupted blocks and calls pixel interpolation functions + * to correct them, one block at a time. + * Scanning is done vertically and each corrupted column is corrected + * bi-directionally, i.e., first block, last block, first block+1, last block -1 ... 
+ * \param lastColumn + * Number of block columns in the frame + * \param lastRow + * Number of block rows in the frame + * \param comp + * color component + * \param recfr + * Reconstructed frame buffer + * \param picSizeX + * Width of the frame in pixels + * \param condition + * The block condition (ok, lost) table + ************************************************************************ + */ + static void concealBlocks( int lastColumn, int lastRow, int comp, frame *recfr, int32 picSizeX, int *condition ) + { + int row, column, srcCounter = 0, thr = ERC_BLOCK_CORRUPTED, + lastCorruptedRow = -1, firstCorruptedRow = -1, currRow = 0, + areaHeight = 0, i = 0, smoothColumn = 0; + int predBlocks[8], step = 1; + + /* in the Y component do the concealment MB-wise (not block-wise): + this is useful if only whole MBs can be damaged or lost */ + if ( comp == 0 ) + step = 2; + else + step = 1; + + for ( column = 0; column < lastColumn; column += step ) + { + for ( row = 0; row < lastRow; row += step ) + { + if ( condition[row*lastColumn+column] <= thr ) + { + firstCorruptedRow = row; + /* find the last row which has corrupted blocks (in same continuous area) */ + for ( lastCorruptedRow = row+step; lastCorruptedRow < lastRow; lastCorruptedRow += step ) + { + /* check blocks in the current column */ + if ( condition[ lastCorruptedRow*lastColumn + column ] > thr ) + { + /* current one is already OK, so the last was the previous one */ + lastCorruptedRow -= step; + break; + } + } + if ( lastCorruptedRow >= lastRow ) + { + /* correct only from above */ + lastCorruptedRow = lastRow-step; + for ( currRow = firstCorruptedRow; currRow < lastRow; currRow += step ) + { + srcCounter = ercCollect8PredBlocks( predBlocks, currRow, column, condition, lastRow, lastColumn, step, 1 ); + + switch( comp ) + { + case 0 : + ercPixConcealIMB( recfr->yptr, currRow, column, predBlocks, picSizeX, 2 ); + break; + case 1 : + ercPixConcealIMB( recfr->uptr, currRow, column, predBlocks, (picSizeX>>1), 1 
); + break; + case 2 : + ercPixConcealIMB( recfr->vptr, currRow, column, predBlocks, (picSizeX>>1), 1 ); + break; + } + + if ( comp == 0 ) + { + condition[ currRow*lastColumn+column] = ERC_BLOCK_CONCEALED; + condition[ currRow*lastColumn+column + 1] = ERC_BLOCK_CONCEALED; + condition[ currRow*lastColumn+column + lastColumn] = ERC_BLOCK_CONCEALED; + condition[ currRow*lastColumn+column + lastColumn + 1] = ERC_BLOCK_CONCEALED; + } + else + { + condition[ currRow*lastColumn+column] = ERC_BLOCK_CONCEALED; + } + + } + row = lastRow; + } + else if ( firstCorruptedRow == 0 ) + { + /* correct only from below */ + for ( currRow = lastCorruptedRow; currRow >= 0; currRow -= step ) + { + srcCounter = ercCollect8PredBlocks( predBlocks, currRow, column, condition, lastRow, lastColumn, step, 1 ); + + switch( comp ) + { + case 0 : + ercPixConcealIMB( recfr->yptr, currRow, column, predBlocks, picSizeX, 2 ); + break; + case 1 : + ercPixConcealIMB( recfr->uptr, currRow, column, predBlocks, (picSizeX>>1), 1 ); + break; + case 2 : + ercPixConcealIMB( recfr->vptr, currRow, column, predBlocks, (picSizeX>>1), 1 ); + break; + } + + if ( comp == 0 ) + { + condition[ currRow*lastColumn+column] = ERC_BLOCK_CONCEALED; + condition[ currRow*lastColumn+column + 1] = ERC_BLOCK_CONCEALED; + condition[ currRow*lastColumn+column + lastColumn] = ERC_BLOCK_CONCEALED; + condition[ currRow*lastColumn+column + lastColumn + 1] = ERC_BLOCK_CONCEALED; + } + else + { + condition[ currRow*lastColumn+column] = ERC_BLOCK_CONCEALED; + } + + } + + row = lastCorruptedRow+step; + } + else + { + /* correct bi-directionally */ + + row = lastCorruptedRow+step; + areaHeight = lastCorruptedRow-firstCorruptedRow+step; + + /* + * Conceal the corrupted area switching between the up and the bottom rows + */ + for ( i = 0; i < areaHeight; i += step ) + { + if ( i % 2 ) + { + currRow = lastCorruptedRow; + lastCorruptedRow -= step; + } + else + { + currRow = firstCorruptedRow; + firstCorruptedRow += step; + } + + if 
(smoothColumn > 0) + { + srcCounter = ercCollectColumnBlocks( predBlocks, currRow, column, condition, lastRow, lastColumn, step ); + } + else + { + srcCounter = ercCollect8PredBlocks( predBlocks, currRow, column, condition, lastRow, lastColumn, step, 1 ); + } + + switch( comp ) + { + case 0 : + ercPixConcealIMB( recfr->yptr, currRow, column, predBlocks, picSizeX, 2 ); + break; + + case 1 : + ercPixConcealIMB( recfr->uptr, currRow, column, predBlocks, (picSizeX>>1), 1 ); + break; + + case 2 : + ercPixConcealIMB( recfr->vptr, currRow, column, predBlocks, (picSizeX>>1), 1 ); + break; + } + + if ( comp == 0 ) + { + condition[ currRow*lastColumn+column] = ERC_BLOCK_CONCEALED; + condition[ currRow*lastColumn+column + 1] = ERC_BLOCK_CONCEALED; + condition[ currRow*lastColumn+column + lastColumn] = ERC_BLOCK_CONCEALED; + condition[ currRow*lastColumn+column + lastColumn + 1] = ERC_BLOCK_CONCEALED; + } + else + { + condition[ currRow*lastColumn+column ] = ERC_BLOCK_CONCEALED; + } + } + } + + lastCorruptedRow = -1; + firstCorruptedRow = -1; + + } + } + } + } + + /*! 
+ ************************************************************************ + * \brief + * Does the actual pixel based interpolation for block[] + * using weighted average + * \param src[] + * pointers to neighboring source blocks + * \param block + * destination block + * \param blockSize + * 16 for Y, 8 for U/V components + * \param frameWidth + * Width of the frame in pixels + ************************************************************************ + */ + static void pixMeanInterpolateBlock( imgpel *src[], imgpel *block, int blockSize, int frameWidth ) + { + int row, column, k, tmp, srcCounter = 0, weight = 0, bmax = blockSize - 1; + + k = 0; + for ( row = 0; row < blockSize; row++ ) + { + for ( column = 0; column < blockSize; column++ ) + { + tmp = 0; + srcCounter = 0; + /* above */ + if ( src[4] != NULL ) + { + weight = blockSize-row; + tmp += weight * (*(src[4]+bmax*frameWidth+column)); + srcCounter += weight; + } + /* left */ + if ( src[5] != NULL ) + { + weight = blockSize-column; + tmp += weight * (*(src[5]+row*frameWidth+bmax)); + srcCounter += weight; + } + /* below */ + if ( src[6] != NULL ) + { + weight = row+1; + tmp += weight * (*(src[6]+column)); + srcCounter += weight; + } + /* right */ + if ( src[7] != NULL ) + { + weight = column+1; + tmp += weight * (*(src[7]+row*frameWidth)); + srcCounter += weight; + } + + if ( srcCounter > 0 ) + block[ k + column ] = (byte)(tmp/srcCounter); + else + block[ k + column ] = img->dc_pred_value; //FREXT + } + k += frameWidth; + } + + } + Index: llvm-test/MultiSource/Applications/JM/ldecod/erc_do_p.c diff -c /dev/null llvm-test/MultiSource/Applications/JM/ldecod/erc_do_p.c:1.1 *** /dev/null Sat Feb 11 04:33:42 2006 --- llvm-test/MultiSource/Applications/JM/ldecod/erc_do_p.c Sat Feb 11 04:33:22 2006 *************** *** 0 **** --- 1,1817 ---- + + /*! 
+ ************************************************************************************* + * \file + * erc_do_p.c + * + * \brief + * Inter (P) frame error concealment algorithms for decoder + * + * \author + * - Viktor Varsa + * - Ye-Kui Wang + * - Jill Boyce + * - Saurav K Bandyopadhyay + * - Zhenyu Wu + * + ************************************************************************************* + */ + + #include + #include + #include "mbuffer.h" + #include "global.h" + #include "memalloc.h" + #include "erc_do.h" + #include "image.h" + + extern int erc_mvperMB; + struct img_par *erc_img; + + // picture error concealment + // concealment_head points to first node in list, concealment_end points to + // last node in list. Initialise both to NULL, meaning no nodes in list yet + struct concealment_node *concealment_head = NULL; + struct concealment_node *concealment_end = NULL; + + // static function declarations + static int concealByCopy(frame *recfr, int currMBNum, + objectBuffer_t *object_list, int32 picSizeX); + static int concealByTrial(frame *recfr, imgpel *predMB, + int currMBNum, objectBuffer_t *object_list, int predBlocks[], + int32 picSizeX, int32 picSizeY, int *yCondition); + static int edgeDistortion (int predBlocks[], int currYBlockNum, imgpel *predMB, + imgpel *recY, int32 picSizeX, int32 regionSize); + static void copyBetweenFrames (frame *recfr, + int currYBlockNum, int32 picSizeX, int32 regionSize); + static void buildPredRegionYUV(struct img_par *img, int32 *mv, int x, int y, imgpel *predMB); + + // picture error concealment + static void buildPredblockRegionYUV(struct img_par *img, int32 *mv, + int x, int y, imgpel *predMB, int list); + static void CopyImgData(imgpel **inputY, imgpel ***inputUV, imgpel **outputY, + imgpel ***outputUV, int img_width, int img_height); + + + static void copyPredMB (int currYBlockNum, imgpel *predMB, frame *recfr, + int32 picSizeX, int32 regionSize); + + extern const unsigned char subblk_offset_y[3][8][4]; + extern const 
unsigned char subblk_offset_x[3][8][4]; + static int uv_div[2][4] = {{0, 1, 1, 0}, {0, 1, 0, 0}}; //[x/y][yuv_format] + + /*! + ************************************************************************ + * \brief + * The main function for Inter (P) frame concealment. + * \return + * 0, if the concealment was not successful and simple concealment should be used + * 1, otherwise (even if none of the blocks were concealed) + * \param recfr + * Reconstructed frame buffer + * \param object_list + * Motion info for all MBs in the frame + * \param picSizeX + * Width of the frame in pixels + * \param picSizeY + * Height of the frame in pixels + * \param errorVar + * Variables for error concealment + * \param chroma_format_idc + * Chroma format IDC + ************************************************************************ + */ + int ercConcealInterFrame(frame *recfr, objectBuffer_t *object_list, + int32 picSizeX, int32 picSizeY, ercVariables_t *errorVar, int chroma_format_idc ) + { + int lastColumn = 0, lastRow = 0, predBlocks[8]; + int lastCorruptedRow = -1, firstCorruptedRow = -1, currRow = 0, + row, column, columnInd, areaHeight = 0, i = 0; + imgpel *predMB; + + /* if concealment is on */ + if ( errorVar && errorVar->concealment ) + { + /* if there are segments to be concealed */ + if ( errorVar->nOfCorruptedSegments ) + { + if (chroma_format_idc != YUV400) + predMB = (imgpel *) malloc ( (256 + (img->mb_cr_size_x*img->mb_cr_size_y)*2) * sizeof (imgpel)); + else + predMB = (imgpel *) malloc(256 * sizeof (imgpel)); + + if ( predMB == NULL ) no_mem_exit("ercConcealInterFrame: predMB"); + + lastRow = (int) (picSizeY>>4); + lastColumn = (int) (picSizeX>>4); + + for ( columnInd = 0; columnInd < lastColumn; columnInd ++) + { + + column = ((columnInd%2) ? 
(lastColumn - columnInd/2 -1) : (columnInd/2)); + + for ( row = 0; row < lastRow; row++) + { + + if ( errorVar->yCondition[MBxy2YBlock(column, row, 0, picSizeX)] <= ERC_BLOCK_CORRUPTED ) + { // ERC_BLOCK_CORRUPTED (1) or ERC_BLOCK_EMPTY (0) + firstCorruptedRow = row; + /* find the last row which has corrupted blocks (in same continuous area) */ + for ( lastCorruptedRow = row+1; lastCorruptedRow < lastRow; lastCorruptedRow++) + { + /* check blocks in the current column */ + if (errorVar->yCondition[MBxy2YBlock(column, lastCorruptedRow, 0, picSizeX)] > ERC_BLOCK_CORRUPTED) + { + /* current one is already OK, so the last was the previous one */ + lastCorruptedRow --; + break; + } + } + if ( lastCorruptedRow >= lastRow ) + { + /* correct only from above */ + lastCorruptedRow = lastRow-1; + for ( currRow = firstCorruptedRow; currRow < lastRow; currRow++ ) + { + + ercCollect8PredBlocks (predBlocks, (currRow<<1), (column<<1), + errorVar->yCondition, (lastRow<<1), (lastColumn<<1), 2, 0); + + if(erc_mvperMB >= MVPERMB_THR) + concealByTrial(recfr, predMB, + currRow*lastColumn+column, object_list, predBlocks, + picSizeX, picSizeY, + errorVar->yCondition); + else + concealByCopy(recfr, currRow*lastColumn+column, + object_list, picSizeX); + + ercMarkCurrMBConcealed (currRow*lastColumn+column, -1, picSizeX, errorVar); + } + row = lastRow; + } + else if ( firstCorruptedRow == 0 ) + { + /* correct only from below */ + for ( currRow = lastCorruptedRow; currRow >= 0; currRow-- ) + { + + ercCollect8PredBlocks (predBlocks, (currRow<<1), (column<<1), + errorVar->yCondition, (lastRow<<1), (lastColumn<<1), 2, 0); + + if(erc_mvperMB >= MVPERMB_THR) + concealByTrial(recfr, predMB, + currRow*lastColumn+column, object_list, predBlocks, + picSizeX, picSizeY, + errorVar->yCondition); + else + concealByCopy(recfr, currRow*lastColumn+column, + object_list, picSizeX); + + ercMarkCurrMBConcealed (currRow*lastColumn+column, -1, picSizeX, errorVar); + } + + row = lastCorruptedRow+1; + } + else + { + 
/* correct bi-directionally */ + + row = lastCorruptedRow+1; + + areaHeight = lastCorruptedRow-firstCorruptedRow+1; + + /* + * Conceal the corrupted area switching between the up and the bottom rows + */ + for ( i = 0; i < areaHeight; i++) + { + if ( i % 2 ) + { + currRow = lastCorruptedRow; + lastCorruptedRow --; + } + else + { + currRow = firstCorruptedRow; + firstCorruptedRow ++; + } + + ercCollect8PredBlocks (predBlocks, (currRow<<1), (column<<1), + errorVar->yCondition, (lastRow<<1), (lastColumn<<1), 2, 0); + + if(erc_mvperMB >= MVPERMB_THR) + concealByTrial(recfr, predMB, + currRow*lastColumn+column, object_list, predBlocks, + picSizeX, picSizeY, + errorVar->yCondition); + else + concealByCopy(recfr, currRow*lastColumn+column, + object_list, picSizeX); + + ercMarkCurrMBConcealed (currRow*lastColumn+column, -1, picSizeX, errorVar); + + } + } + lastCorruptedRow = -1; + firstCorruptedRow = -1; + } + } + } + + free(predMB); + } + return 1; + } + else + return 0; + } + + /*! + ************************************************************************ + * \brief + * It conceals a given MB by simply copying the pixel area from the reference image + * that is at the same location as the macroblock in the current image. This correcponds + * to COPY MBs. + * \return + * Always zero (0). 
+ * \param recfr + * Reconstructed frame buffer + * \param currMBNum + * current MB index + * \param object_list + * Motion info for all MBs in the frame + * \param picSizeX + * Width of the frame in pixels + ************************************************************************ + */ + static int concealByCopy(frame *recfr, int currMBNum, + objectBuffer_t *object_list, int32 picSizeX) + { + objectBuffer_t *currRegion; + + currRegion = object_list+(currMBNum<<2); + currRegion->regionMode = REGMODE_INTER_COPY; + + currRegion->xMin = (xPosMB(currMBNum,picSizeX)<<4); + currRegion->yMin = (yPosMB(currMBNum,picSizeX)<<4); + + copyBetweenFrames (recfr, MBNum2YBlock(currMBNum,0,picSizeX), picSizeX, 16); + + return 0; + } + + /*! + ************************************************************************ + * \brief + * Copies the co-located pixel values from the reference to the current frame. + * Used by concealByCopy + * \param recfr + * Reconstructed frame buffer + * \param currYBlockNum + * index of the block (8x8) in the Y plane + * \param picSizeX + * Width of the frame in pixels + * \param regionSize + * can be 16 or 8 to tell the dimension of the region to copy + ************************************************************************ + */ + static void copyBetweenFrames (frame *recfr, + int currYBlockNum, int32 picSizeX, int32 regionSize) + { + int j, k, location, xmin, ymin; + StorablePicture* refPic = listX[0][0]; + + /* set the position of the region to be copied */ + xmin = (xPosYBlock(currYBlockNum,picSizeX)<<3); + ymin = (yPosYBlock(currYBlockNum,picSizeX)<<3); + + for (j = ymin; j < ymin + regionSize; j++) + for (k = xmin; k < xmin + regionSize; k++) + { + location = j * picSizeX + k; + //th recfr->yptr[location] = dec_picture->imgY[j][k]; + recfr->yptr[location] = refPic->imgY[j][k]; + } + + for (j = ymin >> uv_div[1][dec_picture->chroma_format_idc]; j < (ymin + regionSize) >> uv_div[1][dec_picture->chroma_format_idc]; j++) + for (k = xmin >> 
uv_div[0][dec_picture->chroma_format_idc]; k < (xmin + regionSize) >> uv_div[0][dec_picture->chroma_format_idc]; k++) + { + // location = j * picSizeX / 2 + k; + location = ((j * picSizeX) >> uv_div[0][dec_picture->chroma_format_idc]) + k; + + //th recfr->uptr[location] = dec_picture->imgUV[0][j][k]; + //th recfr->vptr[location] = dec_picture->imgUV[1][j][k]; + recfr->uptr[location] = refPic->imgUV[0][j][k]; + recfr->vptr[location] = refPic->imgUV[1][j][k]; + } + } + + /*! + ************************************************************************ + * \brief + * It conceals a given MB by using the motion vectors of one reliable neighbor. That MV of a + * neighbor is selected wich gives the lowest pixel difference at the edges of the MB + * (see function edgeDistortion). This corresponds to a spatial smoothness criteria. + * \return + * Always zero (0). + * \param recfr + * Reconstructed frame buffer + * \param predMB + * memory area for storing temporary pixel values for a macroblock + * the Y,U,V planes are concatenated y = predMB, u = predMB+256, v = predMB+320 + * \param currMBNum + * current MB index + * \param object_list + * array of region structures storing region mode and mv for each region + * \param predBlocks + * status array of the neighboring blocks (if they are OK, concealed or lost) + * \param picSizeX + * Width of the frame in pixels + * \param picSizeY + * Height of the frame in pixels + * \param yCondition + * array for conditions of Y blocks from ercVariables_t + ************************************************************************ + */ + static int concealByTrial(frame *recfr, imgpel *predMB, + int currMBNum, objectBuffer_t *object_list, int predBlocks[], + int32 picSizeX, int32 picSizeY, int *yCondition) + { + int predMBNum = 0, numMBPerLine, + compSplit1 = 0, compSplit2 = 0, compLeft = 1, comp = 0, compPred, order = 1, + fInterNeighborExists, numIntraNeighbours, + fZeroMotionChecked, predSplitted = 0, + threshold = ERC_BLOCK_OK, + minDist, 
currDist, i, k, bestDir; + int32 regionSize; + objectBuffer_t *currRegion; + int32 mvBest[3] , mvPred[3], *mvptr; + + numMBPerLine = (int) (picSizeX>>4); + + comp = 0; + regionSize = 16; + + do + { /* 4 blocks loop */ + + currRegion = object_list+(currMBNum<<2)+comp; + + /* set the position of the region to be concealed */ + + currRegion->xMin = (xPosYBlock(MBNum2YBlock(currMBNum,comp,picSizeX),picSizeX)<<3); + currRegion->yMin = (yPosYBlock(MBNum2YBlock(currMBNum,comp,picSizeX),picSizeX)<<3); + + do + { /* reliability loop */ + + minDist = 0; + fInterNeighborExists = 0; + numIntraNeighbours = 0; + fZeroMotionChecked = 0; + + /* loop the 4 neighbours */ + for (i = 4; i < 8; i++) + { + + /* if reliable, try it */ + if (predBlocks[i] >= threshold) + { + switch (i) + { + case 4: + predMBNum = currMBNum-numMBPerLine; + compSplit1 = 2; + compSplit2 = 3; + break; + + case 5: + predMBNum = currMBNum-1; + compSplit1 = 1; + compSplit2 = 3; + break; + + case 6: + predMBNum = currMBNum+numMBPerLine; + compSplit1 = 0; + compSplit2 = 1; + break; + + case 7: + predMBNum = currMBNum+1; + compSplit1 = 0; + compSplit2 = 2; + break; + } + + /* try the concealment with the Motion Info of the current neighbour + only try if the neighbour is not Intra */ + if (isBlock(object_list,predMBNum,compSplit1,INTRA) || + isBlock(object_list,predMBNum,compSplit2,INTRA)) + { + numIntraNeighbours++; + } + else + { + /* if neighbour MB is splitted, try both neighbour blocks */ + for (predSplitted = isSplitted(object_list, predMBNum), + compPred = compSplit1; + predSplitted >= 0; + compPred = compSplit2, + predSplitted -= ((compSplit1 == compSplit2) ? 2 : 1)) + { + + /* if Zero Motion Block, do the copying. 
This option is tried only once */ + if (isBlock(object_list, predMBNum, compPred, INTER_COPY)) + { + + if (fZeroMotionChecked) + { + continue; + } + else + { + fZeroMotionChecked = 1; + + mvPred[0] = mvPred[1] = 0; + mvPred[2] = 0; + + buildPredRegionYUV(erc_img, mvPred, currRegion->xMin, currRegion->yMin, predMB); + } + } + /* build motion using the neighbour's Motion Parameters */ + else if (isBlock(object_list,predMBNum,compPred,INTRA)) + { + continue; + } + else + { + mvptr = getParam(object_list, predMBNum, compPred, mv); + + mvPred[0] = mvptr[0]; + mvPred[1] = mvptr[1]; + mvPred[2] = mvptr[2]; + + buildPredRegionYUV(erc_img, mvPred, currRegion->xMin, currRegion->yMin, predMB); + } + + /* measure absolute boundary pixel difference */ + currDist = edgeDistortion(predBlocks, + MBNum2YBlock(currMBNum,comp,picSizeX), + predMB, recfr->yptr, picSizeX, regionSize); + + /* if so far best -> store the pixels as the best concealment */ + if (currDist < minDist || !fInterNeighborExists) + { + + minDist = currDist; + bestDir = i; + + for (k=0;k<3;k++) + mvBest[k] = mvPred[k]; + + currRegion->regionMode = + (isBlock(object_list, predMBNum, compPred, INTER_COPY)) ? + ((regionSize == 16) ? REGMODE_INTER_COPY : REGMODE_INTER_COPY_8x8) : + ((regionSize == 16) ? 
REGMODE_INTER_PRED : REGMODE_INTER_PRED_8x8); + + copyPredMB(MBNum2YBlock(currMBNum,comp,picSizeX), predMB, recfr, + picSizeX, regionSize); + } + + fInterNeighborExists = 1; + } + } + } + } + + threshold--; + + } while ((threshold >= ERC_BLOCK_CONCEALED) && (fInterNeighborExists == 0)); + + /* always try zero motion */ + if (!fZeroMotionChecked) + { + mvPred[0] = mvPred[1] = 0; + mvPred[2] = 0; + + buildPredRegionYUV(erc_img, mvPred, currRegion->xMin, currRegion->yMin, predMB); + + currDist = edgeDistortion(predBlocks, + MBNum2YBlock(currMBNum,comp,picSizeX), + predMB, recfr->yptr, picSizeX, regionSize); + + if (currDist < minDist || !fInterNeighborExists) + { + + minDist = currDist; + for (k=0;k<3;k++) + mvBest[k] = mvPred[k]; + + currRegion->regionMode = + ((regionSize == 16) ? REGMODE_INTER_COPY : REGMODE_INTER_COPY_8x8); + + copyPredMB(MBNum2YBlock(currMBNum,comp,picSizeX), predMB, recfr, + picSizeX, regionSize); + } + } + + for (i=0; i<3; i++) + currRegion->mv[i] = mvBest[i]; + + yCondition[MBNum2YBlock(currMBNum,comp,picSizeX)] = ERC_BLOCK_CONCEALED; + comp = (comp+order+4)%4; + compLeft--; + + } while (compLeft); + + return 0; + } + + /*! + ************************************************************************ + * \brief + * Builds the motion prediction pixels from the given location (in 1/4 pixel units) + * of the reference frame. It not only copies the pixel values but builds the interpolation + * when the pixel positions to be copied from is not full pixel (any 1/4 pixel position). + * It copies the resulting pixel vlaues into predMB. 
+ * \param img + * The pointer of img_par struture of current frame + * \param mv + * The pointer of the predicted MV of the current (being concealed) MB + * \param x + * The x-coordinate of the above-left corner pixel of the current MB + * \param y + * The y-coordinate of the above-left corner pixel of the current MB + * \param predMB + * memory area for storing temporary pixel values for a macroblock + * the Y,U,V planes are concatenated y = predMB, u = predMB+256, v = predMB+320 + ************************************************************************ + */ + static void buildPredRegionYUV(struct img_par *img, int32 *mv, int x, int y, imgpel *predMB) + { + int tmp_block[BLOCK_SIZE][BLOCK_SIZE]; + int i=0,j=0,ii=0,jj=0,i1=0,j1=0,j4=0,i4=0; + int jf=0; + int uv; + int vec1_x=0,vec1_y=0; + int ioff,joff; + imgpel *pMB = predMB; + + int ii0,jj0,ii1,jj1,if1,jf1,if0,jf0; + int mv_mul; + + //FRExt + int f1_x, f1_y, f2_x, f2_y, f3, f4, ifx; + int b8, b4; + int yuv = dec_picture->chroma_format_idc - 1; + + int ref_frame = mv[2]; + + /* Update coordinates of the current concealed macroblock */ + img->mb_x = x/MB_BLOCK_SIZE; + img->mb_y = y/MB_BLOCK_SIZE; + img->block_y = img->mb_y * BLOCK_SIZE; + img->pix_c_y = img->mb_y * img->mb_cr_size_y; + img->block_x = img->mb_x * BLOCK_SIZE; + img->pix_c_x = img->mb_x * img->mb_cr_size_x; + + mv_mul=4; + + // luma ******************************************************* + + for(j=0;jblock_y+j; + for(i=0;iblock_x+i; + + vec1_x = i4*4*mv_mul + mv[0]; + vec1_y = j4*4*mv_mul + mv[1]; + + get_block(ref_frame, listX[0], vec1_x,vec1_y,img,tmp_block); + + for(ii=0;iimpr[ii+ioff][jj+joff]=tmp_block[ii][jj]; + } + } + + for (i = 0; i < 16; i++) + { + for (j = 0; j < 16; j++) + { + pMB[i*16+j] = img->mpr[j][i]; + } + } + pMB += 256; + + if (dec_picture->chroma_format_idc != YUV400) + { + // chroma ******************************************************* + f1_x = 64/img->mb_cr_size_x; + f2_x=f1_x-1; + + f1_y = 64/img->mb_cr_size_y; + 
f2_y=f1_y-1; + + f3=f1_x*f1_y; + f4=f3>>1; + + for(uv=0;uv<2;uv++) + { + for (b8=0;b8<(img->num_blk8x8_uv/2);b8++) + { + for(b4=0;b4<4;b4++) + { + joff = subblk_offset_y[yuv][b8][b4]; + j4=img->pix_c_y+joff; + ioff = subblk_offset_x[yuv][b8][b4]; + i4=img->pix_c_x+ioff; + + for(jj=0;jj<4;jj++) + { + jf=(j4+jj)/(img->mb_cr_size_y/4); // jf = Subblock_y-coordinate + for(ii=0;ii<4;ii++) + { + ifx=(i4+ii)/(img->mb_cr_size_x/4); // ifx = Subblock_x-coordinate + + i1=(i4+ii)*f1_x + mv[0]; + j1=(j4+jj)*f1_y + mv[1]; + + ii0=max (0, min (i1/f1_x, dec_picture->size_x_cr-1)); + jj0=max (0, min (j1/f1_y, dec_picture->size_y_cr-1)); + ii1=max (0, min ((i1+f2_x)/f1_x, dec_picture->size_x_cr-1)); + jj1=max (0, min ((j1+f2_y)/f1_y, dec_picture->size_y_cr-1)); + + if1=(i1 & f2_x); + jf1=(j1 & f2_y); + if0=f1_x-if1; + jf0=f1_y-jf1; + + img->mpr[ii+ioff][jj+joff]=(if0*jf0*listX[0][ref_frame]->imgUV[uv][jj0][ii0]+ + if1*jf0*listX[0][ref_frame]->imgUV[uv][jj0][ii1]+ + if0*jf1*listX[0][ref_frame]->imgUV[uv][jj1][ii0]+ + if1*jf1*listX[0][ref_frame]->imgUV[uv][jj1][ii1]+f4)/f3; + } + } + } + } + + for (i = 0; i < 8; i++) + { + for (j = 0; j < 8; j++) + { + pMB[i*8+j] = img->mpr[j][i]; + } + } + pMB += 64; + + } + } + } + /*! + ************************************************************************ + * \brief + * Copies pixel values between a YUV frame and the temporary pixel value storage place. This is + * used to save some pixel values temporarily before overwriting it, or to copy back to a given + * location in a frame the saved pixel values. 
+ * \param currYBlockNum + * index of the block (8x8) in the Y plane + * \param predMB + * memory area where the temporary pixel values are stored + * the Y,U,V planes are concatenated y = predMB, u = predMB+256, v = predMB+320 + * \param recfr + * pointer to a YUV frame + * \param picSizeX + * picture width in pixels + * \param regionSize + * can be 16 or 8 to tell the dimension of the region to copy + ************************************************************************ + */ + static void copyPredMB (int currYBlockNum, imgpel *predMB, frame *recfr, + int32 picSizeX, int32 regionSize) + { /* pixels are written into dec_picture->imgY / imgUV; recfr is not referenced in this body */ + + int j, k, xmin, ymin, xmax, ymax; + int32 locationTmp, locationPred; /* locationPred is assigned below but never read */ + int uv_x = uv_div[0][dec_picture->chroma_format_idc]; + int uv_y = uv_div[1][dec_picture->chroma_format_idc]; + + xmin = (xPosYBlock(currYBlockNum,picSizeX)<<3); + ymin = (yPosYBlock(currYBlockNum,picSizeX)<<3); + xmax = xmin + regionSize -1; + ymax = ymin + regionSize -1; /* xmax/ymax are inclusive bounds of the region */ + + for (j = ymin; j <= ymax; j++) + { + for (k = xmin; k <= xmax; k++) + { + locationPred = j * picSizeX + k; + locationTmp = (j-ymin) * 16 + (k-xmin); /* luma part of predMB is addressed with a row stride of 16 */ + dec_picture->imgY[j][k] = predMB[locationTmp]; + } + } + + if (dec_picture->chroma_format_idc != YUV400) + { + for (j = (ymin>>uv_y); j <= (ymax>>uv_y); j++) + { + for (k = (xmin>>uv_x); k <= (xmax>>uv_x); k++) + { + locationPred = ((j * picSizeX) >> uv_x) + k; + locationTmp = (j-(ymin>>uv_y)) * img->mb_cr_size_x + (k-(xmin>>1)) + 256; /* NOTE(review): column offset subtracts xmin>>1 while k starts at xmin>>uv_x; the two differ whenever uv_x != 1 -- confirm this is intended */ + dec_picture->imgUV[0][j][k] = predMB[locationTmp]; + + locationTmp += 64; /* V sample is read 64 entries after the U sample; NOTE(review): presumably assumes an 8x8 chroma block -- confirm for other chroma formats */ + + dec_picture->imgUV[1][j][k] = predMB[locationTmp]; + } + } + } + } + + /*! + ************************************************************************ + * \brief + * Calculates a weighted pixel difference between edge Y pixels of the macroblock stored in predMB + * and the pixels in the given Y plane of a frame (recY) that would become neighbor pixels if + * predMB was placed at currYBlockNum block position into the frame.
This "edge distortion" value + * is used to determine how well the given macroblock in predMB would fit into the frame when + * considering spatial smoothness. If there are correctly received neighbor blocks (status stored + * in predBlocks) only they are used in calculating the edge distortion; otherwise the already + * concealed neighbor blocks can also be used. + * \return + * The calculated weighted pixel difference at the edges of the MB. + * \param predBlocks + * status array of the neighboring blocks (if they are OK, concealed or lost) + * \param currYBlockNum + * index of the block (8x8) in the Y plane + * \param predMB + * memory area where the temporary pixel values are stored + * the Y,U,V planes are concatenated y = predMB, u = predMB+256, v = predMB+320 + * \param recY + * pointer to a Y plane of a YUV frame + * \param picSizeX + * picture width in pixels + * \param regionSize + * can be 16 or 8 to tell the dimension of the region to copy + ************************************************************************ + */ + static int edgeDistortion (int predBlocks[], int currYBlockNum, imgpel *predMB, + imgpel *recY, int32 picSizeX, int32 regionSize) + { + int i, j, distortion, numOfPredBlocks, threshold = ERC_BLOCK_OK; + imgpel *currBlock = NULL, *neighbor = NULL; + int32 currBlockOffset = 0; + + currBlock = recY + (yPosYBlock(currYBlockNum,picSizeX)<<3)*picSizeX + (xPosYBlock(currYBlockNum,picSizeX)<<3); + + do + { + + distortion = 0; numOfPredBlocks = 0; + + /* loop the 4 neighbours */ + for (j = 4; j < 8; j++) + { + /* if reliable, count boundary pixel difference */ + if (predBlocks[j] >= threshold) + { + + switch (j) + { + case 4: + neighbor = currBlock - picSizeX; + for ( i = 0; i < regionSize; i++ ) + { + distortion += mabs((int)(predMB[i] - neighbor[i])); + } + break; + case 5: + neighbor = currBlock - 1; + for ( i = 0; i < regionSize; i++ ) + { + distortion += mabs((int)(predMB[i*16] - neighbor[i*picSizeX])); + } + break; + case 6: + neighbor
= currBlock + regionSize*picSizeX; + currBlockOffset = (regionSize-1)*16; + for ( i = 0; i < regionSize; i++ ) + { + distortion += mabs((int)(predMB[i+currBlockOffset] - neighbor[i])); + } + break; + case 7: + neighbor = currBlock + regionSize; + currBlockOffset = regionSize-1; + for ( i = 0; i < regionSize; i++ ) + { + distortion += mabs((int)(predMB[i*16+currBlockOffset] - neighbor[i*picSizeX])); + } + break; + } + + numOfPredBlocks++; + } + } + + threshold--; + if (threshold < ERC_BLOCK_CONCEALED) + break; + } while (numOfPredBlocks == 0); + + if(numOfPredBlocks == 0) + assert (numOfPredBlocks != 0); + return (distortion/numOfPredBlocks); + } + + // picture error concealment below + + /*! + ************************************************************************ + * \brief + * The motion prediction pixels are calculated from the given location (in + * 1/4 pixel units) of the referenced frame. It copies the sub block from the + * corresponding reference to the frame to be concealed. + * + ************************************************************************* + */ + static void buildPredblockRegionYUV(struct img_par *img, int32 *mv, + int x, int y, imgpel *predMB, int list) + { + int tmp_block[BLOCK_SIZE][BLOCK_SIZE]; + int i=0,j=0,ii=0,jj=0,i1=0,j1=0,j4=0,i4=0; + int jf=0; + int uv; + int vec1_x=0,vec1_y=0; + int ioff,joff; + imgpel *pMB = predMB; + + int ii0,jj0,ii1,jj1,if1,jf1,if0,jf0; + int mv_mul; + + //FRExt + int f1_x, f1_y, f2_x, f2_y, f3, f4, ifx; + int yuv = dec_picture->chroma_format_idc - 1; + + int ref_frame = mv[2]; + + /* Update coordinates of the current concealed macroblock */ + + img->mb_x = x/BLOCK_SIZE; + img->mb_y = y/BLOCK_SIZE; + img->block_y = img->mb_y * BLOCK_SIZE; + img->pix_c_y = img->mb_y * img->mb_cr_size_y/4; + img->block_x = img->mb_x * BLOCK_SIZE; + img->pix_c_x = img->mb_x * img->mb_cr_size_x/4; + + mv_mul=4; + + // luma ******************************************************* + + vec1_x = x*mv_mul + mv[0]; + vec1_y = y*mv_mul + 
mv[1]; + + get_block(ref_frame, listX[list], vec1_x,vec1_y,img,tmp_block); + + for(ii=0;iimpr[ii][jj]=tmp_block[ii][jj]; + + + for (i = 0; i < 4; i++) + { + for (j = 0; j < 4; j++) + { + pMB[i*4+j] = img->mpr[j][i]; + } + } + pMB += 16; + + if (dec_picture->chroma_format_idc != YUV400) + { + // chroma ******************************************************* + f1_x = 64/(img->mb_cr_size_x); + f2_x=f1_x-1; + + f1_y = 64/(img->mb_cr_size_y); + f2_y=f1_y-1; + + f3=f1_x*f1_y; + f4=f3>>1; + + for(uv=0;uv<2;uv++) + { + joff = subblk_offset_y[yuv][0][0]; + j4=img->pix_c_y+joff; + ioff = subblk_offset_x[yuv][0][0]; + i4=img->pix_c_x+ioff; + + for(jj=0;jj<2;jj++) + { + jf=(j4+jj)/(img->mb_cr_size_y/4); // jf = Subblock_y-coordinate + for(ii=0;ii<2;ii++) + { + ifx=(i4+ii)/(img->mb_cr_size_x/4); // ifx = Subblock_x-coordinate + + i1=(i4+ii)*f1_x + mv[0]; + j1=(j4+jj)*f1_y + mv[1]; + + ii0=max (0, min (i1/f1_x, dec_picture->size_x_cr-1)); + jj0=max (0, min (j1/f1_y, dec_picture->size_y_cr-1)); + ii1=max (0, min ((i1+f2_x)/f1_x, dec_picture->size_x_cr-1)); + jj1=max (0, min ((j1+f2_y)/f1_y, dec_picture->size_y_cr-1)); + + if1=(i1 & f2_x); + jf1=(j1 & f2_y); + if0=f1_x-if1; + jf0=f1_y-jf1; + + img->mpr[ii][jj]=(if0*jf0*listX[list][ref_frame]->imgUV[uv][jj0][ii0]+ + if1*jf0*listX[list][ref_frame]->imgUV[uv][jj0][ii1]+ + if0*jf1*listX[list][ref_frame]->imgUV[uv][jj1][ii0]+ + if1*jf1*listX[list][ref_frame]->imgUV[uv][jj1][ii1]+f4)/f3; + } + } + + for (i = 0; i < 2; i++) + { + for (j = 0; j < 2; j++) + { + pMB[i*2+j] = img->mpr[j][i]; + } + } + pMB += 4; + + } + } + } + + /*! 
+ ************************************************************************ + * \brief + * compares two stored pictures by picture number for qsort in descending order + * + ************************************************************************ + */ + static int compare_pic_by_pic_num_desc( const void *arg1, const void *arg2 ) + { + if ( (*(StorablePicture**)arg1)->pic_num < (*(StorablePicture**)arg2)->pic_num) + return 1; + if ( (*(StorablePicture**)arg1)->pic_num > (*(StorablePicture**)arg2)->pic_num) + return -1; + else + return 0; + } + + /*! + ************************************************************************ + * \brief + * compares two stored pictures by long-term picture number for qsort in ascending order + * \param arg1 + * pointer to a StorablePicture* element + * \param arg2 + * pointer to a StorablePicture* element + * \return + * -1 if long_term_pic_num of arg1 is smaller, 1 if it is larger, 0 if both are equal + * + ************************************************************************ + */ + static int compare_pic_by_lt_pic_num_asc( const void *arg1, const void *arg2 ) + { + if ( (*(StorablePicture**)arg1)->long_term_pic_num < (*(StorablePicture**)arg2)->long_term_pic_num) + return -1; + if ( (*(StorablePicture**)arg1)->long_term_pic_num > (*(StorablePicture**)arg2)->long_term_pic_num) + return 1; + else + return 0; + } + + /*! + ************************************************************************ + * \brief + * compares two stored pictures by poc for qsort in ascending order + * \param arg1 + * pointer to a StorablePicture* element + * \param arg2 + * pointer to a StorablePicture* element + * \return + * -1 if poc of arg1 is smaller, 1 if it is larger, 0 if both are equal + * + ************************************************************************ + */ + static int compare_pic_by_poc_asc( const void *arg1, const void *arg2 ) + { + if ( (*(StorablePicture**)arg1)->poc < (*(StorablePicture**)arg2)->poc) + return -1; + if ( (*(StorablePicture**)arg1)->poc > (*(StorablePicture**)arg2)->poc) + return 1; + else + return 0; + } + + + /*!
+ ************************************************************************ + * \brief + * compares two stored pictures by poc for qsort in descending order + * + ************************************************************************ + */ + static int compare_pic_by_poc_desc( const void *arg1, const void *arg2 ) + { + if ( (*(StorablePicture**)arg1)->poc < (*(StorablePicture**)arg2)->poc) + return 1; + if ( (*(StorablePicture**)arg1)->poc > (*(StorablePicture**)arg2)->poc) + return -1; + else + return 0; + } + + /*! + ************************************************************************ + * \brief + * Copy image data from one array to another array + ************************************************************************ + */ + + static + void CopyImgData(imgpel **inputY, imgpel ***inputUV, imgpel **outputY, + imgpel ***outputUV, int img_width, int img_height) + { + int x, y; + + for (y=0; y= 0; i--) + { + if (dpb.fs[i]->is_used==3) + { + if (((dpb.fs[i]->frame->used_for_reference) && + (!dpb.fs[i]->frame->is_long_term)) /*|| ((dpb.fs[i]->frame->used_for_reference==0) + && (dpb.fs[i]->frame->slice_type == P_SLICE))*/ ) + { + return dpb.fs[i]->frame; + } + } + } + + return NULL; + } + + /*! + ************************************************************************ + * \brief + * Conceals the lost reference or non reference frame by either frame copy + * or motion vector copy concealment. 
+ * + ************************************************************************ + */ + + static void + copy_to_conceal(StorablePicture *src, StorablePicture *dst, ImageParameters *img) + { + int i=0; + int mv[3]; + int multiplier; + imgpel *predMB, *storeYUV; + int j, y, x, mb_height, mb_width, ii=0, jj=0; + int uv; + int mm, nn; + int scale = 1; + // struct inp_par *test; + + img->current_mb_nr = 0; + + dst->PicSizeInMbs = src->PicSizeInMbs; + + dst->slice_type = src->slice_type = img->conceal_slice_type; + + dst->idr_flag = 0; //since we do not want to clears the ref list + + dst->no_output_of_prior_pics_flag = src->no_output_of_prior_pics_flag; + dst->long_term_reference_flag = src->long_term_reference_flag; + dst->adaptive_ref_pic_buffering_flag = src->adaptive_ref_pic_buffering_flag = 0; + dst->chroma_format_idc = src->chroma_format_idc; + dst->frame_mbs_only_flag = src->frame_mbs_only_flag; + dst->frame_cropping_flag = src->frame_cropping_flag; + dst->frame_cropping_rect_left_offset = src->frame_cropping_rect_left_offset; + dst->frame_cropping_rect_right_offset = src->frame_cropping_rect_right_offset; + dst->frame_cropping_rect_bottom_offset = src->frame_cropping_rect_bottom_offset; + dst->frame_cropping_rect_top_offset = src->frame_cropping_rect_top_offset; + dst->qp = src->qp; + dst->slice_qp_delta = src->slice_qp_delta; + + dec_picture = src; + + // Conceals the missing frame by frame copy concealment + if (img->conceal_mode==1) + { + // We need these initializations for using deblocking filter for frame copy + // concealment as well. 
+ dst->PicWidthInMbs = src->PicWidthInMbs; + dst->PicSizeInMbs = src->PicSizeInMbs; + + CopyImgData(src->imgY, src->imgUV, + dst->imgY, dst->imgUV, + img->width, img->height); + + } + + // Conceals the missing frame by motion vector copy concealment + if (img->conceal_mode==2) + { + if (dec_picture->chroma_format_idc != YUV400) + { + storeYUV = (imgpel *) malloc ( (16 + (img->mb_cr_size_x*img->mb_cr_size_y)*2/16) * sizeof (imgpel)); + } + else + { + storeYUV = (imgpel *) malloc (16 * sizeof (imgpel)); + } + + erc_img = img; + + dst->PicWidthInMbs = src->PicWidthInMbs; + dst->PicSizeInMbs = src->PicSizeInMbs; + mb_width = dst->PicWidthInMbs; + mb_height = (dst->PicSizeInMbs)/(dst->PicWidthInMbs); + scale = (img->conceal_slice_type == B_SLICE) ? 2 : 1; + + if(img->conceal_slice_type == B_SLICE) + init_lists_for_non_reference_loss(dst->slice_type, img->currentSlice->structure); + else + init_lists(dst->slice_type, img->currentSlice->structure); + + multiplier = BLOCK_SIZE; + + for(i=0;imv[LIST_0][i][j][0] / scale; + mv[1] = src->mv[LIST_0][i][j][1] / scale; + mv[2] = src->ref_idx[LIST_0][i][j]; + + + if(mv[2]<0) + mv[2]=0; + + dst->mv[LIST_0][i][j][0] = mv[0]; + dst->mv[LIST_0][i][j][1] = mv[1]; + dst->ref_idx[LIST_0][i][j] = mv[2]; + + x = (j)*multiplier; + y = (i)*multiplier; + + if ((mm%16==0) && (nn%16==0)) + img->current_mb_nr++; + + buildPredblockRegionYUV(erc_img, mv, x, y, storeYUV, LIST_0); + + predMB = storeYUV; + + for(ii=0;iiimgY[i*multiplier+ii][j*multiplier+jj] = predMB[ii*(multiplier)+jj]; + } + } + + predMB = predMB + (multiplier*multiplier); + + if (dec_picture->chroma_format_idc != YUV400) + { + + for(uv=0;uv<2;uv++) + { + for(ii=0;ii< (multiplier/2);ii++) + { + for(jj=0;jj< (multiplier/2);jj++) + { + dst->imgUV[uv][i*multiplier/2 +ii][j*multiplier/2 +jj] = predMB[ii*(multiplier/2)+jj]; + } + } + predMB = predMB + (multiplier*multiplier/4); + } + } + } + } + free(storeYUV); + } + } + + /*! 
+ ************************************************************************ + * \brief + * Uses the previous reference pic for concealment of reference frames + * + ************************************************************************ + */ + + static void + copy_prev_pic_to_concealed_pic(StorablePicture *picture, ImageParameters *img) + { + + StorablePicture *ref_pic; + /* get the last ref pic in dpb */ + ref_pic = get_last_ref_pic_from_dpb(); + + assert(ref_pic != NULL); + + /* copy all the struc from this to current concealment pic */ + img->conceal_slice_type = P_SLICE; + copy_to_conceal(ref_pic, picture, img); + } + + + /*! + ************************************************************************ + * \brief + * This function conceals a missing reference frame. The routine is called + * based on the difference in frame number. It conceals an IDR frame loss + * based on the sudden decrease in frame number. + * + ************************************************************************ + */ + + void conceal_lost_frames(ImageParameters *img) + { + int CurrFrameNum; + int UnusedShortTermFrameNum; + StorablePicture *picture = NULL; + int tmp1 = img->delta_pic_order_cnt[0]; + int tmp2 = img->delta_pic_order_cnt[1]; + + img->delta_pic_order_cnt[0] = img->delta_pic_order_cnt[1] = 0; + + // printf("A gap in frame number is found, try to fill it.\n"); + + if(img->IDR_concealment_flag == 1) + { + // Conceals an IDR frame loss. Uses the reference frame in the previous + // GOP for concealment. 
+ UnusedShortTermFrameNum = 0; + img->last_ref_pic_poc = -img->poc_gap; + img->earlier_missing_poc = 0; + } + else + UnusedShortTermFrameNum = (img->pre_frame_num + 1) % img->MaxFrameNum; + + CurrFrameNum = img->frame_num; + + while (CurrFrameNum != UnusedShortTermFrameNum) + { + picture = alloc_storable_picture (FRAME, img->width, img->height, img->width_cr, img->height_cr); + + picture->coded_frame = 1; + picture->pic_num = UnusedShortTermFrameNum; + picture->frame_num = UnusedShortTermFrameNum; + picture->non_existing = 0; + picture->is_output = 0; + picture->used_for_reference = 1; + picture->concealed_pic = 1; + + picture->adaptive_ref_pic_buffering_flag = 0; + + img->frame_num = UnusedShortTermFrameNum; + + picture->top_poc=img->last_ref_pic_poc + img->ref_poc_gap; + picture->bottom_poc=picture->top_poc; + picture->frame_poc=picture->top_poc; + picture->poc=picture->top_poc; + img->last_ref_pic_poc = picture->poc; + + copy_prev_pic_to_concealed_pic(picture, img); + + if(UnusedShortTermFrameNum == 0) + { + picture->slice_type = I_SLICE; + picture->idr_flag = 1; + flush_dpb(); + picture->top_poc= 0; + picture->bottom_poc=picture->top_poc; + picture->frame_poc=picture->top_poc; + picture->poc=picture->top_poc; + img->last_ref_pic_poc = picture->poc; + } + + store_picture_in_dpb(picture); + + picture=NULL; + + img->pre_frame_num = UnusedShortTermFrameNum; + UnusedShortTermFrameNum = (UnusedShortTermFrameNum + 1) % img->MaxFrameNum; + } + img->delta_pic_order_cnt[0] = tmp1; + img->delta_pic_order_cnt[1] = tmp2; + img->frame_num = CurrFrameNum; + } + + /*! + ************************************************************************ + * \brief + * Updates the reference list for motion vector copy concealment for non- + * reference frame loss. 
+ * + ************************************************************************ + */ + + static void update_ref_list_for_concealment() + { + unsigned i, j; + for (i=0, j=0; iconcealment_reference) + { + dpb.fs_ref[j++]=dpb.fs[i]; + } + } + + dpb.ref_frames_in_buffer = active_pps->num_ref_idx_l0_active_minus1; + } + + /*! + ************************************************************************ + * \brief + * Initialize the list based on the B frame or non reference 'p' frame + * to be concealed. The function initialize listX[0] and list 1 depending + * on current picture type + * + ************************************************************************ + */ + void init_lists_for_non_reference_loss(int currSliceType, PictureStructure currPicStructure) + { + unsigned i; + int j; + int MaxFrameNum = 1 << (active_sps->log2_max_frame_num_minus4 + 4); + int diff; + + int list0idx = 0; + int list0idx_1 = 0; + + StorablePicture *tmp_s; + + if (currPicStructure == FRAME) + { + for(i=0;iconcealment_reference == 1) + { + if(dpb.fs[i]->frame_num > img->frame_to_conceal) + dpb.fs_ref[i]->frame_num_wrap = dpb.fs[i]->frame_num - MaxFrameNum; + else + dpb.fs_ref[i]->frame_num_wrap = dpb.fs[i]->frame_num; + dpb.fs_ref[i]->frame->pic_num = dpb.fs_ref[i]->frame_num_wrap; + } + } + } + + if (currSliceType == P_SLICE) + { + // Calculate FrameNumWrap and PicNum + if (currPicStructure == FRAME) + { + for(i=0;iconcealment_reference == 1) + { + listX[0][list0idx++] = dpb.fs[i]->frame; + } + } + // order list 0 by PicNum + qsort((void *)listX[0], list0idx, sizeof(StorablePicture*), compare_pic_by_pic_num_desc); + listXsize[0] = list0idx; + } + } + + if (currSliceType == B_SLICE) + { + if (currPicStructure == FRAME) + { + // for(i=0;iconcealment_reference == 1) + { + if(img->earlier_missing_poc > dpb.fs[i]->frame->poc) + listX[0][list0idx++] = dpb.fs[i]->frame; + } + } + + qsort((void *)listX[0], list0idx, sizeof(StorablePicture*), compare_pic_by_poc_desc); + list0idx_1 = list0idx; + + // 
for(i=0;iconcealment_reference == 1) + { + if(img->earlier_missing_poc < dpb.fs[i]->frame->poc) + listX[0][list0idx++] = dpb.fs[i]->frame; + } + } + + qsort((void *)&listX[0][list0idx_1], list0idx-list0idx_1, sizeof(StorablePicture*), compare_pic_by_poc_asc); + + for (j=0; j 1)) + { + // check if lists are identical, if yes swap first two elements of listX[1] + diff=0; + for (j = 0; j< listXsize[0]; j++) + { + if (listX[0][j]!=listX[1][j]) + diff=1; + } + if (!diff) + { + tmp_s = listX[1][0]; + listX[1][0]=listX[1][1]; + listX[1][1]=tmp_s; + } + } + + + // set max size + listXsize[0] = min (listXsize[0], active_sps->num_ref_frames); + listXsize[1] = min (listXsize[1], active_sps->num_ref_frames); + + listXsize[1] = 0; + // set the unused list entries to NULL + for (i=listXsize[0]; i< (MAX_LIST_SIZE) ; i++) + { + listX[0][i] = NULL; + } + for (i=listXsize[1]; i< (MAX_LIST_SIZE) ; i++) + { + listX[1][i] = NULL; + } + } + + + /*! + ************************************************************************ + * \brief + * Get from the dpb the picture corresponding to a POC. The POC varies + * depending on whether it is a frame copy or motion vector copy concealment. + * The frame corresponding to the POC is returned. + * + ************************************************************************ + */ + + static StorablePicture *get_pic_from_dpb(int missingpoc, int *pos) + { + int used_size = dpb.used_size - 1; + int i, concealfrom; + + if(img->conceal_mode == 1) + concealfrom = missingpoc - img->poc_gap; + else if (img->conceal_mode == 2) + concealfrom = missingpoc + img->poc_gap; + + for(i = used_size; i >= 0; i--) + { + if(dpb.fs[i]->poc == concealfrom) + { + *pos = i; + return dpb.fs[i]->frame; + } + } + + return NULL; + } + + /*! 
+ ************************************************************************ + * \brief + * Function to sort the POC and find the lowest number in the POC list + * Compare the integers + * + ************************************************************************ + */ + + int comp(const void *i, const void *j) + { + return *(int *)i - *(int *)j; + } + + /*! + ************************************************************************ + * \brief + * Initialises a node, allocates memory for the node, and returns + * a pointer to the new node. + * + ************************************************************************ + */ + + struct concealment_node * init_node( StorablePicture* picture, int missingpoc ) + { + struct concealment_node *ptr; + + ptr = (struct concealment_node *) calloc( 1, sizeof(struct concealment_node ) ); + + if( ptr == NULL ) + return (struct concealment_node *) NULL; + else { + ptr->picture = picture; + ptr->missingpocs = missingpoc; + ptr->next = NULL; + return ptr; + } + } + + /*! + ************************************************************************ + * \brief + * Prints the details of a node + * + ************************************************************************ + */ + + void print_node( struct concealment_node *ptr ) + { + printf("Missing POC=%d\n", ptr->missingpocs ); + } + + + /*! + ************************************************************************ + * \brief + * Prints all nodes from the current address passed to it. + * + ************************************************************************ + */ + + void print_list( struct concealment_node *ptr ) + { + while( ptr != NULL ) + { + print_node( ptr ); + ptr = ptr->next; + } + } + + /*! + ************************************************************************ + * \brief + * Adds a node to the end of the list. 
+ * + ************************************************************************ + */ + + + void add_node( struct concealment_node *concealment_new ) + { + if( concealment_head == NULL ) + { + concealment_end = concealment_head = concealment_new; + return; + } + concealment_end->next = concealment_new; + concealment_end = concealment_new; + } + + + /*! + ************************************************************************ + * \brief + * Deletes the specified node pointed to by 'ptr' from the list + * + ************************************************************************ + */ + + + void delete_node( struct concealment_node *ptr ) + { + // We only need to delete the first node in the linked list + if( ptr == concealment_head ) { + concealment_head = concealment_head->next; + if( concealment_end == ptr ) + concealment_end = concealment_end->next; + free(ptr); + } + } + + /*! + ************************************************************************ + * \brief + * Deletes all nodes from the place specified by ptr + * + ************************************************************************ + */ + + void delete_list( struct concealment_node *ptr ) + { + struct concealment_node *temp; + + if( concealment_head == NULL ) return; + + if( ptr == concealment_head ) { + concealment_head = NULL; + concealment_end = NULL; + } + else + { + temp = concealment_head; + + while( temp->next != ptr ) + temp = temp->next; + concealment_end = temp; + } + + while( ptr != NULL ) { + temp = ptr->next; + free( ptr ); + ptr = temp; + } + } + + /*! + ************************************************************************ + * \brief + * Stores the missing non reference frames in the concealment buffer. The + * detection is based on the POC difference in the sorted POC array. A missing + * non reference frame is detected when the dpb is full. A singly linked list + * is maintained for storing the missing non reference frames. 
+ * + ************************************************************************ + */ + + void conceal_non_ref_pics(int diff) + { + int missingpoc = 0; + unsigned int i, pos; + StorablePicture *conceal_from_picture = NULL; + StorablePicture *conceal_to_picture = NULL; + struct concealment_node *concealment_ptr = NULL; + int temp_used_size = dpb.used_size; + + if(dpb.used_size == 0 ) + return; + + qsort(pocs_in_dpb, dpb.size, sizeof(int), comp); + + for(i=0;iimg->poc_gap) + { + conceal_to_picture = alloc_storable_picture (FRAME, img->width, img->height, img->width_cr, img->height_cr); + + missingpoc = pocs_in_dpb[i] + img->poc_gap; + // Diagnostics + // printf("\n missingpoc = %d\n",missingpoc); + + if(missingpoc > img->earlier_missing_poc) + { + img->earlier_missing_poc = missingpoc; + conceal_to_picture->top_poc= missingpoc; + conceal_to_picture->bottom_poc=missingpoc; + conceal_to_picture->frame_poc=missingpoc; + conceal_to_picture->poc=missingpoc; + conceal_from_picture = get_pic_from_dpb(missingpoc, &pos); + + assert(conceal_from_picture != NULL); + + dpb.used_size = pos+1; + + img->frame_to_conceal = conceal_from_picture->frame_num + 1; + + update_ref_list_for_concealment(); + img->conceal_slice_type = B_SLICE; + copy_to_conceal(conceal_from_picture, conceal_to_picture, img); + concealment_ptr = init_node( conceal_to_picture, missingpoc ); + add_node(concealment_ptr); + // Diagnostics + // print_node(concealment_ptr); + } + } + } + + //restore the original value + //dpb.used_size = dpb.size; + dpb.used_size = temp_used_size; + } + + /*! + ************************************************************************ + * \brief + * Perform Sliding window decoded reference picture marking process. It + * maintains the POC s stored in the dpb at a specific instance. 
+ * + ************************************************************************ + */ + + void sliding_window_poc_management(StorablePicture *p) + { + unsigned int i; + + if (dpb.used_size == dpb.size) + { + for(i=0;ipoc; + } + + + /*! + ************************************************************************ + * \brief + * Outputs the non reference frames. The POCs in the concealment buffer are + * sorted in ascending order and outputted when the lowest POC in the + * concealment buffer is lower than the lowest in the dpb. The linked list + * entry corresponding to the outputted POC is immediately deleted. + * + ************************************************************************ + */ + + void write_lost_non_ref_pic(int poc, int p_out) + { + FrameStore concealment_fs; + if(poc > 0) + { + if((poc - dpb.last_output_poc) > img->poc_gap) + { + + concealment_fs.frame = concealment_head->picture; + concealment_fs.is_output = 0; + concealment_fs.is_reference = 0; + concealment_fs.is_used = 3; + + write_stored_frame(&concealment_fs, p_out); + delete_node(concealment_head); + } + } + } + + /*! + ************************************************************************ + * \brief + * Conceals frame loss immediately after the IDR. This special case produces + * the same result for either frame copy or motion vector copy concealment. 
+ * + ************************************************************************ + */ + + void write_lost_ref_after_idr(int pos) + { + int temp = 1; + + if(last_out_fs->frame == NULL) + { + last_out_fs->frame = alloc_storable_picture (FRAME, img->width, img->height, + img->width_cr, img->height_cr); + last_out_fs->is_used = 3; + } + + if(img->conceal_mode == 2) + { + temp = 2; + img->conceal_mode = 1; + } + copy_to_conceal(dpb.fs[pos]->frame, last_out_fs->frame, img); + + img->conceal_mode = temp; + } + Index: llvm-test/MultiSource/Applications/JM/ldecod/erc_globals.h diff -c /dev/null llvm-test/MultiSource/Applications/JM/ldecod/erc_globals.h:1.1 *** /dev/null Sat Feb 11 04:33:42 2006 --- llvm-test/MultiSource/Applications/JM/ldecod/erc_globals.h Sat Feb 11 04:33:22 2006 *************** *** 0 **** --- 1,52 ---- + + /*! + ************************************************************************ + * \file erc_globals.h + * + * \brief + * global header file for error concealment module + * + * \author + * - Viktor Varsa + * - Ye-Kui Wang + ************************************************************************ + */ + + #ifndef _ERC_GLOBALS_H_ + #define _ERC_GLOBALS_H_ + + #include + #include "defines.h" + + /* "block" means an 8x8 pixel area */ + + /* Region modes */ + #define REGMODE_INTER_COPY 0 /* Copy region */ + #define REGMODE_INTER_PRED 1 /* Inter region with motion vectors */ + #define REGMODE_INTRA 2 /* Intra region */ + #define REGMODE_SPLITTED 3 /* Any region mode higher than this indicates that the region + is splitted which means 8x8 block */ + #define REGMODE_INTER_COPY_8x8 4 + #define REGMODE_INTER_PRED_8x8 5 + #define REGMODE_INTRA_8x8 6 + + /* YUV pixel domain image arrays for a video frame */ + typedef struct + { + imgpel *yptr; + imgpel *uptr; + imgpel *vptr; + } frame; + + /* region structure stores information about a region that is needed for concealment */ + typedef struct + { + byte regionMode; /* region mode as above */ + int xMin; /* X 
coordinate of the pixel position of the top-left corner of the region */ + int yMin; /* Y coordinate of the pixel position of the top-left corner of the region */ + int32 mv[3]; /* motion vectors in 1/4 pixel units: mvx = mv[0], mvy = mv[1], + and ref_frame = mv[2] */ + } objectBuffer_t; + + #endif + Index: llvm-test/MultiSource/Applications/JM/ldecod/errorconcealment.c diff -c /dev/null llvm-test/MultiSource/Applications/JM/ldecod/errorconcealment.c:1.1 *** /dev/null Sat Feb 11 04:33:42 2006 --- llvm-test/MultiSource/Applications/JM/ldecod/errorconcealment.c Sat Feb 11 04:33:22 2006 *************** *** 0 **** --- 1,242 ---- + + /*! + *********************************************************************** + * \file errorconcealment.c + * + * \brief + * Implements error concealment scheme for H.264 decoder + * + * \date + * 6.10.2000 + * + * \version + * 1.0 + * + * \note + * This simple error concealment implemented in this decoder uses + * the existing dependencies of syntax elements. + * In case that an element is detected as false this elements and all + * dependend elements are marked as elements to conceal in the ec_flag[] + * array. If the decoder requests a new element by the function + * readSyntaxElement_xxxx() this array is checked first if an error concealment has + * to be applied on this element. + * In case that an error occured a concealed element is given to the + * decoding function in macroblock(). 
+ * + * \author + * Main contributors (see contributors.h for copyright, address and affiliation details) + * - Sebastian Purreiter + *********************************************************************** + */ + + #include "contributors.h" + #include "global.h" + #include "elements.h" + + static int ec_flag[SE_MAX_ELEMENTS]; //!< array to set errorconcealment + /* + static char SEtypes[][25] = + { + "SE_HEADER", + "SE_PTYPE", + "SE_MBTYPE", + "SE_REFFRAME", + "SE_INTRAPREDMODE", + "SE_MVD", + "SE_CBP_INTRA", + "SE_LUM_DC_INTRA", + "SE_CHR_DC_INTRA", + "SE_LUM_AC_INTRA", + "SE_CHR_AC_INTRA", + "SE_CBP_INTER", + "SE_LUM_DC_INTER", + "SE_CHR_DC_INTER", + "SE_LUM_AC_INTER", + "SE_CHR_AC_INTER", + "SE_DELTA_QUANT_INTER", + "SE_DELTA_QUANT_INTRA", + "SE_BFRAME", + "SE_EOS" + }; + */ + + /*! + *********************************************************************** + * \brief + * set concealment for all elements in same partition + * and dependend syntax elements + * \return + * EC_REQ, elements of same type or depending type need error concealment. 
\n + * EX_SYNC sync on next header + *********************************************************************** + */ + int set_ec_flag( + int se) //!< type of syntax element to conceal + { + + /* + if (ec_flag[se] == NO_EC) + printf("Error concealment on element %s\n",SEtypes[se]); + */ + switch (se) + { + case SE_HEADER : + ec_flag[SE_HEADER] = EC_REQ; + case SE_PTYPE : + ec_flag[SE_PTYPE] = EC_REQ; + case SE_MBTYPE : + ec_flag[SE_MBTYPE] = EC_REQ; + + case SE_REFFRAME : + ec_flag[SE_REFFRAME] = EC_REQ; + ec_flag[SE_MVD] = EC_REQ; // set all motion vectors to zero length + se = SE_CBP_INTER; // conceal also Inter texture elements + break; + + case SE_INTRAPREDMODE : + ec_flag[SE_INTRAPREDMODE] = EC_REQ; + se = SE_CBP_INTRA; // conceal also Intra texture elements + break; + case SE_MVD : + ec_flag[SE_MVD] = EC_REQ; + se = SE_CBP_INTER; // conceal also Inter texture elements + break; + + default: + break; + } + + switch (se) + { + case SE_CBP_INTRA : + ec_flag[SE_CBP_INTRA] = EC_REQ; + case SE_LUM_DC_INTRA : + ec_flag[SE_LUM_DC_INTRA] = EC_REQ; + case SE_CHR_DC_INTRA : + ec_flag[SE_CHR_DC_INTRA] = EC_REQ; + case SE_LUM_AC_INTRA : + ec_flag[SE_LUM_AC_INTRA] = EC_REQ; + case SE_CHR_AC_INTRA : + ec_flag[SE_CHR_AC_INTRA] = EC_REQ; + break; + + case SE_CBP_INTER : + ec_flag[SE_CBP_INTER] = EC_REQ; + case SE_LUM_DC_INTER : + ec_flag[SE_LUM_DC_INTER] = EC_REQ; + case SE_CHR_DC_INTER : + ec_flag[SE_CHR_DC_INTER] = EC_REQ; + case SE_LUM_AC_INTER : + ec_flag[SE_LUM_AC_INTER] = EC_REQ; + case SE_CHR_AC_INTER : + ec_flag[SE_CHR_AC_INTER] = EC_REQ; + break; + case SE_DELTA_QUANT_INTER : + ec_flag[SE_DELTA_QUANT_INTER] = EC_REQ; + break; + case SE_DELTA_QUANT_INTRA : + ec_flag[SE_DELTA_QUANT_INTRA] = EC_REQ; + break; + default: + break; + + } + return EC_REQ; + } + + /*! 
+ *********************************************************************** + * \brief + * resets EC_Flags called at the start of each slice + * + *********************************************************************** + */ + void reset_ec_flags() + { + int i; + for (i=0; itype] == NO_EC) + return NO_EC; + /* + #if TRACE + printf("TRACE: get concealed element for %s!!!\n", SEtypes[sym->type]); + #endif + */ + switch (sym->type) + { + case SE_HEADER : + sym->len = 31; + sym->inf = 0; // Picture Header + break; + + case SE_PTYPE : // inter_img_1 + case SE_MBTYPE : // set COPY_MB + case SE_REFFRAME : + sym->len = 1; + sym->inf = 0; + break; + + case SE_INTRAPREDMODE : + case SE_MVD : + sym->len = 1; + sym->inf = 0; // set vector to zero length + break; + + case SE_CBP_INTRA : + sym->len = 5; + sym->inf = 0; // codenumber 3 <=> no CBP information for INTRA images + break; + + case SE_LUM_DC_INTRA : + case SE_CHR_DC_INTRA : + case SE_LUM_AC_INTRA : + case SE_CHR_AC_INTRA : + sym->len = 1; + sym->inf = 0; // return EOB + break; + + case SE_CBP_INTER : + sym->len = 1; + sym->inf = 0; // codenumber 1 <=> no CBP information for INTER images + break; + + case SE_LUM_DC_INTER : + case SE_CHR_DC_INTER : + case SE_LUM_AC_INTER : + case SE_CHR_AC_INTER : + sym->len = 1; + sym->inf = 0; // return EOB + break; + + case SE_DELTA_QUANT_INTER: + sym->len = 1; + sym->inf = 0; + break; + case SE_DELTA_QUANT_INTRA: + sym->len = 1; + sym->inf = 0; + break; + default: + break; + } + + return EC_REQ; + } + Index: llvm-test/MultiSource/Applications/JM/ldecod/errorconcealment.h diff -c /dev/null llvm-test/MultiSource/Applications/JM/ldecod/errorconcealment.h:1.1 *** /dev/null Sat Feb 11 04:33:42 2006 --- llvm-test/MultiSource/Applications/JM/ldecod/errorconcealment.h Sat Feb 11 04:33:22 2006 *************** *** 0 **** --- 1,21 ---- + + + /*! 
+ **************************************************************************** + * \file errorconcealment.h + * + * \brief + * Header file for errorconcealment.c + * + **************************************************************************** + */ + + #ifndef _ERRORCONCEALMENT_H_ + #define _ERRORCONCEALMENT_H_ + + int set_ec_flag(int se); + void reset_ec_flags(); + int get_concealed_element(SyntaxElement *sym); + + #endif + Index: llvm-test/MultiSource/Applications/JM/ldecod/filehandle.c diff -c /dev/null llvm-test/MultiSource/Applications/JM/ldecod/filehandle.c:1.1 *** /dev/null Sat Feb 11 04:33:42 2006 --- llvm-test/MultiSource/Applications/JM/ldecod/filehandle.c Sat Feb 11 04:33:22 2006 *************** *** 0 **** --- 1,166 ---- + + /*! + ************************************************************************************** + * \file + * filehandle.c + * \brief + * Trace file handling and standard error handling function. + * \author + * Main contributors (see contributors.h for copyright, address and affiliation details) + * - Karsten Suehring + *************************************************************************************** + */ + + #include + + #include "contributors.h" + #include "global.h" + #include "mbuffer.h" + + /*! + ************************************************************************ + * \brief + * Error handling procedure. Print error message to stderr and exit + * with supplied code. + * \param text + * Error message + * \param code + * Exit code + ************************************************************************ + */ + void error(char *text, int code) + { + fprintf(stderr, "%s\n", text); + flush_dpb(); + exit(code); + } + + #if TRACE + + static int bitcounter = 0; + + /*! 
+ ************************************************************************ + * \brief + * Tracing bitpatterns for symbols + * A code word has the following format: 0 Xn...0 X2 0 X1 0 X0 1 + ************************************************************************ + */ + void tracebits( + const char *trace_str, //!< tracing information, char array describing the symbol + int len, //!< length of syntax element in bits + int info, //!< infoword of syntax element + int value1) + { + + int i, chars; + // int outint = 1; + + + if(len>=34) + { + snprintf(errortext, ET_SIZE, "Length argument to put too long for trace to work"); + error (errortext, 600); + } + + + putc('@', p_trace); + chars = fprintf(p_trace, "%i", bitcounter); + while(chars++ < 6) + putc(' ',p_trace); + + chars += fprintf(p_trace, " %s", trace_str); + while(chars++ < 55) + putc(' ',p_trace); + + // Align bitpattern + if(len<15) + { + for(i=0 ; i<15-len ; i++) + fputc(' ', p_trace); + } + + // Print bitpattern + for(i=0 ; i> ((len/2-i)-1))) + fputc('1', p_trace); + else + fputc('0', p_trace); + } + + fprintf(p_trace, " (%3d)\n", value1); + bitcounter += len; + + fflush (p_trace); + + } + + /*! 
+ ************************************************************************ + * \brief + * Tracing bitpatterns + ************************************************************************ + */ + void tracebits2( + const char *trace_str, //!< tracing information, char array describing the symbol + int len, //!< length of syntax element in bits + int info) + { + + int i, chars; + // int outint = 1; + + if(len>=45) + { + snprintf(errortext, ET_SIZE, "Length argument to put too long for trace to work"); + error (errortext, 600); + } + + putc('@', p_trace); + chars = fprintf(p_trace, "%i", bitcounter); + while(chars++ < 6) + putc(' ',p_trace); + chars += fprintf(p_trace, " %s", trace_str); + while(chars++ < 55) + putc(' ',p_trace); + + // Align bitpattern + if(len<15) + for(i=0 ; i<15-len ; i++) + fputc(' ', p_trace); + + + bitcounter += len; + while (len >= 32) + { + for(i=0 ; i<8 ; i++) + { + fputc('0', p_trace); + } + len -= 8; + + } + // Print bitpattern + for(i=0 ; i> (len-i-1))) + fputc('1', p_trace); + else + fputc('0', p_trace); + } + + fprintf(p_trace, " (%3d)\n", info); + + fflush (p_trace); + } + #endif + Index: llvm-test/MultiSource/Applications/JM/ldecod/fmo.c diff -c /dev/null llvm-test/MultiSource/Applications/JM/ldecod/fmo.c:1.1 *** /dev/null Sat Feb 11 04:33:42 2006 --- llvm-test/MultiSource/Applications/JM/ldecod/fmo.c Sat Feb 11 04:33:22 2006 *************** *** 0 **** --- 1,551 ---- + + /*! 
+ ***************************************************************************** + * + * \file fmo.c + * + * \brief + * Support for Flexible Macroblock Ordering (FMO) + * + * \author + * Main contributors (see contributors.h for copyright, address and affiliation details) + * - Stephan Wenger stewe at cs.tu-berlin.de + * - Karsten Suehring suehring at hhi.de + ****************************************************************************** + */ + + #include + #include + #include + #include + + + #include "global.h" + #include "elements.h" + #include "defines.h" + #include "header.h" + #include "fmo.h" + + //#define PRINT_FMO_MAPS + + int *MbToSliceGroupMap = NULL; + int *MapUnitToSliceGroupMap = NULL; + + static int NumberOfSliceGroups; // the number of slice groups -1 (0 == scan order, 7 == maximum) + + static void FmoGenerateType0MapUnitMap (pic_parameter_set_rbsp_t* pps, seq_parameter_set_rbsp_t* sps, unsigned PicSizeInMapUnits ); + static void FmoGenerateType1MapUnitMap (pic_parameter_set_rbsp_t* pps, seq_parameter_set_rbsp_t* sps, unsigned PicSizeInMapUnits ); + static void FmoGenerateType2MapUnitMap (pic_parameter_set_rbsp_t* pps, seq_parameter_set_rbsp_t* sps, unsigned PicSizeInMapUnits ); + static void FmoGenerateType3MapUnitMap (pic_parameter_set_rbsp_t* pps, seq_parameter_set_rbsp_t* sps, unsigned PicSizeInMapUnits ); + static void FmoGenerateType4MapUnitMap (pic_parameter_set_rbsp_t* pps, seq_parameter_set_rbsp_t* sps, unsigned PicSizeInMapUnits ); + static void FmoGenerateType5MapUnitMap (pic_parameter_set_rbsp_t* pps, seq_parameter_set_rbsp_t* sps, unsigned PicSizeInMapUnits ); + static void FmoGenerateType6MapUnitMap (pic_parameter_set_rbsp_t* pps, seq_parameter_set_rbsp_t* sps, unsigned PicSizeInMapUnits ); + + + /*! 
+ ************************************************************************ + * \brief + * Generates MapUnitToSliceGroupMap + * Has to be called every time a new Picture Parameter Set is used + * + * \param pps + * Picture Parameter set to be used for map generation + * \param sps + * Sequence Parameter set to be used for map generation + * + ************************************************************************ + */ + static int FmoGenerateMapUnitToSliceGroupMap (pic_parameter_set_rbsp_t* pps, seq_parameter_set_rbsp_t* sps) + { + unsigned int NumSliceGroupMapUnits; + + NumSliceGroupMapUnits = (sps->pic_height_in_map_units_minus1+1)* (sps->pic_width_in_mbs_minus1+1); + + if (pps->slice_group_map_type == 6) + { + if ((pps->num_slice_group_map_units_minus1+1) != NumSliceGroupMapUnits) + { + error ("wrong pps->num_slice_group_map_units_minus1 for used SPS and FMO type 6", 500); + } + } + + // allocate memory for MapUnitToSliceGroupMap + if (MapUnitToSliceGroupMap) + free (MapUnitToSliceGroupMap); + if ((MapUnitToSliceGroupMap = malloc ((NumSliceGroupMapUnits) * sizeof (int))) == NULL) + { + printf ("cannot allocated %d bytes for MapUnitToSliceGroupMap, exit\n", (pps->num_slice_group_map_units_minus1+1) * sizeof (int)); + exit (-1); + } + + if (pps->num_slice_groups_minus1 == 0) // only one slice group + { + memset (MapUnitToSliceGroupMap, 0, NumSliceGroupMapUnits * sizeof (int)); + return 0; + } + + switch (pps->slice_group_map_type) + { + case 0: + FmoGenerateType0MapUnitMap (pps, sps, NumSliceGroupMapUnits); + break; + case 1: + FmoGenerateType1MapUnitMap (pps, sps, NumSliceGroupMapUnits); + break; + case 2: + FmoGenerateType2MapUnitMap (pps, sps, NumSliceGroupMapUnits); + break; + case 3: + FmoGenerateType3MapUnitMap (pps, sps, NumSliceGroupMapUnits); + break; + case 4: + FmoGenerateType4MapUnitMap (pps, sps, NumSliceGroupMapUnits); + break; + case 5: + FmoGenerateType5MapUnitMap (pps, sps, NumSliceGroupMapUnits); + break; + case 6: + FmoGenerateType6MapUnitMap 
(pps, sps, NumSliceGroupMapUnits); + break; + default: + printf ("Illegal slice_group_map_type %d , exit \n", pps->slice_group_map_type); + exit (-1); + } + return 0; + } + + + /*! + ************************************************************************ + * \brief + * Generates MbToSliceGroupMap from MapUnitToSliceGroupMap + * + * \param pps + * Picture Parameter set to be used for map generation + * \param sps + * Sequence Parameter set to be used for map generation + * + ************************************************************************ + */ + static int FmoGenerateMbToSliceGroupMap (pic_parameter_set_rbsp_t* pps, seq_parameter_set_rbsp_t* sps) + { + unsigned i; + + // allocate memory for MbToSliceGroupMap + if (MbToSliceGroupMap) + free (MbToSliceGroupMap); + + if ((MbToSliceGroupMap = malloc ((img->PicSizeInMbs) * sizeof (int))) == NULL) + { + printf ("cannot allocated %d bytes for MbToSliceGroupMap, exit\n", (img->PicSizeInMbs) * sizeof (int)); + exit (-1); + } + + + if ((sps->frame_mbs_only_flag)|| img->field_pic_flag) + { + for (i=0; iPicSizeInMbs; i++) + { + MbToSliceGroupMap[i] = MapUnitToSliceGroupMap[i]; + } + } + else + if (sps->mb_adaptive_frame_field_flag && (!img->field_pic_flag)) + { + for (i=0; iPicSizeInMbs; i++) + { + MbToSliceGroupMap[i] = MapUnitToSliceGroupMap[i/2]; + } + } + else + { + for (i=0; iPicSizeInMbs; i++) + { + MbToSliceGroupMap[i] = MapUnitToSliceGroupMap[(i/(2*img->PicWidthInMbs))*img->PicWidthInMbs+(i%img->PicWidthInMbs)]; + } + } + return 0; + } + + + /*! + ************************************************************************ + * \brief + * FMO initialization: Generates MapUnitToSliceGroupMap and MbToSliceGroupMap. 
+ * + * \param pps + * Picture Parameter set to be used for map generation + * \param sps + * Sequence Parameter set to be used for map generation + ************************************************************************ + */ + int FmoInit(pic_parameter_set_rbsp_t* pps, seq_parameter_set_rbsp_t* sps) + { + #ifdef PRINT_FMO_MAPS + unsigned i,j; + #endif + + FmoGenerateMapUnitToSliceGroupMap(pps, sps); + FmoGenerateMbToSliceGroupMap(pps, sps); + + NumberOfSliceGroups = pps->num_slice_groups_minus1+1; + + #ifdef PRINT_FMO_MAPS + printf("\n"); + printf("FMO Map (Units):\n"); + + for (j=0; jPicHeightInMapUnits; j++) + { + for (i=0; iPicWidthInMbs; i++) + { + printf("%c",48+MapUnitToSliceGroupMap[i+j*img->PicWidthInMbs]); + } + printf("\n"); + } + printf("\n"); + printf("FMO Map (Mb):\n"); + + for (j=0; jPicHeightInMbs; j++) + { + for (i=0; iPicWidthInMbs; i++) + { + printf("%c",48+MbToSliceGroupMap[i+j*img->PicWidthInMbs]); + } + printf("\n"); + } + printf("\n"); + + #endif + + return 0; + } + + + /*! + ************************************************************************ + * \brief + * Free memory allocated by FMO functions + ************************************************************************ + */ + int FmoFinit() + { + if (MbToSliceGroupMap) + { + free (MbToSliceGroupMap); + MbToSliceGroupMap = NULL; + } + if (MapUnitToSliceGroupMap) + { + free (MapUnitToSliceGroupMap); + MapUnitToSliceGroupMap = NULL; + } + return 0; + } + + + /*! + ************************************************************************ + * \brief + * FmoGetNumberOfSliceGroup() + * + * \par Input: + * None + ************************************************************************ + */ + int FmoGetNumberOfSliceGroup() + { + return NumberOfSliceGroups; + } + + + /*! + ************************************************************************ + * \brief + * FmoGetLastMBOfPicture() + * returns the macroblock number of the last MB in a picture. 
This + * mb happens to be the last macroblock of the picture if there is only + * one slice group + * + * \par Input: + * None + ************************************************************************ + */ + int FmoGetLastMBOfPicture() + { + return FmoGetLastMBInSliceGroup (FmoGetNumberOfSliceGroup()-1); + } + + + /*! + ************************************************************************ + * \brief + * FmoGetLastMBInSliceGroup: Returns MB number of last MB in SG + * + * \par Input: + * SliceGroupID (0 to 7) + ************************************************************************ + */ + + int FmoGetLastMBInSliceGroup (int SliceGroup) + { + int i; + + for (i=img->PicSizeInMbs-1; i>=0; i--) + if (FmoGetSliceGroupId (i) == SliceGroup) + return i; + return -1; + + }; + + + /*! + ************************************************************************ + * \brief + * Returns SliceGroupID for a given MB + * + * \param mb + * Macroblock number (in scan order) + ************************************************************************ + */ + int FmoGetSliceGroupId (int mb) + { + assert (mb < (int)img->PicSizeInMbs); + assert (MbToSliceGroupMap != NULL); + return MbToSliceGroupMap[mb]; + } + + + /*! + ************************************************************************ + * \brief + * FmoGetNextMBBr: Returns the MB-Nr (in scan order) of the next + * MB in the (scattered) Slice, -1 if the slice is finished + * + * \param CurrentMbNr + * number of the current macroblock + ************************************************************************ + */ + int FmoGetNextMBNr (int CurrentMbNr) + { + int SliceGroup = FmoGetSliceGroupId (CurrentMbNr); + + while (++CurrentMbNr<(int)img->PicSizeInMbs && MbToSliceGroupMap [CurrentMbNr] != SliceGroup) + ; + + if (CurrentMbNr >= (int)img->PicSizeInMbs) + return -1; // No further MB in this slice (could be end of picture) + else + return CurrentMbNr; + } + + + /*! 
+ ************************************************************************ + * \brief + * Generate interleaved slice group map type MapUnit map (type 0) + * + ************************************************************************ + */ + static void FmoGenerateType0MapUnitMap (pic_parameter_set_rbsp_t* pps, seq_parameter_set_rbsp_t* sps, unsigned PicSizeInMapUnits ) + { + unsigned iGroup, j; + unsigned i = 0; + do + { + for( iGroup = 0; + (iGroup <= pps->num_slice_groups_minus1) && (i < PicSizeInMapUnits); + i += pps->run_length_minus1[iGroup++] + 1 ) + { + for( j = 0; j <= pps->run_length_minus1[ iGroup ] && i + j < PicSizeInMapUnits; j++ ) + MapUnitToSliceGroupMap[i+j] = iGroup; + } + } + while( i < PicSizeInMapUnits ); + } + + + /*! + ************************************************************************ + * \brief + * Generate dispersed slice group map type MapUnit map (type 1) + * + ************************************************************************ + */ + static void FmoGenerateType1MapUnitMap (pic_parameter_set_rbsp_t* pps, seq_parameter_set_rbsp_t* sps, unsigned PicSizeInMapUnits ) + { + unsigned i; + for( i = 0; i < PicSizeInMapUnits; i++ ) + { + MapUnitToSliceGroupMap[i] = ((i%img->PicWidthInMbs)+(((i/img->PicWidthInMbs)*(pps->num_slice_groups_minus1+1))/2)) + %(pps->num_slice_groups_minus1+1); + } + } + + /*! 
+ ************************************************************************ + * \brief + * Generate foreground with left-over slice group map type MapUnit map (type 2) + * + ************************************************************************ + */ + static void FmoGenerateType2MapUnitMap (pic_parameter_set_rbsp_t* pps, seq_parameter_set_rbsp_t* sps, unsigned PicSizeInMapUnits ) + { + int iGroup; + unsigned i, x, y; + unsigned yTopLeft, xTopLeft, yBottomRight, xBottomRight; + + for( i = 0; i < PicSizeInMapUnits; i++ ) + MapUnitToSliceGroupMap[ i ] = pps->num_slice_groups_minus1; + + for( iGroup = pps->num_slice_groups_minus1 - 1 ; iGroup >= 0; iGroup-- ) + { + yTopLeft = pps->top_left[ iGroup ] / img->PicWidthInMbs; + xTopLeft = pps->top_left[ iGroup ] % img->PicWidthInMbs; + yBottomRight = pps->bottom_right[ iGroup ] / img->PicWidthInMbs; + xBottomRight = pps->bottom_right[ iGroup ] % img->PicWidthInMbs; + for( y = yTopLeft; y <= yBottomRight; y++ ) + for( x = xTopLeft; x <= xBottomRight; x++ ) + MapUnitToSliceGroupMap[ y * img->PicWidthInMbs + x ] = iGroup; + } + } + + + /*! 
+ ************************************************************************ + * \brief + * Generate box-out slice group map type MapUnit map (type 3) + * + ************************************************************************ + */ + static void FmoGenerateType3MapUnitMap (pic_parameter_set_rbsp_t* pps, seq_parameter_set_rbsp_t* sps, unsigned PicSizeInMapUnits ) + { + unsigned i, k; + int leftBound, topBound, rightBound, bottomBound; + int x, y, xDir, yDir; + int mapUnitVacant; + + unsigned mapUnitsInSliceGroup0 = min((pps->slice_group_change_rate_minus1 + 1) * img->slice_group_change_cycle, PicSizeInMapUnits); + + for( i = 0; i < PicSizeInMapUnits; i++ ) + MapUnitToSliceGroupMap[ i ] = 2; + + x = ( img->PicWidthInMbs - pps->slice_group_change_direction_flag ) / 2; + y = ( img->PicHeightInMapUnits - pps->slice_group_change_direction_flag ) / 2; + + leftBound = x; + topBound = y; + rightBound = x; + bottomBound = y; + + xDir = pps->slice_group_change_direction_flag - 1; + yDir = pps->slice_group_change_direction_flag; + + for( k = 0; k < PicSizeInMapUnits; k += mapUnitVacant ) + { + mapUnitVacant = ( MapUnitToSliceGroupMap[ y * img->PicWidthInMbs + x ] == 2 ); + if( mapUnitVacant ) + MapUnitToSliceGroupMap[ y * img->PicWidthInMbs + x ] = ( k >= mapUnitsInSliceGroup0 ); + + if( xDir == -1 && x == leftBound ) + { + leftBound = max( leftBound - 1, 0 ); + x = leftBound; + xDir = 0; + yDir = 2 * pps->slice_group_change_direction_flag - 1; + } + else + if( xDir == 1 && x == rightBound ) + { + rightBound = min( rightBound + 1, (int)img->PicWidthInMbs - 1 ); + x = rightBound; + xDir = 0; + yDir = 1 - 2 * pps->slice_group_change_direction_flag; + } + else + if( yDir == -1 && y == topBound ) + { + topBound = max( topBound - 1, 0 ); + y = topBound; + xDir = 1 - 2 * pps->slice_group_change_direction_flag; + yDir = 0; + } + else + if( yDir == 1 && y == bottomBound ) + { + bottomBound = min( bottomBound + 1, (int)img->PicHeightInMapUnits - 1 ); + y = bottomBound; + xDir = 2 * 
pps->slice_group_change_direction_flag - 1; + yDir = 0; + } + else + { + x = x + xDir; + y = y + yDir; + } + } + + } + + /*! + ************************************************************************ + * \brief + * Generate raster scan slice group map type MapUnit map (type 4) + * + ************************************************************************ + */ + static void FmoGenerateType4MapUnitMap (pic_parameter_set_rbsp_t* pps, seq_parameter_set_rbsp_t* sps, unsigned PicSizeInMapUnits ) + { + + unsigned mapUnitsInSliceGroup0 = min((pps->slice_group_change_rate_minus1 + 1) * img->slice_group_change_cycle, PicSizeInMapUnits); + unsigned sizeOfUpperLeftGroup = pps->slice_group_change_direction_flag ? ( PicSizeInMapUnits - mapUnitsInSliceGroup0 ) : mapUnitsInSliceGroup0; + + unsigned i; + + for( i = 0; i < PicSizeInMapUnits; i++ ) + if( i < sizeOfUpperLeftGroup ) + MapUnitToSliceGroupMap[ i ] = pps->slice_group_change_direction_flag; + else + MapUnitToSliceGroupMap[ i ] = 1 - pps->slice_group_change_direction_flag; + + } + + /*! + ************************************************************************ + * \brief + * Generate wipe slice group map type MapUnit map (type 5) + * + ************************************************************************ + */ + static void FmoGenerateType5MapUnitMap (pic_parameter_set_rbsp_t* pps, seq_parameter_set_rbsp_t* sps, unsigned PicSizeInMapUnits ) + { + + unsigned mapUnitsInSliceGroup0 = min((pps->slice_group_change_rate_minus1 + 1) * img->slice_group_change_cycle, PicSizeInMapUnits); + unsigned sizeOfUpperLeftGroup = pps->slice_group_change_direction_flag ? 
( PicSizeInMapUnits - mapUnitsInSliceGroup0 ) : mapUnitsInSliceGroup0; + + unsigned i,j, k = 0; + + for( j = 0; j < img->PicWidthInMbs; j++ ) + for( i = 0; i < img->PicHeightInMapUnits; i++ ) + if( k++ < sizeOfUpperLeftGroup ) + MapUnitToSliceGroupMap[ i * img->PicWidthInMbs + j ] = 1 - pps->slice_group_change_direction_flag; + else + MapUnitToSliceGroupMap[ i * img->PicWidthInMbs + j ] = pps->slice_group_change_direction_flag; + + } + + /*! + ************************************************************************ + * \brief + * Generate explicit slice group map type MapUnit map (type 6) + * + ************************************************************************ + */ + static void FmoGenerateType6MapUnitMap (pic_parameter_set_rbsp_t* pps, seq_parameter_set_rbsp_t* sps, unsigned PicSizeInMapUnits ) + { + unsigned i; + for (i=0; islice_group_id[i]; + } + } + Index: llvm-test/MultiSource/Applications/JM/ldecod/fmo.h diff -c /dev/null llvm-test/MultiSource/Applications/JM/ldecod/fmo.h:1.1 *** /dev/null Sat Feb 11 04:33:42 2006 --- llvm-test/MultiSource/Applications/JM/ldecod/fmo.h Sat Feb 11 04:33:22 2006 *************** *** 0 **** --- 1,30 ---- + + /*! 
+ *************************************************************************** + * + * \file fmo.h + * + * \brief + * Support for Flexilble Macroblock Ordering (FMO) + * + * \date + * 19 June, 2002 + * + * \author + * Stephan Wenger stewe at cs.tu-berlin.de + **************************************************************************/ + + #ifndef _FMO_H_ + #define _FMO_H_ + + + int FmoInit (pic_parameter_set_rbsp_t* pps, seq_parameter_set_rbsp_t* sps); + int FmoFinit (); + + int FmoGetNumberOfSliceGroup(); + int FmoGetLastMBOfPicture(); + int FmoGetLastMBInSliceGroup(int SliceGroup); + int FmoGetSliceGroupId (int mb); + int FmoGetNextMBNr (int CurrentMbNr); + + #endif Index: llvm-test/MultiSource/Applications/JM/ldecod/global.h diff -c /dev/null llvm-test/MultiSource/Applications/JM/ldecod/global.h:1.1 *** /dev/null Sat Feb 11 04:33:42 2006 --- llvm-test/MultiSource/Applications/JM/ldecod/global.h Sat Feb 11 04:33:22 2006 *************** *** 0 **** --- 1,815 ---- + + /*! + ************************************************************************ + * \file + * global.h + * \brief + * global definitions for for H.264 decoder. 
+ * \author + * Copyright (C) 1999 Telenor Satellite Services,Norway + * Ericsson Radio Systems, Sweden + * + * Inge Lille-Langoy + * + * Telenor Satellite Services + * Keysers gt.13 tel.: +47 23 13 86 98 + * N-0130 Oslo,Norway fax.: +47 22 77 79 80 + * + * Rickard Sjoberg + * + * Ericsson Radio Systems + * KI/ERA/T/VV + * 164 80 Stockholm, Sweden + * + ************************************************************************ + */ + #ifndef _GLOBAL_H_ + #define _GLOBAL_H_ + + #include //!< for FILE + #include + #include + #include "defines.h" + #include "parsetcommon.h" + + #ifdef WIN32 + #define snprintf _snprintf + #define open _open + #define close _close + #define read _read + #define write _write + #define lseek _lseeki64 + #define fsync _commit + #define OPENFLAGS_WRITE _O_WRONLY|_O_CREAT|_O_BINARY|_O_TRUNC + #define OPEN_PERMISSIONS _S_IREAD | _S_IWRITE + #define OPENFLAGS_READ _O_RDONLY|_O_BINARY + #else + #define OPENFLAGS_WRITE O_WRONLY|O_CREAT|O_TRUNC + #define OPENFLAGS_READ O_RDONLY + #define OPEN_PERMISSIONS S_IRUSR | S_IWUSR + #endif + + + typedef unsigned char byte; //!< 8 bit unsigned + typedef int int32; + typedef unsigned int u_int32; + + #define imgpel unsigned short + + #if defined(WIN32) && !defined(__GNUC__) + typedef __int64 int64; + #ifndef INT64_MIN + # define INT64_MIN (-9223372036854775807i64 - 1i64) + #endif + #else + typedef long long int64; + #ifndef INT64_MIN + # define INT64_MIN (-9223372036854775807LL - 1LL) + #endif + #endif + + + pic_parameter_set_rbsp_t *active_pps; + seq_parameter_set_rbsp_t *active_sps; + + // global picture format dependend buffers, mem allocation in decod.c ****************** + int **refFrArr; //!< Array for reference frames of each block + + imgpel **imgY_ref; //!< reference frame find snr + imgpel ***imgUV_ref; + + int ReMapRef[20]; + // B pictures + int Bframe_ctr; + int frame_no; + + int g_nFrame; + + // For MB level frame/field coding + int TopFieldForSkip_Y[16][16]; + int TopFieldForSkip_UV[2][16][16]; 
+ + int InvLevelScale4x4Luma_Intra[6][4][4]; + int InvLevelScale4x4Chroma_Intra[2][6][4][4]; + + int InvLevelScale4x4Luma_Inter[6][4][4]; + int InvLevelScale4x4Chroma_Inter[2][6][4][4]; + + int InvLevelScale8x8Luma_Intra[6][8][8]; + + int InvLevelScale8x8Luma_Inter[6][8][8]; + + int *qmatrix[8]; + + #define ET_SIZE 300 //!< size of error text buffer + char errortext[ET_SIZE]; //!< buffer for error message for exit with error() + + /*********************************************************************** + * T y p e d e f i n i t i o n s f o r T M L + *********************************************************************** + */ + + //! Data Partitioning Modes + typedef enum + { + PAR_DP_1, //!< no data partitioning is supported + PAR_DP_3, //!< data partitioning with 3 partitions + } PAR_DP_TYPE; + + + //! Output File Types + typedef enum + { + PAR_OF_ANNEXB, //!< Current TML description + PAR_OF_RTP, //!< RTP Packet Output format + // PAR_OF_IFF //!< Interim File Format + } PAR_OF_TYPE; + + //! Boolean Type + /*typedef enum { + FALSE, + TRUE + } Boolean; + */ + //! definition of H.264 syntax elements + typedef enum { + SE_HEADER, + SE_PTYPE, + SE_MBTYPE, + SE_REFFRAME, + SE_INTRAPREDMODE, + SE_MVD, + SE_CBP_INTRA, + SE_LUM_DC_INTRA, + SE_CHR_DC_INTRA, + SE_LUM_AC_INTRA, + SE_CHR_AC_INTRA, + SE_CBP_INTER, + SE_LUM_DC_INTER, + SE_CHR_DC_INTER, + SE_LUM_AC_INTER, + SE_CHR_AC_INTER, + SE_DELTA_QUANT_INTER, + SE_DELTA_QUANT_INTRA, + SE_BFRAME, + SE_EOS, + SE_MAX_ELEMENTS //!< number of maximum syntax elements, this MUST be the last one! 
+ } SE_type; // substituting the definitions in element.h + + + typedef enum { + INTER_MB, + INTRA_MB_4x4, + INTRA_MB_16x16 + } IntraInterDecision; + + typedef enum { + BITS_TOTAL_MB, + BITS_HEADER_MB, + BITS_INTER_MB, + BITS_CBP_MB, + BITS_COEFF_Y_MB, + BITS_COEFF_UV_MB, + MAX_BITCOUNTER_MB + } BitCountType; + + typedef enum { + NO_SLICES, + FIXED_MB, + FIXED_RATE, + CALLBACK, + FMO + } SliceMode; + + + typedef enum { + UVLC, + CABAC + } SymbolMode; + + typedef enum { + LIST_0=0, + LIST_1=1 + } Lists; + + + typedef enum { + FRAME, + TOP_FIELD, + BOTTOM_FIELD + } PictureStructure; //!< New enum for field processing + + + typedef enum { + P_SLICE = 0, + B_SLICE, + I_SLICE, + SP_SLICE, + SI_SLICE + } SliceType; + + /*********************************************************************** + * D a t a t y p e s f o r C A B A C + *********************************************************************** + */ + + //! struct to characterize the state of the arithmetic coding engine + typedef struct + { + unsigned int Dlow, Drange; + unsigned int Dvalue; + unsigned int Dbuffer; + int Dbits_to_go; + byte *Dcodestrm; + int *Dcodestrm_len; + } DecodingEnvironment; + + typedef DecodingEnvironment *DecodingEnvironmentPtr; + + //! 
struct for context management + typedef struct + { + unsigned short state; // index into state-table CP + unsigned char MPS; // Least Probable Symbol 0/1 CP + } BiContextType; + + typedef BiContextType *BiContextTypePtr; + + + /********************************************************************** + * C O N T E X T S F O R T M L S Y N T A X E L E M E N T S + ********************************************************************** + */ + + #define NUM_MB_TYPE_CTX 11 + #define NUM_B8_TYPE_CTX 9 + #define NUM_MV_RES_CTX 10 + #define NUM_REF_NO_CTX 6 + #define NUM_DELTA_QP_CTX 4 + #define NUM_MB_AFF_CTX 4 + #define NUM_TRANSFORM_SIZE_CTX 3 + + + typedef struct + { + BiContextType mb_type_contexts [4][NUM_MB_TYPE_CTX]; + BiContextType b8_type_contexts [2][NUM_B8_TYPE_CTX]; + BiContextType mv_res_contexts [2][NUM_MV_RES_CTX]; + BiContextType ref_no_contexts [2][NUM_REF_NO_CTX]; + BiContextType delta_qp_contexts[NUM_DELTA_QP_CTX]; + BiContextType mb_aff_contexts [NUM_MB_AFF_CTX]; + BiContextType transform_size_contexts [NUM_TRANSFORM_SIZE_CTX]; + + } MotionInfoContexts; + + #define NUM_IPR_CTX 2 + #define NUM_CIPR_CTX 4 + #define NUM_CBP_CTX 4 + #define NUM_BCBP_CTX 4 + #define NUM_MAP_CTX 15 + #define NUM_LAST_CTX 15 + #define NUM_ONE_CTX 5 + #define NUM_ABS_CTX 5 + + + typedef struct + { + BiContextType ipr_contexts [NUM_IPR_CTX]; + BiContextType cipr_contexts[NUM_CIPR_CTX]; + BiContextType cbp_contexts [3][NUM_CBP_CTX]; + BiContextType bcbp_contexts[NUM_BLOCK_TYPES][NUM_BCBP_CTX]; + BiContextType map_contexts [NUM_BLOCK_TYPES][NUM_MAP_CTX]; + BiContextType last_contexts[NUM_BLOCK_TYPES][NUM_LAST_CTX]; + BiContextType one_contexts [NUM_BLOCK_TYPES][NUM_ONE_CTX]; + BiContextType abs_contexts [NUM_BLOCK_TYPES][NUM_ABS_CTX]; + BiContextType fld_map_contexts [NUM_BLOCK_TYPES][NUM_MAP_CTX]; + BiContextType fld_last_contexts[NUM_BLOCK_TYPES][NUM_LAST_CTX]; + } TextureInfoContexts; + + + //*********************** end of data type definition for CABAC ******************* + + 
/*********************************************************************** + * N e w D a t a t y p e s f o r T M L + *********************************************************************** + */ + + struct img_par; + struct inp_par; + struct stat_par; + + /*! Buffer structure for decoded referenc picture marking commands */ + typedef struct DecRefPicMarking_s + { + int memory_management_control_operation; + int difference_of_pic_nums_minus1; + int long_term_pic_num; + int long_term_frame_idx; + int max_long_term_frame_idx_plus1; + struct DecRefPicMarking_s *Next; + } DecRefPicMarking_t; + + //! Syntaxelement + typedef struct syntaxelement + { + int type; //!< type of syntax element for data part. + int value1; //!< numerical value of syntax element + int value2; //!< for blocked symbols, e.g. run/level + int len; //!< length of code + int inf; //!< info part of UVLC code + unsigned int bitpattern; //!< UVLC bitpattern + int context; //!< CABAC context + int k; //!< CABAC context for coeff_count,uv + + #if TRACE + #define TRACESTRING_SIZE 100 //!< size of trace string + char tracestring[TRACESTRING_SIZE]; //!< trace string + #endif + + //! for mapping of UVLC to syntaxElement + void (*mapping)(int len, int info, int *value1, int *value2); + //! used for CABAC: refers to actual coding method of each individual syntax element type + void (*reading)(struct syntaxelement *, struct inp_par *, struct img_par *, DecodingEnvironmentPtr); + + } SyntaxElement; + + //! 
Macroblock + typedef struct macroblock + { + int qp; + int slice_nr; + int delta_quant; //!< for rate control + + struct macroblock *mb_available_up; //!< pointer to neighboring MB (CABAC) + struct macroblock *mb_available_left; //!< pointer to neighboring MB (CABAC) + + // some storage of macroblock syntax elements for global access + int mb_type; + int mvd[2][BLOCK_MULTIPLE][BLOCK_MULTIPLE][2]; //!< indices correspond to [forw,backw][block_y][block_x][x,y] + int cbp; + int64 cbp_blk ; + int64 cbp_bits; + + int is_skip; + + int i16mode; + int b8mode[4]; + int b8pdir[4]; + int ei_flag; + + int LFDisableIdc; + int LFAlphaC0Offset; + int LFBetaOffset; + + int c_ipred_mode; //!< chroma intra prediction mode + int mb_field; + + int skip_flag; + + int mbAddrA, mbAddrB, mbAddrC, mbAddrD; + int mbAvailA, mbAvailB, mbAvailC, mbAvailD; + + int luma_transform_size_8x8_flag; + int NoMbPartLessThan8x8Flag; + } Macroblock; + + //! Bitstream + typedef struct + { + // CABAC Decoding + int read_len; //!< actual position in the codebuffer, CABAC only + int code_len; //!< overall codebuffer length, CABAC only + // UVLC Decoding + int frame_bitoffset; //!< actual position in the codebuffer, bit-oriented, UVLC only + int bitstream_length; //!< over codebuffer lnegth, byte oriented, UVLC only + // ErrorConcealment + byte *streamBuffer; //!< actual codebuffer for read bytes + int ei_flag; //!< error indication, 0: no error, else unspecified error + } Bitstream; + + //! DataPartition + typedef struct datapartition + { + + Bitstream *bitstream; + DecodingEnvironment de_cabac; + + int (*readSyntaxElement)(SyntaxElement *, struct img_par *, struct inp_par *, struct datapartition *); + /*!< virtual function; + actual method depends on chosen data partition and + entropy coding method */ + } DataPartition; + + //! 
Slice + typedef struct + { + int ei_flag; //!< 0 if the partArr[0] contains valid information + int qp; + int slice_qp_delta; + int picture_type; //!< picture type + PictureStructure structure; //!< Identify picture structure type + int start_mb_nr; //!< MUST be set by NAL even in case of ei_flag == 1 + int max_part_nr; + int dp_mode; //!< data partioning mode + int next_header; + // int last_mb_nr; //!< only valid when entropy coding == CABAC + DataPartition *partArr; //!< array of partitions + MotionInfoContexts *mot_ctx; //!< pointer to struct of context models for use in CABAC + TextureInfoContexts *tex_ctx; //!< pointer to struct of context models for use in CABAC + + int ref_pic_list_reordering_flag_l0; + int *reordering_of_pic_nums_idc_l0; + int *abs_diff_pic_num_minus1_l0; + int *long_term_pic_idx_l0; + int ref_pic_list_reordering_flag_l1; + int *reordering_of_pic_nums_idc_l1; + int *abs_diff_pic_num_minus1_l1; + int *long_term_pic_idx_l1; + + int (*readSlice)(struct img_par *, struct inp_par *); + + int LFDisableIdc; //!< Disable loop filter on slice + int LFAlphaC0Offset; //!< Alpha and C0 offset for filtering slice + int LFBetaOffset; //!< Beta offset for filtering slice + + int pic_parameter_set_id; //! + #include + #include + #include + + #include "global.h" + #include "elements.h" + #include "defines.h" + #include "fmo.h" + #include "vlc.h" + #include "mbuffer.h" + #include "header.h" + + #include "ctx_tables.h" + + extern StorablePicture *dec_picture; + + #if TRACE + #define SYMTRACESTRING(s) strncpy(sym.tracestring,s,TRACESTRING_SIZE) + #else + #define SYMTRACESTRING(s) // to nothing + #endif + + extern int UsedBits; + + static void ref_pic_list_reordering(); + static void pred_weight_table(); + + + /*! 
+ ************************************************************************ + * \brief + * calculate Ceil(Log2(uiVal)) + ************************************************************************ + */ + unsigned CeilLog2( unsigned uiVal) + { + unsigned uiTmp = uiVal-1; + unsigned uiRet = 0; + + while( uiTmp != 0 ) + { + uiTmp >>= 1; + uiRet++; + } + return uiRet; + } + + + /*! + ************************************************************************ + * \brief + * read the first part of the header (only the pic_parameter_set_id) + * \return + * Length of the first part of the slice header (in bits) + ************************************************************************ + */ + int FirstPartOfSliceHeader() + { + Slice *currSlice = img->currentSlice; + int dP_nr = assignSE2partition[currSlice->dp_mode][SE_HEADER]; + DataPartition *partition = &(currSlice->partArr[dP_nr]); + Bitstream *currStream = partition->bitstream; + int tmp; + + UsedBits= partition->bitstream->frame_bitoffset; // was hardcoded to 31 for previous start-code. This is better. + + // Get first_mb_in_slice + currSlice->start_mb_nr = ue_v ("SH: first_mb_in_slice", currStream); + + tmp = ue_v ("SH: slice_type", currStream); + + if (tmp>4) tmp -=5; + + img->type = currSlice->picture_type = (SliceType) tmp; + + currSlice->pic_parameter_set_id = ue_v ("SH: pic_parameter_set_id", currStream); + + return UsedBits; + } + + /*! 
+ ************************************************************************ + * \brief + * read the scond part of the header (without the pic_parameter_set_id + * \return + * Length of the second part of the Slice header in bits + ************************************************************************ + */ + int RestOfSliceHeader() + { + Slice *currSlice = img->currentSlice; + int dP_nr = assignSE2partition[currSlice->dp_mode][SE_HEADER]; + DataPartition *partition = &(currSlice->partArr[dP_nr]); + Bitstream *currStream = partition->bitstream; + + int val, len; + + img->frame_num = u_v (active_sps->log2_max_frame_num_minus4 + 4, "SH: frame_num", currStream); + + /* Tian Dong: frame_num gap processing, if found */ + if (img->idr_flag) + { + img->pre_frame_num = img->frame_num; + // picture error concealment + img->last_ref_pic_poc = 0; + assert(img->frame_num == 0); + } + + if (active_sps->frame_mbs_only_flag) + { + img->structure = FRAME; + img->field_pic_flag=0; + } + else + { + // field_pic_flag u(1) + img->field_pic_flag = u_1("SH: field_pic_flag", currStream); + if (img->field_pic_flag) + { + // bottom_field_flag u(1) + img->bottom_field_flag = u_1("SH: bottom_field_flag", currStream); + + img->structure = img->bottom_field_flag ? 
BOTTOM_FIELD : TOP_FIELD; + } + else + { + img->structure = FRAME; + img->bottom_field_flag=0; + } + } + + currSlice->structure = img->structure; + + img->MbaffFrameFlag=(active_sps->mb_adaptive_frame_field_flag && (img->field_pic_flag==0)); + + if (img->structure == FRAME ) assert (img->field_pic_flag == 0); + if (img->structure == TOP_FIELD ) assert (img->field_pic_flag == 1 && img->bottom_field_flag == 0); + if (img->structure == BOTTOM_FIELD) assert (img->field_pic_flag == 1 && img->bottom_field_flag == 1); + + if (img->idr_flag) + { + img->idr_pic_id = ue_v("SH: idr_pic_id", currStream); + } + + if (active_sps->pic_order_cnt_type == 0) + { + img->pic_order_cnt_lsb = u_v(active_sps->log2_max_pic_order_cnt_lsb_minus4 + 4, "SH: pic_order_cnt_lsb", currStream); + if( active_pps->pic_order_present_flag == 1 && !img->field_pic_flag ) + img->delta_pic_order_cnt_bottom = se_v("SH: delta_pic_order_cnt_bottom", currStream); + else + img->delta_pic_order_cnt_bottom = 0; + } + if( active_sps->pic_order_cnt_type == 1 && !active_sps->delta_pic_order_always_zero_flag ) + { + img->delta_pic_order_cnt[ 0 ] = se_v("SH: delta_pic_order_cnt[0]", currStream); + if( active_pps->pic_order_present_flag == 1 && !img->field_pic_flag ) + img->delta_pic_order_cnt[ 1 ] = se_v("SH: delta_pic_order_cnt[1]", currStream); + }else + { + if (active_sps->pic_order_cnt_type == 1) + { + img->delta_pic_order_cnt[ 0 ] = 0; + img->delta_pic_order_cnt[ 1 ] = 0; + } + } + + //! 
redundant_pic_cnt is missing here + if (active_pps->redundant_pic_cnt_present_flag) + { + img->redundant_pic_cnt = ue_v ("SH: redundant_pic_cnt", currStream); + } + + if(img->type==B_SLICE) + { + img->direct_spatial_mv_pred_flag = u_1 ("SH: direct_spatial_mv_pred_flag", currStream); + } + + img->num_ref_idx_l0_active = active_pps->num_ref_idx_l0_active_minus1 + 1; + img->num_ref_idx_l1_active = active_pps->num_ref_idx_l1_active_minus1 + 1; + + if(img->type==P_SLICE || img->type == SP_SLICE || img->type==B_SLICE) + { + val = u_1 ("SH: num_ref_idx_override_flag", currStream); + if (val) + { + img->num_ref_idx_l0_active = 1 + ue_v ("SH: num_ref_idx_l0_active_minus1", currStream); + + if(img->type==B_SLICE) + { + img->num_ref_idx_l1_active = 1 + ue_v ("SH: num_ref_idx_l1_active_minus1", currStream); + } + } + } + if (img->type!=B_SLICE) + { + img->num_ref_idx_l1_active = 0; + } + + ref_pic_list_reordering(); + + img->apply_weights = ((active_pps->weighted_pred_flag && (currSlice->picture_type == P_SLICE || currSlice->picture_type == SP_SLICE) ) + || ((active_pps->weighted_bipred_idc > 0 ) && (currSlice->picture_type == B_SLICE))); + + if ((active_pps->weighted_pred_flag&&(img->type==P_SLICE|| img->type == SP_SLICE))|| + (active_pps->weighted_bipred_idc==1 && (img->type==B_SLICE))) + { + pred_weight_table(); + } + + if (img->nal_reference_idc) + dec_ref_pic_marking(currStream); + + if (active_pps->entropy_coding_mode_flag && img->type!=I_SLICE && img->type!=SI_SLICE) + { + img->model_number = ue_v("SH: cabac_init_idc", currStream); + } + else + { + img->model_number = 0; + } + + val = se_v("SH: slice_qp_delta", currStream); + currSlice->qp = img->qp = 26 + active_pps->pic_init_qp_minus26 + val; + + + currSlice->slice_qp_delta = val; + + if(img->type==SP_SLICE || img->type == SI_SLICE) + { + if(img->type==SP_SLICE) + { + img->sp_switch = u_1 ("SH: sp_for_switch_flag", currStream); + } + val = se_v("SH: slice_qs_delta", currStream); + img->qpsp = 26 + 
active_pps->pic_init_qs_minus26 + val; + } + + if (active_pps->deblocking_filter_control_present_flag) + { + currSlice->LFDisableIdc = ue_v ("SH: disable_deblocking_filter_idc", currStream); + + if (currSlice->LFDisableIdc!=1) + { + currSlice->LFAlphaC0Offset = 2 * se_v("SH: slice_alpha_c0_offset_div2", currStream); + currSlice->LFBetaOffset = 2 * se_v("SH: slice_beta_offset_div2", currStream); + } + else + { + currSlice->LFAlphaC0Offset = currSlice->LFBetaOffset = 0; + } + } + else + { + currSlice->LFDisableIdc = currSlice->LFAlphaC0Offset = currSlice->LFBetaOffset = 0; + } + + if (active_pps->num_slice_groups_minus1>0 && active_pps->slice_group_map_type>=3 && + active_pps->slice_group_map_type<=5) + { + len = (active_sps->pic_height_in_map_units_minus1+1)*(active_sps->pic_width_in_mbs_minus1+1)/ + (active_pps->slice_group_change_rate_minus1+1); + if (((active_sps->pic_height_in_map_units_minus1+1)*(active_sps->pic_width_in_mbs_minus1+1))% + (active_pps->slice_group_change_rate_minus1+1)) + len +=1; + + len = CeilLog2(len+1); + + img->slice_group_change_cycle = u_v (len, "SH: slice_group_change_cycle", currStream); + } + img->PicHeightInMbs = img->FrameHeightInMbs / ( 1 + img->field_pic_flag ); + img->PicSizeInMbs = img->PicWidthInMbs * img->PicHeightInMbs; + img->FrameSizeInMbs = img->PicWidthInMbs * img->FrameHeightInMbs; + + return UsedBits; + } + + + /*! 
+ ************************************************************************ + * \brief + * read the reference picture reordering information + ************************************************************************ + */ + static void ref_pic_list_reordering() + { + Slice *currSlice = img->currentSlice; + int dP_nr = assignSE2partition[currSlice->dp_mode][SE_HEADER]; + DataPartition *partition = &(currSlice->partArr[dP_nr]); + Bitstream *currStream = partition->bitstream; + int i, val; + + alloc_ref_pic_list_reordering_buffer(currSlice); + + if (img->type!=I_SLICE && img->type!=SI_SLICE) + { + val = currSlice->ref_pic_list_reordering_flag_l0 = u_1 ("SH: ref_pic_list_reordering_flag_l0", currStream); + + if (val) + { + i=0; + do + { + val = currSlice->reordering_of_pic_nums_idc_l0[i] = ue_v("SH: reordering_of_pic_nums_idc_l0", currStream); + if (val==0 || val==1) + { + currSlice->abs_diff_pic_num_minus1_l0[i] = ue_v("SH: abs_diff_pic_num_minus1_l0", currStream); + } + else + { + if (val==2) + { + currSlice->long_term_pic_idx_l0[i] = ue_v("SH: long_term_pic_idx_l0", currStream); + } + } + i++; + // assert (i>img->num_ref_idx_l0_active); + } while (val != 3); + } + } + + if (img->type==B_SLICE) + { + val = currSlice->ref_pic_list_reordering_flag_l1 = u_1 ("SH: ref_pic_list_reordering_flag_l1", currStream); + + if (val) + { + i=0; + do + { + val = currSlice->reordering_of_pic_nums_idc_l1[i] = ue_v("SH: reordering_of_pic_nums_idc_l1", currStream); + if (val==0 || val==1) + { + currSlice->abs_diff_pic_num_minus1_l1[i] = ue_v("SH: abs_diff_pic_num_minus1_l1", currStream); + } + else + { + if (val==2) + { + currSlice->long_term_pic_idx_l1[i] = ue_v("SH: long_term_pic_idx_l1", currStream); + } + } + i++; + // assert (i>img->num_ref_idx_l1_active); + } while (val != 3); + } + } + } + + /*! 
+ ************************************************************************ + * \brief + * read the weighted prediction tables + ************************************************************************ + */ + static void pred_weight_table() + { + Slice *currSlice = img->currentSlice; + int dP_nr = assignSE2partition[currSlice->dp_mode][SE_HEADER]; + DataPartition *partition = &(currSlice->partArr[dP_nr]); + Bitstream *currStream = partition->bitstream; + int luma_weight_flag_l0, luma_weight_flag_l1, chroma_weight_flag_l0, chroma_weight_flag_l1; + int i,j; + + img->luma_log2_weight_denom = ue_v ("SH: luma_log2_weight_denom", currStream); + img->wp_round_luma = img->luma_log2_weight_denom ? 1<<(img->luma_log2_weight_denom - 1): 0; + + if ( 0 != active_sps->chroma_format_idc) + { + img->chroma_log2_weight_denom = ue_v ("SH: chroma_log2_weight_denom", currStream); + img->wp_round_chroma = img->chroma_log2_weight_denom ? 1<<(img->chroma_log2_weight_denom - 1): 0; + } + + reset_wp_params(img); + + for (i=0; inum_ref_idx_l0_active; i++) + { + luma_weight_flag_l0 = u_1("SH: luma_weight_flag_l0", currStream); + + if (luma_weight_flag_l0) + { + img->wp_weight[0][i][0] = se_v ("SH: luma_weight_l0", currStream); + img->wp_offset[0][i][0] = se_v ("SH: luma_offset_l0", currStream); + } + else + { + img->wp_weight[0][i][0] = 1<luma_log2_weight_denom; + img->wp_offset[0][i][0] = 0; + } + + if (active_sps->chroma_format_idc != 0) + { + chroma_weight_flag_l0 = u_1 ("SH: chroma_weight_flag_l0", currStream); + + for (j=1; j<3; j++) + { + if (chroma_weight_flag_l0) + { + img->wp_weight[0][i][j] = se_v("SH: chroma_weight_l0", currStream); + img->wp_offset[0][i][j] = se_v("SH: chroma_offset_l0", currStream); + } + else + { + img->wp_weight[0][i][j] = 1<chroma_log2_weight_denom; + img->wp_offset[0][i][j] = 0; + } + } + } + } + if ((img->type == B_SLICE) && active_pps->weighted_bipred_idc == 1) + { + for (i=0; inum_ref_idx_l1_active; i++) + { + luma_weight_flag_l1 = u_1("SH: 
luma_weight_flag_l1", currStream); + + if (luma_weight_flag_l1) + { + img->wp_weight[1][i][0] = se_v ("SH: luma_weight_l1", currStream); + img->wp_offset[1][i][0] = se_v ("SH: luma_offset_l1", currStream); + } + else + { + img->wp_weight[1][i][0] = 1<luma_log2_weight_denom; + img->wp_offset[1][i][0] = 0; + } + + if (active_sps->chroma_format_idc != 0) + { + chroma_weight_flag_l1 = u_1 ("SH: chroma_weight_flag_l1", currStream); + + for (j=1; j<3; j++) + { + if (chroma_weight_flag_l1) + { + img->wp_weight[1][i][j] = se_v("SH: chroma_weight_l1", currStream); + img->wp_offset[1][i][j] = se_v("SH: chroma_offset_l1", currStream); + } + else + { + img->wp_weight[1][i][j] = 1<chroma_log2_weight_denom; + img->wp_offset[1][i][j] = 0; + } + } + } + } + } + } + + + /*! + ************************************************************************ + * \brief + * read the memory control operations + ************************************************************************ + */ + void dec_ref_pic_marking(Bitstream *currStream) + { + int val; + + DecRefPicMarking_t *tmp_drpm,*tmp_drpm2; + + // free old buffer content + while (img->dec_ref_pic_marking_buffer) + { + tmp_drpm=img->dec_ref_pic_marking_buffer; + + img->dec_ref_pic_marking_buffer=tmp_drpm->Next; + free (tmp_drpm); + } + + if (img->idr_flag) + { + img->no_output_of_prior_pics_flag = u_1("SH: no_output_of_prior_pics_flag", currStream); + img->long_term_reference_flag = u_1("SH: long_term_reference_flag", currStream); + } + else + { + img->adaptive_ref_pic_buffering_flag = u_1("SH: adaptive_ref_pic_buffering_flag", currStream); + if (img->adaptive_ref_pic_buffering_flag) + { + // read Memory Management Control Operation + do + { + tmp_drpm=(DecRefPicMarking_t*)calloc (1,sizeof (DecRefPicMarking_t)); + tmp_drpm->Next=NULL; + + val = tmp_drpm->memory_management_control_operation = ue_v("SH: memory_management_control_operation", currStream); + + if ((val==1)||(val==3)) + { + tmp_drpm->difference_of_pic_nums_minus1 = ue_v("SH: 
difference_of_pic_nums_minus1", currStream); + } + if (val==2) + { + tmp_drpm->long_term_pic_num = ue_v("SH: long_term_pic_num", currStream); + } + + if ((val==3)||(val==6)) + { + tmp_drpm->long_term_frame_idx = ue_v("SH: long_term_frame_idx", currStream); + } + if (val==4) + { + tmp_drpm->max_long_term_frame_idx_plus1 = ue_v("SH: max_long_term_pic_idx_plus1", currStream); + } + + // add command + if (img->dec_ref_pic_marking_buffer==NULL) + { + img->dec_ref_pic_marking_buffer=tmp_drpm; + } + else + { + tmp_drpm2=img->dec_ref_pic_marking_buffer; + while (tmp_drpm2->Next!=NULL) tmp_drpm2=tmp_drpm2->Next; + tmp_drpm2->Next=tmp_drpm; + } + + }while (val != 0); + + } + } + } + + /*! + ************************************************************************ + * \brief + * To calculate the poc values + * based upon JVT-F100d2 + * POC200301: Until Jan 2003, this function will calculate the correct POC + * values, but the management of POCs in buffered pictures may need more work. + * \return + * none + ************************************************************************ + */ + void decode_poc(struct img_par *img) + { + int i; + // for POC mode 0: + unsigned int MaxPicOrderCntLsb = (1<<(active_sps->log2_max_pic_order_cnt_lsb_minus4+4)); + + switch ( active_sps->pic_order_cnt_type ) + { + case 0: // POC MODE 0 + // 1st + if(img->idr_flag) + { + img->PrevPicOrderCntMsb = 0; + img->PrevPicOrderCntLsb = 0; + } + else + { + if (img->last_has_mmco_5) + { + if (img->last_pic_bottom_field) + { + img->PrevPicOrderCntMsb = 0; + img->PrevPicOrderCntLsb = 0; + } + else + { + img->PrevPicOrderCntMsb = 0; + img->PrevPicOrderCntLsb = img->toppoc; + } + } + } + // Calculate the MSBs of current picture + if( img->pic_order_cnt_lsb < img->PrevPicOrderCntLsb && + ( img->PrevPicOrderCntLsb - img->pic_order_cnt_lsb ) >= ( MaxPicOrderCntLsb / 2 ) ) + img->PicOrderCntMsb = img->PrevPicOrderCntMsb + MaxPicOrderCntLsb; + else if ( img->pic_order_cnt_lsb > img->PrevPicOrderCntLsb && + ( 
img->pic_order_cnt_lsb - img->PrevPicOrderCntLsb ) > ( MaxPicOrderCntLsb / 2 ) ) + img->PicOrderCntMsb = img->PrevPicOrderCntMsb - MaxPicOrderCntLsb; + else + img->PicOrderCntMsb = img->PrevPicOrderCntMsb; + + // 2nd + + if(img->field_pic_flag==0) + { //frame pix + img->toppoc = img->PicOrderCntMsb + img->pic_order_cnt_lsb; + img->bottompoc = img->toppoc + img->delta_pic_order_cnt_bottom; + img->ThisPOC = img->framepoc = (img->toppoc < img->bottompoc)? img->toppoc : img->bottompoc; // POC200301 + } + else if (img->bottom_field_flag==0) + { //top field + img->ThisPOC= img->toppoc = img->PicOrderCntMsb + img->pic_order_cnt_lsb; + } + else + { //bottom field + img->ThisPOC= img->bottompoc = img->PicOrderCntMsb + img->pic_order_cnt_lsb; + } + img->framepoc=img->ThisPOC; + + if ( img->frame_num!=img->PreviousFrameNum) + img->PreviousFrameNum=img->frame_num; + + if(!img->disposable_flag) + { + img->PrevPicOrderCntLsb = img->pic_order_cnt_lsb; + img->PrevPicOrderCntMsb = img->PicOrderCntMsb; + } + + break; + + case 1: // POC MODE 1 + // 1st + if(img->idr_flag) + { + img->FrameNumOffset=0; // first pix of IDRGOP, + img->delta_pic_order_cnt[0]=0; //ignore first delta + if(img->frame_num) error("frame_num != 0 in idr pix", -1020); + } + else + { + if (img->last_has_mmco_5) + { + img->PreviousFrameNumOffset = 0; + img->PreviousFrameNum = 0; + } + if (img->frame_numPreviousFrameNum) + { //not first pix of IDRGOP + img->FrameNumOffset = img->PreviousFrameNumOffset + img->MaxFrameNum; + } + else + { + img->FrameNumOffset = img->PreviousFrameNumOffset; + } + } + + // 2nd + if(active_sps->num_ref_frames_in_pic_order_cnt_cycle) + img->AbsFrameNum = img->FrameNumOffset+img->frame_num; + else + img->AbsFrameNum=0; + if(img->disposable_flag && img->AbsFrameNum>0) + img->AbsFrameNum--; + + // 3rd + img->ExpectedDeltaPerPicOrderCntCycle=0; + + if(active_sps->num_ref_frames_in_pic_order_cnt_cycle) + for(i=0;i<(int) active_sps->num_ref_frames_in_pic_order_cnt_cycle;i++) + 
img->ExpectedDeltaPerPicOrderCntCycle += active_sps->offset_for_ref_frame[i]; + + if(img->AbsFrameNum) + { + img->PicOrderCntCycleCnt = (img->AbsFrameNum-1)/active_sps->num_ref_frames_in_pic_order_cnt_cycle; + img->FrameNumInPicOrderCntCycle = (img->AbsFrameNum-1)%active_sps->num_ref_frames_in_pic_order_cnt_cycle; + img->ExpectedPicOrderCnt = img->PicOrderCntCycleCnt*img->ExpectedDeltaPerPicOrderCntCycle; + for(i=0;i<=(int)img->FrameNumInPicOrderCntCycle;i++) + img->ExpectedPicOrderCnt += active_sps->offset_for_ref_frame[i]; + } + else + img->ExpectedPicOrderCnt=0; + + if(img->disposable_flag) + img->ExpectedPicOrderCnt += active_sps->offset_for_non_ref_pic; + + if(img->field_pic_flag==0) + { //frame pix + img->toppoc = img->ExpectedPicOrderCnt + img->delta_pic_order_cnt[0]; + img->bottompoc = img->toppoc + active_sps->offset_for_top_to_bottom_field + img->delta_pic_order_cnt[1]; + img->ThisPOC = img->framepoc = (img->toppoc < img->bottompoc)? img->toppoc : img->bottompoc; // POC200301 + } + else if (img->bottom_field_flag==0) + { //top field + img->ThisPOC = img->toppoc = img->ExpectedPicOrderCnt + img->delta_pic_order_cnt[0]; + } + else + { //bottom field + img->ThisPOC = img->bottompoc = img->ExpectedPicOrderCnt + active_sps->offset_for_top_to_bottom_field + img->delta_pic_order_cnt[0]; + } + img->framepoc=img->ThisPOC; + + img->PreviousFrameNum=img->frame_num; + img->PreviousFrameNumOffset=img->FrameNumOffset; + + break; + + + case 2: // POC MODE 2 + if(img->idr_flag) // IDR picture + { + img->FrameNumOffset=0; // first pix of IDRGOP, + img->ThisPOC = img->framepoc = img->toppoc = img->bottompoc = 0; + if(img->frame_num) error("frame_num != 0 in idr pix", -1020); + } + else + { + if (img->last_has_mmco_5) + { + img->PreviousFrameNum = 0; + img->PreviousFrameNumOffset = 0; + } + if (img->frame_numPreviousFrameNum) + img->FrameNumOffset = img->PreviousFrameNumOffset + img->MaxFrameNum; + else + img->FrameNumOffset = img->PreviousFrameNumOffset; + + + 
img->AbsFrameNum = img->FrameNumOffset+img->frame_num; + if(img->disposable_flag) + img->ThisPOC = (2*img->AbsFrameNum - 1); + else + img->ThisPOC = (2*img->AbsFrameNum); + + if (img->field_pic_flag==0) + img->toppoc = img->bottompoc = img->framepoc = img->ThisPOC; + else if (img->bottom_field_flag==0) + img->toppoc = img->framepoc = img->ThisPOC; + else img->bottompoc = img->framepoc = img->ThisPOC; + } + + if (!img->disposable_flag) + img->PreviousFrameNum=img->frame_num; + img->PreviousFrameNumOffset=img->FrameNumOffset; + break; + + + default: + //error must occurs + assert( 1==0 ); + break; + } + } + + /*! + ************************************************************************ + * \brief + * A little helper for the debugging of POC code + * \return + * none + ************************************************************************ + */ + int dumppoc(struct img_par *img) { + printf ("\nPOC locals...\n"); + printf ("toppoc %d\n", img->toppoc); + printf ("bottompoc %d\n", img->bottompoc); + printf ("frame_num %d\n", img->frame_num); + printf ("field_pic_flag %d\n", img->field_pic_flag); + printf ("bottom_field_flag %d\n", img->bottom_field_flag); + printf ("POC SPS\n"); + printf ("log2_max_frame_num_minus4 %d\n", active_sps->log2_max_frame_num_minus4); // POC200301 + printf ("log2_max_pic_order_cnt_lsb_minus4 %d\n", active_sps->log2_max_pic_order_cnt_lsb_minus4); + printf ("pic_order_cnt_type %d\n", active_sps->pic_order_cnt_type); + printf ("num_ref_frames_in_pic_order_cnt_cycle %d\n", active_sps->num_ref_frames_in_pic_order_cnt_cycle); + printf ("delta_pic_order_always_zero_flag %d\n", active_sps->delta_pic_order_always_zero_flag); + printf ("offset_for_non_ref_pic %d\n", active_sps->offset_for_non_ref_pic); + printf ("offset_for_top_to_bottom_field %d\n", active_sps->offset_for_top_to_bottom_field); + printf ("offset_for_ref_frame[0] %d\n", active_sps->offset_for_ref_frame[0]); + printf ("offset_for_ref_frame[1] %d\n", active_sps->offset_for_ref_frame[1]); 
+ printf ("POC in SLice Header\n"); + printf ("pic_order_present_flag %d\n", active_pps->pic_order_present_flag); + printf ("delta_pic_order_cnt[0] %d\n", img->delta_pic_order_cnt[0]); + printf ("delta_pic_order_cnt[1] %d\n", img->delta_pic_order_cnt[1]); + printf ("delta_pic_order_cnt[2] %d\n", img->delta_pic_order_cnt[2]); + printf ("idr_flag %d\n", img->idr_flag); + printf ("MaxFrameNum %d\n", img->MaxFrameNum); + + return 0; + } + + /*! + ************************************************************************ + * \brief + * return the poc of img as per (8-1) JVT-F100d2 + * POC200301 + ************************************************************************ + */ + int picture_order(struct img_par *img) + { + if (img->field_pic_flag==0) // is a frame + return img->framepoc; + else if (img->bottom_field_flag==0) // top field + return img->toppoc; + else // bottom field + return img->bottompoc; + } + Index: llvm-test/MultiSource/Applications/JM/ldecod/header.h diff -c /dev/null llvm-test/MultiSource/Applications/JM/ldecod/header.h:1.1 *** /dev/null Sat Feb 11 04:33:42 2006 --- llvm-test/MultiSource/Applications/JM/ldecod/header.h Sat Feb 11 04:33:22 2006 *************** *** 0 **** --- 1,23 ---- + + /*! 
+ ************************************************************************************* + * \file header.h + * + * \brief + * Prototypes for header.c + ************************************************************************************* + */ + + #ifndef _HEADER_H_ + #define _HEADER_H_ + + int FirstPartOfSliceHeader(); + int RestOfSliceHeader(); + + void dec_ref_pic_marking(Bitstream *currStream); + + void decode_poc(struct img_par *img); + int dumppoc(struct img_par *img); + + #endif + Index: llvm-test/MultiSource/Applications/JM/ldecod/image.c diff -c /dev/null llvm-test/MultiSource/Applications/JM/ldecod/image.c:1.1 *** /dev/null Sat Feb 11 04:33:42 2006 --- llvm-test/MultiSource/Applications/JM/ldecod/image.c Sat Feb 11 04:33:22 2006 *************** *** 0 **** --- 1,1834 ---- + + /*! + *********************************************************************** + * \file image.c + * + * \brief + * Decode a Slice + * + * \author + * Main contributors (see contributors.h for copyright, address and affiliation details) + * - Inge Lille-Langoy + * - Rickard Sjoberg + * - Jani Lainema + * - Sebastian Purreiter + * - Byeong-Moon Jeon + * - Thomas Wedi + * - Gabi Blaettermann + * - Ye-Kui Wang + * - Antti Hallapuro + * - Alexis Tourapis + * - Jill Boyce + * - Saurav K Bandyopadhyay + * - Zhenyu Wu + * + *********************************************************************** + */ + + #include "contributors.h" + + #include + #include + #include + #include + #include + + #ifdef WIN32 + #include + #else + #include + #endif + + #include "global.h" + #include "errorconcealment.h" + #include "image.h" + #include "mbuffer.h" + #include "fmo.h" + #include "nalu.h" + #include "parsetcommon.h" + #include "parset.h" + #include "header.h" + #include "rtp.h" + #include "sei.h" + #include "output.h" + #include "biaridecod.h" + #include "mb_access.h" + #include "memalloc.h" + #include "annexb.h" + + #include "context_ini.h" + #include "cabac.h" + #include "loopfilter.h" + + #include 
"vlc.h" + + #include "erc_api.h" + extern objectBuffer_t *erc_object_list; + extern ercVariables_t *erc_errorVar; + extern frame erc_recfr; + extern int erc_mvperMB; + extern struct img_par *erc_img; + + //extern FILE *p_out2; + + extern StorablePicture **listX[6]; + extern ColocatedParams *Co_located; + + StorablePicture *dec_picture; + + OldSliceParams old_slice; + + void MbAffPostProc() + { + imgpel temp[16][32]; + + imgpel ** imgY = dec_picture->imgY; + imgpel ***imgUV = dec_picture->imgUV; + + int i, x, y, x0, y0, uv; + for (i=0; i<(int)dec_picture->PicSizeInMbs; i+=2) + { + if (dec_picture->mb_field[i]) + { + get_mb_pos(i, &x0, &y0); + for (y=0; y<(2*MB_BLOCK_SIZE);y++) + for (x=0; xchroma_format_idc != YUV400) + { + x0 = x0 / (16/img->mb_cr_size_x); + y0 = y0 / (16/img->mb_cr_size_y); + + for (uv=0; uv<2; uv++) + { + for (y=0; y<(2*img->mb_cr_size_y);y++) + for (x=0; xmb_cr_size_x; x++) + temp[x][y] = imgUV[uv][y0+y][x0+x]; + + for (y=0; ymb_cr_size_y;y++) + for (x=0; xmb_cr_size_x; x++) + { + imgUV[uv][y0+(2*y)][x0+x] = temp[x][y]; + imgUV[uv][y0+(2*y+1)][x0+x] = temp[x][y+img->mb_cr_size_y]; + } + } + } + } + } + } + + /*! 
+ *********************************************************************** + * \brief + * decodes one I- or P-frame + * + *********************************************************************** + */ + + int decode_one_frame(struct img_par *img,struct inp_par *inp, struct snr_par *snr) + { + int current_header; + Slice *currSlice = img->currentSlice; + + img->current_slice_nr = 0; + img->current_mb_nr = -4711; // initialized to an impossible value for debugging -- correct value is taken from slice header + currSlice->next_header = -8888; // initialized to an impossible value for debugging -- correct value is taken from slice header + img->num_dec_mb = 0; + img->newframe = 1; + + while ((currSlice->next_header != EOS && currSlice->next_header != SOP)) + { + current_header = read_new_slice(); + + if (current_header == EOS) + { + exit_picture(); + return EOS; + } + + decode_slice(img, inp, current_header); + + img->newframe = 0; + img->current_slice_nr++; + } + + exit_picture(); + + return (SOP); + } + + + /*! + ************************************************************************ + * \brief + * Convert file read buffer to source picture structure + * \param imgX + * Pointer to image plane + * \param buf + * Buffer for file output + * \param size_x + * horizontal image size in pixel + * \param size_y + * vertical image size in pixel + * \param symbol_size_in_bytes + * number of bytes used per pel + ************************************************************************ + */ + void buf2img (imgpel** imgX, unsigned char* buf, int size_x, int size_y, int symbol_size_in_bytes) + { + int i,j; + + unsigned short tmp16, ui16; + unsigned long tmp32, ui32; + + if (symbol_size_in_bytes> sizeof(imgpel)) + { + error ("Source picture has higher bit depth than imgpel data type. 
Please recompile with larger data type for imgpel.", 500); + } + + if (( sizeof(char) == sizeof (imgpel)) && ( sizeof(char) == symbol_size_in_bytes)) + { + // imgpel == pixel_in_file == 1 byte -> simple copy + for(j=0;j sizeof(char) + if (testEndian()) + { + // big endian + switch (symbol_size_in_bytes) + { + case 1: + { + for(j=0;j> 8) | ((tmp16&0xFF)<<8); + imgX[j][i] = (imgpel) ui16; + } + break; + } + case 4: + { + for(j=0;j>8) | ((tmp32&0xFF000000)>>24); + imgX[j][i] = (imgpel) ui32; + } + } + default: + { + error ("reading only from formats of 8, 16 or 32 bit allowed on big endian architecture", 500); + break; + } + } + + } + else + { + // little endian + for (j=0; j < size_y; j++) + for (i=0; i < size_x; i++) + { + imgX[j][i]=0; + memcpy(&(imgX[j][i]), buf +((i+j*size_x)*symbol_size_in_bytes), symbol_size_in_bytes); + } + + } + } + } + + + /*! + ************************************************************************ + * \brief + * Find PSNR for all three components.Compare decoded frame with + * the original sequence. Read inp->jumpd frames to reflect frame skipping. 
+ ************************************************************************ + */ + void find_snr( + struct snr_par *snr, //!< pointer to snr parameters + StorablePicture *p, //!< picture to be compared + int p_ref) //!< open reference YUV file + { + int SubWidthC [4]= { 1, 2, 2, 1}; + int SubHeightC [4]= { 1, 2, 1, 1}; + int crop_left, crop_right, crop_top, crop_bottom; + + int i,j; + int64 diff_y,diff_u,diff_v; + int uv; + int64 status; + int symbol_size_in_bytes = img->pic_unit_bitsize_on_disk/8; + int size_x, size_y; + int size_x_cr, size_y_cr; + int64 framesize_in_bytes; + unsigned int max_pix_value_sqd = img->max_imgpel_value * img->max_imgpel_value; + unsigned int max_pix_value_sqd_uv = img->max_imgpel_value_uv * img->max_imgpel_value_uv; + Boolean rgb_output = (active_sps->vui_seq_parameters.matrix_coefficients==0); + unsigned char *buf; + + // picture error concealment + char yuv_types[4][6]= {"4:0:0","4:2:0","4:2:2","4:4:4"}; + + // calculate frame number + int psnrPOC = active_sps->mb_adaptive_frame_field_flag ? 
p->poc /(input->poc_scale) : p->poc/(input->poc_scale); + + // cropping for luma + if (p->frame_cropping_flag) + { + crop_left = SubWidthC[p->chroma_format_idc] * p->frame_cropping_rect_left_offset; + crop_right = SubWidthC[p->chroma_format_idc] * p->frame_cropping_rect_right_offset; + crop_top = SubHeightC[p->chroma_format_idc]*( 2 - p->frame_mbs_only_flag ) * p->frame_cropping_rect_top_offset; + crop_bottom = SubHeightC[p->chroma_format_idc]*( 2 - p->frame_mbs_only_flag ) * p->frame_cropping_rect_bottom_offset; + } + else + { + crop_left = crop_right = crop_top = crop_bottom = 0; + } + + size_x = p->size_x - crop_left - crop_right; + size_y = p->size_y - crop_top - crop_bottom; + + // cropping for chroma + if (p->frame_cropping_flag) + { + crop_left = p->frame_cropping_rect_left_offset; + crop_right = p->frame_cropping_rect_right_offset; + crop_top = ( 2 - p->frame_mbs_only_flag ) * p->frame_cropping_rect_top_offset; + crop_bottom = ( 2 - p->frame_mbs_only_flag ) * p->frame_cropping_rect_bottom_offset; + } + else + { + crop_left = crop_right = crop_top = crop_bottom = 0; + } + + if ((p->chroma_format_idc==YUV400) && input->write_uv) + { + size_x_cr = p->size_x/2; + size_y_cr = p->size_y/2; + } + else + { + size_x_cr = p->size_x_cr - crop_left - crop_right; + size_y_cr = p->size_y_cr - crop_top - crop_bottom; + } + + framesize_in_bytes = (((int64)size_y*size_x) + ((int64)size_y_cr*size_x_cr)*2) * symbol_size_in_bytes; + + if (psnrPOC==0 && img->psnr_number) + img->idr_psnr_number=img->psnr_number + 1; + + img->psnr_number=max(img->psnr_number,img->idr_psnr_number+psnrPOC); + + frame_no = img->idr_psnr_number+psnrPOC; + + // KS: this buffer should actually be allocated only once, but this is still much faster than the previous version + buf = malloc ( size_y * size_x * symbol_size_in_bytes ); + + if (NULL == buf) + { + no_mem_exit("find_snr: buf"); + } + + status = lseek (p_ref, framesize_in_bytes * frame_no, SEEK_SET); + if (status == -1) + { + fprintf(stderr, 
"Error in seeking frame number: %d\n", frame_no); + free (buf); + return; + } + + if(rgb_output) + lseek (p_ref, framesize_in_bytes/3, SEEK_CUR); + + read(p_ref, buf, size_y * size_x * symbol_size_in_bytes); + buf2img(imgY_ref, buf, size_x, size_y, symbol_size_in_bytes); + + if (p->chroma_format_idc != YUV400) + { + for (uv=0; uv < 2; uv++) + { + if(rgb_output && uv==1) + lseek (p_ref, -framesize_in_bytes, SEEK_CUR); + + read(p_ref, buf, size_y_cr * size_x_cr*symbol_size_in_bytes); + buf2img(imgUV_ref[uv], buf, size_x_cr, size_y_cr, symbol_size_in_bytes); + } + } + + if(rgb_output) + lseek (p_ref, framesize_in_bytes*2/3, SEEK_CUR); + + free (buf); + + img->quad[0]=0; + diff_y=0; + for (j=0; j < size_y; ++j) + { + for (i=0; i < size_x; ++i) + { + diff_y += img->quad[abs(p->imgY[j][i]-imgY_ref[j][i])]; + } + } + + // Chroma + diff_u=0; + diff_v=0; + + if (p->chroma_format_idc != YUV400) + { + for (j=0; j < size_y_cr; ++j) + { + for (i=0; i < size_x_cr; ++i) + { + diff_u += img->quad[abs(imgUV_ref[0][j][i]-p->imgUV[0][j][i])]; + diff_v += img->quad[abs(imgUV_ref[1][j][i]-p->imgUV[1][j][i])]; + } + } + } + + #if ZEROSNR + if (diff_y == 0) + diff_y = 1; + if (diff_u == 0) + diff_u = 1; + if (diff_v == 0) + diff_v = 1; + #endif + + // Collecting SNR statistics + if (diff_y != 0) + snr->snr_y=(float)(10*log10(max_pix_value_sqd*(double)((double)(size_x)*(size_y) / diff_y))); // luma snr for current frame + else + snr->snr_y=0.0; + if (diff_u != 0) + snr->snr_u=(float)(10*log10(max_pix_value_sqd_uv*(double)((double)(size_x_cr)*(size_y_cr) / (diff_u)))); // chroma snr for current frame + else + snr->snr_u=0.0; + if (diff_v != 0) + snr->snr_v=(float)(10*log10(max_pix_value_sqd_uv*(double)((double)(size_x_cr)*(size_y_cr) / (diff_v)))); // chroma snr for current frame + else + snr->snr_v=0; + + if (img->number == 0) // first + { + snr->snr_ya=snr->snr_y1=snr->snr_y; // keep luma snr for first frame + snr->snr_ua=snr->snr_u1=snr->snr_u; // keep chroma snr for first frame + 
snr->snr_va=snr->snr_v1=snr->snr_v; // keep chroma snr for first frame + + } + else + { + snr->snr_ya=(float)(snr->snr_ya*(snr->frame_ctr)+snr->snr_y)/(snr->frame_ctr+1); // average snr chroma for all frames + snr->snr_ua=(float)(snr->snr_ua*(snr->frame_ctr)+snr->snr_u)/(snr->frame_ctr+1); // average snr luma for all frames + snr->snr_va=(float)(snr->snr_va*(snr->frame_ctr)+snr->snr_v)/(snr->frame_ctr+1); // average snr luma for all frames + } + + // picture error concealment + if(p->concealed_pic) + { + fprintf(stdout,"%04d(P) %8d %5d %5d %7.4f %7.4f %7.4f %s %5d\n", + frame_no, p->frame_poc, p->pic_num, p->qp, + snr->snr_y, snr->snr_u, snr->snr_v, yuv_types[p->chroma_format_idc], 0); + + } + } + + + /*! + ************************************************************************ + * \brief + * Interpolation of 1/4 subpixel + ************************************************************************ + */ + void get_block(int ref_frame, StorablePicture **list, int x_pos, int y_pos, struct img_par *img, int block[BLOCK_SIZE][BLOCK_SIZE]) + { + + int dx, dy; + int x, y; + int i, j; + int maxold_x,maxold_y; + int result; + int pres_x; + int pres_y; + int tmp_res[4][9]; + static const int COEF[6] = { 1, -5, 20, 20, -5, 1 }; + + dx = x_pos&3; + dy = y_pos&3; + x_pos = (x_pos-dx)/4; + y_pos = (y_pos-dy)/4; + + maxold_x = dec_picture->size_x-1; + maxold_y = dec_picture->size_y-1; + + if (dec_picture->mb_field[img->current_mb_nr]) + maxold_y = dec_picture->size_y/2 - 1; + + if (dx == 0 && dy == 0) + { /* fullpel position */ + for (j = 0; j < BLOCK_SIZE; j++) + for (i = 0; i < BLOCK_SIZE; i++) + block[i][j] = list[ref_frame]->imgY[max(0,min(maxold_y,y_pos+j))][max(0,min(maxold_x,x_pos+i))]; + } + else + { /* other positions */ + + if (dy == 0) + { /* No vertical interpolation */ + + for (j = 0; j < BLOCK_SIZE; j++) + { + for (i = 0; i < BLOCK_SIZE; i++) + { + for (result = 0, x = -2; x < 4; x++) + result += 
list[ref_frame]->imgY[max(0,min(maxold_y,y_pos+j))][max(0,min(maxold_x,x_pos+i+x))]*COEF[x+2]; + block[i][j] = max(0, min(img->max_imgpel_value, (result+16)/32)); + } + } + + if ((dx&1) == 1) + { + for (j = 0; j < BLOCK_SIZE; j++) + for (i = 0; i < BLOCK_SIZE; i++) + block[i][j] = (block[i][j] + list[ref_frame]->imgY[max(0,min(maxold_y,y_pos+j))][max(0,min(maxold_x,x_pos+i+dx/2))] +1 )/2; + } + } + else if (dx == 0) + { /* No horizontal interpolation */ + + for (j = 0; j < BLOCK_SIZE; j++) + { + for (i = 0; i < BLOCK_SIZE; i++) + { + for (result = 0, y = -2; y < 4; y++) + result += list[ref_frame]->imgY[max(0,min(maxold_y,y_pos+j+y))][max(0,min(maxold_x,x_pos+i))]*COEF[y+2]; + block[i][j] = max(0, min(img->max_imgpel_value, (result+16)/32)); + } + } + + if ((dy&1) == 1) + { + for (j = 0; j < BLOCK_SIZE; j++) + for (i = 0; i < BLOCK_SIZE; i++) + block[i][j] = (block[i][j] + list[ref_frame]->imgY[max(0,min(maxold_y,y_pos+j+dy/2))][max(0,min(maxold_x,x_pos+i))] +1 )/2; + } + } + else if (dx == 2) + { /* Vertical & horizontal interpolation */ + + for (j = -2; j < BLOCK_SIZE+3; j++) + { + for (i = 0; i < BLOCK_SIZE; i++) + for (tmp_res[i][j+2] = 0, x = -2; x < 4; x++) + tmp_res[i][j+2] += list[ref_frame]->imgY[max(0,min(maxold_y,y_pos+j))][max(0,min(maxold_x,x_pos+i+x))]*COEF[x+2]; + } + + for (j = 0; j < BLOCK_SIZE; j++) + { + for (i = 0; i < BLOCK_SIZE; i++) + { + for (result = 0, y = -2; y < 4; y++) + result += tmp_res[i][j+y+2]*COEF[y+2]; + block[i][j] = max(0, min(img->max_imgpel_value, (result+512)/1024)); + } + } + + if ((dy&1) == 1) + { + for (j = 0; j < BLOCK_SIZE; j++) + for (i = 0; i < BLOCK_SIZE; i++) + block[i][j] = (block[i][j] + max(0, min(img->max_imgpel_value, (tmp_res[i][j+2+dy/2]+16)/32)) +1 )/2; + } + } + else if (dy == 2) + { /* Horizontal & vertical interpolation */ + + for (j = 0; j < BLOCK_SIZE; j++) + { + for (i = -2; i < BLOCK_SIZE+3; i++) + for (tmp_res[j][i+2] = 0, y = -2; y < 4; y++) + tmp_res[j][i+2] += 
list[ref_frame]->imgY[max(0,min(maxold_y,y_pos+j+y))][max(0,min(maxold_x,x_pos+i))]*COEF[y+2]; + } + + for (j = 0; j < BLOCK_SIZE; j++) + { + for (i = 0; i < BLOCK_SIZE; i++) + { + for (result = 0, x = -2; x < 4; x++) + result += tmp_res[j][i+x+2]*COEF[x+2]; + block[i][j] = max(0, min(img->max_imgpel_value, (result+512)/1024)); + } + } + + if ((dx&1) == 1) + { + for (j = 0; j < BLOCK_SIZE; j++) + for (i = 0; i < BLOCK_SIZE; i++) + block[i][j] = (block[i][j] + max(0, min(img->max_imgpel_value, (tmp_res[j][i+2+dx/2]+16)/32))+1)/2; + } + } + else + { /* Diagonal interpolation */ + + for (j = 0; j < BLOCK_SIZE; j++) + { + for (i = 0; i < BLOCK_SIZE; i++) + { + pres_y = dy == 1 ? y_pos+j : y_pos+j+1; + pres_y = max(0,min(maxold_y,pres_y)); + for (result = 0, x = -2; x < 4; x++) + result += list[ref_frame]->imgY[pres_y][max(0,min(maxold_x,x_pos+i+x))]*COEF[x+2]; + block[i][j] = max(0, min(img->max_imgpel_value, (result+16)/32)); + } + } + + for (j = 0; j < BLOCK_SIZE; j++) + { + for (i = 0; i < BLOCK_SIZE; i++) + { + pres_x = dx == 1 ? 
x_pos+i : x_pos+i+1; + pres_x = max(0,min(maxold_x,pres_x)); + for (result = 0, y = -2; y < 4; y++) + result += list[ref_frame]->imgY[max(0,min(maxold_y,y_pos+j+y))][pres_x]*COEF[y+2]; + block[i][j] = (block[i][j] + max(0, min(img->max_imgpel_value, (result+16)/32)) +1 ) / 2; + } + } + + } + } + } + + + void reorder_lists(int currSliceType, Slice * currSlice) + { + + if ((currSliceType != I_SLICE)&&(currSliceType != SI_SLICE)) + { + if (currSlice->ref_pic_list_reordering_flag_l0) + { + reorder_ref_pic_list(listX[0], &listXsize[0], + img->num_ref_idx_l0_active - 1, + currSlice->reordering_of_pic_nums_idc_l0, + currSlice->abs_diff_pic_num_minus1_l0, + currSlice->long_term_pic_idx_l0); + } + if (NULL == listX[0][img->num_ref_idx_l0_active-1]) + { + error("RefPicList0[ num_ref_idx_l0_active_minus1 ] is equal to 'no reference picture', invalid bitstream",500); + } + // that's a definition + listXsize[0] = img->num_ref_idx_l0_active; + } + if (currSliceType == B_SLICE) + { + if (currSlice->ref_pic_list_reordering_flag_l1) + { + reorder_ref_pic_list(listX[1], &listXsize[1], + img->num_ref_idx_l1_active - 1, + currSlice->reordering_of_pic_nums_idc_l1, + currSlice->abs_diff_pic_num_minus1_l1, + currSlice->long_term_pic_idx_l1); + } + if (NULL == listX[1][img->num_ref_idx_l1_active-1]) + { + error("RefPicList1[ num_ref_idx_l1_active_minus1 ] is equal to 'no reference picture', invalid bitstream",500); + } + // that's a definition + listXsize[1] = img->num_ref_idx_l1_active; + } + + free_ref_pic_list_reordering_buffer(currSlice); + } + + + /*! 
+ ************************************************************************ + * \brief + * initialize ref_pic_num array + ************************************************************************ + */ + void set_ref_pic_num() + { + int i,j; + + int slice_id=img->current_slice_nr; + + for (i=0;iref_pic_num [slice_id][LIST_0][i]=listX[LIST_0][i]->poc * 2 + ((listX[LIST_0][i]->structure==BOTTOM_FIELD)?1:0) ; + dec_picture->frm_ref_pic_num [slice_id][LIST_0][i]=listX[LIST_0][i]->frame_poc * 2; + dec_picture->top_ref_pic_num [slice_id][LIST_0][i]=listX[LIST_0][i]->top_poc * 2; + dec_picture->bottom_ref_pic_num [slice_id][LIST_0][i]=listX[LIST_0][i]->bottom_poc * 2 + 1; + //printf("POCS %d %d %d %d ",listX[LIST_0][i]->frame_poc,listX[LIST_0][i]->bottom_poc,listX[LIST_0][i]->top_poc,listX[LIST_0][i]->poc); + //printf("refid %d %d %d %d\n",(int) dec_picture->frm_ref_pic_num[LIST_0][i],(int) dec_picture->top_ref_pic_num[LIST_0][i],(int) dec_picture->bottom_ref_pic_num[LIST_0][i],(int) dec_picture->ref_pic_num[LIST_0][i]); + } + + for (i=0;iref_pic_num [slice_id][LIST_1][i]=listX[LIST_1][i]->poc *2 + ((listX[LIST_1][i]->structure==BOTTOM_FIELD)?1:0); + dec_picture->frm_ref_pic_num [slice_id][LIST_1][i]=listX[LIST_1][i]->frame_poc * 2; + dec_picture->top_ref_pic_num [slice_id][LIST_1][i]=listX[LIST_1][i]->top_poc * 2; + dec_picture->bottom_ref_pic_num [slice_id][LIST_1][i]=listX[LIST_1][i]->bottom_poc * 2 + 1; + } + + if (!active_sps->frame_mbs_only_flag) + { + if (img->structure==FRAME) + for (j=2;j<6;j++) + for (i=0;iref_pic_num [slice_id][j][i] = listX[j][i]->poc * 2 + ((listX[j][i]->structure==BOTTOM_FIELD)?1:0); + dec_picture->frm_ref_pic_num [slice_id][j][i] = listX[j][i]->frame_poc * 2 ; + dec_picture->top_ref_pic_num [slice_id][j][i] = listX[j][i]->top_poc * 2 ; + dec_picture->bottom_ref_pic_num [slice_id][j][i] = listX[j][i]->bottom_poc * 2 + 1; + } + } + + } + + + /*! 
+ ************************************************************************ + * \brief + * Reads new slice from bit_stream + ************************************************************************ + */ + int read_new_slice() + { + NALU_t *nalu = AllocNALU(MAX_CODED_FRAME_SIZE); + int current_header; + int ret; + int BitsUsedByHeader; + Slice *currSlice = img->currentSlice; + Bitstream *currStream; + + int slice_id_a, slice_id_b, slice_id_c; + int redundant_pic_cnt_b, redundant_pic_cnt_c; + long ftell_position, expected_slice_type; + + // int i; + expected_slice_type = NALU_TYPE_DPA; + + while (1) + { + ftell_position = ftell(bits); + + if (input->FileFormat == PAR_OF_ANNEXB) + ret=GetAnnexbNALU (nalu); + else + ret=GetRTPNALU (nalu); + + //In some cases, zero_byte shall be present. If current NALU is a VCL NALU, we can't tell + //whether it is the first VCL NALU at this point, so only non-VCL NAL unit is checked here. + CheckZeroByteNonVCL(nalu, &ret); + + NALUtoRBSP(nalu); + // printf ("nalu->len %d\n", nalu->len); + + if (ret < 0) + printf ("Error while getting the NALU in file format %s, exit\n", input->FileFormat==PAR_OF_ANNEXB?"Annex B":"RTP"); + if (ret == 0) + { + // printf ("read_new_slice: returning %s\n", "EOS"); + if(expected_slice_type != NALU_TYPE_DPA) + { + /* oops... we found the next slice, go back! */ + fseek(bits, ftell_position, SEEK_SET); + FreeNALU(nalu); + return current_header; + } + else + return EOS; + } + + // Got a NALU + if (nalu->forbidden_bit) + { + printf ("Found NALU w/ forbidden_bit set, bit error? 
Let's try...\n"); + } + + switch (nalu->nal_unit_type) + { + case NALU_TYPE_SLICE: + case NALU_TYPE_IDR: + img->idr_flag = (nalu->nal_unit_type == NALU_TYPE_IDR); + img->nal_reference_idc = nalu->nal_reference_idc; + img->disposable_flag = (nalu->nal_reference_idc == NALU_PRIORITY_DISPOSABLE); + currSlice->dp_mode = PAR_DP_1; + currSlice->max_part_nr = 1; + currSlice->ei_flag = 0; + currStream = currSlice->partArr[0].bitstream; + currStream->ei_flag = 0; + currStream->frame_bitoffset = currStream->read_len = 0; + memcpy (currStream->streamBuffer, &nalu->buf[1], nalu->len-1); + currStream->code_len = currStream->bitstream_length = RBSPtoSODB(currStream->streamBuffer, nalu->len-1); + + // Some syntax of the Slice Header depends on the parameter set, which depends on + // the parameter set ID of the SLice header. Hence, read the pic_parameter_set_id + // of the slice header first, then setup the active parameter sets, and then read + // the rest of the slice header + BitsUsedByHeader = FirstPartOfSliceHeader(); + UseParameterSet (currSlice->pic_parameter_set_id); + BitsUsedByHeader+= RestOfSliceHeader (); + + FmoInit (active_pps, active_sps); + + AssignQuantParam (active_pps, active_sps); + + if(is_new_picture()) + { + init_picture(img, input); + + current_header = SOP; + //check zero_byte if it is also the first NAL unit in the access unit + CheckZeroByteVCL(nalu, &ret); + } + else + current_header = SOS; + + init_lists(img->type, img->currentSlice->structure); + reorder_lists (img->type, img->currentSlice); + + if (img->structure==FRAME) + { + init_mbaff_lists(); + } + + /* if (img->frame_num==1) // write a reference list + { + count ++; + if (count==1) + for (i=0; iMbaffFrameFlag) + img->current_mb_nr = currSlice->start_mb_nr << 1; + else + img->current_mb_nr = currSlice->start_mb_nr; + + if (active_pps->entropy_coding_mode_flag) + { + int ByteStartPosition = currStream->frame_bitoffset/8; + if (currStream->frame_bitoffset%8 != 0) + { + ByteStartPosition++; + } + 
arideco_start_decoding (&currSlice->partArr[0].de_cabac, currStream->streamBuffer, ByteStartPosition, &currStream->read_len, img->type); + } + // printf ("read_new_slice: returning %s\n", current_header == SOP?"SOP":"SOS"); + FreeNALU(nalu); + return current_header; + break; + case NALU_TYPE_DPA: + //! The state machine here should follow the same ideas as the old readSliceRTP() + //! basically: + //! work on DPA (as above) + //! read and process all following SEI/SPS/PPS/PD/Filler NALUs + //! if next video NALU is dpB, + //! then read and check whether it belongs to DPA, if yes, use it + //! else + //! ; // nothing + //! read and process all following SEI/SPS/PPS/PD/Filler NALUs + //! if next video NALU is dpC + //! then read and check whether it belongs to DPA (and DPB, if present), if yes, use it, done + //! else + //! use the DPA (and the DPB if present) + + /* + LC: inserting the code related to DP processing, mainly copying some of the parts + related to NALU_TYPE_SLICE, NALU_TYPE_IDR. + */ + + if(expected_slice_type != NALU_TYPE_DPA) + { + /* oops... we found the next slice, go back! 
*/ + fseek(bits, ftell_position, SEEK_SET); + FreeNALU(nalu); + return current_header; + } + + img->idr_flag = (nalu->nal_unit_type == NALU_TYPE_IDR); + img->nal_reference_idc = nalu->nal_reference_idc; + img->disposable_flag = (nalu->nal_reference_idc == NALU_PRIORITY_DISPOSABLE); + currSlice->dp_mode = PAR_DP_3; + currSlice->max_part_nr = 3; + currSlice->ei_flag = 0; + currStream = currSlice->partArr[0].bitstream; + currStream->ei_flag = 0; + currStream->frame_bitoffset = currStream->read_len = 0; + memcpy (currStream->streamBuffer, &nalu->buf[1], nalu->len-1); + currStream->code_len = currStream->bitstream_length = RBSPtoSODB(currStream->streamBuffer, nalu->len-1); + + BitsUsedByHeader = FirstPartOfSliceHeader(); + UseParameterSet (currSlice->pic_parameter_set_id); + BitsUsedByHeader += RestOfSliceHeader (); + + FmoInit (active_pps, active_sps); + + if(is_new_picture()) + { + init_picture(img, input); + current_header = SOP; + CheckZeroByteVCL(nalu, &ret); + } + else + current_header = SOS; + + + init_lists(img->type, img->currentSlice->structure); + reorder_lists (img->type, img->currentSlice); + + if (img->structure==FRAME) + { + init_mbaff_lists(); + } + + // From here on, active_sps, active_pps and the slice header are valid + if (img->MbaffFrameFlag) + img->current_mb_nr = currSlice->start_mb_nr << 1; + else + img->current_mb_nr = currSlice->start_mb_nr; + + + /* + LC: + Now I need to read the slice ID, which depends on the value of + redundant_pic_cnt_present_flag (pag.49). 
+ */ + + slice_id_a = ue_v("NALU:SLICE_A slice_idr", currStream); + if (active_pps->entropy_coding_mode_flag) + { + int ByteStartPosition = currStream->frame_bitoffset/8; + if (currStream->frame_bitoffset%8 != 0) + { + ByteStartPosition++; + } + arideco_start_decoding (&currSlice->partArr[0].de_cabac, currStream->streamBuffer, ByteStartPosition, &currStream->read_len, img->type); + } + // printf ("read_new_slice: returning %s\n", current_header == SOP?"SOP":"SOS"); + break; + case NALU_TYPE_DPB: + /* LC: inserting the code related to DP processing */ + + currStream = currSlice->partArr[1].bitstream; + currStream->ei_flag = 0; + currStream->frame_bitoffset = currStream->read_len = 0; + memcpy (currStream->streamBuffer, &nalu->buf[1], nalu->len-1); + currStream->code_len = currStream->bitstream_length = RBSPtoSODB(currStream->streamBuffer, nalu->len-1); + + slice_id_b = ue_v("NALU:SLICE_B slice_idr", currStream); + if (active_pps->redundant_pic_cnt_present_flag) + redundant_pic_cnt_b = ue_v("NALU:SLICE_B redudand_pic_cnt", currStream); + else + redundant_pic_cnt_b = 0; + + /* LC: Initializing CABAC for the current data stream. 
*/ + + if (active_pps->entropy_coding_mode_flag) + { + int ByteStartPosition = currStream->frame_bitoffset/8; + if (currStream->frame_bitoffset % 8 != 0) + ByteStartPosition++; + + arideco_start_decoding (&currSlice->partArr[1].de_cabac, currStream->streamBuffer, + ByteStartPosition, &currStream->read_len, img->type); + + } + + /* LC: resilience code to be inserted */ + /* FreeNALU(nalu); */ + /* return current_header; */ + + break; + case NALU_TYPE_DPC: + /* LC: inserting the code related to DP processing */ + currStream = currSlice->partArr[2].bitstream; + currStream->ei_flag = 0; + currStream->frame_bitoffset = currStream->read_len = 0; + memcpy (currStream->streamBuffer, &nalu->buf[1], nalu->len-1); + currStream->code_len = currStream->bitstream_length = RBSPtoSODB(currStream->streamBuffer, nalu->len-1); + + slice_id_c = ue_v("NALU:SLICE_C slice_idr", currStream); + if (active_pps->redundant_pic_cnt_present_flag) + redundant_pic_cnt_c = ue_v("NALU:SLICE_C redudand_pic_cnt", currStream); + else + redundant_pic_cnt_c = 0; + + /* LC: Initializing CABAC for the current data stream. 
*/ + + if (active_pps->entropy_coding_mode_flag) + { + int ByteStartPosition = currStream->frame_bitoffset/8; + if (currStream->frame_bitoffset % 8 != 0) + ByteStartPosition++; + + arideco_start_decoding (&currSlice->partArr[2].de_cabac, currStream->streamBuffer, + ByteStartPosition, &currStream->read_len, img->type); + } + + /* LC: resilience code to be inserted */ + + FreeNALU(nalu); + return current_header; + + break; + case NALU_TYPE_SEI: + printf ("read_new_slice: Found NALU_TYPE_SEI, len %d\n", nalu->len); + InterpretSEIMessage(nalu->buf,nalu->len,img); + break; + case NALU_TYPE_PPS: + ProcessPPS(nalu); + break; + + case NALU_TYPE_SPS: + ProcessSPS(nalu); + break; + case NALU_TYPE_AUD: + // printf ("read_new_slice: Found 'Access Unit Delimiter' NAL unit, len %d, ignored\n", nalu->len); + break; + case NALU_TYPE_EOSEQ: + // printf ("read_new_slice: Found 'End of Sequence' NAL unit, len %d, ignored\n", nalu->len); + break; + case NALU_TYPE_EOSTREAM: + // printf ("read_new_slice: Found 'End of Stream' NAL unit, len %d, ignored\n", nalu->len); + break; + case NALU_TYPE_FILL: + printf ("read_new_slice: Found NALU_TYPE_FILL, len %d\n", nalu->len); + printf ("Skipping these filling bits, proceeding w/ next NALU\n"); + break; + default: + printf ("Found NALU type %d, len %d undefined, ignore NALU, moving on\n", nalu->nal_unit_type, nalu->len); + } + } + FreeNALU(nalu); + + return current_header; + } + + + /*! 
+ ************************************************************************ + * \brief + * Initializes the parameters for a new picture + ************************************************************************ + */ + void init_picture(struct img_par *img, struct inp_par *inp) + { + int i,k,l; + Slice *currSlice = img->currentSlice; + + if (dec_picture) + { + // this may only happen on slice loss + exit_picture(); + } + + if (img->frame_num != img->pre_frame_num && img->frame_num != (img->pre_frame_num + 1) % img->MaxFrameNum) + { + if (active_sps->gaps_in_frame_num_value_allowed_flag == 0) + { + // picture error concealment + if(inp->conceal_mode !=0) + { + if((img->frame_num) < (img->pre_frame_num)) + { + /* Conceal lost IDR frames and any frames immediately + following the IDR. Use frame copy for these since + lists cannot be formed correctly for motion copy*/ + img->conceal_mode = 1; + img->IDR_concealment_flag = 1; + conceal_lost_frames(img); + //reset to original concealment mode for future drops + img->conceal_mode = inp->conceal_mode; + } + else + { + //reset to original concealment mode for future drops + img->conceal_mode = inp->conceal_mode; + + img->IDR_concealment_flag = 0; + conceal_lost_frames(img); + } + } + else + { /* Advanced Error Concealment would be called here to combat unintentional loss of pictures. */ + error("An unintentional loss of pictures occurs! 
Exit\n", 100); + } + } + if(img->conceal_mode == 0) + fill_frame_num_gap(img); + } + + if(img->nal_reference_idc) + { + img->pre_frame_num = img->frame_num; + } + + //img->num_dec_mb = 0; + + //calculate POC + decode_poc(img); + + if(img->nal_reference_idc) + img->last_ref_pic_poc = img->framepoc; + + // dumppoc (img); + + if (img->structure==FRAME ||img->structure==TOP_FIELD) + { + #ifdef WIN32 + _ftime (&(img->tstruct_start)); // start time ms + #else + ftime (&(img->tstruct_start)); // start time ms + #endif + time( &(img->ltime_start)); // start time s + } + + dec_picture = alloc_storable_picture (img->structure, img->width, img->height, img->width_cr, img->height_cr); + dec_picture->top_poc=img->toppoc; + dec_picture->bottom_poc=img->bottompoc; + dec_picture->frame_poc=img->framepoc; + dec_picture->qp=img->qp; + dec_picture->slice_qp_delta=currSlice->slice_qp_delta; + dec_picture->chroma_qp_offset[0] = active_pps->chroma_qp_index_offset; + dec_picture->chroma_qp_offset[1] = active_pps->second_chroma_qp_index_offset; + + // reset all variables of the error concealment instance before decoding of every frame. + // here the third parameter should, if perfectly, be equal to the number of slices per frame. 
+ // using little value is ok, the code will allocate more memory if the slice number is larger + ercReset(erc_errorVar, img->PicSizeInMbs, img->PicSizeInMbs, dec_picture->size_x); + erc_mvperMB = 0; + + switch (img->structure ) + { + case TOP_FIELD: + { + dec_picture->poc=img->toppoc; + img->number *= 2; + break; + } + case BOTTOM_FIELD: + { + dec_picture->poc=img->bottompoc; + img->number++; + break; + } + case FRAME: + { + dec_picture->poc=img->framepoc; + break; + } + default: + error("img->structure not initialized", 235); + } + + img->current_slice_nr=0; + + if (img->type > SI_SLICE) + { + set_ec_flag(SE_PTYPE); + img->type = P_SLICE; // concealed element + } + + // CAVLC init + for (i=0;i < (int)img->PicSizeInMbs; i++) + for (k=0;k<4;k++) + for (l=0;l<(4 + img->num_blk8x8_uv);l++) + img->nz_coeff[i][k][l]=-1; // CAVLC + + if(active_pps->constrained_intra_pred_flag) + { + for (i=0; i<(int)img->PicSizeInMbs; i++) + { + img->intra_block[i] = 1; + } + } + + // Set the slice_nr member of each MB to -1, to ensure correct when packet loss occurs + // TO set Macroblock Map (mark all MBs as 'have to be concealed') + for(i=0; i<(int)img->PicSizeInMbs; i++) + { + img->mb_data[i].slice_nr = -1; + img->mb_data[i].ei_flag = 1; + } + + img->mb_y = img->mb_x = 0; + img->block_y = img->pix_y = img->pix_c_y = 0; // define vertical positions + img->block_x = img->pix_x = img->pix_c_x = 0; // define horizontal positions + + dec_picture->slice_type = img->type; + dec_picture->used_for_reference = (img->nal_reference_idc != 0); + dec_picture->idr_flag = img->idr_flag; + dec_picture->no_output_of_prior_pics_flag = img->no_output_of_prior_pics_flag; + dec_picture->long_term_reference_flag = img->long_term_reference_flag; + dec_picture->adaptive_ref_pic_buffering_flag = img->adaptive_ref_pic_buffering_flag; + + dec_picture->dec_ref_pic_marking_buffer = img->dec_ref_pic_marking_buffer; + img->dec_ref_pic_marking_buffer = NULL; + + dec_picture->MbaffFrameFlag = img->MbaffFrameFlag; + 
dec_picture->PicWidthInMbs = img->PicWidthInMbs; + dec_picture->pic_num = img->frame_num; + dec_picture->frame_num = img->frame_num; + dec_picture->coded_frame = (img->structure==FRAME); + + dec_picture->chroma_format_idc = active_sps->chroma_format_idc; + + dec_picture->frame_mbs_only_flag = active_sps->frame_mbs_only_flag; + dec_picture->frame_cropping_flag = active_sps->frame_cropping_flag; + + if (dec_picture->frame_cropping_flag) + { + dec_picture->frame_cropping_rect_left_offset = active_sps->frame_cropping_rect_left_offset; + dec_picture->frame_cropping_rect_right_offset = active_sps->frame_cropping_rect_right_offset; + dec_picture->frame_cropping_rect_top_offset = active_sps->frame_cropping_rect_top_offset; + dec_picture->frame_cropping_rect_bottom_offset = active_sps->frame_cropping_rect_bottom_offset; + } + } + + /*! + ************************************************************************ + * \brief + * finish decoding of a picture, conceal errors and store it + * into the DPB + ************************************************************************ + */ + void exit_picture() + { + char yuv_types[4][6]= {"4:0:0","4:2:0","4:2:2","4:4:4"}; + int ercStartMB; + int ercSegment; + frame recfr; + unsigned int i; + int structure, frame_poc, slice_type, refpic, qp, pic_num, chroma_format_idc; + + int tmp_time; // time used by decoding the last frame + char yuvFormat[10]; + + // return if the last picture has already been finished + if (dec_picture==NULL) + { + return; + } + + //deblocking for frame or field + DeblockPicture( img, dec_picture ); + + if (dec_picture->MbaffFrameFlag) + MbAffPostProc(); + + recfr.yptr = &dec_picture->imgY[0][0]; + if (dec_picture->chroma_format_idc != YUV400) + { + recfr.uptr = &dec_picture->imgUV[0][0][0]; + recfr.vptr = &dec_picture->imgUV[1][0][0]; + } + + //! this is always true at the beginning of a picture + ercStartMB = 0; + ercSegment = 0; + + //! 
mark the start of the first segment + if (!dec_picture->MbaffFrameFlag) + { + ercStartSegment(0, ercSegment, 0 , erc_errorVar); + //! generate the segments according to the macroblock map + for(i = 1; iPicSizeInMbs; i++) + { + if(img->mb_data[i].ei_flag != img->mb_data[i-1].ei_flag) + { + ercStopSegment(i-1, ercSegment, 0, erc_errorVar); //! stop current segment + + //! mark current segment as lost or OK + if(img->mb_data[i-1].ei_flag) + ercMarkCurrSegmentLost(dec_picture->size_x, erc_errorVar); + else + ercMarkCurrSegmentOK(dec_picture->size_x, erc_errorVar); + + ercSegment++; //! next segment + ercStartSegment(i, ercSegment, 0 , erc_errorVar); //! start new segment + ercStartMB = i;//! save start MB for this segment + } + } + //! mark end of the last segment + ercStopSegment(dec_picture->PicSizeInMbs-1, ercSegment, 0, erc_errorVar); + if(img->mb_data[i-1].ei_flag) + ercMarkCurrSegmentLost(dec_picture->size_x, erc_errorVar); + else + ercMarkCurrSegmentOK(dec_picture->size_x, erc_errorVar); + + //! call the right error concealment function depending on the frame type. + erc_mvperMB /= dec_picture->PicSizeInMbs; + + erc_img = img; + if(dec_picture->slice_type == I_SLICE || dec_picture->slice_type == SI_SLICE) // I-frame + ercConcealIntraFrame(&recfr, dec_picture->size_x, dec_picture->size_y, erc_errorVar); + else + ercConcealInterFrame(&recfr, erc_object_list, dec_picture->size_x, dec_picture->size_y, erc_errorVar, dec_picture->chroma_format_idc); + } + + if (img->structure == FRAME) // buffer mgt. 
for frame mode + frame_postprocessing(img, input); + else + field_postprocessing(img, input); // reset all interlaced variables + + structure = dec_picture->structure; + slice_type = dec_picture->slice_type; + frame_poc = dec_picture->frame_poc; + refpic = dec_picture->used_for_reference; + qp = dec_picture->qp; + pic_num = dec_picture->pic_num; + + chroma_format_idc= dec_picture->chroma_format_idc; + + store_picture_in_dpb(dec_picture); + dec_picture=NULL; + + if (img->last_has_mmco_5) + { + img->pre_frame_num = 0; + } + + if ((structure==FRAME)||structure==BOTTOM_FIELD) + { + + #ifdef WIN32 + _ftime (&(img->tstruct_end)); // start time ms + #else + ftime (&(img->tstruct_end)); // start time ms + #endif + + time( &(img->ltime_end)); // start time s + + #if 1 // FIXME: control with a runtime option. + tmp_time=0; + #else + tmp_time=(img->ltime_end*1000+img->tstruct_end.millitm) - (img->ltime_start*1000+img->tstruct_start.millitm); + #endif + tot_time=tot_time + tmp_time; + + sprintf(yuvFormat,"%s", yuv_types[chroma_format_idc]); + + if(slice_type == I_SLICE) // I picture + fprintf(stdout,"%04d(I) %8d %5d %5d %7.4f %7.4f %7.4f %s %5d\n", + frame_no, frame_poc, pic_num, qp, snr->snr_y, snr->snr_u, snr->snr_v, yuvFormat, tmp_time); + else if(slice_type == P_SLICE) // P pictures + fprintf(stdout,"%04d(P) %8d %5d %5d %7.4f %7.4f %7.4f %s %5d\n", + frame_no, frame_poc, pic_num, qp, snr->snr_y, snr->snr_u, snr->snr_v, yuvFormat, tmp_time); + else if(slice_type == SP_SLICE) // SP pictures + fprintf(stdout,"%04d(SP) %8d %5d %5d %7.4f %7.4f %7.4f %s %5d\n", + frame_no, frame_poc, pic_num, qp, snr->snr_y, snr->snr_u, snr->snr_v, yuvFormat, tmp_time); + else if (slice_type == SI_SLICE) + fprintf(stdout,"%04d(SI) %8d %5d %5d %7.4f %7.4f %7.4f %s %5d\n", + frame_no, frame_poc, pic_num, qp, snr->snr_y, snr->snr_u, snr->snr_v, yuvFormat, tmp_time); + else if(refpic) // stored B pictures + fprintf(stdout,"%04d(RB) %8d %5d %5d %7.4f %7.4f %7.4f %s %5d\n", + frame_no, frame_poc, 
pic_num, qp, snr->snr_y, snr->snr_u, snr->snr_v, yuvFormat, tmp_time); + else // B pictures + fprintf(stdout,"%04d(B) %8d %5d %5d %7.4f %7.4f %7.4f %s %5d\n", + frame_no, frame_poc, pic_num, qp, snr->snr_y, snr->snr_u, snr->snr_v, yuvFormat, tmp_time); + + fflush(stdout); + + if(slice_type == I_SLICE || slice_type == SI_SLICE || slice_type == P_SLICE || refpic) // I or P pictures + img->number++; + else + Bframe_ctr++; // B pictures + snr->frame_ctr++; + + g_nFrame++; + } + + img->current_mb_nr = -4712; // impossible value for debugging, StW + img->current_slice_nr = 0; + + } + + /*! + ************************************************************************ + * \brief + * write the encoding mode and motion vectors of current + * MB to the buffer of the error concealment module. + ************************************************************************ + */ + + void ercWriteMBMODEandMV(struct img_par *img,struct inp_par *inp) + { + extern objectBuffer_t *erc_object_list; + int i, ii, jj, currMBNum = img->current_mb_nr; + int mbx = xPosMB(currMBNum,dec_picture->size_x), mby = yPosMB(currMBNum,dec_picture->size_x); + objectBuffer_t *currRegion, *pRegion; + Macroblock *currMB = &img->mb_data[currMBNum]; + short*** mv; + + currRegion = erc_object_list + (currMBNum<<2); + + if(img->type != B_SLICE) //non-B frame + { + for (i=0; i<4; i++) + { + pRegion = currRegion + i; + pRegion->regionMode = (currMB->mb_type ==I16MB ? REGMODE_INTRA : + currMB->b8mode[i]==IBLOCK ? REGMODE_INTRA_8x8 : + currMB->b8mode[i]==0 ? REGMODE_INTER_COPY : + currMB->b8mode[i]==1 ? 
REGMODE_INTER_PRED : REGMODE_INTER_PRED_8x8); + if (currMB->b8mode[i]==0 || currMB->b8mode[i]==IBLOCK) // INTRA OR COPY + { + pRegion->mv[0] = 0; + pRegion->mv[1] = 0; + pRegion->mv[2] = 0; + } + else + { + ii = 4*mbx + (i%2)*2;// + BLOCK_SIZE; + jj = 4*mby + (i/2)*2; + if (currMB->b8mode[i]>=5 && currMB->b8mode[i]<=7) // SMALL BLOCKS + { + pRegion->mv[0] = (dec_picture->mv[LIST_0][jj][ii][0] + dec_picture->mv[LIST_0][jj][ii+1][0] + dec_picture->mv[LIST_0][jj+1][ii][0] + dec_picture->mv[LIST_0][jj+1][ii+1][0] + 2)/4; + pRegion->mv[1] = (dec_picture->mv[LIST_0][jj][ii][1] + dec_picture->mv[LIST_0][jj][ii+1][1] + dec_picture->mv[LIST_0][jj+1][ii][1] + dec_picture->mv[LIST_0][jj+1][ii+1][1] + 2)/4; + } + else // 16x16, 16x8, 8x16, 8x8 + { + pRegion->mv[0] = dec_picture->mv[LIST_0][jj][ii][0]; + pRegion->mv[1] = dec_picture->mv[LIST_0][jj][ii][1]; + // pRegion->mv[0] = dec_picture->mv[LIST_0][4*mby+(i/2)*2][4*mbx+(i%2)*2+BLOCK_SIZE][0]; + // pRegion->mv[1] = dec_picture->mv[LIST_0][4*mby+(i/2)*2][4*mbx+(i%2)*2+BLOCK_SIZE][1]; + } + erc_mvperMB += mabs(pRegion->mv[0]) + mabs(pRegion->mv[1]); + pRegion->mv[2] = dec_picture->ref_idx[LIST_0][jj][ii]; + } + } + } + else //B-frame + { + for (i=0; i<4; i++) + { + ii = 4*mbx + (i%2)*2;// + BLOCK_SIZE; + jj = 4*mby + (i/2)*2; + pRegion = currRegion + i; + pRegion->regionMode = (currMB->mb_type ==I16MB ? REGMODE_INTRA : + currMB->b8mode[i]==IBLOCK ? REGMODE_INTRA_8x8 : REGMODE_INTER_PRED_8x8); + if (currMB->mb_type==I16MB || currMB->b8mode[i]==IBLOCK) // INTRA + { + pRegion->mv[0] = 0; + pRegion->mv[1] = 0; + pRegion->mv[2] = 0; + } + else + { + int idx = (dec_picture->ref_idx[0][jj][ii]<0)?1:0; + // int idx = (currMB->b8mode[i]==0 && currMB->b8pdir[i]==2 ? LIST_0 : currMB->b8pdir[i]==1 ? LIST_1 : LIST_0); + // int idx = currMB->b8pdir[i]==0 ? 
LIST_0 : LIST_1; + mv = dec_picture->mv[idx]; + pRegion->mv[0] = (mv[jj][ii][0] + mv[jj][ii+1][0] + mv[jj+1][ii][0] + mv[jj+1][ii+1][0] + 2)/4; + pRegion->mv[1] = (mv[jj][ii][1] + mv[jj][ii+1][1] + mv[jj+1][ii][1] + mv[jj+1][ii+1][1] + 2)/4; + erc_mvperMB += mabs(pRegion->mv[0]) + mabs(pRegion->mv[1]); + + pRegion->mv[2] = (dec_picture->ref_idx[idx][jj][ii]); + /* + if (currMB->b8pdir[i]==0 || (currMB->b8pdir[i]==2 && currMB->b8mode[i]!=0)) // forward or bidirect + { + pRegion->mv[2] = (dec_picture->ref_idx[LIST_0][jj][ii]); + ///???? is it right, not only "img->fw_refFrArr[jj][ii-4]" + } + else + { + pRegion->mv[2] = (dec_picture->ref_idx[LIST_1][jj][ii]); + // pRegion->mv[2] = 0; + } + */ + } + } + } + } + + /*! + ************************************************************************ + * \brief + * set defaults for old_slice + * NAL unit of a picture" + ************************************************************************ + */ + void init_old_slice() + { + old_slice.field_pic_flag = 0; + + old_slice.pps_id = INT_MAX; + + old_slice.frame_num = INT_MAX; + + old_slice.nal_ref_idc = INT_MAX; + + old_slice.idr_flag = 0; + + old_slice.pic_oder_cnt_lsb = UINT_MAX; + old_slice.delta_pic_oder_cnt_bottom = INT_MAX; + + old_slice.delta_pic_order_cnt[0] = INT_MAX; + old_slice.delta_pic_order_cnt[1] = INT_MAX; + + } + + /*! 
+ ************************************************************************ + * \brief + * save slice parameters that are needed for checking of "first VCL + * NAL unit of a picture" + ************************************************************************ + */ + void exit_slice() + { + + old_slice.pps_id = img->currentSlice->pic_parameter_set_id; + + old_slice.frame_num = img->frame_num; + + old_slice.field_pic_flag = img->field_pic_flag; + + if(img->field_pic_flag) + { + old_slice.bottom_field_flag = img->bottom_field_flag; + } + + old_slice.nal_ref_idc = img->nal_reference_idc; + + old_slice.idr_flag = img->idr_flag; + if (img->idr_flag) + { + old_slice.idr_pic_id = img->idr_pic_id; + } + + if (active_sps->pic_order_cnt_type == 0) + { + old_slice.pic_oder_cnt_lsb = img->pic_order_cnt_lsb; + old_slice.delta_pic_oder_cnt_bottom = img->delta_pic_order_cnt_bottom; + } + + if (active_sps->pic_order_cnt_type == 1) + { + old_slice.delta_pic_order_cnt[0] = img->delta_pic_order_cnt[0]; + old_slice.delta_pic_order_cnt[1] = img->delta_pic_order_cnt[1]; + } + } + + /*! 
+ ************************************************************************ + * \brief + * detect if current slice is "first VCL NAL unit of a picture" + ************************************************************************ + */ + int is_new_picture() + { + int result=0; + + result |= (old_slice.pps_id != img->currentSlice->pic_parameter_set_id); + + result |= (old_slice.frame_num != img->frame_num); + + result |= (old_slice.field_pic_flag != img->field_pic_flag); + + if(img->field_pic_flag && old_slice.field_pic_flag) + { + result |= (old_slice.bottom_field_flag != img->bottom_field_flag); + } + + result |= (old_slice.nal_ref_idc != img->nal_reference_idc) && ((old_slice.nal_ref_idc == 0) || (img->nal_reference_idc == 0)); + + result |= ( old_slice.idr_flag != img->idr_flag); + + if (img->idr_flag && old_slice.idr_flag) + { + result |= (old_slice.idr_pic_id != img->idr_pic_id); + } + + if (active_sps->pic_order_cnt_type == 0) + { + result |= (old_slice.pic_oder_cnt_lsb != img->pic_order_cnt_lsb); + result |= (old_slice.delta_pic_oder_cnt_bottom != img->delta_pic_order_cnt_bottom); + } + + if (active_sps->pic_order_cnt_type == 1) + { + result |= (old_slice.delta_pic_order_cnt[0] != img->delta_pic_order_cnt[0]); + result |= (old_slice.delta_pic_order_cnt[1] != img->delta_pic_order_cnt[1]); + } + + return result; + } + + + /*! 
+ ************************************************************************ + * \brief + * decodes one slice + ************************************************************************ + */ + void decode_one_slice(struct img_par *img,struct inp_par *inp) + { + + Boolean end_of_slice = FALSE; + int read_flag; + img->cod_counter=-1; + + set_ref_pic_num(); + + if (img->type == B_SLICE) + compute_colocated(Co_located, listX); + + //reset_ec_flags(); + + while (end_of_slice == FALSE) // loop over macroblocks + { + + #if TRACE + fprintf(p_trace,"\n*********** POC: %i (I/P) MB: %i Slice: %i Type %d **********\n", img->ThisPOC, img->current_mb_nr, img->current_slice_nr, img->type); + #endif + + // Initializes the current macroblock + start_macroblock(img,inp, img->current_mb_nr); + // Get the syntax elements from the NAL + read_flag = read_one_macroblock(img,inp); + decode_one_macroblock(img,inp); + + if(img->MbaffFrameFlag && dec_picture->mb_field[img->current_mb_nr]) + { + img->num_ref_idx_l0_active >>= 1; + img->num_ref_idx_l1_active >>= 1; + } + + ercWriteMBMODEandMV(img,inp); + + end_of_slice=exit_macroblock(img,inp,(!img->MbaffFrameFlag||img->current_mb_nr%2)); + } + + exit_slice(); + //reset_ec_flags(); + + } + + + void decode_slice(struct img_par *img,struct inp_par *inp, int current_header) + { + Slice *currSlice = img->currentSlice; + + if (active_pps->entropy_coding_mode_flag) + { + init_contexts (img); + cabac_new_slice(); + } + + if ( (active_pps->weighted_bipred_idc > 0 && (img->type == B_SLICE)) || (active_pps->weighted_pred_flag && img->type !=I_SLICE)) + fill_wp_params(img); + + //printf("frame picture %d %d %d\n",img->structure,img->ThisPOC,img->direct_spatial_mv_pred_flag); + + + // decode main slice information + if ((current_header == SOP || current_header == SOS) && currSlice->ei_flag == 0) + decode_one_slice(img,inp); + + // setMB-Nr in case this slice was lost + // if(currSlice->ei_flag) + // img->current_mb_nr = currSlice->last_mb_nr + 1; + + } + + + 
/*! + ************************************************************************ + * \brief + * Prepare field and frame buffer after frame decoding + ************************************************************************ + */ + void frame_postprocessing(struct img_par *img, struct inp_par *inp) + { + } + + /*! + ************************************************************************ + * \brief + * Prepare field and frame buffer after field decoding + ************************************************************************ + */ + void field_postprocessing(struct img_par *img, struct inp_par *inp) + { + img->number /= 2; + } + + + + void reset_wp_params(struct img_par *img) + { + int i,comp; + int log_weight_denom; + + for (i=0; iluma_log2_weight_denom : img->chroma_log2_weight_denom; + img->wp_weight[0][i][comp] = 1<wp_weight[1][i][comp] = 1<type==B_SLICE); + int max_bwd_ref, max_fwd_ref; + int tx,DistScaleFactor; + + max_fwd_ref = img->num_ref_idx_l0_active; + max_bwd_ref = img->num_ref_idx_l1_active; + + if (active_pps->weighted_bipred_idc == 2 && bframe) + { + img->luma_log2_weight_denom = 5; + img->chroma_log2_weight_denom = 5; + img->wp_round_luma = 16; + img->wp_round_chroma = 16; + + for (i=0; iluma_log2_weight_denom : img->chroma_log2_weight_denom; + img->wp_weight[0][i][comp] = 1<wp_weight[1][i][comp] = 1<wp_offset[0][i][comp] = 0; + img->wp_offset[1][i][comp] = 0; + } + } + } + + if (bframe) + { + for (i=0; iluma_log2_weight_denom : img->chroma_log2_weight_denom; + if (active_pps->weighted_bipred_idc == 1) + { + img->wbp_weight[0][i][j][comp] = img->wp_weight[0][i][comp]; + img->wbp_weight[1][i][j][comp] = img->wp_weight[1][j][comp]; + } + else if (active_pps->weighted_bipred_idc == 2) + { + td = Clip3(-128,127,listX[LIST_1][j]->poc - listX[LIST_0][i]->poc); + if (td == 0 || listX[LIST_1][j]->is_long_term || listX[LIST_0][i]->is_long_term) + { + img->wbp_weight[0][i][j][comp] = 32; + img->wbp_weight[1][i][j][comp] = 32; + } + else + { + tb = 
Clip3(-128,127,img->ThisPOC - listX[LIST_0][i]->poc); + + tx = (16384 + abs(td/2))/td; + DistScaleFactor = Clip3(-1024, 1023, (tx*tb + 32 )>>6); + + img->wbp_weight[1][i][j][comp] = DistScaleFactor >> 2; + img->wbp_weight[0][i][j][comp] = 64 - img->wbp_weight[1][i][j][comp]; + if (img->wbp_weight[1][i][j][comp] < -64 || img->wbp_weight[1][i][j][comp] > 128) + { + img->wbp_weight[0][i][j][comp] = 32; + img->wbp_weight[1][i][j][comp] = 32; + img->wp_offset[0][i][comp] = 0; + img->wp_offset[1][i][comp] = 0; + } + } + } + } + } + } + } + + if (bframe && img->MbaffFrameFlag) + { + for (i=0; i<2*max_fwd_ref; i++) + { + for (j=0; j<2*max_bwd_ref; j++) + { + for (comp = 0; comp<3; comp++) + { + for (k=2; k<6; k+=2) + { + img->wp_offset[k+0][i][comp] = img->wp_offset[0][i/2][comp]; + img->wp_offset[k+1][i][comp] = img->wp_offset[1][i/2][comp]; + + log_weight_denom = (comp == 0) ? img->luma_log2_weight_denom : img->chroma_log2_weight_denom; + if (active_pps->weighted_bipred_idc == 1) + { + img->wbp_weight[k+0][i][j][comp] = img->wp_weight[0][i/2][comp]; + img->wbp_weight[k+1][i][j][comp] = img->wp_weight[1][j/2][comp]; + } + else if (active_pps->weighted_bipred_idc == 2) + { + td = Clip3(-128,127,listX[k+LIST_1][j]->poc - listX[k+LIST_0][i]->poc); + if (td == 0 || listX[k+LIST_1][j]->is_long_term || listX[k+LIST_0][i]->is_long_term) + { + img->wbp_weight[k+0][i][j][comp] = 32; + img->wbp_weight[k+1][i][j][comp] = 32; + } + else + { + tb = Clip3(-128,127,((k==2)?img->toppoc:img->bottompoc) - listX[k+LIST_0][i]->poc); + + tx = (16384 + abs(td/2))/td; + DistScaleFactor = Clip3(-1024, 1023, (tx*tb + 32 )>>6); + + img->wbp_weight[k+1][i][j][comp] = DistScaleFactor >> 2; + img->wbp_weight[k+0][i][j][comp] = 64 - img->wbp_weight[k+1][i][j][comp]; + if (img->wbp_weight[k+1][i][j][comp] < -64 || img->wbp_weight[k+1][i][j][comp] > 128) + { + img->wbp_weight[k+1][i][j][comp] = 32; + img->wbp_weight[k+0][i][j][comp] = 32; + img->wp_offset[k+0][i][comp] = 0; + 
img->wp_offset[k+1][i][comp] = 0; + } + } + } + } + } + } + } + } + } Index: llvm-test/MultiSource/Applications/JM/ldecod/image.h diff -c /dev/null llvm-test/MultiSource/Applications/JM/ldecod/image.h:1.1 *** /dev/null Sat Feb 11 04:33:42 2006 --- llvm-test/MultiSource/Applications/JM/ldecod/image.h Sat Feb 11 04:33:22 2006 *************** *** 0 **** --- 1,24 ---- + + /*! + ************************************************************************ + * \file image.h + * + * \brief + * prototypes for image.c + * + ************************************************************************ + */ + + #ifndef _IMAGE_H_ + #define _IMAGE_H_ + + #include "mbuffer.h" + + extern StorablePicture *dec_picture; + + void find_snr(struct snr_par *snr, StorablePicture *p, int p_ref); + void get_block(int ref_frame, StorablePicture **list, int x_pos, int y_pos, struct img_par *img, int block[BLOCK_SIZE][BLOCK_SIZE]); + int picture_order(struct img_par *img); + + #endif + Index: llvm-test/MultiSource/Applications/JM/ldecod/ldecod.c diff -c /dev/null llvm-test/MultiSource/Applications/JM/ldecod/ldecod.c:1.1 *** /dev/null Sat Feb 11 04:33:42 2006 --- llvm-test/MultiSource/Applications/JM/ldecod/ldecod.c Sat Feb 11 04:33:22 2006 *************** *** 0 **** --- 1,940 ---- + + /*! + *********************************************************************** + * \mainpage + * This is the H.264/AVC decoder reference software. For detailed documentation + * see the comments in each file. + * + * \author + * The main contributors are listed in contributors.h + * + * \version + * JM 10.1 (FRExt) + * + * \note + * tags are used for document system "doxygen" + * available at http://www.doxygen.org + */ + /*! 
+ * \file + * ldecod.c + * \brief + * H.264/AVC reference decoder project main() + * \author + * Main contributors (see contributors.h for copyright, address and affiliation details) + * - Inge Lille-Langøy + * - Rickard Sjoberg + * - Stephan Wenger + * - Jani Lainema + * - Sebastian Purreiter + * - Byeong-Moon Jeon + * - Gabi Blaettermann + * - Ye-Kui Wang + * - Valeri George + * - Karsten Suehring + * + *********************************************************************** + */ + + #include "contributors.h" + + #include + #include + #include + #include + #include + + #if defined WIN32 + #include + #else + #include + #endif + #include + #include + + + #include + + #include "global.h" + #include "rtp.h" + #include "memalloc.h" + #include "mbuffer.h" + #include "leaky_bucket.h" + #include "fmo.h" + #include "annexb.h" + #include "output.h" + #include "cabac.h" + + #include "erc_api.h" + + #define JM "10 (FRExt)" + #define VERSION "10.1" + #define EXT_VERSION "(FRExt)" + + #define LOGFILE "log.dec" + #define DATADECFILE "dataDec.txt" + #define TRACEFILE "trace_dec.txt" + + extern objectBuffer_t *erc_object_list; + extern ercVariables_t *erc_errorVar; + extern ColocatedParams *Co_located; + + // I have started to move the inp and img structures into global variables. + // They are declared in the following lines. Since inp is defined in conio.h + // and cannot be overridden globally, it is defined here as input + // + // Everywhere, input-> and img-> can now be used either globally or with + // the local override through the formal parameter mechanism + + extern FILE* bits; + extern StorablePicture* dec_picture; + + struct inp_par *input; //!< input parameters from input configuration file + struct snr_par *snr; //!< statistics + struct img_par *img; //!< image parameters + + int global_init_done = 0; + + /*! 
+ *********************************************************************** + * \brief + * print help message and exit + *********************************************************************** + */ + void JMDecHelpExit () + { + fprintf( stderr, "\n ldecod [-h] {[defdec.cfg] | {[-i bitstream.264]...[-o output.yuv] [-r reference.yuv] [-uv]}}\n\n" + "## Parameters\n\n" + + "## Options\n" + " -h : prints function usage\n" + " : parse for decoder operation.\n" + " -i : Input file name. \n" + " -o : Output file name. If not specified default output is set as test_dec.yuv\n\n" + " -r : Reference file name. If not specified default output is set as test_rec.yuv\n\n" + " -uv : write chroma components for monochrome streams(4:2:0)\n\n" + + "## Supported video file formats\n" + " Input : .264 -> H.264 bitstream files. \n" + " Output: .yuv -> RAW file. Format depends on bitstream information. \n\n" + + "## Examples of usage:\n" + " ldecod\n" + " ldecod -h\n" + " ldecod default.cfg\n" + " ldecod -i bitstream.264 -o output.yuv -r reference.yuv\n"); + + exit(-1); + } + + + void Configure(int ac, char *av[]) + { + int CLcount; + char *config_filename=NULL; + CLcount = 1; + + + strcpy(input->infile,"test.264"); //! set default bitstream name + strcpy(input->outfile,"test_dec.yuv"); //! set default output file name + strcpy(input->reffile,"test_rec.yuv"); //! set default reference file name + input->FileFormat = PAR_OF_ANNEXB; + input->ref_offset=0; + input->poc_scale=1; + + #ifdef _LEAKYBUCKET_ + input->R_decoder=500000; //! Decoder rate + input->B_decoder=104000; //! Decoder buffer size + input->F_decoder=73000; //! 
Decoder initial delay + strcpy(input->LeakyBucketParamFile,"leakybucketparam.cfg"); // file where Leaky Bucket params (computed by encoder) are stored + #endif + + if (ac==2) + { + if (0 == strncmp (av[1], "-h", 2)) + { + JMDecHelpExit(); + } + else + { + config_filename=av[1]; + init_conf(input, av[1]); + } + CLcount=2; + } + + if (ac>=3) + { + if (0 == strncmp (av[1], "-i", 2)) + { + strcpy(input->infile,av[2]); + CLcount = 3; + } + if (0 == strncmp (av[1], "-h", 2)) + { + JMDecHelpExit(); + } + } + + // Parse the command line + + while (CLcount < ac) + { + if (0 == strncmp (av[CLcount], "-h", 2)) + { + JMDecHelpExit(); + } + + if (0 == strncmp (av[CLcount], "-i", 2)) //! Input file + { + strcpy(input->infile,av[CLcount+1]); + CLcount += 2; + } + else if (0 == strncmp (av[CLcount], "-o", 2)) //! Output File + { + strcpy(input->outfile,av[CLcount+1]); + CLcount += 2; + } + else if (0 == strncmp (av[CLcount], "-r", 2)) //! Reference File + { + strcpy(input->reffile,av[CLcount+1]); + CLcount += 2; + } + else if (0 == strncmp (av[CLcount], "-uv", 2)) //! indicate UV writing for 4:0:0 + { + input->write_uv = 1; + CLcount ++; + } + else + { + //config_filename=av[CLcount]; + //init_conf(input, config_filename); + snprintf(errortext, ET_SIZE, "Invalid syntax. 
Use ldecod -h for proper usage"); + error(errortext, 300); + } + } + + + #if TRACE + if ((p_trace=fopen(TRACEFILE,"w"))==0) // append new statistic at the end + { + snprintf(errortext, ET_SIZE, "Error open file %s!",TRACEFILE); + error(errortext,500); + } + #endif + + + if ((p_out=open(input->outfile, OPENFLAGS_WRITE, OPEN_PERMISSIONS))==-1) + { + snprintf(errortext, ET_SIZE, "Error open file %s ",input->outfile); + error(errortext,500); + } + /* if ((p_out2=fopen("out.yuv","wb"))==0) + { + snprintf(errortext, ET_SIZE, "Error open file %s ",input->outfile); + error(errortext,500); + }*/ + + + fprintf(stdout,"----------------------------- JM %s %s -----------------------------\n", VERSION, EXT_VERSION); + fprintf(stdout," Decoder config file : %s \n",config_filename); + fprintf(stdout,"--------------------------------------------------------------------------\n"); + fprintf(stdout," Input H.264 bitstream : %s \n",input->infile); + fprintf(stdout," Output decoded YUV : %s \n",input->outfile); + fprintf(stdout," Output status file : %s \n",LOGFILE); + if ((p_ref=open(input->reffile,OPENFLAGS_READ))==-1) + { + fprintf(stdout," Input reference file : %s does not exist \n",input->reffile); + fprintf(stdout," SNR values are not available\n"); + } + else + fprintf(stdout," Input reference file : %s \n",input->reffile); + + fprintf(stdout,"--------------------------------------------------------------------------\n"); + #ifdef _LEAKYBUCKET_ + fprintf(stdout," Rate_decoder : %8ld \n",input->R_decoder); + fprintf(stdout," B_decoder : %8ld \n",input->B_decoder); + fprintf(stdout," F_decoder : %8ld \n",input->F_decoder); + fprintf(stdout," LeakyBucketParamFile: %s \n",input->LeakyBucketParamFile); // Leaky Bucket Param file + calc_buffer(input); + fprintf(stdout,"--------------------------------------------------------------------------\n"); + #endif + fprintf(stdout,"POC must = frame# or field# for SNRs to be correct\n"); + 
fprintf(stdout,"--------------------------------------------------------------------------\n"); + fprintf(stdout," Frame POC Pic# QP SnrY SnrU SnrV Y:U:V Time(ms)\n"); + fprintf(stdout,"--------------------------------------------------------------------------\n"); + + } + + /*! + *********************************************************************** + * \brief + * main function for TML decoder + *********************************************************************** + */ + int main(int argc, char **argv) + { + // allocate memory for the structures + if ((input = (struct inp_par *)calloc(1, sizeof(struct inp_par)))==NULL) no_mem_exit("main: input"); + if ((snr = (struct snr_par *)calloc(1, sizeof(struct snr_par)))==NULL) no_mem_exit("main: snr"); + if ((img = (struct img_par *)calloc(1, sizeof(struct img_par)))==NULL) no_mem_exit("main: img"); + + + Configure (argc, argv); + + init_old_slice(); + + switch (input->FileFormat) + { + case 0: + OpenBitstreamFile (input->infile); + break; + case 1: + OpenRTPFile (input->infile); + break; + default: + printf ("Unsupported file format %d, exit\n", input->FileFormat); + } + + // Allocate Slice data struct + malloc_slice(input,img); + + init(img); + + dec_picture = NULL; + + dpb.init_done = 0; + g_nFrame = 0; + + init_out_buffer(); + + img->idr_psnr_number=input->ref_offset; + img->psnr_number=0; + + img->number=0; + img->type = I_SLICE; + img->dec_ref_pic_marking_buffer = NULL; + + // B pictures + Bframe_ctr=snr->frame_ctr=0; + + // time for total decoding session + tot_time = 0; + while (decode_one_frame(img, input, snr) != EOS) + ; + + report(input, img, snr); + free_slice(input,img); + FmoFinit(); + free_global_buffers(); + + flush_dpb(); + + #ifdef PAIR_FIELDS_IN_OUTPUT + flush_pending_output(p_out); + #endif + + CloseBitstreamFile(); + + close(p_out); + // fclose(p_out2); + if (p_ref!=-1) + close(p_ref); + #if TRACE + fclose(p_trace); + #endif + + ercClose(erc_errorVar); + + free_dpb(); + uninit_out_buffer(); + + 
+ free_colocated(Co_located); + free (input); + free (snr); + free (img); + + //while( !kbhit() ); + return 0; + } + + + /*! + *********************************************************************** + * \brief + * Initialize some arrays + *********************************************************************** + */ + void init(struct img_par *img) //!< image parameters + { + img->oldFrameSizeInMbs = -1; + + imgY_ref = NULL; + imgUV_ref = NULL; + } + + /*! + *********************************************************************** + * \brief + * Initialize FREXT variables + *********************************************************************** + */ + void init_frext(struct img_par *img) //!< image parameters + { + //pel bitdepth init + img->bitdepth_luma_qp_scale = 6*(img->bitdepth_luma - 8); + if(img->bitdepth_luma > img->bitdepth_chroma || active_sps->chroma_format_idc == YUV400) + img->pic_unit_bitsize_on_disk = (img->bitdepth_luma > 8)? 16:8; + else + img->pic_unit_bitsize_on_disk = (img->bitdepth_chroma > 8)? 16:8; + img->dc_pred_value = 1<<(img->bitdepth_luma - 1); + img->max_imgpel_value = (1<<img->bitdepth_luma) - 1; + + if (active_sps->chroma_format_idc != YUV400) + { + //for chrominance part + img->bitdepth_chroma_qp_scale = 6*(img->bitdepth_chroma - 8); + img->max_imgpel_value_uv = (1<<img->bitdepth_chroma) - 1; + img->num_blk8x8_uv = (1<<active_sps->chroma_format_idc)&(~(0x1)); + img->num_cdc_coeff = img->num_blk8x8_uv<<1; + img->mb_cr_size_x = (active_sps->chroma_format_idc==YUV420 || active_sps->chroma_format_idc==YUV422)? 8:16; + img->mb_cr_size_y = (active_sps->chroma_format_idc==YUV444 || active_sps->chroma_format_idc==YUV422)? 16:8; + + // Residue Color Transform + if(img->residue_transform_flag) + img->bitdepth_chroma_qp_scale += 6; + } + else + { + img->bitdepth_chroma_qp_scale = 0; + img->max_imgpel_value_uv = 0; + img->num_blk8x8_uv = 0; + img->num_cdc_coeff = 0; + img->mb_cr_size_x = 0; + img->mb_cr_size_y = 0; + } + + } + + + /*! 
+ ************************************************************************ + * \brief + * Read input from configuration file + * + * \par Input: + * Name of configuration filename + * + * \par Output + * none + ************************************************************************ + */ + void init_conf(struct inp_par *inp, char *config_filename) + { + FILE *fd; + int NAL_mode; + + // picture error concealment + int temp; + char tempval[100]; + + // read the decoder configuration file + if((fd=fopen(config_filename,"r")) == NULL) + { + snprintf(errortext, ET_SIZE, "Error: Control file %s not found\n",config_filename); + error(errortext, 300); + } + + fscanf(fd,"%s",inp->infile); // H.264 compressed input bitstream + fscanf(fd,"%*[^\n]"); + + fscanf(fd,"%s",inp->outfile); // RAW (YUV/RGB) output file + fscanf(fd,"%*[^\n]"); + + fscanf(fd,"%s",inp->reffile); // reference file + fscanf(fd,"%*[^\n]"); + + fscanf(fd,"%d",&(inp->write_uv)); // write UV in YUV 4:0:0 mode + fscanf(fd,"%*[^\n]"); + + fscanf(fd,"%d",&(NAL_mode)); // NAL mode + fscanf(fd,"%*[^\n]"); + + switch(NAL_mode) + { + case 0: + inp->FileFormat = PAR_OF_ANNEXB; + break; + case 1: + inp->FileFormat = PAR_OF_RTP; + break; + default: + snprintf(errortext, ET_SIZE, "NAL mode %i is not supported", NAL_mode); + error(errortext,400); + } + + fscanf(fd,"%d,",&inp->ref_offset); // offset used for SNR computation + fscanf(fd,"%*[^\n]"); + + fscanf(fd,"%d,",&inp->poc_scale); // offset used for SNR computation + fscanf(fd,"%*[^\n]"); + + + if (inp->poc_scale < 1 || inp->poc_scale > 10) + { + snprintf(errortext, ET_SIZE, "Poc Scale is %d. 
It has to be within range 1 to 10",inp->poc_scale); + error(errortext,1); + } + + inp->write_uv=1; + + // picture error concealment + img->conceal_mode = inp->conceal_mode = 0; + img->ref_poc_gap = inp->ref_poc_gap = 2; + img->poc_gap = inp->poc_gap = 2; + + #ifdef _LEAKYBUCKET_ + fscanf(fd,"%ld,",&inp->R_decoder); // Decoder rate + fscanf(fd, "%*[^\n]"); + fscanf(fd,"%ld,",&inp->B_decoder); // Decoder buffer size + fscanf(fd, "%*[^\n]"); + fscanf(fd,"%ld,",&inp->F_decoder); // Decoder initial delay + fscanf(fd, "%*[^\n]"); + fscanf(fd,"%s",inp->LeakyBucketParamFile); // file where Leaky Bucket params (computed by encoder) are stored + fscanf(fd,"%*[^\n]"); + #endif + + /* since error concealment parameters are added at the end of + decoder conf file we need to read the leakybucket params to get to + those parameters */ + #ifndef _LEAKYBUCKET_ + fscanf(fd,"%ld,",&temp); + fscanf(fd, "%*[^\n]"); + fscanf(fd,"%ld,",&temp); + fscanf(fd, "%*[^\n]"); + fscanf(fd,"%ld,",&temp); + fscanf(fd, "%*[^\n]"); + fscanf(fd,"%s",tempval); + fscanf(fd,"%*[^\n]"); + #endif + + fscanf(fd,"%d",&inp->conceal_mode); // Mode of Error Concealment + fscanf(fd,"%*[^\n]"); + img->conceal_mode = inp->conceal_mode; + fscanf(fd,"%d",&inp->ref_poc_gap); // POC gap depending on pattern + fscanf(fd,"%*[^\n]"); + img->ref_poc_gap = inp->ref_poc_gap; + fscanf(fd,"%d",&inp->poc_gap); // POC gap between consecutive frames in display order + fscanf(fd,"%*[^\n]"); + img->poc_gap = inp->poc_gap; + + fclose (fd); + } + + /*! 
+ ************************************************************************ + * \brief + * Reports the gathered information to appropriate outputs + * + * \par Input: + * struct inp_par *inp, + * struct img_par *img, + * struct snr_par *stat + * + * \par Output: + * None + ************************************************************************ + */ + void report(struct inp_par *inp, struct img_par *img, struct snr_par *snr) + { + #define OUTSTRING_SIZE 255 + char string[OUTSTRING_SIZE]; + FILE *p_log; + char yuv_formats[4][4]= { {"400"}, {"420"}, {"422"}, {"444"} }; + + #ifndef WIN32 + time_t now; + struct tm *l_time; + #else + char timebuf[128]; + #endif + + fprintf(stdout,"-------------------- Average SNR all frames ------------------------------\n"); + fprintf(stdout," SNR Y(dB) : %5.2f\n",snr->snr_ya); + fprintf(stdout," SNR U(dB) : %5.2f\n",snr->snr_ua); + fprintf(stdout," SNR V(dB) : %5.2f\n",snr->snr_va); + fprintf(stdout," Total decoding time : %.3f sec \n",tot_time*0.001); + fprintf(stdout,"--------------------------------------------------------------------------\n"); + fprintf(stdout," Exit JM %s decoder, ver %s ",JM, VERSION); + fprintf(stdout,"\n"); + // write to log file + + snprintf(string, OUTSTRING_SIZE, "%s", LOGFILE); + if ((p_log=fopen(string,"r"))==0) // check if file exist + { + if ((p_log=fopen(string,"a"))==0) + { + snprintf(errortext, ET_SIZE, "Error open file %s for appending",string); + error(errortext, 500); + } + else // Create header to new file + { + fprintf(p_log," -------------------------------------------------------------------------------------------------------------------\n"); + fprintf(p_log,"| Decoder statistics. 
This file is made first time, later runs are appended |\n"); + fprintf(p_log," ------------------------------------------------------------------------------------------------------------------- \n"); + fprintf(p_log,"| ver | Date | Time | Sequence |#Img| Format | YUV |Coding|SNRY 1|SNRU 1|SNRV 1|SNRY N|SNRU N|SNRV N|\n"); + fprintf(p_log," -------------------------------------------------------------------------------------------------------------------\n"); + } + } + else + { + fclose(p_log); + p_log=fopen(string,"a"); // File exist,just open for appending + } + + fprintf(p_log,"|%s/%-4s", VERSION, EXT_VERSION); + + #ifdef WIN32 + _strdate( timebuf ); + fprintf(p_log,"| %1.5s |",timebuf ); + + _strtime( timebuf); + fprintf(p_log," % 1.5s |",timebuf); + #else + now = time ((time_t *) NULL); // Get the system time and put it into 'now' as 'calender time' + time (&now); + l_time = localtime (&now); + strftime (string, sizeof string, "%d-%b-%Y", l_time); + fprintf(p_log,"| %1.5s |",string ); + + strftime (string, sizeof string, "%H:%M:%S", l_time); + fprintf(p_log,"| %1.5s |",string ); + #endif + + fprintf(p_log,"%20.20s|",inp->infile); + + fprintf(p_log,"%3d |",img->number); + fprintf(p_log,"%4dx%-4d|", img->width, img->height); + fprintf(p_log," %s |", &(yuv_formats[img->yuv_format][0])); + + if (active_pps->entropy_coding_mode_flag == UVLC) + fprintf(p_log," CAVLC|"); + else + fprintf(p_log," CABAC|"); + + + fprintf(p_log,"%6.3f|",snr->snr_y1); + fprintf(p_log,"%6.3f|",snr->snr_u1); + fprintf(p_log,"%6.3f|",snr->snr_v1); + fprintf(p_log,"%6.3f|",snr->snr_ya); + fprintf(p_log,"%6.3f|",snr->snr_ua); + fprintf(p_log,"%6.3f|\n",snr->snr_va); + + fclose(p_log); + + snprintf(string, OUTSTRING_SIZE,"%s", DATADECFILE); + p_log=fopen(string,"a"); + + if(Bframe_ctr != 0) // B picture used + { + fprintf(p_log, "%3d %2d %2d %2.2f %2.2f %2.2f %5d " + "%2.2f %2.2f %2.2f %5d " + "%2.2f %2.2f %2.2f %5d %.3f\n", + img->number, 0, img->qp, + snr->snr_y1, + snr->snr_u1, + 
+ snr->snr_v1, + 0, + 0.0, + 0.0, + 0.0, + 0, + snr->snr_ya, + snr->snr_ua, + snr->snr_va, + 0, + (double)0.001*tot_time/(img->number+Bframe_ctr-1)); + } + else + { + fprintf(p_log, "%3d %2d %2d %2.2f %2.2f %2.2f %5d " + "%2.2f %2.2f %2.2f %5d " + "%2.2f %2.2f %2.2f %5d %.3f\n", + img->number, 0, img->qp, + snr->snr_y1, + snr->snr_u1, + snr->snr_v1, + 0, + 0.0, + 0.0, + 0.0, + 0, + snr->snr_ya, + snr->snr_ua, + snr->snr_va, + 0, + (double)0.001*tot_time/img->number); + } + fclose(p_log); + } + + /*! + ************************************************************************ + * \brief + * Allocates a stand-alone partition structure. Structure should + * be freed by FreePartition(); + * data structures + * + * \par Input: + * n: number of partitions in the array + * \par return + * pointer to DataPartition Structure, zero-initialized + ************************************************************************ + */ + + DataPartition *AllocPartition(int n) + { + DataPartition *partArr, *dataPart; + int i; + + partArr = (DataPartition *) calloc(n, sizeof(DataPartition)); + if (partArr == NULL) + { + snprintf(errortext, ET_SIZE, "AllocPartition: Memory allocation for Data Partition failed"); + error(errortext, 100); + } + + for (i=0; i<n; i++) + { + dataPart = &(partArr[i]); + dataPart->bitstream = (Bitstream *) calloc(1, sizeof(Bitstream)); + if (dataPart->bitstream == NULL) + { + snprintf(errortext, ET_SIZE, "AllocPartition: Memory allocation for Bitstream failed"); + error(errortext, 100); + } + dataPart->bitstream->streamBuffer = (byte *) calloc(MAX_CODED_FRAME_SIZE, sizeof(byte)); + if (dataPart->bitstream->streamBuffer == NULL) + { + snprintf(errortext, ET_SIZE, "AllocPartition: Memory allocation for streamBuffer failed"); + error(errortext, 100); + } + } + return partArr; + } + + + + + /*! + ************************************************************************ + * \brief + * Frees a partition structure (array). 
+ * + * \par Input: + * Partition to be freed, size of partition Array (Number of Partitions) + * + * \par return + * None + * + * \note + * n must be the same as for the corresponding call of AllocPartition + ************************************************************************ + */ + + + void FreePartition (DataPartition *dp, int n) + { + int i; + + assert (dp != NULL); + assert (dp->bitstream != NULL); + assert (dp->bitstream->streamBuffer != NULL); + for (i=0; i<n; i++) + { + free (dp[i].bitstream->streamBuffer); + free (dp[i].bitstream); + } + free (dp); + } + + + /*! + ************************************************************************ + * \brief + * Allocates the slice structure along with its dependent + * data structures + * + * \par Input: + * Input Parameters struct inp_par *inp, struct img_par *img + ************************************************************************ + */ + void malloc_slice(struct inp_par *inp, struct img_par *img) + { + Slice *currSlice; + + img->currentSlice = (Slice *) calloc(1, sizeof(Slice)); + if ( (currSlice = img->currentSlice) == NULL) + { + snprintf(errortext, ET_SIZE, "Memory allocation for Slice datastruct in NAL-mode %d failed", inp->FileFormat); + error(errortext,100); + } + // img->currentSlice->rmpni_buffer=NULL; + //! you don't know whether we do CABAC here, hence initialize CABAC anyway + // if (inp->symbol_mode == CABAC) + if (1) + { + // create all context models + currSlice->mot_ctx = create_contexts_MotionInfo(); + currSlice->tex_ctx = create_contexts_TextureInfo(); + } + currSlice->max_part_nr = 3; //! assume data partitioning (worst case) for the following mallocs() + currSlice->partArr = AllocPartition(currSlice->max_part_nr); + } + + + /*! 
+ ************************************************************************ + * \brief + * Memory frees of the Slice structure and of its dependent + * data structures + * + * \par Input: + * Input Parameters struct inp_par *inp, struct img_par *img + ************************************************************************ + */ + void free_slice(struct inp_par *inp, struct img_par *img) + { + Slice *currSlice = img->currentSlice; + + FreePartition (currSlice->partArr, 3); + // if (inp->symbol_mode == CABAC) + if (1) + { + // delete all context models + delete_contexts_MotionInfo(currSlice->mot_ctx); + delete_contexts_TextureInfo(currSlice->tex_ctx); + } + free(img->currentSlice); + + currSlice = NULL; + } + + /*! + ************************************************************************ + * \brief + * Dynamic memory allocation of frame size related global buffers + * buffers are defined in global.h, allocated memory must be freed in + * void free_global_buffers() + * + * \par Input: + * Input Parameters struct inp_par *inp, Image Parameters struct img_par *img + * + * \par Output: + * Number of allocated bytes + *********************************************************************** + */ + int init_global_buffers() + { + int memory_size=0; + int quad_range, i; + + if (global_init_done) + { + free_global_buffers(); + } + + // allocate memory for reference frame in find_snr + memory_size += get_mem2Dpel(&imgY_ref, img->height, img->width); + + if (active_sps->chroma_format_idc != YUV400) + memory_size += get_mem3Dpel(&imgUV_ref, 2, img->height_cr, img->width_cr); + else + imgUV_ref=NULL; + + // allocate memory in structure img + if(((img->mb_data) = (Macroblock *) calloc(img->FrameSizeInMbs, sizeof(Macroblock))) == NULL) + no_mem_exit("init_global_buffers: img->mb_data"); + + if(((img->intra_block) = (int*)calloc(img->FrameSizeInMbs, sizeof(int))) == NULL) + no_mem_exit("init_global_buffers: img->intra_block"); + + memory_size += get_mem2Dint(&(img->ipredmode), 
4*img->PicWidthInMbs , 4*img->FrameHeightInMbs); + + memory_size += get_mem2Dint(&(img->field_anchor),4*img->FrameHeightInMbs, 4*img->PicWidthInMbs); + + memory_size += get_mem3Dint(&(img->wp_weight), 2, MAX_REFERENCE_PICTURES, 3); + memory_size += get_mem3Dint(&(img->wp_offset), 6, MAX_REFERENCE_PICTURES, 3); + memory_size += get_mem4Dint(&(img->wbp_weight), 6, MAX_REFERENCE_PICTURES, MAX_REFERENCE_PICTURES, 3); + + // CAVLC mem + memory_size += get_mem3Dint(&(img->nz_coeff), img->FrameSizeInMbs, 4, 4 + img->num_blk8x8_uv); + + memory_size += get_mem2Dint(&(img->siblock),img->PicWidthInMbs , img->FrameHeightInMbs); + + if(img->max_imgpel_value > img->max_imgpel_value_uv || active_sps->chroma_format_idc == YUV400) + quad_range = (img->max_imgpel_value + 1) * 2; + else + quad_range = (img->max_imgpel_value_uv + 1) * 2; + + if ((img->quad = (int*)calloc (quad_range, sizeof(int))) == NULL) + no_mem_exit ("init_img: img->quad"); + + for (i=0; i < quad_range/2; ++i) + { + img->quad[i]=i*i; + } + + global_init_done = 1; + + img->oldFrameSizeInMbs = img->FrameSizeInMbs; + + return (memory_size); + } + + /*! 
+ ************************************************************************ + * \brief + * Free allocated memory of frame size related global buffers + * buffers are defined in global.h, allocated memory is allocated in + * int init_global_buffers() + * + * \par Input: + * Input Parameters struct inp_par *inp, Image Parameters struct img_par *img + * + * \par Output: + * none + * + ************************************************************************ + */ + void free_global_buffers() + { + free_mem2Dpel (imgY_ref); + if (imgUV_ref) + free_mem3Dpel (imgUV_ref,2); + + // CAVLC free mem + free_mem3Dint(img->nz_coeff, img->oldFrameSizeInMbs); + + free_mem2Dint(img->siblock); + + // free mem, allocated for structure img + if (img->mb_data != NULL) free(img->mb_data); + + free (img->intra_block); + + free_mem2Dint (img->ipredmode); + + free_mem2Dint(img->field_anchor); + + free_mem3Dint(img->wp_weight, 2); + free_mem3Dint(img->wp_offset, 6); + free_mem4Dint(img->wbp_weight, 6, MAX_REFERENCE_PICTURES); + + free (img->quad); + + global_init_done = 0; + + } + + Index: llvm-test/MultiSource/Applications/JM/ldecod/leaky_bucket.c diff -c /dev/null llvm-test/MultiSource/Applications/JM/ldecod/leaky_bucket.c:1.1 *** /dev/null Sat Feb 11 04:33:42 2006 --- llvm-test/MultiSource/Applications/JM/ldecod/leaky_bucket.c Sat Feb 11 04:33:22 2006 *************** *** 0 **** --- 1,138 ---- + + /*! + ************************************************************************ + * \file leaky_bucket.c + * + * \brief + * Calculate if decoder leaky bucket parameters meets HRD constraints specified by encoder. + * + * \author + * Main contributors (see contributors.h for copyright, address and affiliation details) + * - Shankar Regunathan + ************************************************************************ + */ + + #include "contributors.h" + #include "global.h" + #include "stdlib.h" + + #ifdef _LEAKYBUCKET_ + /*! 
+ *********************************************************************** + * \brief + * Function to get unsigned long word from a file. + * \param fp + * Filepointer + * \return + * unsigned long double word + * \par SideEffects + * None. + * \par Notes + * File should be opened to read in binary format. + * \author + * Shankar Regunathan shanre at microsoft.com + * \date + * December 06, 2001. + *********************************************************************** + */ + /* gets unsigned double stored in Big Endian Order */ + unsigned long GetBigDoubleWord(FILE *fp) + { + register unsigned long dw; + dw = (unsigned long) (fgetc(fp) & 0xFF); + dw = ((unsigned long) (fgetc(fp) & 0xFF)) | (dw << 0x08); + dw = ((unsigned long) (fgetc(fp) & 0xFF)) | (dw << 0x08); + dw = ((unsigned long) (fgetc(fp) & 0xFF)) | (dw << 0x08); + return(dw); + } + + /*! + *********************************************************************** + * \brief + * Calculates if decoder leaky bucket parameters meets HRD constraints specified by encoder. + * \param inp + * Structure which contains decoder leaky bucket parameters. + * \return + * None + * \par SideEffects + * None. + * \par Notes + * Failure if LeakyBucketParam file is missing or if it does not have + * the correct number of entries. + * \author + * Shankar Regunathan shanre at microsoft.com + * \date + * December 06, 2001. 
+ *********************************************************************** + */ + + /* Main Routine to verify HRD compliance */ + void calc_buffer(struct inp_par *inp) + { + unsigned long NumberLeakyBuckets, *Rmin, *Bmin, *Fmin; + float B_interp, F_interp; + unsigned long iBucket; + float dnr, frac1, frac2; + unsigned long R_decoder, B_decoder, F_decoder; + FILE *outf; + + if ((outf=fopen(inp->LeakyBucketParamFile,"rb"))==NULL) + { + snprintf(errortext, ET_SIZE, "Error open file %s \n",inp->LeakyBucketParamFile); + error(errortext,1); + } + + NumberLeakyBuckets = GetBigDoubleWord(outf); + printf(" Number Leaky Buckets: %8ld \n\n", NumberLeakyBuckets); + Rmin = calloc(sizeof(unsigned long), NumberLeakyBuckets); + Bmin = calloc(sizeof(unsigned long), NumberLeakyBuckets); + Fmin = calloc(sizeof(unsigned long), NumberLeakyBuckets); + + for(iBucket =0; iBucket < NumberLeakyBuckets; iBucket++) + { + Rmin[iBucket] = GetBigDoubleWord(outf); + Bmin[iBucket] = GetBigDoubleWord(outf); + Fmin[iBucket] = GetBigDoubleWord(outf); + printf(" %8ld %8ld %8ld \n", Rmin[iBucket], Bmin[iBucket], Fmin[iBucket]); + } + fclose(outf); + + R_decoder = inp->R_decoder; + F_decoder = inp->F_decoder; + B_decoder = inp->B_decoder; + + for( iBucket =0; iBucket < NumberLeakyBuckets; iBucket++) + { + if(R_decoder < Rmin[iBucket]) + break; + } + + printf("\n"); + if(iBucket > 0 ) { + if(iBucket < NumberLeakyBuckets) { + dnr = (float) (Rmin[iBucket] - Rmin[iBucket-1]); + frac1 = (float) (R_decoder - Rmin[iBucket-1]); + frac2 = (float) (Rmin[iBucket] - R_decoder); + B_interp = (float) (Bmin[iBucket] * frac1 + Bmin[iBucket-1] * frac2) /dnr; + F_interp = (float) (Fmin[iBucket] * frac1 + Fmin[iBucket-1] * frac2) /dnr; + } + else { + B_interp = (float) Bmin[iBucket-1]; + F_interp = (float) Fmin[iBucket-1]; + } + printf(" Min.buffer %8.2f Decoder buffer size %ld \n Minimum Delay %8.2f DecoderDelay %ld \n", B_interp, B_decoder, F_interp, F_decoder); + if(B_decoder > B_interp && F_decoder > F_interp) + 
printf(" HRD Compliant \n"); + else + printf(" HRD Non Compliant \n"); + } + else { // (iBucket = 0) + printf(" Decoder Rate is too small; HRD cannot be verified \n"); + } + + free(Rmin); + free(Bmin); + free(Fmin); + return; + } + #endif Index: llvm-test/MultiSource/Applications/JM/ldecod/leaky_bucket.h diff -c /dev/null llvm-test/MultiSource/Applications/JM/ldecod/leaky_bucket.h:1.1 *** /dev/null Sat Feb 11 04:33:42 2006 --- llvm-test/MultiSource/Applications/JM/ldecod/leaky_bucket.h Sat Feb 11 04:33:22 2006 *************** *** 0 **** --- 1,26 ---- + + /*! + ************************************************************************************* + * \file leaky_bucket.h + * + * \brief + * Header for Leaky Buffer parameters + * + * \author + * Main contributors (see contributors.h for copyright, address and affiliation details) + * - Shankar Regunathan + ************************************************************************************* + */ + #ifndef _LEAKY_BUCKET_H_ + #define _LEAKY_BUCKET_H_ + + #include "global.h" + + #ifdef _LEAKYBUCKET_ + // Leaky Bucket functions + unsigned long GetBigDoubleWord(FILE *fp); + void calc_buffer(struct inp_par *inp); + #endif + + #endif + Index: llvm-test/MultiSource/Applications/JM/ldecod/loopFilter.c diff -c /dev/null llvm-test/MultiSource/Applications/JM/ldecod/loopFilter.c:1.1 *** /dev/null Sat Feb 11 04:33:42 2006 --- llvm-test/MultiSource/Applications/JM/ldecod/loopFilter.c Sat Feb 11 04:33:22 2006 *************** *** 0 **** --- 1,478 ---- + + /*! + ************************************************************************************* + * \file loopFilter.c + * + * \brief + * Filter to reduce blocking artifacts on a macroblock level. + * The filter strength is QP dependent. 
+ * + * \author + * Contributors: + * - Peter List Peter.List at t-systems.de: Original code (13-Aug-2001) + * - Jani Lainema Jani.Lainema at nokia.com: Some bug fixing, removal of recursiveness (16-Aug-2001) + * - Peter List Peter.List at t-systems.de: inplace filtering and various simplifications (10-Jan-2002) + * - Anthony Joch anthony at ubvideo.com: Simplified switching between filters and + * non-recursive default filter. (08-Jul-2002) + * - Cristina Gomila cristina.gomila at thomson.net: Simplification of the chroma deblocking + * from JVT-E089 (21-Nov-2002) + ************************************************************************************* + */ + #include + #include + #include + #include "global.h" + #include "image.h" + #include "mb_access.h" + #include "loopfilter.h" + + extern const byte QP_SCALE_CR[52] ; + + byte mixedModeEdgeFlag, fieldModeFilteringFlag; + + /*********************************************************************************************************/ + + #define IClip( Min, Max, Val) (((Val)<(Min))? (Min):(((Val)>(Max))? (Max):(Val))) + + // NOTE: In principle, the alpha and beta tables are calculated with the formulas below + // Alpha( qp ) = 0.8 * (2^(qp/6) - 1) + // Beta ( qp ) = 0.5 * qp - 7 + + // The tables actually used have been "hand optimized" though (by Anthony Joch). So, the + // table values might be a little different to formula-generated values. 
Also, the first + // few values of both tables is set to zero to force the filter off at low qp?s + + byte ALPHA_TABLE[52] = {0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,4,4,5,6, 7,8,9,10,12,13,15,17, 20,22,25,28,32,36,40,45, 50,56,63,71,80,90,101,113, 127,144,162,182,203,226,255,255} ; + byte BETA_TABLE[52] = {0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,2,2,2,3, 3,3,3, 4, 4, 4, 6, 6, 7, 7, 8, 8, 9, 9,10,10, 11,11,12,12,13,13, 14, 14, 15, 15, 16, 16, 17, 17, 18, 18} ; + byte CLIP_TAB[52][5] = + { + { 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0}, + { 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0},{ 0, 0, 0, 0, 0}, + { 0, 0, 0, 0, 0},{ 0, 0, 0, 1, 1},{ 0, 0, 0, 1, 1},{ 0, 0, 0, 1, 1},{ 0, 0, 0, 1, 1},{ 0, 0, 1, 1, 1},{ 0, 0, 1, 1, 1},{ 0, 1, 1, 1, 1}, + { 0, 1, 1, 1, 1},{ 0, 1, 1, 1, 1},{ 0, 1, 1, 1, 1},{ 0, 1, 1, 2, 2},{ 0, 1, 1, 2, 2},{ 0, 1, 1, 2, 2},{ 0, 1, 1, 2, 2},{ 0, 1, 2, 3, 3}, + { 0, 1, 2, 3, 3},{ 0, 2, 2, 3, 3},{ 0, 2, 2, 4, 4},{ 0, 2, 3, 4, 4},{ 0, 2, 3, 4, 4},{ 0, 3, 3, 5, 5},{ 0, 3, 4, 6, 6},{ 0, 3, 4, 6, 6}, + { 0, 4, 5, 7, 7},{ 0, 4, 5, 8, 8},{ 0, 4, 6, 9, 9},{ 0, 5, 7,10,10},{ 0, 6, 8,11,11},{ 0, 6, 8,13,13},{ 0, 7,10,14,14},{ 0, 8,11,16,16}, + { 0, 9,12,18,18},{ 0,10,13,20,20},{ 0,11,15,23,23},{ 0,13,17,25,25} + } ; + + char chroma_edge[2][4][4] = //[dir][edge][yuv_format] + { { {-1, 0, 0, 0}, + {-1,-1,-1, 1}, + {-1, 1, 1, 2}, + {-1,-1,-1, 3}}, + + { {-1, 0, 0, 0}, + {-1,-1, 1, 1}, + {-1, 1, 2, 2}, + {-1,-1, 3, 3}}}; + + void GetStrength(byte Strength[16],struct img_par *img,int MbQAddr,int dir,int edge, int mvlimit,StorablePicture *p); + void EdgeLoop(imgpel** Img, byte Strength[16],struct img_par *img, int MbQAddr, int AlphaC0Offset, int BetaOffset, int dir, int edge, int width, int yuv, int uv, StorablePicture *p); + void DeblockMb(ImageParameters *img, StorablePicture *p, int MbQAddr) ; + + /*! 
+ ***************************************************************************************** + * \brief + * Filter all macroblocks in order of increasing macroblock address. + ***************************************************************************************** + */ + void DeblockPicture(ImageParameters *img, StorablePicture *p) + { + unsigned i; + + for (i=0; iPicSizeInMbs; i++) + { + DeblockMb( img, p, i ) ; + } + } + + + /*! + ***************************************************************************************** + * \brief + * Deblocking filter for one macroblock. + ***************************************************************************************** + */ + + void DeblockMb(ImageParameters *img, StorablePicture *p, int MbQAddr) + { + int EdgeCondition; + int dir,edge; + byte Strength[16]; + int mb_x, mb_y; + + int filterNon8x8LumaEdgesFlag[4] = {1,1,1,1}; + int filterLeftMbEdgeFlag; + int filterTopMbEdgeFlag; + int fieldModeMbFlag; + int mvlimit=4; + int i, StrengthSum; + Macroblock *MbQ; + imgpel **imgY = p->imgY; + imgpel ***imgUV = p->imgUV; + + int edge_cr; + + + img->DeblockCall = 1; + get_mb_pos (MbQAddr, &mb_x, &mb_y); + filterLeftMbEdgeFlag = (mb_x != 0); + filterTopMbEdgeFlag = (mb_y != 0); + + MbQ = &(img->mb_data[MbQAddr]) ; // current Mb + + if (MbQ->mb_type == I8MB) + assert(MbQ->luma_transform_size_8x8_flag); + + filterNon8x8LumaEdgesFlag[1] = + filterNon8x8LumaEdgesFlag[3] = !(MbQ->luma_transform_size_8x8_flag); + + if (p->MbaffFrameFlag && mb_y==16 && MbQ->mb_field) + filterTopMbEdgeFlag = 0; + + fieldModeMbFlag = (p->structure!=FRAME) || (p->MbaffFrameFlag && MbQ->mb_field); + if (fieldModeMbFlag) + mvlimit = 2; + + // return, if filter is disabled + if (MbQ->LFDisableIdc==1) { + img->DeblockCall = 0; + return; + } + + if (MbQ->LFDisableIdc==2) + { + // don't filter at slice boundaries + filterLeftMbEdgeFlag = MbQ->mbAvailA; + // if this the bottom of a frame macroblock pair then always filter the top edge + if (p->MbaffFrameFlag && 
!MbQ->mb_field && (MbQAddr % 2)) filterTopMbEdgeFlag = 1; + else filterTopMbEdgeFlag = MbQ->mbAvailB; + } + + img->current_mb_nr = MbQAddr; + CheckAvailabilityOfNeighbors(); + + for( dir=0 ; dir<2 ; dir++ ) // vertical edges, than horicontal edges + { + EdgeCondition = (dir && filterTopMbEdgeFlag) || (!dir && filterLeftMbEdgeFlag); // can not filter beyond picture boundaries + for( edge=0 ; edge<4 ; edge++ ) // first 4 vertical strips of 16 pel + { // then 4 horicontal + if( edge || EdgeCondition ) + { + edge_cr = chroma_edge[dir][edge][p->chroma_format_idc]; + + GetStrength(Strength,img,MbQAddr,dir,edge, mvlimit, p); // Strength for 4 blks in 1 stripe + StrengthSum = Strength[0]; + for (i = 1; i < 16; i++) StrengthSum += Strength[i]; + if( StrengthSum ) // only if one of the 16 Strength bytes is != 0 + { + if (filterNon8x8LumaEdgesFlag[edge]) + EdgeLoop( imgY, Strength, img, MbQAddr, MbQ->LFAlphaC0Offset, MbQ->LFBetaOffset, dir, edge, p->size_x, 0, 0, p) ; + if( (imgUV != NULL) && (edge_cr >= 0)) + { + EdgeLoop( imgUV[0], Strength, img, MbQAddr, MbQ->LFAlphaC0Offset, MbQ->LFBetaOffset, dir, edge_cr, p->size_x_cr, 1, 0, p) ; + EdgeLoop( imgUV[1], Strength, img, MbQAddr, MbQ->LFAlphaC0Offset, MbQ->LFBetaOffset, dir, edge_cr, p->size_x_cr, 1, 1, p) ; + } + } + + if (dir && !edge && !MbQ->mb_field && mixedModeEdgeFlag) { + // this is the extra horizontal edge between a frame macroblock pair and a field above it + img->DeblockCall = 2; + GetStrength(Strength,img,MbQAddr,dir,4, mvlimit, p); // Strength for 4 blks in 1 stripe + //if( *((int*)Strength) ) // only if one of the 4 Strength bytes is != 0 + { + if (filterNon8x8LumaEdgesFlag[edge]) + EdgeLoop( imgY, Strength, img, MbQAddr, MbQ->LFAlphaC0Offset, MbQ->LFBetaOffset, dir, 4, p->size_x, 0, 0, p) ; + if( (imgUV != NULL) && (edge_cr >= 0)) + { + EdgeLoop( imgUV[0], Strength, img, MbQAddr, MbQ->LFAlphaC0Offset, MbQ->LFBetaOffset, dir, 4, p->size_x_cr, 1, 0, p) ; + EdgeLoop( imgUV[1], Strength, img, MbQAddr, 
MbQ->LFAlphaC0Offset, MbQ->LFBetaOffset, dir, 4, p->size_x_cr, 1, 1, p) ; + } + } + img->DeblockCall = 1; + } + + } + }//end edge + }//end loop dir + img->DeblockCall = 0; + + } + + /*! + ********************************************************************************************* + * \brief + * returns a buffer of 16 Strength values for one stripe in a mb (for different Frame types) + ********************************************************************************************* + */ + + int ININT_STRENGTH[4] = {0x04040404, 0x03030303, 0x03030303, 0x03030303} ; + byte BLK_NUM[2][4][4] = {{{0,4,8,12},{1,5,9,13},{2,6,10,14},{3,7,11,15}},{{0,1,2,3},{4,5,6,7},{8,9,10,11},{12,13,14,15}}} ; + byte BLK_4_TO_8[16] = {0,0,1,1,0,0,1,1,2,2,3,3,2,2,3,3} ; + #define ANY_INTRA (MbP->mb_type==I4MB||MbP->mb_type==I16MB||MbP->mb_type==IPCM||MbQ->mb_type==I4MB||MbQ->mb_type==I16MB||MbQ->mb_type==IPCM) + + void GetStrength(byte Strength[16],struct img_par *img,int MbQAddr,int dir,int edge, int mvlimit, StorablePicture *p) + { + int blkP, blkQ, idx; + int blk_x, blk_x2, blk_y, blk_y2 ; + short ***list0_mv = p->mv[LIST_0]; + short ***list1_mv = p->mv[LIST_1]; + char **list0_refIdxArr = p->ref_idx[LIST_0]; + char **list1_refIdxArr = p->ref_idx[LIST_1]; + int64 **list0_refPicIdArr = p->ref_pic_id[LIST_0]; + int64 **list1_refPicIdArr = p->ref_pic_id[LIST_1]; + int xQ, xP, yQ, yP; + int mb_x, mb_y; + Macroblock *MbQ; + Macroblock *MbP; + PixelPos pixP; + + MbQ = &(img->mb_data[MbQAddr]); + + for( idx=0 ; idx<16 ; idx++ ) + { + xQ = dir ? idx : edge << 2; + yQ = dir ? (edge < 4 ? 
edge << 2 : 1) : idx; + getNeighbour(MbQAddr, xQ - (1 - dir), yQ - dir, 1, &pixP); + xP = pixP.x; + yP = pixP.y; + MbP = &(img->mb_data[pixP.mb_addr]); + mixedModeEdgeFlag = MbQ->mb_field != MbP->mb_field; + + blkQ = ((yQ>>2)<<2) + (xQ>>2); + blkP = ((yP>>2)<<2) + (xP>>2); + + if ((p->slice_type==SP_SLICE)||(p->slice_type==SI_SLICE) ) + { + Strength[idx] = (edge == 0 && (((!p->MbaffFrameFlag && (p->structure==FRAME)) || + (p->MbaffFrameFlag && !MbP->mb_field && !MbQ->mb_field)) || + ((p->MbaffFrameFlag || (p->structure != FRAME)) && !dir))) ? 4 : 3; + } + else + { + // Start with Strength=3. or Strength=4 for Mb-edge + Strength[idx] = (edge == 0 && (((!p->MbaffFrameFlag && (p->structure==FRAME)) || + (p->MbaffFrameFlag && !MbP->mb_field && !MbQ->mb_field)) || + ((p->MbaffFrameFlag || (p->structure!=FRAME)) && !dir))) ? 4 : 3; + + if( !(MbP->mb_type==I4MB || MbP->mb_type==I16MB || MbP->mb_type==I8MB || MbP->mb_type==IPCM) + && !(MbQ->mb_type==I4MB || MbQ->mb_type==I16MB || MbQ->mb_type==I8MB || MbQ->mb_type==IPCM) ) + { + if( ((MbQ->cbp_blk & (1 << blkQ )) != 0) || ((MbP->cbp_blk & (1 << blkP)) != 0) ) + Strength[idx] = 2 ; + else + { // if no coefs, but vector difference >= 1 set Strength=1 + // if this is a mixed mode edge then one set of reference pictures will be frame and the + // other will be field + if (mixedModeEdgeFlag) + { + (Strength[idx] = 1); + } + else + { + get_mb_block_pos (MbQAddr, &mb_x, &mb_y); + blk_y = (mb_y<<2) + (blkQ >> 2) ; + blk_x = (mb_x<<2) + (blkQ & 3) ; + blk_y2 = pixP.pos_y >> 2; + blk_x2 = pixP.pos_x >> 2; + { + int64 ref_p0,ref_p1,ref_q0,ref_q1; + ref_p0 = list0_refIdxArr[blk_y] [blk_x] <0 ? INT64_MIN : list0_refPicIdArr[blk_y] [blk_x]; + ref_q0 = list0_refIdxArr[blk_y2][blk_x2]<0 ? INT64_MIN : list0_refPicIdArr[blk_y2][blk_x2]; + ref_p1 = list1_refIdxArr[blk_y] [blk_x] <0 ? INT64_MIN : list1_refPicIdArr[blk_y] [blk_x]; + ref_q1 = list1_refIdxArr[blk_y2][blk_x2]<0 ? 
INT64_MIN : list1_refPicIdArr[blk_y2][blk_x2]; + if ( ((ref_p0==ref_q0) && (ref_p1==ref_q1)) || + ((ref_p0==ref_q1) && (ref_p1==ref_q0))) + { + Strength[idx]=0; + // L0 and L1 reference pictures of p0 are different; q0 as well + if (ref_p0 != ref_p1) + { + // compare MV for the same reference picture + if (ref_p0==ref_q0) + { + Strength[idx] = (abs( list0_mv[blk_y][blk_x][0] - list0_mv[blk_y2][blk_x2][0]) >= 4) | + (abs( list0_mv[blk_y][blk_x][1] - list0_mv[blk_y2][blk_x2][1]) >= mvlimit) | + (abs( list1_mv[blk_y][blk_x][0] - list1_mv[blk_y2][blk_x2][0]) >= 4) | + (abs( list1_mv[blk_y][blk_x][1] - list1_mv[blk_y2][blk_x2][1]) >= mvlimit); + } + else + { + Strength[idx] = (abs( list0_mv[blk_y][blk_x][0] - list1_mv[blk_y2][blk_x2][0]) >= 4) | + (abs( list0_mv[blk_y][blk_x][1] - list1_mv[blk_y2][blk_x2][1]) >= mvlimit) | + (abs( list1_mv[blk_y][blk_x][0] - list0_mv[blk_y2][blk_x2][0]) >= 4) | + (abs( list1_mv[blk_y][blk_x][1] - list0_mv[blk_y2][blk_x2][1]) >= mvlimit); + } + } + else + { // L0 and L1 reference pictures of p0 are the same; q0 as well + + Strength[idx] = ((abs( list0_mv[blk_y][blk_x][0] - list0_mv[blk_y2][blk_x2][0]) >= 4) | + (abs( list0_mv[blk_y][blk_x][1] - list0_mv[blk_y2][blk_x2][1]) >= mvlimit ) | + (abs( list1_mv[blk_y][blk_x][0] - list1_mv[blk_y2][blk_x2][0]) >= 4) | + (abs( list1_mv[blk_y][blk_x][1] - list1_mv[blk_y2][blk_x2][1]) >= mvlimit)) + && + ((abs( list0_mv[blk_y][blk_x][0] - list1_mv[blk_y2][blk_x2][0]) >= 4) | + (abs( list0_mv[blk_y][blk_x][1] - list1_mv[blk_y2][blk_x2][1]) >= mvlimit) | + (abs( list1_mv[blk_y][blk_x][0] - list0_mv[blk_y2][blk_x2][0]) >= 4) | + (abs( list1_mv[blk_y][blk_x][1] - list0_mv[blk_y2][blk_x2][1]) >= mvlimit)); + } + } + else + { + Strength[idx] = 1; + } + } + } + } + } + } + } + } + + #define CQPOF(qp, uv) (Clip3(0, 51, qp + p->chroma_qp_offset[uv])) + + /*! 
+ ***************************************************************************************** + * \brief + * Filters one edge of 16 (luma) or 8 (chroma) pel + ***************************************************************************************** + */ + void EdgeLoop(imgpel** Img, byte Strength[16],struct img_par *img, int MbQAddr, int AlphaC0Offset, int BetaOffset, + int dir, int edge, int width, int yuv, int uv, StorablePicture *p) + { + int pel, ap = 0, aq = 0, Strng ; + int incP, incQ; + int C0, c0, Delta, dif, AbsDelta ; + int L2 = 0, L1, L0, R0, R1, R2 = 0, RL0, L3, R3 ; + int Alpha = 0, Beta = 0 ; + byte* ClipTab = NULL; + int small_gap; + int indexA, indexB; + int PelNum; + int StrengthIdx; + imgpel *SrcPtrP, *SrcPtrQ; + int QP; + int xP, xQ, yP, yQ; + Macroblock *MbQ, *MbP; + PixelPos pixP, pixQ; + int bitdepth_scale; + int pelnum_cr[2][4] = {{0,8,16,16}, {0,8, 8,16}}; //[dir:0=vert, 1=hor.][yuv_format] + + if (!yuv) + bitdepth_scale = 1<<(img->bitdepth_luma - 8); + else + bitdepth_scale = 1<<(img->bitdepth_chroma - 8); + + PelNum = yuv ? pelnum_cr[dir][p->chroma_format_idc] : 16 ; + + for( pel=0 ; pelmb_data[MbQAddr]); + MbP = &(img->mb_data[pixP.mb_addr]); + fieldModeFilteringFlag = MbQ->mb_field || MbP->mb_field; + StrengthIdx = (yuv&&(PelNum==8)) ? ((MbQ->mb_field && !MbP->mb_field) ? pel<<1 :((pel>>1)<<2)+(pel%2)) : pel ; + + if (pixP.available || (MbQ->LFDisableIdc== 0)) + { + incQ = dir ? ((fieldModeFilteringFlag && !MbQ->mb_field) ? 2 * width : width) : 1; + incP = dir ? ((fieldModeFilteringFlag && !MbP->mb_field) ? 2 * width : width) : 1; + SrcPtrQ = &(Img[pixQ.pos_y][pixQ.pos_x]); + SrcPtrP = &(Img[pixP.pos_y][pixP.pos_x]); + + // Average QP of the two blocks + QP = yuv ? 
(QP_SCALE_CR[CQPOF(MbP->qp,uv)] + QP_SCALE_CR[CQPOF(MbQ->qp,uv)] + 1) >> 1 : (MbP->qp + MbQ->qp + 1) >> 1; + + indexA = IClip(0, MAX_QP, QP + AlphaC0Offset); + indexB = IClip(0, MAX_QP, QP + BetaOffset); + + Alpha =ALPHA_TABLE[indexA] * bitdepth_scale; + Beta =BETA_TABLE[indexB] * bitdepth_scale; + ClipTab=CLIP_TAB[indexA]; + + L0 = SrcPtrP[0] ; + R0 = SrcPtrQ[0] ; + L1 = SrcPtrP[-incP] ; + R1 = SrcPtrQ[ incQ] ; + L2 = SrcPtrP[-incP*2] ; + R2 = SrcPtrQ[ incQ*2] ; + L3 = SrcPtrP[-incP*3] ; + R3 = SrcPtrQ[ incQ*3] ; + + if( (Strng = Strength[StrengthIdx]) ) + { + AbsDelta = abs( Delta = R0 - L0 ) ; + + if( AbsDelta < Alpha ) + { + C0 = ClipTab[ Strng ] * bitdepth_scale; + if( ((abs( R0 - R1) - Beta ) & (abs(L0 - L1) - Beta )) < 0 ) + { + if( !yuv) + { + aq = (abs( R0 - R2) - Beta ) < 0 ; + ap = (abs( L0 - L2) - Beta ) < 0 ; + } + + RL0 = L0 + R0 ; + + if(Strng == 4 ) // INTRA strong filtering + { + if( yuv) // Chroma + { + SrcPtrQ[0] = ((R1 << 1) + R0 + L1 + 2) >> 2; + SrcPtrP[0] = ((L1 << 1) + L0 + R1 + 2) >> 2; + } + else // Luma + { + small_gap = (AbsDelta < ((Alpha >> 2) + 2)); + + aq &= small_gap; + ap &= small_gap; + + SrcPtrQ[0] = aq ? ( L1 + ((R1 + RL0) << 1) + R2 + 4) >> 3 : ((R1 << 1) + R0 + L1 + 2) >> 2 ; + SrcPtrP[0] = ap ? ( R1 + ((L1 + RL0) << 1) + L2 + 4) >> 3 : ((L1 << 1) + L0 + R1 + 2) >> 2 ; + + SrcPtrQ[ incQ] = aq ? ( R2 + R0 + R1 + L0 + 2) >> 2 : R1; + SrcPtrP[-incP] = ap ? ( L2 + L1 + L0 + R0 + 2) >> 2 : L1; + + SrcPtrQ[ incQ*2] = aq ? (((R3 + R2) <<1) + R2 + R1 + RL0 + 4) >> 3 : R2; + SrcPtrP[-incP*2] = ap ? (((L3 + L2) <<1) + L2 + L1 + RL0 + 4) >> 3 : L2; + } + } + else // normal filtering + { + c0 = yuv? 
(C0+1):(C0 + ap + aq) ; + dif = IClip( -c0, c0, ( (Delta << 2) + (L1 - R1) + 4) >> 3 ) ; + if(!yuv) + { + SrcPtrP[0] = IClip(0, img->max_imgpel_value, L0 + dif) ; + SrcPtrQ[0] = IClip(0, img->max_imgpel_value, R0 - dif) ; + } + else + { + SrcPtrP[0] = IClip(0, img->max_imgpel_value_uv, L0 + dif) ; + SrcPtrQ[0] = IClip(0, img->max_imgpel_value_uv, R0 - dif) ; + } + + if( !yuv ) + { + if( ap ) + SrcPtrP[-incP] += IClip( -C0, C0, ( L2 + ((RL0 + 1) >> 1) - (L1<<1)) >> 1 ) ; + if( aq ) + SrcPtrQ[ incQ] += IClip( -C0, C0, ( R2 + ((RL0 + 1) >> 1) - (R1<<1)) >> 1 ) ; + } ; + } ; + } ; + } ; + } ; + } ; + } + } + Index: llvm-test/MultiSource/Applications/JM/ldecod/loopfilter.h diff -c /dev/null llvm-test/MultiSource/Applications/JM/ldecod/loopfilter.h:1.1 *** /dev/null Sat Feb 11 04:33:42 2006 --- llvm-test/MultiSource/Applications/JM/ldecod/loopfilter.h Sat Feb 11 04:33:22 2006 *************** *** 0 **** --- 1,18 ---- + /*! + ************************************************************************ + * \file + * loopfilter.h + * \brief + * external loop filter interface + ************************************************************************ + */ + + #ifndef _LOOPFILTER_H_ + #define _LOOPFILTER_H_ + + #include "global.h" + #include "mbuffer.h" + + void DeblockPicture(struct img_par *img, StorablePicture *p) ; + + #endif //_LOOPFILTER_H_ Index: llvm-test/MultiSource/Applications/JM/ldecod/macroblock.c diff -c /dev/null llvm-test/MultiSource/Applications/JM/ldecod/macroblock.c:1.1 *** /dev/null Sat Feb 11 04:33:42 2006 --- llvm-test/MultiSource/Applications/JM/ldecod/macroblock.c Sat Feb 11 04:33:22 2006 *************** *** 0 **** --- 1,5118 ---- + + /*! 
+ *********************************************************************** + * \file macroblock.c + * + * \brief + * Decode a Macroblock + * + * \author + * Main contributors (see contributors.h for copyright, address and affiliation details) + * - Inge Lille-Langøy + * - Rickard Sjoberg + * - Jani Lainema + * - Sebastian Purreiter + * - Thomas Wedi + * - Detlev Marpe + * - Gabi Blaettermann + * - Ye-Kui Wang + * - Lowell Winger + *********************************************************************** + */ + + #include "contributors.h" + + #include + #include + #include + #include + + #include "global.h" + #include "mbuffer.h" + #include "elements.h" + #include "errorconcealment.h" + #include "macroblock.h" + #include "fmo.h" + #include "cabac.h" + #include "vlc.h" + #include "image.h" + #include "mb_access.h" + #include "biaridecod.h" + + #include "transform8x8.h" + + #if TRACE + #define TRACE_STRING(s) strncpy(currSE.tracestring, s, TRACESTRING_SIZE) + #else + #define TRACE_STRING(s) // do nothing + #endif + + extern int last_dquant; + extern ColocatedParams *Co_located; + + + static void SetMotionVectorPredictor (struct img_par *img, + short *pmv_x, + short *pmv_y, + char ref_frame, + byte list, + char ***refPic, + short ****tmp_mv, + int block_x, + int block_y, + int blockshape_x, + int blockshape_y); + + + /*!
+ ************************************************************************ + * \brief + * initializes the current macroblock + ************************************************************************ + */ + void start_macroblock(struct img_par *img,struct inp_par *inp, int CurrentMBInScanOrder) + { + int i,j,k,l; + Macroblock *currMB; // intialization code deleted, see below, StW + + assert (img->current_mb_nr < img->PicSizeInMbs); + + currMB = &img->mb_data[img->current_mb_nr]; + + /* Update coordinates of the current macroblock */ + if (img->MbaffFrameFlag) + { + img->mb_x = (img->current_mb_nr)%((2*img->width)/MB_BLOCK_SIZE); + img->mb_y = 2*((img->current_mb_nr)/((2*img->width)/MB_BLOCK_SIZE)); + + if (img->mb_x % 2) + { + img->mb_y++; + } + + img->mb_x /= 2; + } + else + { + img->mb_x = (img->current_mb_nr)%(img->width/MB_BLOCK_SIZE); + img->mb_y = (img->current_mb_nr)/(img->width/MB_BLOCK_SIZE); + } + + /* Define vertical positions */ + img->block_y = img->mb_y * BLOCK_SIZE; /* luma block position */ + img->pix_y = img->mb_y * MB_BLOCK_SIZE; /* luma macroblock position */ + img->pix_c_y = img->mb_y * img->mb_cr_size_y; /* chroma macroblock position */ + + /* Define horizontal positions */ + img->block_x = img->mb_x * BLOCK_SIZE; /* luma block position */ + img->pix_x = img->mb_x * MB_BLOCK_SIZE; /* luma pixel position */ + + img->pix_c_x = img->mb_x * img->mb_cr_size_x; /* chroma pixel position */ + + // Save the slice number of this macroblock. 
When the macroblock below + // is coded it will use this to decide if prediction for above is possible + currMB->slice_nr = img->current_slice_nr; + + if (img->current_slice_nr >= MAX_NUM_SLICES) + { + error ("maximum number of supported slices exceeded, please recompile with increased value for MAX_NUM_SLICES", 200); + } + + dec_picture->slice_id[img->mb_y][img->mb_x] = img->current_slice_nr; + if (img->current_slice_nr > dec_picture->max_slice_id) + { + dec_picture->max_slice_id=img->current_slice_nr; + } + + CheckAvailabilityOfNeighbors(img); + + // Reset syntax element entries in MB struct + currMB->qp = img->qp ; + currMB->mb_type = 0; + currMB->delta_quant = 0; + currMB->cbp = 0; + currMB->cbp_blk = 0; + currMB->c_ipred_mode= DC_PRED_8; //GB + + for (l=0; l < 2; l++) + for (j=0; j < BLOCK_MULTIPLE; j++) + for (i=0; i < BLOCK_MULTIPLE; i++) + for (k=0; k < 2; k++) + currMB->mvd[l][j][i][k] = 0; + + currMB->cbp_bits = 0; + + // initialize img->m7 for ABT + for (j=0; jm7[i][j] = 0; + + // store filtering parameters for this MB + currMB->LFDisableIdc = img->currentSlice->LFDisableIdc; + currMB->LFAlphaC0Offset = img->currentSlice->LFAlphaC0Offset; + currMB->LFBetaOffset = img->currentSlice->LFBetaOffset; + + } + + /*! + ************************************************************************ + * \brief + * set coordinates of the next macroblock + * check end_of_slice condition + ************************************************************************ + */ + int exit_macroblock(struct img_par *img,struct inp_par *inp,int eos_bit) + { + //! The if() statement below resembles the original code, which tested + //! img->current_mb_nr == img->PicSizeInMbs. Both is, of course, nonsense + //! In an error prone environment, one can only be sure to have a new + //! picture by checking the tr of the next slice header! 
+ + // printf ("exit_macroblock: FmoGetLastMBOfPicture %d, img->current_mb_nr %d\n", FmoGetLastMBOfPicture(), img->current_mb_nr); + img->num_dec_mb++; + + if (img->num_dec_mb == img->PicSizeInMbs) + // if (img->current_mb_nr == FmoGetLastMBOfPicture(currSlice->structure)) + { + //thb + /* + if (currSlice->next_header != EOS) + currSlice->next_header = SOP; + */ + //the + assert (nal_startcode_follows (img, inp, eos_bit) == TRUE); + return TRUE; + } + // ask for last mb in the slice UVLC + else + { + // printf ("exit_macroblock: Slice %d old MB %d, now using MB %d\n", img->current_slice_nr, img->current_mb_nr, FmoGetNextMBNr (img->current_mb_nr)); + + img->current_mb_nr = FmoGetNextMBNr (img->current_mb_nr); + + if (img->current_mb_nr == -1) // End of Slice group, MUST be end of slice + { + assert (nal_startcode_follows (img, inp, eos_bit) == TRUE); + return TRUE; + } + + if(nal_startcode_follows(img, inp, eos_bit) == FALSE) + return FALSE; + + if(img->type == I_SLICE || img->type == SI_SLICE || active_pps->entropy_coding_mode_flag == CABAC) + return TRUE; + if(img->cod_counter<=0) + return TRUE; + return FALSE; + } + } + + /*! 
+ ************************************************************************ + * \brief + * Interpret the mb mode for P-Frames + ************************************************************************ + */ + void interpret_mb_mode_P(struct img_par *img) + { + int i; + const int ICBPTAB[6] = {0,16,32,15,31,47}; + Macroblock *currMB = &img->mb_data[img->current_mb_nr]; + int mbmode = currMB->mb_type; + + #define ZERO_P8x8 (mbmode==5) + #define MODE_IS_P8x8 (mbmode==4 || mbmode==5) + #define MODE_IS_I4x4 (mbmode==6) + #define I16OFFSET (mbmode-7) + #define MODE_IS_IPCM (mbmode==31) + + if(mbmode <4) + { + currMB->mb_type = mbmode; + for (i=0;i<4;i++) + { + currMB->b8mode[i] = mbmode; + currMB->b8pdir[i] = 0; + } + } + else if(MODE_IS_P8x8) + { + currMB->mb_type = P8x8; + img->allrefzero = ZERO_P8x8; + } + else if(MODE_IS_I4x4) + { + currMB->mb_type = I4MB; + for (i=0;i<4;i++) + { + currMB->b8mode[i] = IBLOCK; + currMB->b8pdir[i] = -1; + } + } + else if(MODE_IS_IPCM) + { + currMB->mb_type=IPCM; + + for (i=0;i<4;i++) + { + currMB->b8mode[i]=0; currMB->b8pdir[i]=-1; + } + currMB->cbp= -1; + currMB->i16mode = 0; + } + else + { + currMB->mb_type = I16MB; + for (i=0;i<4;i++) {currMB->b8mode[i]=0; currMB->b8pdir[i]=-1; } + currMB->cbp= ICBPTAB[(I16OFFSET)>>2]; + currMB->i16mode = (I16OFFSET) & 0x03; + } + } + + /*! 
+ ************************************************************************ + * \brief + * Interpret the mb mode for I-Frames + ************************************************************************ + */ + void interpret_mb_mode_I(struct img_par *img) + { + int i; + const int ICBPTAB[6] = {0,16,32,15,31,47}; + Macroblock *currMB = &img->mb_data[img->current_mb_nr]; + int mbmode = currMB->mb_type; + + if (mbmode==0) + { + currMB->mb_type = I4MB; + for (i=0;i<4;i++) {currMB->b8mode[i]=IBLOCK; currMB->b8pdir[i]=-1; } + } + else if(mbmode==25) + { + currMB->mb_type=IPCM; + + for (i=0;i<4;i++) {currMB->b8mode[i]=0; currMB->b8pdir[i]=-1; } + currMB->cbp= -1; + currMB->i16mode = 0; + + } + else + { + currMB->mb_type = I16MB; + for (i=0;i<4;i++) {currMB->b8mode[i]=0; currMB->b8pdir[i]=-1; } + currMB->cbp= ICBPTAB[(mbmode-1)>>2]; + currMB->i16mode = (mbmode-1) & 0x03; + } + } + + /*! + ************************************************************************ + * \brief + * Interpret the mb mode for B-Frames + ************************************************************************ + */ + void interpret_mb_mode_B(struct img_par *img) + { + static const int offset2pdir16x16[12] = {0, 0, 1, 2, 0,0,0,0,0,0,0,0}; + static const int offset2pdir16x8[22][2] = {{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{1,1},{0,0},{0,1},{0,0},{1,0}, + {0,0},{0,2},{0,0},{1,2},{0,0},{2,0},{0,0},{2,1},{0,0},{2,2},{0,0}}; + static const int offset2pdir8x16[22][2] = {{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{1,1},{0,0},{0,1},{0,0}, + {1,0},{0,0},{0,2},{0,0},{1,2},{0,0},{2,0},{0,0},{2,1},{0,0},{2,2}}; + + const int ICBPTAB[6] = {0,16,32,15,31,47}; + Macroblock *currMB = &img->mb_data[img->current_mb_nr]; + + int i, mbmode; + int mbtype = currMB->mb_type; + int *b8mode = currMB->b8mode; + int *b8pdir = currMB->b8pdir; + + //--- set mbtype, b8type, and b8pdir --- + if (mbtype==0) // direct + { + mbmode=0; for(i=0;i<4;i++) {b8mode[i]=0; b8pdir[i]=2; } + } + else if (mbtype==23) // intra4x4 + { + mbmode=I4MB; 
for(i=0;i<4;i++) {b8mode[i]=IBLOCK; b8pdir[i]=-1; } + } + else if ((mbtype>23) && (mbtype<48) ) // intra16x16 + { + mbmode=I16MB; for(i=0;i<4;i++) {b8mode[i]=0; b8pdir[i]=-1; } + currMB->cbp = ICBPTAB[(mbtype-24)>>2]; + currMB->i16mode = (mbtype-24) & 0x03; + } + else if (mbtype==22) // 8x8(+split) + { + mbmode=P8x8; // b8mode and pdir is transmitted in additional codewords + } + else if (mbtype<4) // 16x16 + { + mbmode=1; for(i=0;i<4;i++) {b8mode[i]=1; b8pdir[i]=offset2pdir16x16[mbtype]; } + } + else if(mbtype==48) + { + mbmode=IPCM; + for (i=0;i<4;i++) {currMB->b8mode[i]=0; currMB->b8pdir[i]=-1; } + currMB->cbp= -1; + currMB->i16mode = 0; + } + + else if (mbtype%2==0) // 16x8 + { + mbmode=2; for(i=0;i<4;i++) {b8mode[i]=2; b8pdir[i]=offset2pdir16x8 [mbtype][i/2]; } + } + else + { + mbmode=3; for(i=0;i<4;i++) {b8mode[i]=3; b8pdir[i]=offset2pdir8x16 [mbtype][i%2]; } + } + currMB->mb_type = mbmode; + } + /*! + ************************************************************************ + * \brief + * Interpret the mb mode for SI-Frames + ************************************************************************ + */ + void interpret_mb_mode_SI(struct img_par *img) + { + int i; + const int ICBPTAB[6] = {0,16,32,15,31,47}; + Macroblock *currMB = &img->mb_data[img->current_mb_nr]; + int mbmode = currMB->mb_type; + + if (mbmode==0) + { + currMB->mb_type = SI4MB; + for (i=0;i<4;i++) {currMB->b8mode[i]=IBLOCK; currMB->b8pdir[i]=-1; } + img->siblock[img->mb_x][img->mb_y]=1; + } + else if (mbmode==1) + { + currMB->mb_type = I4MB; + for (i=0;i<4;i++) {currMB->b8mode[i]=IBLOCK; currMB->b8pdir[i]=-1; } + } + else if(mbmode==26) + { + currMB->mb_type=IPCM; + + for (i=0;i<4;i++) {currMB->b8mode[i]=0; currMB->b8pdir[i]=-1; } + currMB->cbp= -1; + currMB->i16mode = 0; + + } + + else + { + currMB->mb_type = I16MB; + for (i=0;i<4;i++) {currMB->b8mode[i]=0; currMB->b8pdir[i]=-1; } + currMB->cbp= ICBPTAB[(mbmode-1)>>2]; + currMB->i16mode = (mbmode-2) & 0x03; + } + } + /*! 
+ ************************************************************************ + * \brief + * init macroblock I and P frames + ************************************************************************ + */ + void init_macroblock(struct img_par *img) + { + int i,j; + + for (i=0;imv[LIST_0][img->block_y+j][img->block_x+i][0]=0; + dec_picture->mv[LIST_0][img->block_y+j][img->block_x+i][1]=0; + dec_picture->mv[LIST_1][img->block_y+j][img->block_x+i][0]=0; + dec_picture->mv[LIST_1][img->block_y+j][img->block_x+i][1]=0; + + img->ipredmode[img->block_x+i][img->block_y+j] = DC_PRED; + } + } + + for (j=0; jref_idx[LIST_0][img->block_y+j][img->block_x+i] = -1; + dec_picture->ref_idx[LIST_1][img->block_y+j][img->block_x+i] = -1; + dec_picture->ref_pic_id[LIST_0][img->block_y+j][img->block_x+i] = INT64_MIN; + dec_picture->ref_pic_id[LIST_1][img->block_y+j][img->block_x+i] = INT64_MIN; + } + } + + + /*! + ************************************************************************ + * \brief + * Sets mode for 8x8 block + ************************************************************************ + */ + void SetB8Mode (struct img_par* img, Macroblock* currMB, int value, int i) + { + static const int p_v2b8 [ 5] = {4, 5, 6, 7, IBLOCK}; + static const int p_v2pd [ 5] = {0, 0, 0, 0, -1}; + static const int b_v2b8 [14] = {0, 4, 4, 4, 5, 6, 5, 6, 5, 6, 7, 7, 7, IBLOCK}; + static const int b_v2pd [14] = {2, 0, 1, 2, 0, 0, 1, 1, 2, 2, 0, 1, 2, -1}; + + if (img->type==B_SLICE) + { + currMB->b8mode[i] = b_v2b8[value]; + currMB->b8pdir[i] = b_v2pd[value]; + + } + else + { + currMB->b8mode[i] = p_v2b8[value]; + currMB->b8pdir[i] = p_v2pd[value]; + } + + } + + + void reset_coeffs() + { + int i, j, iii, jjj; + + // reset luma coeffs + for (i=0;icof[i][j][iii][jjj]=0; + } + + // reset chroma coeffs + for (j=4;j<(4+img->num_blk8x8_uv);j++) + { + for (i=0;i<4;i++) + for (iii=0;iii<4;iii++) + for (jjj=0;jjj<4;jjj++) + img->cof[i][j][iii][jjj]=0; + } + + // CAVLC + for (i=0; i < 4; i++) + for (j=0; j < (4 + 
img->num_blk8x8_uv); j++)
      img->nz_coeff[img->current_mb_nr][i][j]=0;

}

/*!
 ************************************************************************
 * \brief
 *    Infer mb_field of the current macroblock from its neighbours.
 *
 *    Copies mb_field from neighbour A when mbAvailA is set, otherwise
 *    from neighbour B (the top macroblock pair) when mbAvailB is set,
 *    otherwise sets mb_field to 0.
 ************************************************************************
 */
void field_flag_inference()
{
  Macroblock *currMB = &img->mb_data[img->current_mb_nr];

  if (currMB->mbAvailA)
  {
    currMB->mb_field = img->mb_data[currMB->mbAddrA].mb_field;
  }
  else
  {
    // check top macroblock pair
    if (currMB->mbAvailB)
    {
      currMB->mb_field = img->mb_data[currMB->mbAddrB].mb_field;
    }
    else
      currMB->mb_field = 0;
  }

}

/*!
 ************************************************************************
 * \brief
 *    Get the syntax elements from the NAL
 *
 *    Reads the macroblock-level syntax for img->current_mb_nr: skip
 *    information, mb_field_decoding_flag, mb_type, the 8x8 sub-partition
 *    modes, the transform-size flag and finally (through the helper
 *    readers) prediction modes, motion data, CBP and coefficients or
 *    IPCM samples.
 *
 * \param img
 *    decoder image state; the current macroblock entry is updated in place
 * \param inp
 *    input parameters, passed through to the syntax element readers
 *
 * \return
 *    DECODE_MB on every path of this function
 ************************************************************************
 */
int read_one_macroblock(struct img_par *img,struct inp_par *inp)
{
  int i;

  SyntaxElement currSE;
  Macroblock *currMB = &img->mb_data[img->current_mb_nr];

  Slice *currSlice = img->currentSlice;
  DataPartition *dP;
  int *partMap = assignSE2partition[currSlice->dp_mode];
  Macroblock *topMB = NULL;
  int  prevMbSkipped = 0;
  int  img_block_y;
  int  check_bottom, read_bottom, read_top;

  // For the odd-numbered macroblock of an MBAFF pair, remember whether the
  // even-numbered one (current_mb_nr-1) was skipped.
  if (img->MbaffFrameFlag)
  {
    if (img->current_mb_nr%2)
    {
      topMB= &img->mb_data[img->current_mb_nr-1];
      if(!(img->type == B_SLICE))
        prevMbSkipped = (topMB->mb_type == 0);
      else
        prevMbSkipped = topMB->skip_flag;
    }
    else
      prevMbSkipped = 0;
  }

  // Default field mode: 0 for an even macroblock number, otherwise inherited
  // from the preceding macroblock. May be overwritten below.
  if (img->current_mb_nr%2 == 0)
    currMB->mb_field = 0;
  else
    currMB->mb_field = img->mb_data[img->current_mb_nr-1].mb_field;


  currMB->qp = img->qp ;

  currSE.type = SE_MBTYPE;

  //  read MB mode *****************************************************************
  dP = &(currSlice->partArr[partMap[currSE.type]]);

  if (active_pps->entropy_coding_mode_flag == UVLC || dP->bitstream->ei_flag)   currSE.mapping = linfo_ue;

  if(img->type == I_SLICE || img->type == SI_SLICE)
  {
    // read MB aff
    if (img->MbaffFrameFlag && img->current_mb_nr%2==0)
    {
      TRACE_STRING("mb_field_decoding_flag");
      if (active_pps->entropy_coding_mode_flag == UVLC || dP->bitstream->ei_flag)
      {
        currSE.len = 1;
        readSyntaxElement_FLC(&currSE, dP->bitstream);
      }
      else
      {
        currSE.reading = readFieldModeInfo_CABAC;
        dP->readSyntaxElement(&currSE,img,inp,dP);
      }
      currMB->mb_field = currSE.value1;
    }
    if(active_pps->entropy_coding_mode_flag  == CABAC)
      CheckAvailabilityOfNeighborsCABAC();

    //  read MB type
    TRACE_STRING("mb_type");
    currSE.reading = readMB_typeInfo_CABAC;
    dP->readSyntaxElement(&currSE,img,inp,dP);

    currMB->mb_type = currSE.value1;
    if(!dP->bitstream->ei_flag)
      currMB->ei_flag = 0;
  }
  // non I/SI-slice CABAC
  else if (active_pps->entropy_coding_mode_flag == CABAC)
  {
    // read MB skip_flag
    if (img->MbaffFrameFlag && (img->current_mb_nr%2 == 0||prevMbSkipped))
      field_flag_inference();

    CheckAvailabilityOfNeighborsCABAC();
    TRACE_STRING("mb_skip_flag");
    currSE.reading = readMB_skip_flagInfo_CABAC;
    dP->readSyntaxElement(&currSE,img,inp,dP);

    currMB->mb_type   = currSE.value1;
    currMB->skip_flag = !(currSE.value1);

    if (img->type==B_SLICE)
      currMB->cbp = currSE.value2;

    if(!dP->bitstream->ei_flag)
      currMB->ei_flag = 0;

    if ((img->type==B_SLICE) && currSE.value1==0 && currSE.value2==0)
      img->cod_counter=0;

    // read MB AFF
    if (img->MbaffFrameFlag)
    {
      check_bottom=read_bottom=read_top=0;
      if (img->current_mb_nr%2==0)
      {
        check_bottom =  currMB->skip_flag;
        read_top = !check_bottom;
      }
      else
      {
        // topMB is non-NULL here: it was set above for odd macroblock numbers
        read_bottom = (topMB->skip_flag && (!currMB->skip_flag));
      }

      if (read_bottom || read_top)
      {
        TRACE_STRING("mb_field_decoding_flag");
        currSE.reading = readFieldModeInfo_CABAC;
        dP->readSyntaxElement(&currSE,img,inp,dP);
        currMB->mb_field = currSE.value1;
      }
      if (check_bottom)
        check_next_mb_and_get_field_mode_CABAC(&currSE,img,inp,dP);

    }

    CheckAvailabilityOfNeighborsCABAC();

    // read MB type
    if (currMB->mb_type != 0 )
    {
      currSE.reading = readMB_typeInfo_CABAC;
      TRACE_STRING("mb_type");
      dP->readSyntaxElement(&currSE,img,inp,dP);
      currMB->mb_type = currSE.value1;
      if(!dP->bitstream->ei_flag)
        currMB->ei_flag = 0;
    }
  }
  // VLC Non-Intra
  else
  {
    // cod_counter == -1 means no skip run is pending: read a new one
    if(img->cod_counter == -1)
    {
      TRACE_STRING("mb_skip_run");
      dP->readSyntaxElement(&currSE,img,inp,dP);
      img->cod_counter = currSE.value1;
    }
    if (img->cod_counter==0)
    {
      // read MB aff
      if ((img->MbaffFrameFlag) && ((img->current_mb_nr%2==0) || ((img->current_mb_nr%2) && prevMbSkipped)))
      {
        TRACE_STRING("mb_field_decoding_flag");
        currSE.len = 1;
        readSyntaxElement_FLC(&currSE, dP->bitstream);
        currMB->mb_field = currSE.value1;
      }

      // read MB type
      TRACE_STRING("mb_type");
      dP->readSyntaxElement(&currSE,img,inp,dP);
      if(img->type == P_SLICE || img->type == SP_SLICE)
        currSE.value1++;
      currMB->mb_type = currSE.value1;
      if(!dP->bitstream->ei_flag)
        currMB->ei_flag = 0;
      img->cod_counter--;
      currMB->skip_flag = 0;
    }
    else
    {
      img->cod_counter--;
      currMB->mb_type = 0;
      currMB->ei_flag = 0;
      currMB->skip_flag = 1;

      // read field flag of bottom block
      if(img->MbaffFrameFlag)
      {
        if(img->cod_counter == 0 && (img->current_mb_nr%2 == 0))
        {
          TRACE_STRING("mb_field_decoding_flag (of coded bottom mb)");
          currSE.len = 1;
          readSyntaxElement_FLC(&currSE, dP->bitstream);
          // Step the bit offset back by one after the 1-bit read; presumably this
          // turns the read into a peek so the next macroblock re-reads the flag.
          dP->bitstream->frame_bitoffset--;
          currMB->mb_field = currSE.value1;
        }
        else if(img->cod_counter > 0 && (img->current_mb_nr%2 == 0))
        {
          // check left macroblock pair first
          if (mb_is_available(img->current_mb_nr-2, img->current_mb_nr)&&((img->current_mb_nr%(img->PicWidthInMbs*2))!=0))
          {
            currMB->mb_field = img->mb_data[img->current_mb_nr-2].mb_field;
          }
          else
          {
            // check top macroblock pair
            if (mb_is_available(img->current_mb_nr-2*img->PicWidthInMbs, img->current_mb_nr))
            {
              currMB->mb_field = img->mb_data[img->current_mb_nr-2*img->PicWidthInMbs].mb_field;
            }
            else
              currMB->mb_field = 0;
          }
        }
      }
    }
  }

  dec_picture->mb_field[img->current_mb_nr] = currMB->mb_field;

  img->siblock[img->mb_x][img->mb_y]=0;

  if ((img->type==P_SLICE ))          // inter frame
    interpret_mb_mode_P(img);
  else if (img->type==I_SLICE)        // intra frame
    interpret_mb_mode_I(img);
  else if ((img->type==B_SLICE))      // B frame
    interpret_mb_mode_B(img);
  else if ((img->type==SP_SLICE))     // SP frame
    interpret_mb_mode_P(img);
  else if (img->type==SI_SLICE)       // SI frame
    interpret_mb_mode_SI(img);

  // Field macroblocks in an MBAFF frame: double both active reference counts
  if(img->MbaffFrameFlag)
  {
    if(currMB->mb_field)
    {
      img->num_ref_idx_l0_active <<=1;
      img->num_ref_idx_l1_active <<=1;
    }
  }

  //init NoMbPartLessThan8x8Flag
  currMB->NoMbPartLessThan8x8Flag = (IS_DIRECT(currMB) && !(active_sps->direct_8x8_inference_flag))? 0: 1;

  //====== READ 8x8 SUB-PARTITION MODES (modes of 8x8 blocks) and Intra VBST block modes ======
  if (IS_P8x8 (currMB))
  {
    currSE.type    = SE_MBTYPE;
    dP = &(currSlice->partArr[partMap[SE_MBTYPE]]);

    for (i=0; i<4; i++)
    {
      if (active_pps->entropy_coding_mode_flag ==UVLC || dP->bitstream->ei_flag) currSE.mapping = linfo_ue;
      else                                                                       currSE.reading = readB8_typeInfo_CABAC;

      TRACE_STRING("sub_mb_type");
      dP->readSyntaxElement (&currSE, img, inp, dP);
      SetB8Mode (img, currMB, currSE.value1, i);

      //set NoMbPartLessThan8x8Flag for P8x8 mode
      currMB->NoMbPartLessThan8x8Flag &= (currMB->b8mode[i]==0 && active_sps->direct_8x8_inference_flag) ||
                                         (currMB->b8mode[i]==4);
    }
    //--- init macroblock data ---
    init_macroblock       (img);
    readMotionInfoFromNAL (img, inp);
  }


  //============= Transform Size Flag for INTRA MBs =============
  //-------------------------------------------------------------
  //transform size flag for INTRA_4x4 and INTRA_8x8 modes
  if (currMB->mb_type == I4MB && img->Transform8x8Mode)
  {
    currSE.type   =  SE_HEADER;
    dP = &(currSlice->partArr[partMap[SE_HEADER]]);
    currSE.reading = readMB_transform_size_flag_CABAC;
    TRACE_STRING("transform size 8x8 flag");

    // read UVLC transform_size_8x8_flag
    if (active_pps->entropy_coding_mode_flag == UVLC || dP->bitstream->ei_flag)
    {
      currSE.len = 1;
      readSyntaxElement_FLC(&currSE, dP->bitstream);
    }
    else
    {
      dP->readSyntaxElement(&currSE,img,inp,dP);
    }

    currMB->luma_transform_size_8x8_flag = currSE.value1;

    if (currMB->luma_transform_size_8x8_flag)
    {
      currMB->mb_type = I8MB;
      for (i=0;i<4;i++) {currMB->b8mode[i]=I8MB; currMB->b8pdir[i]=-1; }
    }
  }
  else
  {
    currMB->luma_transform_size_8x8_flag = 0;
  }

  if(active_pps->constrained_intra_pred_flag && (img->type==P_SLICE|| img->type==B_SLICE))        // inter frame
  {
    if( !IS_INTRA(currMB) )
    {
      img->intra_block[img->current_mb_nr] = 0;
    }
  }

  //! TO for Error Concealment
  //! If we have an INTRA Macroblock and we lost the partition
  //! which contains the intra coefficients Copy MB would be better
  //! than just a grey block.
  //! Seems to be a bit at the wrong place to do this right here, but for this case
  //! up to now there is no other way.
  dP = &(currSlice->partArr[partMap[SE_CBP_INTRA]]);
  if(IS_INTRA (currMB) && dP->bitstream->ei_flag && img->number)
  {
    currMB->mb_type = 0;
    currMB->ei_flag = 1;
    for (i=0;i<4;i++) {currMB->b8mode[i]=currMB->b8pdir[i]=0; }
  }
  dP = &(currSlice->partArr[partMap[currSE.type]]);
  //! End TO


  //--- init macroblock data ---
  if (!IS_P8x8 (currMB))
    init_macroblock       (img);

  if (IS_DIRECT (currMB) && img->cod_counter >= 0)
  {
    currMB->cbp = 0;
    reset_coeffs();

    if (active_pps->entropy_coding_mode_flag ==CABAC)
      img->cod_counter=-1;

    return DECODE_MB;
  }

  if (IS_COPY (currMB)) //keep last macroblock
  {
    int i, j, k;
    short pmv[2];
    int zeroMotionAbove;
    int zeroMotionLeft;
    PixelPos mb_a, mb_b;
    int      a_mv_y = 0;
    int      a_ref_idx = 0;
    int      b_mv_y = 0;
    int      b_ref_idx = 0;
    int      list_offset = ((img->MbaffFrameFlag)&&(currMB->mb_field))? img->current_mb_nr%2 ? 4 : 2 : 0;

    getLuma4x4Neighbour(img->current_mb_nr,0,0,-1, 0,&mb_a);
    getLuma4x4Neighbour(img->current_mb_nr,0,0, 0,-1,&mb_b);

    // Bring the neighbours' vertical MV and reference index to the
    // frame/field mode of the current macroblock before comparing with zero.
    if (mb_a.available)
    {
      a_mv_y    = dec_picture->mv[LIST_0][mb_a.pos_y][mb_a.pos_x][1];
      a_ref_idx = dec_picture->ref_idx[LIST_0][mb_a.pos_y][mb_a.pos_x];

      if (currMB->mb_field && !img->mb_data[mb_a.mb_addr].mb_field)
      {
        a_mv_y    /=2;
        a_ref_idx *=2;
      }
      if (!currMB->mb_field && img->mb_data[mb_a.mb_addr].mb_field)
      {
        a_mv_y    *=2;
        a_ref_idx >>=1;
      }
    }

    if (mb_b.available)
    {
      b_mv_y    = dec_picture->mv[LIST_0][mb_b.pos_y][mb_b.pos_x][1];
      b_ref_idx = dec_picture->ref_idx[LIST_0][mb_b.pos_y][mb_b.pos_x];

      if (currMB->mb_field && !img->mb_data[mb_b.mb_addr].mb_field)
      {
        b_mv_y    /=2;
        b_ref_idx *=2;
      }
      if (!currMB->mb_field && img->mb_data[mb_b.mb_addr].mb_field)
      {
        b_mv_y    *=2;
        b_ref_idx >>=1;
      }
    }

    zeroMotionLeft  = !mb_a.available ? 1 : a_ref_idx==0 && dec_picture->mv[LIST_0][mb_a.pos_y][mb_a.pos_x][0]==0 && a_mv_y==0 ? 1 : 0;
    zeroMotionAbove = !mb_b.available ? 1 : b_ref_idx==0 && dec_picture->mv[LIST_0][mb_b.pos_y][mb_b.pos_x][0]==0 && b_mv_y==0 ? 1 : 0;

    currMB->cbp = 0;
    reset_coeffs();

    img_block_y   = img->block_y;

    // NOTE(review): the headers of the three loop nests below were lost in the
    // archived text ("for(i=0;imv[...", "for(i=0;iref_idx[..."). They are
    // reconstructed from the surviving index expressions (i, j, k) and the
    // size of pmv[2]; the BLOCK_SIZE bound is assumed -- confirm against the
    // upstream file.
    if (zeroMotionAbove || zeroMotionLeft)
    {
      for(i=0;i<BLOCK_SIZE;i++)
        for(j=0;j<BLOCK_SIZE;j++)
          for (k=0;k<2;k++)
            dec_picture->mv[LIST_0][img->block_y+j][img->block_x+i][k] = 0;
    }
    else
    {
      SetMotionVectorPredictor (img, pmv, pmv+1, 0, LIST_0, dec_picture->ref_idx, dec_picture->mv, 0, 0, 16, 16);

      for(i=0;i<BLOCK_SIZE;i++)
        for(j=0;j<BLOCK_SIZE;j++)
          for (k=0;k<2;k++)
          {
            dec_picture->mv[LIST_0][img_block_y+j][img->block_x+i][k] = pmv[k];
          }
    }

    for(i=0;i<BLOCK_SIZE;i++)
      for(j=0;j<BLOCK_SIZE;j++)
      {
        dec_picture->ref_idx[LIST_0][img_block_y+j][img->block_x+i] = 0;
        dec_picture->ref_pic_id[LIST_0][img_block_y+j][img->block_x+i] =
          dec_picture->ref_pic_num[img->current_slice_nr][LIST_0 + list_offset][(short)dec_picture->ref_idx[LIST_0][img_block_y+j][img->block_x+i]];
      }

    return DECODE_MB;
  }
  if(currMB->mb_type!=IPCM)
  {

    // intra prediction modes for a macroblock 4x4 **********************************************
    read_ipred_modes(img,inp);

    // read inter frame vector data *********************************************************
    if (IS_INTERMV (currMB) && (!IS_P8x8(currMB)))
    {
      readMotionInfoFromNAL (img, inp);
    }
    // read CBP and Coeffs  ***************************************************************
    readCBPandCoeffsFromNAL (img,inp);
  }
  else
  {
    //read pcm_alignment_zero_bit and pcm_byte[i]

    // here dP is assigned with the same dP as SE_MBTYPE, because IPCM syntax is in the
    // same category as MBTYPE
    dP = &(currSlice->partArr[partMap[SE_MBTYPE]]);
    readIPCMcoeffsFromNAL(img,inp,dP);
  }
  return DECODE_MB;
}



/*!
************************************************************************
 * \brief
 *    Initialize decoding engine after decoding an IPCM macroblock
 *    (for IPCM CABAC  28/11/2003)
 *
 *    Restarts the arithmetic decoder of every data partition of the
 *    current slice at that partition's current read_len position.
 *    Terminates the program via exit(1) if the slice's dp_mode is
 *    neither PAR_DP_1 nor PAR_DP_3.
 *
 * \param img
 *    decoder image state (provides the current slice and slice type)
 *
 * \author
 *    Dong Wang
 ************************************************************************
 */
void init_decoding_engine_IPCM(struct img_par *img)
{
  Slice *currSlice = img->currentSlice;
  Bitstream *currStream;
  int ByteStartPosition;
  int PartitionNumber;
  int i;

  if(currSlice->dp_mode==PAR_DP_1)
    PartitionNumber=1;
  else if(currSlice->dp_mode==PAR_DP_3)
    PartitionNumber=3;
  else
  {
    printf("Partition Mode is not supported\n");
    exit(1);
  }

  // NOTE(review): this loop header was lost in the archived text
  // ("for(i=0;ipartArr[i].bitstream;"); it is reconstructed from the
  // otherwise unused PartitionNumber and currStream locals -- confirm
  // against the upstream file.
  for(i=0;i<PartitionNumber;i++)
  {
    currStream = currSlice->partArr[i].bitstream;
    ByteStartPosition = currStream->read_len;


    arideco_start_decoding (&currSlice->partArr[i].de_cabac, currStream->streamBuffer, ByteStartPosition, &currStream->read_len, img->type);
  }
}




/*!
 ************************************************************************
 * \brief
 *    Read IPCM pcm_alignment_zero_bit and pcm_byte[i] from stream to img->cof
 *    (for IPCM CABAC and IPCM CAVLC)
 *
 *    Luma samples are stored in img->cof[i/4][j/4][i%4][j%4]; the two
 *    chroma planes go to block columns j/4+4, the second plane at block
 *    rows i/4+2. Chroma is skipped when chroma_format_idc is YUV400.
 *
 * \param img
 *    decoder image state; img->cof receives the samples
 * \param inp
 *    input parameters (not used in this function)
 * \param dP
 *    data partition whose bitstream holds the IPCM samples
 *
 * \author
 *    Dong Wang
 ************************************************************************
 */

void readIPCMcoeffsFromNAL(struct img_par *img, struct inp_par *inp, struct datapartition *dP)
{
  SyntaxElement currSE;
  int i,j;

  // NOTE(review): the luma loop headers in both branches were lost in the
  // archived text ("for(i=0;ibitstream);"). They are reconstructed from the
  // surviving body and closing braces; the MB_BLOCK_SIZE bound is assumed
  // -- confirm against the upstream file.

  //For CABAC, we don't need to read bits to let stream byte aligned
  //  because we have variable for integer bytes position
  if(active_pps->entropy_coding_mode_flag  == CABAC)
  {
    //read luma and chroma IPCM coefficients
    currSE.len=8;

    for(i=0;i<MB_BLOCK_SIZE;i++)
    {
      for(j=0;j<MB_BLOCK_SIZE;j++)
      {
        readIPCMBytes_CABAC(&currSE, dP->bitstream);
        img->cof[i/4][j/4][i%4][j%4]=currSE.value1;
      }
    }
    if (dec_picture->chroma_format_idc != YUV400)
    {
      TRACE_STRING("pcm_byte chroma");
      for(i=0;i<img->mb_cr_size_y;i++)
      {
        for(j=0;j<img->mb_cr_size_x;j++)
        {
          readIPCMBytes_CABAC(&currSE, dP->bitstream);
          img->cof[i/4][j/4+4][i%4][j%4]=currSE.value1;
        }
      }
      for(i=0;i<img->mb_cr_size_y;i++)
      {
        for(j=0;j<img->mb_cr_size_x;j++)
        {
          readIPCMBytes_CABAC(&currSE, dP->bitstream);
          img->cof[i/4+2][j/4+4][i%4][j%4]=currSE.value1;
        }
      }
    }
    //If the decoded MB is IPCM MB, decoding engine is initialized

    // here the decoding engine is directly initialized without checking End of Slice
    // The reason is that, whether current MB is the last MB in slice or not, there is
    // at least one 'end of slice' syntax after this MB. So when fetching bytes in this
    // initialisation process, we can guarantee there is bits available in bitstream.

    init_decoding_engine_IPCM(img);
  }
  else
  {
    //read bits to let stream byte aligned

    if((dP->bitstream->frame_bitoffset)%8!=0)
    {
      TRACE_STRING("pcm_alignment_zero_bit");
      currSE.len=8-(dP->bitstream->frame_bitoffset)%8;
      readSyntaxElement_FLC(&currSE, dP->bitstream);
    }

    //read luma and chroma IPCM coefficients
    currSE.len=img->bitdepth_luma;
    TRACE_STRING("pcm_byte luma");

    for(i=0;i<MB_BLOCK_SIZE;i++)
    {
      for(j=0;j<MB_BLOCK_SIZE;j++)
      {
        readSyntaxElement_FLC(&currSE, dP->bitstream);
        img->cof[i/4][j/4][i%4][j%4]=currSE.value1;
      }
    }
    currSE.len=img->bitdepth_chroma;
    if (dec_picture->chroma_format_idc != YUV400)
    {
      TRACE_STRING("pcm_byte chroma");
      for(i=0;i<img->mb_cr_size_y;i++)
      {
        for(j=0;j<img->mb_cr_size_x;j++)
        {
          readSyntaxElement_FLC(&currSE, dP->bitstream);
          img->cof[i/4][j/4+4][i%4][j%4]=currSE.value1;
        }
      }
      for(i=0;i<img->mb_cr_size_y;i++)
      {
        for(j=0;j<img->mb_cr_size_x;j++)
        {
          readSyntaxElement_FLC(&currSE, dP->bitstream);
          img->cof[i/4+2][j/4+4][i%4][j%4]=currSE.value1;
        }
      }
    }
  }
}



/*!
 ************************************************************************
 * \brief
 *    Read the intra prediction modes of the current macroblock.
 *
 *    For every 8x8 block coded as IBLOCK or I8MB, reads one mode per
 *    4x4 (IBLOCK) or 8x8 (I8MB) sub-block, derives the most probable
 *    mode from the left and upper neighbours and stores the decoded
 *    mode in img->ipredmode. Afterwards reads the chroma prediction
 *    mode when the macroblock is intra and the picture has chroma.
 *
 * \param img
 *    decoder image state (ipredmode and the current macroblock are updated)
 * \param inp
 *    input parameters, passed through to the syntax element readers
 ************************************************************************
 */
void read_ipred_modes(struct img_par *img,struct inp_par *inp)
{
  int b8,i,j,bi,bj,bx,by,dec;
  SyntaxElement currSE;
  Slice *currSlice;
  DataPartition *dP;
  int *partMap;
  Macroblock *currMB;
  int ts, ls;
  int mostProbableIntraPredMode;
  int upIntraPredMode;
  int leftIntraPredMode;
  int IntraChromaPredModeFlag;
  int bs_x, bs_y;
  int ii,jj;

  PixelPos left_block;
  PixelPos top_block;

  currMB = &img->mb_data[img->current_mb_nr];

  IntraChromaPredModeFlag = IS_INTRA(currMB);

  currSlice = img->currentSlice;
  partMap = assignSE2partition[currSlice->dp_mode];

  currSE.type = SE_INTRAPREDMODE;

  TRACE_STRING("intra4x4_pred_mode");
  dP = &(currSlice->partArr[partMap[currSE.type]]);

  if (!(active_pps->entropy_coding_mode_flag == UVLC || dP->bitstream->ei_flag))
    currSE.reading = readIntraPredMode_CABAC;

  for(b8=0;b8<4;b8++)  //loop 8x8 blocks
  {
    if((currMB->b8mode[b8]==IBLOCK )||(currMB->b8mode[b8]==I8MB))
    {
      bs_x = bs_y = (currMB->b8mode[b8] == I8MB)?8:4;

      IntraChromaPredModeFlag = 1;

      // ii/jj are the loop steps here AND the counters of the "set" loops at
      // the bottom; those loops exit with ii == bs_x>>2 and jj == bs_y>>2, so
      // the step values are unchanged for the next iteration.
      ii=(bs_x>>2);
      jj=(bs_y>>2);

      for(j=0;j<2;j+=jj)  //loop subblocks
        for(i=0;i<2;i+=ii)
        {
          //get from stream
          if (active_pps->entropy_coding_mode_flag == UVLC || dP->bitstream->ei_flag)
            readSyntaxElement_Intra4x4PredictionMode(&currSE,img,inp,dP);
          else
          {
            currSE.context=(b8<<2)+(j<<1)+i;
            dP->readSyntaxElement(&currSE,img,inp,dP);
          }

          bx = ((b8&1)<<1) + i;
          by = (b8&2)      + j;

          getLuma4x4Neighbour(img->current_mb_nr, bx, by, -1,  0, &left_block);
          getLuma4x4Neighbour(img->current_mb_nr, bx, by,  0, -1, &top_block);

          //get from array and decode
          bi = img->block_x + bx;
          bj = img->block_y + by;

          if (active_pps->constrained_intra_pred_flag)
          {
            left_block.available = left_block.available ? img->intra_block[left_block.mb_addr] : 0;
            top_block.available  = top_block.available  ? img->intra_block[top_block.mb_addr]  : 0;
          }

          // !! KS: not sure if the following is still correct...
          ts=ls=0;   // Check to see if the neighboring block is SI
          if (IS_OLDINTRA(currMB) && img->type == SI_SLICE)           // need support for MBINTLC1
          {
            if (left_block.available)
              if (img->siblock [left_block.pos_x][left_block.pos_y])
                ls=1;

            if (top_block.available)
              if (img->siblock [top_block.pos_x][top_block.pos_y])
                ts=1;
          }

          upIntraPredMode            = (top_block.available  &&(ts == 0)) ? img->ipredmode[top_block.pos_x ][top_block.pos_y ] : -1;
          leftIntraPredMode          = (left_block.available &&(ls == 0)) ? img->ipredmode[left_block.pos_x][left_block.pos_y] : -1;

          mostProbableIntraPredMode  = (upIntraPredMode < 0 || leftIntraPredMode < 0) ? DC_PRED : upIntraPredMode < leftIntraPredMode ? upIntraPredMode : leftIntraPredMode;

          // value1 == -1 selects the most probable mode; any other value is a
          // mode index that skips over the most probable mode.
          dec = (currSE.value1 == -1) ? mostProbableIntraPredMode : currSE.value1 + (currSE.value1 >= mostProbableIntraPredMode);

          //set
          for(jj=0;jj<(bs_y>>2);jj++)   //loop 4x4s in the subblock for 8x8 prediction setting
            for(ii=0;ii<(bs_x>>2);ii++)
              img->ipredmode[bi+ii][bj+jj]=dec;
        }
    }
  }

  if (IntraChromaPredModeFlag && dec_picture->chroma_format_idc != YUV400)
  {
    currSE.type = SE_INTRAPREDMODE;
    TRACE_STRING("intra_chroma_pred_mode");
    dP = &(currSlice->partArr[partMap[currSE.type]]);

    if (active_pps->entropy_coding_mode_flag == UVLC || dP->bitstream->ei_flag) currSE.mapping = linfo_ue;
    else                                                                       currSE.reading = readCIPredMode_CABAC;

    dP->readSyntaxElement(&currSE,img,inp,dP);
    currMB->c_ipred_mode = currSE.value1;

    if (currMB->c_ipred_mode < DC_PRED_8 || currMB->c_ipred_mode > PLANE_8)
    {
      error("illegal chroma intra pred mode!\n", 600);
    }
  }
}

/*!
+ ************************************************************************ + * \brief + * Set motion vector predictor + ************************************************************************ + */ + static void SetMotionVectorPredictor (struct img_par *img, + short *pmv_x, + short *pmv_y, + char ref_frame, + byte list, + char ***refPic, + short ****tmp_mv, + int block_x, + int block_y, + int blockshape_x, + int blockshape_y) + { + int mb_x = BLOCK_SIZE*block_x; + int mb_y = BLOCK_SIZE*block_y; + int mb_nr = img->current_mb_nr; + + int mv_a, mv_b, mv_c, pred_vec=0; + int mvPredType, rFrameL, rFrameU, rFrameUR; + int hv; + + + PixelPos block_a, block_b, block_c, block_d; + + getLuma4x4Neighbour(mb_nr, block_x, block_y, -1, 0, &block_a); + getLuma4x4Neighbour(mb_nr, block_x, block_y, 0, -1, &block_b); + getLuma4x4Neighbour(mb_nr, block_x, block_y, blockshape_x, -1, &block_c); + getLuma4x4Neighbour(mb_nr, block_x, block_y, -1, -1, &block_d); + + if (mb_y > 0) + { + if (mb_x < 8) // first column of 8x8 blocks + { + if (mb_y==8) + { + if (blockshape_x == 16) block_c.available = 0; + } + else + { + if (mb_x+blockshape_x == 8) block_c.available = 0; + } + } + else + { + if (mb_x+blockshape_x == 16) block_c.available = 0; + } + } + + if (!block_c.available) + { + block_c=block_d; + } + + mvPredType = MVPRED_MEDIAN; + + if (!img->MbaffFrameFlag) + { + rFrameL = block_a.available ? refPic[list][block_a.pos_y][block_a.pos_x] : -1; + rFrameU = block_b.available ? refPic[list][block_b.pos_y][block_b.pos_x] : -1; + rFrameUR = block_c.available ? refPic[list][block_c.pos_y][block_c.pos_x] : -1; + } + else + { + if (img->mb_data[img->current_mb_nr].mb_field) + { + rFrameL = block_a.available ? + img->mb_data[block_a.mb_addr].mb_field ? + refPic[list][block_a.pos_y][block_a.pos_x]: + refPic[list][block_a.pos_y][block_a.pos_x] * 2: + -1; + rFrameU = block_b.available ? + img->mb_data[block_b.mb_addr].mb_field ? 
+ refPic[list][block_b.pos_y][block_b.pos_x]: + refPic[list][block_b.pos_y][block_b.pos_x] * 2: + -1; + rFrameUR = block_c.available ? + img->mb_data[block_c.mb_addr].mb_field ? + refPic[list][block_c.pos_y][block_c.pos_x]: + refPic[list][block_c.pos_y][block_c.pos_x] * 2: + -1; + } + else + { + rFrameL = block_a.available ? + img->mb_data[block_a.mb_addr].mb_field ? + refPic[list][block_a.pos_y][block_a.pos_x] >>1: + refPic[list][block_a.pos_y][block_a.pos_x] : + -1; + rFrameU = block_b.available ? + img->mb_data[block_b.mb_addr].mb_field ? + refPic[list][block_b.pos_y][block_b.pos_x] >>1: + refPic[list][block_b.pos_y][block_b.pos_x] : + -1; + rFrameUR = block_c.available ? + img->mb_data[block_c.mb_addr].mb_field ? + refPic[list][block_c.pos_y][block_c.pos_x] >>1: + refPic[list][block_c.pos_y][block_c.pos_x] : + -1; + } + } + + + /* Prediction if only one of the neighbors uses the reference frame + * we are checking + */ + if(rFrameL == ref_frame && rFrameU != ref_frame && rFrameUR != ref_frame) mvPredType = MVPRED_L; + else if(rFrameL != ref_frame && rFrameU == ref_frame && rFrameUR != ref_frame) mvPredType = MVPRED_U; + else if(rFrameL != ref_frame && rFrameU != ref_frame && rFrameUR == ref_frame) mvPredType = MVPRED_UR; + // Directional predictions + if(blockshape_x == 8 && blockshape_y == 16) + { + if(mb_x == 0) + { + if(rFrameL == ref_frame) + mvPredType = MVPRED_L; + } + else + { + if( rFrameUR == ref_frame) + mvPredType = MVPRED_UR; + } + } + else if(blockshape_x == 16 && blockshape_y == 8) + { + if(mb_y == 0) + { + if(rFrameU == ref_frame) + mvPredType = MVPRED_U; + } + else + { + if(rFrameL == ref_frame) + mvPredType = MVPRED_L; + } + } + + for (hv=0; hv < 2; hv++) + { + if (!img->MbaffFrameFlag || hv==0) + { + mv_a = block_a.available ? tmp_mv[list][block_a.pos_y][block_a.pos_x][hv] : 0; + mv_b = block_b.available ? tmp_mv[list][block_b.pos_y][block_b.pos_x][hv] : 0; + mv_c = block_c.available ? 
tmp_mv[list][block_c.pos_y][block_c.pos_x][hv] : 0; + } + else + { + if (img->mb_data[img->current_mb_nr].mb_field) + { + mv_a = block_a.available ? img->mb_data[block_a.mb_addr].mb_field? + tmp_mv[list][block_a.pos_y][block_a.pos_x][hv]: + tmp_mv[list][block_a.pos_y][block_a.pos_x][hv] / 2: + 0; + mv_b = block_b.available ? img->mb_data[block_b.mb_addr].mb_field? + tmp_mv[list][block_b.pos_y][block_b.pos_x][hv]: + tmp_mv[list][block_b.pos_y][block_b.pos_x][hv] / 2: + 0; + mv_c = block_c.available ? img->mb_data[block_c.mb_addr].mb_field? + tmp_mv[list][block_c.pos_y][block_c.pos_x][hv]: + tmp_mv[list][block_c.pos_y][block_c.pos_x][hv] / 2: + 0; + } + else + { + mv_a = block_a.available ? img->mb_data[block_a.mb_addr].mb_field? + tmp_mv[list][block_a.pos_y][block_a.pos_x][hv] * 2: + tmp_mv[list][block_a.pos_y][block_a.pos_x][hv]: + 0; + mv_b = block_b.available ? img->mb_data[block_b.mb_addr].mb_field? + tmp_mv[list][block_b.pos_y][block_b.pos_x][hv] * 2: + tmp_mv[list][block_b.pos_y][block_b.pos_x][hv]: + 0; + mv_c = block_c.available ? img->mb_data[block_c.mb_addr].mb_field? + tmp_mv[list][block_c.pos_y][block_c.pos_x][hv] * 2: + tmp_mv[list][block_c.pos_y][block_c.pos_x][hv]: + 0; + } + } + + switch (mvPredType) + { + case MVPRED_MEDIAN: + if(!(block_b.available || block_c.available)) + pred_vec = mv_a; + else + pred_vec = mv_a+mv_b+mv_c-min(mv_a,min(mv_b,mv_c))-max(mv_a,max(mv_b,mv_c)); + break; + case MVPRED_L: + pred_vec = mv_a; + break; + case MVPRED_U: + pred_vec = mv_b; + break; + case MVPRED_UR: + pred_vec = mv_c; + break; + default: + break; + } + + if (hv==0) *pmv_x = pred_vec; + else *pmv_y = pred_vec; + + } + } + + + /*! + ************************************************************************ + * \brief + * Set context for reference frames + ************************************************************************ + */ + int + BType2CtxRef (int btype) + { + if (btype<4) return 0; + else return 1; + } + + /*! 
+ ************************************************************************ + * \brief + * Read motion info + ************************************************************************ + */ + void readMotionInfoFromNAL (struct img_par *img, struct inp_par *inp) + { + int i,j,k; + int step_h,step_v; + int curr_mvd; + Macroblock *currMB = &img->mb_data[img->current_mb_nr]; + SyntaxElement currSE; + Slice *currSlice = img->currentSlice; + DataPartition *dP; + int *partMap = assignSE2partition[currSlice->dp_mode]; + int bframe = (img->type==B_SLICE); + int partmode = (IS_P8x8(currMB)?4:currMB->mb_type); + int step_h0 = BLOCK_STEP [partmode][0]; + int step_v0 = BLOCK_STEP [partmode][1]; + + int mv_mode, i0, j0; + char refframe; + short pmv[2]; + int j4, i4, ii,jj; + int vec; + + int mv_scale = 0; + + int flag_mode; + + int list_offset = ((img->MbaffFrameFlag)&&(currMB->mb_field))? img->current_mb_nr%2 ? 4 : 2 : 0; + + byte ** moving_block; + short **** co_located_mv; + char *** co_located_ref_idx; + int64 *** co_located_ref_id; + + if ((img->MbaffFrameFlag)&&(currMB->mb_field)) + { + if(img->current_mb_nr%2) + { + moving_block = Co_located->bottom_moving_block; + co_located_mv = Co_located->bottom_mv; + co_located_ref_idx = Co_located->bottom_ref_idx; + co_located_ref_id = Co_located->bottom_ref_pic_id; + } + else + { + moving_block = Co_located->top_moving_block; + co_located_mv = Co_located->top_mv; + co_located_ref_idx = Co_located->top_ref_idx; + co_located_ref_id = Co_located->top_ref_pic_id; + } + } + else + { + moving_block = Co_located->moving_block; + co_located_mv = Co_located->mv; + co_located_ref_idx = Co_located->ref_idx; + co_located_ref_id = Co_located->ref_pic_id; + } + + if (bframe && IS_P8x8 (currMB)) + { + if (img->direct_spatial_mv_pred_flag) + { + int imgblock_y= ((img->MbaffFrameFlag)&&(currMB->mb_field))? (img->current_mb_nr%2) ? 
(img->block_y-4)/2:img->block_y/2: img->block_y; + int fw_rFrameL, fw_rFrameU, fw_rFrameUL, fw_rFrameUR; + int bw_rFrameL, bw_rFrameU, bw_rFrameUL, bw_rFrameUR; + + PixelPos mb_left, mb_up, mb_upleft, mb_upright; + + char fw_rFrame,bw_rFrame; + short pmvfw[2]={0,0}, + pmvbw[2]={0,0}; + + + getLuma4x4Neighbour(img->current_mb_nr, 0, 0, -1, 0, &mb_left); + getLuma4x4Neighbour(img->current_mb_nr, 0, 0, 0, -1, &mb_up); + getLuma4x4Neighbour(img->current_mb_nr, 0, 0, 16, -1, &mb_upright); + getLuma4x4Neighbour(img->current_mb_nr, 0, 0, -1, -1, &mb_upleft); + + if (!img->MbaffFrameFlag) + { + fw_rFrameL = mb_left.available ? dec_picture->ref_idx[LIST_0][mb_left.pos_y][mb_left.pos_x] : -1; + fw_rFrameU = mb_up.available ? dec_picture->ref_idx[LIST_0][mb_up.pos_y][mb_up.pos_x] : -1; + fw_rFrameUL = mb_upleft.available ? dec_picture->ref_idx[LIST_0][mb_upleft.pos_y][mb_upleft.pos_x] : -1; + fw_rFrameUR = mb_upright.available ? dec_picture->ref_idx[LIST_0][mb_upright.pos_y][mb_upright.pos_x] : fw_rFrameUL; + + bw_rFrameL = mb_left.available ? dec_picture->ref_idx[LIST_1][mb_left.pos_y][mb_left.pos_x] : -1; + bw_rFrameU = mb_up.available ? dec_picture->ref_idx[LIST_1][mb_up.pos_y][mb_up.pos_x] : -1; + bw_rFrameUL = mb_upleft.available ? dec_picture->ref_idx[LIST_1][mb_upleft.pos_y][mb_upleft.pos_x] : -1; + bw_rFrameUR = mb_upright.available ? dec_picture->ref_idx[LIST_1][mb_upright.pos_y][mb_upright.pos_x] : bw_rFrameUL; + } + else + { + if (img->mb_data[img->current_mb_nr].mb_field) + { + fw_rFrameL = mb_left.available ? + img->mb_data[mb_left.mb_addr].mb_field || dec_picture->ref_idx[LIST_0][mb_left.pos_y][mb_left.pos_x] < 0? + dec_picture->ref_idx[LIST_0][mb_left.pos_y][mb_left.pos_x] : + dec_picture->ref_idx[LIST_0][mb_left.pos_y][mb_left.pos_x] * 2: -1; + fw_rFrameU = mb_up.available ? + img->mb_data[mb_up.mb_addr].mb_field || dec_picture->ref_idx[LIST_0][mb_up.pos_y][mb_up.pos_x] < 0? 
+ dec_picture->ref_idx[LIST_0][mb_up.pos_y][mb_up.pos_x] : + dec_picture->ref_idx[LIST_0][mb_up.pos_y][mb_up.pos_x] * 2: -1; + + fw_rFrameUL = mb_upleft.available ? + img->mb_data[mb_upleft.mb_addr].mb_field || dec_picture->ref_idx[LIST_0][mb_upleft.pos_y][mb_upleft.pos_x] < 0? + dec_picture->ref_idx[LIST_0][mb_upleft.pos_y][mb_upleft.pos_x] : + dec_picture->ref_idx[LIST_0][mb_upleft.pos_y][mb_upleft.pos_x] *2: -1; + + fw_rFrameUR = mb_upright.available ? + img->mb_data[mb_upright.mb_addr].mb_field || dec_picture->ref_idx[LIST_0][mb_upright.pos_y][mb_upright.pos_x] < 0 ? + dec_picture->ref_idx[LIST_0][mb_upright.pos_y][mb_upright.pos_x] : + dec_picture->ref_idx[LIST_0][mb_upright.pos_y][mb_upright.pos_x] * 2: fw_rFrameUL; + + bw_rFrameL = mb_left.available ? + img->mb_data[mb_left.mb_addr].mb_field || dec_picture->ref_idx[LIST_1][mb_left.pos_y][mb_left.pos_x] < 0 ? + dec_picture->ref_idx[LIST_1][mb_left.pos_y][mb_left.pos_x] : + dec_picture->ref_idx[LIST_1][mb_left.pos_y][mb_left.pos_x] * 2: -1; + + bw_rFrameU = mb_up.available ? + img->mb_data[mb_up.mb_addr].mb_field || dec_picture->ref_idx[LIST_1][mb_up.pos_y][mb_up.pos_x] < 0 ? + dec_picture->ref_idx[LIST_1][mb_up.pos_y][mb_up.pos_x] : + dec_picture->ref_idx[LIST_1][mb_up.pos_y][mb_up.pos_x] * 2: -1; + + bw_rFrameUL = mb_upleft.available ? + img->mb_data[mb_upleft.mb_addr].mb_field || dec_picture->ref_idx[LIST_1][mb_upleft.pos_y][mb_upleft.pos_x] < 0 ? + dec_picture->ref_idx[LIST_1][mb_upleft.pos_y][mb_upleft.pos_x] : + dec_picture->ref_idx[LIST_1][mb_upleft.pos_y][mb_upleft.pos_x] *2: -1; + + bw_rFrameUR = mb_upright.available ? + img->mb_data[mb_upright.mb_addr].mb_field || dec_picture->ref_idx[LIST_1][mb_upright.pos_y][mb_upright.pos_x] < 0 ? + dec_picture->ref_idx[LIST_1][mb_upright.pos_y][mb_upright.pos_x] : + dec_picture->ref_idx[LIST_1][mb_upright.pos_y][mb_upright.pos_x] * 2: bw_rFrameUL; + + } + else + { + fw_rFrameL = mb_left.available ? 
+ img->mb_data[mb_left.mb_addr].mb_field || dec_picture->ref_idx[LIST_0][mb_left.pos_y][mb_left.pos_x] < 0 ? + dec_picture->ref_idx[LIST_0][mb_left.pos_y][mb_left.pos_x] >> 1 : + dec_picture->ref_idx[LIST_0][mb_left.pos_y][mb_left.pos_x]: -1; + + fw_rFrameU = mb_up.available ? + img->mb_data[mb_up.mb_addr].mb_field || dec_picture->ref_idx[LIST_0][mb_up.pos_y][mb_up.pos_x] < 0 ? + dec_picture->ref_idx[LIST_0][mb_up.pos_y][mb_up.pos_x] >> 1 : + dec_picture->ref_idx[LIST_0][mb_up.pos_y][mb_up.pos_x] : -1; + + fw_rFrameUL = mb_upleft.available ? + img->mb_data[mb_upleft.mb_addr].mb_field || dec_picture->ref_idx[LIST_0][mb_upleft.pos_y][mb_upleft.pos_x] < 0 ? + dec_picture->ref_idx[LIST_0][mb_upleft.pos_y][mb_upleft.pos_x]>> 1 : + dec_picture->ref_idx[LIST_0][mb_upleft.pos_y][mb_upleft.pos_x] : -1; + + fw_rFrameUR = mb_upright.available ? + img->mb_data[mb_upright.mb_addr].mb_field || dec_picture->ref_idx[LIST_0][mb_upright.pos_y][mb_upright.pos_x] < 0 ? + dec_picture->ref_idx[LIST_0][mb_upright.pos_y][mb_upright.pos_x] >> 1 : + dec_picture->ref_idx[LIST_0][mb_upright.pos_y][mb_upright.pos_x] : fw_rFrameUL; + + bw_rFrameL = mb_left.available ? + img->mb_data[mb_left.mb_addr].mb_field || dec_picture->ref_idx[LIST_1][mb_left.pos_y][mb_left.pos_x] < 0 ? + dec_picture->ref_idx[LIST_1][mb_left.pos_y][mb_left.pos_x] >> 1 : + dec_picture->ref_idx[LIST_1][mb_left.pos_y][mb_left.pos_x] : -1; + bw_rFrameU = mb_up.available ? + img->mb_data[mb_up.mb_addr].mb_field || dec_picture->ref_idx[LIST_1][mb_up.pos_y][mb_up.pos_x] < 0 ? + dec_picture->ref_idx[LIST_1][mb_up.pos_y][mb_up.pos_x] >> 1 : + dec_picture->ref_idx[LIST_1][mb_up.pos_y][mb_up.pos_x] : -1; + + bw_rFrameUL = mb_upleft.available ? + img->mb_data[mb_upleft.mb_addr].mb_field || dec_picture->ref_idx[LIST_1][mb_upleft.pos_y][mb_upleft.pos_x] < 0 ? 
+ dec_picture->ref_idx[LIST_1][mb_upleft.pos_y][mb_upleft.pos_x] >> 1 : + dec_picture->ref_idx[LIST_1][mb_upleft.pos_y][mb_upleft.pos_x] : -1; + + bw_rFrameUR = mb_upright.available ? + img->mb_data[mb_upright.mb_addr].mb_field || dec_picture->ref_idx[LIST_1][mb_upright.pos_y][mb_upright.pos_x] < 0 ? + dec_picture->ref_idx[LIST_1][mb_upright.pos_y][mb_upright.pos_x] >> 1: + dec_picture->ref_idx[LIST_1][mb_upright.pos_y][mb_upright.pos_x] : bw_rFrameUL; + } + } + + fw_rFrame = (fw_rFrameL >= 0 && fw_rFrameU >= 0) ? min(fw_rFrameL,fw_rFrameU): max(fw_rFrameL,fw_rFrameU); + fw_rFrame = (fw_rFrame >= 0 && fw_rFrameUR >= 0) ? min(fw_rFrame,fw_rFrameUR): max(fw_rFrame,fw_rFrameUR); + + bw_rFrame = (bw_rFrameL >= 0 && bw_rFrameU >= 0) ? min(bw_rFrameL,bw_rFrameU): max(bw_rFrameL,bw_rFrameU); + bw_rFrame = (bw_rFrame >= 0 && bw_rFrameUR >= 0) ? min(bw_rFrame,bw_rFrameUR): max(bw_rFrame,bw_rFrameUR); + + + if (fw_rFrame >=0) + SetMotionVectorPredictor (img, pmvfw, pmvfw+1, fw_rFrame, LIST_0, dec_picture->ref_idx, dec_picture->mv, 0, 0, 16, 16); + + if (bw_rFrame >=0) + SetMotionVectorPredictor (img, pmvbw, pmvbw+1, bw_rFrame, LIST_1, dec_picture->ref_idx, dec_picture->mv, 0, 0, 16, 16); + + + for (i=0;i<4;i++) + { + if (currMB->b8mode[i] == 0) + for(j=2*(i/2);j<2*(i/2)+2;j++) + for(k=2*(i%2);k<2*(i%2)+2;k++) + { + int j6 = imgblock_y+j; + j4 = img->block_y+j; + i4 = img->block_x+k; + + + if (fw_rFrame >= 0) + { + + if (!fw_rFrame && ((!moving_block[j6][i4]) && (!listX[1+list_offset][0]->is_long_term))) + { + dec_picture->mv [LIST_0][j4][i4][0] = 0; + dec_picture->mv [LIST_0][j4][i4][1] = 0; + dec_picture->ref_idx[LIST_0][j4][i4] = 0; + } + else + { + + dec_picture->mv [LIST_0][j4][i4][0] = pmvfw[0]; + dec_picture->mv [LIST_0][j4][i4][1] = pmvfw[1]; + dec_picture->ref_idx[LIST_0][j4][i4] = fw_rFrame; + } + } + else + { + dec_picture->mv [LIST_0][j4][i4][0] = 0; + dec_picture->mv [LIST_0][j4][i4][1] = 0; + dec_picture->ref_idx[LIST_0][j4][i4] = -1; + } + if (bw_rFrame >= 0) + 
{ + if (bw_rFrame==0 && ((!moving_block[j6][i4])&& (!listX[1+list_offset][0]->is_long_term))) + { + dec_picture->mv [LIST_1][j4][i4][0] = 0; + dec_picture->mv [LIST_1][j4][i4][1] = 0; + dec_picture->ref_idx[LIST_1][j4][i4] = 0; + } + else + { + dec_picture->mv [LIST_1][j4][i4][0] = pmvbw[0]; + dec_picture->mv [LIST_1][j4][i4][1] = pmvbw[1]; + dec_picture->ref_idx[LIST_1][j4][i4] = bw_rFrame; + } + } + else + { + dec_picture->mv [LIST_1][j4][i4][0] = 0; + dec_picture->mv [LIST_1][j4][i4][1] = 0; + dec_picture->ref_idx[LIST_1][j4][i4] = -1; + } + + if (fw_rFrame <0 && bw_rFrame <0) + { + dec_picture->ref_idx[LIST_0][j4][i4] = 0; + dec_picture->ref_idx[LIST_1][j4][i4] = 0; + } + } + } + } + else + { + for (i=0;i<4;i++) + { + if (currMB->b8mode[i] == 0) + { + for(j=2*(i/2);j<2*(i/2)+2;j++) + { + for(k=2*(i%2);k<2*(i%2)+2;k++) + { + + int list_offset = ((img->MbaffFrameFlag)&&(currMB->mb_field))? img->current_mb_nr%2 ? 4 : 2 : 0; + int imgblock_y= ((img->MbaffFrameFlag)&&(currMB->mb_field))? (img->current_mb_nr%2) ? (img->block_y-4)/2 : img->block_y/2 : img->block_y; + int refList = co_located_ref_idx[LIST_0 ][imgblock_y+j][img->block_x+k]== -1 ? 
LIST_1 : LIST_0; + int ref_idx = co_located_ref_idx[refList][imgblock_y + j][img->block_x + k]; + int mapped_idx=-1, iref; + + if (ref_idx == -1) + { + dec_picture->ref_idx [LIST_0][img->block_y + j][img->block_x + k] = 0; + dec_picture->ref_idx [LIST_1][img->block_y + j][img->block_x + k] = 0; + } + else + { + for (iref=0;irefnum_ref_idx_l0_active,listXsize[LIST_0 + list_offset]);iref++) + { + #if 1 + int curr_mb_field = ((img->MbaffFrameFlag)&&(currMB->mb_field)); + + if(img->structure==0 && curr_mb_field==0) + { + // If the current MB is a frame MB and the colocated is from a field picture, + // then the co_located_ref_id may have been generated from the wrong value of + // frame_poc if it references it's complementary field, so test both POC values + if(listX[0][iref]->top_poc*2 == co_located_ref_id[refList][imgblock_y + j][img->block_x + k] + || listX[0][iref]->bottom_poc*2 == co_located_ref_id[refList][imgblock_y + j][img->block_x + k]) + { + mapped_idx=iref; + break; + } + else //! invalid index. Default to zero even though this case should not happen + mapped_idx=INVALIDINDEX; + continue; + } + #endif + if (dec_picture->ref_pic_num[img->current_slice_nr][LIST_0 + list_offset][iref]==co_located_ref_id[refList][imgblock_y + j][img->block_x + k]) + { + mapped_idx=iref; + break; + } + else //! invalid index. Default to zero even though this case should not happen + mapped_idx=INVALIDINDEX; + } + if (INVALIDINDEX == mapped_idx) + { + error("temporal direct error\ncolocated block has ref that is unavailable",-1111); + } + dec_picture->ref_idx [LIST_0][img->block_y + j][img->block_x + k] = mapped_idx; + dec_picture->ref_idx [LIST_1][img->block_y + j][img->block_x + k] = 0; + } + } + } + } + } + } + } + + // If multiple ref. frames, read reference frame for the MB ********************************* + if(img->num_ref_idx_l0_active>1) + { + flag_mode = ( img->num_ref_idx_l0_active == 2 ? 
1 : 0); + + currSE.type = SE_REFFRAME; + dP = &(currSlice->partArr[partMap[SE_REFFRAME]]); + + if (active_pps->entropy_coding_mode_flag == UVLC || dP->bitstream->ei_flag) currSE.mapping = linfo_ue; + else currSE.reading = readRefFrame_CABAC; + + for (j0=0; j0<4; j0+=step_v0) + { + for (i0=0; i0<4; i0+=step_h0) + { + k=2*(j0/2)+(i0/2); + if ((currMB->b8pdir[k]==0 || currMB->b8pdir[k]==2) && currMB->b8mode[k]!=0) + { + TRACE_STRING("ref_idx_l0"); + + img->subblock_x = i0; + img->subblock_y = j0; + + if (!IS_P8x8 (currMB) || bframe || (!bframe && !img->allrefzero)) + { + currSE.context = BType2CtxRef (currMB->b8mode[k]); + if( (active_pps->entropy_coding_mode_flag == UVLC || dP->bitstream->ei_flag) && flag_mode ) + { + currSE.len = 1; + readSyntaxElement_FLC(&currSE, dP->bitstream); + currSE.value1 = 1 - currSE.value1; + } + else + { + currSE.value2 = LIST_0; + dP->readSyntaxElement (&currSE,img,inp,dP); + } + refframe = currSE.value1; + + } + else + { + refframe = 0; + } + + /* + if (bframe && refframe>img->buf_cycle) // img->buf_cycle should be correct for field MBs now + { + set_ec_flag(SE_REFFRAME); + refframe = 1; + } + */ + + for (j=j0; jref_idx[LIST_0][img->block_y + j][img->block_x + i] = refframe; + } + + } + } + } + } + else + { + for (j0=0; j0<4; j0+=step_v0) + { + for (i0=0; i0<4; i0+=step_h0) + { + k=2*(j0/2)+(i0/2); + if ((currMB->b8pdir[k]==0 || currMB->b8pdir[k]==2) && currMB->b8mode[k]!=0) + { + for (j=j0; jref_idx[LIST_0][img->block_y + j][img->block_x + i] = 0; + } + } + } + } + } + + // If backward multiple ref. frames, read backward reference frame for the MB ********************************* + if(img->num_ref_idx_l1_active>1) + { + flag_mode = ( img->num_ref_idx_l1_active == 2 ? 
1 : 0); + + currSE.type = SE_REFFRAME; + dP = &(currSlice->partArr[partMap[SE_REFFRAME]]); + if (active_pps->entropy_coding_mode_flag == UVLC || dP->bitstream->ei_flag) currSE.mapping = linfo_ue; + else currSE.reading = readRefFrame_CABAC; + + for (j0=0; j0<4; j0+=step_v0) + { + for (i0=0; i0<4; i0+=step_h0) + { + k=2*(j0/2)+(i0/2); + if ((currMB->b8pdir[k]==1 || currMB->b8pdir[k]==2) && currMB->b8mode[k]!=0) + { + TRACE_STRING("ref_idx_l1"); + + img->subblock_x = i0; + img->subblock_y = j0; + + currSE.context = BType2CtxRef (currMB->b8mode[k]); + if( (active_pps->entropy_coding_mode_flag == UVLC || dP->bitstream->ei_flag) && flag_mode ) + { + currSE.len = 1; + readSyntaxElement_FLC(&currSE, dP->bitstream); + currSE.value1 = 1-currSE.value1; + } + else + { + currSE.value2 = LIST_1; + dP->readSyntaxElement (&currSE,img,inp,dP); + } + refframe = currSE.value1; + + for (j=j0; jref_idx[LIST_1][img->block_y + j][img->block_x + i] = refframe; + } + } + } + } + } + } + else + { + for (j0=0; j0<4; j0+=step_v0) + { + for (i0=0; i0<4; i0+=step_h0) + { + k=2*(j0/2)+(i0/2); + if ((currMB->b8pdir[k]==1 || currMB->b8pdir[k]==2) && currMB->b8mode[k]!=0) + { + for (j=j0; jref_idx[LIST_1][img->block_y + j][img->block_x + i] = 0; + } + } + } + } + } + + //===== READ FORWARD MOTION VECTORS ===== + currSE.type = SE_MVD; + dP = &(currSlice->partArr[partMap[SE_MVD]]); + + if (active_pps->entropy_coding_mode_flag == UVLC || dP->bitstream->ei_flag) currSE.mapping = linfo_se; + else currSE.reading = readMVD_CABAC; + + for (j0=0; j0<4; j0+=step_v0) + for (i0=0; i0<4; i0+=step_h0) + { + k=2*(j0/2)+(i0/2); + + if ((currMB->b8pdir[k]==0 || currMB->b8pdir[k]==2) && (currMB->b8mode[k] !=0))//has forward vector + { + mv_mode = currMB->b8mode[k]; + step_h = BLOCK_STEP [mv_mode][0]; + step_v = BLOCK_STEP [mv_mode][1]; + + refframe = dec_picture->ref_idx[LIST_0][img->block_y+j0][img->block_x+i0]; + + for (j=j0; jblock_y+j; + i4 = img->block_x+i; + + // first make mv-prediction + 
SetMotionVectorPredictor (img, pmv, pmv+1, refframe, LIST_0, dec_picture->ref_idx, dec_picture->mv, i, j, 4*step_h, 4*step_v); + + for (k=0; k < 2; k++) + { + TRACE_STRING("mvd_l0"); + + img->subblock_x = i; // position used for context determination + img->subblock_y = j; // position used for context determination + currSE.value2 = k<<1; // identifies the component; only used for context determination + dP->readSyntaxElement(&currSE,img,inp,dP); + curr_mvd = currSE.value1; + + vec=curr_mvd+pmv[k]; /* find motion vector */ + + for(ii=0;iimv [LIST_0][j4+jj][i4+ii][k] = vec; + currMB->mvd [LIST_0][j+jj] [i+ii] [k] = curr_mvd; + } + } + } + } + } + } + else if (currMB->b8mode[k=2*(j0/2)+(i0/2)]==0) + { + if (!img->direct_spatial_mv_pred_flag) + { + int list_offset = ((img->MbaffFrameFlag)&&(currMB->mb_field))? img->current_mb_nr%2 ? 4 : 2 : 0; + int imgblock_y= ((img->MbaffFrameFlag)&&(currMB->mb_field))? (img->current_mb_nr%2) ? (img->block_y-4)/2:img->block_y/2 : img->block_y; + + int refList = (co_located_ref_idx[LIST_0][imgblock_y+j0][img->block_x+i0]== -1 ? 
LIST_1 : LIST_0); + int ref_idx = co_located_ref_idx[refList][imgblock_y+j0][img->block_x+i0]; + + if (ref_idx==-1) + { + for (j=j0; jref_idx [LIST_1][img->block_y+j][img->block_x+i]=0; + dec_picture->ref_idx [LIST_0][img->block_y+j][img->block_x+i]=0; + j4 = img->block_y+j; + i4 = img->block_x+i; + for (ii=0; ii < 2; ii++) + { + dec_picture->mv [LIST_0][j4][i4][ii]=0; + dec_picture->mv [LIST_1][j4][i4][ii]=0; + } + } + } + else + { + int mapped_idx=-1, iref; + int j6; + + for (iref=0;irefnum_ref_idx_l0_active,listXsize[LIST_0 + list_offset]);iref++) + { + + #if 1 + int curr_mb_field = ((img->MbaffFrameFlag)&&(currMB->mb_field)); + + if(img->structure==0 && curr_mb_field==0) + { + // If the current MB is a frame MB and the colocated is from a field picture, + // then the co_located_ref_id may have been generated from the wrong value of + // frame_poc if it references it's complementary field, so test both POC values + if(listX[0][iref]->top_poc*2 == co_located_ref_id[refList][imgblock_y + j0][img->block_x + i0] + || listX[0][iref]->bottom_poc*2 == co_located_ref_id[refList][imgblock_y + j0][img->block_x + i0]) + { + mapped_idx=iref; + break; + } + else //! invalid index. Default to zero even though this case should not happen + mapped_idx=INVALIDINDEX; + continue; + } + #endif + if (dec_picture->ref_pic_num[img->current_slice_nr][LIST_0 + list_offset][iref]==co_located_ref_id[refList][imgblock_y+j0][img->block_x+i0]) + { + mapped_idx=iref; + break; + } + else //! invalid index. 
Default to zero even though this case should not happen + mapped_idx=INVALIDINDEX; + } + + if (INVALIDINDEX == mapped_idx) + { + error("temporal direct error\ncolocated block has ref that is unavailable",-1111); + } + + + for (j=j0; jmvscale[LIST_0 + list_offset][mapped_idx]; + + dec_picture->ref_idx [LIST_0][img->block_y+j][img->block_x+i] = mapped_idx; + dec_picture->ref_idx [LIST_1][img->block_y+j][img->block_x+i] = 0; + + j4 = img->block_y+j; + j6 = imgblock_y+j; + i4 = img->block_x+i; + + for (ii=0; ii < 2; ii++) + { + //if (iTRp==0) + if (mv_scale == 9999 || listX[LIST_0+list_offset][mapped_idx]->is_long_term) + // if (mv_scale==9999 || Co_located->is_long_term) + { + dec_picture->mv [LIST_0][j4][i4][ii]=co_located_mv[refList][j6][i4][ii]; + dec_picture->mv [LIST_1][j4][i4][ii]=0; + } + else + { + dec_picture->mv [LIST_0][j4][i4][ii]=(mv_scale * co_located_mv[refList][j6][i4][ii] + 128 ) >> 8; + dec_picture->mv [LIST_1][j4][i4][ii]=dec_picture->mv[LIST_0][j4][i4][ii] - co_located_mv[refList][j6][i4][ii]; + } + } + } + } + } + } + } + } + + //===== READ BACKWARD MOTION VECTORS ===== + currSE.type = SE_MVD; + dP = &(currSlice->partArr[partMap[SE_MVD]]); + + if (active_pps->entropy_coding_mode_flag == UVLC || dP->bitstream->ei_flag) currSE.mapping = linfo_se; + else currSE.reading = readMVD_CABAC; + + for (j0=0; j0<4; j0+=step_v0) + { + for (i0=0; i0<4; i0+=step_h0) + { + k=2*(j0/2)+(i0/2); + if ((currMB->b8pdir[k]==1 || currMB->b8pdir[k]==2) && (currMB->b8mode[k]!=0))//has backward vector + { + mv_mode = currMB->b8mode[k]; + step_h = BLOCK_STEP [mv_mode][0]; + step_v = BLOCK_STEP [mv_mode][1]; + + refframe = dec_picture->ref_idx[LIST_1][img->block_y+j0][img->block_x+i0]; + + for (j=j0; jblock_y+j; + i4 = img->block_x+i; + + // first make mv-prediction + SetMotionVectorPredictor (img, pmv, pmv+1, refframe, LIST_1, dec_picture->ref_idx, dec_picture->mv, i, j, 4*step_h, 4*step_v); + + for (k=0; k < 2; k++) + { + TRACE_STRING("mvd_l1"); + + img->subblock_x = i; // 
position used for context determination + img->subblock_y = j; // position used for context determination + currSE.value2 = (k<<1) +1; // identifies the component; only used for context determination + dP->readSyntaxElement(&currSE,img,inp,dP); + curr_mvd = currSE.value1; + + vec=curr_mvd+pmv[k]; /* find motion vector */ + + for(ii=0;iimv [LIST_1][j4+jj][i4+ii][k] = vec; + currMB->mvd [LIST_1][j+jj] [i+ii] [k] = curr_mvd; + } + } + } + } + } + } + } + } + // record reference picture Ids for deblocking decisions + + for(i4=img->block_x;i4<(img->block_x+4);i4++) + for(j4=img->block_y;j4<(img->block_y+4);j4++) + { + if(dec_picture->ref_idx[LIST_0][j4][i4]>=0) + dec_picture->ref_pic_id[LIST_0][j4][i4] = dec_picture->ref_pic_num[img->current_slice_nr][LIST_0 + list_offset][(short)dec_picture->ref_idx[LIST_0][j4][i4]]; + else + dec_picture->ref_pic_id[LIST_0][j4][i4] = INT64_MIN; + if(dec_picture->ref_idx[LIST_1][j4][i4]>=0) + dec_picture->ref_pic_id[LIST_1][j4][i4] = dec_picture->ref_pic_num[img->current_slice_nr][LIST_1 + list_offset][(short)dec_picture->ref_idx[LIST_1][j4][i4]]; + else + dec_picture->ref_pic_id[LIST_1][j4][i4] = INT64_MIN; + } + } + + + + /*! 
+ ************************************************************************ + * \brief + * Get the Prediction from the Neighboring Blocks for Number of Nonzero Coefficients + * + * Luma Blocks + ************************************************************************ + */ + int predict_nnz(struct img_par *img, int i,int j) + { + PixelPos pix; + + int pred_nnz = 0; + int cnt = 0; + int mb_nr = img->current_mb_nr; + + // left block + getLuma4x4Neighbour(mb_nr, i, j, -1, 0, &pix); + + if (pix.available && active_pps->constrained_intra_pred_flag && (img->currentSlice->dp_mode==PAR_DP_3)) + { + pix.available &= img->intra_block[pix.mb_addr]; + } + + if (pix.available) + { + pred_nnz = img->nz_coeff [pix.mb_addr ][pix.x][pix.y]; + cnt++; + } + + // top block + getLuma4x4Neighbour(mb_nr, i, j, 0, -1, &pix); + + if (pix.available && active_pps->constrained_intra_pred_flag && (img->currentSlice->dp_mode==PAR_DP_3)) + { + pix.available &= img->intra_block[pix.mb_addr]; + } + + if (pix.available) + { + pred_nnz += img->nz_coeff [pix.mb_addr ][pix.x][pix.y]; + cnt++; + } + + if (cnt==2) + { + pred_nnz++; + pred_nnz/=cnt; + } + + return pred_nnz; + } + + + /*! 
+ ************************************************************************ + * \brief + * Get the Prediction from the Neighboring Blocks for Number of Nonzero Coefficients + * + * Chroma Blocks + ************************************************************************ + */ + int predict_nnz_chroma(struct img_par *img, int i,int j) + { + PixelPos pix; + + int pred_nnz = 0; + int cnt =0; + int mb_nr = img->current_mb_nr; + int j_off_tab [12] = {0,0,0,0,4,4,4,4,8,8,8,8}; + int j_off = j_off_tab[j]; + + if (dec_picture->chroma_format_idc != YUV444) + { + //YUV420 and YUV422 + // left block + getChroma4x4Neighbour(mb_nr, i%2, j-4, -1, 0, &pix); + + if (pix.available && active_pps->constrained_intra_pred_flag && (img->currentSlice->dp_mode==PAR_DP_3)) + { + pix.available &= img->intra_block[pix.mb_addr]; + } + + if (pix.available) + { + pred_nnz = img->nz_coeff [pix.mb_addr ][2 * (i/2) + pix.x][4 + pix.y]; + cnt++; + } + + // top block + getChroma4x4Neighbour(mb_nr, i%2, j-4, 0, -1, &pix); + + if (pix.available && active_pps->constrained_intra_pred_flag && (img->currentSlice->dp_mode==PAR_DP_3)) + { + pix.available &= img->intra_block[pix.mb_addr]; + } + + if (pix.available) + { + pred_nnz += img->nz_coeff [pix.mb_addr ][2 * (i/2) + pix.x][4 + pix.y]; + cnt++; + } + } + else + { + //YUV444 + // left block + getChroma4x4Neighbour(mb_nr, i, j-j_off, -1, 0, &pix); + + if (pix.available && active_pps->constrained_intra_pred_flag && (img->currentSlice->dp_mode==PAR_DP_3)) + { + pix.available &= img->intra_block[pix.mb_addr]; + } + + if (pix.available) + { + pred_nnz = img->nz_coeff [pix.mb_addr ][pix.x][j_off + pix.y]; + cnt++; + } + + // top block + getChroma4x4Neighbour(mb_nr, i, j-j_off, 0, -1, &pix); + + if (pix.available && active_pps->constrained_intra_pred_flag && (img->currentSlice->dp_mode==PAR_DP_3)) + { + pix.available &= img->intra_block[pix.mb_addr]; + } + + if (pix.available) + { + pred_nnz += img->nz_coeff [pix.mb_addr ][pix.x][j_off + pix.y]; + cnt++; + } + } 
+ + if (cnt==2) + { + pred_nnz++; + pred_nnz/=cnt; + } + + return pred_nnz; + } + + + /*! + ************************************************************************ + * \brief + * Reads coeff of an 4x4 block (CAVLC) + * + * \author + * Karl Lillevold + * contributions by James Au + ************************************************************************ + */ + + + void readCoeff4x4_CAVLC (struct img_par *img,struct inp_par *inp, + int block_type, + int i, int j, int levarr[16], int runarr[16], + int *number_coefficients) + { + int mb_nr = img->current_mb_nr; + Macroblock *currMB = &img->mb_data[mb_nr]; + SyntaxElement currSE; + Slice *currSlice = img->currentSlice; + DataPartition *dP; + int *partMap = assignSE2partition[currSlice->dp_mode]; + + + int k, code, vlcnum; + int numcoeff, numtrailingones, numcoeff_vlc; + int level_two_or_higher; + int numones, totzeros, level, cdc=0, cac=0; + int zerosleft, ntr, dptype = 0; + int max_coeff_num = 0, nnz; + char type[15]; + int incVlc[] = {0,3,6,12,24,48,32768}; // maximum vlc = 6 + + numcoeff = 0; + + switch (block_type) + { + case LUMA: + max_coeff_num = 16; + sprintf(type, "%s", "Luma"); + if (IS_INTRA (currMB)) + { + dptype = SE_LUM_AC_INTRA; + } + else + { + dptype = SE_LUM_AC_INTER; + } + break; + case LUMA_INTRA16x16DC: + max_coeff_num = 16; + sprintf(type, "%s", "Lum16DC"); + dptype = SE_LUM_DC_INTRA; + break; + case LUMA_INTRA16x16AC: + max_coeff_num = 15; + sprintf(type, "%s", "Lum16AC"); + dptype = SE_LUM_AC_INTRA; + break; + + case CHROMA_DC: + max_coeff_num = img->num_cdc_coeff; + cdc = 1; + + sprintf(type, "%s", "ChrDC"); + if (IS_INTRA (currMB)) + { + dptype = SE_CHR_DC_INTRA; + } + else + { + dptype = SE_CHR_DC_INTER; + } + break; + case CHROMA_AC: + max_coeff_num = 15; + cac = 1; + sprintf(type, "%s", "ChrAC"); + if (IS_INTRA (currMB)) + { + dptype = SE_CHR_AC_INTRA; + } + else + { + dptype = SE_CHR_AC_INTER; + } + break; + default: + error ("readCoeff4x4_CAVLC: invalid block type", 600); + break; + } + + 
currSE.type = dptype; + dP = &(currSlice->partArr[partMap[dptype]]); + + img->nz_coeff[img->current_mb_nr][i][j] = 0; + + + if (!cdc) + { + // luma or chroma AC + if (!cac) + { + nnz = predict_nnz(img, i, j); + } + else + { + nnz = predict_nnz_chroma(img, i, j); + } + + if (nnz < 2) + { + numcoeff_vlc = 0; + } + else if (nnz < 4) + { + numcoeff_vlc = 1; + } + else if (nnz < 8) + { + numcoeff_vlc = 2; + } + else // + { + numcoeff_vlc = 3; + } + + currSE.value1 = numcoeff_vlc; + + readSyntaxElement_NumCoeffTrailingOnes(&currSE, dP, type); + + numcoeff = currSE.value1; + numtrailingones = currSE.value2; + + img->nz_coeff[img->current_mb_nr][i][j] = numcoeff; + } + else + { + // chroma DC + readSyntaxElement_NumCoeffTrailingOnesChromaDC(&currSE, dP); + + numcoeff = currSE.value1; + numtrailingones = currSE.value2; + } + + + for (k = 0; k < max_coeff_num; k++) + { + levarr[k] = 0; + runarr[k] = 0; + } + + numones = numtrailingones; + *number_coefficients = numcoeff; + + if (numcoeff) + { + if (numtrailingones) + { + + currSE.len = numtrailingones; + + #if TRACE + snprintf(currSE.tracestring, + TRACESTRING_SIZE, "%s trailing ones sign (%d,%d)", type, i, j); + #endif + + readSyntaxElement_FLC (&currSE, dP->bitstream); + + code = currSE.inf; + ntr = numtrailingones; + for (k = numcoeff-1; k > numcoeff-1-numtrailingones; k--) + { + ntr --; + if ((code>>ntr)&1) + levarr[k] = -1; + else + levarr[k] = 1; + } + } + + // decode levels + level_two_or_higher = 1; + if (numcoeff > 3 && numtrailingones == 3) + level_two_or_higher = 0; + + if (numcoeff > 10 && numtrailingones < 3) + vlcnum = 1; + else + vlcnum = 0; + + for (k = numcoeff - 1 - numtrailingones; k >= 0; k--) + { + + #if TRACE + snprintf(currSE.tracestring, + TRACESTRING_SIZE, "%s lev (%d,%d) k=%d vlc=%d ", type, + i, j, k, vlcnum); + #endif + + if (vlcnum == 0) + readSyntaxElement_Level_VLC0(&currSE, dP); + else + readSyntaxElement_Level_VLCN(&currSE, vlcnum, dP); + + if (level_two_or_higher) + { + if (currSE.inf > 0) + 
currSE.inf ++; + else + currSE.inf --; + level_two_or_higher = 0; + } + + level = levarr[k] = currSE.inf; + if (abs(level) == 1) + numones ++; + + // update VLC table + if (abs(level)>incVlc[vlcnum]) + vlcnum++; + + if (k == numcoeff - 1 - numtrailingones && abs(level)>3) + vlcnum = 2; + + } + + if (numcoeff < max_coeff_num) + { + // decode total run + vlcnum = numcoeff-1; + currSE.value1 = vlcnum; + + #if TRACE + snprintf(currSE.tracestring, + TRACESTRING_SIZE, "%s totalrun (%d,%d) vlc=%d ", type, i,j, vlcnum); + #endif + if (cdc) + readSyntaxElement_TotalZerosChromaDC(&currSE, dP); + else + readSyntaxElement_TotalZeros(&currSE, dP); + + totzeros = currSE.value1; + } + else + { + totzeros = 0; + } + + // decode run before each coefficient + zerosleft = totzeros; + i = numcoeff-1; + if (zerosleft > 0 && i > 0) + { + do + { + // select VLC for runbefore + vlcnum = zerosleft - 1; + if (vlcnum > RUNBEFORE_NUM-1) + vlcnum = RUNBEFORE_NUM-1; + + currSE.value1 = vlcnum; + #if TRACE + snprintf(currSE.tracestring, + TRACESTRING_SIZE, "%s run (%d,%d) k=%d vlc=%d ", + type, i, j, i, vlcnum); + #endif + + readSyntaxElement_Run(&currSE, dP); + runarr[i] = currSE.value1; + + zerosleft -= runarr[i]; + i --; + } while (zerosleft != 0 && i != 0); + } + runarr[i] = zerosleft; + + } // if numcoeff + } + + /*! + ************************************************************************ + * \brief + * Calculate the quantisation and inverse quantisation parameters + * + ************************************************************************ + */ + void CalculateQuant8Param() + { + int i, j, k, temp; + + for(k=0; k<6; k++) + for(j=0; j<8; j++) + { + for(i=0; i<8; i++) + { + temp = (i<<3)+j; + InvLevelScale8x8Luma_Intra[k][j][i] = dequant_coef8[k][j][i]*qmatrix[6][temp]; + InvLevelScale8x8Luma_Inter[k][j][i] = dequant_coef8[k][j][i]*qmatrix[7][temp]; + } + } + } + + /*! 
+ ************************************************************************ + * \brief + * Get coefficients (run/level) of one 8x8 block + * from the NAL (CABAC Mode) + ************************************************************************ + */ + void readLumaCoeff8x8_CABAC (struct img_par *img,struct inp_par *inp, int b8) + { /* Reads run/level pairs with readRunLevel_CABAC until a zero level arrives (at most 65 reads). Each nonzero level is placed through the 8x8 frame or field scan table, the block is marked in currMB->cbp_blk, and the value is stored in img->m7 (raw for lossless_qpprime, scaled otherwise). */ + int i,j,k; + int level; + int mb_nr = img->current_mb_nr; + Macroblock *currMB = &img->mb_data[mb_nr]; + int cbp = currMB->cbp; + SyntaxElement currSE; + Slice *currSlice = img->currentSlice; + DataPartition *dP; + int *partMap = assignSE2partition[currSlice->dp_mode]; + int coef_ctr;// i0, j0; + int start_scan; + int boff_x, boff_y; + int any_coeff; + int dq_lshift = 0, dq_rshift = 0, dq_round = 0; + + int run, len; + + int qp_per = (img->qp + img->bitdepth_luma_qp_scale - MIN_QP)/6; + int qp_rem = (img->qp + img->bitdepth_luma_qp_scale - MIN_QP)%6; + Boolean lossless_qpprime = ((img->qp + img->bitdepth_luma_qp_scale)==0 && img->lossless_qpprime_flag==1); + + if (qp_per < 6) /* below 6 the scaled value gets a rounding right shift; from 6 upwards a left shift */ + { + dq_rshift = 6 - qp_per; + dq_round = 1<<(5-qp_per); + } + else + dq_lshift = qp_per - 6; + + img->is_intra_block = IS_INTRA(currMB); + + /* NOTE(review): text appears to be missing between "(1<" and "subblock_x" in this archived copy. The rest of the condition, its opening brace and the assignments to boff_x and boff_y are not visible here. Restore them from the original source file before using this code. */ + if (cbp & (1<subblock_x = boff_x>>2; // position for coeff_count ctx + img->subblock_y = boff_y>>2; // position for coeff_count ctx + + start_scan = 0; // take all coeffs + coef_ctr = start_scan-1; + level = 1; + + for(k=start_scan;(k < 65) && (level != 0);k++) + { + //============ read ============= + /* + * make distinction between INTRA and INTER coded + * luminance coefficients + */ + currSE.context = LUMA_8x8; + currSE.type = (IS_INTRA(currMB) ? + (k==0 ? SE_LUM_DC_INTRA : SE_LUM_AC_INTRA) : + (k==0 ? 
SE_LUM_DC_INTER : SE_LUM_AC_INTER)); + + #if TRACE + sprintf(currSE.tracestring, "Luma8x8 sng "); + #endif + dP = &(currSlice->partArr[partMap[currSE.type]]); + currSE.reading = readRunLevel_CABAC; + + dP->readSyntaxElement(&currSE,img,inp,dP); + level = currSE.value1; + run = currSE.value2; + len = currSE.len; /* NOTE(review): len and any_coeff are assigned but never read in the visible text */ + + + //============ decode ============= + if (level != 0) /* leave if len=1 */ + { + any_coeff=1; + coef_ctr += run+1; /* each nonzero level sits run+1 scan positions after the previous one */ + + if ((img->structure == FRAME) && (!currMB->mb_field)) + { + i=SNGL_SCAN8x8[coef_ctr][0]; + j=SNGL_SCAN8x8[coef_ctr][1]; + } + else + { // Alternate scan for field coding + i=FIELD_SCAN8x8[coef_ctr][0]; + j=FIELD_SCAN8x8[coef_ctr][1]; + } + + currMB->cbp_blk |= 51 << (4*b8-2*(b8%2)); // corresponds to 110011, as if all four 4x4 blocks contain coeff, shifted to block position + + if(lossless_qpprime) + { + img->m7[boff_x + i][boff_y + j] = level; + } + else if (qp_per>=6) + { + if(img->is_intra_block == 1) /* NOTE(review): the next two assignments are damaged in this archived copy. Text is missing after "[j]<" in both, including (presumably) the shift amount, the else keyword and the braces that open the right-shift branch. Restore them from the original source file. */ + img->m7[boff_x + i][boff_y + j] = level*InvLevelScale8x8Luma_Intra[qp_rem][i][j]<m7[boff_x + i][boff_y + j] = level*InvLevelScale8x8Luma_Inter[qp_rem][i][j]<is_intra_block == 1) + img->m7[boff_x + i][boff_y + j] = (level*InvLevelScale8x8Luma_Intra[qp_rem][i][j]+dq_round)>>dq_rshift; // dequantization + else + img->m7[boff_x + i][boff_y + j] = (level*InvLevelScale8x8Luma_Inter[qp_rem][i][j]+dq_round)>>dq_rshift; // dequantization + } + } + } + } + + + } + + /*! 
+ ************************************************************************ + * \brief + * Get coded block pattern and coefficients (run/level) + * from the NAL + ************************************************************************ + */ + void readCBPandCoeffsFromNAL(struct img_par *img,struct inp_par *inp) + { + int i,j,k; + int level; + int mb_nr = img->current_mb_nr; + int ii,jj; + int m2,jg2;// i1,j1; + Macroblock *currMB = &img->mb_data[mb_nr]; + int cbp; + SyntaxElement currSE; + Slice *currSlice = img->currentSlice; + DataPartition *dP; + int *partMap = assignSE2partition[currSlice->dp_mode]; + int iii,jjj; + int coef_ctr, i0, j0, b8; + int ll; + int block_x,block_y; + int start_scan; + int run, len; + int levarr[16], runarr[16], numcoeff; + + int qp_const; + int qp_per = (img->qp + img->bitdepth_luma_qp_scale - MIN_QP)/6; + int qp_rem = (img->qp + img->bitdepth_luma_qp_scale - MIN_QP)%6; + int smb = ((img->type==SP_SLICE) && IS_INTER (currMB)) || (img->type == SI_SLICE && currMB->mb_type == SI4MB); + + int uv; + int qp_uv[2]; + int qp_const_uv[2]; + int qp_per_uv[2]; + int qp_rem_uv[2]; + int qp_c[2]; + + int intra = IS_INTRA (currMB); + int temp[4]; + + int b4; + int yuv = dec_picture->chroma_format_idc-1; + int m5[4]; + int m6[4]; + + int need_transform_size_flag; + Boolean lossless_qpprime = ((img->qp + img->bitdepth_luma_qp_scale)==0 && img->lossless_qpprime_flag==1); + + // Residue Color Transform + Boolean residual_transform_dc = ((img->residue_transform_flag==1) && (IS_OLDINTRA(currMB)||currMB->mb_type==I8MB) ); + + // QPI + //init constants for every chroma qp offset + if (dec_picture->chroma_format_idc != YUV400) + { + for (i=0; i<2; i++) + { + qp_uv[i] = img->qp + dec_picture->chroma_qp_offset[i]; + qp_uv[i] = Clip3(-(img->bitdepth_chroma_qp_scale), 51, qp_uv[i]); + qp_c[i] = (qp_uv[i] < 0)? 
qp_uv[i] : QP_SCALE_CR[qp_uv[i]-MIN_QP]; + qp_per_uv[i] = (qp_c[i] + img->bitdepth_chroma_qp_scale)/6; + qp_rem_uv[i] = (qp_c[i] + img->bitdepth_chroma_qp_scale)%6; + } + } + + // read CBP if not new intra mode + if (!IS_NEWINTRA (currMB)) + { + //===== C B P ===== + //--------------------- + if (IS_OLDINTRA (currMB) || currMB->mb_type == SI4MB || currMB->mb_type == I8MB) currSE.type = SE_CBP_INTRA; + else currSE.type = SE_CBP_INTER; + + dP = &(currSlice->partArr[partMap[currSE.type]]); + + if (active_pps->entropy_coding_mode_flag == UVLC || dP->bitstream->ei_flag) + { + if (IS_OLDINTRA (currMB) || currMB->mb_type == SI4MB || currMB->mb_type == I8MB) currSE.mapping = linfo_cbp_intra; + else currSE.mapping = linfo_cbp_inter; + } + else + { + currSE.reading = readCBP_CABAC; + } + + TRACE_STRING("coded_block_pattern"); + + dP->readSyntaxElement(&currSE,img,inp,dP); + currMB->cbp = cbp = currSE.value1; + + + //============= Transform size flag for INTER MBs ============= + //------------------------------------------------------------- + need_transform_size_flag = (((currMB->mb_type >= 1 && currMB->mb_type <= 3)|| + (IS_DIRECT(currMB) && active_sps->direct_8x8_inference_flag) || + (currMB->NoMbPartLessThan8x8Flag)) + && currMB->mb_type != I8MB && currMB->mb_type != I4MB + && (currMB->cbp&15) + && img->Transform8x8Mode); + + if (need_transform_size_flag) + { + currSE.type = SE_HEADER; + dP = &(currSlice->partArr[partMap[SE_HEADER]]); + currSE.reading = readMB_transform_size_flag_CABAC; + TRACE_STRING("transform size 8x8 flag"); + + // read UVLC transform_size_8x8_flag + if (active_pps->entropy_coding_mode_flag == UVLC || dP->bitstream->ei_flag) + { + currSE.len = 1; + readSyntaxElement_FLC(&currSE, dP->bitstream); + } else + { + dP->readSyntaxElement(&currSE,img,inp,dP); + } + currMB->luma_transform_size_8x8_flag = currSE.value1; + } + + //===== DQUANT ===== + //---------------------- + // Delta quant only if nonzero coeffs + if (cbp !=0) + { + if (IS_INTER (currMB)) 
currSE.type = SE_DELTA_QUANT_INTER; + else currSE.type = SE_DELTA_QUANT_INTRA; + + dP = &(currSlice->partArr[partMap[currSE.type]]); + + if (active_pps->entropy_coding_mode_flag == UVLC || dP->bitstream->ei_flag) + { + currSE.mapping = linfo_se; + } + else + currSE.reading= readDquant_CABAC; //gabi + + TRACE_STRING("mb_qp_delta"); + + dP->readSyntaxElement(&currSE,img,inp,dP); + currMB->delta_quant = currSE.value1; + + img->qp= ((img->qp + currMB->delta_quant + 52 + 2*img->bitdepth_luma_qp_scale)%(52+img->bitdepth_luma_qp_scale)) - + img->bitdepth_luma_qp_scale; + } + } + else + { + cbp = currMB->cbp; + } + + for (i=0;icof[i][j][iii][jjj]=0;// reset luma coeffs + + + if (IS_NEWINTRA (currMB)) // read DC coeffs for new intra modes + { + currSE.type = SE_DELTA_QUANT_INTRA; + + dP = &(currSlice->partArr[partMap[currSE.type]]); + + if (active_pps->entropy_coding_mode_flag == UVLC || dP->bitstream->ei_flag) + { + currSE.mapping = linfo_se; + } + else + { + currSE.reading= readDquant_CABAC; + } + #if TRACE + snprintf(currSE.tracestring, TRACESTRING_SIZE, "Delta quant "); + #endif + dP->readSyntaxElement(&currSE,img,inp,dP); + currMB->delta_quant = currSE.value1; + + img->qp= ((img->qp + currMB->delta_quant + 52 + 2*img->bitdepth_luma_qp_scale)%(52+img->bitdepth_luma_qp_scale)) - + img->bitdepth_luma_qp_scale; + + for (i=0;iipredmode[img->block_x+i][img->block_y+j]=DC_PRED; + + + if (active_pps->entropy_coding_mode_flag == UVLC) + { + readCoeff4x4_CAVLC(img, inp, LUMA_INTRA16x16DC, 0, 0, + levarr, runarr, &numcoeff); + + coef_ctr=-1; + level = 1; // just to get inside the loop + for(k = 0; k < numcoeff; k++) + { + if (levarr[k] != 0) // leave if len=1 + { + coef_ctr=coef_ctr+runarr[k]+1; + + if ((img->structure == FRAME) && (!currMB->mb_field)) + { + i0=SNGL_SCAN[coef_ctr][0]; + j0=SNGL_SCAN[coef_ctr][1]; + } + else + { // Alternate scan for field coding + i0=FIELD_SCAN[coef_ctr][0]; + j0=FIELD_SCAN[coef_ctr][1]; + } + + img->cof[i0][j0][0][0]=levarr[k];// add new intra 
DC coeff + } + } + } + else + { + + currSE.type = SE_LUM_DC_INTRA; + dP = &(currSlice->partArr[partMap[currSE.type]]); + + currSE.context = LUMA_16DC; + currSE.type = SE_LUM_DC_INTRA; + img->is_intra_block = 1; + + if (active_pps->entropy_coding_mode_flag == UVLC || dP->bitstream->ei_flag) + { + currSE.mapping = linfo_levrun_inter; + } + else + { + currSE.reading = readRunLevel_CABAC; + } + + + + coef_ctr=-1; + level = 1; // just to get inside the loop + for(k=0;(k<17) && (level!=0);k++) + { + #if TRACE + snprintf(currSE.tracestring, TRACESTRING_SIZE, "DC luma 16x16 "); + #endif + dP->readSyntaxElement(&currSE,img,inp,dP); + level = currSE.value1; + run = currSE.value2; + len = currSE.len; + + if (level != 0) // leave if len=1 + { + coef_ctr=coef_ctr+run+1; + + if ((img->structure == FRAME) && (!currMB->mb_field)) + { + i0=SNGL_SCAN[coef_ctr][0]; + j0=SNGL_SCAN[coef_ctr][1]; + } + else + { // Alternate scan for field coding + i0=FIELD_SCAN[coef_ctr][0]; + j0=FIELD_SCAN[coef_ctr][1]; + } + + img->cof[i0][j0][0][0]=level;// add new intra DC coeff + } + } + } + if(!lossless_qpprime) + itrans_2(img);// transform new intra DC + } + + qp_per = (img->qp + img->bitdepth_luma_qp_scale - MIN_QP)/6; + qp_rem = (img->qp + img->bitdepth_luma_qp_scale - MIN_QP)%6; + qp_const = 1<<(3-qp_per); + + //init constants for every chroma qp offset + if (dec_picture->chroma_format_idc != YUV400) + { + for(i=0; i < 2; i++) + { + qp_uv[i] = img->qp + dec_picture->chroma_qp_offset[i]; + qp_uv[i] = Clip3(-(img->bitdepth_chroma_qp_scale), 51, qp_uv[i]); + qp_c[i] = (qp_uv[i] < 0)? 
qp_uv[i] : QP_SCALE_CR[qp_uv[i]-MIN_QP]; + qp_per_uv[i] = (qp_c[i] + img->bitdepth_chroma_qp_scale)/6; + qp_rem_uv[i] = (qp_c[i] + img->bitdepth_chroma_qp_scale)%6; + } + } + + currMB->qp = img->qp; + + + // luma coefficients + for (block_y=0; block_y < 4; block_y += 2) /* all modes */ + { + for (block_x=0; block_x < 4; block_x += 2) + { + + b8 = 2*(block_y/2) + block_x/2; + if (active_pps->entropy_coding_mode_flag == UVLC) + { + for (j=block_y; j < block_y+2; j++) + { + for (i=block_x; i < block_x+2; i++) + { + ii = block_x/2; jj = block_y/2; + b8 = 2*jj+ii; + + if (cbp & (1<structure == FRAME) && (!currMB->mb_field)) + { + i0=SNGL_SCAN[coef_ctr][0]; + j0=SNGL_SCAN[coef_ctr][1]; + } + else { // Alternate scan for field coding + i0=FIELD_SCAN[coef_ctr][0]; + j0=FIELD_SCAN[coef_ctr][1]; + } + + + if (!currMB->luma_transform_size_8x8_flag) + { // inverse quant for 4x4 transform only + currMB->cbp_blk |= 1 << ((j<<2) + i); + + if(lossless_qpprime) + { + img->cof[i][j][i0][j0]= levarr[k]; + } + else if(qp_per<4) + { + if(intra == 1) + img->cof[i][j][i0][j0]= (levarr[k]*InvLevelScale4x4Luma_Intra[qp_rem][i0][j0]+qp_const)>>(4-qp_per); + else + img->cof[i][j][i0][j0]= (levarr[k]*InvLevelScale4x4Luma_Inter[qp_rem][i0][j0]+qp_const)>>(4-qp_per); + } + else + { + if(intra == 1) + img->cof[i][j][i0][j0]= (levarr[k]*InvLevelScale4x4Luma_Intra[qp_rem][i0][j0])<<(qp_per-4); + else + img->cof[i][j][i0][j0]= (levarr[k]*InvLevelScale4x4Luma_Inter[qp_rem][i0][j0])<<(qp_per-4); + } + + } //if (!currMB->luma_transform_size_8x8_flag) + else + { + // new inverse quant for 8x8 transform + int b4, iz, jz, dq_rshift = 0, dq_round = 0, dq_lshift = 0; + + // do same as CABAC for deblocking: any coeff in the 8x8 marks all the 4x4s + //as containing coefficients + currMB->cbp_blk |= 51 << ((block_y<<2) + block_x); + + b4 = 2*(j-block_y)+(i-block_x); + + if ((img->structure == FRAME) && (!currMB->mb_field)) + { + iz=SNGL_SCAN8x8[coef_ctr*4+b4][0]; + jz=SNGL_SCAN8x8[coef_ctr*4+b4][1]; + } + 
else { // Alternate scan for field coding + iz=FIELD_SCAN8x8[coef_ctr*4+b4][0]; + jz=FIELD_SCAN8x8[coef_ctr*4+b4][1]; + } + + if (qp_per < 6) + { + dq_rshift = 6 - qp_per; + dq_round = 1<<(5-qp_per); + } + else + dq_lshift = qp_per - 6; + + if(lossless_qpprime) + { + img->m7[block_x*4 +iz][block_y*4 +jz] = levarr[k]; + } + else if (qp_per>=6) + { + if(intra == 1) + img->m7[block_x*4 +iz][block_y*4 +jz] = levarr[k]*InvLevelScale8x8Luma_Intra[qp_rem][iz][jz]<m7[block_x*4 +iz][block_y*4 +jz] = levarr[k]*InvLevelScale8x8Luma_Inter[qp_rem][iz][jz]<m7[block_x*4 +iz][block_y*4 +jz] = (levarr[k]*InvLevelScale8x8Luma_Intra[qp_rem][iz][jz]+dq_round)>>dq_rshift; // dequantization + else + img->m7[block_x*4 +iz][block_y*4 +jz] = (levarr[k]*InvLevelScale8x8Luma_Inter[qp_rem][iz][jz]+dq_round)>>dq_rshift; // dequantization + } + + }//else (!currMB->luma_transform_size_8x8_flag) + } + } + } + else + { + img->nz_coeff[img->current_mb_nr][i][j] = 0; + } + } + } + } // VLC + else + { + if(currMB->luma_transform_size_8x8_flag) + readLumaCoeff8x8_CABAC(img, inp, b8); //======= 8x8 trannsform size & CABAC ======== + else + { + //======= Other Modes & CABAC ======== + //------------------------------------ + for (j=block_y; j < block_y+2; j++) + { + for (i=block_x; i < block_x+2; i++) + { + if (IS_NEWINTRA (currMB)) start_scan = 1; // skip DC coeff + else start_scan = 0; // take all coeffs + + img->subblock_x = i; // position for coeff_count ctx + img->subblock_y = j; // position for coeff_count ctx + if (cbp & (1<is_intra_block = IS_INTRA(currMB); + + #if TRACE + sprintf(currSE.tracestring, "Luma sng "); + #endif + dP = &(currSlice->partArr[partMap[currSE.type]]); + + if (active_pps->entropy_coding_mode_flag == UVLC || dP->bitstream->ei_flag) currSE.mapping = linfo_levrun_inter; + else currSE.reading = readRunLevel_CABAC; + + dP->readSyntaxElement(&currSE,img,inp,dP); + level = currSE.value1; + run = currSE.value2; + len = currSE.len; + + if (level != 0) /* leave if len=1 */ + { + 
coef_ctr += run+1; + + if ((img->structure == FRAME) && (!currMB->mb_field)) + { + i0=SNGL_SCAN[coef_ctr][0]; + j0=SNGL_SCAN[coef_ctr][1]; + } + else { // Alternate scan for field coding + i0=FIELD_SCAN[coef_ctr][0]; + j0=FIELD_SCAN[coef_ctr][1]; + } + currMB->cbp_blk |= 1 << ((j<<2) + i) ; + + if(lossless_qpprime) + { + img->cof[i][j][i0][j0]= level; + } + else if(qp_per<4) + { + if(intra == 1) + img->cof[i][j][i0][j0]= (level*InvLevelScale4x4Luma_Intra[qp_rem][i0][j0]+qp_const)>>(4-qp_per); + else + img->cof[i][j][i0][j0]= (level*InvLevelScale4x4Luma_Inter[qp_rem][i0][j0]+qp_const)>>(4-qp_per); + } + else + { + if(intra == 1) + img->cof[i][j][i0][j0]= (level*InvLevelScale4x4Luma_Intra[qp_rem][i0][j0])<<(qp_per-4); + else + img->cof[i][j][i0][j0]= (level*InvLevelScale4x4Luma_Inter[qp_rem][i0][j0])<<(qp_per-4); + } + + } + } + } + } + } + } + } + } + } + + if (dec_picture->chroma_format_idc != YUV400) + { + for (j=4;j<(4+img->num_blk8x8_uv);j++) // reset all chroma coeffs before read + for (i=0;i<4;i++) + for (iii=0;iii<4;iii++) + for (jjj=0;jjj<4;jjj++) + img->cof[i][j][iii][jjj]=0; + + m2 =img->mb_x*2; + jg2=img->mb_y*2; + + qp_const_uv[0] = 1<<(3-qp_per_uv[0]); + qp_const_uv[1] = 1<<(3-qp_per_uv[1]); + + + //========================== CHROMA DC ============================ + //----------------------------------------------------------------- + // chroma DC coeff + if(cbp>15) + { + for (ll=0;ll<3;ll+=2) + { + uv = ll>>1; + + if (dec_picture->chroma_format_idc == YUV420) + { + //===================== CHROMA DC YUV420 ====================== + for (i=0;i<4;i++) + img->cofu[i]=0; + + if (active_pps->entropy_coding_mode_flag == UVLC) + { + readCoeff4x4_CAVLC(img, inp, CHROMA_DC, 0, 0, + levarr, runarr, &numcoeff); + coef_ctr=-1; + level=1; + for(k = 0; k < numcoeff; k++) + { + if (levarr[k] != 0) + { + currMB->cbp_blk |= 0xf0000 << (ll<<1) ; + coef_ctr=coef_ctr+runarr[k]+1; + img->cofu[coef_ctr]=levarr[k]; + } + } + } + else + { + coef_ctr=-1; + level=1; + 
for(k=0;(k<(img->num_cdc_coeff+1))&&(level!=0);k++) + { + currSE.context = CHROMA_DC; + currSE.type = (IS_INTRA(currMB) ? SE_CHR_DC_INTRA : SE_CHR_DC_INTER); + img->is_intra_block = IS_INTRA(currMB); + img->is_v_block = ll; + + #if TRACE + snprintf(currSE.tracestring, TRACESTRING_SIZE, " 2x2 DC Chroma "); + #endif + dP = &(currSlice->partArr[partMap[currSE.type]]); + + if (active_pps->entropy_coding_mode_flag == UVLC || dP->bitstream->ei_flag) + currSE.mapping = linfo_levrun_c2x2; + else + currSE.reading = readRunLevel_CABAC; + + dP->readSyntaxElement(&currSE,img,inp,dP); + level = currSE.value1; + run = currSE.value2; + len = currSE.len; + if (level != 0) + { + currMB->cbp_blk |= 0xf0000 << (ll<<1) ; + coef_ctr=coef_ctr+run+1; + // Bug: img->cofu has only 4 entries, hence coef_ctr MUST be <4 (which is + // caught by the assert(). If it is bigger than 4, it starts patching the + // img->predmode pointer, which leads to bugs later on. + // + // This assert() should be left in the code, because it captures a very likely + // bug early when testing in error prone environments (or when testing NAL + // functionality). 
+ assert (coef_ctr < img->num_cdc_coeff); + img->cofu[coef_ctr]=level; + } + } + } + + if (smb // check to see if MB type is SPred or SIntra4x4 + || lossless_qpprime) + { + img->cof[0+ll][4][0][0]=img->cofu[0]; img->cof[1+ll][4][0][0]=img->cofu[1]; + img->cof[0+ll][5][0][0]=img->cofu[2]; img->cof[1+ll][5][0][0]=img->cofu[3]; + } + else + { + temp[0]=(img->cofu[0]+img->cofu[1]+img->cofu[2]+img->cofu[3]); + temp[1]=(img->cofu[0]-img->cofu[1]+img->cofu[2]-img->cofu[3]); + temp[2]=(img->cofu[0]+img->cofu[1]-img->cofu[2]-img->cofu[3]); + temp[3]=(img->cofu[0]-img->cofu[1]-img->cofu[2]+img->cofu[3]); + + for (i=0;inum_cdc_coeff;i++) + { + if(qp_per_uv[uv]<5) + { + if(intra == 1) + temp[i]=(temp[i]*InvLevelScale4x4Chroma_Intra[uv][qp_rem_uv[uv]][0][0])>>(5-qp_per_uv[uv]); + else + temp[i]=(temp[i]*InvLevelScale4x4Chroma_Inter[uv][qp_rem_uv[uv]][0][0])>>(5-qp_per_uv[uv]); + } + else + { + if(intra == 1) + temp[i]=(temp[i]*InvLevelScale4x4Chroma_Intra[uv][qp_rem_uv[uv]][0][0])<<(qp_per_uv[uv]-5); + else + temp[i]=(temp[i]*InvLevelScale4x4Chroma_Inter[uv][qp_rem_uv[uv]][0][0])<<(qp_per_uv[uv]-5); + } + } + img->cof[0+ll][4][0][0]=temp[0]; + img->cof[1+ll][4][0][0]=temp[1]; + img->cof[0+ll][5][0][0]=temp[2]; + img->cof[1+ll][5][0][0]=temp[3]; + } + } + else if (dec_picture->chroma_format_idc == YUV422) + { + int i,j,j1; + int uv_idx = ll; + int m3[2][4] = {{0,0,0,0},{0,0,0,0}}; + int m4[2][4] = {{0,0,0,0},{0,0,0,0}}; + int qp_per_uv_dc = (qp_c[uv] + 3 + img->bitdepth_chroma_qp_scale)/6; //for YUV422 only + int qp_rem_uv_dc = (qp_c[uv] + 3 + img->bitdepth_chroma_qp_scale)%6; //for YUV422 only + + //===================== CHROMA DC YUV422 ====================== + if (active_pps->entropy_coding_mode_flag == UVLC) + { + readCoeff4x4_CAVLC(img, inp, CHROMA_DC, 0, 0, + levarr, runarr, &numcoeff); + coef_ctr=-1; + level=1; + for(k = 0; k < numcoeff; k++) + { + if (levarr[k] != 0) + { + currMB->cbp_blk |= ((int64)0xff0000) << (ll<<2); + coef_ctr=coef_ctr+runarr[k]+1; + 
i0=SCAN_YUV422[coef_ctr][0]; + j0=SCAN_YUV422[coef_ctr][1]; + + m3[i0][j0]=levarr[k]; + } + } + } + else + { + coef_ctr=-1; + level=1; + for(k=0;(k<9)&&(level!=0);k++) + { + currSE.context = CHROMA_DC_2x4; + currSE.type = (IS_INTRA(currMB) ? SE_CHR_DC_INTRA : SE_CHR_DC_INTER); + img->is_intra_block = IS_INTRA(currMB); + img->is_v_block = ll; + + #if TRACE + snprintf(currSE.tracestring, TRACESTRING_SIZE, "2x4 DC Chroma "); + #endif + dP = &(currSlice->partArr[partMap[currSE.type]]); + + if (active_pps->entropy_coding_mode_flag == UVLC || dP->bitstream->ei_flag) + currSE.mapping = linfo_levrun_c2x2; + else + currSE.reading = readRunLevel_CABAC; + + dP->readSyntaxElement(&currSE,img,inp,dP); + level = currSE.value1; + run = currSE.value2; + len = currSE.len; + if (level != 0) + { + currMB->cbp_blk |= ((int64)0xff0000) << (ll<<2) ; + coef_ctr=coef_ctr+run+1; + assert (coef_ctr < img->num_cdc_coeff); + i0=SCAN_YUV422[coef_ctr][0]; + j0=SCAN_YUV422[coef_ctr][1]; + + m3[i0][j0]=level; + } + } + } + // inverse CHROMA DC YUV422 transform + // horizontal + if(!lossless_qpprime) + { + m4[0][0] = m3[0][0] + m3[1][0]; + m4[0][1] = m3[0][1] + m3[1][1]; + m4[0][2] = m3[0][2] + m3[1][2]; + m4[0][3] = m3[0][3] + m3[1][3]; + + m4[1][0] = m3[0][0] - m3[1][0]; + m4[1][1] = m3[0][1] - m3[1][1]; + m4[1][2] = m3[0][2] - m3[1][2]; + m4[1][3] = m3[0][3] - m3[1][3]; + } + else + { + for(i=0;i<2;i++) + for(j=0;j<4;j++) + img->cof[i+uv_idx][j+4][0][0]=m3[i][j]; + } + + // vertical + for (i=0;i<2 && !lossless_qpprime;i++) + { + for (j=0; j < 4;j++) //TODO: remove m5 with m4 + m5[j]=m4[i][j]; + + m6[0]=m5[0]+m5[2]; + m6[1]=m5[0]-m5[2]; + m6[2]=m5[1]-m5[3]; + m6[3]=m5[1]+m5[3]; + + for (j=0;j<2;j++) + { + j1=3-j; + if(qp_per_uv_dc<4) + { + if(intra == 1) + { + img->cof[i+uv_idx][j +4][0][0]=((((m6[j]+m6[j1])*InvLevelScale4x4Chroma_Intra[uv][qp_rem_uv_dc][0][0]+(1<<(3-qp_per_uv_dc)))>>(4-qp_per_uv_dc))+2)>>2; + 
img->cof[i+uv_idx][j1+4][0][0]=((((m6[j]-m6[j1])*InvLevelScale4x4Chroma_Intra[uv][qp_rem_uv_dc][0][0]+(1<<(3-qp_per_uv_dc)))>>(4-qp_per_uv_dc))+2)>>2; + } + else + { + img->cof[i+uv_idx][j +4][0][0]=((((m6[j]+m6[j1])*InvLevelScale4x4Chroma_Inter[uv][qp_rem_uv_dc][0][0]+(1<<(3-qp_per_uv_dc)))>>(4-qp_per_uv_dc))+2)>>2; + img->cof[i+uv_idx][j1+4][0][0]=((((m6[j]-m6[j1])*InvLevelScale4x4Chroma_Inter[uv][qp_rem_uv_dc][0][0]+(1<<(3-qp_per_uv_dc)))>>(4-qp_per_uv_dc))+2)>>2; + } + } + else + { + if(intra == 1) + { + img->cof[i+uv_idx][j +4][0][0]=((((m6[j]+m6[j1])*InvLevelScale4x4Chroma_Intra[uv][qp_rem_uv_dc][0][0])<<(qp_per_uv_dc-4))+2)>>2; + img->cof[i+uv_idx][j1+4][0][0]=((((m6[j]-m6[j1])*InvLevelScale4x4Chroma_Intra[uv][qp_rem_uv_dc][0][0])<<(qp_per_uv_dc-4))+2)>>2; + } + else + { + img->cof[i+uv_idx][j +4][0][0]=((((m6[j]+m6[j1])*InvLevelScale4x4Chroma_Inter[uv][qp_rem_uv_dc][0][0])<<(qp_per_uv_dc-4))+2)>>2; + img->cof[i+uv_idx][j1+4][0][0]=((((m6[j]-m6[j1])*InvLevelScale4x4Chroma_Inter[uv][qp_rem_uv_dc][0][0])<<(qp_per_uv_dc-4))+2)>>2; + } + } + }//for (j=0;j<2;j++) + }//for (i=0;i<2;i++) + }//else if (dec_picture->chroma_format_idc == YUV422) + else + { + //===================== CHROMA DC YUV444 ====================== + int i,j,i1,j1; + int uv_idx = 4 + (ll<<1); + + if (active_pps->entropy_coding_mode_flag == UVLC) + { + readCoeff4x4_CAVLC(img, inp, CHROMA_DC, 0, 0, + levarr, runarr, &numcoeff); + coef_ctr=-1; + level=1; + for(k = 0; k < numcoeff; k++) + { + if (levarr[k] != 0) + { + currMB->cbp_blk |= ((int64)0xffff0000) << (ll<<3) ; + coef_ctr=coef_ctr+runarr[k]+1; + i0=SNGL_SCAN[coef_ctr][0]; + j0=SNGL_SCAN[coef_ctr][1]; + + img->cof[i0][j0+uv_idx][0][0]=levarr[k]; + } + } + } + else + { + coef_ctr=-1; + level=1; + for(k=0;(k<17)&&(level!=0);k++) + { + currSE.context = CHROMA_DC_4x4; + currSE.type = (IS_INTRA(currMB) ? 
SE_CHR_DC_INTRA : SE_CHR_DC_INTER); + img->is_intra_block = IS_INTRA(currMB); + img->is_v_block = ll; + + #if TRACE + snprintf(currSE.tracestring, TRACESTRING_SIZE, " DC Chroma "); + #endif + dP = &(currSlice->partArr[partMap[currSE.type]]); + + if (active_pps->entropy_coding_mode_flag == UVLC || dP->bitstream->ei_flag) + currSE.mapping = linfo_levrun_c2x2; + else + currSE.reading = readRunLevel_CABAC; + + dP->readSyntaxElement(&currSE,img,inp,dP); + level = currSE.value1; + run = currSE.value2; + len = currSE.len; + if (level != 0) + { + currMB->cbp_blk |= ((int64)0xffff0000) << (ll<<3) ; + coef_ctr=coef_ctr+run+1; + assert (coef_ctr < img->num_cdc_coeff); + i0=SNGL_SCAN[coef_ctr][0]; + j0=SNGL_SCAN[coef_ctr][1]; + + img->cof[i0][j0+uv_idx][0][0]=level; + } + } + } + + // Residue Color Transform + if(!residual_transform_dc) + { + // inverse CHROMA DC YUV444 transform + // horizontal + for (j=uv_idx; (j < 4+uv_idx) && !lossless_qpprime;j++) + { + for (i=0;i<4;i++) + m5[i]=img->cof[i][j][0][0]; + + m6[0]=m5[0]+m5[2]; + m6[1]=m5[0]-m5[2]; + m6[2]=m5[1]-m5[3]; + m6[3]=m5[1]+m5[3]; + + for (i=0;i<2;i++) + { + i1=3-i; + img->cof[i][j][0][0]= m6[i]+m6[i1]; + img->cof[i1][j][0][0]=m6[i]-m6[i1]; + } + } + + // vertical + for (i=0;i<4 && !lossless_qpprime;i++) + { + for (j=0; j < 4;j++) + m5[j]=img->cof[i][j+uv_idx][0][0]; + + m6[0]=m5[0]+m5[2]; + m6[1]=m5[0]-m5[2]; + m6[2]=m5[1]-m5[3]; + m6[3]=m5[1]+m5[3]; + + for (j=0;j<2;j++) + { + j1=3-j; + if(qp_per_uv[uv]<4) + { + if(intra == 1) + { + img->cof[i][j +uv_idx][0][0]=((((m6[j]+m6[j1])*InvLevelScale4x4Chroma_Intra[uv][qp_rem_uv[uv]][0][0]+(1<<(3-qp_per_uv[uv])))>>(4-qp_per_uv[uv]))+2)>>2; + img->cof[i][j1+uv_idx][0][0]=((((m6[j]-m6[j1])*InvLevelScale4x4Chroma_Intra[uv][qp_rem_uv[uv]][0][0]+(1<<(3-qp_per_uv[uv])))>>(4-qp_per_uv[uv]))+2)>>2; + } + else + { + img->cof[i][j +uv_idx][0][0]=((((m6[j]+m6[j1])*InvLevelScale4x4Chroma_Inter[uv][qp_rem_uv[uv]][0][0]+(1<<(3-qp_per_uv[uv])))>>(4-qp_per_uv[uv]))+2)>>2; + 
img->cof[i][j1+uv_idx][0][0]=((((m6[j]-m6[j1])*InvLevelScale4x4Chroma_Inter[uv][qp_rem_uv[uv]][0][0]+(1<<(3-qp_per_uv[uv])))>>(4-qp_per_uv[uv]))+2)>>2; + } + } + else + { + if(intra == 1) + { + img->cof[i][j +uv_idx][0][0]=((((m6[j]+m6[j1])*InvLevelScale4x4Chroma_Intra[uv][qp_rem_uv[uv]][0][0])<<(qp_per_uv[uv]-4))+2)>>2; + img->cof[i][j1+uv_idx][0][0]=((((m6[j]-m6[j1])*InvLevelScale4x4Chroma_Intra[uv][qp_rem_uv[uv]][0][0])<<(qp_per_uv[uv]-4))+2)>>2; + } + else + { + img->cof[i][j +uv_idx][0][0]=((((m6[j]+m6[j1])*InvLevelScale4x4Chroma_Inter[uv][qp_rem_uv[uv]][0][0])<<(qp_per_uv[uv]-4))+2)>>2; + img->cof[i][j1+uv_idx][0][0]=((((m6[j]-m6[j1])*InvLevelScale4x4Chroma_Inter[uv][qp_rem_uv[uv]][0][0])<<(qp_per_uv[uv]-4))+2)>>2; + } + } + } + }//for (i=0;i<4;i++) + } + else + { //residual_transform_dc + + for (i=0;i<4 && !lossless_qpprime ;i++) + for (j=0; j < 4;j++) + { + if(qp_per_uv[uv]<4) + { + if(intra == 1) + { + img->cof[i][j +uv_idx][0][0]=((((img->cof[i][j +uv_idx][0][0])*InvLevelScale4x4Chroma_Intra[uv][qp_rem_uv[uv]][0][0]+(1<<(3-qp_per_uv[uv])))>>(4-qp_per_uv[uv]))); + } + else + { + img->cof[i][j +uv_idx][0][0]=((((img->cof[i][j +uv_idx][0][0])*InvLevelScale4x4Chroma_Inter[uv][qp_rem_uv[uv]][0][0]+(1<<(3-qp_per_uv[uv])))>>(4-qp_per_uv[uv]))); + } + } + else + { + if(intra == 1) + { + img->cof[i][j +uv_idx][0][0]=((((img->cof[i][j +uv_idx][0][0])*InvLevelScale4x4Chroma_Intra[uv][qp_rem_uv[uv]][0][0])<<(qp_per_uv[uv]-4))); + } + else + { + img->cof[i][j +uv_idx][0][0]=((((img->cof[i][j +uv_idx][0][0])*InvLevelScale4x4Chroma_Inter[uv][qp_rem_uv[uv]][0][0])<<(qp_per_uv[uv]-4))); + } + } + } + } //residual_transform_dc + }//else (dec_picture->chroma_format_idc == YUV444) + }//for (ll=0;ll<3;ll+=2) + } + + // chroma AC coeff, all zero fram start_scan + if (cbp<=31) + for (j=4; j < (4 + img->num_blk8x8_uv); j++) + for (i=0; i < 4; i++) + img->nz_coeff [img->current_mb_nr ][i][j]=0; + + + //========================== CHROMA AC ============================ + 
//----------------------------------------------------------------- + // chroma AC coeff, all zero fram start_scan + if (cbp>31) + { + for (b8=0; b8 < img->num_blk8x8_uv; b8++) + { + for (b4=0; b4 < 4; b4++) + { + i = cofuv_blk_x[yuv][b8][b4]; + j = cofuv_blk_y[yuv][b8][b4]; + + img->is_v_block = uv = (b8 > ((img->num_blk8x8_uv>>1) - 1 )); + + + if (active_pps->entropy_coding_mode_flag == UVLC) + { + readCoeff4x4_CAVLC(img, inp, CHROMA_AC, i, j, + levarr, runarr, &numcoeff); + coef_ctr=0; + level=1; + for(k = 0; k < numcoeff;k++) + { + if (levarr[k] != 0) + { + currMB->cbp_blk |= ((int64)1) << cbp_blk_chroma[b8][b4]; + coef_ctr=coef_ctr+runarr[k]+1; + + if ((img->structure == FRAME) && (!currMB->mb_field)) + { + i0=SNGL_SCAN[coef_ctr][0]; + j0=SNGL_SCAN[coef_ctr][1]; + } + else + { // Alternate scan for field coding + i0=FIELD_SCAN[coef_ctr][0]; + j0=FIELD_SCAN[coef_ctr][1]; + } + + if(lossless_qpprime) + { + img->cof[i][j][i0][j0]=levarr[k]; + } + else if(qp_per_uv[uv]<4) + { + if(intra == 1) + img->cof[i][j][i0][j0]=(levarr[k]*InvLevelScale4x4Chroma_Intra[img->is_v_block][qp_rem_uv[uv]][i0][j0]+qp_const_uv[uv])>>(4-qp_per_uv[uv]); + else + img->cof[i][j][i0][j0]=(levarr[k]*InvLevelScale4x4Chroma_Inter[img->is_v_block][qp_rem_uv[uv]][i0][j0]+qp_const_uv[uv])>>(4-qp_per_uv[uv]); + } + else + { + if(intra == 1) + img->cof[i][j][i0][j0]=(levarr[k]*InvLevelScale4x4Chroma_Intra[img->is_v_block][qp_rem_uv[uv]][i0][j0])<<(qp_per_uv[uv]-4); + else + img->cof[i][j][i0][j0]=(levarr[k]*InvLevelScale4x4Chroma_Inter[img->is_v_block][qp_rem_uv[uv]][i0][j0])<<(qp_per_uv[uv]-4); + } + } + } + } + else + { + coef_ctr=0; + level=1; + + img->subblock_y = subblk_offset_y[yuv][b8][b4]>>2; + img->subblock_x = subblk_offset_x[yuv][b8][b4]>>2; + + currSE.context = CHROMA_AC; + currSE.type = (IS_INTRA(currMB) ? 
SE_CHR_AC_INTRA : SE_CHR_AC_INTER); + img->is_intra_block = IS_INTRA(currMB); + + for(k=0;(k<16)&&(level!=0);k++) + { + #if TRACE + snprintf(currSE.tracestring, TRACESTRING_SIZE, " AC Chroma "); + #endif + dP = &(currSlice->partArr[partMap[currSE.type]]); + + if (active_pps->entropy_coding_mode_flag == UVLC || dP->bitstream->ei_flag) + currSE.mapping = linfo_levrun_inter; + else + currSE.reading = readRunLevel_CABAC; + + dP->readSyntaxElement(&currSE,img,inp,dP); + level = currSE.value1; + run = currSE.value2; + len = currSE.len; + + if (level != 0) + { + currMB->cbp_blk |= ((int64)1) << cbp_blk_chroma[b8][b4]; + coef_ctr=coef_ctr+run+1; + + if ((img->structure == FRAME) && (!currMB->mb_field)) + { + i0=SNGL_SCAN[coef_ctr][0]; + j0=SNGL_SCAN[coef_ctr][1]; + } + else { // Alternate scan for field coding + i0=FIELD_SCAN[coef_ctr][0]; + j0=FIELD_SCAN[coef_ctr][1]; + } + + if(lossless_qpprime) + { + img->cof[i][j][i0][j0]=level; + } + else if(qp_per_uv[uv]<4) + { + if(intra == 1) + img->cof[i][j][i0][j0]=(level*InvLevelScale4x4Chroma_Intra[img->is_v_block][qp_rem_uv[uv]][i0][j0]+qp_const_uv[uv])>>(4-qp_per_uv[uv]); + else + img->cof[i][j][i0][j0]=(level*InvLevelScale4x4Chroma_Inter[img->is_v_block][qp_rem_uv[uv]][i0][j0]+qp_const_uv[uv])>>(4-qp_per_uv[uv]); + } + else + { + if(intra == 1) + img->cof[i][j][i0][j0]=(level*InvLevelScale4x4Chroma_Intra[img->is_v_block][qp_rem_uv[uv]][i0][j0])<<(qp_per_uv[uv]-4); + else + img->cof[i][j][i0][j0]=(level*InvLevelScale4x4Chroma_Inter[img->is_v_block][qp_rem_uv[uv]][i0][j0])<<(qp_per_uv[uv]-4); + } + } + } //for(k=0;(k<16)&&(level!=0);k++) + } //else / if (active_pps->entropy_coding_mode_flag == UVLC) + } //for (b4=0; b4 < 4; b4++) + } //for (b8=0; b8 < img->num_blk8x8_uv; b8++) + } //if (cbp>31) + } //if (dec_picture->chroma_format_idc != YUV400) + } + + + /*! 
+ ************************************************************************ + * \brief + * Copy IPCM coefficients to decoded picture buffer and set parameters for this MB + * (for IPCM CABAC and IPCM CAVLC 28/11/2003) + * + * \author + * Dong Wang + ************************************************************************ + */ + + void decode_ipcm_mb(struct img_par *img) + { + int i,j; + + Macroblock *currMb = &img->mb_data[img->current_mb_nr]; + + //Copy coefficients to decoded picture buffer + //IPCM coefficients are stored in img->cof which is set in function readIPCMcoeffsFromNAL() + + for(i=0;i<16;i++) + for(j=0;j<16;j++) + dec_picture->imgY[img->pix_y+i][img->pix_x+j]=img->cof[i/4][j/4][i%4][j%4]; + + if (dec_picture->chroma_format_idc != YUV400) + { + for(i=0;imb_cr_size_y;i++) + for(j=0;jmb_cr_size_x;j++) + dec_picture->imgUV[0][img->pix_c_y+i][img->pix_c_x+j]=img->cof[i/4][j/4+4][i%4][j%4]; //TODO-VG + + for(i=0;imb_cr_size_y;i++) + for(j=0;jmb_cr_size_x;j++) + dec_picture->imgUV[1][img->pix_c_y+i][img->pix_c_x+j]=img->cof[i/4+2][j/4+4][i%4][j%4]; //TODO-VG + } + + + + // for deblocking filter + currMb->qp=0; + + // for CAVLC: Set the nz_coeff to 16. + // These parameters are to be used in CAVLC decoding of neighbour blocks + for(i=0;i<4;i++) + for (j=0;j<(4 + img->num_blk8x8_uv);j++) + img->nz_coeff[img->current_mb_nr][i][j]=16; + + + // for CABAC decoding of MB skip flag + currMb->skip_flag = 0; + + //for deblocking filter CABAC + currMb->cbp_blk=0xFFFF; + + //For CABAC decoding of Dquant + last_dquant=0; + } + + /*! 
+ ************************************************************************ + * \brief + * decode one macroblock + ************************************************************************ + */ + + int decode_one_macroblock(struct img_par *img,struct inp_par *inp) + { + int tmp_block[BLOCK_SIZE][BLOCK_SIZE]; + int tmp_blockbw[BLOCK_SIZE][BLOCK_SIZE]; + int i=0,j=0,k,l,ii=0,jj=0,i1=0,j1=0,j4=0,i4=0; + int uv, hv; + int vec1_x=0,vec1_y=0,vec2_x=0,vec2_y=0; + int ioff,joff; + int block8x8; // needed for ABT + + int bw_pred=0, fw_pred=0, pred, ifx; + int ii0,jj0,ii1,jj1,if1,jf1,if0,jf0; + int mv_mul, f1_x, f1_y, f2_x, f2_y, f3, f4; + + const byte decode_block_scan[16] = {0,1,4,5,2,3,6,7,8,9,12,13,10,11,14,15}; + + Macroblock *currMB = &img->mb_data[img->current_mb_nr]; + short ref_idx, fw_refframe=-1, bw_refframe=-1; + int mv_mode, pred_dir, intra_prediction; // = currMB->ref_frame; + short fw_ref_idx=-1, bw_ref_idx=-1; + + short *** mv_array, ***fw_mv_array, ***bw_mv_array; + + int mv_scale; + + int mb_nr = img->current_mb_nr; + int smb = ((img->type==SP_SLICE) && IS_INTER (currMB)) || (img->type == SI_SLICE && currMB->mb_type == SI4MB); + int list_offset; + int max_y_cr; + + StorablePicture **list; + + int jf; + + char fw_rFrame=-1,bw_rFrame=-1; + + short pmvfw[2]={0,0}, + pmvbw[2]={0,0}; + + int direct_pdir=-1; + + int curr_mb_field = ((img->MbaffFrameFlag)&&(currMB->mb_field)); + + byte ** moving_block; + short **** co_located_mv; + char *** co_located_ref_idx; + int64 *** co_located_ref_id; + + int need_4x4_transform = (!currMB->luma_transform_size_8x8_flag); + + int b8, b4; + int uv_shift; + int yuv = dec_picture->chroma_format_idc - 1; + + // Residue Color Transform + int residue_transform_flag = img->residue_transform_flag; + int residue_R, residue_G, residue_B, temp; + + if(currMB->mb_type==IPCM) + { + //copy readed data into imgY and set parameters + decode_ipcm_mb(img); + return 0; + } + + ////////////////////////// + + // find out the correct list offsets + 
if (curr_mb_field) + { + if(mb_nr%2) + { + list_offset = 4; // top field mb + moving_block = Co_located->bottom_moving_block; + co_located_mv = Co_located->bottom_mv; + co_located_ref_idx = Co_located->bottom_ref_idx; + co_located_ref_id = Co_located->bottom_ref_pic_id; + } + else + { + list_offset = 2; // bottom field mb + moving_block = Co_located->top_moving_block; + co_located_mv = Co_located->top_mv; + co_located_ref_idx = Co_located->top_ref_idx; + co_located_ref_id = Co_located->top_ref_pic_id; + } + max_y_cr = dec_picture->size_y_cr/2-1; + } + else + { + list_offset = 0; // no mb aff or frame mb + moving_block = Co_located->moving_block; + co_located_mv = Co_located->mv; + co_located_ref_idx = Co_located->ref_idx; + co_located_ref_id = Co_located->ref_pic_id; + max_y_cr = dec_picture->size_y_cr-1; + } + + + + if (!img->MbaffFrameFlag) + { + for (l=0+list_offset;l<(2+list_offset);l++) + { + for(k = 0; k < listXsize[l]; k++) + { + listX[l][k]->chroma_vector_adjustment= 0; + if(img->structure == TOP_FIELD && img->structure != listX[l][k]->structure) + listX[l][k]->chroma_vector_adjustment = -2; + if(img->structure == BOTTOM_FIELD && img->structure != listX[l][k]->structure) + listX[l][k]->chroma_vector_adjustment = 2; + } + } + } + else + { + if (curr_mb_field) + { + for (l=0+list_offset;l<(2+list_offset);l++) + { + for(k = 0; k < listXsize[l]; k++) + { + listX[l][k]->chroma_vector_adjustment= 0; + if(img->current_mb_nr % 2 == 0 && listX[l][k]->structure == BOTTOM_FIELD) + listX[l][k]->chroma_vector_adjustment = -2; + if(img->current_mb_nr % 2 == 1 && listX[l][k]->structure == TOP_FIELD) + listX[l][k]->chroma_vector_adjustment = 2; + } + } + } + else + { + for (l=0+list_offset;l<(2+list_offset);l++) + { + for(k = 0; k < listXsize[l]; k++) + { + listX[l][k]->chroma_vector_adjustment= 0; + } + } + } + } + + mv_mul=4; + + // luma decoding ************************************************** + + // get prediction for INTRA_MB_16x16 + if (IS_NEWINTRA (currMB)) + { + 
intrapred_luma_16x16(img, currMB->i16mode); + } + + if (img->type==B_SLICE && img->direct_spatial_mv_pred_flag && (IS_DIRECT (currMB) || + (IS_P8x8(currMB) && !(currMB->b8mode[0] && currMB->b8mode[1] && currMB->b8mode[2] && currMB->b8mode[3])))) + { + char fw_rFrameL, fw_rFrameU, fw_rFrameUL, fw_rFrameUR; + char bw_rFrameL, bw_rFrameU, bw_rFrameUL, bw_rFrameUR; + + PixelPos mb_left, mb_up, mb_upleft, mb_upright; + + getLuma4x4Neighbour(img->current_mb_nr,0,0,-1, 0,&mb_left); + getLuma4x4Neighbour(img->current_mb_nr,0,0, 0,-1,&mb_up); + getLuma4x4Neighbour(img->current_mb_nr,0,0,16, -1,&mb_upright); + getLuma4x4Neighbour(img->current_mb_nr,0,0, -1,-1,&mb_upleft); + + if (!img->MbaffFrameFlag) + { + fw_rFrameL = mb_left.available ? dec_picture->ref_idx[LIST_0][mb_left.pos_y][mb_left.pos_x] : -1; + fw_rFrameU = mb_up.available ? dec_picture->ref_idx[LIST_0][mb_up.pos_y][mb_up.pos_x] : -1; + fw_rFrameUL = mb_upleft.available ? dec_picture->ref_idx[LIST_0][mb_upleft.pos_y][mb_upleft.pos_x] : -1; + fw_rFrameUR = mb_upright.available ? dec_picture->ref_idx[LIST_0][mb_upright.pos_y][mb_upright.pos_x] : fw_rFrameUL; + + bw_rFrameL = mb_left.available ? dec_picture->ref_idx[LIST_1][mb_left.pos_y][mb_left.pos_x] : -1; + bw_rFrameU = mb_up.available ? dec_picture->ref_idx[LIST_1][mb_up.pos_y][mb_up.pos_x] : -1; + bw_rFrameUL = mb_upleft.available ? dec_picture->ref_idx[LIST_1][mb_upleft.pos_y][mb_upleft.pos_x] : -1; + bw_rFrameUR = mb_upright.available ? dec_picture->ref_idx[LIST_1][mb_upright.pos_y][mb_upright.pos_x] : bw_rFrameUL; + } + else + { + if (img->mb_data[img->current_mb_nr].mb_field) + { + fw_rFrameL = mb_left.available ? + img->mb_data[mb_left.mb_addr].mb_field || dec_picture->ref_idx[LIST_0][mb_left.pos_y][mb_left.pos_x] < 0? + dec_picture->ref_idx[LIST_0][mb_left.pos_y][mb_left.pos_x] : + dec_picture->ref_idx[LIST_0][mb_left.pos_y][mb_left.pos_x] * 2: -1; + + fw_rFrameU = mb_up.available ? 
+ img->mb_data[mb_up.mb_addr].mb_field || dec_picture->ref_idx[LIST_0][mb_up.pos_y][mb_up.pos_x] < 0? + dec_picture->ref_idx[LIST_0][mb_up.pos_y][mb_up.pos_x] : + dec_picture->ref_idx[LIST_0][mb_up.pos_y][mb_up.pos_x] * 2: -1; + + fw_rFrameUL = mb_upleft.available ? + img->mb_data[mb_upleft.mb_addr].mb_field || dec_picture->ref_idx[LIST_0][mb_upleft.pos_y][mb_upleft.pos_x] < 0? + dec_picture->ref_idx[LIST_0][mb_upleft.pos_y][mb_upleft.pos_x] : + dec_picture->ref_idx[LIST_0][mb_upleft.pos_y][mb_upleft.pos_x] *2: -1; + + fw_rFrameUR = mb_upright.available ? + img->mb_data[mb_upright.mb_addr].mb_field || dec_picture->ref_idx[LIST_0][mb_upright.pos_y][mb_upright.pos_x] < 0? + dec_picture->ref_idx[LIST_0][mb_upright.pos_y][mb_upright.pos_x] : + dec_picture->ref_idx[LIST_0][mb_upright.pos_y][mb_upright.pos_x] * 2: fw_rFrameUL; + + bw_rFrameL = mb_left.available ? + img->mb_data[mb_left.mb_addr].mb_field || dec_picture->ref_idx[LIST_1][mb_left.pos_y][mb_left.pos_x] < 0? + dec_picture->ref_idx[LIST_1][mb_left.pos_y][mb_left.pos_x] : + dec_picture->ref_idx[LIST_1][mb_left.pos_y][mb_left.pos_x] * 2: -1; + + bw_rFrameU = mb_up.available ? + img->mb_data[mb_up.mb_addr].mb_field || dec_picture->ref_idx[LIST_1][mb_up.pos_y][mb_up.pos_x] < 0? + dec_picture->ref_idx[LIST_1][mb_up.pos_y][mb_up.pos_x] : + dec_picture->ref_idx[LIST_1][mb_up.pos_y][mb_up.pos_x] * 2: -1; + + bw_rFrameUL = mb_upleft.available ? + img->mb_data[mb_upleft.mb_addr].mb_field || dec_picture->ref_idx[LIST_1][mb_upleft.pos_y][mb_upleft.pos_x] < 0? + dec_picture->ref_idx[LIST_1][mb_upleft.pos_y][mb_upleft.pos_x] : + dec_picture->ref_idx[LIST_1][mb_upleft.pos_y][mb_upleft.pos_x] *2: -1; + + bw_rFrameUR = mb_upright.available ? + img->mb_data[mb_upright.mb_addr].mb_field || dec_picture->ref_idx[LIST_1][mb_upright.pos_y][mb_upright.pos_x] < 0? 
+ dec_picture->ref_idx[LIST_1][mb_upright.pos_y][mb_upright.pos_x] : + dec_picture->ref_idx[LIST_1][mb_upright.pos_y][mb_upright.pos_x] * 2: bw_rFrameUL; + } + else + { + fw_rFrameL = mb_left.available ? + img->mb_data[mb_left.mb_addr].mb_field || dec_picture->ref_idx[LIST_0][mb_left.pos_y][mb_left.pos_x] < 0 ? + dec_picture->ref_idx[LIST_0][mb_left.pos_y][mb_left.pos_x] >> 1 : + dec_picture->ref_idx[LIST_0][mb_left.pos_y][mb_left.pos_x]: -1; + + fw_rFrameU = mb_up.available ? + img->mb_data[mb_up.mb_addr].mb_field || dec_picture->ref_idx[LIST_0][mb_up.pos_y][mb_up.pos_x] < 0 ? + dec_picture->ref_idx[LIST_0][mb_up.pos_y][mb_up.pos_x] >> 1 : + dec_picture->ref_idx[LIST_0][mb_up.pos_y][mb_up.pos_x] : -1; + + fw_rFrameUL = mb_upleft.available ? + img->mb_data[mb_upleft.mb_addr].mb_field || dec_picture->ref_idx[LIST_0][mb_upleft.pos_y][mb_upleft.pos_x] < 0 ? + dec_picture->ref_idx[LIST_0][mb_upleft.pos_y][mb_upleft.pos_x]>> 1 : + dec_picture->ref_idx[LIST_0][mb_upleft.pos_y][mb_upleft.pos_x] : -1; + + fw_rFrameUR = mb_upright.available ? + img->mb_data[mb_upright.mb_addr].mb_field || dec_picture->ref_idx[LIST_0][mb_upright.pos_y][mb_upright.pos_x] < 0 ? + dec_picture->ref_idx[LIST_0][mb_upright.pos_y][mb_upright.pos_x] >> 1 : + dec_picture->ref_idx[LIST_0][mb_upright.pos_y][mb_upright.pos_x] : fw_rFrameUL; + + bw_rFrameL = mb_left.available ? + img->mb_data[mb_left.mb_addr].mb_field || dec_picture->ref_idx[LIST_1][mb_left.pos_y][mb_left.pos_x] < 0 ? + dec_picture->ref_idx[LIST_1][mb_left.pos_y][mb_left.pos_x] >> 1 : + dec_picture->ref_idx[LIST_1][mb_left.pos_y][mb_left.pos_x] : -1; + + bw_rFrameU = mb_up.available ? + img->mb_data[mb_up.mb_addr].mb_field || dec_picture->ref_idx[LIST_1][mb_up.pos_y][mb_up.pos_x] < 0 ? + dec_picture->ref_idx[LIST_1][mb_up.pos_y][mb_up.pos_x] >> 1 : + dec_picture->ref_idx[LIST_1][mb_up.pos_y][mb_up.pos_x] : -1; + + bw_rFrameUL = mb_upleft.available ? 
+ img->mb_data[mb_upleft.mb_addr].mb_field || dec_picture->ref_idx[LIST_1][mb_upleft.pos_y][mb_upleft.pos_x] < 0 ? + dec_picture->ref_idx[LIST_1][mb_upleft.pos_y][mb_upleft.pos_x] >> 1 : + dec_picture->ref_idx[LIST_1][mb_upleft.pos_y][mb_upleft.pos_x] : -1; + + bw_rFrameUR = mb_upright.available ? + img->mb_data[mb_upright.mb_addr].mb_field || dec_picture->ref_idx[LIST_1][mb_upright.pos_y][mb_upright.pos_x] < 0 ? + dec_picture->ref_idx[LIST_1][mb_upright.pos_y][mb_upright.pos_x] >> 1: + dec_picture->ref_idx[LIST_1][mb_upright.pos_y][mb_upright.pos_x] : bw_rFrameUL; + } + } + + fw_rFrame = (fw_rFrameL >= 0 && fw_rFrameU >= 0) ? min(fw_rFrameL,fw_rFrameU): max(fw_rFrameL,fw_rFrameU); + fw_rFrame = (fw_rFrame >= 0 && fw_rFrameUR >= 0) ? min(fw_rFrame,fw_rFrameUR): max(fw_rFrame,fw_rFrameUR); + + bw_rFrame = (bw_rFrameL >= 0 && bw_rFrameU >= 0) ? min(bw_rFrameL,bw_rFrameU): max(bw_rFrameL,bw_rFrameU); + bw_rFrame = (bw_rFrame >= 0 && bw_rFrameUR >= 0) ? min(bw_rFrame,bw_rFrameUR): max(bw_rFrame,bw_rFrameUR); + + if (fw_rFrame >=0) + SetMotionVectorPredictor (img, pmvfw, pmvfw+1, fw_rFrame, LIST_0, dec_picture->ref_idx, dec_picture->mv, 0, 0, 16, 16); + + if (bw_rFrame >=0) + SetMotionVectorPredictor (img, pmvbw, pmvbw+1, bw_rFrame, LIST_1, dec_picture->ref_idx, dec_picture->mv, 0, 0, 16, 16); + + } + + for (block8x8=0; block8x8<4; block8x8++) + { + if (currMB->b8mode[block8x8] == I8MB) + { + //=========== 8x8 BLOCK TYPE ============ + ioff = 8*(block8x8%2); + joff = 8*(block8x8/2); + + //PREDICTION + if(!residue_transform_flag) + intrapred8x8(img, block8x8); + + itrans8x8(img,ioff,joff); // use DCT transform and make 8x8 block m7 from prediction block mpr + + for(ii=0;ii<8;ii++) + { + for(jj=0;jj<8;jj++) + { + // Residue Color Transform + if(!residue_transform_flag) + { + dec_picture->imgY[img->pix_y + joff + jj][img->pix_x + ioff + ii]=img->m7[ii + ioff][jj + joff]; // contruct picture from 4x4 blocks + } + else + { + rec_res[0][ii+ioff][jj+joff] = img->m7[ii + 
ioff][jj + joff]; + } + } + } + continue; + } + + for (k = block8x8*4; k < block8x8*4+4; k ++) + { + i = (decode_block_scan[k] & 3); + j = ((decode_block_scan[k] >> 2) & 3); + + ioff=i*4; + i4=img->block_x+i; + + joff=j*4; + j4=img->block_y+j; + + mv_mode = currMB->b8mode[2*(j/2)+(i/2)]; + pred_dir = currMB->b8pdir[2*(j/2)+(i/2)]; + + assert (pred_dir<=2); + + // PREDICTION + if (mv_mode==IBLOCK) + { + // Residue Color Transform + if(!residue_transform_flag) + { + //===== INTRA PREDICTION ===== + if (intrapred(img,ioff,joff,i4,j4)==SEARCH_SYNC) /* make 4x4 prediction block mpr from given prediction img->mb_mode */ + return SEARCH_SYNC; /* bit error */ + } + } + else if (!IS_NEWINTRA (currMB)) + { + if (pred_dir != 2) + { + //===== FORWARD/BACKWARD PREDICTION ===== + fw_refframe = ref_idx = dec_picture->ref_idx[LIST_0 + pred_dir][j4][i4]; + mv_array = dec_picture->mv[LIST_0 + pred_dir]; + list = listX[0+list_offset+ pred_dir]; + vec1_x = i4*4*mv_mul + mv_array[j4][i4][0]; + + if (!curr_mb_field) + { + vec1_y = j4*4*mv_mul + mv_array[j4][i4][1]; + } + else + { + if (mb_nr%2 == 0) + vec1_y = (img->block_y * 2 + joff) * mv_mul + mv_array[j4][i4][1]; + else + vec1_y = ((img->block_y-4) * 2 + joff)* mv_mul + mv_array[j4][i4][1]; + } + + get_block (ref_idx, list, vec1_x, vec1_y, img, tmp_block); + + if (img->apply_weights) + { + if (((active_pps->weighted_pred_flag&&(img->type==P_SLICE|| img->type == SP_SLICE))|| + (active_pps->weighted_bipred_idc==1 && (img->type==B_SLICE))) && curr_mb_field) + { + ref_idx >>=1; + } + + for(ii=0;iimpr[ii+ioff][jj+joff] = Clip1(((img->wp_weight[pred_dir][ref_idx][0] * tmp_block[ii][jj]+ img->wp_round_luma) >>img->luma_log2_weight_denom) + img->wp_offset[pred_dir][fw_refframe>>curr_mb_field][0] ); + } + else + { + for(ii=0;iimpr[ii+ioff][jj+joff] = tmp_block[ii][jj]; + } + } + else + { + if (mv_mode != 0) + { + //===== BI-DIRECTIONAL PREDICTION ===== + fw_mv_array = dec_picture->mv[LIST_0]; + bw_mv_array = dec_picture->mv[LIST_1]; + + 
fw_refframe = dec_picture->ref_idx[LIST_0][j4][i4]; + bw_refframe = dec_picture->ref_idx[LIST_1][j4][i4]; + fw_ref_idx = fw_refframe; + bw_ref_idx = bw_refframe; + } + else + { + //===== DIRECT PREDICTION ===== + fw_mv_array = dec_picture->mv[LIST_0]; + bw_mv_array = dec_picture->mv[LIST_1]; + bw_refframe = 0; + + if (img->direct_spatial_mv_pred_flag ) + { + int imgblock_y= ((img->MbaffFrameFlag)&&(currMB->mb_field))? (img->current_mb_nr%2) ? (img->block_y-4)/2:img->block_y/2: img->block_y; + int j6=imgblock_y+j; + + + if (fw_rFrame >=0) + { + if (!fw_rFrame && ((!moving_block[j6][i4]) && (!listX[1+list_offset][0]->is_long_term))) + { + dec_picture->mv [LIST_0][j4][i4][0]= 0; + dec_picture->mv [LIST_0][j4][i4][1]= 0; + dec_picture->ref_idx[LIST_0][j4][i4] = 0; + } + else + { + dec_picture->mv [LIST_0][j4][i4][0]= pmvfw[0]; + dec_picture->mv [LIST_0][j4][i4][1]= pmvfw[1]; + dec_picture->ref_idx[LIST_0][j4][i4] = fw_rFrame; + } + } + else + { + dec_picture->ref_idx[LIST_0][j4][i4] = -1; + dec_picture->mv [LIST_0][j4][i4][0]= 0; + dec_picture->mv [LIST_0][j4][i4][1]= 0; + } + + if (bw_rFrame >=0) + { + if (bw_rFrame==0 && ((!moving_block[j6][i4]) && (!listX[1+list_offset][0]->is_long_term))) + { + + dec_picture->mv [LIST_1][j4][i4][0]= 0; + dec_picture->mv [LIST_1][j4][i4][1]= 0; + dec_picture->ref_idx[LIST_1][j4][i4] = bw_rFrame; + + } + else + { + dec_picture->mv [LIST_1][j4][i4][0]= pmvbw[0]; + dec_picture->mv [LIST_1][j4][i4][1]= pmvbw[1]; + + dec_picture->ref_idx[LIST_1][j4][i4] = bw_rFrame; + } + } + else + { + dec_picture->mv [LIST_1][j4][i4][0]=0; + dec_picture->mv [LIST_1][j4][i4][1]=0; + dec_picture->ref_idx[LIST_1][j4][i4] = -1; + + } + + if (fw_rFrame < 0 && bw_rFrame < 0) + { + dec_picture->ref_idx[LIST_0][j4][i4] = 0; + dec_picture->ref_idx[LIST_1][j4][i4] = 0; + } + + fw_refframe = (dec_picture->ref_idx[LIST_0][j4][i4]!=-1) ? dec_picture->ref_idx[LIST_0][j4][i4]:0; + bw_refframe = (dec_picture->ref_idx[LIST_1][j4][i4]!=-1) ? 
dec_picture->ref_idx[LIST_1][j4][i4]:0; + + fw_ref_idx = fw_refframe; + bw_ref_idx = bw_refframe; + + if (dec_picture->ref_idx[LIST_1][j4][i4]==-1) direct_pdir = 0; + else if (dec_picture->ref_idx[LIST_0][j4][i4]==-1) direct_pdir = 1; + else direct_pdir = 2; + + } + else // Temporal Mode + { + + int imgblock_y= ((img->MbaffFrameFlag)&&(currMB->mb_field))? (img->current_mb_nr%2) ? (img->block_y-4)/2:img->block_y/2: img->block_y; + int j6= imgblock_y + j; + + int refList = (co_located_ref_idx[LIST_0][j6][i4]== -1 ? LIST_1 : LIST_0); + int ref_idx = co_located_ref_idx[refList][j6][i4]; + + + if(ref_idx==-1) // co-located is intra mode + { + for(hv=0; hv<2; hv++) + { + dec_picture->mv [LIST_0][j4][i4][hv]=0; + dec_picture->mv [LIST_1][j4][i4][hv]=0; + } + + dec_picture->ref_idx[LIST_0][j4][i4] = 0; + dec_picture->ref_idx[LIST_1][j4][i4] = 0; + + fw_refframe = 0; + fw_ref_idx = 0; + } + else // co-located skip or inter mode + { + int mapped_idx=0; + int iref; + + { + for (iref=0;irefnum_ref_idx_l0_active,listXsize[LIST_0 + list_offset]);iref++) + { + #if 1 + if(img->structure==0 && curr_mb_field==0) + { + // If the current MB is a frame MB and the colocated is from a field picture, + // then the co_located_ref_id may have been generated from the wrong value of + // frame_poc if it references it's complementary field, so test both POC values + if(listX[0][iref]->top_poc*2 == co_located_ref_id[refList][j6][i4] || listX[0][iref]->bottom_poc*2 == co_located_ref_id[refList][j6][i4]) + { + mapped_idx=iref; + break; + } + else //! invalid index. Default to zero even though this case should not happen + mapped_idx=INVALIDINDEX; + continue; + } + #endif + if (dec_picture->ref_pic_num[img->current_slice_nr][LIST_0 + list_offset][iref]==co_located_ref_id[refList][j6][i4]) + { + mapped_idx=iref; + break; + } + else //! invalid index. 
Default to zero even though this case should not happen + { + mapped_idx=INVALIDINDEX; + } + } + if (INVALIDINDEX == mapped_idx) + { + error("temporal direct error\ncolocated block has ref that is unavailable",-1111); + } + } + + fw_ref_idx = mapped_idx; + mv_scale = img->mvscale[LIST_0 + list_offset][mapped_idx]; + + //! In such case, an array is needed for each different reference. + if (mv_scale == 9999 || listX[LIST_0+list_offset][mapped_idx]->is_long_term) + { + dec_picture->mv [LIST_0][j4][i4][0]=co_located_mv[refList][j6][i4][0]; + dec_picture->mv [LIST_0][j4][i4][1]=co_located_mv[refList][j6][i4][1]; + + dec_picture->mv [LIST_1][j4][i4][0]=0; + dec_picture->mv [LIST_1][j4][i4][1]=0; + } + else + { + dec_picture->mv [LIST_0][j4][i4][0]=(mv_scale * co_located_mv[refList][j6][i4][0] + 128 ) >> 8; + dec_picture->mv [LIST_0][j4][i4][1]=(mv_scale * co_located_mv[refList][j6][i4][1] + 128 ) >> 8; + + dec_picture->mv [LIST_1][j4][i4][0]=dec_picture->mv [LIST_0][j4][i4][0] - co_located_mv[refList][j6][i4][0] ; + dec_picture->mv [LIST_1][j4][i4][1]=dec_picture->mv [LIST_0][j4][i4][1] - co_located_mv[refList][j6][i4][1] ; + } + + fw_refframe = dec_picture->ref_idx[LIST_0][j4][i4] = mapped_idx; //listX[1][0]->ref_idx[refList][j4][i4]; + bw_refframe = dec_picture->ref_idx[LIST_1][j4][i4] = 0; + + fw_ref_idx = fw_refframe; + bw_ref_idx = bw_refframe; + } + } + // store reference picture ID determined by direct mode + dec_picture->ref_pic_id[LIST_0][j4][i4] = dec_picture->ref_pic_num[img->current_slice_nr][LIST_0 + list_offset][(short)dec_picture->ref_idx[LIST_0][j4][i4]]; + dec_picture->ref_pic_id[LIST_1][j4][i4] = dec_picture->ref_pic_num[img->current_slice_nr][LIST_1 + list_offset][(short)dec_picture->ref_idx[LIST_1][j4][i4]]; + } + + if (mv_mode==0 && img->direct_spatial_mv_pred_flag ) + { + if (dec_picture->ref_idx[LIST_0][j4][i4] >= 0) + { + + vec1_x = i4*4*mv_mul + fw_mv_array[j4][i4][0]; + if (!curr_mb_field) + { + vec1_y = j4*4*mv_mul + fw_mv_array[j4][i4][1]; + 
} + else + { + if (mb_nr%2 == 0) + { + vec1_y = (img->block_y * 2 + joff) * mv_mul + fw_mv_array[j4][i4][1]; + } + else + { + vec1_y = ((img->block_y-4) * 2 + joff)* mv_mul + fw_mv_array[j4][i4][1]; + } + } + get_block(fw_refframe, listX[0+list_offset], vec1_x, vec1_y, img, tmp_block); + } + + if (dec_picture->ref_idx[LIST_1][j4][i4] >= 0) + { + vec2_x = i4*4*mv_mul + bw_mv_array[j4][i4][0]; + if (!curr_mb_field) + { + vec2_y = j4*4*mv_mul + bw_mv_array[j4][i4][1]; + } + else + { + if (mb_nr%2 == 0) + { + vec2_y = (img->block_y * 2 + joff) * mv_mul + bw_mv_array[j4][i4][1]; + } + else + { + vec2_y = ((img->block_y-4) * 2 + joff)* mv_mul + bw_mv_array[j4][i4][1]; + } + } + get_block(bw_refframe, listX[1+list_offset], vec2_x, vec2_y, img, tmp_blockbw); + } + } + else + { + vec1_x = i4*4*mv_mul + fw_mv_array[j4][i4][0]; + vec2_x = i4*4*mv_mul + bw_mv_array[j4][i4][0]; + + if (!curr_mb_field) + { + vec1_y = j4*4*mv_mul + fw_mv_array[j4][i4][1]; + vec2_y = j4*4*mv_mul + bw_mv_array[j4][i4][1]; + } + else + { + if (mb_nr%2 == 0) + { + vec1_y = (img->block_y * 2 + joff) * mv_mul + fw_mv_array[j4][i4][1]; + vec2_y = (img->block_y * 2 + joff) * mv_mul + bw_mv_array[j4][i4][1]; + } + else + { + vec1_y = ((img->block_y-4) * 2 + joff)* mv_mul + fw_mv_array[j4][i4][1]; + vec2_y = ((img->block_y-4) * 2 + joff)* mv_mul + bw_mv_array[j4][i4][1]; + } + } + + get_block(fw_refframe, listX[0+list_offset], vec1_x, vec1_y, img, tmp_block); + get_block(bw_refframe, listX[1+list_offset], vec2_x, vec2_y, img, tmp_blockbw); + } + + if (mv_mode==0 && img->direct_spatial_mv_pred_flag && direct_pdir==0) + { + if (img->apply_weights) + { + if (((active_pps->weighted_pred_flag&&(img->type==P_SLICE|| img->type == SP_SLICE))|| + (active_pps->weighted_bipred_idc==1 && (img->type==B_SLICE))) && curr_mb_field) + { + fw_ref_idx >>=1; + } + for(ii=0;iimpr[ii+ioff][jj+joff] = Clip1(((tmp_block[ii][jj] * img->wp_weight[0][fw_ref_idx][0] + + img->wp_round_luma)>>img->luma_log2_weight_denom) + + 
img->wp_offset[0][fw_refframe>>curr_mb_field][0]); + } + else + { + for(ii=0;iimpr[ii+ioff][jj+joff] = tmp_block[ii][jj]; + } + } + else if (mv_mode==0 && img->direct_spatial_mv_pred_flag && direct_pdir==1) + { + if (img->apply_weights) + { + if (((active_pps->weighted_pred_flag&&(img->type==P_SLICE|| img->type == SP_SLICE))|| + (active_pps->weighted_bipred_idc==1 && (img->type==B_SLICE))) && curr_mb_field) + { + fw_ref_idx >>=1; + bw_ref_idx >>=1; + } + + for(ii=0;iimpr[ii+ioff][jj+joff] = Clip1(((tmp_blockbw[ii][jj] * img->wp_weight[1][bw_ref_idx][0] + + img->wp_round_luma)>>img->luma_log2_weight_denom) + + img->wp_offset[1][bw_refframe>>curr_mb_field][0]); + } + else + { + for(ii=0;iimpr[ii+ioff][jj+joff] = tmp_blockbw[ii][jj]; + } + } + else if(img->apply_weights) + { + int alpha_fw, alpha_bw; + int wt_list_offset = (active_pps->weighted_bipred_idc==2)?list_offset:0; + + if (mv_mode==0 && img->direct_spatial_mv_pred_flag==0 )bw_ref_idx=0; //temporal direct + + if (((active_pps->weighted_pred_flag&&(img->type==P_SLICE|| img->type == SP_SLICE))|| + (active_pps->weighted_bipred_idc==1 && (img->type==B_SLICE))) && curr_mb_field) + { + fw_ref_idx >>=1; + bw_ref_idx >>=1; + } + + alpha_fw = img->wbp_weight[0+wt_list_offset][fw_ref_idx][bw_ref_idx][0]; + alpha_bw = img->wbp_weight[1+wt_list_offset][fw_ref_idx][bw_ref_idx][0]; + + for(ii=0;iimpr[ii+ioff][jj+joff] = (int)Clip1(((alpha_fw * tmp_block[ii][jj] + alpha_bw * tmp_blockbw[ii][jj] + + (1<luma_log2_weight_denom)) >> (img->luma_log2_weight_denom+1)) + + ((img->wp_offset[wt_list_offset+0][fw_ref_idx][0] + img->wp_offset[wt_list_offset+1][bw_ref_idx][0] + 1) >>1)); + } + else + { + for(ii=0;iimpr[ii+ioff][jj+joff] = (tmp_block[ii][jj]+tmp_blockbw[ii][jj]+1)/2; + } + } + } + + // =============== 4x4 itrans ================ + // ------------------------------------------- + if (smb && mv_mode!=IBLOCK) + { + itrans_sp(img,ioff,joff,i,j); + } + else + { + if(need_4x4_transform) + itrans (img,ioff,joff,i,j, 0); // use 
DCT transform and make 4x4 block m7 from prediction block mpr + } + if(need_4x4_transform) + { + for(ii=0;iiimgY[j4*BLOCK_SIZE+jj][i4*BLOCK_SIZE+ii]=img->m7[ii][jj]; // construct picture from 4x4 blocks + } + else + { + mprRGB[0][ii+ioff][jj+joff] = img->mpr[ii+ioff][jj+joff]; + rec_res[0][ii+ioff][jj+joff] = img->m7[ii][jj]; + } + } + } + }// if(need_4x4_transform) + } + + if(!need_4x4_transform) + { + // =============== 8x8 itrans ================ + // ------------------------------------------- + ioff = 8*(block8x8%2); + joff = 8*(block8x8/2); + + itrans8x8(img,ioff,joff); // use DCT transform and make 8x8 block m7 from prediction block mpr + + for(ii=0;ii<8;ii++) + { + for(jj=0;jj<8;jj++) + { + // Residue Color Transform + if(!residue_transform_flag) + { + dec_picture->imgY[img->pix_y + joff + jj][img->pix_x + ioff + ii]=img->m7[ii + ioff][jj + joff]; // construct picture from 4x4 blocks + } + else + { + mprRGB[0][ii+ioff][jj+joff] = img->mpr[ii+ioff][jj+joff]; + rec_res[0][ii+ioff][jj+joff] = img->m7[ii + ioff][jj + joff]; + } + } + } + } + } + + if (dec_picture->chroma_format_idc != YUV400) + { + // chroma decoding ******************************************************* + f1_x = 64/img->mb_cr_size_x; + f2_x=f1_x-1; + + f1_y = 64/img->mb_cr_size_y; + f2_y=f1_y-1; + + f3=f1_x*f1_y; + f4=f3>>1; + + for(uv=0;uv<2;uv++) + { + uv_shift = uv*(img->num_blk8x8_uv/2); + intra_prediction = IS_INTRA (currMB); + + if (intra_prediction) + { + intrapred_chroma(img, uv); + } + + for (b8=0;b8<(img->num_blk8x8_uv/2);b8++) + { + for(b4=0;b4<4;b4++) + { + joff = subblk_offset_y[yuv][b8][b4]; + j4=img->pix_c_y+joff; + ioff = subblk_offset_x[yuv][b8][b4]; + i4=img->pix_c_x+ioff; + + mv_mode = currMB->b8mode[block8x8_idx[yuv][b8][b4]]; + pred_dir = currMB->b8pdir[block8x8_idx[yuv][b8][b4]]; + assert (pred_dir<=2); + + + if (!intra_prediction) + { + if (pred_dir != 2) + { + //--- FORWARD/BACKWARD PREDICTION --- + mv_array = dec_picture->mv[LIST_0 + pred_dir]; + list = 
listX[0+list_offset+pred_dir]; + for(jj=0;jj<4;jj++) + { + jf=(j4+jj)/(img->mb_cr_size_y/4); // jf = Subblock_y-coordinate + for(ii=0;ii<4;ii++) + { + ifx=(i4+ii)/(img->mb_cr_size_x/4); // ifx = Subblock_x-coordinate + fw_refframe = ref_idx = dec_picture->ref_idx[LIST_0+pred_dir][jf][ifx]; + i1=(i4+ii)*f1_x+mv_array[jf][ifx][0]; + + if (!curr_mb_field) + j1=(j4+jj)*f1_y+mv_array[jf][ifx][1]; + else + { + if (mb_nr%2 == 0) + j1=((img->pix_c_y/2) + jj + joff)*f1_y + mv_array[jf][ifx][1]; + else + j1=((img->pix_c_y-img->mb_cr_size_y)/2 + jj + joff)*f1_y + mv_array[jf][ifx][1]; + } + + if (active_sps->chroma_format_idc == 1) + j1 += list[ref_idx]->chroma_vector_adjustment; + + ii0=max (0, min (i1/f1_x, img->width_cr-1)); + jj0=max (0, min (j1/f1_y, max_y_cr)); + ii1=max (0, min ((i1+f2_x)/f1_x, img->width_cr-1)); + jj1=max (0, min ((j1+f2_y)/f1_y, max_y_cr)); + + if1=(i1 & f2_x); + jf1=(j1 & f2_y); + if0=f1_x-if1; + jf0=f1_y-jf1; + + if (img->apply_weights) + { + pred = (if0*jf0*list[ref_idx]->imgUV[uv][jj0][ii0]+ + if1*jf0*list[ref_idx]->imgUV[uv][jj0][ii1]+ + if0*jf1*list[ref_idx]->imgUV[uv][jj1][ii0]+ + if1*jf1*list[ref_idx]->imgUV[uv][jj1][ii1]+f4)/f3; + if (((active_pps->weighted_pred_flag&&(img->type==P_SLICE|| img->type == SP_SLICE))|| + (active_pps->weighted_bipred_idc==1 && (img->type==B_SLICE))) && curr_mb_field) + { + ref_idx >>=1; + } + + img->mpr[ii+ioff][jj+joff] = Clip1_Chr(((img->wp_weight[pred_dir][ref_idx][uv+1] * pred + img->wp_round_chroma)>>img->chroma_log2_weight_denom) + img->wp_offset[pred_dir][ref_idx][uv+1]); + } + else + { + img->mpr[ii+ioff][jj+joff]=(if0*jf0*list[ref_idx]->imgUV[uv][jj0][ii0]+ + if1*jf0*list[ref_idx]->imgUV[uv][jj0][ii1]+ + if0*jf1*list[ref_idx]->imgUV[uv][jj1][ii0]+ + if1*jf1*list[ref_idx]->imgUV[uv][jj1][ii1]+f4)/f3; + } + } + } + } + else + { + fw_mv_array = dec_picture->mv[LIST_0]; + bw_mv_array = dec_picture->mv[LIST_1]; + + for(jj=0;jj<4;jj++) + { + jf=(j4+jj)/(img->mb_cr_size_y/4); // jf = Subblock_y-coordinate + 
for(ii=0;ii<4;ii++) + { + ifx=(i4+ii)/(img->mb_cr_size_x/4); // ifx = Subblock_x-coordinate + direct_pdir = 2; + + if (mv_mode == 0 && img->direct_spatial_mv_pred_flag) + { + //===== DIRECT PREDICTION ===== + if (dec_picture->ref_idx[LIST_0][2*(jf/2)][(ifx/2)*2]!=-1) + { + fw_refframe = dec_picture->ref_idx[LIST_0][2*(jf/2)][(ifx/2)*2]; + fw_ref_idx = fw_refframe; + } + if (dec_picture->ref_idx[LIST_1][2*(jf/2)][(ifx/2)*2]!=-1) + { + bw_refframe = dec_picture->ref_idx[LIST_1][2*(jf/2)][(ifx/2)*2]; + bw_ref_idx = bw_refframe; + } + + if (dec_picture->ref_idx[LIST_1][2*(jf/2)][(ifx/2)*2]==-1) direct_pdir = 0; + else if (dec_picture->ref_idx[LIST_0][2*(jf/2)][(ifx/2)*2]==-1) direct_pdir = 1; + + if (direct_pdir == 0 || direct_pdir == 2) + { + i1=(img->pix_c_x+ii+ioff)*f1_x+fw_mv_array[jf][ifx][0]; + + if (!curr_mb_field) + { + j1=(img->pix_c_y+jj+joff)*f1_y+fw_mv_array[jf][ifx][1]; + } + else + { + if (mb_nr%2 == 0) + j1=((img->pix_c_y)/2 + jj + joff)*f1_y + fw_mv_array[jf][ifx][1]; + else + j1=((img->pix_c_y-img->mb_cr_size_y)/2 + jj + joff)*f1_y + fw_mv_array[jf][ifx][1]; + } + + if (active_sps->chroma_format_idc == 1) + j1 += listX[0+list_offset][fw_refframe]->chroma_vector_adjustment; + + ii0=max (0, min (i1/f1_x, img->width_cr-1)); + jj0=max (0, min (j1/f1_y, max_y_cr)); + ii1=max (0, min ((i1+f2_x)/f1_x, img->width_cr-1)); + jj1=max (0, min ((j1+f2_y)/f1_y, max_y_cr)); + + + if1=(i1 & f2_x); + jf1=(j1 & f2_y); + if0=f1_x-if1; + jf0=f1_y-jf1; + + fw_pred=(if0*jf0*listX[0+list_offset][fw_refframe]->imgUV[uv][jj0][ii0]+ + if1*jf0*listX[0+list_offset][fw_refframe]->imgUV[uv][jj0][ii1]+ + if0*jf1*listX[0+list_offset][fw_refframe]->imgUV[uv][jj1][ii0]+ + if1*jf1*listX[0+list_offset][fw_refframe]->imgUV[uv][jj1][ii1]+f4)/f3; + } + if (direct_pdir == 1 || direct_pdir == 2) + { + i1=(img->pix_c_x+ii+ioff)*f1_x+bw_mv_array[jf][ifx][0]; + + if (!curr_mb_field) + { + j1=(img->pix_c_y+jj+joff)*f1_y+bw_mv_array[jf][ifx][1]; + } + else + { + if (mb_nr%2 == 0) + 
j1=((img->pix_c_y)/2 + jj + joff)*f1_y + bw_mv_array[jf][ifx][1]; + else + j1=((img->pix_c_y-img->mb_cr_size_y)/2 + jj + joff)*f1_y + bw_mv_array[jf][ifx][1]; + } + if (active_sps->chroma_format_idc == 1) + j1 += listX[1+list_offset][bw_refframe]->chroma_vector_adjustment; + + ii0=max (0, min (i1/f1_x, img->width_cr-1)); + jj0=max (0, min (j1/f1_y, max_y_cr)); + ii1=max (0, min ((i1+f2_x)/f1_x, img->width_cr-1)); + jj1=max (0, min ((j1+f2_y)/f1_y, max_y_cr)); + + if1=(i1 & f2_x); + jf1=(j1 & f2_y); + if0=f1_x-if1; + jf0=f1_y-jf1; + + bw_pred=(if0*jf0*listX[1+list_offset][bw_refframe]->imgUV[uv][jj0][ii0]+ + if1*jf0*listX[1+list_offset][bw_refframe]->imgUV[uv][jj0][ii1]+ + if0*jf1*listX[1+list_offset][bw_refframe]->imgUV[uv][jj1][ii0]+ + if1*jf1*listX[1+list_offset][bw_refframe]->imgUV[uv][jj1][ii1]+f4)/f3; + } + + } + else + { + //===== BI-DIRECTIONAL PREDICTION ===== + fw_refframe = dec_picture->ref_idx[LIST_0][jf][ifx]; + bw_refframe = dec_picture->ref_idx[LIST_1][jf][ifx]; + + fw_ref_idx = fw_refframe; + bw_ref_idx = bw_refframe; + + i1=(img->pix_c_x+ii+ioff)*f1_x+fw_mv_array[jf][ifx][0]; + + if (!curr_mb_field) + { + j1=(img->pix_c_y+jj+joff)*f1_y+fw_mv_array[jf][ifx][1]; + } + else + { + if (mb_nr%2 == 0) + j1=((img->pix_c_y)/2 + jj + joff)*f1_y + fw_mv_array[jf][ifx][1]; + else + j1=((img->pix_c_y-img->mb_cr_size_y)/2 + jj + joff)*f1_y + fw_mv_array[jf][ifx][1]; + } + + if (active_sps->chroma_format_idc == 1) + j1 += listX[0+list_offset][fw_refframe]->chroma_vector_adjustment; + + ii0=max (0, min (i1/f1_x, img->width_cr-1)); + jj0=max (0, min (j1/f1_y, max_y_cr)); + ii1=max (0, min ((i1+f2_x)/f1_x, img->width_cr-1)); + jj1=max (0, min ((j1+f2_y)/f1_y, max_y_cr)); + + if1=(i1 & f2_x); + jf1=(j1 & f2_y); + if0=f1_x-if1; + jf0=f1_y-jf1; + + fw_pred=(if0*jf0*listX[0+list_offset][fw_refframe]->imgUV[uv][jj0][ii0]+ + if1*jf0*listX[0+list_offset][fw_refframe]->imgUV[uv][jj0][ii1]+ + if0*jf1*listX[0+list_offset][fw_refframe]->imgUV[uv][jj1][ii0]+ + 
if1*jf1*listX[0+list_offset][fw_refframe]->imgUV[uv][jj1][ii1]+f4)/f3; + + i1=(img->pix_c_x+ii+ioff)*f1_x+bw_mv_array[jf][ifx][0]; + + if (!curr_mb_field) + { + j1=(img->pix_c_y+jj+joff)*f1_y+bw_mv_array[jf][ifx][1]; + } + else + { + if (mb_nr%2 == 0) + j1=((img->pix_c_y)/2 + jj + joff)*f1_y + bw_mv_array[jf][ifx][1]; + else + j1=((img->pix_c_y-img->mb_cr_size_y)/2 + jj + joff)*f1_y + bw_mv_array[jf][ifx][1]; + } + + if (active_sps->chroma_format_idc == 1) + j1 += listX[1+list_offset][bw_refframe]->chroma_vector_adjustment; + + ii0=max (0, min (i1/f1_x, img->width_cr-1)); + jj0=max (0, min (j1/f1_y, max_y_cr)); + ii1=max (0, min ((i1+f2_x)/f1_x, img->width_cr-1)); + jj1=max (0, min ((j1+f2_y)/f1_y, max_y_cr)); + + if1=(i1 & f2_x); + jf1=(j1 & f2_y); + if0=f1_x-if1; + jf0=f1_y-jf1; + + bw_pred=(if0*jf0*listX[1+list_offset][bw_refframe]->imgUV[uv][jj0][ii0]+ + if1*jf0*listX[1+list_offset][bw_refframe]->imgUV[uv][jj0][ii1]+ + if0*jf1*listX[1+list_offset][bw_refframe]->imgUV[uv][jj1][ii0]+ + if1*jf1*listX[1+list_offset][bw_refframe]->imgUV[uv][jj1][ii1]+f4)/f3; + + } + + if (img->apply_weights) + { + if (((active_pps->weighted_pred_flag&&(img->type==P_SLICE|| img->type == SP_SLICE))|| + (active_pps->weighted_bipred_idc==1 && (img->type==B_SLICE))) && curr_mb_field) + { + fw_ref_idx >>=1; + bw_ref_idx >>=1; + } + + if (img->direct_spatial_mv_pred_flag && direct_pdir==1) + { + img->mpr[ii+ioff][jj+joff]= Clip1_Chr(((img->wp_weight[1][bw_ref_idx][uv+1] * bw_pred + img->wp_round_chroma)>>img->chroma_log2_weight_denom) + img->wp_offset[1][bw_refframe>>curr_mb_field][uv+1]); // Replaced with integer only operations + } + else if (img->direct_spatial_mv_pred_flag && direct_pdir==0) + { + img->mpr[ii+ioff][jj+joff]=Clip1_Chr(((img->wp_weight[0][fw_ref_idx][uv+1] * fw_pred + img->wp_round_chroma)>>img->chroma_log2_weight_denom) + img->wp_offset[0][fw_refframe>>curr_mb_field][uv+1]); // Replaced with integer only operations + } + else + { + int wt_list_offset = 
(active_pps->weighted_bipred_idc==2)?list_offset:0; + + int alpha_fw = img->wbp_weight[0+wt_list_offset][fw_ref_idx][bw_ref_idx][uv+1]; + int alpha_bw = img->wbp_weight[1+wt_list_offset][fw_ref_idx][bw_ref_idx][uv+1]; + + img->mpr[ii+ioff][jj+joff]= Clip1_Chr(((alpha_fw * fw_pred + alpha_bw * bw_pred + (1<chroma_log2_weight_denom)) >> (img->chroma_log2_weight_denom + 1))+ ((img->wp_offset[wt_list_offset + 0][fw_ref_idx][uv+1] + img->wp_offset[wt_list_offset + 1][bw_ref_idx][uv+1] + 1)>>1) ); + } + } + else + { + if (img->direct_spatial_mv_pred_flag && direct_pdir==1) + { + img->mpr[ii+ioff][jj+joff]=bw_pred; + } + else if (img->direct_spatial_mv_pred_flag && direct_pdir==0) + { + img->mpr[ii+ioff][jj+joff]=fw_pred; + } + else + { + img->mpr[ii+ioff][jj+joff]=(fw_pred + bw_pred + 1 )/2; + } + } + } + } + } + } //if (!intra_prediction) + + if (!smb) + { + itrans(img,ioff,joff, cofuv_blk_x[yuv][b8+uv_shift][b4], cofuv_blk_y[yuv][b8+uv_shift][b4], 1); + for(ii=0;ii<4;ii++) + for(jj=0;jj<4;jj++) + { + // Residue Color Transform + if(!residue_transform_flag) + { + dec_picture->imgUV[uv][j4+jj][i4+ii]=img->m7[ii][jj]; + } else + { + mprRGB[uv+1][ii+ioff][jj+joff] = img->mpr[ii+ioff][jj+joff]; + rec_res[uv+1][ii+ioff][jj+joff] = img->m7[ii][jj]; + } + } + } + } + } + + if(smb) + { + itrans_sp_chroma(img,2*uv); + for (j=4;j<6;j++) + { + joff=(j-4)*4; + j4=img->pix_c_y+joff; + for(i=0;i<2;i++) + { + ioff=i*4; + i4=img->pix_c_x+ioff; + itrans(img,ioff,joff,2*uv+i,j, 1); + + for(ii=0;ii<4;ii++) + for(jj=0;jj<4;jj++) + { + dec_picture->imgUV[uv][j4+jj][i4+ii]=img->m7[ii][jj]; + } + } + } + } + } + } + + // Residue Color Transform + if(residue_transform_flag) + { + if(currMB->mb_type != I8MB) + { + for(k=0;k<16;k++) + { + + i = (decode_block_scan[k] & 3); + j = ((decode_block_scan[k] >> 2) & 3); + + ioff=i*4; + i4=img->block_x+i; + + joff=j*4; + j4=img->block_y+j; + + mv_mode = currMB->b8mode[2*(j/2)+(i/2)]; + pred_dir = currMB->b8pdir[2*(j/2)+(i/2)]; + + assert (pred_dir<=2); + 
+ // PREDICTION + if (mv_mode==IBLOCK) + { + //===== INTRA PREDICTION ===== + if (intrapred(img,ioff,joff,i4,j4)==SEARCH_SYNC) /* make 4x4 prediction block mpr from given prediction img->mb_mode */ + return SEARCH_SYNC; /* bit error */ + + for(ii=0;ii<4;ii++) + for(jj=0;jj<4;jj++) + { + mprRGB[0][ii+ioff][jj+joff] = img->mpr[ii+ioff][jj+joff]; + } + } + + for(jj=0;jj>1); + residue_G = rec_res[1][ii+ioff][jj+joff]+temp; + residue_B = temp - (rec_res[2][ii+ioff][jj+joff]>>1); + residue_R = residue_B+rec_res[2][ii+ioff][jj+joff]; + + dec_picture->imgUV[0][j4*BLOCK_SIZE+jj][i4*BLOCK_SIZE+ii] = min(img->max_imgpel_value_uv,max(0,residue_B+mprRGB[1][ii+ioff][jj+joff])); + dec_picture->imgY[j4*BLOCK_SIZE+jj][i4*BLOCK_SIZE+ii] = min(img->max_imgpel_value,max(0,residue_G+mprRGB[0][ii+ioff][jj+joff])); + dec_picture->imgUV[1][j4*BLOCK_SIZE+jj][i4*BLOCK_SIZE+ii] = min(img->max_imgpel_value_uv,max(0,residue_R+mprRGB[2][ii+ioff][jj+joff])); + } + } + }// for(k=0;k<16;k++) + } + else // currMB->b8mode[block8x8] == I8MB + { + for(block8x8=0; block8x8<4; block8x8++) + { + + //=========== 8x8 BLOCK TYPE ============ + ioff = 8*(block8x8%2); + joff = 8*(block8x8/2); + + //PREDICTION + intrapred8x8(img, block8x8); + for(ii=0;ii<8;ii++) + for(jj=0;jj<8;jj++) + mprRGB[0][ii+ioff][jj+joff] = img->mpr[ii+ioff][jj+joff]; + + for(jj=0;jj<8;jj++) + for(ii=0;ii<8;ii++) + { + /* Inverse Residue Transform */ + temp = rec_res[0][ii+ioff][jj+joff]-(rec_res[1][ii+ioff][jj+joff]>>1); + residue_G = rec_res[1][ii+ioff][jj+joff]+temp; + residue_B = temp - (rec_res[2][ii+ioff][jj+joff]>>1); + residue_R = residue_B+rec_res[2][ii+ioff][jj+joff]; + + dec_picture->imgUV[0][img->pix_y+joff+jj][img->pix_x+ioff+ii] = min(img->max_imgpel_value_uv,max(0,residue_B+mprRGB[1][ii+ioff][jj+joff])); + dec_picture->imgY[img->pix_y+joff+jj][img->pix_x+ioff+ii] = min(img->max_imgpel_value,max(0,residue_G+mprRGB[0][ii+ioff][jj+joff])); + dec_picture->imgUV[1][img->pix_y+joff+jj][img->pix_x+ioff+ii] = 
min(img->max_imgpel_value_uv,max(0,residue_R+mprRGB[2][ii+ioff][jj+joff])); + } + } + } + } + + return 0; + } Index: llvm-test/MultiSource/Applications/JM/ldecod/macroblock.h diff -c /dev/null llvm-test/MultiSource/Applications/JM/ldecod/macroblock.h:1.1 *** /dev/null Sat Feb 11 04:33:42 2006 --- llvm-test/MultiSource/Applications/JM/ldecod/macroblock.h Sat Feb 11 04:33:22 2006 *************** *** 0 **** --- 1,338 ---- + + /*! + ************************************************************************ + * \file macroblock.h + * + * \brief + * Arrays for macroblock encoding + * + * \author + * Inge Lille-Langoy + * Copyright (C) 1999 Telenor Satellite Services, Norway + ************************************************************************ + */ + + #ifndef _MACROBLOCK_H_ + #define _MACROBLOCK_H_ + + + //! single scan pattern + const byte SNGL_SCAN[16][2] = + { + {0,0},{1,0},{0,1},{0,2}, + {1,1},{2,0},{3,0},{2,1}, + {1,2},{0,3},{1,3},{2,2}, + {3,1},{3,2},{2,3},{3,3} + }; + + //! field scan pattern + const byte FIELD_SCAN[16][2] = + { + {0,0},{0,1},{1,0},{0,2}, + {0,3},{1,1},{1,2},{1,3}, + {2,0},{2,1},{2,2},{2,3}, + {3,0},{3,1},{3,2},{3,3} + }; + + + //! 
gives CBP value from codeword number, both for intra and inter + const unsigned char NCBP[2][48][2]= + { + { // 0 1 2 3 4 5 6 7 8 9 10 11 + {15, 0},{ 0, 1},{ 7, 2},{11, 4},{13, 8},{14, 3},{ 3, 5},{ 5,10},{10,12},{12,15},{ 1, 7},{ 2,11}, + { 4,13},{ 8,14},{ 6, 6},{ 9, 9},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0}, + { 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0}, + { 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0},{ 0, 0} + }, + { + {47, 0},{31,16},{15, 1},{ 0, 2},{23, 4},{27, 8},{29,32},{30, 3},{ 7, 5},{11,10},{13,12},{14,15}, + {39,47},{43, 7},{45,11},{46,13},{16,14},{ 3, 6},{ 5, 9},{10,31},{12,35},{19,37},{21,42},{26,44}, + {28,33},{35,34},{37,36},{42,40},{44,39},{ 1,43},{ 2,45},{ 4,46},{ 8,17},{17,18},{18,20},{20,24}, + {24,19},{ 6,21},{ 9,26},{22,28},{25,23},{32,27},{33,29},{34,30},{36,22},{40,25},{38,38},{41,41} + } + }; + + + //! used to control block sizes : Not used/16x16/16x8/8x16/8x8/8x4/4x8/4x4 + const int BLOCK_STEP[8][2]= + { + {0,0},{4,4},{4,2},{2,4},{2,2},{2,1},{1,2},{1,1} + }; + + //! Dequantization coefficients + const int dequant_coef[6][4][4] = { + {{10, 13, 10, 13},{ 13, 16, 13, 16},{10, 13, 10, 13},{ 13, 16, 13, 16}}, + {{11, 14, 11, 14},{ 14, 18, 14, 18},{11, 14, 11, 14},{ 14, 18, 14, 18}}, + {{13, 16, 13, 16},{ 16, 20, 16, 20},{13, 16, 13, 16},{ 16, 20, 16, 20}}, + {{14, 18, 14, 18},{ 18, 23, 18, 23},{14, 18, 14, 18},{ 18, 23, 18, 23}}, + {{16, 20, 16, 20},{ 20, 25, 20, 25},{16, 20, 16, 20},{ 20, 25, 20, 25}}, + {{18, 23, 18, 23},{ 23, 29, 23, 29},{18, 23, 18, 23},{ 23, 29, 23, 29}} + }; + + const byte QP_SCALE_CR[52]= + { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11, + 12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27, + 28,29,29,30,31,32,32,33,34,34,35,35,36,36,37,37, + 37,38,38,38,39,39,39,39 + + }; + + //! 
single scan pattern + const byte SNGL_SCAN8x8[64][2] = { + {0,0}, {1,0}, {0,1}, {0,2}, {1,1}, {2,0}, {3,0}, {2,1}, {1,2}, {0,3}, {0,4}, {1,3}, {2,2}, {3,1}, {4,0}, {5,0}, + {4,1}, {3,2}, {2,3}, {1,4}, {0,5}, {0,6}, {1,5}, {2,4}, {3,3}, {4,2}, {5,1}, {6,0}, {7,0}, {6,1}, {5,2}, {4,3}, + {3,4}, {2,5}, {1,6}, {0,7}, {1,7}, {2,6}, {3,5}, {4,4}, {5,3}, {6,2}, {7,1}, {7,2}, {6,3}, {5,4}, {4,5}, {3,6}, + {2,7}, {3,7}, {4,6}, {5,5}, {6,4}, {7,3}, {7,4}, {6,5}, {5,6}, {4,7}, {5,7}, {6,6}, {7,5}, {7,6}, {6,7}, {7,7} + }; + + /* + //! field scan pattern + const byte FIELD_SCAN8x8[64][2] = { + {0,0}, {0,1}, {0,2}, {1,0}, {1,1}, {0,3}, {0,4}, {1,2}, {2,0}, {2,1}, {1,3}, {0,5}, {0,6}, {1,4}, {2,2}, {3,0}, + {3,1}, {2,3}, {1,5}, {0,7}, {1,6}, {2,4}, {3,2}, {4,0}, {4,1}, {3,3}, {2,5}, {1,7}, {2,6}, {3,4}, {4,2}, {5,0}, + {5,1}, {4,3}, {3,5}, {2,7}, {3,6}, {4,4}, {5,2}, {6,0}, {6,1}, {5,3}, {4,5}, {3,7}, {4,6}, {5,4}, {6,2}, {7,0}, + {7,1}, {6,3}, {5,5}, {4,7}, {5,6}, {6,4}, {7,2}, {7,3}, {6,5}, {5,7}, {6,6}, {7,4}, {7,5}, {6,7}, {7,6}, {7,7} + }; + */ + + //! 
field scan pattern + //original from ABT + const byte FIELD_SCAN8x8[64][2] = { // 8x8 + {0,0}, {0,1}, {0,2}, {1,0}, {1,1}, {0,3}, {0,4}, {1,2}, {2,0}, {1,3}, {0,5}, {0,6}, {0,7}, {1,4}, {2,1}, {3,0}, + {2,2}, {1,5}, {1,6}, {1,7}, {2,3}, {3,1}, {4,0}, {3,2}, {2,4}, {2,5}, {2,6}, {2,7}, {3,3}, {4,1}, {5,0}, {4,2}, + {3,4}, {3,5}, {3,6}, {3,7}, {4,3}, {5,1}, {6,0}, {5,2}, {4,4}, {4,5}, {4,6}, {4,7}, {5,3}, {6,1}, {6,2}, {5,4}, + {5,5}, {5,6}, {5,7}, {6,3}, {7,0}, {7,1}, {6,4}, {6,5}, {6,6}, {6,7}, {7,2}, {7,3}, {7,4}, {7,5}, {7,6}, {7,7} + }; + + + static const int dequant_coef8[6][8][8] = + { + { + {20, 19, 25, 19, 20, 19, 25, 19}, + {19, 18, 24, 18, 19, 18, 24, 18}, + {25, 24, 32, 24, 25, 24, 32, 24}, + {19, 18, 24, 18, 19, 18, 24, 18}, + {20, 19, 25, 19, 20, 19, 25, 19}, + {19, 18, 24, 18, 19, 18, 24, 18}, + {25, 24, 32, 24, 25, 24, 32, 24}, + {19, 18, 24, 18, 19, 18, 24, 18} + }, + { + {22, 21, 28, 21, 22, 21, 28, 21}, + {21, 19, 26, 19, 21, 19, 26, 19}, + {28, 26, 35, 26, 28, 26, 35, 26}, + {21, 19, 26, 19, 21, 19, 26, 19}, + {22, 21, 28, 21, 22, 21, 28, 21}, + {21, 19, 26, 19, 21, 19, 26, 19}, + {28, 26, 35, 26, 28, 26, 35, 26}, + {21, 19, 26, 19, 21, 19, 26, 19} + }, + { + {26, 24, 33, 24, 26, 24, 33, 24}, + {24, 23, 31, 23, 24, 23, 31, 23}, + {33, 31, 42, 31, 33, 31, 42, 31}, + {24, 23, 31, 23, 24, 23, 31, 23}, + {26, 24, 33, 24, 26, 24, 33, 24}, + {24, 23, 31, 23, 24, 23, 31, 23}, + {33, 31, 42, 31, 33, 31, 42, 31}, + {24, 23, 31, 23, 24, 23, 31, 23} + }, + { + {28, 26, 35, 26, 28, 26, 35, 26}, + {26, 25, 33, 25, 26, 25, 33, 25}, + {35, 33, 45, 33, 35, 33, 45, 33}, + {26, 25, 33, 25, 26, 25, 33, 25}, + {28, 26, 35, 26, 28, 26, 35, 26}, + {26, 25, 33, 25, 26, 25, 33, 25}, + {35, 33, 45, 33, 35, 33, 45, 33}, + {26, 25, 33, 25, 26, 25, 33, 25} + }, + { + {32, 30, 40, 30, 32, 30, 40, 30}, + {30, 28, 38, 28, 30, 28, 38, 28}, + {40, 38, 51, 38, 40, 38, 51, 38}, + {30, 28, 38, 28, 30, 28, 38, 28}, + {32, 30, 40, 30, 32, 30, 40, 30}, + {30, 28, 38, 28, 30, 28, 38, 
28}, + {40, 38, 51, 38, 40, 38, 51, 38}, + {30, 28, 38, 28, 30, 28, 38, 28} + }, + { + {36, 34, 46, 34, 36, 34, 46, 34}, + {34, 32, 43, 32, 34, 32, 43, 32}, + {46, 43, 58, 43, 46, 43, 58, 43}, + {34, 32, 43, 32, 34, 32, 43, 32}, + {36, 34, 46, 34, 36, 34, 46, 34}, + {34, 32, 43, 32, 34, 32, 43, 32}, + {46, 43, 58, 43, 46, 43, 58, 43}, + {34, 32, 43, 32, 34, 32, 43, 32} + } + + }; + + //ADD-VG-13052004 + + //! single scan pattern + const byte SCAN_YUV422[8][2] = + { + {0,0},{0,1}, + {1,0},{0,2}, + {0,3},{1,1}, + {1,2},{1,3} + }; + + //! look up tables for FRExt_chroma support + const unsigned char subblk_offset_x[3][8][4] = + { + { {0, 4, 0, 4}, + {0, 4, 0, 4}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, }, + + { {0, 4, 0, 4}, + {0, 4, 0, 4}, + {0, 4, 0, 4}, + {0, 4, 0, 4}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, }, + + { {0, 4, 0, 4}, + {8,12, 8,12}, + {0, 4, 0, 4}, + {8,12, 8,12}, + {0, 4, 0, 4}, + {8,12, 8,12}, + {0, 4, 0, 4}, + {8,12, 8,12} } + }; + + const unsigned char subblk_offset_y[3][8][4] = + { { {0, 0, 4, 4}, + {0, 0, 4, 4}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, }, + + { {0, 0, 4, 4}, + {8, 8,12,12}, + {0, 0, 4, 4}, + {8, 8,12,12}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0} }, + + { {0, 0, 4, 4}, + {0, 0, 4, 4}, + {8, 8,12,12}, + {8, 8,12,12}, + {0, 0, 4, 4}, + {0, 0, 4, 4}, + {8, 8,12,12}, + {8, 8,12,12} } + }; + + + static unsigned char cofuv_blk_x[3][8][4] = + { { {0, 1, 0, 1}, + {2, 3, 2, 3}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0} }, + + { {0, 1, 0, 1}, + {0, 1, 0, 1}, + {2, 3, 2, 3}, + {2, 3, 2, 3}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0} }, + + { {0, 1, 0, 1}, + {2, 3, 2, 3}, + {0, 1, 0, 1}, + {2, 3, 2, 3}, + {0, 1, 0, 1}, + {2, 3, 2, 3}, + {0, 1, 0, 1}, + {2, 3, 2, 3} } + }; + + static unsigned char 
cofuv_blk_y[3][8][4] = + { + { { 4, 4, 5, 5}, + { 4, 4, 5, 5}, + { 0, 0, 0, 0}, + { 0, 0, 0, 0}, + { 0, 0, 0, 0}, + { 0, 0, 0, 0}, + { 0, 0, 0, 0}, + { 0, 0, 0, 0} }, + + { { 4, 4, 5, 5}, + { 6, 6, 7, 7}, + { 4, 4, 5, 5}, + { 6, 6, 7, 7}, + { 0, 0, 0, 0}, + { 0, 0, 0, 0}, + { 0, 0, 0, 0}, + { 0, 0, 0, 0} }, + + { { 4, 4, 5, 5}, + { 4, 4, 5, 5}, + { 6, 6, 7, 7}, + { 6, 6, 7, 7}, + { 8, 8, 9, 9}, + { 8, 8, 9, 9}, + {10,10,11,11}, + {10,10,11,11} } + }; + static unsigned char cbp_blk_chroma[8][4] = + { {16, 17, 18, 19}, + {20, 21, 22, 23}, + {24, 25, 26, 27}, + {28, 29, 30, 31}, + {32, 33, 34, 35}, + {36, 37, 38, 39}, + {40, 41, 42, 43}, + {44, 45, 46, 47} }; + + + int block8x8_idx[3][4][4] = + { { {0, 1, 2, 3}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, }, + + { {0, 1, 0, 1}, + {2, 3, 2, 3}, + {0, 0, 0, 0}, + {0, 0, 0, 0} }, + + { {0, 0, 0, 0}, + {1, 1, 1, 1}, + {2, 2, 2, 2}, + {3, 3, 3, 3} } + }; + + //ADD-VG-13052004-END + + #define _NEW_8x8_ARRAYS_INCLUDED_ + + + #endif + Index: llvm-test/MultiSource/Applications/JM/ldecod/mb_access.c diff -c /dev/null llvm-test/MultiSource/Applications/JM/ldecod/mb_access.c:1.1 *** /dev/null Sat Feb 11 04:33:42 2006 --- llvm-test/MultiSource/Applications/JM/ldecod/mb_access.c Sat Feb 11 04:33:22 2006 *************** *** 0 **** --- 1,677 ---- + + /*! + ************************************************************************************* + * \file mb_access.c + * + * \brief + * Functions for macroblock neighborhoods + * + * \author + * Main contributors (see contributors.h for copyright, address and affiliation details) + * - Karsten S?hring + ************************************************************************************* + */ + #include + + #include "global.h" + #include "mbuffer.h" + + extern StorablePicture *dec_picture; + + /*! 
+ ************************************************************************ + * \brief + * returns 1 if the macroblock at the given address is available + ************************************************************************ + */ + int mb_is_available(int mbAddr, int currMbAddr) + { + if ((mbAddr < 0) || (mbAddr > ((int)dec_picture->PicSizeInMbs - 1))) + return 0; + + // the following line checks both: slice number and if the mb has been decoded + if (!img->DeblockCall) + { + if (img->mb_data[mbAddr].slice_nr != img->mb_data[currMbAddr].slice_nr) + return 0; + } + + return 1; + } + + + /*! + ************************************************************************ + * \brief + * Checks the availability of neighboring macroblocks of + * the current macroblock for prediction and context determination; + ************************************************************************ + */ + void CheckAvailabilityOfNeighbors() + { + const int mb_nr = img->current_mb_nr; + Macroblock *currMB = &img->mb_data[mb_nr]; + + // mark all neighbors as unavailable + currMB->mb_available_up = NULL; + currMB->mb_available_left = NULL; + + if (dec_picture->MbaffFrameFlag) + { + currMB->mbAddrA = 2 * (mb_nr/2 - 1); + currMB->mbAddrB = 2 * (mb_nr/2 - dec_picture->PicWidthInMbs); + currMB->mbAddrC = 2 * (mb_nr/2 - dec_picture->PicWidthInMbs + 1); + currMB->mbAddrD = 2 * (mb_nr/2 - dec_picture->PicWidthInMbs - 1); + + currMB->mbAvailA = mb_is_available(currMB->mbAddrA, mb_nr) && (((mb_nr/2) % dec_picture->PicWidthInMbs)!=0); + currMB->mbAvailB = mb_is_available(currMB->mbAddrB, mb_nr); + currMB->mbAvailC = mb_is_available(currMB->mbAddrC, mb_nr) && (((mb_nr/2 +1) % dec_picture->PicWidthInMbs)!=0); + currMB->mbAvailD = mb_is_available(currMB->mbAddrD, mb_nr) && (((mb_nr/2) % dec_picture->PicWidthInMbs)!=0); + } + else + { + currMB->mbAddrA = mb_nr - 1; + currMB->mbAddrB = mb_nr - dec_picture->PicWidthInMbs; + currMB->mbAddrC = mb_nr - dec_picture->PicWidthInMbs + 1; + currMB->mbAddrD = mb_nr - 
dec_picture->PicWidthInMbs - 1; + + currMB->mbAvailA = mb_is_available(currMB->mbAddrA, mb_nr) && ((mb_nr % dec_picture->PicWidthInMbs)!=0); + currMB->mbAvailB = mb_is_available(currMB->mbAddrB, mb_nr); + currMB->mbAvailC = mb_is_available(currMB->mbAddrC, mb_nr) && (((mb_nr+1) % dec_picture->PicWidthInMbs)!=0); + currMB->mbAvailD = mb_is_available(currMB->mbAddrD, mb_nr) && ((mb_nr % dec_picture->PicWidthInMbs)!=0); + } + } + + + /*! + ************************************************************************ + * \brief + * returns the x and y macroblock coordinates for a given MbAddress + ************************************************************************ + */ + void get_mb_block_pos (int mb_addr, int *x, int*y) + { + + if (dec_picture->MbaffFrameFlag) + { + *x = ((mb_addr/2) % dec_picture->PicWidthInMbs); + *y = ( ((mb_addr/2) / dec_picture->PicWidthInMbs) * 2 + (mb_addr%2)); + } + else + { + *x = (mb_addr % dec_picture->PicWidthInMbs); + *y = (mb_addr / dec_picture->PicWidthInMbs); + } + } + + + /*! + ************************************************************************ + * \brief + * returns the x and y sample coordinates for a given MbAddress + ************************************************************************ + */ + void get_mb_pos (int mb_addr, int *x, int*y) + { + get_mb_block_pos(mb_addr, x, y); + + (*x) *= MB_BLOCK_SIZE; + (*y) *= MB_BLOCK_SIZE; + } + + + /*! 
+ ************************************************************************ + * \brief + * get neighbouring positions for non-aff coding + * \param curr_mb_nr + * current macroblock number (decoding order) + * \param xN + * input x position + * \param yN + * input y position + * \param luma + * 1 if luma coding, 0 for chroma + * \param pix + * returns position informations + ************************************************************************ + */ + void getNonAffNeighbour(unsigned int curr_mb_nr, int xN, int yN, int luma, PixelPos *pix) + { + Macroblock *currMb = &img->mb_data[curr_mb_nr]; + int maxW, maxH; + + if (luma) + { + maxW = 16; + maxH = 16; + } + else + { + assert(dec_picture->chroma_format_idc != 0); + maxW = img->mb_cr_size_x; + maxH = img->mb_cr_size_y; + } + + if ((xN<0)&&(yN<0)) + { + pix->mb_addr = currMb->mbAddrD; + pix->available = currMb->mbAvailD; + } + else if ((xN<0)&&((yN>=0)&&(yNmb_addr = currMb->mbAddrA; + pix->available = currMb->mbAvailA; + } + else if (((xN>=0)&&(xNmb_addr = currMb->mbAddrB; + pix->available = currMb->mbAvailB; + } + else if (((xN>=0)&&(xN=0)&&(yNmb_addr = curr_mb_nr; + pix->available = 1; + } + else if ((xN>=maxW)&&(yN<0)) + { + pix->mb_addr = currMb->mbAddrC; + pix->available = currMb->mbAvailC; + } + else + { + pix->available = 0; + } + + if (pix->available || img->DeblockCall) + { + pix->x = (xN + maxW) % maxW; + pix->y = (yN + maxH) % maxH; + get_mb_pos(pix->mb_addr, &(pix->pos_x), &(pix->pos_y)); + if (luma) + { + pix->pos_x += pix->x; + pix->pos_y += pix->y; + } + else + { + pix->pos_x = pix->pos_x/(16/img->mb_cr_size_x) + pix->x; + pix->pos_y = pix->pos_y/(16/img->mb_cr_size_y) + pix->y; + } + } + } + + /*! 
+ ************************************************************************ + * \brief + * get neighbouring positions for aff coding + * \param curr_mb_nr + * current macroblock number (decoding order) + * \param xN + * input x position + * \param yN + * input y position + * \param luma + * 1 if luma coding, 0 for chroma + * \param pix + * returns position informations + ************************************************************************ + */ + void getAffNeighbour(unsigned int curr_mb_nr, int xN, int yN, int luma, PixelPos *pix) + { + Macroblock *currMb = &img->mb_data[curr_mb_nr]; + int maxW, maxH; + int yM = -1; + + if (luma) + { + maxW = 16; + maxH = 16; + } + else + { + assert(dec_picture->chroma_format_idc != 0); + maxW = img->mb_cr_size_x; + maxH = img->mb_cr_size_y; + } + + // initialize to "not available" + pix->available = 0; + + if(yN > (maxH - 1)) + { + return; + } + if (xN > (maxW -1) && yN >= 0 && yN < maxH) + { + return; + } + + if (xN < 0) + { + if (yN < 0) + { + if(!currMb->mb_field) + { + // frame + if (curr_mb_nr%2 == 0) + { + // top + pix->mb_addr = currMb->mbAddrD + 1; + pix->available = currMb->mbAvailD; + yM = yN; + } + else + { + // bottom + pix->mb_addr = currMb->mbAddrA; + pix->available = currMb->mbAvailA; + if (currMb->mbAvailA) + { + if(!img->mb_data[currMb->mbAddrA].mb_field) + { + yM = yN; + } + else + { + (pix->mb_addr)++; + yM = (yN + maxH) >> 1; + } + } + } + } + else + { + // field + if(curr_mb_nr % 2 == 0) + { + // top + pix->mb_addr = currMb->mbAddrD; + pix->available = currMb->mbAvailD; + if (currMb->mbAvailD) + { + if(!img->mb_data[currMb->mbAddrD].mb_field) + { + (pix->mb_addr)++; + yM = 2 * yN; + } + else + { + yM = yN; + } + } + } + else + { + // bottom + pix->mb_addr = currMb->mbAddrD+1; + pix->available = currMb->mbAvailD; + yM = yN; + } + } + } + else + { // xN < 0 && yN >= 0 + if (yN >= 0 && yN mb_field) + { + // frame + if(curr_mb_nr % 2 == 0) + { + // top + pix->mb_addr = currMb->mbAddrA; + pix->available = 
currMb->mbAvailA; + if (currMb->mbAvailA) + { + if(!img->mb_data[currMb->mbAddrA].mb_field) + { + yM = yN; + } + else + { + if (yN %2 == 0) + { + yM = yN>> 1; + } + else + { + (pix->mb_addr)++; + yM = yN>> 1; + } + } + } + } + else + { + // bottom + pix->mb_addr = currMb->mbAddrA; + pix->available = currMb->mbAvailA; + if (currMb->mbAvailA) + { + if(!img->mb_data[currMb->mbAddrA].mb_field) + { + (pix->mb_addr)++; + yM = yN; + } + else + { + if (yN %2 == 0) + { + yM = (yN + maxH) >> 1; + } + else + { + (pix->mb_addr)++; + yM = (yN + maxH) >> 1; + } + } + } + } + } + else + { + // field + if (curr_mb_nr % 2 == 0) + { + // top + pix->mb_addr = currMb->mbAddrA; + pix->available = currMb->mbAvailA; + if (currMb->mbAvailA) + { + if(!img->mb_data[currMb->mbAddrA].mb_field) + { + if (yN < (maxH / 2)) + { + yM = yN << 1; + } + else + { + (pix->mb_addr)++; + yM = (yN << 1 ) - maxH; + } + } + else + { + yM = yN; + } + } + } + else + { + // bottom + pix->mb_addr = currMb->mbAddrA; + pix->available = currMb->mbAvailA; + if (currMb->mbAvailA) + { + if(!img->mb_data[currMb->mbAddrA].mb_field) + { + if (yN < (maxH / 2)) + { + yM = (yN << 1) + 1; + } + else + { + (pix->mb_addr)++; + yM = (yN << 1 ) + 1 - maxH; + } + } + else + { + (pix->mb_addr)++; + yM = yN; + } + } + } + } + } + } + } + else + { // xN >= 0 + if (xN >= 0 && xN < maxW) + { + if (yN<0) + { + if (!currMb->mb_field) + { + //frame + if (curr_mb_nr % 2 == 0) + { + //top + pix->mb_addr = currMb->mbAddrB; + // for the deblocker if the current MB is a frame and the one above is a field + // then the neighbor is the top MB of the pair + if (currMb->mbAvailB) + { + if (!(img->DeblockCall == 1 && (img->mb_data[currMb->mbAddrB]).mb_field)) + pix->mb_addr += 1; + } + + pix->available = currMb->mbAvailB; + yM = yN; + } + else + { + // bottom + pix->mb_addr = curr_mb_nr - 1; + pix->available = 1; + yM = yN; + } + } + else + { + // field + if (curr_mb_nr % 2 == 0) + { + // top + pix->mb_addr = currMb->mbAddrB; + pix->available = 
currMb->mbAvailB; + if (currMb->mbAvailB) + { + if(!img->mb_data[currMb->mbAddrB].mb_field) + { + (pix->mb_addr)++; + yM = 2* yN; + } + else + { + yM = yN; + } + } + } + else + { + // bottom + pix->mb_addr = currMb->mbAddrB + 1; + pix->available = currMb->mbAvailB; + yM = yN; + } + } + } + else + { + // yN >=0 + // for the deblocker if this is the extra edge then do this special stuff + if (yN == 0 && img->DeblockCall == 2) + { + pix->mb_addr = currMb->mbAddrB + 1; + pix->available = 1; + yM = yN - 1; + } + + else if ((yN >= 0) && (yN mb_addr = curr_mb_nr; + pix->available = 1; + yM = yN; + } + } + } + else + { // xN >= maxW + if(yN < 0) + { + if (!currMb->mb_field) + { + // frame + if (curr_mb_nr % 2 == 0) + { + // top + pix->mb_addr = currMb->mbAddrC + 1; + pix->available = currMb->mbAvailC; + yM = yN; + } + else + { + // bottom + pix->available = 0; + } + } + else + { + // field + if (curr_mb_nr % 2 == 0) + { + // top + pix->mb_addr = currMb->mbAddrC; + pix->available = currMb->mbAvailC; + if (currMb->mbAvailC) + { + if(!img->mb_data[currMb->mbAddrC].mb_field) + { + (pix->mb_addr)++; + yM = 2* yN; + } + else + { + yM = yN; + } + } + } + else + { + // bottom + pix->mb_addr = currMb->mbAddrC + 1; + pix->available = currMb->mbAvailC; + yM = yN; + } + } + } + } + } + if (pix->available || img->DeblockCall) + { + pix->x = (xN + maxW) % maxW; + pix->y = (yM + maxH) % maxH; + get_mb_pos(pix->mb_addr, &(pix->pos_x), &(pix->pos_y)); + if (luma) + { + pix->pos_x += pix->x; + pix->pos_y += pix->y; + } + else + { + pix->pos_x = pix->pos_x/(16/img->mb_cr_size_x) + pix->x; + pix->pos_y = pix->pos_y/(16/img->mb_cr_size_y) + pix->y; + } + } + } + + + /*! + ************************************************************************ + * \brief + * get neighbouring positions. 
MB AFF is automatically used from img structure + * \param curr_mb_nr + * current macroblock number (decoding order) + * \param xN + * input x position + * \param yN + * input y position + * \param luma + * 1 if luma coding, 0 for chroma + * \param pix + * returns position information + ************************************************************************ + */ + void getNeighbour(int curr_mb_nr, int xN, int yN, int luma, PixelPos *pix) + { + if (curr_mb_nr<0) + error ("getNeighbour: invalid macroblock number", 100); + + if (dec_picture->MbaffFrameFlag) + getAffNeighbour(curr_mb_nr, xN, yN, luma, pix); + else + getNonAffNeighbour(curr_mb_nr, xN, yN, luma, pix); + } + + + /*! + ************************************************************************ + * \brief + * get neighbouring 4x4 luma block + * \param curr_mb_nr + * current macroblock number (decoding order) + * \param block_x + * input x block position + * \param block_y + * input y block position + * \param rel_x + * relative x position of neighbor + * \param rel_y + * relative y position of neighbor + * \param pix + * returns position information + ************************************************************************ + */ + void getLuma4x4Neighbour (int curr_mb_nr, int block_x, int block_y, int rel_x, int rel_y, PixelPos *pix) + { + int x = 4* block_x + rel_x; + int y = 4* block_y + rel_y; + + getNeighbour(curr_mb_nr, x, y, 1, pix); + + if (pix->available) + { + pix->x /= 4; + pix->y /= 4; + pix->pos_x /= 4; + pix->pos_y /= 4; + } + } + + + /*!
+ ************************************************************************ + * \brief + * get neighbouring 4x4 chroma block + * \param curr_mb_nr + * current macroblock number (decoding order) + * \param block_x + * input x block position + * \param block_y + * input y block position + * \param rel_x + * relative x position of neighbor + * \param rel_y + * relative y position of neighbor + * \param pix + * returns position informations + ************************************************************************ + */ + void getChroma4x4Neighbour (int curr_mb_nr, int block_x, int block_y, int rel_x, int rel_y, PixelPos *pix) + { + int x = 4* block_x + rel_x; + int y = 4* block_y + rel_y; + + getNeighbour(curr_mb_nr, x, y, 0, pix); + + if (pix->available) + { + pix->x /= 4; + pix->y /= 4; + pix->pos_x /= 4; + pix->pos_y /= 4; + } + } Index: llvm-test/MultiSource/Applications/JM/ldecod/mb_access.h diff -c /dev/null llvm-test/MultiSource/Applications/JM/ldecod/mb_access.h:1.1 *** /dev/null Sat Feb 11 04:33:42 2006 --- llvm-test/MultiSource/Applications/JM/ldecod/mb_access.h Sat Feb 11 04:33:22 2006 *************** *** 0 **** --- 1,30 ---- + + /*! 
+ ************************************************************************************* + * \file mb_access.h + * + * \brief + * Functions for macroblock neighborhoods + * + * \author + * Main contributors (see contributors.h for copyright, address and affiliation details) + * - Karsten S?hring + ************************************************************************************* + */ + + #ifndef _MB_ACCESS_H_ + #define _MB_ACCESS_H_ + + void CheckAvailabilityOfNeighbors(); + + void getNeighbour(int curr_mb_nr, int xN, int yN, int luma, PixelPos *pix); + void getLuma4x4Neighbour (int curr_mb_nr, int block_x, int block_y, int rel_x, int rel_y, PixelPos *pix); + void getChroma4x4Neighbour (int curr_mb_nr, int block_x, int block_y, int rel_x, int rel_y, PixelPos *pix); + + int mb_is_available(int mbAddr, int currMbAddr); + void get_mb_pos (int mb_addr, int *x, int*y); + void get_mb_block_pos (int mb_addr, int *x, int*y); + + + + #endif Index: llvm-test/MultiSource/Applications/JM/ldecod/mbuffer.c diff -c /dev/null llvm-test/MultiSource/Applications/JM/ldecod/mbuffer.c:1.1 *** /dev/null Sat Feb 11 04:33:42 2006 --- llvm-test/MultiSource/Applications/JM/ldecod/mbuffer.c Sat Feb 11 04:33:22 2006 *************** *** 0 **** --- 1,3830 ---- + + /*! 
+ *********************************************************************** + * \file + * mbuffer.c + * + * \brief + * Frame buffer functions + * + * \author + * Main contributors (see contributors.h for copyright, address and affiliation details) + * - Karsten S?hring + * - Alexis Tourapis + * - Jill Boyce + * - Saurav K Bandyopadhyay + * - Zhenyu Wu + * + *********************************************************************** + */ + + #include + #include + #include + #include + + #include "global.h" + #include "mbuffer.h" + #include "memalloc.h" + #include "output.h" + #include "image.h" + #include "header.h" + + // picture error concealment + #include "erc_api.h" + + static void insert_picture_in_dpb(FrameStore* fs, StorablePicture* p); + static void output_one_frame_from_dpb(); + static int is_used_for_reference(FrameStore* fs); + static void get_smallest_poc(int *poc,int * pos); + static int remove_unused_frame_from_dpb(); + static int is_short_term_reference(FrameStore* fs); + static int is_long_term_reference(FrameStore* fs); + void gen_field_ref_ids(StorablePicture *p); + + DecodedPictureBuffer dpb; + + StorablePicture **listX[6]; + + ColocatedParams *Co_located = NULL; + + extern StorablePicture *dec_picture; + + int listXsize[6]; + + #define MAX_LIST_SIZE 33 + + /*! + ************************************************************************ + * \brief + * Print out list of pictures in DPB. Used for debug purposes. 
+ ************************************************************************ + */ + void dump_dpb() + { + unsigned i; + + return; + + for (i=0; iframe_num); + if (dpb.fs[i]->is_used & 1) + { + if (dpb.fs[i]->top_field) + printf("T: poc=%d ", dpb.fs[i]->top_field->poc); + else + printf("T: poc=%d ", dpb.fs[i]->frame->top_poc); + } + if (dpb.fs[i]->is_used & 2) + { + if (dpb.fs[i]->bottom_field) + printf("B: poc=%d ", dpb.fs[i]->bottom_field->poc); + else + printf("B: poc=%d ", dpb.fs[i]->frame->bottom_poc); + } + if (dpb.fs[i]->is_used == 3) + printf("F: poc=%d ", dpb.fs[i]->frame->poc); + printf("G: poc=%d) ", dpb.fs[i]->poc); + if (dpb.fs[i]->is_reference) printf ("ref (%d) ", dpb.fs[i]->is_reference); + if (dpb.fs[i]->is_long_term) printf ("lt_ref (%d) ", dpb.fs[i]->is_reference); + if (dpb.fs[i]->is_output) printf ("out "); + if (dpb.fs[i]->is_used == 3) + { + if (dpb.fs[i]->frame->non_existing) printf ("ne "); + } + printf ("\n"); + } + } + + /*! + ************************************************************************ + * \brief + * Returns the size of the dpb depending on level and picture size + * + * + ************************************************************************ + */ + int getDpbSize() + { + int pic_size = (active_sps->pic_width_in_mbs_minus1 + 1) * (active_sps->pic_height_in_map_units_minus1 + 1) * (active_sps->frame_mbs_only_flag?1:2) * 384; + + int size = 0; + + switch (active_sps->level_idc) + { + case 10: + size = 152064; + break; + case 11: + size = 345600; + break; + case 12: + size = 912384; + break; + case 13: + size = 912384; + break; + case 20: + size = 912384; + break; + case 21: + size = 1824768; + break; + case 22: + size = 3110400; + break; + case 30: + size = 3110400; + break; + case 31: + size = 6912000; + break; + case 32: + size = 7864320; + break; + case 40: + size = 12582912; + break; + case 41: + size = 12582912; + break; + case 42: + if( (active_sps->profile_idc==FREXT_HP ) || (active_sps->profile_idc==FREXT_Hi10P) + || 
(active_sps->profile_idc==FREXT_Hi422) || (active_sps->profile_idc==FREXT_Hi444)) + size = 13369344; + else + size = 12582912; + break; + case 50: + size = 42393600; + break; + case 51: + size = 70778880; + break; + default: + error ("undefined level", 500); + break; + } + + size /= pic_size; + size = min( size, 16); + + if (active_sps->vui_parameters_present_flag && active_sps->vui_seq_parameters.bitstream_restriction_flag) + { + if ((int)active_sps->vui_seq_parameters.max_dec_frame_buffering > size) + { + error ("max_dec_frame_buffering larger than MaxDpbSize", 500); + } + size = max (1, active_sps->vui_seq_parameters.max_dec_frame_buffering); + } + + return size; + } + + /*! + ************************************************************************ + * \brief + * Check then number of frames marked "used for reference" and break + * if maximum is exceeded + * + ************************************************************************ + */ + void check_num_ref() + { + if ((int)(dpb.ltref_frames_in_buffer + dpb.ref_frames_in_buffer ) > (max(1,dpb.num_ref_frames))) + { + error ("Max. number of reference frames exceeded. Invalid stream.", 500); + } + } + + + /*! + ************************************************************************ + * \brief + * Allocate memory for decoded picture buffer and initialize with sane values. + * + ************************************************************************ + */ + void init_dpb() + { + unsigned i,j; + + if (dpb.init_done) + { + free_dpb(); + } + + dpb.size = getDpbSize(); + + dpb.num_ref_frames = active_sps->num_ref_frames; + + if (dpb.size < active_sps->num_ref_frames) + { + error ("DPB size at specified level is smaller than the specified number of reference frames. 
This is not allowed.\n", 1000); + } + + dpb.used_size = 0; + dpb.last_picture = NULL; + + dpb.ref_frames_in_buffer = 0; + dpb.ltref_frames_in_buffer = 0; + + dpb.fs = calloc(dpb.size, sizeof (FrameStore*)); + if (NULL==dpb.fs) + no_mem_exit("init_dpb: dpb->fs"); + + dpb.fs_ref = calloc(dpb.size, sizeof (FrameStore*)); + if (NULL==dpb.fs_ref) + no_mem_exit("init_dpb: dpb->fs_ref"); + + dpb.fs_ltref = calloc(dpb.size, sizeof (FrameStore*)); + if (NULL==dpb.fs_ltref) + no_mem_exit("init_dpb: dpb->fs_ltref"); + + for (i=0; ilast_has_mmco_5 = 0; + + dpb.init_done = 1; + + // picture error concealment + if(img->conceal_mode !=0) + last_out_fs = alloc_frame_store(); + } + /*! + ************************************************************************ + * \brief + * Free memory for decoded picture buffer. + ************************************************************************ + */ + void free_dpb() + { + unsigned i; + if (dpb.fs) + { + for (i=0; iconceal_mode != 0) + free_frame_store(last_out_fs); + } + + + /*! + ************************************************************************ + * \brief + * Allocate memory for decoded picture buffer frame stores an initialize with sane values. + * + * \return + * the allocated FrameStore structure + ************************************************************************ + */ + FrameStore* alloc_frame_store() + { + FrameStore *f; + + f = calloc (1, sizeof(FrameStore)); + if (NULL==f) + no_mem_exit("alloc_frame_store: f"); + + f->is_used = 0; + f->is_reference = 0; + f->is_long_term = 0; + f->is_orig_reference = 0; + + f->is_output = 0; + + f->frame = NULL;; + f->top_field = NULL; + f->bottom_field = NULL; + + return f; + } + + /*! + ************************************************************************ + * \brief + * Allocate memory for a stored picture. 
+ * + * \param structure + * picture structure + * \param size_x + * horizontal luma size + * \param size_y + * vertical luma size + * \param size_x_cr + * horizontal chroma size + * \param size_y_cr + * vertical chroma size + * + * \return + * the allocated StorablePicture structure + ************************************************************************ + */ + StorablePicture* alloc_storable_picture(PictureStructure structure, int size_x, int size_y, int size_x_cr, int size_y_cr) + { + StorablePicture *s; + + //printf ("Allocating (%s) picture (x=%d, y=%d, x_cr=%d, y_cr=%d)\n", (type == FRAME)?"FRAME":(type == TOP_FIELD)?"TOP_FIELD":"BOTTOM_FIELD", size_x, size_y, size_x_cr, size_y_cr); + + s = calloc (1, sizeof(StorablePicture)); + if (NULL==s) + no_mem_exit("alloc_storable_picture: s"); + + if (structure!=FRAME) + { + size_y /= 2; + size_y_cr /= 2; + } + + s->PicSizeInMbs = (size_x*size_y)/256; + s->imgUV = NULL; + + get_mem2Dpel (&(s->imgY), size_y, size_x); + if (active_sps->chroma_format_idc != YUV400) + get_mem3Dpel (&(s->imgUV), 2, size_y_cr, size_x_cr); + + s->mb_field = calloc (s->PicSizeInMbs, sizeof(int)); + if (NULL==s->mb_field) + no_mem_exit("alloc_storable_picture: s->mb_field"); + + get_mem2Dshort (&(s->slice_id), size_y / MB_BLOCK_SIZE, size_x / MB_BLOCK_SIZE); + + get_mem3D ((byte****)(&(s->ref_idx)) , 2, size_y / BLOCK_SIZE, size_x / BLOCK_SIZE); + get_mem3Dint64 (&(s->ref_pic_id), 6, size_y / BLOCK_SIZE, size_x / BLOCK_SIZE); + get_mem3Dint64 (&(s->ref_id) , 6, size_y / BLOCK_SIZE, size_x / BLOCK_SIZE); + get_mem4Dshort (&(s->mv) , 2, size_y / BLOCK_SIZE, size_x / BLOCK_SIZE, 2); + + get_mem2D (&(s->moving_block), size_y / BLOCK_SIZE, size_x / BLOCK_SIZE); + get_mem2D (&(s->field_frame) , size_y / BLOCK_SIZE, size_x / BLOCK_SIZE); + + s->pic_num=0; + s->frame_num=0; + s->long_term_frame_idx=0; + s->long_term_pic_num=0; + s->used_for_reference=0; + s->is_long_term=0; + s->non_existing=0; + s->is_output = 0; + s->max_slice_id = 0; + + 
s->structure=structure; + + s->size_x = size_x; + s->size_y = size_y; + s->size_x_cr = size_x_cr; + s->size_y_cr = size_y_cr; + + s->top_field = NULL; + s->bottom_field = NULL; + s->frame = NULL; + + s->dec_ref_pic_marking_buffer = NULL; + + s->coded_frame = 0; + s->MbaffFrameFlag = 0; + + return s; + } + + /*! + ************************************************************************ + * \brief + * Free frame store memory. + * + * \param f + * FrameStore to be freed + * + ************************************************************************ + */ + void free_frame_store(FrameStore* f) + { + if (f) + { + if (f->frame) + { + free_storable_picture(f->frame); + f->frame=NULL; + } + if (f->top_field) + { + free_storable_picture(f->top_field); + f->top_field=NULL; + } + if (f->bottom_field) + { + free_storable_picture(f->bottom_field); + f->bottom_field=NULL; + } + free(f); + } + } + + /*! + ************************************************************************ + * \brief + * Free picture memory. + * + * \param p + * Picture to be freed + * + ************************************************************************ + */ + void free_storable_picture(StorablePicture* p) + { + if (p) + { + if (p->ref_idx) + { + free_mem3D ((byte***)p->ref_idx, 2); + p->ref_idx = NULL; + } + + if (p->ref_pic_id) + { + free_mem3Dint64 (p->ref_pic_id, 6); + p->ref_pic_id = NULL; + } + if (p->ref_id) + { + free_mem3Dint64 (p->ref_id, 6); + p->ref_id = NULL; + } + if (p->mv) + { + free_mem4Dshort (p->mv, 2, p->size_y / BLOCK_SIZE); + p->mv = NULL; + } + + if (p->moving_block) + { + free_mem2D (p->moving_block); + p->moving_block=NULL; + } + + if (p->field_frame) + { + free_mem2D (p->field_frame); + p->field_frame=NULL; + } + + + if (p->imgY) + { + free_mem2Dpel (p->imgY); + p->imgY=NULL; + } + if (p->imgUV) + { + free_mem3Dpel (p->imgUV, 2); + p->imgUV=NULL; + } + + if (p->mb_field) + { + free(p->mb_field); + p->mb_field=NULL; + } + + free(p); + p = NULL; + } + } + + /*! 
+ ************************************************************************ + * \brief + * mark FrameStore unused for reference + * + ************************************************************************ + */ + static void unmark_for_reference(FrameStore* fs) + { + + if (fs->is_used & 1) + { + if (fs->top_field) + { + fs->top_field->used_for_reference = 0; + } + } + if (fs->is_used & 2) + { + if (fs->bottom_field) + { + fs->bottom_field->used_for_reference = 0; + } + } + if (fs->is_used == 3) + { + if (fs->top_field && fs->bottom_field) + { + fs->top_field->used_for_reference = 0; + fs->bottom_field->used_for_reference = 0; + } + fs->frame->used_for_reference = 0; + } + + fs->is_reference = 0; + + if(fs->frame) + { + if (fs->frame->ref_pic_id) + { + free_mem3Dint64 (fs->frame->ref_pic_id, 6); + fs->frame->ref_pic_id = NULL; + } + if (fs->frame->ref_id) + { + free_mem3Dint64 (fs->frame->ref_id, 6); + fs->frame->ref_id = NULL; + } + } + + if (fs->top_field) + { + if (fs->top_field->ref_pic_id) + { + free_mem3Dint64 (fs->top_field->ref_pic_id, 6); + fs->top_field->ref_pic_id = NULL; + } + if (fs->top_field->ref_id) + { + free_mem3Dint64 (fs->top_field->ref_id, 6); + fs->top_field->ref_id = NULL; + } + + } + if (fs->bottom_field) + { + if (fs->bottom_field->ref_pic_id) + { + free_mem3Dint64 (fs->bottom_field->ref_pic_id, 6); + fs->bottom_field->ref_pic_id = NULL; + } + if (fs->bottom_field->ref_id) + { + free_mem3Dint64 (fs->bottom_field->ref_id, 6); + fs->bottom_field->ref_id = NULL; + } + } + + } + + + /*! 
+ ************************************************************************ + * \brief + * mark FrameStore unused for reference and reset long term flags + * + ************************************************************************ + */ + static void unmark_for_long_term_reference(FrameStore* fs) + { + + if (fs->is_used & 1) + { + if (fs->top_field) + { + fs->top_field->used_for_reference = 0; + fs->top_field->is_long_term = 0; + } + } + if (fs->is_used & 2) + { + if (fs->bottom_field) + { + fs->bottom_field->used_for_reference = 0; + fs->bottom_field->is_long_term = 0; + } + } + if (fs->is_used == 3) + { + if (fs->top_field && fs->bottom_field) + { + fs->top_field->used_for_reference = 0; + fs->top_field->is_long_term = 0; + fs->bottom_field->used_for_reference = 0; + fs->bottom_field->is_long_term = 0; + } + fs->frame->used_for_reference = 0; + fs->frame->is_long_term = 0; + } + + fs->is_reference = 0; + fs->is_long_term = 0; + } + + + /*! + ************************************************************************ + * \brief + * compares two stored pictures by picture number for qsort in descending order + * + ************************************************************************ + */ + static int compare_pic_by_pic_num_desc( const void *arg1, const void *arg2 ) + { + if ( (*(StorablePicture**)arg1)->pic_num < (*(StorablePicture**)arg2)->pic_num) + return 1; + if ( (*(StorablePicture**)arg1)->pic_num > (*(StorablePicture**)arg2)->pic_num) + return -1; + else + return 0; + } + + /*! 
+ ************************************************************************ + * \brief + * compares two stored pictures by long term picture number for qsort in ascending order + * + ************************************************************************ + */ + static int compare_pic_by_lt_pic_num_asc( const void *arg1, const void *arg2 ) + { + if ( (*(StorablePicture**)arg1)->long_term_pic_num < (*(StorablePicture**)arg2)->long_term_pic_num) + return -1; + if ( (*(StorablePicture**)arg1)->long_term_pic_num > (*(StorablePicture**)arg2)->long_term_pic_num) + return 1; + else + return 0; + } + + /*! + ************************************************************************ + * \brief + * compares two frame stores by frame_num_wrap for qsort in descending order + * + ************************************************************************ + */ + static int compare_fs_by_frame_num_desc( const void *arg1, const void *arg2 ) + { + if ( (*(FrameStore**)arg1)->frame_num_wrap < (*(FrameStore**)arg2)->frame_num_wrap) + return 1; + if ( (*(FrameStore**)arg1)->frame_num_wrap > (*(FrameStore**)arg2)->frame_num_wrap) + return -1; + else + return 0; + } + + + /*! + ************************************************************************ + * \brief + * compares two frame stores by long_term_frame_idx for qsort in ascending order + * + ************************************************************************ + */ + static int compare_fs_by_lt_pic_idx_asc( const void *arg1, const void *arg2 ) + { + if ( (*(FrameStore**)arg1)->long_term_frame_idx < (*(FrameStore**)arg2)->long_term_frame_idx) + return -1; + if ( (*(FrameStore**)arg1)->long_term_frame_idx > (*(FrameStore**)arg2)->long_term_frame_idx) + return 1; + else + return 0; + } + + + /*!
+ ************************************************************************ + * \brief + * compares two stored pictures by poc for qsort in ascending order + * + ************************************************************************ + */ + static int compare_pic_by_poc_asc( const void *arg1, const void *arg2 ) + { + if ( (*(StorablePicture**)arg1)->poc < (*(StorablePicture**)arg2)->poc) + return -1; + if ( (*(StorablePicture**)arg1)->poc > (*(StorablePicture**)arg2)->poc) + return 1; + else + return 0; + } + + + /*! + ************************************************************************ + * \brief + * compares two stored pictures by poc for qsort in descending order + * + ************************************************************************ + */ + static int compare_pic_by_poc_desc( const void *arg1, const void *arg2 ) + { + if ( (*(StorablePicture**)arg1)->poc < (*(StorablePicture**)arg2)->poc) + return 1; + if ( (*(StorablePicture**)arg1)->poc > (*(StorablePicture**)arg2)->poc) + return -1; + else + return 0; + } + + + /*! + ************************************************************************ + * \brief + * compares two frame stores by poc for qsort in ascending order + * + ************************************************************************ + */ + static int compare_fs_by_poc_asc( const void *arg1, const void *arg2 ) + { + if ( (*(FrameStore**)arg1)->poc < (*(FrameStore**)arg2)->poc) + return -1; + if ( (*(FrameStore**)arg1)->poc > (*(FrameStore**)arg2)->poc) + return 1; + else + return 0; + } + + + /*! 
+ ************************************************************************ + * \brief + * compares two frame stores by poc for qsort in descending order + * + ************************************************************************ + */ + static int compare_fs_by_poc_desc( const void *arg1, const void *arg2 ) + { + if ( (*(FrameStore**)arg1)->poc < (*(FrameStore**)arg2)->poc) + return 1; + if ( (*(FrameStore**)arg1)->poc > (*(FrameStore**)arg2)->poc) + return -1; + else + return 0; + } + + + /*! + ************************************************************************ + * \brief + * returns true, if picture is short term reference picture + * + * \return + * nonzero when s->used_for_reference is set and s->is_long_term is clear + * + ************************************************************************ + */ + int is_short_ref(StorablePicture *s) + { + return ((s->used_for_reference) && (!(s->is_long_term))); + } + + + /*! + ************************************************************************ + * \brief + * returns true, if picture is long term reference picture + * + * \return + * nonzero when both s->used_for_reference and s->is_long_term are set + * + ************************************************************************ + */ + int is_long_ref(StorablePicture *s) + { + return ((s->used_for_reference) && (s->is_long_term)); + } + + + /*!
+ ************************************************************************ + * \brief + * Generates an alternating field list from a given FrameStore list + * + * The long_term argument selects the reference test that is applied to + * each field: is_long_ref when nonzero, is_short_ref otherwise. + * Accepted fields are appended to list and *list_size is incremented. + * + * NOTE(review): the while/for loop headers in this copy look truncated + * (unbalanced parentheses, e.g. around "top_idxis_used"); confirm against + * the original patch before relying on this text. + * + ************************************************************************ + */ + static void gen_pic_list_from_frame_list(PictureStructure currStrcture, FrameStore **fs_list, int list_idx, StorablePicture **list, int *list_size, int long_term) + { + int top_idx = 0; + int bot_idx = 0; + + int (*is_ref)(StorablePicture *s); + + if (long_term) + is_ref=is_long_ref; + else + is_ref=is_short_ref; + + if (currStrcture == TOP_FIELD) + { + while ((top_idxis_used & 1) + { + if(is_ref(fs_list[top_idx]->top_field)) + { + // short term ref pic + list[*list_size] = fs_list[top_idx]->top_field; + (*list_size)++; + top_idx++; + break; + } + } + } + for ( ; bot_idxis_used & 2) + { + if(is_ref(fs_list[bot_idx]->bottom_field)) + { + // short term ref pic + list[*list_size] = fs_list[bot_idx]->bottom_field; + (*list_size)++; + bot_idx++; + break; + } + } + } + } + } + if (currStrcture == BOTTOM_FIELD) + { + while ((top_idxis_used & 2) + { + if(is_ref(fs_list[bot_idx]->bottom_field)) + { + // short term ref pic + list[*list_size] = fs_list[bot_idx]->bottom_field; + (*list_size)++; + bot_idx++; + break; + } + } + } + for ( ; top_idxis_used & 1) + { + if(is_ref(fs_list[top_idx]->top_field)) + { + // short term ref pic + list[*list_size] = fs_list[top_idx]->top_field; + (*list_size)++; + top_idx++; + break; + } + } + } + } + } + } + + + /*!
+ ************************************************************************ + * \brief + * Initialize listX[0] and list 1 depending on current picture type + * + ************************************************************************ + */ + void init_lists(int currSliceType, PictureStructure currPicStructure) + { + int add_top = 0, add_bottom = 0; + unsigned i; + int j; + int MaxFrameNum = 1 << (active_sps->log2_max_frame_num_minus4 + 4); + int diff; + + int list0idx = 0; + int list0idx_1 = 0; + int listltidx = 0; + + FrameStore **fs_list0; + FrameStore **fs_list1; + FrameStore **fs_listlt; + + StorablePicture *tmp_s; + + if (currPicStructure == FRAME) + { + for (i=0; iis_used==3) + { + if ((dpb.fs_ref[i]->frame->used_for_reference)&&(!dpb.fs_ref[i]->frame->is_long_term)) + { + if( dpb.fs_ref[i]->frame_num > img->frame_num ) + { + dpb.fs_ref[i]->frame_num_wrap = dpb.fs_ref[i]->frame_num - MaxFrameNum; + } + else + { + dpb.fs_ref[i]->frame_num_wrap = dpb.fs_ref[i]->frame_num; + } + dpb.fs_ref[i]->frame->pic_num = dpb.fs_ref[i]->frame_num_wrap; + } + } + } + // update long_term_pic_num + for (i=0; iis_used==3) + { + if (dpb.fs_ltref[i]->frame->is_long_term) + { + dpb.fs_ltref[i]->frame->long_term_pic_num = dpb.fs_ltref[i]->frame->long_term_frame_idx; + } + } + } + } + else + { + if (currPicStructure == TOP_FIELD) + { + add_top = 1; + add_bottom = 0; + } + else + { + add_top = 0; + add_bottom = 1; + } + + for (i=0; iis_reference) + { + if( dpb.fs_ref[i]->frame_num > img->frame_num ) + { + dpb.fs_ref[i]->frame_num_wrap = dpb.fs_ref[i]->frame_num - MaxFrameNum; + } + else + { + dpb.fs_ref[i]->frame_num_wrap = dpb.fs_ref[i]->frame_num; + } + if (dpb.fs_ref[i]->is_reference & 1) + { + dpb.fs_ref[i]->top_field->pic_num = (2 * dpb.fs_ref[i]->frame_num_wrap) + add_top; + } + if (dpb.fs_ref[i]->is_reference & 2) + { + dpb.fs_ref[i]->bottom_field->pic_num = (2 * dpb.fs_ref[i]->frame_num_wrap) + add_bottom; + } + } + } + // update long_term_pic_num + for (i=0; iis_long_term & 
1) + { + dpb.fs_ltref[i]->top_field->long_term_pic_num = 2 * dpb.fs_ltref[i]->top_field->long_term_frame_idx + add_top; + } + if (dpb.fs_ltref[i]->is_long_term & 2) + { + dpb.fs_ltref[i]->bottom_field->long_term_pic_num = 2 * dpb.fs_ltref[i]->bottom_field->long_term_frame_idx + add_bottom; + } + } + } + + + + if ((currSliceType == I_SLICE)||(currSliceType == SI_SLICE)) + { + listXsize[0] = 0; + listXsize[1] = 0; + return; + } + + if ((currSliceType == P_SLICE)||(currSliceType == SP_SLICE)) + { + // Calculate FrameNumWrap and PicNum + if (currPicStructure == FRAME) + { + for (i=0; iis_used==3) + { + if ((dpb.fs_ref[i]->frame->used_for_reference)&&(!dpb.fs_ref[i]->frame->is_long_term)) + { + listX[0][list0idx++] = dpb.fs_ref[i]->frame; + } + } + } + // order list 0 by PicNum + qsort((void *)listX[0], list0idx, sizeof(StorablePicture*), compare_pic_by_pic_num_desc); + listXsize[0] = list0idx; + // printf("listX[0] (PicNum): "); for (i=0; ipic_num);} printf("\n"); + + // long term handling + for (i=0; iis_used==3) + { + if (dpb.fs_ltref[i]->frame->is_long_term) + { + listX[0][list0idx++]=dpb.fs_ltref[i]->frame; + } + } + } + qsort((void *)&listX[0][listXsize[0]], list0idx-listXsize[0], sizeof(StorablePicture*), compare_pic_by_lt_pic_num_asc); + listXsize[0] = list0idx; + } + else + { + fs_list0 = calloc(dpb.size, sizeof (FrameStore*)); + if (NULL==fs_list0) + no_mem_exit("init_lists: fs_list0"); + fs_listlt = calloc(dpb.size, sizeof (FrameStore*)); + if (NULL==fs_listlt) + no_mem_exit("init_lists: fs_listlt"); + + for (i=0; iis_reference) + { + fs_list0[list0idx++] = dpb.fs_ref[i]; + } + } + + qsort((void *)fs_list0, list0idx, sizeof(FrameStore*), compare_fs_by_frame_num_desc); + + // printf("fs_list0 (FrameNum): "); for (i=0; iframe_num_wrap);} printf("\n"); + + listXsize[0] = 0; + gen_pic_list_from_frame_list(currPicStructure, fs_list0, list0idx, listX[0], &listXsize[0], 0); + + // printf("listX[0] (PicNum): "); for (i=0; ipic_num);} printf("\n"); + + // long term 
handling + for (i=0; iis_used==3) + { + if ((dpb.fs_ref[i]->frame->used_for_reference)&&(!dpb.fs_ref[i]->frame->is_long_term)) + { + if (img->framepoc > dpb.fs_ref[i]->frame->poc) + { + listX[0][list0idx++] = dpb.fs_ref[i]->frame; + } + } + } + } + qsort((void *)listX[0], list0idx, sizeof(StorablePicture*), compare_pic_by_poc_desc); + list0idx_1 = list0idx; + for (i=0; iis_used==3) + { + if ((dpb.fs_ref[i]->frame->used_for_reference)&&(!dpb.fs_ref[i]->frame->is_long_term)) + { + if (img->framepoc < dpb.fs_ref[i]->frame->poc) + { + listX[0][list0idx++] = dpb.fs_ref[i]->frame; + } + } + } + } + qsort((void *)&listX[0][list0idx_1], list0idx-list0idx_1, sizeof(StorablePicture*), compare_pic_by_poc_asc); + + for (j=0; jframepoc); for (i=0; ipoc);} printf("\n"); + // printf("listX[1] currPoc=%d (Poc): ", img->framepoc); for (i=0; ipoc);} printf("\n"); + + // long term handling + for (i=0; iis_used==3) + { + if (dpb.fs_ltref[i]->frame->is_long_term) + { + listX[0][list0idx] =dpb.fs_ltref[i]->frame; + listX[1][list0idx++]=dpb.fs_ltref[i]->frame; + } + } + } + qsort((void *)&listX[0][listXsize[0]], list0idx-listXsize[0], sizeof(StorablePicture*), compare_pic_by_lt_pic_num_asc); + qsort((void *)&listX[1][listXsize[0]], list0idx-listXsize[0], sizeof(StorablePicture*), compare_pic_by_lt_pic_num_asc); + listXsize[0] = listXsize[1] = list0idx; + } + else + { + fs_list0 = calloc(dpb.size, sizeof (FrameStore*)); + if (NULL==fs_list0) + no_mem_exit("init_lists: fs_list0"); + fs_list1 = calloc(dpb.size, sizeof (FrameStore*)); + if (NULL==fs_list1) + no_mem_exit("init_lists: fs_list1"); + fs_listlt = calloc(dpb.size, sizeof (FrameStore*)); + if (NULL==fs_listlt) + no_mem_exit("init_lists: fs_listlt"); + + listXsize[0] = 0; + listXsize[1] = 1; + + for (i=0; iis_used) + { + if (img->ThisPOC >= dpb.fs_ref[i]->poc) + { + fs_list0[list0idx++] = dpb.fs_ref[i]; + } + } + } + qsort((void *)fs_list0, list0idx, sizeof(FrameStore*), compare_fs_by_poc_desc); + list0idx_1 = list0idx; + for (i=0; 
iis_used) + { + if (img->ThisPOC < dpb.fs_ref[i]->poc) + { + fs_list0[list0idx++] = dpb.fs_ref[i]; + } + } + } + qsort((void *)&fs_list0[list0idx_1], list0idx-list0idx_1, sizeof(FrameStore*), compare_fs_by_poc_asc); + + for (j=0; jThisPOC); for (i=0; ipoc);} printf("\n"); + // printf("fs_list1 currPoc=%d (Poc): ", img->ThisPOC); for (i=0; ipoc);} printf("\n"); + + listXsize[0] = 0; + listXsize[1] = 0; + gen_pic_list_from_frame_list(currPicStructure, fs_list0, list0idx, listX[0], &listXsize[0], 0); + gen_pic_list_from_frame_list(currPicStructure, fs_list1, list0idx, listX[1], &listXsize[1], 0); + + // printf("listX[0] currPoc=%d (Poc): ", img->framepoc); for (i=0; ipoc);} printf("\n"); + // printf("listX[1] currPoc=%d (Poc): ", img->framepoc); for (i=0; ipoc);} printf("\n"); + + // long term handling + for (i=0; i 1)) + { + // check if lists are identical, if yes swap first two elements of listX[1] + diff=0; + for (j = 0; j< listXsize[0]; j++) + { + if (listX[0][j]!=listX[1][j]) + diff=1; + } + if (!diff) + { + tmp_s = listX[1][0]; + listX[1][0]=listX[1][1]; + listX[1][1]=tmp_s; + } + } + // set max size + listXsize[0] = min (listXsize[0], img->num_ref_idx_l0_active); + listXsize[1] = min (listXsize[1], img->num_ref_idx_l1_active); + + // set the unused list entries to NULL + for (i=listXsize[0]; i< (MAX_LIST_SIZE) ; i++) + { + listX[0][i] = NULL; + } + for (i=listXsize[1]; i< (MAX_LIST_SIZE) ; i++) + { + listX[1][i] = NULL; + } + } + + /*! 
+ ************************************************************************ + * \brief + * Initialize listX[2..5] from lists 0 and 1 + * listX[2]: list0 for current_field==top + * listX[3]: list1 for current_field==top + * listX[4]: list0 for current_field==bottom + * listX[5]: list1 for current_field==bottom + * + ************************************************************************ + */ + void init_mbaff_lists() + { + unsigned j; + int i; + + for (i=2;i<6;i++) + { + for (j=0; jtop_field; + listX[2][2*i+1]=listX[0][i]->bottom_field; + listX[4][2*i] =listX[0][i]->bottom_field; + listX[4][2*i+1]=listX[0][i]->top_field; + } + listXsize[2]=listXsize[4]=listXsize[0] * 2; + + for (i=0; itop_field; + listX[3][2*i+1]=listX[1][i]->bottom_field; + listX[5][2*i] =listX[1][i]->bottom_field; + listX[5][2*i+1]=listX[1][i]->top_field; + } + listXsize[3]=listXsize[5]=listXsize[1] * 2; + } + + /*! + ************************************************************************ + * \brief + * Returns short term pic with given picNum + * + * \return + * the matching frame or field picture taken from dpb.fs_ref, + * NULL if no entry matches + * + * NOTE(review): the loop header in this copy looks truncated + * ("for (i=0; istructure==FRAME)"); confirm against the original patch. + * + ************************************************************************ + */ + static StorablePicture* get_short_term_pic(int picNum) + { + unsigned i; + + for (i=0; istructure==FRAME) + { + if (dpb.fs_ref[i]->is_reference == 3) + if ((!dpb.fs_ref[i]->frame->is_long_term)&&(dpb.fs_ref[i]->frame->pic_num == picNum)) + return dpb.fs_ref[i]->frame; + } + else + { + if (dpb.fs_ref[i]->is_reference & 1) + if ((!dpb.fs_ref[i]->top_field->is_long_term)&&(dpb.fs_ref[i]->top_field->pic_num == picNum)) + return dpb.fs_ref[i]->top_field; + if (dpb.fs_ref[i]->is_reference & 2) + if ((!dpb.fs_ref[i]->bottom_field->is_long_term)&&(dpb.fs_ref[i]->bottom_field->pic_num == picNum)) + return dpb.fs_ref[i]->bottom_field; + } + } + return NULL; + } + + /*!
+ ************************************************************************ + * \brief + * Returns long term pic with given LongtermPicNum + * + * \return + * the matching frame or field picture taken from dpb.fs_ltref, + * NULL if no entry matches + * + * NOTE(review): the loop header in this copy looks truncated + * ("for (i=0; istructure==FRAME)"); confirm against the original patch. + * + ************************************************************************ + */ + static StorablePicture* get_long_term_pic(int LongtermPicNum) + { + unsigned i; + + for (i=0; istructure==FRAME) + { + if (dpb.fs_ltref[i]->is_reference == 3) + if ((dpb.fs_ltref[i]->frame->is_long_term)&&(dpb.fs_ltref[i]->frame->long_term_pic_num == LongtermPicNum)) + return dpb.fs_ltref[i]->frame; + } + else + { + if (dpb.fs_ltref[i]->is_reference & 1) + if ((dpb.fs_ltref[i]->top_field->is_long_term)&&(dpb.fs_ltref[i]->top_field->long_term_pic_num == LongtermPicNum)) + return dpb.fs_ltref[i]->top_field; + if (dpb.fs_ltref[i]->is_reference & 2) + if ((dpb.fs_ltref[i]->bottom_field->is_long_term)&&(dpb.fs_ltref[i]->bottom_field->long_term_pic_num == LongtermPicNum)) + return dpb.fs_ltref[i]->bottom_field; + } + } + return NULL; + } + + /*! + ************************************************************************ + * \brief + * Reordering process for short-term reference pictures + * + ************************************************************************ + */ + static void reorder_short_term(StorablePicture **RefPicListX, int num_ref_idx_lX_active_minus1, int picNumLX, int *refIdxLX) + { + int cIdx, nIdx; + + StorablePicture *picLX; + + picLX = get_short_term_pic(picNumLX); + + for( cIdx = num_ref_idx_lX_active_minus1+1; cIdx > *refIdxLX; cIdx-- ) + RefPicListX[ cIdx ] = RefPicListX[ cIdx - 1]; + + RefPicListX[ (*refIdxLX)++ ] = picLX; + + nIdx = *refIdxLX; + + for( cIdx = *refIdxLX; cIdx <= num_ref_idx_lX_active_minus1+1; cIdx++ ) + if (RefPicListX[ cIdx ]) + if( (RefPicListX[ cIdx ]->is_long_term ) || (RefPicListX[ cIdx ]->pic_num != picNumLX )) + RefPicListX[ nIdx++ ] = RefPicListX[ cIdx ]; + + } + + + /*!
+ ************************************************************************ + * \brief + * Reordering process for long-term reference pictures + * + * The picture found by get_long_term_pic(LongTermPicNum) is inserted at + * index *refIdxLX, which is then incremented; later entries are shifted + * by one and any later copy of the same long-term picture is dropped. + * + ************************************************************************ + */ + static void reorder_long_term(StorablePicture **RefPicListX, int num_ref_idx_lX_active_minus1, int LongTermPicNum, int *refIdxLX) + { + int cIdx, nIdx; + + StorablePicture *picLX; + + picLX = get_long_term_pic(LongTermPicNum); + + for( cIdx = num_ref_idx_lX_active_minus1+1; cIdx > *refIdxLX; cIdx-- ) + RefPicListX[ cIdx ] = RefPicListX[ cIdx - 1]; + + RefPicListX[ (*refIdxLX)++ ] = picLX; + + nIdx = *refIdxLX; + + for( cIdx = *refIdxLX; cIdx <= num_ref_idx_lX_active_minus1+1; cIdx++ ) + if (RefPicListX[ cIdx ]) + if( (!RefPicListX[ cIdx ]->is_long_term ) || (RefPicListX[ cIdx ]->long_term_pic_num != LongTermPicNum )) + RefPicListX[ nIdx++ ] = RefPicListX[ cIdx ]; + } + + + /*! + ************************************************************************ + * \brief + * Reordering process for reference picture lists + * + * Commands are read from reordering_of_pic_nums_idc until the value 3 + * is met; a value above 3 calls error(). Values 0 and 1 subtract or add + * abs_diff_pic_num_minus1[i] + 1 from the predicted picture number and + * reorder a short-term picture, value 2 reorders the long-term picture + * given by long_term_pic_idx[i]. + * On return *list_size is set to num_ref_idx_lX_active_minus1 + 1. + * + ************************************************************************ + */ + void reorder_ref_pic_list(StorablePicture **list, int *list_size, int num_ref_idx_lX_active_minus1, int *reordering_of_pic_nums_idc, int *abs_diff_pic_num_minus1, int *long_term_pic_idx) + { + int i; + + int maxPicNum, currPicNum, picNumLXNoWrap, picNumLXPred, picNumLX; + int refIdxLX = 0; + + if (img->structure==FRAME) + { + maxPicNum = img->MaxFrameNum; + currPicNum = img->frame_num; + } + else + { + maxPicNum = 2 * img->MaxFrameNum; + currPicNum = 2 * img->frame_num + 1; + } + + picNumLXPred = currPicNum; + + for (i=0; reordering_of_pic_nums_idc[i]!=3; i++) + { + if (reordering_of_pic_nums_idc[i]>3) + error ("Invalid remapping_of_pic_nums_idc command", 500); + + if (reordering_of_pic_nums_idc[i] < 2) + { + if (reordering_of_pic_nums_idc[i] == 0) + { + if( picNumLXPred - ( abs_diff_pic_num_minus1[i] + 1 ) < 0 ) + picNumLXNoWrap = picNumLXPred - (
abs_diff_pic_num_minus1[i] + 1 ) + maxPicNum; + else + picNumLXNoWrap = picNumLXPred - ( abs_diff_pic_num_minus1[i] + 1 ); + } + else // (remapping_of_pic_nums_idc[i] == 1) + { + if( picNumLXPred + ( abs_diff_pic_num_minus1[i] + 1 ) >= maxPicNum ) + picNumLXNoWrap = picNumLXPred + ( abs_diff_pic_num_minus1[i] + 1 ) - maxPicNum; + else + picNumLXNoWrap = picNumLXPred + ( abs_diff_pic_num_minus1[i] + 1 ); + } + picNumLXPred = picNumLXNoWrap; + + if( picNumLXNoWrap > currPicNum ) + picNumLX = picNumLXNoWrap - maxPicNum; + else + picNumLX = picNumLXNoWrap; + + reorder_short_term(list, num_ref_idx_lX_active_minus1, picNumLX, &refIdxLX); + } + else //(remapping_of_pic_nums_idc[i] == 2) + { + reorder_long_term(list, num_ref_idx_lX_active_minus1, long_term_pic_idx[i], &refIdxLX); + } + + } + // that's a definition + *list_size = num_ref_idx_lX_active_minus1 + 1; + } + + + + /*! + ************************************************************************ + * \brief + * Update the list of frame stores that contain reference frames/fields + * + * NOTE(review): the text that follows looks cut off in this copy - the + * loop header runs straight into statements that use a picture pointer p + * (idr_flag, long_term_reference_flag), which this parameterless function + * does not declare. Presumably several definitions were lost here; + * confirm against the original patch. + * + ************************************************************************ + */ + void update_ref_list() + { + unsigned i, j; + for (i=0, j=0; iidr_flag); + + if (p->no_output_of_prior_pics_flag) + { + // free all stored pictures + for (i=0; ilong_term_reference_flag) + { + dpb.max_long_term_pic_idx = 0; + p->is_long_term = 1; + p->long_term_frame_idx = 0; + } + else + { + dpb.max_long_term_pic_idx = -1; + p->is_long_term = 0; + } + } + + /*!
+ ************************************************************************ + * \brief + * Perform Sliding window decoded reference picture marking process + * + ************************************************************************ + */ + static void sliding_window_memory_management(StorablePicture* p) + { + unsigned i; + + assert (!p->idr_flag); + // if this is a reference pic with sliding window, unmark first ref frame + if (dpb.ref_frames_in_buffer==dpb.num_ref_frames - dpb.ltref_frames_in_buffer) + { + for (i=0; iis_reference && (!(dpb.fs[i]->is_long_term))) + { + unmark_for_reference(dpb.fs[i]); + update_ref_list(); + break; + } + } + } + + p->is_long_term = 0; + } + + /*! + ************************************************************************ + * \brief + * Calculate picNumX + * + * \return + * currPicNum - (difference_of_pic_nums_minus1 + 1), where currPicNum is + * p->frame_num for a frame and 2 * p->frame_num + 1 for a field + ************************************************************************ + */ + static int get_pic_num_x (StorablePicture *p, int difference_of_pic_nums_minus1) + { + int currPicNum; + + if (p->structure == FRAME) + currPicNum = p->frame_num; + else + currPicNum = 2 * p->frame_num + 1; + + return currPicNum - (difference_of_pic_nums_minus1 + 1); + } + + + /*!
+ ************************************************************************ + * \brief + * Adaptive Memory Management: Mark short term picture unused + ************************************************************************ + */ + static void mm_unmark_short_term_for_reference(StorablePicture *p, int difference_of_pic_nums_minus1) + { + int picNumX; + + unsigned i; + + picNumX = get_pic_num_x(p, difference_of_pic_nums_minus1); + + for (i=0; istructure == FRAME) + { + if ((dpb.fs_ref[i]->is_reference==3) && (dpb.fs_ref[i]->is_long_term==0)) + { + if (dpb.fs_ref[i]->frame->pic_num == picNumX) + { + unmark_for_reference(dpb.fs_ref[i]); + return; + } + } + } + else + { + if ((dpb.fs_ref[i]->is_reference & 1) && (!(dpb.fs_ref[i]->is_long_term & 1))) + { + if (dpb.fs_ref[i]->top_field->pic_num == picNumX) + { + dpb.fs_ref[i]->top_field->used_for_reference = 0; + dpb.fs_ref[i]->is_reference &= 2; + if (dpb.fs_ref[i]->is_used == 3) + { + dpb.fs_ref[i]->frame->used_for_reference = 0; + } + return; + } + } + if ((dpb.fs_ref[i]->is_reference & 2) && (!(dpb.fs_ref[i]->is_long_term & 2))) + { + if (dpb.fs_ref[i]->bottom_field->pic_num == picNumX) + { + dpb.fs_ref[i]->bottom_field->used_for_reference = 0; + dpb.fs_ref[i]->is_reference &= 1; + if (dpb.fs_ref[i]->is_used == 3) + { + dpb.fs_ref[i]->frame->used_for_reference = 0; + } + return; + } + } + } + } + } + + + /*! 
+ ************************************************************************ + * \brief + * Adaptive Memory Management: Mark long term picture unused + ************************************************************************ + */ + static void mm_unmark_long_term_for_reference(StorablePicture *p, int long_term_pic_num) + { + unsigned i; + for (i=0; istructure == FRAME) + { + if ((dpb.fs_ltref[i]->is_reference==3) && (dpb.fs_ltref[i]->is_long_term==3)) + { + if (dpb.fs_ltref[i]->frame->long_term_pic_num == long_term_pic_num) + { + unmark_for_long_term_reference(dpb.fs_ltref[i]); + } + } + } + else + { + if ((dpb.fs_ltref[i]->is_reference & 1) && ((dpb.fs_ltref[i]->is_long_term & 1))) + { + if (dpb.fs_ltref[i]->top_field->long_term_pic_num == long_term_pic_num) + { + dpb.fs_ltref[i]->top_field->used_for_reference = 0; + dpb.fs_ltref[i]->top_field->is_long_term = 0; + dpb.fs_ltref[i]->is_reference &= 2; + dpb.fs_ltref[i]->is_long_term &= 2; + if (dpb.fs_ltref[i]->is_used == 3) + { + dpb.fs_ltref[i]->frame->used_for_reference = 0; + dpb.fs_ltref[i]->frame->is_long_term = 0; + } + return; + } + } + if ((dpb.fs_ltref[i]->is_reference & 2) && ((dpb.fs_ltref[i]->is_long_term & 2))) + { + if (dpb.fs_ltref[i]->bottom_field->long_term_pic_num == long_term_pic_num) + { + dpb.fs_ltref[i]->bottom_field->used_for_reference = 0; + dpb.fs_ltref[i]->bottom_field->is_long_term = 0; + dpb.fs_ltref[i]->is_reference &= 1; + dpb.fs_ltref[i]->is_long_term &= 1; + if (dpb.fs_ltref[i]->is_used == 3) + { + dpb.fs_ltref[i]->frame->used_for_reference = 0; + dpb.fs_ltref[i]->frame->is_long_term = 0; + } + return; + } + } + } + } + } + + + /*! 
+ ************************************************************************ + * \brief + * Mark a long-term reference frame or complementary field pair unused for referemce + ************************************************************************ + */ + static void unmark_long_term_frame_for_reference_by_frame_idx(int long_term_frame_idx) + { + unsigned i; + for(i=0; ilong_term_frame_idx == long_term_frame_idx) + unmark_for_long_term_reference(dpb.fs_ltref[i]); + } + } + + /*! + ************************************************************************ + * \brief + * Mark a long-term reference field unused for reference only if it's not + * the complementary field of the picture indicated by picNumX + ************************************************************************ + */ + static void unmark_long_term_field_for_reference_by_frame_idx(PictureStructure structure, int long_term_frame_idx, int mark_current, unsigned curr_frame_num, int curr_pic_num) + { + unsigned i; + + assert(structure!=FRAME); + if (curr_pic_num<0) + curr_pic_num+=(2*img->MaxFrameNum); + + for(i=0; ilong_term_frame_idx == long_term_frame_idx) + { + if (structure == TOP_FIELD) + { + if ((dpb.fs_ltref[i]->is_long_term == 3)) + { + unmark_for_long_term_reference(dpb.fs_ltref[i]); + } + else + { + if ((dpb.fs_ltref[i]->is_long_term == 1)) + { + unmark_for_long_term_reference(dpb.fs_ltref[i]); + } + else + { + if (mark_current) + { + if (dpb.last_picture) + { + if ( ( dpb.last_picture != dpb.fs_ltref[i] )|| dpb.last_picture->frame_num != curr_frame_num) + unmark_for_long_term_reference(dpb.fs_ltref[i]); + } + else + { + unmark_for_long_term_reference(dpb.fs_ltref[i]); + } + } + else + { + if ((dpb.fs_ltref[i]->frame_num) != (unsigned)(curr_pic_num/2)) + { + unmark_for_long_term_reference(dpb.fs_ltref[i]); + } + } + } + } + } + if (structure == BOTTOM_FIELD) + { + if ((dpb.fs_ltref[i]->is_long_term == 3)) + { + unmark_for_long_term_reference(dpb.fs_ltref[i]); + } + else + { + if 
((dpb.fs_ltref[i]->is_long_term == 2)) + { + unmark_for_long_term_reference(dpb.fs_ltref[i]); + } + else + { + if (mark_current) + { + if (dpb.last_picture) + { + if ( ( dpb.last_picture != dpb.fs_ltref[i] )|| dpb.last_picture->frame_num != curr_frame_num) + unmark_for_long_term_reference(dpb.fs_ltref[i]); + } + else + { + unmark_for_long_term_reference(dpb.fs_ltref[i]); + } + } + else + { + if ((dpb.fs_ltref[i]->frame_num) != (unsigned)(curr_pic_num/2)) + { + unmark_for_long_term_reference(dpb.fs_ltref[i]); + } + } + } + } + } + } + } + } + + + /*! + ************************************************************************ + * \brief + * mark a picture as long-term reference + ************************************************************************ + */ + static void mark_pic_long_term(StorablePicture* p, int long_term_frame_idx, int picNumX) + { + unsigned i; + int add_top, add_bottom; + + if (p->structure == FRAME) + { + for (i=0; iis_reference == 3) + { + if ((!dpb.fs_ref[i]->frame->is_long_term)&&(dpb.fs_ref[i]->frame->pic_num == picNumX)) + { + dpb.fs_ref[i]->long_term_frame_idx = dpb.fs_ref[i]->frame->long_term_frame_idx + = long_term_frame_idx; + dpb.fs_ref[i]->frame->long_term_pic_num = long_term_frame_idx; + dpb.fs_ref[i]->frame->is_long_term = 1; + + if (dpb.fs_ref[i]->top_field && dpb.fs_ref[i]->bottom_field) + { + dpb.fs_ref[i]->top_field->long_term_frame_idx = dpb.fs_ref[i]->bottom_field->long_term_frame_idx + = long_term_frame_idx; + dpb.fs_ref[i]->top_field->long_term_pic_num = long_term_frame_idx; + dpb.fs_ref[i]->bottom_field->long_term_pic_num = long_term_frame_idx; + + dpb.fs_ref[i]->top_field->is_long_term = dpb.fs_ref[i]->bottom_field->is_long_term + = 1; + + } + dpb.fs_ref[i]->is_long_term = 3; + return; + } + } + } + printf ("Warning: reference frame for long term marking not found\n"); + } + else + { + if (p->structure == TOP_FIELD) + { + add_top = 1; + add_bottom = 0; + } + else + { + add_top = 0; + add_bottom = 1; + } + for (i=0; 
iis_reference & 1) + { + if ((!dpb.fs_ref[i]->top_field->is_long_term)&&(dpb.fs_ref[i]->top_field->pic_num == picNumX)) + { + if ((dpb.fs_ref[i]->is_long_term) && (dpb.fs_ref[i]->long_term_frame_idx != long_term_frame_idx)) + { + printf ("Warning: assigning long_term_frame_idx different from other field\n"); + } + + dpb.fs_ref[i]->long_term_frame_idx = dpb.fs_ref[i]->top_field->long_term_frame_idx + = long_term_frame_idx; + dpb.fs_ref[i]->top_field->long_term_pic_num = 2 * long_term_frame_idx + add_top; + dpb.fs_ref[i]->top_field->is_long_term = 1; + dpb.fs_ref[i]->is_long_term |= 1; + if (dpb.fs_ref[i]->is_long_term == 3) + { + dpb.fs_ref[i]->frame->is_long_term = 1; + dpb.fs_ref[i]->frame->long_term_frame_idx = dpb.fs_ref[i]->frame->long_term_pic_num = long_term_frame_idx; + } + return; + } + } + if (dpb.fs_ref[i]->is_reference & 2) + { + if ((!dpb.fs_ref[i]->bottom_field->is_long_term)&&(dpb.fs_ref[i]->bottom_field->pic_num == picNumX)) + { + if ((dpb.fs_ref[i]->is_long_term) && (dpb.fs_ref[i]->long_term_frame_idx != long_term_frame_idx)) + { + printf ("Warning: assigning long_term_frame_idx different from other field\n"); + } + + dpb.fs_ref[i]->long_term_frame_idx = dpb.fs_ref[i]->bottom_field->long_term_frame_idx + = long_term_frame_idx; + dpb.fs_ref[i]->bottom_field->long_term_pic_num = 2 * long_term_frame_idx + add_top; + dpb.fs_ref[i]->bottom_field->is_long_term = 1; + dpb.fs_ref[i]->is_long_term |= 2; + if (dpb.fs_ref[i]->is_long_term == 3) + { + dpb.fs_ref[i]->frame->is_long_term = 1; + dpb.fs_ref[i]->frame->long_term_frame_idx = dpb.fs_ref[i]->frame->long_term_pic_num = long_term_frame_idx; + } + return; + } + } + } + printf ("Warning: reference field for long term marking not found\n"); + } + } + + + /*! 
+ ************************************************************************ + * \brief + * Assign a long term frame index to a short term picture + ************************************************************************ + */ + static void mm_assign_long_term_frame_idx(StorablePicture* p, int difference_of_pic_nums_minus1, int long_term_frame_idx) + { + int picNumX; + + picNumX = get_pic_num_x(p, difference_of_pic_nums_minus1); + + // remove frames/fields with same long_term_frame_idx + if (p->structure == FRAME) + { + unmark_long_term_frame_for_reference_by_frame_idx(long_term_frame_idx); + } + else + { + unsigned i; + PictureStructure structure = FRAME; + + for (i=0; iis_reference & 1) + { + if (dpb.fs_ref[i]->top_field->pic_num == picNumX) + { + structure = TOP_FIELD; + break; + } + } + if (dpb.fs_ref[i]->is_reference & 2) + { + if (dpb.fs_ref[i]->bottom_field->pic_num == picNumX) + { + structure = BOTTOM_FIELD; + break; + } + } + } + if (structure==FRAME) + { + error ("field for long term marking not found",200); + } + + unmark_long_term_field_for_reference_by_frame_idx(structure, long_term_frame_idx, 0, 0, picNumX); + } + + mark_pic_long_term(p, long_term_frame_idx, picNumX); + } + + /*! + ************************************************************************ + * \brief + * Set new max long_term_frame_idx + ************************************************************************ + */ + void mm_update_max_long_term_frame_idx(int max_long_term_frame_idx_plus1) + { + unsigned i; + + dpb.max_long_term_pic_idx = max_long_term_frame_idx_plus1 - 1; + + // check for invalid frames + for (i=0; ilong_term_frame_idx > dpb.max_long_term_pic_idx) + { + unmark_for_long_term_reference(dpb.fs_ltref[i]); + } + } + } + + + /*! 
+ ************************************************************************ + * \brief + * Mark all long term reference pictures unused for reference + ************************************************************************ + */ + static void mm_unmark_all_long_term_for_reference () + { + mm_update_max_long_term_frame_idx(0); + } + + /*! + ************************************************************************ + * \brief + * Mark all short term reference pictures unused for reference + ************************************************************************ + */ + static void mm_unmark_all_short_term_for_reference () + { + unsigned int i; + for (i=0; istructure == FRAME) + { + unmark_long_term_frame_for_reference_by_frame_idx(long_term_frame_idx); + } + else + { + unmark_long_term_field_for_reference_by_frame_idx(p->structure, long_term_frame_idx, 1, p->pic_num, 0); + } + + p->is_long_term = 1; + p->long_term_frame_idx = long_term_frame_idx; + } + + + /*! + ************************************************************************ + * \brief + * Perform Adaptive memory control decoded reference picture marking process + ************************************************************************ + */ + static void adaptive_memory_management(StorablePicture* p) + { + DecRefPicMarking_t *tmp_drpm; + + img->last_has_mmco_5 = 0; + + assert (!p->idr_flag); + assert (p->adaptive_ref_pic_buffering_flag); + + while (p->dec_ref_pic_marking_buffer) + { + tmp_drpm = p->dec_ref_pic_marking_buffer; + switch (tmp_drpm->memory_management_control_operation) + { + case 0: + if (tmp_drpm->Next != NULL) + { + error ("memory_management_control_operation = 0 not last operation in buffer", 500); + } + break; + case 1: + mm_unmark_short_term_for_reference(p, tmp_drpm->difference_of_pic_nums_minus1); + update_ref_list(); + break; + case 2: + mm_unmark_long_term_for_reference(p, tmp_drpm->long_term_pic_num); + update_ltref_list(); + break; + case 3: + mm_assign_long_term_frame_idx(p, 
tmp_drpm->difference_of_pic_nums_minus1, tmp_drpm->long_term_frame_idx); + update_ref_list(); + update_ltref_list(); + break; + case 4: + mm_update_max_long_term_frame_idx (tmp_drpm->max_long_term_frame_idx_plus1); + update_ltref_list(); + break; + case 5: + mm_unmark_all_short_term_for_reference(); + mm_unmark_all_long_term_for_reference(); + img->last_has_mmco_5 = 1; + break; + case 6: + mm_mark_current_picture_long_term(p, tmp_drpm->long_term_frame_idx); + check_num_ref(); + break; + default: + error ("invalid memory_management_control_operation in buffer", 500); + } + p->dec_ref_pic_marking_buffer = tmp_drpm->Next; + free (tmp_drpm); + } + if ( img->last_has_mmco_5 ) + { + p->pic_num = p->frame_num = 0; + + switch (p->structure) + { + case TOP_FIELD: + { + p->poc = p->top_poc = img->toppoc =0; + break; + } + case BOTTOM_FIELD: + { + p->poc = p->bottom_poc = img->bottompoc = 0; + break; + } + case FRAME: + { + p->top_poc -= p->poc; + p->bottom_poc -= p->poc; + + img->toppoc = p->top_poc; + img->bottompoc = p->bottom_poc; + + p->poc = min (p->top_poc, p->bottom_poc); + img->framepoc = p->poc; + break; + } + } + img->ThisPOC = p->poc; + flush_dpb(); + } + } + + + /*! + ************************************************************************ + * \brief + * Store a picture in DPB. This includes cheking for space in DPB and + * flushing frames. + * If we received a frame, we need to check for a new store, if we + * got a field, check if it's the second field of an already allocated + * store. 
+ * + * \param p + * Picture to be stored + * + ************************************************************************ + */ + void store_picture_in_dpb(StorablePicture* p) + { + unsigned i; + int poc, pos; + // picture error concealment + extern int pocs_in_dpb[100]; + // diagnostics + //printf ("Storing (%s) non-ref pic with frame_num #%d\n", (p->type == FRAME)?"FRAME":(p->type == TOP_FIELD)?"TOP_FIELD":"BOTTOM_FIELD", p->pic_num); + // if frame, check for new store, + assert (p!=NULL); + + img->last_has_mmco_5=0; + img->last_pic_bottom_field = (p->structure == BOTTOM_FIELD); + + if (p->idr_flag) + { + idr_memory_management(p); + // picture error concealment + memset(pocs_in_dpb, 0, sizeof(int)*100); + } + else + { + // adaptive memory management + if (p->used_for_reference && (p->adaptive_ref_pic_buffering_flag)) + adaptive_memory_management(p); + } + + if ((p->structure==TOP_FIELD)||(p->structure==BOTTOM_FIELD)) + { + // check for frame store with same pic_number + if (dpb.last_picture) + { + if ((int)dpb.last_picture->frame_num == p->pic_num) + { + if (((p->structure==TOP_FIELD)&&(dpb.last_picture->is_used==2))||((p->structure==BOTTOM_FIELD)&&(dpb.last_picture->is_used==1))) + { + if ((p->used_for_reference && (dpb.last_picture->is_orig_reference!=0))|| + (!p->used_for_reference && (dpb.last_picture->is_orig_reference==0))) + { + insert_picture_in_dpb(dpb.last_picture, p); + update_ref_list(); + update_ltref_list(); + dump_dpb(); + dpb.last_picture = NULL; + return; + } + } + } + } + } + + // this is a frame or a field which has no stored complementary field + + // sliding window, if necessary + if ((!p->idr_flag)&&(p->used_for_reference && (!p->adaptive_ref_pic_buffering_flag))) + { + sliding_window_memory_management(p); + } + + // picture error concealment + if(img->conceal_mode != 0) + for(i=0;iis_reference) + dpb.fs[i]->concealment_reference = 1; + + // first try to remove unused frames + if (dpb.used_size==dpb.size) + { + // picture error concealment + 
if (img->conceal_mode != 0) + conceal_non_ref_pics(2); + remove_unused_frame_from_dpb(); + + if(img->conceal_mode != 0) + sliding_window_poc_management(p); + } + + // then output frames until one can be removed + while (dpb.used_size==dpb.size) + { + // non-reference frames may be output directly + if (!p->used_for_reference) + { + get_smallest_poc(&poc, &pos); + if ((-1==pos) || (p->poc < poc)) + { + direct_output(p, p_out); + return; + } + } + // flush a frame + output_one_frame_from_dpb(); + } + + // check for duplicate frame number in short term reference buffer + if ((p->used_for_reference)&&(!p->is_long_term)) + { + for (i=0; iframe_num == p->frame_num) + { + error("duplicate frame_num in short-term reference picture buffer", 500); + } + } + + } + // store at end of buffer + // printf ("store frame/field at pos %d\n",dpb.used_size); + insert_picture_in_dpb(dpb.fs[dpb.used_size],p); + + // picture error concealment + if (p->idr_flag) + { + img->earlier_missing_poc = 0; + } + + if (p->structure != FRAME) + { + dpb.last_picture = dpb.fs[dpb.used_size]; + } + else + { + dpb.last_picture = NULL; + } + + dpb.used_size++; + + if(img->conceal_mode != 0) + pocs_in_dpb[dpb.used_size-1] = p->poc; + + update_ref_list(); + update_ltref_list(); + + check_num_ref(); + + dump_dpb(); + } + + /*! + ************************************************************************ + * \brief + * Insert the picture into the DPB. A free DPB position is necessary + * for frames, . 
+ * + * \param fs + * FrameStore into which the picture will be inserted + * \param p + * StorablePicture to be inserted + * + ************************************************************************ + */ + static void insert_picture_in_dpb(FrameStore* fs, StorablePicture* p) + { + // printf ("insert (%s) pic with frame_num #%d, poc %d\n", (p->structure == FRAME)?"FRAME":(p->structure == TOP_FIELD)?"TOP_FIELD":"BOTTOM_FIELD", p->pic_num, p->poc); + assert (p!=NULL); + assert (fs!=NULL); + switch (p->structure) + { + case FRAME: + fs->frame = p; + fs->is_used = 3; + if (p->used_for_reference) + { + fs->is_reference = 3; + fs->is_orig_reference = 3; + if (p->is_long_term) + { + fs->is_long_term = 3; + fs->long_term_frame_idx = p->long_term_frame_idx; + } + } + // generate field views + dpb_split_field(fs); + break; + case TOP_FIELD: + fs->top_field = p; + fs->is_used |= 1; + if (p->used_for_reference) + { + fs->is_reference |= 1; + fs->is_orig_reference |= 1; + if (p->is_long_term) + { + fs->is_long_term |= 1; + fs->long_term_frame_idx = p->long_term_frame_idx; + } + } + if (fs->is_used == 3) + { + // generate frame view + dpb_combine_field(fs); + } else + { + fs->poc = p->poc; + gen_field_ref_ids(p); + } + break; + case BOTTOM_FIELD: + fs->bottom_field = p; + fs->is_used |= 2; + if (p->used_for_reference) + { + fs->is_reference |= 2; + fs->is_orig_reference |= 2; + if (p->is_long_term) + { + fs->is_long_term |= 2; + fs->long_term_frame_idx = p->long_term_frame_idx; + } + } + if (fs->is_used == 3) + { + // generate frame view + dpb_combine_field(fs); + } else + { + fs->poc = p->poc; + gen_field_ref_ids(p); + } + break; + } + fs->frame_num = p->pic_num; + fs->is_output = p->is_output; + + if (fs->is_used==3) + { + if (-1!=p_ref) + find_snr(snr, fs->frame, p_ref); + } + } + + /*! 
+ ************************************************************************ + * \brief + * Check if one of the frames/fields in frame store is used for reference + ************************************************************************ + */ + static int is_used_for_reference(FrameStore* fs) + { + if (fs->is_reference) + { + return 1; + } + + if (fs->is_used == 3) // frame + { + if (fs->frame->used_for_reference) + { + return 1; + } + } + + if (fs->is_used & 1) // top field + { + if (fs->top_field) + { + if (fs->top_field->used_for_reference) + { + return 1; + } + } + } + + if (fs->is_used & 2) // bottom field + { + if (fs->bottom_field) + { + if (fs->bottom_field->used_for_reference) + { + return 1; + } + } + } + return 0; + } + + + /*! + ************************************************************************ + * \brief + * Check if one of the frames/fields in frame store is used for short-term reference + ************************************************************************ + */ + static int is_short_term_reference(FrameStore* fs) + { + + if (fs->is_used==3) // frame + { + if ((fs->frame->used_for_reference)&&(!fs->frame->is_long_term)) + { + return 1; + } + } + + if (fs->is_used & 1) // top field + { + if (fs->top_field) + { + if ((fs->top_field->used_for_reference)&&(!fs->top_field->is_long_term)) + { + return 1; + } + } + } + + if (fs->is_used & 2) // bottom field + { + if (fs->bottom_field) + { + if ((fs->bottom_field->used_for_reference)&&(!fs->bottom_field->is_long_term)) + { + return 1; + } + } + } + return 0; + } + + + /*! 
+ ************************************************************************ + * \brief + * Check if one of the frames/fields in frame store is used for short-term reference + ************************************************************************ + */ + static int is_long_term_reference(FrameStore* fs) + { + + if (fs->is_used==3) // frame + { + if ((fs->frame->used_for_reference)&&(fs->frame->is_long_term)) + { + return 1; + } + } + + if (fs->is_used & 1) // top field + { + if (fs->top_field) + { + if ((fs->top_field->used_for_reference)&&(fs->top_field->is_long_term)) + { + return 1; + } + } + } + + if (fs->is_used & 2) // bottom field + { + if (fs->bottom_field) + { + if ((fs->bottom_field->used_for_reference)&&(fs->bottom_field->is_long_term)) + { + return 1; + } + } + } + return 0; + } + + + /*! + ************************************************************************ + * \brief + * remove one frame from DPB + ************************************************************************ + */ + static void remove_frame_from_dpb(int pos) + { + FrameStore* fs = dpb.fs[pos]; + FrameStore* tmp; + unsigned i; + + // printf ("remove frame with frame_num #%d\n", fs->frame_num); + switch (fs->is_used) + { + case 3: + free_storable_picture(fs->frame); + free_storable_picture(fs->top_field); + free_storable_picture(fs->bottom_field); + fs->frame=NULL; + fs->top_field=NULL; + fs->bottom_field=NULL; + break; + case 2: + free_storable_picture(fs->bottom_field); + fs->bottom_field=NULL; + break; + case 1: + free_storable_picture(fs->top_field); + fs->top_field=NULL; + break; + case 0: + break; + default: + error("invalid frame store type",500); + } + fs->is_used = 0; + fs->is_long_term = 0; + fs->is_reference = 0; + fs->is_orig_reference = 0; + + // move empty framestore to end of buffer + tmp = dpb.fs[pos]; + + for (i=pos; idpb.fs[i]->poc)&&(!dpb.fs[i]->is_output)) + { + *poc = dpb.fs[i]->poc; + *pos=i; + } + } + } + + /*! 
+ ************************************************************************ + * \brief + * Remove a picture from DPB which is no longer needed. + ************************************************************************ + */ + static int remove_unused_frame_from_dpb() + { + unsigned i; + + // check for frames that were already output and no longer used for reference + for (i=0; iis_output && (!is_used_for_reference(dpb.fs[i]))) + { + remove_frame_from_dpb(i); + return 1; + } + } + return 0; + } + + /*! + ************************************************************************ + * \brief + * Output one picture stored in the DPB. + ************************************************************************ + */ + static void output_one_frame_from_dpb() + { + int poc, pos; + //diagnostics + if (dpb.used_size<1) + { + error("Cannot output frame, DPB empty.",150); + } + + // find smallest POC + get_smallest_poc(&poc, &pos); + + if(pos==-1) + { + error("no frames for output available", 150); + } + + // call the output function + // printf ("output frame with frame_num #%d, poc %d (dpb. dpb.size=%d, dpb.used_size=%d)\n", dpb.fs[pos]->frame_num, dpb.fs[pos]->frame->poc, dpb.size, dpb.used_size); + + // picture error concealment + if(img->conceal_mode != 0) + { + if(dpb.last_output_poc == 0) + { + write_lost_ref_after_idr(pos); + } + write_lost_non_ref_pic(poc, p_out); + } + + // JVT-P072 ends + + write_stored_frame(dpb.fs[pos], p_out); + + // picture error concealment + if(img->conceal_mode == 0) + if (dpb.last_output_poc >= poc) + { + error ("output POC must be in ascending order", 150); + } + dpb.last_output_poc = poc; + // free frame store and move empty store to end of buffer + if (!is_used_for_reference(dpb.fs[pos])) + { + remove_frame_from_dpb(pos); + } + } + + + + /*! + ************************************************************************ + * \brief + * All stored picture are output. 
Should be called to empty the buffer + ************************************************************************ + */ + void flush_dpb() + { + unsigned i; + + //diagnostics + // printf("Flush remaining frames from dpb. dpb.size=%d, dpb.used_size=%d\n",dpb.size,dpb.used_size); + + if(img->conceal_mode == 0) + if (img->conceal_mode != 0) + conceal_non_ref_pics(0); + + // mark all frames unused + for (i=0; isize_x/4 ; i++) + { + for (j=0 ; jsize_y/4 ; j++) + { + dummylist0= p->ref_idx[LIST_0][j][i]; + dummylist1= p->ref_idx[LIST_1][j][i]; + //! association with id already known for fields. + p->ref_id[LIST_0][j][i] = (dummylist0>=0)? p->ref_pic_num[p->slice_id[j>>2][i>>2]][LIST_0][dummylist0] : 0; + p->ref_id[LIST_1][j][i] = (dummylist1>=0)? p->ref_pic_num[p->slice_id[j>>2][i>>2]][LIST_1][dummylist1] : 0; + p->field_frame[j][i]=1; + } + } + } + + /*! + ************************************************************************ + * \brief + * Extract top field from a frame + ************************************************************************ + */ + void dpb_split_field(FrameStore *fs) + { + int i, j, ii, jj, jj4; + int idiv,jdiv; + int currentmb; + int dummylist0,dummylist1; + int twosz16 = 2*(fs->frame->size_x/16); + + fs->poc = fs->frame->poc; + + if (!fs->frame->frame_mbs_only_flag) + { + fs->top_field = alloc_storable_picture(TOP_FIELD, fs->frame->size_x, fs->frame->size_y, fs->frame->size_x_cr, fs->frame->size_y_cr); + fs->bottom_field = alloc_storable_picture(BOTTOM_FIELD, fs->frame->size_x, fs->frame->size_y, fs->frame->size_x_cr, fs->frame->size_y_cr); + + for (i=0; iframe->size_y/2; i++) + { + memcpy(fs->top_field->imgY[i], fs->frame->imgY[i*2], fs->frame->size_x*sizeof(imgpel)); + } + + for (i=0; iframe->size_y_cr/2; i++) + { + memcpy(fs->top_field->imgUV[0][i], fs->frame->imgUV[0][i*2], fs->frame->size_x_cr*sizeof(imgpel)); + memcpy(fs->top_field->imgUV[1][i], fs->frame->imgUV[1][i*2], fs->frame->size_x_cr*sizeof(imgpel)); + } + + for (i=0; 
iframe->size_y/2; i++) + { + memcpy(fs->bottom_field->imgY[i], fs->frame->imgY[i*2 + 1], fs->frame->size_x*sizeof(imgpel)); + } + + for (i=0; iframe->size_y_cr/2; i++) + { + memcpy(fs->bottom_field->imgUV[0][i], fs->frame->imgUV[0][i*2 + 1], fs->frame->size_x_cr*sizeof(imgpel)); + memcpy(fs->bottom_field->imgUV[1][i], fs->frame->imgUV[1][i*2 + 1], fs->frame->size_x_cr*sizeof(imgpel)); + } + + fs->top_field->poc = fs->frame->top_poc; + fs->bottom_field->poc = fs->frame->bottom_poc; + + fs->top_field->frame_poc = fs->frame->frame_poc; + + fs->top_field->bottom_poc =fs->bottom_field->bottom_poc = fs->frame->bottom_poc; + fs->top_field->top_poc =fs->bottom_field->top_poc = fs->frame->top_poc; + fs->bottom_field->frame_poc = fs->frame->frame_poc; + + fs->top_field->used_for_reference = fs->bottom_field->used_for_reference + = fs->frame->used_for_reference; + fs->top_field->is_long_term = fs->bottom_field->is_long_term + = fs->frame->is_long_term; + fs->long_term_frame_idx = fs->top_field->long_term_frame_idx + = fs->bottom_field->long_term_frame_idx + = fs->frame->long_term_frame_idx; + + fs->top_field->coded_frame = fs->bottom_field->coded_frame = 1; + fs->top_field->MbaffFrameFlag = fs->bottom_field->MbaffFrameFlag + = fs->frame->MbaffFrameFlag; + + fs->frame->top_field = fs->top_field; + fs->frame->bottom_field = fs->bottom_field; + + fs->top_field->bottom_field = fs->bottom_field; + fs->top_field->frame = fs->frame; + fs->bottom_field->top_field = fs->top_field; + fs->bottom_field->frame = fs->frame; + + fs->top_field->chroma_format_idc = fs->bottom_field->chroma_format_idc = fs->frame->chroma_format_idc; + + //store reference picture index + for (j=0; j<=fs->frame->max_slice_id; j++) + { + for (i=0;itop_field->ref_pic_num[j][LIST_1][2*i] =fs->frame->ref_pic_num[j][2 + LIST_1][2*i]; + fs->top_field->ref_pic_num[j][LIST_1][2*i + 1] =fs->frame->ref_pic_num[j][2 + LIST_1][2*i+1]; + fs->bottom_field->ref_pic_num[j][LIST_1][2*i] =fs->frame->ref_pic_num[j][4 + 
LIST_1][2*i]; + fs->bottom_field->ref_pic_num[j][LIST_1][2*i+1]=fs->frame->ref_pic_num[j][4 + LIST_1][2*i+1] ; + } + + for (i=0;itop_field->ref_pic_num[j][LIST_0][2*i] =fs->frame->ref_pic_num[j][2 + LIST_0][2*i]; + fs->top_field->ref_pic_num[j][LIST_0][2*i + 1] =fs->frame->ref_pic_num[j][2 + LIST_0][2*i+1]; + fs->bottom_field->ref_pic_num[j][LIST_0][2*i] =fs->frame->ref_pic_num[j][4 + LIST_0][2*i]; + fs->bottom_field->ref_pic_num[j][LIST_0][2*i+1]=fs->frame->ref_pic_num[j][4 + LIST_0][2*i+1] ; + } + } + } + else + { + fs->top_field=NULL; + fs->bottom_field=NULL; + fs->frame->top_field=NULL; + fs->frame->bottom_field=NULL; + } + + for (j=0 ; jframe->size_y/4 ; j++) + { + jdiv=j/4; + for (i=0 ; iframe->size_x/4 ; i++) + { + idiv=i/4; + currentmb = twosz16*(jdiv/2)+ (idiv)*2 + (jdiv%2); + + if (fs->frame->MbaffFrameFlag && fs->frame->mb_field[currentmb]) + { + int list_offset = currentmb%2? 4: 2; + dummylist0 = fs->frame->ref_idx[LIST_0][j][i]; + dummylist1 = fs->frame->ref_idx[LIST_1][j][i]; + //! association with id already known for fields. + fs->frame->ref_id[LIST_0 + list_offset][j][i] = (dummylist0>=0)? fs->frame->ref_pic_num[fs->frame->slice_id[jdiv][idiv]][LIST_0 + list_offset][dummylist0] : 0; + fs->frame->ref_id[LIST_1 + list_offset][j][i] = (dummylist1>=0)? fs->frame->ref_pic_num[fs->frame->slice_id[jdiv][idiv]][LIST_1 + list_offset][dummylist1] : 0; + //! need to make association with frames + fs->frame->ref_id[LIST_0][j][i] = (dummylist0>=0)? fs->frame->frm_ref_pic_num[fs->frame->slice_id[jdiv][idiv]][LIST_0 + list_offset][dummylist0] : 0; + fs->frame->ref_id[LIST_1][j][i] = (dummylist1>=0)? fs->frame->frm_ref_pic_num[fs->frame->slice_id[jdiv][idiv]][LIST_1 + list_offset][dummylist1] : 0; + + } + else + { + dummylist0 = fs->frame->ref_idx[LIST_0][j][i]; + dummylist1 = fs->frame->ref_idx[LIST_1][j][i]; + fs->frame->ref_id[LIST_0][j][i] = (dummylist0>=0)? 
fs->frame->ref_pic_num[fs->frame->slice_id[jdiv][idiv]][LIST_0][dummylist0] : -1; + fs->frame->ref_id[LIST_1][j][i] = (dummylist1>=0)? fs->frame->ref_pic_num[fs->frame->slice_id[jdiv][idiv]][LIST_1][dummylist1] : -1; + } + } + } + + if (!fs->frame->frame_mbs_only_flag && fs->frame->MbaffFrameFlag) + { + for (j=0 ; jframe->size_y/8; j++) + { + jj = (j/4)*8 + j%4; + jj4 = jj + 4; + jdiv=j/2; + for (i=0 ; iframe->size_x/4 ; i++) + { + idiv=i/4; + + currentmb = twosz16*(jdiv/2)+ (idiv)*2 + (jdiv%2); + // Assign field mvs attached to MB-Frame buffer to the proper buffer + if (fs->frame->mb_field[currentmb]) + { + fs->bottom_field->field_frame[j][i] = fs->top_field->field_frame[j][i]=1; + fs->frame->field_frame[2*j][i] = fs->frame->field_frame[2*j+1][i]=1; + + fs->bottom_field->mv[LIST_0][j][i][0] = fs->frame->mv[LIST_0][jj4][i][0]; + fs->bottom_field->mv[LIST_0][j][i][1] = fs->frame->mv[LIST_0][jj4][i][1]; + fs->bottom_field->mv[LIST_1][j][i][0] = fs->frame->mv[LIST_1][jj4][i][0]; + fs->bottom_field->mv[LIST_1][j][i][1] = fs->frame->mv[LIST_1][jj4][i][1]; + fs->bottom_field->ref_idx[LIST_0][j][i] = fs->frame->ref_idx[LIST_0][jj4][i]; + fs->bottom_field->ref_idx[LIST_1][j][i] = fs->frame->ref_idx[LIST_1][jj4][i]; + fs->bottom_field->ref_id[LIST_0][j][i] = fs->frame->ref_id[LIST_0+4][jj4][i]; + fs->bottom_field->ref_id[LIST_1][j][i] = fs->frame->ref_id[LIST_1+4][jj4][i]; + + + fs->top_field->mv[LIST_0][j][i][0] = fs->frame->mv[LIST_0][jj][i][0]; + fs->top_field->mv[LIST_0][j][i][1] = fs->frame->mv[LIST_0][jj][i][1]; + fs->top_field->mv[LIST_1][j][i][0] = fs->frame->mv[LIST_1][jj][i][0]; + fs->top_field->mv[LIST_1][j][i][1] = fs->frame->mv[LIST_1][jj][i][1]; + fs->top_field->ref_idx[LIST_0][j][i] = fs->frame->ref_idx[LIST_0][jj][i]; + fs->top_field->ref_idx[LIST_1][j][i] = fs->frame->ref_idx[LIST_1][jj][i]; + fs->top_field->ref_id[LIST_0][j][i] = fs->frame->ref_id[LIST_0+2][jj][i]; + fs->top_field->ref_id[LIST_1][j][i] = fs->frame->ref_id[LIST_1+2][jj][i]; + } + } + } + } 
+ + //! Generate field MVs from Frame MVs + if (!fs->frame->frame_mbs_only_flag) + { + for (j=0 ; jframe->size_y/8 ; j++) + { + jj = 2* RSD(j); + jdiv = j/2; + for (i=0 ; iframe->size_x/4 ; i++) + { + ii = RSD(i); + idiv = i/4; + + currentmb = twosz16*(jdiv/2)+ (idiv)*2 + (jdiv%2); + + if (!fs->frame->MbaffFrameFlag || !fs->frame->mb_field[currentmb]) + { + fs->frame->field_frame[2*j+1][i] = fs->frame->field_frame[2*j][i]=0; + + fs->top_field->field_frame[j][i] = fs->bottom_field->field_frame[j][i] = 0; + + fs->top_field->mv[LIST_0][j][i][0] = fs->bottom_field->mv[LIST_0][j][i][0] = fs->frame->mv[LIST_0][jj][ii][0]; + fs->top_field->mv[LIST_0][j][i][1] = fs->bottom_field->mv[LIST_0][j][i][1] = fs->frame->mv[LIST_0][jj][ii][1]; + fs->top_field->mv[LIST_1][j][i][0] = fs->bottom_field->mv[LIST_1][j][i][0] = fs->frame->mv[LIST_1][jj][ii][0]; + fs->top_field->mv[LIST_1][j][i][1] = fs->bottom_field->mv[LIST_1][j][i][1] = fs->frame->mv[LIST_1][jj][ii][1]; + + // Scaling of references is done here since it will not affect spatial direct (2*0 =0) + if (fs->frame->ref_idx[LIST_0][jj][ii] == -1) + fs->top_field->ref_idx[LIST_0][j][i] = fs->bottom_field->ref_idx[LIST_0][j][i] = - 1; + else + { + dummylist0=fs->top_field->ref_idx[LIST_0][j][i] = fs->bottom_field->ref_idx[LIST_0][j][i] = fs->frame->ref_idx[LIST_0][jj][ii] ; + fs->top_field ->ref_id[LIST_0][j][i] = (dummylist0>=0)? fs->frame->top_ref_pic_num[fs->frame->slice_id[jj>>2][ii>>2]][LIST_0][dummylist0] : 0; + fs->bottom_field->ref_id[LIST_0][j][i] = (dummylist0>=0)? fs->frame->bottom_ref_pic_num[fs->frame->slice_id[jj>>2][ii>>2]][LIST_0][dummylist0] : 0; + } + + if (fs->frame->ref_idx[LIST_1][jj][ii] == -1) + fs->top_field->ref_idx[LIST_1][j][i] = fs->bottom_field->ref_idx[LIST_1][j][i] = - 1; + else + { + dummylist1=fs->top_field->ref_idx[LIST_1][j][i] = fs->bottom_field->ref_idx[LIST_1][j][i] = fs->frame->ref_idx[LIST_1][jj][ii]; + + fs->top_field ->ref_id[LIST_1][j][i] = (dummylist1>=0)? 
fs->frame->top_ref_pic_num[fs->frame->slice_id[jj>>2][ii>>2]][LIST_1][dummylist1] : 0; + fs->bottom_field->ref_id[LIST_1][j][i] = (dummylist1>=0)? fs->frame->bottom_ref_pic_num[fs->frame->slice_id[jj>>2][ii>>2]][LIST_1][dummylist1] : 0; + } + } + else + { + fs->frame->field_frame[2*j+1][i] = fs->frame->field_frame[2*j][i]= fs->frame->mb_field[currentmb]; + } + } + } + } + else + { + memset( &(fs->frame->field_frame[0][0]), 0, fs->frame->size_y * fs->frame->size_x /16 * sizeof(byte)); + } + } + + + /*! + ************************************************************************ + * \brief + * Generate a frame from top and bottom fields, + * YUV components and display information only + ************************************************************************ + */ + void dpb_combine_field_yuv(FrameStore *fs) + { + int i; + + fs->frame = alloc_storable_picture(FRAME, fs->top_field->size_x, fs->top_field->size_y*2, fs->top_field->size_x_cr, fs->top_field->size_y_cr*2); + + for (i=0; itop_field->size_y; i++) + { + memcpy(fs->frame->imgY[i*2], fs->top_field->imgY[i] , fs->top_field->size_x*sizeof(imgpel)); // top field + memcpy(fs->frame->imgY[i*2 + 1], fs->bottom_field->imgY[i], fs->bottom_field->size_x*sizeof(imgpel)); // bottom field + } + + for (i=0; itop_field->size_y_cr; i++) + { + memcpy(fs->frame->imgUV[0][i*2], fs->top_field->imgUV[0][i], fs->top_field->size_x_cr*sizeof(imgpel)); + memcpy(fs->frame->imgUV[0][i*2 + 1], fs->bottom_field->imgUV[0][i], fs->bottom_field->size_x_cr*sizeof(imgpel)); + memcpy(fs->frame->imgUV[1][i*2], fs->top_field->imgUV[1][i], fs->top_field->size_x_cr*sizeof(imgpel)); + memcpy(fs->frame->imgUV[1][i*2 + 1], fs->bottom_field->imgUV[1][i], fs->bottom_field->size_x_cr*sizeof(imgpel)); + } + + fs->poc=fs->frame->poc =fs->frame->frame_poc = min (fs->top_field->poc, fs->bottom_field->poc); + + fs->bottom_field->frame_poc=fs->top_field->frame_poc=fs->frame->poc; + + fs->bottom_field->top_poc=fs->frame->top_poc=fs->top_field->poc; + 
fs->top_field->bottom_poc=fs->frame->bottom_poc=fs->bottom_field->poc; + + fs->frame->used_for_reference = (fs->top_field->used_for_reference && fs->bottom_field->used_for_reference ); + fs->frame->is_long_term = (fs->top_field->is_long_term && fs->bottom_field->is_long_term ); + + if (fs->frame->is_long_term) + fs->frame->long_term_frame_idx = fs->long_term_frame_idx; + + fs->frame->top_field = fs->top_field; + fs->frame->bottom_field = fs->bottom_field; + + fs->frame->coded_frame = 0; + + fs->frame->chroma_format_idc = fs->top_field->chroma_format_idc; + fs->frame->frame_cropping_flag = fs->top_field->frame_cropping_flag; + if (fs->frame->frame_cropping_flag) + { + fs->frame->frame_cropping_rect_top_offset = fs->top_field->frame_cropping_rect_top_offset; + fs->frame->frame_cropping_rect_bottom_offset = fs->top_field->frame_cropping_rect_bottom_offset; + fs->frame->frame_cropping_rect_left_offset = fs->top_field->frame_cropping_rect_left_offset; + fs->frame->frame_cropping_rect_right_offset = fs->top_field->frame_cropping_rect_right_offset; + } + + fs->top_field->frame = fs->bottom_field->frame = fs->frame; + } + + + /*! 
+ ************************************************************************ + * \brief + * Generate a frame from top and bottom fields + ************************************************************************ + */ + void dpb_combine_field(FrameStore *fs) + { + int i,j, jj, jj4; + int dummylist0, dummylist1; + + dpb_combine_field_yuv(fs); + + + //combine field for frame + for (j=0; j<=(max(fs->top_field->max_slice_id, fs->bottom_field->max_slice_id)); j++) + { + for (i=0;i<(listXsize[LIST_1]+1)/2;i++) + { + fs->frame->ref_pic_num[j][LIST_1][i]= min ((fs->top_field->ref_pic_num[j][LIST_1][2*i]/2)*2, (fs->bottom_field->ref_pic_num[j][LIST_1][2*i]/2)*2); + } + + for (i=0;i<(listXsize[LIST_0]+1)/2;i++) + { + fs->frame->ref_pic_num[j][LIST_0][i] = min ((fs->top_field->ref_pic_num[j][LIST_0][2*i]/2)*2, (fs->bottom_field->ref_pic_num[j][LIST_0][2*i]/2)*2); + } + } + + //! Use inference flag to remap mvs/references + + //! Generate Frame parameters from field information. + for (j=0 ; jtop_field->size_y/4 ; j++) + { + jj = 8*(j/4) + (j%4); + jj4 = jj + 4; + for (i=0 ; itop_field->size_x/4 ; i++) + { + fs->frame->field_frame[jj][i]= fs->frame->field_frame[jj4][i]=1; + + fs->frame->mv[LIST_0][jj][i][0] = fs->top_field->mv[LIST_0][j][i][0]; + fs->frame->mv[LIST_0][jj][i][1] = fs->top_field->mv[LIST_0][j][i][1] ; + fs->frame->mv[LIST_1][jj][i][0] = fs->top_field->mv[LIST_1][j][i][0]; + fs->frame->mv[LIST_1][jj][i][1] = fs->top_field->mv[LIST_1][j][i][1] ; + + dummylist0=fs->frame->ref_idx[LIST_0][jj][i] = fs->top_field->ref_idx[LIST_0][j][i]; + dummylist1=fs->frame->ref_idx[LIST_1][jj][i] = fs->top_field->ref_idx[LIST_1][j][i]; + + //! association with id already known for fields. + fs->top_field->ref_id[LIST_0][j][i] = (dummylist0>=0)? fs->top_field->ref_pic_num[fs->top_field->slice_id[j>>2][i>>2]][LIST_0][dummylist0] : 0; + fs->top_field->ref_id[LIST_1][j][i] = (dummylist1>=0)? fs->top_field->ref_pic_num[fs->top_field->slice_id[j>>2][i>>2]][LIST_1][dummylist1] : 0; + + //! 
need to make association with frames + fs->frame->ref_id[LIST_0][jj][i] = (dummylist0>=0)? fs->top_field->frm_ref_pic_num[fs->top_field->slice_id[j>>2][i>>2]][LIST_0][dummylist0] : 0; + fs->frame->ref_id[LIST_1][jj][i] = (dummylist1>=0)? fs->top_field->frm_ref_pic_num[fs->top_field->slice_id[j>>2][i>>2]][LIST_1][dummylist1] : 0; + + fs->frame->mv[LIST_0][jj4][i][0] = fs->bottom_field->mv[LIST_0][j][i][0]; + fs->frame->mv[LIST_0][jj4][i][1] = fs->bottom_field->mv[LIST_0][j][i][1] ; + fs->frame->mv[LIST_1][jj4][i][0] = fs->bottom_field->mv[LIST_1][j][i][0]; + fs->frame->mv[LIST_1][jj4][i][1] = fs->bottom_field->mv[LIST_1][j][i][1] ; + + dummylist0=fs->frame->ref_idx[LIST_0][jj4][i] = fs->bottom_field->ref_idx[LIST_0][j][i]; + dummylist1=fs->frame->ref_idx[LIST_1][jj4][i] = fs->bottom_field->ref_idx[LIST_1][j][i]; + + fs->bottom_field->ref_id[LIST_0][j][i] = (dummylist0>=0)? fs->bottom_field->ref_pic_num[fs->bottom_field->slice_id[j>>2][i>>2]][LIST_0][dummylist0] : 0; + fs->bottom_field->ref_id[LIST_1][j][i] = (dummylist1>=0)? fs->bottom_field->ref_pic_num[fs->bottom_field->slice_id[j>>2][i>>2]][LIST_1][dummylist1] : 0; + + //! need to make association with frames + fs->frame->ref_id[LIST_0][jj4][i] = (dummylist0>=0)? fs->bottom_field->frm_ref_pic_num[fs->bottom_field->slice_id[j>>2][i>>2]][LIST_0][dummylist0] : -1; + fs->frame->ref_id[LIST_1][jj4][i] = (dummylist1>=0)? fs->bottom_field->frm_ref_pic_num[fs->bottom_field->slice_id[j>>2][i>>2]][LIST_1][dummylist1] : -1; + + fs->top_field->field_frame[j][i]=1; + fs->bottom_field->field_frame[j][i]=1; + } + } + } + + + /*! 
+ ************************************************************************ + * \brief + * Allocate memory for buffering of reference picture reordering commands + ************************************************************************ + */ + void alloc_ref_pic_list_reordering_buffer(Slice *currSlice) + { + int size = img->num_ref_idx_l0_active+1; + + if (img->type!=I_SLICE && img->type!=SI_SLICE) + { + if ((currSlice->reordering_of_pic_nums_idc_l0 = calloc(size,sizeof(int)))==NULL) no_mem_exit("alloc_ref_pic_list_reordering_buffer: reordering_of_pic_nums_idc_l0"); + if ((currSlice->abs_diff_pic_num_minus1_l0 = calloc(size,sizeof(int)))==NULL) no_mem_exit("alloc_ref_pic_list_reordering_buffer: abs_diff_pic_num_minus1_l0"); + if ((currSlice->long_term_pic_idx_l0 = calloc(size,sizeof(int)))==NULL) no_mem_exit("alloc_ref_pic_list_reordering_buffer: long_term_pic_idx_l0"); + } + else + { + currSlice->reordering_of_pic_nums_idc_l0 = NULL; + currSlice->abs_diff_pic_num_minus1_l0 = NULL; + currSlice->long_term_pic_idx_l0 = NULL; + } + + size = img->num_ref_idx_l1_active+1; + + if (img->type==B_SLICE) + { + if ((currSlice->reordering_of_pic_nums_idc_l1 = calloc(size,sizeof(int)))==NULL) no_mem_exit("alloc_ref_pic_list_reordering_buffer: reordering_of_pic_nums_idc_l1"); + if ((currSlice->abs_diff_pic_num_minus1_l1 = calloc(size,sizeof(int)))==NULL) no_mem_exit("alloc_ref_pic_list_reordering_buffer: abs_diff_pic_num_minus1_l1"); + if ((currSlice->long_term_pic_idx_l1 = calloc(size,sizeof(int)))==NULL) no_mem_exit("alloc_ref_pic_list_reordering_buffer: long_term_pic_idx_l1"); + } + else + { + currSlice->reordering_of_pic_nums_idc_l1 = NULL; + currSlice->abs_diff_pic_num_minus1_l1 = NULL; + currSlice->long_term_pic_idx_l1 = NULL; + } + } + + + /*! 
+ ************************************************************************ + * \brief + * Free memory for buffering of reference picture reordering commands + ************************************************************************ + */ + void free_ref_pic_list_reordering_buffer(Slice *currSlice) + { + + if (currSlice->reordering_of_pic_nums_idc_l0) + free(currSlice->reordering_of_pic_nums_idc_l0); + if (currSlice->abs_diff_pic_num_minus1_l0) + free(currSlice->abs_diff_pic_num_minus1_l0); + if (currSlice->long_term_pic_idx_l0) + free(currSlice->long_term_pic_idx_l0); + + currSlice->reordering_of_pic_nums_idc_l0 = NULL; + currSlice->abs_diff_pic_num_minus1_l0 = NULL; + currSlice->long_term_pic_idx_l0 = NULL; + + if (currSlice->reordering_of_pic_nums_idc_l1) + free(currSlice->reordering_of_pic_nums_idc_l1); + if (currSlice->abs_diff_pic_num_minus1_l1) + free(currSlice->abs_diff_pic_num_minus1_l1); + if (currSlice->long_term_pic_idx_l1) + free(currSlice->long_term_pic_idx_l1); + + currSlice->reordering_of_pic_nums_idc_l1 = NULL; + currSlice->abs_diff_pic_num_minus1_l1 = NULL; + currSlice->long_term_pic_idx_l1 = NULL; + } + + /*! 
+ ************************************************************************ + * \brief + * Tian Dong + * June 13, 2002, Modifed on July 30, 2003 + * + * If a gap in frame_num is found, try to fill the gap + * \param img + * + ************************************************************************ + */ + void fill_frame_num_gap(ImageParameters *img) + { + int CurrFrameNum; + int UnusedShortTermFrameNum; + StorablePicture *picture = NULL; + int tmp1 = img->delta_pic_order_cnt[0]; + int tmp2 = img->delta_pic_order_cnt[1]; + img->delta_pic_order_cnt[0] = img->delta_pic_order_cnt[1] = 0; + + // printf("A gap in frame number is found, try to fill it.\n"); + + + UnusedShortTermFrameNum = (img->pre_frame_num + 1) % img->MaxFrameNum; + CurrFrameNum = img->frame_num; + + while (CurrFrameNum != UnusedShortTermFrameNum) + { + picture = alloc_storable_picture (FRAME, img->width, img->height, img->width_cr, img->height_cr); + picture->coded_frame = 1; + picture->pic_num = UnusedShortTermFrameNum; + picture->frame_num = UnusedShortTermFrameNum; + picture->non_existing = 1; + picture->is_output = 1; + picture->used_for_reference = 1; + + picture->adaptive_ref_pic_buffering_flag = 0; + + img->frame_num = UnusedShortTermFrameNum; + if (active_sps->pic_order_cnt_type!=0) + { + decode_poc(img); + } + picture->top_poc=img->toppoc; + picture->bottom_poc=img->bottompoc; + picture->frame_poc=img->framepoc; + picture->poc=img->framepoc; + + store_picture_in_dpb(picture); + + picture=NULL; + img->pre_frame_num = UnusedShortTermFrameNum; + UnusedShortTermFrameNum = (UnusedShortTermFrameNum + 1) % img->MaxFrameNum; + } + img->delta_pic_order_cnt[0] = tmp1; + img->delta_pic_order_cnt[1] = tmp2; + img->frame_num = CurrFrameNum; + + } + + /*! 
+ ************************************************************************ + * \brief + * Allocate co-located memory + * + * \param size_x + * horizontal luma size + * \param size_y + * vertical luma size + * \param mb_adaptive_frame_field_flag + * flag that indicates macroblock adaptive frame/field coding + * + * \return + * the allocated StorablePicture structure + ************************************************************************ + */ + ColocatedParams* alloc_colocated(int size_x, int size_y, int mb_adaptive_frame_field_flag) + { + ColocatedParams *s; + + s = calloc(1, sizeof(ColocatedParams)); + if (NULL == s) + no_mem_exit("alloc_colocated: s"); + + s->size_x = size_x; + s->size_y = size_y; + + + get_mem3D ((byte****)(&(s->ref_idx)) , 2, size_y / BLOCK_SIZE, size_x / BLOCK_SIZE); + get_mem3Dint64 (&(s->ref_pic_id), 2, size_y / BLOCK_SIZE, size_x / BLOCK_SIZE); + get_mem4Dshort (&(s->mv) , 2, size_y / BLOCK_SIZE, size_x / BLOCK_SIZE,2 ); + + get_mem2D (&(s->moving_block), size_y / BLOCK_SIZE, size_x / BLOCK_SIZE); + get_mem2D (&(s->field_frame) , size_y / BLOCK_SIZE, size_x / BLOCK_SIZE); + + if (mb_adaptive_frame_field_flag) + { + get_mem3D ((byte****)(&(s->top_ref_idx)) , 2, size_y / BLOCK_SIZE/2, size_x / BLOCK_SIZE); + get_mem3Dint64 (&(s->top_ref_pic_id), 2, size_y / BLOCK_SIZE/2, size_x / BLOCK_SIZE); + get_mem4Dshort (&(s->top_mv), 2, size_y / BLOCK_SIZE/2, size_x / BLOCK_SIZE, 2); + get_mem2D (&(s->top_moving_block), size_y / BLOCK_SIZE/2, size_x / BLOCK_SIZE); + + get_mem3D ((byte****)(&(s->bottom_ref_idx)), 2, size_y / BLOCK_SIZE/2, size_x / BLOCK_SIZE); + get_mem3Dint64 (&(s->bottom_ref_pic_id), 2, size_y / BLOCK_SIZE/2, size_x / BLOCK_SIZE); + get_mem4Dshort (&(s->bottom_mv), 2, size_y / BLOCK_SIZE/2, size_x / BLOCK_SIZE, 2); + get_mem2D (&(s->bottom_moving_block), size_y / BLOCK_SIZE/2, size_x / BLOCK_SIZE); + } + + s->mb_adaptive_frame_field_flag = mb_adaptive_frame_field_flag; + + return s; + } + + /*! 
+ ************************************************************************ + * \brief + * Free co-located memory. + * + * \param p + * Picture to be freed + * + ************************************************************************ + */ + void free_colocated(ColocatedParams* p) + { + if (p) + { + free_mem3D ((byte***)p->ref_idx, 2); + free_mem3Dint64 (p->ref_pic_id, 2); + free_mem4Dshort (p->mv, 2, p->size_y / BLOCK_SIZE); + + if (p->moving_block) + { + free_mem2D (p->moving_block); + p->moving_block=NULL; + } + if (p->field_frame) + { + free_mem2D (p->field_frame); + p->field_frame=NULL; + } + + if (p->mb_adaptive_frame_field_flag) + { + free_mem3D ((byte***)p->top_ref_idx, 2); + free_mem3Dint64 (p->top_ref_pic_id, 2); + free_mem4Dshort (p->top_mv, 2, p->size_y / BLOCK_SIZE / 2); + + + if (p->top_moving_block) + { + free_mem2D (p->top_moving_block); + p->top_moving_block=NULL; + } + + free_mem3D ((byte***)p->bottom_ref_idx, 2); + free_mem3Dint64 (p->bottom_ref_pic_id, 2); + free_mem4Dshort (p->bottom_mv, 2, p->size_y / BLOCK_SIZE / 2); + + + if (p->bottom_moving_block) + { + free_mem2D (p->bottom_moving_block); + p->bottom_moving_block=NULL; + } + + } + + free(p); + + p=NULL; + } + } + + /*! 
+ ************************************************************************ + * \brief + * Compute co-located motion info + * + ************************************************************************ + */ + + void compute_colocated(ColocatedParams* p, StorablePicture **listX[6]) + { + StorablePicture *fs, *fs_top, *fs_bottom; + int i,j, ii, jj, jdiv; + + fs_top=fs_bottom=fs = listX[LIST_1 ][0]; + + if (img->MbaffFrameFlag) + { + fs_top= listX[LIST_1 + 2][0]; + fs_bottom= listX[LIST_1 + 4][0]; + } + else + { + if (img->field_pic_flag) + { + if ((img->structure != fs->structure) && (fs->coded_frame)) + { + if (img->structure==TOP_FIELD) + { + fs_top=fs_bottom=fs = listX[LIST_1 ][0]->top_field; + } + else + { + fs_top=fs_bottom=fs = listX[LIST_1 ][0]->bottom_field; + } + } + } + } + + if (!active_sps->frame_mbs_only_flag || active_sps->direct_8x8_inference_flag) + { + for (j=0 ; jsize_y/4 ; j++) + { + jdiv = j/2; + jj = j/2 + 4 * (j/8); + for (i=0 ; isize_x/4 ; i++) + { + + if (img->MbaffFrameFlag && fs->field_frame[j][i]) + { + //! Assign frame buffers for field MBs + //! Check whether we should use top or bottom field mvs. + //! Depending on the assigned poc values. 
+ + if (abs(dec_picture->poc - fs_bottom->poc)> abs(dec_picture->poc -fs_top->poc) ) + { + p->mv[LIST_0][j][i][0] = fs_top->mv[LIST_0][jdiv][i][0]; + p->mv[LIST_0][j][i][1] = fs_top->mv[LIST_0][jdiv][i][1] ; + p->mv[LIST_1][j][i][0] = fs_top->mv[LIST_1][jdiv][i][0]; + p->mv[LIST_1][j][i][1] = fs_top->mv[LIST_1][jdiv][i][1] ; + p->ref_idx[LIST_0][j][i] = fs_top->ref_idx[LIST_0][jdiv][i]; + p->ref_idx[LIST_1][j][i] = fs_top->ref_idx[LIST_1][jdiv][i]; + p->ref_pic_id[LIST_0][j][i] = fs->ref_id[LIST_0][jj][i]; + p->ref_pic_id[LIST_1][j][i] = fs->ref_id[LIST_1][jj][i]; + + p->is_long_term = fs_top->is_long_term; + } + else + { + p->mv[LIST_0][j][i][0] = fs_bottom->mv[LIST_0][jdiv][i][0]; + p->mv[LIST_0][j][i][1] = fs_bottom->mv[LIST_0][jdiv][i][1] ; + p->mv[LIST_1][j][i][0] = fs_bottom->mv[LIST_1][jdiv][i][0]; + p->mv[LIST_1][j][i][1] = fs_bottom->mv[LIST_1][jdiv][i][1] ; + p->ref_idx[LIST_0][j][i] = fs_bottom->ref_idx[LIST_0][jdiv][i]; + p->ref_idx[LIST_1][j][i] = fs_bottom->ref_idx[LIST_1][jdiv][i]; + p->ref_pic_id[LIST_0][j][i] = fs->ref_id[LIST_0][jj + 4][i]; + p->ref_pic_id[LIST_1][j][i] = fs->ref_id[LIST_1][jj + 4][i]; + + p->is_long_term = fs_bottom->is_long_term; + } + } + else + { + p->mv[LIST_0][j][i][0] = fs->mv[LIST_0][j][i][0]; + p->mv[LIST_0][j][i][1] = fs->mv[LIST_0][j][i][1] ; + p->mv[LIST_1][j][i][0] = fs->mv[LIST_1][j][i][0]; + p->mv[LIST_1][j][i][1] = fs->mv[LIST_1][j][i][1] ; + p->ref_idx[LIST_0][j][i] = fs->ref_idx[LIST_0][j][i]; + p->ref_idx[LIST_1][j][i] = fs->ref_idx[LIST_1][j][i]; + p->ref_pic_id[LIST_0][j][i] = fs->ref_id[LIST_0][j][i]; + p->ref_pic_id[LIST_1][j][i] = fs->ref_id[LIST_1][j][i]; + + p->is_long_term = fs->is_long_term; + } + } + } + } + + + //! Generate field MVs from Frame MVs + if (img->structure || img->MbaffFrameFlag) + { + for (j=0 ; jsize_y/8 ; j++) + { + jj = RSD(j); + for (i=0 ; isize_x/4 ; i++) + { + ii = RSD(i); + //! 
Do nothing if macroblock as field coded in MB-AFF + if (!img->MbaffFrameFlag ) + { + p->mv[LIST_0][j][i][0] = fs->mv[LIST_0][jj][ii][0]; + p->mv[LIST_0][j][i][1] = fs->mv[LIST_0][jj][ii][1]; + p->mv[LIST_1][j][i][0] = fs->mv[LIST_1][jj][ii][0]; + p->mv[LIST_1][j][i][1] = fs->mv[LIST_1][jj][ii][1]; + + // Scaling of references is done here since it will not affect spatial direct (2*0 =0) + + if (fs->ref_idx[LIST_0][jj][ii] == -1) + { + p->ref_idx [LIST_0][j][i] = -1; + p->ref_pic_id[LIST_0][j][i] = -1; + } + else + { + p->ref_idx [LIST_0][j][i] = fs->ref_idx[LIST_0][jj][ii] ; + p->ref_pic_id[LIST_0][j][i] = fs->ref_id [LIST_0][jj][ii]; + } + + if (fs->ref_idx[LIST_1][jj][ii] == -1) + { + p->ref_idx [LIST_1][j][i] = -1; + p->ref_pic_id[LIST_1][j][i] = -1; + } + else + { + p->ref_idx [LIST_1][j][i] = fs->ref_idx[LIST_1][jj][ii]; + p->ref_pic_id[LIST_1][j][i] = fs->ref_id [LIST_1][jj][ii]; + } + + p->is_long_term = fs->is_long_term; + + if (img->direct_spatial_mv_pred_flag == 1) + { + p->moving_block[j][i] = + !((!p->is_long_term + && ((p->ref_idx[LIST_0][j][i] == 0) + && (abs(p->mv[LIST_0][j][i][0])>>1 == 0) + && (abs(p->mv[LIST_0][j][i][1])>>1 == 0))) + || ((p->ref_idx[LIST_0][j][i] == -1) + && (p->ref_idx[LIST_1][j][i] == 0) + && (abs(p->mv[LIST_1][j][i][0])>>1 == 0) + && (abs(p->mv[LIST_1][j][i][1])>>1 == 0))); + } + } + else + { + p->bottom_mv[LIST_0][j][i][0] = fs_bottom->mv[LIST_0][jj][ii][0]; + p->bottom_mv[LIST_0][j][i][1] = fs_bottom->mv[LIST_0][jj][ii][1]; + p->bottom_mv[LIST_1][j][i][0] = fs_bottom->mv[LIST_1][jj][ii][0]; + p->bottom_mv[LIST_1][j][i][1] = fs_bottom->mv[LIST_1][jj][ii][1]; + p->bottom_ref_idx[LIST_0][j][i] = fs_bottom->ref_idx[LIST_0][jj][ii]; + p->bottom_ref_idx[LIST_1][j][i] = fs_bottom->ref_idx[LIST_1][jj][ii]; + p->bottom_ref_pic_id[LIST_0][j][i] = fs_bottom->ref_id[LIST_0][jj][ii]; + p->bottom_ref_pic_id[LIST_1][j][i] = fs_bottom->ref_id[LIST_1][jj][ii]; + + if (img->direct_spatial_mv_pred_flag == 1) + { + p->bottom_moving_block[j][i] = 
+ !((!fs_bottom->is_long_term + && ((p->bottom_ref_idx[LIST_0][j][i] == 0) + && (abs(p->bottom_mv[LIST_0][j][i][0])>>1 == 0) + && (abs(p->bottom_mv[LIST_0][j][i][1])>>1 == 0))) + || ((p->bottom_ref_idx[LIST_0][j][i] == -1) + && (p->bottom_ref_idx[LIST_1][j][i] == 0) + && (abs(p->bottom_mv[LIST_1][j][i][0])>>1 == 0) + && (abs(p->bottom_mv[LIST_1][j][i][1])>>1 == 0))); + } + + p->top_mv[LIST_0][j][i][0] = fs_top->mv[LIST_0][jj][ii][0]; + p->top_mv[LIST_0][j][i][1] = fs_top->mv[LIST_0][jj][ii][1]; + p->top_mv[LIST_1][j][i][0] = fs_top->mv[LIST_1][jj][ii][0]; + p->top_mv[LIST_1][j][i][1] = fs_top->mv[LIST_1][jj][ii][1]; + p->top_ref_idx[LIST_0][j][i] = fs_top->ref_idx[LIST_0][jj][ii]; + p->top_ref_idx[LIST_1][j][i] = fs_top->ref_idx[LIST_1][jj][ii]; + p->top_ref_pic_id[LIST_0][j][i] = fs_top->ref_id[LIST_0][jj][ii]; + p->top_ref_pic_id[LIST_1][j][i] = fs_top->ref_id[LIST_1][jj][ii]; + + if (img->direct_spatial_mv_pred_flag == 1) + { + p->top_moving_block[j][i] = + !((!fs_top->is_long_term + && ((p->top_ref_idx[LIST_0][j][i] == 0) + && (abs(p->top_mv[LIST_0][j][i][0])>>1 == 0) + && (abs(p->top_mv[LIST_0][j][i][1])>>1 == 0))) + || ((p->top_ref_idx[LIST_0][j][i] == -1) + && (p->top_ref_idx[LIST_1][j][i] == 0) + && (abs(p->top_mv[LIST_1][j][i][0])>>1 == 0) + && (abs(p->top_mv[LIST_1][j][i][1])>>1 == 0))); + } + + if ((img->direct_spatial_mv_pred_flag == 0 ) && !fs->field_frame[2*j][i]) + { + p->top_mv[LIST_0][j][i][1] /= 2; + p->top_mv[LIST_1][j][i][1] /= 2; + p->bottom_mv[LIST_0][j][i][1] /= 2; + p->bottom_mv[LIST_1][j][i][1] /= 2; + } + + } + } + } + } + + + if (!active_sps->frame_mbs_only_flag || active_sps->direct_8x8_inference_flag) + { + //! Use inference flag to remap mvs/references + //! 
Frame with field co-located + + if (!img->structure) + { + for (j=0 ; jsize_y/4 ; j++) + { + jdiv = j/2; + jj = j/2 + 4*(j/8); + for (i=0 ; isize_x/4 ; i++) + { + + if (fs->field_frame[j][i]) + { + if (abs(dec_picture->poc - fs->bottom_field->poc) > abs(dec_picture->poc - fs->top_field->poc)) + { + p->mv[LIST_0][j][i][0] = fs->top_field->mv[LIST_0][jdiv][i][0]; + p->mv[LIST_0][j][i][1] = fs->top_field->mv[LIST_0][jdiv][i][1] ; + p->mv[LIST_1][j][i][0] = fs->top_field->mv[LIST_1][jdiv][i][0]; + p->mv[LIST_1][j][i][1] = fs->top_field->mv[LIST_1][jdiv][i][1] ; + + p->ref_idx[LIST_0][j][i] = fs->top_field->ref_idx[LIST_0][jdiv][i]; + p->ref_idx[LIST_1][j][i] = fs->top_field->ref_idx[LIST_1][jdiv][i]; + p->ref_pic_id[LIST_0][j][i] = fs->ref_id[LIST_0][jj][i]; + p->ref_pic_id[LIST_1][j][i] = fs->ref_id[LIST_1][jj][i]; + p->is_long_term = fs->top_field->is_long_term; + } + else + { + p->mv[LIST_0][j][i][0] = fs->bottom_field->mv[LIST_0][jdiv][i][0]; + p->mv[LIST_0][j][i][1] = fs->bottom_field->mv[LIST_0][jdiv][i][1] ; + p->mv[LIST_1][j][i][0] = fs->bottom_field->mv[LIST_1][jdiv][i][0]; + p->mv[LIST_1][j][i][1] = fs->bottom_field->mv[LIST_1][jdiv][i][1] ; + + p->ref_idx[LIST_0][j][i] = fs->bottom_field->ref_idx[LIST_0][jdiv][i]; + p->ref_idx[LIST_1][j][i] = fs->bottom_field->ref_idx[LIST_1][jdiv][i]; + p->ref_pic_id[LIST_0][j][i] = fs->ref_id[LIST_0][jj + 4][i]; + p->ref_pic_id[LIST_1][j][i] = fs->ref_id[LIST_1][jj + 4][i]; + p->is_long_term = fs->bottom_field->is_long_term; + } + } + } + } + } + } + + + p->is_long_term = fs->is_long_term; + + if (!active_sps->frame_mbs_only_flag || active_sps->direct_8x8_inference_flag) + { + for (j=0 ; jsize_y/4 ; j++) + { + jj = RSD(j); + for (i=0 ; isize_x/4 ; i++) + { + ii = RSD(i); + + p->mv[LIST_0][j][i][0]=p->mv[LIST_0][jj][ii][0]; + p->mv[LIST_0][j][i][1]=p->mv[LIST_0][jj][ii][1]; + p->mv[LIST_1][j][i][0]=p->mv[LIST_1][jj][ii][0]; + p->mv[LIST_1][j][i][1]=p->mv[LIST_1][jj][ii][1]; + + 
p->ref_idx[LIST_0][j][i]=p->ref_idx[LIST_0][jj][ii]; + p->ref_idx[LIST_1][j][i]=p->ref_idx[LIST_1][jj][ii]; + p->ref_pic_id[LIST_0][j][i] = p->ref_pic_id[LIST_0][jj][ii]; + p->ref_pic_id[LIST_1][j][i] = p->ref_pic_id[LIST_1][jj][ii]; + + if (img->direct_spatial_mv_pred_flag == 1) + { + p->moving_block[j][i]= + !((!p->is_long_term + && ((p->ref_idx[LIST_0][j][i] == 0) + && (abs(p->mv[LIST_0][j][i][0])>>1 == 0) + && (abs(p->mv[LIST_0][j][i][1])>>1 == 0))) + || ((p->ref_idx[LIST_0][j][i] == -1) + && (p->ref_idx[LIST_1][j][i] == 0) + && (abs(p->mv[LIST_1][j][i][0])>>1 == 0) + && (abs(p->mv[LIST_1][j][i][1])>>1 == 0))); + } + } + } + } + else + { + for (j=0 ; jsize_y/4 ; j++) + { + jj = RSD(j); + for (i=0 ; isize_x/4 ; i++) + { + ii = RSD(i); + //! Use inference flag to remap mvs/references + p->mv[LIST_0][j][i][0]=fs->mv[LIST_0][j][i][0]; + p->mv[LIST_0][j][i][1]=fs->mv[LIST_0][j][i][1]; + p->mv[LIST_1][j][i][0]=fs->mv[LIST_1][j][i][0]; + p->mv[LIST_1][j][i][1]=fs->mv[LIST_1][j][i][1]; + + p->ref_idx[LIST_0][j][i]=fs->ref_idx[LIST_0][j][i]; + p->ref_idx[LIST_1][j][i]=fs->ref_idx[LIST_1][j][i]; + p->ref_pic_id[LIST_0][j][i] = fs->ref_id[LIST_0][j][i]; + p->ref_pic_id[LIST_1][j][i] = fs->ref_id[LIST_1][j][i]; + + if (img->direct_spatial_mv_pred_flag == 1) + { + p->moving_block[j][i]= + !((!p->is_long_term + && ((p->ref_idx[LIST_0][j][i] == 0) + && (abs(p->mv[LIST_0][j][i][0])>>1 == 0) + && (abs(p->mv[LIST_0][j][i][1])>>1 == 0))) + || ((p->ref_idx[LIST_0][j][i] == -1) + && (p->ref_idx[LIST_1][j][i] == 0) + && (abs(p->mv[LIST_1][j][i][0])>>1 == 0) + && (abs(p->mv[LIST_1][j][i][1])>>1 == 0))); + } + } + } + } + + + if (img->direct_spatial_mv_pred_flag ==0) + { + for (j=0 ; jsize_y/4 ; j++) + { + for (i=0 ; isize_x/4 ; i++) + { + if ((!img->MbaffFrameFlag &&!img->structure && fs->field_frame[j][i]) || (img->MbaffFrameFlag && fs->field_frame[j][i])) + { + p->mv[LIST_0][j][i][1] *= 2; + p->mv[LIST_1][j][i][1] *= 2; + } + else if (img->structure && !fs->field_frame[j][i]) + { + 
p->mv[LIST_0][j][i][1] /= 2; + p->mv[LIST_1][j][i][1] /= 2; + } + + } + } + + for (j=0; j<2 + (img->MbaffFrameFlag * 4);j+=2) + { + for (i=0; ipoc - listX[LIST_0 + j][i]->poc ); + } + else if (j == 2) + { + iTRb = Clip3( -128, 127, dec_picture->top_poc - listX[LIST_0 + j][i]->poc ); + } + else + { + iTRb = Clip3( -128, 127, dec_picture->bottom_poc - listX[LIST_0 + j][i]->poc ); + } + + iTRp = Clip3( -128, 127, listX[LIST_1 + j][0]->poc - listX[LIST_0 + j][i]->poc); + + if (iTRp!=0) + { + prescale = ( 16384 + abs( iTRp / 2 ) ) / iTRp; + img->mvscale[j][i] = Clip3( -1024, 1023, ( iTRb * prescale + 32 ) >> 6 ) ; + } + else + { + img->mvscale[j][i] = 9999; + } + } + } + } + } + Index: llvm-test/MultiSource/Applications/JM/ldecod/mbuffer.h diff -c /dev/null llvm-test/MultiSource/Applications/JM/ldecod/mbuffer.h:1.1 *** /dev/null Sat Feb 11 04:33:42 2006 --- llvm-test/MultiSource/Applications/JM/ldecod/mbuffer.h Sat Feb 11 04:33:22 2006 *************** *** 0 **** --- 1,218 ---- + + /*! + *********************************************************************** + * \file + * mbuffer.h + * + * \brief + * Frame buffer functions + * + * \author + * Main contributors (see contributors.h for copyright, address and affiliation details) + * - Karsten S?hring + * - Jill Boyce + * - Saurav K Bandyopadhyay + * - Zhenyu Wu + * + *********************************************************************** + */ + #ifndef _MBUFFER_H_ + #define _MBUFFER_H_ + + #include "global.h" + + #define MAX_LIST_SIZE 33 + + //! 
definition a picture (field or frame)
+ typedef struct storable_picture
+ {
+   PictureStructure structure;
+
+   int         poc;
+   int         top_poc;
+   int         bottom_poc;
+   int         frame_poc;
+   int64       ref_pic_num        [MAX_NUM_SLICES][6][MAX_LIST_SIZE];
+   int64       frm_ref_pic_num    [MAX_NUM_SLICES][6][MAX_LIST_SIZE];
+   int64       top_ref_pic_num    [MAX_NUM_SLICES][6][MAX_LIST_SIZE];
+   int64       bottom_ref_pic_num [MAX_NUM_SLICES][6][MAX_LIST_SIZE];
+   unsigned    frame_num;
+   int         pic_num;
+   int         long_term_pic_num;
+   int         long_term_frame_idx;
+
+   int         is_long_term;
+   int         used_for_reference;
+   int         is_output;
+   int         non_existing;          //!< set to 1 on the placeholder frames created by fill_frame_num_gap()
+
+   short       max_slice_id;
+
+   int         size_x, size_y, size_x_cr, size_y_cr;
+   int         chroma_vector_adjustment;
+   int         coded_frame;
+   int         MbaffFrameFlag;
+   unsigned    PicWidthInMbs;
+   unsigned    PicSizeInMbs;
+
+   imgpel **     imgY;         //!< Y picture component
+   imgpel ***    imgUV;        //!< U and V picture components
+
+   byte *        mb_field;     //!< field macroblock indicator
+
+   short **      slice_id;     //!< NOTE(review): was described as "reference picture [mb_x][mb_y]", which looks
+                               //   copied from ref_idx; presumably the slice id per macroblock - confirm
+
+   char  ***     ref_idx;      //!< reference picture   [list][subblock_y][subblock_x]
+
+   int64 ***     ref_pic_id;   //!< reference picture identifier [list][subblock_y][subblock_x]
+                               //   (not simply index)
+
+   int64 ***     ref_id;       //!< reference picture identifier [list][subblock_y][subblock_x]
+                               //   (not simply index)
+
+   short ****    mv;           //!< motion vector       [list][subblock_y][subblock_x][component]
+
+   byte  **      moving_block;
+   byte  **      field_frame;  //!< indicates if co_located is field or frame.
+
+   struct storable_picture *top_field;     // for mb aff, if frame for referencing the top field
+   struct storable_picture *bottom_field;  // for mb aff, if frame for referencing the bottom field
+   struct storable_picture *frame;         // for mb aff, if field for referencing the combined frame
+
+   int         slice_type;
+   int         idr_flag;
+   int         no_output_of_prior_pics_flag;
+   int         long_term_reference_flag;
+   int         adaptive_ref_pic_buffering_flag;
+
+   int         chroma_format_idc;
+   int         frame_mbs_only_flag;
+   int         frame_cropping_flag;
+   int         frame_cropping_rect_left_offset;
+   int         frame_cropping_rect_right_offset;
+   int         frame_cropping_rect_top_offset;
+   int         frame_cropping_rect_bottom_offset;
+   int         qp;
+   int         chroma_qp_offset[2];
+   int         slice_qp_delta;
+   DecRefPicMarking_t *dec_ref_pic_marking_buffer;    //!< stores the memory management control operations
+
+   // picture error concealment
+   int concealed_pic; // indicates if this is a concealed picture
+
+ } StorablePicture;
+
+
+ //! co-located motion parameters: allocated by alloc_colocated(), filled by
+ //! compute_colocated() and released by free_colocated()
+ typedef struct colocated_params
+ {
+   int         mb_adaptive_frame_field_flag;   //!< non-zero: the top_/bottom_ arrays below are allocated as well
+   int         size_x, size_y;                 //!< luma size the arrays were dimensioned for (see alloc_colocated())
+
+   int64       ref_pic_num[6][MAX_LIST_SIZE];
+
+   char  ***   ref_idx;       //!< reference picture   [list][subblock_y][subblock_x]
+   int64 ***   ref_pic_id;    //!< reference picture identifier [list][subblock_y][subblock_x]
+   short ****  mv;            //!< motion vector       [list][subblock_y][subblock_x][component]
+   byte  **    moving_block;  //!< written by compute_colocated() when img->direct_spatial_mv_pred_flag == 1
+
+   // Top field params (allocated with half the number of block rows, see alloc_colocated())
+   int64       top_ref_pic_num[6][MAX_LIST_SIZE];
+   char  ***   top_ref_idx;       //!< reference picture   [list][subblock_y][subblock_x]
+   int64 ***   top_ref_pic_id;    //!< reference picture identifier [list][subblock_y][subblock_x]
+   short ****  top_mv;            //!< motion vector       [list][subblock_y][subblock_x][component]
+   byte  **    top_moving_block;
+
+   // Bottom field params (allocated with half the number of block rows, see alloc_colocated())
+   int64       bottom_ref_pic_num[6][MAX_LIST_SIZE];
+   char  ***   bottom_ref_idx;       //!< reference picture   [list][subblock_y][subblock_x]
+   int64 ***   bottom_ref_pic_id;    //!< reference picture identifier [list][subblock_y][subblock_x]
+   short ****  bottom_mv;            //!< motion vector       [list][subblock_y][subblock_x][component]
+   byte  **    bottom_moving_block;
+
+   byte        is_long_term;  //!< copied from the co-located picture's is_long_term by compute_colocated()
+   byte  **    field_frame;   //!< indicates if co_located is field or frame.
+
+ } ColocatedParams;
+
+ //! Frame Stores for Decoded Picture Buffer
+ typedef struct frame_store
+ {
+   int       is_used;                //!< 0=empty; 1=top; 2=bottom; 3=both fields (or frame)
+   int       is_reference;           //!< 0=not used for ref; 1=top used; 2=bottom used; 3=both fields (or frame) used
+   int       is_long_term;           //!< 0=not used for ref; 1=top used; 2=bottom used; 3=both fields (or frame) used
+                                     //   NOTE(review): text is identical to is_reference; presumably it refers to
+                                     //   long-term reference marking - confirm
+   int       is_orig_reference;      //!< original marking by nal_ref_idc: 0=not used for ref; 1=top used; 2=bottom used; 3=both fields (or frame) used
+
+   int       is_non_existent;
+
+   unsigned  frame_num;
+   int       frame_num_wrap;
+   int       long_term_frame_idx;
+   int       is_output;
+   int       poc;
+
+   // picture error concealment
+   int concealment_reference;
+
+   StorablePicture *frame;
+   StorablePicture *top_field;
+   StorablePicture *bottom_field;
+
+ } FrameStore;
+
+
+ //!
Decoded Picture Buffer
+ typedef struct decoded_picture_buffer
+ {
+   FrameStore  **fs;
+   FrameStore  **fs_ref;
+   FrameStore  **fs_ltref;
+   unsigned      size;
+   unsigned      used_size;
+   unsigned      ref_frames_in_buffer;
+   unsigned      ltref_frames_in_buffer;
+   int           last_output_poc;
+   int           max_long_term_pic_idx;
+
+   int           init_done;
+   int           num_ref_frames;
+
+   FrameStore   *last_picture;
+ } DecodedPictureBuffer;
+
+
+ extern DecodedPictureBuffer dpb;
+ // Six reference lists. compute_colocated() reads LIST_1 for the picture itself and,
+ // when img->MbaffFrameFlag is set, LIST_1 + 2 and LIST_1 + 4 for the top and bottom field.
+ extern StorablePicture **listX[6];
+ extern int listXsize[6];
+
+ void             init_dpb();
+ void             free_dpb();
+ FrameStore*      alloc_frame_store();
+ void             free_frame_store(FrameStore* f);
+ StorablePicture* alloc_storable_picture(PictureStructure type, int size_x, int size_y, int size_x_cr, int size_y_cr);
+ void             free_storable_picture(StorablePicture* p);
+ void             store_picture_in_dpb(StorablePicture* p);
+ void             flush_dpb();
+
+ void             dpb_split_field(FrameStore *fs);
+ void             dpb_combine_field(FrameStore *fs);
+ void             dpb_combine_field_yuv(FrameStore *fs);
+
+ void             init_lists(int currSliceType, PictureStructure currPicStructure);
+ void             reorder_ref_pic_list(StorablePicture **list, int *list_size,
+                                       int num_ref_idx_lX_active_minus1, int *reordering_of_pic_nums_idc,
+                                       int *abs_diff_pic_num_minus1, int *long_term_pic_idx);
+
+ void             init_mbaff_lists();
+ void             alloc_ref_pic_list_reordering_buffer(Slice *currSlice);
+ void             free_ref_pic_list_reordering_buffer(Slice *currSlice);
+
+ // inserts placeholder frames for the frame numbers skipped before img->frame_num
+ void             fill_frame_num_gap(ImageParameters *img);
+
+ // co-located motion data: allocate, release, and fill from the reference lists
+ ColocatedParams* alloc_colocated(int size_x, int size_y,int mb_adaptive_frame_field_flag);
+ void             free_colocated(ColocatedParams* p);
+ void             compute_colocated(ColocatedParams* p, StorablePicture **listX[6]);
+
+ #endif
+
Index: llvm-test/MultiSource/Applications/JM/ldecod/memalloc.c
diff -c /dev/null llvm-test/MultiSource/Applications/JM/ldecod/memalloc.c:1.1
*** /dev/null Sat Feb 11 04:33:42 2006
--- llvm-test/MultiSource/Applications/JM/ldecod/memalloc.c Sat Feb 11 04:33:22 2006
***************
*** 0 ****
--- 1,591 ----
+
+ /*!
+ ************************************************************************ + * \file memalloc.c + * + * \brief + * Memory allocation and free helper funtions + * + * \author + * Main contributors (see contributors.h for copyright, address and affiliation details) + ************************************************************************ + */ + + #include + #include "memalloc.h" + + + /*! + ************************************************************************ + * \brief + * Allocate 2D memory array -> imgpel array2D[rows][columns] + * + * \par Output: + * memory size in bytes + ************************************************************************/ + int get_mem2Dpel(imgpel ***array2D, int rows, int columns) + { + int i; + + if((*array2D = (imgpel**)calloc(rows, sizeof(imgpel*))) == NULL) + no_mem_exit("get_mem2Dpel: array2D"); + if(((*array2D)[0] = (imgpel* )calloc(rows*columns,sizeof(imgpel ))) == NULL) + no_mem_exit("get_mem2Dpel: array2D"); + + for(i=1 ; i imgpel array3D[frames][rows][columns] + * + * \par Output: + * memory size in bytes + ************************************************************************ + */ + int get_mem3Dpel(imgpel ****array3D, int frames, int rows, int columns) + { + int j; + + if(((*array3D) = (imgpel***)calloc(frames,sizeof(imgpel**))) == NULL) + no_mem_exit("get_mem3Dpel: array3D"); + + for(j=0;j unsigned char array2D[rows][columns] + * + * \par Output: + * memory size in bytes + ************************************************************************/ + // Change 9-Aug-2001 P. List: dont allocate independant row arrays anymore + // but one complete array and move row-pointers to array. 
Now you can step + // to the next line with an offset of img->width + int get_mem2D(byte ***array2D, int rows, int columns) + { + int i; + + if((*array2D = (byte**)calloc(rows, sizeof(byte*))) == NULL) + no_mem_exit("get_mem2D: array2D"); + if(((*array2D)[0] = (byte* )calloc(columns*rows,sizeof(byte ))) == NULL) + no_mem_exit("get_mem2D: array2D"); + + for(i=1;i int array2D[rows][columns] + * + * \par Output: + * memory size in bytes + ************************************************************************ + */ + // same change as in get_mem2Dint + int get_mem2Dint(int ***array2D, int rows, int columns) + { + int i; + + if((*array2D = (int**)calloc(rows, sizeof(int*))) == NULL) + no_mem_exit("get_mem2Dint: array2D"); + if(((*array2D)[0] = (int* )calloc(rows*columns,sizeof(int ))) == NULL) + no_mem_exit("get_mem2Dint: array2D"); + + for(i=1 ; i int64 array2D[rows][columns] + * + * \par Output: + * memory size in bytes + ************************************************************************ + */ + // same change as in get_mem2Dint + int get_mem2Dint64(int64 ***array2D, int rows, int columns) + { + int i; + + if((*array2D = (int64**)calloc(rows, sizeof(int64*))) == NULL) + no_mem_exit("get_mem2Dint64: array2D"); + if(((*array2D)[0] = (int64* )calloc(rows*columns,sizeof(int64 ))) == NULL) + no_mem_exit("get_mem2Dint64: array2D"); + + for(i=1 ; i unsigned char array3D[frames][rows][columns] + * + * \par Output: + * memory size in bytes + ************************************************************************ + */ + // same change as in get_mem2Dint + int get_mem3D(byte ****array3D, int frames, int rows, int columns) + { + int j; + + if(((*array3D) = (byte***)calloc(frames,sizeof(byte**))) == NULL) + no_mem_exit("get_mem3D: array3D"); + + for(j=0;j int array3D[frames][rows][columns] + * + * \par Output: + * memory size in bytes + ************************************************************************ + */ + // same change as in get_mem2Dint + int get_mem3Dint(int 
****array3D, int frames, int rows, int columns) + { + int j; + + if(((*array3D) = (int***)calloc(frames,sizeof(int**))) == NULL) + no_mem_exit("get_mem3Dint: array3D"); + + for(j=0;j int64 array3D[frames][rows][columns] + * + * \par Output: + * memory size in bytes + ************************************************************************ + */ + // same change as in get_mem2Dint + int get_mem3Dint64(int64 ****array3D, int frames, int rows, int columns) + { + int j; + + if(((*array3D) = (int64***)calloc(frames,sizeof(int64**))) == NULL) + no_mem_exit("get_mem3Dint64: array3D"); + + for(j=0;j int array3D[frames][rows][columns][component] + * + * \par Output: + * memory size in bytes + ************************************************************************ + */ + // same change as in get_mem2Dint + int get_mem4Dint(int *****array4D, int idx, int frames, int rows, int columns ) + { + int j; + + if(((*array4D) = (int****)calloc(idx,sizeof(int**))) == NULL) + no_mem_exit("get_mem4Dint: array4D"); + + for(j=0;j short array2D[rows][columns] + * + * \par Output: + * memory size in bytes + ************************************************************************ + */ + int get_mem2Dshort(short ***array2D, int rows, int columns) + { + int i; + + if((*array2D = (short**)calloc(rows, sizeof(short*))) == NULL) + no_mem_exit("get_mem2Dshort: array2D"); + if(((*array2D)[0] = (short* )calloc(rows*columns,sizeof(short ))) == NULL) + no_mem_exit("get_mem2Dshort: array2D"); + + for(i=1 ; i short array3D[frames][rows][columns] + * + * \par Output: + * memory size in bytes + ************************************************************************ + */ + int get_mem3Dshort(short ****array3D, int frames, int rows, int columns) + { + int j; + + if(((*array3D) = (short***)calloc(frames,sizeof(short**))) == NULL) + no_mem_exit("get_mem3Dshort: array3D"); + + for(j=0;j short array3D[frames][rows][columns][component] + * + * \par Output: + * memory size in bytes + 
************************************************************************ + */ + int get_mem4Dshort(short *****array4D, int idx, int frames, int rows, int columns ) + { + int j; + + if(((*array4D) = (short****)calloc(idx,sizeof(short**))) == NULL) + no_mem_exit("get_mem4Dshort: array4D"); + + for(j=0;j + ************************************************************************ + */ + + #include "contributors.h" + + #include + #include + #include + #include + + #include "defines.h" + #include "global.h" + + + /*! + ************************************************************************ + * \brief + * Converts RBSP to string of data bits + * \param streamBuffer + * pointer to buffer containing data + * \param last_byte_pos + * position of the last byte containing data. + * \return last_byte_pos + * position of the last byte pos. If the last-byte was entirely a stuffing byte, + * it is removed, and the last_byte_pos is updated. + * + ************************************************************************/ + + int RBSPtoSODB(byte *streamBuffer, int last_byte_pos) + { + int ctr_bit, bitoffset; + + bitoffset = 0; + //find trailing 1 + ctr_bit = (streamBuffer[last_byte_pos-1] & (0x01< + ************************************************************************ + */ + + #include + + #include "contributors.h" + #include "global.h" + #include "elements.h" + + int assignSE2partition[][SE_MAX_ELEMENTS] = + { + // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 // elementnumber (no not uncomment) + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, //!< all elements in one partition no data partitioning + { 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 2, 2, 2, 2, 0, 0, 0, 0 } //!< three partitions per slice + }; + + int PartitionMode; + + /*! 
+ ************************************************************************ + * \brief + * Resets the entries in the bitstream struct + ************************************************************************ + */ + void free_Partition(Bitstream *currStream) + { + byte *buf = currStream->streamBuffer; + + currStream->bitstream_length = 0; + currStream->frame_bitoffset = 0; + currStream->ei_flag =0; + memset (buf, 0x00, MAX_CODED_FRAME_SIZE); + } Index: llvm-test/MultiSource/Applications/JM/ldecod/nalu.c diff -c /dev/null llvm-test/MultiSource/Applications/JM/ldecod/nalu.c:1.1 *** /dev/null Sat Feb 11 04:33:42 2006 --- llvm-test/MultiSource/Applications/JM/ldecod/nalu.c Sat Feb 11 04:33:22 2006 *************** *** 0 **** --- 1,44 ---- + + /*! + ************************************************************************ + * \file nalu.c + * + * \brief + * Decoder NALU support functions + * + * \author + * Main contributors (see contributors.h for copyright, address and affiliation details) + * - Stephan Wenger + ************************************************************************ + */ + + #include + + #include "global.h" + #include "nalu.h" + + + + + /*! + ************************************************************************************* + * \brief + * Converts a NALU to an RBSP + * + * \param + * nalu: nalu structure to be filled + * + * \return + * length of the RBSP in bytes + ************************************************************************************* + */ + + int NALUtoRBSP (NALU_t *nalu) + { + assert (nalu != NULL); + + nalu->len = EBSPtoRBSP (nalu->buf, nalu->len, 1) ; + + return nalu->len ; + } + Index: llvm-test/MultiSource/Applications/JM/ldecod/nalu.h diff -c /dev/null llvm-test/MultiSource/Applications/JM/ldecod/nalu.h:1.1 *** /dev/null Sat Feb 11 04:33:42 2006 --- llvm-test/MultiSource/Applications/JM/ldecod/nalu.h Sat Feb 11 04:33:22 2006 *************** *** 0 **** --- 1,28 ---- + + /*! 
+ ************************************************************************************** + * \file + * parset.h + * \brief + * Picture and Sequence Parameter Sets, encoder operations + * This code reflects JVT version xxx + * \date 25 November 2002 + * \author + * Main contributors (see contributors.h for copyright, address and affiliation details) + * - Stephan Wenger + *************************************************************************************** + */ + + + #ifndef _NALU_H_ + #define _NALU_H_ + + #include + #include "nalucommon.h" + + extern FILE *bits; + + int GetAnnexbNALU (NALU_t *nalu); + int NALUtoRBSP (NALU_t *nalu); + + #endif Index: llvm-test/MultiSource/Applications/JM/ldecod/nalucommon.c diff -c /dev/null llvm-test/MultiSource/Applications/JM/ldecod/nalucommon.c:1.1 *** /dev/null Sat Feb 11 04:33:42 2006 --- llvm-test/MultiSource/Applications/JM/ldecod/nalucommon.c Sat Feb 11 04:33:22 2006 *************** *** 0 **** --- 1,75 ---- + + /*! + ************************************************************************ + * \file nalucommon.c + * + * \brief + * Common NALU support functions + * + * \author + * Main contributors (see contributors.h for copyright, address and affiliation details) + * - Stephan Wenger + ************************************************************************ + */ + + #include + #include + #include + + #include "global.h" + #include "nalu.h" + #include "memalloc.h" + + + /*! 
+ ************************************************************************************* + * \brief + * Allocates memory for a NALU + * + * \param buffersize + * size of NALU buffer + * + * \return + * pointer to a NALU + ************************************************************************************* + */ + + + NALU_t *AllocNALU(int buffersize) + { + NALU_t *n; + + if ((n = (NALU_t*)calloc (1, sizeof (NALU_t))) == NULL) no_mem_exit ("AllocNALU: n"); + + n->max_size=buffersize; + + if ((n->buf = (byte*)calloc (buffersize, sizeof (byte))) == NULL) no_mem_exit ("AllocNALU: n->buf"); + + return n; + } + + + /*! + ************************************************************************************* + * \brief + * Frees a NALU + * + * \param n + * NALU to be freed + * + ************************************************************************************* + */ + + void FreeNALU(NALU_t *n) + { + if (n) + { + if (n->buf) + { + free(n->buf); + n->buf=NULL; + } + free (n); + } + } + Index: llvm-test/MultiSource/Applications/JM/ldecod/nalucommon.h diff -c /dev/null llvm-test/MultiSource/Applications/JM/ldecod/nalucommon.h:1.1 *** /dev/null Sat Feb 11 04:33:42 2006 --- llvm-test/MultiSource/Applications/JM/ldecod/nalucommon.h Sat Feb 11 04:33:22 2006 *************** *** 0 **** --- 1,55 ---- + + /*! 
+ ************************************************************************************** + * \file + * nalucommon.h + * \brief + * NALU handling common to encoder and decoder + * \date 25 November 2002 + * \author + * Main contributors (see contributors.h for copyright, address and affiliation details) + * - Stephan Wenger + *************************************************************************************** + */ + + + #ifndef _NALUCOMMON_H_ + #define _NALUCOMMON_H_ + + #define MAXRBSPSIZE 64000 + + #define NALU_TYPE_SLICE 1 + #define NALU_TYPE_DPA 2 + #define NALU_TYPE_DPB 3 + #define NALU_TYPE_DPC 4 + #define NALU_TYPE_IDR 5 + #define NALU_TYPE_SEI 6 + #define NALU_TYPE_SPS 7 + #define NALU_TYPE_PPS 8 + #define NALU_TYPE_AUD 9 + #define NALU_TYPE_EOSEQ 10 + #define NALU_TYPE_EOSTREAM 11 + #define NALU_TYPE_FILL 12 + + #define NALU_PRIORITY_HIGHEST 3 + #define NALU_PRIORITY_HIGH 2 + #define NALU_PRIRITY_LOW 1 + #define NALU_PRIORITY_DISPOSABLE 0 + + + typedef struct + { + int startcodeprefix_len; //! 4 for parameter sets and first slice in picture, 3 for everything else (suggested) + unsigned len; //! Length of the NAL unit (Excluding the start code, which does not belong to the NALU) + unsigned max_size; //! Nal Unit Buffer size + int nal_unit_type; //! NALU_TYPE_xxxx + int nal_reference_idc; //! NALU_PRIORITY_xxxx + int forbidden_bit; //! should be always FALSE + byte *buf; //! contains the first byte followed by the EBSP + } NALU_t; + + + NALU_t *AllocNALU(); + void FreeNALU(NALU_t *n); + + #endif Index: llvm-test/MultiSource/Applications/JM/ldecod/output.c diff -c /dev/null llvm-test/MultiSource/Applications/JM/ldecod/output.c:1.1 *** /dev/null Sat Feb 11 04:33:42 2006 --- llvm-test/MultiSource/Applications/JM/ldecod/output.c Sat Feb 11 04:33:22 2006 *************** *** 0 **** --- 1,673 ---- + + /*! 
+ ************************************************************************ + * \file output.c + * + * \brief + * Output an image and Trace support + * + * \author + * Main contributors (see contributors.h for copyright, address and affiliation details) + * - Karsten Suehring + ************************************************************************ + */ + + #include "contributors.h" + + #include + #include + #include + + #ifdef WIN32 + #include + #else + #include + #endif + + #include "global.h" + #include "mbuffer.h" + #include "image.h" + #include "memalloc.h" + + FrameStore* out_buffer; + + StorablePicture *pending_output = NULL; + int pending_output_state = FRAME; + + + void write_out_picture(StorablePicture *p, int p_out); + + + /*! + ************************************************************************ + * \brief + * checks if the System is big- or little-endian + * \return + * 0, little-endian (e.g. Intel architectures) + * 1, big-endian (e.g. SPARC, MIPS, PowerPC) + ************************************************************************ + */ + int testEndian() + { + short s; + byte *p; + + p=(byte*)&s; + + s=1; + + return (*p==0); + } + + + /*! 
+ ************************************************************************ + * \brief + * Convert image plane to temporary buffer for file writing + * \param imgX + * Pointer to image plane + * \param buf + * Buffer for file output + * \param size_x + * horizontal size + * \param size_y + * vertical size + * \param symbol_size_in_bytes + * number of bytes used per pel + * \param crop_left + * pixels to crop from left + * \param crop_right + * pixels to crop from right + * \param crop_top + * pixels to crop from top + * \param crop_bottom + * pixels to crop from bottom + ************************************************************************ + */ + void img2buf (imgpel** imgX, unsigned char* buf, int size_x, int size_y, int symbol_size_in_bytes, int crop_left, int crop_right, int crop_top, int crop_bottom) + { + int i,j; + + int twidth = size_x - crop_left - crop_right; + int theight = size_y - crop_top - crop_bottom; + + int size = 0; + + unsigned char ui8; + unsigned short tmp16, ui16; + unsigned long tmp32, ui32; + + if (( sizeof(char) == sizeof (imgpel)) && ( sizeof(char) == symbol_size_in_bytes)) + { + // imgpel == pixel_in_file == 1 byte -> simple copy + for(i=0;i sizeof(char) + if (testEndian()) + { + // big endian + switch (symbol_size_in_bytes) + { + case 1: + { + for(i=crop_top;i> 8) | ((tmp16&0xFF)<<8); + memcpy(buf+((j-crop_left+((i-crop_top)*(twidth)))*2),&(ui16), 2); + } + break; + } + case 4: + { + for(i=crop_top;i>8) | ((tmp32&0xFF000000)>>24); + memcpy(buf+((j-crop_left+((i-crop_top)*(twidth)))*4),&(ui32), 4); + } + break; + } + default: + { + error ("writing only to formats of 8, 16 or 32 bit allowed on big endian architecture", 500); + break; + } + } + + } + else + { + // little endian + if (sizeof (imgpel) < symbol_size_in_bytes) + { + // this should not happen. we should not have smaller imgpel than our source material. 
+ size = sizeof (imgpel); + // clear buffer + memset (buf, 0, (twidth*theight*symbol_size_in_bytes)); + } + else + { + size = symbol_size_in_bytes; + } + + for(i=crop_top;iimgY) + { + free_mem2Dpel (pending_output->imgY); + pending_output->imgY=NULL; + } + if (pending_output->imgUV) + { + free_mem3Dpel (pending_output->imgUV, 2); + pending_output->imgUV=NULL; + } + + pending_output_state = FRAME; + } + + + /*! + ************************************************************************ + * \brief + * Writes out a storable picture + * If the picture is a field, the output buffers the picture and tries + * to pair it with the next field. + * \param p + * Picture to be written + * \param p_out + * Output file + ************************************************************************ + */ + void write_picture(StorablePicture *p, int p_out, int real_structure) + { + int i, add; + + if (real_structure==FRAME) + { + flush_pending_output(p_out); + write_out_picture(p, p_out); + return; + } + if (real_structure==pending_output_state) + { + flush_pending_output(p_out); + write_picture(p, p_out, real_structure); + return; + } + + if (pending_output_state == FRAME) + { + pending_output->size_x = p->size_x; + pending_output->size_y = p->size_y; + pending_output->size_x_cr = p->size_x_cr; + pending_output->size_y_cr = p->size_y_cr; + pending_output->chroma_format_idc = p->chroma_format_idc; + + pending_output->frame_mbs_only_flag = p->frame_mbs_only_flag; + pending_output->frame_cropping_flag = p->frame_cropping_flag; + if (pending_output->frame_cropping_flag) + { + pending_output->frame_cropping_rect_left_offset = p->frame_cropping_rect_left_offset; + pending_output->frame_cropping_rect_right_offset = p->frame_cropping_rect_right_offset; + pending_output->frame_cropping_rect_top_offset = p->frame_cropping_rect_top_offset; + pending_output->frame_cropping_rect_bottom_offset = p->frame_cropping_rect_bottom_offset; + } + + get_mem2Dpel (&(pending_output->imgY), 
pending_output->size_y, pending_output->size_x); + get_mem3Dpel (&(pending_output->imgUV), 2, pending_output->size_y_cr, pending_output->size_x_cr); + + clear_picture(pending_output); + + // copy first field + if (real_structure == TOP_FIELD) + { + add = 0; + } + else + { + add = 1; + } + + for (i=0; isize_y; i+=2) + { + memcpy(pending_output->imgY[(i+add)], p->imgY[(i+add)], p->size_x * sizeof(imgpel)); + } + for (i=0; isize_y_cr; i+=2) + { + memcpy(pending_output->imgUV[0][(i+add)], p->imgUV[0][(i+add)], p->size_x_cr * sizeof(imgpel)); + memcpy(pending_output->imgUV[1][(i+add)], p->imgUV[1][(i+add)], p->size_x_cr * sizeof(imgpel)); + } + pending_output_state = real_structure; + } + else + { + if ( (pending_output->size_x!=p->size_x) || (pending_output->size_y!= p->size_y) + || (pending_output->frame_mbs_only_flag != p->frame_mbs_only_flag) + || (pending_output->frame_cropping_flag != p->frame_cropping_flag) + || ( pending_output->frame_cropping_flag && + ( (pending_output->frame_cropping_rect_left_offset != p->frame_cropping_rect_left_offset) + ||(pending_output->frame_cropping_rect_right_offset != p->frame_cropping_rect_right_offset) + ||(pending_output->frame_cropping_rect_top_offset != p->frame_cropping_rect_top_offset) + ||(pending_output->frame_cropping_rect_bottom_offset != p->frame_cropping_rect_bottom_offset) + ) + ) + ) + { + flush_pending_output(p_out); + write_picture (p, p_out, real_structure); + return; + } + // copy second field + if (real_structure == TOP_FIELD) + { + add = 0; + } + else + { + add = 1; + } + + for (i=0; isize_y; i+=2) + { + memcpy(pending_output->imgY[(i+add)], p->imgY[(i+add)], p->size_x * sizeof(imgpel)); + } + for (i=0; isize_y_cr; i+=2) + { + memcpy(pending_output->imgUV[0][(i+add)], p->imgUV[0][(i+add)], p->size_x_cr * sizeof(imgpel)); + memcpy(pending_output->imgUV[1][(i+add)], p->imgUV[1][(i+add)], p->size_x_cr * sizeof(imgpel)); + } + + flush_pending_output(p_out); + } + } + + #else + + /*! 
+ ************************************************************************ + * \brief + * Writes out a storable picture without doing any output modifications + * \param p + * Picture to be written + * \param p_out + * Output file + * \param real_structure + * real picture structure + ************************************************************************ + */ + void write_picture(StorablePicture *p, int p_out, int real_structure) + { + write_out_picture(p, p_out); + } + + + #endif + + /*! + ************************************************************************ + * \brief + * Writes out a storable picture + * \param p + * Picture to be written + * \param p_out + * Output file + ************************************************************************ + */ + void write_out_picture(StorablePicture *p, int p_out) + { + int SubWidthC [4]= { 1, 2, 2, 1}; + int SubHeightC [4]= { 1, 2, 1, 1}; + + int crop_left, crop_right, crop_top, crop_bottom; + int symbol_size_in_bytes = img->pic_unit_bitsize_on_disk/8; + Boolean rgb_output = (active_sps->vui_seq_parameters.matrix_coefficients==0); + unsigned char *buf; + + if (p->non_existing) + return; + + if (p->frame_cropping_flag) + { + crop_left = SubWidthC[p->chroma_format_idc] * p->frame_cropping_rect_left_offset; + crop_right = SubWidthC[p->chroma_format_idc] * p->frame_cropping_rect_right_offset; + crop_top = SubHeightC[p->chroma_format_idc]*( 2 - p->frame_mbs_only_flag ) * p->frame_cropping_rect_top_offset; + crop_bottom = SubHeightC[p->chroma_format_idc]*( 2 - p->frame_mbs_only_flag ) * p->frame_cropping_rect_bottom_offset; + } + else + { + crop_left = crop_right = crop_top = crop_bottom = 0; + } + + //printf ("write frame size: %dx%d\n", p->size_x-crop_left-crop_right,p->size_y-crop_top-crop_bottom ); + + // KS: this buffer should actually be allocated only once, but this is still much faster than the previous version + buf = malloc (p->size_x*p->size_y*symbol_size_in_bytes); + if (NULL==buf) + { + 
no_mem_exit("write_out_picture: buf"); + } + + if(rgb_output) + { + crop_left = p->frame_cropping_rect_left_offset; + crop_right = p->frame_cropping_rect_right_offset; + crop_top = ( 2 - p->frame_mbs_only_flag ) * p->frame_cropping_rect_top_offset; + crop_bottom = ( 2 - p->frame_mbs_only_flag ) * p->frame_cropping_rect_bottom_offset; + + img2buf (p->imgUV[1], buf, p->size_x_cr, p->size_y_cr, symbol_size_in_bytes, crop_left, crop_right, crop_top, crop_bottom); + write(p_out, buf, (p->size_y_cr-crop_bottom-crop_top)*(p->size_x_cr-crop_right-crop_left)*symbol_size_in_bytes); + + if (p->frame_cropping_flag) + { + crop_left = SubWidthC[p->chroma_format_idc] * p->frame_cropping_rect_left_offset; + crop_right = SubWidthC[p->chroma_format_idc] * p->frame_cropping_rect_right_offset; + crop_top = SubHeightC[p->chroma_format_idc]*( 2 - p->frame_mbs_only_flag ) * p->frame_cropping_rect_top_offset; + crop_bottom = SubHeightC[p->chroma_format_idc]*( 2 - p->frame_mbs_only_flag ) * p->frame_cropping_rect_bottom_offset; + } + else + { + crop_left = crop_right = crop_top = crop_bottom = 0; + } + } + + img2buf (p->imgY, buf, p->size_x, p->size_y, symbol_size_in_bytes, crop_left, crop_right, crop_top, crop_bottom); + write(p_out, buf, (p->size_y-crop_bottom-crop_top)*(p->size_x-crop_right-crop_left)*symbol_size_in_bytes); + + if (p->chroma_format_idc!=YUV400) + { + crop_left = p->frame_cropping_rect_left_offset; + crop_right = p->frame_cropping_rect_right_offset; + crop_top = ( 2 - p->frame_mbs_only_flag ) * p->frame_cropping_rect_top_offset; + crop_bottom = ( 2 - p->frame_mbs_only_flag ) * p->frame_cropping_rect_bottom_offset; + + img2buf (p->imgUV[0], buf, p->size_x_cr, p->size_y_cr, symbol_size_in_bytes, crop_left, crop_right, crop_top, crop_bottom); + write(p_out, buf, (p->size_y_cr-crop_bottom-crop_top)*(p->size_x_cr-crop_right-crop_left)* symbol_size_in_bytes); + + if (!rgb_output) + { + img2buf (p->imgUV[1], buf, p->size_x_cr, p->size_y_cr, symbol_size_in_bytes, crop_left, 
crop_right, crop_top, crop_bottom); + write(p_out, buf, (p->size_y_cr-crop_bottom-crop_top)*(p->size_x_cr-crop_right-crop_left)*symbol_size_in_bytes); + } + } + else + { + if (input->write_uv) + { + int i,j; + imgpel cr_val = 1<<(img->bitdepth_luma - 1); + + get_mem3Dpel (&(p->imgUV), 1, p->size_y/2, p->size_x/2); + for (j=0; jsize_y/2; j++) + for (i=0; isize_x/2; i++) + p->imgUV[0][j][i]=cr_val; + + // fake out U=V=128 to make a YUV 4:2:0 stream + img2buf (p->imgUV[0], buf, p->size_x/2, p->size_y/2, symbol_size_in_bytes, crop_left/2, crop_right/2, crop_top/2, crop_bottom/2); + + write(p_out, buf, (p->size_y-crop_bottom-crop_top)/2 * (p->size_x-crop_right-crop_left)/2 ); + write(p_out, buf, (p->size_y-crop_bottom-crop_top)/2 * (p->size_x-crop_right-crop_left)/2 ); + + free_mem3Dpel(p->imgUV, 1); + p->imgUV=NULL; + } + } + + free(buf); + + // fsync(p_out); + } + + /*! + ************************************************************************ + * \brief + * Initialize output buffer for direct output + ************************************************************************ + */ + void init_out_buffer() + { + out_buffer = alloc_frame_store(); + #ifdef PAIR_FIELDS_IN_OUTPUT + pending_output = calloc (sizeof(StorablePicture), 1); + if (NULL==pending_output) no_mem_exit("init_out_buffer"); + pending_output->imgUV = NULL; + pending_output->imgY = NULL; + #endif + } + + /*! + ************************************************************************ + * \brief + * Uninitialize output buffer for direct output + ************************************************************************ + */ + void uninit_out_buffer() + { + free_frame_store(out_buffer); + out_buffer=NULL; + #ifdef PAIR_FIELDS_IN_OUTPUT + flush_pending_output(p_out); + free (pending_output); + #endif + } + + /*! 
+ ************************************************************************ + * \brief + * Initialize picture memory with (Y:0,U:128,V:128) + ************************************************************************ + */ + void clear_picture(StorablePicture *p) + { + int i; + + for(i=0;isize_y;i++) + memset(p->imgY[i], img->dc_pred_value, p->size_x*sizeof(imgpel)); + for(i=0;isize_y_cr;i++) + memset(p->imgUV[0][i], img->dc_pred_value, p->size_x_cr*sizeof(imgpel)); + for(i=0;isize_y_cr;i++) + memset(p->imgUV[1][i], img->dc_pred_value, p->size_x_cr*sizeof(imgpel)); + } + + /*! + ************************************************************************ + * \brief + * Write out not paired direct output fields. A second empty field is generated + * and combined into the frame buffer. + * \param fs + * FrameStore that contains a single field + * \param p_out + * Output file + ************************************************************************ + */ + void write_unpaired_field(FrameStore* fs, int p_out) + { + StorablePicture *p; + assert (fs->is_used<3); + if(fs->is_used &1) + { + // we have a top field + // construct an empty bottom field + p = fs->top_field; + fs->bottom_field = alloc_storable_picture(BOTTOM_FIELD, p->size_x, 2*p->size_y, p->size_x_cr, 2*p->size_y_cr); + fs->bottom_field->chroma_format_idc = p->chroma_format_idc; + clear_picture(fs->bottom_field); + dpb_combine_field_yuv(fs); + write_picture (fs->frame, p_out, TOP_FIELD); + } + + if(fs->is_used &2) + { + // we have a bottom field + // construct an empty top field + p = fs->bottom_field; + fs->top_field = alloc_storable_picture(TOP_FIELD, p->size_x, 2*p->size_y, p->size_x_cr, 2*p->size_y_cr); + fs->top_field->chroma_format_idc = p->chroma_format_idc; + clear_picture(fs->top_field); + fs ->top_field->frame_cropping_flag = fs->bottom_field->frame_cropping_flag; + if(fs ->top_field->frame_cropping_flag) + { + fs ->top_field->frame_cropping_rect_top_offset = fs->bottom_field->frame_cropping_rect_top_offset; 
+ fs ->top_field->frame_cropping_rect_bottom_offset = fs->bottom_field->frame_cropping_rect_bottom_offset; + fs ->top_field->frame_cropping_rect_left_offset = fs->bottom_field->frame_cropping_rect_left_offset; + fs ->top_field->frame_cropping_rect_right_offset = fs->bottom_field->frame_cropping_rect_right_offset; + } + dpb_combine_field_yuv(fs); + write_picture (fs->frame, p_out, BOTTOM_FIELD); + } + + fs->is_used=3; + } + + /*! + ************************************************************************ + * \brief + * Write out unpaired fields from output buffer. + * \param p_out + * Output file + ************************************************************************ + */ + void flush_direct_output(int p_out) + { + write_unpaired_field(out_buffer, p_out); + + free_storable_picture(out_buffer->frame); + out_buffer->frame = NULL; + free_storable_picture(out_buffer->top_field); + out_buffer->top_field = NULL; + free_storable_picture(out_buffer->bottom_field); + out_buffer->bottom_field = NULL; + out_buffer->is_used = 0; + } + + + /*! + ************************************************************************ + * \brief + * Write a frame (from FrameStore) + * \param fs + * FrameStore containing the frame + * \param p_out + * Output file + ************************************************************************ + */ + void write_stored_frame( FrameStore *fs,int p_out) + { + // make sure no direct output field is pending + flush_direct_output(p_out); + + if (fs->is_used<3) + { + write_unpaired_field(fs, p_out); + } + else + { + write_picture(fs->frame, p_out, FRAME); + } + + fs->is_output = 1; + } + + /*! + ************************************************************************ + * \brief + * Directly output a picture without storing it in the DPB. Fields + * are buffered before they are written to the file. 
+ * \param p + * Picture for output + * \param p_out + * Output file + ************************************************************************ + */ + void direct_output(StorablePicture *p, int p_out) + { + if (p->structure==FRAME) + { + // we have a frame (or complementary field pair) + // so output it directly + flush_direct_output(p_out); + write_picture (p, p_out, FRAME); + if (-1!=p_ref) + find_snr(snr, p, p_ref); + free_storable_picture(p); + return; + } + + if (p->structure == TOP_FIELD) + { + if (out_buffer->is_used &1) + flush_direct_output(p_out); + out_buffer->top_field = p; + out_buffer->is_used |= 1; + } + + if (p->structure == BOTTOM_FIELD) + { + if (out_buffer->is_used &2) + flush_direct_output(p_out); + out_buffer->bottom_field = p; + out_buffer->is_used |= 2; + } + + if (out_buffer->is_used == 3) + { + // we have both fields, so output them + dpb_combine_field_yuv(out_buffer); + write_picture (out_buffer->frame, p_out, FRAME); + if (-1!=p_ref) + find_snr(snr, out_buffer->frame, p_ref); + free_storable_picture(out_buffer->frame); + out_buffer->frame = NULL; + free_storable_picture(out_buffer->top_field); + out_buffer->top_field = NULL; + free_storable_picture(out_buffer->bottom_field); + out_buffer->bottom_field = NULL; + out_buffer->is_used = 0; + } + } + Index: llvm-test/MultiSource/Applications/JM/ldecod/output.h diff -c /dev/null llvm-test/MultiSource/Applications/JM/ldecod/output.h:1.1 *** /dev/null Sat Feb 11 04:33:42 2006 --- llvm-test/MultiSource/Applications/JM/ldecod/output.h Sat Feb 11 04:33:22 2006 *************** *** 0 **** --- 1,27 ---- + + /*! 
+ ************************************************************************************** + * \file + * output.h + * \brief + * Picture writing routine headers + * \author + * Main contributors (see contributors.h for copyright, address and affiliation details) + * - Karsten Suehring + *************************************************************************************** + */ + #ifndef _OUTPUT_H_ + #define _OUTPUT_H_ + + int testEndian(); + + void write_stored_frame(FrameStore *fs, int p_out); + void direct_output(StorablePicture *p, int p_out); + void init_out_buffer(); + void uninit_out_buffer(); + + #ifdef PAIR_FIELDS_IN_OUTPUT + void flush_pending_output(int p_out); + #endif + + #endif //_OUTPUT_H_ Index: llvm-test/MultiSource/Applications/JM/ldecod/parset.c diff -c /dev/null llvm-test/MultiSource/Applications/JM/ldecod/parset.c:1.1 *** /dev/null Sat Feb 11 04:33:42 2006 --- llvm-test/MultiSource/Applications/JM/ldecod/parset.c Sat Feb 11 04:33:22 2006 *************** *** 0 **** --- 1,684 ---- + + /*! 
+ ************************************************************************ + * \file + * parset.c + * \brief + * Parameter Sets + * \author + * Main contributors (see contributors.h for copyright, address and affiliation details) + * - Stephan Wenger + * + *********************************************************************** + */ + + #include + #include + #include + + #include "global.h" + #include "parsetcommon.h" + #include "parset.h" + #include "nalu.h" + #include "memalloc.h" + #include "fmo.h" + #include "cabac.h" + #include "vlc.h" + #include "mbuffer.h" + #include "erc_api.h" + + #if TRACE + #define SYMTRACESTRING(s) strncpy(sym->tracestring,s,TRACESTRING_SIZE) + #else + #define SYMTRACESTRING(s) // do nothing + #endif + + const byte ZZ_SCAN[16] = + { 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15 + }; + + const byte ZZ_SCAN8[64] = + { 0, 1, 8, 16, 9, 2, 3, 10, 17, 24, 32, 25, 18, 11, 4, 5, + 12, 19, 26, 33, 40, 48, 41, 34, 27, 20, 13, 6, 7, 14, 21, 28, + 35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, 30, 37, 44, 51, + 58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63 + }; + + extern int UsedBits; // for internal statistics, is adjusted by se_v, ue_v, u_1 + extern ColocatedParams *Co_located; + + extern int quant_intra_default[16]; + extern int quant_inter_default[16]; + extern int quant8_intra_default[64]; + extern int quant8_inter_default[64]; + + seq_parameter_set_rbsp_t SeqParSet[MAXSPS]; + pic_parameter_set_rbsp_t PicParSet[MAXPPS]; + + extern StorablePicture* dec_picture; + + extern void init_frext(struct img_par *img); + + // syntax for scaling list matrix values + void Scaling_List(int *scalingList, int sizeOfScalingList, Boolean *UseDefaultScalingMatrix, Bitstream *s) + { + int j, scanj; + int delta_scale, lastScale, nextScale; + + lastScale = 8; + nextScale = 8; + + for(j=0; jbitstream; + + assert (p != NULL); + assert (p->bitstream != NULL); + assert (p->bitstream->streamBuffer != 0); + assert (sps != NULL); + + UsedBits = 0; 
+ + sps->profile_idc = u_v (8, "SPS: profile_idc" , s); + + if ((sps->profile_idc!=66 ) && + (sps->profile_idc!=77 ) && + (sps->profile_idc!=88 ) && + (sps->profile_idc!=100 ) && + (sps->profile_idc!=110 ) && + (sps->profile_idc!=122 ) && + (sps->profile_idc!=144 )) + { + return UsedBits; + } + + sps->constrained_set0_flag = u_1 ( "SPS: constrained_set0_flag" , s); + sps->constrained_set1_flag = u_1 ( "SPS: constrained_set1_flag" , s); + sps->constrained_set2_flag = u_1 ( "SPS: constrained_set2_flag" , s); + sps->constrained_set3_flag = u_1 ( "SPS: constrained_set3_flag" , s); + reserved_zero = u_v (4, "SPS: reserved_zero_4bits" , s); + assert (reserved_zero==0); + + sps->level_idc = u_v (8, "SPS: level_idc" , s); + + sps->seq_parameter_set_id = ue_v ("SPS: seq_parameter_set_id" , s); + + // Fidelity Range Extensions stuff + sps->chroma_format_idc = 1; + sps->bit_depth_luma_minus8 = 0; + sps->bit_depth_chroma_minus8 = 0; + img->lossless_qpprime_flag = 0; + + // Residue Color Transform + img->residue_transform_flag = 0; + + if((sps->profile_idc==FREXT_HP ) || + (sps->profile_idc==FREXT_Hi10P) || + (sps->profile_idc==FREXT_Hi422) || + (sps->profile_idc==FREXT_Hi444)) + { + sps->chroma_format_idc = ue_v ("SPS: chroma_format_idc" , s); + + // Residue Color Transform + if(sps->chroma_format_idc == 3) + img->residue_transform_flag = u_1 ("SPS: residue_transform_flag" , s); + + sps->bit_depth_luma_minus8 = ue_v ("SPS: bit_depth_luma_minus8" , s); + sps->bit_depth_chroma_minus8 = ue_v ("SPS: bit_depth_chroma_minus8" , s); + img->lossless_qpprime_flag = u_1 ("SPS: lossless_qpprime_y_zero_flag" , s); + + sps->seq_scaling_matrix_present_flag = u_1 ( "SPS: seq_scaling_matrix_present_flag" , s); + + if(sps->seq_scaling_matrix_present_flag) + { + for(i=0; i<8; i++) + { + sps->seq_scaling_list_present_flag[i] = u_1 ( "SPS: seq_scaling_list_present_flag" , s); + if(sps->seq_scaling_list_present_flag[i]) + { + if(i<6) + Scaling_List(sps->ScalingList4x4[i], 16, 
&sps->UseDefaultScalingMatrix4x4Flag[i], s); + else + Scaling_List(sps->ScalingList8x8[i-6], 64, &sps->UseDefaultScalingMatrix8x8Flag[i-6], s); + } + } + } + } + + sps->log2_max_frame_num_minus4 = ue_v ("SPS: log2_max_frame_num_minus4" , s); + sps->pic_order_cnt_type = ue_v ("SPS: pic_order_cnt_type" , s); + + if (sps->pic_order_cnt_type == 0) + sps->log2_max_pic_order_cnt_lsb_minus4 = ue_v ("SPS: log2_max_pic_order_cnt_lsb_minus4" , s); + else if (sps->pic_order_cnt_type == 1) + { + sps->delta_pic_order_always_zero_flag = u_1 ("SPS: delta_pic_order_always_zero_flag" , s); + sps->offset_for_non_ref_pic = se_v ("SPS: offset_for_non_ref_pic" , s); + sps->offset_for_top_to_bottom_field = se_v ("SPS: offset_for_top_to_bottom_field" , s); + sps->num_ref_frames_in_pic_order_cnt_cycle = ue_v ("SPS: num_ref_frames_in_pic_order_cnt_cycle" , s); + for(i=0; inum_ref_frames_in_pic_order_cnt_cycle; i++) + sps->offset_for_ref_frame[i] = se_v ("SPS: offset_for_ref_frame[i]" , s); + } + sps->num_ref_frames = ue_v ("SPS: num_ref_frames" , s); + sps->gaps_in_frame_num_value_allowed_flag = u_1 ("SPS: gaps_in_frame_num_value_allowed_flag" , s); + sps->pic_width_in_mbs_minus1 = ue_v ("SPS: pic_width_in_mbs_minus1" , s); + sps->pic_height_in_map_units_minus1 = ue_v ("SPS: pic_height_in_map_units_minus1" , s); + sps->frame_mbs_only_flag = u_1 ("SPS: frame_mbs_only_flag" , s); + if (!sps->frame_mbs_only_flag) + { + sps->mb_adaptive_frame_field_flag = u_1 ("SPS: mb_adaptive_frame_field_flag" , s); + } + sps->direct_8x8_inference_flag = u_1 ("SPS: direct_8x8_inference_flag" , s); + sps->frame_cropping_flag = u_1 ("SPS: frame_cropping_flag" , s); + + if (sps->frame_cropping_flag) + { + sps->frame_cropping_rect_left_offset = ue_v ("SPS: frame_cropping_rect_left_offset" , s); + sps->frame_cropping_rect_right_offset = ue_v ("SPS: frame_cropping_rect_right_offset" , s); + sps->frame_cropping_rect_top_offset = ue_v ("SPS: frame_cropping_rect_top_offset" , s); + 
sps->frame_cropping_rect_bottom_offset = ue_v ("SPS: frame_cropping_rect_bottom_offset" , s); + } + sps->vui_parameters_present_flag = u_1 ("SPS: vui_parameters_present_flag" , s); + + InitVUI(sps); + ReadVUI(p, sps); + + sps->Valid = TRUE; + + return UsedBits; + } + + + void InitVUI(seq_parameter_set_rbsp_t *sps) + { + sps->vui_seq_parameters.matrix_coefficients = 2; + } + + + int ReadVUI(DataPartition *p, seq_parameter_set_rbsp_t *sps) + { + Bitstream *s = p->bitstream; + if (sps->vui_parameters_present_flag) + { + sps->vui_seq_parameters.aspect_ratio_info_present_flag = u_1 ("VUI: aspect_ratio_info_present_flag" , s); + if (sps->vui_seq_parameters.aspect_ratio_info_present_flag) + { + sps->vui_seq_parameters.aspect_ratio_idc = u_v ( 8, "VUI: aspect_ratio_idc" , s); + if (255==sps->vui_seq_parameters.aspect_ratio_idc) + { + sps->vui_seq_parameters.sar_width = u_v (16, "VUI: sar_width" , s); + sps->vui_seq_parameters.sar_height = u_v (16, "VUI: sar_height" , s); + } + } + + sps->vui_seq_parameters.overscan_info_present_flag = u_1 ("VUI: overscan_info_present_flag" , s); + if (sps->vui_seq_parameters.overscan_info_present_flag) + { + sps->vui_seq_parameters.overscan_appropriate_flag = u_1 ("VUI: overscan_appropriate_flag" , s); + } + + sps->vui_seq_parameters.video_signal_type_present_flag = u_1 ("VUI: video_signal_type_present_flag" , s); + if (sps->vui_seq_parameters.video_signal_type_present_flag) + { + sps->vui_seq_parameters.video_format = u_v ( 3,"VUI: video_format" , s); + sps->vui_seq_parameters.video_full_range_flag = u_1 ( "VUI: video_full_range_flag" , s); + sps->vui_seq_parameters.colour_description_present_flag = u_1 ( "VUI: color_description_present_flag" , s); + if(sps->vui_seq_parameters.colour_description_present_flag) + { + sps->vui_seq_parameters.colour_primaries = u_v ( 8,"VUI: colour_primaries" , s); + sps->vui_seq_parameters.transfer_characteristics = u_v ( 8,"VUI: transfer_characteristics" , s); + sps->vui_seq_parameters.matrix_coefficients = 
u_v ( 8,"VUI: matrix_coefficients" , s); + } + } + sps->vui_seq_parameters.chroma_location_info_present_flag = u_1 ( "VUI: chroma_loc_info_present_flag" , s); + if(sps->vui_seq_parameters.chroma_location_info_present_flag) + { + sps->vui_seq_parameters.chroma_sample_loc_type_top_field = ue_v ( "VUI: chroma_sample_loc_type_top_field" , s); + sps->vui_seq_parameters.chroma_sample_loc_type_bottom_field = ue_v ( "VUI: chroma_sample_loc_type_bottom_field" , s); + } + sps->vui_seq_parameters.timing_info_present_flag = u_1 ("VUI: timing_info_present_flag" , s); + if (sps->vui_seq_parameters.timing_info_present_flag) + { + sps->vui_seq_parameters.num_units_in_tick = u_v (32,"VUI: num_units_in_tick" , s); + sps->vui_seq_parameters.time_scale = u_v (32,"VUI: time_scale" , s); + sps->vui_seq_parameters.fixed_frame_rate_flag = u_1 ( "VUI: fixed_frame_rate_flag" , s); + } + sps->vui_seq_parameters.nal_hrd_parameters_present_flag = u_1 ("VUI: nal_hrd_parameters_present_flag" , s); + if (sps->vui_seq_parameters.nal_hrd_parameters_present_flag) + { + ReadHRDParameters(p, &(sps->vui_seq_parameters.nal_hrd_parameters)); + } + sps->vui_seq_parameters.vcl_hrd_parameters_present_flag = u_1 ("VUI: vcl_hrd_parameters_present_flag" , s); + if (sps->vui_seq_parameters.vcl_hrd_parameters_present_flag) + { + ReadHRDParameters(p, &(sps->vui_seq_parameters.vcl_hrd_parameters)); + } + if (sps->vui_seq_parameters.nal_hrd_parameters_present_flag || sps->vui_seq_parameters.vcl_hrd_parameters_present_flag) + { + sps->vui_seq_parameters.low_delay_hrd_flag = u_1 ("VUI: low_delay_hrd_flag" , s); + } + sps->vui_seq_parameters.pic_struct_present_flag = u_1 ("VUI: pic_struct_present_flag " , s); + sps->vui_seq_parameters.bitstream_restriction_flag = u_1 ("VUI: bitstream_restriction_flag" , s); + if (sps->vui_seq_parameters.bitstream_restriction_flag) + { + sps->vui_seq_parameters.motion_vectors_over_pic_boundaries_flag = u_1 ("VUI: motion_vectors_over_pic_boundaries_flag", s); + 
sps->vui_seq_parameters.max_bytes_per_pic_denom = ue_v ("VUI: max_bytes_per_pic_denom" , s); + sps->vui_seq_parameters.max_bits_per_mb_denom = ue_v ("VUI: max_bits_per_mb_denom" , s); + sps->vui_seq_parameters.log2_max_mv_length_horizontal = ue_v ("VUI: log2_max_mv_length_horizontal" , s); + sps->vui_seq_parameters.log2_max_mv_length_vertical = ue_v ("VUI: log2_max_mv_length_vertical" , s); + sps->vui_seq_parameters.num_reorder_frames = ue_v ("VUI: num_reorder_frames" , s); + sps->vui_seq_parameters.max_dec_frame_buffering = ue_v ("VUI: max_dec_frame_buffering" , s); + } + } + + return 0; + } + + + int ReadHRDParameters(DataPartition *p, hrd_parameters_t *hrd) + { + Bitstream *s = p->bitstream; + unsigned int SchedSelIdx; + + hrd->cpb_cnt_minus1 = ue_v ( "VUI: cpb_cnt_minus1" , s); + hrd->bit_rate_scale = u_v ( 4,"VUI: bit_rate_scale" , s); + hrd->cpb_size_scale = u_v ( 4,"VUI: cpb_size_scale" , s); + + for( SchedSelIdx = 0; SchedSelIdx <= hrd->cpb_cnt_minus1; SchedSelIdx++ ) + { + hrd->bit_rate_value_minus1[ SchedSelIdx ] = ue_v ( "VUI: bit_rate_value_minus1" , s); + hrd->cpb_size_value_minus1[ SchedSelIdx ] = ue_v ( "VUI: cpb_size_value_minus1" , s); + hrd->cbr_flag[ SchedSelIdx ] = u_1 ( "VUI: cbr_flag" , s); + } + + hrd->initial_cpb_removal_delay_length_minus1 = u_v ( 5,"VUI: initial_cpb_removal_delay_length_minus1" , s); + hrd->cpb_removal_delay_length_minus1 = u_v ( 5,"VUI: cpb_removal_delay_length_minus1" , s); + hrd->dpb_output_delay_length_minus1 = u_v ( 5,"VUI: dpb_output_delay_length_minus1" , s); + hrd->time_offset_length = u_v ( 5,"VUI: time_offset_length" , s); + + return 0; + } + + + int InterpretPPS (DataPartition *p, pic_parameter_set_rbsp_t *pps) + { + unsigned i; + int NumberBitsPerSliceGroupId; + Bitstream *s = p->bitstream; + + assert (p != NULL); + assert (p->bitstream != NULL); + assert (p->bitstream->streamBuffer != 0); + assert (pps != NULL); + + UsedBits = 0; + + pps->pic_parameter_set_id = ue_v ("PPS: pic_parameter_set_id" , s); + 
pps->seq_parameter_set_id = ue_v ("PPS: seq_parameter_set_id" , s); + pps->entropy_coding_mode_flag = u_1 ("PPS: entropy_coding_mode_flag" , s); + + //! Note: as per JVT-F078 the following bit is unconditional. If F078 is not accepted, then + //! one has to fetch the correct SPS to check whether the bit is present (hopefully there is + //! no consistency problem :-( + //! The current encoder code handles this in the same way. When you change this, don't forget + //! the encoder! StW, 12/8/02 + pps->pic_order_present_flag = u_1 ("PPS: pic_order_present_flag" , s); + + pps->num_slice_groups_minus1 = ue_v ("PPS: num_slice_groups_minus1" , s); + + // FMO stuff begins here + if (pps->num_slice_groups_minus1 > 0) + { + pps->slice_group_map_type = ue_v ("PPS: slice_group_map_type" , s); + if (pps->slice_group_map_type == 0) + { + for (i=0; i<=pps->num_slice_groups_minus1; i++) + pps->run_length_minus1 [i] = ue_v ("PPS: run_length_minus1 [i]" , s); + } + else if (pps->slice_group_map_type == 2) + { + for (i=0; i<pps->num_slice_groups_minus1; i++) + { + //! JVT-F078: avoid reference of SPS by using ue(v) instead of u(v) + pps->top_left [i] = ue_v ("PPS: top_left [i]" , s); + pps->bottom_right [i] = ue_v ("PPS: bottom_right [i]" , s); + } + } + else if (pps->slice_group_map_type == 3 || + pps->slice_group_map_type == 4 || + pps->slice_group_map_type == 5) + { + pps->slice_group_change_direction_flag = u_1 ("PPS: slice_group_change_direction_flag" , s); + pps->slice_group_change_rate_minus1 = ue_v ("PPS: slice_group_change_rate_minus1" , s); + } + else if (pps->slice_group_map_type == 6) + { + if (pps->num_slice_groups_minus1+1 >4) + NumberBitsPerSliceGroupId = 3; + else if (pps->num_slice_groups_minus1+1 > 2) + NumberBitsPerSliceGroupId = 2; + else + NumberBitsPerSliceGroupId = 1; + //! 
JVT-F078, exlicitly signal number of MBs in the map + pps->num_slice_group_map_units_minus1 = ue_v ("PPS: num_slice_group_map_units_minus1" , s); + for (i=0; i<=pps->num_slice_group_map_units_minus1; i++) + pps->slice_group_id[i] = u_v (NumberBitsPerSliceGroupId, "slice_group_id[i]", s); + } + } + + // End of FMO stuff + + pps->num_ref_idx_l0_active_minus1 = ue_v ("PPS: num_ref_idx_l0_active_minus1" , s); + pps->num_ref_idx_l1_active_minus1 = ue_v ("PPS: num_ref_idx_l1_active_minus1" , s); + pps->weighted_pred_flag = u_1 ("PPS: weighted prediction flag" , s); + pps->weighted_bipred_idc = u_v ( 2, "PPS: weighted_bipred_idc" , s); + pps->pic_init_qp_minus26 = se_v ("PPS: pic_init_qp_minus26" , s); + pps->pic_init_qs_minus26 = se_v ("PPS: pic_init_qs_minus26" , s); + + pps->chroma_qp_index_offset = se_v ("PPS: chroma_qp_index_offset" , s); + + pps->deblocking_filter_control_present_flag = u_1 ("PPS: deblocking_filter_control_present_flag" , s); + pps->constrained_intra_pred_flag = u_1 ("PPS: constrained_intra_pred_flag" , s); + pps->redundant_pic_cnt_present_flag = u_1 ("PPS: redundant_pic_cnt_present_flag" , s); + + if(more_rbsp_data(s->streamBuffer, s->frame_bitoffset,s->bitstream_length)) // more_data_in_rbsp() + { + //Fidelity Range Extensions Stuff + pps->transform_8x8_mode_flag = u_1 ("PPS: transform_8x8_mode_flag" , s); + pps->pic_scaling_matrix_present_flag = u_1 ("PPS: pic_scaling_matrix_present_flag" , s); + + if(pps->pic_scaling_matrix_present_flag) + { + for(i=0; i<(6+((unsigned)pps->transform_8x8_mode_flag<<1)); i++) + { + pps->pic_scaling_list_present_flag[i]= u_1 ("PPS: pic_scaling_list_present_flag" , s); + + if(pps->pic_scaling_list_present_flag[i]) + { + if(i<6) + Scaling_List(pps->ScalingList4x4[i], 16, &pps->UseDefaultScalingMatrix4x4Flag[i], s); + else + Scaling_List(pps->ScalingList8x8[i-6], 64, &pps->UseDefaultScalingMatrix8x8Flag[i-6], s); + } + } + } + pps->second_chroma_qp_index_offset = se_v ("PPS: second_chroma_qp_index_offset" , s); + } + 
else + { + pps->second_chroma_qp_index_offset = pps->chroma_qp_index_offset; + } + + pps->Valid = TRUE; + return UsedBits; + } + + + void PPSConsistencyCheck (pic_parameter_set_rbsp_t *pps) + { + printf ("Consistency checking a picture parset, to be implemented\n"); + // if (pps->seq_parameter_set_id invalid then do something) + } + + void SPSConsistencyCheck (seq_parameter_set_rbsp_t *sps) + { + printf ("Consistency checking a sequence parset, to be implemented\n"); + } + + void MakePPSavailable (int id, pic_parameter_set_rbsp_t *pps) + { + assert (pps->Valid == TRUE); + + if (PicParSet[id].Valid == TRUE && PicParSet[id].slice_group_id != NULL) + free (PicParSet[id].slice_group_id); + + memcpy (&PicParSet[id], pps, sizeof (pic_parameter_set_rbsp_t)); + + if ((PicParSet[id].slice_group_id = calloc (PicParSet[id].num_slice_group_map_units_minus1+1, sizeof(int))) == NULL) + no_mem_exit ("MakePPSavailable: Cannot calloc slice_group_id"); + + memcpy (PicParSet[id].slice_group_id, pps->slice_group_id, (pps->num_slice_group_map_units_minus1+1)*sizeof(int)); + } + + void MakeSPSavailable (int id, seq_parameter_set_rbsp_t *sps) + { + assert (sps->Valid == TRUE); + memcpy (&SeqParSet[id], sps, sizeof (seq_parameter_set_rbsp_t)); + } + + + void ProcessSPS (NALU_t *nalu) + { + DataPartition *dp = AllocPartition(1); + seq_parameter_set_rbsp_t *sps = AllocSPS(); + int dummy; + + memcpy (dp->bitstream->streamBuffer, &nalu->buf[1], nalu->len-1); + dp->bitstream->code_len = dp->bitstream->bitstream_length = RBSPtoSODB (dp->bitstream->streamBuffer, nalu->len-1); + dp->bitstream->ei_flag = 0; + dp->bitstream->read_len = dp->bitstream->frame_bitoffset = 0; + dummy = InterpretSPS (dp, sps); + + if (sps->Valid) + { + if (active_sps) + { + if (sps->seq_parameter_set_id == active_sps->seq_parameter_set_id) + { + if (!sps_is_equal(sps, active_sps)) + { + if (dec_picture) + { + // this may only happen on slice loss + exit_picture(); + } + active_sps=NULL; + } + } + } + // 
SPSConsistencyCheck (pps); + MakeSPSavailable (sps->seq_parameter_set_id, sps); + img->profile_idc = sps->profile_idc; //ADD-VG + } + + FreePartition (dp, 1); + FreeSPS (sps); + } + + + void ProcessPPS (NALU_t *nalu) + { + DataPartition *dp; + pic_parameter_set_rbsp_t *pps; + int dummy; + + dp = AllocPartition(1); + pps = AllocPPS(); + memcpy (dp->bitstream->streamBuffer, &nalu->buf[1], nalu->len-1); + dp->bitstream->code_len = dp->bitstream->bitstream_length = RBSPtoSODB (dp->bitstream->streamBuffer, nalu->len-1); + dp->bitstream->ei_flag = 0; + dp->bitstream->read_len = dp->bitstream->frame_bitoffset = 0; + dummy = InterpretPPS (dp, pps); + // PPSConsistencyCheck (pps); + if (active_pps) + { + if (pps->pic_parameter_set_id == active_pps->pic_parameter_set_id) + { + if (!pps_is_equal(pps, active_pps)) + { + if (dec_picture) + { + // this may only happen on slice loss + exit_picture(); + } + active_pps = NULL; + } + } + } + MakePPSavailable (pps->pic_parameter_set_id, pps); + FreePartition (dp, 1); + FreePPS (pps); + } + + void activate_sps (seq_parameter_set_rbsp_t *sps) + { + if (active_sps != sps) + { + if (dec_picture) + { + // this may only happen on slice loss + exit_picture(); + } + active_sps = sps; + + img->bitdepth_chroma = 0; + img->width_cr = 0; + img->height_cr = 0; + + // Fidelity Range Extensions stuff (part 1) + img->bitdepth_luma = sps->bit_depth_luma_minus8 + 8; + if (sps->chroma_format_idc != YUV400) + img->bitdepth_chroma = sps->bit_depth_chroma_minus8 + 8; + + img->MaxFrameNum = 1<<(sps->log2_max_frame_num_minus4+4); + img->PicWidthInMbs = (sps->pic_width_in_mbs_minus1 +1); + img->PicHeightInMapUnits = (sps->pic_height_in_map_units_minus1 +1); + img->FrameHeightInMbs = ( 2 - sps->frame_mbs_only_flag ) * img->PicHeightInMapUnits; + img->FrameSizeInMbs = img->PicWidthInMbs * img->FrameHeightInMbs; + + img->yuv_format=sps->chroma_format_idc; + + img->width = img->PicWidthInMbs * MB_BLOCK_SIZE; + img->height = img->FrameHeightInMbs * MB_BLOCK_SIZE; 
+ + if (sps->chroma_format_idc == YUV420) + { + img->width_cr = img->width /2; + img->height_cr = img->height / 2; + } + else if (sps->chroma_format_idc == YUV422) + { + img->width_cr = img->width /2; + img->height_cr = img->height; + } + else if (sps->chroma_format_idc == YUV444) + { + //YUV444 + img->width_cr = img->width; + img->height_cr = img->height; + } + + init_frext(img); + init_global_buffers(); + if (!img->no_output_of_prior_pics_flag) + { + flush_dpb(); + } + init_dpb(); + + if (NULL!=Co_located) + { + free_colocated(Co_located); + } + Co_located = alloc_colocated (img->width, img->height,sps->mb_adaptive_frame_field_flag); + ercInit(img->width, img->height, 1); + } + } + + void activate_pps(pic_parameter_set_rbsp_t *pps) + { + if (active_pps != pps) + { + if (dec_picture) + { + // this may only happen on slice loss + exit_picture(); + } + + active_pps = pps; + + // Fidelity Range Extensions stuff (part 2) + img->Transform8x8Mode = pps->transform_8x8_mode_flag; + + } + } + + void UseParameterSet (int PicParsetId) + { + seq_parameter_set_rbsp_t *sps = &SeqParSet[PicParSet[PicParsetId].seq_parameter_set_id]; + pic_parameter_set_rbsp_t *pps = &PicParSet[PicParsetId]; + int i; + + + if (PicParSet[PicParsetId].Valid != TRUE) + printf ("Trying to use an invalid (uninitialized) Picture Parameter Set with ID %d, expect the unexpected...\n", PicParsetId); + if (SeqParSet[PicParSet[PicParsetId].seq_parameter_set_id].Valid != TRUE) + printf ("PicParset %d references an invalid (uninitialized) Sequence Parameter Set with ID %d, expect the unexpected...\n", PicParsetId, PicParSet[PicParsetId].seq_parameter_set_id); + + sps = &SeqParSet[PicParSet[PicParsetId].seq_parameter_set_id]; + + + // In theory, and with a well-designed software, the lines above + // are everything necessary. 
In practice, we need to patch many values + // in img-> (but no more in inp-> -- these have been taken care of) + + // Sequence Parameter Set Stuff first + + // printf ("Using Picture Parameter set %d and associated Sequence Parameter Set %d\n", PicParsetId, PicParSet[PicParsetId].seq_parameter_set_id); + + if ((int) sps->pic_order_cnt_type < 0 || sps->pic_order_cnt_type > 2) // != 1 + { + printf ("invalid sps->pic_order_cnt_type = %d\n", sps->pic_order_cnt_type); + error ("pic_order_cnt_type != 1", -1000); + } + + if (sps->pic_order_cnt_type == 1) + { + if(sps->num_ref_frames_in_pic_order_cnt_cycle >= MAXnum_ref_frames_in_pic_order_cnt_cycle) + { + error("num_ref_frames_in_pic_order_cnt_cycle too large",-1011); + } + } + + activate_sps(sps); + activate_pps(pps); + + + // currSlice->dp_mode is set by read_new_slice (NALU first byte available there) + if (pps->entropy_coding_mode_flag == UVLC) + { + nal_startcode_follows = uvlc_startcode_follows; + for (i=0; i<3; i++) + { + img->currentSlice->partArr[i].readSyntaxElement = readSyntaxElement_UVLC; + } + } + else + { + nal_startcode_follows = cabac_startcode_follows; + for (i=0; i<3; i++) + { + img->currentSlice->partArr[i].readSyntaxElement = readSyntaxElement_CABAC; + } + } + } + Index: llvm-test/MultiSource/Applications/JM/ldecod/parset.h diff -c /dev/null llvm-test/MultiSource/Applications/JM/ldecod/parset.h:1.1 *** /dev/null Sat Feb 11 04:33:42 2006 --- llvm-test/MultiSource/Applications/JM/ldecod/parset.h Sat Feb 11 04:33:22 2006 *************** *** 0 **** --- 1,42 ---- + + /*! 
+ ************************************************************************************** + * \file + * parset.h + * \brief + * Picture and Sequence Parameter Sets, decoder operations + * This code reflects JVT version xxx + * \date 25 November 2002 + * \author + * Main contributors (see contributors.h for copyright, address and affiliation details) + * - Stephan Wenger + *************************************************************************************** + */ + #ifndef _PARSET_H_ + #define _PARSET_H_ + + + #include "parsetcommon.h" + #include "nalucommon.h" + + void Scaling_List(int *scalingList, int sizeOfScalingList, Boolean *UseDefaultScalingMatrix, Bitstream *s); + + void InitVUI(seq_parameter_set_rbsp_t *sps); + int ReadVUI(DataPartition *p, seq_parameter_set_rbsp_t *sps); + int ReadHRDParameters(DataPartition *p, hrd_parameters_t *hrd); + + void PPSConsistencyCheck (pic_parameter_set_rbsp_t *pps); + void SPSConsistencyCheck (seq_parameter_set_rbsp_t *sps); + + void MakePPSavailable (int id, pic_parameter_set_rbsp_t *pps); + void MakeSPSavailable (int id, seq_parameter_set_rbsp_t *sps); + + void ProcessSPS (NALU_t *nalu); + void ProcessPPS (NALU_t *nalu); + + void UseParameterSet (int PicParsetId); + + void activate_sps (seq_parameter_set_rbsp_t *sps); + void activate_pps (pic_parameter_set_rbsp_t *pps); + + #endif Index: llvm-test/MultiSource/Applications/JM/ldecod/parsetcommon.c diff -c /dev/null llvm-test/MultiSource/Applications/JM/ldecod/parsetcommon.c:1.1 *** /dev/null Sat Feb 11 04:33:42 2006 --- llvm-test/MultiSource/Applications/JM/ldecod/parsetcommon.c Sat Feb 11 04:33:22 2006 *************** *** 0 **** --- 1,218 ---- + + /*! 
+ ************************************************************************************** + * \file + * parset.c + * \brief + * Picture and Sequence Parameter set generation and handling + * \date 25 November 2002 + * \author + * Main contributors (see contributors.h for copyright, address and affiliation details) + * - Stephan Wenger + * + ************************************************************************************** + */ + #include + #include + #include + + #include "parsetcommon.h" + #include "memalloc.h" + /*! + ************************************************************************************* + * \brief + * Allocates memory for a picture paramater set + * + * \return + * pointer to a pps + ************************************************************************************* + */ + + pic_parameter_set_rbsp_t *AllocPPS () + { + pic_parameter_set_rbsp_t *p; + + if ((p=calloc (sizeof (pic_parameter_set_rbsp_t), 1)) == NULL) + no_mem_exit ("AllocPPS: PPS"); + if ((p->slice_group_id = calloc (SIZEslice_group_id, 1)) == NULL) + no_mem_exit ("AllocPPS: slice_group_id"); + return p; + } + + + /*! + ************************************************************************************* + * \brief + * Allocates memory for am sequence paramater set + * + * \return + * pointer to a sps + ************************************************************************************* + */ + + seq_parameter_set_rbsp_t *AllocSPS () + { + seq_parameter_set_rbsp_t *p; + + if ((p=calloc (sizeof (seq_parameter_set_rbsp_t), 1)) == NULL) + no_mem_exit ("AllocSPS: SPS"); + return p; + } + + + /*! 
+ ************************************************************************************* + * \brief + * Frees a picture parameter set + * + * \param pps to be freed + * Picture parameter set to be freed + ************************************************************************************* + */ + + void FreePPS (pic_parameter_set_rbsp_t *pps) + { + assert (pps != NULL); + if (pps->slice_group_id != NULL) free (pps->slice_group_id); + free (pps); + } + + + /*! + ************************************************************************************* + * \brief + * Frees a sps + * + * \param sps + * Sequence parameter set to be freed + ************************************************************************************* + */ + + void FreeSPS (seq_parameter_set_rbsp_t *sps) + { + assert (sps != NULL); + free (sps); + } + + + int sps_is_equal(seq_parameter_set_rbsp_t *sps1, seq_parameter_set_rbsp_t *sps2) + { + unsigned i; + int equal = 1; + + if ((!sps1->Valid) || (!sps2->Valid)) + return 0; + + equal &= (sps1->profile_idc == sps2->profile_idc); + equal &= (sps1->constrained_set0_flag == sps2->constrained_set0_flag); + equal &= (sps1->constrained_set1_flag == sps2->constrained_set1_flag); + equal &= (sps1->constrained_set2_flag == sps2->constrained_set2_flag); + equal &= (sps1->level_idc == sps2->level_idc); + equal &= (sps1->seq_parameter_set_id == sps2->seq_parameter_set_id); + equal &= (sps1->log2_max_frame_num_minus4 == sps2->log2_max_frame_num_minus4); + equal &= (sps1->pic_order_cnt_type == sps2->pic_order_cnt_type); + + if (!equal) return equal; + + if( sps1->pic_order_cnt_type == 0 ) + { + equal &= (sps1->log2_max_pic_order_cnt_lsb_minus4 == sps2->log2_max_pic_order_cnt_lsb_minus4); + } + + else if( sps1->pic_order_cnt_type == 1 ) + { + equal &= (sps1->delta_pic_order_always_zero_flag == sps2->delta_pic_order_always_zero_flag); + equal &= (sps1->offset_for_non_ref_pic == sps2->offset_for_non_ref_pic); + equal &= (sps1->offset_for_top_to_bottom_field == 
sps2->offset_for_top_to_bottom_field); + equal &= (sps1->num_ref_frames_in_pic_order_cnt_cycle == sps2->num_ref_frames_in_pic_order_cnt_cycle); + if (!equal) return equal; + + for ( i = 0 ; i< sps1->num_ref_frames_in_pic_order_cnt_cycle ;i ++) + equal &= (sps1->offset_for_ref_frame[i] == sps2->offset_for_ref_frame[i]); + } + + equal &= (sps1->num_ref_frames == sps2->num_ref_frames); + equal &= (sps1->gaps_in_frame_num_value_allowed_flag == sps2->gaps_in_frame_num_value_allowed_flag); + equal &= (sps1->pic_width_in_mbs_minus1 == sps2->pic_width_in_mbs_minus1); + equal &= (sps1->pic_height_in_map_units_minus1 == sps2->pic_height_in_map_units_minus1); + equal &= (sps1->frame_mbs_only_flag == sps2->frame_mbs_only_flag); + + if (!equal) return equal; + if( !sps1->frame_mbs_only_flag ) + equal &= (sps1->mb_adaptive_frame_field_flag == sps2->mb_adaptive_frame_field_flag); + + equal &= (sps1->direct_8x8_inference_flag == sps2->direct_8x8_inference_flag); + equal &= (sps1->frame_cropping_flag == sps2->frame_cropping_flag); + if (!equal) return equal; + if (sps1->frame_cropping_flag) + { + equal &= (sps1->frame_cropping_rect_left_offset == sps2->frame_cropping_rect_left_offset); + equal &= (sps1->frame_cropping_rect_right_offset == sps2->frame_cropping_rect_right_offset); + equal &= (sps1->frame_cropping_rect_top_offset == sps2->frame_cropping_rect_top_offset); + equal &= (sps1->frame_cropping_rect_bottom_offset == sps2->frame_cropping_rect_bottom_offset); + } + equal &= (sps1->vui_parameters_present_flag == sps2->vui_parameters_present_flag); + + return equal; + } + + int pps_is_equal(pic_parameter_set_rbsp_t *pps1, pic_parameter_set_rbsp_t *pps2) + { + unsigned i; + int equal = 1; + + if ((!pps1->Valid) || (!pps2->Valid)) + return 0; + + equal &= (pps1->pic_parameter_set_id == pps2->pic_parameter_set_id); + equal &= (pps1->seq_parameter_set_id == pps2->seq_parameter_set_id); + equal &= (pps1->entropy_coding_mode_flag == pps2->entropy_coding_mode_flag); + equal &= 
(pps1->pic_order_present_flag == pps2->pic_order_present_flag); + equal &= (pps1->num_slice_groups_minus1 == pps2->num_slice_groups_minus1); + + if (!equal) return equal; + + if (pps1->num_slice_groups_minus1>0) + { + equal &= (pps1->slice_group_map_type == pps2->slice_group_map_type); + if (!equal) return equal; + if (pps1->slice_group_map_type == 0) + { + for (i=0; i<=pps1->num_slice_groups_minus1; i++) + equal &= (pps1->run_length_minus1[i] == pps2->run_length_minus1[i]); + } + else if( pps1->slice_group_map_type == 2 ) + { + for (i=0; i<pps1->num_slice_groups_minus1; i++) + { + equal &= (pps1->top_left[i] == pps2->top_left[i]); + equal &= (pps1->bottom_right[i] == pps2->bottom_right[i]); + } + } + else if( pps1->slice_group_map_type == 3 || pps1->slice_group_map_type==4 || pps1->slice_group_map_type==5 ) + { + equal &= (pps1->slice_group_change_direction_flag == pps2->slice_group_change_direction_flag); + equal &= (pps1->slice_group_change_rate_minus1 == pps2->slice_group_change_rate_minus1); + } + else if( pps1->slice_group_map_type == 6 ) + { + equal &= (pps1->num_slice_group_map_units_minus1 == pps2->num_slice_group_map_units_minus1); + if (!equal) return equal; + for (i=0; i<=pps1->num_slice_group_map_units_minus1; i++) + equal &= (pps1->slice_group_id[i] == pps2->slice_group_id[i]); + } + } + + equal &= (pps1->num_ref_idx_l0_active_minus1 == pps2->num_ref_idx_l0_active_minus1); + equal &= (pps1->num_ref_idx_l1_active_minus1 == pps2->num_ref_idx_l1_active_minus1); + equal &= (pps1->weighted_pred_flag == pps2->weighted_pred_flag); + equal &= (pps1->weighted_bipred_idc == pps2->weighted_bipred_idc); + equal &= (pps1->pic_init_qp_minus26 == pps2->pic_init_qp_minus26); + equal &= (pps1->pic_init_qs_minus26 == pps2->pic_init_qs_minus26); + equal &= (pps1->chroma_qp_index_offset == pps2->chroma_qp_index_offset); + equal &= (pps1->deblocking_filter_control_present_flag == pps2->deblocking_filter_control_present_flag); + equal &= (pps1->constrained_intra_pred_flag == 
pps2->constrained_intra_pred_flag); + equal &= (pps1->redundant_pic_cnt_present_flag == pps2->redundant_pic_cnt_present_flag); + + return equal; + } Index: llvm-test/MultiSource/Applications/JM/ldecod/parsetcommon.h diff -c /dev/null llvm-test/MultiSource/Applications/JM/ldecod/parsetcommon.h:1.1 *** /dev/null Sat Feb 11 04:33:42 2006 --- llvm-test/MultiSource/Applications/JM/ldecod/parsetcommon.h Sat Feb 11 04:33:22 2006 *************** *** 0 **** --- 1,214 ---- + + /*! + ************************************************************************************** + * \file + * parsetcommon.h + * \brief + * Picture and Sequence Parameter Sets, structures common to encoder and decoder + * This code reflects JVT version xxx + * \date 25 November 2002 + * \author + * Main contributors (see contributors.h for copyright, address and affiliation details) + * - Stephan Wenger + *************************************************************************************** + */ + + + + // In the JVT syntax, frequently flags are used that indicate the presence of + // certain pieces of information in the NALU. Here, these flags are also + // present. In the encoder, those bits indicate that the values signalled to + // be present are meaningful and that this part of the syntax should be + // written to the NALU. In the decoder, the flag indicates that information + // was received from the decoded NALU and should be used henceforth. + // The structure names were chosen as indicated in the JVT syntax + + #ifndef _PARSETCOMMON_H_ + #define _PARSETCOMMON_H_ + + #include "defines.h" + + #define MAXIMUMPARSETRBSPSIZE 1500 + #define MAXIMUMPARSETNALUSIZE 1500 + #define SIZEslice_group_id (sizeof (int) * 60000) // should be sufficient for HUGE pictures, need one int per MB in a picture + + #define MAXSPS 32 + #define MAXPPS 256 + + //! 
Boolean Type + #ifdef FALSE + # define Boolean int + #else + typedef enum { + FALSE, + TRUE + } Boolean; + #endif + + #define MAXIMUMVALUEOFcpb_cnt 32 + typedef struct + { + unsigned cpb_cnt_minus1; // ue(v) + unsigned bit_rate_scale; // u(4) + unsigned cpb_size_scale; // u(4) + unsigned bit_rate_value_minus1 [MAXIMUMVALUEOFcpb_cnt]; // ue(v) + unsigned cpb_size_value_minus1 [MAXIMUMVALUEOFcpb_cnt]; // ue(v) + unsigned cbr_flag [MAXIMUMVALUEOFcpb_cnt]; // u(1) + unsigned initial_cpb_removal_delay_length_minus1; // u(5) + unsigned cpb_removal_delay_length_minus1; // u(5) + unsigned dpb_output_delay_length_minus1; // u(5) + unsigned time_offset_length; // u(5) + } hrd_parameters_t; + + + typedef struct + { + Boolean aspect_ratio_info_present_flag; // u(1) + unsigned aspect_ratio_idc; // u(8) + unsigned sar_width; // u(16) + unsigned sar_height; // u(16) + Boolean overscan_info_present_flag; // u(1) + Boolean overscan_appropriate_flag; // u(1) + Boolean video_signal_type_present_flag; // u(1) + unsigned video_format; // u(3) + Boolean video_full_range_flag; // u(1) + Boolean colour_description_present_flag; // u(1) + unsigned colour_primaries; // u(8) + unsigned transfer_characteristics; // u(8) + unsigned matrix_coefficients; // u(8) + Boolean chroma_location_info_present_flag; // u(1) + unsigned chroma_sample_loc_type_top_field; // ue(v) + unsigned chroma_sample_loc_type_bottom_field; // ue(v) + Boolean timing_info_present_flag; // u(1) + unsigned num_units_in_tick; // u(32) + unsigned time_scale; // u(32) + Boolean fixed_frame_rate_flag; // u(1) + Boolean nal_hrd_parameters_present_flag; // u(1) + hrd_parameters_t nal_hrd_parameters; // hrd_paramters_t + Boolean vcl_hrd_parameters_present_flag; // u(1) + hrd_parameters_t vcl_hrd_parameters; // hrd_paramters_t + // if ((nal_hrd_parameters_present_flag || (vcl_hrd_parameters_present_flag)) + Boolean low_delay_hrd_flag; // u(1) + Boolean pic_struct_present_flag; // u(1) + Boolean bitstream_restriction_flag; // u(1) + 
Boolean motion_vectors_over_pic_boundaries_flag; // u(1) + unsigned max_bytes_per_pic_denom; // ue(v) + unsigned max_bits_per_mb_denom; // ue(v) + unsigned log2_max_mv_length_vertical; // ue(v) + unsigned log2_max_mv_length_horizontal; // ue(v) + unsigned num_reorder_frames; // ue(v) + unsigned max_dec_frame_buffering; // ue(v) + } vui_seq_parameters_t; + + + #define MAXnum_slice_groups_minus1 8 + typedef struct + { + Boolean Valid; // indicates the parameter set is valid + unsigned pic_parameter_set_id; // ue(v) + unsigned seq_parameter_set_id; // ue(v) + Boolean entropy_coding_mode_flag; // u(1) + + Boolean transform_8x8_mode_flag; // u(1) + + Boolean pic_scaling_matrix_present_flag; // u(1) + int pic_scaling_list_present_flag[8]; // u(1) + int ScalingList4x4[6][16]; // se(v) + int ScalingList8x8[2][64]; // se(v) + Boolean UseDefaultScalingMatrix4x4Flag[6]; + Boolean UseDefaultScalingMatrix8x8Flag[2]; + + // if( pic_order_cnt_type < 2 ) in the sequence parameter set + Boolean pic_order_present_flag; // u(1) + unsigned num_slice_groups_minus1; // ue(v) + unsigned slice_group_map_type; // ue(v) + // if( slice_group_map_type = = 0 ) + unsigned run_length_minus1[MAXnum_slice_groups_minus1]; // ue(v) + // else if( slice_group_map_type = = 2 ) + unsigned top_left[MAXnum_slice_groups_minus1]; // ue(v) + unsigned bottom_right[MAXnum_slice_groups_minus1]; // ue(v) + // else if( slice_group_map_type = = 3 || 4 || 5 + Boolean slice_group_change_direction_flag; // u(1) + unsigned slice_group_change_rate_minus1; // ue(v) + // else if( slice_group_map_type = = 6 ) + unsigned num_slice_group_map_units_minus1; // ue(v) + unsigned *slice_group_id; // complete MBAmap u(v) + unsigned num_ref_idx_l0_active_minus1; // ue(v) + unsigned num_ref_idx_l1_active_minus1; // ue(v) + Boolean weighted_pred_flag; // u(1) + unsigned weighted_bipred_idc; // u(2) + int pic_init_qp_minus26; // se(v) + int pic_init_qs_minus26; // se(v) + int chroma_qp_index_offset; // se(v) + + int 
second_chroma_qp_index_offset; // se(v) + + Boolean deblocking_filter_control_present_flag; // u(1) + Boolean constrained_intra_pred_flag; // u(1) + Boolean redundant_pic_cnt_present_flag; // u(1) + } pic_parameter_set_rbsp_t; + + + #define MAXnum_ref_frames_in_pic_order_cnt_cycle 256 + typedef struct + { + Boolean Valid; // indicates the parameter set is valid + + unsigned profile_idc; // u(8) + Boolean constrained_set0_flag; // u(1) + Boolean constrained_set1_flag; // u(1) + Boolean constrained_set2_flag; // u(1) + Boolean constrained_set3_flag; // u(1) + unsigned level_idc; // u(8) + unsigned seq_parameter_set_id; // ue(v) + unsigned chroma_format_idc; // ue(v) + + Boolean seq_scaling_matrix_present_flag; // u(1) + int seq_scaling_list_present_flag[8]; // u(1) + int ScalingList4x4[6][16]; // se(v) + int ScalingList8x8[2][64]; // se(v) + Boolean UseDefaultScalingMatrix4x4Flag[6]; + Boolean UseDefaultScalingMatrix8x8Flag[2]; + + unsigned bit_depth_luma_minus8; // ue(v) + unsigned bit_depth_chroma_minus8; // ue(v) + + unsigned log2_max_frame_num_minus4; // ue(v) + unsigned pic_order_cnt_type; + // if( pic_order_cnt_type == 0 ) + unsigned log2_max_pic_order_cnt_lsb_minus4; // ue(v) + // else if( pic_order_cnt_type == 1 ) + Boolean delta_pic_order_always_zero_flag; // u(1) + int offset_for_non_ref_pic; // se(v) + int offset_for_top_to_bottom_field; // se(v) + unsigned num_ref_frames_in_pic_order_cnt_cycle; // ue(v) + // for( i = 0; i < num_ref_frames_in_pic_order_cnt_cycle; i++ ) + int offset_for_ref_frame[MAXnum_ref_frames_in_pic_order_cnt_cycle]; // se(v) + unsigned num_ref_frames; // ue(v) + Boolean gaps_in_frame_num_value_allowed_flag; // u(1) + unsigned pic_width_in_mbs_minus1; // ue(v) + unsigned pic_height_in_map_units_minus1; // ue(v) + Boolean frame_mbs_only_flag; // u(1) + // if( !frame_mbs_only_flag ) + Boolean mb_adaptive_frame_field_flag; // u(1) + Boolean direct_8x8_inference_flag; // u(1) + Boolean frame_cropping_flag; // u(1) + unsigned 
frame_cropping_rect_left_offset; // ue(v) + unsigned frame_cropping_rect_right_offset; // ue(v) + unsigned frame_cropping_rect_top_offset; // ue(v) + unsigned frame_cropping_rect_bottom_offset; // ue(v) + Boolean vui_parameters_present_flag; // u(1) + vui_seq_parameters_t vui_seq_parameters; // vui_seq_parameters_t + } seq_parameter_set_rbsp_t; + + + pic_parameter_set_rbsp_t *AllocPPS (); + seq_parameter_set_rbsp_t *AllocSPS (); + + void FreePPS (pic_parameter_set_rbsp_t *pps); + void FreeSPS (seq_parameter_set_rbsp_t *sps); + + int sps_is_equal(seq_parameter_set_rbsp_t *sps1, seq_parameter_set_rbsp_t *sps2); + int pps_is_equal(pic_parameter_set_rbsp_t *pps1, pic_parameter_set_rbsp_t *pps2); + + #endif Index: llvm-test/MultiSource/Applications/JM/ldecod/rtp.c diff -c /dev/null llvm-test/MultiSource/Applications/JM/ldecod/rtp.c:1.1 *** /dev/null Sat Feb 11 04:33:42 2006 --- llvm-test/MultiSource/Applications/JM/ldecod/rtp.c Sat Feb 11 04:33:22 2006 *************** *** 0 **** --- 1,364 ---- + + /*! + ************************************************************************ + * \file rtp.c + * + * \brief + * Network Adaptation layer for RTP packets + * + * \author + * Main contributors (see contributors.h for copyright, address and affiliation details) + * - Stephan Wenger + ************************************************************************ + */ + + + /*! + + A quick guide to the basics of the RTP decoder implementation + + This module contains the RTP packetization, de-packetization, and the + handling of Parameter Sets, see VCEG-N52 and accompanying documents. + Note: Compound packets are not yet implemented! + + The interface between every NAL (including the RTP NAL) and the VCL is + based on Slices. The slice data structure on which the VCL is working + is defined in the type Slice (in defines.h). This type contains the + various fields of the slice header and a partition array, which itself + contains the data partitions the slice consists of. 
When data + partitioning is not used, then the whole slice bit string is stored + in partition #0. When individual partitions are missing, this is + indicated by the size of the bit strings in the partition array. + A completely missing slice (e.g. if a Full Slice packet was lost) is + indicated in a similar way. + + Part of the slice structure is the error indication (ei_flag). The + ei_flag is set in those cases in which at least one partition of a slice + is damaged or missing. When data partitioning is used, it can happen that + one partition does not contain any symbols but the ei_flag is cleared, + which indicates the intentional absence of symbols in that partition. + A typical example of this behaviour is the Intra Slice, which does not + have symbols in its type C partition. + + The VCL requests new data to work on through the call of readSliceRTP(). + This function calls the main state machine of this module in ReadRTPpacket(). + + ReadRTPpacket assumes, when called, that in an error-free environment + a complete slice, consisting of one Full Slice RTP packet, or three Partition + packets of types A, B, C with consecutive sequence numbers, can be read. + It first interprets any trailing SUPP and Parameter Update (Header) packets. + Then it reads one video data packet. Two cases have to be distinguished: + + 1. Type A, or Full Slice packet + In this case, the PictureID and the macroblock numbers are used to + identify the potential loss of a slice. A slice is lost when the + StartMB of the newly read slice header is not equal to the current + state of the decoder. + 1.1 Loss detected + In this case the last packet is unread (fseek back), and a dummy slice + containing the missing macroblocks is conveyed to the VCL. At the next + call of the NAL, the same packet is read again, but this time no packet + loss is detected by the above algorithm. + 1.2. 
No loss + In this case it is checked whether a Full Slice packet or a type A data + partition was read + 1.2.1 Full Slice + The Full Slice packet is conveyed to the NAL + 1.2.2 Type A Partition + The function RTPReadDataPartitionedSlice() is called, which collects + the remaining type B, C partitions and handles them appropriately. + + Paraneter Update Packets (aka Header packets) are in an SDP-like syntax + and are interpreted by a simple parser in the function + RTPInterpretParameterSetPacket() + + Each Slice header contaions the information on which parameter set to be used. + The function RTPSetImgInp() copies the information of the relevant parameter + set in the VCL's global variables img-> and inp-> IMPORTANT: any changes + in the semantics of the img-> and inp-> structure members must be represented + in this function as well! + + A note to the stream-buffer data structure: The stream buffer always contains + only the contents of the partition in question, and not the slice/partition + header. Decoding has to start at bitoffset 0 (UVLC) or bytreoffset 0 (CABAC). + + The remaining functions should be self-explanatory. + + */ + + #include "contributors.h" + + #include + #include + #include + #include + #include + + #include "global.h" + #include "errorconcealment.h" + #include "rtp.h" + #include "fmo.h" + #include "sei.h" + #include "memalloc.h" + + + FILE *bits; + + int RTPReadPacket (RTPpacket_t *p, FILE *bits); + + /*! + ************************************************************************ + * \brief + * Opens the bit stream file named fn + * \return + * none + ************************************************************************ + */ + void OpenRTPFile (char *fn) + { + if (NULL == (bits=fopen(fn, "rb"))) + { + snprintf (errortext, ET_SIZE, "Cannot open RTP file '%s'", input->infile); + error(errortext,500); + } + } + + + /*! 
+ ************************************************************************ + * \brief + * Closes the bit stream file + ************************************************************************ + */ + void CloseRTPFile() + { + fclose (bits); + } + + + /*! + ************************************************************************ + * \brief + * Fills nalu->buf and nalu->len with the payload of an RTP packet. + * Other fields in nalu-> remain uninitialized (will be taken care of + * by NALUtoRBSP. + * + * \return + * 4 in case of ok (for compatibility with GetAnnexbNALU) + * 0 if there is nothing any more to read (EOF) + * -1 in case of any error + * + ************************************************************************ + */ + + int GetRTPNALU (NALU_t *nalu) + { + RTPpacket_t *p; + int ret; + + if ((p=malloc (sizeof (RTPpacket_t)))== NULL) + no_mem_exit ("GetRTPNALU-1"); + if ((p->packet=malloc (MAXRTPPACKETSIZE))== NULL) + no_mem_exit ("GetRTPNALU-2"); + if ((p->payload=malloc (MAXRTPPACKETSIZE))== NULL) + no_mem_exit ("GetRTPNALU-3"); + + ret = RTPReadPacket (p, bits); + nalu->forbidden_bit = 1; + nalu->len = 0; + + if (ret < 0) + return -1; + if (ret == 0) + return 0; + + assert (p->paylen < nalu->max_size); + + nalu->len = p->paylen; + memcpy (nalu->buf, p->payload, p->paylen); + nalu->forbidden_bit = (nalu->buf[0]>>7) & 1; + nalu->nal_reference_idc = (nalu->buf[0]>>5) & 3; + nalu->nal_unit_type = (nalu->buf[0]) & 0x1f; + + free (p->payload); + free (p->packet); + free (p); + // printf ("Got an RTP NALU, len %d, first byte %x\n", nalu->len, nalu->buf[0]); + return nalu->len; + } + + + + /*! 
+ ***************************************************************************** + * + * \brief + * DecomposeRTPpacket interprets the RTP packet and writes the various + * structure members of the RTPpacket_t structure + * + * \return + * 0 in case of success + * negative error code in case of failure + * + * \param p + * Caller is responsible to allocate enough memory for the generated payload + * in parameter->payload. Typically a malloc of paclen-12 bytes is sufficient + * + * \par Side effects + * none + * + * \date + * 30 Spetember 2001 + * + * \author + * Stephan Wenger stewe at cs.tu-berlin.de + *****************************************************************************/ + + int DecomposeRTPpacket (RTPpacket_t *p) + + { + // consistency check + assert (p->packlen < 65536 - 28); // IP, UDP headers + assert (p->packlen >= 12); // at least a complete RTP header + assert (p->payload != NULL); + assert (p->packet != NULL); + + // Extract header information + + p->v = p->packet[0] & 0x3; + p->p = (p->packet[0] & 0x4) >> 2; + p->x = (p->packet[0] & 0x8) >> 3; + p->cc = (p->packet[0] & 0xf0) >> 4; + + p->m = p->packet[1] & 0x1; + p->pt = (p->packet[1] & 0xfe) >> 1; + + p->seq = p->packet[2] | (p->packet[3] << 8); + + memcpy (&p->timestamp, &p->packet[4], 4);// change to shifts for unified byte sex + memcpy (&p->ssrc, &p->packet[8], 4);// change to shifts for unified byte sex + + // header consistency checks + if ( (p->v != 2) + || (p->p != 0) + || (p->x != 0) + || (p->cc != 0) ) + { + printf ("DecomposeRTPpacket, RTP header consistency problem, header follows\n"); + DumpRTPHeader (p); + return -1; + } + p->paylen = p->packlen-12; + memcpy (p->payload, &p->packet[12], p->paylen); + return 0; + } + + /*! + ***************************************************************************** + * + * \brief + * DumpRTPHeader is a debug tool that dumps a human-readable interpretation + * of the RTP header + * + * \return + * n.a. 
+ * \param p + * the RTP packet to be dumped, after DecompositeRTPpacket() + * + * \par Side effects + * Debug output to stdout + * + * \date + * 30 Spetember 2001 + * + * \author + * Stephan Wenger stewe at cs.tu-berlin.de + *****************************************************************************/ + + void DumpRTPHeader (RTPpacket_t *p) + + { + int i; + for (i=0; i< 30; i++) + printf ("%02x ", p->packet[i]); + printf ("Version (V): %d\n", p->v); + printf ("Padding (P): %d\n", p->p); + printf ("Extension (X): %d\n", p->x); + printf ("CSRC count (CC): %d\n", p->cc); + printf ("Marker bit (M): %d\n", p->m); + printf ("Payload Type (PT): %d\n", p->pt); + printf ("Sequence Number: %d\n", p->seq); + printf ("Timestamp: %d\n", p->timestamp); + printf ("SSRC: %d\n", p->ssrc); + } + + + /*! + ***************************************************************************** + * + * \brief + * RTPReadPacket reads one packet from file + * + * \return + * 0: EOF + * negative: error + * positive: size of RTP packet in bytes + * + * \param p + * packet data structure, with memory for p->packet allocated + * + * \param bits + * target file + * + * \par Side effects: + * - File pointer in bits moved + * - p->xxx filled by reading and Decomposepacket() + * + * \date + * 04 November, 2001 + * + * \author + * Stephan Wenger, stewe at cs.tu-berlin.de + *****************************************************************************/ + + int RTPReadPacket (RTPpacket_t *p, FILE *bits) + { + int Filepos, intime; + + assert (p != NULL); + assert (p->packet != NULL); + assert (p->payload != NULL); + + Filepos = ftell (bits); + if (4 != fread (&p->packlen,1, 4, bits)) + { + return 0; + } + + if (4 != fread (&intime, 1, 4, bits)) + { + fseek (bits, Filepos, SEEK_SET); + printf ("RTPReadPacket: File corruption, could not read Timestamp, exit\n"); + exit (-1); + } + + assert (p->packlen < MAXRTPPACKETSIZE); + + if (p->packlen != fread (p->packet, 1, p->packlen, bits)) + { + printf 
("RTPReadPacket: File corruption, could not read %d bytes\n", p->packlen); + exit (-1); // EOF inidication + } + + if (DecomposeRTPpacket (p) < 0) + { + // this should never happen, hence exit() is ok. We probably do not want to attempt + // to decode a packet that obviously wasn't generated by RTP + printf ("Errors reported by DecomposePacket(), exit\n"); + exit (-700); + } + assert (p->pt == H26LPAYLOADTYPE); + assert (p->ssrc == 0x12345678); + return p->packlen; + } + Index: llvm-test/MultiSource/Applications/JM/ldecod/rtp.h diff -c /dev/null llvm-test/MultiSource/Applications/JM/ldecod/rtp.h:1.1 *** /dev/null Sat Feb 11 04:33:42 2006 --- llvm-test/MultiSource/Applications/JM/ldecod/rtp.h Sat Feb 11 04:33:22 2006 *************** *** 0 **** --- 1,48 ---- + + /*! + ************************************************************************************* + * \file rtp.h + * + * \brief + * Prototypes for rtp.c + ************************************************************************************* + */ + + #ifndef _RTP_H_ + #define _RTP_H_ + + #include "nalucommon.h" + + #define MAXRTPPAYLOADLEN (65536 - 40) //!< Maximum payload size of an RTP packet */ + #define MAXRTPPACKETSIZE (65536 - 28) //!< Maximum size of an RTP packet incl. 
header */ + #define H26LPAYLOADTYPE 105 //!< RTP paylaod type fixed here for simplicity*/ + #define H26LSSRC 0x12345678 //!< SSRC, chosen to simplify debugging */ + #define RTP_TR_TIMESTAMP_MULT 1000 //!< should be something like 27 Mhz / 29.97 Hz */ + + typedef struct + { + unsigned int v; //!< Version, 2 bits, MUST be 0x2 + unsigned int p; //!< Padding bit, Padding MUST NOT be used + unsigned int x; //!< Extension, MUST be zero + unsigned int cc; /*!< CSRC count, normally 0 in the absence + of RTP mixers */ + unsigned int m; //!< Marker bit + unsigned int pt; //!< 7 bits, Payload Type, dynamically established + unsigned int seq; /*!< RTP sequence number, incremented by one for + each sent packet */ + unsigned int old_seq; //!< to detect wether packets were lost + unsigned int timestamp; //!< timestamp, 27 MHz for H.264 + unsigned int ssrc; //!< Synchronization Source, chosen randomly + byte * payload; //!< the payload including payload headers + unsigned int paylen; //!< length of payload in bytes + byte * packet; //!< complete packet including header and payload + unsigned int packlen; //!< length of packet, typically paylen+12 + } RTPpacket_t; + + void DumpRTPHeader (RTPpacket_t *p); + + int GetRTPNALU (NALU_t *nalu); + void OpenRTPFile (char *fn); + void CloseRTPFile(); + + #endif Index: llvm-test/MultiSource/Applications/JM/ldecod/sei.c diff -c /dev/null llvm-test/MultiSource/Applications/JM/ldecod/sei.c:1.1 *** /dev/null Sat Feb 11 04:33:42 2006 --- llvm-test/MultiSource/Applications/JM/ldecod/sei.c Sat Feb 11 04:33:22 2006 *************** *** 0 **** --- 1,1571 ---- + /*! 
+ ************************************************************************ + * \file sei.c + * + * \brief + * Functions to implement SEI messages + * + * \author + * Main contributors (see contributors.h for copyright, address and affiliation details) + * - Dong Tian + * - Karsten Suehring + ************************************************************************ + */ + + #include "contributors.h" + + #include + #include + #include + + #include "global.h" + #include "memalloc.h" + #include "sei.h" + #include "vlc.h" + #include "header.h" + #include "mbuffer.h" + #include "parset.h" + + extern int UsedBits; + + extern seq_parameter_set_rbsp_t SeqParSet[MAXSPS]; + + + // #define PRINT_BUFFERING_PERIOD_INFO // uncomment to print buffering period SEI info + // #define PRINT_PCITURE_TIMING_INFO // uncomment to print picture timing SEI info + // #define WRITE_MAP_IMAGE // uncomment to write spare picture map + // #define PRINT_SUBSEQUENCE_INFO // uncomment to print sub-sequence SEI info + // #define PRINT_SUBSEQUENCE_LAYER_CHAR // uncomment to print sub-sequence layer characteristics SEI info + // #define PRINT_SUBSEQUENCE_CHAR // uncomment to print sub-sequence characteristics SEI info + // #define PRINT_SCENE_INFORMATION // uncomment to print scene information SEI info + // #define PRINT_PAN_SCAN_RECT // uncomment to print pan-scan rectangle SEI info + // #define PRINT_RECOVERY_POINT // uncomment to print random access point SEI info + // #define PRINT_FILLER_PAYLOAD_INFO // uncomment to print filler payload SEI info + // #define PRINT_DEC_REF_PIC_MARKING // uncomment to print decoded picture buffer management repetition SEI info + // #define PRINT_RESERVED_INFO // uncomment to print reserved SEI info + // #define PRINT_USER_DATA_UNREGISTERED_INFO // uncomment to print unregistered user data SEI info + // #define PRINT_USER_DATA_REGISTERED_ITU_T_T35_INFO // uncomment to print ITU-T T.35 user data SEI info + // #define PRINT_FULL_FRAME_FREEZE_INFO // uncomment to print 
full-frame freeze SEI info + // #define PRINT_FULL_FRAME_FREEZE_RELEASE_INFO // uncomment to print full-frame freeze release SEI info + // #define PRINT_FULL_FRAME_SNAPSHOT_INFO // uncomment to print full-frame snapshot SEI info + // #define PRINT_PROGRESSIVE_REFINEMENT_END_INFO // uncomment to print Progressive refinement segment start SEI info + // #define PRINT_PROGRESSIVE_REFINEMENT_END_INFO // uncomment to print Progressive refinement segment end SEI info + // #define PRINT_MOTION_CONST_SLICE_GROUP_SET_INFO // uncomment to print Motion-constrained slice group set SEI info + + /*! + ************************************************************************ + * \brief + * Interpret the SEI rbsp + * \param msg + * a pointer that point to the sei message. + * \param size + * the size of the sei message + * \param img + * the image pointer + * + ************************************************************************ + */ + void InterpretSEIMessage(byte* msg, int size, ImageParameters *img) + { + int payload_type = 0; + int payload_size = 0; + int offset = 1; + byte tmp_byte; + do + { + // sei_message(); + payload_type = 0; + tmp_byte = msg[offset++]; + while (tmp_byte == 0xFF) + { + payload_type += 255; + tmp_byte = msg[offset++]; + } + payload_type += tmp_byte; // this is the last byte + + payload_size = 0; + tmp_byte = msg[offset++]; + while (tmp_byte == 0xFF) + { + payload_size += 255; + tmp_byte = msg[offset++]; + } + payload_size += tmp_byte; // this is the last byte + + switch ( payload_type ) // sei_payload( type, size ); + { + case SEI_BUFFERING_PERIOD: + interpret_buffering_period_info( msg+offset, payload_size, img ); + break; + case SEI_PIC_TIMING: + interpret_picture_timing_info( msg+offset, payload_size, img ); + break; + case SEI_PAN_SCAN_RECT: + interpret_pan_scan_rect_info( msg+offset, payload_size, img ); + break; + case SEI_FILLER_PAYLOAD: + interpret_filler_payload_info( msg+offset, payload_size, img ); + break; + case 
SEI_USER_DATA_REGISTERED_ITU_T_T35: + interpret_user_data_registered_itu_t_t35_info( msg+offset, payload_size, img ); + break; + case SEI_USER_DATA_UNREGISTERED: + interpret_user_data_unregistered_info( msg+offset, payload_size, img ); + break; + case SEI_RECOVERY_POINT: + interpret_recovery_point_info( msg+offset, payload_size, img ); + break; + case SEI_DEC_REF_PIC_MARKING_REPETITION: + interpret_dec_ref_pic_marking_repetition_info( msg+offset, payload_size, img ); + break; + case SEI_SPARE_PIC: + interpret_spare_pic( msg+offset, payload_size, img ); + break; + case SEI_SCENE_INFO: + interpret_scene_information( msg+offset, payload_size, img ); + break; + case SEI_SUB_SEQ_INFO: + interpret_subsequence_info( msg+offset, payload_size, img ); + break; + case SEI_SUB_SEQ_LAYER_CHARACTERISTICS: + interpret_subsequence_layer_characteristics_info( msg+offset, payload_size, img ); + break; + case SEI_SUB_SEQ_CHARACTERISTICS: + interpret_subsequence_characteristics_info( msg+offset, payload_size, img ); + break; + case SEI_FULL_FRAME_FREEZE: + interpret_full_frame_freeze_info( msg+offset, payload_size, img ); + break; + case SEI_FULL_FRAME_FREEZE_RELEASE: + interpret_full_frame_freeze_release_info( msg+offset, payload_size, img ); + break; + case SEI_FULL_FRAME_SNAPSHOT: + interpret_full_frame_snapshot_info( msg+offset, payload_size, img ); + break; + case SEI_PROGRESSIVE_REFINEMENT_SEGMENT_START: + interpret_progressive_refinement_end_info( msg+offset, payload_size, img ); + break; + case SEI_PROGRESSIVE_REFINEMENT_SEGMENT_END: + interpret_progressive_refinement_end_info( msg+offset, payload_size, img ); + break; + case SEI_MOTION_CONSTRAINED_SLICE_GROUP_SET: + interpret_motion_constrained_slice_group_set_info( msg+offset, payload_size, img ); + break; + default: + interpret_reserved_info( msg+offset, payload_size, img ); + break; + } + offset += payload_size; + + } while( msg[offset] != 0x80 ); // more_rbsp_data() msg[offset] != 0x80 + // ignore the trailing bits 
rbsp_trailing_bits(); + assert(msg[offset] == 0x80); // this is the trailing bits + assert( offset+1 == size ); + } + + + /*! + ************************************************************************ + * \brief + * Interpret the spare picture SEI message + * \param payload + * a pointer that point to the sei payload + * \param size + * the size of the sei message + * \param img + * the image pointer + * + ************************************************************************ + */ + void interpret_spare_pic( byte* payload, int size, ImageParameters *img ) + { + int i,x,y; + Bitstream* buf; + int bit0, bit1, bitc, no_bit0; + int target_frame_num; + int num_spare_pics; + int delta_spare_frame_num, CandidateSpareFrameNum, SpareFrameNum = 0; + int ref_area_indicator; + + int m, n, left, right, top, bottom,directx, directy; + byte ***map; + + #ifdef WRITE_MAP_IMAGE + int symbol_size_in_bytes = img->pic_unit_bitsize_on_disk/8; + int j, k, i0, j0, tmp, kk; + char filename[20] = "map_dec.yuv"; + FILE *fp; + imgpel** Y; + static int old_pn=-1; + static int first = 1; + + printf("Spare picture SEI message\n"); + #endif + + UsedBits = 0; + + assert( payload!=NULL); + assert( img!=NULL); + + buf = malloc(sizeof(Bitstream)); + buf->bitstream_length = size; + buf->streamBuffer = payload; + buf->frame_bitoffset = 0; + + target_frame_num = ue_v("SEI: target_frame_num", buf); + + #ifdef WRITE_MAP_IMAGE + printf( "target_frame_num is %d\n", target_frame_num ); + #endif + + num_spare_pics = 1 + ue_v("SEI: num_spare_pics_minus1", buf); + + #ifdef WRITE_MAP_IMAGE + printf( "num_spare_pics is %d\n", num_spare_pics ); + #endif + + get_mem3D(&map, num_spare_pics, img->height/16, img->width/16); + + for (i=0; iheight/16; y++) + for (x=0; xwidth/16; x++) + map[i][y][x] = 0; + break; + case 1: // The map is not compressed + for (y=0; yheight/16; y++) + for (x=0; xwidth/16; x++) + { + map[i][y][x] = u_1("SEI: ref_mb_indicator", buf); + } + break; + case 2: // The map is compressed + //!KS: 
could not check this function, description is unclear (as stated in Ed. Note) + bit0 = 0; + bit1 = 1; + bitc = bit0; + no_bit0 = -1; + + x = ( img->width/16 - 1 ) / 2; + y = ( img->height/16 - 1 ) / 2; + left = right = x; + top = bottom = y; + directx = 0; + directy = 1; + + for (m=0; mheight/16; m++) + for (n=0; nwidth/16; n++) + { + + if (no_bit0<0) + { + no_bit0 = ue_v("SEI: zero_run_length", buf); + } + if (no_bit0>0) map[i][y][x] = bit0; + else map[i][y][x] = bit1; + no_bit0--; + + // go to the next mb: + if ( directx == -1 && directy == 0 ) + { + if (x > left) x--; + else if (x == 0) + { + y = bottom + 1; + bottom++; + directx = 1; + directy = 0; + } + else if (x == left) + { + x--; + left--; + directx = 0; + directy = 1; + } + } + else if ( directx == 1 && directy == 0 ) + { + if (x < right) x++; + else if (x == img->width/16 - 1) + { + y = top - 1; + top--; + directx = -1; + directy = 0; + } + else if (x == right) + { + x++; + right++; + directx = 0; + directy = -1; + } + } + else if ( directx == 0 && directy == -1 ) + { + if ( y > top) y--; + else if (y == 0) + { + x = left - 1; + left--; + directx = 0; + directy = 1; + } + else if (y == top) + { + y--; + top--; + directx = -1; + directy = 0; + } + } + else if ( directx == 0 && directy == 1 ) + { + if (y < bottom) y++; + else if (y == img->height/16 - 1) + { + x = right+1; + right++; + directx = 0; + directy = -1; + } + else if (y == bottom) + { + y++; + bottom++; + directx = 1; + directy = 0; + } + } + + + } + break; + default: + printf( "Wrong ref_area_indicator %d!\n", ref_area_indicator ); + exit(0); + break; + } + + } // end of num_spare_pics + + #ifdef WRITE_MAP_IMAGE + // begin to write map seq + if ( old_pn != img->number ) + { + old_pn = img->number; + get_mem2Dpel(&Y, img->height, img->width); + if (first) + { + fp = fopen( filename, "wb" ); + first = 0; + } + else + fp = fopen( filename, "ab" ); + assert( fp != NULL ); + for (kk=0; kkheight/16; i++) + for (j=0; j < img->width/16; j++) + { + 
tmp=map[kk][i][j]==0? img->max_imgpel_value : 0; + for (i0=0; i0<16; i0++) + for (j0=0; j0<16; j0++) + Y[i*16+i0][j*16+j0]=tmp; + } + + // write the map image + for (i=0; i < img->height; i++) + for (j=0; j < img->width; j++) + fwrite(&(Y[i][j]), symbol_size_in_bytes, 1, p_out); + + for (k=0; k < 2; k++) + for (i=0; i < img->height/2; i++) + for (j=0; j < img->width/2; j++) + fwrite(&(img->dc_pred_value), symbol_size_in_bytes, 1, p_out); + } + fclose( fp ); + free_mem2Dpel( Y ); + } + // end of writing map image + #undef WRITE_MAP_IMAGE + #endif + + free_mem3D( map, num_spare_pics ); + + free(buf); + } + + + /*! + ************************************************************************ + * \brief + * Interpret the Sub-sequence information SEI message + * \param payload + * a pointer that point to the sei payload + * \param size + * the size of the sei message + * \param img + * the image pointer + * + ************************************************************************ + */ + void interpret_subsequence_info( byte* payload, int size, ImageParameters *img ) + { + Bitstream* buf; + int sub_seq_layer_num, sub_seq_id, first_ref_pic_flag, leading_non_ref_pic_flag, last_pic_flag, + sub_seq_frame_num_flag, sub_seq_frame_num; + + buf = malloc(sizeof(Bitstream)); + buf->bitstream_length = size; + buf->streamBuffer = payload; + buf->frame_bitoffset = 0; + + UsedBits = 0; + + sub_seq_layer_num = ue_v("SEI: sub_seq_layer_num" , buf); + sub_seq_id = ue_v("SEI: sub_seq_id" , buf); + first_ref_pic_flag = u_1 ("SEI: first_ref_pic_flag" , buf); + leading_non_ref_pic_flag = u_1 ("SEI: leading_non_ref_pic_flag", buf); + last_pic_flag = u_1 ("SEI: last_pic_flag" , buf); + sub_seq_frame_num_flag = u_1 ("SEI: sub_seq_frame_num_flag" , buf); + if (sub_seq_frame_num_flag) + { + sub_seq_frame_num = ue_v("SEI: sub_seq_frame_num" , buf); + } + + #ifdef PRINT_SUBSEQUENCE_INFO + printf("Sub-sequence information SEI message\n"); + printf("sub_seq_layer_num = %d\n", sub_seq_layer_num ); + 
printf("sub_seq_id = %d\n", sub_seq_id); + printf("first_ref_pic_flag = %d\n", first_ref_pic_flag); + printf("leading_non_ref_pic_flag = %d\n", leading_non_ref_pic_flag); + printf("last_pic_flag = %d\n", last_pic_flag); + printf("sub_seq_frame_num_flag = %d\n", sub_seq_frame_num_flag); + if (sub_seq_frame_num_flag) + { + printf("sub_seq_frame_num = %d\n", sub_seq_frame_num); + } + #endif + + free(buf); + #ifdef PRINT_SUBSEQUENCE_INFO + #undef PRINT_SUBSEQUENCE_INFO + #endif + } + + /*! + ************************************************************************ + * \brief + * Interpret the Sub-sequence layer characteristics SEI message + * \param payload + * a pointer that point to the sei payload + * \param size + * the size of the sei message + * \param img + * the image pointer + * + ************************************************************************ + */ + void interpret_subsequence_layer_characteristics_info( byte* payload, int size, ImageParameters *img ) + { + Bitstream* buf; + long num_sub_layers, accurate_statistics_flag, average_bit_rate, average_frame_rate; + int i; + + buf = malloc(sizeof(Bitstream)); + buf->bitstream_length = size; + buf->streamBuffer = payload; + buf->frame_bitoffset = 0; + + UsedBits = 0; + + num_sub_layers = 1 + ue_v("SEI: num_sub_layers_minus1", buf); + + #ifdef PRINT_SUBSEQUENCE_LAYER_CHAR + printf("Sub-sequence layer characteristics SEI message\n"); + printf("num_sub_layers_minus1 = %d\n", num_sub_layers - 1); + #endif + + for (i=0; ibitstream_length = size; + buf->streamBuffer = payload; + buf->frame_bitoffset = 0; + + UsedBits = 0; + + sub_seq_layer_num = ue_v("SEI: sub_seq_layer_num", buf); + sub_seq_id = ue_v("SEI: sub_seq_id", buf); + duration_flag = u_1 ("SEI: duration_flag", buf); + + #ifdef PRINT_SUBSEQUENCE_CHAR + printf("Sub-sequence characteristics SEI message\n"); + printf("sub_seq_layer_num = %d\n", sub_seq_layer_num ); + printf("sub_seq_id = %d\n", sub_seq_id); + printf("duration_flag = %d\n", duration_flag); + 
#endif + + if ( duration_flag ) + { + sub_seq_duration = u_v (32, "SEI: duration_flag", buf); + #ifdef PRINT_SUBSEQUENCE_CHAR + printf("sub_seq_duration = %ld\n", sub_seq_duration); + #endif + } + + average_rate_flag = u_1 ("SEI: average_rate_flag", buf); + + #ifdef PRINT_SUBSEQUENCE_CHAR + printf("average_rate_flag = %d\n", average_rate_flag); + #endif + + if ( average_rate_flag ) + { + accurate_statistics_flag = u_1 ( "SEI: accurate_statistics_flag", buf); + average_bit_rate = u_v (16, "SEI: average_bit_rate", buf); + average_frame_rate = u_v (16, "SEI: average_frame_rate", buf); + + #ifdef PRINT_SUBSEQUENCE_CHAR + printf("accurate_statistics_flag = %d\n", accurate_statistics_flag); + printf("average_bit_rate = %ld\n", average_bit_rate); + printf("average_frame_rate = %ld\n", average_frame_rate); + #endif + } + + num_referenced_subseqs = ue_v("SEI: num_referenced_subseqs", buf); + + #ifdef PRINT_SUBSEQUENCE_CHAR + printf("num_referenced_subseqs = %d\n", num_referenced_subseqs); + #endif + + for (i=0; ibitstream_length = size; + buf->streamBuffer = payload; + buf->frame_bitoffset = 0; + + UsedBits = 0; + + scene_id = ue_v("SEI: scene_id" , buf); + scene_transition_type = ue_v("SEI: scene_transition_type", buf); + if ( scene_transition_type > 3 ) + { + second_scene_id = ue_v("SEI: scene_transition_type", buf);; + } + + #ifdef PRINT_SCENE_INFORMATION + printf("Scene information SEI message\n"); + printf("scene_transition_type = %d\n", scene_transition_type); + printf("scene_id = %d\n", scene_id); + if ( scene_transition_type > 3 ) + { + printf("second_scene_id = %d\n", second_scene_id); + } + #endif + free( buf ); + #ifdef PRINT_SCENE_INFORMATION + #undef PRINT_SCENE_INFORMATION + #endif + } + + + /*! 
+ ************************************************************************ + * \brief + * Interpret the Filler payload SEI message + * \param payload + * a pointer that point to the sei payload + * \param size + * the size of the sei message + * \param img + * the image pointer + * + ************************************************************************ + */ + void interpret_filler_payload_info( byte* payload, int size, ImageParameters *img ) + { + int payload_cnt = 0; + + while (payload_cnt=16); + + for (offset = 0; offset < 16; offset++) + { + #ifdef PRINT_USER_DATA_UNREGISTERED_INFO + printf("%02x",payload[offset]); + #endif + } + + #ifdef PRINT_USER_DATA_UNREGISTERED_INFO + printf("\n"); + #endif + + while (offset < size) + { + payload_byte = payload[offset]; + offset ++; + #ifdef PRINT_USER_DATA_UNREGISTERED_INFO + printf("Unreg data payload_byte = %d\n", payload_byte); + #endif + } + #ifdef PRINT_USER_DATA_UNREGISTERED_INFO + #undef PRINT_USER_DATA_UNREGISTERED_INFO + #endif + } + + + /*! 
+ ************************************************************************ + * \brief + * Interpret the User data registered by ITU-T T.35 SEI message + * \param payload + * a pointer that point to the sei payload + * \param size + * the size of the sei message + * \param img + * the image pointer + * + ************************************************************************ + */ + void interpret_user_data_registered_itu_t_t35_info( byte* payload, int size, ImageParameters *img ) + { + int offset = 0; + byte itu_t_t35_country_code, itu_t_t35_country_code_extension_byte, payload_byte; + + itu_t_t35_country_code = payload[offset]; + offset++; + #ifdef PRINT_USER_DATA_REGISTERED_ITU_T_T35_INFO + printf("User data registered by ITU-T T.35 SEI message\n"); + printf(" itu_t_t35_country_code = %d \n", itu_t_t35_country_code); + #endif + if(itu_t_t35_country_code == 0xFF) + { + itu_t_t35_country_code_extension_byte = payload[offset]; + offset++; + #ifdef PRINT_USER_DATA_REGISTERED_ITU_T_T35_INFO + printf(" ITU_T_T35_COUNTRTY_CODE_EXTENSION_BYTE %d \n", itu_t_t35_country_code_extension_byte); + #endif + } + while (offset < size) + { + payload_byte = payload[offset]; + offset ++; + #ifdef PRINT_USER_DATA_REGISTERED_ITU_T_T35_INFO + printf("itu_t_t35 payload_byte = %d\n", payload_byte); + #endif + } + #ifdef PRINT_USER_DATA_REGISTERED_ITU_T_T35_INFO + #undef PRINT_USER_DATA_REGISTERED_ITU_T_T35_INFO + #endif + } + + + /*! 
+ ************************************************************************ + * \brief + * Interpret the Pan scan rectangle SEI message + * \param payload + * a pointer that point to the sei payload + * \param size + * the size of the sei message + * \param img + * the image pointer + * + ************************************************************************ + */ + void interpret_pan_scan_rect_info( byte* payload, int size, ImageParameters *img ) + { + int pan_scan_rect_id, pan_scan_rect_left_offset, pan_scan_rect_right_offset; + int pan_scan_rect_top_offset, pan_scan_rect_bottom_offset; + + Bitstream* buf; + + buf = malloc(sizeof(Bitstream)); + buf->bitstream_length = size; + buf->streamBuffer = payload; + buf->frame_bitoffset = 0; + + UsedBits = 0; + + pan_scan_rect_id = ue_v("SEI: pan_scan_rect_id", buf); + + pan_scan_rect_left_offset = se_v("SEI: pan_scan_rect_left_offset" , buf); + pan_scan_rect_right_offset = se_v("SEI: pan_scan_rect_right_offset" , buf); + pan_scan_rect_top_offset = se_v("SEI: pan_scan_rect_top_offset" , buf); + pan_scan_rect_bottom_offset = se_v("SEI: pan_scan_rect_bottom_offset", buf); + + #ifdef PRINT_PAN_SCAN_RECT + printf("Pan scan rectangle SEI message\n"); + printf("pan_scan_rect_id = %d\n", pan_scan_rect_id); + printf("pan_scan_rect_left_offset = %d\n", pan_scan_rect_left_offset); + printf("pan_scan_rect_right_offset = %d\n", pan_scan_rect_right_offset); + printf("pan_scan_rect_top_offset = %d\n", pan_scan_rect_top_offset); + printf("pan_scan_rect_bottom_offset = %d\n", pan_scan_rect_bottom_offset); + #endif + free (buf); + #ifdef PRINT_PAN_SCAN_RECT + #undef PRINT_PAN_SCAN_RECT + #endif + } + + + /*! 
+ ************************************************************************ + * \brief + * Interpret the Random access point SEI message + * \param payload + * a pointer that point to the sei payload + * \param size + * the size of the sei message + * \param img + * the image pointer + * + ************************************************************************ + */ + void interpret_recovery_point_info( byte* payload, int size, ImageParameters *img ) + { + int recovery_frame_cnt, exact_match_flag, broken_link_flag, changing_slice_group_idc; + + + Bitstream* buf; + + + buf = malloc(sizeof(Bitstream)); + buf->bitstream_length = size; + buf->streamBuffer = payload; + buf->frame_bitoffset = 0; + + UsedBits = 0; + + recovery_frame_cnt = ue_v( "SEI: recovery_frame_cnt" , buf); + exact_match_flag = u_1 ( "SEI: exact_match_flag" , buf); + broken_link_flag = u_1 ( "SEI: broken_link_flag" , buf); + changing_slice_group_idc = u_v ( 2, "SEI: changing_slice_group_idc", buf); + + #ifdef PRINT_RECOVERY_POINT + printf("Recovery point SEI message\n"); + printf("recovery_frame_cnt = %d\n", recovery_frame_cnt); + printf("exact_match_flag = %d\n", exact_match_flag); + printf("broken_link_flag = %d\n", broken_link_flag); + printf("changing_slice_group_idc = %d\n", changing_slice_group_idc); + #endif + free (buf); + #ifdef PRINT_RECOVERY_POINT + #undef PRINT_RECOVERY_POINT + #endif + } + + + /*! 
+ ************************************************************************ + * \brief + * Interpret the Decoded Picture Buffer Management Repetition SEI message + * \param payload + * a pointer that point to the sei payload + * \param size + * the size of the sei message + * \param img + * the image pointer + * + ************************************************************************ + */ + void interpret_dec_ref_pic_marking_repetition_info( byte* payload, int size, ImageParameters *img ) + { + int original_idr_flag, original_frame_num; + + DecRefPicMarking_t *tmp_drpm; + + DecRefPicMarking_t *old_drpm; + int old_idr_flag , old_no_output_of_prior_pics_flag, old_long_term_reference_flag , old_adaptive_ref_pic_buffering_flag; + + + Bitstream* buf; + + buf = malloc(sizeof(Bitstream)); + buf->bitstream_length = size; + buf->streamBuffer = payload; + buf->frame_bitoffset = 0; + + UsedBits = 0; + + original_idr_flag = u_1 ( "SEI: original_idr_flag" , buf); + original_frame_num = ue_v( "SEI: original_frame_num" , buf); + + #ifdef PRINT_DEC_REF_PIC_MARKING + printf("Decoded Picture Buffer Management Repetition SEI message\n"); + printf("original_idr_flag = %d\n", original_idr_flag); + printf("original_frame_num = %d\n", original_frame_num); + #endif + + // we need to save everything that is probably overwritten in dec_ref_pic_marking() + old_drpm = img->dec_ref_pic_marking_buffer; + old_idr_flag = img->idr_flag; + + old_no_output_of_prior_pics_flag = img->no_output_of_prior_pics_flag; + old_long_term_reference_flag = img->long_term_reference_flag; + old_adaptive_ref_pic_buffering_flag = img->adaptive_ref_pic_buffering_flag; + + // set new initial values + img->idr_flag = original_idr_flag; + img->dec_ref_pic_marking_buffer = NULL; + + dec_ref_pic_marking(buf); + + // print out decoded values + #ifdef PRINT_DEC_REF_PIC_MARKING + if (img->idr_flag) + { + printf("no_output_of_prior_pics_flag = %d\n", img->no_output_of_prior_pics_flag); + printf("long_term_reference_flag = 
%d\n", img->long_term_reference_flag); + } + else + { + printf("adaptive_ref_pic_buffering_flag = %d\n", img->adaptive_ref_pic_buffering_flag); + if (img->adaptive_ref_pic_buffering_flag) + { + tmp_drpm=img->dec_ref_pic_marking_buffer; + while (tmp_drpm != NULL) + { + printf("memory_management_control_operation = %d\n", tmp_drpm->memory_management_control_operation); + + if ((tmp_drpm->memory_management_control_operation==1)||(tmp_drpm->memory_management_control_operation==3)) + { + printf("difference_of_pic_nums_minus1 = %d\n", tmp_drpm->difference_of_pic_nums_minus1); + } + if (tmp_drpm->memory_management_control_operation==2) + { + printf("long_term_pic_num = %d\n", tmp_drpm->long_term_pic_num); + } + if ((tmp_drpm->memory_management_control_operation==3)||(tmp_drpm->memory_management_control_operation==6)) + { + printf("long_term_frame_idx = %d\n", tmp_drpm->long_term_frame_idx); + } + if (tmp_drpm->memory_management_control_operation==4) + { + printf("max_long_term_pic_idx_plus1 = %d\n", tmp_drpm->max_long_term_frame_idx_plus1); + } + tmp_drpm = tmp_drpm->Next; + } + + } + } + #endif + + while (img->dec_ref_pic_marking_buffer) + { + tmp_drpm = img->dec_ref_pic_marking_buffer->Next; + free (tmp_drpm); + } + + // restore old values in img + img->dec_ref_pic_marking_buffer = old_drpm; + img->idr_flag = old_idr_flag; + img->no_output_of_prior_pics_flag = old_no_output_of_prior_pics_flag; + img->long_term_reference_flag = old_long_term_reference_flag; + img->adaptive_ref_pic_buffering_flag = old_adaptive_ref_pic_buffering_flag; + + + free (buf); + #ifdef PRINT_DEC_REF_PIC_MARKING + #undef PRINT_DEC_REF_PIC_MARKING + #endif + } + + /*! 
+ ************************************************************************ + * \brief + * Interpret the Full-frame freeze SEI message + * \param payload + * a pointer that point to the sei payload + * \param size + * the size of the sei message + * \param img + * the image pointer + * + ************************************************************************ + */ + void interpret_full_frame_freeze_info( byte* payload, int size, ImageParameters *img ) + { + #ifdef PRINT_FULL_FRAME_FREEZE_INFO + printf("Full-frame freeze SEI message\n"); + if (size) + { + printf("payload size of this message should be zero, but is %d bytes.\n", size); + } + #endif + + #ifdef PRINT_FULL_FRAME_FREEZE_INFO + #undef PRINT_FULL_FRAME_FREEZE_INFO + #endif + } + + + /*! + ************************************************************************ + * \brief + * Interpret the Full-frame freeze release SEI message + * \param payload + * a pointer that point to the sei payload + * \param size + * the size of the sei message + * \param img + * the image pointer + * + ************************************************************************ + */ + void interpret_full_frame_freeze_release_info( byte* payload, int size, ImageParameters *img ) + { + #ifdef PRINT_FULL_FRAME_FREEZE_RELEASE_INFO + printf("Full-frame freeze release SEI message\n"); + if (size) + { + printf("payload size of this message should be zero, but is %d bytes.\n", size); + } + #endif + + #ifdef PRINT_FULL_FRAME_FREEZE_RELEASE_INFO + #undef PRINT_FULL_FRAME_FREEZE_RELEASE_INFO + #endif + } + + /*! 
+ ************************************************************************ + * \brief + * Interpret the Full-frame snapshot SEI message + * \param payload + * a pointer that point to the sei payload + * \param size + * the size of the sei message + * \param img + * the image pointer + * + ************************************************************************ + */ + void interpret_full_frame_snapshot_info( byte* payload, int size, ImageParameters *img ) + { + int snapshot_id; + + Bitstream* buf; + + buf = malloc(sizeof(Bitstream)); + buf->bitstream_length = size; + buf->streamBuffer = payload; + buf->frame_bitoffset = 0; + + UsedBits = 0; + + snapshot_id = ue_v("SEI: snapshot_id", buf); + + #ifdef PRINT_FULL_FRAME_SNAPSHOT_INFO + printf("Full-frame snapshot SEI message\n"); + printf("snapshot_id = %d\n", snapshot_id); + #endif + free (buf); + #ifdef PRINT_FULL_FRAME_SNAPSHOT_INFO + #undef PRINT_FULL_FRAME_SNAPSHOT_INFO + #endif + } + + /*! + ************************************************************************ + * \brief + * Interpret the Progressive refinement segment start SEI message + * \param payload + * a pointer that point to the sei payload + * \param size + * the size of the sei message + * \param img + * the image pointer + * + ************************************************************************ + */ + void interpret_progressive_refinement_start_info( byte* payload, int size, ImageParameters *img ) + { + int progressive_refinement_id, num_refinement_steps_minus1; + + Bitstream* buf; + + buf = malloc(sizeof(Bitstream)); + buf->bitstream_length = size; + buf->streamBuffer = payload; + buf->frame_bitoffset = 0; + + UsedBits = 0; + + progressive_refinement_id = ue_v("SEI: progressive_refinement_id" , buf); + num_refinement_steps_minus1 = ue_v("SEI: num_refinement_steps_minus1", buf); + + #ifdef PRINT_PROGRESSIVE_REFINEMENT_START_INFO + printf("Progressive refinement segment start SEI message\n"); + printf("progressive_refinement_id = %d\n", 
progressive_refinement_id); + printf("num_refinement_steps_minus1 = %d\n", num_refinement_steps_minus1); + #endif + free (buf); + #ifdef PRINT_PROGRESSIVE_REFINEMENT_START_INFO + #undef PRINT_PROGRESSIVE_REFINEMENT_START_INFO + #endif + } + + + /*! + ************************************************************************ + * \brief + * Interpret the Progressive refinement segment end SEI message + * \param payload + * a pointer that point to the sei payload + * \param size + * the size of the sei message + * \param img + * the image pointer + * + ************************************************************************ + */ + void interpret_progressive_refinement_end_info( byte* payload, int size, ImageParameters *img ) + { + int progressive_refinement_id; + + Bitstream* buf; + + buf = malloc(sizeof(Bitstream)); + buf->bitstream_length = size; + buf->streamBuffer = payload; + buf->frame_bitoffset = 0; + + UsedBits = 0; + + progressive_refinement_id = ue_v("SEI: progressive_refinement_id" , buf); + + #ifdef PRINT_PROGRESSIVE_REFINEMENT_END_INFO + printf("Progressive refinement segment end SEI message\n"); + printf("progressive_refinement_id = %d\n", progressive_refinement_id); + #endif + free (buf); + #ifdef PRINT_PROGRESSIVE_REFINEMENT_END_INFO + #undef PRINT_PROGRESSIVE_REFINEMENT_END_INFO + #endif + } + + + /*! 
+ ************************************************************************ + * \brief + * Interpret the Motion-constrained slice group set SEI message + * \param payload + * a pointer that point to the sei payload + * \param size + * the size of the sei message + * \param img + * the image pointer + * + ************************************************************************ + */ + void interpret_motion_constrained_slice_group_set_info( byte* payload, int size, ImageParameters *img ) + { + int num_slice_groups_minus1, slice_group_id, exact_match_flag, pan_scan_rect_flag, pan_scan_rect_id; + int i; + + Bitstream* buf; + + buf = malloc(sizeof(Bitstream)); + buf->bitstream_length = size; + buf->streamBuffer = payload; + buf->frame_bitoffset = 0; + + UsedBits = 0; + + num_slice_groups_minus1 = ue_v("SEI: num_slice_groups_minus1" , buf); + + #ifdef PRINT_MOTION_CONST_SLICE_GROUP_SET_INFO + printf("Motion-constrained slice group set SEI message\n"); + printf("num_slice_groups_minus1 = %d\n", num_slice_groups_minus1); + #endif + + for (i=0; i<=num_slice_groups_minus1;i++) + { + slice_group_id = ue_v("SEI: slice_group_id" , buf); + #ifdef PRINT_MOTION_CONST_SLICE_GROUP_SET_INFO + printf("slice_group_id = %d\n", slice_group_id); + #endif + } + + exact_match_flag = u_1("SEI: exact_match_flag" , buf); + pan_scan_rect_flag = u_1("SEI: pan_scan_rect_flag" , buf); + + #ifdef PRINT_MOTION_CONST_SLICE_GROUP_SET_INFO + printf("exact_match_flag = %d\n", exact_match_flag); + printf("pan_scan_rect_flag = %d\n", pan_scan_rect_flag); + #endif + + if (pan_scan_rect_flag) + { + pan_scan_rect_id = ue_v("SEI: pan_scan_rect_id" , buf); + #ifdef PRINT_MOTION_CONST_SLICE_GROUP_SET_INFO + printf("pan_scan_rect_id = %d\n", pan_scan_rect_id); + #endif + } + + free (buf); + #ifdef PRINT_MOTION_CONST_SLICE_GROUP_SET_INFO + #undef PRINT_MOTION_CONST_SLICE_GROUP_SET_INFO + #endif + } + + + /*! 
+ ************************************************************************ + * \brief + * Interpret the Reserved SEI message + * \param payload + * a pointer that point to the sei payload + * \param size + * the size of the sei message + * \param img + * the image pointer + * + ************************************************************************ + */ + void interpret_reserved_info( byte* payload, int size, ImageParameters *img ) + { + int offset = 0; + byte payload_byte; + + #ifdef PRINT_RESERVED_INFO + printf("Reserved SEI message\n"); + #endif + assert (size<16); + + while (offset < size) + { + payload_byte = payload[offset]; + offset ++; + #ifdef PRINT_RESERVED_INFO + printf("reserved_sei_message_payload_byte = %d\n", payload_byte); + #endif + } + #ifdef PRINT_RESERVED_INFO + #undef PRINT_RESERVED_INFO + #endif + } + + + /*! + ************************************************************************ + * \brief + * Interpret the Buffering period SEI message + * \param payload + * a pointer that point to the sei payload + * \param size + * the size of the sei message + * \param img + * the image pointer + * + ************************************************************************ + */ + void interpret_buffering_period_info( byte* payload, int size, ImageParameters *img ) + { + int seq_parameter_set_id, initial_cpb_removal_delay, initial_cpb_removal_delay_offset; + unsigned int k; + + Bitstream* buf; + seq_parameter_set_rbsp_t *sps; + + + buf = malloc(sizeof(Bitstream)); + buf->bitstream_length = size; + buf->streamBuffer = payload; + buf->frame_bitoffset = 0; + + UsedBits = 0; + + seq_parameter_set_id = ue_v("SEI: seq_parameter_set_id" , buf); + + sps = &SeqParSet[seq_parameter_set_id]; + + activate_sps(sps); + + #ifdef PRINT_BUFFERING_PERIOD_INFO + printf("Buffering period SEI message\n"); + printf("seq_parameter_set_id = %d\n", seq_parameter_set_id); + #endif + + // Note: NalHrdBpPresentFlag and CpbDpbDelaysPresentFlag can also be set "by some means not 
specified in this Recommendation | International Standard" + if (sps->vui_parameters_present_flag) + { + + if (sps->vui_seq_parameters.nal_hrd_parameters_present_flag) + { + for (k=0; kvui_seq_parameters.nal_hrd_parameters.cpb_cnt_minus1+1; k++) + { + initial_cpb_removal_delay = u_v(sps->vui_seq_parameters.nal_hrd_parameters.initial_cpb_removal_delay_length_minus1+1, "SEI: initial_cpb_removal_delay" , buf); + initial_cpb_removal_delay_offset = u_v(sps->vui_seq_parameters.nal_hrd_parameters.initial_cpb_removal_delay_length_minus1+1, "SEI: initial_cpb_removal_delay_offset" , buf); + + #ifdef PRINT_BUFFERING_PERIOD_INFO + printf("nal initial_cpb_removal_delay[%d] = %d\n", k, initial_cpb_removal_delay); + printf("nal initial_cpb_removal_delay_offset[%d] = %d\n", k, initial_cpb_removal_delay_offset); + #endif + } + } + + if (sps->vui_seq_parameters.vcl_hrd_parameters_present_flag) + { + for (k=0; kvui_seq_parameters.vcl_hrd_parameters.cpb_cnt_minus1+1; k++) + { + initial_cpb_removal_delay = u_v(sps->vui_seq_parameters.vcl_hrd_parameters.initial_cpb_removal_delay_length_minus1+1, "SEI: initial_cpb_removal_delay" , buf); + initial_cpb_removal_delay_offset = u_v(sps->vui_seq_parameters.vcl_hrd_parameters.initial_cpb_removal_delay_length_minus1+1, "SEI: initial_cpb_removal_delay_offset" , buf); + + #ifdef PRINT_BUFFERING_PERIOD_INFO + printf("vcl initial_cpb_removal_delay[%d] = %d\n", k, initial_cpb_removal_delay); + printf("vcl initial_cpb_removal_delay_offset[%d] = %d\n", k, initial_cpb_removal_delay_offset); + #endif + } + } + } + + free (buf); + #ifdef PRINT_BUFFERING_PERIOD_INFO + #undef PRINT_BUFFERING_PERIOD_INFO + #endif + } + + + /*! 
+ ************************************************************************ + * \brief + * Interpret the Picture timing SEI message + * \param payload + * a pointer that point to the sei payload + * \param size + * the size of the sei message + * \param img + * the image pointer + * + ************************************************************************ + */ + void interpret_picture_timing_info( byte* payload, int size, ImageParameters *img ) + { + int cpb_removal_delay, dpb_output_delay, picture_structure_present_flag, picture_structure; + int clock_time_stamp_flag; + int ct_type, nuit_field_based_flag, counting_type, full_timestamp_flag, discontinuity_flag, cnt_dropped_flag, nframes; + int seconds_value, minutes_value, hours_value, seconds_flag, minutes_flag, hours_flag, time_offset; + int NumClockTs = 0; + int i; + + int cpb_removal_len = 24; + int dpb_output_len = 24; + + Boolean CpbDpbDelaysPresentFlag; + + Bitstream* buf; + + buf = malloc(sizeof(Bitstream)); + buf->bitstream_length = size; + buf->streamBuffer = payload; + buf->frame_bitoffset = 0; + + UsedBits = 0; + + if (NULL==active_sps) + { + fprintf (stderr, "Warning: no active SPS, timing SEI cannot be parsed\n"); + return; + } + + #ifdef PRINT_PCITURE_TIMING_INFO + printf("Picture timing SEI message\n"); + #endif + + // CpbDpbDelaysPresentFlag can also be set "by some means not specified in this Recommendation | International Standard" + CpbDpbDelaysPresentFlag = (active_sps->vui_parameters_present_flag + && ( (active_sps->vui_seq_parameters.nal_hrd_parameters_present_flag != 0) + ||(active_sps->vui_seq_parameters.vcl_hrd_parameters_present_flag != 0))); + + if (CpbDpbDelaysPresentFlag ) + { + if (active_sps->vui_parameters_present_flag) + { + if (active_sps->vui_seq_parameters.nal_hrd_parameters_present_flag) + { + cpb_removal_len = active_sps->vui_seq_parameters.nal_hrd_parameters.cpb_removal_delay_length_minus1 + 1; + dpb_output_len = 
active_sps->vui_seq_parameters.nal_hrd_parameters.dpb_output_delay_length_minus1 + 1; + } + else if (active_sps->vui_seq_parameters.vcl_hrd_parameters_present_flag) + { + cpb_removal_len = active_sps->vui_seq_parameters.vcl_hrd_parameters.cpb_removal_delay_length_minus1 + 1; + dpb_output_len = active_sps->vui_seq_parameters.vcl_hrd_parameters.dpb_output_delay_length_minus1 + 1; + } + } + + if ((active_sps->vui_seq_parameters.nal_hrd_parameters_present_flag)|| + (active_sps->vui_seq_parameters.vcl_hrd_parameters_present_flag)) + { + cpb_removal_delay = u_v(cpb_removal_len, "SEI: cpb_removal_delay" , buf); + dpb_output_delay = u_v(dpb_output_len, "SEI: dpb_output_delay" , buf); + #ifdef PRINT_PCITURE_TIMING_INFO + printf("cpb_removal_delay = %d\n",cpb_removal_delay); + printf("dpb_output_delay = %d\n",dpb_output_delay); + #endif + } + } + + if (!active_sps->vui_parameters_present_flag) + { + picture_structure_present_flag = 0; + } + else + { + picture_structure_present_flag = active_sps->vui_seq_parameters.pic_struct_present_flag; + } + + if (picture_structure_present_flag) + { + picture_structure = u_v(4, "SEI: pic_struct" , buf); + #ifdef PRINT_PCITURE_TIMING_INFO + printf("picture_structure = %d\n",picture_structure); + #endif + switch (picture_structure) + { + case 0: + case 1: + case 2: + NumClockTs = 1; + break; + case 3: + case 4: + case 7: + NumClockTs = 2; + break; + case 5: + case 6: + case 8: + NumClockTs = 3; + break; + default: + error("reserved picture_structure used (can't determine NumClockTs)", 500); + } + for (i=0; ivui_seq_parameters.nal_hrd_parameters.time_offset_length) //!KS which HRD params shall be used? 
+ { + time_offset=0; + // time_offset = i_v(active_sps->vui_seq_parameters.nal_hrd_parameters.time_offset_length, "SEI: time_offset" , buf); + #ifdef PRINT_PCITURE_TIMING_INFO + printf("time_offset = %d\n",time_offset); + #endif + } + } + } + } + + free (buf); + #ifdef PRINT_PCITURE_TIMING_INFO + #undef PRINT_PCITURE_TIMING_INFO + #endif + } Index: llvm-test/MultiSource/Applications/JM/ldecod/sei.h diff -c /dev/null llvm-test/MultiSource/Applications/JM/ldecod/sei.h:1.1 *** /dev/null Sat Feb 11 04:33:42 2006 --- llvm-test/MultiSource/Applications/JM/ldecod/sei.h Sat Feb 11 04:33:22 2006 *************** *** 0 **** --- 1,65 ---- + + /*! + ************************************************************************************* + * \file sei.h + * + * \brief + * Prototypes for sei.c + ************************************************************************************* + */ + + #ifndef SEI_H + #define SEI_H + + typedef enum { + SEI_BUFFERING_PERIOD = 0, + SEI_PIC_TIMING, + SEI_PAN_SCAN_RECT, + SEI_FILLER_PAYLOAD, + SEI_USER_DATA_REGISTERED_ITU_T_T35, + SEI_USER_DATA_UNREGISTERED, + SEI_RECOVERY_POINT, + SEI_DEC_REF_PIC_MARKING_REPETITION, + SEI_SPARE_PIC, + SEI_SCENE_INFO, + SEI_SUB_SEQ_INFO, + SEI_SUB_SEQ_LAYER_CHARACTERISTICS, + SEI_SUB_SEQ_CHARACTERISTICS, + SEI_FULL_FRAME_FREEZE, + SEI_FULL_FRAME_FREEZE_RELEASE, + SEI_FULL_FRAME_SNAPSHOT, + SEI_PROGRESSIVE_REFINEMENT_SEGMENT_START, + SEI_PROGRESSIVE_REFINEMENT_SEGMENT_END, + SEI_MOTION_CONSTRAINED_SLICE_GROUP_SET, + SEI_FILM_GRAIN_CHARACTERISTICS, + SEI_DEBLOCKING_FILTER_DISPLAY_PREFERENCE, + SEI_STEREO_VIDEO_INFO, + + SEI_MAX_ELEMENTS //!< number of maximum syntax elements + } SEI_type; + + #define MAX_FN 256 + + void InterpretSEIMessage(byte* msg, int size, ImageParameters *img); + void interpret_spare_pic( byte* payload, int size, ImageParameters *img ); + void interpret_subsequence_info( byte* payload, int size, ImageParameters *img ); + void interpret_subsequence_layer_characteristics_info( byte* payload, int 
size, ImageParameters *img ); + void interpret_subsequence_characteristics_info( byte* payload, int size, ImageParameters *img ); + void interpret_scene_information( byte* payload, int size, ImageParameters *img ); // JVT-D099 + void interpret_user_data_registered_itu_t_t35_info( byte* payload, int size, ImageParameters *img ); + void interpret_user_data_unregistered_info( byte* payload, int size, ImageParameters *img ); + void interpret_pan_scan_rect_info( byte* payload, int size, ImageParameters *img ); + void interpret_recovery_point_info( byte* payload, int size, ImageParameters *img ); + void interpret_filler_payload_info( byte* payload, int size, ImageParameters *img ); + void interpret_dec_ref_pic_marking_repetition_info( byte* payload, int size, ImageParameters *img ); + void interpret_full_frame_freeze_info( byte* payload, int size, ImageParameters *img ); + void interpret_full_frame_freeze_release_info( byte* payload, int size, ImageParameters *img ); + void interpret_full_frame_snapshot_info( byte* payload, int size, ImageParameters *img ); + void interpret_progressive_refinement_start_info( byte* payload, int size, ImageParameters *img ); + void interpret_progressive_refinement_end_info( byte* payload, int size, ImageParameters *img ); + void interpret_motion_constrained_slice_group_set_info( byte* payload, int size, ImageParameters *img ); + void interpret_reserved_info( byte* payload, int size, ImageParameters *img ); + void interpret_buffering_period_info( byte* payload, int size, ImageParameters *img ); + void interpret_picture_timing_info( byte* payload, int size, ImageParameters *img ); + + #endif Index: llvm-test/MultiSource/Applications/JM/ldecod/transform8x8.c diff -c /dev/null llvm-test/MultiSource/Applications/JM/ldecod/transform8x8.c:1.1 *** /dev/null Sat Feb 11 04:33:42 2006 --- llvm-test/MultiSource/Applications/JM/ldecod/transform8x8.c Sat Feb 11 04:33:22 2006 *************** *** 0 **** --- 1,1035 ---- + + /*! 
+ *************************************************************************** + * \file transform8x8.c + * + * \brief + * 8x8 transform functions + * + * \author + * Main contributors (see contributors.h for copyright, address and affiliation details) + * - Yuri Vatis + * - Jan Muenster + * + * \date + * 12. October 2003 + ************************************************************************** + */ + + #include + #include "transform8x8.h" + + + + #define Q_BITS_8 16 + #define DQ_BITS_8 6 + #define DQ_ROUND_8 (1<<(DQ_BITS_8-1)) + + + + + static const int quant_coef8[6][8][8] = + { + { + {13107, 12222, 16777, 12222, 13107, 12222, 16777, 12222}, + {12222, 11428, 15481, 11428, 12222, 11428, 15481, 11428}, + {16777, 15481, 20972, 15481, 16777, 15481, 20972, 15481}, + {12222, 11428, 15481, 11428, 12222, 11428, 15481, 11428}, + {13107, 12222, 16777, 12222, 13107, 12222, 16777, 12222}, + {12222, 11428, 15481, 11428, 12222, 11428, 15481, 11428}, + {16777, 15481, 20972, 15481, 16777, 15481, 20972, 15481}, + {12222, 11428, 15481, 11428, 12222, 11428, 15481, 11428} + }, + { + {11916, 11058, 14980, 11058, 11916, 11058, 14980, 11058}, + {11058, 10826, 14290, 10826, 11058, 10826, 14290, 10826}, + {14980, 14290, 19174, 14290, 14980, 14290, 19174, 14290}, + {11058, 10826, 14290, 10826, 11058, 10826, 14290, 10826}, + {11916, 11058, 14980, 11058, 11916, 11058, 14980, 11058}, + {11058, 10826, 14290, 10826, 11058, 10826, 14290, 10826}, + {14980, 14290, 19174, 14290, 14980, 14290, 19174, 14290}, + {11058, 10826, 14290, 10826, 11058, 10826, 14290, 10826} + }, + { + {10082, 9675, 12710, 9675, 10082, 9675, 12710, 9675}, + {9675, 8943, 11985, 8943, 9675, 8943, 11985, 8943}, + {12710, 11985, 15978, 11985, 12710, 11985, 15978, 11985}, + {9675, 8943, 11985, 8943, 9675, 8943, 11985, 8943}, + {10082, 9675, 12710, 9675, 10082, 9675, 12710, 9675}, + {9675, 8943, 11985, 8943, 9675, 8943, 11985, 8943}, + {12710, 11985, 15978, 11985, 12710, 11985, 15978, 11985}, + {9675, 8943, 11985, 8943, 9675, 
8943, 11985, 8943} + }, + { + {9362, 8931, 11984, 8931, 9362, 8931, 11984, 8931}, + {8931, 8228, 11259, 8228, 8931, 8228, 11259, 8228}, + {11984, 11259, 14913, 11259, 11984, 11259, 14913, 11259}, + {8931, 8228, 11259, 8228, 8931, 8228, 11259, 8228}, + {9362, 8931, 11984, 8931, 9362, 8931, 11984, 8931}, + {8931, 8228, 11259, 8228, 8931, 8228, 11259, 8228}, + {11984, 11259, 14913, 11259, 11984, 11259, 14913, 11259}, + {8931, 8228, 11259, 8228, 8931, 8228, 11259, 8228} + }, + { + {8192, 7740, 10486, 7740, 8192, 7740, 10486, 7740}, + {7740, 7346, 9777, 7346, 7740, 7346, 9777, 7346}, + {10486, 9777, 13159, 9777, 10486, 9777, 13159, 9777}, + {7740, 7346, 9777, 7346, 7740, 7346, 9777, 7346}, + {8192, 7740, 10486, 7740, 8192, 7740, 10486, 7740}, + {7740, 7346, 9777, 7346, 7740, 7346, 9777, 7346}, + {10486, 9777, 13159, 9777, 10486, 9777, 13159, 9777}, + {7740, 7346, 9777, 7346, 7740, 7346, 9777, 7346} + }, + { + {7282, 6830, 9118, 6830, 7282, 6830, 9118, 6830}, + {6830, 6428, 8640, 6428, 6830, 6428, 8640, 6428}, + {9118, 8640, 11570, 8640, 9118, 8640, 11570, 8640}, + {6830, 6428, 8640, 6428, 6830, 6428, 8640, 6428}, + {7282, 6830, 9118, 6830, 7282, 6830, 9118, 6830}, + {6830, 6428, 8640, 6428, 6830, 6428, 8640, 6428}, + {9118, 8640, 11570, 8640, 9118, 8640, 11570, 8640}, + {6830, 6428, 8640, 6428, 6830, 6428, 8640, 6428} + } + }; + + + + static const int dequant_coef8[6][8][8] = + { + { + {20, 19, 25, 19, 20, 19, 25, 19}, + {19, 18, 24, 18, 19, 18, 24, 18}, + {25, 24, 32, 24, 25, 24, 32, 24}, + {19, 18, 24, 18, 19, 18, 24, 18}, + {20, 19, 25, 19, 20, 19, 25, 19}, + {19, 18, 24, 18, 19, 18, 24, 18}, + {25, 24, 32, 24, 25, 24, 32, 24}, + {19, 18, 24, 18, 19, 18, 24, 18} + }, + { + {22, 21, 28, 21, 22, 21, 28, 21}, + {21, 19, 26, 19, 21, 19, 26, 19}, + {28, 26, 35, 26, 28, 26, 35, 26}, + {21, 19, 26, 19, 21, 19, 26, 19}, + {22, 21, 28, 21, 22, 21, 28, 21}, + {21, 19, 26, 19, 21, 19, 26, 19}, + {28, 26, 35, 26, 28, 26, 35, 26}, + {21, 19, 26, 19, 21, 19, 26, 19} + }, + { + 
{26, 24, 33, 24, 26, 24, 33, 24}, + {24, 23, 31, 23, 24, 23, 31, 23}, + {33, 31, 42, 31, 33, 31, 42, 31}, + {24, 23, 31, 23, 24, 23, 31, 23}, + {26, 24, 33, 24, 26, 24, 33, 24}, + {24, 23, 31, 23, 24, 23, 31, 23}, + {33, 31, 42, 31, 33, 31, 42, 31}, + {24, 23, 31, 23, 24, 23, 31, 23} + }, + { + {28, 26, 35, 26, 28, 26, 35, 26}, + {26, 25, 33, 25, 26, 25, 33, 25}, + {35, 33, 45, 33, 35, 33, 45, 33}, + {26, 25, 33, 25, 26, 25, 33, 25}, + {28, 26, 35, 26, 28, 26, 35, 26}, + {26, 25, 33, 25, 26, 25, 33, 25}, + {35, 33, 45, 33, 35, 33, 45, 33}, + {26, 25, 33, 25, 26, 25, 33, 25} + }, + { + {32, 30, 40, 30, 32, 30, 40, 30}, + {30, 28, 38, 28, 30, 28, 38, 28}, + {40, 38, 51, 38, 40, 38, 51, 38}, + {30, 28, 38, 28, 30, 28, 38, 28}, + {32, 30, 40, 30, 32, 30, 40, 30}, + {30, 28, 38, 28, 30, 28, 38, 28}, + {40, 38, 51, 38, 40, 38, 51, 38}, + {30, 28, 38, 28, 30, 28, 38, 28} + }, + { + {36, 34, 46, 34, 36, 34, 46, 34}, + {34, 32, 43, 32, 34, 32, 43, 32}, + {46, 43, 58, 43, 46, 43, 58, 43}, + {34, 32, 43, 32, 34, 32, 43, 32}, + {36, 34, 46, 34, 36, 34, 46, 34}, + {34, 32, 43, 32, 34, 32, 43, 32}, + {46, 43, 58, 43, 46, 43, 58, 43}, + {34, 32, 43, 32, 34, 32, 43, 32} + } + + }; + + + + #ifdef _NEW_8x8_ARRAYS_INCLUDED_ + //! single scan pattern + const byte SNGL_SCAN8x8[64][2] = { + {0,0}, {1,0}, {0,1}, {0,2}, {1,1}, {2,0}, {3,0}, {2,1}, {1,2}, {0,3}, {0,4}, {1,3}, {2,2}, {3,1}, {4,0}, {5,0}, + {4,1}, {3,2}, {2,3}, {1,4}, {0,5}, {0,6}, {1,5}, {2,4}, {3,3}, {4,2}, {5,1}, {6,0}, {7,0}, {6,1}, {5,2}, {4,3}, + {3,4}, {2,5}, {1,6}, {0,7}, {1,7}, {2,6}, {3,5}, {4,4}, {5,3}, {6,2}, {7,1}, {7,2}, {6,3}, {5,4}, {4,5}, {3,6}, + {2,7}, {3,7}, {4,6}, {5,5}, {6,4}, {7,3}, {7,4}, {6,5}, {5,6}, {4,7}, {5,7}, {6,6}, {7,5}, {7,6}, {6,7}, {7,7} + }; + + //! 
field scan pattern + const byte FIELD_SCAN8x8[64][2] = { + {0,0}, {0,1}, {0,2}, {1,0}, {1,1}, {0,3}, {0,4}, {1,2}, {2,0}, {2,1}, {1,3}, {0,5}, {0,6}, {1,4}, {2,2}, {3,0}, + {3,1}, {2,3}, {1,5}, {0,7}, {1,6}, {2,4}, {3,2}, {4,0}, {4,1}, {3,3}, {2,5}, {1,7}, {2,6}, {3,4}, {4,2}, {5,0}, + {5,1}, {4,3}, {3,5}, {2,7}, {3,6}, {4,4}, {5,2}, {6,0}, {6,1}, {5,3}, {4,5}, {3,7}, {4,6}, {5,4}, {6,2}, {7,0}, + {7,1}, {6,3}, {5,5}, {4,7}, {5,6}, {6,4}, {7,2}, {7,3}, {6,5}, {5,7}, {6,6}, {7,4}, {7,5}, {6,7}, {7,6}, {7,7} + }; + + + //! array used to find expencive coefficients + const byte COEFF_COST8x8[64] = + { + 3,3,3,3,2,2,2,2,2,2,2,2,1,1,1,1, + 1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 + }; + + #endif + + // Notation for comments regarding prediction and predictors. + // The pels of the 4x4 block are labelled a..p. The predictor pels above + // are labelled A..H, from the left I..P, and from above left X, as follows: + // + // Z A B C D E F G H I J K L M N O P + // Q a1 b1 c1 d1 e1 f1 g1 h1 + // R a2 b2 c2 d2 e2 f2 g2 h2 + // S a3 b3 c3 d3 e3 f3 g3 h3 + // T a4 b4 c4 d4 e4 f4 g4 h4 + // U a5 b5 c5 d5 e5 f5 g5 h5 + // V a6 b6 c6 d6 e6 f6 g6 h6 + // W a7 b7 c7 d7 e7 f7 g7 h7 + // X a8 b8 c8 d8 e8 f8 g8 h8 + + + // Predictor array index definitions + #define P_Z (PredPel[0]) + #define P_A (PredPel[1]) + #define P_B (PredPel[2]) + #define P_C (PredPel[3]) + #define P_D (PredPel[4]) + #define P_E (PredPel[5]) + #define P_F (PredPel[6]) + #define P_G (PredPel[7]) + #define P_H (PredPel[8]) + #define P_I (PredPel[9]) + #define P_J (PredPel[10]) + #define P_K (PredPel[11]) + #define P_L (PredPel[12]) + #define P_M (PredPel[13]) + #define P_N (PredPel[14]) + #define P_O (PredPel[15]) + #define P_P (PredPel[16]) + #define P_Q (PredPel[17]) + #define P_R (PredPel[18]) + #define P_S (PredPel[19]) + #define P_T (PredPel[20]) + #define P_U (PredPel[21]) + #define P_V (PredPel[22]) + #define P_W (PredPel[23]) + #define P_X 
(PredPel[24]) + + /*! + ************************************************************************ + * \brief + * Make intra 8x8 prediction according to all 9 prediction modes. + * The routine uses left and upper neighbouring points from + * previous coded blocks to do this (if available). Notice that + * inaccessible neighbouring points are signalled with a negative + * value in the predmode array . + * + * \par Input: + * Starting point of current 8x8 block image posision + * + ************************************************************************ + */ + int intrapred8x8( struct img_par *img, //!< image parameters + int b8) + + { + int i,j; + int s0; + int PredPel[25]; // array of predictor pels + imgpel **imgY = dec_picture->imgY; // For MB level frame/field coding tools -- set default to imgY + + int mb_nr=img->current_mb_nr; + + PixelPos pix_a[8]; + PixelPos pix_b, pix_c, pix_d; + + int block_available_up; + int block_available_left; + int block_available_up_left; + int block_available_up_right; + int img_block_x = (img->mb_x)*4 + 2*(b8%2); + int img_block_y = (img->mb_y)*4 + 2*(b8/2); + int ioff = (b8%2)*8; + int joff = (b8/2)*8; + + byte predmode = img->ipredmode[img_block_x][img_block_y]; + + for (i=0;i<8;i++) + { + getNeighbour(mb_nr, ioff -1 , joff +i , 1, &pix_a[i]); + } + + getNeighbour(mb_nr, ioff , joff -1 , 1, &pix_b); + getNeighbour(mb_nr, ioff +8 , joff -1 , 1, &pix_c); + getNeighbour(mb_nr, ioff -1 , joff -1 , 1, &pix_d); + + pix_c.available = pix_c.available &&!(ioff == 8 && joff == 8); + + if (active_pps->constrained_intra_pred_flag) + { + for (i=0, block_available_left=1; i<8;i++) + block_available_left &= pix_a[i].available ? img->intra_block[pix_a[i].mb_addr]: 0; + block_available_up = pix_b.available ? img->intra_block [pix_b.mb_addr] : 0; + block_available_up_right = pix_c.available ? img->intra_block [pix_c.mb_addr] : 0; + block_available_up_left = pix_d.available ? 
img->intra_block [pix_d.mb_addr] : 0; + } + else + { + block_available_left = pix_a[0].available; + block_available_up = pix_b.available; + block_available_up_right = pix_c.available; + block_available_up_left = pix_d.available; + } + + // *left_available = block_available_left; + // *up_available = block_available_up; + // *all_available = block_available_up && block_available_left && block_available_up_left; + + // form predictor pels + // form predictor pels + if (block_available_up) + { + P_A = imgY[pix_b.pos_y][pix_b.pos_x+0]; + P_B = imgY[pix_b.pos_y][pix_b.pos_x+1]; + P_C = imgY[pix_b.pos_y][pix_b.pos_x+2]; + P_D = imgY[pix_b.pos_y][pix_b.pos_x+3]; + P_E = imgY[pix_b.pos_y][pix_b.pos_x+4]; + P_F = imgY[pix_b.pos_y][pix_b.pos_x+5]; + P_G = imgY[pix_b.pos_y][pix_b.pos_x+6]; + P_H = imgY[pix_b.pos_y][pix_b.pos_x+7]; + } + else + { + P_A = P_B = P_C = P_D = P_E = P_F = P_G = P_H = img->dc_pred_value; + } + + if (block_available_up_right) + { + P_I = imgY[pix_c.pos_y][pix_c.pos_x+0]; + P_J = imgY[pix_c.pos_y][pix_c.pos_x+1]; + P_K = imgY[pix_c.pos_y][pix_c.pos_x+2]; + P_L = imgY[pix_c.pos_y][pix_c.pos_x+3]; + P_M = imgY[pix_c.pos_y][pix_c.pos_x+4]; + P_N = imgY[pix_c.pos_y][pix_c.pos_x+5]; + P_O = imgY[pix_c.pos_y][pix_c.pos_x+6]; + P_P = imgY[pix_c.pos_y][pix_c.pos_x+7]; + + } + else + { + P_I = P_J = P_K = P_L = P_M = P_N = P_O = P_P = P_H; + } + + if (block_available_left) + { + P_Q = imgY[pix_a[0].pos_y][pix_a[0].pos_x]; + P_R = imgY[pix_a[1].pos_y][pix_a[1].pos_x]; + P_S = imgY[pix_a[2].pos_y][pix_a[2].pos_x]; + P_T = imgY[pix_a[3].pos_y][pix_a[3].pos_x]; + P_U = imgY[pix_a[4].pos_y][pix_a[4].pos_x]; + P_V = imgY[pix_a[5].pos_y][pix_a[5].pos_x]; + P_W = imgY[pix_a[6].pos_y][pix_a[6].pos_x]; + P_X = imgY[pix_a[7].pos_y][pix_a[7].pos_x]; + } + else + { + P_Q = P_R = P_S = P_T = P_U = P_V = P_W = P_X = img->dc_pred_value; + } + + if (block_available_up_left) + { + P_Z = imgY[pix_d.pos_y][pix_d.pos_x]; + } + else + { + P_Z = img->dc_pred_value; + } + + 
LowPassForIntra8x8Pred(&(P_Z), block_available_up_left, block_available_up, block_available_left); + + //img->mpr[x][y] + switch(predmode) + { + case DC_PRED: + s0 = 0; + if (block_available_up && block_available_left) + { + // no edge + s0 = (P_A + P_B + P_C + P_D + P_E + P_F + P_G + P_H + P_Q + P_R + P_S + P_T + P_U + P_V + P_W + P_X + 8) >> 4; + } + else if (!block_available_up && block_available_left) + { + // upper edge + s0 = (P_Q + P_R + P_S + P_T + P_U + P_V + P_W + P_X + 4) >> 3; + } + else if (block_available_up && !block_available_left) + { + // left edge + s0 = (P_A + P_B + P_C + P_D + P_E + P_F + P_G + P_H + 4) >> 3; + } + else //if (!block_available_up && !block_available_left) + { + // top left corner, nothing to predict from + s0 = img->dc_pred_value; + } + for(i = 0; i < 2*BLOCK_SIZE; i++) + for(j = 0; j < 2*BLOCK_SIZE; j++) + img->mpr[i+ioff][j+joff] = s0; + break; + + case VERT_PRED: + if (!block_available_up) + printf ("warning: Intra_8x8_Vertical prediction mode not allowed at mb %d\n",img->current_mb_nr); + + for (i=0; i < 2*BLOCK_SIZE; i++) + { + img->mpr[i+ioff][0+joff] = + img->mpr[i+ioff][1+joff] = + img->mpr[i+ioff][2+joff] = + img->mpr[i+ioff][3+joff] = + img->mpr[i+ioff][4+joff] = + img->mpr[i+ioff][5+joff] = + img->mpr[i+ioff][6+joff] = + img->mpr[i+ioff][7+joff] = (&P_A)[i]; + } + break; + case HOR_PRED: + if (!block_available_left) + printf ("warning: Intra_8x8_Horizontal prediction mode not allowed at mb %d\n",img->current_mb_nr); + + for (j=0; j < 2*BLOCK_SIZE; j++) + { + img->mpr[0+ioff][j+joff] = + img->mpr[1+ioff][j+joff] = + img->mpr[2+ioff][j+joff] = + img->mpr[3+ioff][j+joff] = + img->mpr[4+ioff][j+joff] = + img->mpr[5+ioff][j+joff] = + img->mpr[6+ioff][j+joff] = + img->mpr[7+ioff][j+joff] = (&P_Q)[j]; + } + break; + + case DIAG_DOWN_LEFT_PRED: + if (!block_available_up) + printf ("warning: Intra_8x8_Diagonal_Down_Left prediction mode not allowed at mb %d\n",img->current_mb_nr); + // Mode DIAG_DOWN_LEFT_PRED + 
img->mpr[0+ioff][0+joff] = (P_A + P_C + 2*(P_B) + 2) >> 2; + img->mpr[0+ioff][1+joff] = + img->mpr[1+ioff][0+joff] = (P_B + P_D + 2*(P_C) + 2) >> 2; + img->mpr[0+ioff][2+joff] = + img->mpr[1+ioff][1+joff] = + img->mpr[2+ioff][0+joff] = (P_C + P_E + 2*(P_D) + 2) >> 2; + img->mpr[0+ioff][3+joff] = + img->mpr[1+ioff][2+joff] = + img->mpr[2+ioff][1+joff] = + img->mpr[3+ioff][0+joff] = (P_D + P_F + 2*(P_E) + 2) >> 2; + img->mpr[0+ioff][4+joff] = + img->mpr[1+ioff][3+joff] = + img->mpr[2+ioff][2+joff] = + img->mpr[3+ioff][1+joff] = + img->mpr[4+ioff][0+joff] = (P_E + P_G + 2*(P_F) + 2) >> 2; + img->mpr[0+ioff][5+joff] = + img->mpr[1+ioff][4+joff] = + img->mpr[2+ioff][3+joff] = + img->mpr[3+ioff][2+joff] = + img->mpr[4+ioff][1+joff] = + img->mpr[5+ioff][0+joff] = (P_F + P_H + 2*(P_G) + 2) >> 2; + img->mpr[0+ioff][6+joff] = + img->mpr[1+ioff][5+joff] = + img->mpr[2+ioff][4+joff] = + img->mpr[3+ioff][3+joff] = + img->mpr[4+ioff][2+joff] = + img->mpr[5+ioff][1+joff] = + img->mpr[6+ioff][0+joff] = (P_G + P_I + 2*(P_H) + 2) >> 2; + img->mpr[0+ioff][7+joff] = + img->mpr[1+ioff][6+joff] = + img->mpr[2+ioff][5+joff] = + img->mpr[3+ioff][4+joff] = + img->mpr[4+ioff][3+joff] = + img->mpr[5+ioff][2+joff] = + img->mpr[6+ioff][1+joff] = + img->mpr[7+ioff][0+joff] = (P_H + P_J + 2*(P_I) + 2) >> 2; + img->mpr[1+ioff][7+joff] = + img->mpr[2+ioff][6+joff] = + img->mpr[3+ioff][5+joff] = + img->mpr[4+ioff][4+joff] = + img->mpr[5+ioff][3+joff] = + img->mpr[6+ioff][2+joff] = + img->mpr[7+ioff][1+joff] = (P_I + P_K + 2*(P_J) + 2) >> 2; + img->mpr[2+ioff][7+joff] = + img->mpr[3+ioff][6+joff] = + img->mpr[4+ioff][5+joff] = + img->mpr[5+ioff][4+joff] = + img->mpr[6+ioff][3+joff] = + img->mpr[7+ioff][2+joff] = (P_J + P_L + 2*(P_K) + 2) >> 2; + img->mpr[3+ioff][7+joff] = + img->mpr[4+ioff][6+joff] = + img->mpr[5+ioff][5+joff] = + img->mpr[6+ioff][4+joff] = + img->mpr[7+ioff][3+joff] = (P_K + P_M + 2*(P_L) + 2) >> 2; + img->mpr[4+ioff][7+joff] = + img->mpr[5+ioff][6+joff] = + 
img->mpr[6+ioff][5+joff] = + img->mpr[7+ioff][4+joff] = (P_L + P_N + 2*(P_M) + 2) >> 2; + img->mpr[5+ioff][7+joff] = + img->mpr[6+ioff][6+joff] = + img->mpr[7+ioff][5+joff] = (P_M + P_O + 2*(P_N) + 2) >> 2; + img->mpr[6+ioff][7+joff] = + img->mpr[7+ioff][6+joff] = (P_N + P_P + 2*(P_O) + 2) >> 2; + img->mpr[7+ioff][7+joff] = (P_O + 3*(P_P) + 2) >> 2; + break; + + case VERT_LEFT_PRED: + if (!block_available_up) + printf ("warning: Intra_4x4_Vertical_Left prediction mode not allowed at mb %d\n",img->current_mb_nr); + + img->mpr[0+ioff][0+joff] = (P_A + P_B + 1) >> 1; + img->mpr[1+ioff][0+joff] = + img->mpr[0+ioff][2+joff] = (P_B + P_C + 1) >> 1; + img->mpr[2+ioff][0+joff] = + img->mpr[1+ioff][2+joff] = + img->mpr[0+ioff][4+joff] = (P_C + P_D + 1) >> 1; + img->mpr[3+ioff][0+joff] = + img->mpr[2+ioff][2+joff] = + img->mpr[1+ioff][4+joff] = + img->mpr[0+ioff][6+joff] = (P_D + P_E + 1) >> 1; + img->mpr[4+ioff][0+joff] = + img->mpr[3+ioff][2+joff] = + img->mpr[2+ioff][4+joff] = + img->mpr[1+ioff][6+joff] = (P_E + P_F + 1) >> 1; + img->mpr[5+ioff][0+joff] = + img->mpr[4+ioff][2+joff] = + img->mpr[3+ioff][4+joff] = + img->mpr[2+ioff][6+joff] = (P_F + P_G + 1) >> 1; + img->mpr[6+ioff][0+joff] = + img->mpr[5+ioff][2+joff] = + img->mpr[4+ioff][4+joff] = + img->mpr[3+ioff][6+joff] = (P_G + P_H + 1) >> 1; + img->mpr[7+ioff][0+joff] = + img->mpr[6+ioff][2+joff] = + img->mpr[5+ioff][4+joff] = + img->mpr[4+ioff][6+joff] = (P_H + P_I + 1) >> 1; + img->mpr[7+ioff][2+joff] = + img->mpr[6+ioff][4+joff] = + img->mpr[5+ioff][6+joff] = (P_I + P_J + 1) >> 1; + img->mpr[7+ioff][4+joff] = + img->mpr[6+ioff][6+joff] = (P_J + P_K + 1) >> 1; + img->mpr[7+ioff][6+joff] = (P_K + P_L + 1) >> 1; + img->mpr[0+ioff][1+joff] = (P_A + P_C + 2*P_B + 2) >> 2; + img->mpr[1+ioff][1+joff] = + img->mpr[0+ioff][3+joff] = (P_B + P_D + 2*P_C + 2) >> 2; + img->mpr[2+ioff][1+joff] = + img->mpr[1+ioff][3+joff] = + img->mpr[0+ioff][5+joff] = (P_C + P_E + 2*P_D + 2) >> 2; + img->mpr[3+ioff][1+joff] = + 
img->mpr[2+ioff][3+joff] = + img->mpr[1+ioff][5+joff] = + img->mpr[0+ioff][7+joff] = (P_D + P_F + 2*P_E + 2) >> 2; + img->mpr[4+ioff][1+joff] = + img->mpr[3+ioff][3+joff] = + img->mpr[2+ioff][5+joff] = + img->mpr[1+ioff][7+joff] = (P_E + P_G + 2*P_F + 2) >> 2; + img->mpr[5+ioff][1+joff] = + img->mpr[4+ioff][3+joff] = + img->mpr[3+ioff][5+joff] = + img->mpr[2+ioff][7+joff] = (P_F + P_H + 2*P_G + 2) >> 2; + img->mpr[6+ioff][1+joff] = + img->mpr[5+ioff][3+joff] = + img->mpr[4+ioff][5+joff] = + img->mpr[3+ioff][7+joff] = (P_G + P_I + 2*P_H + 2) >> 2; + img->mpr[7+ioff][1+joff] = + img->mpr[6+ioff][3+joff] = + img->mpr[5+ioff][5+joff] = + img->mpr[4+ioff][7+joff] = (P_H + P_J + 2*P_I + 2) >> 2; + img->mpr[7+ioff][3+joff] = + img->mpr[6+ioff][5+joff] = + img->mpr[5+ioff][7+joff] = (P_I + P_K + 2*P_J + 2) >> 2; + img->mpr[7+ioff][5+joff] = + img->mpr[6+ioff][7+joff] = (P_J + P_L + 2*P_K + 2) >> 2; + img->mpr[7+ioff][7+joff] = (P_K + P_M + 2*P_L + 2) >> 2; + break; + + + case DIAG_DOWN_RIGHT_PRED: + if ((!block_available_up)||(!block_available_left)||(!block_available_up_left)) + printf ("warning: Intra_8x8_Diagonal_Down_Right prediction mode not allowed at mb %d\n",img->current_mb_nr); + + // Mode DIAG_DOWN_RIGHT_PRED + img->mpr[0+ioff][7+joff] = (P_X + P_V + 2*(P_W) + 2) >> 2; + img->mpr[0+ioff][6+joff] = + img->mpr[1+ioff][7+joff] = (P_W + P_U + 2*(P_V) + 2) >> 2; + img->mpr[0+ioff][5+joff] = + img->mpr[1+ioff][6+joff] = + img->mpr[2+ioff][7+joff] = (P_V + P_T + 2*(P_U) + 2) >> 2; + img->mpr[0+ioff][4+joff] = + img->mpr[1+ioff][5+joff] = + img->mpr[2+ioff][6+joff] = + img->mpr[3+ioff][7+joff] = (P_U + P_S + 2*(P_T) + 2) >> 2; + img->mpr[0+ioff][3+joff] = + img->mpr[1+ioff][4+joff] = + img->mpr[2+ioff][5+joff] = + img->mpr[3+ioff][6+joff] = + img->mpr[4+ioff][7+joff] = (P_T + P_R + 2*(P_S) + 2) >> 2; + img->mpr[0+ioff][2+joff] = + img->mpr[1+ioff][3+joff] = + img->mpr[2+ioff][4+joff] = + img->mpr[3+ioff][5+joff] = + img->mpr[4+ioff][6+joff] = + img->mpr[5+ioff][7+joff] = 
(P_S + P_Q + 2*(P_R) + 2) >> 2; + img->mpr[0+ioff][1+joff] = + img->mpr[1+ioff][2+joff] = + img->mpr[2+ioff][3+joff] = + img->mpr[3+ioff][4+joff] = + img->mpr[4+ioff][5+joff] = + img->mpr[5+ioff][6+joff] = + img->mpr[6+ioff][7+joff] = (P_R + P_Z + 2*(P_Q) + 2) >> 2; + img->mpr[0+ioff][0+joff] = + img->mpr[1+ioff][1+joff] = + img->mpr[2+ioff][2+joff] = + img->mpr[3+ioff][3+joff] = + img->mpr[4+ioff][4+joff] = + img->mpr[5+ioff][5+joff] = + img->mpr[6+ioff][6+joff] = + img->mpr[7+ioff][7+joff] = (P_Q + P_A + 2*(P_Z) + 2) >> 2; + img->mpr[1+ioff][0+joff] = + img->mpr[2+ioff][1+joff] = + img->mpr[3+ioff][2+joff] = + img->mpr[4+ioff][3+joff] = + img->mpr[5+ioff][4+joff] = + img->mpr[6+ioff][5+joff] = + img->mpr[7+ioff][6+joff] = (P_Z + P_B + 2*(P_A) + 2) >> 2; + img->mpr[2+ioff][0+joff] = + img->mpr[3+ioff][1+joff] = + img->mpr[4+ioff][2+joff] = + img->mpr[5+ioff][3+joff] = + img->mpr[6+ioff][4+joff] = + img->mpr[7+ioff][5+joff] = (P_A + P_C + 2*(P_B) + 2) >> 2; + img->mpr[3+ioff][0+joff] = + img->mpr[4+ioff][1+joff] = + img->mpr[5+ioff][2+joff] = + img->mpr[6+ioff][3+joff] = + img->mpr[7+ioff][4+joff] = (P_B + P_D + 2*(P_C) + 2) >> 2; + img->mpr[4+ioff][0+joff] = + img->mpr[5+ioff][1+joff] = + img->mpr[6+ioff][2+joff] = + img->mpr[7+ioff][3+joff] = (P_C + P_E + 2*(P_D) + 2) >> 2; + img->mpr[5+ioff][0+joff] = + img->mpr[6+ioff][1+joff] = + img->mpr[7+ioff][2+joff] = (P_D + P_F + 2*(P_E) + 2) >> 2; + img->mpr[6+ioff][0+joff] = + img->mpr[7+ioff][1+joff] = (P_E + P_G + 2*(P_F) + 2) >> 2; + img->mpr[7+ioff][0+joff] = (P_F + P_H + 2*(P_G) + 2) >> 2; + break; + + case VERT_RIGHT_PRED:/* diagonal prediction -22.5 deg to horizontal plane */ + if ((!block_available_up)||(!block_available_left)||(!block_available_up_left)) + printf ("warning: Intra_8x8_Vertical_Right prediction mode not allowed at mb %d\n",img->current_mb_nr); + + img->mpr[0+ioff][0+joff] = + img->mpr[1+ioff][2+joff] = + img->mpr[2+ioff][4+joff] = + img->mpr[3+ioff][6+joff] = (P_Z + P_A + 1) >> 1; + 
img->mpr[1+ioff][0+joff] = + img->mpr[2+ioff][2+joff] = + img->mpr[3+ioff][4+joff] = + img->mpr[4+ioff][6+joff] = (P_A + P_B + 1) >> 1; + img->mpr[2+ioff][0+joff] = + img->mpr[3+ioff][2+joff] = + img->mpr[4+ioff][4+joff] = + img->mpr[5+ioff][6+joff] = (P_B + P_C + 1) >> 1; + img->mpr[3+ioff][0+joff] = + img->mpr[4+ioff][2+joff] = + img->mpr[5+ioff][4+joff] = + img->mpr[6+ioff][6+joff] = (P_C + P_D + 1) >> 1; + img->mpr[4+ioff][0+joff] = + img->mpr[5+ioff][2+joff] = + img->mpr[6+ioff][4+joff] = + img->mpr[7+ioff][6+joff] = (P_D + P_E + 1) >> 1; + img->mpr[5+ioff][0+joff] = + img->mpr[6+ioff][2+joff] = + img->mpr[7+ioff][4+joff] = (P_E + P_F + 1) >> 1; + img->mpr[6+ioff][0+joff] = + img->mpr[7+ioff][2+joff] = (P_F + P_G + 1) >> 1; + img->mpr[7+ioff][0+joff] = (P_G + P_H + 1) >> 1; + img->mpr[0+ioff][1+joff] = + img->mpr[1+ioff][3+joff] = + img->mpr[2+ioff][5+joff] = + img->mpr[3+ioff][7+joff] = (P_Q + P_A + 2*P_Z + 2) >> 2; + img->mpr[1+ioff][1+joff] = + img->mpr[2+ioff][3+joff] = + img->mpr[3+ioff][5+joff] = + img->mpr[4+ioff][7+joff] = (P_Z + P_B + 2*P_A + 2) >> 2; + img->mpr[2+ioff][1+joff] = + img->mpr[3+ioff][3+joff] = + img->mpr[4+ioff][5+joff] = + img->mpr[5+ioff][7+joff] = (P_A + P_C + 2*P_B + 2) >> 2; + img->mpr[3+ioff][1+joff] = + img->mpr[4+ioff][3+joff] = + img->mpr[5+ioff][5+joff] = + img->mpr[6+ioff][7+joff] = (P_B + P_D + 2*P_C + 2) >> 2; + img->mpr[4+ioff][1+joff] = + img->mpr[5+ioff][3+joff] = + img->mpr[6+ioff][5+joff] = + img->mpr[7+ioff][7+joff] = (P_C + P_E + 2*P_D + 2) >> 2; + img->mpr[5+ioff][1+joff] = + img->mpr[6+ioff][3+joff] = + img->mpr[7+ioff][5+joff] = (P_D + P_F + 2*P_E + 2) >> 2; + img->mpr[6+ioff][1+joff] = + img->mpr[7+ioff][3+joff] = (P_E + P_G + 2*P_F + 2) >> 2; + img->mpr[7+ioff][1+joff] = (P_F + P_H + 2*P_G + 2) >> 2; + img->mpr[0+ioff][2+joff] = + img->mpr[1+ioff][4+joff] = + img->mpr[2+ioff][6+joff] = (P_R + P_Z + 2*P_Q + 2) >> 2; + img->mpr[0+ioff][3+joff] = + img->mpr[1+ioff][5+joff] = + img->mpr[2+ioff][7+joff] = (P_S + P_Q 
+ 2*P_R + 2) >> 2; + img->mpr[0+ioff][4+joff] = + img->mpr[1+ioff][6+joff] = (P_T + P_R + 2*P_S + 2) >> 2; + img->mpr[0+ioff][5+joff] = + img->mpr[1+ioff][7+joff] = (P_U + P_S + 2*P_T + 2) >> 2; + img->mpr[0+ioff][6+joff] = (P_V + P_T + 2*P_U + 2) >> 2; + img->mpr[0+ioff][7+joff] = (P_W + P_U + 2*P_V + 2) >> 2; + break; + + case HOR_DOWN_PRED:/* diagonal prediction -22.5 deg to horizontal plane */ + if ((!block_available_up)||(!block_available_left)||(!block_available_up_left)) + printf ("warning: Intra_8x8_Horizontal_Down prediction mode not allowed at mb %d\n",img->current_mb_nr); + + img->mpr[0+ioff][0+joff] = + img->mpr[2+ioff][1+joff] = + img->mpr[4+ioff][2+joff] = + img->mpr[6+ioff][3+joff] = (P_Q + P_Z + 1) >> 1; + img->mpr[0+ioff][1+joff] = + img->mpr[2+ioff][2+joff] = + img->mpr[4+ioff][3+joff] = + img->mpr[6+ioff][4+joff] = (P_R + P_Q + 1) >> 1; + img->mpr[0+ioff][2+joff] = + img->mpr[2+ioff][3+joff] = + img->mpr[4+ioff][4+joff] = + img->mpr[6+ioff][5+joff] = (P_S + P_R + 1) >> 1; + img->mpr[0+ioff][3+joff] = + img->mpr[2+ioff][4+joff] = + img->mpr[4+ioff][5+joff] = + img->mpr[6+ioff][6+joff] = (P_T + P_S + 1) >> 1; + img->mpr[0+ioff][4+joff] = + img->mpr[2+ioff][5+joff] = + img->mpr[4+ioff][6+joff] = + img->mpr[6+ioff][7+joff] = (P_U + P_T + 1) >> 1; + img->mpr[0+ioff][5+joff] = + img->mpr[2+ioff][6+joff] = + img->mpr[4+ioff][7+joff] = (P_V + P_U + 1) >> 1; + img->mpr[0+ioff][6+joff] = + img->mpr[2+ioff][7+joff] = (P_W + P_V + 1) >> 1; + img->mpr[0+ioff][7+joff] = (P_X + P_W + 1) >> 1; + img->mpr[1+ioff][0+joff] = + img->mpr[3+ioff][1+joff] = + img->mpr[5+ioff][2+joff] = + img->mpr[7+ioff][3+joff] = (P_Q + P_A + 2*P_Z + 2) >> 2; + img->mpr[1+ioff][1+joff] = + img->mpr[3+ioff][2+joff] = + img->mpr[5+ioff][3+joff] = + img->mpr[7+ioff][4+joff] = (P_Z + P_R + 2*P_Q + 2) >> 2; + img->mpr[1+ioff][2+joff] = + img->mpr[3+ioff][3+joff] = + img->mpr[5+ioff][4+joff] = + img->mpr[7+ioff][5+joff] = (P_Q + P_S + 2*P_R + 2) >> 2; + img->mpr[1+ioff][3+joff] = + 
img->mpr[3+ioff][4+joff] = + img->mpr[5+ioff][5+joff] = + img->mpr[7+ioff][6+joff] = (P_R + P_T + 2*P_S + 2) >> 2; + img->mpr[1+ioff][4+joff] = + img->mpr[3+ioff][5+joff] = + img->mpr[5+ioff][6+joff] = + img->mpr[7+ioff][7+joff] = (P_S + P_U + 2*P_T + 2) >> 2; + img->mpr[1+ioff][5+joff] = + img->mpr[3+ioff][6+joff] = + img->mpr[5+ioff][7+joff] = (P_T + P_V + 2*P_U + 2) >> 2; + img->mpr[1+ioff][6+joff] = + img->mpr[3+ioff][7+joff] = (P_U + P_W + 2*P_V + 2) >> 2; + img->mpr[1+ioff][7+joff] = (P_V + P_X + 2*P_W + 2) >> 2; + img->mpr[2+ioff][0+joff] = + img->mpr[4+ioff][1+joff] = + img->mpr[6+ioff][2+joff] = (P_Z + P_B + 2*P_A + 2) >> 2; + img->mpr[3+ioff][0+joff] = + img->mpr[5+ioff][1+joff] = + img->mpr[7+ioff][2+joff] = (P_A + P_C + 2*P_B + 2) >> 2; + img->mpr[4+ioff][0+joff] = + img->mpr[6+ioff][1+joff] = (P_B + P_D + 2*P_C + 2) >> 2; + img->mpr[5+ioff][0+joff] = + img->mpr[7+ioff][1+joff] = (P_C + P_E + 2*P_D + 2) >> 2; + img->mpr[6+ioff][0+joff] = (P_D + P_F + 2*P_E + 2) >> 2; + img->mpr[7+ioff][0+joff] = (P_E + P_G + 2*P_F + 2) >> 2; + break; + + case HOR_UP_PRED:/* diagonal prediction -22.5 deg to horizontal plane */ + if (!block_available_left) + printf ("warning: Intra_8x8_Horizontal_Up prediction mode not allowed at mb %d\n",img->current_mb_nr); + + img->mpr[0+ioff][0+joff] = (P_Q + P_R + 1) >> 1; + img->mpr[0+ioff][1+joff] = + img->mpr[2+ioff][0+joff] = (P_R + P_S + 1) >> 1; + img->mpr[0+ioff][2+joff] = + img->mpr[2+ioff][1+joff] = + img->mpr[4+ioff][0+joff] = (P_S + P_T + 1) >> 1; + img->mpr[0+ioff][3+joff] = + img->mpr[2+ioff][2+joff] = + img->mpr[4+ioff][1+joff] = + img->mpr[6+ioff][0+joff] = (P_T + P_U + 1) >> 1; + img->mpr[0+ioff][4+joff] = + img->mpr[2+ioff][3+joff] = + img->mpr[4+ioff][2+joff] = + img->mpr[6+ioff][1+joff] = (P_U + P_V + 1) >> 1; + img->mpr[0+ioff][5+joff] = + img->mpr[2+ioff][4+joff] = + img->mpr[4+ioff][3+joff] = + img->mpr[6+ioff][2+joff] = (P_V + P_W + 1) >> 1; + img->mpr[0+ioff][6+joff] = + img->mpr[2+ioff][5+joff] = + 
img->mpr[4+ioff][4+joff] = + img->mpr[6+ioff][3+joff] = (P_W + P_X + 1) >> 1; + img->mpr[6+ioff][4+joff] = + img->mpr[7+ioff][4+joff] = + img->mpr[4+ioff][5+joff] = + img->mpr[5+ioff][5+joff] = + img->mpr[6+ioff][5+joff] = + img->mpr[7+ioff][5+joff] = + img->mpr[2+ioff][6+joff] = + img->mpr[3+ioff][6+joff] = + img->mpr[4+ioff][6+joff] = + img->mpr[5+ioff][6+joff] = + img->mpr[6+ioff][6+joff] = + img->mpr[7+ioff][6+joff] = + img->mpr[0+ioff][7+joff] = + img->mpr[1+ioff][7+joff] = + img->mpr[2+ioff][7+joff] = + img->mpr[3+ioff][7+joff] = + img->mpr[4+ioff][7+joff] = + img->mpr[5+ioff][7+joff] = + img->mpr[6+ioff][7+joff] = + img->mpr[7+ioff][7+joff] = P_X; + img->mpr[1+ioff][6+joff] = + img->mpr[3+ioff][5+joff] = + img->mpr[5+ioff][4+joff] = + img->mpr[7+ioff][3+joff] = (P_W + 3*P_X + 2) >> 2; + img->mpr[1+ioff][5+joff] = + img->mpr[3+ioff][4+joff] = + img->mpr[5+ioff][3+joff] = + img->mpr[7+ioff][2+joff] = (P_X + P_V + 2*P_W + 2) >> 2; + img->mpr[1+ioff][4+joff] = + img->mpr[3+ioff][3+joff] = + img->mpr[5+ioff][2+joff] = + img->mpr[7+ioff][1+joff] = (P_W + P_U + 2*P_V + 2) >> 2; + img->mpr[1+ioff][3+joff] = + img->mpr[3+ioff][2+joff] = + img->mpr[5+ioff][1+joff] = + img->mpr[7+ioff][0+joff] = (P_V + P_T + 2*P_U + 2) >> 2; + img->mpr[1+ioff][2+joff] = + img->mpr[3+ioff][1+joff] = + img->mpr[5+ioff][0+joff] = (P_U + P_S + 2*P_T + 2) >> 2; + img->mpr[1+ioff][1+joff] = + img->mpr[3+ioff][0+joff] = (P_T + P_R + 2*P_S + 2) >> 2; + img->mpr[1+ioff][0+joff] = (P_S + P_Q + 2*P_R + 2) >> 2; + break; + + default: + printf("Error: illegal intra_4x4 prediction mode: %d\n",predmode); + return SEARCH_SYNC; + break; + } + return DECODING_OK; + } + + + + /*! 
+ ************************************************************************************* + * \brief + * Prefiltering for Intra8x8 prediction + ************************************************************************************* + */ + void LowPassForIntra8x8Pred(int *PredPel, int block_up_left, int block_up, int block_left) + { + int i; + int LoopArray[25]; + + + for(i = 0; i < 25; i++) + LoopArray[i] = PredPel[i] ; + + if(block_up) + { + if(block_up_left) + { + LoopArray[1] = ((&P_Z)[0] + ((&P_Z)[1]<<1) + (&P_Z)[2] + 2)>>2; + } + else + LoopArray[1] = ((&P_Z)[1] + ((&P_Z)[1]<<1) + (&P_Z)[2] + 2)>>2; + + + for(i = 2; i <16; i++) + { + LoopArray[i] = ((&P_Z)[i-1] + ((&P_Z)[i]<<1) + (&P_Z)[i+1] + 2)>>2; + } + LoopArray[16] = (P_P + (P_P<<1) + P_O + 2)>>2; + } + + if(block_up_left) + { + + if(block_up && block_left) + { + LoopArray[0] = (P_Q + (P_Z<<1) + P_A +2)>>2; + } + else + { + if(block_up) + LoopArray[0] = (P_Z + (P_Z<<1) + P_A +2)>>2; + else + if(block_left) + LoopArray[0] = (P_Z + (P_Z<<1) + P_Q +2)>>2; + } + + } + + if(block_left) + { + if(block_up_left) + LoopArray[17] = (P_Z + (P_Q<<1) + P_R + 2)>>2; + else + LoopArray[17] = (P_Q + (P_Q<<1) + P_R + 2)>>2; + + for(i = 18; i <24; i++) + { + LoopArray[i] = ((&P_Z)[i-1] + ((&P_Z)[i]<<1) + (&P_Z)[i+1] + 2)>>2; + } + LoopArray[24] = (P_W + (P_X<<1) + P_X + 2)>>2; + } + + for(i = 0; i < 25; i++) + PredPel[i] = LoopArray[i]; + } + + + + /*! 
+ *********************************************************************** + * \brief + * Inverse 8x8 transformation + *********************************************************************** + */ + void itrans8x8(struct img_par *img, //!< image parameters + int ioff, //!< offset added to the first index of img->m7 and img->mpr + int joff) //!< offset added to the second index of img->m7 and img->mpr + { + int i,j; + int m6[8][8]; /* intermediate result of the first 1-D pass */ + Boolean lossless_qpprime = ((img->qp + img->bitdepth_luma_qp_scale)==0 && img->lossless_qpprime_flag==1); + + int residue_transform_flag = img->residue_transform_flag; + + + /* first 1-D pass: reads img->m7, writes m6; like the second pass it is skipped entirely when lossless_qpprime is set */ for( i=0; i<8 && !lossless_qpprime; i++) + { + int a[8], b[8]; + a[0] = img->m7[ioff + 0][joff + i] + img->m7[ioff + 4][joff + i]; + a[4] = img->m7[ioff + 0][joff + i] - img->m7[ioff + 4][joff + i]; + a[2] = (img->m7[ioff + 2][joff + i]>>1) - img->m7[ioff + 6][joff + i]; + a[6] = img->m7[ioff + 2][joff + i] + (img->m7[ioff + 6][joff + i]>>1); + + b[0] = a[0] + a[6]; + b[2] = a[4] + a[2]; + b[4] = a[4] - a[2]; + b[6] = a[0] - a[6]; + + a[1] = -img->m7[ioff + 3][joff + i] + img->m7[ioff + 5][joff + i] - img->m7[ioff + 7][joff + i] - (img->m7[ioff + 7][joff + i]>>1); + a[3] = img->m7[ioff + 1][joff + i] + img->m7[ioff + 7][joff + i] - img->m7[ioff + 3][joff + i] - (img->m7[ioff + 3][joff + i]>>1); + a[5] = -img->m7[ioff + 1][joff + i] + img->m7[ioff + 7][joff + i] + img->m7[ioff + 5][joff + i] + (img->m7[ioff + 5][joff + i]>>1); + a[7] = img->m7[ioff + 3][joff + i] + img->m7[ioff + 5][joff + i] + img->m7[ioff + 1][joff + i] + (img->m7[ioff + 1][joff + i]>>1); + + b[1] = a[1] + (a[7]>>2); + // b[7] = -(a[1]>>2 + 0) + a[7]; KS: do we need to add zero? 
+ b[7] = -(a[1]>>2) + a[7]; + b[3] = a[3] + (a[5]>>2); + b[5] = (a[3]>>2) - a[5]; + + m6[0][i] = b[0] + b[7]; + m6[1][i] = b[2] + b[5]; + m6[2][i] = b[4] + b[3]; + m6[3][i] = b[6] + b[1]; + m6[4][i] = b[6] - b[1]; + m6[5][i] = b[4] - b[3]; + m6[6][i] = b[2] - b[5]; + m6[7][i] = b[0] - b[7]; + } /* second 1-D pass: same butterfly along the other index, reads m6 and writes the result back into img->m7 */ + for( i=0; i<8 && !lossless_qpprime; i++) + { + int a[8], b[8]; + a[0] = m6[i][0] + m6[i][4]; + a[4] = m6[i][0] - m6[i][4]; + a[2] = (m6[i][2]>>1) - m6[i][6]; + a[6] = m6[i][2] + (m6[i][6]>>1); + + b[0] = a[0] + a[6]; + b[2] = a[4] + a[2]; + b[4] = a[4] - a[2]; + b[6] = a[0] - a[6]; + + a[1] = -m6[i][3] + m6[i][5] - m6[i][7] - (m6[i][7]>>1); + a[3] = m6[i][1] + m6[i][7] - m6[i][3] - (m6[i][3]>>1); + a[5] = -m6[i][1] + m6[i][7] + m6[i][5] + (m6[i][5]>>1); + a[7] = m6[i][3] + m6[i][5] + m6[i][1] + (m6[i][1]>>1); + + b[1] = a[1] + (a[7]>>2); + b[7] = -(a[1]>>2) + a[7]; + b[3] = a[3] + (a[5]>>2); + b[5] = (a[3]>>2) - a[5]; + + img->m7[ioff + i][joff + 0] = b[0] + b[7]; + img->m7[ioff + i][joff + 1] = b[2] + b[5]; + img->m7[ioff + i][joff + 2] = b[4] + b[3]; + img->m7[ioff + i][joff + 3] = b[6] + b[1]; + img->m7[ioff + i][joff + 4] = b[6] - b[1]; + img->m7[ioff + i][joff + 5] = b[4] - b[3]; + img->m7[ioff + i][joff + 6] = b[2] - b[5]; + img->m7[ioff + i][joff + 7] = b[0] - b[7]; + } /* final pass over all 64 positions: without residue transform the prediction img->mpr is added and the result is clipped to 0..img->max_imgpel_value; with residue transform only the rounding shift is applied (nothing at all in the lossless case) */ + for( i=0; i<8; i++) + { + for( j=0; j<8; j++) + { + // Residue Color Transform + if(!residue_transform_flag) + { + if(lossless_qpprime) + img->m7[i+ioff][j+joff] =min(img->max_imgpel_value,max(0,img->m7[ioff + i][joff + j]+(long)img->mpr[i+ioff][j+joff])); + else + img->m7[i+ioff][j+joff] =min(img->max_imgpel_value,max(0,(img->m7[ioff + i][joff + j]+((long)img->mpr[i+ioff][j+joff] << DQ_BITS_8)+DQ_ROUND_8)>>DQ_BITS_8)); + } + else + { + if(lossless_qpprime) + img->m7[i+ioff][j+joff] = img->m7[ioff + i][joff + j]; + else + img->m7[i+ioff][j+joff] =(img->m7[ioff + i][joff + j]+DQ_ROUND_8)>>DQ_BITS_8; + } + } + } + } Index: llvm-test/MultiSource/Applications/JM/ldecod/transform8x8.h diff -c /dev/null 
llvm-test/MultiSource/Applications/JM/ldecod/transform8x8.h:1.1 *** /dev/null Sat Feb 11 04:33:42 2006 --- llvm-test/MultiSource/Applications/JM/ldecod/transform8x8.h Sat Feb 11 04:33:22 2006 *************** *** 0 **** --- 1,38 ---- + /*! + *************************************************************************** + * + * \file transform8x8.h + * + * \brief + * prototypes of 8x8 transform functions + * + * \date + * 9. October 2003 + * + * \author + * Main contributors (see contributors.h for copyright, address and affiliation details) + * - Yuri Vatis vatis at hhi.de + **************************************************************************/ + + #ifndef _TRANSFORM8X8_H_ + #define _TRANSFORM8X8_H_ + + #include "global.h" + #include "image.h" + #include "mb_access.h" + #include "elements.h" + #include + + + int **cofAC8x8_intra, ****cofAC8x8_iintra; // [level/run][scan_pos] + + + void intrapred_luma8x8(int img_x,int img_y, int *left_available, int *up_available, int *all_available); + int intrapred8x8(struct img_par *img, int b8); + void itrans8x8(struct img_par *img, int ioff, int joff); + double RDCost_for_8x8IntraBlocks(int *c_nz, int b8, int ipmode, double lambda, double min_rdcost, int mostProbableMode); + int dct_luma8x8(int block_x,int block_y,int *coeff_cost); + void LowPassForIntra8x8Pred(int *PredPel, int block_up_left, int block_up, int block_left); + + + #endif Index: llvm-test/MultiSource/Applications/JM/ldecod/vlc.c diff -c /dev/null llvm-test/MultiSource/Applications/JM/ldecod/vlc.c:1.1 *** /dev/null Sat Feb 11 04:33:42 2006 --- llvm-test/MultiSource/Applications/JM/ldecod/vlc.c Sat Feb 11 04:33:22 2006 *************** *** 0 **** --- 1,1368 ---- + + /*! 
+ ************************************************************************ + * \file vlc.c + * + * \brief + * VLC support functions + * + * \author + * Main contributors (see contributors.h for copyright, address and affiliation details) + * - Inge Lille-Langøy + * - Detlev Marpe + * - Gabi Blaettermann + ************************************************************************ + */ + #include "contributors.h" + + #include <stdlib.h> + #include <math.h> + #include <string.h> + #include <assert.h> + + #include "global.h" + #include "vlc.h" + #include "elements.h" + + + // A little trick to avoid those horrible #if TRACE all over the source code + #if TRACE + #define SYMTRACESTRING(s) strncpy(sym->tracestring,s,TRACESTRING_SIZE) + #else + #define SYMTRACESTRING(s) // do nothing + #endif + + extern void tracebits(const char *trace_str, int len, int info,int value1); + + + int UsedBits; // for internal statistics, is adjusted by se_v, ue_v, u_1 + + // Note that all NA values are filled with 0 + + //! for the linfo_levrun_inter routine + const byte NTAB1[4][8][2] = + { + {{1,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}}, + {{1,1},{1,2},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}}, + {{2,0},{1,3},{1,4},{1,5},{0,0},{0,0},{0,0},{0,0}}, + {{3,0},{2,1},{2,2},{1,6},{1,7},{1,8},{1,9},{4,0}}, + }; + const byte LEVRUN1[16]= + { + 4,2,2,1,1,1,1,1,1,1,0,0,0,0,0,0, + }; + + + const byte NTAB2[4][8][2] = + { + {{1,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}}, + {{1,1},{2,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}}, + {{1,2},{3,0},{4,0},{5,0},{0,0},{0,0},{0,0},{0,0}}, + {{1,3},{1,4},{2,1},{3,1},{6,0},{7,0},{8,0},{9,0}}, + }; + + //! for the linfo_levrun_c2x2 routine + const byte LEVRUN3[4] = + { + 2,1,0,0 + }; + const byte NTAB3[2][2][2] = + { + {{1,0},{0,0}}, + {{2,0},{1,1}}, + }; + + /*! 
+ ************************************************************************************* + * \brief + * ue_v, reads an ue(v) syntax element, the length in bits is stored in + * the global UsedBits variable + * + * \param tracestring + * the string for the trace file + * + * \param bitstream + * the stream to be read from + * + * \return + * the value of the coded syntax element + * + ************************************************************************************* + */ + int ue_v (char *tracestring, Bitstream *bitstream) + { + SyntaxElement symbol, *sym=&symbol; + + assert (bitstream->streamBuffer != NULL); + sym->type = SE_HEADER; + sym->mapping = linfo_ue; // Mapping rule + SYMTRACESTRING(tracestring); + readSyntaxElement_VLC (sym, bitstream); + UsedBits+=sym->len; + return sym->value1; + } + + + /*! + ************************************************************************************* + * \brief + * ue_v, reads an se(v) syntax element, the length in bits is stored in + * the global UsedBits variable + * + * \param tracestring + * the string for the trace file + * + * \param bitstream + * the stream to be read from + * + * \return + * the value of the coded syntax element + * + ************************************************************************************* + */ + int se_v (char *tracestring, Bitstream *bitstream) + { + SyntaxElement symbol, *sym=&symbol; + + assert (bitstream->streamBuffer != NULL); + sym->type = SE_HEADER; + sym->mapping = linfo_se; // Mapping rule: signed integer + SYMTRACESTRING(tracestring); + readSyntaxElement_VLC (sym, bitstream); + UsedBits+=sym->len; + return sym->value1; + } + + + /*! 
+ ************************************************************************************* + * \brief + * ue_v, reads an u(v) syntax element, the length in bits is stored in + * the global UsedBits variable + * + * \param LenInBits + * length of the syntax element + * + * \param tracestring + * the string for the trace file + * + * \param bitstream + * the stream to be read from + * + * \return + * the value of the coded syntax element + * + ************************************************************************************* + */ + int u_v (int LenInBits, char*tracestring, Bitstream *bitstream) + { + SyntaxElement symbol, *sym=&symbol; + + assert (bitstream->streamBuffer != NULL); + sym->type = SE_HEADER; + sym->mapping = linfo_ue; // Mapping rule + sym->len = LenInBits; + SYMTRACESTRING(tracestring); + readSyntaxElement_FLC (sym, bitstream); + UsedBits+=sym->len; + return sym->inf; + }; + + + /*! + ************************************************************************************* + * \brief + * ue_v, reads an u(1) syntax element, the length in bits is stored in + * the global UsedBits variable + * + * \param tracestring + * the string for the trace file + * + * \param bitstream + * the stream to be read from + * + * \return + * the value of the coded syntax element + * + ************************************************************************************* + */ + int u_1 (char *tracestring, Bitstream *bitstream) + { + return u_v (1, tracestring, bitstream); + } + + + + /*! + ************************************************************************ + * \brief + * mapping rule for ue(v) syntax elements + * \par Input: + * lenght and info + * \par Output: + * number in the code table + ************************************************************************ + */ + void linfo_ue(int len, int info, int *value1, int *dummy) + { + *value1 = (int)pow(2,(len/2))+info-1; // *value1 = (int)(2<<(len>>1))+info-1; + } + + /*! 
+ ************************************************************************ + * \brief + * mapping rule for se(v) syntax elements + * \par Input: + * lenght and info + * \par Output: + * signed mvd + ************************************************************************ + */ + void linfo_se(int len, int info, int *value1, int *dummy) + { + int n; + n = (int)pow(2,(len/2))+info-1; + *value1 = (n+1)/2; + if((n & 0x01)==0) // lsb is signed bit + *value1 = -*value1; + } + + + /*! + ************************************************************************ + * \par Input: + * length and info + * \par Output: + * cbp (intra) + ************************************************************************ + */ + void linfo_cbp_intra(int len,int info,int *cbp, int *dummy) + { + extern const byte NCBP[2][48][2]; + int cbp_idx; + + linfo_ue(len,info,&cbp_idx,dummy); + *cbp=NCBP[active_sps->chroma_format_idc?1:0][cbp_idx][0]; + } + + /*! + ************************************************************************ + * \par Input: + * length and info + * \par Output: + * cbp (inter) + ************************************************************************ + */ + void linfo_cbp_inter(int len,int info,int *cbp, int *dummy) + { + extern const byte NCBP[2][48][2]; + int cbp_idx; + + linfo_ue(len,info,&cbp_idx,dummy); + *cbp=NCBP[active_sps->chroma_format_idc?1:0][cbp_idx][1]; + } + + /*! 
+ ************************************************************************ + * \par Input: + * length and info + * \par Output: + * level, run + ************************************************************************ + */ + void linfo_levrun_inter(int len, int info, int *level, int *irun) + { + int l2; + int inf; + if (len<=9) + { + l2=max(0,len/2-1); + inf=info/2; + *level=NTAB1[l2][inf][0]; + *irun=NTAB1[l2][inf][1]; + if ((info&0x01)==1) + *level=-*level; // make sign + } + else // if len > 9, skip using the array + { + *irun=(info&0x1e)>>1; + *level = LEVRUN1[*irun] + info/32 + (int)pow(2,len/2 - 5); + if ((info&0x01)==1) + *level=-*level; + } + if (len == 1) // EOB + *level = 0; + } + + + /*! + ************************************************************************ + * \par Input: + * length and info + * \par Output: + * level, run + ************************************************************************ + */ + void linfo_levrun_c2x2(int len, int info, int *level, int *irun) + { + int l2; + int inf; + + if (len<=5) + { + l2=max(0,len/2-1); + inf=info/2; + *level=NTAB3[l2][inf][0]; + *irun=NTAB3[l2][inf][1]; + if ((info&0x01)==1) + *level=-*level; // make sign + } + else // if len > 5, skip using the array + { + *irun=(info&0x06)>>1; + *level = LEVRUN3[*irun] + info/8 + (int)pow(2,len/2 - 3); + if ((info&0x01)==1) + *level=-*level; + } + if (len == 1) // EOB + *level = 0; + } + + /*! 
+ ************************************************************************ + * \brief + * read next UVLC codeword from UVLC-partition and + * map it to the corresponding syntax element + ************************************************************************ + */ + int readSyntaxElement_VLC(SyntaxElement *sym, Bitstream *currStream) + { + int frame_bitoffset = currStream->frame_bitoffset; + byte *buf = currStream->streamBuffer; + int BitstreamLengthInBytes = currStream->bitstream_length; + + sym->len = GetVLCSymbol (buf, frame_bitoffset, &(sym->inf), BitstreamLengthInBytes); + if (sym->len == -1) + return -1; + currStream->frame_bitoffset += sym->len; + sym->mapping(sym->len,sym->inf,&(sym->value1),&(sym->value2)); + + #if TRACE + tracebits(sym->tracestring, sym->len, sym->inf, sym->value1); + #endif + + return 1; + } + + + /*! + ************************************************************************ + * \brief + * read next UVLC codeword from UVLC-partition and + * map it to the corresponding syntax element + ************************************************************************ + */ + int readSyntaxElement_UVLC(SyntaxElement *sym, struct img_par *img, struct inp_par *inp, struct datapartition *dP) + { + Bitstream *currStream = dP->bitstream; + + return (readSyntaxElement_VLC(sym, currStream)); + } + + /*! 
+ ************************************************************************ + * \brief + * read next VLC codeword for 4x4 Intra Prediction Mode and + * map it to the corresponding Intra Prediction Direction + ************************************************************************ + */ /* Returns -1 when GetVLCSymbol_IntraMode fails, otherwise 1 after advancing frame_bitoffset by sym->len. value1 becomes -1 for a 1-bit codeword and sym->inf otherwise. img and inp are not used. */ + int readSyntaxElement_Intra4x4PredictionMode(SyntaxElement *sym, struct img_par *img, struct inp_par *inp, struct datapartition *dP) + { + Bitstream *currStream = dP->bitstream; + int frame_bitoffset = currStream->frame_bitoffset; + byte *buf = currStream->streamBuffer; + int BitstreamLengthInBytes = currStream->bitstream_length; + + sym->len = GetVLCSymbol_IntraMode (buf, frame_bitoffset, &(sym->inf), BitstreamLengthInBytes); + + if (sym->len == -1) + return -1; + + currStream->frame_bitoffset += sym->len; + sym->value1 = sym->len == 1 ? -1 : sym->inf; + + #if TRACE + tracebits2(sym->tracestring, sym->len, sym->value1); + #endif + + return 1; + } + /* NOTE(review): the body of GetVLCSymbol_IntraMode below is damaged in this archived copy. The text that followed "(0x01" up to " bytecount)" is missing (apparently stripped as markup when the mail was archived), so the function cannot be read or compiled as shown; restore it from the original vlc.c before relying on it. */ + int GetVLCSymbol_IntraMode (byte buffer[],int totbitoffset,int *info, int bytecount) + { + + register int inf; + long byteoffset; // byte from start of buffer + int bitoffset; // bit from start of byte + int ctr_bit=0; // control bit for current bit position + int bitcounter=1; + int len; + int info_bit; + + byteoffset = totbitoffset/8; + bitoffset = 7-(totbitoffset%8); + ctr_bit = (buffer[byteoffset] & (0x01< bytecount) + { + return -1; + } + inf=(inf<<1); + if(buffer[byteoffset] & (0x01<<(bitoffset))) + inf |=1; + } + + *info = inf; + return bitcounter; // the caller adds this value to frame_bitoffset, i.e. it is used as the number of bits read + } + + + /*! 
+ ************************************************************************ + * \brief + * test if bit buffer contains only stop bit + * + * \param buffer + * buffer containing VLC-coded data bits + * \param totbitoffset + * bit offset from start of partition + * \param bytecount + * buffer length + * \return + * true if more bits available + ************************************************************************ + */ + int more_rbsp_data (byte buffer[],int totbitoffset,int bytecount) + { + + long byteoffset; // byte from start of buffer + int bitoffset; // bit from start of byte + int ctr_bit=0; // control bit for current bit posision + + int cnt=0; + + + byteoffset= totbitoffset/8; + bitoffset= 7-(totbitoffset%8); + + assert (byteoffset=0) + { + ctr_bit = (buffer[byteoffset] & (0x01<0) cnt++; + bitoffset--; + } + + return (0!=cnt); + + } + + + /*! + ************************************************************************ + * \brief + * Check if there are symbols for the next MB + ************************************************************************ + */ + int uvlc_startcode_follows(struct img_par *img, struct inp_par *inp, int dummy) + { + int dp_Nr = assignSE2partition[img->currentSlice->dp_mode][SE_MBTYPE]; + DataPartition *dP = &(img->currentSlice->partArr[dp_Nr]); + Bitstream *currStream = dP->bitstream; + byte *buf = currStream->streamBuffer; + + //KS: new function test for End of Buffer + return (!(more_rbsp_data(buf, currStream->frame_bitoffset,currStream->bitstream_length))); + } + + + + /*! 
+ ************************************************************************ + * \brief + * read one exp-golomb VLC symbol + * + * \param buffer + * containing VLC-coded data bits + * \param totbitoffset + * bit offset from start of partition + * \param info + * returns the value of the symbol + * \param bytecount + * buffer length + * \return + * bits read + ************************************************************************ + */ + int GetVLCSymbol (byte buffer[],int totbitoffset,int *info, int bytecount) + { + + register int inf; + long byteoffset; // byte from start of buffer + int bitoffset; // bit from start of byte + int ctr_bit=0; // control bit for current bit posision + int bitcounter=1; + int len; + int info_bit; + + byteoffset= totbitoffset/8; + bitoffset= 7-(totbitoffset%8); + ctr_bit = (buffer[byteoffset] & (0x01< bytecount) + { + return -1; + } + inf=(inf<<1); + if(buffer[byteoffset] & (0x01<<(bitoffset))) + inf |=1; + } + + *info = inf; + return bitcounter; // return absolute offset in bit from start of frame + } + + extern void tracebits2(const char *trace_str, int len, int info) ; + + /*! 
+ ************************************************************************ + * \brief + * code from bitstream (2d tables) + ************************************************************************ + */ + /* lentab and codtab each hold tabheight rows of tabwidth entries; a length of 0 marks an unused entry. The first entry whose code equals the next len bits of the stream is taken: its column is stored in sym->value1, its row in sym->value2, its length in sym->len and its code value in *code, and the stream position is advanced by len. Returns 0 on a match, -1 if no entry matches. */ + + int code_from_bitstream_2d(SyntaxElement *sym, + DataPartition *dP, + int *lentab, + int *codtab, + int tabwidth, + int tabheight, + int *code) + { + Bitstream *currStream = dP->bitstream; + int frame_bitoffset = currStream->frame_bitoffset; + byte *buf = currStream->streamBuffer; + int BitstreamLengthInBytes = currStream->bitstream_length; + + int i,j; + int len, cod; + + // this VLC decoding method is not optimized for speed + for (j = 0; j < tabheight; j++) { + for (i = 0; i < tabwidth; i++) + { + len = lentab[i]; + if (!len) + continue; + cod = codtab[i]; + + if ((ShowBits(buf, frame_bitoffset, BitstreamLengthInBytes, len) == cod)) + { + sym->value1 = i; + sym->value2 = j; + currStream->frame_bitoffset += len; // move bitstream pointer + sym->len = len; + goto found_code; + } + } + lentab += tabwidth; + codtab += tabwidth; + } + + return -1; // failed to find code + + found_code: + + *code = cod; + + return 0; + } + + + /*! + ************************************************************************ + * \brief + * read FLC codeword from UVLC-partition + * + * \param sym + * sym->len gives the number of bits to read; sym->inf and sym->value1 + * receive the value read + * \param currStream + * bitstream to read from; frame_bitoffset is advanced by sym->len on success + * \return + * 1 on success, -1 if GetBits() returned a negative value + ************************************************************************ + */ + int readSyntaxElement_FLC(SyntaxElement *sym, Bitstream *currStream) + { + int frame_bitoffset = currStream->frame_bitoffset; + byte *buf = currStream->streamBuffer; + int BitstreamLengthInBytes = currStream->bitstream_length; + + if ((GetBits(buf, frame_bitoffset, &(sym->inf), BitstreamLengthInBytes, sym->len)) < 0) + return -1; + + currStream->frame_bitoffset += sym->len; // move bitstream pointer + sym->value1 = sym->inf; + + #if TRACE + tracebits2(sym->tracestring, sym->len, sym->inf); + #endif + + return 1; + } + + + + /*!
+ ************************************************************************ + * \brief + * read NumCoeff/TrailingOnes codeword from UVLC-partition + ************************************************************************ + */ + + int readSyntaxElement_NumCoeffTrailingOnes(SyntaxElement *sym, DataPartition *dP, + char *type) + { + Bitstream *currStream = dP->bitstream; + int frame_bitoffset = currStream->frame_bitoffset; + byte *buf = currStream->streamBuffer; + int BitstreamLengthInBytes = currStream->bitstream_length; + + int vlcnum, retval; + int code, *ct, *lt; + + int lentab[3][4][17] = + { + { // 0702 + { 1, 6, 8, 9,10,11,13,13,13,14,14,15,15,16,16,16,16}, + { 0, 2, 6, 8, 9,10,11,13,13,14,14,15,15,15,16,16,16}, + { 0, 0, 3, 7, 8, 9,10,11,13,13,14,14,15,15,16,16,16}, + { 0, 0, 0, 5, 6, 7, 8, 9,10,11,13,14,14,15,15,16,16}, + }, + { + { 2, 6, 6, 7, 8, 8, 9,11,11,12,12,12,13,13,13,14,14}, + { 0, 2, 5, 6, 6, 7, 8, 9,11,11,12,12,13,13,14,14,14}, + { 0, 0, 3, 6, 6, 7, 8, 9,11,11,12,12,13,13,13,14,14}, + { 0, 0, 0, 4, 4, 5, 6, 6, 7, 9,11,11,12,13,13,13,14}, + }, + { + { 4, 6, 6, 6, 7, 7, 7, 7, 8, 8, 9, 9, 9,10,10,10,10}, + { 0, 4, 5, 5, 5, 5, 6, 6, 7, 8, 8, 9, 9, 9,10,10,10}, + { 0, 0, 4, 5, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9,10,10,10}, + { 0, 0, 0, 4, 4, 4, 4, 4, 5, 6, 7, 8, 8, 9,10,10,10}, + }, + + }; + + int codtab[3][4][17] = + { + { + { 1, 5, 7, 7, 7, 7,15,11, 8,15,11,15,11,15,11, 7,4}, + { 0, 1, 4, 6, 6, 6, 6,14,10,14,10,14,10, 1,14,10,6}, + { 0, 0, 1, 5, 5, 5, 5, 5,13, 9,13, 9,13, 9,13, 9,5}, + { 0, 0, 0, 3, 3, 4, 4, 4, 4, 4,12,12, 8,12, 8,12,8}, + }, + { + { 3,11, 7, 7, 7, 4, 7,15,11,15,11, 8,15,11, 7, 9,7}, + { 0, 2, 7,10, 6, 6, 6, 6,14,10,14,10,14,10,11, 8,6}, + { 0, 0, 3, 9, 5, 5, 5, 5,13, 9,13, 9,13, 9, 6,10,5}, + { 0, 0, 0, 5, 4, 6, 8, 4, 4, 4,12, 8,12,12, 8, 1,4}, + }, + { + {15,15,11, 8,15,11, 9, 8,15,11,15,11, 8,13, 9, 5,1}, + { 0,14,15,12,10, 8,14,10,14,14,10,14,10, 7,12, 8,4}, + { 0, 0,13,14,11, 9,13, 9,13,10,13, 9,13, 9,11, 7,3}, + { 0, 0, 0,12,11,10, 9, 
8,13,12,12,12, 8,12,10, 6,2}, + }, + }; + + vlcnum = sym->value1; + // vlcnum is the index of Table used to code coeff_token + // vlcnum==3 means (8<=nC) which uses 6bit FLC + + if (vlcnum == 3) + { + // read 6 bit FLC + code = ShowBits(buf, frame_bitoffset, BitstreamLengthInBytes, 6); + currStream->frame_bitoffset += 6; + sym->value2 = code & 3; + sym->value1 = (code >> 2); + + if (!sym->value1 && sym->value2 == 3) + { + // #c = 0, #t1 = 3 => #c = 0 + sym->value2 = 0; + } + else + sym->value1++; + + sym->len = 6; + + retval = 0; + } + else + + { + lt = &lentab[vlcnum][0][0]; + ct = &codtab[vlcnum][0][0]; + retval = code_from_bitstream_2d(sym, dP, lt, ct, 17, 4, &code); + } + + if (retval) + { + printf("ERROR: failed to find NumCoeff/TrailingOnes\n"); + exit(-1); + } + + #if TRACE + snprintf(sym->tracestring, + TRACESTRING_SIZE, "%s # c & tr.1s vlc=%d #c=%d #t1=%d", + type, vlcnum, sym->value1, sym->value2); + tracebits2(sym->tracestring, sym->len, code); + + #endif + + return retval; + } + + + /*! 
+ ************************************************************************ + * \brief + * read NumCoeff/TrailingOnes codeword from UVLC-partition ChromaDC + ************************************************************************ + */ + int readSyntaxElement_NumCoeffTrailingOnesChromaDC(SyntaxElement *sym, DataPartition *dP) + { + int retval; + int code, *ct, *lt; + + int lentab[3][4][17] = + { + //YUV420 + {{ 2, 6, 6, 6, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + { 0, 1, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + { 0, 0, 3, 7, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + { 0, 0, 0, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, + //YUV422 + {{ 1, 7, 7, 9, 9,10,11,12,13, 0, 0, 0, 0, 0, 0, 0, 0}, + { 0, 2, 7, 7, 9,10,11,12,12, 0, 0, 0, 0, 0, 0, 0, 0}, + { 0, 0, 3, 7, 7, 9,10,11,12, 0, 0, 0, 0, 0, 0, 0, 0}, + { 0, 0, 0, 5, 6, 7, 7,10,11, 0, 0, 0, 0, 0, 0, 0, 0}}, + //YUV444 + {{ 1, 6, 8, 9,10,11,13,13,13,14,14,15,15,16,16,16,16}, + { 0, 2, 6, 8, 9,10,11,13,13,14,14,15,15,15,16,16,16}, + { 0, 0, 3, 7, 8, 9,10,11,13,13,14,14,15,15,16,16,16}, + { 0, 0, 0, 5, 6, 7, 8, 9,10,11,13,14,14,15,15,16,16}} + }; + + int codtab[3][4][17] = + { + //YUV420 + {{ 1, 7, 4, 3, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + { 0, 1, 6, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + { 0, 0, 1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + { 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, + //YUV422 + {{ 1,15,14, 7, 6, 7, 7, 7, 7, 0, 0, 0, 0, 0, 0, 0, 0}, + { 0, 1,13,12, 5, 6, 6, 6, 5, 0, 0, 0, 0, 0, 0, 0, 0}, + { 0, 0, 1,11,10, 4, 5, 5, 4, 0, 0, 0, 0, 0, 0, 0, 0}, + { 0, 0, 0, 1, 1, 9, 8, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0}}, + //YUV444 + {{ 1, 5, 7, 7, 7, 7,15,11, 8,15,11,15,11,15,11, 7, 4}, + { 0, 1, 4, 6, 6, 6, 6,14,10,14,10,14,10, 1,14,10, 6}, + { 0, 0, 1, 5, 5, 5, 5, 5,13, 9,13, 9,13, 9,13, 9, 5}, + { 0, 0, 0, 3, 3, 4, 4, 4, 4, 4,12,12, 8,12, 8,12, 8}} + + }; + int yuv = active_sps->chroma_format_idc - 1; + //ADD-VG-14052004-END + + lt = &lentab[yuv][0][0]; + ct = &codtab[yuv][0][0]; + + retval 
= code_from_bitstream_2d(sym, dP, lt, ct, 17, 4, &code); + + if (retval) + { + printf("ERROR: failed to find NumCoeff/TrailingOnes ChromaDC\n"); + exit(-1); + } + + + #if TRACE + snprintf(sym->tracestring, + TRACESTRING_SIZE, "ChrDC # c & tr.1s #c=%d #t1=%d", + sym->value1, sym->value2); + tracebits2(sym->tracestring, sym->len, code); + + #endif + + return retval; + } + + + + + /*! + ************************************************************************ + * \brief + * read Level VLC0 codeword from UVLC-partition + ************************************************************************ + */ + int readSyntaxElement_Level_VLC0(SyntaxElement *sym, struct datapartition *dP) + { + Bitstream *currStream = dP->bitstream; + int frame_bitoffset = currStream->frame_bitoffset; + byte *buf = currStream->streamBuffer; + int BitstreamLengthInBytes = currStream->bitstream_length; + int len, sign=0, level=0, code; + int offset, addbit; + + len = 0; + while (!ShowBits(buf, frame_bitoffset+len, BitstreamLengthInBytes, 1)) + len++; + + len++; + code = 1; + frame_bitoffset += len; + + if (len < 15) + { + sign = (len - 1) & 1; + level = (len-1) / 2 + 1; + } + else if (len == 15) + { + // escape code + code = (code << 4) | ShowBits(buf, frame_bitoffset, BitstreamLengthInBytes, 4); + len += 4; + frame_bitoffset += 4; + sign = (code & 1); + level = ((code >> 1) & 0x7) + 8; + } + else if (len >= 16) + { + // escape code + addbit=len-16; + code = ShowBits(buf, frame_bitoffset, BitstreamLengthInBytes, (len-4)); + len = (len-4); + frame_bitoffset += len; + sign = (code & 1); + + offset=(2048<> 1) + offset; + code |= (1 << (len)); // for display purpose only + len += addbit + 16; + } + + if (sign) + level = -level; + + sym->inf = level; + sym->len = len; + + #if TRACE + tracebits2(sym->tracestring, sym->len, code); + #endif + currStream->frame_bitoffset = frame_bitoffset; + return 0; + + } + + /*! 
+ ************************************************************************ + * \brief + * read Level VLC codeword from UVLC-partition + ************************************************************************ + */ + int readSyntaxElement_Level_VLCN(SyntaxElement *sym, int vlc, struct datapartition *dP) + { + + Bitstream *currStream = dP->bitstream; + int frame_bitoffset = currStream->frame_bitoffset; + byte *buf = currStream->streamBuffer; + int BitstreamLengthInBytes = currStream->bitstream_length; + + int levabs, sign; + int len = 0; + int code, sb; + + int numPrefix; + int shift = vlc-1; + int escape = (15< suffix + if (vlc-1) + { + sb = ShowBits(buf, frame_bitoffset+len, BitstreamLengthInBytes, vlc-1); + code = (code << (vlc-1) )| sb; + levabs += sb; + len += (vlc-1); + } + + // read 1 bit -> sign + sign = ShowBits(buf, frame_bitoffset+len, BitstreamLengthInBytes, 1); + code = (code << 1)| sign; + len ++; + } + else // escape + { + addbit = numPrefix - 15; + + sb = ShowBits(buf, frame_bitoffset+len, BitstreamLengthInBytes, (11+addbit)); + code = (code << (11+addbit) )| sb; + + len += (11+addbit); + offset = (2048< sign + sign = ShowBits(buf, frame_bitoffset+len, BitstreamLengthInBytes, 1); + code = (code << 1)| sign; + len++; + } + + sym->inf = (sign)?-levabs:levabs; + sym->len = len; + + currStream->frame_bitoffset = frame_bitoffset+len; + + #if TRACE + tracebits2(sym->tracestring, sym->len, code); + #endif + + return 0; + } + + /*! 
+ ************************************************************************ + * \brief + * read Total Zeros codeword from UVLC-partition + ************************************************************************ + */ + int readSyntaxElement_TotalZeros(SyntaxElement *sym, DataPartition *dP) + { + int vlcnum, retval; + int code, *ct, *lt; + + int lentab[TOTRUN_NUM][16] = + { + + { 1,3,3,4,4,5,5,6,6,7,7,8,8,9,9,9}, + { 3,3,3,3,3,4,4,4,4,5,5,6,6,6,6}, + { 4,3,3,3,4,4,3,3,4,5,5,6,5,6}, + { 5,3,4,4,3,3,3,4,3,4,5,5,5}, + { 4,4,4,3,3,3,3,3,4,5,4,5}, + { 6,5,3,3,3,3,3,3,4,3,6}, + { 6,5,3,3,3,2,3,4,3,6}, + { 6,4,5,3,2,2,3,3,6}, + { 6,6,4,2,2,3,2,5}, + { 5,5,3,2,2,2,4}, + { 4,4,3,3,1,3}, + { 4,4,2,1,3}, + { 3,3,1,2}, + { 2,2,1}, + { 1,1}, + }; + + int codtab[TOTRUN_NUM][16] = + { + {1,3,2,3,2,3,2,3,2,3,2,3,2,3,2,1}, + {7,6,5,4,3,5,4,3,2,3,2,3,2,1,0}, + {5,7,6,5,4,3,4,3,2,3,2,1,1,0}, + {3,7,5,4,6,5,4,3,3,2,2,1,0}, + {5,4,3,7,6,5,4,3,2,1,1,0}, + {1,1,7,6,5,4,3,2,1,1,0}, + {1,1,5,4,3,3,2,1,1,0}, + {1,1,1,3,3,2,2,1,0}, + {1,0,1,3,2,1,1,1,}, + {1,0,1,3,2,1,1,}, + {0,1,1,2,1,3}, + {0,1,1,1,1}, + {0,1,1,1}, + {0,1,1}, + {0,1}, + }; + vlcnum = sym->value1; + + lt = &lentab[vlcnum][0]; + ct = &codtab[vlcnum][0]; + + retval = code_from_bitstream_2d(sym, dP, lt, ct, 16, 1, &code); + + if (retval) + { + printf("ERROR: failed to find Total Zeros\n"); + exit(-1); + } + + + #if TRACE + tracebits2(sym->tracestring, sym->len, code); + + #endif + + return retval; + } + + /*! 
+ ************************************************************************ + * \brief + * read Total Zeros Chroma DC codeword from UVLC-partition + ************************************************************************ + */ + int readSyntaxElement_TotalZerosChromaDC(SyntaxElement *sym, DataPartition *dP) + { + int vlcnum, retval; + int code, *ct, *lt; + + int lentab[3][TOTRUN_NUM][16] = + { + //YUV420 + {{ 1,2,3,3}, + { 1,2,2}, + { 1,1}}, + //YUV422 + {{ 1,3,3,4,4,4,5,5}, + { 3,2,3,3,3,3,3}, + { 3,3,2,2,3,3}, + { 3,2,2,2,3}, + { 2,2,2,2}, + { 2,2,1}, + { 1,1}}, + //YUV444 + {{ 1,3,3,4,4,5,5,6,6,7,7,8,8,9,9,9}, + { 3,3,3,3,3,4,4,4,4,5,5,6,6,6,6}, + { 4,3,3,3,4,4,3,3,4,5,5,6,5,6}, + { 5,3,4,4,3,3,3,4,3,4,5,5,5}, + { 4,4,4,3,3,3,3,3,4,5,4,5}, + { 6,5,3,3,3,3,3,3,4,3,6}, + { 6,5,3,3,3,2,3,4,3,6}, + { 6,4,5,3,2,2,3,3,6}, + { 6,6,4,2,2,3,2,5}, + { 5,5,3,2,2,2,4}, + { 4,4,3,3,1,3}, + { 4,4,2,1,3}, + { 3,3,1,2}, + { 2,2,1}, + { 1,1}} + }; + + int codtab[3][TOTRUN_NUM][16] = + { + //YUV420 + {{ 1,1,1,0}, + { 1,1,0}, + { 1,0}}, + //YUV422 + {{ 1,2,3,2,3,1,1,0}, + { 0,1,1,4,5,6,7}, + { 0,1,1,2,6,7}, + { 6,0,1,2,7}, + { 0,1,2,3}, + { 0,1,1}, + { 0,1}}, + //YUV444 + {{1,3,2,3,2,3,2,3,2,3,2,3,2,3,2,1}, + {7,6,5,4,3,5,4,3,2,3,2,3,2,1,0}, + {5,7,6,5,4,3,4,3,2,3,2,1,1,0}, + {3,7,5,4,6,5,4,3,3,2,2,1,0}, + {5,4,3,7,6,5,4,3,2,1,1,0}, + {1,1,7,6,5,4,3,2,1,1,0}, + {1,1,5,4,3,3,2,1,1,0}, + {1,1,1,3,3,2,2,1,0}, + {1,0,1,3,2,1,1,1,}, + {1,0,1,3,2,1,1,}, + {0,1,1,2,1,3}, + {0,1,1,1,1}, + {0,1,1,1}, + {0,1,1}, + {0,1}} + }; + int yuv = active_sps->chroma_format_idc - 1; + + vlcnum = sym->value1; + + lt = &lentab[yuv][vlcnum][0]; + ct = &codtab[yuv][vlcnum][0]; + + retval = code_from_bitstream_2d(sym, dP, lt, ct, 16, 1, &code); + + if (retval) + { + printf("ERROR: failed to find Total Zeros\n"); + exit(-1); + } + + + #if TRACE + tracebits2(sym->tracestring, sym->len, code); + + #endif + + return retval; + } + + + /*! 
+ ************************************************************************ + * \brief + * read Run codeword from UVLC-partition + ************************************************************************ + */ + int readSyntaxElement_Run(SyntaxElement *sym, DataPartition *dP) + { + int vlcnum, retval; + int code, *ct, *lt; + + int lentab[TOTRUN_NUM][16] = + { + {1,1}, + {1,2,2}, + {2,2,2,2}, + {2,2,2,3,3}, + {2,2,3,3,3,3}, + {2,3,3,3,3,3,3}, + {3,3,3,3,3,3,3,4,5,6,7,8,9,10,11}, + }; + + int codtab[TOTRUN_NUM][16] = + { + {1,0}, + {1,1,0}, + {3,2,1,0}, + {3,2,1,1,0}, + {3,2,3,2,1,0}, + {3,0,1,3,2,5,4}, + {7,6,5,4,3,2,1,1,1,1,1,1,1,1,1}, + }; + + vlcnum = sym->value1; + + lt = &lentab[vlcnum][0]; + ct = &codtab[vlcnum][0]; + + retval = code_from_bitstream_2d(sym, dP, lt, ct, 16, 1, &code); + + if (retval) + { + printf("ERROR: failed to find Run\n"); + exit(-1); + } + + + #if TRACE + tracebits2(sym->tracestring, sym->len, code); + #endif + + return retval; + } + + + /*! + ************************************************************************ + * \brief + * Reads bits from the bitstream buffer + * + * \param buffer + * containing VLC-coded data bits + * \param totbitoffset + * bit offset from start of partition + * \param info + * returns value of the read bits + * \param bytecount + * total bytes in bitstream + * \param numbits + * number of bits to read + * + ************************************************************************ + */ + int GetBits (byte buffer[],int totbitoffset,int *info, int bytecount, + int numbits) + { + + register int inf; + long byteoffset; // byte from start of buffer + int bitoffset; // bit from start of byte + + int bitcounter=numbits; + + byteoffset= totbitoffset/8; + bitoffset= 7-(totbitoffset%8); + + inf=0; + while (numbits) + { + inf <<=1; + inf |= (buffer[byteoffset] & (0x01<>bitoffset; + numbits--; + bitoffset--; + if (bitoffset < 0) + { + byteoffset++; + bitoffset += 8; + if (byteoffset > bytecount) + { + return -1; + } + } + } + + 
*info = inf; + return bitcounter; // number of bits read (numbits as passed in) + } + + /*! + ************************************************************************ + * \brief + * Reads bits from the bitstream buffer + * + * \param buffer + * buffer containing VLC-coded data bits + * \param totbitoffset + * bit offset from start of partition + * \param bytecount + * total bytes in bitstream + * \param numbits + * number of bits to read + * \return + * value of the bits read, first bit read in the most significant position, + * or -1 if stepping to the next byte moves past bytecount + * + ************************************************************************ + */ + + int ShowBits (byte buffer[],int totbitoffset,int bytecount, int numbits) + { + + register int inf; + long byteoffset; // byte from start of buffer + int bitoffset; // bit from start of byte + + byteoffset= totbitoffset/8; + bitoffset= 7-(totbitoffset%8); + + inf=0; + while (numbits) + { + inf <<=1; + /* isolate the current bit and move it down to bit 0 */ + inf |= (buffer[byteoffset] & (0x01<<bitoffset))>>bitoffset; + numbits--; + bitoffset--; + if (bitoffset < 0) + { + byteoffset++; + bitoffset += 8; + if (byteoffset > bytecount) + { + return -1; + } + } + } + + return inf; // value of the bits read + } + + + /*!
+ ************************************************************************ + * \brief + * peek at the next 2 UVLC codeword from UVLC-partition to determine + * if a skipped MB is field/frame + ************************************************************************ + */ + int peekSyntaxElement_UVLC(SyntaxElement *sym, struct img_par *img, struct inp_par *inp, struct datapartition *dP) + { + Bitstream *currStream = dP->bitstream; + int frame_bitoffset = currStream->frame_bitoffset; + byte *buf = currStream->streamBuffer; + int BitstreamLengthInBytes = currStream->bitstream_length; + + + sym->len = GetVLCSymbol (buf, frame_bitoffset, &(sym->inf), BitstreamLengthInBytes); + if (sym->len == -1) + return -1; + frame_bitoffset += sym->len; + sym->mapping(sym->len,sym->inf,&(sym->value1),&(sym->value2)); + + + #if TRACE + tracebits(sym->tracestring, sym->len, sym->inf, sym->value1); + #endif + + return 1; + } + + Index: llvm-test/MultiSource/Applications/JM/ldecod/vlc.h diff -c /dev/null llvm-test/MultiSource/Applications/JM/ldecod/vlc.h:1.1 *** /dev/null Sat Feb 11 04:33:42 2006 --- llvm-test/MultiSource/Applications/JM/ldecod/vlc.h Sat Feb 11 04:33:22 2006 *************** *** 0 **** --- 1,56 ---- + + /*! 
+ ************************************************************************ + * \file vlc.h + * + * \brief + * header for (CA)VLC coding functions + * + * \author + * Karsten Suehring + * + ************************************************************************ + */ + + #ifndef _VLC_H_ + #define _VLC_H_ + + int se_v (char *tracestring, Bitstream *bitstream); + int ue_v (char *tracestring, Bitstream *bitstream); + int u_1 (char *tracestring, Bitstream *bitstream); + int u_v (int LenInBits, char *tracestring, Bitstream *bitstream); + + // UVLC mapping + void linfo_ue(int len, int info, int *value1, int *dummy); + void linfo_se(int len, int info, int *value1, int *dummy); + + void linfo_cbp_intra(int len,int info,int *cbp, int *dummy); + void linfo_cbp_inter(int len,int info,int *cbp, int *dummy); + void linfo_levrun_inter(int len,int info,int *level,int *irun); + void linfo_levrun_c2x2(int len,int info,int *level,int *irun); + + int readSyntaxElement_VLC (SyntaxElement *sym, Bitstream *currStream); + int readSyntaxElement_UVLC(SyntaxElement *sym, struct img_par *img, struct inp_par *inp, struct datapartition *dp); + int readSyntaxElement_Intra4x4PredictionMode(SyntaxElement *sym, struct img_par *img, struct inp_par *inp, struct datapartition *dp); + + int GetVLCSymbol (byte buffer[],int totbitoffset,int *info, int bytecount); + int GetVLCSymbol_IntraMode (byte buffer[],int totbitoffset,int *info, int bytecount); + + int readSyntaxElement_FLC(SyntaxElement *sym, Bitstream *currStream); + int readSyntaxElement_NumCoeffTrailingOnes(SyntaxElement *sym, DataPartition *dP, + char *type); + int readSyntaxElement_NumCoeffTrailingOnesChromaDC(SyntaxElement *sym, DataPartition *dP); + int readSyntaxElement_Level_VLC0(SyntaxElement *sym, struct datapartition *dP); + int readSyntaxElement_Level_VLCN(SyntaxElement *sym, int vlc, struct datapartition *dP); + int readSyntaxElement_TotalZeros(SyntaxElement *sym, DataPartition *dP); + int 
readSyntaxElement_TotalZerosChromaDC(SyntaxElement *sym, DataPartition *dP); + int readSyntaxElement_Run(SyntaxElement *sym, DataPartition *dP); + int GetBits (byte buffer[],int totbitoffset,int *info, int bytecount, + int numbits); + int ShowBits (byte buffer[],int totbitoffset,int bytecount, int numbits); + + int more_rbsp_data (byte buffer[],int totbitoffset,int bytecount); + + + #endif + From evan.cheng at apple.com Sat Feb 11 04:33:43 2006 From: evan.cheng at apple.com (Evan Cheng) Date: Sat, 11 Feb 2006 04:33:43 -0600 Subject: [llvm-commits] CVS: llvm-test/MultiSource/Applications/JM/lencod/Makefile annexb.c annexb.h biariencode.c biariencode.h block.c block.h cabac.c cabac.h configfile.c configfile.h context_ini.c context_ini.h contributors.h ctx_tables.h decoder.c defines.h elements.h epzs.c epzs.h explicit_gop.c explicit_gop.h fast_me.c fast_me.h filehandle.c fmo.c fmo.h global.h header.c header.h image.c image.h intrarefresh.c intrarefresh.h leaky_bucket.c leaky_bucket.h lencod.c loopFilter.c macroblock.c macroblock.h mb_access.c mb_access.h mbuffer.c mbuffer.h memalloc.c memalloc.h minmax.h mode_decision.c mode_decision.h mv-search.c mv-search.h nal.c nalu.c nalu.h nalucommon.c nalucommon.h output.c output.h parset.c parset.h parsetcommon.c parsetcommon.h q_matrix.c q_matrix.h q_offsets.c q_offsets.h ratectl.c ratectl.h rdopt.c rdopt_coding_state.c rdopt_coding_state.h rdpicdecision.c refbuf.c refbuf.h rtp.c rtp.h sei.c sei.h simplified_fast_me.c simplified_fast_me.h slice.c transform8x8.c transfo! 
rm8x8.h vlc.c vlc.h weighted_prediction.c Message-ID: <200602111033.EAA24569@zion.cs.uiuc.edu> Changes in directory llvm-test/MultiSource/Applications/JM/lencod: Makefile added (r1.1) annexb.c added (r1.1) annexb.h added (r1.1) biariencode.c added (r1.1) biariencode.h added (r1.1) block.c added (r1.1) block.h added (r1.1) cabac.c added (r1.1) cabac.h added (r1.1) configfile.c added (r1.1) configfile.h added (r1.1) context_ini.c added (r1.1) context_ini.h added (r1.1) contributors.h added (r1.1) ctx_tables.h added (r1.1) decoder.c added (r1.1) defines.h added (r1.1) elements.h added (r1.1) epzs.c added (r1.1) epzs.h added (r1.1) explicit_gop.c added (r1.1) explicit_gop.h added (r1.1) fast_me.c added (r1.1) fast_me.h added (r1.1) filehandle.c added (r1.1) fmo.c added (r1.1) fmo.h added (r1.1) global.h added (r1.1) header.c added (r1.1) header.h added (r1.1) image.c added (r1.1) image.h added (r1.1) intrarefresh.c added (r1.1) intrarefresh.h added (r1.1) leaky_bucket.c added (r1.1) leaky_bucket.h added (r1.1) lencod.c added (r1.1) loopFilter.c added (r1.1) macroblock.c added (r1.1) macroblock.h added (r1.1) mb_access.c added (r1.1) mb_access.h added (r1.1) mbuffer.c added (r1.1) mbuffer.h added (r1.1) memalloc.c added (r1.1) memalloc.h added (r1.1) minmax.h added (r1.1) mode_decision.c added (r1.1) mode_decision.h added (r1.1) mv-search.c added (r1.1) mv-search.h added (r1.1) nal.c added (r1.1) nalu.c added (r1.1) nalu.h added (r1.1) nalucommon.c added (r1.1) nalucommon.h added (r1.1) output.c added (r1.1) output.h added (r1.1) parset.c added (r1.1) parset.h added (r1.1) parsetcommon.c added (r1.1) parsetcommon.h added (r1.1) q_matrix.c added (r1.1) q_matrix.h added (r1.1) q_offsets.c added (r1.1) q_offsets.h added (r1.1) ratectl.c added (r1.1) ratectl.h added (r1.1) rdopt.c added (r1.1) rdopt_coding_state.c added (r1.1) rdopt_coding_state.h added (r1.1) rdpicdecision.c added (r1.1) refbuf.c added (r1.1) refbuf.h added (r1.1) rtp.c added (r1.1) rtp.h added (r1.1) 
sei.c added (r1.1) sei.h added (r1.1) simplified_fast_me.c added (r1.1) simplified_fast_me.h added (r1.1) slice.c added (r1.1) transform8x8.c added (r1.1) transform8x8.h added (r1.1) vlc.c added (r1.1) vlc.h added (r1.1) weighted_prediction.c added (r1.1) --- Log message: Added H.264 reference encoder / decoder from ITU to LLVM test suite. --- Diffs of the changes: (+55883 -0) Makefile | 8 annexb.c | 117 + annexb.h | 25 biariencode.c | 342 +++ biariencode.h | 138 + block.c | 2800 +++++++++++++++++++++++++++++++ block.h | 179 ++ cabac.c | 1503 ++++++++++++++++ cabac.h | 65 configfile.c | 1162 +++++++++++++ configfile.h | 271 +++ context_ini.c | 365 ++++ context_ini.h | 32 contributors.h | 212 ++ ctx_tables.h | 729 ++++++++ decoder.c | 655 +++++++ defines.h | 207 ++ elements.h | 109 + epzs.c | 2321 ++++++++++++++++++++++++++ epzs.h | 71 explicit_gop.c | 472 +++++ explicit_gop.h | 25 fast_me.c | 914 ++++++++++ fast_me.h | 168 + filehandle.c | 140 + fmo.c | 730 ++++++++ fmo.h | 39 global.h | 1430 ++++++++++++++++ header.c | 564 ++++++ header.h | 22 image.c | 2640 +++++++++++++++++++++++++++++ image.h | 34 intrarefresh.c | 136 + intrarefresh.h | 26 leaky_bucket.c | 296 +++ leaky_bucket.h | 29 lencod.c | 2421 +++++++++++++++++++++++++++ loopFilter.c | 482 +++++ macroblock.c | 4435 ++++++++++++++++++++++++++++++++++++++++++++++++++ macroblock.h | 112 + mb_access.c | 683 +++++++ mb_access.h | 30 mbuffer.c | 3865 +++++++++++++++++++++++++++++++++++++++++++ mbuffer.h | 193 ++ memalloc.c | 763 ++++++++ memalloc.h | 63 minmax.h | 19 mode_decision.c | 1844 ++++++++++++++++++++ mode_decision.h | 87 mv-search.c | 3873 +++++++++++++++++++++++++++++++++++++++++++ mv-search.h | 76 nal.c | 147 + nalu.c | 78 nalu.h | 28 nalucommon.c | 72 nalucommon.h | 55 output.c | 468 +++++ output.h | 24 parset.c | 908 ++++++++++ parset.h | 45 parsetcommon.c | 100 + parsetcommon.h | 194 ++ q_matrix.c | 633 +++++++ q_matrix.h | 40 q_offsets.c | 550 ++++++ q_offsets.h | 28 ratectl.c | 1803 
++++++++++++++++++++ ratectl.h | 146 + rdopt.c | 3568 ++++++++++++++++++++++++++++++++++++++++ rdopt_coding_state.c | 203 ++ rdopt_coding_state.h | 53 rdpicdecision.c | 64 refbuf.c | 167 + refbuf.h | 28 rtp.c | 613 ++++++ rtp.h | 72 sei.c | 1644 ++++++++++++++++++ sei.h | 267 +++ simplified_fast_me.c | 825 +++++++++ simplified_fast_me.h | 90 + slice.c | 1134 ++++++++++++ transform8x8.c | 1801 ++++++++++++++++++++ transform8x8.h | 32 vlc.c | 1265 ++++++++++++++ vlc.h | 52 weighted_prediction.c | 764 ++++++++ 86 files changed, 55883 insertions(+) Index: llvm-test/MultiSource/Applications/JM/lencod/Makefile diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/Makefile:1.1 *** /dev/null Sat Feb 11 04:33:32 2006 --- llvm-test/MultiSource/Applications/JM/lencod/Makefile Sat Feb 11 04:33:22 2006 *************** *** 0 **** --- 1,8 ---- + LEVEL = ../../../.. + PROG = lencod + CPPFLAGS = -D __USE_LARGEFILE64 -D _FILE_OFFSET_BITS=64 + LDFLAGS = -lm $(TOOLLINKOPTS) + + RUN_OPTIONS = -d $(PROJ_SRC_DIR)/data/encoder.cfg -p InputFile=$(PROJ_SRC_DIR)/data/foreman_part_qcif.yuv + + include ../../../Makefile.multisrc Index: llvm-test/MultiSource/Applications/JM/lencod/annexb.c diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/annexb.c:1.1 *** /dev/null Sat Feb 11 04:33:41 2006 --- llvm-test/MultiSource/Applications/JM/lencod/annexb.c Sat Feb 11 04:33:22 2006 *************** *** 0 **** --- 1,117 ---- + + /*! + ************************************************************************************* + * \file annexb.c + * + * \brief + * Annex B Byte Stream format NAL Unit writing routines + * + * \author + * Main contributors (see contributors.h for copyright, address and affiliation details) + * - Stephan Wenger + ************************************************************************************* + */ + + #include <stdlib.h> + #include <assert.h> + + #include "global.h" + #include "nalucommon.h" + + static FILE *f = NULL; // the output file + + + /*!
+ ******************************************************************************************** + * \brief + * Writes a NALU to the Annex B Byte Stream + * + * \return + * number of bits written + * + ******************************************************************************************** + */ + /* Writes the start code prefix (3 or 4 bytes, per n->startcodeprefix_len), rebuilds n->buf[0] from forbidden_bit, nal_reference_idc and nal_unit_type, then writes n->len bytes of n->buf to the file opened by OpenAnnexbFile(). Exits the program if fwrite() writes fewer than n->len bytes. */ + int WriteAnnexbNALU (NALU_t *n) + { + int BitsWritten = 0; + + assert (n != NULL); + assert (n->forbidden_bit == 0); + assert (f != NULL); + assert (n->startcodeprefix_len == 3 || n->startcodeprefix_len == 4); + + // printf ("WriteAnnexbNALU: writing %d bytes w/ startcode_len %d\n", n->len+1, n->startcodeprefix_len); + if (n->startcodeprefix_len > 3) + { + /* the 4-byte start code has one extra leading zero byte */ + putc (0, f); + BitsWritten += 8; + } + putc (0, f); + putc (0, f); + putc (1, f); + BitsWritten += 24; + + n->buf[0] = + n->forbidden_bit << 7 | + n->nal_reference_idc << 5 | + n->nal_unit_type; + + // printf ("First Byte %x, nal_ref_idc %x, nal_unit_type %d\n", n->buf[0], n->nal_reference_idc, n->nal_unit_type); + + if (n->len != fwrite (n->buf, 1, n->len, f)) + { + printf ("Fatal: cannot write %d bytes to bitstream file, exit (-1)\n", n->len); + exit (-1); + } + BitsWritten += n->len * 8; + + fflush (f); + #if TRACE + fprintf (p_trace, "\n\nAnnex B NALU w/ %s startcode, len %d, forbidden_bit %d, nal_reference_idc %d, nal_unit_type %d\n\n", + n->startcodeprefix_len == 4?"long":"short", n->len, n->forbidden_bit, n->nal_reference_idc, n->nal_unit_type); + fflush (p_trace); + #endif + return BitsWritten; + } + + + /*! + ******************************************************************************************** + * \brief + * Opens the output file for the bytestream + * + * \param Filename + * The filename of the file to be opened + * + * \return + * none.
Function terminates the program in case of an error + * + ******************************************************************************************** + */ + void OpenAnnexbFile (char *Filename) + { + if ((f = fopen (Filename, "wb")) == NULL) + { + printf ("Fatal: cannot open Annex B bytestream file '%s', exit (-1)\n", Filename); + exit (-1); + } + } + + + /*! + ******************************************************************************************** + * \brief + * Closes the output bit stream file + * + * \return + * none. Function terminates the program in case of an error + ******************************************************************************************** + */ + void CloseAnnexbFile() { + if (fclose (f)) + { + printf ("Fatal: cannot close Annex B bytestream file, exit (-1)\n"); + exit (-1); + } + } + Index: llvm-test/MultiSource/Applications/JM/lencod/annexb.h diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/annexb.h:1.1 *** /dev/null Sat Feb 11 04:33:42 2006 --- llvm-test/MultiSource/Applications/JM/lencod/annexb.h Sat Feb 11 04:33:22 2006 *************** *** 0 **** --- 1,25 ---- + + /*!
+ ************************************************************************************** + * \file + * annexb.h + * \brief + * Byte stream operations support + * This code reflects JVT version xxx + * \date 7 December 2002 + * \author + * Main contributors (see contributors.h for copyright, address and affiliation details) + * - Stephan Wenger + *************************************************************************************** + */ + + #ifndef _ANNEXB_H_ + #define _ANNEXB_H_ + + #include "nalucommon.h" + + int WriteAnnexbNALU (NALU_t *n); + void CloseAnnexbFile(); + void OpenAnnexbFile (char *Filename); + + #endif //_ANNEXB_H_ Index: llvm-test/MultiSource/Applications/JM/lencod/biariencode.c diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/biariencode.c:1.1 *** /dev/null Sat Feb 11 04:33:42 2006 --- llvm-test/MultiSource/Applications/JM/lencod/biariencode.c Sat Feb 11 04:33:22 2006 *************** *** 0 **** --- 1,342 ---- + + /*! + ************************************************************************************* + * \file biariencode.c + * + * \brief + * Routines for binary arithmetic encoding + * + * \author + * Main contributors (see contributors.h for copyright, address and affiliation details) + * - Detlev Marpe + * - Gabi Blaettermann + ************************************************************************************* + */ + + #include + #include + + #include "global.h" + #include "biariencode.h" + + int binCount = 0; + + /*! 
+ ************************************************************************ + * Macro for writing bytes of code + *********************************************************************** + */ + + #define put_byte() { \ + Ecodestrm[(*Ecodestrm_len)++] = Ebuffer; \ + Ebits_to_go = 8; \ + while (eep->C > 7) { \ + eep->C-=8; \ + eep->E++; \ + } \ + } + + #define put_one_bit(b) { \ + Ebuffer <<= 1; Ebuffer |= (b); \ + if (--Ebits_to_go == 0) \ + put_byte(); \ + } + + #define put_one_bit_plus_outstanding(b) { \ + put_one_bit(b); \ + while (Ebits_to_follow > 0) \ + { \ + Ebits_to_follow--; \ + put_one_bit(!(b)); \ + } \ + } + + int pic_bin_count; + + void reset_pic_bin_count() + { + pic_bin_count = 0; + } + + int get_pic_bin_count() + { + return pic_bin_count; + } + + + + /*! + ************************************************************************ + * \brief + * Allocates memory for the EncodingEnvironment struct + ************************************************************************ + */ + EncodingEnvironmentPtr arienco_create_encoding_environment() + { + EncodingEnvironmentPtr eep; + + if ( (eep = (EncodingEnvironmentPtr) calloc(1,sizeof(EncodingEnvironment))) == NULL) + no_mem_exit("arienco_create_encoding_environment: eep"); + + return eep; + } + + + + /*! + ************************************************************************ + * \brief + * Frees memory of the EncodingEnvironment struct + ************************************************************************ + */ + void arienco_delete_encoding_environment(EncodingEnvironmentPtr eep) + { + if (eep == NULL) + { + snprintf(errortext, ET_SIZE, "Error freeing eep (NULL pointer)"); + error (errortext, 200); + } + else + free(eep); + } + + + + /*! 
+ ************************************************************************ + * \brief + * Initializes the EncodingEnvironment for the arithmetic coder + ************************************************************************ + */ + void arienco_start_encoding(EncodingEnvironmentPtr eep, + unsigned char *code_buffer, + int *code_len ) + { + Elow = 0; + Ebits_to_follow = 0; + Ebuffer = 0; + Ebits_to_go = 9; // to swallow first redundant bit + + Ecodestrm = code_buffer; + Ecodestrm_len = code_len; + + Erange = HALF-2; + + eep->C = 0; + eep->E = 0; + + } + + /*! + ************************************************************************ + * \brief + * Returns the number of currently written bits + ************************************************************************ + */ + int arienco_bits_written(EncodingEnvironmentPtr eep) + { + return (8 * (*Ecodestrm_len) + Ebits_to_follow + 8 - Ebits_to_go); + } + + + /*! + ************************************************************************ + * \brief + * Terminates the arithmetic codeword, writes stop bit and stuffing bytes (if any) + ************************************************************************ + */ + void arienco_done_encoding(EncodingEnvironmentPtr eep) + { + put_one_bit_plus_outstanding((Elow >> (B_BITS-1)) & 1); + put_one_bit((Elow >> (B_BITS-2))&1); + put_one_bit(1); + + stats->bit_use_stuffingBits[img->type]+=(8-Ebits_to_go); + + while (Ebits_to_go != 8) + put_one_bit(0); + + pic_bin_count += eep->E*8 + eep->C; // no of processed bins + } + + extern int cabac_encoding; + + /*! 
+ ************************************************************************ + * \brief + * Actually arithmetic encoding of one binary symbol by using + * the probability estimate of its associated context model + ************************************************************************ + */ + void biari_encode_symbol(EncodingEnvironmentPtr eep, signed short symbol, BiContextTypePtr bi_ct ) + { + register unsigned int range = Erange; + register unsigned int low = Elow; + unsigned int rLPS = rLPS_table_64x4[bi_ct->state][(range>>6) & 3]; + + #if (2==TRACE) + if (cabac_encoding) + fprintf(p_trace, "%d 0x%04x %d %d\n", binCount++, Erange , bi_ct->state, bi_ct->MPS ); + #endif + + range -= rLPS; + bi_ct->count += cabac_encoding; + + /* covers all cases where code does not bother to shift down symbol to be + * either 0 or 1, e.g. in some cases for cbp, mb_Type etc the code simply + * masks off the bit position and passes in the resulting value */ + symbol = (symbol != 0); + + if (symbol != bi_ct->MPS) + { + low += range; + range = rLPS; + + if (!bi_ct->state) + bi_ct->MPS = bi_ct->MPS ^ 1; // switch LPS if necessary + bi_ct->state = AC_next_state_LPS_64[bi_ct->state]; // next state + } + else + bi_ct->state = AC_next_state_MPS_64[bi_ct->state]; // next state + + /* renormalisation */ + while (range < QUARTER) + { + if (low >= HALF) + { + put_one_bit_plus_outstanding(1); + low -= HALF; + } + else if (low < QUARTER) + { + put_one_bit_plus_outstanding(0); + } + else + { + Ebits_to_follow++; + low -= QUARTER; + } + low <<= 1; + range <<= 1; + } + Erange = range; + Elow = low; + eep->C++; + } + + /*! + ************************************************************************ + * \brief + * Arithmetic encoding of one binary symbol assuming + * a fixed prob. 
distribution with p(symbol) = 0.5 + ************************************************************************ + */ + void biari_encode_symbol_eq_prob(EncodingEnvironmentPtr eep, signed short symbol) + { + register unsigned int low = (Elow<<1); + + #if (2==TRACE) + extern int cabac_encoding; + if (cabac_encoding) + fprintf(p_trace, "%d 0x%04x\n", binCount++, Erange ); + #endif + + if (symbol != 0) + low += Erange; + + /* renormalisation as for biari_encode_symbol; + note that low has already been doubled */ + if (low >= ONE) + { + put_one_bit_plus_outstanding(1); + low -= ONE; + } + else + if (low < HALF) + { + put_one_bit_plus_outstanding(0); + } + else + { + Ebits_to_follow++; + low -= HALF; + } + Elow = low; + eep->C++; + } + + /*! + ************************************************************************ + * \brief + * Arithmetic encoding for last symbol before termination + ************************************************************************ + */ + void biari_encode_symbol_final(EncodingEnvironmentPtr eep, signed short symbol) + { + register unsigned int range = Erange-2; + register unsigned int low = Elow; + + #if (2==TRACE) + extern int cabac_encoding; + if (cabac_encoding) + fprintf(p_trace, "%d 0x%04x\n", binCount++, Erange); + #endif + + if (symbol) { + low += range; + range = 2; + } + + while (range < QUARTER) + { + if (low >= HALF) + { + put_one_bit_plus_outstanding(1); + low -= HALF; + } + else + if (low < QUARTER) + { + put_one_bit_plus_outstanding(0); + } + else + { + Ebits_to_follow++; + low -= QUARTER; + } + low <<= 1; + range <<= 1; + } + Erange = range; + Elow = low; + eep->C++; + } + + + /*! 
+ ************************************************************************ + * \brief + * Initializes a given context with some pre-defined probability state + ************************************************************************ + */ + void biari_init_context (BiContextTypePtr ctx, const int* ini) + { + int pstate; + + pstate = ((ini[0]* max(0, img->qp)) >> 4) + ini[1]; + pstate = min (max ( 1, pstate), 126); + + if ( pstate >= 64 ) + { + ctx->state = pstate - 64; + ctx->MPS = 1; + } + else + { + ctx->state = 63 - pstate; + ctx->MPS = 0; + } + + ctx->count = 0; + } + Index: llvm-test/MultiSource/Applications/JM/lencod/biariencode.h diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/biariencode.h:1.1 *** /dev/null Sat Feb 11 04:33:42 2006 --- llvm-test/MultiSource/Applications/JM/lencod/biariencode.h Sat Feb 11 04:33:22 2006 *************** *** 0 **** --- 1,138 ---- + + /*! + *************************************************************************** + * \file + * biariencode.h + * + * \brief + * Headerfile for binary arithmetic encoding routines + * + * \author + * Detlev Marpe, + * Gabi Blaettermann + * Copyright (C) 2000 HEINRICH HERTZ INSTITUTE All Rights Reserved. + * + * \date + * 21. 
Oct 2000 + ************************************************************************** + */ + + + #ifndef _BIARIENCOD_H_ + #define _BIARIENCOD_H_ + + + /************************************************************************ + * D e f i n i t i o n s + *********************************************************************** + */ + + // some definitions to increase the readability of the source code + + #define Elow (eep->Elow) + #define Erange (eep->Erange) + #define Ebits_to_follow (eep->Ebits_to_follow) + #define Ebuffer (eep->Ebuffer) + #define Ebits_to_go (eep->Ebits_to_go) + #define Ecodestrm (eep->Ecodestrm) + #define Ecodestrm_len (eep->Ecodestrm_len) + #define Ecodestrm_laststartcode (eep->Ecodestrm_laststartcode) + #define B_BITS 10 // Number of bits to represent the whole coding interval + #define ONE (1 << B_BITS) + #define HALF (1 << (B_BITS-1)) + #define QUARTER (1 << (B_BITS-2)) + + /* Range table for LPS */ + const byte rLPS_table_64x4[64][4]= + { + { 128, 176, 208, 240}, + { 128, 167, 197, 227}, + { 128, 158, 187, 216}, + { 123, 150, 178, 205}, + { 116, 142, 169, 195}, + { 111, 135, 160, 185}, + { 105, 128, 152, 175}, + { 100, 122, 144, 166}, + { 95, 116, 137, 158}, + { 90, 110, 130, 150}, + { 85, 104, 123, 142}, + { 81, 99, 117, 135}, + { 77, 94, 111, 128}, + { 73, 89, 105, 122}, + { 69, 85, 100, 116}, + { 66, 80, 95, 110}, + { 62, 76, 90, 104}, + { 59, 72, 86, 99}, + { 56, 69, 81, 94}, + { 53, 65, 77, 89}, + { 51, 62, 73, 85}, + { 48, 59, 69, 80}, + { 46, 56, 66, 76}, + { 43, 53, 63, 72}, + { 41, 50, 59, 69}, + { 39, 48, 56, 65}, + { 37, 45, 54, 62}, + { 35, 43, 51, 59}, + { 33, 41, 48, 56}, + { 32, 39, 46, 53}, + { 30, 37, 43, 50}, + { 29, 35, 41, 48}, + { 27, 33, 39, 45}, + { 26, 31, 37, 43}, + { 24, 30, 35, 41}, + { 23, 28, 33, 39}, + { 22, 27, 32, 37}, + { 21, 26, 30, 35}, + { 20, 24, 29, 33}, + { 19, 23, 27, 31}, + { 18, 22, 26, 30}, + { 17, 21, 25, 28}, + { 16, 20, 23, 27}, + { 15, 19, 22, 25}, + { 14, 18, 21, 24}, + { 14, 17, 20, 23}, + { 
13, 16, 19, 22}, + { 12, 15, 18, 21}, + { 12, 14, 17, 20}, + { 11, 14, 16, 19}, + { 11, 13, 15, 18}, + { 10, 12, 15, 17}, + { 10, 12, 14, 16}, + { 9, 11, 13, 15}, + { 9, 11, 12, 14}, + { 8, 10, 12, 14}, + { 8, 9, 11, 13}, + { 7, 9, 11, 12}, + { 7, 9, 10, 12}, + { 7, 8, 10, 11}, + { 6, 8, 9, 11}, + { 6, 7, 9, 10}, + { 6, 7, 8, 9}, + { 2, 2, 2, 2} + }; + + const unsigned short AC_next_state_MPS_64[64] = + { + 1,2,3,4,5,6,7,8,9,10, + 11,12,13,14,15,16,17,18,19,20, + 21,22,23,24,25,26,27,28,29,30, + 31,32,33,34,35,36,37,38,39,40, + 41,42,43,44,45,46,47,48,49,50, + 51,52,53,54,55,56,57,58,59,60, + 61,62,62,63 + }; + + const unsigned short AC_next_state_LPS_64[64] = + { + 0, 0, 1, 2, 2, 4, 4, 5, 6, 7, + 8, 9, 9,11,11,12,13,13,15,15, + 16,16,18,18,19,19,21,21,22,22, + 23,24,24,25,26,26,27,27,28,29, + 29,30,30,30,31,32,32,33,33,33, + 34,34,35,35,35,36,36,36,37,37, + 37,38,38,63 + }; + + + #endif // BIARIENCOD_H + Index: llvm-test/MultiSource/Applications/JM/lencod/block.c diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/block.c:1.1 *** /dev/null Sat Feb 11 04:33:42 2006 --- llvm-test/MultiSource/Applications/JM/lencod/block.c Sat Feb 11 04:33:22 2006 *************** *** 0 **** --- 1,2800 ---- + + /*! 
+ ************************************************************************************* + * \file block.c + * + * \brief + * Process one block + * + * \author + * Main contributors (see contributors.h for copyright, address and affiliation details) + * - Inge Lille-Langoy + * - Rickard Sjoberg + * - Stephan Wenger + * - Jani Lainema + * - Detlev Marpe + * - Thomas Wedi + * - Ragip Kurceren + * - Greg Conklin + ************************************************************************************* + */ + + #include "contributors.h" + + + #include + #include + #include + #include + #include + + #include "global.h" + + #include "image.h" + #include "mb_access.h" + #include "block.h" + #include "vlc.h" + + + const int quant_coef[6][4][4] = { + {{13107, 8066,13107, 8066},{ 8066, 5243, 8066, 5243},{13107, 8066,13107, 8066},{ 8066, 5243, 8066, 5243}}, + {{11916, 7490,11916, 7490},{ 7490, 4660, 7490, 4660},{11916, 7490,11916, 7490},{ 7490, 4660, 7490, 4660}}, + {{10082, 6554,10082, 6554},{ 6554, 4194, 6554, 4194},{10082, 6554,10082, 6554},{ 6554, 4194, 6554, 4194}}, + {{ 9362, 5825, 9362, 5825},{ 5825, 3647, 5825, 3647},{ 9362, 5825, 9362, 5825},{ 5825, 3647, 5825, 3647}}, + {{ 8192, 5243, 8192, 5243},{ 5243, 3355, 5243, 3355},{ 8192, 5243, 8192, 5243},{ 5243, 3355, 5243, 3355}}, + {{ 7282, 4559, 7282, 4559},{ 4559, 2893, 4559, 2893},{ 7282, 4559, 7282, 4559},{ 4559, 2893, 4559, 2893}} + }; + + const int dequant_coef[6][4][4] = { + {{10, 13, 10, 13},{ 13, 16, 13, 16},{10, 13, 10, 13},{ 13, 16, 13, 16}}, + {{11, 14, 11, 14},{ 14, 18, 14, 18},{11, 14, 11, 14},{ 14, 18, 14, 18}}, + {{13, 16, 13, 16},{ 16, 20, 16, 20},{13, 16, 13, 16},{ 16, 20, 16, 20}}, + {{14, 18, 14, 18},{ 18, 23, 18, 23},{14, 18, 14, 18},{ 18, 23, 18, 23}}, + {{16, 20, 16, 20},{ 20, 25, 20, 25},{16, 20, 16, 20},{ 20, 25, 20, 25}}, + {{18, 23, 18, 23},{ 23, 29, 23, 29},{18, 23, 18, 23},{ 23, 29, 23, 29}} + }; + static const int A[4][4] = { + { 16, 20, 16, 20}, + { 20, 25, 20, 25}, + { 16, 20, 16, 20}, + { 20, 
25, 20, 25} + }; + + + // Notation for comments regarding prediction and predictors. + // The pels of the 4x4 block are labelled a..p. The predictor pels above + // are labelled A..H, from the left I..P, and from above left X, as follows: + // + // X A B C D E F G H + // I a b c d + // J e f g h + // K i j k l + // L m n o p + // + + // Predictor array index definitions + #define P_X (PredPel[0]) + #define P_A (PredPel[1]) + #define P_B (PredPel[2]) + #define P_C (PredPel[3]) + #define P_D (PredPel[4]) + #define P_E (PredPel[5]) + #define P_F (PredPel[6]) + #define P_G (PredPel[7]) + #define P_H (PredPel[8]) + #define P_I (PredPel[9]) + #define P_J (PredPel[10]) + #define P_K (PredPel[11]) + #define P_L (PredPel[12]) + + /*! + ************************************************************************ + * \brief + * Make intra 4x4 prediction according to all 9 prediction modes. + * The routine uses left and upper neighbouring points from + * previous coded blocks to do this (if available). Notice that + * inaccessible neighbouring points are signalled with a negative + * value in the predmode array . 
+ * + * \par Input: + * Starting point of current 4x4 block image posision + * + * \par Output: + * none + ************************************************************************ + */ + void intrapred_luma(int img_x,int img_y, int *left_available, int *up_available, int *all_available) + { + int i,j; + int s0; + int PredPel[13]; // array of predictor pels + imgpel **imgY = enc_picture->imgY; // For MB level frame/field coding tools -- set default to imgY + + int ioff = (img_x & 15); + int joff = (img_y & 15); + int mb_nr=img->current_mb_nr; + + PixelPos pix_a[4]; + PixelPos pix_b, pix_c, pix_d; + + int block_available_up; + int block_available_left; + int block_available_up_left; + int block_available_up_right; + + for (i=0;i<4;i++) + { + getNeighbour(mb_nr, ioff -1 , joff +i , 1, &pix_a[i]); + } + + getNeighbour(mb_nr, ioff , joff -1 , 1, &pix_b); + getNeighbour(mb_nr, ioff +4 , joff -1 , 1, &pix_c); + getNeighbour(mb_nr, ioff -1 , joff -1 , 1, &pix_d); + + pix_c.available = pix_c.available && !(((ioff==4)||(ioff==12)) && ((joff==4)||(joff==12))); + + if (input->UseConstrainedIntraPred) + { + for (i=0, block_available_left=1; i<4;i++) + block_available_left &= pix_a[i].available ? img->intra_block[pix_a[i].mb_addr]: 0; + block_available_up = pix_b.available ? img->intra_block [pix_b.mb_addr] : 0; + block_available_up_right = pix_c.available ? img->intra_block [pix_c.mb_addr] : 0; + block_available_up_left = pix_d.available ? 
img->intra_block [pix_d.mb_addr] : 0; + } + else + { + block_available_left = pix_a[0].available; + block_available_up = pix_b.available; + block_available_up_right = pix_c.available; + block_available_up_left = pix_d.available; + } + + *left_available = block_available_left; + *up_available = block_available_up; + *all_available = block_available_up && block_available_left && block_available_up_left; + + i = (img_x & 15); + j = (img_y & 15); + + // form predictor pels + if (block_available_up) + { + P_A = imgY[pix_b.pos_y][pix_b.pos_x+0]; + P_B = imgY[pix_b.pos_y][pix_b.pos_x+1]; + P_C = imgY[pix_b.pos_y][pix_b.pos_x+2]; + P_D = imgY[pix_b.pos_y][pix_b.pos_x+3]; + + } + else + { + P_A = P_B = P_C = P_D = img->dc_pred_value; + } + + if (block_available_up_right) + { + P_E = imgY[pix_c.pos_y][pix_c.pos_x+0]; + P_F = imgY[pix_c.pos_y][pix_c.pos_x+1]; + P_G = imgY[pix_c.pos_y][pix_c.pos_x+2]; + P_H = imgY[pix_c.pos_y][pix_c.pos_x+3]; + } + else + { + P_E = P_F = P_G = P_H = P_D; + } + + if (block_available_left) + { + P_I = imgY[pix_a[0].pos_y][pix_a[0].pos_x]; + P_J = imgY[pix_a[1].pos_y][pix_a[1].pos_x]; + P_K = imgY[pix_a[2].pos_y][pix_a[2].pos_x]; + P_L = imgY[pix_a[3].pos_y][pix_a[3].pos_x]; + } + else + { + P_I = P_J = P_K = P_L = img->dc_pred_value; + } + + if (block_available_up_left) + { + P_X = imgY[pix_d.pos_y][pix_d.pos_x]; + } + else + { + P_X = img->dc_pred_value; + } + + for(i=0;i<9;i++) + img->mprr[i][0][0]=-1; + + /////////////////////////////// + // make DC prediction + /////////////////////////////// + s0 = 0; + if (block_available_up && block_available_left) + { + // no edge + s0 = (P_A + P_B + P_C + P_D + P_I + P_J + P_K + P_L + 4) >> (BLOCK_SHIFT + 1); + } + else if (!block_available_up && block_available_left) + { + // upper edge + s0 = (P_I + P_J + P_K + P_L + 2) >> BLOCK_SHIFT;; + } + else if (block_available_up && !block_available_left) + { + // left edge + s0 = (P_A + P_B + P_C + P_D + 2) >> BLOCK_SHIFT; + } + else //if (!block_available_up 
&& !block_available_left) + { + // top left corner, nothing to predict from + s0 = img->dc_pred_value; + } + + // store DC prediction + for (j=0; j < BLOCK_SIZE; j++) + { + for (i=0; i < BLOCK_SIZE; i++) + img->mprr[DC_PRED][j][i] = s0; + } + + /////////////////////////////// + // make horiz and vert prediction + /////////////////////////////// + + for (i=0; i < BLOCK_SIZE; i++) + { + img->mprr[VERT_PRED][0][i] = + img->mprr[VERT_PRED][1][i] = + img->mprr[VERT_PRED][2][i] = + img->mprr[VERT_PRED][3][i] = (&P_A)[i]; + img->mprr[HOR_PRED][i][0] = + img->mprr[HOR_PRED][i][1] = + img->mprr[HOR_PRED][i][2] = + img->mprr[HOR_PRED][i][3] = (&P_I)[i]; + } + + if(!block_available_up) + img->mprr[VERT_PRED][0][0]=-1; + if(!block_available_left) + img->mprr[HOR_PRED][0][0]=-1; + + if (block_available_up) + { + // Mode DIAG_DOWN_LEFT_PRED + img->mprr[DIAG_DOWN_LEFT_PRED][0][0] = (P_A + P_C + 2*(P_B) + 2) >> 2; + img->mprr[DIAG_DOWN_LEFT_PRED][0][1] = + img->mprr[DIAG_DOWN_LEFT_PRED][1][0] = (P_B + P_D + 2*(P_C) + 2) >> 2; + img->mprr[DIAG_DOWN_LEFT_PRED][0][2] = + img->mprr[DIAG_DOWN_LEFT_PRED][1][1] = + img->mprr[DIAG_DOWN_LEFT_PRED][2][0] = (P_C + P_E + 2*(P_D) + 2) >> 2; + img->mprr[DIAG_DOWN_LEFT_PRED][0][3] = + img->mprr[DIAG_DOWN_LEFT_PRED][1][2] = + img->mprr[DIAG_DOWN_LEFT_PRED][2][1] = + img->mprr[DIAG_DOWN_LEFT_PRED][3][0] = (P_D + P_F + 2*(P_E) + 2) >> 2; + img->mprr[DIAG_DOWN_LEFT_PRED][1][3] = + img->mprr[DIAG_DOWN_LEFT_PRED][2][2] = + img->mprr[DIAG_DOWN_LEFT_PRED][3][1] = (P_E + P_G + 2*(P_F) + 2) >> 2; + img->mprr[DIAG_DOWN_LEFT_PRED][2][3] = + img->mprr[DIAG_DOWN_LEFT_PRED][3][2] = (P_F + P_H + 2*(P_G) + 2) >> 2; + img->mprr[DIAG_DOWN_LEFT_PRED][3][3] = (P_G + 3*(P_H) + 2) >> 2; + + // Mode VERT_LEFT_PRED + img->mprr[VERT_LEFT_PRED][0][0] = (P_A + P_B + 1) >> 1; + img->mprr[VERT_LEFT_PRED][0][1] = + img->mprr[VERT_LEFT_PRED][2][0] = (P_B + P_C + 1) >> 1; + img->mprr[VERT_LEFT_PRED][0][2] = + img->mprr[VERT_LEFT_PRED][2][1] = (P_C + P_D + 1) >> 1; + 
img->mprr[VERT_LEFT_PRED][0][3] = + img->mprr[VERT_LEFT_PRED][2][2] = (P_D + P_E + 1) >> 1; + img->mprr[VERT_LEFT_PRED][2][3] = (P_E + P_F + 1) >> 1; + img->mprr[VERT_LEFT_PRED][1][0] = (P_A + 2*P_B + P_C + 2) >> 2; + img->mprr[VERT_LEFT_PRED][1][1] = + img->mprr[VERT_LEFT_PRED][3][0] = (P_B + 2*P_C + P_D + 2) >> 2; + img->mprr[VERT_LEFT_PRED][1][2] = + img->mprr[VERT_LEFT_PRED][3][1] = (P_C + 2*P_D + P_E + 2) >> 2; + img->mprr[VERT_LEFT_PRED][1][3] = + img->mprr[VERT_LEFT_PRED][3][2] = (P_D + 2*P_E + P_F + 2) >> 2; + img->mprr[VERT_LEFT_PRED][3][3] = (P_E + 2*P_F + P_G + 2) >> 2; + + } + + /* Prediction according to 'diagonal' modes */ + if (block_available_left) + { + // Mode HOR_UP_PRED + img->mprr[HOR_UP_PRED][0][0] = (P_I + P_J + 1) >> 1; + img->mprr[HOR_UP_PRED][0][1] = (P_I + 2*P_J + P_K + 2) >> 2; + img->mprr[HOR_UP_PRED][0][2] = + img->mprr[HOR_UP_PRED][1][0] = (P_J + P_K + 1) >> 1; + img->mprr[HOR_UP_PRED][0][3] = + img->mprr[HOR_UP_PRED][1][1] = (P_J + 2*P_K + P_L + 2) >> 2; + img->mprr[HOR_UP_PRED][1][2] = + img->mprr[HOR_UP_PRED][2][0] = (P_K + P_L + 1) >> 1; + img->mprr[HOR_UP_PRED][1][3] = + img->mprr[HOR_UP_PRED][2][1] = (P_K + 2*P_L + P_L + 2) >> 2; + img->mprr[HOR_UP_PRED][3][0] = + img->mprr[HOR_UP_PRED][2][2] = + img->mprr[HOR_UP_PRED][2][3] = + img->mprr[HOR_UP_PRED][3][1] = + img->mprr[HOR_UP_PRED][3][2] = + img->mprr[HOR_UP_PRED][3][3] = P_L; + } + + /* Prediction according to 'diagonal' modes */ + if (block_available_up && block_available_left && block_available_up_left) + { + // Mode DIAG_DOWN_RIGHT_PRED + img->mprr[DIAG_DOWN_RIGHT_PRED][3][0] = (P_L + 2*P_K + P_J + 2) >> 2; + img->mprr[DIAG_DOWN_RIGHT_PRED][2][0] = + img->mprr[DIAG_DOWN_RIGHT_PRED][3][1] = (P_K + 2*P_J + P_I + 2) >> 2; + img->mprr[DIAG_DOWN_RIGHT_PRED][1][0] = + img->mprr[DIAG_DOWN_RIGHT_PRED][2][1] = + img->mprr[DIAG_DOWN_RIGHT_PRED][3][2] = (P_J + 2*P_I + P_X + 2) >> 2; + img->mprr[DIAG_DOWN_RIGHT_PRED][0][0] = + img->mprr[DIAG_DOWN_RIGHT_PRED][1][1] = + 
img->mprr[DIAG_DOWN_RIGHT_PRED][2][2] = + img->mprr[DIAG_DOWN_RIGHT_PRED][3][3] = (P_I + 2*P_X + P_A + 2) >> 2; + img->mprr[DIAG_DOWN_RIGHT_PRED][0][1] = + img->mprr[DIAG_DOWN_RIGHT_PRED][1][2] = + img->mprr[DIAG_DOWN_RIGHT_PRED][2][3] = (P_X + 2*P_A + P_B + 2) >> 2; + img->mprr[DIAG_DOWN_RIGHT_PRED][0][2] = + img->mprr[DIAG_DOWN_RIGHT_PRED][1][3] = (P_A + 2*P_B + P_C + 2) >> 2; + img->mprr[DIAG_DOWN_RIGHT_PRED][0][3] = (P_B + 2*P_C + P_D + 2) >> 2; + + // Mode VERT_RIGHT_PRED + img->mprr[VERT_RIGHT_PRED][0][0] = + img->mprr[VERT_RIGHT_PRED][2][1] = (P_X + P_A + 1) >> 1; + img->mprr[VERT_RIGHT_PRED][0][1] = + img->mprr[VERT_RIGHT_PRED][2][2] = (P_A + P_B + 1) >> 1; + img->mprr[VERT_RIGHT_PRED][0][2] = + img->mprr[VERT_RIGHT_PRED][2][3] = (P_B + P_C + 1) >> 1; + img->mprr[VERT_RIGHT_PRED][0][3] = (P_C + P_D + 1) >> 1; + img->mprr[VERT_RIGHT_PRED][1][0] = + img->mprr[VERT_RIGHT_PRED][3][1] = (P_I + 2*P_X + P_A + 2) >> 2; + img->mprr[VERT_RIGHT_PRED][1][1] = + img->mprr[VERT_RIGHT_PRED][3][2] = (P_X + 2*P_A + P_B + 2) >> 2; + img->mprr[VERT_RIGHT_PRED][1][2] = + img->mprr[VERT_RIGHT_PRED][3][3] = (P_A + 2*P_B + P_C + 2) >> 2; + img->mprr[VERT_RIGHT_PRED][1][3] = (P_B + 2*P_C + P_D + 2) >> 2; + img->mprr[VERT_RIGHT_PRED][2][0] = (P_X + 2*P_I + P_J + 2) >> 2; + img->mprr[VERT_RIGHT_PRED][3][0] = (P_I + 2*P_J + P_K + 2) >> 2; + + // Mode HOR_DOWN_PRED + img->mprr[HOR_DOWN_PRED][0][0] = + img->mprr[HOR_DOWN_PRED][1][2] = (P_X + P_I + 1) >> 1; + img->mprr[HOR_DOWN_PRED][0][1] = + img->mprr[HOR_DOWN_PRED][1][3] = (P_I + 2*P_X + P_A + 2) >> 2; + img->mprr[HOR_DOWN_PRED][0][2] = (P_X + 2*P_A + P_B + 2) >> 2; + img->mprr[HOR_DOWN_PRED][0][3] = (P_A + 2*P_B + P_C + 2) >> 2; + img->mprr[HOR_DOWN_PRED][1][0] = + img->mprr[HOR_DOWN_PRED][2][2] = (P_I + P_J + 1) >> 1; + img->mprr[HOR_DOWN_PRED][1][1] = + img->mprr[HOR_DOWN_PRED][2][3] = (P_X + 2*P_I + P_J + 2) >> 2; + img->mprr[HOR_DOWN_PRED][2][0] = + img->mprr[HOR_DOWN_PRED][3][2] = (P_J + P_K + 1) >> 1; + 
img->mprr[HOR_DOWN_PRED][2][1] = + img->mprr[HOR_DOWN_PRED][3][3] = (P_I + 2*P_J + P_K + 2) >> 2; + img->mprr[HOR_DOWN_PRED][3][0] = (P_K + P_L + 1) >> 1; + img->mprr[HOR_DOWN_PRED][3][1] = (P_J + 2*P_K + P_L + 2) >> 2; + } + } + + /*! + ************************************************************************ + * \brief + * 16x16 based luma prediction + * + * \par Input: + * Image parameters + * + * \par Output: + * none + ************************************************************************ + */ + void intrapred_luma_16x16() + { + int s0=0,s1,s2; + imgpel s[2][16]; + int i,j; + + int ih,iv; + int ib,ic,iaa; + + imgpel **imgY_pred = enc_picture->imgY; // For Mb level field/frame coding tools -- default to frame pred + int mb_nr = img->current_mb_nr; + + PixelPos up; //!< pixel position p(0,-1) + PixelPos left[17]; //!< pixel positions p(-1, -1..15) + + int up_avail, left_avail, left_up_avail; + + for (i=0;i<17;i++) + { + getNeighbour(mb_nr, -1, i-1, 1, &left[i]); + } + + getNeighbour(mb_nr, 0, -1, 1, &up); + + if (!(input->UseConstrainedIntraPred)) + { + up_avail = up.available; + left_avail = left[1].available; + left_up_avail = left[0].available; + } + else + { + up_avail = up.available ? img->intra_block[up.mb_addr] : 0; + for (i=1, left_avail=1; i<17;i++) + left_avail &= left[i].available ? img->intra_block[left[i].mb_addr]: 0; + left_up_avail = left[0].available ? 
img->intra_block[left[0].mb_addr]: 0; + } + + s1=s2=0; + // make DC prediction + if (up_avail) + { + for (i=0; i < MB_BLOCK_SIZE; i++) + s1 += imgY_pred[up.pos_y][up.pos_x+i]; // sum hor pix + } + + if (left_avail) + { + for (i=0; i < MB_BLOCK_SIZE; i++) + s2 += imgY_pred[left[i+1].pos_y][left[i+1].pos_x]; // sum vert pix + } + + if (up_avail && left_avail) + s0=(s1+s2+16)/(2*MB_BLOCK_SIZE); // no edge + + if (!up_avail && left_avail) + s0=(s2+8)/MB_BLOCK_SIZE; // upper edge + + if (up_avail && !left_avail) + s0=(s1+8)/MB_BLOCK_SIZE; // left edge + + if (!up_avail && !left_avail) + s0=img->dc_pred_value; // top left corner, nothing to predict from + + // vertical prediction + if (up_avail) + memcpy(s[0], &imgY_pred[up.pos_y][up.pos_x], MB_BLOCK_SIZE * sizeof(imgpel)); + + // horizontal prediction + if (left_avail) + { + for (i=0; i < MB_BLOCK_SIZE; i++) + s[1][i]=imgY_pred[left[i+1].pos_y][left[i+1].pos_x]; + } + + for (j=0; j < MB_BLOCK_SIZE; j++) + { + memcpy(img->mprr_2[VERT_PRED_16][j], s[0], MB_BLOCK_SIZE * sizeof(imgpel)); // store vertical prediction + for (i=0; i < MB_BLOCK_SIZE; i++) + { + img->mprr_2[HOR_PRED_16 ][j][i]=s[1][j]; // store horizontal prediction + img->mprr_2[DC_PRED_16 ][j][i]=s0; // store DC prediction + } + } + if (!up_avail || !left_avail || !left_up_avail) // edge + return; + + // 16 bit integer plan pred + + ih=0; + iv=0; + for (i=1;i<9;i++) + { + if (i<8) + ih += i*(imgY_pred[up.pos_y][up.pos_x+7+i] - imgY_pred[up.pos_y][up.pos_x+7-i]); + else + ih += i*(imgY_pred[up.pos_y][up.pos_x+7+i] - imgY_pred[left[0].pos_y][left[0].pos_x]); + + iv += i*(imgY_pred[left[8+i].pos_y][left[8+i].pos_x] - imgY_pred[left[8-i].pos_y][left[8-i].pos_x]); + } + ib=(5*ih+32)>>6; + ic=(5*iv+32)>>6; + + iaa=16*(imgY_pred[up.pos_y][up.pos_x+15]+imgY_pred[left[16].pos_y][left[16].pos_x]); + + for (j=0;j< MB_BLOCK_SIZE;j++) + { + for (i=0;i< MB_BLOCK_SIZE;i++) + { + img->mprr_2[PLANE_16][j][i]=max(0,min((int)img->max_imgpel_value,(iaa+(i-7)*ib +(j-7)*ic + 
16)/32));// store plane prediction + } + } + } + + + /*! + ************************************************************************ + * \brief + * For new intra pred routines + * + * \par Input: + * Image par, 16x16 based intra mode + * + * \par Output: + * none + ************************************************************************ + */ + int dct_luma_16x16(int new_intra_mode) + { + //int qp_const; + int i,j; + int ii,jj; + int jdiv, jmod; + int M1[16][16]; + int M4[4][4]; + int M5[4],M6[4]; + int M0[4][4][4][4]; + int run,scan_pos,coeff_ctr,level; + int qp_per,qp_rem,q_bits; + int ac_coef = 0; + + Macroblock *currMB = &img->mb_data[img->current_mb_nr]; + short is_field_mode = (img->field_picture || ( img->MbaffFrameFlag && currMB->mb_field)); + + int b8, b4; + int* DCLevel = img->cofDC[0][0]; + int* DCRun = img->cofDC[0][1]; + int* ACLevel; + int* ACRun; + int **levelscale,**leveloffset; + int **invlevelscale; + Boolean lossless_qpprime = ((currMB->qp + img->bitdepth_luma_qp_scale)==0 && img->lossless_qpprime_flag==1); + + qp_per = (currMB->qp + img->bitdepth_luma_qp_scale - MIN_QP)/6; + qp_rem = (currMB->qp + img->bitdepth_luma_qp_scale - MIN_QP)%6; + q_bits = Q_BITS+qp_per; + levelscale = LevelScale4x4Luma[1][qp_rem]; + leveloffset = LevelOffset4x4Luma[1][qp_per]; + invlevelscale = InvLevelScale4x4Luma[1][qp_rem]; + + + for (j=0;j<16;j++) + { + jdiv = j >> 2; + jmod = j & 0x03; + jj = img->opix_y+j; + for (i=0;i<16;i++) + { + // Residue Color Transform + if(!img->residue_transform_flag) + M1[j][i]=imgY_org[jj][img->opix_x+i]-img->mprr_2[new_intra_mode][j][i]; + else + M1[j][i]=img->m7[j][i]; + + M0[jdiv][i >> 2][jmod][i & 0x03]=M1[j][i]; + } + } + + for (jj=0;jj<4 && !lossless_qpprime;jj++) + { + for (ii=0;ii<4;ii++) + { + for (j=0;j<4;j++) + { + M5[0] = M0[jj][ii][j][0] + M0[jj][ii][j][3]; + M5[1] = M0[jj][ii][j][1] + M0[jj][ii][j][2]; + M5[2] = M0[jj][ii][j][1] - M0[jj][ii][j][2]; + M5[3] = M0[jj][ii][j][0] - M0[jj][ii][j][3]; + + M4[j][0] = M5[0] + M5[1]; 
+ M4[j][2] = M5[0] - M5[1]; + M4[j][1] = M5[3]*2 + M5[2]; + M4[j][3] = M5[3] - M5[2]*2; + } + // vertical + for (i=0;i<4;i++) + { + M5[0] = M4[0][i] + M4[3][i]; + M5[1] = M4[1][i] + M4[2][i]; + M5[2] = M4[1][i] - M4[2][i]; + M5[3] = M4[0][i] - M4[3][i]; + + M0[jj][ii][0][i] = M5[0] + M5[1]; + M0[jj][ii][2][i] = M5[0] - M5[1]; + M0[jj][ii][1][i] = M5[3]*2 + M5[2]; + M0[jj][ii][3][i] = M5[3] - M5[2]*2; + } + } + } + + // pick out DC coeff + + for (j=0;j<4;j++) + { + for (i=0;i<4;i++) + M4[j][i]= M0[j][i][0][0]; + } + + if (!lossless_qpprime) + { + for (j=0;j<4;j++) + { + M5[0] = M4[j][0]+M4[j][3]; + M5[1] = M4[j][1]+M4[j][2]; + M5[2] = M4[j][1]-M4[j][2]; + M5[3] = M4[j][0]-M4[j][3]; + + M4[j][0] = M5[0]+M5[1]; + M4[j][2] = M5[0]-M5[1]; + M4[j][1] = M5[3]+M5[2]; + M4[j][3] = M5[3]-M5[2]; + } + + // vertical + + for (i=0;i<4;i++) + { + M5[0] = M4[0][i]+M4[3][i]; + M5[1] = M4[1][i]+M4[2][i]; + M5[2] = M4[1][i]-M4[2][i]; + M5[3] = M4[0][i]-M4[3][i]; + + M4[0][i]=(M5[0]+M5[1])>>1; + M4[2][i]=(M5[0]-M5[1])>>1; + M4[1][i]=(M5[3]+M5[2])>>1; + M4[3][i]=(M5[3]-M5[2])>>1; + } + } + // quant + + run=-1; + scan_pos=0; + + for (coeff_ctr=0;coeff_ctr<16;coeff_ctr++) + { + if (is_field_mode) + { // Alternate scan for field coding + i=FIELD_SCAN[coeff_ctr][0]; + j=FIELD_SCAN[coeff_ctr][1]; + } + else + { + i=SNGL_SCAN[coeff_ctr][0]; + j=SNGL_SCAN[coeff_ctr][1]; + } + + run++; + + if(lossless_qpprime) + level= absm(M4[j][i]); + else + level= (absm(M4[j][i]) * levelscale[0][0] + (leveloffset[0][0]<<1)) >> (q_bits+1); + + if (input->symbol_mode == UVLC && img->qp < 10) + { + if (level > CAVLC_LEVEL_LIMIT) + level = CAVLC_LEVEL_LIMIT; + } + + if (level != 0) + { + DCLevel[scan_pos] = sign(level,M4[j][i]); + DCRun [scan_pos] = run; + ++scan_pos; + run=-1; + } + if(!lossless_qpprime) + M4[j][i]=sign(level,M4[j][i]); + } + DCLevel[scan_pos]=0; + + // invers DC transform + for (j=0;j<4 && !lossless_qpprime;j++) + { + M6[0]=M4[j][0]+M4[j][2]; + M6[1]=M4[j][0]-M4[j][2]; + 
M6[2]=M4[j][1]-M4[j][3]; + M6[3]=M4[j][1]+M4[j][3]; + + M4[j][0] = M6[0]+M6[3]; + M4[j][1] = M6[1]+M6[2]; + M4[j][2] = M6[1]-M6[2]; + M4[j][3] = M6[0]-M6[3]; + } + + for (i=0;i<4 && !lossless_qpprime;i++) + { + + M6[0]=M4[0][i]+M4[2][i]; + M6[1]=M4[0][i]-M4[2][i]; + M6[2]=M4[1][i]-M4[3][i]; + M6[3]=M4[1][i]+M4[3][i]; + + if(qp_per<6) + { + M0[0][i][0][0] = ((M6[0]+M6[3])*invlevelscale[0][0]+(1<<(5-qp_per)))>>(6-qp_per); + M0[1][i][0][0] = ((M6[1]+M6[2])*invlevelscale[0][0]+(1<<(5-qp_per)))>>(6-qp_per); + M0[2][i][0][0] = ((M6[1]-M6[2])*invlevelscale[0][0]+(1<<(5-qp_per)))>>(6-qp_per); + M0[3][i][0][0] = ((M6[0]-M6[3])*invlevelscale[0][0]+(1<<(5-qp_per)))>>(6-qp_per); + } + else + { + M0[0][i][0][0] = ((M6[0]+M6[3])*invlevelscale[0][0])<<(qp_per-6); + M0[1][i][0][0] = ((M6[1]+M6[2])*invlevelscale[0][0])<<(qp_per-6); + M0[2][i][0][0] = ((M6[1]-M6[2])*invlevelscale[0][0])<<(qp_per-6); + M0[3][i][0][0] = ((M6[0]-M6[3])*invlevelscale[0][0])<<(qp_per-6); + } + } + + // AC inverse trans/quant for MB + for (jj=0;jj<4;jj++) + { + for (ii=0;ii<4;ii++) + { + for (j=0;j<4;j++) + { + memcpy(M4[j],M0[jj][ii][j], BLOCK_SIZE * sizeof(int)); + } + + run = -1; + scan_pos = 0; + b8 = 2*(jj >> 1) + (ii >> 1); + b4 = 2*(jj & 0x01) + (ii & 0x01); + ACLevel = img->cofAC [b8][b4][0]; + ACRun = img->cofAC [b8][b4][1]; + + for (coeff_ctr=1;coeff_ctr<16;coeff_ctr++) // set in AC coeff + { + + if (is_field_mode) + { // Alternate scan for field coding + i=FIELD_SCAN[coeff_ctr][0]; + j=FIELD_SCAN[coeff_ctr][1]; + } + else + { + i=SNGL_SCAN[coeff_ctr][0]; + j=SNGL_SCAN[coeff_ctr][1]; + } + run++; + + if(lossless_qpprime) + level= absm( M4[j][i]); + else + level= ( absm( M4[j][i]) * levelscale[i][j] + leveloffset[i][j]) >> q_bits; + + if (img->AdaptiveRounding) + { + if (lossless_qpprime || level == 0 ) + { + img->fadjust4x4[2][jj*BLOCK_SIZE+j][ii*BLOCK_SIZE+i] = 0; + } + else + { + img->fadjust4x4[2][jj*BLOCK_SIZE+j][ii*BLOCK_SIZE+i] = + (AdaptRndWeight * (absm(M4[j][i]) * levelscale[i][j] - 
(level << q_bits)) + (1<< (q_bits))) >> (q_bits + 1); + } + } + + if (level != 0) + { + ac_coef = 15; + ACLevel[scan_pos] = sign(level,M4[j][i]); + ACRun [scan_pos] = run; + ++scan_pos; + run=-1; + } + + if(!lossless_qpprime) + { + level=sign(level, M4[j][i]); + if(qp_per<4) + M4[j][i]=(level*invlevelscale[i][j]+(1<<(3-qp_per)))>>(4-qp_per); + else + M4[j][i]=(level*invlevelscale[i][j])<<(qp_per-4); + } + } + ACLevel[scan_pos] = 0; + + + // IDCT horizontal + for (j=0;j<4 && !lossless_qpprime;j++) + { + M6[0] = M4[j][0] + M4[j][2]; + M6[1] = M4[j][0] - M4[j][2]; + M6[2] =(M4[j][1]>>1) - M4[j][3]; + M6[3] = M4[j][1] + (M4[j][3]>>1); + + M4[j][0] = M6[0] + M6[3]; + M4[j][1] = M6[1] + M6[2]; + M4[j][2] = M6[1] - M6[2]; + M4[j][3] = M6[0] - M6[3]; + } + + // vert + for (i=0;i<4 && !lossless_qpprime;i++) + { + M6[0]= M4[0][i] + M4[2][i]; + M6[1]= M4[0][i] - M4[2][i]; + M6[2]=(M4[1][i]>>1) - M4[3][i]; + M6[3]= M4[1][i] + (M4[3][i]>>1); + + M0[jj][ii][0][i] = M6[0] + M6[3]; + M0[jj][ii][1][i] = M6[1] + M6[2]; + M0[jj][ii][2][i] = M6[1] - M6[2]; + M0[jj][ii][3][i] = M6[0] - M6[3]; + } + } + } + + // Residue Color Transform + if(!img->residue_transform_flag) + { + for (jj=0;jj> 2; + jmod = j & 0x03; + for (i=0;i<MB_BLOCK_SIZE;i++) + img->m7[j][i]=M0[jdiv][i >> 2][jmod][i & 0x03]; + } + } + else + { + for (j=0;j<MB_BLOCK_SIZE;j++) + { + jdiv = j >> 2; + jmod = j & 0x03; + for (i=0;i<MB_BLOCK_SIZE;i++) + img->m7[j][i]=((M0[jdiv][i >> 2][jmod][i & 0x03]+DQ_ROUND)>>DQ_BITS); + } + } + } + + if(!img->residue_transform_flag) + { + if(lossless_qpprime) + { + for (j=0;j<16;j++) + { + jj = img->pix_y+j; + for (i=0;i<16;i++) + enc_picture->imgY[jj][img->pix_x+i]=(imgpel)(M1[j][i]+img->mprr_2[new_intra_mode][j][i]); + } + } + else + { + for (j=0;j<16;j++) + { + jj = img->pix_y+j; + for (i=0;i<16;i++) + enc_picture->imgY[jj][img->pix_x+i]=(imgpel)clip1a((M1[j][i]+((long)img->mprr_2[new_intra_mode][j][i]<<DQ_BITS)+DQ_ROUND)>>DQ_BITS); + } + } + } + return ac_coef; + } + + + /*! 
+ ************************************************************************ + * \brief + * The routine performs transform,quantization,inverse transform, adds the diff. + * to the prediction and writes the result to the decoded luma frame. Includes the + * RD constrained quantization also. + * + * \par Input: + * block_x,block_y: Block position inside a macro block (0,4,8,12). + * + * \par Output_ + * nonzero: 0 if no levels are nonzero. 1 if there are nonzero levels. \n + * coeff_cost: Counter for nonzero coefficients, used to discard expensive levels. + ************************************************************************ + */ + int dct_luma(int block_x,int block_y,int *coeff_cost, int intra) + { + int sign(int a,int b); + + int i,j,ilev, m4[4][4], m5[4],m6[4],coeff_ctr; + int ii; + //int qp_const; + int level,scan_pos,run; + int nonzero; + int qp_per,qp_rem,q_bits; + + int pos_x = block_x >> BLOCK_SHIFT; + int pos_y = block_y >> BLOCK_SHIFT; + int b8 = 2*(pos_y >> 1) + (pos_x >> 1); + int b4 = 2*(pos_y & 0x01) + (pos_x & 0x01); + int* ACLevel = img->cofAC[b8][b4][0]; + int* ACRun = img->cofAC[b8][b4][1]; + short pix_y; + + Macroblock *currMB = &img->mb_data[img->current_mb_nr]; + short is_field_mode = (img->field_picture || ( img->MbaffFrameFlag && currMB->mb_field)); + + Boolean lossless_qpprime = ((currMB->qp + img->bitdepth_luma_qp_scale)==0 && img->lossless_qpprime_flag==1); + int **levelscale,**leveloffset; + int **invlevelscale; + + qp_per = (currMB->qp + img->bitdepth_luma_qp_scale - MIN_QP)/6; + qp_rem = (currMB->qp + img->bitdepth_luma_qp_scale - MIN_QP)%6; + q_bits = Q_BITS+qp_per; + + levelscale = LevelScale4x4Luma[intra][qp_rem]; + leveloffset = LevelOffset4x4Luma[intra][qp_per]; + invlevelscale = InvLevelScale4x4Luma[intra][qp_rem]; + + // Horizontal transform + if (!lossless_qpprime) + { + for (j=0; j < BLOCK_SIZE; j++) + { + m5[0] = img->m7[j][0]+img->m7[j][3]; + m5[1] = img->m7[j][1]+img->m7[j][2]; + m5[2] = img->m7[j][1]-img->m7[j][2]; + m5[3] 
= img->m7[j][0]-img->m7[j][3]; + + m4[j][0] = m5[0] + m5[1]; + m4[j][2] = m5[0] - m5[1]; + m4[j][1] = m5[3]*2 + m5[2]; + m4[j][3] = m5[3] - m5[2]*2; + } + + // Vertical transform + for (i=0; i < BLOCK_SIZE; i++) + { + m5[0] = m4[0][i] + m4[3][i]; + m5[1] = m4[1][i] + m4[2][i]; + m5[2] = m4[1][i] - m4[2][i]; + m5[3] = m4[0][i] - m4[3][i]; + + m4[0][i] = m5[0] + m5[1]; + m4[2][i] = m5[0] - m5[1]; + m4[1][i] = m5[3]*2 + m5[2]; + m4[3][i] = m5[3] - m5[2]*2; + } + } + // Quant + + nonzero=FALSE; + + run=-1; + scan_pos=0; + + for (coeff_ctr=0;coeff_ctr < 16;coeff_ctr++) + { + + if (is_field_mode) + { + // Alternate scan for field coding + i=FIELD_SCAN[coeff_ctr][0]; + j=FIELD_SCAN[coeff_ctr][1]; + } + else + { + i=SNGL_SCAN[coeff_ctr][0]; + j=SNGL_SCAN[coeff_ctr][1]; + } + + run++; + ilev=0; + + if(lossless_qpprime) + level = absm (img->m7[j][i]); + else + level = (absm (m4[j][i]) * levelscale[i][j] + leveloffset[i][j]) >> q_bits; + + if (img->AdaptiveRounding) + { + if (lossless_qpprime || level == 0 ) + { + img->fadjust4x4[intra][block_y+j][block_x+i] = 0; + } + else + { + img->fadjust4x4[intra][block_y+j][block_x+i] = + (AdaptRndWeight * (absm(m4[j][i]) * levelscale[i][j] - (level << q_bits)) + (1<< (q_bits))) >> (q_bits + 1); + } + } + + if (level != 0) + { + nonzero=TRUE; + + *coeff_cost += (level > 1 || lossless_qpprime) ? MAX_VALUE : COEFF_COST[input->disthres][run]; + + if(lossless_qpprime) + ACLevel[scan_pos] = sign(level,img->m7[j][i]); + else + ACLevel[scan_pos] = sign(level,m4[j][i]); + + ACRun [scan_pos] = run; + ++scan_pos; + run=-1; // reset zero level counter + + level=sign(level, m4[j][i]); + + if(lossless_qpprime) + { + ilev=level; + } + else if(qp_per<4) + { + ilev=(level*invlevelscale[i][j]+(1<<(3-qp_per)))>>(4-qp_per); + } + else + { + ilev=(level*invlevelscale[i][j])<<(qp_per-4); + } + } + if(!lossless_qpprime) + m4[j][i]=ilev; + } + + ACLevel[scan_pos] = 0; + + // IDCT. 
+ // horizontal + + if (!lossless_qpprime) + { + for (j=0; j < BLOCK_SIZE; j++) + { + m6[0]=(m4[j][0] + m4[j][2]); + m6[1]=(m4[j][0] - m4[j][2]); + m6[2]=(m4[j][1]>>1) - m4[j][3]; + m6[3]= m4[j][1] + (m4[j][3]>>1); + + m4[j][0] = m6[0] + m6[3]; + m4[j][1] = m6[1] + m6[2]; + m4[j][2] = m6[1] - m6[2]; + m4[j][3] = m6[0] - m6[3]; + } + + // vertical + for (i=0; i < BLOCK_SIZE; i++) + { + + m6[0]=(m4[0][i] + m4[2][i]); + m6[1]=(m4[0][i] - m4[2][i]); + m6[2]=(m4[1][i]>>1) - m4[3][i]; + m6[3]= m4[1][i] + (m4[3][i]>>1); + + ii = i + block_x; + + if (!img->residue_transform_flag) + { + img->m7[0][i] = min(img->max_imgpel_value,max(0,(m6[0]+m6[3]+((long)img->mpr[0 + block_y][ii] << DQ_BITS)+DQ_ROUND)>>DQ_BITS)); + img->m7[1][i] = min(img->max_imgpel_value,max(0,(m6[1]+m6[2]+((long)img->mpr[1 + block_y][ii] << DQ_BITS)+DQ_ROUND)>>DQ_BITS)); + img->m7[2][i] = min(img->max_imgpel_value,max(0,(m6[1]-m6[2]+((long)img->mpr[2 + block_y][ii] << DQ_BITS)+DQ_ROUND)>>DQ_BITS)); + img->m7[3][i] = min(img->max_imgpel_value,max(0,(m6[0]-m6[3]+((long)img->mpr[3 + block_y][ii] << DQ_BITS)+DQ_ROUND)>>DQ_BITS)); + } + else + { + if(lossless_qpprime) + { + img->m7[0][i] = m6[0]+m6[3]; + img->m7[1][i] = m6[1]+m6[2]; + img->m7[2][i] = m6[1]-m6[2]; + img->m7[3][i] = m6[0]-m6[3]; + } + else + { + img->m7[0][i] =(m6[0]+m6[3]+DQ_ROUND)>>DQ_BITS; + img->m7[1][i] =(m6[1]+m6[2]+DQ_ROUND)>>DQ_BITS; + img->m7[2][i] =(m6[1]-m6[2]+DQ_ROUND)>>DQ_BITS; + img->m7[3][i] =(m6[0]-m6[3]+DQ_ROUND)>>DQ_BITS; + } + } + } + } + // Decoded block moved to frame memory + if (!img->residue_transform_flag) + { + if(lossless_qpprime) + { + for (j=0; j < BLOCK_SIZE; j++) + { + pix_y = img->pix_y+block_y+j; + for (i=0; i < BLOCK_SIZE; i++) + { + enc_picture->imgY[pix_y][img->pix_x+block_x+i]=img->m7[j][i]+img->mpr[j+block_y][i+block_x]; + } + } + } + else + { + for (j=0; j < BLOCK_SIZE; j++) + { + pix_y = img->pix_y+block_y+j; + for (i=0; i < BLOCK_SIZE; i++) + { + 
enc_picture->imgY[pix_y][img->pix_x+block_x+i]=img->m7[j][i]; + } + } + } + + } + return nonzero; + } + + + /*! + ************************************************************************ + * \brief + * Transform,quantization,inverse transform for chroma. + * The main reason why this is done in a separate routine is the + * additional 2x2 transform of DC-coeffs. This routine is called + * ones for each of the chroma components. + * + * \par Input: + * uv : Make difference between the U and V chroma component \n + * cr_cbp: chroma coded block pattern + * + * \par Output: + * cr_cbp: Updated chroma coded block pattern. + ************************************************************************ + */ + int dct_chroma(int uv,int cr_cbp) + { + int i,j,i1,j2,ilev,n2,n1,j1,mb_y,coeff_ctr,level ,scan_pos,run; + int m1[BLOCK_SIZE],m5[BLOCK_SIZE],m6[BLOCK_SIZE]; + int coeff_cost; + int cr_cbp_tmp; + int DCcoded=0 ; + Macroblock *currMB = &img->mb_data[img->current_mb_nr]; + + int qp_per,qp_rem,q_bits; + int qp_c; + + int b4; + int* DCLevel = img->cofDC[uv+1][0]; + int* DCRun = img->cofDC[uv+1][1]; + int* ACLevel; + int* ACRun; + int intra = IS_INTRA (currMB); + int uv_scale = uv*(img->num_blk8x8_uv >> 1); + + //FRExt + int64 cbpblk_pattern[4]={0, 0xf0000, 0xff0000, 0xffff0000}; + int yuv = img->yuv_format; + int b8; + int m3[4][4]; + int m4[4][4]; + int qp_per_dc = 0; + int qp_rem_dc = 0; + int q_bits_422 = 0; + int ***levelscale, ***leveloffset; + int ***invlevelscale; + short pix_c_x, pix_c_y; + short is_field_mode = (img->field_picture || ( img->MbaffFrameFlag && currMB->mb_field)); + + Boolean lossless_qpprime = ((currMB->qp + img->bitdepth_luma_qp_scale)==0 && img->lossless_qpprime_flag==1); + + qp_c = currMB->qp + img->chroma_qp_offset[uv]; + qp_c = Clip3(-img->bitdepth_chroma_qp_scale,51,qp_c); + qp_c = (qp_c < 0)? 
qp_c : QP_SCALE_CR[qp_c - MIN_QP]; + + qp_per = (qp_c + img->bitdepth_chroma_qp_scale)/6; + qp_rem = (qp_c + img->bitdepth_chroma_qp_scale)%6; + q_bits = Q_BITS+qp_per; + + levelscale = LevelScale4x4Chroma[uv][intra]; + leveloffset = LevelOffset4x4Chroma[uv][intra]; + invlevelscale = InvLevelScale4x4Chroma[uv][intra]; + + if (img->yuv_format == YUV422) + { + //for YUV422 only + qp_per_dc = (qp_c + 3 + img->bitdepth_chroma_qp_scale)/6; + qp_rem_dc = (qp_c + 3 + img->bitdepth_chroma_qp_scale)%6; + + q_bits_422 = Q_BITS+qp_per_dc; + } + + + //============= dct transform =============== + for (n2=0; n2 < img->mb_cr_size_y; n2 += BLOCK_SIZE) + { + for (n1=0; n1 < img->mb_cr_size_x; n1 += BLOCK_SIZE) + { + + // Horizontal transform. + for (j=0; j < BLOCK_SIZE && !lossless_qpprime; j++) + { + mb_y=n2+j; + + m5[0]=img->m7[mb_y][n1 ]+img->m7[mb_y][n1+3]; + m5[1]=img->m7[mb_y][n1+1]+img->m7[mb_y][n1+2]; + m5[2]=img->m7[mb_y][n1+1]-img->m7[mb_y][n1+2]; + m5[3]=img->m7[mb_y][n1 ]-img->m7[mb_y][n1+3]; + + img->m7[mb_y][n1 ] = (m5[0] + m5[1]); + img->m7[mb_y][n1+2] = (m5[0] - m5[1]); + img->m7[mb_y][n1+1] = m5[3]*2 + m5[2]; + img->m7[mb_y][n1+3] = m5[3] - m5[2]*2; + } + + // Vertical transform. + + for (i=0; i < BLOCK_SIZE && !lossless_qpprime; i++) + { + j1=n1+i; + m5[0] = img->m7[n2 ][j1] + img->m7[n2+3][j1]; + m5[1] = img->m7[n2+1][j1] + img->m7[n2+2][j1]; + m5[2] = img->m7[n2+1][j1] - img->m7[n2+2][j1]; + m5[3] = img->m7[n2 ][j1] - img->m7[n2+3][j1]; + + img->m7[n2+0][j1] = (m5[0] + m5[1]); + img->m7[n2+2][j1] = (m5[0] - m5[1]); + img->m7[n2+1][j1] = m5[3]*2 + m5[2]; + img->m7[n2+3][j1] = m5[3] - m5[2]*2; + } + } + } + + if (yuv == YUV420) + { + //================== CHROMA DC YUV420 =================== + // 2X2 transform of DC coeffs. 
+ if(lossless_qpprime) + { + m1[0]=img->m7[0][0]; + m1[1]=img->m7[0][4]; + m1[2]=img->m7[4][0]; + m1[3]=img->m7[4][4]; + } + else + { + m1[0]=(img->m7[0][0] + img->m7[0][4] + img->m7[4][0] + img->m7[4][4]); + m1[1]=(img->m7[0][0] - img->m7[0][4] + img->m7[4][0] - img->m7[4][4]); + m1[2]=(img->m7[0][0] + img->m7[0][4] - img->m7[4][0] - img->m7[4][4]); + m1[3]=(img->m7[0][0] - img->m7[0][4] - img->m7[4][0] + img->m7[4][4]); + } + + // Quant of chroma 2X2 coeffs. + run=-1; + scan_pos=0; + + for (coeff_ctr=0; coeff_ctr < 4; coeff_ctr++) + { + run++; + ilev=0; + + if(lossless_qpprime) + level =absm(m1[coeff_ctr]); + else + level =(absm(m1[coeff_ctr]) * levelscale[qp_rem][0][0] + (leveloffset[qp_per][0][0]<<1)) >> (q_bits+1); + + if (input->symbol_mode == UVLC && img->qp < 4) + { + if (level > CAVLC_LEVEL_LIMIT) + level = CAVLC_LEVEL_LIMIT; + } + + if (level != 0) + { + currMB->cbp_blk |= 0xf0000 << (uv << 2) ; // if one of the 2x2-DC levels is != 0 set the + cr_cbp=max(1,cr_cbp); // coded-bit all 4 4x4 blocks (bit 16-19 or 20-23) + DCcoded = 1 ; + DCLevel[scan_pos] = sign(level ,m1[coeff_ctr]); + DCRun [scan_pos] = run; + scan_pos++; + run=-1; + + ilev=sign(level, m1[coeff_ctr]); + } + if(!lossless_qpprime) + m1[coeff_ctr]=ilev; + } + DCLevel[scan_pos] = 0; + + // Inverse transform of 2x2 DC levels + if(!lossless_qpprime) + { + m5[0]=(m1[0] + m1[1] + m1[2] + m1[3]); + m5[1]=(m1[0] - m1[1] + m1[2] - m1[3]); + m5[2]=(m1[0] + m1[1] - m1[2] - m1[3]); + m5[3]=(m1[0] - m1[1] - m1[2] + m1[3]); + if(qp_per<5) + { + for(i=0; i<4; i++) + m1[i]=(m5[i] * invlevelscale[qp_rem][0][0])>>(5-qp_per); + } + else + { + for(i=0; i<4; i++) + m1[i]=(m5[i] * invlevelscale[qp_rem][0][0])<<(qp_per-5); + } + + img->m7[0][0] = m1[0]; + img->m7[0][4] = m1[1]; + img->m7[4][0] = m1[2]; + img->m7[4][4] = m1[3]; + } + } + else if(yuv == YUV422) + { + //================== CHROMA DC YUV422 =================== + //transform DC coeff + //horizontal + + //pick out DC coeff + for (j=0; j < 
img->mb_cr_size_y; j+=BLOCK_SIZE) + { + for (i=0; i < img->mb_cr_size_x; i+=BLOCK_SIZE) + m3[i>>2][j>>2]= img->m7[j][i]; + } + //horizontal + if(!lossless_qpprime) + { + m4[0][0] = m3[0][0] + m3[1][0]; + m4[0][1] = m3[0][1] + m3[1][1]; + m4[0][2] = m3[0][2] + m3[1][2]; + m4[0][3] = m3[0][3] + m3[1][3]; + + m4[1][0] = m3[0][0] - m3[1][0]; + m4[1][1] = m3[0][1] - m3[1][1]; + m4[1][2] = m3[0][2] - m3[1][2]; + m4[1][3] = m3[0][3] - m3[1][3]; + + // vertical + for (i=0;i<2;i++) + { + m5[0] = m4[i][0] + m4[i][3]; + m5[1] = m4[i][1] + m4[i][2]; + m5[2] = m4[i][1] - m4[i][2]; + m5[3] = m4[i][0] - m4[i][3]; + + m4[i][0] = (m5[0] + m5[1]); + m4[i][2] = (m5[0] - m5[1]); + m4[i][1] = (m5[3] + m5[2]); + m4[i][3] = (m5[3] - m5[2]); + } + } + + run=-1; + scan_pos=0; + + //quant of chroma DC-coeffs + for (coeff_ctr=0;coeff_ctr<8;coeff_ctr++) + { + i=SCAN_YUV422[coeff_ctr][0]; + j=SCAN_YUV422[coeff_ctr][1]; + + run++; + + if(lossless_qpprime) + { + level = absm(m3[i][j]); + m4[i][j]=m3[i][j]; + } + else + level =(absm(m4[i][j]) * levelscale[qp_rem_dc][0][0] + (leveloffset[qp_per_dc][0][0]*2)) >> (q_bits_422+1); + + if (level != 0) + { + //YUV422 + currMB->cbp_blk |= 0xff0000 << (uv << 3) ; // if one of the DC levels is != 0 set the + cr_cbp=max(1,cr_cbp); // coded-bit all 4 4x4 blocks (bit 16-31 or 32-47) //YUV444 + DCcoded = 1 ; + + DCLevel[scan_pos] = sign(level,m4[i][j]); + DCRun [scan_pos] = run; + ++scan_pos; + run=-1; + } + if(!lossless_qpprime) + m3[i][j]=sign(level,m4[i][j]); + } + DCLevel[scan_pos]=0; + + //inverse DC transform + //horizontal + if(!lossless_qpprime) + { + m4[0][0] = m3[0][0] + m3[1][0]; + m4[0][1] = m3[0][1] + m3[1][1]; + m4[0][2] = m3[0][2] + m3[1][2]; + m4[0][3] = m3[0][3] + m3[1][3]; + + m4[1][0] = m3[0][0] - m3[1][0]; + m4[1][1] = m3[0][1] - m3[1][1]; + m4[1][2] = m3[0][2] - m3[1][2]; + m4[1][3] = m3[0][3] - m3[1][3]; + + // vertical + for (i=0;i<2;i++) + { + m6[0]=m4[i][0]+m4[i][2]; + m6[1]=m4[i][0]-m4[i][2]; + m6[2]=m4[i][1]-m4[i][3]; + 
m6[3]=m4[i][1]+m4[i][3]; + + if(qp_per_dc<4) + { + img->m7[0 ][i*4]=((((m6[0]+m6[3])*invlevelscale[qp_rem_dc][0][0]+(1<<(3-qp_per_dc)))>>(4-qp_per_dc))+2)>>2; + img->m7[4 ][i*4]=((((m6[1]+m6[2])*invlevelscale[qp_rem_dc][0][0]+(1<<(3-qp_per_dc)))>>(4-qp_per_dc))+2)>>2; + img->m7[8 ][i*4]=((((m6[1]-m6[2])*invlevelscale[qp_rem_dc][0][0]+(1<<(3-qp_per_dc)))>>(4-qp_per_dc))+2)>>2; + img->m7[12][i*4]=((((m6[0]-m6[3])*invlevelscale[qp_rem_dc][0][0]+(1<<(3-qp_per_dc)))>>(4-qp_per_dc))+2)>>2; + } + else + { + img->m7[0 ][i*4]=((((m6[0]+m6[3])*invlevelscale[qp_rem_dc][0][0])<<(qp_per_dc-4))+2)>>2; + img->m7[4 ][i*4]=((((m6[1]+m6[2])*invlevelscale[qp_rem_dc][0][0])<<(qp_per_dc-4))+2)>>2; + img->m7[8 ][i*4]=((((m6[1]-m6[2])*invlevelscale[qp_rem_dc][0][0])<<(qp_per_dc-4))+2)>>2; + img->m7[12][i*4]=((((m6[0]-m6[3])*invlevelscale[qp_rem_dc][0][0])<<(qp_per_dc-4))+2)>>2; + } + }//for (i=0;i<2;i++) + } + } + else if(yuv == YUV444) + { + //================== CHROMA DC YUV444 =================== + //transform DC coeff + //pick out DC coeff + for (j=0; j < img->mb_cr_size_y; j+=BLOCK_SIZE) + { + for (i=0; i < img->mb_cr_size_x; i+=BLOCK_SIZE) + m4[i>>2][j>>2]= img->m7[j][i]; + } + + //horizontal + for (j=0;j<4 && !lossless_qpprime;j++) + { + m5[0] = m4[0][j] + m4[3][j]; + m5[1] = m4[1][j] + m4[2][j]; + m5[2] = m4[1][j] - m4[2][j]; + m5[3] = m4[0][j] - m4[3][j]; + + m4[0][j]=m5[0]+m5[1]; + m4[2][j]=m5[0]-m5[1]; + m4[1][j]=m5[3]+m5[2]; + m4[3][j]=m5[3]-m5[2]; + } + // vertical + for (i=0;i<4 && !lossless_qpprime;i++) + { + m5[0] = m4[i][0] + m4[i][3]; + m5[1] = m4[i][1] + m4[i][2]; + m5[2] = m4[i][1] - m4[i][2]; + m5[3] = m4[i][0] - m4[i][3]; + + m4[i][0]=(m5[0]+m5[1])>>1; + m4[i][2]=(m5[0]-m5[1])>>1; + m4[i][1]=(m5[3]+m5[2])>>1; + m4[i][3]=(m5[3]-m5[2])>>1; + } + + run=-1; + scan_pos=0; + + //quant of chroma DC-coeffs + for (coeff_ctr=0;coeff_ctr<16;coeff_ctr++) + { + i=SNGL_SCAN[coeff_ctr][0]; + j=SNGL_SCAN[coeff_ctr][1]; + + run++; + + if(lossless_qpprime) + level = absm(m4[i][j]); + 
else + level =(absm(m4[i][j]) * levelscale[qp_rem][0][0] + (leveloffset[qp_per][0][0]*2)) >> (q_bits+1); + + if (level != 0) + { + //YUV444 + currMB->cbp_blk |= ((int64)0xffff0000) << (uv << 4) ; // if one of the DC levels is != 0 set the + cr_cbp=max(1,cr_cbp); // coded-bit all 4 4x4 blocks (bit 16-31 or 32-47) //YUV444 + DCcoded = 1 ; + + DCLevel[scan_pos] = sign(level,m4[i][j]); + DCRun [scan_pos] = run; + ++scan_pos; + run=-1; + } + if(!lossless_qpprime) + m4[i][j]=sign(level,m4[i][j]); + } + DCLevel[scan_pos]=0; + + // inverse DC transform + //horizontal + for (j=0;j<4 && !lossless_qpprime;j++) + { + m6[0] = m4[0][j] + m4[2][j]; + m6[1] = m4[0][j] - m4[2][j]; + m6[2] = m4[1][j] - m4[3][j]; + m6[3] = m4[1][j] + m4[3][j]; + + m4[0][j] = m6[0] + m6[3]; + m4[1][j] = m6[1] + m6[2]; + m4[2][j] = m6[1] - m6[2]; + m4[3][j] = m6[0] - m6[3]; + } + + //vertical + for (i=0;i<4 && !lossless_qpprime;i++) + { + m6[0]=m4[i][0]+m4[i][2]; + m6[1]=m4[i][0]-m4[i][2]; + m6[2]=m4[i][1]-m4[i][3]; + m6[3]=m4[i][1]+m4[i][3]; + + if(qp_per<4) + { + img->m7[0 ][i*4] = ((((m6[0] + m6[3])*invlevelscale[qp_rem][0][0]+(1<<(3-qp_per)))>>(4-qp_per))+2)>>2; + img->m7[4 ][i*4] = ((((m6[1] + m6[2])*invlevelscale[qp_rem][0][0]+(1<<(3-qp_per)))>>(4-qp_per))+2)>>2; + img->m7[8 ][i*4] = ((((m6[1] - m6[2])*invlevelscale[qp_rem][0][0]+(1<<(3-qp_per)))>>(4-qp_per))+2)>>2; + img->m7[12][i*4] = ((((m6[0] - m6[3])*invlevelscale[qp_rem][0][0]+(1<<(3-qp_per)))>>(4-qp_per))+2)>>2; + } + else + { + img->m7[0 ][i*4] = ((((m6[0]+m6[3])*invlevelscale[qp_rem][0][0])<<(qp_per-4))+2)>>2; + img->m7[4 ][i*4] = ((((m6[1]+m6[2])*invlevelscale[qp_rem][0][0])<<(qp_per-4))+2)>>2; + img->m7[8 ][i*4] = ((((m6[1]-m6[2])*invlevelscale[qp_rem][0][0])<<(qp_per-4))+2)>>2; + img->m7[12][i*4] = ((((m6[0]-m6[3])*invlevelscale[qp_rem][0][0])<<(qp_per-4))+2)>>2; + } + } + } + + // Quant of chroma AC-coeffs. 
+ coeff_cost=0; + cr_cbp_tmp=0; + + for (b8=0; b8 < (img->num_blk8x8_uv >> 1); b8++) + { + for (b4=0; b4 < 4; b4++) + { + n1 = hor_offset[yuv][b8][b4]; + n2 = ver_offset[yuv][b8][b4]; + ACLevel = img->cofAC[4+b8+uv_scale][b4][0]; + ACRun = img->cofAC[4+b8+uv_scale][b4][1]; + run=-1; + scan_pos=0; + + for (coeff_ctr=1; coeff_ctr < 16; coeff_ctr++)// start change rd_quant + { + + if (is_field_mode) + { // Alternate scan for field coding + i=FIELD_SCAN[coeff_ctr][0]; + j=FIELD_SCAN[coeff_ctr][1]; + } + else + { + i=SNGL_SCAN[coeff_ctr][0]; + j=SNGL_SCAN[coeff_ctr][1]; + } + ++run; + ilev=0; + + if(lossless_qpprime) + level = absm(img->m7[n2+j][n1+i]); + else + level=(absm(img->m7[n2+j][n1+i])*levelscale[qp_rem][i][j]+leveloffset[qp_per][i][j])>>q_bits; + + if (img->AdaptiveRounding) + { + if (lossless_qpprime || level == 0 ) + { + img->fadjust4x4Cr[intra][uv][n2+j][n1+i] = 0; + } + else + { + img->fadjust4x4Cr[intra][uv][n2+j][n1+i] = + (AdaptRndWeight * (absm(img->m7[n2+j][n1+i]) * levelscale[qp_rem][i][j] - (level << q_bits)) + (1<< (q_bits))) >> (q_bits + 1); + } + } + + if (level != 0) + { + currMB->cbp_blk |= ((int64)1) << cbp_blk_chroma[b8 + uv_scale][b4]; + if (level > 1 || lossless_qpprime) + coeff_cost += MAX_VALUE; // set high cost, shall not be discarded + else + coeff_cost += COEFF_COST[input->disthres][run]; + + cr_cbp_tmp=2; + ACLevel[scan_pos] = sign(level,img->m7[n2+j][n1+i]); + ACRun [scan_pos] = run; + ++scan_pos; + run=-1; + + level=sign(level, img->m7[n2+j][n1+i]); + if(lossless_qpprime) + { + ilev = level; + } + else if(qp_per<4) + { + ilev=(level*invlevelscale[qp_rem][i][j]+(1<<(3-qp_per)))>>(4-qp_per); + } + else + { + ilev=(level*invlevelscale[qp_rem][i][j])<<(qp_per-4); + } + } + if(!lossless_qpprime) + img->m7[n2+j][n1+i]=ilev; + } + ACLevel[scan_pos] = 0; + } + } + + // * reset chroma coeffs + if(coeff_cost < _CHROMA_COEFF_COST_ && !lossless_qpprime) + { + cr_cbp_tmp = 0 ; + + for (b8=0; b8 < (img->num_blk8x8_uv >> 1); b8++) + { + for (b4=0; 
b4 < 4; b4++) + { + n1 = hor_offset[yuv][b8][b4]; + n2 = ver_offset[yuv][b8][b4]; + ACLevel = img->cofAC[4+b8+uv_scale][b4][0]; + ACRun = img->cofAC[4+b8+uv_scale][b4][1]; + if( DCcoded == 0) + currMB->cbp_blk &= ~((int64)cbpblk_pattern[yuv] << (uv << (1+yuv))); // if no chroma DC's: then reset coded-bits of this chroma subblock + + ACLevel[0] = 0; + for (coeff_ctr=1; coeff_ctr < 16; coeff_ctr++)// ac coeff + { + + if (is_field_mode) + { // Alternate scan for field coding + i=FIELD_SCAN[coeff_ctr][0]; + j=FIELD_SCAN[coeff_ctr][1]; + } + else + { + i=SNGL_SCAN[coeff_ctr][0]; + j=SNGL_SCAN[coeff_ctr][1]; + } + img->m7[n2+j][n1+i]=0; + ACLevel[coeff_ctr] = 0; + } + } + } + } + + if(cr_cbp_tmp==2) + cr_cbp = 2; + + // IDCT. + // Horizontal. + for (n2=0; n2 < img->mb_cr_size_y && !lossless_qpprime; n2 += BLOCK_SIZE) + { + for (n1=0; n1 < img->mb_cr_size_x; n1 += BLOCK_SIZE) + { + for (j=0; j < BLOCK_SIZE; j++) + { + j2 = n2 + j; + for (i=0; i < BLOCK_SIZE; i++) + { + m5[i]=img->m7[j2][n1+i]; + } + + m6[0] = (m5[0] + m5[2]); + m6[1] = (m5[0] - m5[2]); + m6[2] = (m5[1]>>1) - m5[3]; + m6[3] = m5[1] + (m5[3]>>1); + + img->m7[j2][n1 ] = m6[0] + m6[3]; + img->m7[j2][n1+1] = m6[1] + m6[2]; + img->m7[j2][n1+2] = m6[1] - m6[2]; + img->m7[j2][n1+3] = m6[0] - m6[3]; + } + + // Vertical. 
+ for (i=0; i < BLOCK_SIZE && !lossless_qpprime; i++) + { + i1 = n1 + i; + for (j=0; j < BLOCK_SIZE; j++) + { + m5[j]=img->m7[n2+j][i1]; + } + m6[0]=(m5[0]+m5[2]); + m6[1]=(m5[0]-m5[2]); + m6[2]=(m5[1]>>1)-m5[3]; + m6[3]=m5[1]+(m5[3]>>1); + + // Residue Color Transform + if (!img->residue_transform_flag) + { + img->m7[n2 ][i1] = min(img->max_imgpel_value_uv,max(0,(m6[0]+m6[3]+((long)img->mpr[n2 ][i1] << DQ_BITS)+DQ_ROUND)>>DQ_BITS)); + img->m7[n2+1][i1] = min(img->max_imgpel_value_uv,max(0,(m6[1]+m6[2]+((long)img->mpr[n2+1][i1] << DQ_BITS)+DQ_ROUND)>>DQ_BITS)); + img->m7[n2+2][i1] = min(img->max_imgpel_value_uv,max(0,(m6[1]-m6[2]+((long)img->mpr[n2+2][i1] << DQ_BITS)+DQ_ROUND)>>DQ_BITS)); + img->m7[n2+3][i1] = min(img->max_imgpel_value_uv,max(0,(m6[0]-m6[3]+((long)img->mpr[n2+3][i1] << DQ_BITS)+DQ_ROUND)>>DQ_BITS)); + } + else + { + if(lossless_qpprime) + { + img->m7[n2 ][i1] = m6[0]+m6[3]; + img->m7[n2+1][i1] = m6[1]+m6[2]; + img->m7[n2+2][i1] = m6[1]-m6[2]; + img->m7[n2+3][i1] = m6[0]-m6[3]; + } + else + { + img->m7[n2 ][i1] = (m6[0]+m6[3]+DQ_ROUND)>>DQ_BITS; + img->m7[n2+1][i1] = (m6[1]+m6[2]+DQ_ROUND)>>DQ_BITS; + img->m7[n2+2][i1] = (m6[1]-m6[2]+DQ_ROUND)>>DQ_BITS; + img->m7[n2+3][i1] = (m6[0]-m6[3]+DQ_ROUND)>>DQ_BITS; + } + } + } + } + } + + // Decoded block moved to memory + if (!img->residue_transform_flag) + { + for (j=0; j < img->mb_cr_size_y; j++) + { + pix_c_y = img->pix_c_y+j; + for (i=0; i < img->mb_cr_size_x; i++) + { + pix_c_x = img->pix_c_x+i; + if(lossless_qpprime) + enc_picture->imgUV[uv][pix_c_y][pix_c_x]= img->m7[j][i]+img->mpr[j][i]; + else + enc_picture->imgUV[uv][pix_c_y][pix_c_x]= img->m7[j][i]; + } + } + } + return cr_cbp; + } + + + // Residue Color Transform + int dct_chroma4x4(int uv, int b8, int b4) + { + int sign(int a,int b); + + int i,j,i1,j1,ilev,m5[4],m6[4],coeff_ctr; + int level,scan_pos,run; + int nonzeroAC; + Macroblock *currMB = &img->mb_data[img->current_mb_nr]; + int intra = IS_INTRA (currMB); + + int qp_per,qp_rem,q_bits; + 
int qp_c; + + int* ACLevel = img->cofAC[b8][b4][0]; + int* ACRun = img->cofAC[b8][b4][1]; + + int **levelscale, **leveloffset; + int **invlevelscale; + + Boolean lossless_qpprime = ((img->qp + img->bitdepth_luma_qp_scale)==0 && img->lossless_qpprime_flag==1); + + qp_c = currMB->qp + img->chroma_qp_offset[uv]; + qp_c = (qp_c < 0)? qp_c : QP_SCALE_CR[qp_c - MIN_QP]; + + qp_per = (qp_c + img->bitdepth_chroma_qp_scale)/6; + qp_rem = (qp_c + img->bitdepth_chroma_qp_scale)%6; + q_bits = Q_BITS+qp_per; + + levelscale = LevelScale4x4Chroma[uv][intra][qp_rem]; + leveloffset = LevelOffset4x4Chroma[uv][intra][qp_per]; + invlevelscale = InvLevelScale4x4Chroma[uv][intra][qp_rem]; + + // Horizontal transform + if(!lossless_qpprime) + for (j=0; j < BLOCK_SIZE; j++) + { + for (i=0; i < 2; i++) + { + i1=3-i; + m5[i]=img->m7[j][i]+img->m7[j][i1]; + m5[i1]=img->m7[j][i]-img->m7[j][i1]; + } + img->m7[j][0]=(m5[0]+m5[1]); + img->m7[j][2]=(m5[0]-m5[1]); + img->m7[j][1]=m5[3]*2+m5[2]; + img->m7[j][3]=m5[3]-m5[2]*2; + } + + // Vertical transform + if(!lossless_qpprime) + for (i=0; i < BLOCK_SIZE; i++) + { + for (j=0; j < 2; j++) + { + j1=3-j; + m5[j]=img->m7[j][i]+img->m7[j1][i]; + m5[j1]=img->m7[j][i]-img->m7[j1][i]; + } + img->m7[0][i]=(m5[0]+m5[1]); + img->m7[2][i]=(m5[0]-m5[1]); + img->m7[1][i]=m5[3]*2+m5[2]; + img->m7[3][i]=m5[3]-m5[2]*2; + } + + // Quant + + nonzeroAC=FALSE; + + run=-1; + scan_pos=0; + + if(lossless_qpprime) + level = absm(img->m7[0][0]); + else + level =(absm(img->m7[0][0]) * levelscale[0][0] + leveloffset[0][0]) >> q_bits; + + b8 -= 4*(uv+1); + dc_level_temp[uv][2*(b8 & 0x01)+(b4 & 0x01)][2*(b8 >> 1)+(b4 >> 1)] = sign(level, img->m7[0][0]); + + /* Inverse Quantization */ + if(lossless_qpprime) + { + img->m7[0][0] = sign( level, img->m7[0][0]); + } + else + { + if(qp_per<4) + { + img->m7[0][0] = sign( ((level*invlevelscale[0][0]+(1<<(3-qp_per)))>>(4-qp_per)), img->m7[0][0]); + } + else + { + img->m7[0][0] = sign( ((level*invlevelscale[0][0])<<(qp_per-4)), 
img->m7[0][0]); + } + } + + for (coeff_ctr=1;coeff_ctr < 16;coeff_ctr++) + { + i=SNGL_SCAN[coeff_ctr][0]; + j=SNGL_SCAN[coeff_ctr][1]; + + run++; + ilev=0; + + if(lossless_qpprime) + level = absm (img->m7[j][i]); + else + level = (absm(img->m7[j][i])*levelscale[i][j]+leveloffset[i][j])>>q_bits; + + if (level != 0) + { + if(i||j) nonzeroAC=TRUE; + + ACLevel[scan_pos] = sign(level,img->m7[j][i]); + ACRun [scan_pos] = run; + ++scan_pos; + run=-1; // reset zero level counter + + level=sign(level, img->m7[j][i]); + if(lossless_qpprime) + { + ilev=level; + } + else if(qp_per<4) + { + ilev=(level*invlevelscale[i][j]+(1<<(3-qp_per)))>>(4-qp_per); + } + else + { + ilev=(level*invlevelscale[i][j])<<(qp_per-4); + } + } + if(!lossless_qpprime) + img->m7[j][i]=ilev; + } + ACLevel[scan_pos] = 0; + + + // IDCT. + // horizontal + if(!lossless_qpprime) + for (j=0; j < BLOCK_SIZE; j++) + { + for (i=0; i < BLOCK_SIZE; i++) + { + m5[i]=img->m7[j][i]; + } + m6[0]=(m5[0]+m5[2]); + m6[1]=(m5[0]-m5[2]); + m6[2]=(m5[1]>>1)-m5[3]; + m6[3]=m5[1]+(m5[3]>>1); + + for (i=0; i < 2; i++) + { + i1=3-i; + img->m7[j][i]=m6[i]+m6[i1]; + img->m7[j][i1]=m6[i]-m6[i1]; + } + } + + // vertical + if(!lossless_qpprime) + for (i=0; i < BLOCK_SIZE; i++) + { + for (j=0; j < BLOCK_SIZE; j++) + { + m5[j]=img->m7[j][i]; + } + m6[0]=(m5[0]+m5[2]); + m6[1]=(m5[0]-m5[2]); + m6[2]=(m5[1]>>1)-m5[3]; + m6[3]=m5[1]+(m5[3]>>1); + + for (j=0; j < 2; j++) + { + j1=3-j; + img->m7[j][i] =(m6[j]+m6[j1]+DQ_ROUND)>>DQ_BITS; + img->m7[j1][i]=(m6[j]-m6[j1]+DQ_ROUND)>>DQ_BITS; + } + } + + return nonzeroAC; + } + + // Residue Color Transform + int dct_chroma_DC(int uv, int cr_cbp) + { + int run, scan_pos, coeff_ctr, level, i, j; + int* DCLevel = img->cofDC[uv+1][0]; + int* DCRun = img->cofDC[uv+1][1]; + + run=-1; + scan_pos=0; + + for (coeff_ctr=0; coeff_ctr < 16; coeff_ctr++) + { + i=SNGL_SCAN[coeff_ctr][0]; + j=SNGL_SCAN[coeff_ctr][1]; + + run++; + + level = absm(dc_level[uv][i][j]); + + if (level != 0) + { + 
cr_cbp=max(1,cr_cbp); + DCLevel[scan_pos] = sign(level ,dc_level[uv][i][j]); + DCRun [scan_pos] = run; + scan_pos++; + run=-1; + } + } + DCLevel[scan_pos] = 0; + + return cr_cbp; + } + + + /*! + ************************************************************************ + * \brief + * The routine performs transform,quantization,inverse transform, adds the diff. + * to the prediction and writes the result to the decoded luma frame. Includes the + * RD constrained quantization also. + * + * \par Input: + * block_x,block_y: Block position inside a macro block (0,4,8,12). + * + * \par Output: + * nonzero: 0 if no levels are nonzero. 1 if there are nonzero levels. \n + * coeff_cost: Counter for nonzero coefficients, used to discard expensive levels. + * + * + ************************************************************************ + */ + int dct_luma_sp(int block_x,int block_y,int *coeff_cost) + { + int sign(int a,int b); + + int i,j,i1,j1,ilev,m5[4],m6[4],coeff_ctr; + int qp_const,level,scan_pos,run; + int nonzero; + + int predicted_block[BLOCK_SIZE][BLOCK_SIZE],c_err,qp_const2; + int qp_per,qp_rem,q_bits; + int qp_per_sp,qp_rem_sp,q_bits_sp; + + int pos_x = block_x >> BLOCK_SHIFT; + int pos_y = block_y >> BLOCK_SHIFT; + int b8 = 2*(pos_y >> 1) + (pos_x >> 1); + int b4 = 2*(pos_y & 0x01) + (pos_x & 0x01); + int* ACLevel = img->cofAC[b8][b4][0]; + int* ACRun = img->cofAC[b8][b4][1]; + Macroblock *currMB = &img->mb_data[img->current_mb_nr]; + short is_field_mode = (img->field_picture || ( img->MbaffFrameFlag && currMB->mb_field)); + + // For encoding optimization + int c_err1, c_err2, level1, level2; + double D_dis1, D_dis2; + int len, info; + double lambda_mode = 0.85 * pow (2, (currMB->qp - SHIFT_QP)/3.0) * 4; + + qp_per = (currMB->qp-MIN_QP)/6; + qp_rem = (currMB->qp-MIN_QP)%6; + q_bits = Q_BITS+qp_per; + qp_per_sp = (currMB->qpsp-MIN_QP)/6; + qp_rem_sp = (currMB->qpsp-MIN_QP)%6; + q_bits_sp = Q_BITS+qp_per_sp; + + qp_const=(1<m7[j][i]+=img->mpr[j+block_y][i+block_x]; + 
predicted_block[i][j]=img->mpr[j+block_y][i+block_x]; + } + + for (j=0; j < BLOCK_SIZE; j++) + { + for (i=0; i < 2; i++) + { + i1=3-i; + m5[i]=img->m7[j][i]+img->m7[j][i1]; + m5[i1]=img->m7[j][i]-img->m7[j][i1]; + } + img->m7[j][0]=(m5[0]+m5[1]); + img->m7[j][2]=(m5[0]-m5[1]); + img->m7[j][1]=m5[3]*2+m5[2]; + img->m7[j][3]=m5[3]-m5[2]*2; + } + + // Vertical transform + + for (i=0; i < BLOCK_SIZE; i++) + { + for (j=0; j < 2; j++) + { + j1=3-j; + m5[j]=img->m7[j][i]+img->m7[j1][i]; + m5[j1]=img->m7[j][i]-img->m7[j1][i]; + } + img->m7[0][i]=(m5[0]+m5[1]); + img->m7[2][i]=(m5[0]-m5[1]); + img->m7[1][i]=m5[3]*2+m5[2]; + img->m7[3][i]=m5[3]-m5[2]*2; + } + + for (j=0; j < BLOCK_SIZE; j++) + { + for (i=0; i < 2; i++) + { + i1=3-i; + m5[i]=predicted_block[i][j]+predicted_block[i1][j]; + m5[i1]=predicted_block[i][j]-predicted_block[i1][j]; + } + predicted_block[0][j]=(m5[0]+m5[1]); + predicted_block[2][j]=(m5[0]-m5[1]); + predicted_block[1][j]=m5[3]*2+m5[2]; + predicted_block[3][j]=m5[3]-m5[2]*2; + } + + // Vertical transform + + for (i=0; i < BLOCK_SIZE; i++) + { + for (j=0; j < 2; j++) + { + j1=3-j; + m5[j]=predicted_block[i][j]+predicted_block[i][j1]; + m5[j1]=predicted_block[i][j]-predicted_block[i][j1]; + } + predicted_block[i][0]=(m5[0]+m5[1]); + predicted_block[i][2]=(m5[0]-m5[1]); + predicted_block[i][1]=m5[3]*2+m5[2]; + predicted_block[i][3]=m5[3]-m5[2]*2; + } + + // Quant + nonzero=FALSE; + + run=-1; + scan_pos=0; + + for (coeff_ctr=0;coeff_ctr < 16;coeff_ctr++) // 8 times if double scan, 16 normal scan + { + + if (is_field_mode) + { // Alternate scan for field coding + i=FIELD_SCAN[coeff_ctr][0]; + j=FIELD_SCAN[coeff_ctr][1]; + } + else + { + i=SNGL_SCAN[coeff_ctr][0]; + j=SNGL_SCAN[coeff_ctr][1]; + } + + run++; + ilev=0; + + // decide prediction + + // case 1 + level1 = (absm (predicted_block[i][j]) * quant_coef[qp_rem_sp][i][j] + qp_const2) >> q_bits_sp; + level1 = (level1 << q_bits_sp) / quant_coef[qp_rem_sp][i][j]; + c_err1 = img->m7[j][i]-sign(level1, 
predicted_block[i][j]); + level1 = (absm (c_err1) * quant_coef[qp_rem][i][j] + qp_const) >> q_bits; + + // case 2 + c_err2=img->m7[j][i]-predicted_block[i][j]; + level2 = (absm (c_err2) * quant_coef[qp_rem][i][j] + qp_const) >> q_bits; + + // select prediction + if ((level1 != level2) && (level1 != 0) && (level2 != 0)) + { + D_dis1 = img->m7[j][i] - ((sign(level1,c_err1)*dequant_coef[qp_rem][i][j]*A[i][j]<< qp_per) >>6) - predicted_block[i][j]; + levrun_linfo_inter(level1, run, &len, &info); + D_dis1 = D_dis1*D_dis1 + lambda_mode * len; + + D_dis2 = img->m7[j][i] - ((sign(level2,c_err2)*dequant_coef[qp_rem][i][j]*A[i][j]<< qp_per) >>6) - predicted_block[i][j]; + levrun_linfo_inter(level2, run, &len, &info); + D_dis2 = D_dis2 * D_dis2 + lambda_mode * len; + + if (D_dis1 == D_dis2) + level = (absm(level1) < absm(level2)) ? level1 : level2; + else + { + if (D_dis1 < D_dis2) + level = level1; + else + level = level2; + } + c_err = (level == level1) ? c_err1 : c_err2; + } + else if (level1 == level2) + { + level = level1; + c_err = c_err1; + } + else + { + level = (level1 == 0) ? level1 : level2; + c_err = (level1 == 0) ? c_err1 : c_err2; + } + + if (level != 0) + { + nonzero=TRUE; + if (level > 1) + *coeff_cost += MAX_VALUE; // set high cost, shall not be discarded + else + *coeff_cost += COEFF_COST[input->disthres][run]; + ACLevel[scan_pos] = sign(level,c_err); + ACRun [scan_pos] = run; + ++scan_pos; + run=-1; // reset zero level counter + ilev=((sign(level,c_err)*dequant_coef[qp_rem][i][j]*A[i][j]<< qp_per) >>6); + } + ilev+=predicted_block[i][j] ; + img->m7[j][i] = sign((absm(ilev) * quant_coef[qp_rem_sp][i][j] + qp_const2)>> q_bits_sp, ilev) * dequant_coef[qp_rem_sp][i][j] << qp_per_sp; + } + ACLevel[scan_pos] = 0; + + + // IDCT. 
+ // horizontal + + for (j=0; j < BLOCK_SIZE; j++) + { + for (i=0; i < BLOCK_SIZE; i++) + { + m5[i]=img->m7[j][i]; + } + m6[0]=(m5[0]+m5[2]); + m6[1]=(m5[0]-m5[2]); + m6[2]=(m5[1]>>1)-m5[3]; + m6[3]=m5[1]+(m5[3]>>1); + + for (i=0; i < 2; i++) + { + i1=3-i; + img->m7[j][i]=m6[i]+m6[i1]; + img->m7[j][i1]=m6[i]-m6[i1]; + } + } + + // vertical + + for (i=0; i < BLOCK_SIZE; i++) + { + for (j=0; j < BLOCK_SIZE; j++) + { + m5[j]=img->m7[j][i]; + } + m6[0]=(m5[0]+m5[2]); + m6[1]=(m5[0]-m5[2]); + m6[2]=(m5[1]>>1)-m5[3]; + m6[3]=m5[1]+(m5[3]>>1); + + for (j=0; j < 2; j++) + { + j1=3-j; + img->m7[j][i] =min(img->max_imgpel_value,max(0,(m6[j]+m6[j1]+DQ_ROUND)>>DQ_BITS)); + img->m7[j1][i]=min(img->max_imgpel_value,max(0,(m6[j]-m6[j1]+DQ_ROUND)>>DQ_BITS)); + } + } + + // Decoded block moved to frame memory + + for (j=0; j < BLOCK_SIZE; j++) + for (i=0; i < BLOCK_SIZE; i++) + enc_picture->imgY[img->pix_y+block_y+j][img->pix_x+block_x+i]=img->m7[j][i]; + + return nonzero; + } + + /*! + ************************************************************************ + * \brief + * Transform,quantization,inverse transform for chroma. + * The main reason why this is done in a separate routine is the + * additional 2x2 transform of DC-coeffs. This routine is called + * ones for each of the chroma components. + * + * \par Input: + * uv : Make difference between the U and V chroma component \n + * cr_cbp: chroma coded block pattern + * + * \par Output: + * cr_cbp: Updated chroma coded block pattern. 
+ ************************************************************************ + */ + int dct_chroma_sp(int uv,int cr_cbp) + { + int i,j,i1,j2,ilev,n2,n1,j1,mb_y,coeff_ctr,qp_const,c_err,level ,scan_pos,run; + int m1[BLOCK_SIZE],m5[BLOCK_SIZE],m6[BLOCK_SIZE]; + int coeff_cost; + int cr_cbp_tmp; + int predicted_chroma_block[MB_BLOCK_SIZE>>1][MB_BLOCK_SIZE>>1],qp_const2,mp1[BLOCK_SIZE]; + Macroblock *currMB = &img->mb_data[img->current_mb_nr]; + short is_field_mode = (img->field_picture || ( img->MbaffFrameFlag && currMB->mb_field)); + + int qp_per,qp_rem,q_bits; + int qp_per_sp,qp_rem_sp,q_bits_sp; + + int b4; + int* DCLevel = img->cofDC[uv+1][0]; + int* DCRun = img->cofDC[uv+1][1]; + int* ACLevel; + int* ACRun; + + int c_err1, c_err2, level1, level2; + int len, info; + double D_dis1, D_dis2; + double lambda_mode = 0.85 * pow (2, (currMB->qp -SHIFT_QP)/3.0) * 4; + + + int qpChroma=Clip3(0, 51, currMB->qp + active_pps->chroma_qp_index_offset); + int qpChromaSP=Clip3(0, 51, currMB->qpsp + active_pps->chroma_qp_index_offset); + + qp_per = ((qpChroma<0?qpChroma:QP_SCALE_CR[qpChroma])-MIN_QP)/6; + qp_rem = ((qpChroma<0?qpChroma:QP_SCALE_CR[qpChroma])-MIN_QP)%6; + q_bits = Q_BITS+qp_per; + qp_const=(1<qpsp:QP_SCALE_CR[qpChromaSP])-MIN_QP)/6; + qp_rem_sp = ((qpChromaSP<0?currMB->qpsp:QP_SCALE_CR[qpChromaSP])-MIN_QP)%6; + q_bits_sp = Q_BITS+qp_per_sp; + qp_const2=(1<>1; j++) + for (i=0; i < MB_BLOCK_SIZE>>1; i++) + { + img->m7[j][i]+=img->mpr[j][i]; + predicted_chroma_block[i][j]=img->mpr[j][i]; + } + + for (n2=0; n2 <= BLOCK_SIZE; n2 += BLOCK_SIZE) + { + for (n1=0; n1 <= BLOCK_SIZE; n1 += BLOCK_SIZE) + { + + // Horizontal transform. 
+ for (j=0; j < BLOCK_SIZE; j++) + { + mb_y=n2+j; + for (i=0; i < 2; i++) + { + i1=3-i; + m5[i]=img->m7[mb_y][i+n1]+img->m7[mb_y][i1+n1]; + m5[i1]=img->m7[mb_y][i+n1]-img->m7[mb_y][i1+n1]; + } + img->m7[mb_y][n1] =(m5[0]+m5[1]); + img->m7[mb_y][n1+2]=(m5[0]-m5[1]); + img->m7[mb_y][n1+1]=m5[3]*2+m5[2]; + img->m7[mb_y][n1+3]=m5[3]-m5[2]*2; + } + + // Vertical transform. + + for (i=0; i < BLOCK_SIZE; i++) + { + j1=n1+i; + for (j=0; j < 2; j++) + { + j2=3-j; + m5[j]=img->m7[n2+j][j1]+img->m7[n2+j2][j1]; + m5[j2]=img->m7[n2+j][j1]-img->m7[n2+j2][j1]; + } + img->m7[n2+0][j1]=(m5[0]+m5[1]); + img->m7[n2+2][j1]=(m5[0]-m5[1]); + img->m7[n2+1][j1]=m5[3]*2+m5[2]; + img->m7[n2+3][j1]=m5[3]-m5[2]*2; + } + } + } + for (n2=0; n2 <= BLOCK_SIZE; n2 += BLOCK_SIZE) + { + for (n1=0; n1 <= BLOCK_SIZE; n1 += BLOCK_SIZE) + { + + // Horizontal transform. + for (j=0; j < BLOCK_SIZE; j++) + { + mb_y=n2+j; + for (i=0; i < 2; i++) + { + i1=3-i; + m5[i]=predicted_chroma_block[i+n1][mb_y]+predicted_chroma_block[i1+n1][mb_y]; + m5[i1]=predicted_chroma_block[i+n1][mb_y]-predicted_chroma_block[i1+n1][mb_y]; + } + predicted_chroma_block[n1][mb_y] =(m5[0]+m5[1]); + predicted_chroma_block[n1+2][mb_y]=(m5[0]-m5[1]); + predicted_chroma_block[n1+1][mb_y]=m5[3]*2+m5[2]; + predicted_chroma_block[n1+3][mb_y]=m5[3]-m5[2]*2; + } + + // Vertical transform. + + for (i=0; i < BLOCK_SIZE; i++) + { + j1=n1+i; + for (j=0; j < 2; j++) + { + j2=3-j; + m5[j]=predicted_chroma_block[j1][n2+j]+predicted_chroma_block[j1][n2+j2]; + m5[j2]=predicted_chroma_block[j1][n2+j]-predicted_chroma_block[j1][n2+j2]; + } + predicted_chroma_block[j1][n2+0]=(m5[0]+m5[1]); + predicted_chroma_block[j1][n2+2]=(m5[0]-m5[1]); + predicted_chroma_block[j1][n2+1]=m5[3]*2+m5[2]; + predicted_chroma_block[j1][n2+3]=m5[3]-m5[2]*2; + } + } + } + + // 2X2 transform of DC coeffs. 
+ m1[0]=(img->m7[0][0]+img->m7[0][4]+img->m7[4][0]+img->m7[4][4]); + m1[1]=(img->m7[0][0]-img->m7[0][4]+img->m7[4][0]-img->m7[4][4]); + m1[2]=(img->m7[0][0]+img->m7[0][4]-img->m7[4][0]-img->m7[4][4]); + m1[3]=(img->m7[0][0]-img->m7[0][4]-img->m7[4][0]+img->m7[4][4]); + + // 2X2 transform of DC coeffs. + mp1[0]=(predicted_chroma_block[0][0]+predicted_chroma_block[4][0]+predicted_chroma_block[0][4]+predicted_chroma_block[4][4]); + mp1[1]=(predicted_chroma_block[0][0]-predicted_chroma_block[4][0]+predicted_chroma_block[0][4]-predicted_chroma_block[4][4]); + mp1[2]=(predicted_chroma_block[0][0]+predicted_chroma_block[4][0]-predicted_chroma_block[0][4]-predicted_chroma_block[4][4]); + mp1[3]=(predicted_chroma_block[0][0]-predicted_chroma_block[4][0]-predicted_chroma_block[0][4]+predicted_chroma_block[4][4]); + + run=-1; + scan_pos=0; + + for (coeff_ctr=0; coeff_ctr < 4; coeff_ctr++) + { + run++; + ilev=0; + + // case 1 + c_err1 = (absm (mp1[coeff_ctr]) * quant_coef[qp_rem_sp][0][0] + 2 * qp_const2) >> (q_bits_sp + 1); + c_err1 = (c_err1 << (q_bits_sp + 1)) / quant_coef[qp_rem_sp][0][0]; + c_err1 = m1[coeff_ctr] - sign(c_err1, mp1[coeff_ctr]); + level1 = (absm(c_err1) * quant_coef[qp_rem][0][0] + 2 * qp_const) >> (q_bits+1); + + // case 2 + c_err2 = m1[coeff_ctr] - mp1[coeff_ctr]; + level2 = (absm(c_err2) * quant_coef[qp_rem][0][0] + 2 * qp_const) >> (q_bits+1); + + if (level1 != level2 && level1 != 0 && level2 != 0) + { + D_dis1 = m1[coeff_ctr] - ((sign(level1,c_err1)*dequant_coef[qp_rem][0][0]*A[0][0]<< qp_per) >>5)- mp1[coeff_ctr]; + levrun_linfo_c2x2(level1, run, &len, &info); + D_dis1 = D_dis1 * D_dis1 + lambda_mode * len; + + D_dis2 = m1[coeff_ctr] - ((sign(level2,c_err2)*dequant_coef[qp_rem][0][0]*A[0][0]<< qp_per) >>5)- mp1[coeff_ctr]; + levrun_linfo_c2x2(level2, run, &len, &info); + D_dis2 = D_dis2 * D_dis2 + lambda_mode * len; + + if (D_dis1 == D_dis2) + level = (absm(level1) < absm(level2)) ? 
level1 : level2; + else + { + if (D_dis1 < D_dis2) + level = level1; + else + level = level2; + } + c_err = (level == level1) ? c_err1 : c_err2; + } + else if (level1 == level2) + { + level = level1; + c_err = c_err1; + } + else + { + level = (level1 == 0) ? level1 : level2; + c_err = (level1 == 0) ? c_err1 : c_err2; + } + + if (input->symbol_mode == UVLC && img->qp < 4) + { + if (level > CAVLC_LEVEL_LIMIT) + { + level = CAVLC_LEVEL_LIMIT; + } + } + + if (level != 0) + { + currMB->cbp_blk |= 0xf0000 << (uv << 2) ; // if one of the 2x2-DC levels is != 0 the coded-bit + cr_cbp=max(1,cr_cbp); + DCLevel[scan_pos] = sign(level ,c_err); + DCRun [scan_pos] = run; + scan_pos++; + run=-1; + ilev=((sign(level,c_err)*dequant_coef[qp_rem][0][0]*A[0][0]<< qp_per) >>5); + } + ilev+= mp1[coeff_ctr]; + m1[coeff_ctr]=sign((absm(ilev) * quant_coef[qp_rem_sp][0][0] + 2 * qp_const2) >> (q_bits_sp+1), ilev) * dequant_coef[qp_rem_sp][0][0] << qp_per_sp; + } + DCLevel[scan_pos] = 0; + + // Invers transform of 2x2 DC levels + + img->m7[0][0]=(m1[0]+m1[1]+m1[2]+m1[3])/2; + img->m7[0][4]=(m1[0]-m1[1]+m1[2]-m1[3])/2; + img->m7[4][0]=(m1[0]+m1[1]-m1[2]-m1[3])/2; + img->m7[4][4]=(m1[0]-m1[1]-m1[2]+m1[3])/2; + + // Quant of chroma AC-coeffs. 
+ coeff_cost=0; + cr_cbp_tmp=0; + + for (n2=0; n2 <= BLOCK_SIZE; n2 += BLOCK_SIZE) + { + for (n1=0; n1 <= BLOCK_SIZE; n1 += BLOCK_SIZE) + { + b4 = 2*(n2 >> 2) + (n1 >> 2); + ACLevel = img->cofAC[uv+4][b4][0]; + ACRun = img->cofAC[uv+4][b4][1]; + + run = -1; + scan_pos = 0; + + for (coeff_ctr=1; coeff_ctr < 16; coeff_ctr++)// start change rd_quant + { + + if (is_field_mode) + { // Alternate scan for field coding + i=FIELD_SCAN[coeff_ctr][0]; + j=FIELD_SCAN[coeff_ctr][1]; + } + else + { + i=SNGL_SCAN[coeff_ctr][0]; + j=SNGL_SCAN[coeff_ctr][1]; + } + ++run; + ilev=0; + + // quantization on prediction + c_err1 = (absm(predicted_chroma_block[n1+i][n2+j]) * quant_coef[qp_rem_sp][i][j] + qp_const2) >> q_bits_sp; + c_err1 = (c_err1 << q_bits_sp) / quant_coef[qp_rem_sp][i][j]; + c_err1 = img->m7[n2+j][n1+i] - sign(c_err1, predicted_chroma_block[n1+i][n2+j]); + level1 = (absm(c_err1) * quant_coef[qp_rem][i][j] + qp_const) >> q_bits; + + // no quantization on prediction + c_err2 = img->m7[n2+j][n1+i] - predicted_chroma_block[n1+i][n2+j]; + level2 = (absm(c_err2) * quant_coef[qp_rem][i][j] + qp_const) >> q_bits; + + if (level1 != level2 && level1 != 0 && level2 != 0) + { + D_dis1 = img->m7[n2+j][n1+i] - ((sign(level1,c_err1)*dequant_coef[qp_rem][i][j]*A[i][j]<< qp_per) >>6) - predicted_chroma_block[n1+i][n2+j]; + + levrun_linfo_inter(level1, run, &len, &info); + D_dis1 = D_dis1 * D_dis1 + lambda_mode * len; + + D_dis2 = img->m7[n2+j][n1+i] - ((sign(level2,c_err2)*dequant_coef[qp_rem][i][j]*A[i][j]<< qp_per) >>6) - predicted_chroma_block[n1+i][n2+j]; + levrun_linfo_inter(level2, run, &len, &info); + D_dis2 = D_dis2 * D_dis2 + lambda_mode * len; + + if (D_dis1 == D_dis2) + level = (absm(level1) < absm(level2)) ? level1 : level2; + else + { + if (D_dis1 < D_dis2) + level = level1; + else + level = level2; + } + c_err = (level == level1) ? c_err1 : c_err2; + } + else if (level1 == level2) + { + level = level1; + c_err = c_err1; + } + else + { + level = (level1 == 0) ? 
level1 : level2; + c_err = (level1 == 0) ? c_err1 : c_err2; + } + + if (level != 0) + { + currMB->cbp_blk |= 1 << (16 + (uv << 2) + ((n2 >> 1) + (n1 >> 2))) ; + if (level > 1) + coeff_cost += MAX_VALUE; // set high cost, shall not be discarded + else + coeff_cost += COEFF_COST[input->disthres][run]; + + cr_cbp_tmp=2; + ACLevel[scan_pos] = sign(level,c_err); + ACRun [scan_pos] = run; + ++scan_pos; + run=-1; + ilev=((sign(level,c_err)*dequant_coef[qp_rem][i][j]*A[i][j]<< qp_per) >>6); + } + ilev+=predicted_chroma_block[n1+i][n2+j]; + img->m7[n2+j][n1+i] = sign((absm(ilev) * quant_coef[qp_rem_sp][i][j] + qp_const2) >> q_bits_sp,ilev) * dequant_coef[qp_rem_sp][i][j] << qp_per_sp; + } + ACLevel[scan_pos] = 0; + } + } + + // * reset chroma coeffs + + if(cr_cbp_tmp==2) + cr_cbp=2; + // IDCT. + + // Horizontal. + for (n2=0; n2 <= BLOCK_SIZE; n2 += BLOCK_SIZE) + { + for (n1=0; n1 <= BLOCK_SIZE; n1 += BLOCK_SIZE) + { + for (j=0; j < BLOCK_SIZE; j++) + { + for (i=0; i < BLOCK_SIZE; i++) + { + m5[i]=img->m7[n2+j][n1+i]; + } + m6[0]=(m5[0]+m5[2]); + m6[1]=(m5[0]-m5[2]); + m6[2]=(m5[1]>>1)-m5[3]; + m6[3]=m5[1]+(m5[3]>>1); + + for (i=0; i < 2; i++) + { + i1=3-i; + img->m7[n2+j][n1+i]=m6[i]+m6[i1]; + img->m7[n2+j][n1+i1]=m6[i]-m6[i1]; + } + } + + // Vertical. + for (i=0; i < BLOCK_SIZE; i++) + { + for (j=0; j < BLOCK_SIZE; j++) + { + m5[j]=img->m7[n2+j][n1+i]; + } + m6[0]=(m5[0]+m5[2]); + m6[1]=(m5[0]-m5[2]); + m6[2]=(m5[1]>>1)-m5[3]; + m6[3]=m5[1]+(m5[3]>>1); + + for (j=0; j < 2; j++) + { + j2=3-j; + img->m7[n2+j][n1+i] =min(img->max_imgpel_value_uv,max(0,(m6[j]+m6[j2]+DQ_ROUND)>>DQ_BITS)); + img->m7[n2+j2][n1+i]=min(img->max_imgpel_value_uv,max(0,(m6[j]-m6[j2]+DQ_ROUND)>>DQ_BITS)); + } + } + } + } + + // Decoded block moved to memory + for (j=0; j < BLOCK_SIZE*2; j++) + for (i=0; i < BLOCK_SIZE*2; i++) + { + enc_picture->imgUV[uv][img->pix_c_y+j][img->pix_c_x+i]= img->m7[j][i]; + } + + return cr_cbp; + } + + /*! 
+ ************************************************************************ + * \brief + * The routine performs transform,quantization,inverse transform, adds the diff. + * to the prediction and writes the result to the decoded luma frame. Includes the + * RD constrained quantization also. + * + * \par Input: + * block_x,block_y: Block position inside a macro block (0,4,8,12). + * + * \par Output: + * nonzero: 0 if no levels are nonzero. 1 if there are nonzero levels. \n + * coeff_cost: Counter for nonzero coefficients, used to discard expencive levels. + ************************************************************************ + */ + void copyblock_sp(int block_x,int block_y) + { + int sign(int a,int b); + + int i,j,i1,j1,m5[4],m6[4]; + + Macroblock *currMB = &img->mb_data[img->current_mb_nr]; + + int predicted_block[BLOCK_SIZE][BLOCK_SIZE]; + int qp_per = (currMB->qpsp-MIN_QP)/6; + int qp_rem = (currMB->qpsp-MIN_QP)%6; + int q_bits = Q_BITS+qp_per; + int qp_const2=(1<mpr[j+block_y][i+block_x]; + } + + for (j=0; j < BLOCK_SIZE; j++) + { + for (i=0; i < 2; i++) + { + i1=3-i; + m5[i]=predicted_block[i][j]+predicted_block[i1][j]; + m5[i1]=predicted_block[i][j]-predicted_block[i1][j]; + } + predicted_block[0][j]=(m5[0]+m5[1]); + predicted_block[2][j]=(m5[0]-m5[1]); + predicted_block[1][j]=m5[3]*2+m5[2]; + predicted_block[3][j]=m5[3]-m5[2]*2; + } + + // Vertival transform + + for (i=0; i < BLOCK_SIZE; i++) + { + for (j=0; j < 2; j++) + { + j1=3-j; + m5[j]=predicted_block[i][j]+predicted_block[i][j1]; + m5[j1]=predicted_block[i][j]-predicted_block[i][j1]; + } + predicted_block[i][0]=(m5[0]+m5[1]); + predicted_block[i][2]=(m5[0]-m5[1]); + predicted_block[i][1]=m5[3]*2+m5[2]; + predicted_block[i][3]=m5[3]-m5[2]*2; + } + + // Quant + for (j=0;j < BLOCK_SIZE; j++) + for (i=0; i < BLOCK_SIZE; i++) + img->m7[j][i]=sign((absm(predicted_block[i][j])* quant_coef[qp_rem][i][j]+qp_const2)>> q_bits,predicted_block[i][j])*dequant_coef[qp_rem][i][j]<m7[j][i]; + } + m6[0]=(m5[0]+m5[2]); 
+ m6[1]=(m5[0]-m5[2]); + m6[2]=(m5[1]>>1)-m5[3]; + m6[3]=m5[1]+(m5[3]>>1); + + for (i=0;i<2;i++) + { + i1=3-i; + img->m7[j][i]=m6[i]+m6[i1]; + img->m7[j][i1]=m6[i]-m6[i1]; + } + } + // vertical + for (i=0;im7[j][i]; + + m6[0]=(m5[0]+m5[2]); + m6[1]=(m5[0]-m5[2]); + m6[2]=(m5[1]>>1)-m5[3]; + m6[3]=m5[1]+(m5[3]>>1); + + for (j=0;j<2;j++) + { + j1=3-j; + img->m7[j][i] =min(img->max_imgpel_value,max(0,(m6[j]+m6[j1]+DQ_ROUND)>>DQ_BITS)); + img->m7[j1][i]=min(img->max_imgpel_value,max(0,(m6[j]-m6[j1]+DQ_ROUND)>>DQ_BITS)); + } + } + + // Decoded block moved to frame memory + + for (j=0; j < BLOCK_SIZE; j++) + for (i=0; i < BLOCK_SIZE; i++) + enc_picture->imgY[img->pix_y+block_y+j][img->pix_x+block_x+i]=img->m7[j][i]; + } + + + + int writeIPCMBytes(Bitstream *currStream) + { + int i,j, jj; + int len = 0, uv; + int mb_nr = img->current_mb_nr; + Macroblock* currMB = &img->mb_data[mb_nr]; + SyntaxElement *currSE = &img->MB_SyntaxElements[currMB->currSEnr]; + + + for (j=0;j<16;j++) + { + jj = img->pix_y+j; + for (i=0;i<16;i++) + { + currSE->len = img->bitdepth_luma; + len += currSE->len; + currSE->bitpattern = enc_picture->imgY[jj][img->pix_x+i]; + writeSyntaxElement2Buf_Fixed(currSE, currStream); + } + } + + for (uv = 0; uv < 2; uv ++) + { + for (j=0;jmb_cr_size_y;j++) + { + jj = img->pix_c_y+j; + for (i=0;imb_cr_size_x;i++) + { + currSE->len = img->bitdepth_chroma; + len += currSE->len; + currSE->bitpattern = enc_picture->imgUV[uv][jj][img->pix_c_x+i]; + writeSyntaxElement2Buf_Fixed(currSE, currStream); + } + } + } + return len; + } + + int writePCMByteAlign(Bitstream *currStream) + { + int len = 0; + if (currStream->bits_to_go < 8) + { // trailing bits to process + len = 8 - currStream->bits_to_go; + currStream->byte_buf = (currStream->byte_buf <bits_to_go) | (0xff >> (8 - currStream->bits_to_go)); + stats->bit_use_stuffingBits[img->type]+=currStream->bits_to_go; + currStream->streamBuffer[currStream->byte_pos++]=currStream->byte_buf; + currStream->bits_to_go = 8; + } + 
return len; + } + Index: llvm-test/MultiSource/Applications/JM/lencod/block.h diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/block.h:1.1 *** /dev/null Sat Feb 11 04:33:42 2006 --- llvm-test/MultiSource/Applications/JM/lencod/block.h Sat Feb 11 04:33:22 2006 *************** *** 0 **** --- 1,179 ---- + + /*! + ************************************************************************ + * \file block.h + * + * \brief + * constant arrays for single block processing + * + * \author + * Inge Lille-Langoy \n + * Telenor Satellite Services \n + * P.O.Box 6914 St.Olavs plass \n + * N-0130 Oslo, Norway + * + ************************************************************************ + */ + + #ifndef _BLOCK_H_ + #define _BLOCK_H_ + + //! make chroma QP from quant + const byte QP_SCALE_CR[52]= + { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11, + 12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27, + 28,29,29,30,31,32,32,33,34,34,35,35,36,36,37,37, + 37,38,38,38,39,39,39,39 + }; + + + //! single scan pattern + const byte SNGL_SCAN[16][2] = + { + {0,0},{1,0},{0,1},{0,2}, + {1,1},{2,0},{3,0},{2,1}, + {1,2},{0,3},{1,3},{2,2}, + {3,1},{3,2},{2,3},{3,3} + }; + + //! field scan pattern + const byte FIELD_SCAN[16][2] = + { + {0,0},{0,1},{1,0},{0,2}, + {0,3},{1,1},{1,2},{1,3}, + {2,0},{2,1},{2,2},{2,3}, + {3,0},{3,1},{3,2},{3,3} + }; + + + //! array used to find expencive coefficients + const byte COEFF_COST[2][16] = + { + {3,2,2,1,1,1,0,0,0,0,0,0,0,0,0,0}, + {9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9} + }; + + + + //! 
bit cost for coefficients + const byte COEFF_BIT_COST[3][16][16]= + { + { // 2x2 scan (corrested per Gisle's Email 11/23/2000 by StW + { 3, 5, 7, 9, 9,11,11,11,11,13,13,13,13,13,13,13}, + { 5, 7, 9, 9,11,11,11,11,13,13,13,13,13,13,13,13}, + { 7, 9, 9,11,11,11,11,13,13,13,13,13,13,13,13,15}, + { 7, 9, 9,11,11,11,11,13,13,13,13,13,13,13,13,15}, + { 7, 7, 9, 9, 9, 9,11,11,11,11,11,11,11,11,13,13}, + { 7, 7, 9, 9, 9, 9,11,11,11,11,11,11,11,11,13,13}, + { 7, 7, 9, 9, 9, 9,11,11,11,11,11,11,11,11,13,13}, + { 7, 7, 9, 9, 9, 9,11,11,11,11,11,11,11,11,13,13}, + { 7, 7, 9, 9, 9, 9,11,11,11,11,11,11,11,11,13,13}, + { 7, 7, 9, 9, 9, 9,11,11,11,11,11,11,11,11,13,13}, + { 7, 7, 9, 9, 9, 9,11,11,11,11,11,11,11,11,13,13}, + { 7, 7, 9, 9, 9, 9,11,11,11,11,11,11,11,11,13,13}, + { 7, 7, 9, 9, 9, 9,11,11,11,11,11,11,11,11,13,13}, + { 7, 7, 9, 9, 9, 9,11,11,11,11,11,11,11,11,13,13}, + { 7, 7, 9, 9, 9, 9,11,11,11,11,11,11,11,11,13,13}, + { 7, 7, 9, 9, 9, 9,11,11,11,11,11,11,11,11,13,13}, + }, + { // double scan + { 3, 5, 7, 7, 7, 9, 9, 9, 9,11,11,13,13,13,13,15}, + { 5, 9, 9,11,11,13,13,13,13,15,15,15,15,15,15,15}, + { 7,11,11,13,13,13,13,15,15,15,15,15,15,15,15,17}, + { 9,11,11,13,13,13,13,15,15,15,15,15,15,15,15,17}, + { 9,11,11,13,13,13,13,15,15,15,15,15,15,15,15,17}, + {11,11,13,13,13,13,15,15,15,15,15,15,15,15,17,17}, + {11,11,13,13,13,13,15,15,15,15,15,15,15,15,17,17}, + {11,11,13,13,13,13,15,15,15,15,15,15,15,15,17,17}, + {11,11,13,13,13,13,15,15,15,15,15,15,15,15,17,17}, + {11,11,13,13,13,13,15,15,15,15,15,15,15,15,17,17}, + {11,11,13,13,13,13,15,15,15,15,15,15,15,15,17,17}, + {11,11,13,13,13,13,15,15,15,15,15,15,15,15,17,17}, + {11,11,13,13,13,13,15,15,15,15,15,15,15,15,17,17}, + {11,11,13,13,13,13,15,15,15,15,15,15,15,15,17,17}, + }, + { // single scan + { 3, 7, 9, 9,11,13,13,15,15,15,15,17,17,17,17,17}, + { 5, 9,11,13,13,15,15,15,15,17,17,17,17,17,17,17}, + { 5, 9,11,13,13,15,15,15,15,17,17,17,17,17,17,17}, + { 7,11,13,13,15,15,15,15,17,17,17,17,17,17,17,17}, + { 
7,11,13,13,15,15,15,15,17,17,17,17,17,17,17,17}, + { 7,11,13,13,15,15,15,15,17,17,17,17,17,17,17,17}, + { 9,11,13,13,15,15,15,15,17,17,17,17,17,17,17,17}, + { 9,11,13,13,15,15,15,15,17,17,17,17,17,17,17,17}, + { 9,11,13,13,15,15,15,15,17,17,17,17,17,17,17,17}, + { 9,11,13,13,15,15,15,15,17,17,17,17,17,17,17,17}, + {11,13,13,15,15,15,15,17,17,17,17,17,17,17,17,19}, + {11,13,13,15,15,15,15,17,17,17,17,17,17,17,17,19}, + {11,13,13,15,15,15,15,17,17,17,17,17,17,17,17,19}, + {11,13,13,15,15,15,15,17,17,17,17,17,17,17,17,19}, + {11,13,13,15,15,15,15,17,17,17,17,17,17,17,17,19}, + {11,13,13,15,15,15,15,17,17,17,17,17,17,17,17,19}, + }, + }; + + //! single scan pattern + const byte SCAN_YUV422 [8][2] = + { + {0,0},{0,1}, + {1,0},{0,2}, + {0,3},{1,1}, + {1,2},{1,3} + }; + + //! look up tables for FRExt-chroma support + const unsigned char hor_offset[4][4][4] = + {{{0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0}}, + + {{0, 4, 0, 4}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0}}, + + {{0, 4, 0, 4}, + {0, 4, 0, 4}, + {0, 0, 0, 0}, + {0, 0, 0, 0}}, + + {{0, 4, 0, 4}, + {8,12, 8,12}, + {0, 4, 0, 4}, + {8,12, 8,12}}}; + + const unsigned char ver_offset[4][4][4] = + { {{0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0}}, + + {{0, 0, 4, 4}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0}}, + + {{0, 0, 4, 4}, + {8, 8,12,12}, + {0, 0, 0, 0}, + {0, 0, 0, 0}}, + + {{0, 0, 4, 4}, + {0, 0, 4, 4}, + {8, 8,12,12}, + {8, 8,12,12}}}; + + static unsigned char cbp_blk_chroma[8][4] = + { {16, 17, 18, 19}, + {20, 21, 22, 23}, + {24, 25, 26, 27}, + {28, 29, 30, 31}, + {32, 33, 34, 35}, + {36, 37, 38, 39}, + {40, 41, 42, 43}, + {44, 45, 46, 47} }; + + #endif + Index: llvm-test/MultiSource/Applications/JM/lencod/cabac.c diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/cabac.c:1.1 *** /dev/null Sat Feb 11 04:33:42 2006 --- llvm-test/MultiSource/Applications/JM/lencod/cabac.c Sat Feb 11 04:33:22 2006 *************** *** 0 **** --- 1,1503 ---- + + /*! 
+ ************************************************************************************* + * \file cabac.c + * + * \brief + * CABAC entropy coding routines + * + * \author + * Main contributors (see contributors.h for copyright, address and affiliation details) + * - Detlev Marpe + ************************************************************************************** + */ + + /* NOTE(review): the next three #include directives carry no header name in this copy; the <...> operands appear to have been lost in extraction - restore them from the original cabac.c before building. */ #include + #include + #include + #include "global.h" + + #include "cabac.h" + #include "image.h" + #include "mb_access.h" + + /* File-scope state, initialised to 0 and set back to 0 by cabac_new_slice(). NOTE(review): presumably the most recently coded delta-QP - confirm against the code that reads it. */ int last_dquant = 0; + + /*********************************************************************** + * L O C A L L Y D E F I N E D F U N C T I O N P R O T O T Y P E S + *********************************************************************** + */ + + + void unary_bin_encode(EncodingEnvironmentPtr eep_frame, + unsigned int symbol, + BiContextTypePtr ctx, + int ctx_offset); + + void unary_bin_max_encode(EncodingEnvironmentPtr eep_frame, + unsigned int symbol, + BiContextTypePtr ctx, + int ctx_offset, + unsigned int max_symbol); + + void unary_exp_golomb_level_encode( EncodingEnvironmentPtr eep_dp, + unsigned int symbol, + BiContextTypePtr ctx); + + void unary_exp_golomb_mv_encode(EncodingEnvironmentPtr eep_dp, + unsigned int symbol, + BiContextTypePtr ctx, + unsigned int max_bin); + + + /* Sets the file-scope last_dquant back to 0. NOTE(review): by its name this is meant to run at the start of every slice - confirm with the callers. */ void cabac_new_slice() + { + last_dquant=0; + } + + + /*! 
+ ************************************************************************ + * \brief + * Check for available neighbouring blocks + * and set pointers in current macroblock + ************************************************************************ + */ + void CheckAvailabilityOfNeighborsCABAC() + { + Macroblock *currMB = &img->mb_data[img->current_mb_nr]; + PixelPos up, left; + + getNeighbour(img->current_mb_nr, -1, 0, 1, &left); + getNeighbour(img->current_mb_nr, 0, -1, 1, &up); + + if (up.available) + currMB->mb_available_up = &img->mb_data[up.mb_addr]; + else + currMB->mb_available_up = NULL; + + if (left.available) + currMB->mb_available_left = &img->mb_data[left.mb_addr]; + else + currMB->mb_available_left = NULL; + } + + /*! + ************************************************************************ + * \brief + * Allocation of contexts models for the motion info + * used for arithmetic encoding + ************************************************************************ + */ + MotionInfoContexts* create_contexts_MotionInfo(void) + { + MotionInfoContexts* enco_ctx; + + enco_ctx = (MotionInfoContexts*) calloc(1, sizeof(MotionInfoContexts) ); + if( enco_ctx == NULL ) + no_mem_exit("create_contexts_MotionInfo: enco_ctx"); + + return enco_ctx; + } + + + /*! + ************************************************************************ + * \brief + * Allocates of contexts models for the texture info + * used for arithmetic encoding + ************************************************************************ + */ + TextureInfoContexts* create_contexts_TextureInfo(void) + { + TextureInfoContexts* enco_ctx; + + enco_ctx = (TextureInfoContexts*) calloc(1, sizeof(TextureInfoContexts) ); + if( enco_ctx == NULL ) + no_mem_exit("create_contexts_TextureInfo: enco_ctx"); + + return enco_ctx; + } + + + + + /*! 
+ ************************************************************************ + * \brief + * Frees the memory of the contexts models + * used for arithmetic encoding of the motion info. + ************************************************************************ + */ + void delete_contexts_MotionInfo(MotionInfoContexts *enco_ctx) + { + if( enco_ctx == NULL ) + return; + + free( enco_ctx ); + + return; + } + + /*! + ************************************************************************ + * \brief + * Frees the memory of the contexts models + * used for arithmetic encoding of the texture info. + ************************************************************************ + */ + void delete_contexts_TextureInfo(TextureInfoContexts *enco_ctx) + { + if( enco_ctx == NULL ) + return; + + free( enco_ctx ); + + return; + } + + + /*! + ************************************************************************** + * \brief + * generates arithmetic code and passes the code to the buffer + ************************************************************************** + */ + int writeSyntaxElement_CABAC(SyntaxElement *se, DataPartition *this_dataPart) + { + EncodingEnvironmentPtr eep_dp = &(this_dataPart->ee_cabac); + int curr_len = arienco_bits_written(eep_dp); + + // perform the actual coding by calling the appropriate method + se->writing(se, eep_dp); + + if(se->type != SE_HEADER) + this_dataPart->bitstream->write_flag = 1; + + return (se->len = (arienco_bits_written(eep_dp) - curr_len)); + } + + /*! 
+ *************************************************************************** + * \brief + * This function is used to arithmetically encode the field + * mode info of a given MB in the case of mb-based frame/field decision + *************************************************************************** + */ + void writeFieldModeInfo_CABAC(SyntaxElement *se, EncodingEnvironmentPtr eep_dp) + { + int a,b,act_ctx; + MotionInfoContexts *ctx = (img->currentSlice)->mot_ctx; + Macroblock *currMB = &img->mb_data[img->current_mb_nr]; + int mb_field = se->value1; + + a = currMB->mbAvailA ? img->mb_data[currMB->mbAddrA].mb_field : 0; + b = currMB->mbAvailB ? img->mb_data[currMB->mbAddrB].mb_field : 0; + + act_ctx = a + b; + + biari_encode_symbol(eep_dp, (signed short) (mb_field != 0),&ctx->mb_aff_contexts[act_ctx]); + + se->context = act_ctx; + + return; + } + + /*! + *************************************************************************** + * \brief + * This function is used to arithmetically encode the mb_skip_flag. + *************************************************************************** + */ + void writeMB_skip_flagInfo_CABAC(SyntaxElement *se, EncodingEnvironmentPtr eep_dp) + { + int a,b,act_ctx; + int bframe = (img->type==B_SLICE); + MotionInfoContexts *ctx = (img->currentSlice)->mot_ctx; + Macroblock *currMB = &img->mb_data[img->current_mb_nr]; + int curr_mb_type = se->value1; + + if (bframe) + { + if (currMB->mb_available_up == NULL) + b = 0; + else + b = (currMB->mb_available_up->skip_flag==0 ? 1 : 0); + if (currMB->mb_available_left == NULL) + a = 0; + else + a = (currMB->mb_available_left->skip_flag==0 ? 
1 : 0); + + act_ctx = 7 + a + b; + + if (se->value1==0 && se->value2==0) // DIRECT mode, no coefficients + biari_encode_symbol (eep_dp, 1, &ctx->mb_type_contexts[2][act_ctx]); + else + biari_encode_symbol (eep_dp, 0, &ctx->mb_type_contexts[2][act_ctx]); + + currMB->skip_flag = (se->value1==0 && se->value2==0)?1:0; + } + else + { + if (currMB->mb_available_up == NULL) + b = 0; + else + b = (( (currMB->mb_available_up)->skip_flag == 0) ? 1 : 0 ); + if (currMB->mb_available_left == NULL) + a = 0; + else + a = (( (currMB->mb_available_left)->skip_flag == 0) ? 1 : 0 ); + + act_ctx = a + b; + + if (curr_mb_type==0) // SKIP + biari_encode_symbol(eep_dp, 1,&ctx->mb_type_contexts[1][act_ctx]); + else + biari_encode_symbol(eep_dp, 0,&ctx->mb_type_contexts[1][act_ctx]); + + currMB->skip_flag = (curr_mb_type==0)?1:0; + } + se->context = act_ctx; + + return; + } + + /*! + *************************************************************************** + * \brief + * This function is used to arithmetically encode the macroblock + * intra_pred_size flag info of a given MB. + *************************************************************************** + */ + + void writeMB_transform_size_CABAC(SyntaxElement *se, EncodingEnvironmentPtr eep_dp) + { + int a, b; + int act_ctx = 0; + int act_sym; + + MotionInfoContexts *ctx = (img->currentSlice)->mot_ctx; + Macroblock *currMB = &img->mb_data[img->current_mb_nr]; + + + b = (currMB->mb_available_up == NULL) ? 0 : currMB->mb_available_up->luma_transform_size_8x8_flag; + a = (currMB->mb_available_left == NULL) ? 0 :currMB->mb_available_left->luma_transform_size_8x8_flag; + + act_ctx = a + b; + act_sym = currMB->luma_transform_size_8x8_flag; + se->context = act_ctx; // store context + biari_encode_symbol(eep_dp, (signed short) (act_sym != 0), ctx->transform_size_contexts + act_ctx ); + } + + /*! 
+ *************************************************************************** + * \brief + * This function is used to arithmetically encode the macroblock + * type info of a given MB. + *************************************************************************** + */ + + void writeMB_typeInfo_CABAC(SyntaxElement *se, EncodingEnvironmentPtr eep_dp) + { + int a, b; + int act_ctx = 0; + int act_sym; + signed short csym; + int bframe = (img->type==B_SLICE); + int mode_sym = 0; + int mode16x16; + + + MotionInfoContexts *ctx = (img->currentSlice)->mot_ctx; + Macroblock *currMB = &img->mb_data[img->current_mb_nr]; + int curr_mb_type = se->value1; + + if(img->type == I_SLICE) // INTRA-frame + { + if (currMB->mb_available_up == NULL) + b = 0; + else + b = ((currMB->mb_available_up->mb_type != I4MB && currMB->mb_available_up->mb_type != I8MB) ? 1 : 0 ); + + if (currMB->mb_available_left == NULL) + a = 0; + else + a = ((currMB->mb_available_left->mb_type != I4MB && currMB->mb_available_left->mb_type != I8MB) ? 
1 : 0 ); + + act_ctx = a + b; + act_sym = curr_mb_type; + se->context = act_ctx; // store context + + if (act_sym==0) // 4x4 Intra + { + biari_encode_symbol(eep_dp, 0, ctx->mb_type_contexts[0] + act_ctx ); + } + else if( act_sym == 25 ) // PCM-MODE + { + biari_encode_symbol(eep_dp, 1, ctx->mb_type_contexts[0] + act_ctx ); + biari_encode_symbol_final(eep_dp, 1); + } + else // 16x16 Intra + { + biari_encode_symbol(eep_dp, 1, ctx->mb_type_contexts[0] + act_ctx ); + + biari_encode_symbol_final(eep_dp, 0); + + mode_sym = act_sym-1; // Values in the range of 0...23 + act_ctx = 4; + act_sym = mode_sym/12; + biari_encode_symbol(eep_dp, (signed short) act_sym, ctx->mb_type_contexts[0] + act_ctx ); // coding of AC/no AC + mode_sym = mode_sym % 12; + act_sym = mode_sym / 4; // coding of cbp: 0,1,2 + act_ctx = 5; + if (act_sym==0) + { + biari_encode_symbol(eep_dp, 0, ctx->mb_type_contexts[0] + act_ctx ); + } + else + { + biari_encode_symbol(eep_dp, 1, ctx->mb_type_contexts[0] + act_ctx ); + act_ctx=6; + biari_encode_symbol(eep_dp, (signed short) (act_sym!=1), ctx->mb_type_contexts[0] + act_ctx ); + } + mode_sym = mode_sym & 0x03; // coding of I pred-mode: 0,1,2,3 + act_sym = mode_sym >> 1; + act_ctx = 7; + biari_encode_symbol(eep_dp, (signed short) act_sym, ctx->mb_type_contexts[0] + act_ctx ); + act_ctx = 8; + act_sym = mode_sym & 0x01; + biari_encode_symbol(eep_dp, (signed short) act_sym, ctx->mb_type_contexts[0] + act_ctx ); + } + } + else // INTER + { + + if (bframe) + { + if (currMB->mb_available_up == NULL) + b = 0; + else + b = ((currMB->mb_available_up->mb_type != 0) ? 1 : 0 ); + + if (currMB->mb_available_left == NULL) + a = 0; + else + a = ((currMB->mb_available_left->mb_type != 0) ? 
1 : 0 ); + act_ctx = a + b; + se->context = act_ctx; // store context + } + act_sym = curr_mb_type; + + if (act_sym>=(mode16x16=(bframe?24:7))) + { + mode_sym = act_sym-mode16x16; + act_sym = mode16x16; // 16x16 mode info + } + + if (!bframe) + { + switch (act_sym) + { + case 0: + break; + case 1: + biari_encode_symbol (eep_dp, 0, &ctx->mb_type_contexts[1][4]); + biari_encode_symbol (eep_dp, 0, &ctx->mb_type_contexts[1][5]); + biari_encode_symbol (eep_dp, 0, &ctx->mb_type_contexts[1][6]); + break; + case 2: + biari_encode_symbol (eep_dp, 0, &ctx->mb_type_contexts[1][4]); + biari_encode_symbol (eep_dp, 1, &ctx->mb_type_contexts[1][5]); + biari_encode_symbol (eep_dp, 1, &ctx->mb_type_contexts[1][7]); + break; + case 3: + biari_encode_symbol (eep_dp, 0, &ctx->mb_type_contexts[1][4]); + biari_encode_symbol (eep_dp, 1, &ctx->mb_type_contexts[1][5]); + biari_encode_symbol (eep_dp, 0, &ctx->mb_type_contexts[1][7]); + break; + case 4: + case 5: + biari_encode_symbol (eep_dp, 0, &ctx->mb_type_contexts[1][4]); + biari_encode_symbol (eep_dp, 0, &ctx->mb_type_contexts[1][5]); + biari_encode_symbol (eep_dp, 1, &ctx->mb_type_contexts[1][6]); + break; + case 6: + biari_encode_symbol (eep_dp, 1, &ctx->mb_type_contexts[1][4]); + biari_encode_symbol (eep_dp, 0, &ctx->mb_type_contexts[1][7]); + break; + case 7: + biari_encode_symbol (eep_dp, 1, &ctx->mb_type_contexts[1][4]); + biari_encode_symbol (eep_dp, 1, &ctx->mb_type_contexts[1][7]); + break; + default: + printf ("Unsupported MB-MODE in writeMB_typeInfo_CABAC!\n"); + exit (1); + } + } + else //===== B-FRAMES ===== + { + if (act_sym==0) + { + biari_encode_symbol (eep_dp, 0, &ctx->mb_type_contexts[2][act_ctx]); + } + else if (act_sym<=2) + { + biari_encode_symbol (eep_dp, 1, &ctx->mb_type_contexts[2][act_ctx]); + biari_encode_symbol (eep_dp, 0, &ctx->mb_type_contexts[2][4]); + csym = (act_sym-1 != 0); + biari_encode_symbol (eep_dp, csym, &ctx->mb_type_contexts[2][6]); + } + else if (act_sym<=10) + { + biari_encode_symbol (eep_dp, 
1, &ctx->mb_type_contexts[2][act_ctx]); + biari_encode_symbol (eep_dp, 1, &ctx->mb_type_contexts[2][4]); + biari_encode_symbol (eep_dp, 0, &ctx->mb_type_contexts[2][5]); + csym=(((act_sym-3)>>2)&0x01) != 0; + biari_encode_symbol (eep_dp, csym, &ctx->mb_type_contexts[2][6]); + csym=(((act_sym-3)>>1)&0x01) != 0; + biari_encode_symbol (eep_dp, csym, &ctx->mb_type_contexts[2][6]); + csym=((act_sym-3)&0x01) != 0; + biari_encode_symbol (eep_dp, csym, &ctx->mb_type_contexts[2][6]); + } + else if (act_sym==11 || act_sym==22) + { + biari_encode_symbol (eep_dp, 1, &ctx->mb_type_contexts[2][act_ctx]); + biari_encode_symbol (eep_dp, 1, &ctx->mb_type_contexts[2][4]); + biari_encode_symbol (eep_dp, 1, &ctx->mb_type_contexts[2][5]); + biari_encode_symbol (eep_dp, 1, &ctx->mb_type_contexts[2][6]); + biari_encode_symbol (eep_dp, 1, &ctx->mb_type_contexts[2][6]); + csym = (act_sym != 11); + biari_encode_symbol (eep_dp, csym, &ctx->mb_type_contexts[2][6]); + } + else + { + if (act_sym > 22) act_sym--; + biari_encode_symbol (eep_dp, 1, &ctx->mb_type_contexts[2][act_ctx]); + biari_encode_symbol (eep_dp, 1, &ctx->mb_type_contexts[2][4]); + biari_encode_symbol (eep_dp, 1, &ctx->mb_type_contexts[2][5]); + csym=(((act_sym-12)>>3)&0x01) != 0; + biari_encode_symbol (eep_dp, csym, &ctx->mb_type_contexts[2][6]); + csym=(((act_sym-12)>>2)&0x01) != 0; + biari_encode_symbol (eep_dp, csym, &ctx->mb_type_contexts[2][6]); + csym=(((act_sym-12)>>1)&0x01) != 0; + biari_encode_symbol (eep_dp, csym, &ctx->mb_type_contexts[2][6]); + csym=((act_sym-12)&0x01) != 0; + biari_encode_symbol (eep_dp, csym, &ctx->mb_type_contexts[2][6]); + if (act_sym >=22) act_sym++; + } + } + + if(act_sym==mode16x16) // additional info for 16x16 Intra-mode + { + if( mode_sym==25 ) + { + biari_encode_symbol_final(eep_dp, 1 ); + return; + } + biari_encode_symbol_final(eep_dp, 0 ); + + act_ctx = 8; + act_sym = mode_sym/12; + biari_encode_symbol(eep_dp, (signed short) act_sym, ctx->mb_type_contexts[1] + act_ctx ); // coding of 
AC/no AC + mode_sym = mode_sym % 12; + + act_sym = mode_sym / 4; // coding of cbp: 0,1,2 + act_ctx = 9; + if (act_sym==0) + { + biari_encode_symbol(eep_dp, 0, ctx->mb_type_contexts[1] + act_ctx ); + } + else + { + biari_encode_symbol(eep_dp, 1, ctx->mb_type_contexts[1] + act_ctx ); + biari_encode_symbol(eep_dp, (signed short) (act_sym!=1), ctx->mb_type_contexts[1] + act_ctx ); + } + + mode_sym = mode_sym % 4; // coding of I pred-mode: 0,1,2,3 + act_ctx = 10; + act_sym = mode_sym/2; + biari_encode_symbol(eep_dp, (signed short) act_sym, ctx->mb_type_contexts[1] + act_ctx ); + act_sym = mode_sym%2; + biari_encode_symbol(eep_dp, (signed short) act_sym, ctx->mb_type_contexts[1] + act_ctx ); + } + } + } + + + /*! + *************************************************************************** + * \brief + * This function is used to arithmetically encode the 8x8 block + * type info + *************************************************************************** + */ + void writeB8_typeInfo_CABAC(SyntaxElement *se, EncodingEnvironmentPtr eep_dp) + { + int act_ctx; + int act_sym; + signed short csym; + int bframe=(img->type==B_SLICE); + + MotionInfoContexts *ctx = (img->currentSlice)->mot_ctx; + + act_sym = se->value1; + act_ctx = 0; + + if (!bframe) + { + switch (act_sym) + { + case 0: + biari_encode_symbol (eep_dp, 1, &ctx->b8_type_contexts[0][1]); + break; + case 1: + biari_encode_symbol (eep_dp, 0, &ctx->b8_type_contexts[0][1]); + biari_encode_symbol (eep_dp, 0, &ctx->b8_type_contexts[0][3]); + break; + case 2: + biari_encode_symbol (eep_dp, 0, &ctx->b8_type_contexts[0][1]); + biari_encode_symbol (eep_dp, 1, &ctx->b8_type_contexts[0][3]); + biari_encode_symbol (eep_dp, 1, &ctx->b8_type_contexts[0][4]); + break; + case 3: + biari_encode_symbol (eep_dp, 0, &ctx->b8_type_contexts[0][1]); + biari_encode_symbol (eep_dp, 1, &ctx->b8_type_contexts[0][3]); + biari_encode_symbol (eep_dp, 0, &ctx->b8_type_contexts[0][4]); + break; + } + } + else //===== B-FRAME ===== + { + if 
(act_sym==0) + { + biari_encode_symbol (eep_dp, 0, &ctx->b8_type_contexts[1][0]); + return; + } + else + { + biari_encode_symbol (eep_dp, 1, &ctx->b8_type_contexts[1][0]); + act_sym--; + } + if (act_sym<2) + { + biari_encode_symbol (eep_dp, 0, &ctx->b8_type_contexts[1][1]); + biari_encode_symbol (eep_dp, (signed short) (act_sym!=0), &ctx->b8_type_contexts[1][3]); + } + else if (act_sym<6) + { + biari_encode_symbol (eep_dp, 1, &ctx->b8_type_contexts[1][1]); + biari_encode_symbol (eep_dp, 0, &ctx->b8_type_contexts[1][2]); + csym=(((act_sym-2)>>1)&0x01) != 0; + biari_encode_symbol (eep_dp, csym, &ctx->b8_type_contexts[1][3]); + csym=((act_sym-2)&0x01) != 0; + biari_encode_symbol (eep_dp, csym, &ctx->b8_type_contexts[1][3]); + } + else + { + biari_encode_symbol (eep_dp, 1, &ctx->b8_type_contexts[1][1]); + biari_encode_symbol (eep_dp, 1, &ctx->b8_type_contexts[1][2]); + csym=(((act_sym-6)>>2)&0x01); + if (csym) + { + biari_encode_symbol (eep_dp, 1, &ctx->b8_type_contexts[1][3]); + csym=((act_sym-6)&0x01) != 0; + biari_encode_symbol (eep_dp, csym, &ctx->b8_type_contexts[1][3]); + } + else + { + biari_encode_symbol (eep_dp, 0, &ctx->b8_type_contexts[1][3]); + csym=(((act_sym-6)>>1)&0x01) != 0; + biari_encode_symbol (eep_dp, csym, &ctx->b8_type_contexts[1][3]); + csym=((act_sym-6)&0x01) != 0; + biari_encode_symbol (eep_dp, csym, &ctx->b8_type_contexts[1][3]); + } + } + } + } + + + /*! + **************************************************************************** + * \brief + * This function is used to arithmetically encode a pair of + * intra prediction modes of a given MB. 
+ **************************************************************************** + */ + void writeIntraPredMode_CABAC(SyntaxElement *se, EncodingEnvironmentPtr eep_dp) + { + TextureInfoContexts *ctx = img->currentSlice->tex_ctx; + + // use_most_probable_mode + if (se->value1 == -1) + biari_encode_symbol(eep_dp, 1, ctx->ipr_contexts); + else + { + biari_encode_symbol(eep_dp, 0, ctx->ipr_contexts); + + // remaining_mode_selector + biari_encode_symbol(eep_dp,(signed short)( se->value1 & 0x1 ), ctx->ipr_contexts+1); + biari_encode_symbol(eep_dp,(signed short)((se->value1 & 0x2)>>1), ctx->ipr_contexts+1); + biari_encode_symbol(eep_dp,(signed short)((se->value1 & 0x4)>>2), ctx->ipr_contexts+1); + } + } + /*! + **************************************************************************** + * \brief + * This function is used to arithmetically encode the reference + * parameter of a given MB. + **************************************************************************** + */ + void writeRefFrame_CABAC(SyntaxElement *se, EncodingEnvironmentPtr eep_dp) + { + MotionInfoContexts *ctx = img->currentSlice->mot_ctx; + Macroblock *currMB = &img->mb_data[img->current_mb_nr]; + int addctx = 0; + + int a, b; + int act_ctx; + int act_sym; + char** refframe_array = enc_picture->ref_idx[se->value2]; + + int bslice = (img->type==B_SLICE); + + int b8a, b8b; + + PixelPos block_a, block_b; + + getLuma4x4Neighbour(img->current_mb_nr, img->subblock_x, img->subblock_y, -1, 0, &block_a); + getLuma4x4Neighbour(img->current_mb_nr, img->subblock_x, img->subblock_y, 0, -1, &block_b); + + b8a=((block_a.x >> 1) & 0x01)+2*((block_a.y >> 1) & 0x01); + b8b=((block_b.x >> 1) & 0x01)+2*((block_b.y >> 1) & 0x01); + + + if (!block_b.available) + b=0; + else if (IS_DIRECT(&img->mb_data[block_b.mb_addr]) || (img->mb_data[block_b.mb_addr].b8mode[b8b]==0 && bslice)) + b=0; + else + { + if (img->MbaffFrameFlag && (currMB->mb_field == 0) && (img->mb_data[block_b.mb_addr].mb_field == 1)) + b = 
(refframe_array[block_b.pos_y][block_b.pos_x] > 1 ? 1 : 0); + else + b = (refframe_array[block_b.pos_y][block_b.pos_x] > 0 ? 1 : 0); + } + + if (!block_a.available) + a=0; + else if (IS_DIRECT(&img->mb_data[block_a.mb_addr]) || (img->mb_data[block_a.mb_addr].b8mode[b8a]==0 && bslice)) + a=0; + else + { + if (img->MbaffFrameFlag && (currMB->mb_field == 0) && (img->mb_data[block_a.mb_addr].mb_field == 1)) + a = (refframe_array[block_a.pos_y][block_a.pos_x] > 1 ? 1 : 0); + else + a = (refframe_array[block_a.pos_y][block_a.pos_x] > 0 ? 1 : 0); + } + + act_ctx = a + 2*b; + se->context = act_ctx; // store context + act_sym = se->value1; + + if (act_sym==0) + { + biari_encode_symbol(eep_dp, 0, ctx->ref_no_contexts[addctx] + act_ctx ); + } + else + { + biari_encode_symbol(eep_dp, 1, ctx->ref_no_contexts[addctx] + act_ctx); + act_sym--; + act_ctx=4; + unary_bin_encode(eep_dp, act_sym,ctx->ref_no_contexts[addctx]+act_ctx,1); + } + } + + /*! + **************************************************************************** + * \brief + * This function is used to arithmetically encode the coded + * block pattern of a given delta quant. + **************************************************************************** + */ + void writeDquant_CABAC(SyntaxElement *se, EncodingEnvironmentPtr eep_dp) + { + MotionInfoContexts *ctx = img->currentSlice->mot_ctx; + + int act_ctx; + int act_sym; + int dquant = se->value1; + int sign=0; + + Macroblock *currMB = &img->mb_data[img->current_mb_nr]; + + last_dquant=currMB->prev_delta_qp; + + if (dquant <= 0) + sign = 1; + act_sym = absm(dquant) << 1; + + act_sym += sign; + act_sym --; + + act_ctx = ( (last_dquant != 0) ? 1 : 0); + + if (act_sym==0) + { + biari_encode_symbol(eep_dp, 0, ctx->delta_qp_contexts + act_ctx ); + } + else + { + biari_encode_symbol(eep_dp, 1, ctx->delta_qp_contexts + act_ctx); + act_ctx=2; + act_sym--; + unary_bin_encode(eep_dp, act_sym,ctx->delta_qp_contexts+act_ctx,1); + } + } + + /*! 
+ **************************************************************************** + * \brief + * This function is used to arithmetically encode the motion + * vector data of a B-frame MB. + **************************************************************************** + */ + void writeMVD_CABAC(SyntaxElement *se, EncodingEnvironmentPtr eep_dp) + { + int i = img->subblock_x; + int j = img->subblock_y; + int a, b; + int act_ctx; + int act_sym; + int mv_pred_res; + int mv_local_err; + int mv_sign; + int list_idx = se->value2 & 0x01; + int k = (se->value2>>1); // MVD component + + PixelPos block_a, block_b; + + MotionInfoContexts *ctx = img->currentSlice->mot_ctx; + Macroblock *currMB = &img->mb_data[img->current_mb_nr]; + + getLuma4x4Neighbour(img->current_mb_nr, i, j, -1, 0, &block_a); + getLuma4x4Neighbour(img->current_mb_nr, i, j, 0, -1, &block_b); + + if (block_b.available) + { + b = absm(img->mb_data[block_b.mb_addr].mvd[list_idx][block_b.y][block_b.x][k]); + if (img->MbaffFrameFlag && (k==1)) + { + if ((currMB->mb_field==0) && (img->mb_data[block_b.mb_addr].mb_field==1)) + b *= 2; + else if ((currMB->mb_field==1) && (img->mb_data[block_b.mb_addr].mb_field==0)) + b /= 2; + } + } + else + b=0; + + if (block_a.available) + { + a = absm(img->mb_data[block_a.mb_addr].mvd[list_idx][block_a.y][block_a.x][k]); + if (img->MbaffFrameFlag && (k==1)) + { + if ((currMB->mb_field==0) && (img->mb_data[block_a.mb_addr].mb_field==1)) + a *= 2; + else if ((currMB->mb_field==1) && (img->mb_data[block_a.mb_addr].mb_field==0)) + a /= 2; + } + } + else + a = 0; + + if ((mv_local_err=a+b)<3) + act_ctx = 5*k; + else + { + if (mv_local_err>32) + act_ctx=5*k+3; + else + act_ctx=5*k+2; + } + + mv_pred_res = se->value1; + se->context = act_ctx; + + act_sym = absm(mv_pred_res); + + if (act_sym == 0) + biari_encode_symbol(eep_dp, 0, &ctx->mv_res_contexts[0][act_ctx] ); + else + { + biari_encode_symbol(eep_dp, 1, &ctx->mv_res_contexts[0][act_ctx] ); + act_sym--; + act_ctx=5*k; + 
unary_exp_golomb_mv_encode(eep_dp,act_sym,ctx->mv_res_contexts[1]+act_ctx,3); + mv_sign = (mv_pred_res<0) ? 1: 0; + biari_encode_symbol_eq_prob(eep_dp, (signed short) mv_sign); + } + } + + + /*! + **************************************************************************** + * \brief + * This function is used to arithmetically encode the chroma + * intra prediction mode of an 8x8 block + **************************************************************************** + */ + void writeCIPredMode_CABAC(SyntaxElement *se, EncodingEnvironmentPtr eep_dp) + { + TextureInfoContexts *ctx = img->currentSlice->tex_ctx; + Macroblock *currMB = &img->mb_data[img->current_mb_nr]; + int act_ctx,a,b; + int act_sym = se->value1; + + if (currMB->mb_available_up == NULL) b = 0; + else b = ( ((currMB->mb_available_up)->c_ipred_mode != 0) ? 1 : 0); + + if (currMB->mb_available_left == NULL) a = 0; + else a = ( ((currMB->mb_available_left)->c_ipred_mode != 0) ? 1 : 0); + + act_ctx = a+b; + + if (act_sym==0) + biari_encode_symbol(eep_dp, 0, ctx->cipr_contexts + act_ctx ); + else + { + biari_encode_symbol(eep_dp, 1, ctx->cipr_contexts + act_ctx ); + unary_bin_max_encode(eep_dp,(unsigned int) (act_sym-1),ctx->cipr_contexts+3,0,2); + } + } + + + /*! + **************************************************************************** + * \brief + * This function is used to arithmetically encode the coded + * block pattern of an 8x8 block + **************************************************************************** + */ + void writeCBP_BIT_CABAC (int b8, int bit, int cbp, Macroblock* currMB, int inter, EncodingEnvironmentPtr eep_dp) + { + PixelPos block_a; + int a, b; + + int mb_x=(b8 & 0x01)<<1; + int mb_y=(b8 >> 1)<<1; + + if (mb_y == 0) + { + if (currMB->mb_available_up == NULL) + b = 0; + else + { + if((currMB->mb_available_up)->mb_type==IPCM) + b=0; + else + b = (( ((currMB->mb_available_up)->cbp & (1<<(2+(mb_x>>1)))) == 0) ? 
1 : 0); //VG-ADD + } + + } + else + b = ( ((cbp & (1<<(mb_x/2))) == 0) ? 1: 0); + + if (mb_x == 0) + { + getLuma4x4Neighbour(img->current_mb_nr, mb_x, mb_y, -1, 0, &block_a); + if (block_a.available) + { + { + if(img->mb_data[block_a.mb_addr].mb_type==IPCM) + a=0; + else + a = (( (img->mb_data[block_a.mb_addr].cbp & (1<<(2*(block_a.y>>1)+1))) == 0) ? 1 : 0); //VG-ADD + } + + } + else + a=0; + } + else + a = ( ((cbp & (1<currentSlice->tex_ctx->cbp_contexts[0] + a+2*b); + } + + /*! + **************************************************************************** + * \brief + * This function is used to arithmetically encode the coded + * block pattern of a macroblock + **************************************************************************** + */ + void writeCBP_CABAC(SyntaxElement *se, EncodingEnvironmentPtr eep_dp) + { + TextureInfoContexts *ctx = img->currentSlice->tex_ctx; + Macroblock *currMB = &img->mb_data[img->current_mb_nr]; + + int a, b; + int curr_cbp_ctx, curr_cbp_idx; + int cbp = se->value1; // symbol to encode + int cbp_bit; + int b8; + + for (b8=0; b8<4; b8++) + { + curr_cbp_idx = (currMB->b8mode[b8] == IBLOCK ? 0 : 1); + writeCBP_BIT_CABAC (b8, cbp&(1<yuv_format != YUV400) + { + // coding of chroma part + b = 0; + if (currMB->mb_available_up != NULL) + { + if((currMB->mb_available_up)->mb_type==IPCM) + b=1; + else + b = ((currMB->mb_available_up)->cbp > 15) ? 1 : 0; + } + + + a = 0; + if (currMB->mb_available_left != NULL) + { + if((currMB->mb_available_left)->mb_type==IPCM) + a=1; + else + a = ((currMB->mb_available_left)->cbp > 15) ? 1 : 0; + } + + curr_cbp_ctx = a+2*b; + cbp_bit = (cbp > 15 ) ? 1 : 0; + biari_encode_symbol(eep_dp, (signed short) cbp_bit, ctx->cbp_contexts[1] + curr_cbp_ctx ); + + if (cbp > 15) + { + b = 0; + if (currMB->mb_available_up != NULL) + { + if((currMB->mb_available_up)->mb_type==IPCM) + b=1; + else + if ((currMB->mb_available_up)->cbp > 15) + b = (( ((currMB->mb_available_up)->cbp >> 4) == 2) ? 
1 : 0); + } + + + a = 0; + if (currMB->mb_available_left != NULL) + { + if((currMB->mb_available_left)->mb_type==IPCM) + a=1; + else + if ((currMB->mb_available_left)->cbp > 15) + a = (( ((currMB->mb_available_left)->cbp >> 4) == 2) ? 1 : 0); + } + + curr_cbp_ctx = a+2*b; + cbp_bit = ((cbp>>4) == 2) ? 1 : 0; + biari_encode_symbol(eep_dp, (signed short) cbp_bit, ctx->cbp_contexts[2] + curr_cbp_ctx ); + } + } + } + + static const int maxpos [] = {16, 15, 64, 32, 32, 16, 4, 15, 8, 16}; + static const int c1isdc [] = { 1, 0, 1, 1, 1, 1, 1, 0, 1, 1}; + + static const int type2ctx_bcbp[] = { 0, 1, 2, 2, 3, 4, 5, 6, 5, 5}; // 7 + static const int type2ctx_map [] = { 0, 1, 2, 3, 4, 5, 6, 7, 6, 6}; // 8 + static const int type2ctx_last[] = { 0, 1, 2, 3, 4, 5, 6, 7, 6, 6}; // 8 + static const int type2ctx_one [] = { 0, 1, 2, 3, 3, 4, 5, 6, 5, 5}; // 7 + static const int type2ctx_abs [] = { 0, 1, 2, 3, 3, 4, 5, 6, 5, 5}; // 7 + static const int max_c2 [] = { 4, 4, 4, 4, 4, 4, 3, 4, 3, 3}; // 9 + + + + /*! + **************************************************************************** + * \brief + * Write CBP4-BIT + **************************************************************************** + */ + void write_and_store_CBP_block_bit (Macroblock* currMB, EncodingEnvironmentPtr eep_dp, int type, int cbp_bit) + { + #define BIT_SET(x,n) ((int)(((x)&((int64)1<<(n)))>>(n))) + + int y_ac = (type==LUMA_16AC || type==LUMA_8x8 || type==LUMA_8x4 || type==LUMA_4x8 || type==LUMA_4x4); + int y_dc = (type==LUMA_16DC); + int u_ac = (type==CHROMA_AC && !img->is_v_block); + int v_ac = (type==CHROMA_AC && img->is_v_block); + int chroma_dc = (type==CHROMA_DC || type==CHROMA_DC_2x4 || type==CHROMA_DC_4x4); + int u_dc = (chroma_dc && !img->is_v_block); + int v_dc = (chroma_dc && img->is_v_block); + int j = (y_ac || u_ac || v_ac ? img->subblock_y : 0); + int i = (y_ac || u_ac || v_ac ? img->subblock_x : 0); + int bit = (y_dc ? 0 : y_ac ? 1 : u_dc ? 17 : v_dc ? 18 : u_ac ? 
19 : 23); + int default_bit = (img->is_intra_block ? 1 : 0); + int upper_bit = default_bit; + int left_bit = default_bit; + int ctx; + + int bit_pos_a = 0; + int bit_pos_b = 0; + + PixelPos block_a, block_b; + + if (y_ac || y_dc) + { + getLuma4x4Neighbour(img->current_mb_nr, i, j, -1, 0, &block_a); + getLuma4x4Neighbour(img->current_mb_nr, i, j, 0, -1, &block_b); + if (y_ac) + { + if (block_a.available) + bit_pos_a = 4*block_a.y + block_a.x; + if (block_b.available) + bit_pos_b = 4*block_b.y + block_b.x; + } + } + else + { + getChroma4x4Neighbour(img->current_mb_nr, i, j, -1, 0, &block_a); + getChroma4x4Neighbour(img->current_mb_nr, i, j, 0, -1, &block_b); + if (u_ac||v_ac) + { + if (block_a.available) + bit_pos_a = 4*block_a.y + block_a.x; + if (block_b.available) + bit_pos_b = 4*block_b.y + block_b.x; + } + } + + bit = (y_dc ? 0 : y_ac ? 1+4*j+i : u_dc ? 17 : v_dc ? 18 : u_ac ? 19+4*j+i : 35+4*j+i); + //--- set bits for current block --- + if (cbp_bit) + { + if (type==LUMA_8x8) + { + currMB->cbp_bits |= (1<< bit ); + currMB->cbp_bits |= (1<<(bit+1)); + currMB->cbp_bits |= (1<<(bit+4)); + currMB->cbp_bits |= (1<<(bit+5)); + } + else if (type==LUMA_8x4) + { + currMB->cbp_bits |= (1<< bit ); + currMB->cbp_bits |= (1<<(bit+1)); + } + else if (type==LUMA_4x8) + { + currMB->cbp_bits |= (1<< bit ); + currMB->cbp_bits |= (1<<(bit+4)); + } + else + { + currMB->cbp_bits |= ((int64)1<mb_data[block_b.mb_addr].mb_type==IPCM) + upper_bit=1; + else + upper_bit = BIT_SET(img->mb_data[block_b.mb_addr].cbp_bits,bit+bit_pos_b); + } + + + if (block_a.available) + { + if(img->mb_data[block_a.mb_addr].mb_type==IPCM) + left_bit=1; + else + left_bit = BIT_SET(img->mb_data[block_a.mb_addr].cbp_bits,bit+bit_pos_a); + } + + ctx = 2*upper_bit+left_bit; + + //===== encode symbol ===== + biari_encode_symbol (eep_dp, (short)cbp_bit, img->currentSlice->tex_ctx->bcbp_contexts[type2ctx_bcbp[type]]+ctx); + } + } + + + + + //===== position -> ctx for MAP ===== + //--- zig-zag scan ---- + static 
const int pos2ctx_map8x8 [] = { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5, + 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9, 10, 9, 8, 7, + 7, 6, 11, 12, 13, 11, 6, 7, 8, 9, 14, 10, 9, 8, 6, 11, + 12, 13, 11, 6, 9, 14, 10, 9, 11, 12, 13, 11 ,14, 10, 12, 14}; // 15 CTX + static const int pos2ctx_map8x4 [] = { 0, 1, 2, 3, 4, 5, 7, 8, 9, 10, 11, 9, 8, 6, 7, 8, + 9, 10, 11, 9, 8, 6, 12, 8, 9, 10, 11, 9, 13, 13, 14, 14}; // 15 CTX + static const int pos2ctx_map4x4 [] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 14}; // 15 CTX + static const int pos2ctx_map2x4c[] = { 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}; // 15 CTX + static const int pos2ctx_map4x4c[] = { 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2}; // 15 CTX + static const int* pos2ctx_map [] = {pos2ctx_map4x4, pos2ctx_map4x4, pos2ctx_map8x8, pos2ctx_map8x4, + pos2ctx_map8x4, pos2ctx_map4x4, pos2ctx_map4x4, pos2ctx_map4x4, + pos2ctx_map2x4c, pos2ctx_map4x4c}; + + //--- interlace scan ---- + //Taken from ABT + static const int pos2ctx_map8x8i[] = { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5, + 6, 9, 10, 10, 8, 11, 12, 11, 9, 9, 10, 10, 8, 11, 12, 11, + 9, 9, 10, 10, 8, 11, 12, 11, 9, 9, 10, 10, 8, 13, 13, 9, + 9, 10, 10, 8, 13, 13, 9, 9, 10, 10, 14, 14, 14, 14, 14, 14}; // 15 CTX + + static const int pos2ctx_map8x4i[] = { 0, 1, 2, 3, 4, 5, 6, 3, 4, 5, 6, 3, 4, 7, 6, 8, + 9, 7, 6, 8, 9, 10, 11, 12, 12, 10, 11, 13, 13, 14, 14, 14}; // 15 CTX + static const int pos2ctx_map4x8i[] = { 0, 1, 1, 1, 2, 3, 3, 4, 4, 4, 5, 6, 2, 7, 7, 8, + 8, 8, 5, 6, 9, 10, 10, 11, 11, 11, 12, 13, 13, 14, 14, 14}; // 15 CTX + static const int* pos2ctx_map_int[] = {pos2ctx_map4x4, pos2ctx_map4x4, pos2ctx_map8x8i,pos2ctx_map8x4i, + pos2ctx_map4x8i,pos2ctx_map4x4, pos2ctx_map4x4, pos2ctx_map4x4, + pos2ctx_map2x4c, pos2ctx_map4x4c}; + + + //===== position -> ctx for LAST ===== + static const int pos2ctx_last8x8 [] = { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 3, 3, 3, 3, 3, 
3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, + 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8}; // 9 CTX + static const int pos2ctx_last8x4 [] = { 0, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, + 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8}; // 9 CTX + static const int pos2ctx_last4x4 [] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; // 15 CTX + static const int pos2ctx_last2x4c[] = { 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}; // 15 CTX + static const int pos2ctx_last4x4c[] = { 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2}; // 15 CTX + static const int* pos2ctx_last [] = {pos2ctx_last4x4, pos2ctx_last4x4, pos2ctx_last8x8, pos2ctx_last8x4, + pos2ctx_last8x4, pos2ctx_last4x4, pos2ctx_last4x4, pos2ctx_last4x4, + pos2ctx_last2x4c, pos2ctx_last4x4c}; + + + + + /*! + **************************************************************************** + * \brief + * Write Significance MAP + **************************************************************************** + */ + void write_significance_map (Macroblock* currMB, EncodingEnvironmentPtr eep_dp, int type, int coeff[], int coeff_ctr) + { + int k; + unsigned short sig, last; + int k0 = 0; + int k1 = maxpos[type]-1; + + int fld = ( img->structure!=FRAME || currMB->mb_field ); + BiContextTypePtr map_ctx = ( fld ? img->currentSlice->tex_ctx->fld_map_contexts[type2ctx_map [type]] + : img->currentSlice->tex_ctx->map_contexts[type2ctx_map [type]] ); + BiContextTypePtr last_ctx = ( fld ? 
img->currentSlice->tex_ctx->fld_last_contexts[type2ctx_last[type]] + : img->currentSlice->tex_ctx->last_contexts[type2ctx_last[type]] ); + + if (!c1isdc[type]) + { + k0++; k1++; coeff--; + } + + if (!fld) + { + for (k=k0; k=0; i--) + { + if (coeff[i]!=0) + { + if (coeff[i]>0) {absLevel = coeff[i]; sign = 0;} + else {absLevel = -coeff[i]; sign = 1;} + + greater_one = (absLevel>1); + + //--- if coefficient is one --- + ctx = min(c1,4); + biari_encode_symbol (eep_dp, greater_one, img->currentSlice->tex_ctx->one_contexts[type2ctx_one[type]] + ctx); + + if (greater_one) + { + ctx = min(c2, max_c2[type]); + unary_exp_golomb_level_encode(eep_dp, absLevel-2, img->currentSlice->tex_ctx->abs_contexts[type2ctx_abs[type]] + ctx); + c1 = 0; + c2++; + } + else if (c1) + { + c1++; + } + biari_encode_symbol_eq_prob (eep_dp, sign); + } + } + } + + + + /*! + **************************************************************************** + * \brief + * Write Block-Transform Coefficients + **************************************************************************** + */ + void writeRunLevel_CABAC (SyntaxElement *se, EncodingEnvironmentPtr eep_dp) + { + static int coeff[64]; + static int coeff_ctr = 0; + static int pos = 0; + + //--- accumulate run-level information --- + if (se->value1 != 0) + { + pos += se->value2; + coeff[pos++] = se->value1; + coeff_ctr++; + //return; + } + else + { + Macroblock* currMB = &img->mb_data[img->current_mb_nr]; + //===== encode CBP-BIT ===== + if (coeff_ctr>0) + { + write_and_store_CBP_block_bit (currMB, eep_dp, se->context, 1); + //===== encode significance map ===== + write_significance_map (currMB, eep_dp, se->context, coeff, coeff_ctr); + //===== encode significant coefficients ===== + write_significant_coefficients (currMB, eep_dp, se->context, coeff); + } + else + write_and_store_CBP_block_bit (currMB, eep_dp, se->context, 0); + + //--- reset counters --- + pos = coeff_ctr = 0; + memset(coeff, 0 , 64 * sizeof(int)); + } + } + + + + + /*! 
+ ************************************************************************ + * \brief + * Unary binarization and encoding of a symbol by using + * one or two distinct models for the first two and all + * remaining bins + * + ************************************************************************/ + void unary_bin_encode(EncodingEnvironmentPtr eep_dp, + unsigned int symbol, + BiContextTypePtr ctx, + int ctx_offset) + { + unsigned int l; + BiContextTypePtr ictx; + + if (symbol==0) + { + biari_encode_symbol(eep_dp, 0, ctx ); + return; + } + else + { + biari_encode_symbol(eep_dp, 1, ctx ); + l = symbol; + ictx = ctx+ctx_offset; + while ((--l)>0) + biari_encode_symbol(eep_dp, 1, ictx); + biari_encode_symbol(eep_dp, 0, ictx); + } + return; + } + + /*! + ************************************************************************ + * \brief + * Unary binarization and encoding of a symbol by using + * one or two distinct models for the first two and all + * remaining bins; no terminating "0" for max_symbol + * (finite symbol alphabet) + ************************************************************************ + */ + void unary_bin_max_encode(EncodingEnvironmentPtr eep_dp, + unsigned int symbol, + BiContextTypePtr ctx, + int ctx_offset, + unsigned int max_symbol) + { + unsigned int l; + BiContextTypePtr ictx; + + if (symbol==0) + { + biari_encode_symbol(eep_dp, 0, ctx ); + return; + } + else + { + biari_encode_symbol(eep_dp, 1, ctx ); + l=symbol; + ictx=ctx+ctx_offset; + while ((--l)>0) + biari_encode_symbol(eep_dp, 1, ictx); + if (symbol= (unsigned int)(1<>k)&1)); + break; + } + } + + return; + } + + /*! 
+ ************************************************************************ + * \brief + * Exp-Golomb for Level Encoding + * + ************************************************************************/ + void unary_exp_golomb_level_encode( EncodingEnvironmentPtr eep_dp, + unsigned int symbol, + BiContextTypePtr ctx) + { + unsigned int l,k; + unsigned int exp_start = 13; // 15-2 : 0,1 level decision always sent + + if (symbol==0) + { + biari_encode_symbol(eep_dp, 0, ctx ); + return; + } + else + { + biari_encode_symbol(eep_dp, 1, ctx ); + l=symbol; + k=1; + while (((--l)>0) && (++k <= exp_start)) + biari_encode_symbol(eep_dp, 1, ctx); + if (symbol < exp_start) biari_encode_symbol(eep_dp, 0, ctx); + else exp_golomb_encode_eq_prob(eep_dp,symbol-exp_start,0); + } + return; + } + + + + /*! + ************************************************************************ + * \brief + * Exp-Golomb for MV Encoding + * + ************************************************************************/ + void unary_exp_golomb_mv_encode(EncodingEnvironmentPtr eep_dp, + unsigned int symbol, + BiContextTypePtr ctx, + unsigned int max_bin) + { + unsigned int l,k; + unsigned int bin=1; + BiContextTypePtr ictx=ctx; + unsigned int exp_start = 8; // 9-1 : 0 mvd decision always sent + + if (symbol==0) + { + biari_encode_symbol(eep_dp, 0, ictx ); + return; + } + else + { + biari_encode_symbol(eep_dp, 1, ictx ); + l=symbol; + k=1; + ictx++; + while (((--l)>0) && (++k <= exp_start)) + { + biari_encode_symbol(eep_dp, 1, ictx ); + if ((++bin)==2) ictx++; + if (bin==max_bin) ictx++; + } + if (symbol < exp_start) biari_encode_symbol(eep_dp, 0, ictx); + else exp_golomb_encode_eq_prob(eep_dp,symbol-exp_start,3); + } + return; + } + Index: llvm-test/MultiSource/Applications/JM/lencod/cabac.h diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/cabac.h:1.1 *** /dev/null Sat Feb 11 04:33:42 2006 --- llvm-test/MultiSource/Applications/JM/lencod/cabac.h Sat Feb 11 04:33:22 2006 *************** *** 0 **** 
--- 1,65 ---- + + /*! + *************************************************************************** + * \file + * cabac.h + * + * \brief + * Headerfile for entropy coding routines + * + * \author + * Detlev Marpe \n + * Copyright (C) 2000 HEINRICH HERTZ INSTITUTE All Rights Reserved. + * + * \date + * 21. Oct 2000 (Changes by Tobias Oelbaum 28.08.2001) + *************************************************************************** + */ + + + #ifndef _CABAC_H_ + #define _CABAC_H_ + + // CABAC + int get_pic_bin_count(); + void reset_pic_bin_count(); + + void arienco_start_encoding(EncodingEnvironmentPtr eep, unsigned char *code_buffer, int *code_len); + int arienco_bits_written(EncodingEnvironmentPtr eep); + void arienco_done_encoding(EncodingEnvironmentPtr eep); + void biari_init_context (BiContextTypePtr ctx, const int* ini); + void rescale_cum_freq(BiContextTypePtr bi_ct); + void biari_encode_symbol(EncodingEnvironmentPtr eep, signed short symbol, BiContextTypePtr bi_ct ); + void biari_encode_symbol_eq_prob(EncodingEnvironmentPtr eep, signed short symbol); + void biari_encode_symbol_final(EncodingEnvironmentPtr eep, signed short symbol); + MotionInfoContexts* create_contexts_MotionInfo(void); + TextureInfoContexts* create_contexts_TextureInfo(void); + void init_contexts_MotionInfo (MotionInfoContexts *enco_ctx); + void init_contexts_TextureInfo(TextureInfoContexts *enco_ctx); + void delete_contexts_MotionInfo(MotionInfoContexts *enco_ctx); + void delete_contexts_TextureInfo(TextureInfoContexts *enco_ctx); + void writeHeaderToBuffer(); + int writeSyntaxElement_CABAC(SyntaxElement *se, DataPartition *this_dataPart); + void writeMB_typeInfo_CABAC(SyntaxElement *se, EncodingEnvironmentPtr eep_dp); + void writeIntraPredMode_CABAC(SyntaxElement *se, EncodingEnvironmentPtr eep_dp); + void writeB8_typeInfo_CABAC(SyntaxElement *se, EncodingEnvironmentPtr eep_dp); + void writeRefFrame2Buffer_CABAC(SyntaxElement *se, EncodingEnvironmentPtr eep_dp); + void 
writeRefFrame_CABAC(SyntaxElement *se, EncodingEnvironmentPtr eep_dp); + void writeMVD_CABAC(SyntaxElement *se, EncodingEnvironmentPtr eep_dp); + void writeCBP_CABAC(SyntaxElement *se, EncodingEnvironmentPtr eep_dp); + void writeDquant_CABAC(SyntaxElement *se, EncodingEnvironmentPtr eep_dp); + void writeRunLevel_CABAC(SyntaxElement *se, EncodingEnvironmentPtr eep_dp); + void writeBiDirBlkSize_CABAC(SyntaxElement *se, EncodingEnvironmentPtr eep_dp); + void writeCIPredMode_CABAC(SyntaxElement *se, EncodingEnvironmentPtr eep_dp); + void print_ctx_TextureInfo(TextureInfoContexts *enco_ctx); + void writeMB_skip_flagInfo_CABAC(SyntaxElement *se, EncodingEnvironmentPtr eep_dp); + void writeFieldModeInfo_CABAC(SyntaxElement *se, EncodingEnvironmentPtr eep_dp); //GB + void writeCBP_BIT_CABAC (int b8, int bit, int cbp, Macroblock* currMB, int inter, EncodingEnvironmentPtr eep_dp); + void cabac_new_slice(); + void CheckAvailabilityOfNeighborsCABAC(); + + void writeMB_transform_size_CABAC(SyntaxElement *se, EncodingEnvironmentPtr eep_dp); + + + #endif // CABAC_H + Index: llvm-test/MultiSource/Applications/JM/lencod/configfile.c diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/configfile.c:1.1 *** /dev/null Sat Feb 11 04:33:42 2006 --- llvm-test/MultiSource/Applications/JM/lencod/configfile.c Sat Feb 11 04:33:22 2006 *************** *** 0 **** --- 1,1162 ---- + + /*! + *********************************************************************** + * \file + * configfile.c + * \brief + * Configuration handling. + * \author + * Main contributors (see contributors.h for copyright, address and affiliation details) + * - Stephan Wenger + * \note + * In the future this module should hide the Parameters and offer only + * Functions for their access. Modules which make frequent use of some parameters + * (e.g. picture size in macroblocks) are free to buffer them on local variables. 
+ * This will not only avoid global variable and make the code more readable, but also + * speed it up. It will also greatly facilitate future enhancements such as the + * handling of different picture sizes in the same sequence. \n + * \n + * For now, everything is just copied to the inp_par structure (gulp) + * + ************************************************************************************** + * \par Configuration File Format + ************************************************************************************** + * Format is line oriented, maximum of one parameter per line \n + * \n + * Lines have the following format: \n + * \ = \ # Comments \\n \n + * Whitespace is space and \\t + * \par + * \ are the predefined names for Parameters and are case sensitive. + * See configfile.h for the definition of those names and their mapping to + * configinput->values. + * \par + * \ are either integers [0..9]* or strings. + * Integers must fit into the wordlengths, signed values are generally assumed. + * Strings containing no whitespace characters can be used directly. Strings containing + * whitespace characters are to be inclosed in double quotes ("string with whitespace") + * The double quote character is forbidden (may want to implement something smarter here). + * \par + * Any Parameters whose ParameterName is undefined lead to the termination of the program + * with an error message. + * + * \par Known bug/Shortcoming: + * zero-length strings (i.e. to signal an non-existing file + * have to be coded as "". + * + * \par Rules for using command files + * \n + * All Parameters are initially taken from DEFAULTCONFIGFILENAME, defined in configfile.h. + * If an -f \ parameter is present in the command line then this file is used to + * update the defaults of DEFAULTCONFIGFILENAME. There can be more than one -f parameters + * present. 
If -p parameters are present then these + * override the default and the additional config file's settings, and are themselves + * overridden by future -p parameters. There must be whitespace between -f and -p commands + * and their respective parameters + *********************************************************************** + */ + + #define INCLUDED_BY_CONFIGFILE_C + + #include + #include + #include + + #if defined WIN32 + #include + #define strcasecmp strcmpi + #else + #include + #endif + #include + #include + + #include "global.h" + #include "configfile.h" + + #include "fmo.h" + + char *GetConfigFileContent (char *Filename); + static void ParseContent (char *buf, int bufsize); + static int ParameterNameToMapIndex (char *s); + static int InitEncoderParams(); + static int TestEncoderParams(int bitdepth_qp_scale); + static int DisplayEncoderParams(); + static void PatchInp (); + static void ProfileCheck(); + static void LevelCheck(); + + + #define MAX_ITEMS_TO_PARSE 10000 + + + /*! + *********************************************************************** + * \brief + * print help message and exit + *********************************************************************** + */ + void JMHelpExit () + { + fprintf( stderr, "\n lencod [-h] [-d defenc.cfg] {[-f curenc1.cfg]...[-f curencN.cfg]}" + " {[-p EncParam1=EncValue1]..[-p EncParamM=EncValueM]}\n\n" + "## Parameters\n\n" + + "## Options\n" + " -h : prints function usage\n" + " -d : use as default file for parameter initializations.\n" + " If not used then file defaults to encoder.cfg in local directory.\n" + " -f : read for reseting selected encoder parameters.\n" + " Multiple files could be used that set different parameters\n" + " -p : Set parameter to .\n" + " See default encoder.cfg file for description of all parameters.\n\n" + + "## Supported video file formats\n" + " RAW: .yuv -> YUV 4:2:0\n\n" + + "## Examples of usage:\n" + " lencod\n" + " lencod -h\n" + " lencod -d default.cfg\n" + " lencod -f 
curenc1.cfg\n" + " lencod -f curenc1.cfg -p InputFile=\"e:\\data\\container_qcif_30.yuv\" -p SourceWidth=176 -p SourceHeight=144\n" + " lencod -f curenc1.cfg -p FramesToBeEncoded=30 -p QPISlice=28 -p QPPSlice=28 -p QPBSlice=30\n"); + + exit(-1); + } + + /*! + *********************************************************************** + * \brief + * Parse the command line parameters and read the config files. + * \param ac + * number of command line parameters + * \param av + * command line parameters + *********************************************************************** + */ + void Configure (int ac, char *av[]) + { + char *content; + int CLcount, ContentLen, NumberParams; + char *filename=DEFAULTCONFIGFILENAME; + + memset (&configinput, 0, sizeof (InputParameters)); + //Set default parameters. + printf ("Setting Default Parameters...\n"); + InitEncoderParams(); + + // Process default config file + CLcount = 1; + + if (ac==2) + { + if (0 == strncmp (av[1], "-h", 2)) + { + JMHelpExit(); + } + } + + if (ac>=3) + { + if (0 == strncmp (av[1], "-d", 2)) + { + filename=av[2]; + CLcount = 3; + } + if (0 == strncmp (av[1], "-h", 2)) + { + JMHelpExit(); + } + } + printf ("Parsing Configfile %s", filename); + content = GetConfigFileContent (filename); + if (NULL==content) + error (errortext, 300); + ParseContent (content, strlen(content)); + printf ("\n"); + free (content); + + // Parse the command line + + while (CLcount < ac) + { + if (0 == strncmp (av[CLcount], "-h", 2)) + { + JMHelpExit(); + } + + if (0 == strncmp (av[CLcount], "-f", 2)) // A file parameter? + { + content = GetConfigFileContent (av[CLcount+1]); + if (NULL==content) + error (errortext, 300); + printf ("Parsing Configfile %s", av[CLcount+1]); + ParseContent (content, strlen (content)); + printf ("\n"); + free (content); + CLcount += 2; + } else + { + if (0 == strncmp (av[CLcount], "-p", 2)) // A config change? 
+ { + // Collect all data until next parameter (starting with - (x is any character)), + // put it into content, and parse content. + + CLcount++; + ContentLen = 0; + NumberParams = CLcount; + + // determine the necessary size for content + while (NumberParams < ac && av[NumberParams][0] != '-') + ContentLen += strlen (av[NumberParams++]); // Space for all the strings + ContentLen += 1000; // Additional 1000 bytes for spaces and \0s + + + if ((content = malloc (ContentLen))==NULL) no_mem_exit("Configure: content");; + content[0] = '\0'; + + // concatenate all parameters identified before + + while (CLcount < NumberParams) + { + char *source = &av[CLcount][0]; + char *destin = &content[strlen (content)]; + + while (*source != '\0') + { + if (*source == '=') // The Parser expects whitespace before and after '=' + { + *destin++=' '; *destin++='='; *destin++=' '; // Hence make sure we add it + } else + *destin++=*source; + source++; + } + *destin = '\0'; + CLcount++; + } + printf ("Parsing command line string '%s'", content); + ParseContent (content, strlen(content)); + free (content); + printf ("\n"); + } + else + { + snprintf (errortext, ET_SIZE, "Error in command line, ac %d, around string '%s', missing -f or -p parameters?", CLcount, av[CLcount]); + error (errortext, 300); + } + } + } + printf ("\n"); + PatchInp(); + if (input->DisplayEncParams) + DisplayEncoderParams(); + } + + /*! + *********************************************************************** + * \brief + * allocates memory buf, opens file Filename in f, reads contents into + * buf and returns buf + * \param Filename + * name of config file + * \return + * if successfull, content of config file + * NULL in case of error. 
Error message will be set in errortext + *********************************************************************** + */ + char *GetConfigFileContent (char *Filename) + { + long FileSize; + FILE *f; + char *buf; + + if (NULL == (f = fopen (Filename, "r"))) + { + snprintf (errortext, ET_SIZE, "Cannot open configuration file %s.", Filename); + return NULL; + } + + if (0 != fseek (f, 0, SEEK_END)) + { + snprintf (errortext, ET_SIZE, "Cannot fseek in configuration file %s.", Filename); + return NULL; + } + + FileSize = ftell (f); + if (FileSize < 0 || FileSize > 60000) + { + snprintf (errortext, ET_SIZE, "Unreasonable Filesize %ld reported by ftell for configuration file %s.", FileSize, Filename); + return NULL; + } + if (0 != fseek (f, 0, SEEK_SET)) + { + snprintf (errortext, ET_SIZE, "Cannot fseek in configuration file %s.", Filename); + return NULL; + } + + if ((buf = malloc (FileSize + 1))==NULL) no_mem_exit("GetConfigFileContent: buf"); + + // Note that ftell() gives us the file size as the file system sees it. The actual file size, + // as reported by fread() below will be often smaller due to CR/LF to CR conversion and/or + // control characters after the dos EOF marker in the file. + + FileSize = fread (buf, 1, FileSize, f); + buf[FileSize] = '\0'; + + + fclose (f); + return buf; + } + + + /*! + *********************************************************************** + * \brief + * Parses the character array buf and writes global variable input, which is defined in + * configfile.h. This hack will continue to be necessary to facilitate the addition of + * new parameters through the Map[] mechanism (Need compiler-generated addresses in map[]). 
+ * \param buf + * buffer to be parsed + * \param bufsize + * buffer size of buffer + *********************************************************************** + */ + void ParseContent (char *buf, int bufsize) + { + + char *items[MAX_ITEMS_TO_PARSE]; + int MapIdx; + int item = 0; + int InString = 0, InItem = 0; + char *p = buf; + char *bufend = &buf[bufsize]; + int IntContent; + double DoubleContent; + int i; + + // Stage one: Generate an argc/argv-type list in items[], without comments and whitespace. + // This is context insensitive and could be done most easily with lex(1). + + while (p < bufend) + { + switch (*p) + { + case 13: + p++; + break; + case '#': // Found comment + *p = '\0'; // Replace '#' with '\0' in case of comment immediately following integer or string + while (*p != '\n' && p < bufend) // Skip till EOL or EOF, whichever comes first + p++; + InString = 0; + InItem = 0; + break; + case '\n': + InItem = 0; + InString = 0; + *p++='\0'; + break; + case ' ': + case '\t': // Skip whitespace, leave state unchanged + if (InString) + p++; + else + { // Terminate non-strings once whitespace is found + *p++ = '\0'; + InItem = 0; + } + break; + + case '"': // Begin/End of String + *p++ = '\0'; + if (!InString) + { + items[item++] = p; + InItem = ~InItem; + } + else + InItem = 0; + InString = ~InString; // Toggle + break; + + default: + if (!InItem) + { + items[item++] = p; + InItem = ~InItem; + } + p++; + } + } + + item--; + + for (i=0; i (MapIdx = ParameterNameToMapIndex (items[i]))) + { + snprintf (errortext, ET_SIZE, " Parsing error in config file: Parameter Name '%s' not recognized.", items[i]); + error (errortext, 300); + } + if (strcasecmp ("=", items[i+1])) + { + snprintf (errortext, ET_SIZE, " Parsing error in config file: '=' expected as the second token in each line."); + error (errortext, 300); + } + + // Now interpret the Value, context sensitive... 
+ + switch (Map[MapIdx].Type) + { + case 0: // Numerical + if (1 != sscanf (items[i+2], "%d", &IntContent)) + { + snprintf (errortext, ET_SIZE, " Parsing error: Expected numerical value for Parameter of %s, found '%s'.", items[i], items[i+2]); + error (errortext, 300); + } + * (int *) (Map[MapIdx].Place) = IntContent; + printf ("."); + break; + case 1: + strncpy ((char *) Map[MapIdx].Place, items [i+2], FILE_NAME_SIZE); + printf ("."); + break; + case 2: // Numerical double + if (1 != sscanf (items[i+2], "%lf", &DoubleContent)) + { + snprintf (errortext, ET_SIZE, " Parsing error: Expected numerical value for Parameter of %s, found '%s'.", items[i], items[i+2]); + error (errortext, 300); + } + * (double *) (Map[MapIdx].Place) = DoubleContent; + printf ("."); + break; + default: + assert ("Unknown value type in the map definition of configfile.h"); + } + } + memcpy (input, &configinput, sizeof (InputParameters)); + } + + /*! + *********************************************************************** + * \brief + * Returns the index number from Map[] for a given parameter name. + * \param s + * parameter name string + * \return + * the index number if the string is a valid parameter name, \n + * -1 for error + *********************************************************************** + */ + static int ParameterNameToMapIndex (char *s) + { + int i = 0; + + while (Map[i].TokenName != NULL) + if (0==strcasecmp (Map[i].TokenName, s)) + return i; + else + i++; + return -1; + }; + + /*! + *********************************************************************** + * \brief + * Sets initial values for encoding parameters. 
+ * \return + * -1 for error + *********************************************************************** + */ + static int InitEncoderParams() + { + int i = 0; + + while (Map[i].TokenName != NULL) + { + if (Map[i].Type == 0) + * (int *) (Map[i].Place) = (int) Map[i].Default; + else if (Map[i].Type == 2) + * (double *) (Map[i].Place) = Map[i].Default; + i++; + } + return -1; + }; + + /*! + *********************************************************************** + * \brief + * Validates encoding parameters. + * \return + * -1 for error + *********************************************************************** + */ + static int TestEncoderParams(int bitdepth_qp_scale) + { + int i = 0; + + while (Map[i].TokenName != NULL) + { + if (Map[i].param_limits == 1) + { + if (Map[i].Type == 0) + { + if ( * (int *) (Map[i].Place) < (int) Map[i].min_limit || * (int *) (Map[i].Place) > (int) Map[i].max_limit ) + { + snprintf(errortext, ET_SIZE, "Error in input parameter %s. Check configuration file. Value should be in [%d, %d] range.", Map[i].TokenName, (int) Map[i].min_limit,(int)Map[i].max_limit ); + error (errortext, 400); + } + + } + else if (Map[i].Type == 2) + { + if ( * (double *) (Map[i].Place) < Map[i].min_limit || * (double *) (Map[i].Place) > Map[i].max_limit ) + { + snprintf(errortext, ET_SIZE, "Error in input parameter %s. Check configuration file. Value should be in [%.2f, %.2f] range.", Map[i].TokenName,Map[i].min_limit ,Map[i].max_limit ); + error (errortext, 400); + } + } + } + else if (Map[i].param_limits == 2) + { + if (Map[i].Type == 0) + { + if ( * (int *) (Map[i].Place) < (int) Map[i].min_limit ) + { + snprintf(errortext, ET_SIZE, "Error in input parameter %s. Check configuration file. Value should not be smaller than %d.", Map[i].TokenName, (int) Map[i].min_limit); + error (errortext, 400); + } + + } + else if (Map[i].Type == 2) + { + if ( * (double *) (Map[i].Place) < Map[i].min_limit ) + { + snprintf(errortext, ET_SIZE, "Error in input parameter %s. 
Check configuration file. Value should not be smaller than %2.f.", Map[i].TokenName,Map[i].min_limit); + error (errortext, 400); + } + } + } + else if (Map[i].param_limits == 3) // Only used for QPs + { + if (Map[i].Type == 0) + { + if ( * (int *) (Map[i].Place) < (int) (Map[i].min_limit - bitdepth_qp_scale) || * (int *) (Map[i].Place) > (int) Map[i].max_limit ) + { + snprintf(errortext, ET_SIZE, "Error in input parameter %s. Check configuration file. Value should be in [%d, %d] range.", Map[i].TokenName, (int) (Map[i].min_limit - bitdepth_qp_scale),(int)Map[i].max_limit ); + error (errortext, 400); + } + + } + } + + i++; + } + return -1; + }; + + + + /*! + *********************************************************************** + * \brief + * Outputs encoding parameters. + * \return + * -1 for error + *********************************************************************** + */ + static int DisplayEncoderParams() + { + int i = 0; + + printf("******************************************************\n"); + printf("* Encoder Parameters *\n"); + printf("******************************************************\n"); + while (Map[i].TokenName != NULL) + { + if (Map[i].Type == 0) + printf("Parameter %s = %d\n",Map[i].TokenName,* (int *) (Map[i].Place)); + else if (Map[i].Type == 1) + printf("Parameter %s = ""%s""\n",Map[i].TokenName,(char *) (Map[i].Place)); + else if (Map[i].Type == 2) + printf("Parameter %s = %.2f\n",Map[i].TokenName,* (double *) (Map[i].Place)); + i++; + } + printf("******************************************************\n"); + return -1; + }; + + /*! + ************************************************************************ + * \brief + * calculate Ceil(Log2(uiVal)) + ************************************************************************ + */ + unsigned CeilLog2( unsigned uiVal) + { + unsigned uiTmp = uiVal-1; + unsigned uiRet = 0; + + while( uiTmp != 0 ) + { + uiTmp >>= 1; + uiRet++; + } + return uiRet; + } + + + /*! 
+ *********************************************************************** + * \brief + * Checks the input parameters for consistency. + *********************************************************************** + */ + static void PatchInp () + { + int bitdepth_qp_scale = 6*(input->BitDepthLuma - 8); + + // These variables are added for FMO + FILE * sgfile=NULL; + int i,j; + int frame_mb_only; + int mb_width, mb_height, mapunit_height; + int storedBplus1; + + TestEncoderParams(bitdepth_qp_scale); + + if (input->FrameRate == 0.0) + input->FrameRate = INIT_FRAME_RATE; + + // Set block sizes + + // Skip/Direct16x16 + input->part_size[0][0] = 4; + input->part_size[0][1] = 4; + // 16x16 + input->part_size[1][0] = 4; + input->part_size[1][1] = 4; + // 16x8 + input->part_size[2][0] = 4; + input->part_size[2][1] = 2; + // 8x16 + input->part_size[3][0] = 2; + input->part_size[3][1] = 4; + // 8x8 + input->part_size[4][0] = 2; + input->part_size[4][1] = 2; + // 8x4 + input->part_size[5][0] = 2; + input->part_size[5][1] = 1; + // 4x8 + input->part_size[6][0] = 1; + input->part_size[6][1] = 2; + // 4x4 + input->part_size[7][0] = 1; + input->part_size[7][1] = 1; + + for (j = 0; j<8;j++) + { + for (i = 0; i<2; i++) + { + input->blc_size[j][i] = input->part_size[j][i] * BLOCK_SIZE; + } + } + + // set proper log2_max_frame_num_minus4. + storedBplus1 = (input->BRefPictures ) ? input->successive_Bframe + 1: 1; + + if (input->Log2MaxFNumMinus4 == -1) + log2_max_frame_num_minus4 = Clip3(0,12, (int) (CeilLog2(input->no_frames * storedBplus1) - 4)); + else + log2_max_frame_num_minus4 = input->Log2MaxFNumMinus4; + + if (log2_max_frame_num_minus4 == 0 && input->num_ref_frames == 16) + { + snprintf(errortext, ET_SIZE, " NumberReferenceFrames=%d and Log2MaxFNumMinus4=%d may lead to an invalid value of frame_num.", input->num_ref_frames, input-> Log2MaxFNumMinus4); + error (errortext, 500); + } + + // set proper log2_max_pic_order_cnt_lsb_minus4. 
+ if (input->Log2MaxPOCLsbMinus4 == - 1) + log2_max_pic_order_cnt_lsb_minus4 = Clip3(0,12, (int) (CeilLog2( 2*input->no_frames * (input->jumpd + 1)) - 4)); + else + log2_max_pic_order_cnt_lsb_minus4 = input->Log2MaxPOCLsbMinus4; + + if (((1<<(log2_max_pic_order_cnt_lsb_minus4 + 3)) < input->jumpd * 4) && input->Log2MaxPOCLsbMinus4 != -1) + error("log2_max_pic_order_cnt_lsb_minus4 might not be sufficient for encoding. Increase value.",400); + + // B picture consistency check + if(input->successive_Bframe > input->jumpd) + { + snprintf(errortext, ET_SIZE, "Number of B-frames %d can not exceed the number of frames skipped", input->successive_Bframe); + error (errortext, 400); + } + + // Direct Mode consistency check + if(input->successive_Bframe && input->direct_spatial_mv_pred_flag != DIR_SPATIAL && input->direct_spatial_mv_pred_flag != DIR_TEMPORAL) + { + snprintf(errortext, ET_SIZE, "Unsupported direct mode=%d, use TEMPORAL=0 or SPATIAL=1", input->direct_spatial_mv_pred_flag); + error (errortext, 400); + } + + if (input->PicInterlace>0 || input->MbInterlace>0) + { + if (input->directInferenceFlag==0) + printf("\nDirectInferenceFlag set to 1 due to interlace coding."); + input->directInferenceFlag=1; + } + + if (input->PicInterlace>0) + { + if (input->IntraBottom!=0 && input->IntraBottom!=1) + { + snprintf(errortext, ET_SIZE, "Incorrect value %d for IntraBottom. 
Use 0 (disable) or 1 (enable).", input->IntraBottom); + error (errortext, 400); + } + } + // Cabac/UVLC consistency check + if (input->symbol_mode != UVLC && input->symbol_mode != CABAC) + { + snprintf (errortext, ET_SIZE, "Unsupported symbol mode=%d, use UVLC=0 or CABAC=1",input->symbol_mode); + error (errortext, 400); + } + + // Open Files + if ((p_in=open(input->infile, OPENFLAGS_READ))==-1) + { + snprintf(errortext, ET_SIZE, "Input file %s does not exist",input->infile); + error (errortext, 500); + } + + if (strlen (input->ReconFile) > 0 && (p_dec=open(input->ReconFile, OPENFLAGS_WRITE, OPEN_PERMISSIONS))==-1) + { + snprintf(errortext, ET_SIZE, "Error open file %s", input->ReconFile); + error (errortext, 500); + } + + #if TRACE + if (strlen (input->TraceFile) > 0 && (p_trace=fopen(input->TraceFile,"w"))==NULL) + { + snprintf(errortext, ET_SIZE, "Error open file %s", input->TraceFile); + error (errortext, 500); + } + #endif + + if (input->img_width % 16 != 0) + { + img->auto_crop_right = 16-(input->img_width % 16); + } + else + { + img->auto_crop_right=0; + } + if (input->PicInterlace || input->MbInterlace) + { + if (input->img_height % 2 != 0) + { + error ("even number of lines required for interlaced coding", 500); + } + if (input->img_height % 32 != 0) + { + img->auto_crop_bottom = 32-(input->img_height % 32); + } + else + { + img->auto_crop_bottom=0; + } + } + else + { + if (input->img_height % 16 != 0) + { + img->auto_crop_bottom = 16-(input->img_height % 16); + } + else + { + img->auto_crop_bottom=0; + } + } + if (img->auto_crop_bottom || img->auto_crop_right) + { + printf ("Warning: Automatical cropping activated: Coded frame Size: %dx%d\n", input->img_width+img->auto_crop_right, input->img_height+img->auto_crop_bottom); + } + + /* + // add check for MAXSLICEGROUPIDS + if(input->num_slice_groups_minus1>=MAXSLICEGROUPIDS) + { + snprintf(errortext, ET_SIZE, "num_slice_groups_minus1 exceeds MAXSLICEGROUPIDS"); + error (errortext, 500); + } + */ + + // 
Following codes are to read slice group configuration from SliceGroupConfigFileName for slice group type 0,2 or 6 + if( (input->num_slice_groups_minus1!=0)&& + ((input->slice_group_map_type == 0) || (input->slice_group_map_type == 2) || (input->slice_group_map_type == 6)) ) + { + if (strlen (input->SliceGroupConfigFileName) > 0 && (sgfile=fopen(input->SliceGroupConfigFileName,"r"))==NULL) + { + snprintf(errortext, ET_SIZE, "Error open file %s", input->SliceGroupConfigFileName); + error (errortext, 500); + } + else + { + if (input->slice_group_map_type == 0) + { + input->run_length_minus1=(int *)malloc(sizeof(int)*(input->num_slice_groups_minus1+1)); + if (NULL==input->run_length_minus1) + no_mem_exit("PatchInp: input->run_length_minus1"); + + // each line contains one 'run_length_minus1' value + for(i=0;i<=input->num_slice_groups_minus1;i++) + { + fscanf(sgfile,"%d",(input->run_length_minus1+i)); + fscanf(sgfile,"%*[^\n]"); + + } + } + else if (input->slice_group_map_type == 2) + { + input->top_left=(int *)malloc(sizeof(int)*input->num_slice_groups_minus1); + input->bottom_right=(int *)malloc(sizeof(int)*input->num_slice_groups_minus1); + if (NULL==input->top_left) + no_mem_exit("PatchInp: input->top_left"); + if (NULL==input->bottom_right) + no_mem_exit("PatchInp: input->bottom_right"); + + // every two lines contain 'top_left' and 'bottom_right' value + for(i=0;inum_slice_groups_minus1;i++) + { + fscanf(sgfile,"%d",(input->top_left+i)); + fscanf(sgfile,"%*[^\n]"); + fscanf(sgfile,"%d",(input->bottom_right+i)); + fscanf(sgfile,"%*[^\n]"); + } + + } + else if (input->slice_group_map_type == 6) + { + int tmp; + + frame_mb_only = !(input->PicInterlace || input->MbInterlace); + mb_width= (input->img_width+img->auto_crop_right)/16; + mb_height= (input->img_height+img->auto_crop_bottom)/16; + mapunit_height=mb_height/(2-frame_mb_only); + + input->slice_group_id=(byte * ) malloc(sizeof(byte)*mapunit_height*mb_width); + if (NULL==input->slice_group_id) + 
no_mem_exit("PatchInp: input->slice_group_id"); + + // each line contains slice_group_id for one Macroblock + for (i=0;islice_group_id[i]= (byte) tmp; + if ( *(input->slice_group_id+i) > input->num_slice_groups_minus1 ) + { + snprintf(errortext, ET_SIZE, "Error read slice group information from file %s", input->SliceGroupConfigFileName); + error (errortext, 500); + } + fscanf(sgfile,"%*[^\n]"); + } + } + fclose(sgfile); + } + } + + + if (input->PyramidRefReorder && input->PyramidCoding && (input->PicInterlace || input->MbInterlace)) + { + snprintf(errortext, ET_SIZE, "PyramidRefReorder Not supported with Interlace encoding methods\n"); + error (errortext, 400); + } + + if (input->PocMemoryManagement && input->PyramidCoding && (input->PicInterlace || input->MbInterlace)) + { + snprintf(errortext, ET_SIZE, "PocMemoryManagement not supported with Interlace encoding methods\n"); + error (errortext, 400); + } + + + // frame/field consistency check + if (input->PicInterlace != FRAME_CODING && input->PicInterlace != ADAPTIVE_CODING && input->PicInterlace != FIELD_CODING) + { + snprintf (errortext, ET_SIZE, "Unsupported PicInterlace=%d, use frame based coding=0 or field based coding=1 or adaptive=2",input->PicInterlace); + error (errortext, 400); + } + + // frame/field consistency check + if (input->MbInterlace != FRAME_CODING && input->MbInterlace != ADAPTIVE_CODING && input->MbInterlace != FIELD_CODING) + { + snprintf (errortext, ET_SIZE, "Unsupported MbInterlace=%d, use frame based coding=0 or field based coding=1 or adaptive=2",input->MbInterlace); + error (errortext, 400); + } + + + if ((!input->rdopt)&&(input->MbInterlace)) + { + snprintf(errortext, ET_SIZE, "MB AFF is not compatible with non-rd-optimized coding."); + error (errortext, 500); + } + + if (input->rdopt>2) + { + snprintf(errortext, ET_SIZE, "RDOptimization=3 mode has been deactivated do to diverging of real and simulated decoders."); + error (errortext, 500); + } + + // check RDoptimization mode and 
profile. FMD does not support Frex Profiles. + if (input->rdopt==2 && input->ProfileIDC>=FREXT_HP) + { + snprintf(errortext, ET_SIZE, "Fast Mode Decision methods does not support FREX Profiles"); + error (errortext, 500); + } + + // the two HEX FME schemes support FAST Subpel ME. EPZS does not but works fine with + // Hadamard reduction with similar speed up. Subpel FME may be added at a later stage + // for this scheme for further speed increase. + if (input->hadamard == 2 && input->FMEnable != 0 && input->FMEnable != 3) + { + snprintf(errortext, ET_SIZE, "UseHadamard=2 is not allowed when UseFME is set to 1 or 2."); + error (errortext, 500); + } + + // Tian Dong: May 31, 2002 + // The number of frames in one sub-seq in enhanced layer should not exceed + // the number of reference frame number. + if ( input->NumFramesInELSubSeq >= input->num_ref_frames || input->NumFramesInELSubSeq < 0 ) + { + snprintf(errortext, ET_SIZE, "NumFramesInELSubSeq (%d) is out of range [0,%d).", input->NumFramesInELSubSeq, input->num_ref_frames); + error (errortext, 500); + } + // Tian Dong: Enhanced GOP is not supported in bitstream mode. September, 2002 + if ( input->NumFramesInELSubSeq > 0 && input->of_mode == PAR_OF_ANNEXB ) + { + snprintf(errortext, ET_SIZE, "Enhanced GOP is not supported in bitstream mode and RTP mode yet."); + error (errortext, 500); + } + // Tian Dong (Sept 2002) + // The AFF is not compatible with spare picture for the time being. + if ((input->PicInterlace || input->MbInterlace) && input->SparePictureOption == TRUE) + { + snprintf(errortext, ET_SIZE, "AFF is not compatible with spare picture."); + error (errortext, 500); + } + + // Only the RTP mode is compatible with spare picture for the time being. 
+ if (input->of_mode != PAR_OF_RTP && input->SparePictureOption == TRUE) + { + snprintf(errortext, ET_SIZE, "Only RTP output mode is compatible with spare picture features."); + error (errortext, 500); + } + + if( (input->WeightedPrediction > 0 || input->WeightedBiprediction > 0) && (input->MbInterlace)) + { + printf("Weighted prediction coding is not supported for MB AFF currently."); + error (errortext, 500); + } + if ( input->NumFramesInELSubSeq > 0 && input->WeightedPrediction > 0) + { + snprintf(errortext, ET_SIZE, "Enhanced GOP is not supported in weighted prediction coding mode yet."); + error (errortext, 500); + } + + //! the number of slice groups is forced to be 1 for slice group type 3-5 + if(input->num_slice_groups_minus1 > 0) + { + if( (input->slice_group_map_type >= 3) && (input->slice_group_map_type<=5) ) + input->num_slice_groups_minus1 = 1; + } + + // Rate control + if(input->RCEnable) + { + if ( ((input->img_height+img->auto_crop_bottom)*(input->img_width+img->auto_crop_right)/256)%input->basicunit!=0) + { + snprintf(errortext, ET_SIZE, "Frame size in macroblocks must be a multiple of BasicUnit."); + error (errortext, 500); + } + } + + if ((input->successive_Bframe)&&(input->BRefPictures)&&(input->idr_enable)&&(input->intra_period)&&(input->pic_order_cnt_type!=0)) + { + error("Stored B pictures combined with IDR pictures only supported in Picture Order Count type 0\n",-1000); + } + + if( !input->direct_spatial_mv_pred_flag && input->num_ref_frames<2 && input->successive_Bframe >0) + error("temporal direct needs at least 2 ref frames\n",-1000); + + // frext + if(input->Transform8x8Mode && input->sp_periodicity /*SP-frames*/) + { + snprintf(errortext, ET_SIZE, "\nThe new 8x8 mode is not implemented for sp-frames."); + error (errortext, 500); + } + + if(input->Transform8x8Mode && (input->ProfileIDCProfileIDC>FREXT_Hi444)) + { + snprintf(errortext, ET_SIZE, "\nTransform8x8Mode may be used only with ProfileIDC %d to %d.", FREXT_HP, FREXT_Hi444); + 
error (errortext, 500); + } + if(input->ScalingMatrixPresentFlag && (input->ProfileIDCProfileIDC>FREXT_Hi444)) + { + snprintf(errortext, ET_SIZE, "\nScalingMatrixPresentFlag may be used only with ProfileIDC %d to %d.", FREXT_HP, FREXT_Hi444); + error (errortext, 500); + } + + if(input->yuv_format==YUV422 && input->ProfileIDC < FREXT_Hi422) + { + snprintf(errortext, ET_SIZE, "\nFRExt Profile(YUV Format) Error!\nYUV422 can be used only with ProfileIDC %d or %d\n",FREXT_Hi422, FREXT_Hi444); + error (errortext, 500); + } + if(input->yuv_format==YUV444 && input->ProfileIDC < FREXT_Hi444) + { + snprintf(errortext, ET_SIZE, "\nFRExt Profile(YUV Format) Error!\nYUV444 can be used only with ProfileIDC %d.\n",FREXT_Hi444); + error (errortext, 500); + } + + // Residue Color Transform + if(input->yuv_format!=YUV444 && input->residue_transform_flag) + { + snprintf(errortext, ET_SIZE, "\nResidue color transform is supported only in YUV444."); + error (errortext, 500); + } + + if ((input->BiPredMotionEstimation) && (input->search_range < input->BiPredMESearchRange)) + { + snprintf(errortext, ET_SIZE, "\nBiPredMESearchRange must be smaller or equal SearchRange."); + error (errortext, 500); + } + + + if (input->EnableOpenGOP) input->PyramidRefReorder = 1; + if (input->EnableOpenGOP && input->PicInterlace) + { + snprintf(errortext, ET_SIZE, "Open Gop currently not supported for Field coded pictures."); + error (errortext, 500); + } + + ProfileCheck(); + LevelCheck(); + } + + void PatchInputNoFrames() + { + // Tian Dong: May 31, 2002 + // If the frames are grouped into two layers, "FramesToBeEncoded" in the config file + // will give the number of frames which are in the base layer. Here we let input->no_frames + // be the total frame numbers. 
+ input->no_frames = 1+ (input->no_frames-1) * (input->NumFramesInELSubSeq+1); + if ( input->NumFrameIn2ndIGOP ) + input->NumFrameIn2ndIGOP = 1+(input->NumFrameIn2ndIGOP-1) * (input->NumFramesInELSubSeq+1); + FirstFrameIn2ndIGOP = input->no_frames; + } + + static void ProfileCheck() + { + if((input->ProfileIDC != 66 ) && + (input->ProfileIDC != 77 ) && + (input->ProfileIDC != 88 ) && + (input->ProfileIDC != FREXT_HP ) && + (input->ProfileIDC != FREXT_Hi10P ) && + (input->ProfileIDC != FREXT_Hi422 ) && + (input->ProfileIDC != FREXT_Hi444 )) + { + snprintf(errortext, ET_SIZE, "Profile must be baseline(66)/main(77)/extended(88) or FRExt (%d to %d).", FREXT_HP,FREXT_Hi444); + error (errortext, 500); + } + // baseline + if (input->ProfileIDC == 66 ) + { + if (input->successive_Bframe || input->BRefPictures==2) + { + snprintf(errortext, ET_SIZE, "B pictures are not allowed in baseline."); + error (errortext, 500); + } + if (input->sp_periodicity) + { + snprintf(errortext, ET_SIZE, "SP pictures are not allowed in baseline."); + error (errortext, 500); + } + if (input->partition_mode) + { + snprintf(errortext, ET_SIZE, "Data partitioning is not allowed in baseline."); + error (errortext, 500); + } + if (input->WeightedPrediction) + { + snprintf(errortext, ET_SIZE, "Weighted prediction is not allowed in baseline."); + error (errortext, 500); + } + if (input->WeightedBiprediction) + { + snprintf(errortext, ET_SIZE, "Weighted prediction is not allowed in baseline."); + error (errortext, 500); + } + if (input->symbol_mode == CABAC) + { + snprintf(errortext, ET_SIZE, "CABAC is not allowed in baseline."); + error (errortext, 500); + } + } + + // main + if (input->ProfileIDC == 77 ) + { + if (input->sp_periodicity) + { + snprintf(errortext, ET_SIZE, "SP pictures are not allowed in main."); + error (errortext, 500); + } + if (input->partition_mode) + { + snprintf(errortext, ET_SIZE, "Data partitioning is not allowed in main."); + error (errortext, 500); + } + if 
(input->num_slice_groups_minus1) + { + snprintf(errortext, ET_SIZE, "num_slice_groups_minus1>0 (FMO) is not allowed in main."); + error (errortext, 500); + } + if (input->redundant_slice_flag) + { + snprintf(errortext, ET_SIZE, "Redundant pictures are not allowed in main."); + error (errortext, 500); + } + } + + // extended + if (input->ProfileIDC == 88 ) + { + if (!input->directInferenceFlag) + { + snprintf(errortext, ET_SIZE, "direct_8x8_inference flag must be equal to 1 in extended."); + error (errortext, 500); + } + + if (input->symbol_mode == CABAC) + { + snprintf(errortext, ET_SIZE, "CABAC is not allowed in extended."); + error (errortext, 500); + } + } + + } + + static void LevelCheck() + { + if ( (input->LevelIDC>=30) && (input->directInferenceFlag==0)) + { + printf("\nLevelIDC 3.0 and above require direct_8x8_inference to be set to 1. Please check your settings.\n"); + input->directInferenceFlag=1; + } + if ( ((input->LevelIDC<21) || (input->LevelIDC>41)) && (input->PicInterlace > 0 || input->MbInterlace > 0) ) + { + snprintf(errortext, ET_SIZE, "nInterlace modes only supported for LevelIDC in the range of 2.1 and 4.1. Please check your settings.\n"); + error (errortext, 500); + } + + } + Index: llvm-test/MultiSource/Applications/JM/lencod/configfile.h diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/configfile.h:1.1 *** /dev/null Sat Feb 11 04:33:42 2006 --- llvm-test/MultiSource/Applications/JM/lencod/configfile.h Sat Feb 11 04:33:22 2006 *************** *** 0 **** --- 1,271 ---- + + /*! + *********************************************************************** + * \file + * configfile.h + * \brief + * Prototypes for configfile.c and definitions of used structures. 
+ *********************************************************************** + */ + + #include "fmo.h" + + #ifndef _CONFIGFILE_H_ + #define _CONFIGFILE_H_ + + #define DEFAULTCONFIGFILENAME "encoder.cfg" + + #define PROFILE_IDC 88 + #define LEVEL_IDC 21 + + + typedef struct { + char *TokenName; + void *Place; + int Type; + double Default; + int param_limits; //! 0: no limits, 1: both min and max, 2: only min (i.e. no negatives), 3: specialcase for QPs since min needs bitdepth_qp_scale + double min_limit; + double max_limit; + } Mapping; + + + + InputParameters configinput; + + + #ifdef INCLUDED_BY_CONFIGFILE_C + + Mapping Map[] = { + {"ProfileIDC", &configinput.ProfileIDC, 0, (double) PROFILE_IDC, 0, 0.0, 0.0 }, + {"LevelIDC", &configinput.LevelIDC, 0, (double) LEVEL_IDC, 0, 0.0, 0.0 }, + {"FrameRate", &configinput.FrameRate, 2, (double) INIT_FRAME_RATE, 1, 0.0, 100.0 }, + {"IDRIntraEnable", &configinput.idr_enable, 0, 0.0, 1, 0.0, 1.0 }, + {"StartFrame", &configinput.start_frame, 0, 0.0, 2, 0.0, 0.0 }, + {"IntraPeriod", &configinput.intra_period, 0, 0.0, 2, 0.0, 0.0 }, + {"EnableOpenGOP", &configinput.EnableOpenGOP, 0, 0.0, 3, 0.0, 1.0 }, + {"FramesToBeEncoded", &configinput.no_frames, 0, 1.0, 2, 1.0, 0.0 }, + {"QPISlice", &configinput.qp0, 0, 24.0, 3, (double) MIN_QP, (double) MAX_QP }, + {"QPPSlice", &configinput.qpN, 0, 24.0, 3, (double) MIN_QP, (double) MAX_QP }, + {"QPBSlice", &configinput.qpB, 0, 24.0, 3, (double) MIN_QP, (double) MAX_QP }, + {"FrameSkip", &configinput.jumpd, 0, 0.0, 2, 0.0, 0.0 }, + {"UseHadamard", &configinput.hadamard, 0, 0.0, 1, 0.0, 2.0 }, + {"DisableSubpelME", &configinput.DisableSubpelME, 0, 0.0, 1, 0.0, 1.0 }, + {"SearchRange", &configinput.search_range, 0, 16.0, 2, 0.0, 0.0 }, + {"NumberReferenceFrames", &configinput.num_ref_frames, 0, 1.0, 1, 1.0, 16.0 }, + {"PList0References", &configinput.P_List0_refs, 0, 0.0, 1, 0.0, 16.0 }, + {"BList0References", &configinput.B_List0_refs, 0, 0.0, 1, 0.0, 16.0 }, + {"BList1References", 
&configinput.B_List1_refs, 0, 1.0, 1, 0.0, 16.0 }, + {"Log2MaxFNumMinus4", &configinput.Log2MaxFNumMinus4, 0, 0.0, 1, -1.0, 12.0 }, + {"Log2MaxPOCLsbMinus4", &configinput.Log2MaxPOCLsbMinus4, 0, 2.0, 1, -1.0, 12.0 }, + {"GenerateMultiplePPS", &configinput.GenerateMultiplePPS, 0, 0.0, 1, 0.0, 1.0 }, + {"ResendPPS", &configinput.ResendPPS, 0, 0.0, 1, 0.0, 1.0 }, + {"SourceWidth", &configinput.img_width, 0, 176.0, 2, 16.0, 0.0 }, + {"SourceHeight", &configinput.img_height, 0, 144.0, 2, 16.0, 0.0 }, + {"MbLineIntraUpdate", &configinput.intra_upd, 0, 0.0, 1, 0.0, 1.0 }, + {"SliceMode", &configinput.slice_mode, 0, 0.0, 1, 0.0, 3.0 }, + {"SliceArgument", &configinput.slice_argument, 0, 0.0, 2, 1.0, 1.0 }, + {"UseConstrainedIntraPred", &configinput.UseConstrainedIntraPred, 0, 0.0, 1, 0.0, 1.0 }, + {"InputFile", &configinput.infile, 1, 0.0, 0, 0.0, 0.0 }, + {"InputHeaderLength", &configinput.infile_header, 0, 0.0, 2, 0.0, 1.0 }, + {"OutputFile", &configinput.outfile, 1, 0.0, 0, 0.0, 0.0 }, + {"ReconFile", &configinput.ReconFile, 1, 0.0, 0, 0.0, 0.0 }, + {"TraceFile", &configinput.TraceFile, 1, 0.0, 0, 0.0, 0.0 }, + {"DisposableP", &configinput.DisposableP, 0, 0.0, 1, 0.0, 1.0 }, + {"DispPQPOffset", &configinput.DispPQPOffset, 0, 0.0, 0,-51.0, 51.0 }, + {"NumberBFrames", &configinput.successive_Bframe, 0, 0.0, 2, 0.0, 0.0 }, + {"BRefPicQPOffset", &configinput.qpBRSOffset, 0, 0.0, 0,-51.0, 51.0 }, + {"DirectModeType", &configinput.direct_spatial_mv_pred_flag, 0, 0.0, 1, 0.0, 1.0 }, + {"DirectInferenceFlag", &configinput.directInferenceFlag, 0, 0.0, 1, 0.0, 1.0 }, + {"SPPicturePeriodicity", &configinput.sp_periodicity, 0, 0.0, 2, 0.0, 0.0 }, + {"QPSPSlice", &configinput.qpsp, 0, 24.0, 3, (double) MIN_QP, (double) MAX_QP }, + {"QPSP2Slice", &configinput.qpsp_pred, 0, 24.0, 3, (double) MIN_QP, (double) MAX_QP }, + {"SymbolMode", &configinput.symbol_mode, 0, 0.0, 1, (double) UVLC, (double) CABAC }, + {"OutFileMode", &configinput.of_mode, 0, 0.0, 1, 0.0, 1.0 }, + {"PartitionMode", 
&configinput.partition_mode, 0, 0.0, 1, 0.0, 1.0 }, + {"InterSearch16x16", &configinput.InterSearch16x16, 0, 1.0, 1, 0.0, 1.0 }, + {"InterSearch16x8", &configinput.InterSearch16x8 , 0, 1.0, 1, 0.0, 1.0 }, + {"InterSearch8x16", &configinput.InterSearch8x16, 0, 1.0, 1, 0.0, 1.0 }, + {"InterSearch8x8", &configinput.InterSearch8x8 , 0, 1.0, 1, 0.0, 1.0 }, + {"InterSearch8x4", &configinput.InterSearch8x4, 0, 1.0, 1, 0.0, 1.0 }, + {"InterSearch4x8", &configinput.InterSearch4x8, 0, 1.0, 1, 0.0, 1.0 }, + {"InterSearch4x4", &configinput.InterSearch4x4, 0, 1.0, 1, 0.0, 1.0 }, + {"IntraDisableInterOnly", &configinput.IntraDisableInterOnly, 0, 0.0, 1, 0.0, 1.0 }, + {"Intra4x4ParDisable", &configinput.Intra4x4ParDisable, 0, 0.0, 1, 0.0, 1.0 }, + {"Intra4x4DiagDisable", &configinput.Intra4x4DiagDisable, 0, 0.0, 1, 0.0, 1.0 }, + {"Intra4x4DirDisable", &configinput.Intra4x4DirDisable, 0, 0.0, 1, 0.0, 1.0 }, + {"Intra16x16ParDisable", &configinput.Intra16x16ParDisable, 0, 0.0, 1, 0.0, 1.0 }, + {"Intra16x16PlaneDisable", &configinput.Intra16x16PlaneDisable, 0, 0.0, 1, 0.0, 1.0 }, + {"EnableIPCM", &configinput.EnableIPCM, 0, 0.0, 1, 0.0, 1.0 }, + {"ChromaIntraDisable", &configinput.ChromaIntraDisable, 0, 0.0, 1, 0.0, 1.0 }, + + #ifdef _FULL_SEARCH_RANGE_ + {"RestrictSearchRange", &configinput.full_search, 0, 2.0, 1, 0.0, 2.0 }, + #endif + #ifdef _ADAPT_LAST_GROUP_ + {"LastFrameNumber", &configinput.last_frame, 0, 0.0, 2, 0.0, 0.0 }, + #endif + #ifdef _CHANGE_QP_ + {"ChangeQPI", &configinput.qp02, 0, 24.0, 3, (double) MIN_QP, (double) MAX_QP }, + {"ChangeQPP", &configinput.qpN2, 0, 24.0, 3, (double) MIN_QP, (double) MAX_QP }, + {"ChangeQPB", &configinput.qpB2, 0, 24.0, 3, (double) MIN_QP, (double) MAX_QP }, + {"ChangeQPBSRefOffset", &configinput.qpBRS2Offset, 0, 0.0, 1,-51.0, 51.0 }, + {"ChangeQPStart", &configinput.qp2start, 0, 0.0, 2, 0.0, 0.0 }, + #endif + {"RDOptimization", &configinput.rdopt, 0, 0.0, 1, 0.0, 2.0 }, + {"DisableThresholding", &configinput.disthres, 0, 0.0, 1, 0.0, 
1.0 }, + {"DisableBSkipRDO", &configinput.nobskip, 0, 0.0, 1, 0.0, 1.0 }, + {"LossRateA", &configinput.LossRateA, 0, 0.0, 2, 0.0, 0.0 }, + {"LossRateB", &configinput.LossRateB, 0, 0.0, 2, 0.0, 0.0 }, + {"LossRateC", &configinput.LossRateC, 0, 0.0, 2, 0.0, 0.0 }, + {"NumberOfDecoders", &configinput.NoOfDecoders, 0, 0.0, 2, 0.0, 0.0 }, + {"RestrictRefFrames", &configinput.RestrictRef , 0, 0.0, 1, 0.0, 1.0 }, + #ifdef _LEAKYBUCKET_ + {"NumberofLeakyBuckets", &configinput.NumberLeakyBuckets, 0, 2.0, 1, 2.0, 255.0 }, + {"LeakyBucketRateFile", &configinput.LeakyBucketRateFile, 1, 0.0, 0, 0.0, 0.0 }, + {"LeakyBucketParamFile", &configinput.LeakyBucketParamFile, 1, 0.0, 0, 0.0, 0.0 }, + #endif + {"PicInterlace", &configinput.PicInterlace, 0, 0.0, 1, 0.0, 2.0 }, + {"MbInterlace", &configinput.MbInterlace, 0, 0.0, 1, 0.0, 2.0 }, + + {"IntraBottom", &configinput.IntraBottom, 0, 0.0, 1, 0.0, 1.0 }, + + {"NumberFramesInEnhancementLayerSubSequence", &configinput.NumFramesInELSubSeq, 0, 0.0, 2, 0.0, 0.0 }, + {"NumberOfFrameInSecondIGOP",&configinput.NumFrameIn2ndIGOP, 0, 0.0, 2, 0.0, 0.0 }, + {"RandomIntraMBRefresh", &configinput.RandomIntraMBRefresh, 0, 0.0, 2, 0.0, 0.0 }, + + + {"WeightedPrediction", &configinput.WeightedPrediction, 0, 0.0, 1, 0.0, 1.0 }, + {"WeightedBiprediction", &configinput.WeightedBiprediction, 0, 0.0, 1, 0.0, 2.0 }, + {"UseWeightedReferenceME", &configinput.UseWeightedReferenceME, 0, 0.0, 1, 0.0, 1.0 }, + {"RDPictureDecision", &configinput.RDPictureDecision, 0, 0.0, 1, 0.0, 1.0 }, + {"RDPictureIntra", &configinput.RDPictureIntra, 0, 0.0, 1, 0.0, 1.0 }, + {"RDPSliceWeightOnly", &configinput.RDPSliceWeightOnly, 0, 1.0, 1, 0.0, 1.0 }, + {"RDPSliceBTest", &configinput.RDPSliceBTest, 0, 0.0, 1, 0.0, 1.0 }, + {"RDBSliceWeightOnly", &configinput.RDBSliceWeightOnly, 0, 0.0, 1, 0.0, 1.0 }, + + {"SkipIntraInInterSlices", &configinput.SkipIntraInInterSlices, 0, 0.0, 1, 0.0, 1.0 }, + {"BReferencePictures", &configinput.BRefPictures, 0, 0.0, 1, 0.0, 2.0 }, + 
{"PyramidCoding", &configinput.PyramidCoding, 0, 0.0, 1, 0.0, 3.0 }, + {"PyramidLevelQPEnable", &configinput.PyramidLevelQPEnable, 0, 0.0, 1, 0.0, 1.0 }, + {"ExplicitPyramidFormat", &configinput.ExplicitPyramidFormat, 1, 0.0, 0, 0.0, 0.0 }, + {"PyramidRefReorder", &configinput.PyramidRefReorder, 0, 0.0, 1, 0.0, 1.0 }, + {"PocMemoryManagement", &configinput.PocMemoryManagement, 0, 0.0, 1, 0.0, 1.0 }, + + {"BiPredMotionEstimation", &configinput.BiPredMotionEstimation, 0, 0.0, 1, 0.0, 1.0 }, + {"BiPredMERefinements", &configinput.BiPredMERefinements, 0, 0.0, 1, 0.0, 5.0 }, + {"BiPredMESearchRange", &configinput.BiPredMESearchRange, 0, 8.0, 2, 0.0, 0.0 }, + {"BiPredMESubPel", &configinput.BiPredMESubPel, 0, 1.0, 1, 0.0, 2.0 }, + + {"LoopFilterParametersFlag", &configinput.LFSendParameters, 0, 0.0, 1, 0.0, 1.0 }, + {"LoopFilterDisable", &configinput.LFDisableIdc, 0, 0.0, 1, 0.0, 2.0 }, + {"LoopFilterAlphaC0Offset", &configinput.LFAlphaC0Offset, 0, 0.0, 1, -6.0, 6.0 }, + {"LoopFilterBetaOffset", &configinput.LFBetaOffset, 0, 0.0, 1, -6.0, 6.0 }, + {"SparePictureOption", &configinput.SparePictureOption, 0, 0.0, 1, 0.0, 1.0 }, + {"SparePictureDetectionThr", &configinput.SPDetectionThreshold, 0, 0.0, 2, 0.0, 0.0 }, + {"SparePicturePercentageThr",&configinput.SPPercentageThreshold, 0, 0.0, 2, 0.0, 100.0 }, + + {"num_slice_groups_minus1", &configinput.num_slice_groups_minus1, 0, 0.0, 1, 0.0, (double)MAXSLICEGROUPIDS - 1 }, + {"slice_group_map_type", &configinput.slice_group_map_type, 0, 0.0, 1, 0.0, 6.0 }, + {"slice_group_change_direction_flag", &configinput.slice_group_change_direction_flag, 0, 0.0, 1, 0.0, 2.0 }, + {"slice_group_change_rate_minus1", &configinput.slice_group_change_rate_minus1, 0, 0.0, 2, 0.0, 1.0 }, + {"SliceGroupConfigFileName", &configinput.SliceGroupConfigFileName, 1, 0.0, 0, 0.0, 0.0 }, + + + {"UseRedundantSlice", &configinput.redundant_slice_flag, 0, 0.0, 1, 0.0, 1.0 }, + {"PicOrderCntType", &configinput.pic_order_cnt_type, 0, 0.0, 1, 0.0, 2.0 }, + + 
{"ContextInitMethod", &configinput.context_init_method, 0, 0.0, 1, 0.0, 1.0 }, + {"FixedModelNumber", &configinput.model_number, 0, 0.0, 1, 0.0, 2.0 }, + + {"Transform8x8Mode", &configinput.Transform8x8Mode, 0, 0.0, 1, 0.0, 2.0 }, + {"ReportFrameStats", &configinput.ReportFrameStats, 0, 0.0, 1, 0.0, 1.0 }, + {"DisplayEncParams", &configinput.DisplayEncParams, 0, 0.0, 1, 0.0, 1.0 }, + {"Verbose", &configinput.Verbose, 0, 1.0, 1, 0.0, 2.0 }, + // Rate Control + {"RateControlEnable", &configinput.RCEnable, 0, 0.0, 1, 0.0, 1.0 }, + {"Bitrate", &configinput.bit_rate, 0, 0.0, 2, 0.0, 0.0 }, + {"InitialQP", &configinput.SeinitialQP, 0, 0.0, 3, (double) MIN_QP, (double) MAX_QP }, + {"BasicUnit", &configinput.basicunit, 0, 0.0, 2, 0.0, 0.0 }, + {"ChannelType", &configinput.channel_type, 0, 0.0, 1, 0.0, 1.0 }, + + // Q_Matrix + {"QmatrixFile", &configinput.QmatrixFile, 1, 0.0, 0, 0.0, 0.0 }, + {"ScalingMatrixPresentFlag", &configinput.ScalingMatrixPresentFlag, 0, 0.0, 1, 0.0, 3.0 }, + {"ScalingListPresentFlag0", &configinput.ScalingListPresentFlag[0], 0, 0.0, 1, 0.0, 3.0 }, + {"ScalingListPresentFlag1", &configinput.ScalingListPresentFlag[1], 0, 0.0, 1, 0.0, 3.0 }, + {"ScalingListPresentFlag2", &configinput.ScalingListPresentFlag[2], 0, 0.0, 1, 0.0, 3.0 }, + {"ScalingListPresentFlag3", &configinput.ScalingListPresentFlag[3], 0, 0.0, 1, 0.0, 3.0 }, + {"ScalingListPresentFlag4", &configinput.ScalingListPresentFlag[4], 0, 0.0, 1, 0.0, 3.0 }, + {"ScalingListPresentFlag5", &configinput.ScalingListPresentFlag[5], 0, 0.0, 1, 0.0, 3.0 }, + {"ScalingListPresentFlag6", &configinput.ScalingListPresentFlag[6], 0, 0.0, 1, 0.0, 3.0 }, + {"ScalingListPresentFlag7", &configinput.ScalingListPresentFlag[7], 0, 0.0, 1, 0.0, 3.0 }, + + // Fast ME enable + {"UseFME", &configinput.FMEnable, 0, 0.0, 1, 0.0, 3.0 }, + {"EPZSPattern", &configinput.EPZSPattern, 0, 2.0, 1, 0.0, 3.0 }, + {"EPZSDualRefinement", &configinput.EPZSDual, 0, 3.0, 1, 0.0, 4.0 }, + {"EPZSFixedPredictors", 
&configinput.EPZSFixed, 0, 2.0, 1, 0.0, 2.0 }, + {"EPZSTemporal", &configinput.EPZSTemporal, 0, 1.0, 1, 0.0, 1.0 }, + {"EPZSSpatialMem", &configinput.EPZSSpatialMem, 0, 1.0, 1, 0.0, 1.0 }, + {"EPZSMinThresScale", &configinput.EPZSMinThresScale, 0, 0.0, 0, 0.0, 0.0 }, + {"EPZSMaxThresScale", &configinput.EPZSMaxThresScale, 0, 1.0, 0, 0.0, 0.0 }, + {"EPZSMedThresScale", &configinput.EPZSMedThresScale, 0, 1.0, 0, 0.0, 0.0 }, + {"ChromaQPOffset", &configinput.chroma_qp_index_offset, 0, 0.0, 1,-51.0, 51.0 }, + + // Fidelity Range Extensions + {"BitDepthLuma", &configinput.BitDepthLuma, 0, 8.0, 1, 8.0, 12.0 }, + {"BitDepthChroma", &configinput.BitDepthChroma, 0, 8.0, 1, 8.0, 12.0 }, + {"YUVFormat", &configinput.yuv_format, 0, 1.0, 1, 0.0, 3.0 }, + {"RGBInput", &configinput.rgb_input_flag, 0, 0.0, 1, 0.0, 1.0 }, + {"CbQPOffset", &configinput.cb_qp_index_offset, 0, 0.0, 1,-51.0, 51.0 }, + {"CrQPOffset", &configinput.cr_qp_index_offset, 0, 0.0, 1,-51.0, 51.0 }, + + // Lossless Coding + {"QPPrimeYZeroTransformBypassFlag", &configinput.lossless_qpprime_y_zero_flag, 0, 0.0, 1, 0.0, 1.0 }, + + // Residue Color Transform + {"ResidueTransformFlag", &configinput.residue_transform_flag , 0, 0.0, 1, 0.0, 1.0 }, + + // Explicit Lambda Usage + {"UseExplicitLambdaParams", &configinput.UseExplicitLambdaParams, 0, 0.0, 1, 0.0, 3.0 }, + {"LambdaWeightPslice", &configinput.LambdaWeight[0], 2, 0.68, 2, 0.0, 0.0 }, + {"LambdaWeightBslice", &configinput.LambdaWeight[1], 2, 2.00, 2, 0.0, 0.0 }, + {"LambdaWeightIslice", &configinput.LambdaWeight[2], 2, 0.65, 2, 0.0, 0.0 }, + {"LambdaWeightSPslice", &configinput.LambdaWeight[3], 2, 1.50, 2, 0.0, 0.0 }, + {"LambdaWeightSIslice", &configinput.LambdaWeight[4], 2, 0.65, 2, 0.0, 0.0 }, + {"LambdaWeightRefBslice", &configinput.LambdaWeight[5], 2, 1.50, 2, 0.0, 0.0 }, + + {"QOffsetMatrixFile", &configinput.QOffsetMatrixFile, 1, 0.0, 0, 0.0, 0.0 }, + {"OffsetMatrixPresentFlag", &configinput.OffsetMatrixPresentFlag, 0, 0.0, 1, 0.0, 1.0 }, + + // Fast 
Mode Decision + {"EarlySkipEnable", &configinput.EarlySkipEnable, 0, 0.0, 1, 0.0, 1.0 }, + {"SelectiveIntraEnable", &configinput.SelectiveIntraEnable, 0, 0.0, 1, 0.0, 1.0 }, + + // Adaptive rounding technique based on JVT-N011 + {"AdaptiveRounding", &configinput.AdaptiveRounding, 0, 0.0, 1, 0.0, 1.0 }, + {"AdaptRndPeriod", &configinput.AdaptRndPeriod, 0, 16.0, 2, 0.0, 0.0 }, + {"AdaptRndChroma", &configinput.AdaptRndChroma, 0, 0.0, 1, 0.0, 1.0 }, + {"AdaptRndWFactorIRef", &configinput.AdaptRndWFactor[1][I_SLICE], 0, 4.0, 1, 0.0, 1024.0 }, + {"AdaptRndWFactorPRef", &configinput.AdaptRndWFactor[1][P_SLICE], 0, 4.0, 1, 0.0, 1024.0 }, + {"AdaptRndWFactorBRef", &configinput.AdaptRndWFactor[1][B_SLICE], 0, 4.0, 1, 0.0, 1024.0 }, + {"AdaptRndWFactorINRef", &configinput.AdaptRndWFactor[0][I_SLICE], 0, 4.0, 1, 0.0, 1024.0 }, + {"AdaptRndWFactorPNRef", &configinput.AdaptRndWFactor[0][P_SLICE], 0, 4.0, 1, 0.0, 1024.0 }, + {"AdaptRndWFactorBNRef", &configinput.AdaptRndWFactor[0][B_SLICE], 0, 4.0, 1, 0.0, 1024.0 }, + + {NULL, NULL, -1, 0.0, 0, 0.0, 0.0 } + }; + + #endif + + #ifndef INCLUDED_BY_CONFIGFILE_C + extern Mapping Map[]; + #endif + + + void Configure (int ac, char *av[]); + void PatchInputNoFrames(); + + #endif + Index: llvm-test/MultiSource/Applications/JM/lencod/context_ini.c diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/context_ini.c:1.1 *** /dev/null Sat Feb 11 04:33:42 2006 --- llvm-test/MultiSource/Applications/JM/lencod/context_ini.c Sat Feb 11 04:33:22 2006 *************** *** 0 **** --- 1,365 ---- + + /*! 
+ ************************************************************************************* + * \file context_ini.c + * + * \brief + * CABAC context initializations + * + * \author + * Main contributors (see contributors.h for copyright, address and affiliation details) + * - Detlev Marpe + * - Heiko Schwarz + ************************************************************************************** + */ + + #define CONTEXT_INI_C + + #include + #include + + #include "global.h" + + #include "ctx_tables.h" + #include "cabac.h" + + #define DEFAULT_CTX_MODEL 0 + #define RELIABLE_COUNT 32.0 + #define FRAME_TYPES 4 + #define FIXED 0 + + + int num_mb_per_slice; + int number_of_slices; + int*** initialized; + int*** model_number; + + + double entropy [128]; + double probability[128] = + { + 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, + 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, + 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, + 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, + 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, + 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, + 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, + 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, + //-------------------------------------------------------------------------------- + 0.500000, 0.474609, 0.450507, 0.427629, 0.405912, 0.385299, 0.365732, 0.347159, + 0.329530, 0.312795, 0.296911, 0.281833, 0.267520, 0.253935, 0.241039, 0.228799, + 0.217180, 0.206151, 0.195682, 0.185744, 0.176312, 0.167358, 0.158859, 0.150792, + 0.143134, 0.135866, 0.128966, 0.122417, 0.116200, 0.110299, 0.104698, 0.099381, + 0.094334, 0.089543, 0.084996, 0.080680, 0.076583, 0.072694, 0.069002, 0.065498, + 0.062172, 0.059014, 0.056018, 0.053173, 0.050473, 0.047909, 
0.045476, 0.043167, + 0.040975, 0.038894, 0.036919, 0.035044, 0.033264, 0.031575, 0.029972, 0.028450, + 0.027005, 0.025633, 0.024332, 0.023096, 0.021923, 0.020810, 0.019753, 0.018750 + }; + + + + void create_context_memory () + { + int i, j, k; + int num_mb = img->FrameSizeInMbs; // number of macroblocks for frame + + num_mb_per_slice = (input->slice_mode==1 ? input->slice_argument : num_mb); + number_of_slices = (num_mb + num_mb_per_slice - 1) / num_mb_per_slice; + + if ((initialized = (int***) malloc (3 * sizeof(int**))) == NULL) + { + no_mem_exit ("create_context_memory: initialized"); + } + if ((model_number = (int***) malloc (3 * sizeof(int**))) == NULL) + { + no_mem_exit ("create_context_memory: model_number"); + } + + for (k=0; k<3; k++) + { + if ((initialized[k] = (int**) malloc (FRAME_TYPES * sizeof(int*))) == NULL) + { + no_mem_exit ("create_context_memory: initialized"); + } + if ((model_number[k]= (int**) malloc (FRAME_TYPES * sizeof(int*))) == NULL) + { + no_mem_exit ("create_context_memory: model_number"); + } + + for (i=0; itype==I_SLICE) biari_init_context (&(ctx[i][j]), &(tab ## _I[num][i][j][0])); \ + else biari_init_context (&(ctx[i][j]), &(tab ## _P[num][i][j][0])); \ + } \ + } + #define BIARI_CTX_INIT1(jj,ctx,tab,num) \ + { \ + for (j=0; jtype==I_SLICE) biari_init_context (&(ctx[j]), &(tab ## _I[num][0][j][0])); \ + else biari_init_context (&(ctx[j]), &(tab ## _P[num][0][j][0])); \ + } \ + } + + + + void SetCtxModelNumber () + { + int frame_field = img->field_picture; + int img_type = img->type; + int ctx_number = img->currentSlice->start_mb_nr / num_mb_per_slice; + + if(img->type==I_SLICE) + { + img->model_number=DEFAULT_CTX_MODEL; + return; + } + if(input->context_init_method==FIXED) + { + img->model_number=input->model_number; + return; + } + + if (initialized [frame_field][img_type][ctx_number]) + { + img->model_number = model_number[frame_field][img_type][ctx_number]; + } + else if (ctx_number && 
initialized[frame_field][img_type][ctx_number-1]) + { + img->model_number = model_number[frame_field][img_type][ctx_number-1]; + } + else + { + img->model_number = DEFAULT_CTX_MODEL; + } + } + + + + void init_contexts () + { + MotionInfoContexts* mc = img->currentSlice->mot_ctx; + TextureInfoContexts* tc = img->currentSlice->tex_ctx; + int i, j; + + //--- motion coding contexts --- + BIARI_CTX_INIT2 (3, NUM_MB_TYPE_CTX, mc->mb_type_contexts, INIT_MB_TYPE, img->model_number); + BIARI_CTX_INIT2 (2, NUM_B8_TYPE_CTX, mc->b8_type_contexts, INIT_B8_TYPE, img->model_number); + BIARI_CTX_INIT2 (2, NUM_MV_RES_CTX, mc->mv_res_contexts, INIT_MV_RES, img->model_number); + BIARI_CTX_INIT2 (2, NUM_REF_NO_CTX, mc->ref_no_contexts, INIT_REF_NO, img->model_number); + BIARI_CTX_INIT1 ( NUM_DELTA_QP_CTX, mc->delta_qp_contexts, INIT_DELTA_QP, img->model_number); + BIARI_CTX_INIT1 ( NUM_MB_AFF_CTX, mc->mb_aff_contexts, INIT_MB_AFF, img->model_number); + BIARI_CTX_INIT1 ( NUM_TRANSFORM_SIZE_CTX, mc->transform_size_contexts, INIT_TRANSFORM_SIZE, img->model_number); + + //--- texture coding contexts --- + BIARI_CTX_INIT1 ( NUM_IPR_CTX, tc->ipr_contexts, INIT_IPR, img->model_number); + BIARI_CTX_INIT1 ( NUM_CIPR_CTX, tc->cipr_contexts, INIT_CIPR, img->model_number); + BIARI_CTX_INIT2 (3, NUM_CBP_CTX, tc->cbp_contexts, INIT_CBP, img->model_number); + BIARI_CTX_INIT2 (NUM_BLOCK_TYPES, NUM_BCBP_CTX, tc->bcbp_contexts, INIT_BCBP, img->model_number); + BIARI_CTX_INIT2 (NUM_BLOCK_TYPES, NUM_MAP_CTX, tc->map_contexts, INIT_MAP, img->model_number); + BIARI_CTX_INIT2 (NUM_BLOCK_TYPES, NUM_LAST_CTX, tc->last_contexts, INIT_LAST, img->model_number); + BIARI_CTX_INIT2 (NUM_BLOCK_TYPES, NUM_ONE_CTX, tc->one_contexts, INIT_ONE, img->model_number); + BIARI_CTX_INIT2 (NUM_BLOCK_TYPES, NUM_ABS_CTX, tc->abs_contexts, INIT_ABS, img->model_number); + BIARI_CTX_INIT2 (NUM_BLOCK_TYPES, NUM_MAP_CTX, tc->fld_map_contexts, INIT_FLD_MAP, img->model_number); + BIARI_CTX_INIT2 (NUM_BLOCK_TYPES, NUM_LAST_CTX, 
tc->fld_last_contexts,INIT_FLD_LAST, img->model_number); + } + + + + + + double XRate (BiContextTypePtr ctx, const int* model) + { + int ctx_state, mod_state; + double weight, xr = 0.0; + int qp = max(0,img->qp); + + weight = min (1.0, (double)ctx->count/(double)RELIABLE_COUNT); + + mod_state = ((model[0]*qp)>>4)+model[1]; + mod_state = min (max (0, mod_state), 127); + ctx_state = (ctx->MPS ? 64+ctx->state : 63-ctx->state); + + xr -= weight * probability[ ctx_state] * entropy[ mod_state]; + xr -= weight * probability[127-ctx_state] * entropy[127-mod_state]; + + return xr; + } + + #define ADD_XRATE2(ii,jj,ctx,tab,num) \ + { \ + for (i=0; itype==I_SLICE) xr += XRate (&(ctx[i][j]), &(tab ## _I[num][i][j][0])); \ + else xr += XRate (&(ctx[i][j]), &(tab ## _P[num][i][j][0])); \ + } \ + } + #define ADD_XRATE1(jj,ctx,tab,num) \ + { \ + for (j=0; jtype==I_SLICE) xr += XRate (&(ctx[j]), &(tab ## _I[num][0][j][0])); \ + else xr += XRate (&(ctx[j]), &(tab ## _P[num][0][j][0])); \ + } \ + } + + + void GetCtxModelNumber (int* mnumber, MotionInfoContexts* mc, TextureInfoContexts* tc) + { + int model, j, i; + int num_models = (img->type==I_SLICE ? 
NUM_CTX_MODELS_I : NUM_CTX_MODELS_P); + double xr, min_xr = 1e30; + + for (model=0; modelmb_type_contexts, INIT_MB_TYPE, model); + ADD_XRATE2 (2, NUM_B8_TYPE_CTX, mc->b8_type_contexts, INIT_B8_TYPE, model); + ADD_XRATE2 (2, NUM_MV_RES_CTX, mc->mv_res_contexts, INIT_MV_RES, model); + ADD_XRATE2 (2, NUM_REF_NO_CTX, mc->ref_no_contexts, INIT_REF_NO, model); + ADD_XRATE1 ( NUM_DELTA_QP_CTX, mc->delta_qp_contexts, INIT_DELTA_QP, model); + ADD_XRATE1 ( NUM_MB_AFF_CTX, mc->mb_aff_contexts, INIT_MB_AFF, model); + ADD_XRATE1 ( NUM_TRANSFORM_SIZE_CTX, mc->transform_size_contexts, INIT_TRANSFORM_SIZE, model); + + //--- texture coding contexts --- + ADD_XRATE1 ( NUM_IPR_CTX, tc->ipr_contexts, INIT_IPR, model); + ADD_XRATE1 ( NUM_CIPR_CTX, tc->cipr_contexts, INIT_CIPR, model); + ADD_XRATE2 (3, NUM_CBP_CTX, tc->cbp_contexts, INIT_CBP, model); + ADD_XRATE2 (NUM_BLOCK_TYPES, NUM_BCBP_CTX, tc->bcbp_contexts, INIT_BCBP, model); + ADD_XRATE2 (NUM_BLOCK_TYPES, NUM_MAP_CTX, tc->map_contexts, INIT_MAP, model); + ADD_XRATE2 (NUM_BLOCK_TYPES, NUM_LAST_CTX, tc->last_contexts, INIT_LAST, model); + ADD_XRATE2 (NUM_BLOCK_TYPES, NUM_ONE_CTX, tc->one_contexts, INIT_ONE, model); + ADD_XRATE2 (NUM_BLOCK_TYPES, NUM_ABS_CTX, tc->abs_contexts, INIT_ABS, model); + ADD_XRATE2 (NUM_BLOCK_TYPES, NUM_MAP_CTX, tc->fld_map_contexts, INIT_FLD_MAP, model); + ADD_XRATE2 (NUM_BLOCK_TYPES, NUM_LAST_CTX, tc->fld_last_contexts, INIT_FLD_LAST, model); + + if (xrfield_picture; + int img_type = img->type; + int ctx_number = img->currentSlice->start_mb_nr / num_mb_per_slice; + + if( input->context_init_method ) + { + initialized [frame_field][img_type][ctx_number] = 1; + GetCtxModelNumber (model_number[frame_field][img_type]+ctx_number, img->currentSlice->mot_ctx, img->currentSlice->tex_ctx); + } + else + { + // do nothing + } + } + + + void update_field_frame_contexts (int field) + { + int i, j; + + if (field) + { + // set frame contexts + for (j=0; j>1]; + model_number[0][j][i] = model_number[1][j][i>>1]; + } + } + 
} + else + { + // set field contexts + for (j=0; j>1); i++) + { + initialized [1][j][i] = initialized [0][j][i<<1]; + model_number[1][j][i] = model_number[0][j][i<<1]; + } + } + } + } + Index: llvm-test/MultiSource/Applications/JM/lencod/context_ini.h diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/context_ini.h:1.1 *** /dev/null Sat Feb 11 04:33:42 2006 --- llvm-test/MultiSource/Applications/JM/lencod/context_ini.h Sat Feb 11 04:33:22 2006 *************** *** 0 **** --- 1,32 ---- + + /*! + ************************************************************************************* + * \file context_ini.h + * + * \brief + * CABAC context initializations + * + * \author + * Main contributors (see contributors.h for copyright, address and affiliation details) + * - Detlev Marpe + * - Heiko Schwarz + ************************************************************************************** + */ + + #ifndef _CONTEXT_INI_ + #define _CONTEXT_INI_ + + + void create_context_memory (); + void free_context_memory (); + + void init_contexts (); + void store_contexts (); + + void update_field_frame_contexts (int); + void update_rd_picture_contexts (int); + + void SetCtxModelNumber (); + + #endif + Index: llvm-test/MultiSource/Applications/JM/lencod/contributors.h diff -c /dev/null llvm-test/MultiSource/Applications/JM/lencod/contributors.h:1.1 *** /dev/null Sat Feb 11 04:33:42 2006 --- llvm-test/MultiSource/Applications/JM/lencod/contributors.h Sat Feb 11 04:33:22 2006 *************** *** 0 **** --- 1,212 ---- + + /*! \file + * contributors.h + * \brief + * List of contributors and copyright information. 
+ * + * \par Copyright statements + \verbatim + H.264 JM coder/decoder + + Copyright (C) 2000 by + Telenor Broadband Services, Norway + Ericsson Radio Systems, Sweden + TELES AG, Germany + Nokia Inc., USA + Nokia Corporation, Finland + Siemens AG, Germany + Heinrich-Hertz-Institute for Communication Technology GmbH, Germany + University of Hannover, Institut of Communication Theory and Signal Processing,Germany + Videolocus, Canada + LSI Logic, Canada + Motorola Inc., USA + Microsoft Corp., USA + Apple Computer, Inc. + RealNetworks, Inc., USA + Thomson, Inc., USA + \endverbatim + \par Full Contact Information + \verbatim + + Lowell Winger + Guy Côté + Michael Gallant + VideoLocus Inc. + 97 Randall Dr. + Waterloo, ON, Canada N2V1C5 + + Inge Lille-Langøy + Telenor Broadband Services + P.O.Box 6914 St.Olavs plass + N-0130 Oslo, Norway + + Rickard Sjoberg + Ericsson Radio Systems