From brukman at cs.uiuc.edu Mon Apr 28 17:52:01 2003 From: brukman at cs.uiuc.edu (Michael Brukman) Date: Mon Apr 28 17:52:01 2003 Subject: [llvm-commits] CVS: llvm/www/www-index.html Message-ID: <200304282251.RAA20711@tank.cs.uiuc.edu> Changes in directory llvm/www: www-index.html updated: 1.20 -> 1.21 --- Log message: Removed generator=FrontPage (is this for real??) and added link to my page. --- Diffs of the changes: Index: llvm/www/www-index.html diff -u llvm/www/www-index.html:1.20 llvm/www/www-index.html:1.21 --- llvm/www/www-index.html:1.20 Sun Apr 27 22:40:20 2003 +++ llvm/www/www-index.html Mon Apr 28 17:50:56 2003 @@ -1,9 +1,7 @@ - - + The LLVM Compiler Infrastructure Project @@ -193,8 +191,9 @@ pass
  • Cameron Buschardt - Author of the mem2reg pass
  • -
  • Misha Brukman & Brian Gaeke - Portions of the X86 - Just-In-Time compiler
  • +
  • Misha Brukman + & Brian Gaeke - Portions of the X86 Just-In-Time + compiler
  • From jstanley at cs.uiuc.edu Tue Apr 29 13:31:01 2003 From: jstanley at cs.uiuc.edu (Joel Stanley) Date: Tue Apr 29 13:31:01 2003 Subject: [llvm-commits] CVS: llvm/lib/Reoptimizer/Inst/InstManip.cpp InstManip.h Phases.cpp design.txt Message-ID: <200304291837.NAA23954@cypher.cs.uiuc.edu> Changes in directory llvm/lib/Reoptimizer/Inst: InstManip.cpp updated: 1.7 -> 1.8 InstManip.h updated: 1.8 -> 1.9 Phases.cpp updated: 1.12 -> 1.13 design.txt updated: 1.9 -> 1.10 --- Log message: Phase3-generated phase 4 slots now spill global registers properly. --- Diffs of the changes: Index: llvm/lib/Reoptimizer/Inst/InstManip.cpp diff -u llvm/lib/Reoptimizer/Inst/InstManip.cpp:1.7 llvm/lib/Reoptimizer/Inst/InstManip.cpp:1.8 --- llvm/lib/Reoptimizer/Inst/InstManip.cpp:1.7 Tue Apr 15 16:26:19 2003 +++ llvm/lib/Reoptimizer/Inst/InstManip.cpp Tue Apr 29 13:36:53 2003 @@ -12,6 +12,7 @@ #include "InstManip.h" const unsigned InstManip::NOP_INST = 0x01000000; +uint64_t InstManip::sm_phase3SpillRegion[InstManip::SHARED_SIZE]; using std::cout; using std::cerr; @@ -154,16 +155,77 @@ "Unexpected number of instructions in code sequence for call"); } +// NB: Generate restore/save currently fill the snippet (which comes from a slot) with a +// bunch of code to save and restore the global registers. This blows up the size of the +// required slot quite a bit -- it would be better to generate a call to functions +// saveGlobalRegs() and restoreGlobalRegs(), for example. However, this works for now and +// writing those functions means determining what the inline assembly should look like. +// The ifdef'd-out region below is a start, but it is incomplete and generates errors at +// assembly time. In particular, the SPARC assembly requires a '.register' directive before +// it witnesses a use of %g2, %g3, %g6, or %g7, and that doesn't appear to be emitted simply +// by using the inline assembly. :( TODO. 
+// + +#if 0 +void restoreGlobRegs() +{ + // asm ("assembly template" : "output contraints", "input contraints") + // Restore the global registers %g[1-7] from the globalRegs array. + + asm("ldx %0, %%g1"::"o" (globalRegs)); + asm("ldx %0, %%g2"::"o" (globalRegs+1)); + asm("ldx %0, %%g3"::"o" (globalRegs+2)); + asm("ldx %0, %%g4"::"o" (globalRegs+3)); + asm("ldx %0, %%g5"::"o" (globalRegs+4)); + asm("ldx %0, %%g6"::"o" (globalRegs+5)); + asm("ldx %0, %%g7"::"o" (globalRegs+6)); +} +#endif + +void InstManip::generateRestoreShared(uint64_t restoreFromAddr, + std::vector& snippet, + TargetRegister reg) const +{ + generateLoad(restoreFromAddr, snippet, reg); + + unsigned destReg = (reg == REG_0) ? R_O0 : R_O1; + + snippet.push_back(MK_LOAD_IMM(R_G1, destReg, 8)); + snippet.push_back(MK_LOAD_IMM(R_G2, destReg, 16)); + snippet.push_back(MK_LOAD_IMM(R_G3, destReg, 24)); + snippet.push_back(MK_LOAD_IMM(R_G4, destReg, 32)); + snippet.push_back(MK_LOAD_IMM(R_G5, destReg, 40)); + snippet.push_back(MK_LOAD_IMM(R_G6, destReg, 48)); + snippet.push_back(MK_LOAD_IMM(R_G7, destReg, 56)); +} + void InstManip::generateRestore(std::vector& snippet) const { // restore %o0, 0, %o0 - snippet.push_back(MK_RESTORE(R_O0, R_O0, 0)); + snippet.push_back(MK_RESTORE_IMM(R_O0, R_O0, 0)); +} + +void InstManip::generateSpillShared(uint64_t spillToAddr, + std::vector& snippet, + TargetRegister reg) const +{ + generateLoad(spillToAddr, snippet, reg); + + unsigned destReg = (reg == REG_0) ? 
R_O0 : R_O1; + + snippet.push_back(MK_STORE_IMM(R_G1, destReg, 8)); + snippet.push_back(MK_STORE_IMM(R_G2, destReg, 16)); + snippet.push_back(MK_STORE_IMM(R_G3, destReg, 24)); + snippet.push_back(MK_STORE_IMM(R_G4, destReg, 32)); + snippet.push_back(MK_STORE_IMM(R_G5, destReg, 40)); + snippet.push_back(MK_STORE_IMM(R_G6, destReg, 48)); + snippet.push_back(MK_STORE_IMM(R_G7, destReg, 56)); } void InstManip::generateSave(std::vector& snippet) const { // save %o0, 0, %o0 - snippet.push_back(MK_SAVE(R_O0, R_O0, 0)); + snippet.push_back(MK_SAVE_IMM(R_O0, R_O0, 0)); } void InstManip::generateBranchAlways(uint64_t dest, Index: llvm/lib/Reoptimizer/Inst/InstManip.h diff -u llvm/lib/Reoptimizer/Inst/InstManip.h:1.8 llvm/lib/Reoptimizer/Inst/InstManip.h:1.9 --- llvm/lib/Reoptimizer/Inst/InstManip.h:1.8 Fri Apr 18 12:29:00 2003 +++ llvm/lib/Reoptimizer/Inst/InstManip.h Tue Apr 29 13:36:53 2003 @@ -43,6 +43,11 @@ return m_insts; } + const std::vector >& getInsts() const + { + return m_insts; + } + void push_back(uint64_t addr, unsigned inst) { m_insts.push_back(std::make_pair(addr, inst)); @@ -103,7 +108,15 @@ void generateRestore(std::vector& snippet) const; void generateSave(std::vector& snippet) const; + + void generateSpillShared(uint64_t spillFromAddr, + std::vector& snippet, + TargetRegister reg = REG_0) const; + void generateRestoreShared(uint64_t restorFromAddr, + std::vector& snippet, + TargetRegister reg = REG_0) const; + void generateBranchAlways(uint64_t dest, uint64_t slotBase, std::vector& snippet, @@ -120,16 +133,21 @@ // These are functions so when InstManip is superclassed, they'd become virtual, etc. // In the short term we could use class constants, but this is more clear. 
- unsigned getNOP() const { return NOP_INST; } - unsigned getGenLoadSize() const { return 6; } - unsigned getGenCallSize() const { return 2; } - unsigned getGenBranchAlwaysSize() const { return 2; } - unsigned getGenSaveSize() const { return 1; } - unsigned getGenRestoreSize() const { return 1; } - unsigned getInstWidth() const { return 4; } + unsigned getNOP() const { return NOP_INST; } + unsigned getGenLoadSize() const { return 6; } + unsigned getGenCallSize() const { return 2; } + unsigned getGenBranchAlwaysSize() const { return 2; } + unsigned getGenSaveSize() const { return 1; } + unsigned getGenSpillSharedSize() const { return getGenLoadSize() + SHARED_SIZE; } + unsigned getGenRestoreSharedSize() const { return getGenLoadSize() + SHARED_SIZE; } + unsigned getGenRestoreSize() const { return 1; } + unsigned getInstWidth() const { return 4; } + unsigned getSharedSize() const { return SHARED_SIZE; } inline unsigned getAddressCopySize(unsigned loadInst) const; + uint64_t getPhase3SpillAddr() { return (uint64_t) sm_phase3SpillRegion; } + private: InstManip() {} @@ -154,7 +172,18 @@ static const unsigned BRANCH_ALWAYS_BASE = 0x30480000; static const unsigned NOP_INST; + // Size (in number of 64-bit words) required for storing shared registers + static const unsigned SHARED_SIZE = 7; + VirtualMem* m_pVM; + + // Memory region into which to spill shared registers when executing a phase 4 slot + // (i.e., the slot that invokes the phase4 function, the slot written by phase 3 + // invocations). NB: One region is sufficient and we do not need stack semantics + // because only one activation of a phase 4 slot ever occurs at a given time (assuming + // single-threaded execution). 
+ + static uint64_t sm_phase3SpillRegion[SHARED_SIZE]; }; void InstManip::printRange(uint64_t start, uint64_t end) const @@ -208,6 +237,5 @@ return 1; } - #endif // _INCLUDED_INSTMANIP_H Index: llvm/lib/Reoptimizer/Inst/Phases.cpp diff -u llvm/lib/Reoptimizer/Inst/Phases.cpp:1.12 llvm/lib/Reoptimizer/Inst/Phases.cpp:1.13 --- llvm/lib/Reoptimizer/Inst/Phases.cpp:1.12 Fri Apr 18 12:29:00 2003 +++ llvm/lib/Reoptimizer/Inst/Phases.cpp Tue Apr 29 13:36:53 2003 @@ -43,11 +43,13 @@ // 3c. In the new slot, write the contents of the phase 3 slot: // +---------------------------------------+ // | save registers | +// | save global registers | // | copy load-src addr to param1 register | // | load p4 struct ptr to param2 register | // | call to phase 4 | // | nop | // | restore registers | +// | restore global registers | // | branch back to orig code | // | nop | // +---------------------------------------+ @@ -56,6 +58,26 @@ // // PHASE 4: // +// 1. Examine the tag (i.e. load-src addr) passed by phase 3 +// 1a. If tag is in GBT, we have a valid candidate, so do step 2. +// 1b. If tag is not in GBT, our candidate is invalid, so delete slot and return to +// original code. +// +// 2. Set up the second phase 4 slot that will actually call the instrumentation function: +// +---------------------------------------+ +// | save registers | +// | save global registers | +// | call to inst func | +// | nop | +// | restore registers | +// | restore global registers | +// | branch back to orig code | +// | nop | +// +---------------------------------------+ +// This "instrumentation slot" may have to be expanded later to store the return value +// in an alloca'd temporary, unless the phase4 function itself can invoke the +// instrumentation function, would be *highly* ideal. +// #include #include @@ -79,11 +101,13 @@ // obtained in the same manner. 
extern unsigned ppGBTSize; -extern struct PrimInfo { +typedef struct PrimInfo { unsigned gbtType; unsigned short* loadVar; unsigned gbtStartIdx; -} ppGBT[]; +}; + +extern PrimInfo ppGBT[]; typedef std::pair AddressRange; @@ -184,8 +208,8 @@ }; // Phase3 is the class that is responsible for making the "phase 3" transformation; the -// global function phase3() is responsible for constructing a one Phase3 instance per -// invocation and for deallocating the originating slot. +// global function phase3() is responsible for constructing one Phase3 instance per +// invocation and invoking transform on it. class Phase3 { @@ -206,6 +230,28 @@ InstManip m_instManip; }; +// Phase4 is the class that is responsible for making the "phase 4" transformation; the +// global function phase4() is responsible for constructing one Phase4 instance per +// invocation and invoking transform on it. + +class Phase4 +{ + public: + Phase4(uint64_t tag, Phase4Info* p4info); + ~Phase4(); + + void transform(); + + private: + Phase4(): m_instManip(0) {} + + inline unsigned getSlotSize() const; + + Phase4Info* m_pPhase4Info; + TraceCache* m_pTraceCache; + InstManip m_instManip; + uint64_t m_tag; // Entry to look for in the GBT +}; //////////////// Phase 2 implementation //////////////// @@ -250,12 +296,12 @@ static void copySnippetToSlot(vector& snippet, uint64_t slotBase, VirtualMem* vm, - InstManip* im) + InstManip& im) { uint64_t currAddr = slotBase; for(vector::iterator i = snippet.begin(), e = snippet.end(); i != e; ++i) { vm->writeInstToVM(currAddr, *i); - currAddr += im->getInstWidth(); + currAddr += im.getInstWidth(); } } @@ -289,7 +335,7 @@ // Copy the snippet code into the slot assert(snippet.size() == getSlotSize() && "Snippet size does not match slot size"); - copySnippetToSlot(snippet, slotBase, vm, &m_instManip); + copySnippetToSlot(snippet, slotBase, vm, m_instManip); } unsigned Phase2::getSlotSize() const @@ -344,6 +390,21 @@ delete m_pPhase3Info; } +static uint64_t 
replaceInstWithBrToSlot(uint64_t srcAddr, + unsigned slotSize, + TraceCache* tc, + InstManip& im) +{ + // Obtain a new slot of the given size + uint64_t slotBase = tc->getMemMgr()->getMemory(slotSize); + assert(slotBase && "Unable to obtain memory from MemoryManager instance"); + + // Replace instruction at srcAddr with branch to start of new slot + tc->getVM()->writeInstToVM(srcAddr, im.getBranchAlways(slotBase, srcAddr)); + + return slotBase; +} + void Phase3::processCandidates(vector& candidates) { // For each load candidate, obtain a new slot and write the phase 3 slot region @@ -352,6 +413,7 @@ for(vector::iterator i = candidates.begin(), e = candidates.end(); i != e; ++i) { cerr << "Transforming " << *i << endl; +#if 0 uint64_t slotBase = m_pTraceCache->getMemMgr()->getMemory(getSlotSize(*i)); assert(slotBase && "Unable to obtain memory from MemoryManger instance"); @@ -359,6 +421,10 @@ VirtualMem* vm = m_pTraceCache->getVM(); uint64_t loadAddr = i->front().first; vm->writeInstToVM(loadAddr, m_instManip.getBranchAlways(slotBase, loadAddr)); +#endif + // Replace load candidate instruction with a branch to the start of a new slot. 
+ uint64_t slotBase = replaceInstWithBrToSlot(i->front().first, getSlotSize(*i), + m_pTraceCache, m_instManip); // Generate a) code to save the registers, b) instruction(s) to store the load // source address into a phase4 parameter register, c) the load of (the @@ -368,11 +434,15 @@ Phase4Info* p4info = new Phase4Info(*i, slotBase, getSlotSize(*i), m_pTraceCache); + uint64_t spillAddr = m_instManip.getPhase3SpillAddr(); + vector snippet; m_instManip.generateSave(snippet); - m_instManip.generateAddressCopy(i->front().second, snippet); + m_instManip.generateAddressCopy(i->front().second, snippet); // Uses InstManip::REG_0, live to call + m_instManip.generateSpillShared(spillAddr, snippet, InstManip::REG_1); m_instManip.generateLoad((uint64_t) p4info, snippet, InstManip::REG_1); m_instManip.generateCall((uint64_t) &phase4, slotBase, snippet); + m_instManip.generateRestoreShared(spillAddr, snippet); m_instManip.generateRestore(snippet); m_instManip.generateBranchAlways(i->front().first, slotBase, snippet); @@ -387,7 +457,7 @@ // Copy the snippet code into the slot assert(snippet.size() == getSlotSize(*i) && "Snippet size does not match slot size"); - copySnippetToSlot(snippet, slotBase, vm, &m_instManip); + copySnippetToSlot(snippet, slotBase, m_pTraceCache->getVM(), m_instManip); // just one candidate for now break; @@ -401,8 +471,10 @@ return m_instManip.getGenSaveSize() + m_instManip.getAddressCopySize(cand.front().second) + + m_instManip.getGenSpillSharedSize() + m_instManip.getGenLoadSize() + m_instManip.getGenCallSize() + + m_instManip.getGenRestoreSharedSize() + m_instManip.getGenRestoreSize() + m_instManip.getGenBranchAlwaysSize(); } @@ -422,33 +494,120 @@ //////////////// Phase4 implementation //////////////// -void phase4(uint64_t tag, Phase4Info* p4info) +void phase4(uint64_t tag, Phase4Info* p4info) { - cerr << "phase4 invoked!" 
<< endl; + cerr << "phase 4 fcn, tag is " << tag << endl; + Phase4 p4(tag, p4info); + p4.transform(); +} - cerr << "tag is " << std::hex << tag << endl; +Phase4::Phase4(uint64_t tag, Phase4Info* p4info): + m_pPhase4Info(p4info), + m_pTraceCache(p4info->getTraceCache()), + m_instManip(p4info->getTraceCache()->getVM()), + m_tag(tag) +{ + cerr << "phase4 ctor: tag is " << tag << endl; + cerr << "================ Begin Phase 4 ================\n"; +} - cerr << "inst candidate inside info structure is: " << endl; - cerr << p4info->getCandidate() << endl; +Phase4::~Phase4() +{ + // Deallocate the originating slot (i.e. the slot that invoked us). + // + // NB: Yes, we are, in fact, deallocating a memory segment (i.e., the slot obtained by + // the TraceCache's MemoryManager instance) before returning to it. This is not a + // problem for single-threaded codes, because no threads may claim that memory and + // write to it. However, it does indeed pose a problem for multi-threaded codes. A + // modification to the general mechanism itself is required to achieve thread-safety. - // (TEMP) For now, restore the candidate load to its original position for debugging - // purposes. 
+ uint64_t slotBase = m_pPhase4Info->getSlot(); + unsigned slotSize = m_pPhase4Info->getSlotSize(); + m_pTraceCache->getMemMgr()->freeTraceMemory(slotBase, slotSize); - p4info->getTraceCache()->getVM()->writeInstToVM(p4info->getCandidate().front().first, - p4info->getCandidate().front().second); - delete p4info; + // Deallocate the parameter structure + delete m_pPhase4Info; +} - cerr << "ppGBT is: " << ppGBT << endl; - cerr << "ppGBTSize is: " << ppGBTSize << endl; +static void dumpGBT(std::ostream& ostr) +{ + ostr << "ppGBT is: " << ppGBT << endl; + ostr << "ppGBTSize is: " << ppGBTSize << endl; - for(int i = 0; i < ppGBTSize; ++i) { - cerr << "ppGBT[" << i << "]: " << ppGBT[i].gbtType << ", " + for(unsigned i = 0; i < ppGBTSize; ++i) { + ostr << "ppGBT[" << i << "]: " << ppGBT[i].gbtType << ", " << ppGBT[i].loadVar << ", " << ppGBT[i].gbtStartIdx << endl; } - - // tmp - if(tag == (uint64_t)(ppGBT[0].loadVar)) { - cerr << "TAG MATCHES, BOYYYYYYYYYYY!" << endl; +} + +static PrimInfo* searchGBT(uint64_t tag) +{ + // Traverse the GBT and determine if the tag is there. + for(unsigned i = 0; i < ppGBTSize; ++i) { + uint64_t tagInTable = (uint64_t) ppGBT[i].loadVar; + if(tagInTable == tag) + return &ppGBT[i]; } - // tmp + return 0; +} + +void fakeInstFunc(double* param) +{ + cerr << "I AM AN INSTRUMENTATION FUNCTION, FEAR ME!" << endl; + *param = 3.14; +} + +void Phase4::transform() +{ + cerr << "tag is " << m_tag << endl; + dumpGBT(cerr); + + if(PrimInfo* pi = searchGBT(m_tag)) { + cerr << "Tag matches." << endl; + + const InstCandidate& cand = m_pPhase4Info->getCandidate(); +#if 0 + // Make a new slot that calls the instrumentation function, inserting a branch to + // it over the original code. + + uint64_t slotBase = replaceInstWithBrToSlot(cand.front().first, getSlotSize(), + m_pTraceCache, m_instManip); +#endif + + // Write NOPs over the original instructions that were associated with the elected + // candidate, but leave the branch instruction intact. 
+ + VirtualMem* vm = m_pTraceCache->getVM(); + for(vector >::const_iterator i = cand.getInsts().begin() + 1, + e = cand.getInsts().end(); i != e; ++i) + vm->writeInstToVM(i->first, m_instManip.getNOP()); + + // Write the instructions to call the instrumentation function + + void* instFuncVP = (void*) fakeInstFunc; // From the GBT eventually + void (*instFunc)(void*) = (void (*)(void*)) instFuncVP; + + void* mem = malloc(sizeof(double)); + instFunc(mem); + printf("%f\n", *((double*) mem)); + free(mem); + } + else { + cerr << "Could not find tag" << endl; + // The candidate failed to get elected, so pack up and go home. Restore the + // replaced instruction (i.e. the branch that invoked this code) with the original + // instruction at that location. + + VirtualMem* vm = m_pPhase4Info->getTraceCache()->getVM(); + vm->writeInstToVM(m_pPhase4Info->getCandidate().front().first, + m_pPhase4Info->getCandidate().front().second); + } + + // (TEMP) For now, restore the candidate load to its original position for debugging + // purposes. + + m_pPhase4Info->getTraceCache()->getVM()->writeInstToVM(m_pPhase4Info->getCandidate().front().first, + m_pPhase4Info->getCandidate().front().second); + + cerr << "================ End Phase 4 ================\n"; } Index: llvm/lib/Reoptimizer/Inst/design.txt diff -u llvm/lib/Reoptimizer/Inst/design.txt:1.9 llvm/lib/Reoptimizer/Inst/design.txt:1.10 --- llvm/lib/Reoptimizer/Inst/design.txt:1.9 Fri Apr 18 12:29:00 2003 +++ llvm/lib/Reoptimizer/Inst/design.txt Tue Apr 29 13:36:53 2003 @@ -886,12 +886,15 @@ {{{ TODO - - Investigate trace-cache dummy function mechanisms, decide on approach A or B - in phase outline + - Get phase 2 allocation of spill space working, write spill code (to spill space) for + phase 3 invocation. (Currently NO spilling is being done, which is not safe) - - Implement phase outline + - Ensure phase 3 writes proper spill code for phase 4 invocation. 
(One spill space + should be sufficient) - - Read EEL paper to get a better feel for binary modification issues + - Start table-of-stacks implementation for phase4 authorship of phase 5 slots. + + - Write phase 5 slot generation code, phase 5 function itself, etc. }}} @@ -1006,13 +1009,10 @@ Approach A: 3e. Write phase 4 code in slot: - if(actually an instrumentation site) - rewrite branch at C to next instruction - call proper instrumentation fnction <- C branches to here - branch back to C - else - restore original instructions - branch back to C + + Load address being loaded by candidate load instruction. + Call phase 4 function + branch back to C Approach B: @@ -1025,7 +1025,134 @@ restore original instructions branch back to C - In phase 4: No special action needed. + In phase 4: + + Actions of phase 4 function. + + 1. Check tag to verify GBT membership. If not found in GBT, do nothing besides + return to the original code, etc. + + 2. Assuming tag is valid, we must decide between one of two approaches at this + juncture: + + a) Try to invoke the instrumentation function directly from within phase 4. + b) Write code in yet another slot that will invoke the inst function. + + The primary problem to solve in both of these approaches is how to allocate space for + values that are stored to / read from by the instrumentation function. That is, for + point metrics, we must construct the semantic equivalent of a function call like: + + foo = someInstFunc(); + + where storage for foo has already been allocated by phase 1 (hence we can store its + address in the GBT). However, we know nothing about the *type* of the return value, + only its size. We must determine the conventions of the call mechanisms for passing + back large (i.e. bigger than a register size) objects by value. 
We can call + someInstFunc easily enough, but we must know how to write the code (using either + approach a or b above) to take the return value of the function and store it to the + metric variable. Phase 1 can store the address and the size of this variable, so it + should be simple enough to take the returned-by-value return value of the + instrumentation function and perform a memcpy to the appropriate location. This works + fine for point metrics, but the problem is worsened significantly by region metrics, + because we must have a temporary value around to store the return value of the start + function and pass it by address to the end function...this probably has to be + accomplished via heap storage -- we had previously thought that alloca would be + sufficient, but I don't think this works (the runtime stack is manipulated in between + the time that the alloca'd variable would be stored to and the time that it would be + read, in the case of interval metrics). + + Looks like we're going to have to do everything from the standpoint of parameter sizes, + memcpy's, and heap-allocated temporaries. The only way I can think of to do this in a + straightforward manner is to use the phase 4 function itself to do the call to the inst + function and the subsequent memcpy. But then we must compile the call to the inst + function, and we don't know what type it returns, because this information is not + preserved. If it returns, say, a scalar double, how do we store this value in a + temporary and copy it to the metric variable (in the case of point metrics, for + example)? The only thing I can think of at present is to change the signature + conventions...instead of an instrumentation function returning stuff by value, it is + instead passed a pointer parameter. 
Since we will know the sizes of the types from + phase 1, we can always heap-allocate the appropriately-sized parameter and pass this + raw pointer in to be used by the function as appropriate...but what kinds of problems + can be caused here? This is a good topic for conversation with Vikram...talked to + Vikram, and the problem is worse than I had originally thought. Heap allocation isn't + really an option because we would have to have one heap alloc/dealloc per interval + invocation, which is just too expensive. + + A more accurate assessment of the problem. + + We must have a stack-oriented way of saving temporary values between the start interval + function and the end interval function. We had thought that we could do this via + alloca (i.e. manipulation of the stack pointer to obtain new space). However, the only + way this can occur is if we use the current stack frame. Let's say that we want to + allocate n slots (i.e. n * 8 bytes). Then, we would do: + + %sp = %sp + (n * 8) + %reg = %sp + B + X + + Where reg is just some register (we must spill/restore it before we clobber it here), + and X is the offset from %sp + B to the location on the stack where the newly-alloca'd + region is to start. This must be "lower" than any previous allocas but "higher" than + the end of the contents starting at %sp + B (B is the bias). According to the SparcV9 + ABI, the size of X is equal to 128 bytes (for register spills) + 48 bytes (6 outgoing + registers, each with extended word size) + Q, where Q is the space required for "extra + outgoing arguments", that is, arguments to functions beyond the 6th. Q is equal to the + greatest number of parameters of any function call within the function body associated + with the stack frame (-6, or 0 if no call has parameter width exceeding 6). 
For + example, if S is the stack frame associated with a particular invocation of the + function foo, and foo called some function, bar, that took 10 parameters, and no other + function was called by foo that had greater than 10 parameters, Q would exactly equal + 4. + + The problem is that, although the value of Q is known at compile-time, determining it + during phase 1 is premature (the vendor compiler may arbitrarily add arguments to + functions, for example), and determining it at runtime (on the assembly code itself) is + quite possibly not feasible (indirect functions, no way to really determine what are + parameters and what are not, etc). We currently do not have a way to obtain this value, + and so an attempt to solve this problem using the alloca approach must be abandoned. + + One easy solution that presents itself is to do heap-based stuff, but this is very + inefficient and also quite expensive. + + Idea: Manage a stack on the side. The objection to this is that it involves extra + function calls. + + In the meantime, in the interests of making forward progress, can we do anything with + heap allocation? Remember that we need a stack region into which we can spill the + global & FP registers, as well as the data between start- and end-interval functions. + + A note about saving/restoring the global and FP registers, we know that the slot + created by phase 4 executes only once. Hence, it is valid to have phase 3 heap-allocate + a region large enough to spill the registers (the spill code would be placed in the + slot that calls the phase 4 function) and restore the registers. The phase 4 function + would have to deallocate this heap region, which means that the epilogue in the phase 4 + slot would be restoring the registers from a deleted chunk. Or, a call to free the + chunk could be placed in the slot itself. This is really the same problem as slot + deallocation in general, and shouldn't be a problem in single-threaded codes. 
However, + we must determine a mechanism by which heap allocation can occur for the register + spills, and the use of the allocated regions must correspond on a per-invocation basis + (i.e. stack semantics) appropriately. One idea is to use a "one-off" approach -- for + example, the phase 4 function would heap-allocate a spill region (R) to be used by the + first "real" invocation of the instrumentation (phase 5?). Each phase 5 invocation + would spill to and restore from region R, and would allocate a heap region (R') to be + used by the next invocation of the instrumentation. Of course, there'd have to be a new + slot created that would spill to this new region, etc. The regions (heap and slot) + could only be recycled as the call stack was popped. This is so gross I don't think + that it is an option. So, spilling the global and FP registers is even more of a + problem than the data transfer between the start- and end-function invocations, and I + think we have to go back to a global stack approach. + + Phase 4 initially creates a (large) heap region which will act as the global stack. It + writes the phase-5 slot to use this address to spill to, and the stack base is held + onto somehow. The phase 5 slot spills to the current stack pointer, and invokes the + phase 5 function. The phase 5 function will allocate space at stack pointer + regsave + size for whatever data needs to pass between the start- and end- functions. A pointer + to the start of the storage region for start- function is passed into the function, + etc, and the OFFSET FROM THE STACK POINTER is stored in the field in the INTERVAL_START + record. The phase 5 slot (after the call to the phase 5 function) restores from the + current stack pointer, but *does not change the stack pointer*. 
The phase 5 invocation + would also reallocate the stack space if it detected that more space was needed + (important but not vital for the prototype implementation -- it can be "big enough" in + the initial implementation). See handwritten notes for more detail. {{{ Notes on using the total-copy approach in the prototype implementation. From jstanley at cs.uiuc.edu Tue Apr 29 14:43:01 2003 From: jstanley at cs.uiuc.edu (Joel Stanley) Date: Tue Apr 29 14:43:01 2003 Subject: [llvm-commits] CVS: llvm/lib/Reoptimizer/Inst/ElfReader.cpp ElfReader.h Phases.cpp Message-ID: <200304291948.OAA24570@cypher.cs.uiuc.edu> Changes in directory llvm/lib/Reoptimizer/Inst: ElfReader.cpp updated: 1.4 -> 1.5 ElfReader.h updated: 1.3 -> 1.4 Phases.cpp updated: 1.13 -> 1.14 --- Log message: Restricted symtable search to those functions whose symbols are associated with the section labelled '.text', which is fixed according to the ELF spec. --- Diffs of the changes: Index: llvm/lib/Reoptimizer/Inst/ElfReader.cpp diff -u llvm/lib/Reoptimizer/Inst/ElfReader.cpp:1.4 llvm/lib/Reoptimizer/Inst/ElfReader.cpp:1.5 --- llvm/lib/Reoptimizer/Inst/ElfReader.cpp:1.4 Thu Apr 10 18:23:58 2003 +++ llvm/lib/Reoptimizer/Inst/ElfReader.cpp Tue Apr 29 14:48:29 2003 @@ -28,6 +28,8 @@ using std::cerr; using std::endl; +const std::string ElfReader::sm_codeSegmentName = ".text"; + ElfReader::ElfReader(const char* execName): m_execFD(-1), m_elfDes(0), @@ -46,11 +48,11 @@ m_elfHdr = elf64_getehdr(m_elfDes); assert(m_elfHdr && "Failed to obtain valid ELF header"); - LocateSymbolTable(); + LocateSections(); // Obtain pointer to string table associated with the symbol table Elf_Data* strTabHand = elf_getdata(elf_getscn(m_elfDes, m_symTab->sh_link), 0); - assert(strTabHand && "Couldn't obtain ELF data handle to string table"); + assert(strTabHand && "Couldn't obtain ELF data handle to symbol-name string table"); m_strTab = (char*) strTabHand->d_buf; // Determine the size of each entry and the number of entries in 
the table @@ -85,23 +87,34 @@ rdcnt = read(m_execFD, &sym + rdcnt, m_entrySize); } while(rdcnt < m_entrySize); - // If it is a function, extract name, extents, and return + // If it is a function in the code segment, extract name, extents, and return. if(STT_FUNC == (sym.st_info & 0xf)) { // Symbol type is lower 4 bits - fname = m_strTab + sym.st_name; - range.first = sym.st_value; - range.second = sym.st_value + sym.st_size - 4; - return true; + if(sym.st_shndx == m_codeSectionIdx) { + fname = m_strTab + sym.st_name; + range.first = sym.st_value; + range.second = sym.st_value + sym.st_size - 4; + return true; + } } } return false; } -void ElfReader::LocateSymbolTable() +void ElfReader::LocateSections() { - // Examine the section header of each section, looking for the symbol table. When - // found, ensure that it is unique, otherwise, assert out. - + // Obtain the base pointer to the string table containing the names of the + // sections in the file. + + Elf_Data* strTabHand = elf_getdata(elf_getscn(m_elfDes, m_elfHdr->e_shstrndx), 0); + assert(strTabHand && "Couldn't obtain ELF data handle to section-name string table"); + char* secNameTable = (char*) strTabHand->d_buf; + + // Examine the section header of each section, looking for: + // a) The symbol table. When found ensure that it is unique, otherwise, assert out. + // b) The code segment (i.e. the segment which has the spec-defined name ".text" + + bool codeSegmentFound = false; Elf64_Shdr* secHdr; for(Elf_Scn* currScn = 0; (currScn = elf_nextscn(m_elfDes, currScn)); ) { secHdr = elf64_getshdr(currScn); @@ -112,10 +125,23 @@ assert(!m_symTab && "Should only be one symbol table in the image"); m_symTab = secHdr; } + else if(SHT_PROGBITS == secHdr->sh_type) { + if(!codeSegmentFound) { + // Found section marked as "program-defined". Obtain section name and + // see if it matches the name of the code segment. 
+ + char* sectionName = secNameTable + secHdr->sh_name; + if(sm_codeSegmentName == sectionName) { + m_codeSectionIdx = elf_ndxscn(currScn); + codeSegmentFound = true; + } + } + } // NB: May need to look for SHT_DYNSYM here later on. } assert(m_symTab && "Couldn't locate symbol table (stripped executable?)"); + assert(codeSegmentFound && "Couldn't locate code segment"); } ElfReader::~ElfReader() Index: llvm/lib/Reoptimizer/Inst/ElfReader.h diff -u llvm/lib/Reoptimizer/Inst/ElfReader.h:1.3 llvm/lib/Reoptimizer/Inst/ElfReader.h:1.4 --- llvm/lib/Reoptimizer/Inst/ElfReader.h:1.3 Tue Apr 8 22:35:57 2003 +++ llvm/lib/Reoptimizer/Inst/ElfReader.h Tue Apr 29 14:48:29 2003 @@ -21,7 +21,7 @@ private: ElfReader() {} - void LocateSymbolTable(); + void LocateSections(); int m_execFD; // Executable FD Elf* m_elfDes; // ELF descriptor @@ -31,6 +31,9 @@ Elf64_Xword m_numEntries; Elf64_Xword m_entriesProcessed; char* m_strTab; + unsigned m_codeSectionIdx; // Section index of code section + + static const std::string sm_codeSegmentName; }; #endif // _INCLUDED_ELFREADER_H Index: llvm/lib/Reoptimizer/Inst/Phases.cpp diff -u llvm/lib/Reoptimizer/Inst/Phases.cpp:1.13 llvm/lib/Reoptimizer/Inst/Phases.cpp:1.14 --- llvm/lib/Reoptimizer/Inst/Phases.cpp:1.13 Tue Apr 29 13:36:53 2003 +++ llvm/lib/Reoptimizer/Inst/Phases.cpp Tue Apr 29 14:48:29 2003 @@ -205,8 +205,14 @@ TraceCache* m_pTraceCache; InstManip m_instManip; + + static uint64_t* sm_pSpillRegion; // Base pointer to the spill region for phase 3 invocations + static uint64_t* sm_pCurrSpill; // Pointer to current location in the spill region }; +uint64_t* Phase2::sm_pSpillRegion = 0; +uint64_t* Phase2::sm_pCurrSpill = 0; + // Phase3 is the class that is responsible for making the "phase 3" transformation; the // global function phase3() is responsible for constructing one Phase3 instance per // invocation and invoking transform on it. 
@@ -279,13 +285,20 @@ std::string funcName; AddressRange range; - while(elfReader.GetNextFunction(funcName, range)) { - if(funcName == "fibs") { - //cerr << "Printing information about function " << funcName << endl; - //m_instManip.printRange(range.first, range.second); - - cerr << "Transforming function " << funcName << "..." << endl; - transformFunction(range); + // TODO: Come up with a better way to do this that doesn't involve storing the entire + // list of functions here -- this could be quite large. + + vector > funcs; + while(elfReader.GetNextFunction(funcName, range)) + funcs.push_back(std::make_pair(funcName, range)); + + cerr << "There are " << funcs.size() << " functions to process." << endl << endl; + + for(vector >::iterator i = funcs.begin(), + e = funcs.end(); i != e; ++i) { + if(i->first == "fibs") { + cerr << "Transforming function " << i->first << "..." << endl; + transformFunction(i->second); } } @@ -413,15 +426,6 @@ for(vector::iterator i = candidates.begin(), e = candidates.end(); i != e; ++i) { cerr << "Transforming " << *i << endl; -#if 0 - uint64_t slotBase = m_pTraceCache->getMemMgr()->getMemory(getSlotSize(*i)); - assert(slotBase && "Unable to obtain memory from MemoryManger instance"); - - // Replace load candidate instruction with a branch to start of slot. - VirtualMem* vm = m_pTraceCache->getVM(); - uint64_t loadAddr = i->front().first; - vm->writeInstToVM(loadAddr, m_instManip.getBranchAlways(slotBase, loadAddr)); -#endif // Replace load candidate instruction with a branch to the start of a new slot. 
uint64_t slotBase = replaceInstWithBrToSlot(i->front().first, getSlotSize(*i), m_pTraceCache, m_instManip); From lattner at cs.uiuc.edu Tue Apr 29 17:16:00 2003 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Tue Apr 29 17:16:00 2003 Subject: [llvm-commits] CVS: llvm/www/www-index.html Message-ID: <200304292215.RAA28219@tank.cs.uiuc.edu> Changes in directory llvm/www: www-index.html updated: 1.21 -> 1.22 --- Log message: Add DSA tech report Kill off all of the "'s --- Diffs of the changes: Index: llvm/www/www-index.html diff -u llvm/www/www-index.html:1.21 llvm/www/www-index.html:1.22 --- llvm/www/www-index.html:1.21 Mon Apr 28 17:50:56 2003 +++ llvm/www/www-index.html Tue Apr 29 17:14:49 2003 @@ -101,31 +101,40 @@
      -
    1. "LLVM: +
    2. "Data + Structure Analysis: An Efficient Context-Sensitive + Heap Analysis"
      + Chris Lattner & Vikram Adve, Technical + Report #UIUCDCS-R-2003-2340, Computer + Science Dept., Univ. of Illinois, + Apr. 2003.
      +
    3. +
    4. "LLVM: An Infrastructure for Multi-Stage - Optimization"
      + Optimization"
      Chris Lattner. Masters Thesis, Dec. 2002
    5. -
    6. "Ensuring +
    7. "Ensuring Code Safety Without Runtime Checks for - Real-Time Control Systems"
      + Real-Time Control Systems"
      Sumant Kowshik, Dinakar Dhurjati, and Vikram Adve. Proc. Int’l Conf. on Compilers, Architecture and Synthesis for Embedded Systems (CASES02), Grenoble, France, Oct. 2002.
    8. -
    9. ""The LLVM Instruction Set and Compilation - Strategy"
      + Strategy"
      Chris Lattner & Vikram Adve, Technical Report #UIUCDCS-R-2002-2292, Computer Science Dept., Univ. of Illinois, Aug. 2002.
    10. -
    11. ""Automatic Pool Allocation for Disjoint Data - Structures,"
      + Structures,"
      Chris Lattner & Vikram Adve, ACM SIGPLAN Workshop on Memory System @@ -285,8 +294,7 @@ - Finished providing initial information about LLVM, we now - consider the site to have "gone - public". + consider the site to have "gone public". Jun 28th, 2002 From jstanley at cs.uiuc.edu Tue Apr 29 21:03:01 2003 From: jstanley at cs.uiuc.edu (Joel Stanley) Date: Tue Apr 29 21:03:01 2003 Subject: [llvm-commits] CVS: llvm/lib/Reoptimizer/Inst/InstManip.cpp InstManip.h Phases.cpp design.txt Message-ID: <200304300208.VAA26163@cypher.cs.uiuc.edu> Changes in directory llvm/lib/Reoptimizer/Inst: InstManip.cpp updated: 1.8 -> 1.9 InstManip.h updated: 1.9 -> 1.10 Phases.cpp updated: 1.14 -> 1.15 design.txt updated: 1.10 -> 1.11 --- Log message: * A logical -> actual register mapping mechanism exists * Calling conventions are now adhered to * Slots in the tracecache now obtain a new stack frame --- Diffs of the changes: Index: llvm/lib/Reoptimizer/Inst/InstManip.cpp diff -u llvm/lib/Reoptimizer/Inst/InstManip.cpp:1.8 llvm/lib/Reoptimizer/Inst/InstManip.cpp:1.9 --- llvm/lib/Reoptimizer/Inst/InstManip.cpp:1.8 Tue Apr 29 13:36:53 2003 +++ llvm/lib/Reoptimizer/Inst/InstManip.cpp Tue Apr 29 21:08:42 2003 @@ -12,6 +12,9 @@ #include "InstManip.h" const unsigned InstManip::NOP_INST = 0x01000000; +const unsigned InstManip::BRANCH_ALWAYS_BASE = 0x10480000; +const unsigned InstManip::BRANCH_ALWAYS_BASE_ANNUL = 0x30480000; +const unsigned InstManip::BIAS = 2047; uint64_t InstManip::sm_phase3SpillRegion[InstManip::SHARED_SIZE]; using std::cout; @@ -43,7 +46,40 @@ ostr << "}"; } -void InstManip::printRange(unsigned* start, unsigned* end) const +InstManip::InstManip(VirtualMem* vm): + m_pVM(vm), + m_pCurrSnippet(0) +{ + assert(vm && "InstManip requires valid VirtualMem instance"); + + // Populate logical->actual register map. Since this InstManip class is + // SparcV9-specific, we map to the values used by the BinInterface library and macros. 
+ + m_logicalToActualReg[REG_0] = R_O0; + m_logicalToActualReg[REG_1] = R_O1; + m_logicalToActualReg[REG_2] = R_O2; + + // Populate output->input register map. This is SparcV9 specific and corresponds to + // the register mapping that occurs after a 'save' instruction is issued. Shared and + // local registers map to themselves. + + m_outputToInputReg[R_O0] = R_I0; + m_outputToInputReg[R_O1] = R_I1; + m_outputToInputReg[R_O2] = R_I2; + m_outputToInputReg[R_O3] = R_I3; + m_outputToInputReg[R_O4] = R_I4; + m_outputToInputReg[R_O5] = R_I5; + m_outputToInputReg[R_O6] = R_I6; + m_outputToInputReg[R_O7] = R_I7; + + for(unsigned i = R_G0; i <= R_G7; ++i) + m_outputToInputReg[i] = i; + for(unsigned i = R_L0; i <= R_L7; ++i) + m_outputToInputReg[i] = i; +} + +void InstManip::printRange(unsigned* start, + unsigned* end) const { // Dumps contents (and corresponding disassembly) of memory range given by range // to stdout. TODO: Parameterize by an ostream instance; cannot do this yet @@ -75,57 +111,60 @@ } void InstManip::generateLoad(uint64_t value, - std::vector& snippet, - TargetRegister reg) const + LogicalRegister dest, + LogicalRegister tmp) { // When reg == REG_0, load the 64-bit value into %o0, using %o0 and %o1. // When reg == REG_1, load the 64-bit value into %o1, using %o1 and %o2. // The sequence of instructions is placed into the provided instruction vector. 
+ assert(m_pCurrSnippet && "Invalid snippet for code generation"); + assert(dest != tmp && "Distinct logical registers required"); + std::vector& snippet = *m_pCurrSnippet; + unsigned initSize = snippet.size(); - unsigned destReg, tmpReg; - switch(reg) { - case REG_0: - destReg = R_O0; - tmpReg = R_O1; - break; - case REG_1: - destReg = R_O1; - tmpReg = R_O2; - break; - default: - assert(0 && "Invalid destination register"); - } + unsigned destReg = m_logicalToActualReg[dest]; + unsigned tmpReg = m_logicalToActualReg[tmp]; - // sethi (upper 22b of upper wrd), %o0 + // sethi (upper 22b of upper wrd), %destReg snippet.push_back(MK_SETHI(destReg, HIGH22(HIGHWORD(value)))); - // or %o0, (lower 10b of upper wrd), %o0 + // or %o0, (lower 10b of upper wrd), %destReg snippet.push_back(MK_LOGIC_IMM(OP3_OR, destReg, destReg, LOW10(HIGHWORD(value)))); - // sllx %o0, 32, %o0 + // sllx %o0, 32, %destReg snippet.push_back(MK_SHIFTX(OP3_SLL, destReg, destReg, 32)); - // sethi (upper 22b of lwr wrd), %o1 + // sethi (upper 22b of lwr wrd), %tmpReg snippet.push_back(MK_SETHI(tmpReg, HIGH22(LOWWORD(value)))); - // or %o0, %o1, %o0 + // or %destReg, %tmpReg, %destReg snippet.push_back(MK_LOGIC(OP3_OR, destReg, destReg, tmpReg)); - // add %o0, (lwr 10b of lwr wrd), %o0 + // add %destReg, (lwr 10b of lwr wrd), %destReg snippet.push_back(MK_ADD_R_I(destReg, destReg, LOW10(LOWWORD(value)))); assert(snippet.size() - initSize == getGenLoadSize() && - "Unexpected number of instructions in code sequence for 64-bit value -> %destReg"); + "Unexpected number of instructions in code sequence for 64-bit value -> %dest"); } void InstManip::generateAddressCopy(unsigned loadInst, - std::vector& snippet, - TargetRegister reg) const + LogicalRegister dest, + bool afterSave) { - unsigned destReg = (reg == REG_0) ? R_O0 : R_O1; + // NB: After save instruction has been issued, the output registers are mapped to the + // input registers. 
+ + assert(m_pCurrSnippet && "Invalid snippet for code generation"); + std::vector& snippet = *m_pCurrSnippet; + + unsigned initSize = snippet.size(); + unsigned destReg = m_logicalToActualReg[dest]; unsigned rs1 = RD_FLD(loadInst, INSTR_RS1); - + + if(afterSave) + rs1 = m_outputToInputReg[rs1]; + if(RD_FLD(loadInst, INSTR_I)) { // Case 1: load is immediate-valued --> reg, imm value add instruction needed unsigned imm = RD_FLD(loadInst, INSTR_SIMM13); @@ -134,14 +173,38 @@ else { // Case 2: load is register-valued --> reg, reg add instruction needed unsigned rs2 = RD_FLD(loadInst, INSTR_RS2); + + if(afterSave) + rs2 = m_outputToInputReg[rs2]; + snippet.push_back(MK_ADD_R_R(destReg, rs1, rs2)); } + + assert(snippet.size() - initSize == getGenAddressCopySize(loadInst) && + "Unexpected number of instructions in code sequence for address copy"); +} + +void InstManip::generateParamStore(LogicalRegister src, + StackOffset off) +{ + assert(m_pCurrSnippet && "Invalid snippet for code generation"); + std::vector& snippet = *m_pCurrSnippet; + + unsigned initSize = snippet.size(); + unsigned srcReg = m_logicalToActualReg[src]; + + snippet.push_back(MK_STX_STACK(srcReg, BIAS + off)); + + assert(snippet.size() - initSize == getGenParamStoreSize() && + "Unexpected number of instructions in code sequence for parameter store"); } void InstManip::generateCall(uint64_t dest, - uint64_t slotBase, - std::vector& snippet) const + uint64_t slotBase) { + assert(m_pCurrSnippet && "Invalid snippet for code generation"); + std::vector& snippet = *m_pCurrSnippet; + unsigned initSize = snippet.size(); // Calculate address of call instruction from slotBase @@ -155,92 +218,105 @@ "Unexpected number of instructions in code sequence for call"); } -// NB: Generate restore/save currently fill the snippet (which comes from a slot) with a -// bunch of code to save and restore the global registers. 
This blows up the size of the -// required slot quite a bit -- it would be better to generate a call to functions -// saveGlobalRegs() and restoreGlobalRegs(), for example. However, this works for now and -// writing those functions means determining what the inline assembly should look like. -// The ifdef'd-out region below is a start, but it is incomplete and generates errors at -// assembly time. In particular, the SPARC assembly requires a '.register' directive before -// it witnesses a use of %g2, %g3, %g6, or %g7, and that doesn't appear to be emitted simply -// by using the inline assembly. :( TODO. -// - -#if 0 -void restoreGlobRegs() -{ - // asm ("assembly template" : "output contraints", "input contraints") - // Restore the global registers %g[1-7] from the globalRegs array. - - asm("ldx %0, %%g1"::"o" (globalRegs)); - asm("ldx %0, %%g2"::"o" (globalRegs+1)); - asm("ldx %0, %%g3"::"o" (globalRegs+2)); - asm("ldx %0, %%g4"::"o" (globalRegs+3)); - asm("ldx %0, %%g5"::"o" (globalRegs+4)); - asm("ldx %0, %%g6"::"o" (globalRegs+5)); - asm("ldx %0, %%g7"::"o" (globalRegs+6)); +unsigned InstManip::getRestoreInst() const +{ + // restore %g0, 0, %g0 + return MK_RESTORE_IMM(R_G0, R_G0, 0); } -#endif -void InstManip::generateRestoreShared(uint64_t restoreFromAddr, - std::vector& snippet, - TargetRegister reg) const +void InstManip::generateRestore() { - generateLoad(restoreFromAddr, snippet, reg); + assert(m_pCurrSnippet && "Invalid snippet for code generation"); + std::vector& snippet = *m_pCurrSnippet; - unsigned destReg = (reg == REG_0) ? 
R_O0 : R_O1; - - snippet.push_back(MK_LOAD_IMM(R_G1, destReg, 8)); - snippet.push_back(MK_LOAD_IMM(R_G2, destReg, 16)); - snippet.push_back(MK_LOAD_IMM(R_G3, destReg, 24)); - snippet.push_back(MK_LOAD_IMM(R_G4, destReg, 32)); - snippet.push_back(MK_LOAD_IMM(R_G5, destReg, 40)); - snippet.push_back(MK_LOAD_IMM(R_G6, destReg, 48)); - snippet.push_back(MK_LOAD_IMM(R_G7, destReg, 56)); + unsigned initSize = snippet.size(); + + snippet.push_back(getRestoreInst()); + + assert(snippet.size() - initSize == getGenRestoreSize() && + "Unexpected number of instructions in code sequence for restore"); } -void InstManip::generateRestore(std::vector& snippet) const +void InstManip::generateSave() { - // restore %o0, 0, %o0 - snippet.push_back(MK_RESTORE_IMM(R_O0, R_O0, 0)); + assert(m_pCurrSnippet && "Invalid snippet for code generation"); + std::vector& snippet = *m_pCurrSnippet; + + unsigned initSize = snippet.size(); + + // save %sp, -176, %sp + snippet.push_back(MK_SAVE_IMM(R_O6, R_O6, -176)); + + assert(snippet.size() - initSize == getGenSaveSize() && + "Unexpected number of instructions in code sequence for save"); } -void InstManip::generateSpillShared(uint64_t spillToAddr, - std::vector& snippet, - TargetRegister reg) const +// TODO: It will be worthwhile to generate calls to functions that spill/restore the +// shared registers instead of dumping all of the code into the current snippet. + +void InstManip::generateRestoreShared(uint64_t restoreFromAddr, + LogicalRegister tmp1, + LogicalRegister tmp2) { - generateLoad(spillToAddr, snippet, reg); + assert(m_pCurrSnippet && "Invalid snippet for code generation"); + assert(tmp1 != tmp2 && "Distinct logical registers required"); + + std::vector& snippet = *m_pCurrSnippet; + unsigned initSize = snippet.size(); + unsigned tmpReg = m_logicalToActualReg[tmp1]; - unsigned destReg = (reg == REG_0) ? 
R_O0 : R_O1; + generateLoad(restoreFromAddr, tmp1, tmp2); + snippet.push_back(MK_LOAD_IMM(R_G1, tmpReg, 8)); + snippet.push_back(MK_LOAD_IMM(R_G2, tmpReg, 16)); + snippet.push_back(MK_LOAD_IMM(R_G3, tmpReg, 24)); + snippet.push_back(MK_LOAD_IMM(R_G4, tmpReg, 32)); + snippet.push_back(MK_LOAD_IMM(R_G5, tmpReg, 40)); + snippet.push_back(MK_LOAD_IMM(R_G6, tmpReg, 48)); + snippet.push_back(MK_LOAD_IMM(R_G7, tmpReg, 56)); - snippet.push_back(MK_STORE_IMM(R_G1, destReg, 8)); - snippet.push_back(MK_STORE_IMM(R_G2, destReg, 16)); - snippet.push_back(MK_STORE_IMM(R_G3, destReg, 24)); - snippet.push_back(MK_STORE_IMM(R_G4, destReg, 32)); - snippet.push_back(MK_STORE_IMM(R_G5, destReg, 40)); - snippet.push_back(MK_STORE_IMM(R_G6, destReg, 48)); - snippet.push_back(MK_STORE_IMM(R_G7, destReg, 56)); + assert(snippet.size() - initSize == getGenRestoreSharedSize() && + "Unexpected number of instructions in code sequence for restore shared"); } -void InstManip::generateSave(std::vector& snippet) const +void InstManip::generateSpillShared(uint64_t spillToAddr, + LogicalRegister tmp1, + LogicalRegister tmp2) { - // save %o0, 0, %o0 - snippet.push_back(MK_SAVE_IMM(R_O0, R_O0, 0)); + assert(m_pCurrSnippet && "Invalid snippet for code generation"); + assert(tmp1 != tmp2 && "Distinct logical registers required"); + + std::vector& snippet = *m_pCurrSnippet; + unsigned initSize = snippet.size(); + unsigned tmpReg = m_logicalToActualReg[tmp1]; + + generateLoad(spillToAddr, tmp1, tmp2); + snippet.push_back(MK_STORE_IMM(R_G1, tmpReg, 8)); + snippet.push_back(MK_STORE_IMM(R_G2, tmpReg, 16)); + snippet.push_back(MK_STORE_IMM(R_G3, tmpReg, 24)); + snippet.push_back(MK_STORE_IMM(R_G4, tmpReg, 32)); + snippet.push_back(MK_STORE_IMM(R_G5, tmpReg, 40)); + snippet.push_back(MK_STORE_IMM(R_G6, tmpReg, 48)); + snippet.push_back(MK_STORE_IMM(R_G7, tmpReg, 56)); + + assert(snippet.size() - initSize == getGenSpillSharedSize() && + "Unexpected number of instructions in code sequence for spill shared"); } 
void InstManip::generateBranchAlways(uint64_t dest, uint64_t slotBase, - std::vector& snippet, - bool annul) const + unsigned delaySlotInstr) { + assert(m_pCurrSnippet && "Invalid snippet for code generation"); + std::vector& snippet = *m_pCurrSnippet; + unsigned initSize = snippet.size(); // Calculate address of branch instruction from slotBase uint64_t branchInstAddr = slotBase + getInstWidth() * snippet.size(); - // Add branch instruction and nop (for branch delay slot) to code snippet. - snippet.push_back(getBranchAlways(dest, branchInstAddr, annul)); - snippet.push_back(NOP_INST); + // Add branch instruction and the specified delay slot instruction to code snippet. + snippet.push_back(getBranchAlways(dest, branchInstAddr, false)); // annul bit low + snippet.push_back(delaySlotInstr); assert(snippet.size() - initSize == getGenBranchAlwaysSize() && "Unexpected number of instruction in code sequence for branch-always"); Index: llvm/lib/Reoptimizer/Inst/InstManip.h diff -u llvm/lib/Reoptimizer/Inst/InstManip.h:1.9 llvm/lib/Reoptimizer/Inst/InstManip.h:1.10 --- llvm/lib/Reoptimizer/Inst/InstManip.h:1.9 Tue Apr 29 13:36:53 2003 +++ llvm/lib/Reoptimizer/Inst/InstManip.h Tue Apr 29 21:08:42 2003 @@ -77,15 +77,24 @@ class InstManip { public: - InstManip(VirtualMem* vm): m_pVM(vm) - { - assert(vm && "InstManip requires valid VirtualMem instance"); - } + InstManip(VirtualMem* vm); typedef std::pair Inst; // (location, inst word) pair - enum TargetRegister { REG_0, REG_1 }; - + // Logical registers used by clients of this class, mapped to machine-specific IDs + // by the logical -> actual register map. 
+ enum LogicalRegister { + REG_0, + REG_1, + REG_2 + }; + + // Offsets in stack frame for function parameters + enum StackOffset { + PARAM_0 = 128, + PARAM_1 = 136 + }; + void printRange(unsigned* start, unsigned* end) const; inline void printRange(uint64_t start, uint64_t end) const; @@ -94,38 +103,41 @@ uint64_t skipFunctionHdr(uint64_t addr) const; + void startCode(std::vector& snippet) { m_pCurrSnippet = &snippet; } + void endCode() { m_pCurrSnippet = 0; } + void generateAddressCopy(unsigned loadInst, - std::vector& snippet, - TargetRegister reg = REG_0) const; + LogicalRegister dest, + bool afterSave); + + void generateBranchAlways(uint64_t dest, + uint64_t slotBase, + unsigned delaySlotInstr = NOP_INST); + + void generateCall(uint64_t dest, uint64_t slotBase); void generateLoad(uint64_t value, - std::vector& snippet, - TargetRegister reg = REG_0) const; + LogicalRegister dest, + LogicalRegister tmp); - void generateCall(uint64_t dest, - uint64_t slotBase, - std::vector& snippet) const; + void generateParamStore(LogicalRegister src, StackOffset off); - void generateRestore(std::vector& snippet) const; - void generateSave(std::vector& snippet) const; + void generateRestore(); + void generateSave(); - void generateSpillShared(uint64_t spillFromAddr, - std::vector& snippet, - TargetRegister reg = REG_0) const; - - void generateRestoreShared(uint64_t restorFromAddr, - std::vector& snippet, - TargetRegister reg = REG_0) const; + void generateRestoreShared(uint64_t restoreFromAddr, + LogicalRegister tmp1 = REG_0, + LogicalRegister tmp2 = REG_1); - void generateBranchAlways(uint64_t dest, - uint64_t slotBase, - std::vector& snippet, - bool annul = true) const; + void generateSpillShared(uint64_t spillFromAddr, + LogicalRegister tmp1 = REG_0, + LogicalRegister tmp2 = REG_1); void findCandidates(uint64_t start, uint64_t end, std::vector& candidates); + unsigned getRestoreInst() const; inline unsigned getBranchAlways(uint64_t dest, uint64_t pc, bool annulHigh = true) 
const; inline unsigned getCallInst(uint64_t dest, uint64_t pc) const; inline bool isBranch(unsigned inst) const; @@ -138,18 +150,21 @@ unsigned getGenCallSize() const { return 2; } unsigned getGenBranchAlwaysSize() const { return 2; } unsigned getGenSaveSize() const { return 1; } + unsigned getGenParamStoreSize() const { return 1; } unsigned getGenSpillSharedSize() const { return getGenLoadSize() + SHARED_SIZE; } unsigned getGenRestoreSharedSize() const { return getGenLoadSize() + SHARED_SIZE; } unsigned getGenRestoreSize() const { return 1; } unsigned getInstWidth() const { return 4; } unsigned getSharedSize() const { return SHARED_SIZE; } - inline unsigned getAddressCopySize(unsigned loadInst) const; + inline unsigned getGenAddressCopySize(unsigned loadInst) const; uint64_t getPhase3SpillAddr() { return (uint64_t) sm_phase3SpillRegion; } private: InstManip() {} + typedef std::map LogicalToActualRegMap; + typedef std::map OutputToInputRegMap; bool isCandidateLoad(uint64_t addr, uint64_t end, @@ -168,22 +183,33 @@ uint64_t end, unsigned fpOffset); - // Branch-always (annul bit high) instruction base (i.e. 
address not filled in yet) - static const unsigned BRANCH_ALWAYS_BASE = 0x30480000; + VirtualMem* m_pVM; + std::vector* m_pCurrSnippet; + LogicalToActualRegMap m_logicalToActualReg; // Maps logical -> actual register + OutputToInputRegMap m_outputToInputReg; // Maps input register -> output register + + // Branch-always (annul bit high) instruction base (i.e., address not filled in yet) + static const unsigned BRANCH_ALWAYS_BASE_ANNUL; + + // Branch-always (annul bit low) instruction base (i.e., address not filled in yet) + static const unsigned BRANCH_ALWAYS_BASE; + + // NOP instruction static const unsigned NOP_INST; // Size (in number of 64-bit words) required for storing shared registers static const unsigned SHARED_SIZE = 7; - VirtualMem* m_pVM; - + // Sparc-specific constant used in SP manipulations + static const unsigned BIAS; + // Memory region into which to spill shared registers when executing a phase 4 slot // (i.e., the slot that invokes the phase4 function, the slot written by phase 3 // invocations). NB: One region is sufficient and we do not need stack semantics // because only one activation of a phase 4 slot ever occurs at a given time (assuming // single-threaded execution). - static uint64_t sm_phase3SpillRegion[SHARED_SIZE]; + static uint64_t sm_phase3SpillRegion[SHARED_SIZE]; }; void InstManip::printRange(uint64_t start, uint64_t end) const @@ -209,8 +235,9 @@ // branch instruction is executed (i.e., the address of the branch instruction). NB: // Only handles branch-always-annul-high at the moment - assert(annul && "Unhandled case: annul bit low"); - return getUndepJumpInstr(BRANCH_ALWAYS_BASE, dest, pc); + return getUndepJumpInstr(annul ? 
BRANCH_ALWAYS_BASE_ANNUL : BRANCH_ALWAYS_BASE, + dest, + pc); } unsigned InstManip::getCallInst(uint64_t dest, uint64_t pc) const @@ -227,7 +254,7 @@ return ::isBranchInstr(inst); } -unsigned InstManip::getAddressCopySize(unsigned loadInst) const +unsigned InstManip::getGenAddressCopySize(unsigned loadInst) const { // Determine the number of instructions required to load the address value used by the // load instruction into some register. Index: llvm/lib/Reoptimizer/Inst/Phases.cpp diff -u llvm/lib/Reoptimizer/Inst/Phases.cpp:1.14 llvm/lib/Reoptimizer/Inst/Phases.cpp:1.15 --- llvm/lib/Reoptimizer/Inst/Phases.cpp:1.14 Tue Apr 29 14:48:29 2003 +++ llvm/lib/Reoptimizer/Inst/Phases.cpp Tue Apr 29 21:08:42 2003 @@ -16,18 +16,18 @@ // slot (annulling bit should specify *not* to execute the branch delay slot) in // the dummy function. // -// 2b. In the new slot, write the contents of the phase 2 slot: -// +------------------------------+ -// | load parameter for phase 3 | -// | call to phase 3 | -// | nop | -// | branch back to orig code | -// | nop | -// +------------------------------+ +// 2b. In the new slot, write the contents of the phase 3 slot: +// +------------------------------------+ +// | save registers (new stack frame) | +// | load parameter for phase 3 | +// | call to phase 3 | +// | nop | +// | branch back to orig code | +// | restore registers | +// +------------------------------------+ // where the parameter to phase 3 is a pointer the heap-allocated Phase3Info // instance. // -// // PHASE 3: // // - Deallocate the parameter structure whenever it is convenient to do so. @@ -40,18 +40,17 @@ // 3. For each load-volatile candidate, // 3a. Obtain a new slot in the dummy function. // 3b. Replace the load candidate with branch to slot. -// 3c. In the new slot, write the contents of the phase 3 slot: +// 3c. 
In the new slot, write the contents of the phase 4 slot: // +---------------------------------------+ -// | save registers | +// | save registers (new stack frame) | // | save global registers | // | copy load-src addr to param1 register | // | load p4 struct ptr to param2 register | // | call to phase 4 | // | nop | -// | restore registers | // | restore global registers | // | branch back to orig code | -// | nop | +// | restore registers | // +---------------------------------------+ // // 4. Deallocate the slot that originated this invocation. @@ -62,21 +61,11 @@ // 1a. If tag is in GBT, we have a valid candidate, so do step 2. // 1b. If tag is not in GBT, our candidate is invalid, so delete slot and return to // original code. -// -// 2. Set up the second phase 4 slot that will actually call the instrumentation function: +// +// 2. Set up the phase 5 slot that will actually call the instrumentation function: // +---------------------------------------+ -// | save registers | -// | save global registers | -// | call to inst func | -// | nop | -// | restore registers | -// | restore global registers | -// | branch back to orig code | -// | nop | +// | ... | // +---------------------------------------+ -// This "instrumentation slot" may have to be expanded later to store the return value -// in an alloca'd temporary, unless the phase4 function itself can invoke the -// instrumentation function, would be *highly* ideal. // #include @@ -285,8 +274,9 @@ std::string funcName; AddressRange range; - // TODO: Come up with a better way to do this that doesn't involve storing the entire - // list of functions here -- this could be quite large. + // Obtain the list of functions to transform, from the ElfReader module. TODO: Come + // up with a better way to do this that doesn't involve storing the entire list of + // functions here -- this could be quite large. 
vector > funcs; while(elfReader.GetNextFunction(funcName, range)) @@ -294,6 +284,13 @@ cerr << "There are " << funcs.size() << " functions to process." << endl << endl; + // Heap-allocate a region of memory in which to spill shared registers before phase3 + // invocations. We allocate one unit of space (given by InstManip::getSharedSize()) + // for each function that we transform. + + sm_pSpillRegion = new uint64_t[m_instManip.getSharedSize() * funcs.size()]; + sm_pCurrSpill = sm_pSpillRegion; + for(vector >::iterator i = funcs.begin(), e = funcs.end(); i != e; ++i) { if(i->first == "fibs") { @@ -334,17 +331,34 @@ "Unhandled case: branch instruction first in function body"); vm->writeInstToVM(repInstAddr, m_instManip.getBranchAlways(slotBase, repInstAddr)); - // Generate a) code to load the address of the heap-allocated Phase3Info struct into a - // register, which will be used as a parameter to the phase3 call, b) the call to - // phase 3 itself, and c) the direct branch back to the original code. + // Generate the phase 3 slot. See picture of phase 3 slot contents for more info. 
Phase3Info* p3info = new Phase3Info(range, origInst, repInstAddr, slotBase, getSlotSize(), m_pTraceCache); vector snippet; - m_instManip.generateLoad((uint64_t) p3info, snippet); - m_instManip.generateCall((uint64_t) &phase3, slotBase, snippet); - m_instManip.generateBranchAlways(repInstAddr, slotBase, snippet); + m_instManip.startCode(snippet); + + m_instManip.generateSave(); + m_instManip.generateSpillShared((uint64_t) sm_pCurrSpill); + m_instManip.generateLoad((uint64_t) p3info, InstManip::REG_0, InstManip::REG_1); + m_instManip.generateCall((uint64_t) &phase3, slotBase); + m_instManip.generateRestoreShared((uint64_t) sm_pCurrSpill); + m_instManip.generateBranchAlways(repInstAddr, slotBase, m_instManip.getRestoreInst()); + + m_instManip.endCode(); + + // Dump snippet instructions: + cerr << "phase3 slot instructions:" << endl; + for(vector::iterator j = snippet.begin(), k = snippet.end(); j != k; ++j) { + m_instManip.printInst(*j); + cerr << endl; + } + + // Bump the current spill pointer to the next "spill slot" in the spill region used + // before/after phase3() invocations. + + sm_pCurrSpill += m_instManip.getSharedSize(); // Copy the snippet code into the slot assert(snippet.size() == getSlotSize() && "Snippet size does not match slot size"); @@ -356,8 +370,11 @@ // The following sum corresponds to the sizes consumed by the various regions of the // phase 2 slot. See picture of phase 2 contents for details. 
- return m_instManip.getGenLoadSize() + + return m_instManip.getGenSaveSize() + + m_instManip.getGenSpillSharedSize() + + m_instManip.getGenLoadSize() + m_instManip.getGenCallSize() + + m_instManip.getGenRestoreSharedSize() + m_instManip.getGenBranchAlwaysSize(); } @@ -430,25 +447,32 @@ uint64_t slotBase = replaceInstWithBrToSlot(i->front().first, getSlotSize(*i), m_pTraceCache, m_instManip); - // Generate a) code to save the registers, b) instruction(s) to store the load - // source address into a phase4 parameter register, c) the load of (the - // pointer-to) the heap-allocated Phase4Info structure into a phase4 parameter - // register, and d) code to call phase 3, restore regs, and branch back to - // original code. + // Generate the phase 4 slot. See picture of phase 4 slot contents for more info. Phase4Info* p4info = new Phase4Info(*i, slotBase, getSlotSize(*i), m_pTraceCache); uint64_t spillAddr = m_instManip.getPhase3SpillAddr(); - + vector snippet; - m_instManip.generateSave(snippet); - m_instManip.generateAddressCopy(i->front().second, snippet); // Uses InstManip::REG_0, live to call - m_instManip.generateSpillShared(spillAddr, snippet, InstManip::REG_1); - m_instManip.generateLoad((uint64_t) p4info, snippet, InstManip::REG_1); - m_instManip.generateCall((uint64_t) &phase4, slotBase, snippet); - m_instManip.generateRestoreShared(spillAddr, snippet); - m_instManip.generateRestore(snippet); - m_instManip.generateBranchAlways(i->front().first, slotBase, snippet); + m_instManip.startCode(snippet); + + // NB: We pass parameters to the phase4 function in REG_0 and REG_1 on the + // assumption that the input parameters will be looked for there. However, it is + // possible that the input parameters will be taken from the parameter array at + // fixed offsets from the stack pointer. Hence, we store the parameters there as + // well. 
+ + m_instManip.generateSave(); + m_instManip.generateAddressCopy(i->front().second, InstManip::REG_0, true); // REG_0 live to call + m_instManip.generateParamStore(InstManip::REG_0, InstManip::PARAM_0); + m_instManip.generateSpillShared(spillAddr, InstManip::REG_1, InstManip::REG_2); + m_instManip.generateLoad((uint64_t) p4info, InstManip::REG_1, InstManip::REG_2); // REG_1 live to call + m_instManip.generateParamStore(InstManip::REG_1, InstManip::PARAM_1); + m_instManip.generateCall((uint64_t) &phase4, slotBase); + m_instManip.generateRestoreShared(spillAddr); + m_instManip.generateBranchAlways(i->front().first, slotBase, m_instManip.getRestoreInst()); + + m_instManip.endCode(); // Dump snippet instructions: @@ -474,12 +498,13 @@ // phase 3 slot. See picture of phase 3 contents for details. return m_instManip.getGenSaveSize() + - m_instManip.getAddressCopySize(cand.front().second) + + m_instManip.getGenAddressCopySize(cand.front().second) + + m_instManip.getGenParamStoreSize() + m_instManip.getGenSpillSharedSize() + m_instManip.getGenLoadSize() + + m_instManip.getGenParamStoreSize() + m_instManip.getGenCallSize() + m_instManip.getGenRestoreSharedSize() + - m_instManip.getGenRestoreSize() + m_instManip.getGenBranchAlwaysSize(); } Index: llvm/lib/Reoptimizer/Inst/design.txt diff -u llvm/lib/Reoptimizer/Inst/design.txt:1.10 llvm/lib/Reoptimizer/Inst/design.txt:1.11 --- llvm/lib/Reoptimizer/Inst/design.txt:1.10 Tue Apr 29 13:36:53 2003 +++ llvm/lib/Reoptimizer/Inst/design.txt Tue Apr 29 21:08:42 2003 @@ -896,6 +896,10 @@ - Write phase 5 slot generation code, phase 5 function itself, etc. + - Optimizations: + - No need to save registers (other than those clobbered) in phase 3 slot, since phase 3 + is invoked at the start of the function. Must still spill/restore shared, though. 
+ }}} {{{ PHASE OUTLINE From brukman at cs.uiuc.edu Tue Apr 29 21:49:00 2003 From: brukman at cs.uiuc.edu (Michael Brukman) Date: Tue Apr 29 21:49:00 2003 Subject: [llvm-commits] CVS: llvm/www/www-index.html Message-ID: <200304300248.VAA00814@tank.cs.uiuc.edu> Changes in directory llvm/www: www-index.html updated: 1.22 -> 1.23 --- Log message: Added URL to Brian Gaeke's web page. --- Diffs of the changes: Index: llvm/www/www-index.html diff -u llvm/www/www-index.html:1.22 llvm/www/www-index.html:1.23 --- llvm/www/www-index.html:1.22 Tue Apr 29 17:14:49 2003 +++ llvm/www/www-index.html Tue Apr 29 21:48:42 2003 @@ -201,7 +201,9 @@
    12. Cameron Buschardt - Author of the mem2reg pass
    13. Misha Brukman - & Brian Gaeke - Portions of the X86 Just-In-Time + & Brian + Gaeke - Portions of the X86 Just-In-Time compiler
    14. From jstanley at cs.uiuc.edu Tue Apr 29 22:02:01 2003 From: jstanley at cs.uiuc.edu (Joel Stanley) Date: Tue Apr 29 22:02:01 2003 Subject: [llvm-commits] CVS: llvm/lib/Reoptimizer/Inst/SparcInstManip.cpp SparcInstManip.h InstManip.cpp InstManip.h Phases.cpp Message-ID: <200304300308.WAA26353@cypher.cs.uiuc.edu> Changes in directory llvm/lib/Reoptimizer/Inst: SparcInstManip.cpp added (r1.1) SparcInstManip.h added (r1.1) InstManip.cpp updated: 1.9 -> 1.10 InstManip.h updated: 1.10 -> 1.11 Phases.cpp updated: 1.15 -> 1.16 --- Log message: Intermediary refactoring point. Made SparcInstManip, a subclass of InstManip. --- Diffs of the changes: Index: llvm/lib/Reoptimizer/Inst/SparcInstManip.cpp diff -c /dev/null llvm/lib/Reoptimizer/Inst/SparcInstManip.cpp:1.1 *** /dev/null Tue Apr 29 22:08:13 2003 --- llvm/lib/Reoptimizer/Inst/SparcInstManip.cpp Tue Apr 29 22:08:03 2003 *************** *** 0 **** --- 1,522 ---- + //////////////// + // programmer: Joel Stanley + // date: Tue Apr 29 21:21:50 CDT 2003 + // fileid: SparcInstManip.cpp + // purpose: Implements the SparcInstManip class as described in SparcInstManip.h + + #include + #include + + #include "llvm/Reoptimizer/VirtualMem.h" + #include "llvm/Reoptimizer/BinInterface/sparc9.h" + #include "llvm/Reoptimizer/BinInterface/bitmath.h" + + #include "SparcInstManip.h" + + const unsigned SparcInstManip::NOP_INST = 0x01000000; + const unsigned SparcInstManip::BRANCH_ALWAYS_BASE = 0x10480000; + const unsigned SparcInstManip::BRANCH_ALWAYS_BASE_ANNUL = 0x30480000; + const unsigned SparcInstManip::BIAS = 2047; + uint64_t SparcInstManip::sm_phase3SpillRegion[SparcInstManip::SHARED_SIZE]; + + using std::cout; + using std::cerr; + using std::endl; + + SparcInstManip::SparcInstManip(VirtualMem* vm): + InstManip(vm), + m_pCurrSnippet(0) + { + assert(vm && "SparcInstManip requires valid VirtualMem instance"); + + // Populate logical->actual register map. 
Since this SparcInstManip class is + // SparcV9-specific, we map to the values used by the BinInterface library and macros. + + m_logicalToActualReg[REG_0] = R_O0; + m_logicalToActualReg[REG_1] = R_O1; + m_logicalToActualReg[REG_2] = R_O2; + + // Populate output->input register map. This is SparcV9 specific and corresponds to + // the register mapping that occurs after a 'save' instruction is issued. Shared and + // local registers map to themselves. + + m_outputToInputReg[R_O0] = R_I0; + m_outputToInputReg[R_O1] = R_I1; + m_outputToInputReg[R_O2] = R_I2; + m_outputToInputReg[R_O3] = R_I3; + m_outputToInputReg[R_O4] = R_I4; + m_outputToInputReg[R_O5] = R_I5; + m_outputToInputReg[R_O6] = R_I6; + m_outputToInputReg[R_O7] = R_I7; + + for(unsigned i = R_G0; i <= R_G7; ++i) + m_outputToInputReg[i] = i; + for(unsigned i = R_L0; i <= R_L7; ++i) + m_outputToInputReg[i] = i; + } + + void SparcInstManip::printRange(unsigned* start, + unsigned* end) const + { + // Dumps contents (and corresponding disassembly) of memory range given by range + // to stdout. TODO: Parameterize by an ostream instance; cannot do this yet + // because BinInterface is hard-coded to use printf and must be changed. + + cout << "Sparc dissassembly of range [" + << start << ", " << end << "]:" << endl; + + for(; start <= end; ++start) { + cout << start << " | " + << std::hex << std::setw(8) << std::setfill('0') + << *start << " | "; + sparc_print(*start); + cout << endl; + } + } + + void SparcInstManip::printInst(unsigned inst) const + { + sparc_print(inst); + fflush(stdout); + } + + uint64_t SparcInstManip::skipFunctionHdr(uint64_t addr) const + { + // For SparcV9, what we're calling the "function header" is the save instruction (if + // present) that occurs as the first instruction of the function. 
+ + unsigned inst = m_pVM->readInstrFrmVm(addr); + assert(RD_FLD(inst, INSTR_OP) == OP_2 && + RD_FLD(inst, INSTR_OP3) == OP3_SAVE && + "Unhandled case: non-save instruction in function header"); + + return addr + getInstWidth(); + } + + void SparcInstManip::generateLoad(uint64_t value, + LogicalRegister dest, + LogicalRegister tmp) + { + // When reg == REG_0, load the 64-bit value into %o0, using %o0 and %o1. + // When reg == REG_1, load the 64-bit value into %o1, using %o1 and %o2. + // The sequence of instructions is placed into the provided instruction vector. + + assert(m_pCurrSnippet && "Invalid snippet for code generation"); + assert(dest != tmp && "Distinct logical registers required"); + std::vector& snippet = *m_pCurrSnippet; + + unsigned initSize = snippet.size(); + unsigned destReg = m_logicalToActualReg[dest]; + unsigned tmpReg = m_logicalToActualReg[tmp]; + + // sethi (upper 22b of upper wrd), %destReg + snippet.push_back(MK_SETHI(destReg, HIGH22(HIGHWORD(value)))); + + // or %o0, (lower 10b of upper wrd), %destReg + snippet.push_back(MK_LOGIC_IMM(OP3_OR, destReg, destReg, LOW10(HIGHWORD(value)))); + + // sllx %o0, 32, %destReg + snippet.push_back(MK_SHIFTX(OP3_SLL, destReg, destReg, 32)); + + // sethi (upper 22b of lwr wrd), %tmpReg + snippet.push_back(MK_SETHI(tmpReg, HIGH22(LOWWORD(value)))); + + // or %destReg, %tmpReg, %destReg + snippet.push_back(MK_LOGIC(OP3_OR, destReg, destReg, tmpReg)); + + // add %destReg, (lwr 10b of lwr wrd), %destReg + snippet.push_back(MK_ADD_R_I(destReg, destReg, LOW10(LOWWORD(value)))); + + assert(snippet.size() - initSize == getGenLoadSize() && + "Unexpected number of instructions in code sequence for 64-bit value -> %dest"); + } + + void SparcInstManip::generateAddressCopy(unsigned loadInst, + LogicalRegister dest, + bool afterSave) + { + // NB: After save instruction has been issued, the output registers are mapped to the + // input registers. 
+ + assert(m_pCurrSnippet && "Invalid snippet for code generation"); + std::vector& snippet = *m_pCurrSnippet; + + unsigned initSize = snippet.size(); + unsigned destReg = m_logicalToActualReg[dest]; + unsigned rs1 = RD_FLD(loadInst, INSTR_RS1); + + if(afterSave) + rs1 = m_outputToInputReg[rs1]; + + if(RD_FLD(loadInst, INSTR_I)) { + // Case 1: load is immediate-valued --> reg, imm value add instruction needed + unsigned imm = RD_FLD(loadInst, INSTR_SIMM13); + snippet.push_back(MK_ADD_R_I(destReg, rs1, imm)); + } + else { + // Case 2: load is register-valued --> reg, reg add instruction needed + unsigned rs2 = RD_FLD(loadInst, INSTR_RS2); + + if(afterSave) + rs2 = m_outputToInputReg[rs2]; + + snippet.push_back(MK_ADD_R_R(destReg, rs1, rs2)); + } + + assert(snippet.size() - initSize == getGenAddressCopySize(loadInst) && + "Unexpected number of instructions in code sequence for address copy"); + } + + void SparcInstManip::generateParamStore(LogicalRegister src, + StackOffset off) + { + assert(m_pCurrSnippet && "Invalid snippet for code generation"); + std::vector& snippet = *m_pCurrSnippet; + + unsigned initSize = snippet.size(); + unsigned srcReg = m_logicalToActualReg[src]; + + snippet.push_back(MK_STX_STACK(srcReg, BIAS + off)); + + assert(snippet.size() - initSize == getGenParamStoreSize() && + "Unexpected number of instructions in code sequence for parameter store"); + } + + void SparcInstManip::generateCall(uint64_t dest, + uint64_t slotBase) + { + assert(m_pCurrSnippet && "Invalid snippet for code generation"); + std::vector& snippet = *m_pCurrSnippet; + + unsigned initSize = snippet.size(); + + // Calculate address of call instruction from slotBase + uint64_t callInstAddr = slotBase + getInstWidth() * snippet.size(); + + // Add call instruction and nop (for call delay slot) to code snippet. 
+ snippet.push_back(getCallInst(dest, callInstAddr)); + snippet.push_back(NOP_INST); + + assert(snippet.size() - initSize == getGenCallSize() && + "Unexpected number of instructions in code sequence for call"); + } + + unsigned SparcInstManip::getRestoreInst() const + { + // restore %g0, 0, %g0 + return MK_RESTORE_IMM(R_G0, R_G0, 0); + } + + void SparcInstManip::generateRestore() + { + assert(m_pCurrSnippet && "Invalid snippet for code generation"); + std::vector& snippet = *m_pCurrSnippet; + + unsigned initSize = snippet.size(); + + snippet.push_back(getRestoreInst()); + + assert(snippet.size() - initSize == getGenRestoreSize() && + "Unexpected number of instructions in code sequence for restore"); + } + + void SparcInstManip::generateSave() + { + assert(m_pCurrSnippet && "Invalid snippet for code generation"); + std::vector& snippet = *m_pCurrSnippet; + + unsigned initSize = snippet.size(); + + // save %sp, -176, %sp + snippet.push_back(MK_SAVE_IMM(R_O6, R_O6, -176)); + + assert(snippet.size() - initSize == getGenSaveSize() && + "Unexpected number of instructions in code sequence for save"); + } + + // TODO: It will be worthwhile to generate calls to functions that spill/restore the + // shared registers instead of dumping all of the code into the current snippet. 
+ + void SparcInstManip::generateRestoreShared(uint64_t restoreFromAddr, + LogicalRegister tmp1, + LogicalRegister tmp2) + { + assert(m_pCurrSnippet && "Invalid snippet for code generation"); + assert(tmp1 != tmp2 && "Distinct logical registers required"); + + std::vector& snippet = *m_pCurrSnippet; + unsigned initSize = snippet.size(); + unsigned tmpReg = m_logicalToActualReg[tmp1]; + + generateLoad(restoreFromAddr, tmp1, tmp2); + snippet.push_back(MK_LOAD_IMM(R_G1, tmpReg, 8)); + snippet.push_back(MK_LOAD_IMM(R_G2, tmpReg, 16)); + snippet.push_back(MK_LOAD_IMM(R_G3, tmpReg, 24)); + snippet.push_back(MK_LOAD_IMM(R_G4, tmpReg, 32)); + snippet.push_back(MK_LOAD_IMM(R_G5, tmpReg, 40)); + snippet.push_back(MK_LOAD_IMM(R_G6, tmpReg, 48)); + snippet.push_back(MK_LOAD_IMM(R_G7, tmpReg, 56)); + + assert(snippet.size() - initSize == getGenRestoreSharedSize() && + "Unexpected number of instructions in code sequence for restore shared"); + } + + void SparcInstManip::generateSpillShared(uint64_t spillToAddr, + LogicalRegister tmp1, + LogicalRegister tmp2) + { + assert(m_pCurrSnippet && "Invalid snippet for code generation"); + assert(tmp1 != tmp2 && "Distinct logical registers required"); + + std::vector& snippet = *m_pCurrSnippet; + unsigned initSize = snippet.size(); + unsigned tmpReg = m_logicalToActualReg[tmp1]; + + generateLoad(spillToAddr, tmp1, tmp2); + snippet.push_back(MK_STORE_IMM(R_G1, tmpReg, 8)); + snippet.push_back(MK_STORE_IMM(R_G2, tmpReg, 16)); + snippet.push_back(MK_STORE_IMM(R_G3, tmpReg, 24)); + snippet.push_back(MK_STORE_IMM(R_G4, tmpReg, 32)); + snippet.push_back(MK_STORE_IMM(R_G5, tmpReg, 40)); + snippet.push_back(MK_STORE_IMM(R_G6, tmpReg, 48)); + snippet.push_back(MK_STORE_IMM(R_G7, tmpReg, 56)); + + assert(snippet.size() - initSize == getGenSpillSharedSize() && + "Unexpected number of instructions in code sequence for spill shared"); + } + + void SparcInstManip::generateBranchAlways(uint64_t dest, + uint64_t slotBase, + unsigned delaySlotInstr) + { + 
assert(m_pCurrSnippet && "Invalid snippet for code generation"); + std::vector& snippet = *m_pCurrSnippet; + + unsigned initSize = snippet.size(); + + // Calculate address of branch instruction from slotBase + uint64_t branchInstAddr = slotBase + getInstWidth() * snippet.size(); + + // Add branch instruction and the specified delay slot instruction to code snippet. + snippet.push_back(getBranchAlways(dest, branchInstAddr, false)); // annul bit low + snippet.push_back(delaySlotInstr); + + assert(snippet.size() - initSize == getGenBranchAlwaysSize() && + "Unexpected number of instruction in code sequence for branch-always"); + } + + void SparcInstManip::findCandidates(uint64_t start, + uint64_t end, + std::vector& candidates) + { + for(uint64_t currAddr = start; currAddr <= end; currAddr += getInstWidth()) { + InstCandidate cand(this); + if(isCandidateLoad(currAddr, end, cand)) + candidates.push_back(cand); + } + } + + static inline bool isLoadHalfWord(unsigned inst) + { + // Returns true if inst is an LDUH instruction + return RD_FLD(inst, INSTR_OP) == OP_3 && + RD_FLD(inst, INSTR_OP3) == OP3_LDUH; + } + + static inline bool isLoadByte(unsigned inst) + { + // Returns true if inst is a LDUB instruction + return RD_FLD(inst, INSTR_OP) == OP_3 && + RD_FLD(inst, INSTR_OP3) == OP3_LDUB; + } + + static inline bool isFPRelative(unsigned inst) + { + return RD_FLD(inst, INSTR_RS1) == R_BP && RD_FLD(inst, INSTR_I) == 1; + } + + static inline bool isSTH(unsigned inst) + { + return RD_FLD(inst, INSTR_OP) == OP_3 && + RD_FLD(inst, INSTR_OP3) == OP3_STH; + } + + static inline bool isSTB(unsigned inst) + { + return RD_FLD(inst, INSTR_OP) == OP_3 && + RD_FLD(inst, INSTR_OP3) == OP3_STB; + } + + static inline unsigned getLoadDest(unsigned inst) + { + // Assumes that inst is a load instruction, and returns the register ID of its + // destination operand. 
+ + return RD_FLD(inst, INSTR_RD); + } + + static inline unsigned getStoreSrc(unsigned inst) + { + // Assumes that inst is a stb/sth instruction, and returns the register ID of its + // source operand (by source, we don't mean rs1 or rs2, but rather rd, which specifies + // the register which contains the value being stored); + + return RD_FLD(inst, INSTR_RD); + } + + static inline unsigned getFPOffset(unsigned inst) + { + assert(isFPRelative(inst) && "Expect instruction to be FP-relative"); + return RD_FLD(inst, INSTR_SIMM13); + } + + bool SparcInstManip::determineSchema(InstCandidate& cand, + uint64_t end, + std::pair& load, + std::pair& store) + { + // inst1 contains the load instruction (the actual candidate). inst2 contains the + // corresponding store instruction, which is either STB or STH. If STB, take actions + // for schema 1, and if STH, schema 2. + + if(isSTB(store.second)) { + // Schema 1: "direct" pattern + cand.setType(InstCandidate::DIRECT); + cand.push_back(load); + cand.push_back(store); + return true; + } + else { + assert(isSTH(store.second) && "Instruction must be STH"); + + // We have potentially discovered an instance of schema 2, but must search + // more to determine if this is the case. + // + // KIS heuristic concession: The STH given by storeInst *must* be storing to the stack + // in an fp-relative manner; if not, we deny the originating load's candidacy. + + if(isFPRelative(store.second)) { + // Search forward until a LDUB from same stack location (+1) as the STH wrote to + // is encountered. The +1 in specified in the FP offset we're searching for is + // due to the fact that we stored a half-word but are loading a byte. + + if(uint64_t stkLoadAddr = findNextStackLoad(store.first, end, getFPOffset(store.second) + 1)) { + // Last schema-2 search: find the STB instruction that stores from the + // LDUB's destination register. 
+ + unsigned ldubInst = m_pVM->readInstrFrmVm(stkLoadAddr); + uint64_t stbAddr = findNextStore(stkLoadAddr, end, getLoadDest(ldubInst)); + unsigned stbInst; + + if(stbAddr && isSTB((stbInst = m_pVM->readInstrFrmVm(stbAddr)))) { + + // All of the criteria have been met for Schema 2, the "stack transfer" + // pattern. + + cand.setType(InstCandidate::STACK_XFER); + cand.push_back(load); + cand.push_back(store); + cand.push_back(stkLoadAddr, ldubInst); + cand.push_back(stbAddr, stbInst); + return true; + } + } + } + } + + return false; + } + + bool SparcInstManip::isCandidateLoad(uint64_t addr, + uint64_t end, + InstCandidate& cand) + { + // {{{ Description of heuristic + + // A candidate load is the first instruction in a sequence (with an arbitrary number + // of instructions in between elements of this sequence) that is a "signature" for the + // particular load of a volatile variable which needs to be replaced with a call to an + // instrumentation function. + // + // Detecting this candidacy condition is accomplished via the application of a + // relatively simple heuristic. The signature sequence always begins with a "load + // half-word" and ends with a "store byte". However, we cannot guarantee that the + // sequence looks like: + // + // lduh [mem1], %r[d] | + // ... | "Schema 1" + // stb %r[d], [mem2] | + // + // although this is a perfectly valid pattern to look for. However, unoptimized code + // will frequently transfer this data using the stack, as in this instruction sequence: + // + // lduh [mem1], %r[d] | + // ... | + // sth %r[d], [stack loc] | + // ... | "Schema 2" + // ldub [stack loc + 1], %r[d'] | + // ... | + // stb %r[d'], [mem2] | + // + // The current heuristic catches both of these patterns (designated "direct" and "stack + // transfer" respectively), and will be extended as insufficiencies in the heuristic + // are revealed.
+ + // }}} + + // Address of potential candidate load is given by 'addr', maximum search address is + // given by 'end' + + unsigned inst = m_pVM->readInstrFrmVm(addr); + + if(isLoadHalfWord(inst)) { + // Search forward until a sth/stb from inst's target register is encountered + if(uint64_t storeAddr = findNextStore(addr, end, getLoadDest(inst))) { + + // If STB, take actions for schema 1, otherwise check for schema 2 conditions. + + unsigned storeInst = m_pVM->readInstrFrmVm(storeAddr); + std::pair inst1(addr, inst); + std::pair inst2(storeAddr, storeInst); + + return determineSchema(cand, end, inst1, inst2); + } + } + + return false; + } + + uint64_t SparcInstManip::findNextStackLoad(uint64_t addr, + uint64_t end, + unsigned fpOffset) + { + // Sweep the range of addresses starting at addr, up to end, looking for a load byte + // that is loading from [%fp + fpOffset]. Return the first such instance, or 0 if such + // an instance cannot be found. + + for(uint64_t currAddr = addr; currAddr <= end; currAddr += getInstWidth()) { + unsigned inst = m_pVM->readInstrFrmVm(currAddr); + + if(isLoadByte(inst) && isFPRelative(inst) && getFPOffset(inst) == fpOffset) + return currAddr; + } + + return 0; + } + + uint64_t SparcInstManip::findNextStore(uint64_t addr, + uint64_t end, + unsigned srcReg) + { + // Sweep the range of addresses starting at addr, up to end, looking for stb or sth + // instructions that are storing _from_ 'srcReg'. Return the first such instance, or + // 0 if such an instance cannot be found.
+ + for(uint64_t currAddr = addr; currAddr <= end; currAddr += getInstWidth()) { + unsigned inst = m_pVM->readInstrFrmVm(currAddr); + if(isSTH(inst) || isSTB(inst) && getStoreSrc(inst) == srcReg) + return currAddr; + } + + return 0; + } Index: llvm/lib/Reoptimizer/Inst/SparcInstManip.h diff -c /dev/null llvm/lib/Reoptimizer/Inst/SparcInstManip.h:1.1 *** /dev/null Tue Apr 29 22:08:13 2003 --- llvm/lib/Reoptimizer/Inst/SparcInstManip.h Tue Apr 29 22:08:03 2003 *************** *** 0 **** --- 1,179 ---- + //////////////// + // programmer: Joel Stanley + // date: Tue Apr 29 21:17:33 CDT 2003 + // fileid: SparcInstManip.h + // purpose: Provides description of the SparcV9-specific InstManip class. In particular, + // SparcInstManip wraps the BinInterface/TraceCache macros and utilities. + // + + #ifndef _INCLUDED_SPARCINSTMANIP_H + #define _INCLUDED_SPARCINSTMANIP_H + + #include "llvm/Reoptimizer/BinInterface/sparcdis.h" + #include "llvm/Reoptimizer/InstrUtils.h" // getCallInstr, getUndepJumpInstr, etc.
+ + #include "InstManip.h" + + class SparcInstManip : public InstManip + { + public: + SparcInstManip(VirtualMem* vm); + + // Offsets in stack frame for function parameters + enum StackOffset { + PARAM_0 = 128, + PARAM_1 = 136 + }; + + virtual void printRange(unsigned* start, unsigned* end) const; + virtual void printInst(unsigned inst) const; + + //// + + uint64_t skipFunctionHdr(uint64_t addr) const; + + void startCode(std::vector& snippet) { m_pCurrSnippet = &snippet; } + void endCode() { m_pCurrSnippet = 0; } + + void generateAddressCopy(unsigned loadInst, + LogicalRegister dest, + bool afterSave); + + void generateBranchAlways(uint64_t dest, + uint64_t slotBase, + unsigned delaySlotInstr = NOP_INST); + + void generateCall(uint64_t dest, uint64_t slotBase); + + void generateLoad(uint64_t value, + LogicalRegister dest, + LogicalRegister tmp); + + void generateParamStore(LogicalRegister src, StackOffset off); + + void generateRestore(); + void generateSave(); + + void generateRestoreShared(uint64_t restoreFromAddr, + LogicalRegister tmp1 = REG_0, + LogicalRegister tmp2 = REG_1); + + void generateSpillShared(uint64_t spillFromAddr, + LogicalRegister tmp1 = REG_0, + LogicalRegister tmp2 = REG_1); + + void findCandidates(uint64_t start, + uint64_t end, + std::vector& candidates); + + unsigned getRestoreInst() const; + inline unsigned getBranchAlways(uint64_t dest, uint64_t pc, bool annulHigh = true) const; + inline unsigned getCallInst(uint64_t dest, uint64_t pc) const; + inline bool isBranch(unsigned inst) const; + + // These are functions so when SparcInstManip is superclassed, they'd become virtual, etc. + // In the short term we could use class constants, but this is more clear. 
+ + unsigned getNOP() const { return NOP_INST; } + unsigned getGenLoadSize() const { return 6; } + unsigned getGenCallSize() const { return 2; } + unsigned getGenBranchAlwaysSize() const { return 2; } + unsigned getGenSaveSize() const { return 1; } + unsigned getGenParamStoreSize() const { return 1; } + unsigned getGenSpillSharedSize() const { return getGenLoadSize() + SHARED_SIZE; } + unsigned getGenRestoreSharedSize() const { return getGenLoadSize() + SHARED_SIZE; } + unsigned getGenRestoreSize() const { return 1; } + virtual unsigned getInstWidth() const { return 4; } + unsigned getSharedSize() const { return SHARED_SIZE; } + + inline unsigned getGenAddressCopySize(unsigned loadInst) const; + + uint64_t getPhase3SpillAddr() { return (uint64_t) sm_phase3SpillRegion; } + + private: + SparcInstManip() {} + typedef std::map OutputToInputRegMap; + + bool isCandidateLoad(uint64_t addr, + uint64_t end, + InstCandidate& cand); + + bool determineSchema(InstCandidate& cand, + uint64_t end, + std::pair& load, + std::pair& store); + + uint64_t findNextStore(uint64_t addr, + uint64_t end, + unsigned srcReg); + + uint64_t findNextStackLoad(uint64_t addr, + uint64_t end, + unsigned fpOffset); + + std::vector* m_pCurrSnippet; + OutputToInputRegMap m_outputToInputReg; // Maps output register -> input register + + // Branch-always (annul bit high) instruction base (i.e., address not filled in yet) + static const unsigned BRANCH_ALWAYS_BASE_ANNUL; + + // Branch-always (annul bit low) instruction base (i.e., address not filled in yet) + static const unsigned BRANCH_ALWAYS_BASE; + + // NOP instruction + static const unsigned NOP_INST; + + // Size (in number of 64-bit words) required for storing shared registers + static const unsigned SHARED_SIZE = 7; + + // Sparc-specific constant used in SP manipulations + static const unsigned BIAS; + + // Memory region into which to spill shared registers when executing a phase 4 slot + // (i.e., the slot that invokes the phase4 function, the
slot written by phase 3 + // invocations). NB: One region is sufficient and we do not need stack semantics + // because only one activation of a phase 4 slot ever occurs at a given time (assuming + // single-threaded execution). + + static uint64_t sm_phase3SpillRegion[SHARED_SIZE]; + }; + + unsigned SparcInstManip::getBranchAlways(uint64_t dest, uint64_t pc, bool annul) const + { + // dest is the destination address, pc is the value of the program counter when the + // branch instruction is executed (i.e., the address of the branch instruction). NB: + // annul selects the annul-bit-high base (true) or the annul-bit-low base (false). + + return getUndepJumpInstr(annul ? BRANCH_ALWAYS_BASE_ANNUL : BRANCH_ALWAYS_BASE, + dest, + pc); + } + + unsigned SparcInstManip::getCallInst(uint64_t dest, uint64_t pc) const + { + // dest is the destination address to call, pc is the value of the program counter + // when the call instruction is executed (i.e., the address of the call + // instruction). + + return getCallInstr(dest, pc); + } + + bool SparcInstManip::isBranch(unsigned inst) const + { + return ::isBranchInstr(inst); + } + + unsigned SparcInstManip::getGenAddressCopySize(unsigned loadInst) const + { + // Determine the number of instructions required to load the address value used by the + // load instruction into some register.
+ + // Case 1: load is immediate-valued --> add-immediate instruction needed, size is 1 inst + // Case 2: load is register-valued --> add-registers instruction needed, size is 1 inst + + return 1; + } + + #endif // _INCLUDED_SPARCINSTMANIP_H + + Index: llvm/lib/Reoptimizer/Inst/InstManip.cpp diff -u llvm/lib/Reoptimizer/Inst/InstManip.cpp:1.9 llvm/lib/Reoptimizer/Inst/InstManip.cpp:1.10 --- llvm/lib/Reoptimizer/Inst/InstManip.cpp:1.9 Tue Apr 29 21:08:42 2003 +++ llvm/lib/Reoptimizer/Inst/InstManip.cpp Tue Apr 29 22:08:03 2003 @@ -6,17 +6,10 @@ #include #include + #include "llvm/Reoptimizer/VirtualMem.h" -#include "llvm/Reoptimizer/BinInterface/sparc9.h" -#include "llvm/Reoptimizer/BinInterface/bitmath.h" #include "InstManip.h" -const unsigned InstManip::NOP_INST = 0x01000000; -const unsigned InstManip::BRANCH_ALWAYS_BASE = 0x10480000; -const unsigned InstManip::BRANCH_ALWAYS_BASE_ANNUL = 0x30480000; -const unsigned InstManip::BIAS = 2047; -uint64_t InstManip::sm_phase3SpillRegion[InstManip::SHARED_SIZE]; - using std::cout; using std::cerr; using std::endl; @@ -39,7 +32,8 @@ for(std::vector >::const_iterator i = m_insts.begin(), e = m_insts.end(); i != e; ++i) { ostr << std::hex << " (" << i->first << ", " << std::flush; - sparc_print(i->second); + m_pIM->printInst(i->second); + //sparc_print(i->second); // FIXME fflush(stdout); ostr << ")" << endl; } @@ -47,493 +41,11 @@ } InstManip::InstManip(VirtualMem* vm): - m_pVM(vm), - m_pCurrSnippet(0) + m_pVM(vm) { assert(vm && "InstManip requires valid VirtualMem instance"); - - // Populate logical->actual register map. Since this InstManip class is - // SparcV9-specific, we map to the values used by the BinInterface library and macros. - - m_logicalToActualReg[REG_0] = R_O0; - m_logicalToActualReg[REG_1] = R_O1; - m_logicalToActualReg[REG_2] = R_O2; - - // Populate output->input register map. This is SparcV9 specific and corresponds to - // the register mapping that occurs after a 'save' instruction is issued. 
Shared and - // local registers map to themselves. - - m_outputToInputReg[R_O0] = R_I0; - m_outputToInputReg[R_O1] = R_I1; - m_outputToInputReg[R_O2] = R_I2; - m_outputToInputReg[R_O3] = R_I3; - m_outputToInputReg[R_O4] = R_I4; - m_outputToInputReg[R_O5] = R_I5; - m_outputToInputReg[R_O6] = R_I6; - m_outputToInputReg[R_O7] = R_I7; - - for(unsigned i = R_G0; i <= R_G7; ++i) - m_outputToInputReg[i] = i; - for(unsigned i = R_L0; i <= R_L7; ++i) - m_outputToInputReg[i] = i; -} - -void InstManip::printRange(unsigned* start, - unsigned* end) const -{ - // Dumps contents (and corresponding disassembly) of memory range given by range - // to stdout. TODO: Parameterize by an ostream instance; cannot do this yet - // because BinInterface is hard-coded to use printf and must be changed. - - cout << "Sparc dissassembly of range [" - << start << ", " << end << "]:" << endl; - - for(; start <= end; ++start) { - cout << start << " | " - << std::hex << std::setw(8) << std::setfill('0') - << *start << " | "; - sparc_print(*start); - cout << endl; - } -} - -uint64_t InstManip::skipFunctionHdr(uint64_t addr) const -{ - // For SparcV9, what we're calling the "function header" is the save instruction (if - // present) that occurs as the first instruction of the function. - - unsigned inst = m_pVM->readInstrFrmVm(addr); - assert(RD_FLD(inst, INSTR_OP) == OP_2 && - RD_FLD(inst, INSTR_OP3) == OP3_SAVE && - "Unhandled case: non-save instruction in function header"); - - return addr + getInstWidth(); -} - -void InstManip::generateLoad(uint64_t value, - LogicalRegister dest, - LogicalRegister tmp) -{ - // When reg == REG_0, load the 64-bit value into %o0, using %o0 and %o1. - // When reg == REG_1, load the 64-bit value into %o1, using %o1 and %o2. - // The sequence of instructions is placed into the provided instruction vector. 
- - assert(m_pCurrSnippet && "Invalid snippet for code generation"); - assert(dest != tmp && "Distinct logical registers required"); - std::vector& snippet = *m_pCurrSnippet; - - unsigned initSize = snippet.size(); - unsigned destReg = m_logicalToActualReg[dest]; - unsigned tmpReg = m_logicalToActualReg[tmp]; - - // sethi (upper 22b of upper wrd), %destReg - snippet.push_back(MK_SETHI(destReg, HIGH22(HIGHWORD(value)))); - - // or %o0, (lower 10b of upper wrd), %destReg - snippet.push_back(MK_LOGIC_IMM(OP3_OR, destReg, destReg, LOW10(HIGHWORD(value)))); - - // sllx %o0, 32, %destReg - snippet.push_back(MK_SHIFTX(OP3_SLL, destReg, destReg, 32)); - - // sethi (upper 22b of lwr wrd), %tmpReg - snippet.push_back(MK_SETHI(tmpReg, HIGH22(LOWWORD(value)))); - - // or %destReg, %tmpReg, %destReg - snippet.push_back(MK_LOGIC(OP3_OR, destReg, destReg, tmpReg)); - - // add %destReg, (lwr 10b of lwr wrd), %destReg - snippet.push_back(MK_ADD_R_I(destReg, destReg, LOW10(LOWWORD(value)))); - - assert(snippet.size() - initSize == getGenLoadSize() && - "Unexpected number of instructions in code sequence for 64-bit value -> %dest"); -} - -void InstManip::generateAddressCopy(unsigned loadInst, - LogicalRegister dest, - bool afterSave) -{ - // NB: After save instruction has been issued, the output registers are mapped to the - // input registers. 
- - assert(m_pCurrSnippet && "Invalid snippet for code generation"); - std::vector& snippet = *m_pCurrSnippet; - - unsigned initSize = snippet.size(); - unsigned destReg = m_logicalToActualReg[dest]; - unsigned rs1 = RD_FLD(loadInst, INSTR_RS1); - - if(afterSave) - rs1 = m_outputToInputReg[rs1]; - - if(RD_FLD(loadInst, INSTR_I)) { - // Case 1: load is immediate-valued --> reg, imm value add instruction needed - unsigned imm = RD_FLD(loadInst, INSTR_SIMM13); - snippet.push_back(MK_ADD_R_I(destReg, rs1, imm)); - } - else { - // Case 2: load is register-valued --> reg, reg add instruction needed - unsigned rs2 = RD_FLD(loadInst, INSTR_RS2); - - if(afterSave) - rs2 = m_outputToInputReg[rs2]; - - snippet.push_back(MK_ADD_R_R(destReg, rs1, rs2)); - } - - assert(snippet.size() - initSize == getGenAddressCopySize(loadInst) && - "Unexpected number of instructions in code sequence for address copy"); -} - -void InstManip::generateParamStore(LogicalRegister src, - StackOffset off) -{ - assert(m_pCurrSnippet && "Invalid snippet for code generation"); - std::vector& snippet = *m_pCurrSnippet; - - unsigned initSize = snippet.size(); - unsigned srcReg = m_logicalToActualReg[src]; - - snippet.push_back(MK_STX_STACK(srcReg, BIAS + off)); - - assert(snippet.size() - initSize == getGenParamStoreSize() && - "Unexpected number of instructions in code sequence for parameter store"); -} - -void InstManip::generateCall(uint64_t dest, - uint64_t slotBase) -{ - assert(m_pCurrSnippet && "Invalid snippet for code generation"); - std::vector& snippet = *m_pCurrSnippet; - - unsigned initSize = snippet.size(); - - // Calculate address of call instruction from slotBase - uint64_t callInstAddr = slotBase + getInstWidth() * snippet.size(); - - // Add call instruction and nop (for call delay slot) to code snippet. 
- snippet.push_back(getCallInst(dest, callInstAddr)); - snippet.push_back(NOP_INST); - - assert(snippet.size() - initSize == getGenCallSize() && - "Unexpected number of instructions in code sequence for call"); -} - -unsigned InstManip::getRestoreInst() const -{ - // restore %g0, 0, %g0 - return MK_RESTORE_IMM(R_G0, R_G0, 0); -} - -void InstManip::generateRestore() -{ - assert(m_pCurrSnippet && "Invalid snippet for code generation"); - std::vector& snippet = *m_pCurrSnippet; - - unsigned initSize = snippet.size(); - - snippet.push_back(getRestoreInst()); - - assert(snippet.size() - initSize == getGenRestoreSize() && - "Unexpected number of instructions in code sequence for restore"); -} - -void InstManip::generateSave() -{ - assert(m_pCurrSnippet && "Invalid snippet for code generation"); - std::vector& snippet = *m_pCurrSnippet; - - unsigned initSize = snippet.size(); - - // save %sp, -176, %sp - snippet.push_back(MK_SAVE_IMM(R_O6, R_O6, -176)); - - assert(snippet.size() - initSize == getGenSaveSize() && - "Unexpected number of instructions in code sequence for save"); -} - -// TODO: It will be worthwhile to generate calls to functions that spill/restore the -// shared registers instead of dumping all of the code into the current snippet. 
- -void InstManip::generateRestoreShared(uint64_t restoreFromAddr, - LogicalRegister tmp1, - LogicalRegister tmp2) -{ - assert(m_pCurrSnippet && "Invalid snippet for code generation"); - assert(tmp1 != tmp2 && "Distinct logical registers required"); - - std::vector& snippet = *m_pCurrSnippet; - unsigned initSize = snippet.size(); - unsigned tmpReg = m_logicalToActualReg[tmp1]; - - generateLoad(restoreFromAddr, tmp1, tmp2); - snippet.push_back(MK_LOAD_IMM(R_G1, tmpReg, 8)); - snippet.push_back(MK_LOAD_IMM(R_G2, tmpReg, 16)); - snippet.push_back(MK_LOAD_IMM(R_G3, tmpReg, 24)); - snippet.push_back(MK_LOAD_IMM(R_G4, tmpReg, 32)); - snippet.push_back(MK_LOAD_IMM(R_G5, tmpReg, 40)); - snippet.push_back(MK_LOAD_IMM(R_G6, tmpReg, 48)); - snippet.push_back(MK_LOAD_IMM(R_G7, tmpReg, 56)); - - assert(snippet.size() - initSize == getGenRestoreSharedSize() && - "Unexpected number of instructions in code sequence for restore shared"); } -void InstManip::generateSpillShared(uint64_t spillToAddr, - LogicalRegister tmp1, - LogicalRegister tmp2) +InstManip::~InstManip() { - assert(m_pCurrSnippet && "Invalid snippet for code generation"); - assert(tmp1 != tmp2 && "Distinct logical registers required"); - - std::vector& snippet = *m_pCurrSnippet; - unsigned initSize = snippet.size(); - unsigned tmpReg = m_logicalToActualReg[tmp1]; - - generateLoad(spillToAddr, tmp1, tmp2); - snippet.push_back(MK_STORE_IMM(R_G1, tmpReg, 8)); - snippet.push_back(MK_STORE_IMM(R_G2, tmpReg, 16)); - snippet.push_back(MK_STORE_IMM(R_G3, tmpReg, 24)); - snippet.push_back(MK_STORE_IMM(R_G4, tmpReg, 32)); - snippet.push_back(MK_STORE_IMM(R_G5, tmpReg, 40)); - snippet.push_back(MK_STORE_IMM(R_G6, tmpReg, 48)); - snippet.push_back(MK_STORE_IMM(R_G7, tmpReg, 56)); - - assert(snippet.size() - initSize == getGenSpillSharedSize() && - "Unexpected number of instructions in code sequence for spill shared"); -} - -void InstManip::generateBranchAlways(uint64_t dest, - uint64_t slotBase, - unsigned delaySlotInstr) -{ - 
assert(m_pCurrSnippet && "Invalid snippet for code generation"); - std::vector& snippet = *m_pCurrSnippet; - - unsigned initSize = snippet.size(); - - // Calculate address of branch instruction from slotBase - uint64_t branchInstAddr = slotBase + getInstWidth() * snippet.size(); - - // Add branch instruction and the specified delay slot instruction to code snippet. - snippet.push_back(getBranchAlways(dest, branchInstAddr, false)); // annul bit low - snippet.push_back(delaySlotInstr); - - assert(snippet.size() - initSize == getGenBranchAlwaysSize() && - "Unexpected number of instruction in code sequence for branch-always"); -} - -void InstManip::findCandidates(uint64_t start, - uint64_t end, - std::vector& candidates) -{ - for(uint64_t currAddr = start; currAddr <= end; currAddr += getInstWidth()) { - InstCandidate cand; - if(isCandidateLoad(currAddr, end, cand)) - candidates.push_back(cand); - } -} - -static inline bool isLoadHalfWord(unsigned inst) -{ - // Returns true if inst is an LDUH instruction - return RD_FLD(inst, INSTR_OP) == OP_3 && - RD_FLD(inst, INSTR_OP3) == OP3_LDUH; -} - -static inline bool isLoadByte(unsigned inst) -{ - // Returns true if inst is a LDUB instruction - return RD_FLD(inst, INSTR_OP) == OP_3 && - RD_FLD(inst, INSTR_OP3) == OP3_LDUB; -} - -static inline bool isFPRelative(unsigned inst) -{ - return RD_FLD(inst, INSTR_RS1) == R_BP && RD_FLD(inst, INSTR_I) == 1; -} - -static inline bool isSTH(unsigned inst) -{ - return RD_FLD(inst, INSTR_OP) == OP_3 && - RD_FLD(inst, INSTR_OP3) == OP3_STH; -} - -static inline bool isSTB(unsigned inst) -{ - return RD_FLD(inst, INSTR_OP) == OP_3 && - RD_FLD(inst, INSTR_OP3) == OP3_STB; -} - -static inline unsigned getLoadDest(unsigned inst) -{ - // Assumes that inst is a load instruction, and returns the register ID of its - // destination operand. 
- - return RD_FLD(inst, INSTR_RD); -} - -static inline unsigned getStoreSrc(unsigned inst) -{ - // Assumes that inst is a stb/sth instruction, and returns the register ID of its - // source operand (by source, we don't mean rs1 or rs2, but rather rd, which specifies - // the register which contains the value being stored); - - return RD_FLD(inst, INSTR_RD); -} - -static inline unsigned getFPOffset(unsigned inst) -{ - assert(isFPRelative(inst) && "Expect instruction to be FP-relative"); - return RD_FLD(inst, INSTR_SIMM13); -} - -bool InstManip::determineSchema(InstCandidate& cand, - uint64_t end, - std::pair& load, - std::pair& store) -{ - // inst1 contains the load instruction (the actual candidate). inst2 contains the - // corresponding store instruction, which is either STB or STH. If STB, take actions - // for schema 1, and if STH, schema 2. - - if(isSTB(store.second)) { - // Schema 1: "direct" pattern - cand.setType(InstCandidate::DIRECT); - cand.push_back(load); - cand.push_back(store); - return true; - } - else { - assert(isSTH(store.second) && "Instruction must be STH"); - - // We have potentially discovered an instance of schema 2, but must search - // more to determine if this is the case. - // - // KIS heuristic concession: The STH given by storeInst *must* be storing to the stack - // in an fp-relative manner; if not, we deny the originating load's candidacy. - - if(isFPRelative(store.second)) { - // Search forward until a LDUB from same stack location (+1) as the STH wrote to - // is encountered. The +1 specified in the FP offset we're searching for is - // due to the fact that we stored a half-word but are loading a byte. - - if(uint64_t stkLoadAddr = findNextStackLoad(store.first, end, getFPOffset(store.second) + 1)) { - // Last schema-2 search: find the STB instruction that stores from the - // LDUB's destination register. 
- - unsigned ldubInst = m_pVM->readInstrFrmVm(stkLoadAddr); - uint64_t stbAddr = findNextStore(stkLoadAddr, end, getLoadDest(ldubInst)); - unsigned stbInst; - - if(stbAddr && isSTB((stbInst = m_pVM->readInstrFrmVm(stbAddr)))) { - - // All of the criteria have been met for Schema 2, the "stack transfer" - // pattern. - - cand.setType(InstCandidate::STACK_XFER); - cand.push_back(load); - cand.push_back(store); - cand.push_back(stkLoadAddr, ldubInst); - cand.push_back(stbAddr, stbInst); - return true; - } - } - } - } - - return false; -} - -bool InstManip::isCandidateLoad(uint64_t addr, - uint64_t end, - InstCandidate& cand) -{ - // {{{ Description of heuristic - - // A candidate load is the first instruction in a sequence (with an arbitrary number - // of instructions in between elements of this sequence) that is a "signature" for the - // particular load of a volatile variable which needs to be replaced with a call to an - // instrumentation function. - // - // Detecting this candidacy condition is accomplished via the application of a - // relatively simple heuristic. The signature sequence always begins with a "load - // half-word" and ends with a "store byte". However, we cannot guarantee that the - // sequence looks like: - // - // lduh [mem1], %r[d] | - // ... | "Schema 1" - // stb %r[d], [mem2] | - // - // although this is a perfectly valid pattern to look for. However, unoptimized code - // will frequently transfer this data using the stack, as in this instruction sequence: - // - // lduh [mem1] %r[d] | - // ... | - // sth %r[d], [stack loc] | - // ... | "Schema 2" - // lduh [stack loc], %r[d'] | - // ... | - // stb %r[d'], [mem2] | - // - // The current heuristic catches both of these patterns (designated "direct" and "stack - // transfer" respectively), and will be extended as insufficiencies in the heuristic - // are revealed. 
- - // }}} - - // Address of potential candidate load is given by 'addr', maximum search address is - // given by 'end' - - unsigned inst = m_pVM->readInstrFrmVm(addr); - - if(isLoadHalfWord(inst)) { - // Search forward until a sth/stb from inst's target register is encountered - if(uint64_t storeAddr = findNextStore(addr, end, getLoadDest(inst))) { - - // If STB, take actions for schema 1, otherwise check for schema 2 conditions. - - unsigned storeInst = m_pVM->readInstrFrmVm(storeAddr); - std::pair inst1(addr, inst); - std::pair inst2(storeAddr, storeInst); - - return determineSchema(cand, end, inst1, inst2); - } - } - - return false; -} - -uint64_t InstManip::findNextStackLoad(uint64_t addr, - uint64_t end, - unsigned fpOffset) -{ - // Sweep the range of addresses starting at addr, up to end, looking for a load byte - // that is loading from [%fp + fpOffset]. Return the first such instance, or 0 if such - // an instance cannot be found. - - for(uint64_t currAddr = addr; currAddr <= end; currAddr += getInstWidth()) { - unsigned inst = m_pVM->readInstrFrmVm(currAddr); - - if(isLoadByte(inst) && isFPRelative(inst) && getFPOffset(inst) == fpOffset) - return currAddr; - } - - return 0; -} - -uint64_t InstManip::findNextStore(uint64_t addr, - uint64_t end, - unsigned srcReg) -{ - // Sweep the range of addresses starting at addr, up to end, looking for stb or sth - // instructions that are storing _from_ 'srcReg'. Return the first such instance, or - // 0 if such an instance cannot be found. 
- - for(uint64_t currAddr = addr; currAddr <= end; currAddr += getInstWidth()) { - unsigned inst = m_pVM->readInstrFrmVm(currAddr); - if(isSTH(inst) || isSTB(inst) && getStoreSrc(inst) == srcReg) - return currAddr; - } - - return 0; } Index: llvm/lib/Reoptimizer/Inst/InstManip.h diff -u llvm/lib/Reoptimizer/Inst/InstManip.h:1.10 llvm/lib/Reoptimizer/Inst/InstManip.h:1.11 --- llvm/lib/Reoptimizer/Inst/InstManip.h:1.10 Tue Apr 29 21:08:42 2003 +++ llvm/lib/Reoptimizer/Inst/InstManip.h Tue Apr 29 22:08:03 2003 @@ -2,24 +2,63 @@ // programmer: Joel Stanley // date: Tue Apr 8 22:42:14 CDT 2003 // fileid: InstManip.h -// purpose: InstManip is a wrapper class around any BinInterface macros/mechanisms, as -// well as the TraceCache "instruction utilities", all which are (currently) -// SparcV9-specific. This class exists both for conceptual clarity and to facilitate -// the hiding of Sparc-specific code from the Phase 2-4 actions (and thus making it -// easier to use the transformations on other platforms in the future; we should be -// able to change which instruction manipulator object is instantiated, after making -// the appropriate superclass, etc). +// purpose: InstManip is a (pure virtual) class that hides platform-specific +// instruction manipulation behind a common interface, and provides clients with +// various instruction manipulation utilities. Only two relevant assumptions are made: +// +// * The TraceCache objects (TraceCache, MemoryManager, VirtualMem, etc) from the +// Reoptimizer library work in an appropriate manner on the given platform. +// +// * uint64_t is used for addresses, and unsigned is used for instruction words. +// +// Better parameterization of type attributes (perhaps by making it a template class?) +// is on the TODO list. This is currently difficult because the aforementioned +// Reoptimizer classes are not parameterized. 
#ifndef _INCLUDED_INSTMANIP_H #define _INCLUDED_INSTMANIP_H #include #include -#include "llvm/Reoptimizer/BinInterface/sparcdis.h" -#include "llvm/Reoptimizer/InstrUtils.h" // getCallInstr, getUndepJumpInstr, etc. class VirtualMem; +class InstManip +{ + public: + InstManip(VirtualMem* vm); + virtual ~InstManip(); + + // Logical registers used by clients of this class, mapped to machine-specific IDs + // by the logical -> actual register map. + enum LogicalRegister { + REG_0, + REG_1, + REG_2 + }; + + virtual void printRange(unsigned* start, unsigned* end) const = 0; + virtual void printInst(unsigned inst) const = 0; + virtual unsigned getInstWidth() const = 0; + + inline void printRange(uint64_t start, uint64_t end) const; + + protected: + InstManip() {} + + typedef std::map LogicalToActualRegMap; + + LogicalToActualRegMap m_logicalToActualReg; // Maps logical -> actual register + VirtualMem* m_pVM; +}; + +void InstManip::printRange(uint64_t start, uint64_t end) const +{ + printRange((unsigned*) start, (unsigned*) end); +} + +//////////////// + // InstCandidate is a class that represents a location in the code that is determined to // be a candidate for instrumentation. Because the transformation action required for a // particular candidate requires auxiliary information (such as other instructions found @@ -31,8 +70,9 @@ public: enum CandType { DIRECT, STACK_XFER }; - InstCandidate() {} - InstCandidate(CandType type): m_type(type) {} + InstCandidate(): m_pIM(0) {} + InstCandidate(InstManip* pIM): m_pIM(pIM) {} + InstCandidate(InstManip* pIM, CandType type): m_pIM(pIM), m_type(type) {} void setType(CandType type) { m_type = type; } bool isDirect() const { return m_type == DIRECT; } @@ -66,203 +106,15 @@ void print(std::ostream& ostr) const; protected: - CandType m_type; + InstManip* m_pIM; + CandType m_type; // Each element of this vector holds a (address, inst) pair. 
std::vector > m_insts; -}; - -std::ostream& operator<<(std::ostream& ostr, const InstCandidate& cand); - -class InstManip -{ - public: - InstManip(VirtualMem* vm); - - typedef std::pair Inst; // (location, inst word) pair - - // Logical registers used by clients of this class, mapped to machine-specific IDs - // by the logical -> actual register map. - enum LogicalRegister { - REG_0, - REG_1, - REG_2 - }; - // Offsets in stack frame for function parameters - enum StackOffset { - PARAM_0 = 128, - PARAM_1 = 136 - }; - - void printRange(unsigned* start, unsigned* end) const; - inline void printRange(uint64_t start, uint64_t end) const; - - inline void printInst(unsigned inst) const; - inline void printInst(unsigned* instAddr) const; - - uint64_t skipFunctionHdr(uint64_t addr) const; - - void startCode(std::vector& snippet) { m_pCurrSnippet = &snippet; } - void endCode() { m_pCurrSnippet = 0; } - - void generateAddressCopy(unsigned loadInst, - LogicalRegister dest, - bool afterSave); - - void generateBranchAlways(uint64_t dest, - uint64_t slotBase, - unsigned delaySlotInstr = NOP_INST); - - void generateCall(uint64_t dest, uint64_t slotBase); - - void generateLoad(uint64_t value, - LogicalRegister dest, - LogicalRegister tmp); - - void generateParamStore(LogicalRegister src, StackOffset off); - - void generateRestore(); - void generateSave(); - - void generateRestoreShared(uint64_t restoreFromAddr, - LogicalRegister tmp1 = REG_0, - LogicalRegister tmp2 = REG_1); - - void generateSpillShared(uint64_t spillFromAddr, - LogicalRegister tmp1 = REG_0, - LogicalRegister tmp2 = REG_1); - - void findCandidates(uint64_t start, - uint64_t end, - std::vector& candidates); - - unsigned getRestoreInst() const; - inline unsigned getBranchAlways(uint64_t dest, uint64_t pc, bool annulHigh = true) const; - inline unsigned getCallInst(uint64_t dest, uint64_t pc) const; - inline bool isBranch(unsigned inst) const; - - // These are functions so when InstManip is superclassed, they'd become 
virtual, etc. - // In the short term we could use class constants, but this is more clear. - - unsigned getNOP() const { return NOP_INST; } - unsigned getGenLoadSize() const { return 6; } - unsigned getGenCallSize() const { return 2; } - unsigned getGenBranchAlwaysSize() const { return 2; } - unsigned getGenSaveSize() const { return 1; } - unsigned getGenParamStoreSize() const { return 1; } - unsigned getGenSpillSharedSize() const { return getGenLoadSize() + SHARED_SIZE; } - unsigned getGenRestoreSharedSize() const { return getGenLoadSize() + SHARED_SIZE; } - unsigned getGenRestoreSize() const { return 1; } - unsigned getInstWidth() const { return 4; } - unsigned getSharedSize() const { return SHARED_SIZE; } - - inline unsigned getGenAddressCopySize(unsigned loadInst) const; - - uint64_t getPhase3SpillAddr() { return (uint64_t) sm_phase3SpillRegion; } - - private: - InstManip() {} - typedef std::map LogicalToActualRegMap; - typedef std::map OutputToInputRegMap; - - bool isCandidateLoad(uint64_t addr, - uint64_t end, - InstCandidate& cand); - - bool determineSchema(InstCandidate& cand, - uint64_t end, - std::pair& load, - std::pair& store); - - uint64_t findNextStore(uint64_t addr, - uint64_t end, - unsigned srcReg); - - uint64_t findNextStackLoad(uint64_t addr, - uint64_t end, - unsigned fpOffset); - - VirtualMem* m_pVM; - std::vector* m_pCurrSnippet; - LogicalToActualRegMap m_logicalToActualReg; // Maps logical -> actual register - OutputToInputRegMap m_outputToInputReg; // Maps input register -> output register - - // Branch-always (annul bit high) instruction base (i.e., address not filled in yet) - static const unsigned BRANCH_ALWAYS_BASE_ANNUL; - - // Branch-always (annul bit low) instruction base (i.e., address not filled in yet) - static const unsigned BRANCH_ALWAYS_BASE; - - // NOP instruction - static const unsigned NOP_INST; - - // Size (in number of 64-bit words) required for storing shared registers - static const unsigned SHARED_SIZE = 7; - - // 
Sparc-specific constant used in SP manipulations - static const unsigned BIAS; - - // Memory region into which to spill shared registers when executing a phase 4 slot - // (i.e., the slot that invokes the phase4 function, the slot written by phase 3 - // invocations). NB: One region is sufficient and we do not need stack semantics - // because only one activation of a phase 4 slot ever occurs at a given time (assuming - // single-threaded execution). - - static uint64_t sm_phase3SpillRegion[SHARED_SIZE]; }; -void InstManip::printRange(uint64_t start, uint64_t end) const -{ - printRange((unsigned*) start, (unsigned*) end); -} - -void InstManip::printInst(unsigned inst) const -{ - sparc_print(inst); - fflush(stdout); -} - -void InstManip::printInst(unsigned* instAddr) const -{ - sparc_print(*instAddr); - fflush(stdout); -} - -unsigned InstManip::getBranchAlways(uint64_t dest, uint64_t pc, bool annul) const -{ - // dest is the destination address, pc is the value of the program counter when the - // branch instruction is executed (i.e., the address of the branch instruction). NB: - // Only handles branch-always-annul-high at the moment - - return getUndepJumpInstr(annul ? BRANCH_ALWAYS_BASE_ANNUL : BRANCH_ALWAYS_BASE, - dest, - pc); -} - -unsigned InstManip::getCallInst(uint64_t dest, uint64_t pc) const -{ - // dest is the destination address to call, pc is the value of the program counter - // when the call instruction is executed (i.e., the address of the branch - // instruction). - - return getCallInstr(dest, pc); -} - -bool InstManip::isBranch(unsigned inst) const -{ - return ::isBranchInstr(inst); -} - -unsigned InstManip::getGenAddressCopySize(unsigned loadInst) const -{ - // Determine the number of instructions required to load the address value used by the - // load instruction into some register. 
- - // Case 1: load is immediate-valued --> add-immediate instruction needed, size is 1 inst - // Case 2: load is register-valued --> add-registers instruction needed, size is 1 inst - - return 1; -} +std::ostream& operator<<(std::ostream& ostr, const InstCandidate& cand); #endif // _INCLUDED_INSTMANIP_H + Index: llvm/lib/Reoptimizer/Inst/Phases.cpp diff -u llvm/lib/Reoptimizer/Inst/Phases.cpp:1.15 llvm/lib/Reoptimizer/Inst/Phases.cpp:1.16 --- llvm/lib/Reoptimizer/Inst/Phases.cpp:1.15 Tue Apr 29 21:08:42 2003 +++ llvm/lib/Reoptimizer/Inst/Phases.cpp Tue Apr 29 22:08:03 2003 @@ -79,7 +79,8 @@ #include "llvm/Reoptimizer/MemoryManager.h" #include "ElfReader.h" -#include "InstManip.h" +//#include "InstManip.h" +#include "SparcInstManip.h" using std::vector; using std::cerr; @@ -108,13 +109,15 @@ uint64_t replaceAddr, uint64_t slotDescriptor, unsigned slotSize, - TraceCache* pTraceCache): + TraceCache* pTraceCache, + SparcInstManip* pInstManip): m_addrRange(addressRange), m_origInst(origInst), m_replaceAddr(replaceAddr), m_slotDescriptor(slotDescriptor), m_slotSize(slotSize), - m_pTraceCache(pTraceCache) + m_pTraceCache(pTraceCache), + m_pInstManip(pInstManip) { } @@ -134,7 +137,8 @@ uint64_t getReplaceAddr() const { return m_replaceAddr; } uint64_t getSlot() const { return m_slotDescriptor; } uint64_t getSlotSize() const { return m_slotSize; } - TraceCache* getTraceCache() { return m_pTraceCache; } + TraceCache* getTraceCache() { return m_pTraceCache; } + SparcInstManip* getIM() { return m_pInstManip; } private: Phase3Info() {} @@ -145,6 +149,7 @@ uint64_t m_slotDescriptor; // Slot created by phase 2 unsigned m_slotSize; // Size of slot created by phase 2 TraceCache* m_pTraceCache; // TraceCache instance used by phase 2 + SparcInstManip* m_pInstManip; // The InstManip instance to pass to the next phase }; class Phase4Info @@ -153,18 +158,21 @@ Phase4Info(const InstCandidate& candidate, uint64_t slotDescriptor, uint64_t slotSize, - TraceCache* pTraceCache): + 
TraceCache* pTraceCache, + SparcInstManip* pInstManip): m_candidate(candidate), m_slotDescriptor(slotDescriptor), m_slotSize(slotSize), - m_pTraceCache(pTraceCache) + m_pTraceCache(pTraceCache), + m_pInstManip(pInstManip) { } const InstCandidate& getCandidate() const { return m_candidate; } uint64_t getSlot() const { return m_slotDescriptor; } uint64_t getSlotSize() const { return m_slotSize; } - TraceCache* getTraceCache() { return m_pTraceCache; } + TraceCache* getTraceCache() { return m_pTraceCache; } + SparcInstManip* getIM() { return m_pInstManip; } private: Phase4Info() {} @@ -173,6 +181,7 @@ uint64_t m_slotDescriptor; // Slot created by phase 3 unsigned m_slotSize; // Size of slot created by phase 3 TraceCache* m_pTraceCache; // TraceCache instance used by phases 2 and 3 + SparcInstManip* m_pInstManip; // The InstManip instance to pass to the next phase }; void phase3(Phase3Info* p3info); @@ -184,16 +193,16 @@ class Phase2 { public: - Phase2(TraceCache* pTraceCache); + Phase2(TraceCache* pTraceCache, SparcInstManip* pIM); void transform(); void transformFunction(AddressRange& range); private: - Phase2(): m_instManip(0) {} + Phase2() {} inline unsigned getSlotSize() const; TraceCache* m_pTraceCache; - InstManip m_instManip; + SparcInstManip* m_pInstManip; static uint64_t* sm_pSpillRegion; // Base pointer to the spill region for phase 3 invocations static uint64_t* sm_pCurrSpill; // Pointer to current location in the spill region @@ -215,14 +224,14 @@ void transform(); private: - Phase3(): m_instManip(0) {} + Phase3() {} void processCandidates(vector& candidates); inline unsigned getSlotSize(InstCandidate&) const; Phase3Info* m_pPhase3Info; TraceCache* m_pTraceCache; - InstManip m_instManip; + SparcInstManip* m_pInstManip; }; // Phase4 is the class that is responsible for making the "phase 4" transformation; the @@ -238,13 +247,13 @@ void transform(); private: - Phase4(): m_instManip(0) {} + Phase4() {} inline unsigned getSlotSize() const; Phase4Info* 
m_pPhase4Info; TraceCache* m_pTraceCache; - InstManip m_instManip; + SparcInstManip* m_pInstManip; uint64_t m_tag; // Entry to look for in the GBT }; @@ -252,13 +261,15 @@ extern "C" void phase2() { - Phase2 ph(new TraceCache()); + TraceCache* pTC = new TraceCache(); + SparcInstManip* pIM = new SparcInstManip(pTC->getVM()); + Phase2 ph(pTC, pIM); ph.transform(); } -Phase2::Phase2(TraceCache* tc): +Phase2::Phase2(TraceCache* tc, SparcInstManip* pInstManip): m_pTraceCache(tc), - m_instManip(tc->getVM()) + m_pInstManip(pInstManip) { } @@ -288,7 +299,7 @@ // invocations. We allocate one unit of space (given by InstManip::getSharedSize()) // for each function that we transform. - sm_pSpillRegion = new uint64_t[m_instManip.getSharedSize() * funcs.size()]; + sm_pSpillRegion = new uint64_t[m_pInstManip->getSharedSize() * funcs.size()]; sm_pCurrSpill = sm_pSpillRegion; for(vector >::iterator i = funcs.begin(), @@ -320,49 +331,49 @@ // Obtain address of first replacable instruction in function and obtain a new slot from // the TraceCache memory manager (i.e., a new slot in the dummy function). - uint64_t repInstAddr = m_instManip.skipFunctionHdr(range.first); + uint64_t repInstAddr = m_pInstManip->skipFunctionHdr(range.first); uint64_t slotBase = m_pTraceCache->getMemMgr()->getMemory(getSlotSize()); assert(slotBase && "Unable to obtain memory from MemoryManger instance"); // Replace instruction at repInstAddr with a branch to start of slot. VirtualMem* vm = m_pTraceCache->getVM(); unsigned origInst = vm->readInstrFrmVm(repInstAddr); - assert(!m_instManip.isBranch(origInst) && + assert(!m_pInstManip->isBranch(origInst) && "Unhandled case: branch instruction first in function body"); - vm->writeInstToVM(repInstAddr, m_instManip.getBranchAlways(slotBase, repInstAddr)); + vm->writeInstToVM(repInstAddr, m_pInstManip->getBranchAlways(slotBase, repInstAddr)); // Generate the phase 3 slot. See picture of phase 3 slot contents for more info. 
Phase3Info* p3info = new Phase3Info(range, origInst, repInstAddr, - slotBase, getSlotSize(), m_pTraceCache); + slotBase, getSlotSize(), m_pTraceCache, m_pInstManip); vector snippet; - m_instManip.startCode(snippet); + m_pInstManip->startCode(snippet); - m_instManip.generateSave(); - m_instManip.generateSpillShared((uint64_t) sm_pCurrSpill); - m_instManip.generateLoad((uint64_t) p3info, InstManip::REG_0, InstManip::REG_1); - m_instManip.generateCall((uint64_t) &phase3, slotBase); - m_instManip.generateRestoreShared((uint64_t) sm_pCurrSpill); - m_instManip.generateBranchAlways(repInstAddr, slotBase, m_instManip.getRestoreInst()); + m_pInstManip->generateSave(); + m_pInstManip->generateSpillShared((uint64_t) sm_pCurrSpill); + m_pInstManip->generateLoad((uint64_t) p3info, InstManip::REG_0, InstManip::REG_1); + m_pInstManip->generateCall((uint64_t) &phase3, slotBase); + m_pInstManip->generateRestoreShared((uint64_t) sm_pCurrSpill); + m_pInstManip->generateBranchAlways(repInstAddr, slotBase, m_pInstManip->getRestoreInst()); - m_instManip.endCode(); + m_pInstManip->endCode(); // Dump snippet instructions: cerr << "phase3 slot instructions:" << endl; for(vector::iterator j = snippet.begin(), k = snippet.end(); j != k; ++j) { - m_instManip.printInst(*j); + m_pInstManip->printInst(*j); cerr << endl; } // Bump the current spill pointer to the next "spill slot" in the spill region used // before/after phase3() invocations. - sm_pCurrSpill += m_instManip.getSharedSize(); + sm_pCurrSpill += m_pInstManip->getSharedSize(); // Copy the snippet code into the slot assert(snippet.size() == getSlotSize() && "Snippet size does not match slot size"); - copySnippetToSlot(snippet, slotBase, vm, m_instManip); + copySnippetToSlot(snippet, slotBase, vm, *m_pInstManip); } unsigned Phase2::getSlotSize() const @@ -370,12 +381,12 @@ // The following sum corresponds to the sizes consumed by the various regions of the // phase 2 slot. See picture of phase 2 contents for details. 
- return m_instManip.getGenSaveSize() + - m_instManip.getGenSpillSharedSize() + - m_instManip.getGenLoadSize() + - m_instManip.getGenCallSize() + - m_instManip.getGenRestoreSharedSize() + - m_instManip.getGenBranchAlwaysSize(); + return m_pInstManip->getGenSaveSize() + + m_pInstManip->getGenSpillSharedSize() + + m_pInstManip->getGenLoadSize() + + m_pInstManip->getGenCallSize() + + m_pInstManip->getGenRestoreSharedSize() + + m_pInstManip->getGenBranchAlwaysSize(); } //////////////// Phase3 implementation //////////////// @@ -389,7 +400,7 @@ Phase3::Phase3(Phase3Info* p3info): m_pPhase3Info(p3info), m_pTraceCache(p3info->getTraceCache()), - m_instManip(p3info->getTraceCache()->getVM()) + m_pInstManip(p3info->getIM()) { cerr << "================ Begin Phase 3 [" << std::hex << m_pPhase3Info->getStartAddr() << ", " << m_pPhase3Info->getEndAddr() @@ -423,7 +434,7 @@ static uint64_t replaceInstWithBrToSlot(uint64_t srcAddr, unsigned slotSize, TraceCache* tc, - InstManip& im) + SparcInstManip& im) { // Obtain a new slot of the given size uint64_t slotBase = tc->getMemMgr()->getMemory(slotSize); @@ -445,16 +456,16 @@ // Replace load candidate instruction with a branch to the start of a new slot. uint64_t slotBase = replaceInstWithBrToSlot(i->front().first, getSlotSize(*i), - m_pTraceCache, m_instManip); + m_pTraceCache, *m_pInstManip); // Generate the phase 4 slot. See picture of phase 4 slot contents for more info. - Phase4Info* p4info = new Phase4Info(*i, slotBase, getSlotSize(*i), m_pTraceCache); + Phase4Info* p4info = new Phase4Info(*i, slotBase, getSlotSize(*i), m_pTraceCache, m_pInstManip); - uint64_t spillAddr = m_instManip.getPhase3SpillAddr(); + uint64_t spillAddr = m_pInstManip->getPhase3SpillAddr(); vector snippet; - m_instManip.startCode(snippet); + m_pInstManip->startCode(snippet); // NB: We pass parameters to the phase4 function in REG_0 and REG_1 on the // assumption that the input parameters will be looked for there. 
However, it is @@ -462,30 +473,30 @@ // fixed offsets from the stack pointer. Hence, we store the parameters there as // well. - m_instManip.generateSave(); - m_instManip.generateAddressCopy(i->front().second, InstManip::REG_0, true); // REG_0 live to call - m_instManip.generateParamStore(InstManip::REG_0, InstManip::PARAM_0); - m_instManip.generateSpillShared(spillAddr, InstManip::REG_1, InstManip::REG_2); - m_instManip.generateLoad((uint64_t) p4info, InstManip::REG_1, InstManip::REG_2); // REG_1 live to call - m_instManip.generateParamStore(InstManip::REG_1, InstManip::PARAM_1); - m_instManip.generateCall((uint64_t) &phase4, slotBase); - m_instManip.generateRestoreShared(spillAddr); - m_instManip.generateBranchAlways(i->front().first, slotBase, m_instManip.getRestoreInst()); + m_pInstManip->generateSave(); + m_pInstManip->generateAddressCopy(i->front().second, InstManip::REG_0, true); // REG_0 live to call + m_pInstManip->generateParamStore(InstManip::REG_0, SparcInstManip::PARAM_0); + m_pInstManip->generateSpillShared(spillAddr, InstManip::REG_1, InstManip::REG_2); + m_pInstManip->generateLoad((uint64_t) p4info, InstManip::REG_1, InstManip::REG_2); // REG_1 live to call + m_pInstManip->generateParamStore(InstManip::REG_1, SparcInstManip::PARAM_1); + m_pInstManip->generateCall((uint64_t) &phase4, slotBase); + m_pInstManip->generateRestoreShared(spillAddr); + m_pInstManip->generateBranchAlways(i->front().first, slotBase, m_pInstManip->getRestoreInst()); - m_instManip.endCode(); + m_pInstManip->endCode(); // Dump snippet instructions: cerr << "phase4 slot instructions:" << endl; for(vector::iterator j = snippet.begin(), k = snippet.end(); j != k; ++j) { - m_instManip.printInst(*j); + m_pInstManip->printInst(*j); cerr << endl; } // Copy the snippet code into the slot assert(snippet.size() == getSlotSize(*i) && "Snippet size does not match slot size"); - copySnippetToSlot(snippet, slotBase, m_pTraceCache->getVM(), m_instManip); + copySnippetToSlot(snippet, slotBase, 
m_pTraceCache->getVM(), *m_pInstManip); // just one candidate for now break; @@ -497,22 +508,22 @@ // The following sum corresponds to the sizes consumed by the various regions of the // phase 3 slot. See picture of phase 3 contents for details. - return m_instManip.getGenSaveSize() + - m_instManip.getGenAddressCopySize(cand.front().second) + - m_instManip.getGenParamStoreSize() + - m_instManip.getGenSpillSharedSize() + - m_instManip.getGenLoadSize() + - m_instManip.getGenParamStoreSize() + - m_instManip.getGenCallSize() + - m_instManip.getGenRestoreSharedSize() + - m_instManip.getGenBranchAlwaysSize(); + return m_pInstManip->getGenSaveSize() + + m_pInstManip->getGenAddressCopySize(cand.front().second) + + m_pInstManip->getGenParamStoreSize() + + m_pInstManip->getGenSpillSharedSize() + + m_pInstManip->getGenLoadSize() + + m_pInstManip->getGenParamStoreSize() + + m_pInstManip->getGenCallSize() + + m_pInstManip->getGenRestoreSharedSize() + + m_pInstManip->getGenBranchAlwaysSize(); } void Phase3::transform() { // 2. Analyze the function and determine the load-volatile candidates... 
vector candidates; - m_instManip.findCandidates(m_pPhase3Info->getStartAddr(), + m_pInstManip->findCandidates(m_pPhase3Info->getStartAddr(), m_pPhase3Info->getEndAddr(), candidates); @@ -533,7 +544,7 @@ Phase4::Phase4(uint64_t tag, Phase4Info* p4info): m_pPhase4Info(p4info), m_pTraceCache(p4info->getTraceCache()), - m_instManip(p4info->getTraceCache()->getVM()), + m_pInstManip(p4info->getIM()), m_tag(tag) { cerr << "phase4 ctor: tag is " << tag << endl; @@ -609,7 +620,7 @@ VirtualMem* vm = m_pTraceCache->getVM(); for(vector >::const_iterator i = cand.getInsts().begin() + 1, e = cand.getInsts().end(); i != e; ++i) - vm->writeInstToVM(i->first, m_instManip.getNOP()); + vm->writeInstToVM(i->first, m_pInstManip->getNOP()); // Write the instructions to call the instrumentation function From brukman at cs.uiuc.edu Wed Apr 30 00:05:03 2003 From: brukman at cs.uiuc.edu (Michael Brukman) Date: Wed Apr 30 00:05:03 2003 Subject: [llvm-commits] CVS: llvm/www/www-index.html Message-ID: <200304300504.AAA01784@tank.cs.uiuc.edu> Changes in directory llvm/www: www-index.html updated: 1.23 -> 1.24 --- Log message: Cleaned up usage of and to make things consistent. --- Diffs of the changes: Index: llvm/www/www-index.html diff -u llvm/www/www-index.html:1.23 llvm/www/www-index.html:1.24 --- llvm/www/www-index.html:1.23 Tue Apr 29 21:48:42 2003 +++ llvm/www/www-index.html Wed Apr 30 00:03:55 2003 @@ -398,7 +398,7 @@ research projects in our group. Here are some of the projects currently active: