From sabre at nondot.org Mon Mar 31 11:03:01 2003 From: sabre at nondot.org (Chris Lattner) Date: Mon Mar 31 11:03:01 2003 Subject: [llvm-commits] CVS: llvm/lib/CodeGen/ModuloScheduling/Makefile ModuloSchedGraph.cpp ModuloSchedGraph.h ModuloScheduling.cpp ModuloScheduling.h In-Reply-To: <200303271757.LAA04117@psmith.cs.uiuc.edu> Message-ID: > Makefile added (r1.1) > ModuloSchedGraph.cpp added (r1.1) > ModuloSchedGraph.h added (r1.1) > ModuloScheduling.cpp added (r1.1) > ModuloScheduling.h added (r1.1) Hey Guochun, This code doesn't compile. Specifically it cannot find "../InstrSched/SchedGraphCommon.h". Could you please check this file (and any other missing ones) in? Also, std::min and std::max are defined in <algorithm>, you shouldn't have to define your own versions. Thanks, -Chris From lattner at cs.uiuc.edu Mon Mar 31 11:30:01 2003 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Mon Mar 31 11:30:01 2003 Subject: [llvm-commits] CVS: llvm/include/llvm/Transforms/Utils/UnifyFunctionExitNodes.h Message-ID: <200303311729.LAA17894@apoc.cs.uiuc.edu> Changes in directory llvm/include/llvm/Transforms/Utils: UnifyFunctionExitNodes.h updated: 1.13 -> 1.14 --- Log message: Add getAnalysisUsage method --- Diffs of the changes: Index: llvm/include/llvm/Transforms/Utils/UnifyFunctionExitNodes.h diff -u llvm/include/llvm/Transforms/Utils/UnifyFunctionExitNodes.h:1.13 llvm/include/llvm/Transforms/Utils/UnifyFunctionExitNodes.h:1.14 --- llvm/include/llvm/Transforms/Utils/UnifyFunctionExitNodes.h:1.13 Wed Aug 21 12:09:26 2002 +++ llvm/include/llvm/Transforms/Utils/UnifyFunctionExitNodes.h Mon Mar 31 11:29:18 2003 @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_XFORMS_UNIFY_FUNCTION_EXIT_NODES_H -#define LLVM_XFORMS_UNIFY_FUNCTION_EXIT_NODES_H +#ifndef LLVM_TRANSFORMS_UNIFYFUNCTIONEXITNODES_H +#define LLVM_TRANSFORMS_UNIFYFUNCTIONEXITNODES_H #include "llvm/Pass.h" @@ -16,6 +16,9 @@ BasicBlock *ExitNode; public: 
UnifyFunctionExitNodes() : ExitNode(0) {} + + // We can preserve non-critical-edgeness when we unify function exit nodes + virtual void getAnalysisUsage(AnalysisUsage &AU) const; // getExitNode - Return the new single (or nonexistant) exit node of the CFG. // From lattner at cs.uiuc.edu Mon Mar 31 11:31:00 2003 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Mon Mar 31 11:31:00 2003 Subject: [llvm-commits] CVS: llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp Message-ID: <200303311730.LAA17917@apoc.cs.uiuc.edu> Changes in directory llvm/lib/Transforms/Utils: UnifyFunctionExitNodes.cpp updated: 1.22 -> 1.23 --- Log message: * We now preserve the no-critical-edge pass (because we cannot insert critical edges) * Small modification to be more efficient --- Diffs of the changes: Index: llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp diff -u llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp:1.22 llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp:1.23 --- llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp:1.22 Thu Sep 12 14:00:43 2002 +++ llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp Mon Mar 31 11:30:25 2003 @@ -8,6 +8,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h" +#include "llvm/Transforms/Scalar.h" #include "llvm/BasicBlock.h" #include "llvm/Function.h" #include "llvm/iTerminators.h" @@ -18,6 +19,11 @@ static RegisterOpt X("mergereturn", "Unify function exit nodes"); +void UnifyFunctionExitNodes::getAnalysisUsage(AnalysisUsage &AU) const{ + // We preserve the non-critical-edgeness property + AU.addPreservedID(BreakCriticalEdgesID); +} + // UnifyAllExitNodes - Unify all exit nodes of the CFG by creating a new // BasicBlock, and converting all returns to unconditional branches to this // new basic block. The singular exit node is returned. 
@@ -47,18 +53,11 @@ // BasicBlock *NewRetBlock = new BasicBlock("UnifiedExitNode", &F); + PHINode *PN = 0; if (F.getReturnType() != Type::VoidTy) { // If the function doesn't return void... add a PHI node to the block... - PHINode *PN = new PHINode(F.getReturnType(), "UnifiedRetVal"); + PN = new PHINode(F.getReturnType(), "UnifiedRetVal"); NewRetBlock->getInstList().push_back(PN); - - // Add an incoming element to the PHI node for every return instruction that - // is merging into this new block... - for (vector::iterator I = ReturningBlocks.begin(), - E = ReturningBlocks.end(); I != E; ++I) - PN->addIncoming((*I)->getTerminator()->getOperand(0), *I); - - // Add a return instruction to return the result of the PHI node... NewRetBlock->getInstList().push_back(new ReturnInst(PN)); } else { // If it returns void, just add a return void instruction to the block @@ -70,9 +69,16 @@ // for (vector::iterator I = ReturningBlocks.begin(), E = ReturningBlocks.end(); I != E; ++I) { - (*I)->getInstList().pop_back(); // Remove the return insn - (*I)->getInstList().push_back(new BranchInst(NewRetBlock)); + BasicBlock *BB = *I; + + // Add an incoming element to the PHI node for every return instruction that + // is merging into this new block... 
+ if (PN) PN->addIncoming(BB->getTerminator()->getOperand(0), BB); + + BB->getInstList().pop_back(); // Remove the return insn + BB->getInstList().push_back(new BranchInst(NewRetBlock)); } ExitNode = NewRetBlock; + return true; } From lattner at cs.uiuc.edu Mon Mar 31 11:31:01 2003 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Mon Mar 31 11:31:01 2003 Subject: [llvm-commits] CVS: llvm/tools/analyze/Makefile Message-ID: <200303311730.LAA17929@apoc.cs.uiuc.edu> Changes in directory llvm/tools/analyze: Makefile updated: 1.17 -> 1.18 --- Log message: Minor tweak --- Diffs of the changes: Index: llvm/tools/analyze/Makefile diff -u llvm/tools/analyze/Makefile:1.17 llvm/tools/analyze/Makefile:1.18 --- llvm/tools/analyze/Makefile:1.17 Mon Nov 4 14:50:51 2002 +++ llvm/tools/analyze/Makefile Mon Mar 31 11:30:35 2003 @@ -1,7 +1,7 @@ LEVEL = ../.. TOOLNAME = analyze USEDLIBS = asmparser bcreader scalaropts.a transforms.a analysis ipa \ - datastructure target.a transformutils.a vmcore support + datastructure target.a transformutils.a scalaropts.a vmcore support TOOLLINKOPTS = -ldl include $(LEVEL)/Makefile.common From jstanley at cs.uiuc.edu Mon Mar 31 11:38:01 2003 From: jstanley at cs.uiuc.edu (Joel Stanley) Date: Mon Mar 31 11:38:01 2003 Subject: [llvm-commits] CVS: llvm/lib/Reoptimizer/Inst/ElfReader.cpp Message-ID: <200303311748.LAA14949@cypher.cs.uiuc.edu> Changes in directory llvm/lib/Reoptimizer/Inst: ElfReader.cpp added (r1.1) --- Log message: ElfReader initial checkin; client code invokes iterator method over functions in ELF symtab. 
--- Diffs of the changes: From jstanley at cs.uiuc.edu Mon Mar 31 11:39:01 2003 From: jstanley at cs.uiuc.edu (Joel Stanley) Date: Mon Mar 31 11:39:01 2003 Subject: [llvm-commits] CVS: llvm/lib/Reoptimizer/Inst/ElfReader.h PerfInst.cpp design.txt Message-ID: <200303311748.LAA14964@cypher.cs.uiuc.edu> Changes in directory llvm/lib/Reoptimizer/Inst: ElfReader.h added (r1.1) PerfInst.cpp updated: 1.2 -> 1.3 design.txt updated: 1.3 -> 1.4 --- Log message: ElfReader initial checkin; client code invokes iterator method over functions in ELF symtab. --- Diffs of the changes: Index: llvm/lib/Reoptimizer/Inst/PerfInst.cpp diff -u llvm/lib/Reoptimizer/Inst/PerfInst.cpp:1.2 llvm/lib/Reoptimizer/Inst/PerfInst.cpp:1.3 --- llvm/lib/Reoptimizer/Inst/PerfInst.cpp:1.2 Mon Mar 17 18:49:31 2003 +++ llvm/lib/Reoptimizer/Inst/PerfInst.cpp Mon Mar 31 11:48:34 2003 @@ -1,79 +1,40 @@ //////////////// // programmer: Joel Stanley -// date: Mon Mar 3 13:34:13 CST 2003 +// date: Fri Mar 21 12:32:01 CST 2003 // fileid: PerfInst.cpp // purpose: Provides code for performing performance instrumentation at // runtime. The goal of the phase2 function is to implement Phase 2 // of the performance-oriented language extensions transformation. That is, // it is responsible for replacing loads of particular global volatiles and // stores of particular temporaries with appropriate calls to instrumentation -// functions. +// functions. More detail is given through the implementation. // [] -#include "llvm/Reoptimizer/TraceCache.h" -#include "llvm/Reoptimizer/VirtualMem.h" -#include "llvm/Reoptimizer/InstrUtils.h" -#include "llvm/Reoptimizer/GetTraceTime.h" -#include "llvm/Reoptimizer/Mapping/LLVMinfo.h" -#include "llvm/Bytecode/Reader.h" -#include "llvm/Module.h" -#include "llvm/iTerminators.h" -#include "llvm/Support/CFG.h" - +#include #include #include +#include + +#include "ElfReader.h" using std::vector; using std::cerr; using std::endl; -// Not sure if the following externs are required yet. 
-extern int llvm_length; -extern const unsigned char LLVMBytecode[]; -extern void** llvmFunctionTable[]; - -extern "C" void phase2(int methodNum) +extern "C" void phase2() { - static bool initialized = false; - static Module* pMod = 0; - static vector funcList; - - cerr << "phase2 invoked" << endl; + cerr << "============================== Begin Phase 2 ==============================\n"; - if(!initialized) { - initialized = true; - - cerr << "llvm_length is: " << llvm_length << endl; - - pMod = ParseBytecodeBuffer(LLVMBytecode, llvm_length); - assert(pMod && "Couldn't parse Module"); - - cerr << "Parsed bytecode" << endl; - - // Gather pointers to functions into funcList - for(Module::iterator i = pMod->begin(), e = pMod->end(); i != e; ++i) { - if(!i->isExternal()) - funcList.push_back(&*i); - } - } + const char* execName = getexecname(); + cerr << "Executable name is: " << execName << endl; - assert(pMod && "Module must have been parsed"); - assert(funcList[methodNum] && "Have not obtained methodNum'th function in funcList"); + ElfReader elfReader(execName); - cerr << "Dumping list of instructions in each function..." 
<< endl; - - for(vector::iterator i = funcList.begin(), e = funcList.end(); i != e; ++i) { - cerr << "Processing function " << (*i)->getName() << endl; - for(Function::iterator bbi = (*i)->begin(), bbe = (*i)->end(); bbi != bbe; ++bbi) { - for(BasicBlock::iterator ii = bbi->begin(), ie = bbi->end(); ii != ie; ++ii) { - cerr << "Processing instruction: " << *ii << endl; - vector vec = getLLVMInstrInfo(&*ii); - cerr << "Obtained the following vector from getInstrInfo:" << endl; - for(unsigned k = 0; k < vec.size(); ++k) - cerr << vec[k] << endl; - } - } + std::string funcName; + ElfReader::AddressRange range; + while(elfReader.GetNextFunction(funcName, range)) { + cerr << "Function name is: " << funcName << endl; } -} - + cerr << "============================== End Phase 2 ==============================\n"; +} Index: llvm/lib/Reoptimizer/Inst/design.txt diff -u llvm/lib/Reoptimizer/Inst/design.txt:1.3 llvm/lib/Reoptimizer/Inst/design.txt:1.4 --- llvm/lib/Reoptimizer/Inst/design.txt:1.3 Thu Mar 20 08:49:06 2003 +++ llvm/lib/Reoptimizer/Inst/design.txt Mon Mar 31 11:48:34 2003 @@ -455,9 +455,10 @@ {{{ MEETING MINUTES 20 Mar 2003 Agenda: - - Address pending issues already sent via e-mail. - - Confidence of approach, assurance of validity w.r.t time commitment. - - Inlining of functions and how to handle + + Things not sufficiently addressed in meeting or e-mail yet: + + - Inlining of functions and how to handle - Register schedule violation; or "how do we determine what registers should hold values when insert code?". Rather, should we simply adopt the policy @@ -467,28 +468,228 @@ analysis, at what point do we consider phase 2 "too expensive" when compared with plain old opaque function calls at instrumentation points? - - From the e-mail(s): - - (a) We have to balance the benefit of a vendor-independent implementation - vs. the opportunity to do something "more conceptually novel" with the - metrics. 
- - (b) We can discuss instrumenting functions at function entry; of course, - this point is moot if we do not take the binary editing approach. - - What is the purpose of "exit stubs" in Trigger/TraceCache? What is the role of the branch map and call map? - As long as the new code fits within the 64KB segment, we have the capability to add new code right? -Minutes: + {{{ Minutes: + +For thesis: We want to obtain a good quality (but not neccessarily "production +quality, fully-featured") tool. The design & implementation of such a tool *is* +sufficient for the thesis, although the thesis would be significantly +strengthened if we have "more conceptually novel" metrics in place, such as +pipeline simulation/metrics. Talked about the feasibility of being done +sometime in June, but potentially extending to July if need be. Work hard, +Joel. + +Suggested experiment to consider the varieties of indirect branch problems that +we'll encounter: write a large case statement (lots of code placed in each case, +duplicated is okay) and examine closely the indirect branches in the +compiler-generated code to determine how the table lookups occur (and if the +jump table is storing absolute or relative addresses). + +Vendor independence is a significant conceptual result for the thesis, and it is +not "all about using volatile". Rather, the key point to make is w.r.t. the +approach taken: the approach is two-phase, and transformations are done both +before the black-box compiler ever sees the code, and after it is done operating +on the code. Also, note that vendor independence and "uninhibiting +optimizations" _are_ competing goals (if we didn't care about uninhibiting +optimizations, we could place opaque calls at the instrumentation points and +look at them post-link, and if we didn't care about vendor independence we could +do much better than the two-phase approach by using compiler annotations, +etc)...our approach attempts to find a good compromise between the two goals. 
+ +We want to make a "simple" (i.e. works in the common case) vendor-independent +implementation. + +Can ensure that a function body remains after optimization (i.e. inlining) by +printing the address of the function? + +New multi-phase approach discussed. We are not going to be using the LLVM +mapping information, and so must rely on ELF mechanisms. We want these ELF +mechanisms to be used at runtime, which requires mmapp-ing the executable or +otherwise loading it from disk. We will probably have to create a reverse +mapping of the symtable to go from address->function name (rather, function +size) so that we can obtain information about the function being instrumented. +The current plan is to transform each instrumented function on a demand-driven +basis, wherein there'd be a call to the "phase2 transformation function" that +would pass in the address of the enclosing function. [Actually, that approach +does not work if the function gets inlined elsewhere, because modification of +the function body's code will not result in the modification of the code at the +inlined sites -- let's worry about this later]. + +Talked about the need of a padding region for what is essentially a base +trampoline, placed at the end of the function and within the range of a +PC-relative jump (64K distance). Within this region of code, there'd need to be +placed code to (indirect) jump to the copied region of code (i.e., the +tracecache). Then, the start of the outermost interval is replaced with a +short-jump (w/ annulling bit set) down into the padding area, which long jumps +into the copied code region (which can be allocated on the heap now). This is +all well and good *after* the transformation has been applied, but what about +invoking the code that performs this transformation at runtime? Vikram's idea +is that we can, in fact, do the exact same thing for the transformation itself, +perhaps...more detail follows. 
+ +Assume for the moment that we can locate the pad region easily, and that we can +distinguish two subregions within it by a label or something. (This isn't true, +but pretend that it is for now). + +... + +function entry point: + ... + ld volatile #1 [start of outermost interval] (***) + i2 + ... + ld volatile #2 [end of outermost interval] + ... + +pad_start_1: padinst 1 + padinst 2 + padinst 3 + ... + +pad_start_2: padinst k + padinst k+1 + ... + +pad_end: + ... (stuff can be moved here by compiler) + +function end point: ... + +Now, _someone_ (who? a pre-pass of sorts?) writes over the start of the +outermost interval, branching down to the pad_start_1 location. The pad +instructions in the pad_start_1 region are over-written with a call to the +"phase 2 transformation function" that performs all of the transformation on +this particular function, *including* re-writing the branch at location (***) +with a branch down to the pad_start_2 region, which contains code to perform the +longjmp to the heap-allocated instrumented code. + +This might work; however, what is the benefit between this approach and simply +having a _call_ (placed at function entry point above) to the so-called "phase 2 +transformation function"? + + {{{ E-mail sent to adve on the subject + +The implementation approach described at the end of the e-mail is the one that +I'm going to embark on. If you could provide me with your thoughts regarding +the approach in general, I'd appreciate it, so I don't waste time writing code +that we might throw away ;). + +Here's my assessment of the situation: + +Let F be a function that contains instrumentation. If F is inlined, we have the +following concerns & proposed resolutions: + + a) It is no longer identifiable by name. This has implications for locating +instrumentation, as we discussed briefly in a previous e-mail. 
+ +I *think* that this can be taken care of by (on program startup, one-time-only) +processing the ELF symtable to construct a set of address ranges, and then +(possibly at program startup, possibly on a demand-driven basis, depending on +our implement approach) locate individual load instructions (using the "magic +heuristic") and determine their enclosing functions by looking them up in the +address range map. Can you think of a better approach or other problems that +this doesn't address? + + b) The body of F may now be enclosed in an inner loop, meaning that padding +(i.e. a small for loop) placed at the end of F becomes the innermost loop in the +calling function; this may prohibit optimizations. Likewise, if we place a +function call (i.e., placed by phase 1 for doing the transformation at runtime) +at the start of F (one of two approaches), an inner loop in the calling function +now contains a (potentially opaque) function call, which may prohibit +optimization. + +I think this is going to have to be caveat user: if you instrument functions +that are considered lightweight enough to be inlined by the optimizing compiler, +then you deal with the consequences. Is this unreasonable or too severe? We can +minimize the prohibited optimization of a function call (to perform the runtime +transformation) at the entry to F by using by not placing a function call there +and using the approach you talked about earlier today (branching down to the +end-of-function base trampoline). + +Current implementation sketch: + +[This approach focuses on doing the minimal amount of work (which is still a +*lot*, I think) at program startup (i.e. "phase 2" using the new phase +designations) and distributed work on-demand ("phase 3"). This should reduce the +startup cost somewhat, but that's really the only reason that I see for doing +it] + +0. Pad the end of each function that contains instrumentation. + +At program startup: + +1. 
mmap the executable (or whatever) and construct the address-range to function +mapping information. + +2. For each function, find the load-volatile instructions that define interval +and point metrics, and the starting locations of the pad region. At the entry to +the padded region, place a call to the "phase 3 transformation function", and +over-write the *first* instance of a load-volatile instruction (for either a +point or an interval) with a direct branch/annulled delay slot to the start of +the pad region. + +Execution continues. For those functions that are actually called, when +execution reaches the point where instrumentation should be invoked for the +first time, they get redirected to the base trampoline which calls the phase 3 +transformation function. + +The phase 3 transformation function: + +Does all the tracecache-like magic, copying the original code to a region of +memory where the code can grow, rewriting the pad region so that it will execute +the indirect jump to the new code region. + +Details of the last step are left intentionally opaque, if only because I don't +know exactly what they entail yet. :P + }}} + + }}} }}} {{{ IMPLEMENTATION SKETCH + {{{ Current implementation sketch: + +[This approach focuses on doing the minimal amount of work (which is still a +*lot*, I think) at program startup (i.e. "phase 2" using the new phase +designations) and distributed work on-demand ("phase 3"). This should reduce the +startup cost somewhat, but that's really the only reason that I see for doing +it] + +0. Pad the end of each function that contains instrumentation. + +At program startup: + +1. mmap the executable (or whatever) and construct the address-range to function +mapping information. + +2. For each function, find the load-volatile instructions that define interval +and point metrics, and the starting locations of the pad region. 
At the entry to +the padded region, place a call to the "phase 3 transformation function", and +over-write the *first* instance of a load-volatile instruction (for either a +point or an interval) with a direct branch/annulled delay slot to the start of +the pad region. + +Execution continues. For those functions that are actually called, when +execution reaches the point where instrumentation should be invoked for the +first time, they get redirected to the base trampoline which calls the phase 3 +transformation function. + +The phase 3 transformation function: + +Does all the tracecache-like magic, copying the original code to a region of +memory where the code can grow, rewriting the pad region so that it will execute +the indirect jump to the new code region. + + }}} + {{{ Older implementation sketches: At a high level, in broad sweeping strokes, we're going to use the trace cache tool as a framework for runtime manipulation of the binary code. That is, the framework provided by the tracecache allows the @@ -579,7 +780,7 @@ debugging information (take the address of it in a function, load that address, look for the address, verify?). -See TODO list below. + }}} }}} @@ -591,6 +792,73 @@ {{{ TODO +- Read EEL paper to get a better feel for binary modification issues + +- Use the existing mechanisms at your disposal + (ELF/tracecache/BinInterface/VirtualMem/etc) to do the following. + + In phase 1: + + Complete the remainder of the phase-1 actions: building the GBT, handling + the sigfuns properly (i.e. adding a pair-of-sigfuns mechanism even for + point metrics), compare against by-hand example for phase 1 actions, etc. + + At the end of each instrumented function, immutably pad with a large + enough pad region. 
{Propose doing this as a for loop containing immutable + loads} + + On program startup ("phase 2" function called from main()): + + [check] mmap or otherwise load the ELF representation of the program and + acquire an ELF descriptor (etc) that will be persistent throughout the + program's execution. + + Collect address ranges for all functions, so that when a particular + load-volatile instruction is encountered, it can be determined what + function it ended up being in. I think that these should be the same + virtual addresses as seen within the context of the executing code, but + this should be verified. + + ^^^ At this point, the application should be running and, at RUNTIME, spit + out (at the very least) the function boundary addresses; preferably, it + can spit out the BinInterface-obtained disassembly as well so that we can + compare it against the static disassembly. + + For each function, locate the load-volatile instructions that define + interval and point metrics (potentially recording some information about + them for later use); also find the padding region at the end of the + function (this may be hard). Write code into the padding region to call + the "phase 3 transformation function", and over-write the *first* + load-volatile in the function that corresponds to an instrumentation point + (or interval start point) with a direct branch down to the padded region. + + Vikram's comment on this last step: + + [Finding "the first" load-volatile in the function is not easy because of + control-flow. Furthermore, I don't think Step 2 needs to find + load-volatiles for actual instrumentations at all since many functions may + never be executed. We should leave that to step 3. + + Therefore, I would simplify as follows: + + For each function, find the load-volatile instructions that define the + entry of the padded region. Over-write the first instruction of the + function with a direct branch to a trampoline in the padded region. 
This + trampoline executes the first instruction and then calls the Phase 3 + routine to instrument the function.] + + On phase 3 transformation function invocation: + + Performs all of tracecache-like magic, copying the original code to a + region of memory where the code can grow, rewriting the pad region so that + it will execute the indirect jump to the new code region, etc. The + majority of the actions required here are still fairly unclear. To + accomplish this step, we must first determine how to make the branch- and + call-maps that the TraceCache addTrace() routine(s) require, and how to + otherwise use the existing tracecache stuff to accomplish what we want. +}}} + +{{{ COMPLETED TODO ITEMS - Answer the following questions about the tracecache: {{{ @@ -618,41 +886,7 @@ because all of the contextual information about a particular function is obtained via the LLVM mapping information. - - Perform the following experiement to help answer these questions: - - Use the tracecache/BinInterface/VirtualMem/etc mechanisms as they - currently exist, together with te ELF library and phase 1, to do the - following: - - Insert a call to our phase2 function in main; the phase2 function will - be responsible for doing all of the binary analysis and - transformations. - - For using ELF mechanisms that we need to use, determine how the - tracecache is currently (if it is) mmap'ing the executable, and how to - direct the ELF library to use the executable image in memory instead - of loading it from disk. - - Given the name of a function that exists in the ELF object file, - obtain its starting and ending address _in the address space of the - running application_. - - ^^^ At this point, the application should be running and, at RUNTIME, - spit out (at the very least) the function boundary addresses; - preferably, it can spit out the BinInterface-obtained disassembly as - well so that we can compare it against the static disassembly. 
- - Copy this address region to the cache and reroute execution, - preferably modifying some code in the cache so that the rerouted - execution is apparent during execution. [This step is really the key - investigatory point: do we need to access the LLVM-bytecode CFG to do - this? Does the copy mechanism only support a copy of a specified path - into the cache, or will it operate on an arbitrary CFG/CFG subgraph?] - }}} - -- Read EEL paper to get a better feel for binary modification issues - }}} {{{ BY-HAND EXAMPLE OF PHASE ACTIONS From lattner at cs.uiuc.edu Mon Mar 31 13:56:00 2003 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Mon Mar 31 13:56:00 2003 Subject: [llvm-commits] CVS: llvm/lib/Transforms/Scalar/PRE.cpp Message-ID: <200303311955.NAA18765@apoc.cs.uiuc.edu> Changes in directory llvm/lib/Transforms/Scalar: PRE.cpp added (r1.1) --- Log message: Initial checkin of PRE on LLVM. This implementation is still lacking in several ways: * Load expressions are not PRE'd well. Alias Analysis should be used to get accurate information when computing anticipatibility. * The expression collection implementation does not handle PHI nodes properly, thus the implementation misses many opportunities to PRE. * This code could be sped up quite a bit Despite these flaws, the code seems to work well, and handles PR's as one would expect. 
--- Diffs of the changes: From lattner at cs.uiuc.edu Mon Mar 31 13:57:01 2003 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Mon Mar 31 13:57:01 2003 Subject: [llvm-commits] CVS: llvm/test/Regression/Transforms/PRE/ Message-ID: <200303311956.NAA18775@apoc.cs.uiuc.edu> Changes in directory llvm/test/Regression/Transforms/PRE: --- Log message: Directory /home/vadve/vadve/Research/DynOpt/CVSRepository/llvm/test/Regression/Transforms/PRE added to the repository --- Diffs of the changes: From lattner at cs.uiuc.edu Mon Mar 31 13:58:01 2003 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Mon Mar 31 13:58:01 2003 Subject: [llvm-commits] CVS: llvm/test/Regression/Transforms/PRE/Makefile Message-ID: <200303311957.NAA18992@apoc.cs.uiuc.edu> Changes in directory llvm/test/Regression/Transforms/PRE: Makefile added (r1.1) --- Log message: Initial checkin of PRE tests --- Diffs of the changes: From lattner at cs.uiuc.edu Mon Mar 31 15:20:01 2003 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Mon Mar 31 15:20:01 2003 Subject: [llvm-commits] CVS: llvm/test/Regression/Transforms/PRE/basictest.ll Message-ID: <200303312119.PAA19382@apoc.cs.uiuc.edu> Changes in directory llvm/test/Regression/Transforms/PRE: basictest.ll added (r1.1) --- Log message: Initial checkin of PRE test --- Diffs of the changes: From lattner at cs.uiuc.edu Mon Mar 31 15:27:01 2003 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Mon Mar 31 15:27:01 2003 Subject: [llvm-commits] CVS: llvm/utils/Burg/gram.yc Message-ID: <200303312126.PAA19424@apoc.cs.uiuc.edu> Changes in directory llvm/utils/Burg: gram.yc updated: 1.1 -> 1.2 --- Log message: Update to work with newer versions of bison --- Diffs of the changes: Index: llvm/utils/Burg/gram.yc diff -u llvm/utils/Burg/gram.yc:1.1 llvm/utils/Burg/gram.yc:1.2 --- llvm/utils/Burg/gram.yc:1.1 Sat Sep 21 21:40:40 2002 +++ llvm/utils/Burg/gram.yc Mon Mar 31 15:26:06 2003 @@ -1,5 +1,5 @@ %{ -char rcsid_gram[] = "$Id: gram.yc,v 1.1 2002/09/22 02:40:40 lattner 
Exp $"; +char rcsid_gram[] = "$Id: gram.yc,v 1.2 2003/03/31 21:26:06 lattner Exp $"; #include #include "b.h" @@ -46,46 +46,46 @@ ; spec : decls K_PPERCENT rules - = { doSpec($1, $3); } + { doSpec($1, $3); } ; -decls : /* lambda */ = { $$ = 0; } - | decls decl = { $$ = newList($2, $1); } +decls : /* lambda */ { $$ = 0; } + | decls decl { $$ = newList($2, $1); } ; -decl : K_TERM bindinglist = { $$ = newArity(-1, $2); } - | K_GRAM grammarlist = { $$ = 0; doGram($2); } - | K_START ID = { $$ = 0; doStart($2); } /* kludge */ +decl : K_TERM bindinglist { $$ = newArity(-1, $2); } + | K_GRAM grammarlist { $$ = 0; doGram($2); } + | K_START ID { $$ = 0; doStart($2); } /* kludge */ ; -grammarlist : /* lambda */ = { $$ = 0; } - | grammarlist ID = { $$ = newList($2, $1); } +grammarlist : /* lambda */ { $$ = 0; } + | grammarlist ID { $$ = newList($2, $1); } ; -bindinglist : /* lambda */ = { $$ = 0; } - | bindinglist binding = { $$ = newList($2, $1); } +bindinglist : /* lambda */ { $$ = 0; } + | bindinglist binding { $$ = newList($2, $1); } ; -binding : ID '=' INT = { $$ = newBinding($1, $3); } +binding : ID '=' INT { $$ = newBinding($1, $3); } ; -rules : /* lambda */ = { $$ = 0; } - | rules rule = { $$ = newList($2, $1); } +rules : /* lambda */ { $$ = 0; } + | rules rule { $$ = newList($2, $1); } ; -rule : ID ':' pattern '=' INT cost ';' = { $$ = newRuleAST($1, $3, $5, $6); } +rule : ID ':' pattern '=' INT cost ';' { $$ = newRuleAST($1, $3, $5, $6); } ; -pattern : ID = { $$ = newPatternAST($1, 0); } - | ID '(' pattern ')' = { $$ = newPatternAST($1, newList($3,0)); } - | ID '(' pattern ',' pattern ')' = { $$ = newPatternAST($1, newList($3, newList($5, 0))); } +pattern : ID { $$ = newPatternAST($1, 0); } + | ID '(' pattern ')' { $$ = newPatternAST($1, newList($3,0)); } + | ID '(' pattern ',' pattern ')' { $$ = newPatternAST($1, newList($3, newList($5, 0))); } ; -cost : /* lambda */ = { $$ = 0; } - | '(' INT costtail ')' = { $$ = newIntList($2, $3); } +cost : /* lambda */ { $$ = 0; 
} + | '(' INT costtail ')' { $$ = newIntList($2, $3); } ; -costtail : /* lambda */ = { $$ = 0; } - | ',' INT costtail = { $$ = newIntList($2, $3); } - | INT costtail = { $$ = newIntList($1, $2); } +costtail : /* lambda */ { $$ = 0; } + | ',' INT costtail { $$ = newIntList($2, $3); } + | INT costtail { $$ = newIntList($1, $2); } ; From lattner at cs.uiuc.edu Mon Mar 31 16:13:01 2003 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Mon Mar 31 16:13:01 2003 Subject: [llvm-commits] CVS: llvm/tools/lli/Interpreter/ExternalFunctions.cpp Message-ID: <200303312212.QAA20200@apoc.cs.uiuc.edu> Changes in directory llvm/tools/lli/Interpreter: ExternalFunctions.cpp updated: 1.42 -> 1.43 --- Log message: Implement scanf and fix sscanf to actually endian swap the results correctly --- Diffs of the changes: Index: llvm/tools/lli/Interpreter/ExternalFunctions.cpp diff -u llvm/tools/lli/Interpreter/ExternalFunctions.cpp:1.42 llvm/tools/lli/Interpreter/ExternalFunctions.cpp:1.43 --- llvm/tools/lli/Interpreter/ExternalFunctions.cpp:1.42 Sun Jan 12 18:59:47 2003 +++ llvm/tools/lli/Interpreter/ExternalFunctions.cpp Mon Mar 31 16:12:37 2003 @@ -410,17 +410,111 @@ return GV; } +static void ByteswapSCANFResults(const char *Fmt, void *Arg0, void *Arg1, + void *Arg2, void *Arg3, void *Arg4, void *Arg5, + void *Arg6, void *Arg7, void *Arg8) { + void *Args[] = { Arg0, Arg1, Arg2, Arg3, Arg4, Arg5, Arg6, Arg7, Arg8, 0 }; + + // Loop over the format string, munging read values as appropriate (performs + // byteswaps as neccesary). + unsigned ArgNo = 0; + while (*Fmt) { + if (*Fmt++ == '%') { + // Read any flag characters that may be present... 
+ bool Suppress = false; + bool Half = false; + bool Long = false; + bool LongLong = false; // long long or long double + + while (1) { + switch (*Fmt++) { + case '*': Suppress = true; break; + case 'a': /*Allocate = true;*/ break; // We don't need to track this + case 'h': Half = true; break; + case 'l': Long = true; break; + case 'q': + case 'L': LongLong = true; break; + default: + if (Fmt[-1] > '9' || Fmt[-1] < '0') // Ignore field width specs + goto Out; + } + } + Out: + + // Read the conversion character + if (!Suppress && Fmt[-1] != '%') { // Nothing to do? + unsigned Size = 0; + const Type *Ty = 0; + + switch (Fmt[-1]) { + case 'i': case 'o': case 'u': case 'x': case 'X': case 'n': case 'p': + case 'd': + if (Long || LongLong) { + Size = 8; Ty = Type::ULongTy; + } else if (Half) { + Size = 4; Ty = Type::UShortTy; + } else { + Size = 4; Ty = Type::UIntTy; + } + break; + + case 'e': case 'g': case 'E': + case 'f': + if (Long || LongLong) { + Size = 8; Ty = Type::DoubleTy; + } else { + Size = 4; Ty = Type::FloatTy; + } + break; + + case 's': case 'c': case '[': // No byteswap needed + Size = 1; + Ty = Type::SByteTy; + break; + + default: break; + } + + if (Size) { + GenericValue GV; + void *Arg = Args[ArgNo++]; + memcpy(&GV, Arg, Size); + TheInterpreter->StoreValueToMemory(GV, (GenericValue*)Arg, Ty); + } + } + } + } +} + // int sscanf(const char *format, ...); GenericValue lle_X_sscanf(FunctionType *M, const vector &args) { assert(args.size() < 10 && "Only handle up to 10 args to sscanf right now!"); - const char *Args[10]; + char *Args[10]; for (unsigned i = 0; i < args.size(); ++i) - Args[i] = (const char*)GVTOP(args[i]); + Args[i] = (char*)GVTOP(args[i]); GenericValue GV; GV.IntVal = sscanf(Args[0], Args[1], Args[2], Args[3], Args[4], Args[5], Args[6], Args[7], Args[8], Args[9]); + ByteswapSCANFResults(Args[1], Args[2], Args[3], Args[4], + Args[5], Args[6], Args[7], Args[8], Args[9], 0); + return GV; +} + +// int scanf(const char *format, ...); 
+GenericValue lle_X_scanf(FunctionType *M, const vector &args) { + assert(args.size() < 10 && "Only handle up to 10 args to scanf right now!"); + + char *Args[10]; + for (unsigned i = 0; i < args.size(); ++i) + Args[i] = (char*)GVTOP(args[i]); + + GenericValue GV; + GV.IntVal = scanf(Args[0], Args[1], Args[2], Args[3], Args[4], + Args[5], Args[6], Args[7], Args[8], Args[9]); + ByteswapSCANFResults(Args[0], Args[1], Args[2], Args[3], Args[4], + Args[5], Args[6], Args[7], Args[8], Args[9]); return GV; } @@ -644,6 +738,7 @@ FuncNames["lle_X_printf"] = lle_X_printf; FuncNames["lle_X_sprintf"] = lle_X_sprintf; FuncNames["lle_X_sscanf"] = lle_X_sscanf; + FuncNames["lle_X_scanf"] = lle_X_scanf; FuncNames["lle_i_clock"] = lle_i_clock; FuncNames["lle_X_fopen"] = lle_X_fopen; FuncNames["lle_X_fclose"] = lle_X_fclose; From jstanley at cs.uiuc.edu Tue Apr 1 16:04:01 2003 From: jstanley at cs.uiuc.edu (Joel Stanley) Date: Tue Apr 1 16:04:01 2003 Subject: [llvm-commits] CVS: llvm/lib/Reoptimizer/BinInterface/sparc9.cpp sparc9.h sparcdis.cpp Message-ID: <200304012213.QAA19360@cypher.cs.uiuc.edu> Changes in directory llvm/lib/Reoptimizer/BinInterface: sparc9.cpp updated: 1.2 -> 1.3 sparc9.h updated: 1.8 -> 1.9 sparcdis.cpp updated: 1.6 -> 1.7 --- Log message: Minor bug fixes. 
--- Diffs of the changes: Index: llvm/lib/Reoptimizer/BinInterface/sparc9.cpp diff -u llvm/lib/Reoptimizer/BinInterface/sparc9.cpp:1.2 llvm/lib/Reoptimizer/BinInterface/sparc9.cpp:1.3 --- llvm/lib/Reoptimizer/BinInterface/sparc9.cpp:1.2 Sun Dec 15 05:07:33 2002 +++ llvm/lib/Reoptimizer/BinInterface/sparc9.cpp Tue Apr 1 16:13:35 2003 @@ -1,3 +1,4 @@ + //***************************************************************************** // // Portable SPARC v9 Machine Code Definition Arrays Index: llvm/lib/Reoptimizer/BinInterface/sparc9.h diff -u llvm/lib/Reoptimizer/BinInterface/sparc9.h:1.8 llvm/lib/Reoptimizer/BinInterface/sparc9.h:1.9 --- llvm/lib/Reoptimizer/BinInterface/sparc9.h:1.8 Sun Dec 15 05:07:33 2002 +++ llvm/lib/Reoptimizer/BinInterface/sparc9.h Tue Apr 1 16:13:35 2003 @@ -307,7 +307,7 @@ #define OP3_STB 0x00000005 /* 0b000101 */ #define OP3_STH 0x00000006 /* 0b000110 */ #define OP3_STW 0x00000004 /* 0b000100 */ -#define OP3_STX 0x00000007 /* 0b000111 */ +#define OP3_STX 0x0000000E /* 0b001110 */ // Was 0b111 which is wrong (see pg 229 v9 spec) #define OP3_LDSB 0x00000009 /* 0b001001 */ #define OP3_LDSH 0x0000000A /* 0b001010 */ #define OP3_LDSW 0x00000008 /* 0b001000 */ Index: llvm/lib/Reoptimizer/BinInterface/sparcdis.cpp diff -u llvm/lib/Reoptimizer/BinInterface/sparcdis.cpp:1.6 llvm/lib/Reoptimizer/BinInterface/sparcdis.cpp:1.7 --- llvm/lib/Reoptimizer/BinInterface/sparcdis.cpp:1.6 Sun Dec 15 05:07:42 2002 +++ llvm/lib/Reoptimizer/BinInterface/sparcdis.cpp Tue Apr 1 16:13:35 2003 @@ -52,7 +52,7 @@ void sparc_printbr(unsigned instr, bool labels, int labelrs1, int labelrs2, int labelrd, int labelccf) { - // look at the OP2 field + // look at the OP2 field if (RD_FLD(instr,INSTR_OP2)==OP2_ILLTRAP) printf("ILLTRAP"); else if (RD_FLD(instr, INSTR_OP2)==OP2_SETHI) @@ -84,6 +84,11 @@ RD_FLD(instr, INSTR_A) ? 
",a" : "", RD_FLD(instr, INSTR_DISP22)); } + else if (RD_FLD(instr, INSTR_OP2)==OP2_BPICC) + { + // TODO + printf("[BPICC -- currently not handled]"); + } else{ printf("Unknown:OP=0b00 OP2 = 0x%04X", RD_FLD(instr, INSTR_OP2)); assert(0); @@ -118,6 +123,7 @@ { //OP=OP_2: RD, OP_3 RS1: {I=0 -> X & RS2 ,I=1 -> // {X=0 -> SHCNT32 X=1->SHCNT64 }} + if (RD_FLD(instr, INSTR_I)==0) { printf("%s%d ", (RD_FLD(instr, INSTR_X)==0 ? 32 : 64)); @@ -130,14 +136,15 @@ } else { - printf("%s%d ", (RD_FLD(instr, INSTR_X)==0 ? 32 : 64)); - if (!labels) - printf("%s, ", reg_names[RD_FLD(instr, INSTR_RD)]); + printf("%s%s ", basename, (RD_FLD(instr, INSTR_X)==0 ? "" : "x")); sparc_printop_rs1(instr, labelrs1); - printf(", 0x%04X", (RD_FLD(instr, INSTR_X)==0 + + printf(", 0x%04X, ", (RD_FLD(instr, INSTR_X)==0 ? RD_FLD(instr, INSTR_SHCNT32) : RD_FLD(instr, INSTR_SHCNT64))); + if (!labels) + printf("%s", reg_names[RD_FLD(instr, INSTR_RD)]); } } From jstanley at cs.uiuc.edu Thu Apr 3 15:01:01 2003 From: jstanley at cs.uiuc.edu (Joel Stanley) Date: Thu Apr 3 15:01:01 2003 Subject: [llvm-commits] CVS: llvm/lib/Reoptimizer/Inst/ElfReader.cpp ElfReader.h PerfInst.cpp design.txt Message-ID: <200304032101.PAA27351@cypher.cs.uiuc.edu> Changes in directory llvm/lib/Reoptimizer/Inst: ElfReader.cpp updated: 1.1 -> 1.2 ElfReader.h updated: 1.1 -> 1.2 PerfInst.cpp updated: 1.3 -> 1.4 design.txt updated: 1.4 -> 1.5 --- Log message: --- Diffs of the changes: Index: llvm/lib/Reoptimizer/Inst/ElfReader.cpp diff -u llvm/lib/Reoptimizer/Inst/ElfReader.cpp:1.1 llvm/lib/Reoptimizer/Inst/ElfReader.cpp:1.2 --- llvm/lib/Reoptimizer/Inst/ElfReader.cpp:1.1 Mon Mar 31 11:48:07 2003 +++ llvm/lib/Reoptimizer/Inst/ElfReader.cpp Thu Apr 3 15:00:51 2003 @@ -83,66 +83,12 @@ if(STT_FUNC == (sym.st_info & 0xf)) { // Symbol type is lower 4 bits fname = m_strTab + sym.st_name; addressRange.first = sym.st_value; - addressRange.second = sym.st_value + sym.st_size; + addressRange.second = sym.st_value + sym.st_size - 4; return 
true; } } return false; -} - -void ElfReader::DumpFunctions(std::ostream& ostr) -{ - if(!m_symTab) - LocateSymbolTable(); - - // tmp - ostr << "Dumping functions from linker symbol table" << endl; - Elf_Data* data = elf_getdata(elf_getscn(m_elfDes, m_elfHdr->e_shstrndx), 0); - char* secName = (char*) data->d_buf + m_symTab->sh_name; - ostr << "Section name of symtable is: " << secName << endl; - // tmp - - // Obtain ptr to string table associated with the symbol table. - Elf_Data* strTabHand = elf_getdata(elf_getscn(m_elfDes, m_symTab->sh_link), 0); - assert(strTabHand && "Couldn't obtain ELF data handle to string table"); - char* strTab = (char*) strTabHand->d_buf; - - // Determine the size of each entry and the number of entries in the symbol table - int entrySize = m_symTab->sh_entsize; - int numEntries = m_symTab->sh_size / entrySize; - assert(m_symTab->sh_size % entrySize == 0 && "Symtable size must be multiple of entry size"); - ostr << "Symbol table contains " << numEntries << " entries" << endl; - - // Seek to the start of the symbol table in the file - if(lseek(m_execFD, m_symTab->sh_offset, SEEK_SET) < 0) - assert(0 && "Couldn't seek to start of symbol table"); - - // Scan for entries of type STT_FUNC (which denote symtable table entries that - // correspond to function entry points), and dump information about each one of them. 
- - Elf64_Sym sym; - int numFunc = 0; - for(int currEnt = 0; currEnt < numEntries; ++currEnt) { - int rdcnt = 0; - do { - rdcnt = read(m_execFD, &sym + rdcnt, entrySize); - } while(rdcnt < entrySize); - - if(STT_FUNC == (sym.st_info & 0xf)) { // Symbol type is lower 4 bits - numFunc++; - - //ostr << (strTab + sym.st_name) << "["; - //fprintf(stderr, "Address is %lx", sym.st_value); - //fflush(stderr); - //ostr << "], size " << sym.st_size << endl; - - ostr << "Function name is: " << (strTab + sym.st_name) << endl; - - } - } - - //ostr << "Done, encountered " << numFunc << " functions" << endl; } void ElfReader::LocateSymbolTable() Index: llvm/lib/Reoptimizer/Inst/ElfReader.h diff -u llvm/lib/Reoptimizer/Inst/ElfReader.h:1.1 llvm/lib/Reoptimizer/Inst/ElfReader.h:1.2 --- llvm/lib/Reoptimizer/Inst/ElfReader.h:1.1 Mon Mar 31 11:48:34 2003 +++ llvm/lib/Reoptimizer/Inst/ElfReader.h Thu Apr 3 15:00:51 2003 @@ -19,7 +19,6 @@ typedef std::pair AddressRange; bool GetNextFunction(std::string& string, AddressRange& range); - void DumpFunctions(std::ostream& ostr); private: ElfReader() {} Index: llvm/lib/Reoptimizer/Inst/PerfInst.cpp diff -u llvm/lib/Reoptimizer/Inst/PerfInst.cpp:1.3 llvm/lib/Reoptimizer/Inst/PerfInst.cpp:1.4 --- llvm/lib/Reoptimizer/Inst/PerfInst.cpp:1.3 Mon Mar 31 11:48:34 2003 +++ llvm/lib/Reoptimizer/Inst/PerfInst.cpp Thu Apr 3 15:00:51 2003 @@ -16,6 +16,7 @@ #include #include "ElfReader.h" +#include "../BinInterface/sparcdis.h" using std::vector; using std::cerr; @@ -33,7 +34,32 @@ std::string funcName; ElfReader::AddressRange range; while(elfReader.GetNextFunction(funcName, range)) { - cerr << "Function name is: " << funcName << endl; + if(funcName == "main") { + cerr << "Function name is: " << funcName << endl; + cerr << "\tAddress range is ["; + fprintf(stderr, "%lx, %lx]", range.first, range.second); + cerr << endl; + + cerr << "Dumping BinInterface-generated disasm:" << endl; + + for(unsigned* inst = (unsigned*)((void*) range.first), + *end = 
(unsigned*)((void*) range.second); inst <= end; ++inst){ + printf("%lx:\t%8x\t", (uint64_t) inst, *inst); + sparc_print(*inst); + printf("\n"); + fflush(stdout); + } + +#if 0 + cerr << "First instruction in function: " << endl; + void* ptr = (void*) range.first; + unsigned inst = *((uint32_t*)((void*) range.first)); + fprintf(stderr, "%x\n", inst); + cerr << "Disassembly is: "; + sparc_print(inst); + fflush(stdout); +#endif + } } cerr << "============================== End Phase 2 ==============================\n"; Index: llvm/lib/Reoptimizer/Inst/design.txt diff -u llvm/lib/Reoptimizer/Inst/design.txt:1.4 llvm/lib/Reoptimizer/Inst/design.txt:1.5 --- llvm/lib/Reoptimizer/Inst/design.txt:1.4 Mon Mar 31 11:48:34 2003 +++ llvm/lib/Reoptimizer/Inst/design.txt Thu Apr 3 15:00:51 2003 @@ -653,6 +653,79 @@ }}} +{{{ MEETING MINUTES 03 Apr 2003 + +New definition of different phases: + +Phase 1: + +Same as before but inserts ONE call to phase 2 in main. + +Phase 2: + +Using the ELF symbol table, iterate over *every* function (can we restrict +ourselves to only the code in the text segment? I sure hope so) and attempt to +locate its pad. If the pad is not found, the function has not been instrumented +and we don't care about it. + +For functions where only one pad is encountered (common case), the format is: + + entry instruction + ... + paddedRegion start + ... + paddedRegion end + return code + +The new code looks like: + +branch to padded region start + return code size +... +return code (copied from end of function; this is at the padded region start location) +entry instruction (target of inserted branch) +[padded region contents] +return code + +Must decide how to handle functions where two (or more) pads are encountered. I +have a sinking feeling that we will have to use both pads because we cannot +dynamically grow the pad regions and there is cause for their total combined +capacity to be there.
We may have to devise some special action on the part of +phase 2 to "distribute" the branches to phase 3 across the multiple pads. + +Phase 3: + +Isn't really a "phase"; rather, it will execute the code written by phase 2. +However, this may result in transformation of the code because if we have a +load-volatile candidate that is a false positive, the original load instruction +will be restored in place of the branch into the base tramp. + +---- + +Discussed a "signature" for load-volatiles. In C, the instrumentation locations +will be denoted with: + +volatile char gvVar1; +volatile short gvVar2; +gvVar1 = gvVar2; + +The reason for the size difference is so that we get a load half-word, store +byte instruction pair which can be searched for by phase2: + +ldh [%o0], %rn +... +stb rn, [%o5] + +or whatever. These will be automatically selected as candidates for +branch-replacement, which means that phase 2 will: + +a) overwrite the load with the branch down into the pad +b) nop over the store +c) save both instructions for restoring later if false positive (how? in pad?) + +First order of business is to actually FIND the pad extents. + +}}} + {{{ IMPLEMENTATION SKETCH {{{ Current implementation sketch: @@ -794,36 +867,11 @@ - Read EEL paper to get a better feel for binary modification issues +{{{ OLD PHASE DESCRIPTION + - Use the existing mechanisms at your disposal (ELF/tracecache/BinInterface/VirtualMem/etc) to do the following. - In phase 1: - - Complete the remainder of the phase-1 actions: building the GBT, handling - the sigfuns properly (i.e. adding a pair-of-sigfuns mechanism even for - point metrics), compare against by-hand example for phase 1 actions, etc. - - At the end of each instrumented function, immutably pad with a large - enough pad region.
{Propose doing this as a for loop containing immutable - loads} - - On program startup ("phase 2" function called from main()): - - [check] mmap or otherwise load the ELF representation of the program and - acquire an ELF descriptor (etc) that will be persistent throughout the - program's execution. - - Collect address ranges for all functions, so that when a particular - load-volatile instruction is encountered, it can be determined what - function it ended up being in. I think that these should be the same - virtual addresses as seen within the context of the executing code, but - this should be verified. - - ^^^ At this point, the application should be running and, at RUNTIME, spit - out (at the very least) the function boundary addresses; preferably, it - can spit out the BinInterface-obtained disassembly as well so that we can - compare it against the static disassembly. - For each function, locate the load-volatile instructions that define interval and point metrics (potentially recording some information about them for later use); also find the padding region at the end of the @@ -847,6 +895,19 @@ trampoline executes the first instruction and then calls the Phase 3 routine to instrument the function.] + Scratch that. I think this needs to be rephrased again to (assuming we + have only one pad region in the function body: + + For each function, find the load-volatile instructions that define the + padded region so we know where it is. Then, replace the first instruction + in the function w/ a branch down to the padded region. The padded region + contains and indirect branch to a dynamically-allocated body of code into + which the entire function body is copied. Phase 2 then manipulates the + code in the copied region, replacing candidate load-volatiles w/ if/else + blocks that call the appropriate instrumentation function if the + load-volatile is actually an instrumentation function or executing the + original code otherwise. 
+ On phase 3 transformation function invocation: Performs all of tracecache-like magic, copying the original code to a @@ -856,6 +917,47 @@ accomplish this step, we must first determine how to make the branch- and call-maps that the TraceCache addTrace() routine(s) require, and how to otherwise use the existing tracecache stuff to accomplish what we want. + +}}} + +{{{ NEW PHASE DESCRIPTION + +Notes on using the total-copy approach in the prototype implementation. + +Note that we will need to use the total-copy approach as a "fall-back" from the +dummy function (or padded region approach) in the following cases: + +a) The dummy function is outside the PC-relative distance + +b) The number of candidate instructions exceeds the fixed-size of dummy function +(or of the padding region). This may happen fairly easily, it seems to me, +esp. since we can't really estimate the frequency of our "load half-word, store +byte pattern". + +Since the padding region has problems (sorta complex "determining the load +instructions" heuristic, but worse, what happens with inlining?), we'd decided +to go with the dummy function approach. + +However, the copy solution will still have to be used in a robust implementation +as a fall-back, so I intend to implement that first. Using the "copy-always" +approach increases the initial overhead (which we don't *really* care about, but +is important) and increases the memory footprint (but not the working-set +size). These seem like inefficiencies that aren't so egregious that they ought +not exist in the prototype version. + +Before I do the heap-managed copy-always approach, however, I should look into +the trace cache capabilities and see if there is anything there that will +significantly help me out. It's sufficient to do a dummy-function-only prototype +as well (for similar reasons), but it's not as flexible. + +The tradeoff is a less general, more efficient implementation vs. a more +general, less efficient implementation.
The most general, most efficient +implementation may not be obtainable in the short term, but it's reasonable to +try for. + + +}}} + }}} {{{ COMPLETED TODO ITEMS From ashukla at cs.uiuc.edu Fri Apr 4 14:29:02 2003 From: ashukla at cs.uiuc.edu (Anand Shukla) Date: Fri Apr 4 14:29:02 2003 Subject: [llvm-commits] CVS: llvm/lib/Reoptimizer/TraceCache/TraceCache.cpp Message-ID: <200304042028.OAA24621@morpheus.cs.uiuc.edu> Changes in directory llvm/lib/Reoptimizer/TraceCache: TraceCache.cpp updated: 1.9 -> 1.10 --- Log message: Added a simple traceadd routine --- Diffs of the changes: Index: llvm/lib/Reoptimizer/TraceCache/TraceCache.cpp diff -u llvm/lib/Reoptimizer/TraceCache/TraceCache.cpp:1.9 llvm/lib/Reoptimizer/TraceCache/TraceCache.cpp:1.10 --- llvm/lib/Reoptimizer/TraceCache/TraceCache.cpp:1.9 Fri Feb 14 14:45:35 2003 +++ llvm/lib/Reoptimizer/TraceCache/TraceCache.cpp Fri Apr 4 14:28:01 2003 @@ -368,3 +368,71 @@ } +#undef GET_TRACE_TIME + +//Do the following: +//1. Insert a jump at location instrAddr with target as new trace +bool TraceCache::addTrace(uint64_t instAddr, int sz, + int traceUniqId, uint64_t &addr){ + + //remove trace if its already there + if(hasTraceAddr(instAddr)) + removeTrace(instAddr); + + while(isLimitSet && currSize+sz>limit){ + if(currSize == 0) + return false; + + //erase first addr from queue + assert(allocationOrder.size()>0 && "No entries in trace!"); + uint64_t addr = allocationOrder.front(); + allocationOrder.pop_front(); + removeTrace(addr); + } + + uint64_t traceStartAddr = mm->getMemory(sz); + + if(traceStartAddr == 0) return false; //could not allocate space! 
+ + traces[instAddr] = traceStartAddr; + traceId[traceUniqId] = traceStartAddr; + reverseTraceId[instAddr] = traceUniqId; + traceSize[instAddr] = sz; + reverseMap[traceStartAddr] = instAddr; + currSize += sz; + + //add instAddr to queue + allocationOrder.push_back(instAddr); + + //copy the first few instructions of the trace in the original code + //and write a jump instruction in its place + traceInstructions[instAddr].push_back(vm->readInstrFrmVm(instAddr)); + traceInstructions[instAddr].push_back(vm->readInstrFrmVm(instAddr+4)); + +#ifdef GET_TRACE_TIME + //two more: to accomodate call instruction and get time! + traceInstructions[instAddr].push_back(vm->readInstrFrmVm(instAddr+8)); + traceInstructions[instAddr].push_back(vm->readInstrFrmVm(instAddr+12)); + + //write call instruction! + vm->writeCallLLVMTime(instAddr); + + //Now write branch instruction with target as + //traceStartAddr at the address instAddr + vm->writeBranchInstruction(instAddr+8, traceStartAddr); +#endif + +#ifndef GET_TRACE_TIME + vm->writeBranchInstruction(instAddr, traceStartAddr); +#endif + + doFlush(instAddr, instAddr+4); + +#ifdef GET_TRACE_TIME + doFlush(instAddr+8, instAddr+12); +#endif + + addr = traceStartAddr; + + return true; +} From ashukla at cs.uiuc.edu Fri Apr 4 14:29:04 2003 From: ashukla at cs.uiuc.edu (Anand Shukla) Date: Fri Apr 4 14:29:04 2003 Subject: [llvm-commits] CVS: llvm/include/llvm/Reoptimizer/TraceCache.h Message-ID: <200304042028.OAA24636@morpheus.cs.uiuc.edu> Changes in directory llvm/include/llvm/Reoptimizer: TraceCache.h updated: 1.6 -> 1.7 --- Log message: Added a simple traceadd routine --- Diffs of the changes: Index: llvm/include/llvm/Reoptimizer/TraceCache.h diff -u llvm/include/llvm/Reoptimizer/TraceCache.h:1.6 llvm/include/llvm/Reoptimizer/TraceCache.h:1.7 --- llvm/include/llvm/Reoptimizer/TraceCache.h:1.6 Fri Feb 14 14:43:26 2003 +++ llvm/include/llvm/Reoptimizer/TraceCache.h Fri Apr 4 14:28:23 2003 @@ -99,7 +99,10 @@ std::map &callMap, std::map 
&branchMap, std::vector *exitStubs); - + + bool addTrace(uint64_t instAddr, int sz, int traceUniqId, + uint64_t &addr); + //bool addTrace(uint64_t instAddr, unsigned int trace[], int sz, // std::vector &inBranches, // std::vector > &outBranches, int traceUniqId); From jstanley at cs.uiuc.edu Fri Apr 4 16:58:00 2003 From: jstanley at cs.uiuc.edu (Joel Stanley) Date: Fri Apr 4 16:58:00 2003 Subject: [llvm-commits] CVS: llvm/test/Regression/Reoptimizer/inst/Makefile Message-ID: <200304042257.QAA23533@tank.cs.uiuc.edu> Changes in directory llvm/test/Regression/Reoptimizer/inst: Makefile added (r1.1) --- Log message: --- Diffs of the changes: From jstanley at cs.uiuc.edu Fri Apr 4 17:09:01 2003 From: jstanley at cs.uiuc.edu (Joel Stanley) Date: Fri Apr 4 17:09:01 2003 Subject: [llvm-commits] CVS: llvm/lib/Reoptimizer/Inst/Phase2.cpp design.txt PerfInst.cpp Message-ID: <200304042309.RAA31886@cypher.cs.uiuc.edu> Changes in directory llvm/lib/Reoptimizer/Inst: Phase2.cpp added (r1.1) design.txt updated: 1.5 -> 1.6 PerfInst.cpp (r1.4) removed --- Log message: Moved PerfInst.cpp contents -> Phase2.cpp, removing PerfInst.cpp. --- Diffs of the changes: Index: llvm/lib/Reoptimizer/Inst/design.txt diff -u llvm/lib/Reoptimizer/Inst/design.txt:1.5 llvm/lib/Reoptimizer/Inst/design.txt:1.6 --- llvm/lib/Reoptimizer/Inst/design.txt:1.5 Thu Apr 3 15:00:51 2003 +++ llvm/lib/Reoptimizer/Inst/design.txt Fri Apr 4 17:08:56 2003 @@ -865,64 +865,117 @@ {{{ TODO -- Read EEL paper to get a better feel for binary modification issues + - Investigate trace-cache dummy function mechanisms, decide on approach A or B + in phase outline -{{{ OLD PHASE DESCRIPTION + - Implement phase outline -- Use the existing mechanisms at your disposal - (ELF/tracecache/BinInterface/VirtualMem/etc) to do the following. 
- - For each function, locate the load-volatile instructions that define - interval and point metrics (potentially recording some information about - them for later use); also find the padding region at the end of the - function (this may be hard). Write code into the padding region to call - the "phase 3 transformation function", and over-write the *first* - load-volatile in the function that corresponds to an instrumentation point - (or interval start point) with a direct branch down to the padded region. - - Vikram's comment on this last step: - - [Finding "the first" load-volatile in the function is not easy because of - control-flow. Furthermore, I don't think Step 2 needs to find - load-volatiles for actual instrumentations at all since many functions may - never be executed. We should leave that to step 3. - - Therefore, I would simplify as follows: - - For each function, find the load-volatile instructions that define the - entry of the padded region. Over-write the first instruction of the - function with a direct branch to a trampoline in the padded region. This - trampoline executes the first instruction and then calls the Phase 3 - routine to instrument the function.] - - Scratch that. I think this needs to be rephrased again to (assuming we - have only one pad region in the function body: - - For each function, find the load-volatile instructions that define the - padded region so we know where it is. Then, replace the first instruction - in the function w/ a branch down to the padded region. The padded region - contains and indirect branch to a dynamically-allocated body of code into - which the entire function body is copied. Phase 2 then manipulates the - code in the copied region, replacing candidate load-volatiles w/ if/else - blocks that call the appropriate instrumentation function if the - load-volatile is actually an instrumentation function or executing the - original code otherwise. 
- - On phase 3 transformation function invocation: - - Performs all of tracecache-like magic, copying the original code to a - region of memory where the code can grow, rewriting the pad region so that - it will execute the indirect jump to the new code region, etc. The - majority of the actions required here are still fairly unclear. To - accomplish this step, we must first determine how to make the branch- and - call-maps that the TraceCache addTrace() routine(s) require, and how to - otherwise use the existing tracecache stuff to accomplish what we want. + - Read EEL paper to get a better feel for binary modification issues }}} -{{{ NEW PHASE DESCRIPTION +{{{ PHASE OUTLINE + + Below, Approach A refers to using *only* dummy functions, and Approach B + refers to using *only* dynamically-allocated, heap-managed memory. Approach C + (to come later) is the approach that combines the two, and is slightly more + complex. + + In phase 1: + + Phase 1 actions as described in earlier work (building the GBT, handling + sigfuns properly (i.e. adding a pair-of-sigfuns mechanism for point + metrics), compare against by-hand example for phase 1 actions, etc. Also + might need to record information about which volatiles are associated with + each for start/end points of intervals and point-scopes. + + Insert a call to phase2 in main. + + Handling storage for new code & instrumentation calls: + + Approach A: Construct a dummy function and record its address in the GBT + for use by the other phases. + + Approach B: Other phases use heap-managed dynamic memory; no dummy + function needed. + + In phase 2: + + On program startup ("phase 2" function called from main()): + + 1. Build a starting-addres-to-function-extent map for use by later phases. + + 2. For each function F (only those in the text segment preferably), setup phase 3 branches. + + Approach A: + + 2a. Replace the first instruction in F with a branch to a new slot in + the dummy function. 
-Notes on using the total-copy approach in the prototype implementation. + 2b. At the new slot write first the (replaced) first instruction in F, + followed by code to call the phase 3 function with the address of F as + an argument. + + Approach B: + + 2a. Save the first number instructions in an F -> [instructions] record + of some kind. Phase 3 will restore them later + + 2b. Over the top of the original instructions (now saved), write a call + to phase 3, passing the address of F as an argument. + + In phase 3: + + 1. Obtain the code region specified for F by the starting address to function + extent table built in phase 2. + + 2. + Approach A: Do nothing. + + Approach B: Copy the body of F into the heap-managed "instruction buffer" + (call the start location of the copy F') and over-write the first + instructions of F with an indirect jump to F'. Rewrite all branches within + the boundaries of F' as needed. Overwrite the first instructions of F' with + the instructions saved in the F -> [instructions] record constructed by + phase 2. + + 3. "Slots" refer to the properly-sized segments of memory containing whatever + code needs to be written. As a KIS concession, slots are not partitionable or + reusable. + + Approach A: For each candidate load instruction I within F, at location C: + Approach B: For each candidate load instruction I within F', at location C: + + 3a. Grab a new slot. + 3b. Save I's load/save instructions (L and S, respectively) in slot. + 3c. Replace the L with a branch to slot. + 3d. Replace S with a nop. + + Approach A: + + 3e. Write phase 4 code in slot: + if(actually an instrumentation site) + rewrite branch at C to next instruction + call proper instrumentation fnction <- C branches to here + branch back to C + else + restore original instructions + branch back to C + + Approach B: + + 3e. 
Write phase 4 code in slot: + if(actually an instrumentation site) + Grow code at C to call proper instrumentation function, + replacing branch placed there by 3c. + branch back to C + else + restore original instructions + branch back to C + + In phase 4: No special action needed. + +{{{ Notes on using the total-copy approach in the prototype implementation. Note that we will need to use the total-copy approach as a "fall-back" from the dummy function (or padded region approach) in the following cases: @@ -954,7 +1007,6 @@ general, less efficient implementation. The most general, most efficient implementation may not be obtainable in the short term, but it's reasonable to try for. - }}} From gshi1 at cs.uiuc.edu Sun Apr 6 18:57:01 2003 From: gshi1 at cs.uiuc.edu (Guochun Shi) Date: Sun Apr 6 18:57:01 2003 Subject: [llvm-commits] CVS: llvm/lib/CodeGen/ModuloScheduling/ModuloSchedGraph.cpp ModuloSchedGraph.h ModuloScheduling.cpp ModuloScheduling.h Message-ID: <200304062356.SAA19977@trinity.cs.uiuc.edu> Changes in directory llvm/lib/CodeGen/ModuloScheduling: ModuloSchedGraph.cpp updated: 1.1 -> 1.2 ModuloSchedGraph.h updated: 1.1 -> 1.2 ModuloScheduling.cpp updated: 1.1 -> 1.2 ModuloScheduling.h updated: 1.1 -> 1.2 --- Log message: change the include file names and some class names to make it compile --- Diffs of the changes: Index: llvm/lib/CodeGen/ModuloScheduling/ModuloSchedGraph.cpp diff -u llvm/lib/CodeGen/ModuloScheduling/ModuloSchedGraph.cpp:1.1 llvm/lib/CodeGen/ModuloScheduling/ModuloSchedGraph.cpp:1.2 --- llvm/lib/CodeGen/ModuloScheduling/ModuloSchedGraph.cpp:1.1 Thu Mar 27 11:57:43 2003 +++ llvm/lib/CodeGen/ModuloScheduling/ModuloSchedGraph.cpp Sun Apr 6 18:56:10 2003 @@ -7,7 +7,7 @@ #include "Support/StringExtras.h" #include "Support/STLExtras.h" #include -#include +//#include #include "llvm/iOperators.h" #include "llvm/iOther.h" #include "llvm/iPHINode.h" @@ -16,7 +16,7 @@ #include "llvm/Type.h" #include "llvm/CodeGen/MachineCodeForInstruction.h" #include 
"llvm/CodeGen/MachineInstr.h" -#include "llvm/Target/MachineSchedInfo.h" +#include "llvm/Target/TargetSchedInfo.h" #define UNIDELAY 1 #define min(a, b) ((a) < (b) ? (a) : (b)) @@ -104,7 +104,8 @@ void ModuloSchedGraph::addDefUseEdges(const BasicBlock* bb) { //collect def instructions, store them in vector - const MachineInstrInfo& mii = target.getInstrInfo(); + // const TargetInstrInfo& mii = target.getInstrInfo(); + const TargetInstrInfo& mii = target.getInstrInfo(); typedef std::vector DefVec; DefVec defVec; @@ -274,7 +275,7 @@ RegToRefVecMap& regToRefVecMap, ValueToDefVecMap& valueToDefVecMap) { - //const MachineInstrInfo& mii=target.getInstrInfo(); + //const TargetInstrInfo& mii=target.getInstrInfo(); //Build graph nodes for each LLVM instruction and gather def/use info. //Do both together in a single pass over all machine instructions. @@ -889,7 +890,7 @@ this->dump(bb); if(!isLoop(bb)){ - modSched_os <<" dumping non-loop BB:"< > &ru) { - MachineSchedInfo& msi = (MachineSchedInfo&)target.getSchedInfo(); + TargetSchedInfo& msi = (TargetSchedInfo&)target.getSchedInfo(); std::vector > resourceNumVector = msi.resourceNumVector; modSched_os <<"resourceID\t"<<"resourceNum"<<"\n"; @@ -1128,8 +1129,8 @@ int ModuloSchedGraph::computeResII(const BasicBlock* bb) { - const MachineInstrInfo& mii = target.getInstrInfo(); - const MachineSchedInfo& msi = target.getSchedInfo(); + const TargetInstrInfo& mii = target.getInstrInfo(); + const TargetSchedInfo& msi = target.getSchedInfo(); int ResII; std::vector > resourceUsage; //pair Index: llvm/lib/CodeGen/ModuloScheduling/ModuloSchedGraph.h diff -u llvm/lib/CodeGen/ModuloScheduling/ModuloSchedGraph.h:1.1 llvm/lib/CodeGen/ModuloScheduling/ModuloSchedGraph.h:1.2 --- llvm/lib/CodeGen/ModuloScheduling/ModuloSchedGraph.h:1.1 Thu Mar 27 11:57:43 2003 +++ llvm/lib/CodeGen/ModuloScheduling/ModuloSchedGraph.h Sun Apr 6 18:56:16 2003 @@ -13,6 +13,7 @@ #include "../InstrSched/SchedGraphCommon.h" #include "llvm/Instruction.h" #include 
"llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetInstrInfo.h" #include using std::pair; Index: llvm/lib/CodeGen/ModuloScheduling/ModuloScheduling.cpp diff -u llvm/lib/CodeGen/ModuloScheduling/ModuloScheduling.cpp:1.1 llvm/lib/CodeGen/ModuloScheduling/ModuloScheduling.cpp:1.2 --- llvm/lib/CodeGen/ModuloScheduling/ModuloScheduling.cpp:1.1 Thu Mar 27 11:57:43 2003 +++ llvm/lib/CodeGen/ModuloScheduling/ModuloScheduling.cpp Sun Apr 6 18:56:16 2003 @@ -8,9 +8,10 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineCodeForInstruction.h" -#include "llvm/CodeGen/MachineCodeForBasicBlock.h" -#include "llvm/CodeGen/MachineCodeForMethod.h" -#include "llvm/Analysis/LiveVar/FunctionLiveVarInfo.h" // FIXME: Remove when modularized better +//#include "llvm/CodeGen/MachineCodeForBasicBlock.h" +//#include "llvm/CodeGen/MachineCodeForMethod.h" +#include "llvm/CodeGen/MachineFunction.h" +//#include "llvm/Analysis/LiveVar/FunctionLiveVarInfo.h" // FIXME: Remove when modularized better #include "llvm/Target/TargetMachine.h" #include "llvm/BasicBlock.h" #include "llvm/Instruction.h" @@ -18,13 +19,13 @@ #include #include "ModuloSchedGraph.h" #include "ModuloScheduling.h" -#include "llvm/Target/MachineSchedInfo.h" +#include "llvm/Target/TargetSchedInfo.h" #include "llvm/BasicBlock.h" #include "llvm/iTerminators.h" #include "llvm/iPHINode.h" #include "llvm/Constants.h" #include -#include +//#include #include #include "llvm/CodeGen/InstrSelection.h" @@ -67,7 +68,7 @@ modSched_os<<"*************************computing modulo schedule ************************\n"; - const MachineSchedInfo& msi=target.getSchedInfo(); + const TargetSchedInfo& msi=target.getSchedInfo(); //number of issue slots in the in each cycle int numIssueSlots=msi.maxNumIssueTotal; @@ -137,13 +138,13 @@ } //clear memory from the last round and initialize if necessary -void ModuloScheduling::clearInitMem(const MachineSchedInfo& msi){ +void ModuloScheduling::clearInitMem(const TargetSchedInfo& msi){ 
unsigned numIssueSlots = msi.maxNumIssueTotal; //clear nodeScheduled from the last round if( ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess){ - modSched_os<< "***** new round with II= "<= ModuloSched_PrintScheduleProcess) @@ -620,7 +621,7 @@ const Instruction* instr=node->getInst(); MachineCodeForInstruction& tempMvec= MachineCodeForInstruction::get(instr); bool resourceConflict=false; - const MachineInstrInfo &mii=msi.getInstrInfo(); + const TargetInstrInfo &mii=msi.getInstrInfo(); if(coreSchedule.size() < core_i+1 || !coreSchedule[core_i][core_j]){ //this->dumpResourceUsageTable(); @@ -784,10 +785,10 @@ //----------------------------------------------------------------------- void ModuloScheduling::dumpSchedule(std::vector< std::vector > thisSchedule){ - const MachineSchedInfo& msi=target.getSchedInfo(); + const TargetSchedInfo& msi=target.getSchedInfo(); unsigned numIssueSlots=msi.maxNumIssueTotal; for(unsigned i=0;i< numIssueSlots;i++) - modSched_os <<"\t#"; + modSched_os <<"\t#"; modSched_os<<"\n"; for(unsigned i=0;i < thisSchedule.size();i++) { @@ -799,7 +800,7 @@ modSched_os<<"\t"; modSched_os<<"\n"; } - + } @@ -812,7 +813,7 @@ void ModuloScheduling::dumpScheduling(){ modSched_os<<"dump schedule:"<<"\n"; - const MachineSchedInfo& msi=target.getSchedInfo(); + const TargetSchedInfo& msi=target.getSchedInfo(); unsigned numIssueSlots=msi.maxNumIssueTotal; for(unsigned i=0;i< numIssueSlots;i++) modSched_os <<"\t#"; @@ -880,7 +881,7 @@ if(ModuloSchedDebugLevel>= ModuloSched_PrintSchedule){ modSched_fb.open("moduloSchedDebugInfo.output", ios::out); - modSched_os<<"******************Modula Scheduling debug information*************************"< +#include + +using std::vector; class ModuloScheduling:NonCopyable { private: @@ -82,7 +85,7 @@ //*******************internel functions******************************* private: //clear memory from the last round and initialize if necessary - void clearInitMem(const MachineSchedInfo& ); + void clearInitMem(const 
TargetSchedInfo& ); //compute schedule and coreSchedule with the current II bool computeSchedule(); From gshi1 at cs.uiuc.edu Sun Apr 6 18:59:01 2003 From: gshi1 at cs.uiuc.edu (Guochun Shi) Date: Sun Apr 6 18:59:01 2003 Subject: [llvm-commits] CVS: llvm/include/llvm/Instruction.h Message-ID: <200304062358.SAA19991@trinity.cs.uiuc.edu> Changes in directory llvm/include/llvm: Instruction.h updated: 1.37 -> 1.38 --- Log message: added some memory for clone function --- Diffs of the changes: Index: llvm/include/llvm/Instruction.h diff -u llvm/include/llvm/Instruction.h:1.37 llvm/include/llvm/Instruction.h:1.38 --- llvm/include/llvm/Instruction.h:1.37 Mon Feb 24 14:48:28 2003 +++ llvm/include/llvm/Instruction.h Sun Apr 6 18:58:44 2003 @@ -23,6 +23,8 @@ friend class SymbolTableListTraits >; void setParent(BasicBlock *P); +private: + Instruction* cln; //the newest cloned instruction protected: unsigned iType; // InstructionType: The opcode of the instruction @@ -42,7 +44,9 @@ /// * The instruction has no name /// virtual Instruction *clone() const = 0; - + Instruction * getClone(){return cln;} + void setClone(Instruction* _cln){cln=_cln;} + void clearClone(){cln=NULL;} // Accessor methods... 
// inline const BasicBlock *getParent() const { return Parent; } From gshi1 at cs.uiuc.edu Sun Apr 6 19:01:00 2003 From: gshi1 at cs.uiuc.edu (Guochun Shi) Date: Sun Apr 6 19:01:00 2003 Subject: [llvm-commits] CVS: llvm/include/llvm/Target/TargetSchedInfo.h Message-ID: <200304070000.TAA20017@trinity.cs.uiuc.edu> Changes in directory llvm/include/llvm/Target: TargetSchedInfo.h updated: 1.14 -> 1.15 --- Log message: added a function and a member to the TargetSchedInfo class which is used by Modulo Scheduling pass --- Diffs of the changes: Index: llvm/include/llvm/Target/TargetSchedInfo.h diff -u llvm/include/llvm/Target/TargetSchedInfo.h:1.14 llvm/include/llvm/Target/TargetSchedInfo.h:1.15 --- llvm/include/llvm/Target/TargetSchedInfo.h:1.14 Tue Jan 14 15:59:58 2003 +++ llvm/include/llvm/Target/TargetSchedInfo.h Sun Apr 6 19:00:36 2003 @@ -267,6 +267,15 @@ return getInstrRUsage(opCode).numBubbles; } + inline unsigned getCPUResourceNum(int rd)const{ + for(unsigned i=0;i > resourceNumVector; + protected: unsigned numSchedClasses; const TargetInstrInfo* mii; @@ -294,6 +306,12 @@ std::vector > issueGaps; // indexed by [opcode1][opcode2] std::vector > conflictLists; // indexed by [opcode] + + + + friend class ModuloSchedGraph; + friend class ModuloScheduling; + }; #endif From brukman at cs.uiuc.edu Sun Apr 6 19:26:01 2003 From: brukman at cs.uiuc.edu (Misha Brukman) Date: Sun Apr 6 19:26:01 2003 Subject: [llvm-commits] CVS: llvm/include/llvm/Target/TargetSchedInfo.h Message-ID: <200304070025.TAA00781@apoc.cs.uiuc.edu> Changes in directory llvm/include/llvm/Target: TargetSchedInfo.h updated: 1.15 -> 1.16 --- Log message: Must use std::pair instead of just 'pair'. 
--- Diffs of the changes: Index: llvm/include/llvm/Target/TargetSchedInfo.h diff -u llvm/include/llvm/Target/TargetSchedInfo.h:1.15 llvm/include/llvm/Target/TargetSchedInfo.h:1.16 --- llvm/include/llvm/Target/TargetSchedInfo.h:1.15 Sun Apr 6 19:00:36 2003 +++ llvm/include/llvm/Target/TargetSchedInfo.h Sun Apr 6 19:25:09 2003 @@ -291,7 +291,7 @@ } public: - std::vector<pair<int,int> > resourceNumVector; + std::vector<std::pair<int,int> > resourceNumVector; protected: unsigned numSchedClasses;