From lattner at apoc.cs.uiuc.edu Mon Mar 17 12:12:01 2003 From: lattner at apoc.cs.uiuc.edu (Chris Lattner) Date: Mon Mar 17 12:12:01 2003 Subject: [llvm-commits] CVS: llvm/include/Support/BitSetVector.h Message-ID: <200303171811.h2HIBbT20428@apoc.cs.uiuc.edu> Changes in directory llvm/include/Support: BitSetVector.h updated: 1.3 -> 1.4 --- Log message: Fix problems with BitSetVector that makes it not compile under GCC 3.0 and 2.95 --- Diffs of the changes: Index: llvm/include/Support/BitSetVector.h diff -u llvm/include/Support/BitSetVector.h:1.3 llvm/include/Support/BitSetVector.h:1.4 --- llvm/include/Support/BitSetVector.h:1.3 Wed Nov 27 11:46:38 2002 +++ llvm/include/Support/BitSetVector.h Mon Mar 17 12:11:27 2003 @@ -194,8 +194,8 @@ iterator(const iterator& I) : currentBit(I.currentBit),currentWord(I.currentWord),bitvec(I.bitvec) { } iterator& operator=(const iterator& I) { - currentWord == I.currentWord; - currentBit == I.currentBit; + currentWord = I.currentWord; + currentBit = I.currentBit; bitvec = I.bitvec; return *this; } @@ -203,13 +203,13 @@ // Increment and decrement operators (pre and post) iterator& operator++() { if (++currentBit == WORDSIZE) - { currentBit = 0; if (currentWord < bitvec->maxSize) ++currentWord; } + { currentBit = 0; if (currentWord < bitvec->size()) ++currentWord; } return *this; } iterator& operator--() { if (currentBit == 0) { currentBit = WORDSIZE-1; - currentWord = (currentWord == 0)? bitvec->maxSize : --currentWord; + currentWord = (currentWord == 0)? bitvec->size() : --currentWord; } else --currentBit; @@ -220,7 +220,7 @@ // Dereferencing operators reference operator*() { - assert(currentWord < bitvec->maxSize && + assert(currentWord < bitvec->size() && "Dereferencing iterator past the end of a BitSetVector"); return bitvec->getWord(currentWord)[currentBit]; } @@ -234,7 +234,7 @@ protected: static iterator begin(BitSetVector& _bitvec) { return iterator(_bitvec); } static iterator end(BitSetVector& _bitvec) { return iterator(0, - _bitvec.maxSize, _bitvec); } + _bitvec.size(), _bitvec); } friend class BitSetVector; }; }; From jstanley at cypher.cs.uiuc.edu Mon Mar 17 18:43:01 2003 From: jstanley at cypher.cs.uiuc.edu (Joel Stanley) Date: Mon Mar 17 18:43:01 2003 Subject: [llvm-commits] CVS: llvm/lib/Reoptimizer/Inst/PerfInst.cpp design.txt Message-ID: <200303180049.h2I0ng703172@cypher.cs.uiuc.edu> Changes in directory llvm/lib/Reoptimizer/Inst: PerfInst.cpp updated: 1.1 -> 1.2 design.txt updated: 1.1 -> 1.2 --- Log message: --- Diffs of the changes: Index: llvm/lib/Reoptimizer/Inst/PerfInst.cpp diff -u llvm/lib/Reoptimizer/Inst/PerfInst.cpp:1.1 llvm/lib/Reoptimizer/Inst/PerfInst.cpp:1.2 --- llvm/lib/Reoptimizer/Inst/PerfInst.cpp:1.1 Wed Mar 5 15:23:56 2003 +++ llvm/lib/Reoptimizer/Inst/PerfInst.cpp Mon Mar 17 18:49:31 2003 @@ -36,7 +36,7 @@ { static bool initialized = false; static Module* pMod = 0; - static std::vector funcList; + static vector funcList; cerr << "phase2 invoked" << endl; @@ -52,17 +52,28 @@ // Gather pointers to functions into funcList for(Module::iterator i = pMod->begin(), e = pMod->end(); i != e; ++i) { - if(!i->isExternal()) { + if(!i->isExternal()) funcList.push_back(&*i); - cerr << "Added to funcList: " << i->getName() << endl; - } } } assert(pMod && "Module must have been parsed"); assert(funcList[methodNum] && "Have not obtained methodNum'th function in funcList"); - cerr << "Completed phase2." << endl; + cerr << "Dumping list of instructions in each function..." << endl; + + for(vector::iterator i = funcList.begin(), e = funcList.end(); i != e; ++i) { + cerr << "Processing function " << (*i)->getName() << endl; + for(Function::iterator bbi = (*i)->begin(), bbe = (*i)->end(); bbi != bbe; ++bbi) { + for(BasicBlock::iterator ii = bbi->begin(), ie = bbi->end(); ii != ie; ++ii) { + cerr << "Processing instruction: " << *ii << endl; + vector vec = getLLVMInstrInfo(&*ii); + cerr << "Obtained the following vector from getInstrInfo:" << endl; + for(unsigned k = 0; k < vec.size(); ++k) + cerr << vec[k] << endl; + } + } + } } Index: llvm/lib/Reoptimizer/Inst/design.txt diff -u llvm/lib/Reoptimizer/Inst/design.txt:1.1 llvm/lib/Reoptimizer/Inst/design.txt:1.2 --- llvm/lib/Reoptimizer/Inst/design.txt:1.1 Wed Mar 5 15:23:56 2003 +++ llvm/lib/Reoptimizer/Inst/design.txt Mon Mar 17 18:49:31 2003 @@ -1,70 +1,498 @@ -The goal of phase 2 is to: +{{{ OVERALL GOALS OF PHASE 2 - identify all loads of global volatile variables and the corresponding stores to temporaries - - replace the load/store pair (together with address calculations?) - with a call to the appropriate function, as determined by the - static global data structures. + - replace the load/store pair with a call to the appropriate + function, as determined by the static global data structures. + + - ensure that the result(s) of the metric function(s) are stored + properly and that the locations in which the results are stored + correspond properly to the metric use sites. + +{{{ Assumptions: -Assumptions: We can locate all load instructions in the code, and find the corresponding stores. Optimization in the compiler is on. This means that we can't really rely on a particular "signature" of generated assembly code. +}}} +{{{ Problems: + + {{{ Finding load insts (answered) + + How can we narrow down the set of load instructions to those instructions that + are loading the global volatile variables? In particular, all we see in the + load instruction is the register parameter which specifies the register that + contains the address being loaded from. Since we cannot rely on a particular + signature of code that is used to place the address in that particular + register, we can't simply look at a few preceding instructions to determine + the address. We know that the address must be a constant, but the optimizing + compiler may compute the address by simply adding some constant value to some + other register (the compiler is very clever and may employ any kind of + value-numbering techniques, etc to do this address calculationg...the peephole + optimizer kills us here). Thus, we basically need a way to discover, at each + load instruction, whether the address stored in the load register is constant + and, if so, what the constant is. Once we know this, we can lookup the + constant in the table and determine what call to insert in its place, etc. + This has to be flow senstive, and possibly interprocedural? NB: Nothing about + this question is unique to doing the modification at runtime, but is rather a + fundamental question specific to the approach in general. + + [Answer: we use a heurstic "backwards search" mechanism to discover the + constant address used as the pointer operand to the (volatile) load + instructions. See the EEL paper for more information] + + }}} + {{{ Removing address calculations (answered) + + How can we safely erase the address calculation instructions without knowing + that the temporary (or final!) values won't later be used for further relative + computations? This essentially requires use-def information to be present, and + might even need to cross procedure boundaries. Talk to Adve about this. NB: + Nothing about this question is unique to doing the modification at runtime, + but is rather a fundamental question specific to the approach in general. + + [Answer to the previous two questions: We can't determine safety without more + analysis than we want to do (and potentially, not even then!), so we don't + overwrite address arithmetic instructions, etc. + + }}} + +}}} +{{{ Musings on trampolines: + +- Assuming that we can leave the address calculation in place for the +time being, here is a description of the *actual* problem: + +The code segment looks like this: + +Snippet 1: + +I1 +I2 +ld (of volatile) +I3 +I4 +... +store (to temporary) +branch +... + +Now, we must over-write the store instruction with a nop, which +shouldn't pose a problem. The primary problem arises when we want to +over-write the load with a call to an instrumentation function. This +call requires a delay slot to follow, and we can't grow the code +size. We can use a trampoline approach as in DynInst, but this +doesn't really solve the problem because the branch-to-tramp +instruction which would replace the load *also* requires a delay +slot. We need to determine how the DynInst folks solved this +problem. Assuming that we don't have the special case (which will +have to be addressed separately): + +load (of volatile) +branch +store (to temporary) + +[this is a problem because there *can* be no delay slot -- perhaps use +some kind of special un-eliminable instruction to pad between the load +and branch so that this doesn't occur; whatever] + +Then the problem is solved _as long as the (non-branch) instruction +following the load is not the target of some other branch_. That is, +snippet 1 would become: + + I1 ____> tramp code + I2 / .... + br -----/ I3 + __> nop / + | I4 <-----------/ + \ ... + -- branch + ... + +The branch-to-tramp is unconditional, so we only have a problem when +we get to the branch that jumps to the previous location of I3: I3 +will not get executed because the nop is in its place. Vikram has +suggested the use of a "anullable" delay slot wherein the branch +instruction delay slot only executes if the branch *is*/*is not* +taken. We must look in the Sparc ISA to determine whether or not this +is feasible. If that turns out to be a dead end, we'll examine how +DynInst *must* have solved this problem and see if we can use their +solution. + +From the Sparc V9 arch spec: For unconditional branches with an +"always" specified condition (the kind of branch we'd be using for the +tramp), if the annul bit is 1, the instruction in the delay slot +*never* executes. If the annul bit is 0, the instruction in the delay +slot is always executed. + +Hypothesis: Assuming we don't have the branch-imm-after-load inst +special case described above, it is my contention that all we really +need to do is over-write the load instruction with an unconditional +branch with the annul bit set to 1 (to signify "never execute"). We +say never execute because I3 must be executed *after* the call to the +instrumentation function in order to preserve the original semantics. +Thus, our trampoline code calls the instrumentation function and +executes I3 in its epilogue. Execution resumes at I4, and I3 is still +a valid target for a later branch instruction. + +The new problem: This probably works, except that the base trampoline +is probably not within range (can it be?) of a PC-relative jump from +the load instruction we're trying to replace. The MDL paper seems to +indicate that it "often takes multiple instructions" for a jump. Why +would this be? Figure this out. According to the arch spec, all branch +instructions are PC relative. I assume that they are referring to the +need to use a long jump (JMPL), which means that the extra +instructions would be required to pack the register with the target +address. + +}}} +{{{ Trampoline-related ideas: + +(Thanks Brian!) :) + +1. Call wrapper + +Ignoring the ugly special case mentioned above (load followed by +branch), what about using a call instruction to invoke a a piece of +code that simply wraps the call to the instrumentation function + +whatever instruction needed to go into the delay slot? Would have to +work out the details of return values from the calls, etc, but the +gist is as follows: + +I1 +I2 +ld (of volatile) +I3 +I4 +... +store (to temporary) +branch +... + +Would become + +I1 +I2 +call wrapperFunc +nop +I4 +... +store (to temporary) +branch +... + +Where wrapperFunc's body is: + call instrumentationFunc + nop + I3 + ret + +This solution is really using a wrapper call instead of a base +trampoline code segment, and it avoids the problem of having to use a +longjmp. + +The remaining problem: branches (direct or indirect) to I3 in the +original code. This may kill this solution. + +2. Statically-generated base trampoline. + +We know what the instrumentation points are, because we don't have +DynInst's need to instrument points dynamically, at runtime. Thus, +what if phase 1 were to (statically) generate what was effectively the +"base tramp" code, ensuring that it was within PC-relative jump range? +Thus our code snippet: + +I1 +I2 +ld (of volatile) +I3 +I4 +... +store (to temporary) +branch +... + +Would become: + +I1 +I2 +br baseTrampN (annul bit set so that I3 is *never* executed) +I3 +I4 +... +store (to temporary) +branch +... + + +baseTrampN: + ... + call instrumentationFunc + I3 + direct branch back to I4 + Problems: + I3/I4 aren't known at phase 1, are they? In our code example, I3/I4 +refer to the post-optimization instructions at the machine code level, +not LLVM bytecode instructions, so this approach may be flawed from +the start. + +Question: Is there any way to guarantee a large enough region of +reserved space in the in-range-of-PC-relative-jump base tramp code so +that the address of I4, etc, could be obtained...? We could +potentially write that code in as well...perhaps using more volatile +loads. :) That is, we generate + +baseTrampN: + call instrumentationFunc + load volatile + load volatile + +Where the last two load instructions are over-written with I3 and the +branch to I4. The only problem here is making sure that the +(arbitrary) compiler doesn't eliminate the generated code for the +baseTrampN, which is effectively dead when it witnesses it. + +3. Entire duplication of function + modification + +In order to avoid re-writing the code segment entirely, we could copy +the entire function body (for each function which had instrumentation) +to a new area in memory where we could add arbitrary instructions as +desired, updating offsets as needed. The original function would be +nop'd over with the exception of the call/longjmp to the duplicated +code. Any direct branches into the old function would need to be +remapped to the new location (may not be possible) -- are indirect +branches an issue? This is a really wasteful and somewhat gross +approach. + +(NB: Requires knowing the size of function bodies) + +}}} +{{{ Considering an LLVM-centered approach: + +[Arguments and justifications -- most of these thoughts were sent to +Vikram on 12 Mar 2003] + +I perceive the following issues to be orthogonal: + +1. Language-level support for metric description & queries. + +2. Attempting to enable more compiler optimizations due to our +approach (i.e. not prohibiting certain optimizations due to the +presence of opaque function calls, etc). + +3. Vendor compiler independence. + +4. Runtime support for the standard library of metrics. + +In our approach thus far, we've attempted to achieve kill the birds #2 +and #3 with one stone by using variable volatility coupled with a +proposed post-link modification of the binary executable. Since I +believe #2 and #3 to be orthogonal, let us see what happens if we +significantly diminish the importance of #3, and consider how we may +benefit from doing so. In particular, I'd like to consider using only +LLVM as the compiler infrastructure of choice, and completely ignoring +other compilations systems which will optimize the code significantly +more than LLVM. + +It is my belief that the real "meat" of our research lies in exploring +the capabilities and limitations uncovered when performance aspects of +the program are exposed to the programmer through the language and +runtime system (that is, #1 and #4 above), as opposed to a traditional +library-based approach. In particular, what happens when we expose +lower-level runtime execution properties as metrics to the application +itself? By "lower-level runtime execution properties", I mean things +like predicted pipeline behavior, cache behavior, TLB stats, etc., but +particularly those that would require the role of the compiler in the +process, such as the pipeline behavior prediction and analysis. +Again, we're to focus on the primary +differences/advantages/disadvantages that arise from NOT using the +traditional library approach. + +With this in mind, I see the prevention of inhibited compiler +optimizations as a "bonus", or at least a peripheral issue. If we can +make things better than the library approach with respect to inhibited +optimizations, great! However, I don't believe that this is really the +"selling point", especially w.r.t. our current volatile approach, +which was used to obtain both #2 and #3 in the list above, because its +efficacy depends greatly on the compiler implementor's interpretation +of the spec w.r.t. volatility (i.e. the particular implementation +determines whether or not anything is truly better over the +traditional approach). We know that most compilers will do better +with a load of a volatile in place of an opaque call, and may wish to +leverage that, but I don't think that this is central to the validity +of the work overall. + +If we tie ourselves to a particular research compiler, we can still +obtain or heavily pursue #2 by using annotations or some other +mechanism which allows us to denote a call site (to an instrumentation +function) as "side-effect-free", and this can enable whatever +optimizations are performed by the compiler. This seems like a +reasonable compiler capability to encourage in vendor compilers in +general, and I don't see why we can't model it as a "beneficial +feature" which allows us to capitalize on #2, but is not something +which is fundamental to the work. + +If we do "tie" ourselves to LLVM, as we're already talking about doing +in some sense with our use of the reoptimizer and (most likely) our +reliance on the LLVM-to-Sparc mapping information, we may also gain +short-term implementation feasibility by side-stepping the complex +binary editing issues. + +}}} + +}}} + +{{{ MEETING MINUTES 14 Mar 2003 + +We discussed the primary differences between our work and the MDL +language/compiler system (by the DynInst folks). We discussed that +there are some fundamental differences in the capabilities of the two +systems. Primarily, + - feedback of performance data cannot occur in MDL + - MDL cannot instrument (or currently doesn't) arbitrary points + - Cannot enable/disable instrumentation w.r.t control flow + +Also, in terms of syntax and specification distinctions: + + our approach uses "data structure" primitives & value histories + - they've got hierarchical constraints and method-globbing + mechanisms + - "decoupling" of metric description from instrumentation point. + +Vikram expressed the need to clearly demonstrate the differences +between the two approaches in any write-up that we do. IT will be +vital to "pull apart" the issues, referring to orthogonality as much +as possible", and decide exactly what is distinct. Alos, he pointed +out that if our Phase1 didn't process the source directly and we +didn't use arbitrary instrumentation points, we can do *exactly* what +MDL can do (we think) without source access. + +Implementation sketch: + +At a high level, in broad sweeping strokes, we're going to use the +trace cache tool as a framework for runtime manipulation of the binary +code. That is, the framework provided by the tracecache allows the +selection of a path from a CFG, its subsequent mapping into an +arbitrarily-sized piece of memory, where it can be manipulated. +Although the tracecache takes a CFG subgraph and maps it into this new +region of memory, it does so in a "straight-line" manner for the hot +path, rewriting intra-region branches as needed. We're not making any +assumptions about a particular path being executed, and so we'll +actually be mapping the entire CFG subgraph into the new region of +memory, keeping the branch structure intact. + +We're currently operating under the assumption that the delimiting +markers (of the scope or point at which a metric evaluation call is to +be palced) for the region of code we wish to transform are visible to +the runtime system (somehow). Assuming we have a start and end point, +[or WOLOG end points (plural) because of multiple exit points of the +target function] then we may demarcate the region of the CFG that is +to be transformed. Call this CFG region R. We will be mapping the CFG +into the new memory region. Before transformation, execution is +semantically equivalent w.r.t the original code in memory and the new +(copied) code in memory. + +We also assume that it is possible to "pad" the start point of R such +that longjump instructions may be placed there to reroute execution to +the copied area of memory. The trace-cache is supposed to handle this +rerouting, but the facility must exist regardless of who provides it: - How can we narrow down the set of load instructions to those - instructions that are loading the global volatile variables? In - particular, all we see in the load instruction is the register - parameter which specifies the register that contains the address - being loaded from. Since we cannot rely on a particular signature - of code that is used to place the address in that particular - register, we can't simply look at a few preceding instructions to - determine the address. We know that the address must be a constant, - but the optimizing compiler may compute the address by simply adding - some constant value to some other register (the compiler is very - clever and may employ any kind of value-numbering techniques, etc to - do this address calculationg...the peephole optimizer kills us - here). Thus, we basically need a way to discover, at each load - instruction, whether the address stored in the load register is - constant and, if so, what the constant is. Once we know this, we - can lookup the constant in the table and determine what call to - insert in its place, etc. This has to be flow senstive, and - possibly interprocedural? + Original code: - How can we safely erase the address calculation instructions without - knowing that the temporary (or final!) values won't later be used - for further relative computations? This essentially requires use-def - information to be present, and might even need to cross procedure - boundaries. Talk to Adve about this. - - NB: Neither of the above two questions are unique to doing the - modification at runtime, but rather problems with performing the - transformation "in general". - - Concrete problems with the runtime framework: - - What is the implementation status? What works, what doesn't - work? - -MILESTONES - - - Prototype implementation for Reopt-based phase 2. Must address - above problems to fully realize this. However, the first step is to - get familiar with the system and record what the concrete problems - are that arise. To this end, the first step: - - - A function which can be called by the target application which - performs "self-examination" of the program: either dumping out - instructions to stderr for comparison with disassembly, or - something close to it. The primary purpose of this is to explore - the current implementation status of the system and to assess its - capabilities and suitablity for the full-blown phase 2 - implementation. Utilizes the VirtualMem object, etc, and is - modelled after the trigger routine in - Reoptimizer/Trigger/Trigger.cpp. + | | + V V + (start of CFG REGION R) =====> (start of R) ---> new code + / \ / \ | + ( CFG REGION ) ( CFG REGION ) | + (end of CFG REGION R) (end of R) <-----/ + +Here are the proposed steps of the transformation. + +1. Given a code segment S that corresponds to CFG region R, copy S +into new memory region S'. Any direct branches in the region are +going to be PC-relative, while jumps out of the region may need to be +rewritten. Indirect jumps will have to be heuristically handled +("backwards address extraction", read EEL paper for more info). + +2. Indirect jumps into the original code are not a problem: the +modified code simply does not get executed (until the next iteration +if in a loop). This is trivial for point instrumentation, but for +interval instrumentation it is taken care of by a set of a flag by the +entry to the region and the check of the flag before calling the +end-instrumentation function. [Actually, this doesn't seem to be an +issue at all, come to think of it: the original code just has the +extraneous loads of volatiles or whatever, it's doesn't contain any +calls: we will have to guard any uses of the metric variables though +(i.e. use in a stat function, etc). This step requires more +evaluation. + +3. Code is grown within the new code region, and branch +targets/offsets are updated appropriately, using heuristics where +possible. The loads of selected volatiles are replaced with calls to +the proper instrumentation function. Those load instructions +themselves are selected by determining what address they are loading, +a constant value which is (again) heuristically determined. + +In all of this, an attempt is made to avoid use of the LLVM-to-Sparc +instruction mapping. This means using lower-level POSIX, ELF, etc, +mechanisms to access everything we need (the global table of +bookkeeping information, the names of functions to instrument, the +starting locations of those functions and their ending locations/size) +to start with. We must also be able to identify the range of the code +segment to which a particular transformation is supposed to apply to +(since the tracecache maps a CFG into a new memory area, how the frell +are we supposed to "not use" the LLVM mapping information when the +tracecache construction seems to require it?), since the tracecache +presumably needs the corresponding CFG region, etc. Look into this. + +Start here: + +The first order of business is to determine the feasibility of this +approach for the long haul. We must first see what information is +needed by the tracecache module to create the new area of memory and +map code into it. After this is done, we will have a better idea of +what information must be provided to it by our preliminary binary +analysis. After this, we need to read up on ELF/POSIX/etc mechanisms +for reading the object file and determining things about it (the size +of functions, for example). Also, we must figure out how to get +access to the global, static table of bookkeeping information without +debugging information (take the address of it in a function, load that +address, look for the address, verify?). + +See TODO list below. + +}}} + +{{{ MILESTONES + +- Extract and report bookkeeping data structure contents from raw +compiled binary. + +- Determine if/how the tracecache framework can be used for a CFG +subgraph "copy" to a new area of memory; determine whether or not it's +worth the effort or whether it should be "done from scratch". + +}}} + +{{{ TODO + +- Read EEL paper to get a better feel for binary modification issues +- Do sample by hand and revisit actions of both phases +- Extract bookkeeping data structure contents, function stats/ends, + etc, using low-level POSIX/ELF mechanisms. + +}}} + +{{{ PENDING QUESTIONS + +[What about violation of register schedules when inserting new code? +Is this an issue?] + +}}} From lattner at apoc.cs.uiuc.edu Tue Mar 18 16:54:02 2003 From: lattner at apoc.cs.uiuc.edu (Chris Lattner) Date: Tue Mar 18 16:54:02 2003 Subject: [llvm-commits] CVS: llvm/test/Feature/constexpr.ll Message-ID: <200303182253.h2IMrTB03791@apoc.cs.uiuc.edu> Changes in directory llvm/test/Feature: constexpr.ll updated: 1.5 -> 1.6 --- Log message: Fix testcase --- Diffs of the changes: Index: llvm/test/Feature/constexpr.ll diff -u llvm/test/Feature/constexpr.ll:1.5 llvm/test/Feature/constexpr.ll:1.6 --- llvm/test/Feature/constexpr.ll:1.5 Sun Oct 6 17:43:49 2002 +++ llvm/test/Feature/constexpr.ll Tue Mar 18 16:53:19 2003 @@ -26,7 +26,7 @@ %array = constant [2 x int] [ int 12, int 52 ] %arrayPtr = global int* getelementptr ([2 x int]* %array, long 0, long 0) ;; int* &%array[0][0] -%arrayPtr5 = global int* getelementptr (int** %arrayPtr, long 0, long 5) ;; int* &%arrayPtr[5] +%arrayPtr5 = global int** getelementptr (int** %arrayPtr, long 5) ;; int* &%arrayPtr[5] %somestr = constant [11x sbyte] c"hello world" %char5 = global sbyte* getelementptr([11x sbyte]* %somestr, long 0, long 5) @@ -54,17 +54,17 @@ %S3 = global %SAType* %S3c ;; Ref. to constant S3 ;; Pointer to float (**%S1).1.0 -%S1fld1a = global float* getelementptr (%SType** %S1, long 0, long 0, ubyte 1, ubyte 0) +%S1fld1a = global float* getelementptr (%SType* %S2c, long 0, ubyte 1, ubyte 0) ;; Another ptr to the same! -%S1fld1b = global float* getelementptr (%SType*** %S1ptr, long 0, long 0, long 0, ubyte 1, ubyte 0) +%S1fld1b = global float* getelementptr (%SType* %S2c, long 0, ubyte 1, ubyte 0) %S1fld1bptr = global float** %S1fld1b ;; Ref. to previous pointer ;; Pointer to ubyte (**%S2).1.1.0 -%S2fld3 = global ubyte* getelementptr (%SType** %S2, long 0, long 0, ubyte 1, ubyte 1, ubyte 0) +%S2fld3 = global ubyte* getelementptr (%SType* %S2c, long 0, ubyte 1, ubyte 1, ubyte 0) ;; Pointer to float (**%S2).1.0[0] -%S3fld3 = global float* getelementptr (%SAType** %S3, long 0, long 0, ubyte 1, ubyte 0, long 0) +;%S3fld3 = global float* getelementptr (%SAType** %S3, long 0, long 0, ubyte 1, ubyte 0, long 0) ;;--------------------------------------------------------- ;; TODO: Test constant expressions for unary and binary operators @@ -79,7 +79,7 @@ %char8a = global int* cast (sbyte* getelementptr([11x sbyte]* %somestr, long 0, long 8) to int*) -%S3fld3 = global float* getelementptr (%SAType** %S3, long 0, long 0, ubyte 1, ubyte 0, long 0) +;%S3fld3 = global float* getelementptr (%SAType** %S3, long 0, long 0, ubyte 1, ubyte 0, long 0) ;;--------------------------------------------------- From lattner at apoc.cs.uiuc.edu Wed Mar 19 14:49:01 2003 From: lattner at apoc.cs.uiuc.edu (Chris Lattner) Date: Wed Mar 19 14:49:01 2003 Subject: [llvm-commits] CVS: llvm/include/llvm/Bytecode/Format.h Message-ID: <200303192048.h2JKmbL20559@apoc.cs.uiuc.edu> Changes in directory llvm/include/llvm/Bytecode: Format.h updated: 1.2 -> 1.3 --- Log message: Add new chunk type --- Diffs of the changes: Index: llvm/include/llvm/Bytecode/Format.h diff -u llvm/include/llvm/Bytecode/Format.h:1.2 llvm/include/llvm/Bytecode/Format.h:1.3 --- llvm/include/llvm/Bytecode/Format.h:1.2 Thu Mar 28 21:50:24 2002 +++ llvm/include/llvm/Bytecode/Format.h Wed Mar 19 14:48:27 2003 @@ -22,6 +22,7 @@ ConstantPool, SymbolTable, ModuleGlobalInfo, + GlobalTypePlane, // Method subtypes: MethodInfo = 0x21, From lattner at apoc.cs.uiuc.edu Wed Mar 19 14:55:01 2003 From: lattner at apoc.cs.uiuc.edu (Chris Lattner) Date: Wed Mar 19 14:55:01 2003 Subject: [llvm-commits] CVS: llvm/lib/Bytecode/Reader/ReadConst.cpp Reader.cpp ReaderInternals.h Message-ID: <200303192054.h2JKsc120682@apoc.cs.uiuc.edu> Changes in directory llvm/lib/Bytecode/Reader: ReadConst.cpp updated: 1.42 -> 1.43 Reader.cpp updated: 1.47 -> 1.48 ReaderInternals.h updated: 1.32 -> 1.33 --- Log message: * Bug fixes: - Fix problems where the constant table would not get updated when resolving constants causes other constants to change. Changes to the V2 bytecode format - Null values are implicitly encoded instead of explicitly, this makes things more compact! - More compactly represent ConstantPointerRefs - Bytecode files are represented as: Header|GlobalTypes|GlobalVars/Function Protos|Constants|Functions|SymTab instead of Header|GlobalTypes|Constants|GlobalVars/Function Protos|Functions|SymTab which makes a lot of things simpler. Changes to the reader: - Function loading code is much simpler. We now no longer make function PlaceHolderHelper objects to be replaced with real functions. --- Diffs of the changes: Index: llvm/lib/Bytecode/Reader/ReadConst.cpp diff -u llvm/lib/Bytecode/Reader/ReadConst.cpp:1.42 llvm/lib/Bytecode/Reader/ReadConst.cpp:1.43 --- llvm/lib/Bytecode/Reader/ReadConst.cpp:1.42 Thu Mar 6 11:18:14 2003 +++ llvm/lib/Bytecode/Reader/ReadConst.cpp Wed Mar 19 14:54:26 2003 @@ -316,7 +316,11 @@ case Type::PointerTyID: { const PointerType *PT = cast(Ty); unsigned SubClass; - if (read_vbr(Buf, EndBuf, SubClass)) return true; + if (HasImplicitZeroInitializer) + SubClass = 1; + else + if (read_vbr(Buf, EndBuf, SubClass)) return true; + switch (SubClass) { case 0: // ConstantPointerNull value... V = ConstantPointerNull::get(PT); @@ -333,6 +337,10 @@ if (Val) { if (!(GV = dyn_cast(Val))) return true; BCR_TRACE(5, "Value Found in ValueTable!\n"); + } else if (RevisionNum > 0) { + // Revision #0 could have forward references to globals that were wierd. + // We got rid of this in subsequent revs. + return true; } else { // Nope... find or create a forward ref. for it GlobalRefsType::iterator I = GlobalRefs.find(std::make_pair(PT, Slot)); @@ -341,12 +349,12 @@ GV = cast(I->second); } else { BCR_TRACE(5, "Creating new forward ref to a global variable!\n"); - - // Create a placeholder for the global variable reference... + + // Create a placeholder for the global variable reference... GlobalVariable *GVar = new GlobalVariable(PT->getElementType(), false, true); - // Keep track of the fact that we have a forward ref to recycle it + // Keep track of the fact that we have a forward ref to recycle it GlobalRefs.insert(std::make_pair(std::make_pair(PT, Slot), GVar)); // Must temporarily push this value into the module table... @@ -354,6 +362,7 @@ GV = GVar; } } + V = ConstantPointerRef::get(GV); break; } @@ -375,6 +384,11 @@ return false; } +bool BytecodeParser::ParseGlobalTypes(const uchar *&Buf, const uchar *EndBuf) { + ValueTable T; + return ParseConstantPool(Buf, EndBuf, T, ModuleTypeValues); +} + bool BytecodeParser::ParseConstantPool(const uchar *&Buf, const uchar *EndBuf, ValueTable &Tab, TypeValuesListTy &TypeTab) { @@ -391,20 +405,19 @@ if (parseTypeConstants(Buf, EndBuf, TypeTab, NumEntries)) return true; } else { for (unsigned i = 0; i < NumEntries; ++i) { - Constant *I; + Constant *C; int Slot; - if (parseConstantValue(Buf, EndBuf, Ty, I)) return true; - assert(I && "parseConstantValue returned NULL!"); - BCR_TRACE(4, "Read Constant: '" << I << "'\n"); - if ((Slot = insertValue(I, Tab)) < 0) return true; + if (parseConstantValue(Buf, EndBuf, Ty, C)) return true; + assert(C && "parseConstantValue returned NULL!"); + BCR_TRACE(4, "Read Constant: '" << *C << "'\n"); + if ((Slot = insertValue(C, Tab)) == -1) return true; // If we are reading a function constant table, make sure that we adjust // the slot number to be the real global constant number. // - if (&Tab != &ModuleValues) - Slot += ModuleValues[Typ].size(); - - ResolveReferencesToValue(I, (unsigned)Slot); + if (&Tab != &ModuleValues && Typ < ModuleValues.size()) + Slot += ModuleValues[Typ]->size(); + ResolveReferencesToValue(C, (unsigned)Slot); } } } Index: llvm/lib/Bytecode/Reader/Reader.cpp diff -u llvm/lib/Bytecode/Reader/Reader.cpp:1.47 llvm/lib/Bytecode/Reader/Reader.cpp:1.48 --- llvm/lib/Bytecode/Reader/Reader.cpp:1.47 Thu Mar 6 11:55:45 2003 +++ llvm/lib/Bytecode/Reader/Reader.cpp Wed Mar 19 14:54:26 2003 @@ -55,23 +55,44 @@ return cast_or_null(V); } -int BytecodeParser::insertValue(Value *Val, std::vector &ValueTab) { +int BytecodeParser::insertValue(Value *Val, ValueTable &ValueTab) { + assert((!HasImplicitZeroInitializer || !isa(Val) || + Val->getType()->isPrimitiveType() || + !cast(Val)->isNullValue()) && + "Cannot read null values from bytecode!"); unsigned type; if (getTypeSlot(Val->getType(), type)) return -1; assert(type != Type::TypeTyID && "Types should never be insertValue'd!"); - while (ValueTab.size() <= type) { - ValueTab.push_back(ValueList()); - if (HasImplicitZeroInitializer) // add a zero initializer if appropriate - ValueTab.back().push_back( - Constant::getNullValue(getType(ValueTab.size()-1))); + if (ValueTab.size() <= type) { + unsigned OldSize = ValueTab.size(); + ValueTab.resize(type+1); + while (OldSize != type+1) + ValueTab[OldSize++] = new ValueList(); } //cerr << "insertValue Values[" << type << "][" << ValueTab[type].size() // << "] = " << Val << "\n"; - ValueTab[type].push_back(Val); + ValueTab[type]->push_back(Val); - return ValueTab[type].size()-1; + bool HasOffset = HasImplicitZeroInitializer && + !Val->getType()->isPrimitiveType(); + + return ValueTab[type]->size()-1 + HasOffset; +} + + +void BytecodeParser::setValueTo(ValueTable &ValueTab, unsigned Slot, + Value *Val) { + assert(&ValueTab == &ModuleValues && "Can only setValueTo on Module values!"); + unsigned type; + if (getTypeSlot(Val->getType(), type)) + assert(0 && "getTypeSlot failed!"); + + assert((!HasImplicitZeroInitializer || Slot != 0) && + "Cannot change zero init"); + assert(type < ValueTab.size() && Slot <= ValueTab[type]->size()); + ValueTab[type]->setOperand(Slot-HasImplicitZeroInitializer, Val); } Value *BytecodeParser::getValue(const Type *Ty, unsigned oNum, bool Create) { @@ -102,25 +123,25 @@ return 0; } + if (HasImplicitZeroInitializer && type >= FirstDerivedTyID) { + if (Num == 0) + return Constant::getNullValue(Ty); + --Num; + } + if (type < ModuleValues.size()) { - if (Num < ModuleValues[type].size()) - return ModuleValues[type][Num]; - Num -= ModuleValues[type].size(); + if (Num < ModuleValues[type]->size()) + return ModuleValues[type]->getOperand(Num); + Num -= ModuleValues[type]->size(); } - if (Values.size() > type && Values[type].size() > Num) - return Values[type][Num]; + if (Values.size() > type && Values[type]->size() > Num) + return Values[type]->getOperand(Num); if (!Create) return 0; // Do not create a placeholder? Value *d = 0; switch (Ty->getPrimitiveID()) { - case Type::FunctionTyID: - std::cerr << "Creating function pholder! : " << type << ":" << oNum << " " - << Ty->getName() << "\n"; - d = new FunctionPHolder(Ty, oNum); - if (insertValue(d, LateResolveModuleValues) == -1) return 0; - return d; case Type::LabelTyID: d = new BBPHolder(Ty, oNum); break; @@ -144,8 +165,10 @@ if (Value *V = getValue(Ty, Slot, false)) return dyn_cast(V); // If we already have the value parsed... - GlobalRefsType::iterator I = GlobalRefs.find(std::make_pair(Ty, Slot)); - if (I != GlobalRefs.end()) { + std::pair Key(Ty, Slot); + GlobalRefsType::iterator I = GlobalRefs.lower_bound(Key); + + if (I != GlobalRefs.end() && I->first == Key) { BCR_TRACE(5, "Previous forward ref found!\n"); return cast(I->second); } else { @@ -155,29 +178,28 @@ Constant *C = new ConstPHolder(Ty, Slot); // Keep track of the fact that we have a forward ref to recycle it - GlobalRefs.insert(std::make_pair(std::make_pair(Ty, Slot), C)); + GlobalRefs.insert(I, std::make_pair(Key, C)); return C; } } - bool BytecodeParser::postResolveValues(ValueTable &ValTab) { bool Error = false; - for (unsigned ty = 0; ty < ValTab.size(); ++ty) { - ValueList &DL = ValTab[ty]; - unsigned Size; - while ((Size = DL.size())) { - unsigned IDNumber = getValueIDNumberFromPlaceHolder(DL[Size-1]); - - Value *D = DL[Size-1]; + while (!ValTab.empty()) { + ValueList &DL = *ValTab.back(); + ValTab.pop_back(); + + while (!DL.empty()) { + Value *D = DL.back(); + unsigned IDNumber = getValueIDNumberFromPlaceHolder(D); DL.pop_back(); Value *NewDef = getValue(D->getType(), IDNumber, false); if (NewDef == 0) { Error = true; // Unresolved thinger std::cerr << "Unresolvable reference found: <" - << D->getType()->getDescription() << ">:" << IDNumber <<"!\n"; + << *D->getType() << ">:" << IDNumber <<"!\n"; } else { // Fixup all of the uses of this placeholder def... D->replaceAllUsesWith(NewDef); @@ -187,6 +209,7 @@ delete D; // memory, 'cause otherwise we can't remove all uses! } } + delete &DL; } return Error; @@ -235,15 +258,15 @@ if (read(Buf, EndBuf, Name, false)) // Not aligned... return true; - Value *D = getValue(Ty, slot, false); // Find mapping... - if (D == 0) { + Value *V = getValue(Ty, slot, false); // Find mapping... + if (V == 0) { BCR_TRACE(3, "FAILED LOOKUP: Slot #" << slot << "\n"); return true; } - BCR_TRACE(4, "Map: '" << Name << "' to #" << slot << ":" << D; - if (!isa(D)) std::cerr << "\n"); + BCR_TRACE(4, "Map: '" << Name << "' to #" << slot << ":" << *V; + if (!isa(V)) std::cerr << "\n"); - D->setName(Name, ST); + V->setName(Name, ST); } } @@ -259,52 +282,40 @@ BCR_TRACE(3, "Mutating forward refs!\n"); Value *VPH = I->second; // Get the placeholder... - // Loop over all of the uses of the Value. What they are depends - // on what NewV is. Replacing a use of the old reference takes the - // use off the use list, so loop with !use_empty(), not the use_iterator. - while (!VPH->use_empty()) { - Constant *C = cast(VPH->use_back()); - unsigned numReplaced = C->mutateReferences(VPH, NewV); - assert(numReplaced > 0 && "Supposed user wasn't really a user?"); - - if (GlobalValue* GVal = dyn_cast(NewV)) { - // Remove the placeholder GlobalValue from the module... - GVal->getParent()->getGlobalList().remove(cast(VPH)); - } - } + VPH->replaceAllUsesWith(NewV); + + // If this is a global variable being resolved, remove the placeholder from + // the module... + if (GlobalValue* GVal = dyn_cast(NewV)) + GVal->getParent()->getGlobalList().remove(cast(VPH)); delete VPH; // Delete the old placeholder GlobalRefs.erase(I); // Remove the map entry for it } + bool BytecodeParser::ParseFunction(const uchar *&Buf, const uchar *EndBuf) { // Clear out the local values table... - Values.clear(); if (FunctionSignatureList.empty()) { Error = "Function found, but FunctionSignatureList empty!"; return true; // Unexpected function! } - const PointerType *PMTy = FunctionSignatureList.back().first; // PtrMeth - const FunctionType *MTy = dyn_cast(PMTy->getElementType()); - if (MTy == 0) return true; // Not ptr to function! - unsigned isInternal; if (read_vbr(Buf, EndBuf, isInternal)) return true; - unsigned MethSlot = FunctionSignatureList.back().second; + Function *F = FunctionSignatureList.back().first; + unsigned FunctionSlot = FunctionSignatureList.back().second; FunctionSignatureList.pop_back(); - Function *M = new Function(MTy, isInternal != 0); + F->setInternalLinkage(isInternal != 0); - BCR_TRACE(2, "FUNCTION TYPE: " << MTy << "\n"); - - const FunctionType::ParamTypes &Params = MTy->getParamTypes(); - Function::aiterator AI = M->abegin(); + const FunctionType::ParamTypes &Params =F->getFunctionType()->getParamTypes(); + Function::aiterator AI = F->abegin(); for (FunctionType::ParamTypes::const_iterator It = Params.begin(); It != Params.end(); ++It, ++AI) { if (insertValue(AI, Values) == -1) { Error = "Error reading function arguments!\n"; - delete M; return true; + return true; } } @@ -313,34 +324,31 @@ const unsigned char *OldBuf = Buf; if (readBlock(Buf, EndBuf, Type, Size)) { Error = "Error reading Function level block!"; - delete M; return true; + return true; } switch (Type) { case BytecodeFormat::ConstantPool: BCR_TRACE(2, "BLOCK BytecodeFormat::ConstantPool: {\n"); - if (ParseConstantPool(Buf, Buf+Size, Values, FunctionTypeValues)) { - delete M; return true; - } + if (ParseConstantPool(Buf, Buf+Size, Values, FunctionTypeValues)) + return true; break; case BytecodeFormat::BasicBlock: { BCR_TRACE(2, "BLOCK BytecodeFormat::BasicBlock: {\n"); BasicBlock *BB; if (ParseBasicBlock(Buf, Buf+Size, BB) || - insertValue(BB, Values) == -1) { - delete M; return true; // Parse error... :( - } + insertValue(BB, Values) == -1) + return true; // Parse error... :( - M->getBasicBlockList().push_back(BB); + F->getBasicBlockList().push_back(BB); break; } case BytecodeFormat::SymbolTable: BCR_TRACE(2, "BLOCK BytecodeFormat::SymbolTable: {\n"); - if (ParseSymbolTable(Buf, Buf+Size, &M->getSymbolTable())) { - delete M; return true; - } + if (ParseSymbolTable(Buf, Buf+Size, &F->getSymbolTable())) + return true; break; default: @@ -353,41 +361,21 @@ if (align32(Buf, EndBuf)) { Error = "Error aligning Function level block!"; - delete M; // Malformed bc file, read past end of block. - return true; + return true; // Malformed bc file, read past end of block. } } - if (postResolveValues(LateResolveValues) || - postResolveValues(LateResolveModuleValues)) { + if (postResolveValues(LateResolveValues)) { Error = "Error resolving function values!"; - delete M; return true; // Unresolvable references! + return true; // Unresolvable references! } - Value *FunctionPHolder = getValue(PMTy, MethSlot, false); - assert(FunctionPHolder && "Something is broken, no placeholder found!"); - assert(isa(FunctionPHolder) && "Not a function?"); - - unsigned type; // Type slot - assert(!getTypeSlot(MTy, type) && "How can meth type not exist?"); - getTypeSlot(PMTy, type); - - TheModule->getFunctionList().push_back(M); - - // Replace placeholder with the real function pointer... - ModuleValues[type][MethSlot] = M; + ResolveReferencesToValue(F, FunctionSlot); // Clear out function level types... FunctionTypeValues.clear(); - // If anyone is using the placeholder make them use the real function instead - FunctionPHolder->replaceAllUsesWith(M); - - // We don't need the placeholder anymore! - delete FunctionPHolder; - - ResolveReferencesToValue(M, MethSlot); - + freeTable(Values); return false; } @@ -409,40 +397,25 @@ return true; } - const PointerType *PTy = cast(Ty); - const Type *ElTy = PTy->getElementType(); - - Constant *Initializer = 0; - if (VarType & 2) { // Does it have an initalizer? - // Do not improvise... values must have been stored in the constant pool, - // which should have been read before now. - // - unsigned InitSlot; - if (read_vbr(Buf, End, InitSlot)) return true; - - Value *V = getValue(ElTy, InitSlot, false); - if (V == 0) return true; - Initializer = cast(V); - } + const Type *ElTy = cast(Ty)->getElementType(); // Create the global variable... GlobalVariable *GV = new GlobalVariable(ElTy, VarType & 1, VarType & 4, - Initializer); + 0, "", TheModule); int DestSlot = insertValue(GV, ModuleValues); if (DestSlot == -1) return true; - - TheModule->getGlobalList().push_back(GV); - + BCR_TRACE(2, "Global Variable of type: " << *Ty << "\n"); ResolveReferencesToValue(GV, (unsigned)DestSlot); - BCR_TRACE(2, "Global Variable of type: " << PTy->getDescription() - << " into slot #" << DestSlot << "\n"); - + if (VarType & 2) { // Does it have an initalizer? + unsigned InitSlot; + if (read_vbr(Buf, End, InitSlot)) return true; + GlobalInits.push_back(std::make_pair(GV, InitSlot)); + } if (read_vbr(Buf, End, VarType)) return true; } - // Read the function signatures for all of the functions that are coming, and - // create fillers in the Value tables. + // Read the function objects for all of the functions that are coming unsigned FnSignature; if (read_vbr(Buf, End, FnSignature)) return true; while (FnSignature != Type::VoidTyID) { // List is terminated by Void @@ -462,20 +435,16 @@ // this placeholder is replaced. // Insert the placeholder... - Value *Val = new FunctionPHolder(Ty, 0); - if (insertValue(Val, ModuleValues) == -1) return true; - - // Figure out which entry of its typeslot it went into... - unsigned TypeSlot; - if (getTypeSlot(Val->getType(), TypeSlot)) return true; + Function *Func = new Function(cast(Ty), false, "", TheModule); + int DestSlot = insertValue(Func, ModuleValues); + if (DestSlot == -1) return true; + ResolveReferencesToValue(Func, (unsigned)DestSlot); - unsigned SlotNo = ModuleValues[TypeSlot].size()-1; - - // Keep track of this information in a linked list that is emptied as - // functions are loaded... + // Keep track of this information in a list that is emptied as functions are + // loaded... // - FunctionSignatureList.push_back( - std::make_pair(cast(Val->getType()), SlotNo)); + FunctionSignatureList.push_back(std::make_pair(Func, DestSlot)); + if (read_vbr(Buf, End, FnSignature)) return true; BCR_TRACE(2, "Function of type: " << Ty << "\n"); } @@ -505,12 +474,17 @@ switch (RevisionNum) { case 0: // Initial revision + // Version #0 didn't have any of the flags stored correctly, and in fact as + // only valid with a 14 in the flags values. Also, it does not support + // encoding zero initializers for arrays compactly. + // if (Version != 14) return true; // Unknown revision 0 flags? FirstDerivedTyID = 14; HasImplicitZeroInitializer = false; isBigEndian = hasLongPointers = true; break; case 1: + // Version #1 has two bit fields: isBigEndian and hasLongPointers FirstDerivedTyID = 14; break; default: @@ -544,20 +518,26 @@ const unsigned char *OldBuf = Buf; if (readBlock(Buf, EndBuf, Type, Size)) return true; switch (Type) { - case BytecodeFormat::ConstantPool: - BCR_TRACE(1, "BLOCK BytecodeFormat::ConstantPool: {\n"); - if (ParseConstantPool(Buf, Buf+Size, ModuleValues, ModuleTypeValues)) - return true; + case BytecodeFormat::GlobalTypePlane: + BCR_TRACE(1, "BLOCK BytecodeFormat::GlobalTypePlane: {\n"); + if (ParseGlobalTypes(Buf, Buf+Size)) return true; break; case BytecodeFormat::ModuleGlobalInfo: BCR_TRACE(1, "BLOCK BytecodeFormat::ModuleGlobalInfo: {\n"); - if (ParseModuleGlobalInfo(Buf, Buf+Size)) return true; + if (ParseModuleGlobalInfo(Buf, Buf+Size)) return true; + break; + + case BytecodeFormat::ConstantPool: + BCR_TRACE(1, "BLOCK BytecodeFormat::ConstantPool: {\n"); + if (ParseConstantPool(Buf, Buf+Size, ModuleValues, ModuleTypeValues)) + return true; break; case BytecodeFormat::Function: { BCR_TRACE(1, "BLOCK BytecodeFormat::Function: {\n"); - if (ParseFunction(Buf, Buf+Size)) return true; // Error parsing function + if (ParseFunction(Buf, Buf+Size)) + return true; // Error parsing function break; } @@ -577,6 +557,21 @@ if (align32(Buf, EndBuf)) return true; } + // After the module constant pool has been read, we can safely initialize + // global variables... + while (!GlobalInits.empty()) { + GlobalVariable *GV = GlobalInits.back().first; + unsigned Slot = GlobalInits.back().second; + GlobalInits.pop_back(); + + // Look up the initializer value... + if (Value *V = getValue(GV->getType()->getElementType(), Slot, false)) { + if (GV->hasInitializer()) return true; + GV->setInitializer(cast(V)); + } else + return true; + } + if (!FunctionSignatureList.empty()) { // Expected more functions! Error = "Function expected, but bytecode stream at end!"; return true; @@ -592,7 +587,6 @@ } Module *BytecodeParser::ParseBytecode(const uchar *Buf, const uchar *EndBuf) { - LateResolveValues.clear(); unsigned Sig; // Read and check signature... if (read(Buf, EndBuf, Sig) || Index: llvm/lib/Bytecode/Reader/ReaderInternals.h diff -u llvm/lib/Bytecode/Reader/ReaderInternals.h:1.32 llvm/lib/Bytecode/Reader/ReaderInternals.h:1.33 --- llvm/lib/Bytecode/Reader/ReaderInternals.h:1.32 Thu Mar 6 11:55:45 2003 +++ llvm/lib/Bytecode/Reader/ReaderInternals.h Wed Mar 19 14:54:26 2003 @@ -18,7 +18,8 @@ #define TRACE_LEVEL 0 #if TRACE_LEVEL // ByteCodeReading_TRACEer -#define BCR_TRACE(n, X) if (n < TRACE_LEVEL) std::cerr << std::string(n*2, ' ') << X +#define BCR_TRACE(n, X) \ + if (n < TRACE_LEVEL) std::cerr << std::string(n*2, ' ') << X #else #define BCR_TRACE(n, X) #endif @@ -45,6 +46,11 @@ // Define this in case we don't see a ModuleGlobalInfo block. FirstDerivedTyID = Type::FirstDerivedTyID; } + ~BytecodeParser() { + freeTable(Values); + freeTable(LateResolveValues); + freeTable(ModuleValues); + } Module *ParseBytecode(const uchar *Buf, const uchar *EndBuf); @@ -55,6 +61,23 @@ } private: // All of this data is transient across calls to ParseBytecode + struct ValueList : public User { + ValueList() : User(Type::TypeTy, Value::TypeVal) { + } + ~ValueList() {} + + // vector compatibility methods + unsigned size() const { return getNumOperands(); } + void push_back(Value *V) { Operands.push_back(Use(V, this)); } + Value *back() const { return Operands.back(); } + void pop_back() { Operands.pop_back(); } + bool empty() const { return Operands.empty(); } + + virtual void print(std::ostream& OS) const { + OS << "Bytecode Reader UseHandle!"; + } + }; + Module *TheModule; // Current Module being read into... // Information about the module, extracted from the bytecode revision number. @@ -63,18 +86,16 @@ bool HasImplicitZeroInitializer; // Is entry 0 of every slot implicity zeros? bool isBigEndian, hasLongPointers;// Information about the target compiled for - typedef std::vector ValueList; - typedef std::vector ValueTable; + typedef std::vector ValueTable; ValueTable Values, LateResolveValues; - ValueTable ModuleValues, LateResolveModuleValues; + ValueTable ModuleValues; // GlobalRefs - This maintains a mapping between 's and forward // references to global values or constants. Such values may be referenced // before they are defined, and if so, the temporary object that they // represent is held here. // - typedef std::map, - Value*> GlobalRefsType; + typedef std::map, Value*> GlobalRefsType; GlobalRefsType GlobalRefs; // TypesLoaded - This vector mirrors the Values[TypeTyID] plane. It is used @@ -84,14 +105,27 @@ TypeValuesListTy ModuleTypeValues; TypeValuesListTy FunctionTypeValues; - // When the ModuleGlobalInfo section is read, we load the type of each - // function and the 'ModuleValues' slot that it lands in. We then load a - // placeholder into its slot to reserve it. When the function is loaded, this - // placeholder is replaced. + // When the ModuleGlobalInfo section is read, we create a function object for + // each function in the module. When the function is loaded, this function is + // filled in. + // + std::vector > FunctionSignatureList; + + // Constant values are read in after global variables. Because of this, we + // must defer setting the initializers on global variables until after module + // level constants have been read. In the mean time, this list keeps track of + // what we must do. // - std::vector > FunctionSignatureList; + std::vector > GlobalInits; private: + void freeTable(ValueTable &Tab) { + while (!Tab.empty()) { + delete Tab.back(); + Tab.pop_back(); + } + } + bool ParseModule (const uchar * Buf, const uchar *End); bool ParseVersionInfo (const uchar *&Buf, const uchar *End); bool ParseModuleGlobalInfo(const uchar *&Buf, const uchar *End); @@ -102,6 +136,7 @@ BasicBlock *BB /*HACK*/); bool ParseRawInst (const uchar *&Buf, const uchar *End, RawInst &); + bool ParseGlobalTypes(const uchar *&Buf, const uchar *EndBuf); bool ParseConstantPool(const uchar *&Buf, const uchar *EndBuf, ValueTable &Tab, TypeValuesListTy &TypeTab); bool parseConstantValue(const uchar *&Buf, const uchar *End, @@ -114,7 +149,8 @@ const Type *getType(unsigned ID); Constant *getConstantValue(const Type *Ty, unsigned num); - int insertValue(Value *D, std::vector &D); // -1 = Failure + int insertValue(Value *V, ValueTable &Table); // -1 = Failure + void setValueTo(ValueTable &D, unsigned Slot, Value *V); bool postResolveValues(ValueTable &ValTab); bool getTypeSlot(const Type *Ty, unsigned &Slot); @@ -152,12 +188,6 @@ } }; -struct FunctionPlaceHolderHelper : public Function { - FunctionPlaceHolderHelper(const Type *Ty) - : Function(cast(Ty), true) { - } -}; - struct ConstantPlaceHolderHelper : public Constant { ConstantPlaceHolderHelper(const Type *Ty) : Constant(Ty) {} @@ -166,7 +196,6 @@ typedef PlaceholderDef ValPHolder; typedef PlaceholderDef BBPHolder; -typedef PlaceholderDef FunctionPHolder; typedef PlaceholderDef ConstPHolder; @@ -177,7 +206,6 @@ // else discriminate by type switch (Val->getType()->getPrimitiveID()) { case Type::LabelTyID: return ((BBPHolder*)Val)->getID(); - case Type::FunctionTyID: return ((FunctionPHolder*)Val)->getID(); default: return ((ValPHolder*)Val)->getID(); } } From lattner at apoc.cs.uiuc.edu Wed Mar 19 14:57:00 2003 From: lattner at apoc.cs.uiuc.edu (Chris Lattner) Date: Wed Mar 19 14:57:00 2003 Subject: [llvm-commits] CVS: llvm/lib/Bytecode/Writer/WriteConst.cpp Writer.cpp WriterInternals.h Message-ID: <200303192056.h2JKuv720700@apoc.cs.uiuc.edu> Changes in directory llvm/lib/Bytecode/Writer: WriteConst.cpp updated: 1.20 -> 1.21 Writer.cpp updated: 1.30 -> 1.31 WriterInternals.h updated: 1.10 -> 1.11 --- Log message: Changes to the V2 bytecode format: - Null values are implicitly encoded instead of explicitly, this makes things more compact! - More compactly represent ConstantPointerRefs - Bytecode files are represented as: Header|GlobalTypes|GlobalVars/Function Protos|Constants|Functions|SymTab instead of Header|GlobalTypes|Constants|GlobalVars/Function Protos|Functions|SymTab which makes a lot of things simpler. Writer changes: - We now explictly encode versioning information in the bytecode files. - This allows new code to read bytecode files produced by old code, but new bytecode files can have enhancements such as the above. Although this makes the reader a bit more complex (having to deal with old formats), the writer only needs to be able to produce the most recent version. --- Diffs of the changes: Index: llvm/lib/Bytecode/Writer/WriteConst.cpp diff -u llvm/lib/Bytecode/Writer/WriteConst.cpp:1.20 llvm/lib/Bytecode/Writer/WriteConst.cpp:1.21 --- llvm/lib/Bytecode/Writer/WriteConst.cpp:1.20 Tue Jul 30 13:54:20 2002 +++ llvm/lib/Bytecode/Writer/WriteConst.cpp Wed Mar 19 14:56:46 2003 @@ -13,8 +13,6 @@ #include "llvm/Constants.h" #include "llvm/SymbolTable.h" #include "llvm/DerivedTypes.h" -#include -using std::cerr; void BytecodeWriter::outputType(const Type *T) { output_vbr((unsigned)T->getPrimitiveID(), Out); @@ -52,7 +50,7 @@ int Slot = Table.getValSlot(AT->getElementType()); assert(Slot != -1 && "Type used but not available!!"); output_vbr((unsigned)Slot, Out); - //cerr << "Type slot = " << Slot << " Type = " << T->getName() << endl; + //std::cerr << "Type slot = " << Slot << " Type = " << T->getName() << endl; output_vbr(AT->getNumElements(), Out); break; @@ -89,13 +87,15 @@ //case Type::PackedTyID: default: - cerr << __FILE__ << ":" << __LINE__ << ": Don't know how to serialize" - << " Type '" << T->getDescription() << "'\n"; + std::cerr << __FILE__ << ":" << __LINE__ << ": Don't know how to serialize" + << " Type '" << T->getDescription() << "'\n"; break; } } bool BytecodeWriter::outputConstant(const Constant *CPV) { + assert((CPV->getType()->isPrimitiveType() || !CPV->isNullValue()) && + "Shouldn't output null constants!"); // We must check for a ConstantExpr before switching by type because // a ConstantExpr can be of any type, and has no explicit value. @@ -121,9 +121,9 @@ switch (CPV->getType()->getPrimitiveID()) { case Type::BoolTyID: // Boolean Types if (cast(CPV)->getValue()) - output_vbr((unsigned)1, Out); + output_vbr(1U, Out); else - output_vbr((unsigned)0, Out); + output_vbr(0U, Out); break; case Type::UByteTyID: // Unsigned integer types... @@ -171,17 +171,11 @@ case Type::PointerTyID: { const ConstantPointer *CPP = cast(CPV); - if (isa(CPP)) { - output_vbr((unsigned)0, Out); - } else if (const ConstantPointerRef *CPR = - dyn_cast(CPP)) { - output_vbr((unsigned)1, Out); - int Slot = Table.getValSlot((Value*)CPR->getValue()); - assert(Slot != -1 && "Global used but not available!!"); - output_vbr((unsigned)Slot, Out); - } else { - assert(0 && "Unknown ConstantPointer Subclass!"); - } + assert(!isa(CPP) && "Null should be already emitted!"); + const ConstantPointerRef *CPR = cast(CPP); + int Slot = Table.getValSlot((Value*)CPR->getValue()); + assert(Slot != -1 && "Global used but not available!!"); + output_vbr((unsigned)Slot, Out); break; } @@ -199,8 +193,8 @@ case Type::VoidTyID: case Type::LabelTyID: default: - cerr << __FILE__ << ":" << __LINE__ << ": Don't know how to serialize" - << " type '" << CPV->getType()->getName() << "'\n"; + std::cerr << __FILE__ << ":" << __LINE__ << ": Don't know how to serialize" + << " type '" << CPV->getType()->getName() << "'\n"; break; } return false; Index: llvm/lib/Bytecode/Writer/Writer.cpp diff -u llvm/lib/Bytecode/Writer/Writer.cpp:1.30 llvm/lib/Bytecode/Writer/Writer.cpp:1.31 --- llvm/lib/Bytecode/Writer/Writer.cpp:1.30 Wed Nov 20 12:32:01 2002 +++ llvm/lib/Bytecode/Writer/Writer.cpp Wed Mar 19 14:56:46 2003 @@ -43,19 +43,36 @@ // Emit the top level CLASS block. BytecodeBlock ModuleBlock(BytecodeFormat::Module, Out); - // Output the ID of first "derived" type: - output_vbr((unsigned)Type::FirstDerivedTyID, Out); + bool isBigEndian = true; + bool hasLongPointers = true; + + // Output the version identifier... we are currently on bytecode version #1 + unsigned Version = (1 << 4) | isBigEndian | (hasLongPointers << 1); + output_vbr(Version, Out); align32(Out); - // Output module level constants, including types used by the function protos - outputConstants(false); + { + BytecodeBlock CPool(BytecodeFormat::GlobalTypePlane, Out); + + // Write the type plane for types first because earlier planes (e.g. for a + // primitive type like float) may have constants constructed using types + // coming later (e.g., via getelementptr from a pointer type). The type + // plane is needed before types can be fwd or bkwd referenced. + const std::vector &Plane = Table.getPlane(Type::TypeTyID); + assert(!Plane.empty() && "No types at all?"); + unsigned ValNo = Type::FirstDerivedTyID; // Start at the derived types... + outputConstantsInPlane(Plane, ValNo); // Write out the types + } - // The ModuleInfoBlock follows directly after the Module constant pool + // The ModuleInfoBlock follows directly after the type information outputModuleInfoBlock(M); + // Output module level constants, used for global variable initializers + outputConstants(false); + // Do the whole module now! Process each function at a time... for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I) - processMethod(I); + outputFunction(I); // If needed, output the symbol table for the module... outputSymbolTable(M->getSymbolTable()); @@ -68,8 +85,9 @@ &Plane, unsigned StartNo) { unsigned ValNo = StartNo; - // Scan through and ignore function arguments... - for (; ValNo < Plane.size() && isa(Plane[ValNo]); ValNo++) + // Scan through and ignore function arguments/global values... + for (; ValNo < Plane.size() && (isa(Plane[ValNo]) || + isa(Plane[ValNo])); ValNo++) /*empty*/; unsigned NC = ValNo; // Number of constants @@ -98,7 +116,7 @@ // << Out.size() << "\n"; outputConstant(CPV); } else { - outputType(cast(V)); + outputType(cast(V)); } } } @@ -108,26 +126,21 @@ unsigned NumPlanes = Table.getNumPlanes(); - // Write the type plane for types first because earlier planes - // (e.g. for a primitive type like float) may have constants constructed - // using types coming later (e.g., via getelementptr from a pointer type). - // The type plane is needed before types can be fwd or bkwd referenced. - if (!isFunction) { - const std::vector &Plane = Table.getPlane(Type::TypeTyID); - assert(!Plane.empty() && "No types at all?"); - unsigned ValNo = Type::FirstDerivedTyID; // Start at the derived types... - outputConstantsInPlane(Plane, ValNo); // Write out the types - } - for (unsigned pno = 0; pno != NumPlanes; pno++) { const std::vector &Plane = Table.getPlane(pno); - if (!Plane.empty()) { // Skip empty type planes... + if (!Plane.empty()) { // Skip empty type planes... unsigned ValNo = 0; - if (isFunction) // Don't reemit module constants - ValNo = Table.getModuleLevel(pno); + if (isFunction) // Don't reemit module constants + ValNo += Table.getModuleLevel(pno); else if (pno == Type::TypeTyID) // If type plane wasn't written out above continue; + if (pno >= Type::FirstDerivedTyID) { + // Skip zero initializer + if (ValNo == 0) + ValNo = 1; + } + outputConstantsInPlane(Plane, ValNo); // Write out constants in the plane } } @@ -142,7 +155,7 @@ assert(Slot != -1 && "Module global vars is broken!"); // Fields: bit0 = isConstant, bit1 = hasInitializer, bit2=InternalLinkage, - // bit3+ = slot# + // bit3+ = Slot # for type unsigned oSlot = ((unsigned)Slot << 3) | (I->hasInternalLinkage() << 2) | (I->hasInitializer() << 1) | I->isConstant(); output_vbr(oSlot, Out); @@ -165,11 +178,10 @@ } output_vbr((unsigned)Table.getValSlot(Type::VoidTy), Out); - align32(Out); } -void BytecodeWriter::processMethod(const Function *F) { +void BytecodeWriter::outputFunction(const Function *F) { BytecodeBlock FunctionBlock(BytecodeFormat::Function, Out); output_vbr((unsigned)F->hasInternalLinkage(), Out); // Only output the constant pool and other goodies if needed... Index: llvm/lib/Bytecode/Writer/WriterInternals.h diff -u llvm/lib/Bytecode/Writer/WriterInternals.h:1.10 llvm/lib/Bytecode/Writer/WriterInternals.h:1.11 --- llvm/lib/Bytecode/Writer/WriterInternals.h:1.10 Sun Jul 14 18:05:53 2002 +++ llvm/lib/Bytecode/Writer/WriterInternals.h Wed Mar 19 14:56:46 2003 @@ -25,8 +25,8 @@ BytecodeWriter(std::deque &o, const Module *M); protected: - void outputConstants(bool isMethod); - void processMethod(const Function *F); + void outputConstants(bool isFunction); + void outputFunction(const Function *F); void processBasicBlock(const BasicBlock &BB); void processInstruction(const Instruction &I); From lattner at apoc.cs.uiuc.edu Wed Mar 19 14:58:00 2003 From: lattner at apoc.cs.uiuc.edu (Chris Lattner) Date: Wed Mar 19 14:58:00 2003 Subject: [llvm-commits] CVS: llvm/lib/VMCore/SlotCalculator.cpp Message-ID: <200303192057.h2JKvWV20713@apoc.cs.uiuc.edu> Changes in directory llvm/lib/VMCore: SlotCalculator.cpp updated: 1.25 -> 1.26 --- Log message: * Change the order that globals and constants are processed in * Add support for implicit zero initializers --- Diffs of the changes: Index: llvm/lib/VMCore/SlotCalculator.cpp diff -u llvm/lib/VMCore/SlotCalculator.cpp:1.25 llvm/lib/VMCore/SlotCalculator.cpp:1.26 --- llvm/lib/VMCore/SlotCalculator.cpp:1.25 Wed Nov 20 12:33:41 2002 +++ llvm/lib/VMCore/SlotCalculator.cpp Wed Mar 19 14:57:22 2003 @@ -21,7 +21,7 @@ #include #if 0 -#define SC_DEBUG(X) cerr << X +#define SC_DEBUG(X) std::cerr << X #else #define SC_DEBUG(X) #endif @@ -67,26 +67,26 @@ void SlotCalculator::processModule() { SC_DEBUG("begin processModule!\n"); - // Add all of the constants that the global variables might refer to first. + // Add all of the global variables to the value table... // for (Module::const_giterator I = TheModule->gbegin(), E = TheModule->gend(); I != E; ++I) - if (I->hasInitializer()) - insertValue(I->getInitializer()); - - // Add all of the global variables to the value table... - // - for(Module::const_giterator I = TheModule->gbegin(), E = TheModule->gend(); - I != E; ++I) insertValue(I); // Scavenge the types out of the functions, then add the functions themselves // to the value table... // - for(Module::const_iterator I = TheModule->begin(), E = TheModule->end(); - I != E; ++I) + for (Module::const_iterator I = TheModule->begin(), E = TheModule->end(); + I != E; ++I) insertValue(I); + // Add all of the module level constants used as initializers + // + for (Module::const_giterator I = TheModule->gbegin(), E = TheModule->gend(); + I != E; ++I) + if (I->hasInitializer()) + insertValue(I->getInitializer()); + // Insert constants that are named at module level into the slot pool so that // the module symbol table can refer to them... // @@ -141,8 +141,7 @@ SC_DEBUG("Inserting function constants:\n"; for (constant_iterator I = constant_begin(M), E = constant_end(M); I != E; ++I) { - cerr << " " << *I->getType() - << " " << *I << "\n"; + std::cerr << " " << *I->getType() << " " << *I << "\n"; }); // Emit all of the constants that are being used by the instructions in the @@ -164,8 +163,6 @@ // Iterate over basic blocks, adding them to the value table... for (Function::const_iterator I = M->begin(), E = M->end(); I != E; ++I) insertValue(I); - /* for_each(M->begin(), M->end(), - bind_obj(this, &SlotCalculator::insertValue));*/ SC_DEBUG("Inserting Instructions:\n"); @@ -230,21 +227,21 @@ } -int SlotCalculator::insertValue(const Value *D) { - if (isa(D) || isa(D)) { - const User *U = cast(D); - // This makes sure that if a constant has uses (for example an array - // of const ints), that they are inserted also. Same for global variable - // initializers. - // - for(User::const_op_iterator I = U->op_begin(), E = U->op_end(); I != E; ++I) - if (!isa(*I)) // Don't chain insert global values - insertValue(*I); - } - - int SlotNo = getValSlot(D); // Check to see if it's already in! +int SlotCalculator::insertValue(const Value *V) { + int SlotNo = getValSlot(V); // Check to see if it's already in! if (SlotNo != -1) return SlotNo; - return insertVal(D); + + if (!isa(V)) + if (const Constant *C = dyn_cast(V)) { + // This makes sure that if a constant has uses (for example an array of + // const ints), that they are inserted also. + // + for (User::const_op_iterator I = C->op_begin(), E = C->op_end(); + I != E; ++I) + insertValue(*I); + } + + return insertVal(V); } @@ -260,7 +257,7 @@ if (D->getType() == Type::VoidTy || // Ignore void type nodes (IgnoreNamedNodes && // Ignore named and constants (D->hasName() || isa(D)) && !isa(D))) { - SC_DEBUG("ignored value " << D << "\n"); + SC_DEBUG("ignored value " << *D << "\n"); return -1; // We do need types unconditionally though } @@ -316,20 +313,32 @@ // cerr << "Inserting type '" << cast(D)->getDescription() << "'!\n"; if (Typ->isDerivedType()) { - int DefSlot = getValSlot(Typ); - if (DefSlot == -1) { // Have we already entered this type? + int ValSlot = getValSlot(Typ); + if (ValSlot == -1) { // Have we already entered this type? // Nope, this is the first we have seen the type, process it. - DefSlot = insertVal(Typ, true); - assert(DefSlot != -1 && "ProcessType returned -1 for a type?"); + ValSlot = insertVal(Typ, true); + assert(ValSlot != -1 && "ProcessType returned -1 for a type?"); } - Ty = (unsigned)DefSlot; + Ty = (unsigned)ValSlot; } else { Ty = Typ->getPrimitiveID(); } if (Table.size() <= Ty) // Make sure we have the type plane allocated... Table.resize(Ty+1, TypePlane()); - + + // If this is the first value to get inserted into the type plane, make sure + // to insert the implicit null value... + if (Table[Ty].empty() && Ty >= Type::FirstDerivedTyID && !IgnoreNamedNodes) { + Value *ZeroInitializer = Constant::getNullValue(Typ); + + // If we are pushing zeroinit, it will be handled below. + if (D != ZeroInitializer) { + Table[Ty].push_back(ZeroInitializer); + NodeMap[ZeroInitializer] = 0; + } + } + // Insert node into table and NodeMap... unsigned DestSlot = NodeMap[D] = Table[Ty].size(); Table[Ty].push_back(D); From jstanley at cs.uiuc.edu Thu Mar 20 08:42:01 2003 From: jstanley at cs.uiuc.edu (Joel Stanley) Date: Thu Mar 20 08:42:01 2003 Subject: [llvm-commits] CVS: llvm/lib/Reoptimizer/Inst/design.txt Message-ID: <200303201449.IAA11814@cypher.cs.uiuc.edu> Changes in directory llvm/lib/Reoptimizer/Inst: design.txt updated: 1.2 -> 1.3 --- Log message: --- Diffs of the changes: Index: llvm/lib/Reoptimizer/Inst/design.txt diff -u llvm/lib/Reoptimizer/Inst/design.txt:1.2 llvm/lib/Reoptimizer/Inst/design.txt:1.3 --- llvm/lib/Reoptimizer/Inst/design.txt:1.2 Mon Mar 17 18:49:31 2003 +++ llvm/lib/Reoptimizer/Inst/design.txt Thu Mar 20 08:49:06 2003 @@ -1,4 +1,4 @@ -{{{ OVERALL GOALS OF PHASE 2 +{{{ OVERALL GOALS OF PHASE 2 AND GENERAL STUFF - identify all loads of global volatile variables and the corresponding stores to temporaries @@ -17,7 +17,6 @@ Optimization in the compiler is on. This means that we can't really rely on a particular "signature" of generated assembly code. - }}} {{{ Problems: @@ -59,9 +58,89 @@ analysis than we want to do (and potentially, not even then!), so we don't overwrite address arithmetic instructions, etc. - }}} + }}} + {{{ Inlined functions (not answered) + + How do we best deal with functions that are inlined by the black-box + compiler? In particular, the naive approach of recording (in the + global bookkeeping table or GBT) the names of instrumented functions + in phase 1 so that they can be looked up via the ELF symtable in + phase 2 doesn't work if the instrumented functions got inlined. We + could try a 2-step approach to finding the function bodies that + contain instrumentation points: 1) For each function name in the + GBT, look it up in the ELF symtab; if present, done, otherwise try + step 2. 2) Scan the entire program for load-volatile instructions, + obtain the address of those instructions, and then find out what + function body address interval contains that address. This approach + seems like a lot of work, but might just do it. + + The previous approach, which may or may not be a viable approach, + assumes that we can actually obtain the GBT contents. Our previous + plan for obtaining the GBT base address was to take its address in + the documentation function pre-compilation, then (post-compilation) + look up the documentation function (by name), parse the load + instruction (or however the GBT address was witnessed), and obtain + the GBT. However, this approach is flawed since the documentation + function might get removed (if it's dead) or inlined (if it's + called). Perhaps the address of the GBT should just be taken in a + non-dead way at the entry to main itself? A write of the GBT address + to a volatile global (yet another one!) should ensure that the copy + isn't removed. + + Another approach for finding the GBT base addr (since we're + operating exclusively on ELF) is to simply look it up in the ELF + symtab. This will work because the static structure contents won't + be able to be eliminated if the struct is global, since other + compilation units may refer to it directly using extern. However, + the linker itself may prevent it from being included in the final + executable if there are no references to it. Perhaps we can + introduce a benign use of the GBT (taking it address and storing the + result into a global volatile) simply to ensure that there is *a* + reference to the structure. E-mail Chris about this. + + {{{ Response from Chris + + > If I've got a global statically-initialized struct that isn't used + > anywhere within its compliation unit, any compiler wouldn't be able to + > remove it because some other compilation unit may refer to it in an extern + > manner, correct? + + True, unless it's declared static. + + > However, if the above is the case, *and* no other compilation unit refers + > to the struct in an extern manner, a clever linker would be able to delete + > the structure because nothing needed to bind to the symbol. Right? + + Yes, or simple IPO. + + > So...I need to introduce a global struct that can't be removed by the + > compiler or linker, without changing the semantics of the program. Then I + > need to read the contents of the struct directly from the ELF executable + > after looking up its name in the ELF symbol table. The way I'm currently + > planning on doing this is by inserting an un-removable & benign reference + > to the global struct in, say, main() so that a clever linker can't remove + > it. Does this sound lame? :) + + That should work. Note that normal linkers won't delete these structure + references, so it may not even be a problem unless you're trying to be more + portable... + + -Chris + + }}} + + KIS concession: Grab the base address of the GBT directly from the ELF symtab, + and worry about it getting deleted if/when that actually occurs. -}}} + }}} + {{{ Violation of register schedules (issue?) + + What about violation of register schedules when inserting new code? + Is this even an issue? + + }}} + +}}} {{{ Musings on trampolines: - Assuming that we can leave the address calculation in place for the @@ -149,8 +228,8 @@ instructions would be required to pack the register with the target address. -}}} -{{{ Trampoline-related ideas: + +Trampoline-related ideas: (Thanks Brian!) :) @@ -371,7 +450,44 @@ didn't use arbitrary instrumentation points, we can do *exactly* what MDL can do (we think) without source access. -Implementation sketch: +}}} + +{{{ MEETING MINUTES 20 Mar 2003 + +Agenda: + - Address pending issues already sent via e-mail. + - Confidence of approach, assurance of validity w.r.t time commitment. + - Inlining of functions and how to handle + + - Register schedule violation; or "how do we determine what registers should + hold values when insert code?". Rather, should we simply adopt the policy + of 'always spill' or is doing otherwise an optimization that should be + considered later? In particular, if we always spill, AND can't remove the + address arithmetic instructions (for volatile temps) without more robust + analysis, at what point do we consider phase 2 "too expensive" when compared + with plain old opaque function calls at instrumentation points? + + - From the e-mail(s): + + (a) We have to balance the benefit of a vendor-independent implementation + vs. the opportunity to do something "more conceptually novel" with the + metrics. + + (b) We can discuss instrumenting functions at function entry; of course, + this point is moot if we do not take the binary editing approach. + + - What is the purpose of "exit stubs" in Trigger/TraceCache? What is the + role of the branch map and call map? + + - As long as the new code fits within the 64KB segment, we have the + capability to add new code right? + +Minutes: + + +}}} + +{{{ IMPLEMENTATION SKETCH At a high level, in broad sweeping strokes, we're going to use the trace cache tool as a framework for runtime manipulation of the binary @@ -469,30 +585,222 @@ {{{ MILESTONES -- Extract and report bookkeeping data structure contents from raw -compiled binary. - -- Determine if/how the tracecache framework can be used for a CFG -subgraph "copy" to a new area of memory; determine whether or not it's -worth the effort or whether it should be "done from scratch". +- Perform the "tracecache experiment" described in the TODO section. }}} {{{ TODO +- Answer the following questions about the tracecache: + {{{ + + - To what extent does it use the LLVM bytecode and/or mapping information + to map a particular path into the cache? + + It appears that the code in the TraceCache object itself doesn't require + any of the LLVM mapping information. However, as inputs to addTrace(), it + does need a "call map", a "branch map", and a vector called + "exitStubs". I'm not clear what the exit stubs are for yet, exactly, nor + the precise role of the call/branch maps (although I think they are just + the redirected branch destinations or some such thing). The trigger + routine *does* use the LLVM mapping information to construct these maps, + so it may be difficult to determine how to form the maps without the + specific mapping information...but it might be possible. + + - What kind of modifications would be needed to map an entire function body + into the tracecache region such that "hot paths" weren't considered and + path activity wasn't tracked? What kind of dependence does this induce on + the LLVM mapping and/or bytecode representation? + + Good news: the "hot path" and LLVM specific stuff seems confined to the + trigger routine. The TraceCache class itself seems to operate on raw + instruction ranges, etc. NB: No mmaping of the executable is performed + because all of the contextual information about a particular function is + obtained via the LLVM mapping information. + + - Perform the following experiement to help answer these questions: + + Use the tracecache/BinInterface/VirtualMem/etc mechanisms as they + currently exist, together with te ELF library and phase 1, to do the + following: + + Insert a call to our phase2 function in main; the phase2 function will + be responsible for doing all of the binary analysis and + transformations. + + For using ELF mechanisms that we need to use, determine how the + tracecache is currently (if it is) mmap'ing the executable, and how to + direct the ELF library to use the executable image in memory instead + of loading it from disk. + + Given the name of a function that exists in the ELF object file, + obtain its starting and ending address _in the address space of the + running application_. + + ^^^ At this point, the application should be running and, at RUNTIME, + spit out (at the very least) the function boundary addresses; + preferably, it can spit out the BinInterface-obtained disassembly as + well so that we can compare it against the static disassembly. + + Copy this address region to the cache and reroute execution, + preferably modifying some code in the cache so that the rerouted + execution is apparent during execution. [This step is really the key + investigatory point: do we need to access the LLVM-bytecode CFG to do + this? Does the copy mechanism only support a copy of a specified path + into the cache, or will it operate on an arbitrary CFG/CFG subgraph?] + + }}} + - Read EEL paper to get a better feel for binary modification issues -- Do sample by hand and revisit actions of both phases -- Extract bookkeeping data structure contents, function stats/ends, - etc, using low-level POSIX/ELF mechanisms. }}} -{{{ PENDING QUESTIONS +{{{ BY-HAND EXAMPLE OF PHASE ACTIONS -[What about violation of register schedules when inserting new code? -Is this an issue?] + {{{ High-level code (i.e. no sigfuns): -}}} +pp_interval eth; + +void bar() { + int cnt = 0; + + { + sample eth; + while(cnt++ != 15) { + foo(); + printf(...); + } + } + ... + printf("avg reading was %f\n", pp_avg(eth)); +} + + }}} + {{{ Sigfun-level code (input to phase 1) + +void main() { + pp_interval("eth", elapsedTimeStart, elapsedTimeEnd, "bounded_series", "size=20"); +} + +[[The processing of pp_interval call in main() results in declaration: + double eth[20]; +which is used by name elsewhere... +]] + +void bar() { + int cnt = 0; + + { + pp_sigfun_interval_start("eth", elapsedTimeStart); + + while(cnt++ != 15) { + foo(); + printf(...); + } + + pp_sigfun_interval_end("eth", elapsedTimeEnd); + } + ... + printf("avg reading was %f\n", pp_avg(eth)); +} + + }}} + {{{ Post-phase1 code (quasi-high-level) + +struct GBT { + // fields for GBT go here... +} the_gbt = { initializer }; + +volatile global instSite1; // instSite1 = start of region +volatile global instSite1_tmp; +volatile global instSite2; // instSite2 = end of region +volatile global instSite2_tmp; + +double eth[20]; + +void bar() { + int cnt = 0; + double z; // <-- inserted for the ret val of end of region + // inst call (call inserted by phase 2) + + { + instSite1_tmp = instSite1; // <-- record the address of this instSite1; a + // load of this address identifies this location + // in the code; the code: + // double y = elapsedTimeStart() is to be + // inserted here by phase 2 [replacing ld] + // Was: pp_sigfun_interval_start("eth", elapsedTimeStart); + + while(cnt++ != 15) { + foo(); + printf(...); + } + + instSite2_tmp = instSite2; // <-- record the address of this instSite2; a + // load of this address identifies this location + // in this code; the code: + // z = elapsedTimeEnd(&y) is to be + // inserted by phase 2 [replacing ld] + // Was: pp_sigfun_interval_end("eth", elapsedTimeEnd); + + pp_series_add(eth, z); // inserted by phase 1, uses z even though it + // hasn't been written to. z and eth both exist. + } + ... + printf("avg reading was %f\n", pp_avg(eth)); +} + + }}} + {{{ Post-phase2 code (high level) + +struct GBT { + // fields for GBT go here... +} the_gbt = { initializer }; + +volatile global instSite1; // instSite1 = start of region +volatile global instSite1_tmp; +volatile global instSite2; // instSite2 = end of region +volatile global instSite2_tmp; + +double eth[20]; + +void bar() { + int cnt = 0; + double z; + + { + double y = elapsedTimeStart(); // <-- y must be alloca'd, ugh. + + while(cnt++ != 15) { + foo(); + printf(...); + } + + z = elapsedTimeEnd(&y); + pp_series_add(eth, z); + } + ... + printf("avg reading was %f\n", pp_avg(eth)); +} + + }}} +Aside from the obvious difficulties with phase 2 (find the load locations, etc), +an additional difficulty exists: we must alloca the temporary for the return +value of the first instrumentation function for a region. Originally, I thought +that this meant we'd need to have to (enough) reserved space at the entry of the +instrumented function to place 'n' alloca calls, etc: one for each temporary. +However, I believe that since our current approach allows essentially arbitrary +code to be inserted into the tracecache region (supposedly), we no longer have +this problem: invoke alloca immediately before the call. The only problem with +this is finding available registers to use, something which I don't understand +at all. If we must use a temporary that exists at the end of phase 1, that is +also a possibility, but then we've got to place un-removable uses of all of +those temporaries at the start of main (or something) so that they do not get +eliminated. This shouldn't be a problem though. The more general problem of +"register schedule violation potential", however, may still be a problem: +consider taking the address of the alloca'd temporary and passing it to the +end-region instrumentation function, for example. +}}} From lattner at cs.uiuc.edu Thu Mar 20 15:22:01 2003 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Thu Mar 20 15:22:01 2003 Subject: [llvm-commits] CVS: llvm/include/llvm/Analysis/Dominators.h Message-ID: <200303202121.PAA03612@apoc.cs.uiuc.edu> Changes in directory llvm/include/llvm/Analysis: Dominators.h updated: 1.31 -> 1.32 --- Log message: Add more graph traits specializations for dominator tree nodes --- Diffs of the changes: Index: llvm/include/llvm/Analysis/Dominators.h diff -u llvm/include/llvm/Analysis/Dominators.h:1.31 llvm/include/llvm/Analysis/Dominators.h:1.32 --- llvm/include/llvm/Analysis/Dominators.h:1.31 Thu Feb 27 14:24:17 2003 +++ llvm/include/llvm/Analysis/Dominators.h Thu Mar 20 15:21:05 2003 @@ -347,18 +347,25 @@ // DominatorTree GraphTraits specialization so the DominatorTree can be // iterable by generic graph iterators. -template <> struct GraphTraits { +template <> struct GraphTraits { typedef DominatorTree::Node NodeType; typedef NodeType::iterator ChildIteratorType; - static NodeType *getEntryNode(DominatorTree *DT) { - return DT->getNode(DT->getRoot()); + static NodeType *getEntryNode(NodeType *N) { + return N; } static inline ChildIteratorType child_begin(NodeType* N) { return N->begin(); } static inline ChildIteratorType child_end(NodeType* N) { return N->end(); + } +}; + +template <> struct GraphTraits + : public GraphTraits { + static NodeType *getEntryNode(DominatorTree *DT) { + return DT->getNode(DT->getRoot()); } }; From lattner at cs.uiuc.edu Fri Mar 21 15:41:01 2003 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Fri Mar 21 15:41:01 2003 Subject: [llvm-commits] CVS: llvm/include/Support/PostOrderIterator.h Message-ID: <200303212140.PAA22013@apoc.cs.uiuc.edu> Changes in directory llvm/include/Support: PostOrderIterator.h updated: 1.7 -> 1.8 --- Log message: Update comment --- Diffs of the changes: Index: llvm/include/Support/PostOrderIterator.h diff -u llvm/include/Support/PostOrderIterator.h:1.7 llvm/include/Support/PostOrderIterator.h:1.8 --- llvm/include/Support/PostOrderIterator.h:1.7 Sun Oct 27 20:11:16 2002 +++ llvm/include/Support/PostOrderIterator.h Fri Mar 21 15:40:39 2003 @@ -113,7 +113,7 @@ // // This class should be used like this: // { -// ReversePostOrderTraversal RPOT(MethodPtr); // Expensive to create +// ReversePostOrderTraversal RPOT(FuncPtr); // Expensive to create // for (rpo_iterator I = RPOT.begin(); I != RPOT.end(); ++I) { // ... // } From lattner at cs.uiuc.edu Fri Mar 21 15:42:01 2003 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Fri Mar 21 15:42:01 2003 Subject: [llvm-commits] CVS: llvm/include/llvm/Pass.h Message-ID: <200303212141.PAA22031@apoc.cs.uiuc.edu> Changes in directory llvm/include/llvm: Pass.h updated: 1.30 -> 1.31 --- Log message: Add helper method --- Diffs of the changes: Index: llvm/include/llvm/Pass.h diff -u llvm/include/llvm/Pass.h:1.30 llvm/include/llvm/Pass.h:1.31 --- llvm/include/llvm/Pass.h:1.30 Wed Feb 26 13:10:28 2003 +++ llvm/include/llvm/Pass.h Fri Mar 21 15:40:53 2003 @@ -142,6 +142,14 @@ return dynamic_cast(Resolver->getAnalysisToUpdate(PI)); } + /// mustPreserveAnalysisID - This method serves the same function as + /// getAnalysisToUpdate, but works if you just have an AnalysisID. This + /// obviously cannot give you a properly typed instance of the class if you + /// don't have the class name available (use getAnalysisToUpdate if you do), + /// but it can tell you if you need to preserve the pass at least. + /// + bool mustPreserveAnalysisID(const PassInfo *AnalysisID) const; + /// getAnalysis() - This function is used by subclasses to get /// to the analysis information that they claim to use by overriding the /// getAnalysisUsage function. From lattner at cs.uiuc.edu Fri Mar 21 15:42:02 2003 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Fri Mar 21 15:42:02 2003 Subject: [llvm-commits] CVS: llvm/lib/VMCore/Pass.cpp Message-ID: <200303212141.PAA22039@apoc.cs.uiuc.edu> Changes in directory llvm/lib/VMCore: Pass.cpp updated: 1.40 -> 1.41 --- Log message: Add helper method --- Diffs of the changes: Index: llvm/lib/VMCore/Pass.cpp diff -u llvm/lib/VMCore/Pass.cpp:1.40 llvm/lib/VMCore/Pass.cpp:1.41 --- llvm/lib/VMCore/Pass.cpp:1.40 Mon Oct 21 15:00:28 2002 +++ llvm/lib/VMCore/Pass.cpp Fri Mar 21 15:41:02 2003 @@ -142,6 +142,10 @@ PM->addPass(this, AU); } +bool Pass::mustPreserveAnalysisID(const PassInfo *AnalysisID) const { + return Resolver->getAnalysisToUpdate(AnalysisID) != 0; +} + // dumpPassStructure - Implement the -debug-passes=Structure option void Pass::dumpPassStructure(unsigned Offset) { std::cerr << std::string(Offset*2, ' ') << getPassName() << "\n"; From lattner at cs.uiuc.edu Fri Mar 21 15:44:01 2003 From: lattner at cs.uiuc.edu (Chris Lattner) Date: Fri Mar 21 15:44:01 2003 Subject: [llvm-commits] CVS: llvm/lib/Transforms/Scalar/BreakCriticalEdges.cpp Message-ID: <200303212143.PAA22095@apoc.cs.uiuc.edu> Changes in directory llvm/lib/Transforms/Scalar: BreakCriticalEdges.cpp (r1.8) removed --- Log message: Move BreakCriticalEdges pass to lib/Transforms/Utils --- Diffs of the changes: