Fixed problem Oscar found with instrumenting tail-call optimized
[dyninst.git] / dyninstAPI / src / inst-sparc-solaris.C
1 /*
2  * Copyright (c) 1996 Barton P. Miller
3  * 
4  * We provide the Paradyn Parallel Performance Tools (below
5  * described as Paradyn") on an AS IS basis, and do not warrant its
6  * validity or performance.  We reserve the right to update, modify,
7  * or discontinue this software at any time.  We shall have no
8  * obligation to supply such updates or modifications or any other
9  * form of support to you.
10  * 
11  * This license is for research uses.  For such uses, there is no
12  * charge. We define "research use" to mean you may freely use it
13  * inside your organization for whatever purposes you see fit. But you
14  * may not re-distribute Paradyn or parts of Paradyn, in any form
15  * source or binary (including derivatives), electronic or otherwise,
16  * to any other organization or entity without our permission.
17  * 
18  * (for other uses, please contact us at paradyn@cs.wisc.edu)
19  * 
20  * All warranties, including without limitation, any warranty of
21  * merchantability or fitness for a particular purpose, are hereby
22  * excluded.
23  * 
24  * By your use of Paradyn, you understand and agree that we (or any
25  * other person or entity with proprietary rights in Paradyn) are
26  * under no obligation to provide either maintenance services,
27  * update services, notices of latent defects, or correction of
28  * defects for Paradyn.
29  * 
30  * Even if advised of the possibility of such damages, under no
31  * circumstances shall we (or any other person or entity with
32  * proprietary rights in the software licensed hereunder) be liable
33  * to you or any third party for direct, indirect, or consequential
34  * damages of any character regardless of type of action, including,
35  * without limitation, loss of profits, loss of use, loss of good
36  * will, or computer failure or malfunction.  You agree to indemnify
37  * us (and any other person or entity with proprietary rights in the
38  * software licensed hereunder) for any and all liability it may
39  * incur to third parties resulting from your use of Paradyn.
40  */
41
42 #include "dyninstAPI/src/inst-sparc.h"
43 #include "dyninstAPI/src/instPoint.h"
44 #include "util/h/debugOstream.h"
45
46 // Another constructor for the class instPoint. This one is called
47 // for the define the instPoints for regular functions which means
48 // multiple instructions is going to be moved to based trampoline.
49 // Since we will use the instruction CALL to branch to the base
50 // tramp(so it doesn't have any code size restriction), things are
51 // a little more complicated because instruction CALL changes the 
52 // value in the link register.
53 instPoint::instPoint(pd_Function *f, const instruction &instr, 
54                      const image *owner, Address &adr,
55                      bool delayOK,
56                      instPointType pointType)
57 : addr(adr), originalInstruction(instr), inDelaySlot(false), isDelayed(false),
58   callIndirect(false), callAggregate(false), callee(NULL), func(f),
59   ipType(pointType), image_ptr(owner), firstIsConditional(false),
60   relocated_(false), isLongJump(false)
61 {
62
63   isBranchOut = false;
64   size = 0;
65
66   // When the function has a stack frame
67   if (!this->hasNoStackFrame()) {
68
69       // we will treat the first instruction after the SAVE instruction
70       // in the nonleaf procedure as the function entry.  
71       if (ipType == functionEntry) {
72
73           assert(isInsnType(instr, SAVEmask, SAVEmatch));
74           saveInsn.raw = owner->get_instruction(addr);
75           addr += 4;
76           originalInstruction.raw = owner->get_instruction(addr);
77           delaySlotInsn.raw = owner->get_instruction(addr+4);
78           size += 2*sizeof(instruction);
79
80           // If the second instruction is DCTI, we need to move the
81           // the instruction in the delayed slot.
82           if (IS_DELAYED_INST(delaySlotInsn)) {
83               isDelayed = true; 
84               isDelayedInsn.raw = owner->get_instruction(addr+8);
85               size += 1*sizeof(instruction);
86
87               // Life is hard. If the second instruction is actually
88               // an CALL instruction, we need to move the instruction
89               // after the instruction in the delayed slot if the 
90               // return value of this function is a aggregate value.
91               aggregateInsn.raw = owner->get_instruction(addr+12);
92               if (isCallInsn(delaySlotInsn)) {
93                   if (!IS_VALID_INSN(aggregateInsn) && aggregateInsn.raw != 0) {
94                       callAggregate = true;
95                       size += 1*sizeof(instruction);
96                   }
97               }
98           }
99
100       // The following are easier.        
101       } else if (ipType == callSite) {
102           delaySlotInsn.raw = owner->get_instruction(addr+4);
103           size += 2*sizeof(instruction);
104
105           aggregateInsn.raw = owner->get_instruction(addr+8);
106           if (!IS_VALID_INSN(aggregateInsn) && aggregateInsn.raw != 0) {
107               callAggregate = true;
108               size += 1*sizeof(instruction);
109           }
110       } else {
111           delaySlotInsn.raw = owner->get_instruction(addr+4);
112           size += 2*sizeof(instruction);
113       }
114   }
115
116   // When the function is a leaf function
117   else {
118
119       // For the leaf procedure, there are no function calls in
120       // this procdure. So we don't need to consider the 
121       // aggregate instuction.
122       if (ipType == functionEntry) {
123
124           otherInstruction.raw = owner->get_instruction(addr+4);
125           delaySlotInsn.raw = owner->get_instruction(addr+8);
126           size += 2*sizeof(instruction);
127
128           if (IS_DELAYED_INST(delaySlotInsn)) {
129               isDelayed = true;
130               isDelayedInsn.raw = owner->get_instruction(addr+12);
131               size += 2*sizeof(instruction);
132           }
133
134       } else if (ipType == functionExit) {
135           
136           addr -= 4;
137
138           if (owner->isValidAddress(addr-4)) {
139               instruction iplus1;
140               iplus1.raw = owner->get_instruction(addr-4);
141               if (IS_DELAYED_INST(iplus1) && !delayOK) {
142                   addr -= 4;
143                   inDelaySlot = true;
144                   size += 1*sizeof(instruction);
145                   if(isCondBranch(iplus1)){
146                       instruction previous_inst; 
147                       previous_inst.raw = owner->get_instruction(addr-4);
148                       firstIsConditional = true;
149                       addr -= sizeof(instruction);
150                       size += 1*sizeof(instruction);
151                   }
152               }
153           }
154
155           originalInstruction.raw = owner->get_instruction(addr);
156           otherInstruction.raw = owner->get_instruction(addr+4);
157           delaySlotInsn.raw = owner->get_instruction(addr+8);
158           size += 3*sizeof(instruction);
159
160           if (inDelaySlot) {
161               inDelaySlotInsn.raw = owner->get_instruction(addr+12);
162               if(firstIsConditional) {
163                   extraInsn.raw = owner->get_instruction(addr+16);
164               }
165           }
166
167       } else {
168           assert(ipType == callSite);
169           // Usually, a function without a stack frame won't have any call sites
170           extern debug_ostream metric_cerr;
171           metric_cerr << "inst-sparc-solaris.C WARNING: found a leaf fn (no stack frame)" << endl;
172           metric_cerr << "which makes a function call" << endl;
173           metric_cerr << "This fn is " << func->prettyName() << endl;
174
175           //abort();
176       }
177   }
178
179   // return the address in the code segment after this instruction
180   // sequence. (there's a -1 here because one will be added up later in
181   // the function findInstPoints)  
182   adr = addr + (size - 1*sizeof(instruction));
183 }
184
185
186 void AstNode::sysFlag(instPoint *location)
187 {
188     if (location -> ipType == functionEntry) {
189         astFlag = (location -> isLongJump)? false:true; 
190     } else if (location -> ipType == functionExit) {
191        astFlag = location -> hasNoStackFrame(); // formerly "isLeaf()"
192     } else
193         astFlag = false;
194
195     if (loperand)
196         loperand->sysFlag(location);
197     if (roperand)
198         roperand->sysFlag(location); 
199
200     for (unsigned u = 0; u < operands.size(); u++) {
201         operands[u]->sysFlag(location);
202     }
203 }
204
205 // Determine if the called function is a "library" function or a "user" function
206 // This cannot be done until all of the functions have been seen, verified, and
207 // classified
208 //
209 void pd_Function::checkCallPoints() {
210   instPoint *p;
211   Address loc_addr;
212
213   //cerr << "pd_Function:: checkCallPoints called, *this = " << *this;
214
215   vector<instPoint*> non_lib;
216
217   for (unsigned i=0; i<calls.size(); ++i) {
218     /* check to see where we are calling */
219     p = calls[i];
220     assert(p);
221
222     if (isInsnType(p->originalInstruction, CALLmask, CALLmatch)) {
223       //cerr << " isIsinType TRUE" << endl;
224       // Direct call
225       loc_addr = p->addr + (p->originalInstruction.call.disp30 << 2);
226       pd_Function *pdf = (file_->exec())->findFunction(loc_addr);
227       if (pdf) {
228         p->callee = pdf;
229         non_lib += p;
230         //cerr << "  pdf (called func?) non-NULL = " << *pdf;
231       } else if(!pdf){
232            //cerr << "  pdf (called func) NULL" << endl;
233            // if this is a call outside the fuction, keep it
234            if((loc_addr < getAddress(0))||(loc_addr > (getAddress(0)+size()))){
235                 //cerr << "   apparent call outside function, adding p to non_lib" \
236                         << endl;
237                 p->callIndirect = true;
238                 p->callee = NULL;
239                 non_lib += p;
240            }
241            else {
242                //cerr << "   apparent call inside function, deleting p" << endl;
243                delete p;
244            }
245       } 
246     } else {
247       //cerr << " isIsinType FALSE, assuming call to unnamed user function" << endl;
248       // Indirect call -- be conservative, assume it is a call to 
249       // an unnamed user function
250       assert(!p->callee); assert(p->callIndirect);
251       p->callee = NULL;
252       non_lib += p;
253     }
254   }
255   calls = non_lib;
256 }
257
258 // TODO we cannot find the called function by address at this point in time
259 // because the called function may not have been seen.
260 // reloc_info is 0 if the function is not currently being relocated
261 Address pd_Function::newCallPoint(Address &adr, const instruction instr,
262                                  const image *owner, bool &err, 
263                                  int &callId, Address &oldAddr,
264                                  relocatedFuncInfo *reloc_info,
265                                  const instPoint *&location)
266 {
267     Address ret=adr;
268     instPoint *point;
269
270     err = true;
271     if (isTrap) {
272         point = new instPoint(this, instr, owner, adr, false, callSite, oldAddr);
273     } else {
274         point = new instPoint(this, instr, owner, adr, false, callSite);
275     }
276
277     if (!isInsnType(instr, CALLmask, CALLmatch)) {
278       point->callIndirect = true;
279       point->callee = NULL;
280     } else{
281       point->callIndirect = false;
282     }
283
284     if (isTrap) {
285         if (!reloc_info) {
286             calls += point;
287             calls[callId] -> instId = callId++;
288         } else {
289             // calls to a location within the function are not
290             // kept in the calls vector
291             assert(callId >= 0);
292             assert(((u_int)callId) < calls.size());
293             point->relocated_ = true;
294             // if the location was this call site, then change its value
295             if(location && (calls[callId] == location)) { 
296                 assert(calls[callId]->instId  == location->instId);
297                 location = point; 
298             } 
299             point->instId = callId++;
300             reloc_info->addFuncCall(point);
301         }
302     } else {
303         if (!reloc_info) {
304             calls += point;
305         }
306         else {
307             point->relocated_ = true;
308             reloc_info->addFuncCall(point);
309         }
310     }
311     err = false;
312     return ret;
313 }
314
315 /*
316  * Given and instruction, relocate it to a new address, patching up
317  *   any relative addressing that is present.
318  *
319  */
320 void relocateInstruction(instruction *insn, u_int origAddr, u_int targetAddr,
321                          process *proc)
322 {
323     int newOffset;
324
325     // If the instruction is a CALL instruction, calculate the new
326     // offset
327     if (isInsnType(*insn, CALLmask, CALLmatch)) {
328         newOffset = origAddr  - targetAddr + (insn->call.disp30 << 2);
329         insn->call.disp30 = newOffset >> 2;
330     } else if (isInsnType(*insn, BRNCHmask, BRNCHmatch)||
331                isInsnType(*insn, FBRNCHmask, FBRNCHmatch)) {
332
333         // If the instruction is a Branch instruction, calculate the 
334         // new offset. If the new offset is out of reach after the 
335         // instruction is moved to the base Trampoline, we would do
336         // the following:
337         //    b  address  ......    address: save
338         //                                   call new_offset             
339         //                                   restore 
340         newOffset = origAddr - targetAddr + (insn->branch.disp22 << 2);
341
342         // if the branch is too far, then allocate more space in inferior
343         // heap for a call instruction to branch target.  The base tramp 
344         // will branch to this new inferior heap code, which will call the
345         // target of the branch
346         if (!offsetWithinRangeOfBranchInsn(newOffset)) {
347 //      if (ABS(newOffset) > getMaxBranch1Insn()) {
348             int ret = inferiorMalloc(proc,3*sizeof(instruction), textHeap);
349             u_int old_offset = insn->branch.disp22 << 2;
350             insn->branch.disp22  = (ret - targetAddr)>>2;
351             instruction insnPlus[3];
352             genImmInsn(insnPlus, SAVEop3, REG_SP, -112, REG_SP);
353             generateCallInsn(insnPlus+1, ret+sizeof(instruction), 
354                              origAddr+old_offset);
355             genSimpleInsn(insnPlus+2, RESTOREop3, 0, 0, 0); 
356             proc->writeDataSpace((caddr_t)ret, sizeof(insnPlus), 
357                          (caddr_t) insnPlus);
358         } else {
359             insn->branch.disp22 = newOffset >> 2;
360         }
361     } else if (isInsnType(*insn, TRAPmask, TRAPmatch)) {
362         // There should be no probelm for moving trap instruction
363         // logLine("attempt to relocate trap\n");
364     } 
365     /* The rest of the instructions should be fine as is */
366 }
367
368 /*
369  * Install a base tramp -- fill calls with nop's for now.
370  *
371  * This one install the base tramp for the regular functions.
372  *
373  */
374 trampTemplate *installBaseTramp(instPoint *&location, process *proc)
375 {
376     unsigned baseAddr = inferiorMalloc(proc, baseTemplate.size, textHeap);
377
378     instruction *code = new instruction[baseTemplate.size];
379     assert(code);
380
381     memcpy((char *) code, (char*) baseTemplate.trampTemp, baseTemplate.size);
382
383     instruction *temp;
384     unsigned currAddr;
385     for (temp = code, currAddr = baseAddr; 
386         (currAddr - baseAddr) < (unsigned) baseTemplate.size;
387         temp++, currAddr += sizeof(instruction)) {
388
389         if (temp->raw == EMULATE_INSN) {
390
391             // Load the value of link register from stack 
392             // If no stack frame, genereate a RESTORE instruction
393             // since there's an instruction SAVE generated and put in the
394             // code segment.
395             if (location -> hasNoStackFrame()) {
396
397                 Address baseAddress = 0;
398                 proc->getBaseAddress(location->image_ptr,baseAddress);
399                 baseAddress += location -> addr;
400
401                 if (in1BranchInsnRange(baseAddress, baseAddr) == false) {
402                     //cerr << "This happen very rarely, I suppose "<< endl;
403                     //cerr << "Let's see if this is going to be executed..." << endl;
404                     location -> isLongJump = true;
405                     genImmInsn(temp, RESTOREop3, 0, 0, 0);
406                 } else {
407                     generateNOOP(temp);
408                 }
409                 temp++;
410                 currAddr += sizeof(instruction);
411             } 
412             // Same for the leaf and nonleaf functions.
413             // First, relocate the "FIRST instruction" in the sequence;  
414             Address fromAddr = location->addr;
415
416             if (!(location -> hasNoStackFrame())) {
417                 if (location -> ipType == functionEntry) {
418                     *temp = location -> saveInsn;
419                     temp++;
420                     currAddr += sizeof(instruction);
421                 }
422             }
423             *temp = location->originalInstruction;
424
425             // compute the real from address if this instrumentation
426             // point is from a shared object image
427             Address baseAddress = 0;
428             if(proc->getBaseAddress(location->image_ptr,baseAddress)){
429                 fromAddr += baseAddress;                
430             }
431
432             // If the instruction is a call instruction to a location somewhere 
433             // within the function, then the 07 regester must be saved and 
434             // resored around the relocated call from the base tramp...the call
435             // instruction changes the value of 07 to be the PC value, and if
436             // we move the call instruction to the base tramp, its value will
437             // be incorrect when we use it in the function.  We generate the
438             // following base tramp code:
439             //          original delay slot instruction 
440             //          save
441             //          original call instruction
442             //          restore
443             // This case should only occur for function entry points in
444             // functions from shared objects, and there should be no append
445             // trampolene code because the relocated call instruction will
446             // not return to the base tramp
447             if (isInsnType(*temp, CALLmask, CALLmatch)) {
448                 Address offset = fromAddr + (temp->call.disp30 << 2);
449                 if ((offset > (location->func->getAddress(0)+ baseAddress)) && 
450                     (offset < ((location->func->getAddress(0)+ baseAddress)+
451                                  location->func->size()))) {
452                     // offset > adr; "=" means recursive function which is allowed
453                     // offset < adr + size; "=" does not apply to this case
454
455                     // TODO: this assumes that the delay slot instruction is not
456                     // a call instruction....is this okay?
457                     
458                     // assume this situation only happens at function entry point 
459                     // for the shared library routine. And it is definately nees
460                     // long jump support
461                     assert(location -> ipType == functionEntry); 
462                     location -> isLongJump = true;
463                     
464                     // In this situation, save instruction is discarded
465                     // Rollback!! 
466                     assert(location->hasNoStackFrame() == false);
467                     temp--;
468                     currAddr -= sizeof(instruction);
469                     
470                     *temp = location->delaySlotInsn;  
471                     temp++; 
472                     currAddr += sizeof(instruction);
473                     genImmInsn(temp, SAVEop3, REG_SP, -112, REG_SP); 
474                     temp++; 
475                     currAddr += sizeof(instruction);  
476                     *temp = location->originalInstruction;
477                     relocateInstruction(temp,fromAddr,currAddr,(process *)proc);
478                     temp++; 
479                     fromAddr += sizeof(instruction); 
480                     currAddr += sizeof(instruction);
481                     genImmInsn(temp, RESTOREop3, 0, 0, 0);
482                     continue;
483                 }
484             }   
485
486             relocateInstruction(temp,fromAddr,currAddr,(process *)proc);
487
488             // Again, for leaf function, one more is needed to move for one
489             // more spot;
490             if (location->hasNoStackFrame()) {
491                 // check to see if the otherInstruction is a call instruction
492                 // to itself, if so then generate the following
493                 // before               after           basetramp
494                 // ------               -----           ---------
495                 // mov   originalInsn   mov             sethi
496                 // call  otherInsn      call            save
497                 // sethi delaySlot      nop             call
498                 //                                      restore
499                 // the idea is to not really relocate the originalInsn, and
500                 // relocate only the call otherInsn and delaySlot instrn
501                 // then do a save and restore around the relocated call to
502                 // save the value of the o7 register from the call to base tramp
503                 if (isInsnType(location->otherInstruction,CALLmask,CALLmatch)) {
504                   *temp = location->otherInstruction;
505                   fromAddr += sizeof(instruction);
506                   Address offset = fromAddr + (temp->call.disp30 << 2);
507                   if ((offset > (location->func->getAddress(0)+baseAddress)) && 
508                     (offset < ((location->func->getAddress(0)+ baseAddress)+
509                                  location->func->size()))) {
510                        location -> isLongJump = true;
511                        // need to replace retore instr with nop 
512                        temp--;
513                        generateNOOP(temp);
514                        // relocate delaySlot instr
515                        temp++;
516                        *temp = location->delaySlotInsn;
517                        fromAddr += sizeof(instruction);
518                        relocateInstruction(temp,fromAddr,currAddr, 
519                                           (process *)proc);
520                        temp++; 
521                        currAddr += sizeof(instruction);
522                        genImmInsn(temp, SAVEop3, REG_SP, -112, REG_SP); 
523
524                        // relocate the call instruction     
525                        temp++; 
526                        currAddr += sizeof(instruction);
527                        fromAddr -= sizeof(instruction);
528                        *temp = location->otherInstruction;
529                        relocateInstruction(temp,fromAddr,currAddr, 
530                                            (process *)proc);
531                        temp++; 
532                        fromAddr += sizeof(instruction); 
533                        currAddr += sizeof(instruction);
534                        genImmInsn(temp, RESTOREop3, 0, 0, 0);
535                        continue;
536                   }
537                 }
538
539                 // otherwise relocate the other instruction
540                 fromAddr += sizeof(instruction);
541                 currAddr += sizeof(instruction);
542                 *++temp = location->otherInstruction;
543                 relocateInstruction(temp, fromAddr, currAddr, 
544                                     (process *)proc);
545             }     
546             
547             // Second, relocate the "NEXT instruction";
548             fromAddr += sizeof(instruction);
549             currAddr += sizeof(instruction);
550             *++temp = location->delaySlotInsn;
551  
552             // if the NEXT instruction is a call instruction to a location
553             // within the function, then the 07 regester must be saved and 
554             // resored around the relocated call from the base tramp...the call
555             // instruction changes the value of 07 to be the PC value, and if
556             // we move the call instruction to the base tramp, its value will
557             // be incorrect when we use it in the function.  We generate:
558             //
559             //  orignial            relocated to base tramp
560             //  --------            -----------------------
561             //  save                nop  // SAVE added above, replace w/nop 
562             //  original insn       original instruction // already relocated
563             //  delaySlotInsn       isDelayedInsn
564             //  isDelayedInsn       save
565             //                      delaySlotInsn  (call with offset - 4)
566             //                      restore
567             //  In the function, the call to the base tramp will have an
568             //  additional add instruction to adjust the 07 register
569             //  orignial            relocated to base tramp
570             //  --------            -----------------------
571             //  save                 save       
572             //  mov                  call
573             //  call                 nop
574             //  sethi                add $o7 4   
575             //
576             if (isInsnType(*temp, CALLmask, CALLmatch)) {
577                 Address offset = fromAddr + (temp->call.disp30 << 2);
578                 if ((offset > (location->func->getAddress(0)+ baseAddress)) && 
579                     (offset < ((location->func->getAddress(0)+ baseAddress)+
580                                  location->func->size()))) {
581                     
582                     temp--;
583                     temp--;
584                     generateNOOP(temp);  
585                     temp++;
586                     temp++;
587                     location->isLongJump = true;
588                     // assert(location->hasNoStackFrame() == false);
589                     // assume that this is not a delayed instr.
590                     *temp = location->isDelayedInsn;  
591                     temp++; 
592                     currAddr += sizeof(instruction);
593                     genImmInsn(temp, SAVEop3, REG_SP, -112, REG_SP); 
594                     temp++; 
595                     currAddr += sizeof(instruction);  
596                     *temp = location->delaySlotInsn;
597                     Address new_call_addr = fromAddr - sizeof(instruction);
598                     relocateInstruction(temp,new_call_addr,currAddr,proc);
599                     temp++; 
600                     fromAddr += sizeof(instruction); 
601                     currAddr += sizeof(instruction);
602                     genImmInsn(temp, RESTOREop3, 0, 0, 0);
603                     continue;
604                 }
605             }   
606
607             // otherwise relocate the NEXT instruction
608             relocateInstruction(temp, fromAddr, currAddr,
609                                 (process *)proc);
610             
611             // Third, if the "NEXT instruction" is a DCTI, 
612             if (location->isDelayed) {
613                 fromAddr += sizeof(instruction);
614                 currAddr += sizeof(instruction);
615                 *++temp = location->isDelayedInsn;
616                 relocateInstruction(temp, fromAddr, currAddr,
617                                     (process *)proc);
618                 
619                 // Then, possibly, there's an callAggregate instruction
620                 // after this. 
621                 if (location->callAggregate) {
622                     currAddr += sizeof(instruction);
623                     *++temp = location->aggregateInsn;
624                     continue;
625                 }       
626             }
627             
628             // If the "FIRST instruction" is a DCTI, then our so called 
629             // "NEXT instruction" is in the delayed Slot and this might
630             // happen. (actullay, it happened)
631             if (location->callAggregate) {
632                 currAddr += sizeof(instruction);
633                 *++temp = location->aggregateInsn;
634                 continue;
635             }   
636             
637             // For the leaf function, if there's an inDelaySlot instruction,
638             // move this one to the base Tramp.(i.e. at the function exit,
639             // if the first instruction is in the delayed slot the previous
640             // instruction, we have to move that one too, so we count from 
641             // that one and the last one is this sequence is called inDelaySlot
642             // instruction.)
643             // Well, after all these, another SAVE instruction is generated
644             // so we are prepared to handle the returning to our application's
645             // code segment. 
646             if (location->hasNoStackFrame()) {
647                 if (location->inDelaySlot) {
648                     fromAddr += sizeof(instruction);
649                     currAddr += sizeof(instruction);
650                     *++temp = location->inDelaySlotInsn;
651                     relocateInstruction(temp,fromAddr,currAddr,(process *)proc);
652                     if(location->firstIsConditional){
653                         fromAddr += sizeof(instruction);
654                         currAddr += sizeof(instruction);
655                         *++temp = location->extraInsn;
656                         relocateInstruction(temp, fromAddr, currAddr, proc);
657                     }
658                 } 
659                 
660                 genImmInsn(temp+1, SAVEop3, REG_SP, -112, REG_SP);
661             }
662             
663         } else if (temp->raw == RETURN_INSN) {
664             // compute the real from address if this instrumentation
665             // point is from a shared object image
666             Address baseAddress = 0;
667             if(proc->getBaseAddress(location->image_ptr,baseAddress)){
668             }
669             // Back to the code segement of the application.
670             // If the location is in the leaf procedure, generate an RESTORE
671             // instruction right after the CALL instruction to restore all
672             // the values in the registers.
673             if (location -> hasNoStackFrame()) {
674                 generateCallInsn(temp, currAddr, 
675                                 (baseAddress + location->addr)+location->size);
676                 genImmInsn(temp+1, RESTOREop3, 0, 0, 0);
677             } else {
678                 generateCallInsn(temp, currAddr, 
679                                 (baseAddress + location->addr)+location->size);
680             }
681         } else if (temp->raw == SKIP_PRE_INSN) {
682             unsigned offset;
683             offset = baseAddr+baseTemplate.updateCostOffset-currAddr;
684             generateBranchInsn(temp,offset);
685
686         } else if (temp->raw == SKIP_POST_INSN) {
687             unsigned offset;
688             offset = baseAddr+baseTemplate.returnInsOffset-currAddr;
689             generateBranchInsn(temp,offset);
690
691         } else if (temp->raw == UPDATE_COST_INSN) {
692             
693             baseTemplate.costAddr = currAddr;
694             generateNOOP(temp);
695         } else if ((temp->raw == LOCAL_PRE_BRANCH) ||
696                    (temp->raw == GLOBAL_PRE_BRANCH) ||
697                    (temp->raw == LOCAL_POST_BRANCH) ||
698                    (temp->raw == GLOBAL_POST_BRANCH)) {
699 #if defined(SHM_SAMPLING) && defined(MT_THREAD)
700             if ((temp->raw == LOCAL_PRE_BRANCH) ||
701                 (temp->raw == LOCAL_POST_BRANCH)) {
702                 temp -= NUM_INSN_MT_PREAMBLE;
703                 unsigned numIns=0;
704                 generateMTpreamble((char *)temp, numIns, proc);
705                 temp += NUM_INSN_MT_PREAMBLE;
706             }
707 #endif
708             /* fill with no-op */
709             generateNOOP(temp);
710         }
711     }
712     // TODO cast
713     proc->writeDataSpace((caddr_t)baseAddr, baseTemplate.size,(caddr_t) code);
714
715     delete [] code;
716
717     trampTemplate *baseInst = new trampTemplate;
718     *baseInst = baseTemplate;
719     baseInst->baseAddr = baseAddr;
720     return baseInst;
721 }
722
723 /*
724  * Install the base Tramp for the function relocated.
725  * (it means the base tramp that don't need to bother with long jump and
726  *  is the one we used before for all the functions(since there's no
727  *  long jumps)
728  *  for system calls
729  */ 
730 trampTemplate *installBaseTrampSpecial(const instPoint *&location,
731                                        process *proc,
732                                        vector<instruction> &extra_instrs) 
733 {
734     unsigned currAddr;
735     instruction *code;
736     instruction *temp;
737
738     unsigned baseAddr = inferiorMalloc(proc, baseTemplate.size, textHeap);
739
740     if(!(location->func->isInstalled(proc))) {
741         location->func->relocateFunction(proc,location,extra_instrs);
742     }
743     else if(!location->relocated_){
744         // need to find new instPoint for location...it has the pre-relocated
745         // address of the instPoint
746         location->func->modifyInstPoint(location,proc);      
747     }
748
749     code = new instruction[baseTemplate.size];
750     memcpy((char *) code, (char*) baseTemplate.trampTemp, baseTemplate.size);
751
752     for (temp = code, currAddr = baseAddr; 
753         (currAddr - baseAddr) < (unsigned) baseTemplate.size;
754         temp++, currAddr += sizeof(instruction)) {
755
756         if (temp->raw == EMULATE_INSN) {
757             if (location->isBranchOut) {
758                 // the original instruction is a branch that goes out of a 
759                 // function.  We don't relocate the original instruction. We 
760                 // only get to the tramp if the branch is taken, so we generate
761                 // an unconditional branch to the target of the original 
762                 // instruction here 
763                 assert(location->branchTarget);
764                 int disp = location->branchTarget - currAddr;
765
766                 if (in1BranchInsnRange(currAddr,location->branchTarget)) {
767                   generateBranchInsn(temp, disp);
768                   disp = temp->branch.disp22;
769                 } else {
770                   generateCallInsn(temp, currAddr, disp);
771                 }
772                 continue;
773             }
774             else {
775                 *temp = location->originalInstruction;
776                 Address fromAddress = location->addr;
777                 relocateInstruction(temp, fromAddress, currAddr, proc);
778                 if (location->isDelayed) {
779                     /* copy delay slot instruction into tramp instance */
780                     currAddr += sizeof(instruction);  
781                     *++temp = location->delaySlotInsn;
782                 }
783                 if (location->callAggregate) {
784                     /* copy invalid insn with aggregate size in it */
785                     currAddr += sizeof(instruction);  
786                     *++temp = location->aggregateInsn;
787                 }
788             }
789         } else if (temp->raw == RETURN_INSN) {
790             generateBranchInsn(temp, 
791                 (location->addr+ sizeof(instruction) - currAddr));
792             if (location->isDelayed) {
793                 /* skip the delay slot instruction */
794                 temp->branch.disp22 += 1;
795             }
796             if (location->callAggregate) {
797                 /* skip the aggregate size slot */
798                 temp->branch.disp22 += 1;
799             }
800         } else if (temp->raw == SKIP_PRE_INSN) {
801           unsigned offset;
802           offset = baseAddr+baseTemplate.updateCostOffset-currAddr;
803           generateBranchInsn(temp,offset);
804         } else if (temp->raw == SKIP_POST_INSN) {
805           unsigned offset;
806           offset = baseAddr+baseTemplate.returnInsOffset-currAddr;
807           generateBranchInsn(temp,offset);
808         } else if (temp->raw == UPDATE_COST_INSN) {
809             
810             baseTemplate.costAddr = currAddr;
811             generateNOOP(temp);
812         } else if ((temp->raw == LOCAL_PRE_BRANCH) ||
813                    (temp->raw == GLOBAL_PRE_BRANCH) ||
814                    (temp->raw == LOCAL_POST_BRANCH) ||
815                    (temp->raw == GLOBAL_POST_BRANCH)) {
816 #if defined(SHM_SAMPLING) && defined(MT_THREAD)
817             if ((temp->raw == LOCAL_PRE_BRANCH) ||
818                 (temp->raw == LOCAL_POST_BRANCH)) {
819                 temp -= NUM_INSN_MT_PREAMBLE;
820                 unsigned numIns=0;
821                 generateMTpreamble((char *)temp, numIns, proc);
822                 temp += NUM_INSN_MT_PREAMBLE;
823             }
824 #endif
825             /* fill with no-op */
826             generateNOOP(temp);
827         }
828     }
829     // TODO cast
830     proc->writeDataSpace((caddr_t)baseAddr, baseTemplate.size,(caddr_t) code);
831
832     delete [] code;
833
834     trampTemplate *baseInst = new trampTemplate;
835     *baseInst = baseTemplate;
836     baseInst->baseAddr = baseAddr;
837     return baseInst;
838 }
839
840 /*
841  * Allocate the space for the base Trampoline, and generate the instruction
842  * we need for modifying the code segment.
843  *
844  * 'retInstance' tells you how to modify the code to jump to the base tramp
845  *
846  */
// Parameters:
//   proc        - process being instrumented; proc->baseMap caches one base
//                 tramp per instPoint.
//   location    - the instrumentation point.  For trap functions it is
//                 passed by reference to installBaseTrampSpecial().
//   retInstance - reset to NULL on entry.  On most paths it is set to a new
//                 returnInstance holding the instructions to write over the
//                 application code.  It stays NULL when the tramp was
//                 already cached, and in the trap-function case when the
//                 function had already been installed for this process.
//   (bool)      - unnamed and unused.
// Returns the cached or newly installed base tramp.
847 trampTemplate *findAndInstallBaseTramp(process *proc, 
848                                  instPoint *&location,
849                                  returnInstance *&retInstance,
850                                  bool)
851 {
852     Address adr = location->addr;
853     retInstance = NULL;
854
855     
856     trampTemplate *ret;
857     if (proc->baseMap.find((const instPoint *)location, ret)) // writes to ret if found
858        // This base tramp already exists; nothing to do.
859        return ret;
860
861     if (location->func->isTrapFunc()) {
862        // get the base Address of this function if it is a 
863        // shared object
864        Address baseAddress = 0;
865        if(!proc->getBaseAddress(location->image_ptr,baseAddress)){
866           // TODO: what should be done here?    
867           logLine("Error:findAndInstallBaseTramp call getBaseAddress\n"); 
868        }
869        // Install Base Tramp for the functions which are 
870        // relocated to the heap.
871        vector<instruction> extra_instrs;
872
873        ret = installBaseTrampSpecial(location, proc,extra_instrs);
874
875        // add a branch from relocated function to the base tramp
876        // if function was just relocated then location has old address
877        // otherwise location will have address in already relocated func
           // NOTE(review): both arms of the if/else below are identical as
           // written.
878        if (!location->func->isInstalled(proc)){
879           if (location->isBranchOut){
880              changeBranch(proc, location->addr, 
881                           (int) ret->baseAddr, location->originalInstruction);
882            } else {
883              generateBranch(proc, location->addr, (int)ret->baseAddr);
884            }
885        }
886        else {  // location's address is correct...it is in the heap
887           if (location->isBranchOut){
888              changeBranch(proc, location->addr, 
889                           (int) ret->baseAddr, location->originalInstruction);
890           } else {
891              generateBranch(proc, location->addr, (int)ret->baseAddr);
892           }
893        }
894
895        // If for this process, a call to the relocated function has not
896        // yet be installed in its original location, then genterate the
897        // following instructions at the begining of the function:
898        //   SAVE;             CALL;         RESTORE.
899        // so that it would jump the start of the relocated function
900        // which is in heap.
901        if (!location->func->isInstalled(proc)){
902           location->func->setInstalled(proc);
903           u_int e_size = extra_instrs.size();
904           instruction *insn = new instruction[3 + e_size];
              // NOTE: this 'adr' shadows the function-level 'adr'; here it
              // is the function's address from getAddress(0).
905           Address adr = location-> func -> getAddress(0);
906           genImmInsn(insn, SAVEop3, REG_SP, -112, REG_SP);
907           generateCallInsn(insn+1, adr+baseAddress+4, 
908                            location->func->getAddress(proc));
909           genSimpleInsn(insn+2, RESTOREop3, 0, 0, 0); 
              // the extra instructions collected by installBaseTrampSpecial
              // follow the SAVE/CALL/RESTORE triple
910           for(u_int i=0; i < e_size; i++){
911              insn[3+i] = extra_instrs[i];
912           }
913           retInstance = new returnInstance((instructUnion *)insn, 
914                                            (3+e_size)*sizeof(instruction), 
915                                            adr+baseAddress, 
916                                            location->func->size());
917           assert(retInstance);
918
919           //cerr << "created a new return instance (relocated fn)!" << endl;
920        }
921     } else {
922        // It's not a trap-function; it's a "normal" function
923        // compute the real from address if this instrumentation
924        // point is from a shared object image
925        Address baseAddress = 0;
926        if (proc->getBaseAddress(location->image_ptr,baseAddress)){
927           adr += baseAddress;           
928        }
929
930        ret = installBaseTramp(location, proc);
931        // check to see if this is an entry point and if the delay 
932        // slot instruction is a call insn, if so, then if the 
933        // call is to a location within the function, then we need to 
934        // add an extra instruction after the restore to correctly
935        // set the o7 register
936        bool need_to_add = false;
937        if (location->ipType==functionEntry &&
938            isInsnType(location->delaySlotInsn,CALLmask,CALLmatch)) {
939           Address call_offset = location->addr + 8 + 
940                                 (location->delaySlotInsn.call.disp30<<2);
941           Address fun_addr = location->func->getAddress(0);
942           u_int fun_size = location->func->size();
              // strictly inside the function: the entry address itself does
              // not count
943           if (call_offset>fun_addr && call_offset<(fun_addr+fun_size)) {
944              assert(location->isLongJump);
945              need_to_add = true;
946           }
947        }        
948
949        if (location->hasNoStackFrame()) {
950           // if it is the leaf function, we need to generate
951           // the following instruction sequence:
952           //     SAVE;      CALL;      NOP.
953
954           if (location -> isLongJump == false) {
                 // short jump: a single branch overwrites one instruction
                 // at adr
955              instruction *insn = new instruction;
956              generateBranchInsn(insn, (int)(ret->baseAddr-adr));
957              retInstance = new returnInstance((instructUnion *)insn,
958                                               sizeof(instruction), adr, 
959                                               sizeof(instruction));
960           } else if (need_to_add) {
961              // generate  original; call; add $o7 imm4 
                 // (the two new instructions are written starting at adr+4)
962              instruction *insn = new instruction[2];
963              generateCallInsn(insn, adr+4, (int) ret->baseAddr);
964              genImmInsn(insn+1,ADDop3,REG_O7,4,REG_O7);
965              retInstance = new returnInstance((instructUnion *)insn,
966                                  2*sizeof(instruction), adr+4,
967                                  2*sizeof(instruction));
968           } else {
969             bool already_done = false; 
970             // check to see if the otherInstruction is a call instruction
971             // to itself, if so then generate the following
972             // before                   after           basetramp
973             // ------                   -----           ---------
974             // mov     originalInsn     mov             sethi
975             // call    otherInsn        call            save
976             // sethi   delaySlot        nop             call
977             //                                          restore
978             // only generate a call and nop...leave the originalInsn
979             //
980             if (isInsnType(location->otherInstruction, CALLmask, CALLmatch)) {
981               Address offset = location-> func -> getAddress(0)+4 + 
982                                (location->otherInstruction.call.disp30 << 2);
983               if ((offset > (location->func->getAddress(0))) && 
984                   (offset < ((location->func->getAddress(0))+
985                          location->func->size()))) {
986                      instruction *insn = new instruction[2];
987                      generateCallInsn(insn, adr+4, (int) ret->baseAddr);
988                      generateNOOP(insn+1);
989                      retInstance = new returnInstance((instructUnion *)insn, 
990                                              2*sizeof(instruction), adr+4, 
991                                              2*sizeof(instruction));
992
993                      already_done = true;
994                  }
995              }
996
997              if(!already_done) {
                     // general long-jump leaf case: SAVE; CALL; NOP at adr
998                  instruction *insn = new instruction[3];
999                  genImmInsn(insn, SAVEop3, REG_SP, -112, REG_SP);
1000                  generateCallInsn(insn+1, adr+4, (int) ret->baseAddr);
1001                  generateNOOP(insn+2);
1002                  retInstance = new returnInstance((instructUnion *)insn, 
1003                                              3*sizeof(instruction), adr, 
1004                                              3*sizeof(instruction));
1005              }  
1006           }
1007                 
1008           assert(retInstance);
1009        } else {
1010           // It's not a leaf.
1011           // Generate branch instruction from the application to the
1012           // base trampoline and no SAVE instruction is needed
1013                 
1014           if (in1BranchInsnRange(adr, ret->baseAddr)) {
1015             // make sure that the isLongJump won't be true
1016             // which only is possible for shlib entry point 
1017             //assert(location->isLongJump == false);
1018             if (location->isLongJump) {
1019               instruction *insn = new instruction[2];   
1020               generateCallInsn(insn, adr, (int) ret->baseAddr);
1021               assert(location->ipType == functionEntry);
1022               generateNOOP(insn+1);
1023               retInstance = new returnInstance((instructUnion *)insn, 
1024                                               2*sizeof(instruction), adr, 
1025                                               2*sizeof(instruction));
1026               assert(retInstance);
1027             } else {
1028               instruction *insn = new instruction;
1029               if (location -> ipType == functionEntry) {
                      // function entry: the branch is written one
                      // instruction before adr, so its displacement is one
                      // instruction longer
1030                   generateBranchInsn(insn, (int)(ret->baseAddr-adr+sizeof(instruction))); 
1031                   retInstance = new returnInstance((instructUnion *)insn,
1032                                                    sizeof(instruction), 
1033                                                    adr - sizeof(instruction), 
1034                                                    sizeof(instruction));
1035               } else {
1036                   generateBranchInsn(insn,(int)(ret->baseAddr-adr));
1037                   retInstance = new returnInstance((instructUnion *)insn,
1038                                                    sizeof(instruction), 
1039                                                    adr, 
1040                                                    sizeof(instruction));
1041               }
1042             }
1043           } else if(need_to_add) {
1044              // the delay slot instruction is is a call to a location
1045              // within the same function, then need to generate 3 instrs
1046              //    call
1047              //    nop          // delay slot (originally call insn)
1048              //    add o7 imm4  // sets o7 register to correct value
1049              instruction *insn = new instruction[3];    
1050              generateCallInsn(insn, adr, (int) ret->baseAddr);
1051              generateNOOP(insn+1);
1052              genImmInsn(insn+2,ADDop3,REG_O7,4,REG_O7);
1053              retInstance = new returnInstance((instructUnion *)insn, 
1054                                               3*sizeof(instruction), adr, 
1055                                               3*sizeof(instruction));
1056           } else {
                 // out of single-branch range: CALL plus a delay-slot
                 // instruction chosen by point type below
1057              instruction *insn = new instruction[2];    
1058              generateCallInsn(insn, adr, (int) ret->baseAddr);
1059              if (location -> ipType == functionEntry) {
1060                 if (location -> isLongJump)
1061                    generateNOOP(insn+1);
1062                 else
1063                    genSimpleInsn(insn+1, RESTOREop3, 0, 0, 0);
1064              } else
1065                 generateNOOP(insn+1);
1066
1067              retInstance = new returnInstance((instructUnion *)insn, 
1068                                               2*sizeof(instruction), adr, 
1069                                               2*sizeof(instruction));
1070              assert(retInstance);
1071           }
1072        }
1073     }
1074
        // cache the tramp so later requests for this instPoint return early
1075     proc->baseMap[(const instPoint *)location] = ret;
1076         
1077     return(ret);
1078        // remember, ret was the result of either installBaseTramp() or
1079        // installBaseTrampSpecial()
1080 }
1081
1082 /*
1083  * Install a single tramp.
1084  *
1085  */
1086 void installTramp(instInstance *inst, char *code, int codeSize) 
1087 {
1088     totalMiniTramps++;
1089     insnGenerated += codeSize/sizeof(int);
1090     
1091     // TODO cast
1092     (inst->proc)->writeDataSpace((caddr_t)inst->trampBase, codeSize, code);
1093
1094     unsigned atAddr;
1095     if (inst->when == callPreInsn) {
1096         if (inst->baseInstance->prevInstru == false) {
1097             atAddr = inst->baseInstance->baseAddr+baseTemplate.skipPreInsOffset;
1098             inst->baseInstance->cost += inst->baseInstance->prevBaseCost;
1099             inst->baseInstance->prevInstru = true;
1100             generateNoOp(inst->proc, atAddr);
1101         }
1102     } else {
1103         if (inst->baseInstance->postInstru == false) {
1104             atAddr = inst->baseInstance->baseAddr+baseTemplate.skipPostInsOffset; 
1105             inst->baseInstance->cost += inst->baseInstance->postBaseCost;
1106             inst->baseInstance->postInstru = true;
1107             generateNoOp(inst->proc, atAddr);
1108         }
1109     }
1110 }
1111
1112
1113 unsigned emitFuncCall(opCode op, 
1114                       registerSpace *rs,
1115                       char *i, unsigned &base, 
1116                       const vector<AstNode *> &operands, 
1117                       const string &callee, process *proc,
1118                       bool noCost)
1119 {
1120         assert(op == callOp);
1121         unsigned addr;
1122         bool err;
1123         vector <reg> srcs;
1124         void cleanUpAndExit(int status);
1125
1126         addr = proc->findInternalAddress(callee, false, err);
1127
1128         if (err) {
1129             function_base *func = proc->findOneFunction(callee);
1130             if (!func) {
1131                   ostrstream os(errorLine, 1024, ios::out);
1132                   os << "Internal error: unable to find addr of " << callee << endl;
1133                   showErrorCallback(80, (const char *) errorLine);
1134                   P_abort();
1135             }
1136             // TODO: is this correct or should we get relocated address?
1137             addr = func->getAddress(0);
1138         }
1139         
1140         for (unsigned u = 0; u < operands.size(); u++)
1141             srcs += operands[u]->generateCode(proc, rs, i, base, noCost);
1142
1143         // TODO cast
1144         instruction *insn = (instruction *) ((void*)&i[base]);
1145
1146         for (unsigned u=0; u<srcs.size(); u++){
1147             if (u >= 5) {
1148                  string msg = "Too many arguments to function call in instrumentation code: only 5 arguments can be passed on the sparc architecture.\n";
1149                  fprintf(stderr, msg.string_of());
1150                  showErrorCallback(94,msg);
1151                  cleanUpAndExit(-1);
1152             }
1153             genSimpleInsn(insn, ORop3, 0, srcs[u], u+8); insn++;
1154             base += sizeof(instruction);
1155             rs->freeRegister(srcs[u]);
1156         }
1157
1158         // As Ling pointed out to me, the following is rather inefficient.  It does:
1159         //   sethi %hi(addr), %o5
1160         //   jmpl %o5 + %lo(addr), %o7   ('call' pseudo-instr)
1161         //   nop
1162         // We can do better:
1163         //   call <addr>    (but note that the call true-instr is pc-relative jump)
1164         //   nop
1165         generateSetHi(insn, addr, 13); insn++;
1166         genImmInsn(insn, JMPLop3, 13, LOW10(addr), 15); insn++;
1167         generateNOOP(insn);
1168
1169         base += 3 * sizeof(instruction);
1170
1171         // return value is the register with the return value from the
1172         //   function.
1173         // This needs to be %o0 since it is back in the callers scope.
1174         return(8);
1175 }
1176  
1177 unsigned emit(opCode op, reg src1, reg src2, reg dest, char *i, unsigned &base,
1178               bool noCost)
1179 {
1180     // TODO cast
1181     instruction *insn = (instruction *) ((void*)&i[base]);
1182
1183     if (op == loadConstOp) {
1184       // dest = src1:imm    TODO
1185       if (src1 > MAX_IMM13 || src1 < MIN_IMM13) {
1186             // src1 is out of range of imm13, so we need an extra instruction
1187             generateSetHi(insn, src1, dest);
1188             base += sizeof(instruction);
1189             insn++;
1190
1191             // or regd,imm,regd
1192
1193             // Chance for optimization: we should check for LOW10(src1)==0, and
1194             // if so, don't generate the following bitwise-or instruction, since
1195             // in that case nothing would be done.
1196
1197             genImmInsn(insn, ORop3, dest, LOW10(src1), dest);
1198             base += sizeof(instruction);
1199         } else {
1200             // really or %g0,imm,regd
1201             genImmInsn(insn, ORop3, 0, src1, dest);
1202
1203             base += sizeof(instruction);
1204         }
1205     } else if (op ==  loadOp) {
1206         // dest = [src1]   TODO
1207         generateSetHi(insn, src1, dest);
1208         insn++;
1209
1210         generateLoad(insn, dest, LOW10(src1), dest);
1211
1212         base += sizeof(instruction)*2;
1213     } else if (op ==  loadIndirOp) {
1214         generateLoad(insn, src1, 0, dest);
1215         base += sizeof(instruction);
1216     } else if (op ==  storeOp) {
1217         insn->sethi.op = FMT2op;
1218         insn->sethi.rd = src2;
1219         insn->sethi.op2 = SETHIop2;
1220         insn->sethi.imm22 = HIGH22(dest);
1221         insn++;
1222
1223         generateStore(insn, src1, src2, LOW10(dest));
1224
1225         base += sizeof(instruction)*2;
1226     } else if (op ==  storeIndirOp) {
1227         generateStore(insn, src1, dest, 0);
1228         base += sizeof(instruction);
1229     } else if (op ==  ifOp) {
1230         // cmp src1,0
1231         genImmInsn(insn, SUBop3cc, src1, 0, 0); insn++;
1232         //genSimpleInsn(insn, SUBop3cc, src1, 0, 0); insn++;
1233
1234         insn->branch.op = 0;
1235         insn->branch.cond = BEcond;
1236         insn->branch.op2 = BICCop2;
1237         insn->branch.anneal = false;
1238         insn->branch.disp22 = dest/4;
1239         insn++;
1240
1241         generateNOOP(insn);
1242         base += sizeof(instruction)*3;
1243         return(base - 2*sizeof(instruction));
1244     } else if (op == branchOp) {
1245         // Unconditional branch
1246         generateBranchInsn(insn, dest); insn++;
1247
1248         generateNOOP(insn);
1249         base += sizeof(instruction)*2;
1250         return(base - 2*sizeof(instruction));
1251     } else if (op ==  updateCostOp) {
1252         // generate code to update the observed cost.
1253         if (!noCost) {
1254            // sethi %hi(dest), %l0
1255            generateSetHi(insn, dest, REG_L0);
1256            base += sizeof(instruction);
1257            insn++;
1258   
1259            // ld [%l0+ lo(dest)], %l1
1260            generateLoad(insn, REG_L0, LOW10(dest), REG_L1);
1261            base += sizeof(instruction);
1262            insn++;
1263   
1264            // update value (src1 holds the cost, in cycles; e.g. 19)
1265            if (src1 <= MAX_IMM13) {
1266               genImmInsn(insn, ADDop3, REG_L1, src1, REG_L1);
1267               base += sizeof(instruction);
1268               insn++;
1269
1270               generateNOOP(insn);
1271               base += sizeof(instruction);
1272               insn++;
1273
1274               generateNOOP(insn);
1275               base += sizeof(instruction);
1276               insn++;
1277            } else {
1278               // load in two parts
1279               generateSetHi(insn, src1, REG_L2);
1280               base += sizeof(instruction);
1281               insn++;
1282
1283               // or regd,imm,regd
1284               genImmInsn(insn, ORop3, REG_L2, LOW10(src1), REG_L2);
1285               base += sizeof(instruction);
1286               insn++;
1287
1288               // now add it
1289               genSimpleInsn(insn, ADDop3, REG_L1, REG_L2, REG_L1);
1290               base += sizeof(instruction);
1291               insn++;
1292            }
1293   
1294            // store result st %l1, [%l0+ lo(dest)];
1295            generateStore(insn, REG_L1, REG_L0, LOW10(dest));
1296            base += sizeof(instruction);
1297            insn++;
1298         } // if (!noCost)
1299     } else if (op ==  trampPreamble) {
1300 #ifdef ndef
1301         // save and restore are done inthe base tramp now
1302         genImmInsn(insn, SAVEop3, REG_SP, -112, REG_SP);
1303         base += sizeof(instruction);
1304         insn++;
1305
1306         // generate code to save global registers
1307         for (unsigned u = 0; u < 4; u++) {
1308           genStoreD(insn, 2*u, REG_FP, - (8 + 8*u));
1309           base += sizeof(instruction);
1310           insn++;
1311         }
1312 #endif
1313     } else if (op ==  trampTrailer) {
1314 #ifdef ndef
1315         // save and restore are done inthe base tramp now
1316         // generate code to restore global registers
1317         for (unsigned u = 0; u < 4; u++) {
1318           genLoadD(insn, REG_FP, - (8 + 8*u), 2*u);
1319           base += sizeof(instruction);
1320           insn++;
1321         }
1322
1323         // sequence: restore; nop; b,a back to base tramp; nop
1324         // we can do better.  How about putting the restore in
1325         // the delay slot of the branch instruction, as in:
1326         // b <back to base tramp>; restore
1327         genSimpleInsn(insn, RESTOREop3, 0, 0, 0); 
1328         base += sizeof(instruction);
1329         insn++;
1330
1331         generateNOOP(insn);
1332         base += sizeof(instruction);
1333         insn++;
1334 #endif
1335         // dest is in words of offset and generateBranchInsn is bytes offset
1336         generateBranchInsn(insn, dest << 2);
1337         base += sizeof(instruction);
1338         insn++;
1339
1340         // add no-op, SS-5 sometimes seems to try to decode this insn - jkh 2/14
1341         generateNOOP(insn);
1342         insn++;
1343         base += sizeof(instruction);
1344
1345         return(base -  2 * sizeof(instruction));
1346     } else if (op == noOp) {
1347         generateNOOP(insn);
1348         base += sizeof(instruction);
1349     } else if (op == getParamOp) {
1350 #if defined(SHM_SAMPLING) && defined(MT_THREAD)
1351         // saving CT/vector address on the stack
1352         generateStore(insn, REG_MT, REG_FP, -40);
1353         insn++;
1354 #endif
1355         // first 8 parameters are in register 24 ....
1356         genSimpleInsn(insn, RESTOREop3, 0, 0, 0);
1357         insn++;
1358
1359         generateStore(insn, 24+src1, REG_SP, 68+4*src1); 
1360         insn++;
1361               
1362         genImmInsn(insn, SAVEop3, REG_SP, -112, REG_SP);
1363         insn++;
1364
1365         generateLoad(insn, REG_SP, 112+68+4*src1, 24+src1); 
1366         insn++;
1367
1368 #if defined(SHM_SAMPLING) && defined(MT_THREAD)
1369         // restoring CT/vector address back in REG_MT
1370         generateLoad(insn, REG_FP, -40, REG_MT);
1371         insn++;
1372         base += 6*sizeof(instruction);
1373 #else
1374         base += 4*sizeof(instruction);
1375 #endif
1376         
1377         if (src1 <= 8) {
1378             return(24+src1);
1379         }
1380         
1381         abort();
1382     } else if (op == getSysParamOp) {
1383         
1384         if (src1 <= 8) {
1385             return(24+src1);
1386         }       
1387     } else if (op == getRetValOp) {
1388         // return value is in register 24
1389         genSimpleInsn(insn, RESTOREop3, 0, 0, 0);
1390         insn++;
1391
1392         generateStore(insn, 24, REG_SP, 68); 
1393         insn++;
1394               
1395         genImmInsn(insn, SAVEop3, REG_SP, -112, REG_SP);
1396         insn++;
1397
1398         generateLoad(insn, REG_SP, 112+68, 24); 
1399         insn++;
1400
1401         base += 4*sizeof(instruction);
1402
1403         return(24);
1404
1405     } else if (op == getSysRetValOp) {
1406
1407         return(24);
1408     } else if (op == saveRegOp) {
1409         // should never be called for this platform.
1410         abort();
1411     } else {
1412       int op3=-1;
1413         switch (op) {
1414             // integer ops
1415             case plusOp:
1416                 op3 = ADDop3;
1417                 break;
1418
1419             case minusOp:
1420                 op3 = SUBop3;
1421                 break;
1422
1423             case timesOp:
1424                 op3 = SMULop3;
1425                 break;
1426
1427             case divOp:
1428                 op3 = SDIVop3;
1429                 //need to set the Y register to Zero, Zhichen
1430                 genImmInsn(insn, WRYop3, REG_G0, 0, 0);
1431                 base += sizeof(instruction);
1432                 insn = (instruction *) ((void*)&i[base]);
1433                 break;
1434
1435             // Bool ops
1436             case orOp:
1437                 op3 = ORop3;
1438                 break;
1439
1440             case andOp:
1441                 op3 = ANDop3;
1442                 break;
1443
1444             // rel ops
1445             // For a particular condition (e.g. <=) we need to use the
1446             // the opposite in order to get the right value (e.g. for >=
1447             // we need BLTcond) - naim
1448             case eqOp:
1449                 genRelOp(insn, BNEcond, src1, src2, dest, base);
1450                 return(0);
1451                 break;
1452
1453             case neOp:
1454                 genRelOp(insn, BEcond, src1, src2, dest, base);
1455                 return(0);
1456                 break;
1457
1458             case lessOp:
1459                 genRelOp(insn, BGEcond, src1, src2, dest, base);
1460                 return(0);
1461                 break;
1462
1463             case leOp:
1464                 genRelOp(insn, BGTcond, src1, src2, dest, base);
1465                 return(0);
1466                 break;
1467
1468             case greaterOp:
1469                 genRelOp(insn, BLEcond, src1, src2, dest, base);
1470                 return(0);
1471                 break;
1472
1473             case geOp:
1474                 genRelOp(insn, BLTcond, src1, src2, dest, base);
1475                 return(0);
1476                 break;
1477
1478             default:
1479                 abort();
1480                 break;
1481         }
1482         genSimpleInsn(insn, op3, src1, src2, dest);
1483
1484         base += sizeof(instruction);
1485       }
1486     return(0);
1487 }
1488
1489 /*
1490  * Find the instPoints of this function.
1491  */
1492 bool pd_Function::findInstPoints(const image *owner) {
1493
1494
1495    //cerr << "pd_Function::findInstPoints called " << *this;
1496    if (size() == 0) {
1497      //cerr << " size = 0, returning FALSE" << endl;
1498      return false;
1499    }
1500
1501    noStackFrame = true;
1502
1503    Address adr;
1504    Address adr1 = getAddress(0);
1505    instruction instr;
1506    instr.raw = owner->get_instruction(adr1);
1507    if (!IS_VALID_INSN(instr)) {
1508      //cerr << " IS_VALID_ISIN(adr1) == 0, returning FALSE" << endl;
1509      return false;
1510    }
1511
1512    // If it contains an instruction, I assume it would be s system call
1513    // which will be treat differently. 
1514    isTrap = false;
1515    bool func_entry_found = false;
1516
1517    for ( ; adr1 < getAddress(0) + size(); adr1 += 4) {
1518        instr.raw = owner->get_instruction(adr1);
1519
1520        // If there's an TRAP instruction in the function, we 
1521        // assume that it is an system call and will relocate it 
1522        // to the heap
1523        if (isInsnType(instr, TRAPmask, TRAPmatch)) {
1524            isTrap = true;
1525            //cerr << " TRAP instrcution detected, returning findInstPoints" << endl;
1526            return findInstPoints(owner, getAddress(0), 0);
1527        } 
1528
1529        // TODO: This is a hacking for the solaris(solaris2.5 actually)
1530        // We will relocate that function if the function has been 
1531        // tail-call optimazed.
1532        // (Actully, the reason of this is that the system calls like 
1533        //  read, write, etc have the tail-call optimazation to call
1534        //  the _read, _write etc. which contain the TRAP instruction 
1535        //  This is only done if libc is statically linked...if the
1536        //  libTag is set, otherwise we instrument read and _read
1537        //  both for the dynamically linked case
1538        
1539        if (isCallInsn(instr)) {
1540            instruction nexti; 
1541            nexti.raw = owner->get_instruction(adr1+4);
1542                
1543            if (nexti.rest.op == 2 
1544                && ((nexti.rest.op3 == ORop3 && nexti.rest.rd == 15)
1545                        || nexti.rest.op3 == RESTOREop3)) {
1546                isTrap = true;
1547                // ALERT ALERT
1548                //cerr << " tail call optimization pattern detected, returning findInstPoints, function name = " << prettyName().string_of()  << endl;
1549                return findInstPoints(owner, getAddress(0), 0);
1550            }
1551        }   
1552        
1553
1554        // The function Entry is defined as the first SAVE instruction plus
1555        // the instructions after this.
1556        // ( The first instruction for the nonleaf function is not 
1557        //   necessarily a SAVE instruction. ) 
1558        if (isInsnType(instr, SAVEmask, SAVEmatch) && !func_entry_found) {
1559            //cerr << " save instruction found" << endl;
1560            noStackFrame = false;
1561
1562            func_entry_found = true;
1563            funcEntry_ = new instPoint(this, instr, owner, adr1, true,
1564                                       functionEntry);
1565            adr = adr1;
1566            assert(funcEntry_);
1567        }
1568    }
1569
1570    // If there's no SAVE instruction found, this is a leaf function and
1571    // and function Entry will be defined from the first instruction
1572    if (noStackFrame) {
1573        //cerr << " noStackFrame, apparently leaf function" << endl;
1574        adr = getAddress(0);
1575        instr.raw = owner->get_instruction(adr);
1576        funcEntry_ = new instPoint(this, instr, owner, adr, true,
1577                                   functionEntry);
1578        assert(funcEntry_);
1579    }
1580
1581    for ( ; adr < getAddress(0) + size(); adr += sizeof(instruction)) {
1582
1583      instr.raw = owner->get_instruction(adr);
1584
1585      bool done;
1586
1587      // check for return insn and as a side affect decide if we are at the
1588      //   end of the function.
1589      if (isReturnInsn(owner, adr, done)) {
1590        // define the return point
1591        funcReturns += new instPoint(this, instr, owner, adr, false,
1592                                     functionExit);
1593
1594      } else if (instr.branch.op == 0 
1595                 && (instr.branch.op2 == 2 || instr.branch.op2 == 6) 
1596                 && (instr.branch.cond == 0 ||instr.branch.cond == 8)) {
1597        // find if this branch is going out of the function
1598        int disp = instr.branch.disp22;
1599        Address target = adr +  (disp << 2);
1600        if ((target < (getAddress(0)))  
1601            || (target >= (getAddress(0) + size()))) {
1602          instPoint *point = new instPoint(this, instr, owner, adr, false,
1603                                           functionExit);
1604          funcReturns += point;
1605        }
1606
1607      } else if (isCallInsn(instr)) {
1608
1609        // if the call target is the address of the call instruction
1610        // then this is not something that we can instrument...
1611        // this occurs in functions with code that is modifined when 
1612        // they are loaded by the run-time linker, or when the .init
1613        // section is executed.  In this case the instructions in the
1614        // parsed image file are different from the ones in the executable
1615        // process.
1616        if(instr.call.op == CALLop) { 
1617            Address call_target = adr + (instr.call.disp30 << 2);
1618            if(call_target == adr){ 
1619                 return false;
1620            }
1621        }
1622        // first, check for tail-call optimization: a call where the instruction 
1623        // in the delay slot write to register %o7(15), usually just moving
1624        // the caller's return address, or doing a restore
1625        // Tail calls are instrumented as return points, not call points.
1626
1627
1628        instruction nexti; 
1629        nexti.raw = owner->get_instruction(adr+4);
1630
1631        if (nexti.rest.op == 2 
1632            && ((nexti.rest.op3 == ORop3 && nexti.rest.rd == 15)
1633               || nexti.rest.op3 == RESTOREop3)) {
1634          // ALERT ALERT
1635          //fprintf(stderr, "#### Tail-call optimization in function %s, addr %x\n",
1636          //     prettyName().string_of(), adr);
1637          //cerr << "tail-call optimization detected for function " << prettyName().string_of() << endl;
1638          funcReturns += new instPoint(this, instr, owner, adr, false,
1639                                       functionExit);
1640
1641        } else {
1642          // define a call point
1643          // this may update address - sparc - aggregate return value
1644          // want to skip instructions
1645          bool err;
1646          int dummyId;
1647          instPoint *blah = 0;
1648          adr = newCallPoint(adr, instr, owner, err, dummyId, adr,0,blah);
1649        }
1650      }
1651
1652      else if (isInsnType(instr, JMPLmask, JMPLmatch)) {
1653        /* A register indirect jump. Some jumps may exit the function 
1654           (e.g. read/write on SunOS). In general, the only way to 
1655           know if a jump is exiting the function is to instrument
1656           the jump to test if the target is outside the current 
1657           function. Instead of doing this, we just check the 
1658           previous two instructions, to see if they are loading
1659           an address that is out of the current function.
1660           This should catch the most common cases (e.g. read/write).
1661           For other cases, we would miss a return point.
1662
1663           This is the case considered:
1664
1665              sethi addr_hi, r
1666              or addr_lo, r, r
1667              jump r
1668         */
1669
1670        reg jumpreg = instr.rest.rs1;
1671        instruction prev1;
1672        instruction prev2;
1673
1674        prev1.raw = owner->get_instruction(adr-4);
1675        prev2.raw = owner->get_instruction(adr-8);
1676
1677        unsigned targetAddr;
1678
1679        if (instr.rest.rd == 0 && (instr.rest.i == 1 || instr.rest.rs2 == 0)
1680            && prev2.sethi.op == FMT2op && prev2.sethi.op2 == SETHIop2 
1681            && prev2.sethi.rd == (unsigned)jumpreg
1682            && prev1.rest.op == RESTop 
1683            && prev1.rest.rd == (unsigned)jumpreg && prev1.rest.i == 1
1684            && prev1.rest.op3 == ORop3 && prev1.rest.rs1 == (unsigned)jumpreg) {
1685
1686          targetAddr = (prev2.sethi.imm22 << 10) & 0xfffffc00;
1687          targetAddr |= prev1.resti.simm13;
1688          if ((targetAddr<getAddress(0))||(targetAddr>=(getAddress(0)+size()))){
1689            instPoint *point = new instPoint(this, instr, owner, adr, false, 
1690                                             functionExit);
1691            funcReturns += point;
1692          }
1693        }
1694
1695      }
1696  }
1697    
1698  return (checkInstPoints(owner)); 
1699 }
1700
1701 /*
1702  * Check all the instPoints within this function to see if there's 
1703  * any conficts happen.
1704  */
1705 bool pd_Function::checkInstPoints(const image *owner) {
1706
1707     // Our own library function, skip the test.
1708     if (prettyName().prefixed_by("DYNINST")) 
1709         return true;
1710
1711 #ifndef BPATCH_LIBRARY /* XXX Users of libdyninstAPI might not agree. */
1712     // The function is too small to be worthing instrumenting.
1713     if (size() <= 12){
1714         return false;
1715     }
1716 #endif
1717
1718     // No function return! return false;
1719     if (sizeof(funcReturns) == 0) {
1720         return false;
1721     }
1722
1723     instruction instr;
1724     Address adr = getAddress(0);
1725
1726     bool retl_inst = false;
1727     // Check if there's any branch instruction jump to the middle
1728     // of the instruction sequence in the function entry point
1729     // and function exit point.
1730     for ( ; adr < getAddress(0) + size(); adr += sizeof(instruction)) {
1731
1732         instr.raw = owner->get_instruction(adr);
1733         if(isInsnType(instr, RETLmask, RETLmatch)) retl_inst = true;
1734
1735         if (isInsnType(instr, BRNCHmask, BRNCHmatch)||
1736             isInsnType(instr, FBRNCHmask, FBRNCHmatch)) {
1737
1738             int disp = instr.branch.disp22;
1739             Address target = adr + (disp << 2);
1740
1741             if ((target > funcEntry_->addr)&&
1742                 (target < (funcEntry_->addr + funcEntry_->size))) {
1743                 if (adr > (funcEntry_->addr+funcEntry_->size)){
1744                     //cout << "Function " << prettyName().string_of() <<" entry" << endl;
1745                     return false;
1746             } }
1747
1748             for (u_int i = 0; i < funcReturns.size(); i++) {
1749                 if ((target > funcReturns[i]->addr)&&
1750                     (target < (funcReturns[i]->addr + funcReturns[i]->size))) {
1751                     if ((adr < funcReturns[i]->addr)||
1752                         (adr > (funcReturns[i]->addr + funcReturns[i]->size))){
1753                         return false;
1754                 } }
1755             }
1756         }
1757     }
1758
1759     // if there is a retl instruction and we don't think this is a leaf
1760     // function then this is a way messed up function...well, at least we
1761     // we can't deal with this...the only example I can find is _cerror
1762     // and _cerror64 in libc.so.1
1763     if(retl_inst && !noStackFrame){
1764          return false;
1765     }
1766
1767     // check that no instrumentation points could overlap
1768     Address func_entry = funcEntry_->addr + funcEntry_->size; 
1769     for (u_int i = 0; i < funcReturns.size(); i++) {
1770         if(func_entry >= funcReturns[i]->addr){
1771            return false;
1772         }
1773         if(i >= 1){ // check if return points overlap
1774             Address prev_exit = funcReturns[i-1]->addr+funcReturns[i-1]->size;  
1775             if(funcReturns[i]->addr < prev_exit) {
1776                 return false;
1777             } 
1778         }
1779     }
1780
1781     return true;        
1782 }
1783
1784
1785 /* The maximum length of relocatable function is 1k instructions */  
1786 // This function is to find the inst Points for a function
1787 // that will be relocated if it is instrumented. 
1788 bool pd_Function::findInstPoints(const image *owner, Address newAdr, process*){
1789
1790    int i;
1791    if (size() == 0) {
1792      return false;
1793    }
1794    relocatable_ = true;
1795
1796    Address adr = getAddress(0);
1797    instruction instr;
1798    instr.raw = owner->get_instruction(adr);
1799    if (!IS_VALID_INSN(instr))
1800      return false;
1801    
1802    if (size() <= 3*sizeof(instruction)) 
1803        return false;
1804
1805    instPoint *point = new instPoint(this, instr, owner, newAdr, true, 
1806                                     functionEntry, adr);
1807
1808    funcEntry_ = point;
1809
1810    // if the second instruction in a relocated function is a call instruction
1811    // or a branch instruction, then we can't deal with this.
1812    // New: only problem if call is to location outside of function, or
1813    //  a jump to itself....
1814    if(size() > sizeof(instruction)){
1815        Address second_adr = adr + sizeof(instruction);
1816        instruction second_instr;
1817        second_instr.raw =  owner->get_instruction(second_adr); 
1818
1819        if (isCallInsn(second_instr)) {
1820            Address call_target = second_adr + (second_instr.call.disp30 << 2);
1821            // if call dest. is outside of function, assume real
1822            //  call site.  Assuming cant deal with this case!!!!
1823            if (!(call_target >= adr && call_target <= adr + size()) || \
1824                (call_target == second_adr)) {
1825                return false;
1826            }
1827        }
1828
1829        if (second_instr.branch.op == 0 && 
1830                       (second_instr.branch.op2 == 2 || 
1831                       second_instr.branch.op2 == 6)) {
1832            return false;
1833        }
1834    }    
1835
1836    assert(funcEntry_);
1837    int retId = 0;
1838    int callsId = 0; 
1839
1840    for (i = 0; adr < getAddress(0) + size(); adr += sizeof(instruction),  
1841         newAdr += sizeof(instruction), i++) {
1842
1843      instr.raw = owner->get_instruction(adr);
1844      newInstr[i] = instr;
1845      bool done;
1846
1847      // check for return insn and as a side affect decide if we are at the
1848      //   end of the function.
1849      if (isReturnInsn(owner, adr, done)) {
1850        // define the return point
1851        instPoint *point = new instPoint(this, instr, owner, newAdr, false, 
1852                                         functionExit, adr);
1853        funcReturns += point;
1854        funcReturns[retId] -> instId = retId++;
1855      } else if (instr.branch.op == 0 
1856                 && (instr.branch.op2 == 2 || instr.branch.op2 == 6)) {
1857        // find if this branch is going out of the function
1858        int disp = instr.branch.disp22;
1859        Address target = adr + (disp << 2);
1860        if (target < getAddress(0) || target >= getAddress(0) + size()) {
1861            instPoint *point = new instPoint(this, newInstr[i], owner, 
1862                                             newAdr, false, 
1863                                             functionExit, adr);
1864            if ((instr.branch.cond != 0) && (instr.branch.cond != 8)) {  
1865                point->isBranchOut = true;
1866                point->branchTarget = target;
1867            }
1868            funcReturns += point;
1869            funcReturns[retId] -> instId = retId++;
1870        }
1871
1872      } else if (isCallInsn(instr)) {
1873
1874        // first, check for tail-call optimization: a call where the instruction 
1875        // in the delay slot write to register %o7(15), usually just moving
1876        // the caller's return address, or doing a restore
1877        // Tail calls are instrumented as return points, not call points.
1878        instruction nexti; 
1879        nexti.raw = owner->get_instruction(adr+4);
1880
1881        if (nexti.rest.op == 2 
1882            && ((nexti.rest.op3 == ORop3 && nexti.rest.rd == 15)
1883               || nexti.rest.op3 == RESTOREop3)) {
1884
1885            instPoint *point = new instPoint(this, instr, owner, newAdr, false,
1886                                       functionExit, adr);
1887            funcReturns += point;
1888            funcReturns[retId] -> instId = retId++;
1889
1890        } else {
1891          // if this is a call instr to a location within the function, and if 
1892          // the offest is not 8 then do not define this function 
1893          if(instr.call.op == CALLop){ 
1894            Address call_target = adr + (instr.call.disp30 << 2);
1895            if((call_target >= getAddress(0)) 
1896               && (call_target <= (getAddress(0)+size()))){ 
1897               if((instr.call.disp30 << 2) != 2*sizeof(instruction)) {
1898                 return false;
1899               }
1900            }
1901          }
1902          // define a call point
1903          // this may update address - sparc - aggregate return value
1904          // want to skip instructions
1905          bool err;
1906          instPoint *blah = 0;
1907          adr = newCallPoint(newAdr, instr, owner, err, callsId, adr,0,blah);
1908          if (err)
1909            return false;
1910        }
1911      }
1912
1913      else if (isInsnType(instr, JMPLmask, JMPLmatch)) {
1914        /* A register indirect jump. Some jumps may exit the function 
1915           (e.g. read/write on SunOS). In general, the only way to 
1916           know if a jump is exiting the function is to instrument
1917           the jump to test if the target is outside the current 
1918           function. Instead of doing this, we just check the 
1919           previous two instructions, to see if they are loading
1920           an address that is out of the current function.
1921           This should catch the most common cases (e.g. read/write).
1922           For other cases, we would miss a return point.
1923
1924           This is the case considered:
1925
1926              sethi addr_hi, r
1927              or addr_lo, r, r
1928              jump r
1929         */
1930
1931          reg jumpreg = instr.rest.rs1;
1932          instruction prev1;
1933          instruction prev2;
1934          
1935          prev1.raw = owner->get_instruction(adr-4);
1936          prev2.raw = owner->get_instruction(adr-8);
1937
1938          unsigned targetAddr;
1939
1940          if (instr.rest.rd == 0 && (instr.rest.i == 1 || instr.rest.rs2 == 0)
1941              && prev2.sethi.op == FMT2op && prev2.sethi.op2 == SETHIop2 
1942              && prev2.sethi.rd == (unsigned)jumpreg
1943              && prev1.rest.op == RESTop 
1944              && prev1.rest.rd == (unsigned)jumpreg && prev1.rest.i == 1
1945              && prev1.rest.op3 == ORop3 && prev1.rest.rs1 == (unsigned)jumpreg){
1946              
1947              targetAddr = (prev2.sethi.imm22 << 10) & 0xfffffc00;
1948              targetAddr |= prev1.resti.simm13;
1949              if ((targetAddr < getAddress(0)) 
1950                  || (targetAddr >= (getAddress(0)+size()))) {
1951                  instPoint *point = new instPoint(this, instr, owner, 
1952                                                   newAdr, false,
1953                                                   functionExit, adr);
1954                  funcReturns += point;
1955                  funcReturns[retId] -> instId = retId++;
1956              }
1957          }
1958      }
1959  }
1960  return true;
1961 }
1962
1963 // This function assigns new address to instrumentation points of  
1964 // a function that has been relocated
1965 bool pd_Function::findNewInstPoints(const image *owner, 
1966                                 const instPoint *&location,
1967                                 Address newAdr,
1968                                 process *proc,
1969                                 vector<instruction> &callInstrs,
1970                                 relocatedFuncInfo *reloc_info) {
1971
1972    int i, orig_call_insn;
1973    if (size() == 0) {
1974      return false;
1975    }
1976    assert(reloc_info);
1977
1978    // Note : newInstr defined as array 1024 long in inst-sparc.h
1979    //  bad things (tm) can happen if thats not true....
1980    assert((size() + RELOCATED_FUNC_EXTRA_SPACE) <= NEW_INSTR_ARRAY_LEN);
1981
1982    Address adr = getAddress(0);
1983    instruction instr;
1984    instr.raw = owner->get_instruction(adr);
1985    if (!IS_VALID_INSN(instr))
1986      return false;
1987
1988    instPoint *point = new instPoint(this, instr, owner, newAdr, true, 
1989                                     functionEntry, adr);
1990    point->relocated_ = true;
1991    // if location was the entry point then change location's value to new pt
1992    if(location == funcEntry_) { 
1993         location = point;
1994    }
1995
1996    reloc_info->addFuncEntry(point);
1997    assert(reloc_info->funcEntry());
1998    int retId = 0;
1999    int callsId = 0; 
2000
2001    // get baseAddress if this is a shared object
2002    Address baseAddress = 0;
2003    if(!(proc->getBaseAddress(owner,baseAddress))){
2004         baseAddress =0;
2005    }
2006
2007    // if we have call instructions that need to be added after the instrs
2008    // to call the relocated instruction, the first address we can use is
2009    // the address of the 4th instruction in the function
2010    Address call_start_addr = getAddress(0)+baseAddress + 3*sizeof(instruction);
2011
2012    for (i = 0; adr < getAddress(0) + size(); adr += 4,  newAdr += 4, i++) {
2013     
2014      instr.raw = owner->get_instruction(adr);
2015      newInstr[i] = instr;
2016
2017      bool done;
2018
2019      // check for return insn and as a side affect decide if we are at the
2020      //   end of the function.
2021      if (isReturnInsn(owner, adr, done)) {
2022        // define the return point
2023        instPoint *point = new instPoint(this, instr, owner, newAdr, false, 
2024                                         functionExit, adr);
2025        point->relocated_ = true;
2026        // if location was this point, change it to new point
2027        if(location == funcReturns[retId]) { 
2028            location = point;
2029        }
2030        retId++;
2031        reloc_info->addFuncReturn(point);
2032      } else if (instr.branch.op == 0 
2033                 && (instr.branch.op2 == 2 || instr.branch.op2 == 6)) {
2034        // find if this branch is going out of the function
2035        int disp = instr.branch.disp22;
2036        Address target = adr + baseAddress + (disp << 2);
2037
2038        // getAddress(0) gives the addr of the fn before it's relocated
2039        if ((target < (getAddress(0) + baseAddress)) 
2040            || (target >= (getAddress(0) + baseAddress + size()))) {
2041            // the original branch went out of the function...
2042
2043            relocateInstruction(&newInstr[i],adr+baseAddress,newAdr,proc);
2044            instPoint *point = new instPoint(this, newInstr[i], owner, 
2045                                             newAdr, false, 
2046                                             functionExit, adr);
2047            point->relocated_ = true;
2048            disp = newInstr[i].branch.disp22;
2049            if ((instr.branch.cond != 0) && (instr.branch.cond != 8)) {  
2050                point->isBranchOut = true;
2051                point->branchTarget = adr + (disp<<2);
2052            }
2053            // if location was this point, change it to new point
2054            if(location == funcReturns[retId]) { 
2055                location = point;
2056            }
2057            retId++;
2058            reloc_info->addFuncReturn(point);
2059        }
2060
2061      } else if (isCallInsn(instr)) {
2062
2063        // first, check for tail-call optimization: a call where the instruction 
2064        // in the delay slot write to register %o7(15), usually just moving
2065        // the caller's return address, or doing a restore
2066        // Tail calls are instrumented as return points, not call points.
2067        instruction nexti; 
2068        nexti.raw = owner->get_instruction(adr+4);
2069
2070        if (nexti.rest.op == 2 
2071            && ((nexti.rest.op3 == ORop3 && nexti.rest.rd == 15)
2072               || nexti.rest.op3 == RESTOREop3)) {
2073             // Undoing the tail-call optimazation when the function
2074             // is relocated. Here is an example:
2075             //   before:          --->             after
2076             // ---------------------------------------------------
2077             //   call  %reg                       mov %reg %g1
2078             //   restore                          restore    
2079             //                                    st  %i0, [ %fp + 0x44 ]
2080             //                                    mov %o7 %i0
2081             //                                    call %g1 
2082             //                                    nop
2083             //                                    mov %i0,%o7
2084             //                                    st  [ %fp + 0x44 ], %i0
2085             //                                    retl
2086             //                                    nop
2087             //  ********    OR    ********
2088             //   before:          --->             after
2089             // ---------------------------------------------------
2090             //   call  PC_REL_ADDR                want : move ABS_ADD,
2091             //                                     convert PC_REL_ADDR to ABS_ADDR
2092             //                                     sethi ADDR', %g1
2093             //                                     or %g1, ADDR'', g1
2094             //                                     where ADDR' is high-22 bits of 
2095             //                                     ABS_ADDR, and ADDR'' is low 10 
2096             //                                     bits
2097             //   restore                          restore    
2098             //                                    st  %i0, [ %fp + 0x44 ]
2099             //                                    mov %o7 %i0
2100             //                                    call %g1 
2101             //                                    nop
2102             //                                    mov %i0,%o7
2103             //                                    st  [ %fp + 0x44 ], %i0
2104             //                                    retl
2105             //                                    nop
2106             //  ********    OR    ********
2107             //   before:          --->             after
2108             // ---------------------------------------------------
2109             //   call  PC_REL_ADDR                 ADDR' = PC_REL_ADDR - diff
2110             //                                     between instruction addresses
2111             //                                     (orig call and new call).... 
2112             //   restore                          restore    
2113             //                                    st  %i0, [ %fp + 0x44 ]
2114             //                                    mov %o7 %i0
2115             //                                    call PC_REL_ADDR' 
2116             //                                    nop
2117             //                                    mov %i0,%o7
2118             //                                    st  [ %fp + 0x44 ], %i0
2119             //                                    retl
2120             //                                    nop
2121          //    Note : Assuming that %g1 is safe to use, since g1 is scratch
2122          //     register that is defined to be volatile across procedure
2123          //     calls.
2124          //    My barf for hand written assembly code which violates this
2125          //     assumption.
2126             // Q: Here the assumption that register i1 is available 
2127             //    might be an question, is it?
2128             // A: I think it is appropriate because:
2129             //      (in situation A calls B and B calls C)
2130             //      The procedure C called via tail call is a leaf 
2131             //      procedure, the value arguments and return value between
2132             //      A and C are passed by register (o1...o5, o7)
2133             //      So even If B mess up the value of i0, it won't affect the
2134             //      commnucation between A and C. Also, we saved the value of
2135             //      i0 on stack and when we return from B, the value of i0
2136             //      won't be affected.
2137             //      If C is not a leaf procedure, it should be fine
2138             //      as it is.
2139             //    ( If you could give an counter-example, please
2140             //      let me know.                         --ling )
2141
2142
2143             bool true_call = isTrueCallInsn(instr);
2144             bool jmpl_call = isJmplCallInsn(instr);
2145             
2146             if (true_call) {
2147                 //cerr << "tail-call opt undo : found CALL call pattern for sym " << \
2148                   prettyName().string_of() << endl;
2149             } else {
2150                 //cerr << "tail-call opt undo : found JMPL call pattern for sym" << \
2151                   prettyName().string_of() << endl;
2152             }
2153
2154             if (!true_call && !jmpl_call) {
2155                 cerr << "WARN : attempting to unwind tail-call optimization, call instruction appears to be neither TRUE call nor JMPL call, bailing...." << endl;
2156                 return FALSE;
2157             }
2158
2159             // if the call instruction was a call to a register, stick in extra
2160             //  initial mov as above....
2161             if (jmpl_call) {
2162                 // added extra mv *, g1
2163                 // translation : mv inst %1 %2 is synthetic inst. implemented as
2164                 //  orI %1, 0, %2  
2165                 genImmInsn(&newInstr[i++], ORop3, instr.rest.rs1, 0, 1);
2166             } else {
2167                 orig_call_insn = i;
2168             }
2169
2170             genSimpleInsn(&newInstr[i++], RESTOREop3, 0, 0, 0);
2171             generateStore(&newInstr[i++], 24, REG_FP, 0x44);
2172             genImmInsn(&newInstr[i++], ORop3, 15, 0, 24); 
2173
2174             // if the original call instruction was a call to a register, that
2175             //  register should have been moved into %g1 above, so generate a
2176             //  call to %g1.
2177             if (jmpl_call) { 
2178                 // note: changed here.
2179                 // original was :
2180                 //  replicate original jump instruction from target code.
2181                 //  newInstr[i++].raw = owner->get_instruction(adr);
2182                 // new is :
2183                 //  generate <call %g1>
2184                 generateJmplInsn(&newInstr[i++], 1, 0, 15);
2185             } else {
2186                 // if the original call was a call to an ADDRESS, then want
2187                 //  to copy the original call.  There is, however, a potential
2188                 //  caveat:  The SPARC CALL instruction is apparently PC
2189                 //  relative (even though disassemblers like that in gdb will
2190                 //  show a call to an absolute address).
2191                 //  As such, want to change the call target to account for
2192                 //  the difference in PCs.
2193
2194                 newInstr[i].raw = owner->get_instruction(adr);
2195                 relocateInstruction(&newInstr[i], \
2196                         adr+baseAddress,
2197                         newAdr + (i - orig_call_insn) * 4, proc);
2198                 //cerr << "adr+baseAddress = " << adr+baseAddress << " (i - orig_call_insn) = " << (i - orig_call_insn) << " newAdr = " << newAdr << endl;
2199                 i++;
2200
2201                 // newInstr[i].raw = owner->get_instruction(adr);
2202                 // WRONG!!!!
2203                 //  let old.a be address of orig instr
2204                 //  want to branch to old.a (absolute).
2205                 //  what this is relative needs to take into account
2206                 //  the location of this new code.
2207                 //  newInstr[i].call.disp30 -= (i - orig_call_insn);
2208                 // i++;
2209             }
2210
2211             generateNOOP(&newInstr[i++]);
2212             genImmInsn(&newInstr[i++], ORop3, 24, 0, 15);
2213             generateLoad(&newInstr[i++], REG_FP, 0x44, 24);         
2214             generateJmplInsn(&newInstr[i++], 15, 8 ,0);
2215             newAdr += 28;
2216             generateNOOP(&newInstr[i]);
2217             instPoint *point = new instPoint(this, instr, owner, newAdr, false,
2218                                       functionExit, adr);
2219             point-> originalInstruction = newInstr[i-1];
2220             point-> delaySlotInsn = newInstr[i];
2221             point-> isDelayed = true;
2222             point->relocated_ = true;
2223             // if location was this point, change it to new point
2224             if(location == funcReturns[retId]) { 
2225                 location = point;
2226             }
2227             retId++;
2228             reloc_info->addFuncReturn(point);
2229        } else {
2230          // if the second instruction in the function is a call instruction
2231          // then this cannot go in the delay slot of the branch to the
2232          // base tramp, so add a noop between first and second instructions
2233          // in the relocated function (check out write in libc.so.1 for
2234          // an example of this):
2235          //
2236          //     save  %sp, -96, %sp             branch to base tramp
2237          //     call  0x73b70                   nop
2238          //                                     call 0x73b70
2239          if(adr == (getAddress(0)+4)){
2240              newInstr[i+1] = instr;
2241              generateNOOP(&newInstr[i]);
2242              i++;
2243              newAdr += 4;
2244          }
2245
2246          // if this is a call to an address within the same function, then
2247          // we need to set the %o7 register to have the same value as it
2248          // would have had before the function was relocated.
2249          // To do this we generate a call instruction back to the original
2250          // function location, and then at this location we generate a call
2251          // instruction back to the relocated instruction.  In the delay
2252          // slot of the second instruction the value of %o7 is changed by
2253          // the difference between the original call instruction and
2254          // the location of the call instruction back to the relocated
2255          // function.  This way the %o7 register will contain the address
2256          // of the original call instruction.
2257          Address call_target = adr + (instr.call.disp30 << 2);
2258          if((call_target >= getAddress(0)) 
2259                 && (call_target <= (getAddress(0) + size()))){ 
2260             assert((newInstr[i].call.disp30 << 2) == 8);
2261
2262             // generate call instruction to the original function address,
2263             // after the SAVE / call / RESTORE instructions that call the
2264             // relocated function
2265             newInstr[i].call.disp30 = ((call_start_addr -newAdr) >> 2); 
2266
2267             // generate call to relocated function from original function
2268             // (this will get almost correct value for register %o7)
2269             instruction new_inst;
2270             generateCallInsn(&new_inst,call_start_addr,
2271                              newAdr+sizeof(instruction));
2272             callInstrs += new_inst;
2273            
2274             // generate add instruction to get correct value for %o7 register;
2275             // this will go in the delay slot of the previous call instruction.
2276             genImmInsn(&new_inst,ADDop3,REG_O7,
2277                        (adr+baseAddress-call_start_addr),REG_O7);
2278             callInstrs += new_inst;
2279             call_start_addr += 2*sizeof(instruction);
2280          }
2281          else {
2282             // otherwise, this is a call instruction to a location
2283             // outside the function
2284             bool err;
2285             relocateInstruction(&newInstr[i],adr+baseAddress,newAdr,proc);
2286             (void)newCallPoint(newAdr, newInstr[i], owner, err, 
2287                                callsId, adr,reloc_info,location);
2288             if (err) return false;
2289          }
2290        }
2291      }
2292
2293      else if (isInsnType(instr, JMPLmask, JMPLmatch)) {
2294        /* A register indirect jump. Some jumps may exit the function 
2295           (e.g. read/write on SunOS). In general, the only way to 
2296           know if a jump is exiting the function is to instrument
2297           the jump to test if the target is outside the current 
2298           function. Instead of doing this, we just check the 
2299           previous two instructions, to see if they are loading
2300           an address that is out of the current function.
2301           This should catch the most common cases (e.g. read/write).
2302           For other cases, we would miss a return point.
2303
2304           This is the case considered:
2305
2306              sethi addr_hi, r
2307              or addr_lo, r, r
2308              jump r
2309         */
2310
2311          reg jumpreg = instr.rest.rs1;
2312          instruction prev1;
2313          instruction prev2;
2314          
2315          prev1.raw = owner->get_instruction(adr-4);
2316          prev2.raw = owner->get_instruction(adr-8);
2317
2318          unsigned targetAddr;
2319
2320          if (instr.rest.rd == 0 && (instr.rest.i == 1 || instr.rest.rs2 == 0)
2321              && prev2.sethi.op == FMT2op && prev2.sethi.op2 == SETHIop2 
2322              && prev2.sethi.rd == (unsigned)jumpreg
2323              && prev1.rest.op == RESTop 
2324              && prev1.rest.rd == (unsigned)jumpreg && prev1.rest.i == 1
2325              && prev1.rest.op3 == ORop3 && prev1.rest.rs1 == (unsigned)jumpreg){
2326              
2327              targetAddr = (prev2.sethi.imm22 << 10) & 0xfffffc00;
2328              targetAddr |= prev1.resti.simm13;
2329              if ((targetAddr < getAddress(0)) 
2330                  || (targetAddr >= (getAddress(0)+size()))) {
2331                  instPoint *point = new instPoint(this, instr, owner, 
2332                                                   newAdr, false,
2333                                                   functionExit, adr);
2334                  point->relocated_ = true;
2335                  // if location was this point, change it to new point
2336                  if(location == funcReturns[retId]) { 
2337                      location = point;
2338                  }
2339                  retId++;
2340                  reloc_info->addFuncReturn(point);
2341              }
2342          }
2343      }
2344  }
2345    
2346    return true;
2347 }