isLeaf() --> hasNoStackFrame() for instPoint and pd_Function.
[dyninst.git] / dyninstAPI / src / inst-sparc-solaris.C
1 /*
2  * Copyright (c) 1996 Barton P. Miller
3  * 
4  * We provide the Paradyn Parallel Performance Tools (below
5  * described as Paradyn") on an AS IS basis, and do not warrant its
6  * validity or performance.  We reserve the right to update, modify,
7  * or discontinue this software at any time.  We shall have no
8  * obligation to supply such updates or modifications or any other
9  * form of support to you.
10  * 
11  * This license is for research uses.  For such uses, there is no
12  * charge. We define "research use" to mean you may freely use it
13  * inside your organization for whatever purposes you see fit. But you
14  * may not re-distribute Paradyn or parts of Paradyn, in any form
15  * source or binary (including derivatives), electronic or otherwise,
16  * to any other organization or entity without our permission.
17  * 
18  * (for other uses, please contact us at paradyn@cs.wisc.edu)
19  * 
20  * All warranties, including without limitation, any warranty of
21  * merchantability or fitness for a particular purpose, are hereby
22  * excluded.
23  * 
24  * By your use of Paradyn, you understand and agree that we (or any
25  * other person or entity with proprietary rights in Paradyn) are
26  * under no obligation to provide either maintenance services,
27  * update services, notices of latent defects, or correction of
28  * defects for Paradyn.
29  * 
30  * Even if advised of the possibility of such damages, under no
31  * circumstances shall we (or any other person or entity with
32  * proprietary rights in the software licensed hereunder) be liable
33  * to you or any third party for direct, indirect, or consequential
34  * damages of any character regardless of type of action, including,
35  * without limitation, loss of profits, loss of use, loss of good
36  * will, or computer failure or malfunction.  You agree to indemnify
37  * us (and any other person or entity with proprietary rights in the
38  * software licensed hereunder) for any and all liability it may
39  * incur to third parties resulting from your use of Paradyn.
40  */
41
42 #include "dyninstAPI/src/inst-sparc.h"
43 #include "dyninstAPI/src/instPoint.h"
44 #include "util/h/debugOstream.h"
45
46 // Another constructor for the class instPoint. This one is called
47 // for the define the instPoints for regular functions which means
48 // multiple instructions is going to be moved to based trampoline.
49 // Since we will use the instruction CALL to branch to the base
50 // tramp(so it doesn't have any code size restriction), things are
51 // a little more complicated because instruction CALL changes the 
52 // value in the link register.
53 instPoint::instPoint(pd_Function *f, const instruction &instr, 
54                      const image *owner, Address &adr,
55                      bool delayOK,
56                      instPointType pointType)
57 : addr(adr), originalInstruction(instr), inDelaySlot(false), isDelayed(false),
58   callIndirect(false), callAggregate(false), callee(NULL), func(f),
59   ipType(pointType), image_ptr(owner), firstIsConditional(false),
60   relocated_(false), isLongJump(false)
61 {
62
63   isBranchOut = false;
64   size = 0;
65
66   // When the function has a stack frame
67   if (!this->hasNoStackFrame()) {
68
69       // we will treat the first instruction after the SAVE instruction
70       // in the nonleaf procedure as the function entry.  
71       if (ipType == functionEntry) {
72
73           assert(isInsnType(instr, SAVEmask, SAVEmatch));
74           saveInsn.raw = owner->get_instruction(addr);
75           addr += 4;
76           originalInstruction.raw = owner->get_instruction(addr);
77           delaySlotInsn.raw = owner->get_instruction(addr+4);
78           size += 2*sizeof(instruction);
79
80           // If the second instruction is DCTI, we need to move the
81           // the instruction in the delayed slot.
82           if (IS_DELAYED_INST(delaySlotInsn)) {
83               isDelayed = true; 
84               isDelayedInsn.raw = owner->get_instruction(addr+8);
85               size += 1*sizeof(instruction);
86
87               // Life is hard. If the second instruction is actually
88               // an CALL instruction, we need to move the instruction
89               // after the instruction in the delayed slot if the 
90               // return value of this function is a aggregate value.
91               aggregateInsn.raw = owner->get_instruction(addr+12);
92               if (isCallInsn(delaySlotInsn)) {
93                   if (!IS_VALID_INSN(aggregateInsn) && aggregateInsn.raw != 0) {
94                       callAggregate = true;
95                       size += 1*sizeof(instruction);
96                   }
97               }
98           }
99
100       // The following are easier.        
101       } else if (ipType == callSite) {
102           delaySlotInsn.raw = owner->get_instruction(addr+4);
103           size += 2*sizeof(instruction);
104
105           aggregateInsn.raw = owner->get_instruction(addr+8);
106           if (!IS_VALID_INSN(aggregateInsn) && aggregateInsn.raw != 0) {
107               callAggregate = true;
108               size += 1*sizeof(instruction);
109           }
110       } else {
111           delaySlotInsn.raw = owner->get_instruction(addr+4);
112           size += 2*sizeof(instruction);
113       }
114   }
115
116   // When the function is a leaf function
117   else {
118
119       // For the leaf procedure, there are no function calls in
120       // this procdure. So we don't need to consider the 
121       // aggregate instuction.
122       if (ipType == functionEntry) {
123
124           otherInstruction.raw = owner->get_instruction(addr+4);
125           delaySlotInsn.raw = owner->get_instruction(addr+8);
126           size += 2*sizeof(instruction);
127
128           if (IS_DELAYED_INST(delaySlotInsn)) {
129               isDelayed = true;
130               isDelayedInsn.raw = owner->get_instruction(addr+12);
131               size += 2*sizeof(instruction);
132           }
133
134       } else if (ipType == functionExit) {
135           
136           addr -= 4;
137
138           if (owner->isValidAddress(addr-4)) {
139               instruction iplus1;
140               iplus1.raw = owner->get_instruction(addr-4);
141               if (IS_DELAYED_INST(iplus1) && !delayOK) {
142                   addr -= 4;
143                   inDelaySlot = true;
144                   size += 1*sizeof(instruction);
145                   if(isCondBranch(iplus1)){
146                       instruction previous_inst; 
147                       previous_inst.raw = owner->get_instruction(addr-4);
148                       firstIsConditional = true;
149                       addr -= sizeof(instruction);
150                       size += 1*sizeof(instruction);
151                   }
152               }
153           }
154
155           originalInstruction.raw = owner->get_instruction(addr);
156           otherInstruction.raw = owner->get_instruction(addr+4);
157           delaySlotInsn.raw = owner->get_instruction(addr+8);
158           size += 3*sizeof(instruction);
159
160           if (inDelaySlot) {
161               inDelaySlotInsn.raw = owner->get_instruction(addr+12);
162               if(firstIsConditional) {
163                   extraInsn.raw = owner->get_instruction(addr+16);
164               }
165           }
166
167       } else {
168           assert(ipType == callSite);
169           // Usually, a function without a stack frame won't have any call sites
170           extern debug_ostream metric_cerr;
171           metric_cerr << "inst-sparc-solaris.C WARNING: found a leaf fn (no stack frame)" << endl;
172           metric_cerr << "which makes a function call" << endl;
173           metric_cerr << "This fn is " << func->prettyName() << endl;
174
175           //abort();
176       }
177   }
178
179   // return the address in the code segment after this instruction
180   // sequence. (there's a -1 here because one will be added up later in
181   // the function findInstPoints)  
182   adr = addr + (size - 1*sizeof(instruction));
183 }
184
185
186 void AstNode::sysFlag(instPoint *location)
187 {
188     if (location -> ipType == functionEntry) {
189         astFlag = (location -> isLongJump)? false:true; 
190     } else if (location -> ipType == functionExit) {
191        astFlag = location -> hasNoStackFrame(); // formerly "isLeaf()"
192     } else
193         astFlag = false;
194
195     if (loperand)
196         loperand->sysFlag(location);
197     if (roperand)
198         roperand->sysFlag(location); 
199
200     for (unsigned u = 0; u < operands.size(); u++) {
201         operands[u]->sysFlag(location);
202     }
203 }
204
205 // Determine if the called function is a "library" function or a "user" function
206 // This cannot be done until all of the functions have been seen, verified, and
207 // classified
208 //
209 void pd_Function::checkCallPoints() {
210   instPoint *p;
211   Address loc_addr;
212
213   vector<instPoint*> non_lib;
214
215   for (unsigned i=0; i<calls.size(); ++i) {
216     /* check to see where we are calling */
217     p = calls[i];
218     assert(p);
219
220     if (isInsnType(p->originalInstruction, CALLmask, CALLmatch)) {
221       // Direct call
222       loc_addr = p->addr + (p->originalInstruction.call.disp30 << 2);
223       pd_Function *pdf = (file_->exec())->findFunction(loc_addr);
224       if (pdf && !pdf->isLibTag()) {
225         p->callee = pdf;
226         non_lib += p;
227       } else if(!pdf){
228            // if this is a call outside the fuction, keep it
229            if((loc_addr < getAddress(0))||(loc_addr > (getAddress(0)+size()))){
230                 p->callIndirect = true;
231                 p->callee = NULL;
232                 non_lib += p;
233            }
234            else {
235                delete p;
236            }
237       } else {
238           delete p;
239       }
240     } else {
241       // Indirect call -- be conservative, assume it is a call to 
242       // an unnamed user function
243       assert(!p->callee); assert(p->callIndirect);
244       p->callee = NULL;
245       non_lib += p;
246     }
247   }
248   calls = non_lib;
249 }
250
251 // TODO we cannot find the called function by address at this point in time
252 // because the called function may not have been seen.
253 // reloc_info is 0 if the function is not currently being relocated
254 Address pd_Function::newCallPoint(Address &adr, const instruction instr,
255                                  const image *owner, bool &err, 
256                                  int &callId, Address &oldAddr,
257                                  relocatedFuncInfo *reloc_info,
258                                  const instPoint *&location)
259 {
260     Address ret=adr;
261     instPoint *point;
262
263     err = true;
264     if (isTrap) {
265         point = new instPoint(this, instr, owner, adr, false, callSite, oldAddr);
266     } else {
267         point = new instPoint(this, instr, owner, adr, false, callSite);
268     }
269
270     if (!isInsnType(instr, CALLmask, CALLmatch)) {
271       point->callIndirect = true;
272       point->callee = NULL;
273     } else{
274       point->callIndirect = false;
275     }
276
277     if (isTrap) {
278         if (!reloc_info) {
279             calls += point;
280             calls[callId] -> instId = callId++;
281         } else {
282             // calls to a location within the function are not
283             // kept in the calls vector
284             assert(callId >= 0);
285             assert(((u_int)callId) < calls.size());
286             point->relocated_ = true;
287             // if the location was this call site, then change its value
288             if(location && (calls[callId] == location)) { 
289                 assert(calls[callId]->instId  == location->instId);
290                 location = point; 
291             } 
292             point->instId = callId++;
293             reloc_info->addFuncCall(point);
294         }
295     } else {
296         if (!reloc_info) {
297             calls += point;
298         }
299         else {
300             point->relocated_ = true;
301             reloc_info->addFuncCall(point);
302         }
303     }
304     err = false;
305     return ret;
306 }
307
308 /*
309  * Given and instruction, relocate it to a new address, patching up
310  *   any relative addressing that is present.
311  *
312  */
313 void relocateInstruction(instruction *insn, u_int origAddr, u_int targetAddr,
314                          process *proc)
315 {
316     int newOffset;
317
318     // If the instruction is a CALL instruction, calculate the new
319     // offset
320     if (isInsnType(*insn, CALLmask, CALLmatch)) {
321         newOffset = origAddr  - targetAddr + (insn->call.disp30 << 2);
322         insn->call.disp30 = newOffset >> 2;
323     } else if (isInsnType(*insn, BRNCHmask, BRNCHmatch)||
324                isInsnType(*insn, FBRNCHmask, FBRNCHmatch)) {
325
326         // If the instruction is a Branch instruction, calculate the 
327         // new offset. If the new offset is out of reach after the 
328         // instruction is moved to the base Trampoline, we would do
329         // the following:
330         //    b  address  ......    address: save
331         //                                   call new_offset             
332         //                                   restore 
333         newOffset = origAddr - targetAddr + (insn->branch.disp22 << 2);
334
335         // if the branch is too far, then allocate more space in inferior
336         // heap for a call instruction to branch target.  The base tramp 
337         // will branch to this new inferior heap code, which will call the
338         // target of the branch
339         if (!offsetWithinRangeOfBranchInsn(newOffset)) {
340 //      if (ABS(newOffset) > getMaxBranch1Insn()) {
341             int ret = inferiorMalloc(proc,3*sizeof(instruction), textHeap);
342             u_int old_offset = insn->branch.disp22 << 2;
343             insn->branch.disp22  = (ret - targetAddr)>>2;
344             instruction insnPlus[3];
345             genImmInsn(insnPlus, SAVEop3, REG_SP, -112, REG_SP);
346             generateCallInsn(insnPlus+1, ret+sizeof(instruction), 
347                              origAddr+old_offset);
348             genSimpleInsn(insnPlus+2, RESTOREop3, 0, 0, 0); 
349             proc->writeDataSpace((caddr_t)ret, sizeof(insnPlus), 
350                          (caddr_t) insnPlus);
351         } else {
352             insn->branch.disp22 = newOffset >> 2;
353         }
354     } else if (isInsnType(*insn, TRAPmask, TRAPmatch)) {
355         // There should be no probelm for moving trap instruction
356         // logLine("attempt to relocate trap\n");
357     } 
358     /* The rest of the instructions should be fine as is */
359 }
360
361 /*
362  * Install a base tramp -- fill calls with nop's for now.
363  *
364  * This one install the base tramp for the regular functions.
365  *
366  */
/*
 * installBaseTramp
 *
 * Builds a base trampoline for `location` and writes it into `proc`.
 *
 * Steps, as performed below:
 *   1. baseTemplate.size bytes are requested from the inferior's text heap.
 *   2. baseTemplate.trampTemp is copied into a local buffer.
 *   3. The buffer is walked one instruction at a time and every marker
 *      word is rewritten:
 *        EMULATE_INSN      the instructions recorded in `location`
 *                          (saveInsn, originalInstruction, otherInstruction,
 *                          delaySlotInsn, isDelayedInsn, aggregateInsn,
 *                          inDelaySlotInsn, extraInsn - whichever apply),
 *                          patched through relocateInstruction()
 *        RETURN_INSN       a CALL to base address + location->addr +
 *                          location->size, followed by a RESTORE when the
 *                          function has no stack frame
 *        SKIP_PRE_INSN     a branch to baseTemplate.updateCostOffset
 *        SKIP_POST_INSN    a branch to baseTemplate.returnInsOffset
 *        UPDATE_COST_INSN  a nop; its address is stored in
 *                          baseTemplate.costAddr
 *        LOCAL/GLOBAL PRE/POST BRANCH markers   a nop
 *   4. The buffer is written to the allocated address and freed.
 *
 * Side effects visible to the caller: location->isLongJump may be set to
 * true, and baseTemplate.costAddr is updated.
 *
 * Returns a trampTemplate allocated with new, copied from baseTemplate,
 * with baseAddr set to the allocated address.
 */
367 trampTemplate *installBaseTramp(instPoint *&location, process *proc)
368 {
369     unsigned baseAddr = inferiorMalloc(proc, baseTemplate.size, textHeap);
370
    // NOTE(review): baseTemplate.size is used as a byte count by memcpy,
    // inferiorMalloc and the loop bound, but as an element count here, so
    // this buffer is presumably larger than the template itself - confirm
    // whether that slack is relied upon by the temp+1 writes below.
371     instruction *code = new instruction[baseTemplate.size];
372     assert(code);
373
374     memcpy((char *) code, (char*) baseTemplate.trampTemp, baseTemplate.size);
375
    // temp walks the local copy; currAddr is the address the same
    // instruction will have inside the inferior process.
376     instruction *temp;
377     unsigned currAddr;
378     for (temp = code, currAddr = baseAddr; 
379         (currAddr - baseAddr) < (unsigned) baseTemplate.size;
380         temp++, currAddr += sizeof(instruction)) {
381
382         if (temp->raw == EMULATE_INSN) {
383
384             // Load the value of link register from stack 
385             // If no stack frame, generate a RESTORE instruction
386             // since there's an instruction SAVE generated and put in the
387             // code segment.
388             if (location -> hasNoStackFrame()) {
389
390                 Address baseAddress = 0;
391                 proc->getBaseAddress(location->image_ptr,baseAddress);
392                 baseAddress += location -> addr;
393
                // A RESTORE is only emitted when the point cannot be
                // reached from the tramp with a single branch; otherwise
                // the slot is filled with a nop.
394                 if (in1BranchInsnRange(baseAddress, baseAddr) == false) {
395                     //cerr << "This happen very rarely, I suppose "<< endl;
396                     //cerr << "Let's see if this is going to be executed..." << endl;
397                     location -> isLongJump = true;
398                     genImmInsn(temp, RESTOREop3, 0, 0, 0);
399                 } else {
400                     generateNOOP(temp);
401                 }
402                 temp++;
403                 currAddr += sizeof(instruction);
404             } 
405             // Same for the leaf and nonleaf functions.
406             // First, relocate the "FIRST instruction" in the sequence;  
407             Address fromAddr = location->addr;
408
            // For an entry point in a function with a stack frame, the
            // recorded SAVE goes in front of the first instruction.
409             if (!location -> hasNoStackFrame())
410                 if (location -> ipType == functionEntry) {
411                     *temp = location -> saveInsn;
412                     temp++;
413                     currAddr += sizeof(instruction);
414                 }
415
416             *temp = location->originalInstruction;
417
418             // compute the real from address if this instrumentation
419             // point is from a shared object image
420             Address baseAddress = 0;
421             if(proc->getBaseAddress(location->image_ptr,baseAddress)){
422                 fromAddr += baseAddress;                
423             }
424             // If the instruction is a call instruction to a location somewhere 
425             // within the function, then the o7 register must be saved and 
426             // restored around the relocated call from the base tramp...the call
427             // instruction changes the value of o7 to be the PC value, and if
428             // we move the call instruction to the base tramp, its value will
429             // be incorrect when we use it in the function.  We generate the
430             // following base tramp code:
431             //          original delay slot instruction 
432             //          save
433             //          original call instruction
434             //          restore
435             // This case should only occur for function entry points in
436             // functions from shared objects, and there should be no append
437             // trampoline code because the relocated call instruction will
438             // not return to the base tramp
439             if (isInsnType(*temp, CALLmask, CALLmatch)) {
440                 Address offset = fromAddr + (temp->call.disp30 << 2);
441                 if ((offset > (location->func->getAddress(0)+ baseAddress)) && 
442                     (offset < ((location->func->getAddress(0)+ baseAddress)+
443                                  location->func->size()))) {
444                     // offset > adr; "=" means recursive function which is allowed
445                     // offset < adr + size; "=" does not apply to this case
446
447                     // TODO: this assumes that the delay slot instruction is not
448                     // a call instruction....is this okay?
449                     
450                     // assume this situation only happens at function entry point 
451                     // for the shared library routine. And it definitely needs
452                     // long jump support
453                     assert(location -> ipType == functionEntry); 
454                     location -> isLongJump = true;
455                     
456                     // In this situation, the save instruction is discarded
457                     // Rollback!! 
458                     assert(location->hasNoStackFrame() == false);
459                     temp--;
460                     currAddr -= sizeof(instruction);
461                     
462                     *temp = location->delaySlotInsn;  
463                     temp++; 
464                     currAddr += sizeof(instruction);
465                     genImmInsn(temp, SAVEop3, REG_SP, -112, REG_SP); 
466                     temp++; 
467                     currAddr += sizeof(instruction);  
468                     *temp = location->originalInstruction;
469                     relocateInstruction(temp,fromAddr,currAddr,(process *)proc);
470                     temp++; 
471                     fromAddr += sizeof(instruction); 
472                     currAddr += sizeof(instruction);
473                     genImmInsn(temp, RESTOREop3, 0, 0, 0);
474                     continue;
475                 }
476             }   
477
478             relocateInstruction(temp,fromAddr,currAddr,(process *)proc);
479
480             // Again, for leaf function, one more is needed to move for one
481             // more spot;
482             if (location->hasNoStackFrame()) {
483                 fromAddr += sizeof(instruction);
484                 currAddr += sizeof(instruction);
485                 *++temp = location->otherInstruction;
486                 relocateInstruction(temp, fromAddr, currAddr, 
487                                     (process *)proc);
488             }     
489             
490             // Second, relocate the "NEXT instruction";
491             fromAddr += sizeof(instruction);
492             currAddr += sizeof(instruction);
493             *++temp = location->delaySlotInsn;
494  
495             // if the NEXT instruction is a call instruction to a location
496             // within the function, then the o7 register must be saved and 
497             // restored around the relocated call from the base tramp...the call
498             // instruction changes the value of o7 to be the PC value, and if
499             // we move the call instruction to the base tramp, its value will
500             // be incorrect when we use it in the function.  We generate:
501             //
502             //  original            relocated to base tramp
503             //  --------            -----------------------
504             //  save                nop  // SAVE added above, replace w/nop 
505             //  original insn       original instruction // already relocated
506             //  delaySlotInsn       isDelayedInsn
507             //  isDelayedInsn       save
508             //                      delaySlotInsn  (call with offset - 4)
509             //                      restore
510             //  In the function, the call to the base tramp will have an
511             //  additional add instruction to adjust the o7 register
512             //  original            relocated to base tramp
513             //  --------            -----------------------
514             //  save                 save       
515             //  mov                  call
516             //  call                 nop
517             //  sethi                add $o7 4   
518             //
519             if (isInsnType(*temp, CALLmask, CALLmatch)) {
520                 Address offset = fromAddr + (temp->call.disp30 << 2);
521                 if ((offset > (location->func->getAddress(0)+ baseAddress)) && 
522                     (offset < ((location->func->getAddress(0)+ baseAddress)+
523                                  location->func->size()))) {
524                     
                    // Step back two slots to overwrite the instruction
                    // written there with a nop, then return to the
                    // current slot.
525                     temp--;
526                     temp--;
527                     generateNOOP(temp);  
528                     temp++;
529                     temp++;
530                     location->isLongJump = true;
531                     // assert(location->hasNoStackFrame() == false);
532                     // assume that this is not a delayed instr.
533                     *temp = location->isDelayedInsn;  
534                     temp++; 
535                     currAddr += sizeof(instruction);
536                     genImmInsn(temp, SAVEop3, REG_SP, -112, REG_SP); 
537                     temp++; 
538                     currAddr += sizeof(instruction);  
539                     *temp = location->delaySlotInsn;
540                     Address new_call_addr = fromAddr - sizeof(instruction);
541                     relocateInstruction(temp,new_call_addr,currAddr,proc);
542                     temp++; 
543                     fromAddr += sizeof(instruction); 
544                     currAddr += sizeof(instruction);
545                     genImmInsn(temp, RESTOREop3, 0, 0, 0);
546                     continue;
547                 }
548             }   
549
550             // otherwise relocate the NEXT instruction
551             relocateInstruction(temp, fromAddr, currAddr,
552                                 (process *)proc);
553             
554             // Third, if the "NEXT instruction" is a DCTI, 
555             if (location->isDelayed) {
556                 fromAddr += sizeof(instruction);
557                 currAddr += sizeof(instruction);
558                 *++temp = location->isDelayedInsn;
559                 relocateInstruction(temp, fromAddr, currAddr,
560                                     (process *)proc);
561                 
562                 // Then, possibly, there's a callAggregate instruction
563                 // after this. 
564                 if (location->callAggregate) {
565                     currAddr += sizeof(instruction);
566                     *++temp = location->aggregateInsn;
567                     continue;
568                 }       
569             }
570             
571             // If the "FIRST instruction" is a DCTI, then our so called 
572             // "NEXT instruction" is in the delayed Slot and this might
573             // happen. (actually, it happened)
574             if (location->callAggregate) {
575                 currAddr += sizeof(instruction);
576                 *++temp = location->aggregateInsn;
577                 continue;
578             }   
579             
580             // For the leaf function, if there's an inDelaySlot instruction,
581             // move this one to the base Tramp.(i.e. at the function exit,
582             // if the first instruction is in the delayed slot the previous
583             // instruction, we have to move that one too, so we count from 
584             // that one and the last one is this sequence is called inDelaySlot
585             // instruction.)
586             // Well, after all these, another SAVE instruction is generated
587             // so we are prepared to handle the returning to our application's
588             // code segment. 
589             if (location->hasNoStackFrame()) {
590                 if (location->inDelaySlot) {
591                     fromAddr += sizeof(instruction);
592                     currAddr += sizeof(instruction);
593                     *++temp = location->inDelaySlotInsn;
594                     relocateInstruction(temp,fromAddr,currAddr,(process *)proc);
595                     if(location->firstIsConditional){
596                         fromAddr += sizeof(instruction);
597                         currAddr += sizeof(instruction);
598                         *++temp = location->extraInsn;
599                         relocateInstruction(temp, fromAddr, currAddr, proc);
600                     }
601                 } 
602                 
                // Written one slot ahead without advancing temp/currAddr;
                // the loop increment then steps onto this SAVE.
603                 genImmInsn(temp+1, SAVEop3, REG_SP, -112, REG_SP);
604             }
605             
606         } else if (temp->raw == RETURN_INSN) {
607             // compute the real from address if this instrumentation
608             // point is from a shared object image
609             Address baseAddress = 0;
            // The result of the lookup is not acted upon; baseAddress is
            // used below with whatever value the call left in it.
610             if(proc->getBaseAddress(location->image_ptr,baseAddress)){
611             }
612             // Back to the code segment of the application.
613             // If the location is in the leaf procedure, generate a RESTORE
614             // instruction right after the CALL instruction to restore all
615             // the values in the registers.
616             if (location -> hasNoStackFrame()) {
617                 generateCallInsn(temp, currAddr, 
618                                 (baseAddress + location->addr)+location->size);
619                 genImmInsn(temp+1, RESTOREop3, 0, 0, 0);
620             } else {
621                 generateCallInsn(temp, currAddr, 
622                                 (baseAddress + location->addr)+location->size);
623             }
624         } else if (temp->raw == SKIP_PRE_INSN) {
            // Branch displacement from this slot to the update-cost slot.
625             unsigned offset;
626             offset = baseAddr+baseTemplate.updateCostOffset-currAddr;
627             generateBranchInsn(temp,offset);
628
629         } else if (temp->raw == SKIP_POST_INSN) {
            // Branch displacement from this slot to the return slot.
630             unsigned offset;
631             offset = baseAddr+baseTemplate.returnInsOffset-currAddr;
632             generateBranchInsn(temp,offset);
633
634         } else if (temp->raw == UPDATE_COST_INSN) {
635             
            // Remember where the cost-update slot landed; the value is
            // carried into the returned template by the copy below.
636             baseTemplate.costAddr = currAddr;
637             generateNOOP(temp);
638         } else if ((temp->raw == LOCAL_PRE_BRANCH) ||
639                    (temp->raw == GLOBAL_PRE_BRANCH) ||
640                    (temp->raw == LOCAL_POST_BRANCH) ||
641                    (temp->raw == GLOBAL_POST_BRANCH)) {
642 #if defined(SHM_SAMPLING) && defined(MT_THREAD)
            // For the local branches, the NUM_INSN_MT_PREAMBLE slots just
            // before this one are filled in by generateMTpreamble.
643             if ((temp->raw == LOCAL_PRE_BRANCH) ||
644                 (temp->raw == LOCAL_POST_BRANCH)) {
645                 temp -= NUM_INSN_MT_PREAMBLE;
646                 unsigned numIns=0;
647                 generateMTpreamble((char *)temp, numIns, proc);
648                 temp += NUM_INSN_MT_PREAMBLE;
649             }
650 #endif
651             /* fill with no-op */
652             generateNOOP(temp);
653         }
654     }
655     // TODO cast
656     proc->writeDataSpace((caddr_t)baseAddr, baseTemplate.size,(caddr_t) code);
657
658     delete [] code;
659
    // Hand back a private copy of the template that records where this
    // tramp was placed.
660     trampTemplate *baseInst = new trampTemplate;
661     *baseInst = baseTemplate;
662     baseInst->baseAddr = baseAddr;
663     return baseInst;
664 }
665
666 /*
667  * Install the base Tramp for the function relocated.
668  * (it means the base tramp that don't need to bother with long jump and
669  *  is the one we used before for all the functions(since there's no
670  *  long jumps)
671  *  for system calls
672  */ 
673 trampTemplate *installBaseTrampSpecial(const instPoint *&location,
674                                        process *proc,
675                                        vector<instruction> &extra_instrs) 
676 {
677     unsigned currAddr;
678     instruction *code;
679     instruction *temp;
680
681     unsigned baseAddr = inferiorMalloc(proc, baseTemplate.size, textHeap);
682
683     if(!(location->func->isInstalled(proc))) {
684         location->func->relocateFunction(proc,location,extra_instrs);
685     }
686     else if(!location->relocated_){
687         // need to find new instPoint for location...it has the pre-relocated
688         // address of the instPoint
689         location->func->modifyInstPoint(location,proc);      
690     }
691
692     code = new instruction[baseTemplate.size];
693     memcpy((char *) code, (char*) baseTemplate.trampTemp, baseTemplate.size);
694
695     for (temp = code, currAddr = baseAddr; 
696         (currAddr - baseAddr) < (unsigned) baseTemplate.size;
697         temp++, currAddr += sizeof(instruction)) {
698
699         if (temp->raw == EMULATE_INSN) {
700             if (location->isBranchOut) {
701                 // the original instruction is a branch that goes out of a 
702                 // function.  We don't relocate the original instruction. We 
703                 // only get to the tramp is the branch is taken, so we generate
704                 // a unconditional branch to the target of the original 
705                 // instruction here 
706                 assert(location->branchTarget);
707                 int disp = location->branchTarget - currAddr;
708
709                 generateBranchInsn(temp, disp);
710                 disp = temp->branch.disp22;
711                 continue;
712             }
713             else {
714                 *temp = location->originalInstruction;
715                 Address fromAddress = location->addr;
716                 relocateInstruction(temp, fromAddress, currAddr, proc);
717                 if (location->isDelayed) {
718                     /* copy delay slot instruction into tramp instance */
719                     currAddr += sizeof(instruction);  
720                     *++temp = location->delaySlotInsn;
721                 }
722                 if (location->callAggregate) {
723                     /* copy invalid insn with aggregate size in it */
724                     currAddr += sizeof(instruction);  
725                     *++temp = location->aggregateInsn;
726                 }
727             }
728         } else if (temp->raw == RETURN_INSN) {
729             generateBranchInsn(temp, 
730                 (location->addr+ sizeof(instruction) - currAddr));
731             if (location->isDelayed) {
732                 /* skip the delay slot instruction */
733                 temp->branch.disp22 += 1;
734             }
735             if (location->callAggregate) {
736                 /* skip the aggregate size slot */
737                 temp->branch.disp22 += 1;
738             }
739         } else if (temp->raw == SKIP_PRE_INSN) {
740           unsigned offset;
741           offset = baseAddr+baseTemplate.updateCostOffset-currAddr;
742           generateBranchInsn(temp,offset);
743         } else if (temp->raw == SKIP_POST_INSN) {
744           unsigned offset;
745           offset = baseAddr+baseTemplate.returnInsOffset-currAddr;
746           generateBranchInsn(temp,offset);
747         } else if (temp->raw == UPDATE_COST_INSN) {
748             
749             baseTemplate.costAddr = currAddr;
750             generateNOOP(temp);
751         } else if ((temp->raw == LOCAL_PRE_BRANCH) ||
752                    (temp->raw == GLOBAL_PRE_BRANCH) ||
753                    (temp->raw == LOCAL_POST_BRANCH) ||
754                    (temp->raw == GLOBAL_POST_BRANCH)) {
755 #if defined(SHM_SAMPLING) && defined(MT_THREAD)
756             if ((temp->raw == LOCAL_PRE_BRANCH) ||
757                 (temp->raw == LOCAL_POST_BRANCH)) {
758                 temp -= NUM_INSN_MT_PREAMBLE;
759                 unsigned numIns=0;
760                 generateMTpreamble((char *)temp, numIns, proc);
761                 temp += NUM_INSN_MT_PREAMBLE;
762             }
763 #endif
764             /* fill with no-op */
765             generateNOOP(temp);
766         }
767     }
768     // TODO cast
769     proc->writeDataSpace((caddr_t)baseAddr, baseTemplate.size,(caddr_t) code);
770
771     delete [] code;
772
773     trampTemplate *baseInst = new trampTemplate;
774     *baseInst = baseTemplate;
775     baseInst->baseAddr = baseAddr;
776     return baseInst;
777 }
778
779 /*
780  * Allocate the space for the base Trampoline, and generate the instruction
781  * we need for modifying the code segment.
782  *
783  * 'retInstance' tells you how to modify the code to jump to the base tramp
784  *
785  */
786 trampTemplate *findAndInstallBaseTramp(process *proc, 
787                                  instPoint *&location,
788                                  returnInstance *&retInstance,
789                                  bool)
790 {
791     Address adr = location->addr;
792     retInstance = NULL;
793
794     
795     trampTemplate *ret;
796     if (proc->baseMap.find((const instPoint *)location, ret)) // writes to ret if found
797        // This base tramp already exists; nothing to do.
798        return ret;
799
800     if (location->func->isTrapFunc()) {
801        // get the base Address of this function if it is a 
802        // shared object
803        Address baseAddress = 0;
804        if(!proc->getBaseAddress(location->image_ptr,baseAddress)){
805           // TODO: what should be done here?    
806           logLine("Error:findAndInstallBaseTramp call getBaseAddress\n"); 
807        }
808        // Install Base Tramp for the functions which are 
809        // relocated to the heap.
810        vector<instruction> extra_instrs;
811
812        ret = installBaseTrampSpecial(location, proc,extra_instrs);
813
814        // add a branch from relocated function to the base tramp
815        // if function was just relocated then location has old address
816        // otherwise location will have address in already relocated func
817        if (!location->func->isInstalled(proc)){
818           if (location->isBranchOut){
819              changeBranch(proc, location->addr, 
820                           (int) ret->baseAddr, location->originalInstruction);
821            } else {
822              generateBranch(proc, location->addr, (int)ret->baseAddr);
823            }
824        }
825        else {  // location's address is correct...it is in the heap
826           if (location->isBranchOut){
827              changeBranch(proc, location->addr, 
828                           (int) ret->baseAddr, location->originalInstruction);
829           } else {
830              generateBranch(proc, location->addr, (int)ret->baseAddr);
831           }
832        }
833
834        // If for this process, a call to the relocated function has not
835        // yet be installed in its original location, then genterate the
836        // following instructions at the begining of the function:
837        //   SAVE;             CALL;         RESTORE.
838        // so that it would jump the start of the relocated function
839        // which is in heap.
840        if (!location->func->isInstalled(proc)){
841           location->func->setInstalled(proc);
842           u_int e_size = extra_instrs.size();
843           instruction *insn = new instruction[3 + e_size];
844           Address adr = location-> func -> getAddress(0);
845           genImmInsn(insn, SAVEop3, REG_SP, -112, REG_SP);
846           generateCallInsn(insn+1, adr+baseAddress+4, 
847                            location->func->getAddress(proc));
848           genSimpleInsn(insn+2, RESTOREop3, 0, 0, 0); 
849           for(u_int i=0; i < e_size; i++){
850              insn[3+i] = extra_instrs[i];
851           }
852           retInstance = new returnInstance((instructUnion *)insn, 
853                                            (3+e_size)*sizeof(instruction), 
854                                            adr+baseAddress, 
855                                            location->func->size());
856           assert(retInstance);
857
858           //cerr << "created a new return instance (relocated fn)!" << endl;
859        }
860     } else {
861        // It's not a trap-function; it's a "normal" function
862        // compute the real from address if this instrumentation
863        // point is from a shared object image
864        Address baseAddress = 0;
865        if (proc->getBaseAddress(location->image_ptr,baseAddress)){
866           adr += baseAddress;           
867        }
868
869        ret = installBaseTramp(location, proc);
870        // check to see if this is an entry point and if the delay 
871        // slot instruction is a call insn, if so, then if the 
872        // call is to a location within the function, then we need to 
873        // add an extra instruction after the restore to correctly
874        // set the o7 register
875        bool need_to_add = false;
876        if (location->ipType==functionEntry &&
877            isInsnType(location->delaySlotInsn,CALLmask,CALLmatch)) {
878           Address call_offset = location->addr + 8 + 
879                                 (location->delaySlotInsn.call.disp30<<2);
880           Address fun_addr = location->func->getAddress(0);
881           u_int fun_size = location->func->size();
882           if (call_offset>fun_addr && call_offset<(fun_addr+fun_size)) {
883              assert(location->isLongJump);
884              need_to_add = true;
885           }
886        }        
887
888        if (location->hasNoStackFrame()) {
889           // if it is the leaf function, we need to generate
890           // the following instruction sequence:
891           //     SAVE;      CALL;      NOP.
892
893           if (location -> isLongJump == false) {
894              instruction *insn = new instruction;
895              generateBranchInsn(insn, (int)(ret->baseAddr-location->addr));
896              retInstance = new returnInstance((instructUnion *)insn,
897                                               sizeof(instruction), adr, 
898                                               sizeof(instruction));
899           } else if (need_to_add) {
900              // generate  original; call; add $o7 imm4 
901              instruction *insn = new instruction[2];
902              generateCallInsn(insn, adr+4, (int) ret->baseAddr);
903              genImmInsn(insn+1,ADDop3,REG_O7,4,REG_O7);
904              retInstance = new returnInstance((instructUnion *)insn,
905                                  2*sizeof(instruction), adr+4,
906                                  2*sizeof(instruction));
907
908           } else {
909              instruction *insn = new instruction[3];
910              genImmInsn(insn, SAVEop3, REG_SP, -112, REG_SP);
911              generateCallInsn(insn+1, adr+4, (int) ret->baseAddr);
912              generateNOOP(insn+2);
913              retInstance = new returnInstance((instructUnion *)insn, 
914                                                      3*sizeof(instruction), adr, 
915                                                      3*sizeof(instruction));
916                     
917           }
918                 
919           assert(retInstance);
920        } else {
921           // It's not a leaf.
922           // Generate branch instruction from the application to the
923           // base trampoline and no SAVE instruction is needed
924                 
925           if (in1BranchInsnRange(adr, ret->baseAddr)) {
926              // make sure that the isLongJump won't be true
927              // which only is possible for shlib entry point 
928              assert(location->isLongJump == false);
929              instruction *insn = new instruction;
930              if (location -> ipType == functionEntry) {
931                 generateBranchInsn(insn, (int)(ret->baseAddr -
932                                                location->addr+sizeof(instruction))); 
933                 retInstance = new returnInstance((instructUnion *)insn,
934                                                  sizeof(instruction), 
935                                                  adr - sizeof(instruction), 
936                                                  sizeof(instruction));
937              } else {
938                 generateBranchInsn(insn, (int)(ret->baseAddr-location->addr));
939                 retInstance = new returnInstance((instructUnion *)insn,
940                                                  sizeof(instruction), 
941                                                  adr, 
942                                                  sizeof(instruction));
943              }
944           } else if(need_to_add) {
945              // the delay slot instruction is is a call to a location
946              // within the same function, then need to generate 3 instrs
947              //    call
948              //    nop          // delay slot (originally call insn)
949              //    add o7 imm4  // sets o7 register to correct value
950              instruction *insn = new instruction[3];    
951              generateCallInsn(insn, adr, (int) ret->baseAddr);
952              generateNOOP(insn+1);
953              genImmInsn(insn+2,ADDop3,REG_O7,4,REG_O7);
954              retInstance = new returnInstance((instructUnion *)insn, 
955                                               3*sizeof(instruction), adr, 
956                                               3*sizeof(instruction));
957           } else {
958              instruction *insn = new instruction[2];    
959              generateCallInsn(insn, adr, (int) ret->baseAddr);
960              if (location -> ipType == functionEntry) {
961                 if (location -> isLongJump)
962                    generateNOOP(insn+1);
963                 else
964                    genSimpleInsn(insn+1, RESTOREop3, 0, 0, 0);
965              } else
966                 generateNOOP(insn+1);
967
968              retInstance = new returnInstance((instructUnion *)insn, 
969                                               2*sizeof(instruction), adr, 
970                                               2*sizeof(instruction));
971              assert(retInstance);
972           }
973        }
974     }
975
976     proc->baseMap[(const instPoint *)location] = ret;
977         
978     return(ret);
979        // remember, ret was the result of either installBaseTramp() or
980        // installBaseTrampSpecial()
981 }
982
983 /*
984  * Install a single tramp.
985  *
986  */
987 void installTramp(instInstance *inst, char *code, int codeSize) 
988 {
989     totalMiniTramps++;
990     insnGenerated += codeSize/sizeof(int);
991     
992     // TODO cast
993     (inst->proc)->writeDataSpace((caddr_t)inst->trampBase, codeSize, code);
994
995     unsigned atAddr;
996     if (inst->when == callPreInsn) {
997         if (inst->baseInstance->prevInstru == false) {
998             atAddr = inst->baseInstance->baseAddr+baseTemplate.skipPreInsOffset;
999             inst->baseInstance->cost += inst->baseInstance->prevBaseCost;
1000             inst->baseInstance->prevInstru = true;
1001             generateNoOp(inst->proc, atAddr);
1002         }
1003     } else {
1004         if (inst->baseInstance->postInstru == false) {
1005             atAddr = inst->baseInstance->baseAddr+baseTemplate.skipPostInsOffset; 
1006             inst->baseInstance->cost += inst->baseInstance->postBaseCost;
1007             inst->baseInstance->postInstru = true;
1008             generateNoOp(inst->proc, atAddr);
1009         }
1010     }
1011 }
1012
1013
1014 unsigned emitFuncCall(opCode op, 
1015                       registerSpace *rs,
1016                       char *i, unsigned &base, 
1017                       const vector<AstNode *> &operands, 
1018                       const string &callee, process *proc,
1019                       bool noCost)
1020 {
1021         assert(op == callOp);
1022         unsigned addr;
1023         bool err;
1024         vector <reg> srcs;
1025         void cleanUpAndExit(int status);
1026
1027         addr = proc->findInternalAddress(callee, false, err);
1028
1029         if (err) {
1030             function_base *func = proc->findOneFunction(callee);
1031             if (!func) {
1032                 ostrstream os(errorLine, 1024, ios::out);
1033                 os << "Internal error: unable to find addr of " << callee << endl;
1034                 logLine(errorLine);
1035                 showErrorCallback(80, (const char *) errorLine);
1036                 P_abort();
1037             }
1038             // TODO: is this correct or should we get relocated address?
1039             addr = func->getAddress(0);
1040         }
1041         
1042         for (unsigned u = 0; u < operands.size(); u++)
1043             srcs += operands[u]->generateCode(proc, rs, i, base, noCost);
1044
1045         // TODO cast
1046         instruction *insn = (instruction *) ((void*)&i[base]);
1047
1048         for (unsigned u=0; u<srcs.size(); u++){
1049             if (u >= 5) {
1050                  string msg = "Too many arguments to function call in instrumentation code: only 5 arguments can be passed on the sparc architecture.\n";
1051                  fprintf(stderr, msg.string_of());
1052                  showErrorCallback(94,msg);
1053                  cleanUpAndExit(-1);
1054             }
1055             genSimpleInsn(insn, ORop3, 0, srcs[u], u+8); insn++;
1056             base += sizeof(instruction);
1057             rs->freeRegister(srcs[u]);
1058         }
1059
1060         // As Ling pointed out to me, the following is rather inefficient.  It does:
1061         //   sethi %hi(addr), %o5
1062         //   jmpl %o5 + %lo(addr), %o7   ('call' pseudo-instr)
1063         //   nop
1064         // We can do better:
1065         //   call <addr>    (but note that the call true-instr is pc-relative jump)
1066         //   nop
1067         generateSetHi(insn, addr, 13); insn++;
1068         genImmInsn(insn, JMPLop3, 13, LOW10(addr), 15); insn++;
1069         generateNOOP(insn);
1070
1071         base += 3 * sizeof(instruction);
1072
1073         // return value is the register with the return value from the
1074         //   function.
1075         // This needs to be %o0 since it is back in the callers scope.
1076         return(8);
1077 }
1078  
1079 unsigned emit(opCode op, reg src1, reg src2, reg dest, char *i, unsigned &base,
1080               bool noCost)
1081 {
1082     // TODO cast
1083     instruction *insn = (instruction *) ((void*)&i[base]);
1084
1085     if (op == loadConstOp) {
1086       // dest = src1:imm    TODO
1087       if (src1 > MAX_IMM13 || src1 < MIN_IMM13) {
1088             // src1 is out of range of imm13, so we need an extra instruction
1089             generateSetHi(insn, src1, dest);
1090             base += sizeof(instruction);
1091             insn++;
1092
1093             // or regd,imm,regd
1094
1095             // Chance for optimization: we should check for LOW10(src1)==0, and
1096             // if so, don't generate the following bitwise-or instruction, since
1097             // in that case nothing would be done.
1098
1099             genImmInsn(insn, ORop3, dest, LOW10(src1), dest);
1100             base += sizeof(instruction);
1101         } else {
1102             // really or %g0,imm,regd
1103             genImmInsn(insn, ORop3, 0, src1, dest);
1104
1105             base += sizeof(instruction);
1106         }
1107     } else if (op ==  loadOp) {
1108         // dest = [src1]   TODO
1109         generateSetHi(insn, src1, dest);
1110         insn++;
1111
1112         generateLoad(insn, dest, LOW10(src1), dest);
1113
1114         base += sizeof(instruction)*2;
1115     } else if (op ==  loadIndirOp) {
1116         generateLoad(insn, src1, 0, dest);
1117         base += sizeof(instruction);
1118     } else if (op ==  storeOp) {
1119         insn->sethi.op = FMT2op;
1120         insn->sethi.rd = src2;
1121         insn->sethi.op2 = SETHIop2;
1122         insn->sethi.imm22 = HIGH22(dest);
1123         insn++;
1124
1125         generateStore(insn, src1, src2, LOW10(dest));
1126
1127         base += sizeof(instruction)*2;
1128     } else if (op ==  storeIndirOp) {
1129         generateStore(insn, src1, dest, 0);
1130         base += sizeof(instruction);
1131     } else if (op ==  ifOp) {
1132         // cmp src1,0
1133         genSimpleInsn(insn, SUBop3cc, src1, 0, 0); insn++;
1134
1135         insn->branch.op = 0;
1136         insn->branch.cond = BEcond;
1137         insn->branch.op2 = BICCop2;
1138         insn->branch.anneal = false;
1139         insn->branch.disp22 = dest/4;
1140         insn++;
1141
1142         generateNOOP(insn);
1143         base += sizeof(instruction)*3;
1144         return(base - 2*sizeof(instruction));
1145     } else if (op == branchOp) {
1146         // Unconditional branch
1147         generateBranchInsn(insn, dest); insn++;
1148
1149         generateNOOP(insn);
1150         base += sizeof(instruction)*2;
1151         return(base - 2*sizeof(instruction));
1152     } else if (op ==  updateCostOp) {
1153         // generate code to update the observed cost.
1154         if (!noCost) {
1155            // sethi %hi(dest), %l0
1156            generateSetHi(insn, dest, REG_L0);
1157            base += sizeof(instruction);
1158            insn++;
1159   
1160            // ld [%l0+ lo(dest)], %l1
1161            generateLoad(insn, REG_L0, LOW10(dest), REG_L1);
1162            base += sizeof(instruction);
1163            insn++;
1164   
1165            // update value (src1 holds the cost, in cycles; e.g. 19)
1166            if (src1 <= MAX_IMM13) {
1167               genImmInsn(insn, ADDop3, REG_L1, src1, REG_L1);
1168               base += sizeof(instruction);
1169               insn++;
1170
1171               generateNOOP(insn);
1172               base += sizeof(instruction);
1173               insn++;
1174
1175               generateNOOP(insn);
1176               base += sizeof(instruction);
1177               insn++;
1178            } else {
1179               // load in two parts
1180               generateSetHi(insn, src1, REG_L2);
1181               base += sizeof(instruction);
1182               insn++;
1183
1184               // or regd,imm,regd
1185               genImmInsn(insn, ORop3, REG_L2, LOW10(src1), REG_L2);
1186               base += sizeof(instruction);
1187               insn++;
1188
1189               // now add it
1190               genSimpleInsn(insn, ADDop3, REG_L1, REG_L2, REG_L1);
1191               base += sizeof(instruction);
1192               insn++;
1193            }
1194   
1195            // store result st %l1, [%l0+ lo(dest)];
1196            generateStore(insn, REG_L1, REG_L0, LOW10(dest));
1197            base += sizeof(instruction);
1198            insn++;
1199         } // if (!noCost)
1200     } else if (op ==  trampPreamble) {
1201 #ifdef ndef
1202         // save and restore are done inthe base tramp now
1203         genImmInsn(insn, SAVEop3, REG_SP, -112, REG_SP);
1204         base += sizeof(instruction);
1205         insn++;
1206
1207         // generate code to save global registers
1208         for (unsigned u = 0; u < 4; u++) {
1209           genStoreD(insn, 2*u, REG_FP, - (8 + 8*u));
1210           base += sizeof(instruction);
1211           insn++;
1212         }
1213 #endif
1214     } else if (op ==  trampTrailer) {
1215 #ifdef ndef
1216         // save and restore are done inthe base tramp now
1217         // generate code to restore global registers
1218         for (unsigned u = 0; u < 4; u++) {
1219           genLoadD(insn, REG_FP, - (8 + 8*u), 2*u);
1220           base += sizeof(instruction);
1221           insn++;
1222         }
1223
1224         // sequence: restore; nop; b,a back to base tramp; nop
1225         // we can do better.  How about putting the restore in
1226         // the delay slot of the branch instruction, as in:
1227         // b <back to base tramp>; restore
1228         genSimpleInsn(insn, RESTOREop3, 0, 0, 0); 
1229         base += sizeof(instruction);
1230         insn++;
1231
1232         generateNOOP(insn);
1233         base += sizeof(instruction);
1234         insn++;
1235 #endif
1236         // dest is in words of offset and generateBranchInsn is bytes offset
1237         generateBranchInsn(insn, dest << 2);
1238         base += sizeof(instruction);
1239         insn++;
1240
1241         // add no-op, SS-5 sometimes seems to try to decode this insn - jkh 2/14
1242         generateNOOP(insn);
1243         insn++;
1244         base += sizeof(instruction);
1245
1246         return(base -  2 * sizeof(instruction));
1247     } else if (op == noOp) {
1248         generateNOOP(insn);
1249         base += sizeof(instruction);
1250     } else if (op == getParamOp) {
1251 #if defined(SHM_SAMPLING) && defined(MT_THREAD)
1252         // saving CT/vector address on the stack
1253         generateStore(insn, REG_MT, REG_FP, -40);
1254         insn++;
1255 #endif
1256         // first 8 parameters are in register 24 ....
1257         genSimpleInsn(insn, RESTOREop3, 0, 0, 0);
1258         insn++;
1259
1260         generateStore(insn, 24+src1, REG_SP, 68+4*src1); 
1261         insn++;
1262               
1263         genImmInsn(insn, SAVEop3, REG_SP, -112, REG_SP);
1264         insn++;
1265
1266         generateLoad(insn, REG_SP, 112+68+4*src1, 24+src1); 
1267         insn++;
1268
1269 #if defined(SHM_SAMPLING) && defined(MT_THREAD)
1270         // restoring CT/vector address back in REG_MT
1271         generateLoad(insn, REG_FP, -40, REG_MT);
1272         insn++;
1273         base += 6*sizeof(instruction);
1274 #else
1275         base += 4*sizeof(instruction);
1276 #endif
1277         
1278         if (src1 <= 8) {
1279             return(24+src1);
1280         }
1281         
1282         abort();
1283     } else if (op == getSysParamOp) {
1284         
1285         if (src1 <= 8) {
1286             return(24+src1);
1287         }       
1288     } else if (op == getRetValOp) {
1289         // return value is in register 24
1290         genSimpleInsn(insn, RESTOREop3, 0, 0, 0);
1291         insn++;
1292
1293         generateStore(insn, 24, REG_SP, 68); 
1294         insn++;
1295               
1296         genImmInsn(insn, SAVEop3, REG_SP, -112, REG_SP);
1297         insn++;
1298
1299         generateLoad(insn, REG_SP, 112+68, 24); 
1300         insn++;
1301
1302         base += 4*sizeof(instruction);
1303
1304         return(24);
1305
1306     } else if (op == getSysRetValOp) {
1307
1308         return(24);
1309     } else if (op == saveRegOp) {
1310         // should never be called for this platform.
1311         abort();
1312     } else {
1313       int op3=-1;
1314         switch (op) {
1315             // integer ops
1316             case plusOp:
1317                 op3 = ADDop3;
1318                 break;
1319
1320             case minusOp:
1321                 op3 = SUBop3;
1322                 break;
1323
1324             case timesOp:
1325                 op3 = SMULop3;
1326                 break;
1327
1328             case divOp:
1329                 op3 = SDIVop3;
1330                 //need to set the Y register to Zero, Zhichen
1331                 genImmInsn(insn, WRYop3, REG_G0, 0, 0);
1332                 base += sizeof(instruction);
1333                 insn = (instruction *) ((void*)&i[base]);
1334                 break;
1335
1336             // Bool ops
1337             case orOp:
1338                 op3 = ORop3;
1339                 break;
1340
1341             case andOp:
1342                 op3 = ANDop3;
1343                 break;
1344
1345             // rel ops
1346             // For a particular condition (e.g. <=) we need to use the
1347             // the opposite in order to get the right value (e.g. for >=
1348             // we need BLTcond) - naim
1349             case eqOp:
1350                 genRelOp(insn, BNEcond, src1, src2, dest, base);
1351                 return(0);
1352                 break;
1353
1354             case neOp:
1355                 genRelOp(insn, BEcond, src1, src2, dest, base);
1356                 return(0);
1357                 break;
1358
1359             case lessOp:
1360                 genRelOp(insn, BGEcond, src1, src2, dest, base);
1361                 return(0);
1362                 break;
1363
1364             case leOp:
1365                 genRelOp(insn, BGTcond, src1, src2, dest, base);
1366                 return(0);
1367                 break;
1368
1369             case greaterOp:
1370                 genRelOp(insn, BLEcond, src1, src2, dest, base);
1371                 return(0);
1372                 break;
1373
1374             case geOp:
1375                 genRelOp(insn, BLTcond, src1, src2, dest, base);
1376                 return(0);
1377                 break;
1378
1379             default:
1380                 abort();
1381                 break;
1382         }
1383         genSimpleInsn(insn, op3, src1, src2, dest);
1384
1385         base += sizeof(instruction);
1386       }
1387     return(0);
1388 }
1389
1390 /*
1391  * Find the instPoints of this function.
1392  */
1393 bool pd_Function::findInstPoints(const image *owner) {
1394
1395    if (size() == 0) {
1396      return false;
1397    }
1398
1399    noStackFrame = true;
1400
1401    Address adr;
1402    Address adr1 = getAddress(0);
1403    instruction instr;
1404    instr.raw = owner->get_instruction(adr1);
1405    if (!IS_VALID_INSN(instr))
1406      return false;
1407
1408    // If it contains a TRAP instruction, I assume it is a system call,
1409    // which will be treated differently.
1410    isTrap = false;
1411    bool func_entry_found = false;
1412
1413    for ( ; adr1 < getAddress(0) + size(); adr1 += 4) {
1414        instr.raw = owner->get_instruction(adr1);
1415
1416        // If there's an TRAP instruction in the function, we 
1417        // assume that it is an system call and will relocate it 
1418        // to the heap
1419        if (isInsnType(instr, TRAPmask, TRAPmatch)) {
1420            isTrap = true;
1421            return findInstPoints(owner, getAddress(0), 0);
1422        } 
1423
1424        // TODO: This is a hack for Solaris (Solaris 2.5, actually).
1425        // We will relocate the function if it has been
1426        // tail-call optimized.
1427        // (Actually, the reason for this is that system calls like
1428        //  read, write, etc. are tail-call optimized to call
1429        //  _read, _write, etc., which contain the TRAP instruction.
1430        //  This is only done if libc is statically linked, i.e. if the
1431        //  libTag is set; otherwise, in the dynamically linked case,
1432        //  we instrument both read and _read.)
1433        if (isLibTag()) {
1434            if (isCallInsn(instr)) {
1435                instruction nexti; 
1436                nexti.raw = owner->get_instruction(adr1+4);
1437                
1438                if (nexti.rest.op == 2 
1439                    && ((nexti.rest.op3 == ORop3 && nexti.rest.rd == 15)
1440                        || nexti.rest.op3 == RESTOREop3)) {
1441                    isTrap = true;
1442                    return findInstPoints(owner, getAddress(0), 0);
1443                }
1444            }   
1445        }
1446
1447        // The function Entry is defined as the first SAVE instruction plus
1448        // the instructions after this.
1449        // ( The first instruction for the nonleaf function is not 
1450        //   necessarily a SAVE instruction. ) 
1451        if (isInsnType(instr, SAVEmask, SAVEmatch) && !func_entry_found) {
1452            noStackFrame = false;
1453
1454            func_entry_found = true;
1455            funcEntry_ = new instPoint(this, instr, owner, adr1, true,
1456                                       functionEntry);
1457            adr = adr1;
1458            assert(funcEntry_);
1459        }
1460    }
1461
1462    // If there's no SAVE instruction found, this is a leaf function and
1463    // and function Entry will be defined from the first instruction
1464    if (noStackFrame) {
1465        adr = getAddress(0);
1466        instr.raw = owner->get_instruction(adr);
1467        funcEntry_ = new instPoint(this, instr, owner, adr, true,
1468                                   functionEntry);
1469        assert(funcEntry_);
1470    }
1471
1472    for ( ; adr < getAddress(0) + size(); adr += sizeof(instruction)) {
1473
1474      instr.raw = owner->get_instruction(adr);
1475
1476      bool done;
1477
1478      // check for return insn and as a side affect decide if we are at the
1479      //   end of the function.
1480      if (isReturnInsn(owner, adr, done)) {
1481        // define the return point
1482        funcReturns += new instPoint(this, instr, owner, adr, false,
1483                                     functionExit);
1484
1485      } else if (instr.branch.op == 0 
1486                 && (instr.branch.op2 == 2 || instr.branch.op2 == 6) 
1487                 && (instr.branch.cond == 0 ||instr.branch.cond == 8)) {
1488        // find if this branch is going out of the function
1489        int disp = instr.branch.disp22;
1490        Address target = adr +  (disp << 2);
1491        if ((target < (getAddress(0)))  
1492            || (target >= (getAddress(0) + size()))) {
1493          instPoint *point = new instPoint(this, instr, owner, adr, false,
1494                                           functionExit);
1495          funcReturns += point;
1496        }
1497
1498      } else if (isCallInsn(instr)) {
1499
1500        // if the call target is the address of the call instruction
1501        // then this is not something that we can instrument...
1502        // this occurs in functions with code that is modifined when 
1503        // they are loaded by the run-time linker, or when the .init
1504        // section is executed.  In this case the instructions in the
1505        // parsed image file are different from the ones in the executable
1506        // process.
1507        if(instr.call.op == CALLop) { 
1508            Address call_target = adr + (instr.call.disp30 << 2);
1509            if(call_target == adr){ 
1510                 return false;
1511        }}
1512        // first, check for tail-call optimization: a call where the instruction 
1513        // in the delay slot write to register %o7(15), usually just moving
1514        // the caller's return address, or doing a restore
1515        // Tail calls are instrumented as return points, not call points.
1516
1517
1518        instruction nexti; 
1519        nexti.raw = owner->get_instruction(adr+4);
1520
1521        if (nexti.rest.op == 2 
1522            && ((nexti.rest.op3 == ORop3 && nexti.rest.rd == 15)
1523               || nexti.rest.op3 == RESTOREop3)) {
1524          //fprintf(stderr, "#### Tail-call optimization in function %s, addr %x\n",
1525          //     prettyName().string_of(), adr);
1526          funcReturns += new instPoint(this, instr, owner, adr, false,
1527                                       functionExit);
1528
1529        } else {
1530          // define a call point
1531          // this may update address - sparc - aggregate return value
1532          // want to skip instructions
1533          bool err;
1534          int dummyId;
1535          instPoint *blah = 0;
1536          adr = newCallPoint(adr, instr, owner, err, dummyId, adr,0,blah);
1537        }
1538      }
1539
1540      else if (isInsnType(instr, JMPLmask, JMPLmatch)) {
1541        /* A register indirect jump. Some jumps may exit the function 
1542           (e.g. read/write on SunOS). In general, the only way to 
1543           know if a jump is exiting the function is to instrument
1544           the jump to test if the target is outside the current 
1545           function. Instead of doing this, we just check the 
1546           previous two instructions, to see if they are loading
1547           an address that is out of the current function.
1548           This should catch the most common cases (e.g. read/write).
1549           For other cases, we would miss a return point.
1550
1551           This is the case considered:
1552
1553              sethi addr_hi, r
1554              or addr_lo, r, r
1555              jump r
1556         */
1557
1558        reg jumpreg = instr.rest.rs1;
1559        instruction prev1;
1560        instruction prev2;
1561
1562        prev1.raw = owner->get_instruction(adr-4);
1563        prev2.raw = owner->get_instruction(adr-8);
1564
1565        unsigned targetAddr;
1566
1567        if (instr.rest.rd == 0 && (instr.rest.i == 1 || instr.rest.rs2 == 0)
1568            && prev2.sethi.op == FMT2op && prev2.sethi.op2 == SETHIop2 
1569            && prev2.sethi.rd == (unsigned)jumpreg
1570            && prev1.rest.op == RESTop 
1571            && prev1.rest.rd == (unsigned)jumpreg && prev1.rest.i == 1
1572            && prev1.rest.op3 == ORop3 && prev1.rest.rs1 == (unsigned)jumpreg) {
1573
1574          targetAddr = (prev2.sethi.imm22 << 10) & 0xfffffc00;
1575          targetAddr |= prev1.resti.simm13;
1576          if ((targetAddr<getAddress(0))||(targetAddr>=(getAddress(0)+size()))){
1577            instPoint *point = new instPoint(this, instr, owner, adr, false, 
1578                                             functionExit);
1579            funcReturns += point;
1580          }
1581        }
1582
1583      }
1584  }
1585
1586  return (checkInstPoints(owner)); 
1587 }
1588
1589 /*
1590  * Check all the instPoints within this function to see if there's 
1591  * any conficts happen.
1592  */
1593 bool pd_Function::checkInstPoints(const image *owner) {
1594
1595     // Our own library function, skip the test.
1596     if (prettyName().prefixed_by("DYNINST")) 
1597         return true;
1598
1599 #ifndef BPATCH_LIBRARY /* XXX Users of libdyninstAPI might not agree. */
1600     // The function is too small to be worthing instrumenting.
1601     if (size() <= 12){
1602         return false;
1603     }
1604 #endif
1605
1606     // No function return! return false;
1607     if (sizeof(funcReturns) == 0) {
1608         return false;
1609     }
1610
1611     instruction instr;
1612     Address adr = getAddress(0);
1613
1614     bool retl_inst = false;
1615     // Check if there's any branch instruction jump to the middle
1616     // of the instruction sequence in the function entry point
1617     // and function exit point.
1618     for ( ; adr < getAddress(0) + size(); adr += sizeof(instruction)) {
1619
1620         instr.raw = owner->get_instruction(adr);
1621         if(isInsnType(instr, RETLmask, RETLmatch)) retl_inst = true;
1622
1623         if (isInsnType(instr, BRNCHmask, BRNCHmatch)||
1624             isInsnType(instr, FBRNCHmask, FBRNCHmatch)) {
1625
1626             int disp = instr.branch.disp22;
1627             Address target = adr + (disp << 2);
1628
1629             if ((target > funcEntry_->addr)&&
1630                 (target < (funcEntry_->addr + funcEntry_->size))) {
1631                 if (adr > (funcEntry_->addr+funcEntry_->size)){
1632                     //cout << "Function " << prettyName().string_of() <<" entry" << endl;
1633                     return false;
1634             } }
1635
1636             for (u_int i = 0; i < funcReturns.size(); i++) {
1637                 if ((target > funcReturns[i]->addr)&&
1638                     (target < (funcReturns[i]->addr + funcReturns[i]->size))) {
1639                     if ((adr < funcReturns[i]->addr)||
1640                         (adr > (funcReturns[i]->addr + funcReturns[i]->size))){
1641                         return false;
1642                 } }
1643             }
1644         }
1645     }
1646
1647     // if there is a retl instruction and we don't think this is a leaf
1648     // function then this is a way messed up function...well, at least we
1649     // we can't deal with this...the only example I can find is _cerror
1650     // and _cerror64 in libc.so.1
1651     if(retl_inst && !noStackFrame){
1652          return false;
1653     }
1654
1655     // check that no instrumentation points could overlap
1656     Address func_entry = funcEntry_->addr + funcEntry_->size; 
1657     for (u_int i = 0; i < funcReturns.size(); i++) {
1658         if(func_entry >= funcReturns[i]->addr){
1659            return false;
1660         }
1661         if(i >= 1){ // check if return points overlap
1662             Address prev_exit = funcReturns[i-1]->addr+funcReturns[i-1]->size;  
1663             if(funcReturns[i]->addr < prev_exit) {
1664                 return false;
1665             } 
1666         }
1667     }
1668
1669     return true;        
1670 }
1671
1672
1673 /* The maximum length of relocatable function is 1k instructions */  
1674 // This function is to find the inst Points for a function
1675 // that will be relocated if it is instrumented. 
1676 bool pd_Function::findInstPoints(const image *owner, Address newAdr, process*){
1677
1678    int i;
1679    if (size() == 0) {
1680      return false;
1681    }
1682    relocatable_ = true;
1683
1684    Address adr = getAddress(0);
1685    instruction instr;
1686    instr.raw = owner->get_instruction(adr);
1687    if (!IS_VALID_INSN(instr))
1688      return false;
1689    
1690    if (size() <= 3*sizeof(instruction)) 
1691        return false;
1692
1693    instPoint *point = new instPoint(this, instr, owner, newAdr, true, 
1694                                     functionEntry, adr);
1695
1696    funcEntry_ = point;
1697
1698    // if the second instruction in a relocated function is a call instruction
1699    // or a branch instruction, then we can't deal with this 
1700    if(size() > sizeof(instruction)){
1701        Address second_adr = adr + sizeof(instruction);
1702        instruction second_instr;
1703        second_instr.raw =  owner->get_instruction(second_adr); 
1704        if ((isCallInsn(second_instr)) || 
1705                       (second_instr.branch.op == 0 && 
1706                       (second_instr.branch.op2 == 2 || 
1707                       second_instr.branch.op2 == 6))) {
1708            return false;
1709        }
1710    }
1711    
1712    assert(funcEntry_);
1713    int retId = 0;
1714    int callsId = 0; 
1715
1716    for (i = 0; adr < getAddress(0) + size(); adr += sizeof(instruction),  
1717         newAdr += sizeof(instruction), i++) {
1718
1719      instr.raw = owner->get_instruction(adr);
1720      newInstr[i] = instr;
1721      bool done;
1722
1723      // check for return insn and as a side affect decide if we are at the
1724      //   end of the function.
1725      if (isReturnInsn(owner, adr, done)) {
1726        // define the return point
1727        instPoint *point = new instPoint(this, instr, owner, newAdr, false, 
1728                                         functionExit, adr);
1729        funcReturns += point;
1730        funcReturns[retId] -> instId = retId++;
1731      } else if (instr.branch.op == 0 
1732                 && (instr.branch.op2 == 2 || instr.branch.op2 == 6)) {
1733        // find if this branch is going out of the function
1734        int disp = instr.branch.disp22;
1735        Address target = adr + (disp << 2);
1736        if (target < getAddress(0) || target >= getAddress(0) + size()) {
1737            instPoint *point = new instPoint(this, newInstr[i], owner, 
1738                                             newAdr, false, 
1739                                             functionExit, adr);
1740            if ((instr.branch.cond != 0) && (instr.branch.cond != 8)) {  
1741                point->isBranchOut = true;
1742                point->branchTarget = target;
1743            }
1744            funcReturns += point;
1745            funcReturns[retId] -> instId = retId++;
1746        }
1747
1748      } else if (isCallInsn(instr)) {
1749
1750        // first, check for tail-call optimization: a call where the instruction 
1751        // in the delay slot write to register %o7(15), usually just moving
1752        // the caller's return address, or doing a restore
1753        // Tail calls are instrumented as return points, not call points.
1754        instruction nexti; 
1755        nexti.raw = owner->get_instruction(adr+4);
1756
1757        if (nexti.rest.op == 2 
1758            && ((nexti.rest.op3 == ORop3 && nexti.rest.rd == 15)
1759               || nexti.rest.op3 == RESTOREop3)) {
1760
1761            instPoint *point = new instPoint(this, instr, owner, newAdr, false,
1762                                       functionExit, adr);
1763            funcReturns += point;
1764            funcReturns[retId] -> instId = retId++;
1765
1766        } else {
1767          // if this is a call instr to a location within the function, and if 
1768          // the offest is not 8 then do not define this function 
1769          if(instr.call.op == CALLop){ 
1770            Address call_target = adr + (instr.call.disp30 << 2);
1771            if((call_target >= getAddress(0)) 
1772               && (call_target <= (getAddress(0)+size()))){ 
1773               if((instr.call.disp30 << 2) != 2*sizeof(instruction)) {
1774                 return false;
1775               }
1776            }
1777          }
1778          // define a call point
1779          // this may update address - sparc - aggregate return value
1780          // want to skip instructions
1781          bool err;
1782          instPoint *blah = 0;
1783          adr = newCallPoint(newAdr, instr, owner, err, callsId, adr,0,blah);
1784          if (err)
1785            return false;
1786        }
1787      }
1788
1789      else if (isInsnType(instr, JMPLmask, JMPLmatch)) {
1790        /* A register indirect jump. Some jumps may exit the function 
1791           (e.g. read/write on SunOS). In general, the only way to 
1792           know if a jump is exiting the function is to instrument
1793           the jump to test if the target is outside the current 
1794           function. Instead of doing this, we just check the 
1795           previous two instructions, to see if they are loading
1796           an address that is out of the current function.
1797           This should catch the most common cases (e.g. read/write).
1798           For other cases, we would miss a return point.
1799
1800           This is the case considered:
1801
1802              sethi addr_hi, r
1803              or addr_lo, r, r
1804              jump r
1805         */
1806
1807          reg jumpreg = instr.rest.rs1;
1808          instruction prev1;
1809          instruction prev2;
1810          
1811          prev1.raw = owner->get_instruction(adr-4);
1812          prev2.raw = owner->get_instruction(adr-8);
1813
1814          unsigned targetAddr;
1815
1816          if (instr.rest.rd == 0 && (instr.rest.i == 1 || instr.rest.rs2 == 0)
1817              && prev2.sethi.op == FMT2op && prev2.sethi.op2 == SETHIop2 
1818              && prev2.sethi.rd == (unsigned)jumpreg
1819              && prev1.rest.op == RESTop 
1820              && prev1.rest.rd == (unsigned)jumpreg && prev1.rest.i == 1
1821              && prev1.rest.op3 == ORop3 && prev1.rest.rs1 == (unsigned)jumpreg){
1822              
1823              targetAddr = (prev2.sethi.imm22 << 10) & 0xfffffc00;
1824              targetAddr |= prev1.resti.simm13;
1825              if ((targetAddr < getAddress(0)) 
1826                  || (targetAddr >= (getAddress(0)+size()))) {
1827                  instPoint *point = new instPoint(this, instr, owner, 
1828                                                   newAdr, false,
1829                                                   functionExit, adr);
1830                  funcReturns += point;
1831                  funcReturns[retId] -> instId = retId++;
1832              }
1833          }
1834      }
1835  }
1836  return true;
1837 }
1838
1839 // This function assigns new address to instrumentation points of  
1840 // a function that has been relocated
1841 bool pd_Function::findNewInstPoints(const image *owner, 
1842                                 const instPoint *&location,
1843                                 Address newAdr,
1844                                 process *proc,
1845                                 vector<instruction> &callInstrs,
1846                                 relocatedFuncInfo *reloc_info) {
1847
1848    int i;
1849    if (size() == 0) {
1850      return false;
1851    }
1852    assert(reloc_info);
1853
1854    Address adr = getAddress(0);
1855    instruction instr;
1856    instr.raw = owner->get_instruction(adr);
1857    if (!IS_VALID_INSN(instr))
1858      return false;
1859
1860    instPoint *point = new instPoint(this, instr, owner, newAdr, true, 
1861                                     functionEntry, adr);
1862    point->relocated_ = true;
1863    // if location was the entry point then change location's value to new pt
1864    if(location == funcEntry_) { 
1865         location = point;
1866    }
1867
1868    reloc_info->addFuncEntry(point);
1869    assert(reloc_info->funcEntry());
1870    int retId = 0;
1871    int callsId = 0; 
1872
1873    // get baseAddress if this is a shared object
1874    Address baseAddress = 0;
1875    if(!(proc->getBaseAddress(owner,baseAddress))){
1876         baseAddress =0;
1877    }
1878
1879    // if we have call instructions that need to be added after the instrs
1880    // to call the relocated instruction, the first address we can use is
1881    // the address of the 4th instruction in the function
1882    Address call_start_addr = getAddress(0)+baseAddress + 3*sizeof(instruction);
1883
1884    for (i = 0; adr < getAddress(0) + size(); adr += 4,  newAdr += 4, i++) {
1885     
1886      instr.raw = owner->get_instruction(adr);
1887      newInstr[i] = instr;
1888
1889      bool done;
1890
1891      // check for return insn and as a side affect decide if we are at the
1892      //   end of the function.
1893      if (isReturnInsn(owner, adr, done)) {
1894        // define the return point
1895        instPoint *point = new instPoint(this, instr, owner, newAdr, false, 
1896                                         functionExit, adr);
1897        point->relocated_ = true;
1898        // if location was this point, change it to new point
1899        if(location == funcReturns[retId]) { 
1900            location = point;
1901        }
1902        retId++;
1903        reloc_info->addFuncReturn(point);
1904      } else if (instr.branch.op == 0 
1905                 && (instr.branch.op2 == 2 || instr.branch.op2 == 6)) {
1906        // find if this branch is going out of the function
1907        int disp = instr.branch.disp22;
1908        Address target = adr + baseAddress + (disp << 2);
1909
1910        // getAddress(0) gives the addr of the fn before it's relocated
1911        if ((target < (getAddress(0) + baseAddress)) 
1912            || (target >= (getAddress(0) + baseAddress + size()))) {
1913            // the original branch went out of the function...
1914
1915            relocateInstruction(&newInstr[i],adr+baseAddress,newAdr,proc);
1916            instPoint *point = new instPoint(this, newInstr[i], owner, 
1917                                             newAdr, false, 
1918                                             functionExit, adr);
1919            point->relocated_ = true;
1920            disp = newInstr[i].branch.disp22;
1921            if ((instr.branch.cond != 0) && (instr.branch.cond != 8)) {  
1922                point->isBranchOut = true;
1923                point->branchTarget = adr + (disp<<2);
1924            }
1925            // if location was this point, change it to new point
1926            if(location == funcReturns[retId]) { 
1927                location = point;
1928            }
1929            retId++;
1930            reloc_info->addFuncReturn(point);
1931        }
1932
1933      } else if (isCallInsn(instr)) {
1934
1935        // first, check for tail-call optimization: a call where the instruction 
1936        // in the delay slot write to register %o7(15), usually just moving
1937        // the caller's return address, or doing a restore
1938        // Tail calls are instrumented as return points, not call points.
1939        instruction nexti; 
1940        nexti.raw = owner->get_instruction(adr+4);
1941
1942        if (nexti.rest.op == 2 
1943            && ((nexti.rest.op3 == ORop3 && nexti.rest.rd == 15)
1944               || nexti.rest.op3 == RESTOREop3)) {
1945
1946             // Undoing the tail-call optimazation when the function
1947             // is relocated. Here is an example:
1948             //   before:          --->             after
1949             // ---------------------------------------------------
1950             //   call  %g1                        restore    
1951             //   restore                          st  %i0, [ %fp + 0x44 ]
1952             //                                    mov %o7 %i0
1953             //                                    call %g1 
1954             //                                    nop
1955             //                                    mov %i0,%o7
1956             //                                    st  [ %fp + 0x44 ], %i0
1957             //                              retl
1958             //                                    nop
1959             // Q: Here the assumption that register i1 is available 
1960             //    might be an question, is it?
1961             // A: I think it is appropriate because:
1962             //      (in situation A calls B and B calls C)
1963             //      The procedure C called via tail call is a leaf 
1964             //      procedure, the value arguments and return value between
1965             //      A and C are passed by register (o1...o5, o7)
1966             //      So even If B mess up the value of i0, it won't affect the
1967             //      commnucation between A and C. Also, we saved the value of
1968             //      i0 on stack and when we return from B, the value of i0
1969             //      won't be affected.
1970             //      If C is not a leaf procedure, it should be fine
1971             //      as it is.
1972             //    ( If you could give an counter-example, please
1973             //      let me know.                         --ling )
1974
1975             genSimpleInsn(&newInstr[i++], RESTOREop3, 0, 0, 0);
1976             generateStore(&newInstr[i++], 24, REG_FP, 0x44); 
1977             genImmInsn(&newInstr[i++], ORop3, 15, 0, 24); 
1978             newInstr[i++].raw = owner->get_instruction(adr);
1979             generateNOOP(&newInstr[i++]);
1980             genImmInsn(&newInstr[i++], ORop3, 24, 0, 15);
1981             generateLoad(&newInstr[i++], REG_FP, 0x44, 24);         
1982             generateJmplInsn(&newInstr[i++], 15, 8 ,0);
1983             newAdr += 28;
1984             generateNOOP(&newInstr[i]);
1985             instPoint *point = new instPoint(this, instr, owner, newAdr, false,
1986                                       functionExit, adr);
1987             point-> originalInstruction = newInstr[i-1];
1988             point-> delaySlotInsn = newInstr[i];
1989             point-> isDelayed = true;
1990             point->relocated_ = true;
1991             // if location was this point, change it to new point
1992             if(location == funcReturns[retId]) { 
1993                 location = point;
1994             }
1995             retId++;
1996             reloc_info->addFuncReturn(point);
1997        } else {
1998          // if the second instruction in the function is a call instruction
1999          // then this cannot go in the delay slot of the branch to the
2000          // base tramp, so add a noop between first and second instructions
2001          // in the relocated function (check out write in libc.so.1 for
2002          // and example of this):
2003          //
2004          //     save  %sp, -96, %sp             brach to base tramp
2005          //     call  0x73b70                   nop
2006          //                                     call 0x73b70
2007          if(adr == (getAddress(0)+4)){
2008              newInstr[i+1] = instr;
2009              generateNOOP(&newInstr[i]);
2010              i++;
2011              newAdr += 4;
2012          }
2013
2014          // if this is a call to an address within the same function, then
2015          // we need to set the 07 register to have the same value as it
2016          // would before the function was relocated
2017          // to do this we generate a call instruction back to the original
2018          // function location, and then at this location we generate a call 
2019          // instruction back to the relocated instruction.  In the delay 
2020          // slot of the second instruction the value of 07 is changed by 
2021          // the difference between the origninal call instruction, and 
2022          // the location of the call instruction back to the relocated
2023          // function.  This way the 07 register will contain the address
2024          // of the original call instruction
2025          Address call_target = adr + (instr.call.disp30 << 2);
2026          if((call_target >= getAddress(0)) 
2027                 && (call_target <= (getAddress(0) + size()))){ 
2028             assert((newInstr[i].call.disp30 << 2) == 8);
2029
2030             // generating call instruction to orginal function address
2031             // after the SAVE call RESTORE instr.s that call the relocated
2032             // function 
2033             newInstr[i].call.disp30 = ((call_start_addr -newAdr) >> 2); 
2034
2035             // generate call to relocated function from original function 
2036             // (this will get almost correct value for register 07)
2037             instruction new_inst;
2038             generateCallInsn(&new_inst,call_start_addr,
2039                              newAdr+sizeof(instruction));
2040             callInstrs += new_inst;
2041            
2042             // generate add isntruction to get correct value for 07 register 
2043             // this will go in delay slot of previous call instr.
2044             genImmInsn(&new_inst,ADDop3,REG_O7,
2045                        (adr+baseAddress-call_start_addr),REG_O7);
2046             callInstrs += new_inst;
2047             call_start_addr += 2*sizeof(instruction);
2048          }
2049          else {
2050             // otherwise, this is a call instruction to a location
2051             // outside the function
2052             bool err;
2053             relocateInstruction(&newInstr[i],adr+baseAddress,newAdr,proc);
2054             (void)newCallPoint(newAdr, newInstr[i], owner, err, 
2055                                callsId, adr,reloc_info,location);
2056             if (err) return false;
2057          }
2058        }
2059      }
2060
2061      else if (isInsnType(instr, JMPLmask, JMPLmatch)) {
2062        /* A register indirect jump. Some jumps may exit the function 
2063           (e.g. read/write on SunOS). In general, the only way to 
2064           know if a jump is exiting the function is to instrument
2065           the jump to test if the target is outside the current 
2066           function. Instead of doing this, we just check the 
2067           previous two instructions, to see if they are loading
2068           an address that is out of the current function.
2069           This should catch the most common cases (e.g. read/write).
2070           For other cases, we would miss a return point.
2071
2072           This is the case considered:
2073
2074              sethi addr_hi, r
2075              or addr_lo, r, r
2076              jump r
2077         */
2078
2079          reg jumpreg = instr.rest.rs1;
2080          instruction prev1;
2081          instruction prev2;
2082          
2083          prev1.raw = owner->get_instruction(adr-4);
2084          prev2.raw = owner->get_instruction(adr-8);
2085
2086          unsigned targetAddr;
2087
2088          if (instr.rest.rd == 0 && (instr.rest.i == 1 || instr.rest.rs2 == 0)
2089              && prev2.sethi.op == FMT2op && prev2.sethi.op2 == SETHIop2 
2090              && prev2.sethi.rd == (unsigned)jumpreg
2091              && prev1.rest.op == RESTop 
2092              && prev1.rest.rd == (unsigned)jumpreg && prev1.rest.i == 1
2093              && prev1.rest.op3 == ORop3 && prev1.rest.rs1 == (unsigned)jumpreg){
2094              
2095              targetAddr = (prev2.sethi.imm22 << 10) & 0xfffffc00;
2096              targetAddr |= prev1.resti.simm13;
2097              if ((targetAddr < getAddress(0)) 
2098                  || (targetAddr >= (getAddress(0)+size()))) {
2099                  instPoint *point = new instPoint(this, instr, owner, 
2100                                                   newAdr, false,
2101                                                   functionExit, adr);
2102                  point->relocated_ = true;
2103                  // if location was this point, change it to new point
2104                  if(location == funcReturns[retId]) { 
2105                      location = point;
2106                  }
2107                  retId++;
2108                  reloc_info->addFuncReturn(point);
2109              }
2110          }
2111      }
2112  }
2113    
2114    return true;
2115 }