2 * Copyright (c) 1996 Barton P. Miller
4 * We provide the Paradyn Parallel Performance Tools (below
5 * described as Paradyn") on an AS IS basis, and do not warrant its
6 * validity or performance. We reserve the right to update, modify,
7 * or discontinue this software at any time. We shall have no
8 * obligation to supply such updates or modifications or any other
9 * form of support to you.
11 * This license is for research uses. For such uses, there is no
12 * charge. We define "research use" to mean you may freely use it
13 * inside your organization for whatever purposes you see fit. But you
14 * may not re-distribute Paradyn or parts of Paradyn, in any form
15 * source or binary (including derivatives), electronic or otherwise,
16 * to any other organization or entity without our permission.
18 * (for other uses, please contact us at paradyn@cs.wisc.edu)
20 * All warranties, including without limitation, any warranty of
21 * merchantability or fitness for a particular purpose, are hereby
24 * By your use of Paradyn, you understand and agree that we (or any
25 * other person or entity with proprietary rights in Paradyn) are
26 * under no obligation to provide either maintenance services,
27 * update services, notices of latent defects, or correction of
28 * defects for Paradyn.
30 * Even if advised of the possibility of such damages, under no
31 * circumstances shall we (or any other person or entity with
32 * proprietary rights in the software licensed hereunder) be liable
33 * to you or any third party for direct, indirect, or consequential
34 * damages of any character regardless of type of action, including,
35 * without limitation, loss of profits, loss of use, loss of good
36 * will, or computer failure or malfunction. You agree to indemnify
37 * us (and any other person or entity with proprietary rights in the
38 * software licensed hereunder) for any and all liability it may
39 * incur to third parties resulting from your use of Paradyn.
42 // $Id: inst-sparc-solaris.C,v 1.93 2001/09/07 21:15:08 tikir Exp $
44 #include "dyninstAPI/src/inst-sparc.h"
45 #include "dyninstAPI/src/instPoint.h"
46 #include "common/h/debugOstream.h"
48 // Needed for function relocation
49 #include "dyninstAPI/src/func-reloc.h"
51 #include <sys/utsname.h>
// Prototypes for helpers that are declared extern here and used by the
// relocation / base-tramp code; their definitions are not part of this
// section.
54 extern bool relocateFunction(process *proc, instPoint *&location);
55 extern bool branchInsideRange(instruction insn, Address branchAddress,
56 Address firstAddress, Address lastAddress);
57 extern instPoint* find_overlap(vector<instPoint*> v, Address targetAddress);
58 extern void sorted_ips_vector(vector<instPoint*>&fill_in);
60 /****************************************************************************/
61 /****************************************************************************/
62 /****************************************************************************/
64 // static unsigned pfdp_to_pfdp_hash(pd_Function * const &f) {
65 // pd_Function *pdf = f;
66 // unsigned l = (unsigned)pdf;
67 // return addrHash4(l);
70 /****************************************************************************/
71 /****************************************************************************/
72 /****************************************************************************/
74 // Another constructor for the class instPoint. This one is called
75 // to define the instPoints for regular functions, which means
76 // multiple instructions are going to be moved to the base trampoline.
77 // Since we will use the instruction CALL to branch to the base
78 // tramp (so it doesn't have any code size restriction), things are
79 // a little more complicated because instruction CALL changes the
80 // value in the link register.
//
// Parameters, as used by the body:
//   instr     - copied into originalInstruction.
//   owner     - image containing the point; stored as image_ptr and asked
//               (via get_instruction) for the neighbouring instruction words.
//   adr       - in/out. On entry it initialises both insnAddr and addr; on
//               exit it is set to addr + size minus one instruction.
//   pointType - stored as ipType; decides which neighbouring instructions
//               are captured and how much `size` grows.
//   f         - presumably the function this point belongs to; verify
//               against the member initialisers.
81 instPoint::instPoint(pd_Function *f, const instruction &instr,
82 const image *owner, Address &adr,
84 instPointType pointType)
85 : insnAddr(adr), addr(adr), originalInstruction(instr),
86 inDelaySlot(false), isDelayed(false),
87 callIndirect(false), callAggregate(false), callee(NULL),
89 ipType(pointType), image_ptr(owner), firstIsConditional(false),
90 relocated_(false), isLongJump(false)
96 // When the function has a stack frame
97 if (!this->hasNoStackFrame()) {
99 // we will treat the first instruction after the SAVE instruction
100 // in the nonleaf procedure as the function entry.
101 if (ipType == functionEntry) {
103 assert(isInsnType(instr, SAVEmask, SAVEmatch));
104 saveInsn.raw = owner->get_instruction(addr);
106 originalInstruction.raw = owner->get_instruction(addr);
107 delaySlotInsn.raw = owner->get_instruction(addr+4);
108 size += 2*sizeof(instruction);
110 // If the second instruction is DCTI, we need to move
111 // the instruction in the delayed slot.
112 if (IS_DELAYED_INST(delaySlotInsn)) {
114 isDelayedInsn.raw = owner->get_instruction(addr+8);
115 size += 1*sizeof(instruction);
117 // Life is Hard. If the second instruction is actually
118 // a CALL instruction, we need to move the instruction
119 // after the instruction in the delayed slot if the
120 // return value of this function is an aggregate value.
121 aggregateInsn.raw = owner->get_instruction(addr+12);
122 if (isCallInsn(delaySlotInsn)) {
// A non-zero word that is not a valid instruction is taken as the
// marker for an aggregate return value and is counted in `size`.
123 if (!IS_VALID_INSN(aggregateInsn) && aggregateInsn.raw != 0) {
124 callAggregate = true;
125 size += 1*sizeof(instruction);
130 // The following are easier.
131 } else if (ipType == callSite) {
132 delaySlotInsn.raw = owner->get_instruction(addr+4);
133 size += 2*sizeof(instruction);
135 aggregateInsn.raw = owner->get_instruction(addr+8);
136 if (!IS_VALID_INSN(aggregateInsn) && aggregateInsn.raw != 0) {
137 callAggregate = true;
138 size += 1*sizeof(instruction);
141 delaySlotInsn.raw = owner->get_instruction(addr+4);
142 size += 2*sizeof(instruction);
146 // When the function is a leaf function
149 // For the leaf procedure, there are no function calls in
150 // this procedure. So we don't need to consider the
151 // aggregate instruction.
152 if (ipType == functionEntry) {
154 otherInstruction.raw = owner->get_instruction(addr+4);
155 delaySlotInsn.raw = owner->get_instruction(addr+8);
156 size += 2*sizeof(instruction);
158 if (IS_DELAYED_INST(delaySlotInsn)) {
160 isDelayedInsn.raw = owner->get_instruction(addr+12);
161 size += 2*sizeof(instruction);
164 } else if (ipType == functionExit) {
// Inspect the instruction just before this one (addr-4), but only when
// that address is valid within the image.
168 if (owner->isValidAddress(addr-4)) {
170 iplus1.raw = owner->get_instruction(addr-4);
171 if (IS_DELAYED_INST(iplus1) && !delayOK) {
174 size += 1*sizeof(instruction);
// A conditional branch in front of the point pulls the start of the
// captured sequence back by one more instruction.
175 if(isCondBranch(iplus1)){
176 instruction previous_inst;
177 previous_inst.raw = owner->get_instruction(addr-4);
178 firstIsConditional = true;
179 addr -= sizeof(instruction);
180 size += 1*sizeof(instruction);
185 originalInstruction.raw = owner->get_instruction(addr);
186 otherInstruction.raw = owner->get_instruction(addr+4);
187 delaySlotInsn.raw = owner->get_instruction(addr+8);
188 size += 3*sizeof(instruction);
191 inDelaySlotInsn.raw = owner->get_instruction(addr+12);
192 if(firstIsConditional) {
193 extraInsn.raw = owner->get_instruction(addr+16);
196 } else if(ipType == otherPoint) {
197 delaySlotInsn.raw = owner->get_instruction(addr+4);
198 size += 2*sizeof(instruction);
200 assert(ipType == callSite);
201 // Usually, a function without a stack frame won't have any call sites
202 //cerr << "inst-sparc-solaris.C WARNING: found a leaf fn (no stack frame)"
203 // << "which makes a function call : " << func->prettyName()
204 // << " at address " << adr << endl;
206 // Actually - that is incorrect. It confuses a leaf function
207 // (one without a stack frame of its own) with a function which
208 // does not make any calls. It is possible for a function without
209 // a stack frame to make calls in the case of e.g. tail-call
210 // optimization (in this case, the function could end with
211 // e.g. jmp, nop)....
212 delaySlotInsn.raw = owner->get_instruction(addr+4);
213 size += 2*sizeof(instruction);
215 aggregateInsn.raw = owner->get_instruction(addr+8);
216 if (!IS_VALID_INSN(aggregateInsn) && aggregateInsn.raw != 0) {
217 callAggregate = true;
218 size += 1*sizeof(instruction);
223 // return the address in the code segment after this instruction
224 // sequence. (there's a -1 here because one will be added up later in
225 // the function findInstPoints)
226 adr = addr + (size - 1*sizeof(instruction));
229 /****************************************************************************/
230 /****************************************************************************/
231 /****************************************************************************/
233 // return the instruction after originalInstruction....
// Every path hands back a copy of either otherInstruction or
// delaySlotInsn; the first test is whether the function has no stack frame.
234 const instruction instPoint::insnAfterPoint() const {
235 if (this->hasNoStackFrame()) {
238 return otherInstruction;
241 return delaySlotInsn;
244 return otherInstruction;
252 return delaySlotInsn;
255 return delaySlotInsn;
258 return delaySlotInsn;
264 // should never be reached....
266 // prevent warning about lack of return value....
267 return delaySlotInsn;
270 /****************************************************************************/
271 /****************************************************************************/
272 /****************************************************************************/
// Derives astFlag for this node from the instrumentation point and then
// applies the same computation to loperand, roperand and each element of
// operands.
//   functionEntry: astFlag is true unless location->isLongJump is set.
//   functionExit : astFlag is whatever location->hasNoStackFrame() returns.
274 void AstNode::sysFlag(instPoint *location)
276 if (location -> ipType == functionEntry) {
277 astFlag = (location -> isLongJump)? false:true;
278 } else if (location -> ipType == functionExit) {
279 astFlag = location -> hasNoStackFrame(); // formerly "isLeaf()"
// Propagate to the child nodes.
284 loperand->sysFlag(location);
286 roperand->sysFlag(location);
288 for (unsigned u = 0; u < operands.size(); u++) {
289 operands[u]->sysFlag(location);
293 /****************************************************************************/
294 /****************************************************************************/
295 /****************************************************************************/
297 // Determine if the called function is a "library" function or a "user" function
298 // This cannot be done until all of the functions have been seen, verified, and
//
// Walks the `calls` vector and gathers into `non_lib` the call points that
// are kept: direct calls whose target resolves to a known function, direct
// calls whose target lies outside this function's address range (these are
// also flagged callIndirect), and indirect calls.
301 void pd_Function::checkCallPoints() {
305 //cerr << "pd_Function:: checkCallPoints called, *this = " << *this;
307 vector<instPoint*> non_lib;
309 for (unsigned i=0; i<calls.size(); ++i) {
310 /* check to see where we are calling */
314 if (isInsnType(p->originalInstruction, CALLmask, CALLmatch)) {
315 //cerr << " isIsinType TRUE" << endl;
// Direct CALL: the target is the call's own address plus the word
// displacement (disp30) converted to bytes.
317 loc_addr = p->addr + (p->originalInstruction.call.disp30 << 2);
318 pd_Function *pdf = (file_->exec())->findFuncByAddr(loc_addr);
321 non_lib.push_back(p);
322 //cerr << " pdf (called func?) non-NULL = " << *pdf;
324 //cerr << " pdf (called func) NULL" << endl;
325 // if this is a call outside the function, keep it
326 if((loc_addr < getAddress(0))||(loc_addr > (getAddress(0)+size()))){
327 //cerr << " apparent call outside function, adding p to non_lib"
329 p->callIndirect = true;
331 non_lib.push_back(p);
334 //cerr << " apparent call inside function, deleting p" << endl;
339 //cerr << " isIsinType FALSE, assuming call to unnamed user function" << endl;
340 // Indirect call -- be conservative, assume it is a call to
341 // an unnamed user function
342 assert(!p->callee); assert(p->callIndirect);
344 non_lib.push_back(p);
350 /****************************************************************************/
351 /****************************************************************************/
352 /****************************************************************************/
354 // TODO we cannot find the called function by address at this point in time
355 // because the called function may not have been seen.
356 // reloc_info is 0 if the function is not currently being relocated
357 // Note that this may be called even when instr is NOT a CALL inst, e.g.
358 // in the case of a jmp instruction where paradynd/dyninstAPI knows
359 // (really guesses) that control flow will return to the point following
361 // to mark the jmp as a call to preserve its logical structure of
362 // synchronous call + return (which is violated by tail-call optimization -
363 // including a function (w/o stack frame) which ends w/ jmp, nop....
//
// Creates a callSite instPoint for `instr` at `adr` and records it.
//   adr        - passed by reference to the instPoint constructor, which
//                advances it past the captured instructions.
//   callId     - in/out running index; the new point's instId is taken from
//                it and it is then incremented.
//   reloc_info - relocation record; points created for it are marked
//                relocated_ and registered with addFuncCall().
//   location   - compared against calls[callId] on the relocation path.
// A point whose instruction is not a CALL is flagged callIndirect with a
// NULL callee.
364 Address pd_Function::newCallPoint(Address &adr, const instruction instr,
365 const image * /*owner*/, bool &err,
366 unsigned &callId, Address &/*oldAddr*/,
367 relocatedFuncInfo *reloc_info,
369 const instPoint *&location)
374 #ifdef DEBUG_CALL_POINTS
375 cerr << "pd_Function::newCallPoint called " << endl;
376 cerr << " this " << *this << endl;
377 cerr << " adr = " << adr << endl;
378 cerr << " isTrap = " << isTrap << endl;
379 cerr << " reloc_info = " << reloc_info << endl;
386 point = new instPoint(this, instr, owner, adr, false, callSite, oldAddr);
388 point = new instPoint(this, instr, owner, adr, false, callSite);
390 //point = new instPoint(this, instr, owner, adr, false, callSite);
393 if (!isInsnType(instr, CALLmask, CALLmatch)) {
394 point->callIndirect = true;
395 point->callee = NULL;
397 point->callIndirect = false;
// Record the point in `calls` and give it the next id.
402 calls.push_back(point);
403 calls[callId] -> instId = callId; callId++;
405 // calls to a location within the function are not
406 // kept in the calls vector
408 #ifdef DEBUG_CALL_POINTS
409 cerr << " *this = " << *this;
410 cerr << " callId = " << callId;
411 cerr << " (u_int)callId = " << (u_int)callId;
412 cerr << " calls.size() = " << calls.size() << endl;
413 cerr << " calls = " << endl;
414 for(unsigned un=0;un<calls.size();un++) {
415 cerr << calls[un] << " , ";
420 point->relocated_ = true;
423 // cannot simply assert that this is true, because of the case
424 // where (as a hack), the call site in a tail-call optimization
425 // might not have been previously seen....
426 assert((callId) < calls.size());
428 if(location && (calls[callId] == location)) {
429 assert(calls[callId]->instId == location->instId);
433 point->instId = callId++;
434 reloc_info->addFuncCall(point);
438 calls.push_back(point);
441 point->relocated_ = true;
442 reloc_info->addFuncCall(point);
449 /****************************************************************************/
450 /****************************************************************************/
451 /****************************************************************************/
454 * Given an instruction, relocate it to a new address, patching up
455 * any relative addressing that is present.
// Rewrites *insn in place so that it still reaches its original target
// after being moved.
//   insn       - instruction to patch (modified through the pointer).
//   origAddr   - address the instruction was taken from.
//   targetAddr - address the instruction is being placed at.
//   proc       - process used to allocate and write helper code when a
//                branch can no longer reach its target directly.
// Only CALL and (F)BRANCH displacements are adjusted; every other
// instruction is left as is.
458 void relocateInstruction(instruction*& insn,
459 Address origAddr, Address& targetAddr, process *proc)
463 // If the instruction is a CALL instruction, calculate the new
465 if (isInsnType(*insn, CALLmask, CALLmatch)) {
// New byte offset = (origAddr + old byte offset) - targetAddr; it is stored
// back as a word displacement.
467 newOffset = origAddr - targetAddr + (insn->call.disp30 << 2);
468 insn->call.disp30 = newOffset >> 2;
470 } else if (isInsnType(*insn, BRNCHmask, BRNCHmatch)||
471 isInsnType(*insn, FBRNCHmask, FBRNCHmatch)) {
473 // If the instruction is a Branch instruction, calculate the
474 // new offset. If the new offset is out of reach after the
475 // instruction is moved to the base Trampoline, we would do
477 // b address ...... address: save
480 newOffset = origAddr - targetAddr + (insn->branch.disp22 << 2);
482 // if the branch is too far, then allocate more space in inferior
483 // heap for a call instruction to branch target. The base tramp
484 // will branch to this new inferior heap code, which will call the
485 // target of the branch
486 if (!offsetWithinRangeOfBranchInsn(newOffset)) {
487 // if (ABS(newOffset) > getMaxBranch1Insn()) {
// NOTE(review): the allocation result is held in an int while other
// addresses here use Address -- confirm this cannot truncate.
488 int ret = inferiorMalloc(proc,3*sizeof(instruction), textHeap,targetAddr);
490 u_int old_offset = insn->branch.disp22 << 2;
// Redirect the branch to the freshly allocated stub...
491 insn->branch.disp22 = (ret - targetAddr)>>2;
// ...and fill the stub with: SAVE; CALL <original branch target>; RESTORE.
492 instruction insnPlus[3];
493 genImmInsn(insnPlus, SAVEop3, REG_SPTR, -112, REG_SPTR);
494 generateCallInsn(insnPlus+1, ret+sizeof(instruction),
495 origAddr+old_offset);
496 genSimpleInsn(insnPlus+2, RESTOREop3, 0, 0, 0);
497 proc->writeDataSpace((caddr_t)ret, sizeof(insnPlus),
500 insn->branch.disp22 = newOffset >> 2;
502 } else if (isInsnType(*insn, TRAPmask, TRAPmatch)) {
503 // There should be no problem for moving trap instruction
504 // logLine("attempt to relocate trap\n");
506 /* The rest of the instructions should be fine as is */
509 /****************************************************************************/
510 /****************************************************************************/
511 /****************************************************************************/
// Fills the four recursion-guard regions of a base-tramp image (`code`,
// which will live at `base_addr`) using the offsets stored in `templ`:
// guardOnPre, guardOffPre, guardOnPost and guardOffPost.
// The guard is a single int in the process; if the process has none yet
// (address 0) one is allocated from dataHeap, initialised to 1 and
// registered with setTrampGuardFlagAddr().
//   "guard on"  : SETHI flag address; build a zero in REG_L(1) (REG_G(0)
//                 presumably reads as zero -- confirm); load the old flag
//                 into REG_L(2); store REG_L(1) to the flag; SUBcc the old
//                 value against REG_G(0); branch relative to the matching
//                 guardOff*_endOffset; NOOP after the branch.
//   "guard off" : SETHI flag address; put 1 in REG_L(1); store it to the flag.
513 void generate_base_tramp_recursive_guard_code( process & p,
516 NonRecursiveTrampTemplate & templ )
518 /* prepare guard flag memory, if needed */
519 Address guard_flag_address = p.getTrampGuardFlagAddr();
520 if( guard_flag_address == 0 )
522 int initial_value = 1;
523 guard_flag_address = inferiorMalloc( & p, sizeof( int ), dataHeap );
524 /* initialize the new value */
525 p.writeDataSpace( ( void * )guard_flag_address, sizeof( int ), & initial_value );
527 p.setTrampGuardFlagAddr( guard_flag_address );
// curr_instr walks the local copy; curr_addr tracks the matching address
// in the process so branch offsets can be computed.
530 instruction * curr_instr;
533 /* fill the 'guard on' pre-instruction instrumentation */
534 curr_instr = code + templ.guardOnPre_beginOffset / sizeof( instruction );
535 curr_addr = base_addr + templ.guardOnPre_beginOffset;
536 generateSetHi( curr_instr, guard_flag_address, REG_L(0) );
537 curr_instr++; curr_addr += sizeof( instruction );
538 genSimpleInsn( curr_instr, ADDop3, REG_G(0), REG_G(0), REG_L(1) );
539 curr_instr++; curr_addr += sizeof( instruction );
540 generateLoad( curr_instr, REG_L(0), LOW10( guard_flag_address ), REG_L(2) );
541 curr_instr++; curr_addr += sizeof( instruction );
542 generateStore( curr_instr, REG_L(1), REG_L(0), LOW10( guard_flag_address ) );
543 curr_instr++; curr_addr += sizeof( instruction );
544 genSimpleInsn( curr_instr, SUBop3cc, REG_L(2), REG_G(0), REG_G(0) );
545 curr_instr++; curr_addr += sizeof( instruction );
546 int branch_offset_in_bytes =
547 ( base_addr + templ.guardOffPre_endOffset )
551 genBranch( curr_instr,
552 branch_offset_in_bytes,
555 curr_instr++; curr_addr += sizeof( instruction );
556 generateNOOP ( curr_instr );
558 /* fill the 'guard off' pre-instruction instrumentation */
559 curr_instr = code + templ.guardOffPre_beginOffset / sizeof( instruction );
560 curr_addr = base_addr + templ.guardOffPre_beginOffset;
561 generateSetHi( curr_instr, guard_flag_address, REG_L(0) );
562 curr_instr++; curr_addr += sizeof( instruction );
563 genImmInsn( curr_instr, ADDop3, REG_G(0), 1, REG_L(1) );
564 curr_instr++; curr_addr += sizeof( instruction );
565 generateStore( curr_instr, REG_L(1), REG_L(0), LOW10( guard_flag_address ) );
567 /* fill the 'guard on' post-instruction instrumentation */
568 curr_instr = code + templ.guardOnPost_beginOffset / sizeof( instruction );
569 curr_addr = base_addr + templ.guardOnPost_beginOffset;
570 generateSetHi( curr_instr, guard_flag_address, REG_L(0) );
571 curr_instr++; curr_addr += sizeof( instruction );
572 genSimpleInsn( curr_instr, ADDop3, REG_G(0), REG_G(0), REG_L(1) );
573 curr_instr++; curr_addr += sizeof( instruction );
574 generateLoad( curr_instr, REG_L(0), LOW10( guard_flag_address ), REG_L(2) );
575 curr_instr++; curr_addr += sizeof( instruction );
576 generateStore( curr_instr, REG_L(1), REG_L(0), LOW10( guard_flag_address ) );
577 curr_instr++; curr_addr += sizeof( instruction );
578 genSimpleInsn( curr_instr, SUBop3cc, REG_L(2), REG_G(0), REG_G(0) );
579 curr_instr++; curr_addr += sizeof( instruction );
580 branch_offset_in_bytes =
581 ( base_addr + templ.guardOffPost_endOffset )
585 genBranch( curr_instr,
586 branch_offset_in_bytes,
589 curr_instr++; curr_addr += sizeof( instruction );
590 generateNOOP ( curr_instr );
592 /* fill the 'guard off' post-instruction instrumentation */
593 curr_instr = code + templ.guardOffPost_beginOffset / sizeof( instruction );
594 curr_addr = base_addr + templ.guardOffPost_beginOffset;
595 generateSetHi( curr_instr, guard_flag_address, REG_L(0) );
596 curr_instr++; curr_addr += sizeof( instruction );
597 genImmInsn( curr_instr, ADDop3, REG_G(0), 1, REG_L(1) );
598 curr_instr++; curr_addr += sizeof( instruction );
599 generateStore( curr_instr, REG_L(1), REG_L(0), LOW10( guard_flag_address ) );
602 /****************************************************************************/
603 /****************************************************************************/
604 /****************************************************************************/
607 * Install a base tramp -- fill calls with nop's for now.
609 * This one installs the base tramp for the regular functions.
// Builds a base trampoline for `location` inside the process and returns a
// heap-allocated copy of the template describing it.
//   location             - instrumentation point; its captured instructions
//                          are emitted into the tramp and its isLongJump
//                          flag may be set here.
//   trampRecursiveDesired - when false (the default) the nonRecursive
//                          templates are used and guard code is generated
//                          into the tramp; when true the plain templates
//                          are used. otherPoint locations get the
//                          "conservative" variant of either.
// Outline: pick a template, allocate room for it in textHeap near the
// point, copy the template into a local buffer, walk the buffer replacing
// each placeholder word (EMULATE_INSN, RETURN_INSN, SKIP_*, UPDATE_COST_INSN,
// the *_BRANCH slots and the RECURSIVE_GUARD_* slots), write the buffer
// into the process, and fill in baseAddr on the returned template copy.
612 trampTemplate * installBaseTramp( instPoint * & location,
614 bool trampRecursiveDesired = false )
616 trampTemplate* current_template = &nonRecursiveBaseTemplate;
618 if(location->ipType == otherPoint)
619 current_template = &nonRecursiveConservativeBaseTemplate;
621 if( trampRecursiveDesired )
623 current_template = &baseTemplate;
625 if(location->ipType == otherPoint)
626 current_template = &conservativeBaseTemplate;
// Absolute address of the point: image base address plus location->addr.
630 proc->getBaseAddress( location->image_ptr, ipAddr );
631 ipAddr += location->addr;
633 Address baseAddr = inferiorMalloc( proc, current_template->size, textHeap, ipAddr );
636 /* very conservative installation as o7 can be live at
637 this arbitrary inst point */
// For an otherPoint in a function where o7 is live, a tramp that is not
// within single-branch range of the point is released again.
639 if((location->ipType == otherPoint) &&
640 location->func && location->func->is_o7_live() &&
641 !in1BranchInsnRange(ipAddr, baseAddr))
643 vector<addrVecType> pointsToCheck;
644 inferiorFree(proc,baseAddr,pointsToCheck);
// NOTE(review): current_template->size is used as a byte count by the
// memcpy and the loop bound below, so this array has more elements than
// the template needs -- confirm whether that is intentional.
648 instruction * code = new instruction[ current_template->size ];
651 memcpy( ( char * )code,
652 ( char * )current_template->trampTemp,
653 current_template->size );
// temp walks the local copy while currAddr tracks the corresponding
// address in the process.
657 for (temp = code, currAddr = baseAddr;
658 (currAddr - baseAddr) < (unsigned) current_template->size;
659 temp++, currAddr += sizeof(instruction)) {
// EMULATE_INSN slot: the instructions captured at the point are emitted
// (and relocated) here.
661 if (temp->raw == EMULATE_INSN) {
663 // Load the value of link register from stack
664 // If no stack frame, generate a RESTORE instruction
665 // since there's an instruction SAVE generated and put in the
667 if (location -> hasNoStackFrame()) {
669 Address baseAddress = 0;
670 proc->getBaseAddress(location->image_ptr,baseAddress);
671 baseAddress += location -> addr;
673 if (in1BranchInsnRange(baseAddress, baseAddr) == false) {
674 //cerr << "This happen very rarely, I suppose "<< endl;
675 //cerr << "Lets see if this is going to be executed..." << endl;
676 location -> isLongJump = true;
677 genImmInsn(temp, RESTOREop3, 0, 0, 0);
682 currAddr += sizeof(instruction);
684 // Same for the leaf and nonleaf functions.
685 // First, relocate the "FIRST instruction" in the sequence;
686 Address fromAddr = location->addr;
688 if (!(location -> hasNoStackFrame())) {
689 if (location -> ipType == functionEntry) {
690 *temp = location -> saveInsn;
692 currAddr += sizeof(instruction);
695 *temp = location->originalInstruction;
697 // compute the real from address if this instrumentation
698 // point is from a shared object image
699 Address baseAddress = 0;
700 if(proc->getBaseAddress(location->image_ptr,baseAddress)){
701 fromAddr += baseAddress;
704 // If the instruction is a call instruction to a location somewhere
705 // within the function, then the o7 register must be saved and
706 // restored around the relocated call from the base tramp...the call
707 // instruction changes the value of o7 to be the PC value, and if
708 // we move the call instruction to the base tramp, its value will
709 // be incorrect when we use it in the function. We generate the
710 // following base tramp code:
711 // original delay slot instruction
713 // original call instruction
715 // This case should only occur for function entry points in
716 // functions from shared objects, and there should be no append
717 // trampoline code because the relocated call instruction will
718 // not return to the base tramp
719 if (isInsnType(*temp, CALLmask, CALLmatch)) {
720 Address offset = fromAddr + (temp->call.disp30 << 2);
721 if ((offset > (location->func->getAddress(0)+ baseAddress)) &&
722 (offset < ((location->func->getAddress(0)+ baseAddress)+
723 location->func->size()))) {
724 // offset > adr; "=" means recursive function which is allowed
725 // offset < adr + size; "=" does not apply to this case
727 // TODO: this assumes that the delay slot instruction is not
728 // a call instruction....is this okay?
730 // assume this situation only happens at function entry point
731 // for the shared library routine. And it definitely needs
733 assert(location -> ipType == functionEntry);
734 location -> isLongJump = true;
736 // In this situation, save instruction is discarded
738 assert(location->hasNoStackFrame() == false);
740 currAddr -= sizeof(instruction);
742 *temp = location->delaySlotInsn;
744 currAddr += sizeof(instruction);
745 genImmInsn(temp, SAVEop3, REG_SPTR, -112, REG_SPTR);
747 currAddr += sizeof(instruction);
748 *temp = location->originalInstruction;
749 relocateInstruction(temp,fromAddr,currAddr,(process *)proc);
751 fromAddr += sizeof(instruction);
752 currAddr += sizeof(instruction);
753 genImmInsn(temp, RESTOREop3, 0, 0, 0);
758 relocateInstruction(temp,fromAddr,currAddr,(process *)proc);
760 // Again, for leaf function, one more is needed to move for one
762 if (location->hasNoStackFrame()) {
763 // check to see if the otherInstruction is a call instruction
764 // to itself, if so then generate the following
765 // before after basetramp
766 // ------ ----- ---------
767 // mov originalInsn mov sethi
768 // call otherInsn call save
769 // sethi delaySlot nop call
771 // the idea is to not really relocate the originalInsn, and
772 // relocate only the call otherInsn and delaySlot instrn
773 // then do a save and restore around the relocated call to
774 // save the value of the o7 register from the call to base tramp
775 if (isInsnType(location->otherInstruction,CALLmask,CALLmatch)) {
776 *temp = location->otherInstruction;
777 fromAddr += sizeof(instruction);
778 Address offset = fromAddr + (temp->call.disp30 << 2);
779 if ((offset > (location->func->getAddress(0)+baseAddress)) &&
780 (offset < ((location->func->getAddress(0)+ baseAddress)+
781 location->func->size()))) {
782 location -> isLongJump = true;
783 // need to replace restore instr with nop
786 // relocate delaySlot instr
788 *temp = location->delaySlotInsn;
789 fromAddr += sizeof(instruction);
790 relocateInstruction(temp,fromAddr,currAddr,
793 currAddr += sizeof(instruction);
794 genImmInsn(temp, SAVEop3, REG_SPTR, -112, REG_SPTR);
796 // relocate the call instruction
798 currAddr += sizeof(instruction);
799 fromAddr -= sizeof(instruction);
800 *temp = location->otherInstruction;
801 relocateInstruction(temp,fromAddr,currAddr,
804 fromAddr += sizeof(instruction);
805 currAddr += sizeof(instruction);
806 genImmInsn(temp, RESTOREop3, 0, 0, 0);
811 // otherwise relocate the other instruction
812 fromAddr += sizeof(instruction);
813 currAddr += sizeof(instruction);
814 *++temp = location->otherInstruction;
815 relocateInstruction(temp, fromAddr, currAddr,
819 // Second, relocate the "NEXT instruction";
820 fromAddr += sizeof(instruction);
821 currAddr += sizeof(instruction);
822 *++temp = location->delaySlotInsn;
824 // if the NEXT instruction is a call instruction to a location
825 // within the function, then the o7 register must be saved and
826 // restored around the relocated call from the base tramp...the call
827 // instruction changes the value of o7 to be the PC value, and if
828 // we move the call instruction to the base tramp, its value will
829 // be incorrect when we use it in the function. We generate:
831 // original relocated to base tramp
832 // -------- -----------------------
833 // save nop // SAVE added above, replace w/nop
834 // original insn original instruction // already relocated
835 // delaySlotInsn isDelayedInsn
836 // isDelayedInsn save
837 // delaySlotInsn (call with offset - 4)
839 // In the function, the call to the base tramp will have an
840 // additional add instruction to adjust the o7 register
841 // original relocated to base tramp
842 // -------- -----------------------
848 if (isInsnType(*temp, CALLmask, CALLmatch)) {
849 Address offset = fromAddr + (temp->call.disp30 << 2);
850 if ((offset > (location->func->getAddress(0)+ baseAddress)) &&
851 (offset < ((location->func->getAddress(0)+ baseAddress)+
852 location->func->size()))) {
859 location->isLongJump = true;
860 // assert(location->hasNoStackFrame() == false);
861 // assume that this is not a delayed instr.
862 *temp = location->isDelayedInsn;
864 currAddr += sizeof(instruction);
865 genImmInsn(temp, SAVEop3, REG_SPTR, -112, REG_SPTR);
867 currAddr += sizeof(instruction);
868 *temp = location->delaySlotInsn;
869 Address new_call_addr = fromAddr - sizeof(instruction);
870 relocateInstruction(temp,new_call_addr,currAddr,proc);
872 fromAddr += sizeof(instruction);
873 currAddr += sizeof(instruction);
874 genImmInsn(temp, RESTOREop3, 0, 0, 0);
879 // otherwise relocate the NEXT instruction
880 relocateInstruction(temp, fromAddr, currAddr,
883 // Third, if the "NEXT instruction" is a DCTI,
884 if (location->isDelayed) {
885 fromAddr += sizeof(instruction);
886 currAddr += sizeof(instruction);
887 *++temp = location->isDelayedInsn;
888 relocateInstruction(temp, fromAddr, currAddr,
891 // Then, possibly, there's a callAggregate instruction
893 if (location->callAggregate) {
894 currAddr += sizeof(instruction);
895 *++temp = location->aggregateInsn;
900 // If the "FIRST instruction" is a DCTI, then our so called
901 // "NEXT instruction" is in the delayed Slot and this might
902 // happen. (actually, it happened)
903 if (location->callAggregate) {
904 currAddr += sizeof(instruction);
905 *++temp = location->aggregateInsn;
909 // For the leaf function, if there's an inDelaySlot instruction,
910 // move this one to the base Tramp.(i.e. at the function exit,
911 // if the first instruction is in the delayed slot the previous
912 // instruction, we have to move that one too, so we count from
913 // that one and the last one is this sequence is called inDelaySlot
915 // Well, after all these, another SAVE instruction is generated
916 // so we are prepared to handle the returning to our application's
918 if (location->hasNoStackFrame()) {
919 if (location->inDelaySlot) {
920 fromAddr += sizeof(instruction);
921 currAddr += sizeof(instruction);
922 *++temp = location->inDelaySlotInsn;
923 relocateInstruction(temp,fromAddr,currAddr,(process *)proc);
924 if(location->firstIsConditional){
925 fromAddr += sizeof(instruction);
926 currAddr += sizeof(instruction);
927 *++temp = location->extraInsn;
928 relocateInstruction(temp, fromAddr, currAddr, proc);
932 genImmInsn(temp+1, SAVEop3, REG_SPTR, -112, REG_SPTR);
// RETURN_INSN slot: emit the CALL that goes back to the application, to
// the address just past the instructions captured by this point
// (base address + location->addr + location->size).
935 } else if (temp->raw == RETURN_INSN) {
936 // compute the real from address if this instrumentation
937 // point is from a shared object image
938 Address baseAddress = 0;
939 if(proc->getBaseAddress(location->image_ptr,baseAddress)){
941 // Back to the code segment of the application.
942 // If the location is in the leaf procedure, generate a RESTORE
943 // instruction right after the CALL instruction to restore all
944 // the values in the registers.
945 if (location -> hasNoStackFrame()) {
946 generateCallInsn(temp, currAddr,
947 (baseAddress + location->addr)+location->size);
948 genImmInsn(temp+1, RESTOREop3, 0, 0, 0);
949 } else if(location->ipType == otherPoint){
950 /** to save the value of live o7 register we save and call*/
951 genImmInsn(temp, SAVEop3, REG_SPTR, -120, REG_SPTR);
952 generateCallInsn(temp+1, currAddr+sizeof(instruction),
953 (baseAddress + location->addr)+location->size);
954 genImmInsn(temp+2, RESTOREop3, 0, 0, 0);
956 generateCallInsn(temp, currAddr,
957 (baseAddress + location->addr)+location->size);
// SKIP_PRE_INSN slot: branch forward to the template's updateCostOffset.
959 } else if (temp->raw == SKIP_PRE_INSN) {
961 offset = baseAddr+current_template->updateCostOffset-currAddr;
962 generateBranchInsn(temp,offset);
// SKIP_POST_INSN slot: branch forward to the template's returnInsOffset.
963 } else if (temp->raw == SKIP_POST_INSN) {
966 offset = baseAddr+current_template->returnInsOffset-currAddr;
967 generateBranchInsn(temp,offset);
// UPDATE_COST_INSN slot: only its address is remembered, in costAddr.
969 } else if (temp->raw == UPDATE_COST_INSN) {
970 current_template->costAddr = currAddr;
972 } else if ((temp->raw == LOCAL_PRE_BRANCH) ||
973 (temp->raw == GLOBAL_PRE_BRANCH) ||
974 (temp->raw == LOCAL_POST_BRANCH) ||
975 (temp->raw == GLOBAL_POST_BRANCH)) {
976 #if defined(MT_THREAD)
977 if ((temp->raw == LOCAL_PRE_BRANCH) ||
978 (temp->raw == LOCAL_POST_BRANCH))
980 temp -= NUM_INSN_MT_PREAMBLE;
982 generateMTpreamble((char *)temp, numIns, proc);
983 temp += NUM_INSN_MT_PREAMBLE;
986 /* fill with no-op */
// RECURSIVE_GUARD_* slots are first overwritten with NOOPs; the real
// guard sequences are generated after the loop when guarding is wanted.
989 else if( temp->raw == RECURSIVE_GUARD_ON_PRE_INSN )
991 generateNOOP( temp );
993 else if( temp->raw == RECURSIVE_GUARD_OFF_PRE_INSN )
995 generateNOOP( temp );
997 else if( temp->raw == RECURSIVE_GUARD_ON_POST_INSN )
999 generateNOOP( temp );
1001 else if( temp->raw == RECURSIVE_GUARD_OFF_POST_INSN )
1003 generateNOOP( temp );
1007 if( ! trampRecursiveDesired )
1009 generate_base_tramp_recursive_guard_code( * proc,
1012 ( NonRecursiveTrampTemplate & )*current_template );
// Copy the finished tramp image into the process.
1016 proc->writeDataSpace( ( caddr_t )baseAddr,
1017 current_template->size,
// The returned object is a copy of the chosen template, allocated as the
// matching type, with baseAddr filled in.
1021 trampTemplate * baseInst;
1022 if( trampRecursiveDesired )
1024 baseInst = new trampTemplate;
1028 baseInst = new NonRecursiveTrampTemplate;
1030 * baseInst = *current_template;
1031 baseInst->baseAddr = baseAddr;
1036 /****************************************************************************/
1037 /****************************************************************************/
1038 /****************************************************************************/
1041 * Install the base Tramp for the function relocated.
1042 * (it means the base tramp that doesn't need to bother with long jumps and
1043 * is the one we used before for all the functions (since there's no
// installBaseTrampSpecial: builds the base trampoline for an instrumentation
// point that lives in a relocated function.
//   location              - instrumentation point; handed to
//                           relocateFunction()/modifyInstPoint() below.
//   proc                  - process in which tramp memory is allocated and
//                           into which the patched template is written.
//   deferred              - passed through to relocateFunction().
//   trampRecursiveDesired - true selects baseTemplate/conservativeBaseTemplate,
//                           false selects the nonRecursive* templates and also
//                           triggers generation of the recursive-guard code.
// The visible code ends by filling a heap-allocated trampTemplate (baseInst)
// with a copy of the template and the tramp's base address; presumably that
// object is the return value -- TODO confirm.
1047 trampTemplate *installBaseTrampSpecial(const instPoint *&location,
1048 process *proc, bool &deferred,
1049 bool trampRecursiveDesired = false)
// Template selection: otherPoint locations use the conservative variant of
// whichever template family (recursive or non-recursive) is chosen.
1051 trampTemplate* current_template = &nonRecursiveBaseTemplate;
1053 if(location->ipType == otherPoint)
1054 current_template = &nonRecursiveConservativeBaseTemplate;
1056 if( trampRecursiveDesired )
1058 current_template = &baseTemplate;
1060 if(location->ipType == otherPoint)
1061 current_template = &conservativeBaseTemplate;
// Relocate the function if it has not been installed in this process yet.
1070 if(!(location->func->isInstalled(proc))) {
1071 relocated = location->func->relocateFunction(proc, const_cast<instPoint *>(location), deferred);
1073 // Unable to relocate function
1074 if (relocated == false) {
1078 else if(!location->relocated_){
1079 // need to find new instPoint for location...it has the pre-relocated
1080 // address of the instPoint
1081 location->func->modifyInstPoint(location,proc);
// Patch a private copy of the template's instructions, not the template itself.
1084 code = new instruction[current_template->size];
1085 memcpy((char *) code, (char*) current_template->trampTemp, current_template->size);
1087 Address baseAddr = inferiorMalloc(proc, current_template->size, textHeap, location->addr);
// For an otherPoint in a function with a live %o7, the tramp must be within
// single-branch range of the point; otherwise the allocation is released.
1090 if((location->ipType == otherPoint) &&
1091 location->func && location->func->is_o7_live() &&
1092 !in1BranchInsnRange(location->addr, baseAddr))
1094 vector<addrVecType> pointsToCheck;
1095 inferiorFree(proc,baseAddr,pointsToCheck);
// Walk the copied template one instruction at a time; currAddr tracks the
// address each instruction will have in the process. Placeholder opcodes
// (EMULATE_INSN, RETURN_INSN, ...) are replaced with real instructions.
1099 for (temp = code, currAddr = baseAddr;
1100 (currAddr - baseAddr) < (unsigned) current_template->size;
1101 temp++, currAddr += sizeof(instruction)) {
1103 if (temp->raw == EMULATE_INSN) {
1104 if (location->isBranchOut) {
1105 // the original instruction is a branch that goes out of a
1106 // function. We don't relocate the original instruction. We
1107 // only get to the tramp if the branch is taken, so we generate
1108 // an unconditional branch to the target of the original
1110 assert(location->branchTarget);
1111 int disp = location->branchTarget - currAddr;
1113 if (in1BranchInsnRange(currAddr,location->branchTarget)) {
1114 generateBranchInsn(temp, disp);
1115 disp = temp->branch.disp22;
// NOTE(review): other calls to generateCallInsn in this file pass an absolute
// target address as the third argument; here a displacement is passed --
// confirm which one the helper expects.
1117 generateCallInsn(temp, currAddr, disp);
// Not a branch-out point: emulate the original instruction inside the tramp,
// followed by its delay-slot and aggregate-size words when present.
1122 *temp = location->originalInstruction;
1123 Address fromAddress = location->addr;
1124 relocateInstruction(temp, fromAddress, currAddr, proc);
1125 if (location->isDelayed) {
1126 /* copy delay slot instruction into tramp instance */
1127 currAddr += sizeof(instruction);
1128 *++temp = location->delaySlotInsn;
1130 if (location->callAggregate) {
1131 /* copy invalid insn with aggregate size in it */
1132 currAddr += sizeof(instruction);
1133 *++temp = location->aggregateInsn;
// Branch back to the instruction following the instrumentation point; the
// word displacement is bumped by one for each extra word copied above.
1136 } else if (temp->raw == RETURN_INSN) {
1137 generateBranchInsn(temp,
1138 (location->addr+ sizeof(instruction) - currAddr));
1139 if (location->isDelayed) {
1140 /* skip the delay slot instruction */
1141 temp->branch.disp22 += 1;
1143 if (location->callAggregate) {
1144 /* skip the aggregate size slot */
1145 temp->branch.disp22 += 1;
// The skip placeholders become branches to fixed offsets within this tramp.
1147 } else if (temp->raw == SKIP_PRE_INSN) {
1149 offset = baseAddr+current_template->updateCostOffset-currAddr;
1150 generateBranchInsn(temp,offset);
1151 } else if (temp->raw == SKIP_POST_INSN) {
1153 offset = baseAddr+current_template->returnInsOffset-currAddr;
1154 generateBranchInsn(temp,offset);
1155 } else if (temp->raw == UPDATE_COST_INSN) {
// Records the address of the cost-update slot in the (shared) template object.
1157 current_template->costAddr = currAddr;
1159 } else if ((temp->raw == LOCAL_PRE_BRANCH) ||
1160 (temp->raw == GLOBAL_PRE_BRANCH) ||
1161 (temp->raw == LOCAL_POST_BRANCH) ||
1162 (temp->raw == GLOBAL_POST_BRANCH)) {
1163 #if defined(MT_THREAD)
1164 if ((temp->raw == LOCAL_PRE_BRANCH) ||
1165 (temp->raw == LOCAL_POST_BRANCH))
1167 temp -= NUM_INSN_MT_PREAMBLE;
1169 generateMTpreamble((char *)temp, numIns, proc);
1170 temp += NUM_INSN_MT_PREAMBLE;
1173 /* fill with no-op */
// Recursive-guard placeholders are overwritten with no-ops here; the guard
// code itself is produced after the loop when recursion is not desired.
1176 else if( temp->raw == RECURSIVE_GUARD_ON_PRE_INSN )
1178 generateNOOP( temp );
1180 else if( temp->raw == RECURSIVE_GUARD_OFF_PRE_INSN )
1182 generateNOOP( temp );
1184 else if( temp->raw == RECURSIVE_GUARD_ON_POST_INSN )
1186 generateNOOP( temp );
1188 else if( temp->raw == RECURSIVE_GUARD_OFF_POST_INSN )
1190 generateNOOP( temp );
1194 if( ! trampRecursiveDesired )
1196 generate_base_tramp_recursive_guard_code( * proc,
1199 ( NonRecursiveTrampTemplate & )*current_template );
// Copy the patched instructions into the process at baseAddr.
1203 proc->writeDataSpace((caddr_t)baseAddr, current_template->size,(caddr_t) code);
// Build the per-tramp descriptor: a copy of the template with the address of
// this particular tramp filled in.
1207 trampTemplate * baseInst;
1208 if( trampRecursiveDesired )
1210 baseInst = new trampTemplate;
1214 baseInst = new NonRecursiveTrampTemplate;
1216 * baseInst = *current_template;
1217 baseInst->baseAddr = baseAddr;
1222 /****************************************************************************/
1223 /****************************************************************************/
1224 /****************************************************************************/
1227 * Allocate the space for the base Trampoline, and generate the instruction
1228 * we need for modifying the code segment.
1230 * 'retInstance' tells you how to modify the code to jump to the base tramp
// findAndInstallBaseTramp: looks up the base trampoline for `location` in
// proc->baseMap and, when none is recorded, installs one and records it.
//   proc                  - process being instrumented.
//   location              - instrumentation point; aliased below as a
//                           reference-to-pointer-to-const for the baseMap
//                           lookup and for installBaseTrampSpecial().
//   retInstance           - on the paths that build one, receives a new
//                           returnInstance wrapping the instruction(s) that
//                           redirect application code to the tramp (or to the
//                           relocated function).
//   trampRecursionDesired - forwarded to installBaseTramp() /
//                           installBaseTrampSpecial().
// Returns NULL when the install helper returns a null tramp.
1233 trampTemplate *findAndInstallBaseTramp(process *proc,
1234 instPoint *&location,
1235 returnInstance *&retInstance,
1236 bool trampRecursionDesired,
1240 Address adr = location->addr;
1243 const instPoint *&cLocation = const_cast<const instPoint *&>(location);
1246 if (proc->baseMap.find(cLocation, ret)) // writes to ret if found
1247 // This base tramp already exists; nothing to do.
// Trap functions take the relocated-function path (installBaseTrampSpecial).
1250 if (location->func->isTrapFunc()) {
1251 // get the base Address of this function if it is a
1253 Address baseAddress = 0;
1254 if(!proc->getBaseAddress(location->image_ptr,baseAddress)){
1255 // TODO: what should be done here?
1256 logLine("Error:findAndInstallBaseTramp call getBaseAddress\n");
1258 // Install Base Tramp for the functions which are
1259 // relocated to the heap.
1260 // vector<instruction> extra_instrs; // not any more
1262 ret = installBaseTrampSpecial(cLocation, proc, deferred,
1263 trampRecursionDesired);
1264 if(!ret) return NULL;
1266 // add a branch from relocated function to the base tramp
1267 // if function was just relocated then location has old address
1268 // otherwise location will have address in already relocated func
// NOTE(review): the visible statements in both arms of this if/else are
// identical (changeBranch for branch-out points, generateBranch otherwise).
1269 if (!location->func->isInstalled(proc)){
1270 if (location->isBranchOut){
1271 changeBranch(proc, location->addr,
1272 (int) ret->baseAddr, location->originalInstruction);
1274 generateBranch(proc, location->addr, (int)ret->baseAddr);
1277 else { // location's address is correct...it is in the heap
1278 if (location->isBranchOut){
1279 changeBranch(proc, location->addr,
1280 (int) ret->baseAddr, location->originalInstruction);
1282 generateBranch(proc, location->addr, (int)ret->baseAddr);
1286 // If for this process, a call to the relocated function has not
1287 // yet been installed in its original location, then generate either
1290 // SAVE; CALL; RESTORE.
1291 // so that it would jump to the start of the relocated function
1292 // which is in heap.
1293 if (!location->func->isInstalled(proc)){
1294 location->func->setInstalled(proc);
// NOTE(review): this `adr` (the function's un-relocated address) shadows the
// `adr` declared at the top of the function (the instrumentation point address).
1296 Address adr = location-> func -> getAddress(0);
1298 unsigned branchSize ;
// Within single-branch range: one branch to the relocated function. Otherwise
// the three-instruction SAVE; CALL; RESTORE sequence is built.
1299 if (in1BranchInsnRange(adr+baseAddress, location->func->getAddress(proc))) {
1301 insn = new instruction[branchSize];
1302 generateBranchInsn(insn,(int)(location->func->getAddress(proc)-(adr+baseAddress)));
1305 insn = new instruction[branchSize];
1306 genImmInsn(insn, SAVEop3, REG_SPTR, -112, REG_SPTR);
1307 generateCallInsn(insn+1, adr+baseAddress+4, location->func->getAddress(proc));
1308 genSimpleInsn(insn+2, RESTOREop3, 0, 0, 0);
1311 // set unknown the number of instructions to be overwritten
1312 retInstance = new returnInstance(0/*branchSize*/, (instructUnion *)insn,
1313 branchSize*sizeof(instruction),
1315 location->func->size());
1316 assert(retInstance);
1318 //cerr << "created a new return instance (relocated fn)!" << endl;
1321 // It's not a trap-function; it's a "normal" function
1322 // compute the real from address if this instrumentation
1323 // point is from a shared object image
1324 Address baseAddress = 0;
1325 if (proc->getBaseAddress(location->image_ptr,baseAddress)){
1329 ret = installBaseTramp(location, proc, trampRecursionDesired);
1330 if(!ret) return NULL;
1331 // check to see if this is an entry point and if the delay
1332 // slot instruction is a call insn, if so, then if the
1333 // call is to a location within the function, then we need to
1334 // add an extra instruction after the restore to correctly
1335 // set the o7 register
1336 bool need_to_add = false;
1337 if (location->ipType==functionEntry &&
1338 isInsnType(location->delaySlotInsn,CALLmask,CALLmatch)) {
// Target of the call sitting in the delay slot: the call's own address is
// computed as location->addr + 8, plus its word displacement scaled to bytes.
1339 Address call_offset = location->addr + 8 +
1340 (location->delaySlotInsn.call.disp30<<2);
1341 Address fun_addr = location->func->getAddress(0);
1342 u_int fun_size = location->func->size();
1343 if (call_offset>fun_addr && call_offset<(fun_addr+fun_size)) {
1344 assert(location->isLongJump);
1349 if (location->hasNoStackFrame()) {
1350 // if it is the leaf function, we need to generate
1351 // the following instruction sequence:
// Short jump: a single branch instruction replaces the original.
1354 if (location -> isLongJump == false) {
1355 instruction *insn = new instruction;
1356 generateBranchInsn(insn, (int)(ret->baseAddr-adr));
1357 retInstance = new returnInstance(1, (instructUnion *)insn,
1358 sizeof(instruction), adr,
1359 sizeof(instruction));
1360 } else if (need_to_add) {
1361 // generate original; call; add $o7 imm4
1362 instruction *insn = new instruction[2];
1363 generateCallInsn(insn, adr+4, (int) ret->baseAddr);
1364 genImmInsn(insn+1,ADDop3,REG_O(7),4,REG_O(7));
1365 retInstance = new returnInstance(2, (instructUnion *)insn,
1366 2*sizeof(instruction), adr+4,
1367 2*sizeof(instruction));
1369 bool already_done = false;
1370 // check to see if the otherInstruction is a call instruction
1371 // to itself, if so then generate the following
1372 // before after basetramp
1373 // ------ ----- ---------
1374 // mov originalInsn mov sethi
1375 // call otherInsn call save
1376 // sethi delaySlot nop call
1378 // only generate a call and nop...leave the originalInsn
1380 if (isInsnType(location->otherInstruction, CALLmask, CALLmatch)) {
1381 Address offset = location-> func -> getAddress(0)+4 +
1382 (location->otherInstruction.call.disp30 << 2);
1383 if ((offset > (location->func->getAddress(0))) &&
1384 (offset < ((location->func->getAddress(0))+
1385 location->func->size()))) {
1386 instruction *insn = new instruction[2];
1387 generateCallInsn(insn, adr+4, (int) ret->baseAddr);
1388 generateNOOP(insn+1);
1389 retInstance = new returnInstance(2, (instructUnion *)insn,
1390 2*sizeof(instruction), adr+4,
1391 2*sizeof(instruction));
1393 already_done = true;
// General long-jump leaf case: SAVE; CALL tramp; NOP.
1398 instruction *insn = new instruction[3];
1399 genImmInsn(insn, SAVEop3, REG_SPTR, -112, REG_SPTR);
1400 generateCallInsn(insn+1, adr+4, (int) ret->baseAddr);
1401 generateNOOP(insn+2);
1402 retInstance = new returnInstance(3, (instructUnion *)insn,
1403 3*sizeof(instruction), adr,
1404 3*sizeof(instruction));
1408 assert(retInstance);
1411 // Generate branch instruction from the application to the
1412 // base trampoline and no SAVE instruction is needed
1414 if (in1BranchInsnRange(adr, ret->baseAddr)) {
1415 // make sure that the isLongJump won't be true
1416 // which only is possible for shlib entry point
1417 //assert(location->isLongJump == false);
1418 if (location->isLongJump) {
1419 instruction *insn = new instruction[2];
1420 generateCallInsn(insn, adr, (int) ret->baseAddr);
1421 assert(location->ipType == functionEntry);
1422 generateNOOP(insn+1);
1423 retInstance = new returnInstance(2, (instructUnion *)insn,
1424 2*sizeof(instruction), adr,
1425 2*sizeof(instruction));
1426 assert(retInstance);
1428 instruction *insn = new instruction;
// For function entries the branch is recorded one instruction before adr, so
// the displacement is lengthened by one instruction to compensate.
1429 if (location -> ipType == functionEntry) {
1430 generateBranchInsn(insn, (int)(ret->baseAddr-adr+sizeof(instruction)));
1431 retInstance = new returnInstance(1, (instructUnion *)insn,
1432 sizeof(instruction),
1433 adr - sizeof(instruction),
1434 sizeof(instruction));
1436 generateBranchInsn(insn,(int)(ret->baseAddr-adr));
1437 retInstance = new returnInstance(1, (instructUnion *)insn,
1438 sizeof(instruction),
1440 sizeof(instruction));
1443 } else if(need_to_add) {
1444 // the delay slot instruction is a call to a location
1445 // within the same function, then need to generate 3 instrs
1447 // nop // delay slot (originally call insn)
1448 // add o7 imm4 // sets o7 register to correct value
1449 instruction *insn = new instruction[3];
1450 generateCallInsn(insn, adr, (int) ret->baseAddr);
1451 generateNOOP(insn+1);
1452 genImmInsn(insn+2,ADDop3,REG_O(7),4,REG_O(7));
1453 retInstance = new returnInstance(3, (instructUnion *)insn,
1454 3*sizeof(instruction), adr,
1455 3*sizeof(instruction));
// Out of branch range: CALL to the tramp followed by one more instruction,
// which is a NOP or a RESTORE depending on the point type and isLongJump.
1457 instruction *insn = new instruction[2];
1458 generateCallInsn(insn, adr, (int) ret->baseAddr);
1459 if (location -> ipType == functionEntry) {
1460 if (location -> isLongJump)
1461 generateNOOP(insn+1);
1463 genSimpleInsn(insn+1, RESTOREop3, 0, 0, 0);
1465 generateNOOP(insn+1);
1467 retInstance = new returnInstance(2, (instructUnion *)insn,
1468 2*sizeof(instruction), adr,
1469 2*sizeof(instruction));
1470 assert(retInstance);
// Remember the tramp so later requests for this point hit the lookup at the top.
1475 proc->baseMap[(const instPoint *)location] = ret;
1478 // remember, ret was the result of either installBaseTramp() or
1479 // installBaseTrampSpecial()
1482 /****************************************************************************/
1483 /****************************************************************************/
1484 /****************************************************************************/
1487 * Install a single tramp.
// installTramp: writes the generated mini-tramp code for `inst` into the
// process and, the first time a pre- or post-instruction tramp is attached to
// the base tramp, enables that side of the base tramp.
//   inst     - instrumentation instance; supplies the process, the tramp
//              address (trampBase), the base tramp (baseInstance) and `when`.
//   code     - buffer holding the generated instructions.
//   codeSize - size of `code` in bytes (also used to update insnGenerated).
1490 void installTramp(instInstance *inst, char *code, int codeSize)
1492 //the default base trampoline template is the regular base trampoline.
1493 //However if the location iptype is randomPoint then we have to use
1494 //the conservative base trampoline which saves the condition codes
// NOTE(review): the comment above says randomPoint; the test below is on
// otherPoint.
1496 trampTemplate* current_template = &baseTemplate;
1498 if(inst->location->ipType == otherPoint)
1499 current_template = &conservativeBaseTemplate;
1502 insnGenerated += codeSize/sizeof(int);
1505 (inst->proc)->writeDataSpace((caddr_t)inst->trampBase, codeSize, code);
// First pre-instruction tramp on this base tramp: add the pre-side base cost
// and overwrite the instruction at skipPreInsOffset with a no-op.
1508 if (inst->when == callPreInsn) {
1509 if (inst->baseInstance->prevInstru == false) {
1510 atAddr = inst->baseInstance->baseAddr+current_template->skipPreInsOffset;
1511 inst->baseInstance->cost += inst->baseInstance->prevBaseCost;
1512 inst->baseInstance->prevInstru = true;
1513 generateNoOp(inst->proc, atAddr);
// Same bookkeeping for the post-instruction side (presumably the else arm of
// the `when` test above -- TODO confirm).
1516 if (inst->baseInstance->postInstru == false) {
1517 atAddr = inst->baseInstance->baseAddr+current_template->skipPostInsOffset;
1518 inst->baseInstance->cost += inst->baseInstance->postBaseCost;
1519 inst->baseInstance->postInstru = true;
1520 generateNoOp(inst->proc, atAddr);
1525 /****************************************************************************/
1526 /****************************************************************************/
1527 /****************************************************************************/
1529 //This function returns true if the processor on which the daemon is running
1530 //is an UltraSPARC, otherwise returns false.
//The decision is made by comparing the machine field of the uname() result
//with "sun4u"; a failure of uname() is reported on cerr.
1531 bool isUltraSparc(){
1534 cerr <<"Trouble in uname(), inst-sparc-solaris.C\n";
// strcmp() returns 0 on an exact match, hence the negation.
1537 if(!strcmp(u.machine, "sun4u")){
1543 /****************************************************************************/
1544 /****************************************************************************/
1545 /****************************************************************************/
// emitLoadPreviousStackFrameRegister: emits code that places the value of
// register `register_num` into `dest`, where register_num is numbered as seen
// from the previous stack frame. The register number selects the strategy:
//   16..31 - loaded from memory at (register 30) + (register_num-16)*4,
//            after emitting a register-spill trap;
//    8..15 - copied from register_num+16 with an OR;
//    0..7  - loaded from a negative offset off register 30.
// Numbers above 31 are handled by the first branch, whose body is not shown
// in the visible code.
1547 void emitLoadPreviousStackFrameRegister(Address register_num,
1553 if(register_num > 31)
1555 else if(register_num > 15){
1556 /*Need to find it on the stack*/
1557 unsigned frame_offset = (register_num-16) * 4;
1558 /*generate a FLUSHW instruction, in order to make sure that
1559 the registers from the caller are on the caller's stack
1561 instruction *in = (instruction *) ((void*)&insn[base]);
1565 generateTrapRegisterSpill(in);
1566 base+=sizeof(instruction);
1568 if(frame_offset == 0){
1569 emitV(loadIndirOp, 30, 0, dest, insn, base, noCost, size);
1572 emitImm(plusOp,(Register) 30,(RegValue)frame_offset,
1573 dest, insn, base, noCost);
1574 emitV(loadIndirOp, dest, 0, dest, insn, base, noCost, size);
1577 else if(register_num > 7) {
1578 //out registers become in registers, so we add 16 to the register
1579 //number to find its value in this stack frame. We move its value
1580 //into the destination register
1581 emitV(orOp, (Register) register_num + 16, 0, dest, insn, base, false);
1583 else /* if(register_num >= 0) */ {
1585 if(register_num % 2 == 0)
1586 frame_offset = (register_num * -4) - 8;
1588 frame_offset = (register_num * -4);
1589 //read globals from the stack, they were saved in tramp-sparc.S
1590 emitImm(plusOp,(Register) 30,(RegValue)frame_offset,
1591 dest, insn, base, noCost);
1592 emitV(loadIndirOp, dest, 0, dest, insn, base, noCost, size);
1594 /* else assert(0); */
1598 /****************************************************************************/
1599 /****************************************************************************/
1600 /****************************************************************************/
// emitFuncCall: emits the instruction sequence for calling a function from
// instrumentation code.
//   op         - must be callOp (asserted).
//   i, base    - code buffer and current byte offset into it; base is
//                advanced past everything emitted.
//   operands   - argument expressions; each is code-generated first, then its
//                result register is moved into register u+8 for argument u.
//   callee     - function name, used for lookup when the address is resolved
//                by name.
//   proc       - process in which the callee address is resolved.
//   calleefunc - when used, its effective address in proc is the target.
// Per the trailing comment, the result register is %o0.
1602 Register emitFuncCall(opCode op,
1604 char *i, Address &base,
1605 const vector<AstNode *> &operands,
1606 const string &callee, process *proc,
1607 bool noCost, const function_base *calleefunc)
1609 assert(op == callOp);
1612 vector <Register> srcs;
1613 void cleanUpAndExit(int status);
// Resolve the callee address: from calleefunc, else as an internal symbol,
// else through findOneFunction().
1616 addr = calleefunc->getEffectiveAddress(proc);
1620 addr = proc->findInternalAddress(callee, false, err);
1623 function_base *func = proc->findOneFunction(callee);
1625 ostrstream os(errorLine, 1024, ios::out);
1626 os << "Internal error: unable to find addr of " << callee << endl;
1627 showErrorCallback(80, (const char *) errorLine);
1630 // TODO: is this correct or should we get relocated address?
1631 addr = func->getAddress(0);
// Generate code for every operand before touching the argument registers.
1634 for (unsigned u = 0; u < operands.size(); u++)
1635 srcs.push_back(operands[u]->generateCode(proc, rs, i, base, noCost, false));
1638 instruction *insn = (instruction *) ((void*)&i[base]);
1640 for (unsigned u=0; u<srcs.size(); u++){
1642 string msg = "Too many arguments to function call in instrumentation code: only 5 arguments can be passed on the sparc architecture.\n";
// NOTE(review): msg is used as the printf format string here; it contains no
// conversion specifiers at present.
1643 fprintf(stderr, msg.string_of());
1644 showErrorCallback(94,msg);
// "or %g0, src, reg(u+8)": move the operand into the u-th argument register.
1647 genSimpleInsn(insn, ORop3, 0, srcs[u], u+8); insn++;
1648 base += sizeof(instruction);
1649 rs->freeRegister(srcs[u]);
1652 // As Ling pointed out to me, the following is rather inefficient. It does:
1653 // sethi %hi(addr), %o5
1654 // jmpl %o5 + %lo(addr), %o7 ('call' pseudo-instr)
1656 // We can do better:
1657 // call <addr> (but note that the call true-instr is pc-relative jump)
1659 generateSetHi(insn, addr, 13); insn++;
1660 genImmInsn(insn, JMPLop3, 13, LOW10(addr), 15); insn++;
// base advances by three instructions: the two above plus one more that is
// not among the visible statements (presumably the delay-slot filler).
1663 base += 3 * sizeof(instruction);
1665 // return value is the register with the return value from the function.
1666 // This needs to be %o0 since it is back in the caller's scope.
1670 /****************************************************************************/
1671 /****************************************************************************/
1672 /****************************************************************************/
// emitA: emits code for the control-flow and tramp framing opcodes and
// returns an address/offset for the caller.
//   op      - opcode; the trampPreamble and trampTrailer cases are visible
//             below, and emitV's asserts list ifOp and branchOp as emitA
//             opcodes too. Any other opcode aborts.
//   src1    - register tested by the conditional-branch sequence.
//   dest    - branch displacement: divided by 4 for the conditional branch,
//             passed as-is to the unconditional branch, shifted left by 2
//             (words to bytes) for the tramp trailer.
//   i, base - code buffer and current byte offset; base is advanced past the
//             emitted instructions.
// For the branch sequences the return value is base minus two instructions.
1674 Address emitA(opCode op, Register src1, Register /*src2*/, Register dest,
1675 char *i, Address &base, bool /*noCost*/)
1677 //fprintf(stderr,"emitA(op=%d,src1=%d,src2=XX,dest=%d)\n",op,src1,dest);
1679 instruction *insn = (instruction *) ((void*)&i[base]);
// Conditional sequence: subcc src1 against 0, then a branch-if-equal whose
// fields are filled in by hand.
1684 genImmInsn(insn, SUBop3cc, src1, 0, 0); insn++;
1685 //genSimpleInsn(insn, SUBop3cc, src1, 0, 0); insn++;
1687 insn->branch.op = 0;
1688 insn->branch.cond = BEcond;
1689 insn->branch.op2 = BICCop2;
1690 insn->branch.anneal = false;
1691 insn->branch.disp22 = dest/4;
1695 base += sizeof(instruction)*3;
1696 return(base - 2*sizeof(instruction));
1699 // Unconditional branch
1700 generateBranchInsn(insn, dest); insn++;
1703 base += sizeof(instruction)*2;
1704 return(base - 2*sizeof(instruction));
1706 case trampPreamble: {
1708 // save and restore are done in the base tramp now
1709 genImmInsn(insn, SAVEop3, REG_SPTR, -112, REG_SPTR);
1710 base += sizeof(instruction);
1713 // generate code to save global registers
// Four double-word stores of even register pairs 0,2,4,6 at fp-8 .. fp-32.
1714 for (unsigned u = 0; u < 4; u++) {
1715 genStoreD(insn, 2*u, REG_FPTR, - (8 + 8*u));
1716 base += sizeof(instruction);
1720 return(0); // let's hope this is expected!
1722 case trampTrailer: {
1724 // save and restore are done in the base tramp now
1725 // generate code to restore global registers
// Mirror of the preamble: reload the four register pairs from fp-8 .. fp-32.
1726 for (unsigned u = 0; u < 4; u++) {
1727 genLoadD(insn, REG_FPTR, - (8 + 8*u), 2*u);
1728 base += sizeof(instruction);
1732 // sequence: restore; nop; b,a back to base tramp; nop
1733 // we can do better. How about putting the restore in
1734 // the delay slot of the branch instruction, as in:
1735 // b <back to base tramp>; restore
1736 genSimpleInsn(insn, RESTOREop3, 0, 0, 0);
1737 base += sizeof(instruction);
1741 base += sizeof(instruction);
1744 // dest is in words of offset and generateBranchInsn is bytes offset
1745 generateBranchInsn(insn, dest << 2);
1746 base += sizeof(instruction);
1749 // add no-op, SS-5 sometimes seems to try to decode this insn - jkh 2/14
1752 base += sizeof(instruction);
1754 return(base - 2 * sizeof(instruction));
1757 abort(); // unexpected op for this emit!
1761 /****************************************************************************/
1762 /****************************************************************************/
1763 /****************************************************************************/
// emitR: emits code that makes a parameter or the return value of the
// instrumented function available in a register, and returns that register.
//   op      - the visible case labels are getSysParamOp and getSysRetValOp;
//             emitV's asserts additionally list getParamOp and getRetValOp
//             as emitR opcodes. Any other opcode aborts.
//   src1    - parameter index; the result register is REG_I(src1).
//   i, base - code buffer and current byte offset; base is advanced by the
//             size of the emitted sequence.
// Both visible sequences have the same shape: RESTORE, store the in-register
// at an offset of 68 (+4*src1) from the stack pointer, SAVE a 112-byte frame
// again, then load the value back from 112 bytes further up.
1765 Register emitR(opCode op, Register src1, Register /*src2*/, Register /*dest*/,
1766 char *i, Address &base, bool /*noCost*/)
1768 //fprintf(stderr,"emitR(op=%d,src1=%d,src2=XX,dest=XX)\n",op,src1);
1770 instruction *insn = (instruction *) ((void*)&i[base]);
1774 #if defined(SHM_SAMPLING) && defined(MT_THREAD)
1775 // saving CT/vector address on the stack
1776 generateStore(insn, REG_MT, REG_FPTR, -40);
1779 // first 8 parameters are in register bank I (24..31)
1780 genSimpleInsn(insn, RESTOREop3, 0, 0, 0);
1783 generateStore(insn, REG_I(src1), REG_SPTR, 68+4*src1);
1786 genImmInsn(insn, SAVEop3, REG_SPTR, -112, REG_SPTR);
1789 generateLoad(insn, REG_SPTR, 112+68+4*src1, REG_I(src1));
1792 #if defined(SHM_SAMPLING) && defined(MT_THREAD)
1793 // restoring CT/vector address back in REG_MT
1794 generateLoad(insn, REG_FPTR, -40, REG_MT);
// Six instructions with the REG_MT save/restore compiled in, four without.
1796 base += 6*sizeof(instruction);
1798 base += 4*sizeof(instruction);
1802 return(REG_I(src1));
1806 case getSysParamOp: {
1808 return(REG_I(src1));
1813 // return value is in register REG_I(0)==24
1814 genSimpleInsn(insn, RESTOREop3, 0, 0, 0);
1817 generateStore(insn, REG_I(0), REG_SPTR, 68);
1820 genImmInsn(insn, SAVEop3, REG_SPTR, -112, REG_SPTR);
1823 generateLoad(insn, REG_SPTR, 112+68, REG_I(0));
1826 base += 4*sizeof(instruction);
1830 case getSysRetValOp:
1833 abort(); // unexpected op for this emit!
1837 /****************************************************************************/
1838 /****************************************************************************/
1839 /****************************************************************************/
1842 // load the original FP (before the dyninst saves) into register dest
//   insn - where the sequence is written.
//   dest - register that receives the frame pointer value.
// Returns the size in bytes of the emitted sequence (four instructions):
// RESTORE, store the frame pointer at sp+68, SAVE a 112-byte frame, load the
// stored value from sp+112+68 into dest.
1844 int getFP(instruction *insn, Register dest)
1846 genSimpleInsn(insn, RESTOREop3, 0, 0, 0);
1849 generateStore(insn, REG_FPTR, REG_SPTR, 68);
1852 genImmInsn(insn, SAVEop3, REG_SPTR, -112, REG_SPTR);
1855 generateLoad(insn, REG_SPTR, 112+68, dest);
1858 return(4*sizeof(instruction));
1861 /****************************************************************************/
1862 /****************************************************************************/
1863 /****************************************************************************/
// emitVload: emits code for the load-style opcodes.
//   op      - loadConstOp, loadOp, loadFrameRelativeOp or loadFrameAddr;
//             anything else aborts.
//   src1    - constant, absolute address, or frame offset, depending on op.
//   src2    - scratch register used by the frame-relative forms when the
//             offset does not fit a 13-bit immediate.
//   dest    - destination register.
//   i, base - code buffer and current byte offset; base is advanced past the
//             emitted instructions.
1865 void emitVload(opCode op, Address src1, Register src2, Register dest,
1866 char *i, Address &base, bool /*noCost*/, int /* size */)
1868 instruction *insn = (instruction *) ((void*)&i[base]);
1870 if (op == loadConstOp) {
1871 // dest = src1:imm TODO
// NOTE(review): both bounds are compared as unsigned. If MIN_IMM13 is
// negative (the frame-relative branches below compare against it as a signed
// int), its unsigned value is very large and the second test is true for
// almost every src1, so the sethi/or pair is what gets emitted -- confirm.
1873 if ((src1) > ( unsigned )MAX_IMM13 || (src1) < ( unsigned )MIN_IMM13) {
1874 // src1 is out of range of imm13, so we need an extra instruction
1875 generateSetHi(insn, src1, dest);
1876 base += sizeof(instruction);
1881 // Chance for optimization: we should check for LOW10(src1)==0,
1882 // and if so, don't generate the following bitwise-or instruction,
1883 // since in that case nothing would be done.
1885 genImmInsn(insn, ORop3, dest, LOW10(src1), dest);
1886 base += sizeof(instruction);
1888 // really or %g0,imm,regd
1889 genImmInsn(insn, ORop3, 0, src1, dest);
1891 base += sizeof(instruction);
1893 } else if (op == loadOp) {
1894 // dest = [src1] TODO
// sethi puts the high bits of the address in dest; the load adds the low 10.
1895 generateSetHi(insn, src1, dest);
1898 generateLoad(insn, dest, LOW10(src1), dest);
1900 base += sizeof(instruction)*2;
1901 } else if (op == loadFrameRelativeOp) {
1902 // return the value that is FP offset from the original fp
1903 // need to restore old fp and save it on the stack to get at it.
// getFP() writes its sequence at insn and returns its size in bytes, so insn
// is recomputed from the advanced base afterwards.
1905 base += getFP(insn, dest);
1906 insn = (instruction *) ((void*)&i[base]);
1907 if (((int) src1 < MIN_IMM13) || ((int) src1 > MAX_IMM13)) {
1908 // offsets are signed!
1909 int offset = (int) src1;
1911 // emit sethi src2, offset
1912 generateSetHi(insn, offset, src2);
1913 base += sizeof(instruction);
1916 // or src2, offset, src2
1917 genImmInsn(insn, ORop3, src2, LOW10(offset), src2);
1918 base += sizeof(instruction);
1921 // add dest, src2, dest
// (the instruction below actually writes the sum into src2, which the
// following load then uses as its base register)
1922 genSimpleInsn(insn, ADDop3, dest, src2, src2);
1923 base += sizeof(instruction);
1926 generateLoad(insn, src2, 0, dest);
1928 base += sizeof(instruction);
// Small offset: a single load relative to the frame pointer held in dest.
1930 generateLoad(insn, dest, src1, dest);
1932 base += sizeof(instruction);
1934 } else if (op == loadFrameAddr) {
1935 // offsets are signed!
1936 int offset = (int) src1;
1938 base += getFP(insn, dest);
1939 insn = (instruction *) ((void*)&i[base]);
1941 if (((int) offset < MIN_IMM13) || ((int) offset > MAX_IMM13)) {
1942 // emit sethi src2, offset
1943 generateSetHi(insn, offset, src2);
1944 base += sizeof(instruction);
1947 // or src2, offset, src2
1948 genImmInsn(insn, ORop3, src2, LOW10(offset), src2);
1949 base += sizeof(instruction);
1952 // add dest, src2, dest
1953 genSimpleInsn(insn, ADDop3, dest, src2, dest);
1954 base += sizeof(instruction);
1957 // fp is in dest, just add the offset
1958 genImmInsn(insn, ADDop3, dest, offset, dest);
1960 base += sizeof(instruction);
1963 abort(); // unexpected op for this emit!
1967 /****************************************************************************/
1968 /****************************************************************************/
1969 /****************************************************************************/
// emitVstore: emits code for the store-style opcodes.
//   op      - storeOp or storeFrameRelativeOp; anything else aborts.
//   src1    - register holding the value to store.
//   src2    - scratch register: receives the high address bits (storeOp) or
//             the original frame pointer (storeFrameRelativeOp).
//   dest    - absolute address (storeOp) or signed frame offset
//             (storeFrameRelativeOp).
//   i, base - code buffer and current byte offset; base is advanced past the
//             emitted instructions.
1971 void emitVstore(opCode op, Register src1, Register src2, Address dest,
1972 char *i, Address &base, bool /*noCost*/, int /* size */)
1974 instruction *insn = (instruction *) ((void*)&i[base]);
1976 if (op == storeOp) {
// Hand-assembled sethi of the high 22 bits of dest into src2; the store then
// supplies the low 10 bits as its offset.
1977 insn->sethi.op = FMT2op;
1978 insn->sethi.rd = src2;
1979 insn->sethi.op2 = SETHIop2;
1980 insn->sethi.imm22 = HIGH22(dest);
1983 generateStore(insn, src1, src2, LOW10(dest));
1985 base += sizeof(instruction)*2;
1986 } else if (op == storeFrameRelativeOp) {
1987 // offsets are signed!
1988 int offset = (int) dest;
1990 base += getFP(insn, src2);
1991 insn = (instruction *) ((void*)&i[base]);
1993 if ((offset < MIN_IMM13) || (offset > MAX_IMM13)) {
1994 // We are really one register short here, so we put the
1995 // value to store onto the stack for part of the sequence
// Spill src1, build the offset in src1, add it into src2, reload src1 from
// the spill slot, then store through src2.
1996 generateStore(insn, src1, REG_SPTR, 112+68);
1997 base += sizeof(instruction);
2000 generateSetHi(insn, offset, src1);
2001 base += sizeof(instruction);
2004 genImmInsn(insn, ORop3, src1, LOW10(offset), src1);
2005 base += sizeof(instruction);
2008 genSimpleInsn(insn, ADDop3, src1, src2, src2);
2009 base += sizeof(instruction);
2012 generateLoad(insn, REG_SPTR, 112+68, src1);
2013 base += sizeof(instruction);
2016 generateStore(insn, src1, src2, 0);
2017 base += sizeof(instruction);
// Small offset: store directly relative to the frame pointer held in src2.
2020 generateStore(insn, src1, src2, offset);
2022 base += sizeof(instruction);
2025 abort(); // unexpected op for this emit!
2029 /****************************************************************************/
2030 /****************************************************************************/
2031 /****************************************************************************/
// emitVupdate: emits code that adds a cost value to a counter in memory.
//   op      - must be updateCostOp; anything else aborts.
//   src1    - the cost to add (in cycles, per the comment below).
//   dest    - address of the counter.
//   i, base - code buffer and current byte offset; base is advanced past the
//             emitted instructions.
// The emitted code uses REG_L(0), REG_L(1) and, for large costs, REG_L(2).
2033 void emitVupdate(opCode op, RegValue src1, Register /*src2*/, Address dest,
2034 char *i, Address &base, bool noCost)
2036 instruction *insn = (instruction *) ((void*)&i[base]);
2038 if (op == updateCostOp) {
2039 // generate code to update the observed cost.
2041 // sethi %hi(dest), %l0
2042 generateSetHi(insn, dest, REG_L(0));
2043 base += sizeof(instruction);
2046 // ld [%l0+ lo(dest)], %l1
2047 generateLoad(insn, REG_L(0), LOW10(dest), REG_L(1));
2048 base += sizeof(instruction);
2051 // update value (src1 holds the cost, in cycles; e.g. 19)
// NOTE(review): only the upper bound is checked; if RegValue is signed, a
// negative src1 would also take the immediate path -- confirm.
2052 if (src1 <= MAX_IMM13) {
2053 genImmInsn(insn, ADDop3, REG_L(1), src1, REG_L(1));
2054 base += sizeof(instruction);
// base advances by two further instructions on this path, which keeps its
// length at three, the same as the sethi/or/add path below; the statements
// that emit them are not among the visible lines.
2058 base += sizeof(instruction);
2062 base += sizeof(instruction);
2065 // load in two parts
2066 generateSetHi(insn, src1, REG_L(2));
2067 base += sizeof(instruction);
2071 genImmInsn(insn, ORop3, REG_L(2), LOW10(src1), REG_L(2));
2072 base += sizeof(instruction);
2076 genSimpleInsn(insn, ADDop3, REG_L(1), REG_L(2), REG_L(1));
2077 base += sizeof(instruction);
2081 // store result st %l1, [%l0+ lo(dest)];
2082 generateStore(insn, REG_L(1), REG_L(0), LOW10(dest));
2083 base += sizeof(instruction);
2087 abort(); // unexpected op for this emit!
2091 /****************************************************************************/
2092 /****************************************************************************/
2093 /****************************************************************************/
// emitV: emits code for the opcodes not claimed by the other emit*
// functions; the asserts below list the opcodes that belong to emitA, emitR,
// emitVload, emitVstore and emitVupdate and must not arrive here.
//   op         - opcode to generate.
//   src1, src2 - source registers.
//   dest       - destination register (for storeIndirOp it is the register
//                holding the address stored to).
//   i, base    - code buffer and current byte offset; base is advanced past
//                the emitted instructions.
2095 void emitV(opCode op, Register src1, Register src2, Register dest,
2096 char *i, Address &base, bool /*noCost*/, int /* size */)
2098 //fprintf(stderr,"emitV(op=%d,src1=%d,src2=%d,dest=%d)\n",op,src1,src2,dest);
2100 assert ((op!=branchOp) && (op!=ifOp) &&
2101 (op!=trampTrailer) && (op!=trampPreamble)); // !emitA
2102 assert ((op!=getRetValOp) && (op!=getSysRetValOp) &&
2103 (op!=getParamOp) && (op!=getSysParamOp)); // !emitR
2104 assert ((op!=loadOp) && (op!=loadConstOp)); // !emitVload
2105 assert ((op!=storeOp)); // !emitVstore
2106 assert ((op!=updateCostOp)); // !emitVupdate
2108 instruction *insn = (instruction *) ((void*)&i[base]);
// Indirect load: dest = [src1]. Indirect store: [dest] = src1.
2110 if (op == loadIndirOp) {
2111 generateLoad(insn, src1, 0, dest);
2112 base += sizeof(instruction);
2113 } else if (op == storeIndirOp) {
2114 generateStore(insn, src1, dest, 0);
2115 base += sizeof(instruction);
2116 } else if (op == noOp) {
2118 base += sizeof(instruction);
2119 } else if (op == saveRegOp) {
2120 // should never be called for this platform.
2140 //need to set the Y register to Zero, Zhichen
2141 genImmInsn(insn, WRYop3, REG_G(0), 0, 0);
2142 base += sizeof(instruction);
2143 insn = (instruction *) ((void*)&i[base]);
2156 // For a particular condition (e.g. <=) we need to use the
2157 // the opposite in order to get the right value (e.g. for >=
2158 // we need BLTcond) - naim
// Each relational operator below hands genRelOp a branch condition; per the
// comment above it is the inverse of the operator being evaluated.
2160 genRelOp(insn, BNEcond, src1, src2, dest, base);
2165 genRelOp(insn, BEcond, src1, src2, dest, base);
2170 genRelOp(insn, BGEcond, src1, src2, dest, base);
2175 genRelOp(insn, BGTcond, src1, src2, dest, base);
2180 genRelOp(insn, BLEcond, src1, src2, dest, base);
2185 genRelOp(insn, BLTcond, src1, src2, dest, base);
// Remaining operators: one three-register instruction using the selected op3.
2193 genSimpleInsn(insn, op3, src1, src2, dest);
2195 base += sizeof(instruction);
2200 /****************************************************************************/
2201 /****************************************************************************/
2202 /****************************************************************************/
// isRestoreInsn: true when instruction i has op == 2 and is either a
// RESTORE (op3 == RESTOREop3) or an OR whose destination register is 15.
// Register 15 is %o7 (see the tail-call comment in findInstPoints), so the
// OR form covers a delay-slot instruction that rewrites the return address.
2204 static inline bool isRestoreInsn(instruction i) {
2205 return (i.rest.op == 2 \
2206 && ((i.rest.op3 == ORop3 && i.rest.rd == 15)
2207 || i.rest.op3 == RESTOREop3));
2210 /****************************************************************************/
2211 /****************************************************************************/
2212 /****************************************************************************/
// CallRestoreTC: detects the "call; restore" tail-call pattern.
//   instr - candidate call instruction.
//   nexti - the instruction that follows it (the call's delay slot).
// Returns true when instr satisfies isCallInsn and nexti satisfies
// isRestoreInsn.
2214 static inline bool CallRestoreTC(instruction instr, instruction nexti) {
2215 return (isCallInsn(instr) && isRestoreInsn(nexti));
2219 Return integer value indicating whether instruction sequence
2220 found signals tail call
2223 sequence. Note that this should NOT include jmpl nop, ret nop, retl
2225 Current heuristic to detect such sequences :
2226 look for jmp %reg, nop in function w/ no stack frame, if jmp, nop
2227 are last 2 instructions, return 1 (definite TC), at any other point,
2228 return 0 (not TC). Otherwise, return 0 (no TC).
2229 w/ no stack frame....
2230 instr is instruction being examined.
2231 nexti is instruction after
2232 addr is address of <instr>
2233 func is pointer to function class object describing function
2234 instructions come from....
// JmpNopTC: detects the "jmp %reg; nop" tail-call pattern.
//   instr - instruction being examined.
//   nexti - the instruction after instr.
//   addr  - address of instr.
//   func  - function object the instructions come from.
// The visible checks, in order: instr must be a JMPL; it must write %g0; it
// must not look like ret/retl (%i7 or %o7 plus 8, 12 or 16); nexti must be a
// nop; func must have no stack frame; and the pair must be the last two
// instructions of func.
// NOTE(review): the return statements for each check are not visible in this
// view, so the value returned on each path should be confirmed in the file.
2236 static inline bool JmpNopTC(instruction instr, instruction nexti,
2237 Address addr, pd_Function *func) {
2239 if (!isInsnType(instr, JMPLmask, JMPLmatch)) {
// Sanity check: a JMPL is expected to carry op3 == 0x38.
2243 assert(instr.resti.op3 == 0x38);
2245 // only looking for jump instructions which don't overwrite a register
2246 // with the PC which the jump comes from (g0 is hardwired to 0, so a write
2247 // there has no effect?)....
2248 // instr should have gdb disass syntax :
2250 // NOT jmpl %reg1, %reg2
2251 if (instr.resti.rd != REG_G(0)) {
2255 // only looking for jump instructions in which the destination is
2256 // NOT %i7 + 8/12/16 or %o7 + 8/12/16 (ret and retl synthetic
2257 // instructions, respectively)
2258 if (instr.resti.i == 1) {
2259 if (instr.resti.rs1 == REG_I(7) || instr.resti.rs1 == REG_O(7)) {
2260 // NOTE : some return and retl instructions jump to {io}7 + 12,
2261 // or (io)7 + 16, not + 8, to have some extra space to store the size of a
2262 // return structure....
2263 if (instr.resti.simm13 == 0x8 || instr.resti.simm13 == 12 ||
2264 instr.resti.simm13 == 16) {
2270 // jmp, followed by NOP....
2271 if (!isNopInsn(nexti)) {
2275 // in function w/o stack frame....
2276 if (!func->hasNoStackFrame()) {
2280 // if sequence is detected, but not at end of fn
2281 // (last 2 instructions....), return value indicating possible TC.
2282 // This should (eventually) mark the fn as uninstrumentable....
// The literal 8 is the size of two instructions: addr must be the
// second-to-last instruction of the function.
2283 if (addr != (func->getAddress(0) + func->size() - 8)) {
2290 /****************************************************************************/
2291 /****************************************************************************/
2292 /****************************************************************************/
2295 Is the specified call instruction one whose goal is to set the O7 register
2296 (the sequence of execution is as if the call instruction did not change the
2297 control flow, and the O7 register is set)?
2299 here, we define a call whose goal is to set the O7 register
2300 as one where the target is the call address + 8, AND where that
2301 target is INSIDE the same function (need to make sure to check for that
2302 last case also, c.f. function DOW, which ends with):
2303 0xef601374 <DOW+56>: call 0xef60137c <adddays>
2304 0xef601378 <DOW+60>: restore
2306 instr - raw instruction....
2307 functionSize - size of function (in bytes, NOT # instructions)....
2308 instructionOffset - BYTE offset in function at which instr occurs....
// is_set_O7_call: tests whether a call exists only to load %o7 with the PC.
//   instr             - raw instruction.
//   functionSize      - size of the function in bytes.
//   instructionOffset - byte offset of instr within the function.
// The visible test requires a CALLop whose displacement is exactly 8 bytes
// (the address just past the delay slot) and whose offset leaves the target
// inside the function.
// NOTE(review): the return statements are not visible in this view;
// presumably the matching case returns true and the others false.
2310 static inline bool is_set_O7_call(instruction instr, unsigned functionSize,
2311 unsigned instructionOffset) {
2312 // if the instruction is call %register, assume that it is NOT a
2313 // call designed purely to set %O7....
2314 if(instr.call.op != CALLop) {
// NOTE(review): functionSize - 2 * sizeof(instruction) is unsigned
// arithmetic, so a functionSize smaller than two instructions would wrap
// around to a very large value - confirm callers never pass such a size.
2317 if (((instr.call.disp30 << 2) == 8) &&
2318 (instructionOffset < (functionSize - 2 * sizeof(instruction)))) {
2324 /****************************************************************************/
2325 /****************************************************************************/
2326 /****************************************************************************/
2329 Does the specified call instruction call to target inside function
2330 or outside - may be indeterminate if insn is call %reg instead of
2331 call <address> (really call PC + offset)
2332 Note: (recursive) calls back to the beginning of the function are OK
2333 since we really want to consider these as instrumentable call sites!
// Three-valued result used by is_call_outside_function: false, true, or
// "cannot be determined".
2335 enum fuzzyBoolean {eFalse = 0, eTrue = 1, eDontKnow = 2};
// is_call_outside_function: classifies the target of a call instruction.
//   instr              - the call instruction.
//   functionStarts     - address of the first instruction of the function.
//   instructionAddress - address of instr.
//   functionSize       - size of the function in bytes.
// A call that is not a CALLop (i.e. call %register) has no statically known
// target. Otherwise the target is instructionAddress + (disp30 << 2) and is
// tested against the open interval (functionStarts, functionStarts +
// functionSize); the strict '>' keeps a call back to the function's first
// instruction out of the "inside" case.
// NOTE(review): the return statements are not visible in this view;
// presumably eDontKnow, eFalse and eTrue are returned for the three cases.
2337 static enum fuzzyBoolean is_call_outside_function(const instruction instr,
2338 const Address functionStarts, const Address instructionAddress,
2339 const unsigned int functionSize)
2341 // call %register - don't know if target inside function....
2342 if(instr.call.op != CALLop) {
2345 const Address call_target = instructionAddress + (instr.call.disp30 << 2);
2346 if ((call_target > functionStarts) &&
2347 (call_target < (functionStarts + functionSize))) {
2354 /****************************************************************************/
2355 /****************************************************************************/
2356 /****************************************************************************/
2359 * Find the instPoints of this function.
// findInstPoints: scans this function's instructions (fetched from owner) and
// records its instrumentation points: the entry point (funcEntry_), the exit
// points (funcReturns) and the call sites (through newCallPoint). While
// scanning it also decides whether the function has to be relocated in order
// to be instrumented (relocatable_) and whether relocation is possible at
// all (canBeRelocated), and it records whether a SAVE was seen (noStackFrame).
// The final statements combine the result of checkInstPoints with the
// canBeRelocated / isTrap flags.
// NOTE(review): many short lines (closing braces, else branches, return
// statements, several declarations) are not visible in this view; the
// comments below describe only what the visible statements show.
2361 bool pd_Function::findInstPoints(const image *owner) {
// [firstAddress, lastAddress) is the address range of the function body.
2363 Address firstAddress = getAddress(0);
2364 Address lastAddress = getAddress(0) + size();
2373 instPoint *point = 0;
2375 // For determining if function needs relocation to be instrumented
2377 relocatable_ = false;
2378 bool canBeRelocated = true;
2380 // Initially assume function has no stack frame
2381 noStackFrame = true;
2383 // variables for function parameters
2384 const instPoint *blah = 0;
2389 // Ids for instPoints
2391 unsigned callsId = 0;
// Reject the function early if its first word is not a valid instruction.
2397 instr.raw = owner->get_instruction(firstAddress);
2398 if (!IS_VALID_INSN(instr)) {
2402 // Determine if function needs to be relocated when instrumented
// NOTE(review): on the last iteration adr+4 equals lastAddress, so nexti is
// read from one instruction past the function - confirm get_instruction
// tolerates that address.
2403 for ( adr = firstAddress; adr < lastAddress; adr += 4) {
2404 instr.raw = owner->get_instruction(adr);
2405 nexti.raw = owner->get_instruction(adr+4);
2407 // If there's an TRAP instruction in the function, we assume
2408 // that it is an system call and will relocate it to the heap
2409 if (isInsnType(instr, TRAPmask, TRAPmatch)) {
2411 relocatable_ = true;
2414 // TODO: This is a hacking for the solaris(solaris2.5 actually)
2415 // We will relocate that function if the function has been
2416 // tail-call optimized.
2417 // (Actually, the reason of this is that the system calls like
2418 // read, write, etc have the tail-call optimization to call
2419 // the _read, _write etc. which contain the TRAP instruction
2420 // This is only done if libc is statically linked...if the
2421 // libTag is set, otherwise we instrument read and _read
2422 // both for the dynamically linked case
2423 // New for Solaris 2.6 support - new form of tail-call opt-
2427 // as last 2 instructions in function which does not have
2428 // own register frame.
2429 if (CallRestoreTC(instr, nexti) || JmpNopTC(instr, nexti, adr, this)) {
2431 relocatable_ = true;
2434 // if call is directly to a retl, this is not a real call, but
2435 // is instead used to set the o7 register. Set the function to be
2436 // relocated when instrumented.
2437 if (isCallInsn(instr)) {
2439 // find target address of call
2440 disp = instr.call.disp30 << 2;
2441 target = adr + disp;
2443 // get target instruction of the call
2444 instruction tmpInsn;
2445 tmpInsn.raw = owner->get_instruction( target );
// The mask/match pair below is the test this file uses for "target is a
// retl" (see the comment above this if-block).
2447 if((tmpInsn.raw & 0xfffff000) == 0x81c3e000) {
2449 relocatable_ = true;
2455 /* FIND FUNCTION ENTRY */
2457 entry = firstAddress;
2458 for ( adr = firstAddress; adr < lastAddress; adr += 4) {
2460 // The function Entry is defined as the first SAVE instruction plus
2461 // the instructions after this.
2462 // ( The first instruction for the nonleaf function is not
2463 // necessarily a SAVE instruction. )
2464 instr.raw = owner->get_instruction(adr);
2466 if (isInsnType(instr, SAVEmask, SAVEmatch)) {
2468 noStackFrame = false;
2473 // If there's no SAVE instruction found, this is a leaf function
2474 // and function Entry will be defined from the first instruction
2477 // noStackFrame, apparently leaf function
2484 /* CHECK IF FUNCTION SHOULD NOT BE RELOCATED WHEN INSTRUMENTED */
2486 // FUNCTION TOO SMALL
2487 if (size() <= 3*sizeof(instruction)) {
2488 canBeRelocated = false;
2492 // if the second instruction in a function that needs relocation is a call
2493 // instruction or a branch instruction, then we can't deal with this.
2494 // New: only a problem if the call is to a location outside the function,
2495 // or is a jump to itself....
2497 // Grab second instruction
2498 Address addrSecondInstr = firstAddress + sizeof(instruction);
2499 instr.raw = owner->get_instruction(addrSecondInstr);
2501 if ( isCallInsn(instr) ) {
2504 target = addrSecondInstr + (instr.call.disp30 << 2);
2506 // if call dest. is outside of function, assume real
2507 // call site. Assuming cant deal with this case!!!!
// NOTE(review): lastAddress is one past the last byte of the function, so
// "target <= lastAddress" treats the first address after the function as
// inside it - confirm whether '<' was intended.
2508 if ( !(target >= firstAddress && target <= lastAddress) ||
2509 (target == addrSecondInstr) ) {
2510 canBeRelocated = false;
2513 // Branch instruction
2514 if ( instr.branch.op == 0 &&
2515 (instr.branch.op2 == 2 || instr.branch.op2 == 6) ) {
2516 canBeRelocated = false;
2521 // Can't handle function
2522 if (canBeRelocated == false && isTrap == true) {
2527 #ifdef BPATCH_LIBRARY
// When the BPatch library requests forced relocation, every function that
// can be relocated is marked as needing relocation.
2528 if (BPatch::bpatch->hasForcedRelocation_NP()) {
2529 if (canBeRelocated == true) {
2531 relocatable_ = true;
2537 /* CREATE ENTRY INSTPOINT */
2538 instr.raw = owner->get_instruction(entry);
// The two constructor calls differ only in the trailing argument(s); the
// relocatable form also passes the entry address.
2540 if (relocatable_ == true) {
2541 funcEntry_ = new instPoint(this, instr, owner, entry, true,
2542 functionEntry, entry);
2544 funcEntry_ = new instPoint(this, instr, owner, entry, true,
2550 // ITERATE OVER INSTRUCTIONS, locating instPoints
// Keep a copy of every instruction word of the function.
// NOTE(review): raw owning array; no release of a previous allocation is
// visible in this view.
2553 instructions = new instruction[size()/sizeof(instruction)];
2555 for (int i=0; adr < lastAddress; adr += sizeof(instruction), i++) {
2557 instr.raw = owner->get_instruction(adr);
2558 instructions[i] = instr;
2559 nexti.raw = owner->get_instruction(adr+4);
2562 InsnRegister rd,rs1,rs2;
2563 get_register_operands(instr,&rd,&rs1,&rs2);
2565 if(rs1.is_o7() || rs2.is_o7() ||
2567 ((instr.raw & 0xc1f80000) != 0x81c00000))) /*indirect call*/
2571 // check for return insn and as a side affect decide if we are at the
2572 // end of the function.
2573 if (isReturnInsn(owner, adr, dummyParam, prettyName())) {
2574 // define the return point
2577 if (relocatable_ == true) {
2578 point = new instPoint(this, instr, owner, adr, false,
2581 point = new instPoint(this, instr, owner, adr, false,
// Each exit point is appended to funcReturns and given its index as instId.
2585 funcReturns.push_back(point);
2586 funcReturns[retId] -> instId = retId; retId++;
// Branches whose cond field is 0 or 8 and whose target lies outside the
// function are recorded as function exits.
2589 else if (instr.branch.op == 0
2590 && (instr.branch.op2 == 2 || instr.branch.op2 == 6)
2591 && (instr.branch.cond == 0 || instr.branch.cond == 8)) {
2593 // find if this branch is going out of the function
2594 disp = instr.branch.disp22;
2595 Address target = adr + (disp << 2);
2597 if (target < firstAddress || target >= lastAddress) {
2600 if (relocatable_ == true) {
2601 point = new instPoint(this, instructions[i], owner, adr,
2602 false, functionExit, adr);
2604 point = new instPoint(this, instructions[i], owner, adr,
2605 false, functionExit);
// NOTE(review): the enclosing else-if already requires cond to be 0 or 8,
// so this test appears unable to succeed - confirm against the full file.
2608 if ((instr.branch.cond != 0) && (instr.branch.cond != 8)) {
2610 point->isBranchOut = true;
2611 point->branchTarget = target;
2614 funcReturns.push_back(point);
2615 funcReturns[retId] -> instId = retId; retId++;
2620 else if (isCallInsn(instr)) {
2622 // if the call target is the address of the call instruction
2623 // then this is not something that we can instrument...
2624 // this occurs in functions with code that is modified when
2625 // they are loaded by the run-time linker, or when the .init
2626 // section is executed. In this case the instructions in the
2627 // parsed image file are different from the ones in the executable
2629 Address call_target = adr + (instr.call.disp30 << 2);
2630 if(instr.call.op == CALLop) {
2631 if(call_target == adr){
2632 cerr << "WARN : function " << prettyName().string_of()
2633 << " has call to same location as call, NOT instrumenting"
2639 // first, check for tail-call optimization: a call where the
2640 // instruction in the delay slot write to register %o7(15), usually
2641 // just moving the caller's return address, or doing a restore
2642 // Tail calls are instrumented as return points, not call points.
2644 if (CallRestoreTC(instr, nexti)) {
2647 point = new instPoint(this, instr, owner, adr, false, callSite, adr);
2649 point = new instPoint(this, instr, owner, adr, false, callSite);
// newCallPoint registers the call site; its return value replaces adr.
2652 adr = newCallPoint(adr, instr, owner, err, callsId, adr, 0, point, blah);
// An exit point is also created at the address following the call.
2657 disp = adr + sizeof(instruction);
2658 instPoint *point = new instPoint(this, instr, owner, disp,
2659 false, functionExit, adr);
2660 funcReturns.push_back(point);
2661 funcReturns[retId] -> instId = retId; retId++;
2665 // check if the call is to inside the function - if definitely
2666 // inside function (meaning that the destination can be determined
2667 // statically because its a call to an address, not to a register
2668 // or register + offset) then don't instrument as call site,
2669 // otherwise (meaning that the call destination is known statically
2670 // to be outside the function, or is not known statically), then
2671 // instrument as a call site....
2672 enum fuzzyBoolean is_inst_point;
2673 is_inst_point = is_call_outside_function(instr, firstAddress,
2675 if (is_inst_point == eFalse) {
2677 // if this is a call instr to a location within the function,
2678 // and if the offset is not 8 then do not define this function
2679 if (!is_set_O7_call(instr, size(), adr - firstAddress)) {
2684 point = new instPoint(this, instr, owner, adr, false, callSite,
2687 point = new instPoint(this, instr, owner, adr, false,
2690 adr = newCallPoint(adr, instr, owner, err, callsId, adr, 0, point,
2695 // get call target instruction
2696 Address call_target = adr + (instr.call.disp30 << 2);
2697 instruction tmpInsn;
2698 tmpInsn.raw = owner->get_instruction( call_target );
2700 // check that call is not directly to a retl instruction,
2701 // and thus a real call
2702 if((tmpInsn.raw & 0xfffff000) != 0x81c3e000) {
2704 point = new instPoint(this, instr, owner, adr, false,
2707 point = new instPoint(this, instr, owner, adr, false,
2710 adr = newCallPoint(adr, instr, owner, err, callsId,
2711 adr, 0, point, blah);
// "jmp %reg; nop" tail calls get a call site plus an exit point, in the
// same way as the call/restore case above.
2720 else if (JmpNopTC(instr, nexti, adr, this)) {
2723 point = new instPoint(this, instr, owner, adr, false, callSite, adr);
2725 point = new instPoint(this, instr, owner, adr, false, callSite);
2728 adr = newCallPoint(adr, instr, owner, err, callsId, adr, 0, point, blah);
2733 disp = adr + sizeof(instruction);
2734 instPoint *point = new instPoint(this, instr, owner, disp, false,
2736 funcReturns.push_back(point);
2737 funcReturns[retId] -> instId = retId; retId++;
2740 else if (isInsnType(instr, JMPLmask, JMPLmatch)) {
2741 /* A register indirect jump. Some jumps may exit the function
2742 (e.g. read/write on SunOS). In general, the only way to
2743 know if a jump is exiting the function is to instrument
2744 the jump to test if the target is outside the current
2745 function. Instead of doing this, we just check the
2746 previous two instructions, to see if they are loading
2747 an address that is out of the current function.
2748 This should catch the most common cases (e.g. read/write).
2749 For other cases, we would miss a return point.
2751 This is the case considered:
2758 Register jumpreg = instr.rest.rs1;
2762 prev1.raw = owner->get_instruction(adr-4);
2763 prev2.raw = owner->get_instruction(adr-8);
2767 if (instr.rest.rd == 0 && (instr.rest.i == 1 || instr.rest.rs2 == 0)
2768 && prev2.sethi.op == FMT2op && prev2.sethi.op2 == SETHIop2
2769 && prev2.sethi.rd == (unsigned)jumpreg
2770 && prev1.rest.op == RESTop
2771 && prev1.rest.rd == (unsigned)jumpreg && prev1.rest.i == 1
2772 && prev1.rest.op3 == ORop3 && prev1.rest.rs1 == (unsigned)jumpreg) {
2774 targetAddr = (prev2.sethi.imm22 << 10) & 0xfffffc00;
2775 targetAddr |= prev1.resti.simm13;
2777 if ( (targetAddr < firstAddress) || (targetAddr >= lastAddress) ){
2780 if (relocatable_ == true) {
2781 point = new instPoint(this, instr, owner, adr, false,
2784 point = new instPoint(this, instr, owner, adr, false,
2788 funcReturns.push_back(point);
2789 funcReturns[retId] -> instId = retId; retId++;
2795 bool checkPoints = checkInstPoints(owner);
2797 if ( (checkPoints == false) || (!canBeRelocated && isTrap) ){
2804 /****************************************************************************/
2805 /****************************************************************************/
2806 /****************************************************************************/
2809 * Check all the instPoints within this function to see whether
2810 * any conflicts occur.
// checkInstPoints: validates the inst points found by findInstPoints.
//   owner - image from which the function's instructions are fetched.
// The visible checks are:
//   * functions whose name starts with "DYNINST" skip the tests;
//   * a branch in the function body that targets the interior of the entry
//     point or of an exit point;
//   * a retl in a function that has a stack frame but no restore;
//   * the entry point overlapping an exit point, or two exit points
//     overlapping each other.
// Several of these cases set relocatable_ = true (the function can still be
// instrumented if it is relocated).
// NOTE(review): the return statements and several closing braces are not
// visible in this view, so which cases return false versus merely set
// relocatable_ must be confirmed in the file.
2812 bool pd_Function::checkInstPoints(const image *owner) {
2814 // Our own library function, skip the test.
2815 if (prettyName().prefixed_by("DYNINST"))
2818 #ifndef BPATCH_LIBRARY /* XXX Users of libdyninstAPI might not agree. */
2819 // The function is too small to be worth instrumenting.
2821 //cerr << "WARN : function " << prettyName().string_of()
2822 // << " too small (size <= 12), can't instrument" << endl;
2827 // No function return! return false;
// NOTE(review): sizeof(funcReturns) is the size of the container object
// itself and is never 0, so this condition cannot be true;
// funcReturns.size() == 0 was probably intended.
2828 if (sizeof(funcReturns) == 0) {
2829 //cerr << "WARN : function " << prettyName().string_of()
2830 // << " no return point found, can't instrument" << endl;
2835 Address adr = getAddress(0);
2837 bool retl_inst = false;
2838 bool restore_inst = false;
2839 // Check if there's any branch instruction jump to the middle
2840 // of the instruction sequence in the function entry point
2841 // and function exit point.
2842 for ( ; adr < getAddress(0) + size(); adr += sizeof(instruction)) {
2844 instr.raw = owner->get_instruction(adr);
// Remember whether a retl and/or a restore occurs anywhere in the function.
2845 if(isInsnType(instr, RETLmask, RETLmatch)) retl_inst = true;
2846 if(isInsnType(instr, RESTOREmask, RESTOREmatch)) restore_inst = true;
2847 if (isInsnType(instr, BRNCHmask, BRNCHmatch)||
2848 isInsnType(instr, FBRNCHmask, FBRNCHmatch)) {
2850 int disp = instr.branch.disp22;
2851 Address target = adr + (disp << 2);
// Target strictly inside the entry point's footprint, while the branch
// itself lies beyond that footprint.
2853 if ((target > funcEntry_->addr)&&
2854 (target < (funcEntry_->addr + funcEntry_->size))) {
2855 if (adr > (funcEntry_->addr+funcEntry_->size)){
2856 //cerr << "WARN : function " << prettyName().string_of()
2857 // << " has branch target inside fn entry point, can't instrument" << endl;
2860 // function can be instrumented if we relocate it
2862 relocatable_ = true;
// Same test against the footprint of every exit point.
2865 for (u_int i = 0; i < funcReturns.size(); i++) {
2866 if ((target > funcReturns[i]->addr)&&
2867 (target < (funcReturns[i]->addr + funcReturns[i]->size))) {
2868 if ((adr < funcReturns[i]->addr)||
2869 (adr > (funcReturns[i]->addr + funcReturns[i]->size))){
2870 //cerr << "WARN : function " << prettyName().string_of()
2871 // << " has branch target inside fn return point, "
2872 // << "can't instrument" << endl;
2875 // function can be instrumented if we relocate it
2877 relocatable_ = true;
2883 // if there is a retl instruction and we don't think this is a leaf
2884 // function then this is a way messed up function...well, at least we
2885 // we can't deal with this...the only example I can find is _cerror
2886 // and _cerror64 in libc.so.1
2887 if(retl_inst && !noStackFrame && !restore_inst){
2888 //cerr << "WARN : function " << prettyName().string_of()
2889 // << " retl instruction in non-leaf function, can't instrument"
2894 // check that no instrumentation points could overlap
// func_entry is the first address past the entry point's footprint.
2895 Address func_entry = funcEntry_->addr + funcEntry_->size;
2896 for (u_int i = 0; i < funcReturns.size(); i++) {
2897 if(func_entry >= funcReturns[i]->addr){
2900 // function can be instrumented if we relocate it
2902 relocatable_ = true;
2904 if(i >= 1){ // check if return points overlap
2905 Address prev_exit = funcReturns[i-1]->addr+funcReturns[i-1]->size;
2906 if(funcReturns[i]->addr < prev_exit) {
2907 //cerr << "WARN : function " << prettyName().string_of()
2908 // << " overlapping instrumentation points, can't instrument"
2911 // function can be instrumented if we relocate it
2913 relocatable_ = true;
2921 /****************************************************************************/
2922 /****************************************************************************/
2923 /****************************************************************************/
2925 // used for sorting inst points - typecast void *s to instPoint **s, then
2926 // do {-1, 0, 1} comparison by address....
// Comparison callback with the qsort-style (const void *, const void *)
// signature. Each argument points to an instPoint*; the two points are
// ordered by the value of iPgetAddress().
// NOTE(review): the return statements are not visible in this view; per the
// comment above the function the results are -1, 0 or 1.
2927 int sort_inst_points_by_address(const void *arg1, const void *arg2) {
2928 instPoint * const *a = static_cast<instPoint* const *>(arg1);
2929 instPoint * const *b = static_cast<instPoint* const *>(arg2);
2930 if ((*a)->iPgetAddress() > (*b)->iPgetAddress()) {
2932 } else if ((*a)->iPgetAddress() < (*b)->iPgetAddress()) {
2938 /****************************************************************************/
2939 /****************************************************************************/
2940 /****************************************************************************/
2943 First pass, handle cases where general-purpose re-writing needs to be
2944 done to preserve paradynd/dyninstAPI's assumptions about function
2946 In current sparc-solaris version, this is ONLY the following cases:
2947 If a call site and return point overlap and are located directly
2948 next to each other, and the return point is located on a
2949 restore operation, then a TailCallPA is applied whose footprint
2950 covers the call and the restore.
2951 The TailCallPA should rewrite the call; restore, and update the
2952 locations of both inst points as necessary.
2953 If the 2nd instruction in a function is a CALL, then a single nop
2954 is inserted before the call - to clear out the delay slot of the
2955 branch which is inserted at the first instruction (for entry point
2957 If the dyninstAPI is separated from paradyn, or when converting this
2958 code to support instrumentation at arbitrary points (instead of assuming
2959 entry, exit, and call site instrumentation), then the check should be
2961 look at 2nd insn, see if its a call
2963 look at all inst points, see if any of them have a call in what
2964 corresponds to the delay slot of the instruction which is going to
2965 get stomped by a branch/call to tramp....
2966 If the code has any calls to address (of call) + 8, replace the call with
2967 a sequence which sets the O7 register to the (original) address....
2969 Attaches LocalAlterations related to general rewrites of function
2970 (by adding them to LocalAlterationSet p).
2971 returns boolean value indicating whether it was able to figure out
2972 sequence of LocalAlterations to apply to perform specified rewrites....
// PA_attachGeneralRewrites: adds to p the alterations needed for general
// rewrites of the function when it is relocated.
//   owner        - image used to fetch the instruction at a call target.
//   p            - alteration set that receives the new alterations.
//   baseAddress  - subtracted from a call target before it is passed to
//                  owner->get_instruction.
//   firstAddress - address of the function's first instruction, used to
//                  compute call addresses.
//   loadedCode   - the function's instructions.
//   codeSize     - size of loadedCode in bytes (asserted to be a multiple of
//                  sizeof(instruction)); its declaration is on a line that is
//                  not visible in this view.
// Three alterations are visible: InsertNops when the second instruction is a
// call, SetO7 for calls to their own address + 8, and RetlSetO7 for calls
// that leave the function and land on a retl.
// NOTE(review): the return statements and closing braces are not visible in
// this view.
2974 bool pd_Function::PA_attachGeneralRewrites( const image *owner,
2975 LocalAlterationSet *p,
2976 Address baseAddress,
2977 Address firstAddress,
2978 instruction loadedCode[],
2979 unsigned /* numInstructions */,
// NOTE(review): instr, nexti, tail_call and the_call are not referenced in
// the visible lines of this function.
2981 instruction instr, nexti;
2982 TailCallOptimization *tail_call;
2983 // previously referred to calls[i] directly, but gdb seems to be having
2984 // trouble with templated types with the new compiler - making debugging
2985 // difficult - so, directly assign calls[i] to the_call so can use gdb
2986 // to get at info about it....
2987 instPoint *the_call;
2989 #ifdef DEBUG_PA_INST
2990 cerr << "pd_Function::PA_attachGeneralRewrites called" <<endl;
2991 cerr << " prettyName = " << prettyName() << endl;
2994 // Look at the 2nd instruction in function. If its a call, then
2995 // stick a single nop before it....
2996 // The comment in the old inst-sparc-solaris.C version describing the rationale
2997 // for the change is :
2998 // if the second instruction in the function is a call instruction
2999 // then this cannot go in the delay slot of the branch to the
3000 // base tramp, so add a noop between first and second instructions
3001 // in the relocated function (check out write in libc.so.1 for
3002 // an example of this):
3004 // save %sp, -96, %sp branch to base tramp
3007 // Note that if this call insn is a call to address + 8, it will actually
3008 // be replaced by a sequence which does NOT include a call, so don't need to
3009 // worry about inserting the extra nop....
3010 if (isCallInsn(loadedCode[1]) &&
3011 !is_set_O7_call(loadedCode[1], codeSize, sizeof(instruction))) {
3012 // Insert one no-op after the first instruction
3013 // (RewriteFootprint will copy the instruction at offset 0, and then
3014 // place the no-op after that instruction)
3015 InsertNops *nop = new InsertNops(this, 0, sizeof(instruction));
3016 p->AddAlteration(nop);
3017 #ifdef DEBUG_PA_INST
3018 cerr << " added single NOP in 2nd instruction" << endl;
3022 // Iterate over function instruction by instruction, looking for calls to
3024 assert((codeSize % sizeof(instruction)) == 0);
3025 for(unsigned i=0;i<(codeSize/sizeof(instruction));i++) {
3026 // want CALL %address, NOT CALL %register
3027 if (isTrueCallInsn(loadedCode[i])) {
3028 // figure out destination of call....
3029 if (is_set_O7_call(loadedCode[i], codeSize, i * sizeof(instruction))) {
3030 SetO7 *seto7 = new SetO7(this, i * sizeof(instruction));
3031 p->AddAlteration(seto7);
3032 #ifdef DEBUG_PA_INST
3033 cerr << " detected call pattern designed to set 07 register at offset "
3034 << i * sizeof(instruction) << endl;
3038 // Check for a call to a location outside of the function,
3039 // where the target of the call is a retl instruction. This
3040 // sequence is used to set the o7 register with the PC.
3042 // Get target of call instruction
3043 Address callAddress = firstAddress + i*sizeof(instruction);
3044 Address callTarget = callAddress +
3045 (loadedCode[i].call.disp30 << 2);
3047 // If call is to location outside of function
// NOTE(review): firstAddress + size() is one past the function's last byte,
// so the strict '>' treats that address as inside the function - confirm
// whether '>=' was intended.
3048 if ( (callTarget < firstAddress) ||
3049 (callTarget > firstAddress + size()) ) {
3051 // get target instruction
3052 instruction tmpInsn;
3053 tmpInsn.raw = owner->get_instruction(callTarget - baseAddress);
3054 // If call target instruction is a retl instruction
3055 if((tmpInsn.raw & 0xfffff000) == 0x81c3e000) {
3057 // Retrieve the instruction in the delay slot of the retl,
3058 // so that it can be copied into the relocated function
// NOTE(review): the line holding the left-hand side of this statement is not
// visible; since tmpInsn is passed to RetlSetO7 below, the result is
// presumably stored back into tmpInsn.raw - confirm.
3060 owner->get_instruction( (callTarget - baseAddress) + sizeof(instruction) );
3062 RetlSetO7 *retlSetO7 =
3063 new RetlSetO7(this, i * sizeof(instruction), tmpInsn);
3064 p->AddAlteration(retlSetO7);
3066 #ifdef DEBUG_PA_INST
3067 cerr << " detected call to retl instruction"
3068 << " designed to set 07 register at offset "
3069 << i * sizeof(instruction) << endl;
// PA_attachTailCalls: walks the function's call-site inst points (calls) and
// attaches a tail-call alteration to p for each one that matches a tail-call
// pattern:
//   * call + restore -> CallRestoreTailCallOptimization
//   * jmp + nop      -> JmpNopTailCallOptimization
// Each alteration covers the two instructions starting at the inst point's
// offset from the start of the function.
// NOTE(review): the return statements and closing braces are not visible in
// this view.
3081 bool pd_Function::PA_attachTailCalls(LocalAlterationSet *p) {
3082 instruction instr, nexti;
3083 TailCallOptimization *tail_call;
3084 // previously referred to calls[i] directly, but gdb seems to be having
3085 // trouble with templated types with the new compiler - making debugging
3086 // difficult - so, directly assign calls[i] to the_call so can use gdb
3087 // to get at info about it....
3088 instPoint *the_call;
3090 #ifdef DEBUG_PA_INST
3091 cerr << "pd_Function::PA_tailCallOptimizations called" <<endl;
3092 cerr << " prettyName = " << prettyName() << endl;
3096 // Look for an instPoint in funcCalls where the instruction is
3097 // a call instruction and the next instruction is a restore op
3098 // or where the instruction is a jmp (out of function?), and the next
3099 // instruction is a nop....
3100 // There is an unfortunate dependence on the method for detecting tail-call
3101 // optimizations in the code for detecting overlapping inst-points, below.
3102 // If change this code, may need to update that code correspondingly....
3103 for(unsigned i=0;i<calls.size();i++) {
3104 // this should return the offset at which the FIRST instruction which
3105 // is ACTUALLY OVERWRITTEN BY INST POINT is located....
3106 the_call = calls[i];
// Byte offset of the inst point from the start of the function.
3107 int offset = (the_call->iPgetAddress() - getAddress(0));
3108 instr = the_call->insnAtPoint();
3109 nexti = the_call->insnAfterPoint();
3110 if (CallRestoreTC(instr, nexti)) {
3111 tail_call = new CallRestoreTailCallOptimization(this, offset,
3112 offset + 2 * sizeof(instruction), instr);
3113 p->AddAlteration(tail_call);
3115 #ifdef DEBUG_PA_INST
3116 cerr << " detected call, restore tail-call optimization at offset "
3121 if (JmpNopTC(instr, nexti, the_call->iPgetAddress(), this)) {
3122 tail_call = new JmpNopTailCallOptimization(this, offset,
3123 offset + 2 * sizeof(instruction));
3124 p->AddAlteration(tail_call);
3126 #ifdef DEBUG_PA_INST
3127 cerr << " detected jmp, nop tail-call optimization at offset "
3137 /****************************************************************************/
3138 /****************************************************************************/
3139 /****************************************************************************/
3142 Second pass, handle cases where inst points overlap each other (and
3143 thus prevent the normal instrumentation technique from being used).
3144 In current sparc-solaris version, the following algorithm is used:
3145 Check all inst points. If any overlap, figure out the instruction
3146 that each is trying to get and insert nops so that they no longer
3149 bool pd_Function::PA_attachOverlappingInstPoints(
3150 LocalAlterationSet *p, Address /* baseAddress */,
3151 Address /* firstAddress */,
3152 instruction loadedCode[], int /* codeSize */) {
3154 instruction instr, nexti;
3156 #ifdef DEBUG_PA_INST
3157 cerr << "pd_Function::PA_attachOverlappingInstPoints called" <<endl;
3158 cerr << " prettyName = " << prettyName() << endl;
3161 // Make a list of all inst-points attached to function, and sort
3162 // by address. Then check for overlaps....
3163 vector<instPoint*> foo;
3164 //foo += funcEntry_;
3165 //foo += funcReturns;
3167 // define sort_inst_points_by_address as function with following
3169 // void sort (int (*)(const void *, const void *))
3170 // - takes 2 void *'s, typecasts to instPoint *'s, then does
3171 // {-1, 0, 1} comparison based on ip->addr....
3172 // foo.sort(sort_inst_points_by_address);
3173 // qsort((void *) data_, sz_, sizeof(T), cmpfunc);
3174 //qsort(foo.data(), foo.size(), sizeof(instPoint*), sort_inst_points_by_address);
3176 sorted_ips_vector(foo);
3178 // should hopefully have inst points for fn sorted by address....
3179 // check for overlaps....
3180 for (unsigned i=0;i<foo.size()-1;i++) {
3181 instPoint *this_inst_point = foo[i];
3182 instPoint *next_inst_point = foo[i+1];
3183 // This is kind of a hack - strictly speaking, the peephole alteration
3184 // abstraction for relocating inst points should be applied to the set of
3185 // inst points in the function after every independent set of alterations is
3186 // applied. This is not done for performance reasons - rather all the inst
3187 // points in the function are relocated at once - based on the alteration sets.
3188 // As such, the peephole alterations are NOT implemented so as to be strictly
3189 // independent. An example of this is the interaction of the code for attaching
3190 // tail-call optimization PAs based on inst points (NOT function instructions),
3191 // above. Anyway, the net effect is that by the time flow-of-control reaches
3192 // here, the tail-call optimization has (hopefully) been rewritten, but the
3193 // inst points pointing to it have not been updated. As such, check here to
3194 // make sure that the overlapping inst points aren't really part of a tail-call
3195 // optimization. This introduces some lack of locality of reference - sorry....
3197 if ((this_inst_point->ipType == callSite) &&
3198 (next_inst_point->ipType == functionExit)) {
3199 instr = this_inst_point->insnAtPoint();
3200 nexti = this_inst_point->insnAfterPoint();
3201 if (CallRestoreTC(instr, nexti) ||
3202 JmpNopTC(instr, nexti, this_inst_point->iPgetAddress(), this)) {
3204 // This tail call optimization will be rewritten, eliminating the
3205 // overlap, so we don't have to worry here about rewriting this
3206 // as overlapping instPoints.
3207 // Also, I added the i++ because we don't have to bother looking
3208 // for an overlap between the next_inst_point, and any instPoints
3209 // that may follow it (even one that is located at the very next
3210 // instruction). The reason for this is that when we relocate the
3211 // function, we will call installBaseTrampSpecial to generate the
3212 // base tramp. The distance between the base tramp and the
3213 // relocated function will then be within the range
3214 // of a branch insn, and only one instruction at the instPoint
3215 // will be relocated to the baseTramp (since the restore or nop
3216 // will not also have a delay slot insn) which means that there
3217 // will be no conflict with next_inst_point overlapping another
3225 // check if inst point overlaps with next inst point....
3226 int overlap = ((this_inst_point->iPgetAddress() +
3227 this_inst_point->Size()) - next_inst_point->iPgetAddress());
3229 // Inst point overlaps with next one. Insert
3230 // InsertNops into PA Set AFTER instruction pointed to
3231 // by inst point (Making sure that this does NOT break up
3232 // an insn and its delay slot).....
3233 // ALRT ALRT : This is NOT designed to handle the case where
3234 // 2 inst points are located at exactly the same place or
3235 // 1 is located in the delay slot of the other - it will NOT
3236 // break up the 2 inst points in that case....
3238 int offset = (this_inst_point->insnAddress() - getAddress(0));
3240 if (IS_DELAYED_INST(loadedCode[offset/sizeof(instruction)])) {
3241 offset += sizeof(instruction);
3243 InsertNops *nops = new InsertNops(this, offset, overlap);
3244 p->AddAlteration(nops);
3246 #ifdef DEBUG_PA_INST
3247 cerr << " detected overlapping inst points : offset " << offset <<
3248 " overlap " << overlap << endl;