2 * Copyright (c) 1996 Barton P. Miller
4 * We provide the Paradyn Parallel Performance Tools (below
5 * described as "Paradyn") on an AS IS basis, and do not warrant its
6 * validity or performance. We reserve the right to update, modify,
7 * or discontinue this software at any time. We shall have no
8 * obligation to supply such updates or modifications or any other
9 * form of support to you.
11 * This license is for research uses. For such uses, there is no
12 * charge. We define "research use" to mean you may freely use it
13 * inside your organization for whatever purposes you see fit. But you
14 * may not re-distribute Paradyn or parts of Paradyn, in any form
15 * source or binary (including derivatives), electronic or otherwise,
16 * to any other organization or entity without our permission.
18 * (for other uses, please contact us at paradyn@cs.wisc.edu)
20 * All warranties, including without limitation, any warranty of
21 * merchantability or fitness for a particular purpose, are hereby
24 * By your use of Paradyn, you understand and agree that we (or any
25 * other person or entity with proprietary rights in Paradyn) are
26 * under no obligation to provide either maintenance services,
27 * update services, notices of latent defects, or correction of
28 * defects for Paradyn.
30 * Even if advised of the possibility of such damages, under no
31 * circumstances shall we (or any other person or entity with
32 * proprietary rights in the software licensed hereunder) be liable
33 * to you or any third party for direct, indirect, or consequential
34 * damages of any character regardless of type of action, including,
35 * without limitation, loss of profits, loss of use, loss of good
36 * will, or computer failure or malfunction. You agree to indemnify
37 * us (and any other person or entity with proprietary rights in the
38 * software licensed hereunder) for any and all liability it may
39 * incur to third parties resulting from your use of Paradyn.
43 * inst-x86.C - x86 dependent functions and code generator
44 * $Id: inst-x86.C,v 1.86 2001/07/11 21:19:58 gurari Exp $
50 #include "common/h/headers.h"
52 #ifndef BPATCH_LIBRARY
53 #include "rtinst/h/rtinst.h"
55 #include "common/h/Dictionary.h"
56 #include "dyninstAPI/src/symtab.h"
57 #include "dyninstAPI/src/process.h"
58 #include "dyninstAPI/src/inst.h"
59 #include "dyninstAPI/src/instP.h"
60 #include "dyninstAPI/src/ast.h"
61 #include "dyninstAPI/src/util.h"
62 #include "dyninstAPI/src/stats.h"
63 #include "dyninstAPI/src/os.h"
64 #include "dyninstAPI/src/showerror.h"
66 #include "dyninstAPI/src/arch-x86.h"
67 #include "dyninstAPI/src/inst-x86.h"
68 #include "dyninstAPI/src/instPoint.h" // includes instPoint-x86.h
69 #include "dyninstAPI/src/instP.h" // class returnInstance
71 // for function relocation
72 #include "dyninstAPI/src/func-reloc.h"
73 #include "dyninstAPI/src/LocalAlteration.h"
// Forward declaration; no definition of this class is visible in this part of the file.
75 class ExpandInstruction;
// Function-relocation helpers, declared extern here (definitions are not in view).
// Both receive the instPoint pointer by reference.
78 extern bool relocateFunction(process *proc, instPoint *&location);
79 extern void modifyInstPoint(instPoint *&location,process *proc);
// NOTE(review): presumably stores the exponent in `result` when `value` is a
// power of two -- confirm at the definition.
81 extern bool isPowerOf2(int value, int &result);
82 void BaseTrampTrapHandler(int); //siginfo_t*, ucontext_t*);
// File-scope buffers, both dimensioned by NEW_INSTR_ARRAY_LEN (defined outside this view).
84 instruction NEW_INSTR[NEW_INSTR_ARRAY_LEN];
85 unsigned char OLD_CODE[NEW_INSTR_ARRAY_LEN];
87 // The general machine registers.
88 // These values are taken from the Pentium manual and CANNOT be changed.
106 // Size of a jump rel32 instruction
107 #define JUMP_REL32_SZ (5)
109 // Size of a call rel32 instruction
110 #define CALL_REL32_SZ (5)
// NOTE(review): each *_OPC1 / *_OPC2 pair looks like an opcode byte plus the
// ModR/M reg-field extension (FF /6 = PUSH r/m32, FF /2 = CALL r/m32) --
// confirm at the use sites.
112 #define PUSH_RM_OPC1 (0xFF)
113 #define PUSH_RM_OPC2 (6)
114 #define CALL_RM_OPC1 (0xFF)
115 #define CALL_RM_OPC2 (2)
// 0x50 plus the register number of EBP.
116 #define PUSH_EBP (0x50+EBP)
// NOTE(review): presumably the ModR/M reg-field extension selecting SUB in an
// immediate-group opcode -- confirm at the use sites.
117 #define SUB_REG_IMM32 (5)
121 Function arguments are in the stack and are addressed with a displacement
122 from EBP. EBP points to the saved EBP, EBP+4 is the saved return address,
123 EBP+8 is the first parameter.
124 TODO: what about far calls?
// Displacement from EBP of the first parameter (EBP+8, per the layout described just above).
127 #define PARAM_OFFSET (8)
130 // number of virtual registers
131 #define NUM_VIRTUAL_REGISTERS (32)
133 // offset from EBP of the saved EAX for a tramp
// Evaluates to -(32*4) - 4 = -132, i.e. just below the 32 four-byte virtual registers.
134 #define SAVED_EAX_OFFSET (-NUM_VIRTUAL_REGISTERS*4-4)
136 /****************************************************************************/
137 /****************************************************************************/
138 /****************************************************************************/
// trampTemplate extended with the begin/end offsets of the four guard code
// fragments (guard-on and guard-off, for pre and for post instrumentation).
// generate_guard_code adds these offsets to the start of the tramp buffer to
// find where each fragment is written and where its conditional jump lands.
140 class NonRecursiveTrampTemplate : public trampTemplate
// Guard-on fragment placed before the pre-instrumentation.
145 int guardOnPre_beginOffset;
146 int guardOnPre_endOffset;
// Guard-off fragment placed after the pre-instrumentation.
148 int guardOffPre_beginOffset;
149 int guardOffPre_endOffset;
// Guard-on fragment placed before the post-instrumentation.
151 int guardOnPost_beginOffset;
152 int guardOnPost_endOffset;
// Guard-off fragment placed after the post-instrumentation.
154 int guardOffPost_beginOffset;
155 int guardOffPost_endOffset;
159 /****************************************************************************/
160 /****************************************************************************/
161 /****************************************************************************/
// Prototypes for two emitters used by the guard code generator further down.
// Both take the output cursor `insn` by reference.
// NOTE(review): presumably each advances `insn` past the bytes it writes --
// confirm at the definitions, which are not in view.
163 void emitOpRMImm8( unsigned opcode1, unsigned opcode2, Register base, int disp, char imm,
164 unsigned char * & insn );
165 void emitMovImmToMem( Address maddr, int imm,
166 unsigned char * & insn );
168 /****************************************************************************/
169 /****************************************************************************/
170 /****************************************************************************/
173 checkInstructions: check that there are no known jumps to the instructions
174 before and after the point.
// Trims the instructions attached to this point so that the bytes to be
// overwritten do not contain a known jump target (owner()->isJumpTarget), and
// records in jumpAddr_ the address where the jump will be inserted.
// NOTE(review): several statements of this function are not visible in this
// view; the comments below describe only the visible ones.
176 void instPoint::checkInstructions() {
177 Address currAddr = addr_;
178 unsigned OKinsns = 0;
180 // if jumpAddr_ is not zero, this point has been checked already
// Byte budget for the point; JUMP_SZ unless changed for the entry point below.
185 unsigned maxSize = JUMP_SZ;
186 if (address() == func()->getAddress(0)) // entry point
// tSize accumulates the total size of the instructions kept for this point.
188 tSize = insnAtPoint_.size();
// Instructions before the point are only considered when the point itself is
// not a jump target.
190 if (!owner()->isJumpTarget(currAddr)) {
191 // check instructions before point
192 unsigned insnsBefore_ = insnsBefore();
193 for (unsigned u = 0; u < insnsBefore_; u++) {
// Walk backwards one instruction at a time, moving currAddr to its start.
195 tSize += (*insnBeforePt_)[u].size();
196 currAddr -= (*insnBeforePt_)[u].size();
197 if (owner()->isJumpTarget(currAddr)) {
198 // must remove instruction from point
199 // fprintf(stderr, "check instructions point 0x%lx, jmp to 0x%lx\n",
// Keep only the first OKinsns instructions before the point.
206 (*insnBeforePt_).resize(OKinsns);
208 // this is the address where we insert the jump
209 jumpAddr_ = currAddr;
211 // check instructions after point
212 currAddr = addr_ + insnAtPoint_.size();
214 unsigned insnsAfter_ = insnsAfter();
// Stop as soon as the byte budget is met or the instructions run out.
215 for (unsigned u = 0; tSize < maxSize && u < insnsAfter_; u++) {
216 if (owner()->isJumpTarget(currAddr))
219 unsigned size = (*insnAfterPt_)[u].size();
// Keep only the first OKinsns instructions after the point.
224 (*insnAfterPt_).resize(OKinsns);
// Budget not reached: fall back to the instruction at the point alone and
// drop all neighbouring instructions.
227 if (tSize < maxSize) {
228 tSize = insnAtPoint_.size();
230 if (insnBeforePt_) (*insnBeforePt_).resize(0);
231 if (insnAfterPt_) (*insnAfterPt_).resize(0);
236 /* PT is an instrumentation point. ENTRY is the entry point for the
237 same function, and EXITS are the exit instrumentation points for
238 the function. Returns true if this function supports an extra slot
239 and PT can use it. */
241 _canUseExtraSlot(const instPoint *pt, const instPoint *entry,
242 const vector<instPoint*> &exits)
// The entry point must span at least two jump slots (2*JUMP_SZ bytes).
244 if (entry->size() < 2*JUMP_SZ)
247 // We get 10 bytes for the entry points, instead of the usual five,
248 // so that we have space for an extra jump. We can then insert a
249 // jump to the basetramp in the second slot of the entry point
250 // and use a short 2-byte jump from the point to the second jump.
251 // We adopt the following rule: Only one point in the function
252 // can use the indirect jump, and this is the first return point
253 // with a size that is less than five bytes.
// Scan the exits in order, looking for pt and for smaller-than-JUMP_SZ exits.
255 for (unsigned u = 0; u < exits.size(); u++)
256 if (exits[u] == pt) {
259 } else if (exits[u]->size() < JUMP_SZ)
264 /* The entry has a slot, the point can be used for a slot,
265 now see if the point can reach the slot. */
// Distance from the point back to the second slot (entry jump address + 5);
// the assert requires the slot to lie before the point.
266 int displacement = entry->jumpAddr() + 5 - pt->jumpAddr();
267 assert(displacement < 0);
// The point needs room for a 2-byte short jump, and the displacement measured
// from the end of that jump (displacement-2) must fit in a signed byte.
268 if (pt->size() >= 2 && (displacement-2) > SCHAR_MIN)
275 Returns true if we can use the extra slot for a jump at the entry point
276 to insert a jump to a base tramp at this point. */
// Member wrapper: looks up the entry and exit points of the enclosing function
// for `proc` and forwards to _canUseExtraSlot with this point.
277 bool instPoint::canUseExtraSlot(process *proc) const
279 return _canUseExtraSlot(this,
280 func()->funcEntry(proc),
281 func()->funcExits(proc));
284 /* ENTRY and EXITS are the entry and exit points of a function. PT
285 must be a point in the same function. Return true if PT requires a
286 trap to instrument. */
// The checks run in the same order as the three strategies in
// generateBranchToTramp: 5-byte jump, extra slot at the entry, then trap.
288 _usesTrap(const instPoint *pt,
289 const instPoint *entry,
290 const vector<instPoint*> &exits)
292 /* If this point is big enough to hold a 32-bit jump to any
293 basetramp, it doesn't need a trap. */
294 if (pt->size() >= JUMP_REL32_SZ)
297 /* If it can use the extra slot, it doesn't need a trap. */
298 if (_canUseExtraSlot(pt, entry, exits)) {
301 /* Otherwise it needs a trap. */
// Member wrapper: forwards to _usesTrap with this point and the entry and
// exit points of the enclosing function for `proc`.
305 bool instPoint::usesTrap(process *proc) const
307 return _usesTrap(this, func()->funcEntry(proc), func()->funcExits(proc));
310 /**************************************************************
312 * machine dependent methods of pdFunction
314 **************************************************************/
316 // Determine if the called function is a "library" function or a "user" function
317 // This cannot be done until all of the functions have been seen, verified, and
// NOTE(review): parts of this function are not visible in this view; the
// comments below describe only the visible statements.
320 void pd_Function::checkCallPoints() {
// Collects the call points that are kept.
325 vector<instPoint*> non_lib;
327 for (i=0; i<calls.size(); ++i) {
328 /* check to see where we are calling */
// Direct call: the target can be computed from the instruction itself.
332 if (!p->insnAtPoint().isCallIndir()) {
333 loc_addr = p->insnAtPoint().getTarget(p->address());
// Register the callee address as a jump target, then look it up as a function.
334 file()->exec()->addJumpTarget(loc_addr);
335 pd_Function *pdf = (file_->exec())->findFunction(loc_addr);
339 non_lib.push_back(p);
341 // if this is a call outside the function, keep it
342 if((loc_addr < getAddress(0))||(loc_addr > (getAddress(0)+size()))){
343 non_lib.push_back(p);
350 // Indirect call -- be conservative, assume it is a call to
351 // an unnamed user function
352 //assert(!p->callee());
354 non_lib.push_back(p);
361 // this function is not needed
// Stub: all parameters are unnamed and unused. Calling it trips assert(0);
// the return of 0 is only reached when asserts are compiled out.
362 Address pd_Function::newCallPoint(Address, const instruction,
363 const image *, bool &)
364 { assert(0); return 0; }
367 // see if we can recognize a jump table and skip it
368 // return the size of the table in tableSz.
// NOTE(review): part of the parameter list and several statements are not
// visible in this view; funcBegin, funcEnd and tableSz are used below and are
// presumably the remaining parameters.
369 bool checkJumpTable(image *im, instruction insn, Address addr,
374 const unsigned char *instr = insn.ptr();
377 the instruction usually used for jump tables is
378 jmp dword ptr [REG*4 + ADDR]
379 where ADDR is an immediate following the SIB byte.
380 The opcode is 0xFF and the MOD/RM byte is 0x24.
381 The SS field (bits 7 and 6) of SIB is 2, and the
382 base ( bits 2, 1, 0) is 5. The index bits (5,4,3)
// Match opcode 0xFF, ModR/M 0x24, SIB scale field == 2 and SIB base field == 5.
385 if (instr[0] == 0xFF && instr[1] == 0x24 &&
386 ((instr[2] & 0xC0)>>6) == 2 && (instr[2] & 0x7) == 5) {
// The table address is read as an Address from the bytes following the SIB byte.
// NOTE(review): this is an unaligned, type-punned read of the instruction bytes.
387 const Address tableBase = *(const Address *)(instr+3);
388 //fprintf(stderr, "Found jump table at 0x%lx 0x%lx\n",addr, tableBase);
389 // check if the table is right after the jump and inside the current function
390 if (tableBase > funcBegin && tableBase < funcEnd) {
391 // table is within function code
// A table that starts before the end of the jump instruction is reported as bad.
392 if (tableBase < addr+insn.size()) {
393 fprintf(stderr, "bad indirect jump at 0x%lx\n", addr);
395 } else if (tableBase > addr+insn.size()) {
396 // jump table may be at the end of the function code - adjust funcEnd
400 // skip the jump table
// Count consecutive table words whose value lies in [funcBegin, funcEnd];
// each one adds sizeof(int) bytes to tableSz.
401 for (const unsigned *ptr = (const unsigned *)im->getPtrToInstruction(tableBase);
402 *ptr >= funcBegin && *ptr <= funcEnd; ptr++) {
403 //fprintf(stderr, " jump table entry = 0x%lx\n", *(unsigned *)ptr);
404 tableSz += sizeof(int);
408 // fprintf(stderr, "Ignoring external jump table at 0x%lx.\n", tableBase);
// Clears canBeRelocated when `insn` has opcode byte 0xFF and a ModR/M reg
// field (bits 5..3) of 4 or 5. The flag is only ever cleared in the visible
// code, never set back to true.
414 void checkIfRelocatable(instruction insn, bool &canBeRelocated) {
415 const unsigned char *instr = insn.ptr();
417 // Check if REG bits of ModR/M byte are 100 or 101 (Possible jump
419 if (instr[0] == 0xFF &&
420 ( ((instr[1] & 0x38)>>3) == 4 || ((instr[1] & 0x38)>>3) == 5 )) {
422 // function should not be relocated
423 canBeRelocated = false;
427 /* auxiliary data structures for function findInstPoints */
// Kind of instrumentation point; stored in the `type` member below.
428 enum { EntryPt, CallPt, ReturnPt };
// Constructors of the point_ record: an instPoint pointer, the index of its
// instruction in the per-function instruction buffer, and its kind.
// The default constructor zeroes all three members.
431 point_(): point(0), index(0), type(0) {};
432 point_(instPoint *p, unsigned i, unsigned t): point(p), index(i), type(t) {};
// Decodes the instructions of this function, creates the entry, call and
// return instrumentation points, attaches neighbouring instructions to the
// points that need more room, and updates the relocation flags.
// NOTE(review): many statements of this function are not visible in this
// view; the comments below describe only the visible ones.
439 bool pd_Function::findInstPoints(const image *i_owner) {
440 // sorry for this hack, but this routine can modify the image passed in,
441 // which doesn't occur on other platforms --ari
442 image *owner = const_cast<image *>(i_owner); // const cast
445 //fprintf(stderr,"Function %s, size = %d\n", prettyName().string_of(), size());
449 #if defined(i386_unknown_solaris2_5)
450 /* On Solaris, this function is called when a signal handler
451 returns. If it requires trap-based instrumentation, it can foul
452 the handler return mechanism. So, better exclude it. */
453 if (prettyName() == "_setcontext" || prettyName() == "setcontext")
455 #endif /* i386_unknown_solaris2_5 */
457 // XXXXX kludge: these functions are called by DYNINSTgetCPUtime,
458 // they can't be instrumented or we would have an infinite loop
459 if (prettyName() == "gethrvtime" || prettyName() == "_divdi3"
460 || prettyName() == "GetProcessTimes")
// One slot per byte of the function: an upper bound on the number of points.
// NOTE(review): no matching delete[] for `points` or `allInstr` is visible in
// this view -- confirm they are released on every return path.
463 point_ *points = new point_[size()];
464 //point_ *points = (point_ *)alloca(size()*sizeof(point));
465 unsigned npoints = 0;
467 const unsigned char *instr = (const unsigned char *)owner->getPtrToInstruction(getAddress(0));
468 Address adr = getAddress(0);
469 unsigned numInsns = 0;
474 // keep a buffer with all the instructions in this function
// The five extra entries leave room for the instructions collected past the
// end of the function further down.
475 instruction *allInstr = new instruction[size()+5];
476 //instruction *allInstr = (instruction *)alloca((size()+5)*sizeof(instruction));
478 // define the entry point
479 insnSize = insn.getNextInstruction(instr);
480 instPoint *p = new instPoint(this, owner, adr, insn);
482 points[npoints++] = point_(p, numInsns, EntryPt);
484 // check if the entry point contains another point
485 if (insn.isJumpDir()) {
486 Address target = insn.getTarget(adr);
487 owner->addJumpTarget(target);
488 if (target < getAddress(0) || target >= getAddress(0) + size()) {
489 // jump out of function
490 // this is an empty function
495 } else if (insn.isReturn()) {
496 // this is an empty function
500 } else if (insn.isCall()) {
501 // TODO: handle calls at entry point
502 // call at entry point
503 //instPoint *p = new instPoint(this, owner, adr, insn);
505 //points[npoints++] = point_(p, numInsns, CallPt);
506 //fprintf(stderr,"Function %s, call at entry point\n", prettyName().string_of());
513 allInstr[numInsns] = insn;
518 // get all the instructions for this function, and define the instrumentation
519 // points. For now, we only add one instruction to each point.
520 // Additional instructions, for the points that need them, will be added later.
522 #ifdef BPATCH_LIBRARY
523 if (BPatch::bpatch->hasForcedRelocation_NP()) {
528 // checkIfRelocatable will set canBeRelocated = false if there is a jump to a
529 // jump table inside this function.
530 bool canBeRelocated = true;
532 Address funcEnd = getAddress(0) + size();
// Main decode loop: one iteration per instruction until funcEnd is reached.
533 for ( ; adr < funcEnd; instr += insnSize, adr += insnSize) {
534 insnSize = insn.getNextInstruction(instr);
535 assert(insnSize > 0);
537 if (adr + insnSize > funcEnd) {
541 if (insn.isJumpIndir()) {
542 unsigned jumpTableSz;
544 // check if function should be allowed to be relocated
545 checkIfRelocatable(insn, canBeRelocated);
547 // check for jump table. This may update funcEnd
548 if (!checkJumpTable(owner, insn, adr, getAddress(0), funcEnd, jumpTableSz)) {
551 //fprintf(stderr,"Function %s, size = %d, bad jump table\n",
552 // prettyName().string_of(), size());
556 // process the jump instruction
557 allInstr[numInsns] = insn;
560 if (jumpTableSz > 0) {
561 // skip the jump table
562 // insert an illegal instruction with the size of the jump table
563 insn = instruction(instr, ILLEGAL, jumpTableSz);
564 allInstr[numInsns] = insn;
// Make the loop increment step over the table as well as the jump.
566 insnSize += jumpTableSz;
568 } else if (insn.isJumpDir()) {
569 // check for jumps out of this function
570 Address target = insn.getTarget(adr);
571 owner->addJumpTarget(target);
572 if (target < getAddress(0) || target >= getAddress(0) + size()) {
573 // jump out of function
// A direct jump that leaves the function is recorded as a return point.
574 instPoint *p = new instPoint(this, owner, adr, insn);
575 funcReturns.push_back(p);
576 points[npoints++] = point_(p, numInsns, ReturnPt);
578 } else if (insn.isReturn()) {
579 instPoint *p = new instPoint(this, owner, adr, insn);
580 funcReturns.push_back(p);
581 points[npoints++] = point_(p, numInsns, ReturnPt);
583 } else if (insn.isCall()) {
584 // calls to adr+5 are not really calls, they are used in dynamically linked
585 // libraries to get the address of the code.
586 // We skip them here.
587 if (insn.getTarget(adr) != adr + 5) {
588 instPoint *p = new instPoint(this, owner, adr, insn);
590 points[npoints++] = point_(p, numInsns, CallPt);
592 // Temporary: Currently we can't relocate a function if it
593 // contains a call to adr+5
594 //canBeRelocated = false;
598 allInstr[numInsns] = insn;
// Guard the capacities of the points and allInstr arrays.
600 assert(npoints < size());
601 assert(numInsns <= size());
605 // there are often nops after the end of the function. We get them here,
606 // since they may be useful to instrument the return point
607 for (u = 0; u < 4; u++) {
608 if (owner->isValidAddress(adr)) {
609 insnSize = insn.getNextInstruction(instr);
611 allInstr[numInsns] = insn;
613 assert(numInsns < size()+5);
623 // add extra instructions to the points that need it.
// lastPointEnd and thisPointEnd are instruction indices of the previous and
// current point, used to avoid borrowing instructions across points.
624 unsigned lastPointEnd = 0;
625 unsigned thisPointEnd = 0;
626 for (u = 0; u < npoints; u++) {
627 instPoint *p = points[u].point;
628 unsigned index = points[u].index;
629 unsigned type = points[u].type;
630 lastPointEnd = thisPointEnd;
631 thisPointEnd = index;
633 // add instructions before the point
634 unsigned size = p->size();
// Walk backwards from the point until it reaches JUMP_SZ bytes, stopping
// before the previous point; call instructions are not borrowed.
635 for (int u1 = index-1; size < JUMP_SZ && u1 >= 0 && u1 > (int)lastPointEnd; u1--) {
636 if (!allInstr[u1].isCall()) {
637 p->addInstrBeforePt(allInstr[u1]);
638 size += allInstr[u1].size();
643 lastPointEnd = index;
645 // add instructions after the point
// NOTE(review): in the loop after this comment, `bonus` is a byte count but
// is added to an instruction index (u1 = index+1 + bonus) -- confirm intent.
646 if (type == ReturnPt && p->address() == funcEnd-1) {
648 /* If an instrumentation point at the end of the function does
649 not end on a 4-byte boundary, we claim the bytes up to the
650 next 4-byte boundary as "bonus bytes" for the point, since
651 the next function will (should) begin at or past the
652 boundary. We tried 8 byte boundaries and found some
653 functions did not begin on 8-byte aligned addresses, so 8 is
656 #ifndef i386_unknown_nt4_0
657 bonus = (funcEnd % 4) ? (4 - (funcEnd % 4)) : 0;
659 /* Unfortunately, the Visual C++ compiler generates functions
660 that begin at unaligned boundaries, so forget this scheme on
663 #endif /* i386_unknown_nt4_0 */
664 //p->setBonusBytes(bonus);
666 for (u1 = index+1 + bonus; u1 < index+JUMP_SZ-1 && u1 < numInsns; u1++) {
// Only nops and 0xCC bytes are considered as padding here.
667 if (allInstr[u1].isNop() || *(allInstr[u1].ptr()) == 0xCC) {
668 //p->addInstrAfterPt(allInstr[u1]);
675 p->setBonusBytes(bonus);
676 } else if (type == ReturnPt) {
677 // normally, we would not add instructions after the return, but the
678 // compilers often add nops after the return, and we can use them if necessary
679 for (unsigned u1 = index+1; u1 < index+JUMP_SZ-1 && u1 < numInsns; u1++) {
680 if (allInstr[u1].isNop() || *(allInstr[u1].ptr()) == 0xCC) {
681 p->addInstrAfterPt(allInstr[u1]);
// Entry and call points: the entry point gets a double-size budget.
689 unsigned maxSize = JUMP_SZ;
690 if (type == EntryPt) maxSize = 2*JUMP_SZ;
// NOTE(review): this bound is inclusive (u1 <= numInsns) while the two
// ReturnPt loops above use u1 < numInsns -- confirm this is intended.
691 for (unsigned u1 = index+1; size < maxSize && u1 <= numInsns; u1++) {
// Do not borrow the instruction of the next point, nor a call instruction.
692 if (((u+1 == npoints) || (u+1 < npoints && points[u+1].index > u1))
693 && !allInstr[u1].isCall()) {
694 p->addInstrAfterPt(allInstr[u1]);
695 size += allInstr[u1].size();
// Validate every point against the known jump targets.
705 for (u = 0; u < npoints; u++)
706 points[u].point->checkInstructions();
708 // create and sort vector of instPoints
709 vector<instPoint*> foo;
710 sorted_ips_vector(foo);
712 for (unsigned i=0;i<foo.size();i++) {
// A point that would need a trap is only of interest when the function is at
// least 5 bytes long.
714 if (_usesTrap(foo[i], funcEntry_, funcReturns) && size() >= 5) {
719 // if the function contains a jump to a jump table, we can't relocate
721 if ( !canBeRelocated ) {
723 // Function would have needed relocation
724 if (relocatable_ == true) {
726 #ifdef DEBUG_FUNC_RELOC
727 cerr << prettyName() << endl;
728 cerr << "Jump Table: Can't relocate function" << endl;
731 relocatable_ = false;
743 * Given an instruction, relocate it to a new address, patching up
744 * any relative addressing that is present.
745 * The instruction may need to be replaced with a different size instruction
746 * or with multiple instructions.
747 * Return the size of the new instruction(s)
// Writes a copy of `insn`, originally at origAddr, for execution at newAddr,
// widening relative displacements to 32 bits. Output goes through the cursor
// `newInsn`, which is advanced; the return value is the number of bytes written.
// NOTE(review): some statements are not visible in this view; the comments
// below describe only the visible ones.
749 unsigned relocateInstruction(instruction insn,
750 int origAddr, int newAddr,
751 unsigned char *&newInsn)
// NOTE(review): the description below says 8, 16 or 32-byte displacement;
// the code reads them as char, short and int, i.e. 8, 16 and 32 bits.
754 Relative address instructions need to be modified. The relative address
755 can be a 8, 16, or 32-byte displacement relative to the next instruction.
756 Since we are relocating the instruction to a different area, we have
757 to replace 8 and 16-byte displacements with 32-byte displacements.
759 All relative address instructions are one or two-byte opcode followed
760 by a displacement relative to the next instruction:
762 CALL rel16 / CALL rel32
763 Jcc rel8 / Jcc rel16 / Jcc rel32
764 JMP rel8 / JMP rel16 / JMP rel32
766 The only two-byte opcode instructions are the Jcc rel16/rel32,
767 all others have one byte opcode.
769 The instruction JCXZ/JECXZ rel8 does not have an equivalent with rel32
770 displacement. We must generate code to emulate this instruction:
776 A0: JCXZ 2 (jump to A4)
777 A2: JMP 5 (jump to A9)
778 A4: JMP rel32 (relocated displacement)
783 const unsigned char *origInsn = insn.ptr();
784 unsigned insnType = insn.type();
785 unsigned insnSz = insn.size();
// Remember the start of the output so the emitted size can be returned.
786 unsigned char *first = newInsn;
791 if (insnType & REL_B) {
792 /* replace with rel32 instruction, opcode is one byte. */
793 if (*origInsn == JCXZ) {
// Old target is origAddr + 2 + oldDisp; the emulation sequence is 9 bytes
// long (2 + 2 + 5), so the new displacement is relative to newAddr + 9.
794 oldDisp = (int)*(const char *)(origInsn+1);
795 newDisp = (origAddr + 2) + oldDisp - (newAddr + 9);
796 *newInsn++ = *origInsn; *(newInsn++) = 2; // jcxz 2
797 *newInsn++ = 0xEB; *newInsn++ = 5; // jmp 5
798 *newInsn++ = 0xE9; // jmp rel32
799 *((int *)newInsn) = newDisp;
800 newInsn += sizeof(int);
// newSz is the size of the widened instruction; UINT_MAX marks it as unset
// until one of the branches below assigns it.
803 unsigned newSz=UINT_MAX;
804 if (insnType & IS_JCC) {
805 /* Change a Jcc rel8 to Jcc rel32.
806 Must generate a new opcode: a 0x0F followed by (old opcode + 16) */
807 unsigned char opcode = *origInsn++;
809 *newInsn++ = opcode + 0x10;
812 else if (insnType & IS_JUMP) {
813 /* change opcode to 0xE9 */
818 assert(newSz!=UINT_MAX);
// The rel8 form is 2 bytes long, so its target is origAddr + 2 + oldDisp.
819 oldDisp = (int)*(const char *)origInsn;
820 newDisp = (origAddr + 2) + oldDisp - (newAddr + newSz);
821 *((int *)newInsn) = newDisp;
822 newInsn += sizeof(int);
825 else if (insnType & REL_W) {
827 if (insnType & PREFIX_OPR)
829 if (insnType & PREFIX_SEG)
831 /* opcode is unchanged, just relocate the displacement */
// Copy the 0x0F escape byte if present, then the opcode byte.
832 if (*origInsn == (unsigned char)0x0F)
833 *newInsn++ = *origInsn++;
834 *newInsn++ = *origInsn++;
835 oldDisp = *((const short *)origInsn);
// NOTE(review): unlike the REL_D branch, the constants 5 and 3 here are not
// derived from insnSz or from the bytes emitted -- confirm they match the
// encoded sizes of the original and relocated instructions.
836 newDisp = (origAddr + 5) + oldDisp - (newAddr + 3);
837 *((int *)newInsn) = newDisp;
838 newInsn += sizeof(int);
839 } else if (insnType & REL_D) {
841 unsigned nPrefixes = 0;
842 if (insnType & PREFIX_OPR)
844 if (insnType & PREFIX_SEG)
// Prefix bytes are copied through unchanged.
846 for (unsigned u = 0; u < nPrefixes; u++)
847 *newInsn++ = *origInsn++;
849 /* opcode is unchanged, just relocate the displacement */
850 if (*origInsn == 0x0F)
851 *newInsn++ = *origInsn++;
852 *newInsn++ = *origInsn++;
853 oldDisp = *((const int *)origInsn);
// Same instruction size before and after, so only the base address changes.
854 newDisp = (origAddr + insnSz) + oldDisp - (newAddr + insnSz);
855 *((int *)newInsn) = newDisp;
856 newInsn += sizeof(int);
859 /* instruction is unchanged */
860 for (unsigned u = 0; u < insnSz; u++)
861 *newInsn++ = *origInsn++;
864 return (newInsn - first);
869 * Relocate a conditional jump and change the target to newTarget.
870 * The new target must be within 128 bytes from the new address
871 * Size of instruction is unchanged.
872 * Returns the old target
// Copies the relative jump `insn` to the cursor `newInsn` with its
// displacement rewritten so that, when placed at newAddr, it targets
// newTargetAddr. The displacement keeps its original width (8, 16 or 32 bits).
// Returns the target of the original instruction: origAddr + insnSz + oldDisp.
// NOTE(review): some statements are not visible in this view.
874 unsigned changeConditionalJump(instruction insn,
875 int origAddr, int newAddr, int newTargetAddr,
876 unsigned char *&newInsn)
879 const unsigned char *origInsn = insn.ptr();
880 unsigned insnType = insn.type();
881 unsigned insnSz = insn.size();
886 if (insnType & REL_B) {
887 /* one byte opcode followed by displacement */
888 /* opcode is unchanged */
890 *newInsn++ = *origInsn++;
891 oldDisp = (int)*(const char *)origInsn;
// The new displacement is relative to the end of the relocated instruction
// and is truncated to one byte.
892 newDisp = newTargetAddr - (newAddr + insnSz);
893 *newInsn++ = (char)newDisp;
895 else if (insnType & REL_W) {
// Copy any operand-size and segment prefix bytes unchanged.
897 if (insnType & PREFIX_OPR)
898 *newInsn++ = *origInsn++;
899 if (insnType & PREFIX_SEG)
900 *newInsn++ = *origInsn++;
// Only the two-byte-opcode form (0x0F escape) is accepted here.
902 assert(*origInsn==0x0F);
903 *newInsn++ = *origInsn++; // copy the 0x0F
904 *newInsn++ = *origInsn++; // second opcode byte
906 oldDisp = *((const short *)origInsn);
907 newDisp = newTargetAddr - (newAddr + insnSz);
908 *((short *)newInsn) = (short)newDisp;
909 newInsn += sizeof(short);
911 else if (insnType & REL_D) {
913 if (insnType & PREFIX_OPR)
914 *newInsn++ = *origInsn++;
915 if (insnType & PREFIX_SEG)
916 *newInsn++ = *origInsn++;
918 assert(*origInsn==0x0F);
919 *newInsn++ = *origInsn++; // copy the 0x0F
920 *newInsn++ = *origInsn++; // second opcode byte
922 oldDisp = *((const int *)origInsn);
923 newDisp = newTargetAddr - (newAddr + insnSz);
924 *((int *)newInsn) = (int)newDisp;
925 newInsn += sizeof(int);
// NOTE(review): presumably -1 is the initial value of oldDisp (its declaration
// is not visible), making this a check that one branch ran; an encoded
// displacement of -1 would also fail it -- confirm.
928 assert (oldDisp!=-1);
929 return (origAddr+insnSz+oldDisp);
// Size query for `insn`, classified with the same REL_B / JCXZ / IS_JCC /
// IS_JUMP / REL_W tests that relocateInstruction uses.
// NOTE(review): every return statement of this function is missing from this
// view, so the actual sizes returned cannot be documented here; presumably
// they match the bytes relocateInstruction emits -- confirm.
934 unsigned getRelocatedInstructionSz(instruction insn)
936 const unsigned char *origInsn = insn.ptr();
937 unsigned insnType = insn.type();
938 unsigned insnSz = insn.size();
940 if (insnType & REL_B) {
941 if (*origInsn == JCXZ)
944 if (insnType & IS_JCC)
946 else if (insnType & IS_JUMP) {
951 else if (insnType & REL_W) {
// File-scope register allocator instance; assigned where the registerSpace is
// constructed from deadList further down.
958 registerSpace *regSpace;
// The Register argument is unnamed, so the result does not depend on it.
// NOTE(review): the body is not visible in this view.
961 bool registerSpace::readOnlyRegister(Register) {
966 We don't use the machine registers to store temporaries,
967 but "virtual registers" that are located on the stack.
968 The stack frame for a tramp is:
970 ebp-> saved ebp (4 bytes)
971 ebp-4: 128-byte space for 32 virtual registers (32*4 bytes)
972 ebp-132: saved registers (8*4 bytes)
973 ebp-164: saved flags registers (4 bytes)
975 The temporaries are assigned numbers from 1 so that it is easier
976 to refer to them: -(reg*4)[ebp]. So the first reg is -4[ebp].
978 We are using a fixed number of temporaries now (32), but we could
979 change to using an arbitrary number.
// Table of the virtual registers handed to the register allocator.
982 Register deadList[NUM_VIRTUAL_REGISTERS];
// Size of deadList in BYTES; it is divided by sizeof(Register) below to get
// the element count.
983 int deadListSize = sizeof(deadList);
// NOTE(review): the lines below belong to an initialisation routine whose
// header and loop bodies are not visible in this view.
987 static bool inited=false;
// With SHM_SAMPLING and MT_THREAD both defined the loop covers one register fewer.
992 #if defined(SHM_SAMPLING) && defined(MT_THREAD)
993 for (unsigned u = 0; u < NUM_VIRTUAL_REGISTERS-1; u++) {
995 for (unsigned u = 0; u < NUM_VIRTUAL_REGISTERS; u++) {
1000 regSpace = new registerSpace(deadListSize/sizeof(Register), deadList,
// Prototypes for the low-level instruction emitters used in this file.
// Every one takes the output cursor `insn` by reference.
// NOTE(review): presumably each advances `insn` past the bytes it writes --
// confirm at the definitions, which are not in view.
1005 void emitJump(unsigned disp32, unsigned char *&insn);
1006 void emitSimpleInsn(unsigned opcode, unsigned char *&insn);
1007 void emitMovRegToReg(Register dest, Register src, unsigned char *&insn);
1008 void emitAddMemImm32(Address dest, int imm, unsigned char *&insn);
1009 void emitAddRegImm32(Register dest, int imm, unsigned char *&insn);
1010 void emitOpRegImm(int opcode, Register dest, int imm, unsigned char *&insn);
1011 void emitMovRegToRM(Register base, int disp, Register src, unsigned char *&insn);
1012 void emitMovRMToReg(Register dest, Register base, int disp, unsigned char *&insn);
1013 void emitCallRel32(unsigned disp32, unsigned char *&insn);
1017 * change the insn at addr to be a branch to newAddr.
1018 * Used to add multiple tramps to a point.
// Builds a rel32 jump from fromAddr to newAddr in a local buffer and writes
// its JUMP_REL32_SZ bytes into the text space of `proc` at fromAddr.
1020 void generateBranch(process *proc, Address fromAddr, Address newAddr)
// The buffer is one byte larger than the JUMP_REL32_SZ bytes that are written out.
1022 unsigned char inst[JUMP_REL32_SZ+1];
1023 unsigned char *insn = inst;
// The displacement is relative to the end of the jump instruction.
1024 emitJump(newAddr - (fromAddr + JUMP_REL32_SZ), insn);
1025 proc->writeTextSpace((caddr_t)fromAddr, JUMP_REL32_SZ, (caddr_t)inst);
// Stores the pair (key, val) in the trampTable of `proc` and, except on NT,
// writes the modified entry into the DYNINSTtrampTable of the process.
// NOTE(review): some statements are not visible in this view.
1029 bool insertInTrampTable(process *proc, unsigned key, unsigned val) {
1032 // check for overflow of the tramp table.
1033 // stop at 95% capacity to ensure good performance
1034 if (proc->trampTableItems == (TRAMPTABLESZ - TRAMPTABLESZ/20))
1036 proc->trampTableItems++;
// Probe from HASH1(key) in steps of HASH2(key), modulo the table size, until
// a slot whose key is 0 is found; key 0 therefore marks an empty slot.
1037 for (u = HASH1(key); proc->trampTable[u].key != 0;
1038 u = (u + HASH2(key)) % TRAMPTABLESZ)
1040 proc->trampTable[u].key = key;
1041 proc->trampTable[u].val = val;
1043 #if !defined(i386_unknown_nt4_0)
1045 Address addr = proc->findInternalAddress("DYNINSTtrampTable",true, err);
// Only the single modified entry, at index u, is copied to the process.
1047 return proc->writeDataSpace((caddr_t)addr+u*sizeof(trampTableEntry),
1048 sizeof(trampTableEntry),
1049 (caddr_t)&(proc->trampTable[u]));
1055 /* Generate a jump to a base tramp. Return the size of the instruction
1056 generated at the instrumentation point. */
// The code for the point is written through `insn`; `deferred` is passed on
// to findAndInstallBaseTramp when the extra slot at the entry is used.
// NOTE(review): some statements are not visible in this view.
1057 unsigned generateBranchToTramp(process *proc, const instPoint *point,
1058 Address baseAddr, Address imageBaseAddr,
1059 unsigned char *insn, bool &deferred)
1061 /* There are three ways to get to the base tramp:
1062 1. Ordinary 5-byte jump instruction.
1063 2. 2-byte jump to the extra slot in the entry point
1064 3. Trap instruction.
1067 /* Ordinary 5-byte jump */
1068 if (point->size() >= JUMP_REL32_SZ) {
1069 // replace instructions at point with jump to base tramp
1070 emitJump(baseAddr - (point->jumpAddr() + imageBaseAddr + JUMP_REL32_SZ), insn);
1071 return JUMP_REL32_SZ;
// Second strategy: short jump to the extra slot at the function entry.
1075 if (point->canUseExtraSlot(proc)) {
1076 pd_Function *f = point->func();
1077 const instPoint *the_entry = f->funcEntry(proc);
// Same reachability conditions that _canUseExtraSlot tests, re-checked here
// as assertions.
1079 int displacement = the_entry->jumpAddr() + 5 - point->jumpAddr();
1080 assert(displacement < 0);
1081 assert((displacement-2) > SCHAR_MIN);
1082 assert(point->size() >= 2);
1083 #ifdef INST_TRAP_DEBUG
1084 cerr << "Using extra slot in entry of " << f->prettyName()
1085 << " to avoid need for trap @" << (void*)point->address() << endl;
// findAndInstallBaseTramp takes a non-const instPoint, hence the cast.
1088 instPoint *nonConstEntry = const_cast<instPoint *>(the_entry);
1089 returnInstance *retInstance;
1091 trampTemplate *entryBase =
1092 findAndInstallBaseTramp(proc, nonConstEntry,
1093 retInstance, false, false, deferred);
1096 retInstance->installReturnInstance(proc);
// Write the jump to this base tramp into the second slot, 5 bytes past the
// jump address of the entry point.
1098 generateBranch(proc, the_entry->jumpAddr()+imageBaseAddr+5, baseAddr);
// Operand of the short jump: displacement measured from the end of the
// 2-byte instruction.
1100 *insn++ = (char)(displacement-2);
1105 #ifdef INST_TRAP_DEBUG
1106 cerr << "Warning: unable to insert jump in function "
1107 << point->func()->prettyName() << " @" << (void*)point->address()
1108 << ". Using trap!" << endl;
// Third strategy: register the point address and base tramp address in the
// tramp table.
1110 if (!insertInTrampTable(proc, point->jumpAddr()+imageBaseAddr, baseAddr))
1116 /****************************************************************************/
1117 /****************************************************************************/
1118 /****************************************************************************/
// Sizes in bytes of the four recursion-guard fragments: non-zero on Solaris,
// Linux and NT, zero in the other group of definitions.
// The 19 and 10 byte figures must match what generate_guard_code emits.
1120 #if defined(i386_unknown_solaris2_5) || \
1121 defined(i386_unknown_linux2_0) || \
1122 defined(i386_unknown_nt4_0)
1123 int guard_code_before_pre_instr_size = 19; /* size in bytes */
1124 int guard_code_after_pre_instr_size = 10; /* size in bytes */
1125 int guard_code_before_post_instr_size = 19; /* size in bytes */
1126 int guard_code_after_post_instr_size = 10; /* size in bytes */
1128 int guard_code_before_pre_instr_size = 0; /* size in bytes */
1129 int guard_code_after_pre_instr_size = 0; /* size in bytes */
1130 int guard_code_before_post_instr_size = 0; /* size in bytes */
1131 int guard_code_after_post_instr_size = 0; /* size in bytes */
1134 /****************************************************************************/
1135 /****************************************************************************/
1136 /****************************************************************************/
// Sums the four guard fragment size globals.
1138 unsigned int get_guard_code_size() /* total size in bytes of the four
1139 guard code fragments*/
1142 guard_code_before_pre_instr_size +
1143 guard_code_after_pre_instr_size +
1144 guard_code_before_post_instr_size +
1145 guard_code_after_post_instr_size;
1148 /****************************************************************************/
1149 /****************************************************************************/
1150 /****************************************************************************/
// Emit a two-byte short conditional jump: the condition-code opcode byte
// followed by a one-byte displacement. `instruction` is the write cursor and
// is advanced past the two bytes written.
// NOTE(review): the declaration of jump_offset is on a line not visible here.
// generate_guard_code() passes the distance from the end of this instruction
// to the target; confirm that distance always fits in 8 bits.
1152 void emitJccR8( int condition_code,
1154 unsigned char * & instruction )
1156 *instruction++ = condition_code;
1157 *instruction++ = jump_offset;
1160 /****************************************************************************/
1161 /****************************************************************************/
1162 /****************************************************************************/
// generate_guard_code: overwrite the four NOP-padded guard fragments of a
// non-recursive base tramp with the real guard code.
//   buffer              local copy of the base tramp being assembled
//   base_template       supplies the begin/end offsets of each fragment
//                       relative to buffer
//   (unnamed Address)   base address of the tramp; unused
//   guard_flag_address  address of the guard flag that the emitted code
//                       tests and writes
// Guard-on fragments compare the flag with 0, jump past the matching
// guard-off fragment when it is 0, and otherwise store 0 into it.
// Guard-off fragments store 1 back into the flag.
1164 #if defined(i386_unknown_solaris2_5) || \
1165 defined(i386_unknown_linux2_0) || \
1166 defined(i386_unknown_nt4_0)
1167 void generate_guard_code( unsigned char * buffer,
1168 const NonRecursiveTrampTemplate & base_template,
1169 Address /* base_address */,
1170 Address guard_flag_address )
1172 unsigned char * instruction;
1174 /* guard-on code before pre instr */
1177 * cmpl $0x0, (guard flag address)
1178 * je <after guard-off code>
1179 * movl $0x0, (guard flag address)
1181 instruction = buffer + base_template.guardOnPre_beginOffset;
1182 /* CMP_ (memory address)__ 0 */
1183 emitOpRMImm8( 0x83, 0x07, Null_Register, guard_flag_address, 0, instruction );
// The displacement is measured from the end of the 2-byte jcc (instruction + 2)
// to the end of the pre guard-off fragment.
1184 emitJccR8( JE_R8, buffer + base_template.guardOffPre_endOffset - ( instruction + 2 ), instruction );
1185 emitMovImmToMem( guard_flag_address, 0, instruction );
1187 /* guard-off code after pre instr */
1190 * movl $0x1, (guard flag address)
1192 instruction = buffer + base_template.guardOffPre_beginOffset;
1193 emitMovImmToMem( guard_flag_address, 1, instruction );
// The post-instrumentation fragments repeat the same two sequences using the
// Post offsets.
1195 /* guard-on code before post instr */
1196 instruction = buffer + base_template.guardOnPost_beginOffset;
1197 emitOpRMImm8( 0x83, 0x07, Null_Register, guard_flag_address, 0x00, instruction );
1198 emitJccR8( JE_R8, buffer + base_template.guardOffPost_endOffset - ( instruction + 2 ), instruction );
1199 emitMovImmToMem( guard_flag_address, 0, instruction );
1201 /* guard-off code after post instr */
1202 instruction = buffer + base_template.guardOffPost_beginOffset;
1203 emitMovImmToMem( guard_flag_address, 1, instruction );
// Second variant: every parameter is unnamed, so nothing can be emitted.
// NOTE(review): the preprocessor directive selecting this variant is not
// visible in this view; presumably it covers the platforms whose guard
// fragment sizes are 0 -- confirm.
1206 void generate_guard_code( unsigned char * /* buffer */,
1207 const NonRecursiveTrampTemplate & /* base_template */,
1208 Address /* base_address */,
1209 Address /* guard_flag_address */ )
1214 /****************************************************************************/
1215 /****************************************************************************/
1216 /****************************************************************************/
1219 * Install a base tramp, relocating the instructions at location
1220 * The pre and post jumps are filled with a 'jump 0'
1221 * Return a descriptor for the base tramp.
// installBaseTramp: assemble the base trampoline for `location` in a local
// buffer, write it into memory obtained from inferiorMalloc(..., textHeap)
// and return the descriptor recording the offset of each section.
//   location               instrumentation point whose instructions are relocated
//   trampRecursiveDesired  true: plain trampTemplate; false: a
//                          NonRecursiveTrampTemplate with guard fragments
// NOTE(review): two further parameters are declared on lines not visible
// here; the call in findAndInstallBaseTramp passes
// (location, proc, noCost, trampRecursiveDesired).
1225 trampTemplate *installBaseTramp( const instPoint *location,
1228 bool trampRecursiveDesired = true
1235 addr instruction cost
1236 0: <relocated instructions before point>
1237 a = size of relocated instructions before point
1238 a+0: jmp a+30 <skip pre insn> 1
1241 a+8: subl esp, 0x80 1
1244 a+16: jmp <global pre inst> 1
1245 a+21: jmp <local pre inst> 1
1249 a+29: add costAddr, cost 3
1250 a+39: <relocated instructions at point>
1252 b = a +30 + size of relocated instructions at point
1253 b+0: jmp b+30 <skip post insn>
1259 b+16: jmp <global post inst>
1260 b+21: jmp <local post inst>
1264 b+29: <relocated instructions after point>
1266 c: jmp <return to user code>
1268 tramp size = 2*23 + 10 + 5 + size of relocated instructions
1269 Make sure to update the size if the tramp is changed
1271 cost of pre and post instrumentation is (1+1+1+5+9+1+1+15+5+3) = 42
1272 cost of rest of tramp is (1+3+1+1)
1274 [mihai Wed Apr 12 00:22:03 CDT 2000]
1275 Additionally, if a guarded template is generated
1276 (i.e. trampRecursiveDesired = false), four more code fragments are inserted:
1277 - code to turn the guard on (19 bytes): between a+15 and a+16, and
1278 between b+15 and b+16
1279 - code to turn the guard off (10 bytes): between a+21 and a+26, and
1280 between b+21 and b+26
1281 A total of 58 bytes are added to the base tramp.
// The descriptor type depends on whether guard offsets have to be recorded.
1285 trampTemplate *ret = 0;
1286 if( trampRecursiveDesired )
1288 ret = new trampTemplate;
1292 ret = new NonRecursiveTrampTemplate;
1296 unsigned jccTarget = 0; // used when the instruction at the point is a cond. jump
1297 unsigned auxJumpOffset = 0;
// trampSize must equal the number of bytes emitted below; the assert on
// (insn-code) == trampSize near the end of this function checks it.
// NOTE(review): the layout comment above gives 2*23 + 10 + 5 = 61 fixed bytes
// while the code uses 73 -- one of the two appears stale.
1299 // compute the tramp size
1300 // if there are any changes to the tramp, the size must be updated.
1301 #if defined(SHM_SAMPLING) && defined(MT_THREAD)
1302 unsigned trampSize = 73+2*27 + 66;
1304 unsigned trampSize = 73;
// A guarded tramp also carries the four guard fragments.
1307 if( ! trampRecursiveDesired )
1309 trampSize += get_guard_code_size(); // NOTE-131 with Relocation
// Add the relocated size of every instruction moved into the tramp.
1312 for (u = 0; u < location->insnsBefore(); u++) {
1313 trampSize += getRelocatedInstructionSz(location->insnBeforePt(u));
// A conditional jump at the point costs its own size plus two extra jumps.
1315 if (location->insnAtPoint().type() & IS_JCC)
1316 trampSize += location->insnAtPoint().size() + 2 * JUMP_SZ;
1318 trampSize += getRelocatedInstructionSz(location->insnAtPoint());
1319 for (u = 0; u < location->insnsAfter(); u++) {
1320 trampSize += getRelocatedInstructionSz(location->insnAfterPt(u));
// Base address of the image that owns the point; added to the point's
// addresses below.
1323 Address imageBaseAddr;
1324 if (!proc->getBaseAddress(location->owner(), imageBaseAddr)) {
// Address of the observed-cost counter that the tramp's "update cost"
// instruction adds to.
1328 Address costAddr = 0; // for now...
1331 costAddr = (Address)proc->getObsCostLowAddrInApplicSpace();
1334 // get address of DYNINSTobsCostLow to update observed cost
1336 costAddr = proc->findInternalAddress("DYNINSTobsCostLow",
1338 assert(costAddr && !err);
// Reserve the tramp's final location.
1342 ret->size = trampSize;
1343 Address baseAddr = inferiorMalloc(proc, trampSize, textHeap);
1344 // cout << "installBaseTramp(): trampoline base address = 0x"
1345 // << setw( 8 ) << setfill( '0' ) << hex << baseAddr << dec << endl;
1346 ret->baseAddr = baseAddr;
// Local scratch buffer, allocated at twice the computed size. `insn` is the
// write cursor and `currAddr` tracks the address the bytes will occupy.
// NOTE(review): no delete[] of `code` is visible in this view -- confirm the
// buffer is released before returning.
1348 unsigned char *code = new unsigned char[2*trampSize];
1349 unsigned char *insn = code;
1350 Address currAddr = baseAddr;
1352 // get the current instruction that is being executed. If the PC is at a
1353 // instruction that is being relocated, we must change the PC.
1354 Address currentPC = proc->currentPC();
1356 // emulate the instructions before the point
1357 Address origAddr = location->jumpAddr() + imageBaseAddr;
1358 for (u = location->insnsBefore(); u > 0; ) {
1360 if (currentPC == origAddr) {
1361 //fprintf(stderr, "changed PC: 0x%lx to 0x%lx\n", currentPC, currAddr);
1362 proc->setNewPC(currAddr);
// aflag records whether the relocated size matches the size that was counted
// into trampSize.
1365 unsigned newSize = relocateInstruction(location->insnBeforePt(u), origAddr, currAddr, insn);
1366 aflag=(newSize == getRelocatedInstructionSz(location->insnBeforePt(u)));
1368 currAddr += newSize;
1369 origAddr += location->insnBeforePt(u).size();
1374 If the instruction at the point is a conditional jump, we relocate it to
1375 the top of the base tramp, and change the code so that the tramp is executed
1376 only if the branch is taken.
1389 T2: relocated instructions after point
1391 then later at the base tramp, at the point where we relocate the instruction
1392 at the point, we insert a jump to target
1394 if (location->insnAtPoint().type() & IS_JCC) {
1395 currAddr = baseAddr + (insn - code);
1396 assert(origAddr == location->address() + imageBaseAddr);
1397 origAddr = location->address() + imageBaseAddr;
1398 if (currentPC == origAddr &&
1399 currentPC != (location->jumpAddr() + imageBaseAddr)) {
1400 //fprintf(stderr, "changed PC: 0x%lx to 0x%lx\n", currentPC, currAddr);
1401 proc->setNewPC(currAddr);
// jccTarget keeps the value returned by changeConditionalJump for the jump
// emitted later; auxJumpOffset marks a jump that is patched near the end of
// this function to land on returnInsOffset.
1404 jccTarget = changeConditionalJump(location->insnAtPoint(), origAddr, currAddr,
1405 currAddr+location->insnAtPoint().size()+5, insn);
1406 currAddr += location->insnAtPoint().size();
1407 auxJumpOffset = insn-code;
1409 origAddr += location->insnAtPoint().size();
// ---- pre-instrumentation section ----
// The jump at skipPreInsOffset is patched near the end of this function to
// target updateCostOffset.
1413 // skip pre instrumentation
1414 ret->skipPreInsOffset = insn-code;
1417 // save registers and create a new stack frame for the tramp
1418 ret->savePreInsOffset = insn-code;
1419 emitSimpleInsn(PUSH_EBP, insn); // push ebp
1420 emitMovRegToReg(EBP, ESP, insn); // mov ebp, esp (2-byte instruction)
1421 // allocate space for temporaries (virtual registers)
1422 emitOpRegImm(5, ESP, 128, insn); // sub esp, 128
1423 emitSimpleInsn(PUSHAD, insn); // pushad
1424 emitSimpleInsn(PUSHFD, insn); // pushfd
1426 #if defined(SHM_SAMPLING) && defined(MT_THREAD)
1427 // generate preamble for MT version
1429 generateMTpreamble((char *)insn, base, proc);
// Reserve NOP bytes for the pre guard-on fragment and record its bounds;
// generate_guard_code() fills it in later.
1433 if( ! trampRecursiveDesired )
1435 NonRecursiveTrampTemplate * temp_ret = ( NonRecursiveTrampTemplate * )ret;
1436 temp_ret->guardOnPre_beginOffset = insn - code;
1437 for( int i = 0; i < guard_code_before_pre_instr_size; i++ )
1438 emitSimpleInsn( 0x90, insn );
1439 temp_ret->guardOnPre_endOffset = insn - code;
1442 // global pre branch
1443 ret->globalPreOffset = insn-code;
1447 ret->localPreOffset = insn-code;
1450 ret->localPreReturnOffset = insn-code;
// NOP placeholder for the pre guard-off fragment.
1452 if( ! trampRecursiveDesired )
1454 NonRecursiveTrampTemplate * temp_ret = ( NonRecursiveTrampTemplate * )ret;
1455 temp_ret->guardOffPre_beginOffset = insn - code;
1456 for( int i = 0; i < guard_code_after_pre_instr_size; i++ )
1457 emitSimpleInsn( 0x90, insn );
1458 temp_ret->guardOffPre_endOffset = insn - code;
1461 // restore registers
1462 emitSimpleInsn(POPFD, insn); // popfd
1463 emitSimpleInsn(POPAD, insn); // popad
1464 ret->restorePreInsOffset = insn-code;
1465 emitSimpleInsn(LEAVE, insn); // leave
// NOTE(review): the condition choosing between the add and the NOP padding
// below is on lines not visible here.
1468 // update cost -- a 10-byte instruction
1469 ret->updateCostOffset = insn-code;
1470 currAddr = baseAddr + (insn-code);
1471 ret->costAddr = currAddr;
1473 emitAddMemImm32(costAddr, 88, insn); // add (costAddr), cost
1476 // minor hack: we still need to fill up the rest of the 10 bytes, since
1477 // assumptions are made about the positioning of instructions that follow.
1478 // (This could in theory be fixed)
1479 // So, 10 NOP instructions (each 1 byte)
1480 for (unsigned foo=0; foo < 10; foo++)
1481 emitSimpleInsn(0x90, insn); // NOP
1485 if (!(location->insnAtPoint().type() & IS_JCC)) {
1486 // emulate the instruction at the point
1487 ret->emulateInsOffset = insn-code;
1488 currAddr = baseAddr + (insn - code);
1489 assert(origAddr == location->address() + imageBaseAddr);
1490 origAddr = location->address() + imageBaseAddr;
1491 if (currentPC == origAddr &&
1492 currentPC != (location->jumpAddr() + imageBaseAddr)) {
1493 //fprintf(stderr, "changed PC: 0x%lx to 0x%lx\n", currentPC, currAddr);
1494 proc->setNewPC(currAddr);
1497 unsigned newSize = relocateInstruction(location->insnAtPoint(), origAddr, currAddr, insn);
1498 aflag=(newSize == getRelocatedInstructionSz(location->insnAtPoint()));
1500 currAddr += newSize;
1501 origAddr += location->insnAtPoint().size();
1503 // instruction at point is a conditional jump.
1504 // The instruction was relocated to the beginning of the tramp (see comments above)
1505 // We must generate a jump to the original target here
1506 assert(jccTarget > 0);
1507 currAddr = baseAddr + (insn - code);
1508 emitJump(jccTarget-(currAddr+JUMP_SZ), insn);
1509 currAddr += JUMP_SZ;
// ---- post-instrumentation section: same shape as the pre section ----
// The jump at skipPostInsOffset is patched near the end of this function to
// target returnInsOffset.
1513 // skip post instrumentation
1514 ret->skipPostInsOffset = insn-code;
1518 // save registers and create a new stack frame for the tramp
1519 ret->savePostInsOffset = insn-code;
1520 emitSimpleInsn(PUSH_EBP, insn); // push ebp
1521 emitMovRegToReg(EBP, ESP, insn); // mov ebp, esp
1522 // allocate space for temporaries (virtual registers)
1523 emitOpRegImm(5, ESP, 128, insn); // sub esp, 128
1524 emitSimpleInsn(PUSHAD, insn); // pushad
1525 emitSimpleInsn(PUSHFD, insn); // pushfd
1527 #if defined(SHM_SAMPLING) && defined(MT_THREAD)
1528 // generate preamble for MT version
1530 generateMTpreamble((char *)insn, base, proc);
1534 if( ! trampRecursiveDesired )
1536 NonRecursiveTrampTemplate * temp_ret = ( NonRecursiveTrampTemplate * )ret;
1537 temp_ret->guardOnPost_beginOffset = insn - code;
1538 for( int i = 0; i < guard_code_before_post_instr_size; i++ )
1539 emitSimpleInsn( 0x90, insn );
1540 temp_ret->guardOnPost_endOffset = insn - code;
1543 // global post branch
1544 ret->globalPostOffset = insn-code;
1547 // local post branch
1548 ret->localPostOffset = insn-code;
1551 ret->localPostReturnOffset = insn-code;
1553 if( ! trampRecursiveDesired )
1555 NonRecursiveTrampTemplate * temp_ret = ( NonRecursiveTrampTemplate * )ret;
1556 temp_ret->guardOffPost_beginOffset = insn - code;
1557 for( int i = 0; i < guard_code_after_post_instr_size; i++ )
1558 emitSimpleInsn( 0x90, insn );
1559 temp_ret->guardOffPost_endOffset = insn - code;
1562 // restore registers
1563 emitSimpleInsn(POPFD, insn); // popfd
1564 emitSimpleInsn(POPAD, insn); // popad
1565 ret->restorePostInsOffset = insn-code;
1566 emitSimpleInsn(LEAVE, insn); // leave
1568 // emulate the instructions after the point
1569 ret->returnInsOffset = insn-code;
1570 currAddr = baseAddr + (insn - code);
1571 assert(origAddr == location->address() + imageBaseAddr + location->insnAtPoint().size());
1572 origAddr = location->address() + imageBaseAddr + location->insnAtPoint().size();
1573 for (u = 0; u < location->insnsAfter(); u++) {
1574 if (currentPC == origAddr) {
1575 //fprintf(stderr, "changed PC: 0x%lx to 0x%lx\n", currentPC, currAddr);
1576 proc->setNewPC(currAddr);
1578 unsigned newSize = relocateInstruction(location->insnAfterPt(u), origAddr, currAddr, insn);
1579 aflag=(newSize == getRelocatedInstructionSz(location->insnAfterPt(u)));
1581 currAddr += newSize;
1582 origAddr += location->insnAfterPt(u).size();
// The displacement is relative to the end of the jump (currAddr + JUMP_SZ).
1585 // return to user code
1586 currAddr = baseAddr + (insn - code);
1587 emitJump(location->returnAddr()+imageBaseAddr - (currAddr+JUMP_SZ), insn);
1588 #ifdef INST_TRAP_DEBUG
1589 cerr << "installBaseTramp jump back to " <<
1590 (void*)( location->returnAddr() + imageBaseAddr ) << endl;
// The number of bytes emitted must match the size computed up front.
1593 assert((unsigned)(insn-code) == trampSize);
// These jumps are written through `ip`, so `insn` stays at the end of the
// tramp.
1595 // update the jumps to skip pre and post instrumentation
1596 unsigned char *ip = code + ret->skipPreInsOffset;
1597 emitJump(ret->updateCostOffset - (ret->skipPreInsOffset+JUMP_SZ), ip);
1598 ip = code + ret->skipPostInsOffset;
1599 emitJump(ret->returnInsOffset - (ret->skipPostInsOffset+JUMP_SZ), ip);
// Conditional-jump case only: patch the auxiliary jump recorded earlier.
1601 if (auxJumpOffset > 0) {
1602 ip = code + auxJumpOffset;
1603 emitJump(ret->returnInsOffset - (auxJumpOffset+JUMP_SZ), ip);
// Guarded tramp: the guard flag is created on first use, initialised to 1 and
// remembered on the process, then the NOP placeholders are overwritten.
1606 if( ! trampRecursiveDesired )
1608 /* prepare guard flag memory, if needed */
1609 Address guardFlagAddress = proc->getTrampGuardFlagAddr();
1610 if( guardFlagAddress == 0 )
1612 int initial_value = 1;
1614 guardFlagAddress = inferiorMalloc( proc, sizeof( int ), dataHeap );
1615 // cout << "installBaseTramp(): flag address = 0x"
1616 // << setw( 8 ) << setfill( '0' ) << hex << guardFlagAddress << dec << endl;
1618 /* Initialize the new value */
1619 proc->writeDataSpace( ( void * )guardFlagAddress, sizeof( int ), & initial_value );
1621 proc->setTrampGuardFlagAddr( guardFlagAddress );
1624 NonRecursiveTrampTemplate * temp_ret = ( NonRecursiveTrampTemplate * )ret;
1625 generate_guard_code( code, * temp_ret, baseAddr, guardFlagAddress );
1628 // put the tramp in the application space
1629 proc->writeDataSpace((caddr_t)baseAddr, insn-code, (caddr_t) code);
1635 // The cost for generateMTpreamble is 25 for pre and post instrumentation:
1636 // movl $0x80570ec,%eax 1
1638 // movl %eax,0xfffffffc(%ebp) 1
1639 // shll $0x2,0xfffffffc(%ebp) 12
1640 // addl $0x84ac670,0xfffffffc(%ebp) 4
1641 // movl 0xfffffffc(%ebp),%eax 1
1642 // movl %eax,0xffffff80(%ebp) 1
// No mini-tramp is attached yet; installTramp() sets these flags when the
// first pre/post mini-tramp is installed.
1644 ret->prevBaseCost = 42+25;
1645 ret->postBaseCost = 42+25;
1646 ret->prevInstru = false;
1647 ret->postInstru = false;
1652 // This function is used to clear a jump from base to minitramps
1653 // For the x86 platform, we generate a jump to the next instruction
1654 void generateNoOp(process *proc, Address addr)
1656 static unsigned char jump0[5] = { 0xE9, 0, 0, 0, 0 };
1657 proc->writeDataSpace((caddr_t) addr, 5, (caddr_t)jump0);
// findAndInstallBaseTramp: return the base tramp for `location` in `proc`,
// installing one (and the branch that reaches it) if proc->baseMap has no
// entry for the point yet.
//   location     may be replaced by modifyInstPoint when the function has
//                been relocated
//   retInstance  set, on a fresh install, to a returnInstance describing the
//                branch generated for the point
//   trampRecursiveDesired  forwarded to installBaseTramp
// NOTE(review): the remaining parameters (`noCost` and `deferred` are used
// below) and the final return statement are on lines not visible here.
1661 trampTemplate* findAndInstallBaseTramp(process *proc,
1662 instPoint *&location,
1663 returnInstance *&retInstance,
1664 bool trampRecursiveDesired,
1671 pd_Function *f = location->func();
1673 // location may not have been updated since relocation of function
1674 if (f->needsRelocation() && f->isInstalled(proc)) {
1675 f->modifyInstPoint(const_cast<const instPoint *&>(location), proc);
1678 if (!proc->baseMap.defines(location)) {
1680 // if function needs relocation
1681 if (f->needsRelocation()) {
1683 // if function has not already been relocated
1684 if (!f->isInstalled(proc)) {
1685 bool relocated = f->relocateFunction(proc, location, deferred);
// This assert can never fail; it only references `relocated`, which is
// otherwise used solely inside the #ifndef below.
1688 assert(relocated || true);
1690 #ifndef BPATCH_LIBRARY
1691 if (!relocated) return NULL;
// Build the tramp and remember it for later lookups.
1696 ret = installBaseTramp(location, proc, noCost, trampRecursiveDesired);
1697 proc->baseMap[location] = ret;
1699 // generate branch from instrumentation point to base tramp
1700 Address imageBaseAddr;
1701 if (!proc->getBaseAddress(location->owner(), imageBaseAddr))
// The branch bytes are generated into a heap buffer that is handed to a new
// `instruction` object below.
// NOTE(review): ownership of `insn` after that hand-off cannot be confirmed
// from this view.
1703 unsigned char *insn = new unsigned char[JUMP_REL32_SZ];
1704 unsigned size = generateBranchToTramp(proc, location, ret->baseAddr,
1705 imageBaseAddr, insn, deferred);
1708 retInstance = new returnInstance(location->insns(),
1709 new instruction(insn, 0, size), size,
1710 location->jumpAddr() + imageBaseAddr,
// Point already known: reuse the existing base tramp.
1714 ret = proc->baseMap[location];
1721 * Install a single mini-tramp.
// installTramp: write a mini-tramp's code into the application at
// inst->trampBase. The first time a pre (resp. post) mini-tramp is attached
// to its base tramp, the base cost is added to the base tramp's cost and the
// "skip pre/post instrumentation" jump is overwritten by generateNoOp(), so
// execution falls into the instrumentation section.
//   inst      mini-tramp instance (process, tramp base, when, base tramp)
//   code      mini-tramp bytes
//   codeSize  number of bytes in code
// NOTE(review): the declaration of atAddr and the structure between the two
// inner `if` statements are on lines not visible here; presumably the second
// one is the else branch of the callPreInsn test -- confirm.
1724 void installTramp(instInstance *inst, char *code, int codeSize)
1727 //insnGenerated += codeSize/sizeof(int);
1728 (inst->proc)->writeDataSpace((caddr_t)inst->trampBase, codeSize, code);
1730 if (inst->when == callPreInsn) {
1731 if (inst->baseInstance->prevInstru == false) {
1732 atAddr = inst->baseInstance->baseAddr+inst->baseInstance->skipPreInsOffset;
1733 inst->baseInstance->cost += inst->baseInstance->prevBaseCost;
1734 inst->baseInstance->prevInstru = true;
1735 generateNoOp(inst->proc, atAddr);
1739 if (inst->baseInstance->postInstru == false) {
1740 atAddr = inst->baseInstance->baseAddr+inst->baseInstance->skipPostInsOffset;
1741 inst->baseInstance->cost += inst->baseInstance->postBaseCost;
1742 inst->baseInstance->postInstru = true;
1743 generateNoOp(inst->proc, atAddr);
1749 /**************************************************************
1751 * code generator for x86
1753 **************************************************************/
1758 #define MAX_BRANCH (0x1<<31)
1760 Address getMaxBranch() {
1761 return (Address)MAX_BRANCH;
// Reports whether the given value can be used as an immediate operand.
// Any int value can be an immediate on the pentium, so there is no case to
// check and the answer is always true. (naim: a platform with a restriction
// should test it here; one that cannot generate immediate code at all should
// return false.)
bool doNotOverflow(int)
{
    const bool fitsAsImmediate = true;
    return fitsAsImmediate;
}
/* Build the MOD/RM byte of an instruction.
   Mod is truncated to 2 bits (bits 7-6), Reg to 3 bits (bits 5-3) and RM to
   3 bits (bits 2-0); higher bits of each argument are ignored. */
inline unsigned char makeModRMbyte(unsigned Mod, unsigned Reg, unsigned RM) {
    const unsigned modField = (Mod & 0x3) << 6;
    const unsigned regField = (Reg & 0x7) << 3;
    const unsigned rmField  = RM & 0x7;
    // The three fields occupy disjoint bits, so OR-ing them equals adding them.
    return modField | regField | rmField;
}
1784 Emit the ModRM byte and displacement for addressing modes.
1785 base is a register (EAX, ECX, EDX, EBX, EBP, ESI, EDI)
1786 disp is a displacement
1787 reg_opcode is either a register or an opcode
1789 void emitAddressingMode(Register base, RegValue disp, int reg_opcode,
1790 unsigned char *&insn) {
1791 assert(base != ESP);
1792 if (base == Null_Register) {
1793 *insn++ = makeModRMbyte(0, reg_opcode, 5);
1794 *((int *)insn) = disp;
1795 insn += sizeof(int);
1796 } else if (disp == 0 && base != EBP) {
1797 *insn++ = makeModRMbyte(0, reg_opcode, base);
1798 } else if (disp >= -128 && disp <= 127) {
1799 *insn++ = makeModRMbyte(1, reg_opcode, base);
1800 *((char *)insn++) = (char) disp;
1802 *insn++ = makeModRMbyte(2, reg_opcode, base);
1803 *((int *)insn) = disp;
1804 insn += sizeof(int);
/* Emit a simple one-byte instruction: store the low byte of op at the write
   cursor and advance the cursor by one. */
void emitSimpleInsn(unsigned op, unsigned char *&insn) {
    *insn = (unsigned char) op;
    insn += 1;
}
1814 // emit a simple register to register instruction: OP dest, src
1815 // opcode is one or two byte
//   opcode  one-byte opcode, or two bytes packed as (first << 8) | second
//   dest    value for the ModRM reg field
//   src     value for the ModRM r/m field (register-direct, Mod=3)
//   insn    write cursor; advanced past the bytes written
// NOTE(review): the test that selects the one-byte form is on lines not
// visible here; presumably it mirrors `opcode <= 0xff` in emitOpRegRM.
1816 void emitOpRegReg(unsigned opcode, Register dest, Register src,
1817 unsigned char *&insn) {
// Two-byte form: high byte first, then low byte.
1821 *insn++ = opcode >> 8;
1822 *insn++ = opcode & 0xFF;
1824 // ModRM byte define the operands: Mod = 3, Reg = dest, RM = src
1825 *insn++ = makeModRMbyte(3, dest, src);
// Emit "OP dest, disp[base]": the opcode bytes followed by the ModRM byte and
// displacement produced by emitAddressingMode, with dest in the reg field.
// A two-byte opcode is written high byte first.
// NOTE(review): the statement emitting a one-byte opcode is on a line not
// visible here.
1829 void emitOpRegRM(unsigned opcode, Register dest, Register base, int disp,
1830 unsigned char *&insn) {
1831 if (opcode <= 0xff) {
1834 *insn++ = opcode >> 8;
1835 *insn++ = opcode & 0xff;
1837 emitAddressingMode(base, disp, dest, insn);
// Emit "OP disp[base], src": the memory operand is encoded by
// emitAddressingMode with src in the ModRM reg field. emitFuncCall also uses
// this with an opcode digit in place of src (PUSH_RM_OPC2).
// NOTE(review): the statement that writes the opcode byte is on a line not
// visible here.
1841 void emitOpRMReg(unsigned opcode, Register base, int disp, Register src,
1842 unsigned char *&insn) {
1844 emitAddressingMode(base, disp, src, insn);
1847 // emit OP reg, imm32
// `opcode` is not an opcode byte: it goes into the 3-bit reg field of a
// register-direct ModRM byte (makeModRMbyte keeps only its low 3 bits), with
// dest in the r/m field, followed by the 32-bit immediate.
// NOTE(review): the leading opcode byte and the cursor advance after the
// immediate are on lines not visible here.
1848 void emitOpRegImm(int opcode, Register dest, int imm, unsigned char *&insn) {
1850 *insn++ = makeModRMbyte(3, opcode, dest);
1851 *((int *)insn) = imm;
1856 // emit OP r/m, imm32
// `opcode` is passed to emitAddressingMode as the ModRM reg field; the memory
// operand disp[base] is followed by the 32-bit immediate.
// NOTE(review): the leading opcode byte is emitted on a line not visible here.
1857 void emitOpRMImm(unsigned opcode, Register base, int disp, int imm,
1858 unsigned char *&insn) {
1860 emitAddressingMode(base, disp, opcode, insn);
1861 *((int *)insn) = imm;
1862 insn += sizeof(int);
1866 // emit OP r/m, imm32
// Overload taking the instruction as two parts: opcode2 goes into the ModRM
// reg field via emitAddressingMode; the memory operand disp[base] is followed
// by the 32-bit immediate.
// NOTE(review): the use of opcode1 is on a line not visible here; presumably
// it is written as the leading opcode byte -- confirm.
1867 void emitOpRMImm(unsigned opcode1, unsigned opcode2,
1868 Register base, int disp, int imm, unsigned char *&insn) {
1870 emitAddressingMode(base, disp, opcode2, insn);
1871 *((int *)insn) = imm;
1872 insn += sizeof(int);
1875 // emit OP r/m, imm8
// opcode2 goes into the ModRM reg field via emitAddressingMode.
// generate_guard_code() calls this as (0x83, 0x07, Null_Register, addr, 0)
// to compare a memory word with 0.
// NOTE(review): the statements that write opcode1 and the 8-bit immediate are
// on lines not visible here.
1876 void emitOpRMImm8(unsigned opcode1, unsigned opcode2,
1877 Register base, int disp, char imm, unsigned char *&insn) {
1879 emitAddressingMode(base, disp, opcode2, insn);
1883 // emit OP reg, r/m, imm32
// dest goes into the ModRM reg field; the memory operand disp[base] is
// followed by the 32-bit immediate.
// NOTE(review): the leading opcode byte is emitted on a line not visible here.
1884 void emitOpRegRMImm(unsigned opcode, Register dest,
1885 Register base, int disp, int imm, unsigned char *&insn) {
1887 emitAddressingMode(base, disp, dest, insn);
1888 *((int *)insn) = imm;
1889 insn += sizeof(int);
1892 // emit MOV reg, reg
// Register-direct ModRM byte with dest in the reg field and src in the r/m
// field.
// NOTE(review): the opcode byte is emitted on a line not visible here.
1893 void emitMovRegToReg(Register dest, Register src, unsigned char *&insn) {
1895 *insn++ = makeModRMbyte(3, dest, src);
1898 // emit MOV reg, r/m
// Loads dest from the memory operand disp[base]; dest goes into the ModRM reg
// field.
// NOTE(review): the opcode byte is emitted on a line not visible here.
1899 void emitMovRMToReg(Register dest, Register base, int disp, unsigned char *&insn) {
1901 emitAddressingMode(base, disp, dest, insn);
1904 // emit MOV r/m, reg
// Stores src to the memory operand disp[base]; src goes into the ModRM reg
// field.
// NOTE(review): the opcode byte is emitted on a line not visible here.
1905 void emitMovRegToRM(Register base, int disp, Register src, unsigned char *&insn) {
1907 emitAddressingMode(base, disp, src, insn);
// Same operand encoding as emitMovRegToRM but with no base register
// (Null_Register), so `disp` is encoded as an absolute 32-bit address.
// emitVstore uses this to store EAX to a fixed address.
// NOTE(review): the opcode byte is emitted on a line not visible here.
1911 void emitMovRegToM(int disp, Register src, unsigned char *&insn) {
1913 emitAddressingMode(Null_Register, disp, src, insn);
// Same operand encoding as emitMovRMToReg but with no base register
// (Null_Register), so `disp` is encoded as an absolute 32-bit address.
// emitVload uses this to load EAX from a fixed address.
// NOTE(review): the opcode byte is emitted on a line not visible here.
1917 void emitMovMToReg(Register dest, int disp, unsigned char *&insn) {
1919 emitAddressingMode(Null_Register, disp, dest, insn);
1922 // emit MOV reg, imm32
1923 void emitMovImmToReg(Register dest, int imm, unsigned char *&insn) {
1924 *insn++ = 0xB8 + dest;
1925 *((int *)insn) = imm;
1926 insn += sizeof(int);
1929 // emit MOV r/m32, imm32
// Memory operand disp[base] with 0 in the ModRM reg field, followed by the
// 32-bit immediate. emitVload uses this to store a constant into a virtual
// register slot.
// NOTE(review): the opcode byte is emitted on a line not visible here.
1930 void emitMovImmToRM(Register base, int disp, int imm, unsigned char *&insn) {
1932 emitAddressingMode(base, disp, 0, insn);
1933 *((int*)insn) = imm;
1934 insn += sizeof(int);
1937 // emit MOV mem32, imm32
// Stores the 32-bit immediate to the absolute address maddr: the address is
// written as a 32-bit displacement, then the immediate.
// generate_guard_code() uses this to set and clear the guard flag.
// NOTE(review): the opcode and ModRM bytes are emitted on lines not visible
// here.
1938 void emitMovImmToMem(Address maddr, int imm, unsigned char *&insn) {
1940 // emit the ModRM byte: we use a 32-bit displacement for the address,
1941 // the ModRM value is 0x05
1943 *((unsigned *)insn) = maddr;
1944 insn += sizeof(unsigned);
1945 *((int*)insn) = imm;
1946 insn += sizeof(int);
1950 // emit Add dword ptr DS:[addr], imm
// Adds the 32-bit immediate to the word at the absolute address addr: the
// address is written as a 32-bit displacement, then the immediate.
// installBaseTramp and emitVupdate use this to update the observed cost.
// NOTE(review): the opcode and ModRM bytes are emitted on lines not visible
// here; installBaseTramp describes the result as a 10-byte instruction.
1951 void emitAddMemImm32(Address addr, int imm, unsigned char *&insn) {
1954 *((unsigned *)insn) = addr;
1955 insn += sizeof(unsigned);
1956 *((int *)insn) = imm;
1957 insn += sizeof(int);
1960 // emit Add reg, imm32
// Register-direct ModRM byte with 0 in the reg field and `reg` in the r/m
// field, followed by the 32-bit immediate.
// NOTE(review): the opcode byte is emitted on a line not visible here.
1961 void emitAddRegImm32(Register reg, int imm, unsigned char *&insn) {
1963 *insn++ = makeModRMbyte(3, 0, reg);
1964 *((int *)insn) = imm;
1965 insn += sizeof(int);
// Emit a 5-byte near jump: opcode 0xE9 followed by a 32-bit displacement.
//   disp32  displacement measured from the end of the emitted instruction,
//           given as the bit pattern of a signed 32-bit value
//   insn    write cursor; advanced by 5
// The displacement is written one byte at a time, least significant first,
// rather than through an int* cast: the cursor can sit at any alignment in an
// unsigned char buffer, and x86 displacements are little-endian whatever the
// host byte order is.
void emitJump(unsigned disp32, unsigned char *&insn) {
    // The magnitude of the displacement must stay below 2^31. The only bit
    // pattern that violates this is 0x80000000, so test for it directly
    // instead of negating a signed value, which overflows for that pattern.
    assert(disp32 != 0x80000000u);

    *insn++ = 0xE9;
    for (int shift = 0; shift < 32; shift += 8)
        *insn++ = (unsigned char)((disp32 >> shift) & 0xFF);
}
// Emit a 5-byte relative call: opcode 0xE8 followed by a 32-bit displacement.
//   disp32  displacement, given as the bit pattern of a signed 32-bit value
//   insn    write cursor; advanced by 5
// The displacement is written one byte at a time, least significant first,
// rather than through an int* cast: the cursor can sit at any alignment in an
// unsigned char buffer, and x86 displacements are little-endian whatever the
// host byte order is.
void emitCallRel32(unsigned disp32, unsigned char *&insn) {
    *insn++ = 0xE8;
    for (int shift = 0; shift < 32; shift += 8)
        *insn++ = (unsigned char)((disp32 >> shift) & 0xFF);
}
1986 // set dest=1 if src1 op src2, otherwise dest = 0
// src1, src2 and dest are virtual registers held in stack slots at
// -(n*4)[ebp]. ECX is cleared, src1 is compared with src2, and a short
// conditional jump on the NEGATED condition skips the one-byte "inc ecx";
// ECX (0 or 1) is then stored into dest's slot. EAX and ECX are overwritten.
// NOTE(review): the switch statement and any default case are on lines not
// visible here.
1987 void emitRelOp(unsigned op, Register dest, Register src1, Register src2,
1988 unsigned char *&insn) {
1989 //fprintf(stderr,"Relop dest = %d, src1 = %d, src2 = %d\n", dest, src1, src2);
1990 emitOpRegReg(0x29, ECX, ECX, insn); // clear ECX
1991 emitMovRMToReg(EAX, EBP, -(src1*4), insn); // mov eax, -(src1*4)[ebp]
1992 emitOpRegRM(0x3B, EAX, EBP, -(src2*4), insn); // cmp eax, -(src2*4)[ebp]
1993 unsigned char opcode;
// Each case selects the jump taken when the relation does NOT hold.
1995 case eqOp: opcode = JNE_R8; break;
1996 case neOp: opcode = JE_R8; break;
1997 case lessOp: opcode = JGE_R8; break;
1998 case leOp: opcode = JG_R8; break;
1999 case greaterOp: opcode = JLE_R8; break;
2000 case geOp: opcode = JL_R8; break;
// A displacement of 1 jumps over the single-byte inc that follows.
2003 *insn++ = opcode; *insn++ = 1; // jcc 1
2004 emitSimpleInsn(0x40+ECX, insn); // inc ECX
2005 emitMovRegToRM(EBP, -(dest*4), ECX, insn); // mov -(dest*4)[ebp], ecx
2009 // set dest=1 if src1 op src2imm, otherwise dest = 0
// Same scheme as emitRelOp, with the second operand given as an immediate.
// src1 and dest are virtual registers held in stack slots at -(n*4)[ebp].
// EAX and ECX are overwritten.
// NOTE(review): the switch statement and any default case are on lines not
// visible here.
2010 void emitRelOpImm(unsigned op, Register dest, Register src1, int src2imm,
2011 unsigned char *&insn) {
2012 //fprintf(stderr,"Relop dest = %d, src1 = %d, src2 = %d\n", dest, src1, src2);
2013 emitOpRegReg(0x29, ECX, ECX, insn); // clear ECX
2014 emitMovRMToReg(EAX, EBP, -(src1*4), insn); // mov eax, -(src1*4)[ebp]
// NOTE(review): emitOpRegImm places its first argument in the 3-bit reg field
// of the ModRM byte, so 0x3D is truncated to 5 here, not the 7 that a group-1
// CMP uses. If emitOpRegImm's leading opcode byte is 0x81 this encodes SUB,
// which sets the same flags as CMP but also modifies EAX (EAX is not read
// again below). Confirm whether 7 was intended.
2015 emitOpRegImm(0x3D, EAX, src2imm, insn); // cmp eax, src2
2016 unsigned char opcode;
// Each case selects the jump taken when the relation does NOT hold.
2018 case eqOp: opcode = JNE_R8; break;
2019 case neOp: opcode = JE_R8; break;
2020 case lessOp: opcode = JGE_R8; break;
2021 case leOp: opcode = JG_R8; break;
2022 case greaterOp: opcode = JLE_R8; break;
2023 case geOp: opcode = JL_R8; break;
// A displacement of 1 jumps over the single-byte inc that follows.
2026 *insn++ = opcode; *insn++ = 1; // jcc 1
2027 emitSimpleInsn(0x40+ECX, insn); // inc ECX
2028 emitMovRegToRM(EBP, -(dest*4), ECX, insn); // mov -(dest*4)[ebp], ecx
// Emits an instruction carrying a 16-bit immediate operand imm16; the cursor
// is advanced past the immediate.
// NOTE(review): going by the name this is the x86 ENTER instruction, but the
// opcode byte and anything written after the immediate are on lines not
// visible here -- confirm.
2032 void emitEnter(short imm16, unsigned char *&insn) {
2034 *((short*)insn) = imm16;
2035 insn += sizeof(short);
// emitFuncCall: generate code that evaluates `operands`, pushes the results
// as arguments, calls the function and stores EAX into a newly allocated
// virtual register.
//   op          must be callOp
//   ibuf, base  instruction buffer and the next free offset in it; base is
//               advanced by the number of bytes emitted here
//   operands    argument expressions; each is generated into a virtual register
//   callee      function name, used for lookup when calleefunc gives no address
//   proc        process in which addresses are resolved
//   calleefunc  function object supplying the address, when given
// NOTE(review): one parameter (`rs` is used below), the conditions choosing
// between the address lookups, and the return statement are on lines not
// visible here.
2041 Register emitFuncCall(opCode op,
2043 char *ibuf, Address &base,
2044 const vector<AstNode *> &operands,
2045 const string &callee, process *proc,
2046 bool noCost, const function_base *calleefunc)
2048 assert(op == callOp);
2051 vector <Register> srcs;
// Resolve the callee's address: from calleefunc, else as an internal symbol,
// else by function name; failure to find it is reported as error 80.
2054 addr = calleefunc->getEffectiveAddress(proc);
2056 addr = proc->findInternalAddress(callee, false, err);
2058 function_base *func = proc->findOneFunction(callee);
2060 ostrstream os(errorLine, 1024, ios::out);
2061 os << "Internal error: unable to find addr of " << callee << endl;
2063 showErrorCallback(80, (const char *) errorLine);
2066 addr = func->getEffectiveAddress(proc);
// Operand code is generated first (this advances base), so the cursor for
// the call sequence is taken only afterwards.
2069 for (unsigned u = 0; u < operands.size(); u++)
2070 srcs.push_back((Register)operands[u]->generateCode(proc, rs, ibuf, base, noCost, false));
2072 unsigned char *insn = (unsigned char *) ((void*)&ibuf[base]);
2073 unsigned char *first = insn;
2075 // push arguments in reverse order, last argument first
2076 // must use int instead of unsigned to avoid nasty underflow problem:
2077 for (int i=srcs.size() - 1 ; i >= 0; i--) {
// PUSH_RM_OPC2 is passed in the register slot, i.e. into the ModRM reg field.
2078 emitOpRMReg(PUSH_RM_OPC1, EBP, -(srcs[i]*4), PUSH_RM_OPC2, insn);
2079 rs->freeRegister(srcs[i]);
2083 // we are using an indirect call here because we don't know the
2084 // address of this instruction, so we can't use a relative call.
2085 // TODO: change this to use a direct call
2086 emitMovImmToReg(EAX, addr, insn); // mov eax, addr
// CALL_RM_OPC2 likewise goes into the ModRM reg field, with EAX as r/m.
2087 emitOpRegReg(CALL_RM_OPC1, CALL_RM_OPC2, EAX, insn); // call *(eax)
2089 // reset the stack pointer
2090 if (srcs.size() > 0)
2091 emitOpRegImm(0, ESP, srcs.size()*4, insn); // add esp, srcs.size()*4
2093 // allocate a (virtual) register to store the return value
// NOTE(review): allocateRegister receives the current cursor together with
// `base`, which has not yet been advanced for the bytes emitted above --
// confirm it never emits code through these arguments.
2094 Register ret = rs->allocateRegister((char *)insn, base, noCost);
2095 emitMovRegToRM(EBP, -(ret*4), EAX, insn);
2097 base += insn - first;
2104 * emit code for op(src1,src2, dest)
2105 * ibuf is an instruction buffer where instructions are generated
2106 * base is the next free position on ibuf where code is to be generated
// emitA: emitter for the control-flow style opcodes; emitV's asserts list
// branchOp, ifOp, trampTrailer and trampPreamble as belonging here. Code is
// written at ibuf[base] and base is advanced.
// Return values visible here: branchOp and trampTrailer return the offset of
// the 5-byte jump they emitted; trampPreamble returns 0. Any other op aborts.
// NOTE(review): the switch statement, its case labels for the first two
// groups, and the end of the first group (its conditional-jump opcode, base
// update and return) are on lines not visible here.
2109 Address emitA(opCode op, Register src1, Register /*src2*/, Register dest,
2110 char *ibuf, Address &base, bool /*noCost*/)
2112 //fprintf(stderr,"emitA(op=%d,src1=%d,src2=XX,dest=%d)\n",op,src1,dest);
2114 unsigned char *insn = (unsigned char *) (&ibuf[base]);
2115 unsigned char *first = insn;
2119 // if src1 == 0 jump to dest
2120 // src1 is a temporary
2121 // dest is a target address
2122 emitOpRegReg(0x29, EAX, EAX, insn); // sub EAX, EAX ; clear EAX
2123 emitOpRegRM(0x3B, EAX, EBP, -(src1*4), insn); // cmp -(src1*4)[EBP], EAX
// dest is written as the 32-bit operand of the conditional jump.
2127 *((int *)insn) = dest;
2128 insn += sizeof(int);
// Unconditional branch: dest is reduced by the size of the jump before being
// used as the displacement.
2133 emitJump(dest - JUMP_REL32_SZ, insn);
2134 base += JUMP_REL32_SZ;
2135 return(base - JUMP_REL32_SZ);
2137 case trampTrailer: {
2138 // generate the template for a jump -- actual jump is generated elsewhere
2139 emitJump(0, insn); // jump xxxx
2140 // return the offset of the previous jump
2141 base += insn - first;
2142 return(base - JUMP_REL32_SZ);
// Nothing is emitted for the preamble in the visible lines, so base is
// advanced by zero.
2144 case trampPreamble: {
2145 base += insn - first;
2146 return(0); // let's hope this is expected!
2149 abort(); // unexpected op for this emit!
2151 return(0); // should never reach here!
// emitR: emitter for the opcodes that read a value of the instrumented
// function into the virtual register dest (slot -(dest*4)[ebp]); emitV's
// asserts list getRetValOp and getParamOp as belonging here. Code is written
// at ibuf[base] and base is advanced. EAX is used as scratch.
// NOTE(review): the switch statement, its case labels and the return
// statements of the two cases are on lines not visible here.
2154 Register emitR(opCode op, Register src1, Register /*src2*/, Register dest,
2155 char *ibuf, Address &base, bool /*noCost*/)
2157 //fprintf(stderr,"emitR(op=%d,src1=%d,src2=XX,dest=%d)\n",op,src1,dest);
2159 unsigned char *insn = (unsigned char *) (&ibuf[base]);
2160 unsigned char *first = insn;
2164 // dest is a register where we can store the value
2165 // the return value is in the saved EAX
2166 emitMovRMToReg(EAX, EBP, SAVED_EAX_OFFSET, insn);
2167 emitMovRegToRM(EBP, -(dest*4), EAX, insn);
2168 base += insn - first;
2172 // src1 is the number of the argument
2173 // dest is a register where we can store the value
2174 // Parameters are addressed by a positive offset from ebp,
2175 // the first is PARAM_OFFSET[ebp]
2176 emitMovRMToReg(EAX, EBP, PARAM_OFFSET + src1*4, insn);
2177 emitMovRegToRM(EBP, -(dest*4), EAX, insn);
2178 base += insn - first;
2182 abort(); // unexpected op for this emit!
2184 return(Null_Register); // should never be reached!
// emitVload: emit a load into the virtual register dest (slot -(dest*4)[ebp]).
// Code is written at ibuf[base] and base is advanced by the bytes emitted.
//   loadConstOp          dest = src1 (immediate)
//   loadOp               dest = word at absolute address src1
//   loadFrameRelativeOp  dest = word at offset src1 from the value stored at
//                        (%ebp)
//   loadFrameAddr        dest = value stored at (%ebp), plus src1
// Any other op aborts. All but loadConstOp use EAX as scratch.
2187 void emitVload(opCode op, Address src1, Register /*src2*/, Register dest,
2188 char *ibuf, Address &base, bool /*noCost*/, int /* size */)
2190 unsigned char *insn = (unsigned char *) (&ibuf[base]);
2191 unsigned char *first = insn;
2193 if (op == loadConstOp) {
2194 // dest is a temporary
2195 // src1 is an immediate value
2196 // dest = src1:imm32
2197 emitMovImmToRM(EBP, -(dest*4), src1, insn);
2198 base += insn - first;
2200 } else if (op == loadOp) {
2201 // dest is a temporary
2202 // src1 is the address of the operand
2204 emitMovMToReg(EAX, src1, insn); // mov eax, src1
2205 emitMovRegToRM(EBP, -(dest*4), EAX, insn); // mov -(dest*4)[ebp], eax
2206 base += insn - first;
2208 } else if (op == loadFrameRelativeOp) {
2209 // dest is a temporary
2210 // src1 is the offset of the from the frame of the variable
2211 // eax = [eax] - saved sp
2212 // dest = [eax](src1)
2213 emitMovRMToReg(EAX, EBP, 0, insn); // mov (%ebp), %eax
2214 emitMovRMToReg(EAX, EAX, src1, insn); // mov <offset>(%eax), %eax
2215 emitMovRegToRM(EBP, -(dest*4), EAX, insn); // mov -(dest*4)[ebp], eax
2216 base += insn - first;
2218 } else if (op == loadFrameAddr) {
2219 emitMovRMToReg(EAX, EBP, 0, insn); // mov (%ebp), %eax
2220 emitAddRegImm32(EAX, src1, insn); // add #<offset>, %eax
2221 emitMovRegToRM(EBP, -(dest*4), EAX, insn); // mov -(dest*4)[ebp], eax
2222 base += insn - first;
2225 abort(); // unexpected op for this emit!
// emitVstore: emit a store of the virtual register src1 (slot -(src1*4)[ebp]).
// Code is written at ibuf[base] and base is advanced by the bytes emitted.
//   storeOp               word at absolute address dest = src1
//   storeFrameRelativeOp  word at offset dest from the value stored at
//                         (ebp) = src1
// Any other op aborts. EAX is used as scratch.
2229 void emitVstore(opCode op, Register src1, Register src2, Address dest,
2230 char *ibuf, Address &base, bool /*noCost*/, int /* size */)
2232 unsigned char *insn = (unsigned char *) (&ibuf[base]);
2233 unsigned char *first = insn;
2235 if (op == storeOp) {
2237 // dest has the address where src1 is to be stored
2238 // src1 is a temporary
2239 // src2 is a "scratch" register, we don't need it in this architecture
2240 emitMovRMToReg(EAX, EBP, -(src1*4), insn); // mov eax, -(src1*4)[ebp]
2241 emitMovRegToM(dest, EAX, insn); // mov dest, eax
2242 base += insn - first;
2244 } else if (op == storeFrameRelativeOp) {
2245 // src1 is a temporary
2246 // src2 is a "scratch" register, we don't need it in this architecture
2247 // dest is the frame offset
2249 // src2 = [ebp] - saved sp
2250 // (dest)[src2] = src1
// NOTE(review): src2 is used here directly as a hardware register number,
// whereas register arguments elsewhere index stack slots. If src2 were EAX,
// the second move would overwrite the pointer just loaded; a value of ESP
// would trip the assert in emitAddressingMode. Confirm what callers pass.
2251 emitMovRMToReg(src2, EBP, 0, insn); // mov src2, (ebp)
2252 emitMovRMToReg(EAX, EBP, -(src1*4), insn); // mov eax, -(src1*4)[ebp]
2253 emitMovRegToRM(src2, dest, EAX, insn); // mov (dest)[src2], eax
2254 base += insn - first;
2257 abort(); // unexpected op for this emit!
// emitVupdate - emit x86 code for updateCostOp: add the immediate cost
// value src1 to the counter stored at address dest.
//   op         : updateCostOp; any other opcode reaches the abort()
//   src1       : cost value, emitted as a 32-bit immediate
//   dest       : address of the observed-cost counter
//   ibuf, base : code is written starting at &ibuf[base]
// NOTE(review): how noCost is consulted and how base is advanced cannot be
// seen on the lines below - confirm against the complete function body.
2261 void emitVupdate(opCode op, RegValue src1, Register /*src2*/, Address dest,
2262 char *ibuf, Address &base, bool noCost)
2264 unsigned char *insn = (unsigned char *) (&ibuf[base]);
2265 unsigned char *first = insn;
2267 if (op == updateCostOp) {
2268 // src1 is the cost value
2270 // dest is the address of observed cost
2273 // update observed cost
2274 // dest = address of DYNINSTobsCostLow
2276 emitAddMemImm32(dest, src1, insn); // ADD (dest), src1
2279 return; //return base; // never seem to ever need this
2281 abort(); // unexpected op for this emit!
// emitV - emit x86 code for opcodes whose operands are all temporaries
// (virtual registers kept in stack slots addressed as -(reg*4)[ebp]).
//   op         : opcode to generate (see the asserts for what is excluded)
//   src1, src2 : source temporaries
//   dest       : destination temporary
//   ibuf, base : code is written starting at &ibuf[base]; base is advanced
//                by the number of bytes emitted
// Handles loadIndirOp, storeIndirOp, noOp, division, the relational
// operators (via emitRelOp) and the two-operand ALU operations, which only
// select an opcode and then share one load/op/store tail.
2285 void emitV(opCode op, Register src1, Register src2, Register dest,
2286 char *ibuf, Address &base, bool /*noCost*/, int /* size */)
2288 //fprintf(stderr,"emitV(op=%d,src1=%d,src2=%d,dest=%d)\n",op,src1,src2,dest);
// opcodes served by the other emit routines must never arrive here
2290 assert ((op!=branchOp) && (op!=ifOp) &&
2291 (op!=trampTrailer) && (op!=trampPreamble)); // !emitA
2292 assert ((op!=getRetValOp) && (op!=getParamOp)); // !emitR
2293 assert ((op!=loadOp) && (op!=loadConstOp)); // !emitVload
2294 assert ((op!=storeOp)); // !emitVstore
2295 assert ((op!=updateCostOp)); // !emitVupdate
2297 unsigned char *insn = (unsigned char *) (&ibuf[base]);
2298 unsigned char *first = insn;
2300 if (op == loadIndirOp) {
2301 // same as loadOp, but the value to load is already in a register
2302 emitMovRMToReg(EAX, EBP, -(src1*4), insn); // mov eax, -(src1*4)[ebp]
2303 emitMovRMToReg(EAX, EAX, 0, insn); // mov eax, [eax]
2304 emitMovRegToRM(EBP, -(dest*4), EAX, insn); // mov -(dest*4)[ebp], eax
2307 else if (op == storeIndirOp) {
2308 // same as storeOp, but the address where to store is already in a
2310 emitMovRMToReg(EAX, EBP, -(src1*4), insn); // mov eax, -(src1*4)[ebp]
2311 emitMovRMToReg(ECX, EBP, -(dest*4), insn); // mov ecx, -(dest*4)[ebp]
2312 emitMovRegToRM(ECX, 0, EAX, insn); // mov [ecx], eax
2314 } else if (op == noOp) {
2315 emitSimpleInsn(NOP, insn); // nop
2317 } else if (op == saveRegOp) {
2318 // should not be used on this platform
2322 unsigned opcode = 0;//initialize to placate gcc warnings
2326 // dest = src1 + src2
2330 opcode = 0x03; // ADD
2334 opcode = 0x2B; // SUB
2338 opcode = 0x0FAF; // IMUL
2342 // dest = src1 div src2
2344 // cdq ; edx = sign extend of eax
2345 // idiv eax, src2 ; eax = edx:eax div src2, edx = edx:eax mod src2
2347 emitMovRMToReg(EAX, EBP, -(src1*4), insn);
2348 emitSimpleInsn(0x99, insn); // cdq
2349 emitOpRegRM(0xF7, 0x7 /*opcode extension*/, EBP, -(src2*4), insn); // idiv -(src2*4)[ebp]
2350 emitMovRegToRM(EBP, -(dest*4), EAX, insn);
2357 opcode = 0x0B; // OR
2361 opcode = 0x23; // AND
2365 // dest = src1 relop src2
2372 emitRelOp(op, dest, src1, src2, insn);
// shared tail for the ALU opcodes selected above:
// eax = src1; eax = eax <opcode> src2; dest = eax
2382 emitMovRMToReg(EAX, EBP, -(src1*4), insn);
2383 emitOpRegRM(opcode, EAX, EBP, -(src2*4), insn);
2384 emitMovRegToRM(EBP, -(dest*4), EAX, insn);
2386 base += insn - first;
// emitImm - emit x86 code for operations that take an immediate operand.
//   op         : opcode to generate
//   src1       : source temporary (for storeOp it is used as the immediate
//                value to store)
//   src2imm    : immediate second operand
//   dest       : destination temporary (for storeOp, the target address)
//   ibuf, base : code is written starting at &ibuf[base]; base is advanced
//                by the number of bytes emitted
// Temporaries live in stack slots addressed as -(reg*4)[ebp].
2391 void emitImm(opCode op, Register src1, RegValue src2imm, Register dest,
2392 char *ibuf, Address &base, bool)
2394 unsigned char *insn = (unsigned char *) (&ibuf[base]);
2395 unsigned char *first = insn;
2397 if (op == storeOp) {
2399 // dest has the address where src1 is to be stored
2400 // src1 is an immediate value
2401 // src2 is a "scratch" register, we don't need it in this architecture
2402 emitMovImmToReg(EAX, dest, insn); // mov eax, dest
2403 emitMovImmToRM(EAX, 0, src1, insn); // mov 0[eax], src1
2411 opcode2 = 0x0; // ADD
2416 opcode2 = 0x5; // SUB
// power-of-two operand: copy src1 into dest, then shift dest in place
// (presumably result is the shift count, i.e. log2 of src2imm - confirm
// against isPowerOf2)
2421 if (isPowerOf2(src2imm, result) && result <= MAX_IMM8) {
2423 emitMovRMToReg(EAX, EBP, -(src1*4), insn);
2424 emitMovRegToRM(EBP, -(dest*4), EAX, insn);
2427 emitOpRMImm8(0xC1, 4, EBP, -(dest*4), result, insn); // 0xC1 /4 = shl r/m32, imm8
2430 // imul EAX, -(src1*4)[ebp], src2imm
2431 emitOpRegRMImm(0x69, EAX, EBP, -(src1*4), src2imm, insn);
2432 emitMovRegToRM(EBP, -(dest*4), EAX, insn);
// same power-of-two shortcut, using an arithmetic right shift
2441 if (isPowerOf2(src2imm, result) && result <= MAX_IMM8) {
2443 emitMovRMToReg(EAX, EBP, -(src1*4), insn);
2444 emitMovRegToRM(EBP, -(dest*4), EAX, insn);
2447 emitOpRMImm8(0xC1, 7, EBP, -(dest*4), result, insn); // 0xC1 /7 = sar r/m32, imm8
2450 // dest = src1 div src2imm
2452 // cdq ; edx = sign extend of eax
2454 // idiv eax, ebx ; eax = edx:eax div src2, edx = edx:eax mod src2
2456 emitMovRMToReg(EAX, EBP, -(src1*4), insn);
2457 emitSimpleInsn(0x99, insn); // cdq
2458 emitMovImmToReg(EBX, src2imm, insn); // mov ebx, src2imm
2460 emitOpRegReg(0xF7, 0x7 /*opcode extension*/, EBX, insn); // idiv ebx
2461 emitMovRegToRM(EBP, -(dest*4), EAX, insn);
2471 opcode2 = 0x1; // OR
2476 opcode2 = 0x4; // AND
2480 // dest = src1 relop src2
2487 emitRelOpImm(op, dest, src1, src2imm, insn);
// shared tail for the ALU opcodes selected above:
// dest = src1, then apply <opcode1/opcode2> with src2imm to dest in place
2497 emitMovRMToReg(EAX, EBP, -(src1*4), insn);
2498 emitMovRegToRM(EBP, -(dest*4), EAX, insn);
2500 emitOpRMImm(opcode1, opcode2, EBP, -(dest*4), src2imm, insn);
2502 base += insn - first;
// getInsnCost - return the estimated cost of the code generated for `op`.
// The opcode is matched with a chain of comparisons; each branch returns a
// constant.
// NOTE(review): most of the per-opcode return values cannot be seen on the
// lines below, and the unit of the result is not stated here (presumably
// instructions or cycles) - confirm against the complete function body.
2508 int getInsnCost(opCode op)
2510 if (op == loadConstOp) {
2512 } else if (op == loadOp) {
2514 } else if (op == loadIndirOp) {
2516 } else if (op == storeOp) {
2518 } else if (op == storeIndirOp) {
2520 } else if (op == ifOp) {
2522 } else if (op == whileOp) {
2523 return(1+2+1+1); /* Need to find out about this */
2524 } else if (op == branchOp) {
2525 return(1); /* XXX Need to find out what value this should be. */
2526 } else if (op == callOp) {
2527 // cost of call only
2529 } else if (op == updateCostOp) {
2531 } else if (op == trampPreamble) {
2533 } else if (op == trampTrailer) {
2535 } else if (op == noOp) {
2537 } else if (op == getRetValOp) {
2539 } else if (op == getParamOp) {
2550 return(1+1+2+1+1+1);
2562 return(0); // doesn't add anything to operand
// process::heapIsOk - check that the application contains the symbols the
// instrumentation relies on:
//   - its entry function: main/_main, plus WinMain/_WinMain on NT
//   - every symbol listed in find_us (each is also tried with a leading
//     underscore); only entries flagged must_find produce an error
//   - DYNINSTtrampTable, on non-NT platforms
// Each failure is reported through statusLine() and showErrorCallback(50,..).
// NOTE(review): the value returned on each path cannot be seen on the lines
// below - confirm against the complete function body.
2573 bool process::heapIsOk(const vector<sym_data> &find_us) {
2578 // find the main function
2579 // first look for main or _main
2580 #if !defined(i386_unknown_nt4_0)
2581 if (!((mainFunction = findOneFunction("main"))
2582 || (mainFunction = findOneFunction("_main")))) {
2583 string msg = "Cannot find main. Exiting.";
2584 statusLine(msg.string_of());
2585 showErrorCallback(50, msg);
// NT variant: WinMain is accepted as the entry function as well
2589 if (!((mainFunction = findOneFunction("main"))
2590 || (mainFunction = findOneFunction("_main"))
2591 || (mainFunction = findOneFunction("WinMain"))
2592 || (mainFunction = findOneFunction("_WinMain")))) {
2593 string msg = "Cannot find main or WinMain. Exiting.";
2594 statusLine(msg.string_of());
2595 showErrorCallback(50, msg);
// look up every requested symbol, retrying with a leading underscore
2600 for (unsigned i=0; i<find_us.size(); i++) {
2601 str = find_us[i].name;
2602 if (!getSymbolInfo(str, sym, baseAddr)) {
2603 string str1 = string("_") + str.string_of();
2604 if (!getSymbolInfo(str1, sym, baseAddr) && find_us[i].must_find) {
2606 msg = string("Cannot find ") + str + string(". Exiting");
2607 statusLine(msg.string_of());
2608 showErrorCallback(50, msg);
2614 // string ghb = GLOBAL_HEAP_BASE;
2615 // if (!getSymbolInfo(ghb, sym, baseAddr)) {
2616 // ghb = U_GLOBAL_HEAP_BASE;
2617 // if (!getSymbolInfo(ghb, symm baseAddr)) {
2619 // msg = string("Cannot find ") + str + string(". Exiting");
2620 // statusLine(msg.string_of());
2621 // showErrorCallback(50, msg);
2625 // Address instHeapEnd = sym.addr()+baseAddr;
2626 // addInternalSymbol(ghb, instHeapEnd);
2628 #if !defined(i386_unknown_nt4_0)
2629 string tt = "DYNINSTtrampTable";
2630 if (!getSymbolInfo(tt, sym, baseAddr)) {
2632 msg = string("Cannot find ") + tt + string(". Cannot use this application");
2633 statusLine(msg.string_of());
2634 showErrorCallback(50, msg);
// Expected call frequency of a function, keyed by function name. Filled in
// by initDefaultPointFrequencyTable() and read by getPointFrequency().
2644 dictionary_hash<string, unsigned> funcFrequencyTable(string::hash);
2647 // initDefaultPointFrequencyTable - define the expected call frequency of
2648 // procedures. Currently we just define several one shots with a
2649 // frequency of one, and provide a hook to read a file with more accurate
2652 void initDefaultPointFrequencyTable()
// built-in defaults: functions expected to run once
2658 funcFrequencyTable["main"] = 1;
2659 funcFrequencyTable["DYNINSTsampleValues"] = 1;
2660 funcFrequencyTable[EXIT_NAME] = 1;
2662 // try to read file.
// freq.input is opened relative to the current working directory; each
// entry read from it is a "<name> <frequency>" pair
2663 fp = fopen("freq.input", "r");
2667 printf("found freq.input file\n");
// NOTE(review): "%s" has no field width, so an over-long token can overrun
// `name`, and the fscanf result is not used on this line, so a malformed
// entry would go unnoticed here - consider bounding the width and checking
// that two fields were converted.
2670 fscanf(fp, "%s %f\n", name, &value);
// the frequency is truncated to an integer before it is stored
2671 funcFrequencyTable[name] = (int) value;
2672 printf("adding %s %f\n", name, value);
2678 * Get an estimate of the frequency for the passed instPoint.
2679 * This is not (always) the same as the function that contains the point.
2681 * The function is selected as follows:
2683 * If the point is an entry or an exit return the function name.
2684 * If the point is a call and the callee can be determined, return the called
2686 * else return the function containing the point.
2688 * WARNING: This code contains arbitrary values for func frequency (both user
2689 * and system). This should be refined over time.
2691 * Using 1000 calls sec to be one SD from the mean for most FPSPEC apps.
// getPointFrequency - return the expected call frequency for the function
// associated with `point`, looked up in funcFrequencyTable by pretty name.
// The callee of the point is tried first and the function containing the
// point is the fallback.
// NOTE(review): the test that selects the fallback and the default returned
// for functions missing from the table cannot be seen on the lines below
// (the comment suggests the default is 100) - confirm.
2695 float getPointFrequency(instPoint *point)
2698 pd_Function *func = point->callee();
2701 func = point->func();
2703 if (!funcFrequencyTable.defines(func->prettyName())) {
2704 // Changing this value from 250 to 100 because predictedCost was
2705 // too high - naim 07/18/96
2708 return ((float)funcFrequencyTable[func->prettyName()]);
2713 // return cost in cycles of executing at this point. This is the cost
2714 // of the base tramp if it is the first at this point or 0 otherwise.
//   proc  : process whose baseMap records the points that already have a
//           base tramp
//   point : instrumentation point being costed
2716 int getPointCost(process *proc, const instPoint *point)
// a point already present in baseMap has its base tramp installed
2718 if (proc->baseMap.defines(point)) {
// points that must be reached through a trap are far more expensive
2721 if (point->usesTrap(proc))
2722 return 9000; // estimated number of cycles for traps
// returnInstance::checkReturnInstance - scan the call stack for an address
// inside the range [addr_, addr_+size_).
//   stack : addresses found on the call stack
//   index : out parameter, see the comment below
2730 bool returnInstance::checkReturnInstance(const vector<Address> &stack, u_int &index) {
2733 // If false (unsafe) is returned, then 'index' is set to the first unsafe call stack
2735 for (u_int i=0; i < stack.size(); i++) {
// half-open range test: addr_ itself is inside, addr_+size_ is not
2738 if (stack[i] >= addr_ && stack[i] < addr_+size_)
// returnInstance::installReturnInstance - write the prepared instruction
// sequence (instSeqSize bytes) into the text space of `proc` at addr_, then
// free the sequence. instructionSeq must be non-null on entry.
2745 void returnInstance::installReturnInstance(process *proc) {
2746 assert(instructionSeq);
2747 proc->writeTextSpace((void *)addr_, instSeqSize, instructionSeq->ptr());
// NOTE(review): whether the member is reset after this delete cannot be
// seen here - confirm that a second call cannot reach a dangling pointer.
2748 delete instructionSeq;
// returnInstance::addToReturnWaitingList - both parameters are unnamed, so
// the body cannot use them.
// NOTE(review): the body cannot be seen here; presumably this operation is
// unsupported on x86 - confirm.
2753 void returnInstance::addToReturnWaitingList(Address , process *) {
// generateBreakPoint - set `insn` to the two-byte sequence 0x0F 0x0B (the
// octal escapes \017 \013), tagged ILLEGAL with size 2. Other comments in
// this file describe 0x0f0b as an illegal instruction used in place of a
// trap.
2758 void generateBreakPoint(instruction &insn) {
2759 insn = instruction ((const unsigned char*)"\017\013", ILLEGAL, 2);
// instWaitingList::cleanUp
// NOTE(review): both parameters are unnamed, yet the statements below refer
// to `proc` and `pc`; they are presumably disabled (commented out or
// compiled out) and kept only for reference - confirm.
2762 void instWaitingList::cleanUp(process *, Address ) {
2765 proc->writeTextSpace((caddr_t)pc, relocatedInstruction.size(),
2766 (caddr_t&)(relocatedInstruction.ptr()));
2767 proc->writeTextSpace((caddr_t)addr_, instSeqSize,
2768 (caddr_t)instructionSeq);
2772 /* ***************************************************** */
// process::emitInferiorRPCheader - emit the prologue of an inferior RPC.
//   void_insnPtr : start of the code buffer
//   baseBytes    : in: byte offset in the buffer at which to start emitting;
//                  out: byte offset just past the emitted prologue
// The prologue sets up a fresh stack frame, reserves 128 bytes for the
// temporaries (virtual registers) and saves the general registers and the
// flags.
2774 bool process::emitInferiorRPCheader(void *void_insnPtr, Address &baseBytes) {
2775 unsigned char *insnPtr = (unsigned char *)void_insnPtr;
// origInsnPtr is the start of the buffer, not the start of this prologue
2776 unsigned char *origInsnPtr = insnPtr;
2777 insnPtr += baseBytes;
2779 // We emit the following here (to set up a fresh stack frame):
2780 // pushl %ebp (0x55)
2781 // movl %esp, %ebp (0x89 0xe5)
2785 emitSimpleInsn(PUSH_EBP, insnPtr);
2786 emitMovRegToReg(EBP, ESP, insnPtr);
2787 // allocate space for temporaries (virtual registers)
2788 emitOpRegImm(5, ESP, 128, insnPtr); // sub esp, 128
2789 emitSimpleInsn(PUSHAD, insnPtr);
2790 emitSimpleInsn(PUSHFD, insnPtr);
// insnPtr - origInsnPtr already contains the incoming baseBytes, so it is
// assigned rather than added; adding it would count the incoming offset
// twice whenever baseBytes is non-zero on entry.
2792 baseBytes = (insnPtr - origInsnPtr);
// process::emitInferiorRPCtrailer - emit the epilogue of an inferior RPC.
//   void_insnPtr        : start of the code buffer
//   baseBytes           : in/out byte offset in the buffer; advanced as
//                         code is emitted
//   breakOffset         : out: offset of the final stop point
//   shouldStopForResult : when true, an extra stop point is emitted first
//                         so that the result can be collected
//   stopForResultOffset, justAfter_stopForResultOffset :
//                         out: offsets of that extra stop point and of the
//                         code that follows it
// The saved flags and registers are restored and the frame is released
// before the final stop point.
2797 bool process::emitInferiorRPCtrailer(void *void_insnPtr, Address &baseBytes,
2798 unsigned &breakOffset,
2799 bool shouldStopForResult,
2800 unsigned &stopForResultOffset,
2801 unsigned &justAfter_stopForResultOffset) {
2802 unsigned char *insnPtr = (unsigned char *)void_insnPtr;
2803 // unsigned char * is the most natural to work with on x86, since instructions
2804 // are always an integral # of bytes. Besides, it makes the following line easy:
2805 insnPtr += baseBytes; // start off in the right spot
2807 if (shouldStopForResult) {
2808 // illegal insn: 0x0f0b does the trick.
2809 stopForResultOffset = baseBytes;
2814 justAfter_stopForResultOffset = baseBytes;
2817 // Sequence: popfd, popad, leave (0xc9), call DYNINSTbreakPoint(), illegal
2819 emitSimpleInsn(POPFD, insnPtr); // popfd
2820 emitSimpleInsn(POPAD, insnPtr); // popad
2821 emitSimpleInsn(LEAVE, insnPtr); // leave
2822 baseBytes += 3; // all simple insns are 1 byte
2824 // We can't do a SIGTRAP since SIGTRAP is reserved in x86.
2825 // So we do a SIGILL instead.
2826 breakOffset = baseBytes;
2831 // Here, we should generate an illegal insn, or something.
2832 // A two-byte insn, 0x0f0b, should do the trick. The idea is that
2833 // the code should never be executed.
2841 // process::replaceFunctionCall
2843 // Replace the function call at the given instrumentation point with a call to
2844 // a different function, or with a NOOP. In order to replace the call with a
2845 // NOOP, pass NULL as the parameter "func."
2846 // Returns true if successful, false if not. Fails if the site is not a call
2847 // site, or if the site has already been instrumented using a base tramp.
2849 // Note that right now we can only replace a call instruction that is five
2850 // bytes long (like a call to a 32-bit relative address).
2851 bool process::replaceFunctionCall(const instPoint *point,
2852 const function_base *func) {
2853 // Must be a call site
2854 if (!point->insnAtPoint().isCall())
2857 // Cannot already be instrumented with a base tramp
2858 if (baseMap.defines(point))
2862 if (func == NULL) { // Replace with NOOPs
// one NOP per byte of the original call instruction
2863 unsigned char *newInsn = new unsigned char[point->insnAtPoint().size()];
2864 unsigned char *p = newInsn;
2865 for (unsigned i = 0; i < point->insnAtPoint().size(); i++)
2866 emitSimpleInsn(NOP, p);
// NOTE(review): newInsn comes from new[] and no delete[] follows this write
// before the else branch starts - looks like a leak unless writeTextSpace
// takes ownership of the buffer; confirm.
2867 writeTextSpace((void *)point->iPgetAddress(),
2868 point->insnAtPoint().size(), newInsn);
2869 } else { // Replace with a call to a different function
2870 // XXX Right now, call has to be 5 bytes -- sometime, we should make
2871 // it work for other calls as well.
2872 assert(point->insnAtPoint().size() == CALL_REL32_SZ);
2873 unsigned char *newInsn = new unsigned char[CALL_REL32_SZ];
2874 unsigned char *p = newInsn;
// the displacement is relative to the address of the instruction that
// follows the call
2875 emitCallRel32(func->addr() - (point->iPgetAddress()+CALL_REL32_SZ), p);
2876 writeTextSpace((void *)point->iPgetAddress(), CALL_REL32_SZ, newInsn);
2882 // Emit code to jump to function CALLEE without linking. (I.e., when
2883 // CALLEE returns, it returns to the current caller.)
// Every parameter name is commented out because none of them is used: the
// operation is not implemented for x86.
2884 void emitFuncJump(opCode /*op*/,
2885 char * /*i*/, Address & /*base*/,
2886 const function_base * /*callee*/, process * /*proc*/)
2888 /* Unimplemented on this platform! */
// emitLoadPreviousStackFrameRegister - emit x86 code that copies the saved
// value of register `register_num` into the temporary `dest`.
// The saved value is read from the stack at SAVED_EAX_OFFSET -
// (register_num * 4) relative to ebp; the temporary is the stack slot
// -(dest*4)[ebp]. Code is written starting at &insn[base].
2892 void emitLoadPreviousStackFrameRegister(Address register_num,
2898 //Previous stack frame register is stored on the stack,
2899 //it was stored there at the beginning of the base tramp.
2901 //Calculate the register's offset from the frame pointer in EBP
2902 unsigned offset = SAVED_EAX_OFFSET - (register_num * 4);
2904 unsigned char *in = (unsigned char *) (&insn[base]);
2905 unsigned char *first = in;
2907 emitMovRMToReg(EAX, EBP, offset, in); //mov eax, offset[ebp]
2908 emitMovRegToRM(EBP, -(dest*4), EAX, in); //mov -(dest*4)[ebp], eax
2913 #ifndef BPATCH_LIBRARY
// process::isDynamicCallSite - decide whether a call site is dynamic by
// asking findCallee() to resolve its callee.
// NOTE(review): the values returned cannot be seen here; presumably a
// failed lookup means the site is dynamic - confirm.
2914 bool process::isDynamicCallSite(instPoint *callSite){
2915 function_base *temp;
2916 if(!findCallee(*(callSite),temp)){
// process::MonitorCallSite - instrument an indirect call site so that a
// call to DYNINSTRegisterCallee is made before the call executes.
//
// For an indirect call the operand is decoded with
// get_instruction_operand() and, depending on the addressing mode, an AST
// is built for the first argument. The second argument is always the
// address of the call site. The call is inserted with addInstFunc() at
// callPreInsn / orderFirstAtPoint.
//
// Register contents are taken from PreviousStackFrameDataReg nodes and
// memory is read through DataIndir nodes.
// NOTE(review): DYNINSTRegisterCallee presumably records the pair
// (callee, call site) at run time - confirm. The values returned by this
// function cannot be seen on the lines below.
2922 bool process::MonitorCallSite(instPoint *callSite){
2923 Register base_reg, index_reg;
2930 instruction i = callSite->insnAtPoint();
2931 vector<AstNode *> the_args(2);
2932 if(i.isCallIndir()){
2933 addr_mode = get_instruction_operand(i.ptr(), base_reg, index_reg,
2934 displacement, scale, Mod);
// target held in a register
2937 case REGISTER_DIRECT:
2938 the_args[0] = new AstNode(AstNode::PreviousStackFrameDataReg,
2940 the_args[1] = new AstNode(AstNode::Constant,
2941 (void *) callSite->iPgetAddress());
2942 func = new AstNode("DYNINSTRegisterCallee", the_args);
2943 addInstFunc(this, callSite, func, callPreInsn,
2944 orderFirstAtPoint, true,false);
// target read from the memory a register points to
2946 case REGISTER_INDIRECT:
2948 AstNode *prevReg = new AstNode(AstNode::PreviousStackFrameDataReg,
2950 the_args[0] = new AstNode(AstNode::DataIndir, prevReg);
2951 the_args[1] = new AstNode(AstNode::Constant,
2952 (void *) callSite->iPgetAddress());
2953 func = new AstNode("DYNINSTRegisterCallee", the_args);
2954 addInstFunc(this, callSite, func, callPreInsn,
2955 orderFirstAtPoint, true,false);
// register plus constant displacement
2958 case REGISTER_INDIRECT_DISPLACED:
2960 AstNode *prevReg = new AstNode(AstNode::PreviousStackFrameDataReg,
2962 AstNode *derefPrevReg = new AstNode(AstNode::DataIndir, prevReg );
2964 AstNode *offset = new AstNode(AstNode::Constant,
2965 (void *) displacement);
2966 AstNode *sum = new AstNode(plusOp, derefPrevReg, offset);
2968 the_args[0] = new AstNode(AstNode::DataIndir, sum);
2969 the_args[1] = new AstNode(AstNode::Constant,
2970 (void *) callSite->iPgetAddress());
2971 func = new AstNode("DYNINSTRegisterCallee", the_args);
2972 addInstFunc(this, callSite, func, callPreInsn,
2973 orderFirstAtPoint, true,false);
// target read from the memory at a constant displacement
2978 AstNode *offset = new AstNode(AstNode::Constant,
2979 (void *) displacement);
2980 the_args[0] = new AstNode(AstNode::DataIndir, offset);
2981 the_args[1] = new AstNode(AstNode::Constant,
2982 (void *) callSite->iPgetAddress());
2983 func = new AstNode("DYNINSTRegisterCallee", the_args);
2984 addInstFunc(this, callSite, func, callPreInsn,
2985 orderFirstAtPoint, true, false);
// operand with base, index and scale fields: the effective address is
// assembled from index*scale, the base register (when used) and the
// displacement
2990 AstNode *effective_address;
2991 if(index_reg != 4) { //We use a scaled index
2992 bool useBaseReg = true;
2993 if(Mod == 0 && base_reg == 5){
2994 cerr << "Inserting untested call site monitoring "
2995 "instrumentation at address " << hex <<
2996 callSite->iPgetAddress() << dec << endl;
3000 AstNode *index = new AstNode(AstNode::PreviousStackFrameDataReg,
3001 (void *) index_reg);
3002 AstNode *base = new AstNode(AstNode::PreviousStackFrameDataReg,
3005 AstNode *disp = new AstNode(AstNode::Constant,
3006 (void *) displacement);
3008 if(scale == 1){ //No need to do the multiplication
3010 AstNode *base_index_sum = new AstNode(plusOp, index, base);
3011 effective_address = new AstNode(plusOp, base_index_sum,
3015 effective_address = new AstNode(plusOp, index, disp);
3017 the_args[0] = new AstNode(AstNode::DataIndir, effective_address);
3019 the_args[1] = new AstNode(AstNode::Constant,
3020 (void *) callSite->iPgetAddress());
3021 func = new AstNode("DYNINSTRegisterCallee", the_args);
3022 addInstFunc(this, callSite, func, callPreInsn,
3023 orderFirstAtPoint, true, false);
// scale other than 1: multiply the index by the scale factor first
3026 AstNode *scale_factor
3027 = new AstNode(AstNode::Constant, (void *) scale);
3028 AstNode *index_scale_product = new AstNode(timesOp, index,
3031 AstNode *base_index_sum = new AstNode(plusOp,
3032 index_scale_product,
3034 effective_address = new AstNode(plusOp, base_index_sum,
3038 effective_address = new AstNode(plusOp,
3039 index_scale_product,
3041 the_args[0] = new AstNode(AstNode::DataIndir, effective_address);
3043 the_args[1] = new AstNode(AstNode::Constant,
3044 (void *) callSite->iPgetAddress());
3045 func = new AstNode("DYNINSTRegisterCallee", the_args);
3046 addInstFunc(this, callSite, func, callPreInsn,
3047 orderFirstAtPoint, true,false);
3050 else { //We do not use a scaled index.
3051 cerr << "Inserting untested call site monitoring "
3052 "instrumentation at address " << hex <<
3053 callSite->iPgetAddress() << dec << endl;
3054 AstNode *base = new AstNode(AstNode::PreviousStackFrameDataReg,
3056 AstNode *disp = new AstNode(AstNode::Constant,
3057 (void *) displacement);
3058 AstNode *effective_address = new AstNode(plusOp, base,
3060 the_args[0] = new AstNode(AstNode::DataIndir, effective_address);
3062 the_args[1] = new AstNode(AstNode::Constant,
3063 (void *) callSite->iPgetAddress());
3064 func = new AstNode("DYNINSTRegisterCallee", the_args);
3065 addInstFunc(this, callSite, func, callPreInsn,
3066 orderFirstAtPoint, true, false);
// any other addressing mode is reported and the request is declined
3072 cerr << "Unexpected addressing type in MonitorCallSite at addr:"
3073 << hex << callSite->iPgetAddress() << dec
3074 << "The daemon declines the monitoring request of this call site."
3079 else if(i.isCall()){
3080 //Regular callees are statically determinable, so no need to
3085 cerr << "Unexpected instruction in MonitorCallSite()!!!\n";
3091 #if (defined(i386_unknown_solaris2_5) || defined(i386_unknown_linux2_0))
3092 #include <sys/signal.h>
3093 //#include <sys/ucontext.h>
// BaseTrampTrapHandler - SIGTRAP handler; the signal number and the
// commented-out extended arguments are unused. It announces itself on cout
// and then calls sigaction() for SIGTRAP.
3096 BaseTrampTrapHandler (int)//, siginfo_t*, ucontext_t*)
3098 cout << "In BaseTrampTrapHandler()" << endl;
3099 // unset trap handler, so that DYNINSTtrapHandler can take place
// NOTE(review): with a null `act` argument sigaction() leaves the current
// disposition unchanged, so this call does not unset anything; it only
// fails for an invalid signal number. Pass a struct sigaction describing
// the wanted disposition if the handler really has to be removed.
3100 if (sigaction(SIGTRAP, NULL, NULL) != 0) {
3101 perror("sigaction(SIGTRAP)");
3109 #ifdef BPATCH_LIBRARY
3111 * createInstructionInstPoint
3113 * Create a BPatch_point instrumentation point at the given address, which
3114 * is guaranteed not to be one of the "standard" inst points.
3116 * proc The process in which to create the inst point.
3117 * address The address for which to create the point.
// Not implemented for x86: the request is reported as a serious error
// (code 109) through BPatch_reportError.
// NOTE(review): the value returned cannot be seen here - presumably NULL;
// confirm.
3119 BPatch_point *createInstructionInstPoint(process* /* proc */, void *address)
3121 BPatch_reportError(BPatchSerious, 109,
3122 "BPatch_image::createInstPointAtAddr unimplemented on this platform");
3127 * BPatch_point::getDisplacedInstructions
3129 * Returns the instructions to be relocated when instrumentation is inserted
3130 * at this point. Returns the number of bytes taken up by these instructions.
3132 * maxSize The maximum number of bytes of instructions to return.
3133 * insns A pointer to a buffer in which to return the instructions.
// BPatch_point::getDisplacedInstructions - gather the bytes of the
// instructions that are relocated when this point is instrumented: the
// instructions recorded before the point, the instruction at the point and
// the instructions recorded after it, in that order.
//   maxSize : capacity of `insns` in bytes
//   insns   : destination buffer; written only when everything fits
// The bytes are first assembled in the local buffer copyOut and `count`
// tracks how many have been gathered.
// NOTE(review): each assert runs after the memcpy it guards, so an
// oversized sequence overruns copyOut before it is detected (and asserts
// vanish under NDEBUG) - consider checking the size before copying.
3136 int BPatch_point::getDisplacedInstructions(int maxSize, void* insns)
3140 unsigned int count = 0;
3142 if (point->insnsBefore()) {
3143 for (unsigned int i=0; i < point->insnsBefore(); i++) {
3144 code = const_cast<unsigned char *>(point->insnBeforePt(i).ptr());
3145 memcpy(&copyOut[count], code, point->insnBeforePt(i).size());
3146 count += point->insnBeforePt(i).size();
3147 assert(count < sizeof(copyOut));
// the instruction at the point itself
3151 code = const_cast<unsigned char *>(point->insnAtPoint().ptr());
3152 memcpy(&copyOut[count], code, point->insnAtPoint().size());
3153 count += point->insnAtPoint().size();
3154 assert(count < sizeof(copyOut));
3156 if (point->insnsAfter()) {
3157 for (unsigned int i=0; i < point->insnsAfter(); i++) {
3158 code = const_cast<unsigned char *>(point->insnAfterPt(i).ptr());
3159 memcpy(&copyOut[count], code, point->insnAfterPt(i).size());
3160 count += point->insnAfterPt(i).size();
3161 assert(count < sizeof(copyOut));
// hand the bytes to the caller only when they fit in its buffer
3165 if (count <= (unsigned) maxSize) {
3166 memcpy(insns, copyOut, count);
3175 /****************************************************************************/
3176 /****************************************************************************/
3178 /* pd_Function Code for function relocation */
3180 // Check if an instruction is a relative addressed jump instruction
// The decision rests on insn.isJumpDir().
3182 bool pd_Function::isNearBranchInsn(const instruction insn) {
3183 if (insn.isJumpDir())
3188 // Check if an instruction is a relative addressed call instruction
// i.e. a call (isCall()) that is not an indirect call (isCallIndir()).
3190 bool pd_Function::isTrueCallInsn(const instruction insn) {
3191 if (insn.isCall() && !insn.isCallIndir())
3196 /****************************************************************************/
3197 /****************************************************************************/
3201 // Create a buffer of x86 instruction objects. These x86 instructions will
3202 // contain pointers to the machine code
//
//   proc                 : process the function body is read from
//   oldCode              : out: newly allocated array with one instruction
//                          object per decoded instruction
//   numberOfInstructions : out: number of entries in oldCode
//   firstAddress         : address of the function in the application
// The function body is copied into OLD_CODE and decoded sequentially; the
// instruction objects point into that copy. relocatable_ is cleared when
// the "call next instruction; pop %ebx" sequence is found.
// NOTE(review): the values returned cannot be seen on the lines below.
3204 bool pd_Function::loadCode(const image* /* owner */, process *proc,
3205 instruction *&oldCode,
3206 unsigned &numberOfInstructions,
3207 Address &firstAddress) {
3209 vector<instruction> insnVec;
3210 unsigned type, insnSize, totalSize = 0;
3211 instruction *insn = 0;
3213 #ifdef DEBUG_FUNC_RELOC
3214 cerr << "pd_Function::loadCode" << endl;
3217 // copy function to be relocated from application into OLD_CODE
3218 proc->readDataSpace((caddr_t)firstAddress, size(), OLD_CODE, true);
3220 // first address of function
3221 unsigned char *p = OLD_CODE;
3223 // last address of function
// NOTE(review): pointers are cast to unsigned here and below; that is only
// lossless where a pointer fits in an unsigned int.
3224 unsigned end_of_function = (unsigned)(p + size());
3226 // iterate over all instructions in function
3227 while ( (unsigned) p < end_of_function ) {
3229 // new instruction object
3230 insnSize = get_instruction(p, type);
// NOTE(review): the heap object is copied into insnVec by value and no
// matching delete is visible here - confirm that it is released.
3231 insn = new instruction(p, type, insnSize);
3232 insnVec.push_back(*insn);
3235 // check for the following instruction sequence:
3237 // call (0) (PC relative address, where the target of call (0)
3238 // is the next instruction
3239 // pop %ebx (pops return address of call instruction off of the
3240 // stack and places it in the ebx reg. The value in
3241 // ebx becomes the address of the pop instruction
3243 // This sequence is used to get the address of the currently
3244 // executing instruction. Presently we don't relocate a function
3245 // with this sequence of instructions
3247 // A call instruction whose target is the next instruction, is generally
3248 // used to obtain the address of the next instruction.
3249 // Presently we don't relocate functions with such calls
// 0x5b is the one-byte encoding of pop %ebx
3250 if ( isTrueCallInsn((const instruction)(*insn)) &&
3251 get_disp(insn) == 0 && *(p + insnSize) == 0x5b ) {
3253 relocatable_ = false;
3260 // update p so it points to the next machine code instruction
3262 totalSize += insnSize;
3267 // Occasionally a function's size is not calculated correctly for
3268 // pd_Function. In such cases, the last few bytes in the function
3269 // are "garbage", and the parsing done in the above while loop
3270 // interprets those bytes as an instruction. If the last byte of that
3271 // "garbage" instruction is outside the bounds of the function,
3272 // the sum of the insnSizes of the insn's that were parsed above will be
3273 // greater than the size of the function being relocated. To keep the
3274 // sum of the insnSizes equal to the size of the pd_Function, we replace
3275 // the "garbage" bytes with nop instructions, and drop those bytes
3276 // that are outside of the function.
3278 // # bytes of "garbage" at the end of the function
3279 int garbage = (unsigned)p - end_of_function;
3281 // if "garbage" bytes are found
3284 // create a nop machine instruction
3285 unsigned char *nop = new unsigned char;
3286 emitSimpleInsn(0x90, nop);
3289 // create an x86 instruction for a nop
3290 insn = new instruction(nop, 0, 1);
3292 // replace "garbage" x86 instruction with a nop instruction
3293 insnVec[insnVec.size() - 1] = *insn;
3295 // replace all "garbage" bytes up to the end of the function with nops
3296 for (int i = 0; i < garbage; i++) {
3297 insnVec.push_back(*insn);
3301 // buffer of x86 instructions
3302 oldCode = new instruction[insnVec.size()];
3304 // if unable to allocate array, dump warn and return false....
// NOTE(review): a standard-conforming new[] reports failure by throwing,
// not by returning NULL, so this test only helps with a non-throwing
// allocator - confirm the toolchain's behaviour.
3305 if (oldCode == NULL) {
3306 cerr << "WARN : unable to allocate array (" << insnVec.size() << " bytes) to read in " \
3307 << "instructions for function" << prettyName().string_of() << " unable to instrument" \
3312 // copy vector of instructions into buffer of instructions
3313 for (unsigned i = 0; i < insnVec.size(); i++) {
3314 oldCode[i] = insnVec[i];
3316 numberOfInstructions = insnVec.size();
3321 /****************************************************************************/
3322 /****************************************************************************/
3324 // Copy machine code from one location (in mutator) to another location
3325 // (also in the mutator)
3326 // Also updates the corresponding buffer of x86 instructions
//
//   newInsn    : out: instruction object describing the copied bytes
//   oldInsn    : instruction whose machine code is copied
//   codeOffset : offset in relocatedCode at which the copy starts
3328 void pd_Function::copyInstruction(instruction &newInsn, instruction &oldInsn,
3329 unsigned &codeOffset) {
3331 unsigned insnSize = oldInsn.size();
3332 const unsigned char *oldPtr = oldInsn.ptr();
// remember where the copy starts; newInsn must point at its first byte
3333 unsigned tmp = codeOffset;
3335 #ifdef DEBUG_FUNC_RELOC
3336 cerr << "pd_Function::copyInstruction" << endl;
3339 // iterate over each byte of the machine instruction, copying it
3340 for (unsigned i = 0; i < insnSize; i++) {
3341 relocatedCode[codeOffset] = *(oldPtr + i);
3345 // update x86 instruction corresponding to machine code instruction
// assign from a temporary: copying out of a heap object created with `new`
// would discard the only pointer to it and leak it on every call
3346 newInsn = instruction(&relocatedCode[tmp], oldInsn.type(), insnSize);
3349 /****************************************************************************/
3350 /****************************************************************************/
3352 // update displacement of expanded instruction
//
// Rewrites a PC-relative branch at its new location (newCodeInsn) so that
// it carries a 32-bit displacement. The new displacement is the old one,
// plus the change in distance to the target recorded in alteration_set
// (extra_offset), minus the growth of the instruction itself.
//   - REL_B (8-bit displacement): jcxz is kept and routed through a
//     jmp rel32; Jcc rel8 becomes the two-byte-opcode Jcc rel32; jmp rel8
//     becomes jmp rel32
//   - REL_W (16-bit displacement, operand-size prefix required): the
//     opcode is kept and the displacement is widened
//   - REL_D is the only remaining possibility (asserted)
// NOTE(review): the meaning of the int result and the handling of the
// prefix byte in the REL_W case cannot be seen on the lines below.
3354 int pd_Function::expandInstructions(LocalAlterationSet &alteration_set,
3357 instruction &newCodeInsn) {
3360 int oldDisp = 0, newDisp = 0, extra_offset = 0;
3361 unsigned char *oldInsn = 0, *newInsn = 0;
3363 unsigned insnType = insn.type();
3365 // location (in mutator) instruction was originally located at
3366 oldInsn = const_cast<unsigned char *> (insn.ptr());
3368 // location (in mutator) instruction is being relocated to (temporarily)
3369 newInsn = const_cast<unsigned char *> (newCodeInsn.ptr());
3371 // old displacement from instruction to target
3372 oldDisp = get_disp(&insn);
3374 // change in displacement of target
// (shift applied at the target minus shift applied at this instruction)
3375 extra_offset = alteration_set.getShift(offset + oldDisp) -
3376 alteration_set.getShift(offset);
3378 if (insnType & REL_B) {
3379 /* replace with rel32 instruction, opcode is one byte. */
3380 if (*oldInsn == JCXZ) {
// jcxz has no rel32 form: jump over the short jmp when cx is zero,
// otherwise skip the 5-byte jmp rel32
3381 *newInsn++ = *oldInsn; *newInsn++ = 2; // jcxz 2
3382 *newInsn++ = 0xEB; *newInsn++ = 5; // jmp 5
3383 *newInsn++ = 0xE9; // jmp rel32
3384 *((int *)newInsn) = oldDisp + extra_offset - 7; // change in insn size is 7
3385 newInsn += sizeof(int);
3388 unsigned newSz=UINT_MAX;
3389 if (insnType & IS_JCC) {
3390 /* Change a Jcc rel8 to Jcc rel32.
3391 Must generate a new opcode: a 0x0F followed by (old opcode + 16) */
3392 unsigned char opcode = *oldInsn++;
3394 *newInsn++ = opcode + 0x10;
3395 newDisp = oldDisp + extra_offset - 4; // change in insn size is 4
3398 if (insnType & IS_JUMP) {
3399 /* change opcode to 0xE9 */
3402 newDisp = oldDisp + extra_offset - 3; // change in insn size is 3
3405 assert(newSz!=UINT_MAX);
3406 *((int *)newInsn) = newDisp;
3407 newInsn += sizeof(int);
3412 if (insnType & REL_W) {
3413 assert(insnType & PREFIX_OPR);
3414 if (insnType & PREFIX_SEG)
3415 *newInsn++ = *oldInsn++;
3417 /* opcode is unchanged, just relocate the displacement */
3419 if (*oldInsn == (unsigned char)0x0F)
3420 *newInsn++ = *oldInsn++;
3421 *newInsn++ = *oldInsn++;
3422 newDisp = oldDisp + extra_offset - 1; // change in insn size is 1
3423 *((int *)newInsn) = newDisp;
3424 newInsn += sizeof(int);
3427 // should never get here
3428 assert (insnType & REL_D);
3436 /****************************************************************************/
3437 /****************************************************************************/
3439 // given the Address adr, calculate the offset in the buffer code[],
3440 // of the x86 instruction that begins at adr. Return -1 if adr is
3441 // a byte in the middle of an instruction and not the first byte of
//
//   adr  : address to look up; must lie within the function (asserted)
//   code : array of instruction objects for the function, in address order
3444 int pd_Function::getArrayOffset(Address adr, instruction code[]) {
3447 Address insnAdr = addressOfMachineInsn(&code[0]);
3449 #ifdef DEBUG_FUNC_RELOC
3450 cerr << "pd_Function::getArrayOffset" << endl;
3453 assert(adr >= insnAdr && adr <= insnAdr + size());
3455 // find the instruction that contains the byte at Address adr
// walk forward one instruction at a time until adr is reached or passed
3456 for (i = 0; insnAdr < adr; i++) {
3457 insnAdr += ((instruction)code[i]).size();
3460 // if adr is the first byte of the instruction, return the offset in
3461 // the buffer of the instruction
3462 if (insnAdr == adr) return i;
3466 /****************************************************************************/
3467 /****************************************************************************/
3469 // update the before and after insns of x86 instPoint p
//
//   p         : instrumentation point to update
//   allInstr  : all instructions of the function
//   numBefore : maximum number of preceding instructions to consider
//   numAfter  : maximum number of following instructions to consider
//   type      : kind of point (ReturnPt and EntryPt get special handling)
//   index     : position in allInstr of the instruction at the point
// Neighbouring instructions are attached to the point until the total
// size reaches the limit tested in each loop (JUMP_SZ, or 2*JUMP_SZ after
// an entry point).
// NOTE(review): what happens when a candidate instruction is rejected
// (presumably the scan stops) cannot be seen on the lines below - confirm.
3471 void pd_Function::instrAroundPt(instPoint *p, instruction allInstr[],
3472 int numBefore, int numAfter,
3473 unsigned type, int index) {
3475 // add instructions before the point
3476 unsigned size = (p->insnAtPoint()).size();
// walk backwards from the instruction just before the point
3477 for (int u1 = index-1; size < JUMP_SZ && u1 >= 0 &&
3478 u1 > (index - 1) - numBefore; u1--) {
3479 if (!allInstr[u1].isCall()) {
3480 p->addInstrBeforePt(allInstr[u1]);
3481 size += allInstr[u1].size();
3487 // add instructions after the point
3488 if (type == ReturnPt) {
3490 for (int u1 = index+1; size < JUMP_SZ && u1 < (index + 1) + numAfter; u1++) {
// after a return only padding is taken: nops and 0xCC (int3) bytes
3492 if (allInstr[u1].isNop() || *(allInstr[u1].ptr()) == 0xCC) {
3493 p->addInstrAfterPt(allInstr[u1]);
3494 size += allInstr[u1].size();
3502 unsigned maxSize = JUMP_SZ;
3503 if (type == EntryPt) maxSize = 2*JUMP_SZ;
3504 for (int u1 = index+1; size < maxSize && u1 < (index + 1) + numAfter; u1++) {
3505 if (!allInstr[u1].isCall()) {
3506 p->addInstrAfterPt(allInstr[u1]);
3507 size += allInstr[u1].size();
3516 /****************************************************************************/
3517 /****************************************************************************/
3518 // originalOffset: offset (in bytes) of the machine insn from the
3519 // beginning of the original function
3521 // newOffset: offset (in bytes) of the machine insn from the
3522 // beginning of the relocated and expanded function
3524 // originalArrayOffset: offset (in # of instructions) of the x86 instruction
3525 // corresponding to the instPoint, from the beginning of
3526 // the buffer corresponding to the original function.
3528 // newArrayOffset: offset (in # of instructions) of the x86 instruction
3529 // corresponding to the instPoint, from the beginning of
3530 // the buffer corresponding to the expanded and relocated function.
3533 // oldJumpOffset: offset (in bytes) from the beginning of the original
3534 // function, at which the jump to the baseTramp would be placed.
3537 // newJumpOffset: offset (in bytes) from the beginning of the expanded
3538 // and relocated function, at which the jump to the
3539 // baseTramp would be placed.
3541 // newJumpAddr: absolute Address, in the expanded and relocated function,
3542 // at which the jump to the baseTramp would be placed.
3544 // adr: absolute Address of the instruction in the expanded
3545 // and relocated function.
//
// CALC_OFFSETS(ip) computes all of the values above for instPoint ip.
// It is not self-contained: it assigns to the variables listed above and
// reads imageBaseAddr, mutatee, mutator, oldCode, newAdr and alteration_set,
// all of which must already be declared in the enclosing scope.
// It also executes "return false" in the enclosing function when
// getArrayOffset() yields a negative value for the point, so it may only be
// used inside a function returning bool (or a type convertible from false).
// The expansion ends with its own ';', so call sites need no trailing
// semicolon.
// NOTE(review): mutatee/mutator are presumably the addresses of the original
// function in the mutatee and of its copy in the mutator -- confirm at caller.
3548 #define CALC_OFFSETS(ip) \
3549 originalOffset = ((ip->iPgetAddress() + imageBaseAddr) - mutatee); \
3550 originalArrayOffset = getArrayOffset(originalOffset + mutator, oldCode); \
3551 if (originalArrayOffset < 0) return false; \
3552 newOffset = originalOffset + alteration_set.getShift(originalOffset); \
3553 newArrayOffset = originalArrayOffset + \
3554 alteration_set.getInstPointShift(originalOffset); \
3555 oldJumpOffset = (ip->jumpAddr() + imageBaseAddr) - mutatee; \
3556 newJumpOffset = oldJumpOffset + alteration_set.getShift(oldJumpOffset); \
3557 newJumpAddr = newAdr + newJumpOffset; \
3558 adr = newAdr + newOffset;
3560 /****************************************************************************/
3561 /****************************************************************************/
3563 // update info about instrumentation points
3565 bool pd_Function::fillInRelocInstPoints(
3566 const image *owner, process *proc,
3567 instPoint *&location,
3568 relocatedFuncInfo *&reloc_info, Address mutatee,
3569 Address mutator,instruction oldCode[],
3570 Address newAdr,instruction newCode[],
3571 LocalAlterationSet &alteration_set) {
3573 unsigned retId = 0, callId = 0,arbitraryId = 0;
3574 int originalOffset, newOffset, originalArrayOffset, newArrayOffset;
3575 int oldJumpOffset, newJumpOffset;
3576 Address adr, newJumpAddr;
3578 instPoint *point = 0;
3582 #ifdef DEBUG_FUNC_RELOC
3583 cerr << "pd_Function::fillInRelocInstPoints called" <<endl;
3584 cerr << " mutator = " << mutator << " mutatee = " << mutatee
3585 << " newAdr = " << hex << newAdr << endl;
3588 Address imageBaseAddr;
3589 if (!proc->getBaseAddress(owner, imageBaseAddr))
3592 alteration_set.Collapse();
3594 // Add inst point corresponding to func entry....
3595 // Assumes function has single entry point
3596 if (funcEntry_ != NULL) {
3598 // figure out how far entry inst point is from beginning of function..
3599 CALC_OFFSETS(funcEntry_)
3601 point = new instPoint(this, owner, adr-imageBaseAddr, newCode[newArrayOffset]);
3603 #ifdef DEBUG_FUNC_RELOC
3604 cerr << " added entry point at originalOffset " << originalOffset
3605 << " newOffset " << newOffset << endl;
3608 assert(point != NULL);
3610 point->setRelocated();
3612 point->setJumpAddr(newJumpAddr-imageBaseAddr);
3614 instrAroundPt(point, newCode, funcEntry_->insnsBefore(),
3615 funcEntry_->insnsAfter() +
3616 alteration_set.numInstrAddedAfter(originalOffset),
3617 EntryPt, newArrayOffset);
3619 if (location == funcEntry_) {
3623 // update reloc_info with new instPoint
3624 reloc_info->addFuncEntry(point);
3625 assert(reloc_info->funcEntry());
3629 // Add inst points corresponding to func exits....
3630 for(retId=0;retId < funcReturns.size(); retId++) {
3632 CALC_OFFSETS(funcReturns[retId])
3634 point = new instPoint(this, owner, adr-imageBaseAddr, newCode[newArrayOffset]);
3636 #ifdef DEBUG_FUNC_RELOC
3637 cerr << " added return point at originalOffset " << originalOffset
3638 << " newOffset " << newOffset&n